Spaces:
Runtime error
Runtime error
| ''' | |
| import altair as alt | |
| import numpy as np | |
| import pandas as pd | |
| import streamlit as st | |
| """ | |
| # Welcome to Streamlit! | |
| Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:. | |
| If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community | |
| forums](https://discuss.streamlit.io). | |
| In the meantime, below is an example of what you can do with just a few lines of code: | |
| """ | |
| num_points = st.slider("Number of points in spiral", 1, 10000, 1100) | |
| num_turns = st.slider("Number of turns in spiral", 1, 300, 31) | |
| indices = np.linspace(0, 1, num_points) | |
| theta = 2 * np.pi * num_turns * indices | |
| radius = indices | |
| x = radius * np.cos(theta) | |
| y = radius * np.sin(theta) | |
| df = pd.DataFrame({ | |
| "x": x, | |
| "y": y, | |
| "idx": indices, | |
| "rand": np.random.randn(num_points), | |
| }) | |
| st.altair_chart(alt.Chart(df, height=700, width=700) | |
| .mark_point(filled=True) | |
| .encode( | |
| x=alt.X("x", axis=None), | |
| y=alt.Y("y", axis=None), | |
| color=alt.Color("idx", legend=None, scale=alt.Scale()), | |
| size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])), | |
| )) | |
| ''' | |
| ''' | |
| import streamlit as st | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| import torch | |
| import torch.nn.functional as F | |
| import os | |
| st.set_page_config(page_title="FinBERT Sentiment", layout="centered") | |
| st.title("💰 FinBERT: Financial Sentiment Analysis") | |
| st.markdown("Модель: `yiyanghkust/finbert-tone` — обучена на финансовых текстах") | |
| @st.cache_resource | |
| def load_model(): | |
| # Установка кастомного пути к кэшу | |
| cache_dir = "/tmp/huggingface" | |
| os.makedirs(cache_dir, exist_ok=True) | |
| tokenizer = AutoTokenizer.from_pretrained("yiyanghkust/finbert-tone", cache_dir=cache_dir) | |
| model = AutoModelForSequenceClassification.from_pretrained("yiyanghkust/finbert-tone", cache_dir=cache_dir) | |
| return tokenizer, model | |
| tokenizer, model = load_model() | |
| text = st.text_area("Введите финансовую новость или отчёт:", height=150) | |
| if st.button("Анализировать тональность") and text.strip(): | |
| inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True) | |
| with torch.no_grad(): | |
| outputs = model(**inputs) | |
| probs = F.softmax(outputs.logits, dim=1).squeeze() | |
| labels = ["📉 Negative", "😐 Neutral", "📈 Positive"] | |
| for label, prob in zip(labels, probs): | |
| st.write(f"**{label}:** {prob.item():.3f}") | |
| ''' | |
| import streamlit as st | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| import torch | |
| import torch.nn.functional as F | |
| import os | |
| import pandas as pd | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| from datetime import datetime | |
| import re | |
# Page configuration
PAGE_SETTINGS = {
    "page_title": "FinBERT Sentiment Analyzer",
    "page_icon": "💰",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**PAGE_SETTINGS)

# Custom CSS for better styling: header, per-sentiment result cards, metric box
CUSTOM_CSS = """
<style>
.main-header {
text-align: center;
color: #1f77b4;
margin-bottom: 2rem;
}
.sentiment-card {
padding: 1rem;
border-radius: 10px;
margin: 0.5rem 0;
text-align: center;
}
.negative { background-color: #ffebee; border-left: 5px solid #f44336; }
.neutral { background-color: #f3e5f5; border-left: 5px solid #9c27b0; }
.positive { background-color: #e8f5e8; border-left: 5px solid #4caf50; }
.metric-container {
background-color: #f8f9fa;
padding: 1rem;
border-radius: 10px;
margin: 1rem 0;
}
</style>
"""
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)

# Page title, styled by the .main-header rule above
PAGE_TITLE_HTML = '<h1 class="main-header">💰 FinBERT: Financial Sentiment Analysis</h1>'
st.markdown(PAGE_TITLE_HTML, unsafe_allow_html=True)

# Sidebar: model description followed by the user-adjustable settings
ABOUT_MARKDOWN = """
**Model:** `yiyanghkust/finbert-tone`
Trained specifically on financial texts for accurate sentiment analysis of:
- Financial news
- Earnings reports
- Market analysis
- Investment research
"""
with st.sidebar:
    st.header("ℹ️ About")
    st.markdown(ABOUT_MARKDOWN)
    st.header("⚙️ Settings")
    # Slider range is 0.0-1.0 with a default of 0.5
    confidence_threshold = st.slider(
        "Confidence Threshold",
        0.0,
        1.0,
        0.5,
        help="Minimum confidence for sentiment classification",
    )
    show_probabilities = st.checkbox("Show All Probabilities", value=True)
    # Switches the main page between single-text and batch mode
    batch_analysis = st.checkbox(
        "Enable Batch Analysis",
        help="Analyze multiple texts at once",
    )
_FINBERT_MODEL_NAME = "yiyanghkust/finbert-tone"
_HF_CACHE_DIR = "/tmp/huggingface"


@st.cache_resource(show_spinner="Loading FinBERT model... This may take a moment.")
def _load_finbert_cached(model_name, cache_dir):
    """Download (or reuse) the tokenizer and model for *model_name*.

    Wrapped in ``st.cache_resource`` so the objects are created once and
    shared across script reruns instead of being rebuilt on every widget
    interaction. A failure raises instead of returning, so no error result
    is stored in the cache and a later rerun can try again.

    Args:
        model_name: Hugging Face model identifier.
        cache_dir: Directory used for downloaded model files; created if missing.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    os.makedirs(cache_dir, exist_ok=True)
    tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        cache_dir=cache_dir,
    )
    return tokenizer, model


def load_model():
    """Load the FinBERT model and tokenizer with error handling.

    Returns:
        ``(tokenizer, model, None)`` on success, or ``(None, None, message)``
        where *message* is the stringified exception when loading fails.
    """
    try:
        tokenizer, model = _load_finbert_cached(_FINBERT_MODEL_NAME, _HF_CACHE_DIR)
    except Exception as e:  # UI boundary: report the failure to the caller as text
        return None, None, str(e)
    return tokenizer, model, None
def analyze_sentiment(text, tokenizer, model):
    """Classify the sentiment of *text* with the given tokenizer and model.

    Class probabilities are matched to sentiment names through
    ``model.config.id2label`` rather than a hard-coded order, because the
    class-id order is model-specific (finbert-tone documents
    0=Neutral, 1=Positive, 2=Negative).

    Args:
        text: Raw input text; runs of whitespace are collapsed before tokenizing.
        tokenizer: Callable returning model inputs as PyTorch tensors.
        model: Callable returning an object with a ``logits`` tensor.

    Returns:
        ``(sentiment_scores, primary_sentiment, max_prob, None)`` on success,
        where ``sentiment_scores`` maps "Negative", "Neutral" and "Positive"
        (in that order) to probabilities. On failure or empty input returns
        ``(None, None, None, error_message)``.
    """
    canonical_labels = ("Negative", "Neutral", "Positive")
    # Used only when the model config does not carry usable label names.
    finbert_tone_order = ("Neutral", "Positive", "Negative")
    try:
        # Preprocess text
        text = re.sub(r'\s+', ' ', text.strip())
        if not text:
            return None, None, None, "Input text is empty"

        inputs = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=512,
        )
        with torch.no_grad():
            outputs = model(**inputs)
        # A single text yields logits of shape (1, num_classes); flatten to 1-D.
        probs = F.softmax(outputs.logits, dim=-1).reshape(-1)

        id2label = getattr(getattr(model, "config", None), "id2label", None) or {}
        names = [
            str(id2label.get(i, id2label.get(str(i), ""))).capitalize()
            for i in range(len(probs))
        ]
        if set(names) != set(canonical_labels):
            # Generic names such as LABEL_0: fall back to the documented order.
            names = list(finbert_tone_order)

        by_name = {name: prob.item() for name, prob in zip(names, probs)}
        # Fixed key order keeps positional consumers (e.g. chart colors) aligned.
        sentiment_scores = {label: by_name[label] for label in canonical_labels}

        # Determine primary sentiment
        primary_sentiment = max(sentiment_scores, key=sentiment_scores.get)
        max_prob = sentiment_scores[primary_sentiment]
        return sentiment_scores, primary_sentiment, max_prob, None
    except Exception as e:  # UI boundary: surface any failure as a message
        return None, None, None, str(e)
def create_sentiment_chart(sentiment_scores):
    """Build a Plotly bar chart of the sentiment scores.

    Args:
        sentiment_scores: Mapping of sentiment label to score. Bars follow the
            mapping's iteration order, and the three bar colors are assigned
            by position in that same order.

    Returns:
        A ``plotly.graph_objects.Figure`` with one bar per label, each
        annotated with its score to three decimals, on a y-axis fixed to 0-1.
    """
    names = []
    scores = []
    for name, score in sentiment_scores.items():
        names.append(name)
        scores.append(score)

    bar_colors = ['#f44336', '#9c27b0', '#4caf50']
    bars = go.Bar(
        x=names,
        y=scores,
        marker_color=bar_colors,
        text=[format(score, '.3f') for score in scores],
        textposition='auto',
    )

    chart = go.Figure(data=[bars])
    layout_options = {
        "title": "Sentiment Analysis Results",
        "xaxis_title": "Sentiment",
        "yaxis_title": "Confidence Score",
        "yaxis": {"range": [0, 1]},
        "height": 400,
        "showlegend": False,
    }
    chart.update_layout(**layout_options)
    return chart
# Load model; load_model() reports failure through its third return value
load_result = load_model()
tokenizer, model, error = load_result

if error:
    # Show the failure and halt the script so nothing below runs without a model
    st.error(f"Failed to load model: {error}")
    st.stop()

if all((tokenizer, model)):
    st.success("✅ FinBERT model loaded successfully!")
# Main analysis interface
SINGLE_TEXT_KEY = "single_text_input"
SENTIMENT_EMOJIS = {"Negative": "📉", "Neutral": "😐", "Positive": "📈"}
SENTIMENT_COLORS = {"Negative": "red", "Neutral": "gray", "Positive": "green"}


def _clear_single_text():
    """Button callback: empty the single-text input before the script reruns."""
    st.session_state[SINGLE_TEXT_KEY] = ""


def _timestamped_filename(prefix):
    """Return '<prefix>_<YYYYmmdd_HHMMSS>.csv' using the current local time."""
    return f"{prefix}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"


def _render_single_result(text, sentiment_scores, primary_sentiment, confidence):
    """Render metrics, optional per-class cards and the chart for one text."""
    st.header("📊 Analysis Results")

    # Primary sentiment with confidence
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric(
            "Primary Sentiment",
            f"{SENTIMENT_EMOJIS[primary_sentiment]} {primary_sentiment}",
            delta=f"{confidence:.1%} confidence",
        )
    with col2:
        st.metric(
            "Text Length",
            f"{len(text)} characters",
            delta=f"{len(text.split())} words",
        )
    with col3:
        reliability = "High" if confidence > 0.7 else "Medium" if confidence > 0.5 else "Low"
        st.metric("Reliability", reliability)

    # Apply the sidebar setting, which was previously collected but never used.
    if confidence < confidence_threshold:
        st.warning(
            f"Confidence {confidence:.1%} is below the configured threshold of "
            f"{confidence_threshold:.0%}; treat this classification with caution."
        )

    # Detailed probabilities
    if show_probabilities:
        st.subheader("Detailed Sentiment Scores")
        for sentiment, score in sentiment_scores.items():
            # The CSS classes are the lower-cased label names.
            card_html = (
                f'<div class="sentiment-card {sentiment.lower()}">'
                f'<h4>{SENTIMENT_EMOJIS[sentiment]} {sentiment}</h4>'
                f'<h2>{score:.3f}</h2>'
                '<div style="width: 100%; background-color: #ddd; border-radius: 25px;">'
                f'<div style="width: {score * 100}%; height: 10px; '
                f'background-color: {SENTIMENT_COLORS[sentiment]}; border-radius: 25px;"></div>'
                '</div>'
                '</div>'
            )
            st.markdown(card_html, unsafe_allow_html=True)

    # Visualization
    st.subheader("📈 Sentiment Visualization")
    fig = create_sentiment_chart(sentiment_scores)
    st.plotly_chart(fig, use_container_width=True)


def _analyze_text_lines(texts):
    """Analyze each string in *texts*; return (result_rows, failure_count)."""
    rows = []
    failures = 0
    progress_bar = st.progress(0)
    for position, item in enumerate(texts, start=1):
        scores, primary, confidence, error = analyze_sentiment(item, tokenizer, model)
        if error:
            failures += 1
        else:
            rows.append({
                'Text': item[:100] + '...' if len(item) > 100 else item,
                'Primary Sentiment': primary,
                'Confidence': confidence,
                'Negative': scores['Negative'],
                'Neutral': scores['Neutral'],
                'Positive': scores['Positive'],
            })
        progress_bar.progress(position / len(texts))
    return rows, failures


def _analyze_csv_rows(df):
    """Analyze the 'text' column of *df*; return (enriched_rows, skipped_count).

    Rows whose text is missing or blank are skipped rather than being
    analyzed as the literal string "nan". Progress is driven by row position,
    not by the DataFrame index label.
    """
    rows = []
    skipped = 0
    total = len(df)
    progress_bar = st.progress(0)
    for position, (_, row) in enumerate(df.iterrows(), start=1):
        raw_text = row['text']
        error = "empty"
        if not pd.isna(raw_text) and str(raw_text).strip():
            scores, primary, confidence, error = analyze_sentiment(str(raw_text), tokenizer, model)
        if error:
            skipped += 1
        else:
            enriched = row.to_dict()
            enriched.update({
                'Primary Sentiment': primary,
                'Confidence': confidence,
                'Negative Score': scores['Negative'],
                'Neutral Score': scores['Neutral'],
                'Positive Score': scores['Positive'],
            })
            rows.append(enriched)
        progress_bar.progress(position / total)
    return rows, skipped


def _render_sentiment_counts(df):
    """Show count and share of Positive / Neutral / Negative primary labels."""
    counts = df['Primary Sentiment'].value_counts()
    for column, label in zip(st.columns(3), ("Positive", "Neutral", "Negative")):
        count = int(counts.get(label, 0))
        with column:
            st.metric(f"{label} Texts", count, f"{count / len(df) * 100:.1f}%")


if not batch_analysis:
    st.header("📝 Single Text Analysis")
    text = st.text_area(
        "Enter financial news, report, or analysis:",
        height=150,
        placeholder="Example: The company reported strong quarterly earnings with revenue growth of 15% year-over-year...",
        key=SINGLE_TEXT_KEY,
    )
    col1, col2, _ = st.columns([1, 1, 2])
    with col1:
        analyze_button = st.button("🔍 Analyze Sentiment", type="primary")
    with col2:
        # A bare rerun keeps widget state; the callback resets the keyed input.
        st.button("🗑️ Clear", on_click=_clear_single_text)

    if analyze_button and not text.strip():
        st.warning("Please enter some text to analyze.")
    elif analyze_button:
        with st.spinner("Analyzing sentiment..."):
            sentiment_scores, primary_sentiment, confidence, error = analyze_sentiment(text, tokenizer, model)
        if error:
            st.error(f"Analysis failed: {error}")
        else:
            _render_single_result(text, sentiment_scores, primary_sentiment, confidence)
else:
    # Batch analysis mode
    st.header("📊 Batch Analysis")
    # Option to upload file or enter multiple texts
    analysis_method = st.radio(
        "Choose analysis method:",
        ["Enter multiple texts", "Upload CSV file"],
    )
    if analysis_method == "Enter multiple texts":
        texts_input = st.text_area(
            "Enter multiple texts (one per line):",
            height=200,
            placeholder="Text 1: Company reports strong earnings...\nText 2: Market volatility increases...\nText 3: New regulations impact sector...",
        )
        if st.button("🔍 Analyze All Texts") and texts_input.strip():
            texts = [line.strip() for line in texts_input.split('\n') if line.strip()]
            results, failures = _analyze_text_lines(texts)
            if failures:
                st.warning(f"{failures} of {len(texts)} texts could not be analyzed and were skipped.")
            if results:
                df = pd.DataFrame(results)
                # Summary statistics
                st.subheader("📈 Batch Analysis Summary")
                _render_sentiment_counts(df)
                # Results table
                st.subheader("📋 Detailed Results")
                st.dataframe(df, use_container_width=True)
                # Download results
                st.download_button(
                    "📥 Download Results (CSV)",
                    df.to_csv(index=False),
                    _timestamped_filename("sentiment_analysis"),
                    "text/csv",
                )
    elif analysis_method == "Upload CSV file":
        uploaded_file = st.file_uploader(
            "Choose a CSV file with a 'text' column",
            type=['csv'],
        )
        if uploaded_file is not None:
            try:
                df = pd.read_csv(uploaded_file)
                if 'text' not in df.columns:
                    st.error("CSV file must contain a 'text' column")
                else:
                    st.write(f"Loaded {len(df)} texts from CSV file")
                    st.dataframe(df.head(), use_container_width=True)
                    if st.button("🔍 Analyze CSV Data"):
                        results, skipped = _analyze_csv_rows(df)
                        if skipped:
                            st.warning(
                                f"{skipped} of {len(df)} rows were skipped "
                                "(empty text or analysis error)."
                            )
                        if results:
                            results_df = pd.DataFrame(results)
                            # Display results
                            st.subheader("📋 Analysis Results")
                            st.dataframe(results_df, use_container_width=True)
                            # Download enhanced results
                            st.download_button(
                                "📥 Download Enhanced Results (CSV)",
                                results_df.to_csv(index=False),
                                _timestamped_filename("enhanced_sentiment_analysis"),
                                "text/csv",
                            )
            except Exception as e:  # UI boundary: show any CSV/processing failure
                st.error(f"Error processing CSV file: {str(e)}")
# Footer: horizontal rule followed by a centered usage tip and credits
FOOTER_HTML = """
<div style='text-align: center; color: #666; margin-top: 2rem;'>
<p>💡 <strong>Tip:</strong> For best results, use complete sentences and financial context</p>
<p>Built with Streamlit • Powered by FinBERT</p>
</div>
"""
st.markdown("---")
st.markdown(FOOTER_HTML, unsafe_allow_html=True)