Spaces:

ResearchEngineering
/

news_sentiment_analyzer

Runtime error

App Files Files Community

ResearchEngineering committed on Aug 1

Commit

200ce84

verified ·

1 Parent(s): 596bb4b

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +367 -1

src/streamlit_app.py CHANGED Viewed

@@ -43,6 +43,8 @@ st.altair_chart(alt.Chart(df, height=700, width=700)
 '''
 import streamlit as st
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
@@ -75,4 +77,368 @@ if st.button("Анализировать тональность") and text.strip
     labels = ["📉 Negative", "😐 Neutral", "📈 Positive"]
     for label, prob in zip(labels, probs):
-        st.write(f"**{label}:** {prob.item():.3f}")

 '''
+'''
 import streamlit as st
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
     labels = ["📉 Negative", "😐 Neutral", "📈 Positive"]
     for label, prob in zip(labels, probs):
+        st.write(f"**{label}:** {prob.item():.3f}")
+'''
+import streamlit as st
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+import torch.nn.functional as F
+import os
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+from datetime import datetime
+import re
+# Page configuration: tab title and icon, wide layout, sidebar expanded on load.
+st.set_page_config(
+    page_title="FinBERT Sentiment Analyzer",
+    page_icon="💰",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# Custom CSS injected as raw HTML.
+# - .main-header styles the <h1> title rendered right after this block.
+# - .sentiment-card plus .negative/.neutral/.positive style the per-sentiment
+#   cards that single-text mode renders with st.markdown.
+# - .metric-container is not referenced anywhere else in the visible code.
+st.markdown("""
+<style>
+    .main-header {
+        text-align: center;
+        color: #1f77b4;
+        margin-bottom: 2rem;
+    }
+    .sentiment-card {
+        padding: 1rem;
+        border-radius: 10px;
+        margin: 0.5rem 0;
+        text-align: center;
+    }
+    .negative { background-color: #ffebee; border-left: 5px solid #f44336; }
+    .neutral { background-color: #f3e5f5; border-left: 5px solid #9c27b0; }
+    .positive { background-color: #e8f5e8; border-left: 5px solid #4caf50; }
+    .metric-container {
+        background-color: #f8f9fa;
+        padding: 1rem;
+        border-radius: 10px;
+        margin: 1rem 0;
+    }
+</style>
+""", unsafe_allow_html=True)
+# Page title, rendered as HTML so it can carry the .main-header class.
+st.markdown('<h1 class="main-header">💰 FinBERT: Financial Sentiment Analysis</h1>', unsafe_allow_html=True)
+# Sidebar: static model description plus the three user settings below.
+with st.sidebar:
+    st.header("ℹ️ About")
+    st.markdown("""
+    **Model:** `yiyanghkust/finbert-tone`
+    Trained specifically on financial texts for accurate sentiment analysis of:
+    - Financial news
+    - Earnings reports
+    - Market analysis
+    - Investment research
+    """)
+    st.header("⚙️ Settings")
+    # Slider over 0.0-1.0, default 0.5.
+    # NOTE(review): confidence_threshold is not read anywhere else in the
+    # visible code; the "Reliability" metric uses fixed 0.7 / 0.5 cut-offs.
+    confidence_threshold = st.slider("Confidence Threshold", 0.0, 1.0, 0.5, help="Minimum confidence for sentiment classification")
+    # Controls whether single-text mode renders the per-sentiment score cards.
+    show_probabilities = st.checkbox("Show All Probabilities", value=True)
+    # Switches the main area between single-text mode and batch mode.
+    batch_analysis = st.checkbox("Enable Batch Analysis", help="Analyze multiple texts at once")
+@st.cache_resource(show_spinner=False)
+def _load_finbert(model_name, cache_dir):
+    """Load the tokenizer and model for ``model_name``, raising on failure.
+
+    Only this helper is cached. Because it raises instead of returning an
+    error value, a failed attempt leaves nothing in the resource cache and
+    the next script run retries the load.
+
+    Args:
+        model_name: Hugging Face model identifier to load.
+        cache_dir: Directory used for the downloaded files; created if missing.
+
+    Returns:
+        A ``(tokenizer, model)`` tuple.
+    """
+    os.makedirs(cache_dir, exist_ok=True)
+    with st.spinner("Loading FinBERT model... This may take a moment."):
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_name,
+            cache_dir=cache_dir
+        )
+        model = AutoModelForSequenceClassification.from_pretrained(
+            model_name,
+            cache_dir=cache_dir
+        )
+    return tokenizer, model
+def load_model():
+    """Load FinBERT model and tokenizer with error handling.
+
+    Returns:
+        ``(tokenizer, model, None)`` on success, or ``(None, None, message)``
+        where ``message`` is the stringified exception on failure.
+    """
+    # The try/except lives outside the cached helper on purpose: caching the
+    # error tuple itself would pin a transient failure (e.g. a network
+    # hiccup during download) for the lifetime of the server process.
+    try:
+        tokenizer, model = _load_finbert("yiyanghkust/finbert-tone", "/tmp/huggingface")
+    except Exception as e:
+        return None, None, str(e)
+    return tokenizer, model, None
+def analyze_sentiment(text, tokenizer, model):
+    """Classify ``text`` and return per-sentiment probabilities.
+
+    Args:
+        text: Raw input string; whitespace runs are collapsed before tokenizing.
+        tokenizer: Callable that accepts the text plus the keyword arguments
+            used below and returns a mapping passed to ``model`` as kwargs.
+        model: Callable returning an object with a ``logits`` tensor. If it
+            exposes ``config.id2label``, that mapping decides which logit
+            index belongs to which sentiment.
+
+    Returns:
+        ``(sentiment_scores, primary_sentiment, max_prob, None)`` on success,
+        where ``sentiment_scores`` is a dict keyed "Negative", "Neutral",
+        "Positive" (in that order). On any exception returns
+        ``(None, None, None, message)``.
+    """
+    try:
+        # Collapse every whitespace run (including newlines) to one space.
+        text = re.sub(r'\s+', ' ', text.strip())
+        inputs = tokenizer(
+            text,
+            return_tensors="pt",
+            truncation=True,
+            padding=True,
+            max_length=512
+        )
+        with torch.no_grad():
+            outputs = model(**inputs)
+            probs = F.softmax(outputs.logits, dim=1).squeeze()
+        # Callers index the result by these exact keys and rely on this order
+        # (the bar chart assigns colours by position).
+        labels = ["Negative", "Neutral", "Positive"]
+        # Resolve the logit index of each label from the model's own config
+        # instead of assuming the output order matches ``labels``.
+        id2label = getattr(getattr(model, "config", None), "id2label", None) or {}
+        index_by_label = {str(name).capitalize(): int(idx) for idx, name in id2label.items()}
+        if set(index_by_label) == set(labels):
+            sentiment_scores = {label: probs[index_by_label[label]].item() for label in labels}
+        else:
+            # No usable mapping (missing config or generic LABEL_n names):
+            # keep the previous positional behaviour.
+            sentiment_scores = {label: prob.item() for label, prob in zip(labels, probs)}
+        # Determine primary sentiment
+        primary_sentiment = max(sentiment_scores, key=sentiment_scores.get)
+        max_prob = sentiment_scores[primary_sentiment]
+        return sentiment_scores, primary_sentiment, max_prob, None
+    except Exception as e:
+        return None, None, None, str(e)
+def create_sentiment_chart(sentiment_scores):
+    """Build a Plotly bar chart with one bar per entry of ``sentiment_scores``.
+
+    Args:
+        sentiment_scores: Mapping of sentiment label to score. Bars follow the
+            mapping's iteration order and the three bar colours are assigned
+            by position, not by label.
+
+    Returns:
+        The configured ``go.Figure``.
+    """
+    names = []
+    scores = []
+    for name, score in sentiment_scores.items():
+        names.append(name)
+        scores.append(score)
+    # Each bar is annotated with its score to three decimals.
+    bar_trace = go.Bar(
+        x=names,
+        y=scores,
+        marker_color=['#f44336', '#9c27b0', '#4caf50'],
+        text=[format(score, '.3f') for score in scores],
+        textposition='auto',
+    )
+    chart = go.Figure(data=[bar_trace])
+    # Fix the y-axis to [0, 1] so charts are comparable between runs.
+    chart.update_layout(
+        title="Sentiment Analysis Results",
+        xaxis_title="Sentiment",
+        yaxis_title="Confidence Score",
+        yaxis={'range': [0, 1]},
+        height=400,
+        showlegend=False
+    )
+    return chart
+# Load model. load_model() returns (tokenizer, model, error); error is a
+# message string on failure and None on success.
+tokenizer, model, error = load_model()
+if error:
+    st.error(f"Failed to load model: {error}")
+    # Stop this script run here; the UI below needs the model.
+    st.stop()
+if tokenizer and model:
+    st.success("✅ FinBERT model loaded successfully!")
+    # Main analysis interface: the sidebar's batch_analysis checkbox selects
+    # between single-text mode (below) and batch mode (else branch).
+    if not batch_analysis:
+        st.header("📝 Single Text Analysis")
+        text = st.text_area(
+            "Enter financial news, report, or analysis:",
+            height=150,
+            placeholder="Example: The company reported strong quarterly earnings with revenue growth of 15% year-over-year..."
+        )
+        # Three columns are created but only col1 and col2 are used before
+        # the names are rebound for the results row further down.
+        col1, col2, col3 = st.columns([1, 1, 2])
+        with col1:
+            analyze_button = st.button("🔍 Analyze Sentiment", type="primary")
+        with col2:
+            clear_button = st.button("🗑️ Clear")
+        # NOTE(review): this only triggers a rerun; the text_area above has no
+        # key and nothing here resets its value — confirm that "Clear"
+        # actually empties the input.
+        if clear_button:
+            st.rerun()
+        # Whitespace-only input is ignored silently (no message is shown).
+        if analyze_button and text.strip():
+            with st.spinner("Analyzing sentiment..."):
+                # Rebinds the module-level `error` that held the load_model() result.
+                sentiment_scores, primary_sentiment, confidence, error = analyze_sentiment(text, tokenizer, model)
+            if error:
+                st.error(f"Analysis failed: {error}")
+            else:
+                # Results section
+                st.header("📊 Analysis Results")
+                # Primary sentiment with confidence
+                col1, col2, col3 = st.columns(3)
+                # Lookup tables keyed by the labels analyze_sentiment returns.
+                sentiment_emojis = {"Negative": "📉", "Neutral": "😐", "Positive": "📈"}
+                sentiment_colors = {"Negative": "red", "Neutral": "gray", "Positive": "green"}
+                with col1:
+                    st.metric(
+                        "Primary Sentiment",
+                        f"{sentiment_emojis[primary_sentiment]} {primary_sentiment}",
+                        delta=f"{confidence:.1%} confidence"
+                    )
+                with col2:
+                    # Length of the raw input as typed (analyze_sentiment
+                    # normalizes whitespace on its own copy).
+                    st.metric(
+                        "Text Length",
+                        f"{len(text)} characters",
+                        delta=f"{len(text.split())} words"
+                    )
+                with col3:
+                    # Fixed cut-offs: > 0.7 High, > 0.5 Medium, otherwise Low.
+                    # The sidebar's confidence_threshold is not consulted here.
+                    reliability = "High" if confidence > 0.7 else "Medium" if confidence > 0.5 else "Low"
+                    st.metric("Reliability", reliability)
+                # Detailed probabilities
+                if show_probabilities:
+                    st.subheader("Detailed Sentiment Scores")
+                    for sentiment, score in sentiment_scores.items():
+                        emoji = sentiment_emojis[sentiment]
+                        # CSS class name defined in the page's <style> block.
+                        color = "negative" if sentiment == "Negative" else "neutral" if sentiment == "Neutral" else "positive"
+                        # One card per sentiment: label, score to 3 decimals,
+                        # and a bar whose width is the score as a percentage.
+                        st.markdown(f"""
+                        <div class="sentiment-card {color}">
+                            <h4>{emoji} {sentiment}</h4>
+                            <h2>{score:.3f}</h2>
+                            <div style="width: 100%; background-color: #ddd; border-radius: 25px;">
+                                <div style="width: {score*100}%; height: 10px; background-color: {sentiment_colors[sentiment]}; border-radius: 25px;"></div>
+                            </div>
+                        </div>
+                        """, unsafe_allow_html=True)
+                # Visualization
+                st.subheader("📈 Sentiment Visualization")
+                fig = create_sentiment_chart(sentiment_scores)
+                st.plotly_chart(fig, use_container_width=True)
+    else:
+        # Batch analysis mode
+        st.header("📊 Batch Analysis")
+        # Option to upload file or enter multiple texts
+        analysis_method = st.radio(
+            "Choose analysis method:",
+            ["Enter multiple texts", "Upload CSV file"]
+        )
+        if analysis_method == "Enter multiple texts":
+            texts_input = st.text_area(
+                "Enter multiple texts (one per line):",
+                height=200,
+                placeholder="Text 1: Company reports strong earnings...\nText 2: Market volatility increases...\nText 3: New regulations impact sector..."
+            )
+            if st.button("🔍 Analyze All Texts") and texts_input.strip():
+                # One text per non-blank line, stripped of surrounding whitespace.
+                texts = [text.strip() for text in texts_input.split('\n') if text.strip()]
+                if texts:
+                    results = []
+                    progress_bar = st.progress(0)
+                    for i, text in enumerate(texts):
+                        sentiment_scores, primary_sentiment, confidence, error = analyze_sentiment(text, tokenizer, model)
+                        # Texts whose analysis fails are skipped without any
+                        # message, so the table may have fewer rows than inputs.
+                        if not error:
+                            results.append({
+                                # Only the first 100 characters are kept for display/export.
+                                'Text': text[:100] + '...' if len(text) > 100 else text,
+                                'Primary Sentiment': primary_sentiment,
+                                'Confidence': confidence,
+                                'Negative': sentiment_scores['Negative'],
+                                'Neutral': sentiment_scores['Neutral'],
+                                'Positive': sentiment_scores['Positive']
+                            })
+                        progress_bar.progress((i + 1) / len(texts))
+                    if results:
+                        df = pd.DataFrame(results)
+                        # Summary statistics
+                        # Counts and percentages are relative to the successfully
+                        # analyzed rows in df, not to the number of input lines.
+                        st.subheader("📈 Batch Analysis Summary")
+                        col1, col2, col3 = st.columns(3)
+                        with col1:
+                            positive_count = len(df[df['Primary Sentiment'] == 'Positive'])
+                            st.metric("Positive Texts", positive_count, f"{positive_count/len(df)*100:.1f}%")
+                        with col2:
+                            neutral_count = len(df[df['Primary Sentiment'] == 'Neutral'])
+                            st.metric("Neutral Texts", neutral_count, f"{neutral_count/len(df)*100:.1f}%")
+                        with col3:
+                            negative_count = len(df[df['Primary Sentiment'] == 'Negative'])
+                            st.metric("Negative Texts", negative_count, f"{negative_count/len(df)*100:.1f}%")
+                        # Results table
+                        st.subheader("📋 Detailed Results")
+                        st.dataframe(df, use_container_width=True)
+                        # Download results
+                        # File name carries a local-time timestamp (YYYYMMDD_HHMMSS).
+                        csv = df.to_csv(index=False)
+                        st.download_button(
+                            "📥 Download Results (CSV)",
+                            csv,
+                            f"sentiment_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
+                            "text/csv"
+                        )
+        elif analysis_method == "Upload CSV file":
+            uploaded_file = st.file_uploader(
+                "Choose a CSV file with a 'text' column",
+                type=['csv']
+            )
+            if uploaded_file is not None:
+                # Any exception from reading or processing the file is shown
+                # via the st.error call in the except clause at the bottom.
+                try:
+                    df = pd.read_csv(uploaded_file)
+                    if 'text' not in df.columns:
+                        st.error("CSV file must contain a 'text' column")
+                    else:
+                        st.write(f"Loaded {len(df)} texts from CSV file")
+                        # Preview of the first rows only.
+                        st.dataframe(df.head(), use_container_width=True)
+                        if st.button("🔍 Analyze CSV Data"):
+                            results = []
+                            progress_bar = st.progress(0)
+                            # NOTE(review): `i` is the DataFrame index label; the
+                            # progress fraction below assumes it runs 0..len(df)-1
+                            # (the default index from read_csv) — confirm.
+                            for i, row in df.iterrows():
+                                # str() coerces non-string cells; a missing value
+                                # would presumably become the text "nan" — verify.
+                                text = str(row['text'])
+                                sentiment_scores, primary_sentiment, confidence, error = analyze_sentiment(text, tokenizer, model)
+                                # Rows whose analysis fails are dropped from the output.
+                                if not error:
+                                    # Keep every original column and append the scores.
+                                    result_row = row.to_dict()
+                                    result_row.update({
+                                        'Primary Sentiment': primary_sentiment,
+                                        'Confidence': confidence,
+                                        'Negative Score': sentiment_scores['Negative'],
+                                        'Neutral Score': sentiment_scores['Neutral'],
+                                        'Positive Score': sentiment_scores['Positive']
+                                    })
+                                    results.append(result_row)
+                                progress_bar.progress((i + 1) / len(df))
+                            if results:
+                                results_df = pd.DataFrame(results)
+                                # Display results
+                                st.subheader("📋 Analysis Results")
+                                st.dataframe(results_df, use_container_width=True)
+                                # Download enhanced results
+                                csv = results_df.to_csv(index=False)
+                                st.download_button(
+                                    "📥 Download Enhanced Results (CSV)",
+                                    csv,
+                                    f"enhanced_sentiment_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
+                                    "text/csv"
+                                )
+                except Exception as e:
+                    st.error(f"Error processing CSV file: {str(e)}")
+# Footer: a horizontal rule followed by a centered usage tip and credits,
+# rendered as raw HTML.
+st.markdown("---")
+st.markdown("""
+<div style='text-align: center; color: #666; margin-top: 2rem;'>
+    <p>💡 <strong>Tip:</strong> For best results, use complete sentences and financial context</p>
+    <p>Built with Streamlit • Powered by FinBERT</p>
+</div>
+""", unsafe_allow_html=True)