# 💰 FinBERT: Financial Sentiment Analysis

'''

import altair as alt
import numpy as np
import pandas as pd
import streamlit as st

"""
# Welcome to Streamlit!

Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
forums](https://discuss.streamlit.io).

In the meantime, below is an example of what you can do with just a few lines of code:
"""

num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
num_turns = st.slider("Number of turns in spiral", 1, 300, 31)

indices = np.linspace(0, 1, num_points)
theta = 2 * np.pi * num_turns * indices
radius = indices

x = radius * np.cos(theta)
y = radius * np.sin(theta)

df = pd.DataFrame({
    "x": x,
    "y": y,
    "idx": indices,
    "rand": np.random.randn(num_points),
})

st.altair_chart(alt.Chart(df, height=700, width=700)
    .mark_point(filled=True)
    .encode(
        x=alt.X("x", axis=None),
        y=alt.Y("y", axis=None),
        color=alt.Color("idx", legend=None, scale=alt.Scale()),
        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
    ))

'''


# Previous minimal version of the app, kept for reference only.
#
# It is stored as comments rather than as a bare triple-quoted string:
# Streamlit "magic" renders standalone string literals (other than the leading
# module docstring) on the page, which would display this text and emit output
# before st.set_page_config() is called.
#
# import streamlit as st
# from transformers import AutoTokenizer, AutoModelForSequenceClassification
# import torch
# import torch.nn.functional as F
# import os
#
# st.set_page_config(page_title="FinBERT Sentiment", layout="centered")
# st.title("💰 FinBERT: Financial Sentiment Analysis")
# st.markdown("Модель: `yiyanghkust/finbert-tone` — обучена на финансовых текстах")
#
# @st.cache_resource
# def load_model():
#     # Set a custom cache directory path
#     cache_dir = "/tmp/huggingface"
#     os.makedirs(cache_dir, exist_ok=True)
#
#     tokenizer = AutoTokenizer.from_pretrained("yiyanghkust/finbert-tone", cache_dir=cache_dir)
#     model = AutoModelForSequenceClassification.from_pretrained("yiyanghkust/finbert-tone", cache_dir=cache_dir)
#     return tokenizer, model
#
# tokenizer, model = load_model()
#
# text = st.text_area("Введите финансовую новость или отчёт:", height=150)
#
# if st.button("Анализировать тональность") and text.strip():
#     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
#     with torch.no_grad():
#         outputs = model(**inputs)
#         probs = F.softmax(outputs.logits, dim=1).squeeze()
#
#     labels = ["📉 Negative", "😐 Neutral", "📈 Positive"]
#     for label, prob in zip(labels, probs):
#         st.write(f"**{label}:** {prob.item():.3f}")


import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F
import os
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
import re

# Browser-tab metadata and overall page layout, applied in a single call.
_PAGE_SETTINGS = {
    "page_title": "FinBERT Sentiment Analyzer",
    "page_icon": "💰",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)

# App-wide stylesheet: centred header, colour-coded sentiment cards and a
# padded metric container. Injected as raw HTML, hence unsafe_allow_html.
_CUSTOM_CSS = """
<style>
    .main-header {
        text-align: center;
        color: #1f77b4;
        margin-bottom: 2rem;
    }
    .sentiment-card {
        padding: 1rem;
        border-radius: 10px;
        margin: 0.5rem 0;
        text-align: center;
    }
    .negative { background-color: #ffebee; border-left: 5px solid #f44336; }
    .neutral { background-color: #f3e5f5; border-left: 5px solid #9c27b0; }
    .positive { background-color: #e8f5e8; border-left: 5px solid #4caf50; }
    .metric-container {
        background-color: #f8f9fa;
        padding: 1rem;
        border-radius: 10px;
        margin: 1rem 0;
    }
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# Page title, emitted as HTML so the .main-header rule above applies to it.
_PAGE_TITLE_HTML = '<h1 class="main-header">💰 FinBERT: Financial Sentiment Analysis</h1>'
st.markdown(_PAGE_TITLE_HTML, unsafe_allow_html=True)

# Sidebar: a short description of the model, then the analysis settings.
# The three settings are module-level names read by the main interface below.
sidebar = st.sidebar

sidebar.header("ℹ️ About")
sidebar.markdown("""
    **Model:** `yiyanghkust/finbert-tone`  
    Trained specifically on financial texts for accurate sentiment analysis of:
    - Financial news
    - Earnings reports  
    - Market analysis
    - Investment research
    """)

sidebar.header("⚙️ Settings")
confidence_threshold = sidebar.slider(
    "Confidence Threshold",
    min_value=0.0,
    max_value=1.0,
    value=0.5,
    help="Minimum confidence for sentiment classification",
)
show_probabilities = sidebar.checkbox("Show All Probabilities", value=True)
batch_analysis = sidebar.checkbox(
    "Enable Batch Analysis",
    help="Analyze multiple texts at once",
)

@st.cache_resource(show_spinner=False)
def _load_finbert(model_name, cache_dir):
    """Load the tokenizer and classification model for ``model_name``.

    Only this function is cached. It lets exceptions propagate so that a
    failed attempt is never stored in the resource cache.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        cache_dir=cache_dir,
    )
    return tokenizer, model


def load_model():
    """Load the FinBERT tokenizer and model, reporting failures as a value.

    Returns:
        ``(tokenizer, model, None)`` on success, or ``(None, None, message)``
        when loading fails. ``message`` is never empty, so callers can test it
        for truthiness.

    The error handling lives outside the cached loader on purpose: wrapping it
    inside the cached function would cache the failure tuple, and a transient
    download error would then persist until the server is restarted.
    """
    try:
        cache_dir = "/tmp/huggingface"
        os.makedirs(cache_dir, exist_ok=True)

        with st.spinner("Loading FinBERT model... This may take a moment."):
            tokenizer, model = _load_finbert("yiyanghkust/finbert-tone", cache_dir)
        return tokenizer, model, None
    except Exception as e:
        # Some exceptions carry no message; fall back to the class name.
        return None, None, str(e) or type(e).__name__


# Keep the cache-clearing hook that the decorated function used to expose.
load_model.clear = _load_finbert.clear

def _label_order(model):
    """Return the sentiment names in the order of the model's output indices.

    The names are read from ``model.config.id2label``. If that mapping is
    missing or does not contain exactly Negative/Neutral/Positive, the order
    documented for ``yiyanghkust/finbert-tone`` is used instead
    (0 = Neutral, 1 = Positive, 2 = Negative).
    """
    documented = ["Neutral", "Positive", "Negative"]
    id2label = getattr(getattr(model, "config", None), "id2label", None) or {}
    names = []
    for index in range(len(documented)):
        # Keys are ints on a loaded config but strings in raw JSON.
        raw = id2label.get(index, id2label.get(str(index), ""))
        names.append(str(raw).strip().capitalize())
    return names if sorted(names) == sorted(documented) else documented


def analyze_sentiment(text, tokenizer, model):
    """Classify the sentiment of one text.

    Args:
        text: The text to classify; runs of whitespace are collapsed first.
        tokenizer: Callable producing the model inputs for ``text``.
        model: Callable returning an object with a ``logits`` tensor of shape
            ``(1, num_labels)``.

    Returns:
        ``(sentiment_scores, primary_sentiment, max_prob, None)`` on success,
        where ``sentiment_scores`` maps ``"Negative"``, ``"Neutral"`` and
        ``"Positive"`` (in that order) to probabilities. On failure returns
        ``(None, None, None, message)`` with a non-empty message.
    """
    try:
        # Preprocess text
        text = re.sub(r'\s+', ' ', text.strip())

        inputs = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=512
        )

        with torch.no_grad():
            outputs = model(**inputs)
            # One text per call, so take the single row of the batch.
            probs = F.softmax(outputs.logits, dim=1)[0]

        # Pair each probability with the label the model assigns to that
        # output index, then present them in a fixed display order.
        by_name = dict(zip(_label_order(model), probs.tolist()))
        sentiment_scores = {
            name: by_name[name] for name in ("Negative", "Neutral", "Positive")
        }

        # Determine primary sentiment
        primary_sentiment = max(sentiment_scores, key=sentiment_scores.get)
        max_prob = sentiment_scores[primary_sentiment]

        return sentiment_scores, primary_sentiment, max_prob, None
    except Exception as e:
        # Some exceptions carry no message; fall back to the class name.
        return None, None, None, str(e) or type(e).__name__

def create_sentiment_chart(sentiment_scores):
    """Build a bar chart with one bar per entry of ``sentiment_scores``.

    Args:
        sentiment_scores: Mapping of sentiment name to score. Bars follow the
            mapping's iteration order, and the three bar colours are assigned
            by position in that order.

    Returns:
        A Plotly figure with the y-axis fixed to the range 0-1.
    """
    palette = ['#f44336', '#9c27b0', '#4caf50']

    names = list(sentiment_scores)
    scores = [sentiment_scores[name] for name in names]
    bar_text = [f'{score:.3f}' for score in scores]

    bars = go.Bar(
        x=names,
        y=scores,
        marker_color=palette,
        text=bar_text,
        textposition='auto',
    )
    chart = go.Figure(data=[bars])

    layout_options = dict(
        title="Sentiment Analysis Results",
        xaxis_title="Sentiment",
        yaxis_title="Confidence Score",
        yaxis=dict(range=[0, 1]),
        height=400,
        showlegend=False,
    )
    chart.update_layout(**layout_options)

    return chart

# Load model
tokenizer, model, error = load_model()

# load_model() returns None for the tokenizer and model when loading fails.
# Check for that as well as the message: an exception with no text produces an
# empty error string, which would otherwise let a failed load pass this guard.
if error or tokenizer is None or model is None:
    st.error(f"Failed to load model: {error or 'unknown error'}")
    st.stop()

def _clear_single_text():
    """Empty the single-text input box (callback for the Clear button).

    Widget state survives a plain rerun, so the value has to be reset through
    session state, and that is only permitted from a callback like this one.
    """
    st.session_state["single_text_input"] = ""


def _analyze_with_progress(texts):
    """Run ``analyze_sentiment`` on each text while advancing a progress bar.

    Returns a list of ``(scores, primary, confidence, error)`` tuples in the
    same order as ``texts``.
    """
    progress_bar = st.progress(0)
    outcomes = []
    for position, item in enumerate(texts, start=1):
        outcomes.append(analyze_sentiment(item, tokenizer, model))
        progress_bar.progress(position / len(texts))
    return outcomes


def _warn_about_failures(failed, total):
    """Tell the user how many texts were dropped because analysis failed."""
    if failed:
        st.warning(
            f"⚠️ {failed} of {total} texts could not be analyzed and were left out of the results."
        )


def _note_low_confidence(results_df):
    """Report how many results fall below the sidebar confidence threshold."""
    below = int((results_df['Confidence'] < confidence_threshold).sum())
    if below:
        st.info(
            f"ℹ️ {below} of {len(results_df)} results fall below the "
            f"{confidence_threshold:.0%} confidence threshold."
        )


# Main interface: single-text analysis by default, or batch analysis (pasted
# lines or an uploaded CSV) when the sidebar checkbox is ticked.
if tokenizer is not None and model is not None:
    st.success("✅ FinBERT model loaded successfully!")

    # Main analysis interface
    if not batch_analysis:
        st.header("📝 Single Text Analysis")
        text = st.text_area(
            "Enter financial news, report, or analysis:",
            height=150,
            placeholder="Example: The company reported strong quarterly earnings with revenue growth of 15% year-over-year...",
            key="single_text_input",
        )

        col1, col2, _ = st.columns([1, 1, 2])
        with col1:
            analyze_button = st.button("🔍 Analyze Sentiment", type="primary")
        with col2:
            st.button("🗑️ Clear", on_click=_clear_single_text)

        if analyze_button and not text.strip():
            st.warning("Please enter some text to analyze.")
        elif analyze_button:
            with st.spinner("Analyzing sentiment..."):
                sentiment_scores, primary_sentiment, confidence, error = analyze_sentiment(text, tokenizer, model)

            if sentiment_scores is None:
                st.error(f"Analysis failed: {error}")
            else:
                # Results section
                st.header("📊 Analysis Results")

                if confidence < confidence_threshold:
                    st.warning(
                        f"Confidence {confidence:.1%} is below the selected threshold of "
                        f"{confidence_threshold:.0%}; treat this classification as uncertain."
                    )

                # Primary sentiment with confidence
                col1, col2, col3 = st.columns(3)

                sentiment_emojis = {"Negative": "📉", "Neutral": "😐", "Positive": "📈"}
                sentiment_colors = {"Negative": "red", "Neutral": "gray", "Positive": "green"}

                with col1:
                    st.metric(
                        "Primary Sentiment",
                        f"{sentiment_emojis[primary_sentiment]} {primary_sentiment}",
                        delta=f"{confidence:.1%} confidence"
                    )

                with col2:
                    st.metric(
                        "Text Length",
                        f"{len(text)} characters",
                        delta=f"{len(text.split())} words"
                    )

                with col3:
                    reliability = "High" if confidence > 0.7 else "Medium" if confidence > 0.5 else "Low"
                    st.metric("Reliability", reliability)

                # Detailed probabilities
                if show_probabilities:
                    st.subheader("Detailed Sentiment Scores")

                    for sentiment, score in sentiment_scores.items():
                        emoji = sentiment_emojis[sentiment]
                        # The CSS class names are the lower-cased sentiment names.
                        color = sentiment.lower()

                        st.markdown(f"""
                        <div class="sentiment-card {color}">
                            <h4>{emoji} {sentiment}</h4>
                            <h2>{score:.3f}</h2>
                            <div style="width: 100%; background-color: #ddd; border-radius: 25px;">
                                <div style="width: {score*100}%; height: 10px; background-color: {sentiment_colors[sentiment]}; border-radius: 25px;"></div>
                            </div>
                        </div>
                        """, unsafe_allow_html=True)

                # Visualization
                st.subheader("📈 Sentiment Visualization")
                fig = create_sentiment_chart(sentiment_scores)
                st.plotly_chart(fig, use_container_width=True)

    else:
        # Batch analysis mode
        st.header("📊 Batch Analysis")

        # Option to upload file or enter multiple texts
        analysis_method = st.radio(
            "Choose analysis method:",
            ["Enter multiple texts", "Upload CSV file"]
        )

        if analysis_method == "Enter multiple texts":
            texts_input = st.text_area(
                "Enter multiple texts (one per line):",
                height=200,
                placeholder="Text 1: Company reports strong earnings...\nText 2: Market volatility increases...\nText 3: New regulations impact sector..."
            )

            analyze_all = st.button("🔍 Analyze All Texts")
            texts = [line.strip() for line in texts_input.split('\n') if line.strip()]

            if analyze_all and not texts:
                st.warning("Please enter at least one text to analyze.")
            elif analyze_all:
                outcomes = _analyze_with_progress(texts)

                # Keep only the texts whose analysis produced scores.
                results = [
                    {
                        'Text': entry[:100] + '...' if len(entry) > 100 else entry,
                        'Primary Sentiment': primary,
                        'Confidence': score,
                        'Negative': scores['Negative'],
                        'Neutral': scores['Neutral'],
                        'Positive': scores['Positive']
                    }
                    for entry, (scores, primary, score, _failure) in zip(texts, outcomes)
                    if scores is not None
                ]
                _warn_about_failures(len(texts) - len(results), len(texts))

                if results:
                    df = pd.DataFrame(results)

                    # Summary statistics
                    st.subheader("📈 Batch Analysis Summary")
                    summary_columns = st.columns(3)
                    for column, sentiment in zip(summary_columns, ("Positive", "Neutral", "Negative")):
                        count = int((df['Primary Sentiment'] == sentiment).sum())
                        with column:
                            st.metric(f"{sentiment} Texts", count, f"{count/len(df)*100:.1f}%")

                    _note_low_confidence(df)

                    # Results table
                    st.subheader("📋 Detailed Results")
                    st.dataframe(df, use_container_width=True)

                    # Download results
                    csv = df.to_csv(index=False)
                    st.download_button(
                        "📥 Download Results (CSV)",
                        csv,
                        f"sentiment_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
                        "text/csv"
                    )

        elif analysis_method == "Upload CSV file":
            uploaded_file = st.file_uploader(
                "Choose a CSV file with a 'text' column",
                type=['csv']
            )

            if uploaded_file is not None:
                try:
                    df = pd.read_csv(uploaded_file)

                    if 'text' not in df.columns:
                        st.error("CSV file must contain a 'text' column")
                    else:
                        st.write(f"Loaded {len(df)} texts from CSV file")
                        st.dataframe(df.head(), use_container_width=True)

                        if st.button("🔍 Analyze CSV Data"):
                            # Missing cells would otherwise be analyzed as the
                            # literal text "nan"; skip them and blank cells.
                            text_column = df['text']
                            has_text = text_column.notna() & text_column.astype(str).str.strip().ne('')
                            valid_rows = df[has_text]
                            skipped = len(df) - len(valid_rows)
                            if skipped:
                                st.info(f"ℹ️ Skipped {skipped} rows with an empty 'text' value.")

                            row_texts = valid_rows['text'].astype(str).tolist()
                            outcomes = _analyze_with_progress(row_texts)

                            results = []
                            for (_, row), (scores, primary, score, _failure) in zip(valid_rows.iterrows(), outcomes):
                                if scores is None:
                                    continue
                                result_row = row.to_dict()
                                result_row.update({
                                    'Primary Sentiment': primary,
                                    'Confidence': score,
                                    'Negative Score': scores['Negative'],
                                    'Neutral Score': scores['Neutral'],
                                    'Positive Score': scores['Positive']
                                })
                                results.append(result_row)

                            _warn_about_failures(len(row_texts) - len(results), len(row_texts))

                            if results:
                                results_df = pd.DataFrame(results)

                                _note_low_confidence(results_df)

                                # Display results
                                st.subheader("📋 Analysis Results")
                                st.dataframe(results_df, use_container_width=True)

                                # Download enhanced results
                                csv = results_df.to_csv(index=False)
                                st.download_button(
                                    "📥 Download Enhanced Results (CSV)",
                                    csv,
                                    f"enhanced_sentiment_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
                                    "text/csv"
                                )

                except Exception as e:
                    st.error(f"Error processing CSV file: {str(e)}")

# Footer: a horizontal rule, then a centred usage tip and credits line.
_FOOTER_HTML = """
<div style='text-align: center; color: #666; margin-top: 2rem;'>
    <p>💡 <strong>Tip:</strong> For best results, use complete sentences and financial context</p>
    <p>Built with Streamlit • Powered by FinBERT</p>
</div>
"""
st.markdown("---")
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)