"""Streamlit app for financial sentiment analysis with FinBERT.

The page classifies financial text as Negative / Neutral / Positive using the
``yiyanghkust/finbert-tone`` checkpoint. It offers a single-text mode and a
batch mode (one text per line, or a CSV file with a ``text`` column) whose
results can be downloaded as CSV.
"""

import os
import re
from datetime import datetime

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer

MODEL_NAME = "yiyanghkust/finbert-tone"
MODEL_CACHE_DIR = "/tmp/huggingface"
MAX_TOKENS = 512
PREVIEW_CHARS = 100

# Order in which sentiments are shown in tables and charts.
SENTIMENT_LABELS = ("Negative", "Neutral", "Positive")
# Class-index order published for finbert-tone (0=Neutral, 1=Positive,
# 2=Negative); used only when the loaded config does not name its classes.
FINBERT_TONE_CLASS_ORDER = ("Neutral", "Positive", "Negative")
SENTIMENT_EMOJIS = {"Negative": "📉", "Neutral": "😐", "Positive": "📈"}
SENTIMENT_BAR_COLORS = {
    "Negative": "#f44336",
    "Neutral": "#9c27b0",
    "Positive": "#4caf50",
}
DEFAULT_BAR_COLOR = "#607d8b"

# Session-state key of the single-text input, so the Clear button can reset it.
SINGLE_TEXT_KEY = "single_text_input"

_WHITESPACE_RE = re.compile(r"\s+")

# Page configuration
st.set_page_config(
    page_title="FinBERT Sentiment Analyzer",
    page_icon="💰",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom CSS for better styling
# NOTE(review): this string carries no style rules in the reviewed source;
# confirm whether a stylesheet is supposed to be injected here.
st.markdown(""" """, unsafe_allow_html=True)

st.markdown(
    """

💰 FinBERT: Financial Sentiment Analysis

""",
    unsafe_allow_html=True,
)

# Sidebar
with st.sidebar:
    st.header("ℹ️ About")
    st.markdown(
        """
        **Model:** `yiyanghkust/finbert-tone`

        Trained specifically on financial texts for accurate sentiment analysis of:
        - Financial news
        - Earnings reports
        - Market analysis
        - Investment research
        """
    )

    st.header("⚙️ Settings")
    confidence_threshold = st.slider(
        "Confidence Threshold",
        0.0,
        1.0,
        0.5,
        help="Minimum confidence for sentiment classification",
    )
    show_probabilities = st.checkbox("Show All Probabilities", value=True)
    batch_analysis = st.checkbox(
        "Enable Batch Analysis",
        help="Analyze multiple texts at once",
    )


@st.cache_resource(show_spinner=False)
def _load_model_cached():
    """Download (or reuse) the FinBERT tokenizer and model.

    Raises on failure instead of returning an error value: a raised exception
    is not stored by ``st.cache_resource``, so a transient download failure is
    retried on the next rerun rather than being cached.
    """
    os.makedirs(MODEL_CACHE_DIR, exist_ok=True)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=MODEL_CACHE_DIR)
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME, cache_dir=MODEL_CACHE_DIR
    )
    return tokenizer, model


def load_model():
    """Load FinBERT model and tokenizer with error handling.

    Returns:
        ``(tokenizer, model, None)`` on success, or ``(None, None, message)``
        when loading fails.
    """
    try:
        with st.spinner("Loading FinBERT model... This may take a moment."):
            tokenizer, model = _load_model_cached()
    except Exception as e:  # UI boundary: report the failure instead of crashing
        return None, None, str(e)
    return tokenizer, model, None


def _class_labels(model, num_classes):
    """Return the sentiment name of each output class, in class-index order."""
    config = getattr(model, "config", None)
    id2label = getattr(config, "id2label", None) or {}
    labels = [str(id2label.get(i, "")).capitalize() for i in range(num_classes)]
    if sorted(labels) == sorted(SENTIMENT_LABELS):
        return labels
    # Config has generic names (e.g. LABEL_0): use the published class order.
    if num_classes == len(FINBERT_TONE_CLASS_ORDER):
        return list(FINBERT_TONE_CLASS_ORDER)
    raise ValueError(
        f"Expected {len(SENTIMENT_LABELS)} sentiment classes, got {num_classes}"
    )


def analyze_sentiment(text, tokenizer, model):
    """Analyze sentiment with error handling and additional metrics.

    Args:
        text: Text to classify; runs of whitespace are collapsed first.
        tokenizer: Tokenizer returned by ``load_model``.
        model: Sequence-classification model returned by ``load_model``.

    Returns:
        ``(sentiment_scores, primary_sentiment, max_prob, None)`` on success,
        where ``sentiment_scores`` maps "Negative"/"Neutral"/"Positive" to a
        probability, or ``(None, None, None, message)`` on failure.
    """
    try:
        cleaned = _WHITESPACE_RE.sub(" ", text.strip())
        inputs = tokenizer(
            cleaned,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=MAX_TOKENS,
        )

        with torch.no_grad():
            outputs = model(**inputs)

        probs = F.softmax(outputs.logits, dim=1)[0].tolist()

        # Pair each probability with the class the model actually assigns to
        # that index; the model's class order differs from the display order.
        by_label = dict(zip(_class_labels(model, len(probs)), probs))
        sentiment_scores = {label: by_label[label] for label in SENTIMENT_LABELS}

        # Determine primary sentiment
        primary_sentiment = max(sentiment_scores, key=sentiment_scores.get)
        max_prob = sentiment_scores[primary_sentiment]

        return sentiment_scores, primary_sentiment, max_prob, None
    except Exception as e:  # UI boundary: surface the message to the caller
        return None, None, None, str(e)


def create_sentiment_chart(sentiment_scores):
    """Create an interactive sentiment visualization.

    Args:
        sentiment_scores: Mapping of sentiment label to probability.

    Returns:
        A Plotly bar chart with one bar per label, coloured by label.
    """
    labels = list(sentiment_scores.keys())
    values = list(sentiment_scores.values())
    colors = [SENTIMENT_BAR_COLORS.get(label, DEFAULT_BAR_COLOR) for label in labels]

    fig = go.Figure(
        data=[
            go.Bar(
                x=labels,
                y=values,
                marker_color=colors,
                text=[f"{v:.3f}" for v in values],
                textposition="auto",
            )
        ]
    )
    fig.update_layout(
        title="Sentiment Analysis Results",
        xaxis_title="Sentiment",
        yaxis_title="Confidence Score",
        yaxis=dict(range=[0, 1]),
        height=400,
        showlegend=False,
    )
    return fig


def _clear_single_text():
    """Button callback: empty the single-text input before the next rerun."""
    st.session_state[SINGLE_TEXT_KEY] = ""


def _timestamped_name(prefix):
    """Return ``<prefix>_<YYYYmmdd_HHMMSS>.csv`` for download buttons."""
    return f"{prefix}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"


def _analyze_many(texts, tokenizer, model):
    """Analyze every text in order while advancing a progress bar.

    ``None`` entries (missing values) are reported as failures, not analyzed.
    Returns one ``analyze_sentiment``-style 4-tuple per input.
    """
    total = len(texts)
    progress_bar = st.progress(0)
    outcomes = []
    for position, text in enumerate(texts, start=1):
        if text is None:
            outcomes.append((None, None, None, "missing text"))
        else:
            outcomes.append(analyze_sentiment(text, tokenizer, model))
        progress_bar.progress(position / total)
    return outcomes


def _warn_about_failures(failed, total):
    """Tell the user how many inputs were skipped, if any."""
    if failed:
        st.warning(f"{failed} of {total} texts could not be analyzed and were skipped.")


def _render_single_result(text, sentiment_scores, primary_sentiment, confidence):
    """Show the metrics, per-class scores and chart for one analyzed text."""
    st.header("📊 Analysis Results")

    # Primary sentiment with confidence
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric(
            "Primary Sentiment",
            f"{SENTIMENT_EMOJIS[primary_sentiment]} {primary_sentiment}",
            delta=f"{confidence:.1%} confidence",
        )
    with col2:
        st.metric(
            "Text Length",
            f"{len(text)} characters",
            delta=f"{len(text.split())} words",
        )
    with col3:
        if confidence > 0.7:
            reliability = "High"
        elif confidence > 0.5:
            reliability = "Medium"
        else:
            reliability = "Low"
        st.metric("Reliability", reliability)

    # Apply the sidebar threshold, which otherwise has no effect.
    if confidence < confidence_threshold:
        st.warning(
            f"Confidence {confidence:.1%} is below the selected threshold of "
            f"{confidence_threshold:.0%}; treat this classification as inconclusive."
        )

    # Detailed probabilities
    if show_probabilities:
        st.subheader("Detailed Sentiment Scores")
        for sentiment, score in sentiment_scores.items():
            emoji = SENTIMENT_EMOJIS[sentiment]
            st.markdown(
                f"""

{emoji} {sentiment}

{score:.3f}

""",
                unsafe_allow_html=True,
            )

    # Visualization
    st.subheader("📈 Sentiment Visualization")
    fig = create_sentiment_chart(sentiment_scores)
    st.plotly_chart(fig, use_container_width=True)


def _render_single_analysis(tokenizer, model):
    """Single-text mode: input box, Analyze / Clear buttons, and results."""
    st.header("📝 Single Text Analysis")

    text = st.text_area(
        "Enter financial news, report, or analysis:",
        height=150,
        placeholder="Example: The company reported strong quarterly earnings with revenue growth of 15% year-over-year...",
        key=SINGLE_TEXT_KEY,
    )

    col1, col2, _ = st.columns([1, 1, 2])
    with col1:
        analyze_button = st.button("🔍 Analyze Sentiment", type="primary")
    with col2:
        # A rerun alone keeps the widget's value; the callback resets its state.
        st.button("🗑️ Clear", on_click=_clear_single_text)

    if not (analyze_button and text.strip()):
        return

    with st.spinner("Analyzing sentiment..."):
        sentiment_scores, primary_sentiment, confidence, error = analyze_sentiment(
            text, tokenizer, model
        )

    if error:
        st.error(f"Analysis failed: {error}")
    else:
        _render_single_result(text, sentiment_scores, primary_sentiment, confidence)


def _render_batch_text_analysis(tokenizer, model):
    """Batch mode for texts typed one per line."""
    texts_input = st.text_area(
        "Enter multiple texts (one per line):",
        height=200,
        placeholder="Text 1: Company reports strong earnings...\nText 2: Market volatility increases...\nText 3: New regulations impact sector...",
    )

    if not (st.button("🔍 Analyze All Texts") and texts_input.strip()):
        return

    texts = [line.strip() for line in texts_input.split("\n") if line.strip()]
    if not texts:
        return

    results = []
    for text, outcome in zip(texts, _analyze_many(texts, tokenizer, model)):
        sentiment_scores, primary_sentiment, confidence, error = outcome
        if error:
            continue
        preview = text[:PREVIEW_CHARS] + "..." if len(text) > PREVIEW_CHARS else text
        results.append(
            {
                "Text": preview,
                "Primary Sentiment": primary_sentiment,
                "Confidence": confidence,
                "Negative": sentiment_scores["Negative"],
                "Neutral": sentiment_scores["Neutral"],
                "Positive": sentiment_scores["Positive"],
            }
        )

    _warn_about_failures(len(texts) - len(results), len(texts))
    if not results:
        return

    df = pd.DataFrame(results)

    # Summary statistics
    st.subheader("📈 Batch Analysis Summary")
    summary_columns = st.columns(3)
    for column, label in zip(summary_columns, ("Positive", "Neutral", "Negative")):
        with column:
            count = int((df["Primary Sentiment"] == label).sum())
            st.metric(f"{label} Texts", count, f"{count/len(df)*100:.1f}%")

    # Results table
    st.subheader("📋 Detailed Results")
    st.dataframe(df, use_container_width=True)

    # Download results
    st.download_button(
        "📥 Download Results (CSV)",
        df.to_csv(index=False),
        _timestamped_name("sentiment_analysis"),
        "text/csv",
    )


def _process_csv(uploaded_file, tokenizer, model):
    """Read the uploaded CSV, analyze its ``text`` column and show the results."""
    df = pd.read_csv(uploaded_file)

    if "text" not in df.columns:
        st.error("CSV file must contain a 'text' column")
        return

    st.write(f"Loaded {len(df)} texts from CSV file")
    st.dataframe(df.head(), use_container_width=True)

    if not st.button("🔍 Analyze CSV Data"):
        return

    # Missing cells become None so they are skipped, not analyzed as "nan".
    texts = [None if pd.isna(value) else str(value) for value in df["text"]]
    rows = df.to_dict("records")

    results = []
    for row, outcome in zip(rows, _analyze_many(texts, tokenizer, model)):
        sentiment_scores, primary_sentiment, confidence, error = outcome
        if error:
            continue
        row.update(
            {
                "Primary Sentiment": primary_sentiment,
                "Confidence": confidence,
                "Negative Score": sentiment_scores["Negative"],
                "Neutral Score": sentiment_scores["Neutral"],
                "Positive Score": sentiment_scores["Positive"],
            }
        )
        results.append(row)

    _warn_about_failures(len(rows) - len(results), len(rows))
    if not results:
        return

    results_df = pd.DataFrame(results)

    # Display results
    st.subheader("📋 Analysis Results")
    st.dataframe(results_df, use_container_width=True)

    # Download enhanced results
    st.download_button(
        "📥 Download Enhanced Results (CSV)",
        results_df.to_csv(index=False),
        _timestamped_name("enhanced_sentiment_analysis"),
        "text/csv",
    )


def _render_csv_analysis(tokenizer, model):
    """Batch mode for an uploaded CSV file with a ``text`` column."""
    uploaded_file = st.file_uploader(
        "Choose a CSV file with a 'text' column",
        type=["csv"],
    )
    if uploaded_file is None:
        return

    try:
        _process_csv(uploaded_file, tokenizer, model)
    except Exception as e:  # UI boundary: show parse/processing errors in-page
        st.error(f"Error processing CSV file: {str(e)}")


# Load model
tokenizer, model, error = load_model()

if error:
    st.error(f"Failed to load model: {error}")
    st.stop()

if tokenizer is not None and model is not None:
    st.success("✅ FinBERT model loaded successfully!")

    # Main analysis interface
    if not batch_analysis:
        _render_single_analysis(tokenizer, model)
    else:
        # Batch analysis mode
        st.header("📊 Batch Analysis")

        # Option to upload file or enter multiple texts
        analysis_method = st.radio(
            "Choose analysis method:",
            ["Enter multiple texts", "Upload CSV file"],
        )

        if analysis_method == "Enter multiple texts":
            _render_batch_text_analysis(tokenizer, model)
        elif analysis_method == "Upload CSV file":
            _render_csv_analysis(tokenizer, model)

# Footer
st.markdown("---")
st.markdown(
    """

💡 Tip: For best results, use complete sentences and financial context

Built with Streamlit • Powered by FinBERT

""",
    unsafe_allow_html=True,
)