# news_sentiment_analyzer/src/streamlit_app.py
# Hugging Face file-viewer header, kept as a comment so the module parses:
#   author: ResearchEngineering
#   commit: "Update src/streamlit_app.py" (200ce84, verified)
#   size: 17.3 kB
'''
import altair as alt
import numpy as np
import pandas as pd
import streamlit as st
"""
# Welcome to Streamlit!
Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
forums](https://discuss.streamlit.io).
In the meantime, below is an example of what you can do with just a few lines of code:
"""
num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
indices = np.linspace(0, 1, num_points)
theta = 2 * np.pi * num_turns * indices
radius = indices
x = radius * np.cos(theta)
y = radius * np.sin(theta)
df = pd.DataFrame({
"x": x,
"y": y,
"idx": indices,
"rand": np.random.randn(num_points),
})
st.altair_chart(alt.Chart(df, height=700, width=700)
.mark_point(filled=True)
.encode(
x=alt.X("x", axis=None),
y=alt.Y("y", axis=None),
color=alt.Color("idx", legend=None, scale=alt.Scale()),
size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
))
'''
'''
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F
import os
st.set_page_config(page_title="FinBERT Sentiment", layout="centered")
st.title("💰 FinBERT: Financial Sentiment Analysis")
st.markdown("Модель: `yiyanghkust/finbert-tone` — обучена на финансовых текстах")
@st.cache_resource
def load_model():
# Установка кастомного пути к кэшу
cache_dir = "/tmp/huggingface"
os.makedirs(cache_dir, exist_ok=True)
tokenizer = AutoTokenizer.from_pretrained("yiyanghkust/finbert-tone", cache_dir=cache_dir)
model = AutoModelForSequenceClassification.from_pretrained("yiyanghkust/finbert-tone", cache_dir=cache_dir)
return tokenizer, model
tokenizer, model = load_model()
text = st.text_area("Введите финансовую новость или отчёт:", height=150)
if st.button("Анализировать тональность") and text.strip():
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
with torch.no_grad():
outputs = model(**inputs)
probs = F.softmax(outputs.logits, dim=1).squeeze()
labels = ["📉 Negative", "😐 Neutral", "📈 Positive"]
for label, prob in zip(labels, probs):
st.write(f"**{label}:** {prob.item():.3f}")
'''
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F
import os
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
import re
# Page-level configuration for the app window.
_page_settings = {
    "page_title": "FinBERT Sentiment Analyzer",
    "page_icon": "💰",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_page_settings)

# Stylesheet injected once; the class names are referenced by the HTML
# snippets rendered further down the script.
_PAGE_CSS = """
<style>
.main-header {
text-align: center;
color: #1f77b4;
margin-bottom: 2rem;
}
.sentiment-card {
padding: 1rem;
border-radius: 10px;
margin: 0.5rem 0;
text-align: center;
}
.negative { background-color: #ffebee; border-left: 5px solid #f44336; }
.neutral { background-color: #f3e5f5; border-left: 5px solid #9c27b0; }
.positive { background-color: #e8f5e8; border-left: 5px solid #4caf50; }
.metric-container {
background-color: #f8f9fa;
padding: 1rem;
border-radius: 10px;
margin: 1rem 0;
}
</style>
"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

_PAGE_TITLE_HTML = '<h1 class="main-header">💰 FinBERT: Financial Sentiment Analysis</h1>'
st.markdown(_PAGE_TITLE_HTML, unsafe_allow_html=True)

# Sidebar: model description plus the settings read by the rest of the script
# (confidence_threshold, show_probabilities, batch_analysis).
_sidebar = st.sidebar
_sidebar.header("ℹ️ About")
_sidebar.markdown("""
**Model:** `yiyanghkust/finbert-tone`
Trained specifically on financial texts for accurate sentiment analysis of:
- Financial news
- Earnings reports
- Market analysis
- Investment research
""")
_sidebar.header("⚙️ Settings")
confidence_threshold = _sidebar.slider(
    "Confidence Threshold",
    min_value=0.0,
    max_value=1.0,
    value=0.5,
    help="Minimum confidence for sentiment classification",
)
show_probabilities = _sidebar.checkbox("Show All Probabilities", value=True)
batch_analysis = _sidebar.checkbox(
    "Enable Batch Analysis",
    help="Analyze multiple texts at once",
)
@st.cache_resource(show_spinner=False)
def _load_finbert(model_name, cache_dir):
    """Load the tokenizer and classification model for ``model_name``.

    Failures are deliberately NOT caught here: an exception leaves nothing in
    the ``st.cache_resource`` cache, so the next script run retries the load
    instead of replaying a cached error.

    Args:
        model_name: Hugging Face model identifier.
        cache_dir: Directory used for downloaded model files; created if missing.

    Returns:
        ``(tokenizer, model)``.
    """
    os.makedirs(cache_dir, exist_ok=True)
    with st.spinner("Loading FinBERT model... This may take a moment."):
        tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
        model = AutoModelForSequenceClassification.from_pretrained(
            model_name,
            cache_dir=cache_dir,
        )
    return tokenizer, model


def load_model():
    """Load FinBERT model and tokenizer with error handling.

    Returns:
        ``(tokenizer, model, None)`` on success, or ``(None, None, message)``
        when loading failed. ``message`` is never empty, so callers can test
        it for truthiness.
    """
    try:
        tokenizer, model = _load_finbert("yiyanghkust/finbert-tone", "/tmp/huggingface")
    except Exception as e:
        # Some exceptions stringify to "", which callers would read as
        # "no error"; fall back to the repr in that case.
        return None, None, str(e) or repr(e)
    return tokenizer, model, None
def analyze_sentiment(text, tokenizer, model):
    """Classify the sentiment of one text.

    The class index -> label mapping is taken from ``model.config.id2label``
    rather than assumed, because the model's output order is not
    Negative/Neutral/Positive.

    Args:
        text: Text to classify; runs of whitespace are collapsed first.
        tokenizer: Callable invoked as ``tokenizer(text, return_tensors="pt",
            truncation=True, padding=True, max_length=512)`` whose result is
            unpacked into the model call.
        model: Callable whose result exposes ``.logits`` with one row per
            input. ``model.config.id2label`` is used when it names exactly the
            three sentiment classes.

    Returns:
        ``(sentiment_scores, primary_sentiment, max_prob, error)``. On success
        ``error`` is ``None`` and ``sentiment_scores`` maps "Negative",
        "Neutral", "Positive" (in that key order) to probabilities. On any
        failure the first three items are ``None`` and ``error`` is the
        exception message.
    """
    canonical_labels = ("Negative", "Neutral", "Positive")
    # Class order published on the finbert-tone model card; only used when the
    # model config does not provide a usable id2label mapping.
    documented_order = {0: "Neutral", 1: "Positive", 2: "Negative"}
    try:
        # Preprocess text
        text = re.sub(r'\s+', ' ', text.strip())
        inputs = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=512,
        )
        with torch.no_grad():
            outputs = model(**inputs)
        # One text in, so take row 0 of the (batch, classes) probabilities.
        probs = F.softmax(outputs.logits, dim=1)[0]

        config_labels = getattr(getattr(model, "config", None), "id2label", None) or {}
        index_to_label = {
            int(index): str(name).capitalize()
            for index, name in config_labels.items()
        }
        if set(index_to_label.values()) != set(canonical_labels):
            index_to_label = documented_order

        by_label = {
            index_to_label[index]: prob.item()
            for index, prob in enumerate(probs)
        }
        # Keep the historical key order so positional consumers are unaffected.
        sentiment_scores = {label: by_label[label] for label in canonical_labels}

        # Determine primary sentiment
        primary_sentiment = max(sentiment_scores, key=sentiment_scores.get)
        max_prob = sentiment_scores[primary_sentiment]
        return sentiment_scores, primary_sentiment, max_prob, None
    except Exception as e:
        return None, None, None, str(e)
def create_sentiment_chart(sentiment_scores):
    """Create an interactive bar chart of sentiment scores.

    Args:
        sentiment_scores: Mapping of sentiment label to score. Bars are drawn
            in the mapping's iteration order.

    Returns:
        A ``plotly.graph_objects.Figure`` with one bar per label, the score
        printed on each bar, and the y axis fixed to ``[0, 1]``.
    """
    # Colours are looked up by label so the chart stays correct whatever the
    # order (or subset) of keys in sentiment_scores.
    palette = {
        "Negative": '#f44336',
        "Neutral": '#9c27b0',
        "Positive": '#4caf50',
    }
    fallback_color = '#9e9e9e'

    labels = list(sentiment_scores)
    values = [sentiment_scores[label] for label in labels]
    colors = [palette.get(label, fallback_color) for label in labels]

    fig = go.Figure(data=[
        go.Bar(
            x=labels,
            y=values,
            marker_color=colors,
            text=[f'{v:.3f}' for v in values],
            textposition='auto',
        )
    ])
    fig.update_layout(
        title="Sentiment Analysis Results",
        xaxis_title="Sentiment",
        yaxis_title="Confidence Score",
        yaxis=dict(range=[0, 1]),
        height=400,
        showlegend=False,
    )
    return fig
# Load model
tokenizer, model, error = load_model()
# Halt on any failed load. Checking the returned objects as well as the
# message covers a failure whose error string is empty, which a plain
# truthiness test on `error` would let through with tokenizer/model = None.
if error or tokenizer is None or model is None:
    st.error(f"Failed to load model: {error or 'unknown error'}")
    st.stop()
st.success("✅ FinBERT model loaded successfully!")
# Main analysis interface
_SINGLE_TEXT_KEY = "single_text_input"


def _clear_single_text():
    """Button callback: empty the single-text input box.

    Widget state is reset here, in a callback, so the change is in place
    when the script reruns after the click.
    """
    st.session_state[_SINGLE_TEXT_KEY] = ""


def _analyze_batch(texts, tokenizer, model):
    """Analyze texts in order while advancing a progress bar.

    Args:
        texts: Sequence of strings. A ``None`` entry marks a missing text and
            is skipped without calling the model.
        tokenizer: Passed through to ``analyze_sentiment``.
        model: Passed through to ``analyze_sentiment``.

    Returns:
        A list with one entry per input: a ``(sentiment_scores,
        primary_sentiment, confidence)`` tuple, or ``None`` when the text was
        missing or analysis failed.
    """
    outcomes = []
    progress_bar = st.progress(0)
    total = len(texts)
    # Progress is driven by position, not by any index carried by the data.
    for position, text in enumerate(texts, start=1):
        outcome = None
        if text is not None:
            scores, primary, confidence, _error = analyze_sentiment(text, tokenizer, model)
            if scores is not None:
                outcome = (scores, primary, confidence)
        outcomes.append(outcome)
        progress_bar.progress(position / total)
    return outcomes


def _report_batch_quality(outcomes, threshold):
    """Tell the user how many inputs were skipped or fell below ``threshold``."""
    skipped = sum(outcome is None for outcome in outcomes)
    if skipped:
        st.warning(f"{skipped} of {len(outcomes)} texts could not be analyzed and were skipped.")
    low_confidence = sum(
        outcome is not None and outcome[2] < threshold for outcome in outcomes
    )
    if low_confidence:
        st.info(f"{low_confidence} result(s) have confidence below the {threshold:.0%} threshold.")


if not batch_analysis:
    st.header("📝 Single Text Analysis")
    text = st.text_area(
        "Enter financial news, report, or analysis:",
        height=150,
        placeholder="Example: The company reported strong quarterly earnings with revenue growth of 15% year-over-year...",
        key=_SINGLE_TEXT_KEY,
    )
    col1, col2, col3 = st.columns([1, 1, 2])
    with col1:
        analyze_button = st.button("🔍 Analyze Sentiment", type="primary")
    with col2:
        # The callback empties the text box; the click itself triggers the rerun.
        st.button("🗑️ Clear", on_click=_clear_single_text)

    if analyze_button and not text.strip():
        st.warning("Please enter some text to analyze.")
    elif analyze_button:
        with st.spinner("Analyzing sentiment..."):
            sentiment_scores, primary_sentiment, confidence, error = analyze_sentiment(text, tokenizer, model)
        if sentiment_scores is None:
            st.error(f"Analysis failed: {error}")
        else:
            # Results section
            st.header("📊 Analysis Results")
            # Primary sentiment with confidence
            col1, col2, col3 = st.columns(3)
            sentiment_emojis = {"Negative": "📉", "Neutral": "😐", "Positive": "📈"}
            sentiment_colors = {"Negative": "red", "Neutral": "gray", "Positive": "green"}
            with col1:
                st.metric(
                    "Primary Sentiment",
                    f"{sentiment_emojis[primary_sentiment]} {primary_sentiment}",
                    delta=f"{confidence:.1%} confidence",
                )
            with col2:
                st.metric(
                    "Text Length",
                    f"{len(text)} characters",
                    delta=f"{len(text.split())} words",
                )
            with col3:
                reliability = "High" if confidence > 0.7 else "Medium" if confidence > 0.5 else "Low"
                st.metric("Reliability", reliability)
            # Apply the sidebar setting: flag classifications under the
            # user-chosen minimum confidence.
            if confidence < confidence_threshold:
                st.warning(
                    f"Confidence {confidence:.1%} is below the configured threshold of "
                    f"{confidence_threshold:.0%}; treat this classification as uncertain."
                )
            # Detailed probabilities
            if show_probabilities:
                st.subheader("Detailed Sentiment Scores")
                for sentiment, score in sentiment_scores.items():
                    emoji = sentiment_emojis[sentiment]
                    # CSS classes are the lower-cased label names.
                    color = sentiment.lower()
                    st.markdown(f"""
<div class="sentiment-card {color}">
<h4>{emoji} {sentiment}</h4>
<h2>{score:.3f}</h2>
<div style="width: 100%; background-color: #ddd; border-radius: 25px;">
<div style="width: {score*100}%; height: 10px; background-color: {sentiment_colors[sentiment]}; border-radius: 25px;"></div>
</div>
</div>
""", unsafe_allow_html=True)
            # Visualization
            st.subheader("📈 Sentiment Visualization")
            fig = create_sentiment_chart(sentiment_scores)
            st.plotly_chart(fig, use_container_width=True)
else:
    # Batch analysis mode
    st.header("📊 Batch Analysis")
    # Option to upload file or enter multiple texts
    analysis_method = st.radio(
        "Choose analysis method:",
        ["Enter multiple texts", "Upload CSV file"],
    )
    if analysis_method == "Enter multiple texts":
        texts_input = st.text_area(
            "Enter multiple texts (one per line):",
            height=200,
            placeholder="Text 1: Company reports strong earnings...\nText 2: Market volatility increases...\nText 3: New regulations impact sector...",
        )
        if st.button("🔍 Analyze All Texts") and texts_input.strip():
            texts = [line.strip() for line in texts_input.split('\n') if line.strip()]
            if texts:
                outcomes = _analyze_batch(texts, tokenizer, model)
                results = []
                for text, outcome in zip(texts, outcomes):
                    if outcome is None:
                        continue
                    sentiment_scores, primary_sentiment, confidence = outcome
                    results.append({
                        'Text': text[:100] + '...' if len(text) > 100 else text,
                        'Primary Sentiment': primary_sentiment,
                        'Confidence': confidence,
                        'Negative': sentiment_scores['Negative'],
                        'Neutral': sentiment_scores['Neutral'],
                        'Positive': sentiment_scores['Positive'],
                    })
                _report_batch_quality(outcomes, confidence_threshold)
                if results:
                    df = pd.DataFrame(results)
                    # Summary statistics
                    st.subheader("📈 Batch Analysis Summary")
                    label_counts = df['Primary Sentiment'].value_counts()
                    for column, label in zip(st.columns(3), ("Positive", "Neutral", "Negative")):
                        count = int(label_counts.get(label, 0))
                        column.metric(f"{label} Texts", count, f"{count/len(df)*100:.1f}%")
                    # Results table
                    st.subheader("📋 Detailed Results")
                    st.dataframe(df, use_container_width=True)
                    # Download results
                    csv = df.to_csv(index=False)
                    st.download_button(
                        "📥 Download Results (CSV)",
                        csv,
                        f"sentiment_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
                        "text/csv",
                    )
    elif analysis_method == "Upload CSV file":
        uploaded_file = st.file_uploader(
            "Choose a CSV file with a 'text' column",
            type=['csv'],
        )
        if uploaded_file is not None:
            try:
                df = pd.read_csv(uploaded_file)
                if 'text' not in df.columns:
                    st.error("CSV file must contain a 'text' column")
                else:
                    st.write(f"Loaded {len(df)} texts from CSV file")
                    st.dataframe(df.head(), use_container_width=True)
                    if st.button("🔍 Analyze CSV Data"):
                        # Missing or blank cells are marked None so they are
                        # skipped instead of being scored as the text "nan".
                        texts = [
                            None if pd.isna(value) or not str(value).strip() else str(value)
                            for value in df['text']
                        ]
                        outcomes = _analyze_batch(texts, tokenizer, model)
                        results = []
                        for (_, row), outcome in zip(df.iterrows(), outcomes):
                            if outcome is None:
                                continue
                            sentiment_scores, primary_sentiment, confidence = outcome
                            result_row = row.to_dict()
                            result_row.update({
                                'Primary Sentiment': primary_sentiment,
                                'Confidence': confidence,
                                'Negative Score': sentiment_scores['Negative'],
                                'Neutral Score': sentiment_scores['Neutral'],
                                'Positive Score': sentiment_scores['Positive'],
                            })
                            results.append(result_row)
                        _report_batch_quality(outcomes, confidence_threshold)
                        if results:
                            results_df = pd.DataFrame(results)
                            # Display results
                            st.subheader("📋 Analysis Results")
                            st.dataframe(results_df, use_container_width=True)
                            # Download enhanced results
                            csv = results_df.to_csv(index=False)
                            st.download_button(
                                "📥 Download Enhanced Results (CSV)",
                                csv,
                                f"enhanced_sentiment_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
                                "text/csv",
                            )
            except Exception as e:
                st.error(f"Error processing CSV file: {str(e)}")
# Footer: a horizontal rule followed by a centred tip and credits line.
_FOOTER_HTML = """
<div style='text-align: center; color: #666; margin-top: 2rem;'>
<p>💡 <strong>Tip:</strong> For best results, use complete sentences and financial context</p>
<p>Built with Streamlit • Powered by FinBERT</p>
</div>
"""
st.markdown("---")
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)