Spaces:

ResearchEngineering
/

news_sentiment_analyzer

Runtime error

App Files Files Community

news_sentiment_analyzer / src /streamlit_app.py

ResearchEngineering

Update src/streamlit_app.py

200ce84 verified 4 months ago

raw

history blame

17.3 kB

	"""Streamlit app for financial sentiment analysis with FinBERT.

	Loads the ``yiyanghkust/finbert-tone`` model and scores text as Negative,
	Neutral or Positive. Two modes are offered: a single text entered in a text
	area, or a batch (one text per line, or an uploaded CSV file with a ``text``
	column) whose results can be downloaded as CSV.

	The unrelated Streamlit starter template (spiral chart demo) that previously
	occupied this docstring was dead code and has been removed.
	"""


	# NOTE: an earlier single-text prototype of this app used to be kept here inside
	# a bare triple-quoted string. A string literal that is not the module docstring
	# is an ordinary expression statement, and Streamlit "magic" writes such
	# expressions to the page via st.write() -- in this file that happened before
	# st.set_page_config() was called. The dead code was removed for that reason;
	# everything it did is covered by the implementation below.


	import streamlit as st
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	import torch
	import torch.nn.functional as F
	import os
	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go
	from datetime import datetime
	import re

	# Page-level options; set_page_config is the first Streamlit call in the script.
	_PAGE_OPTIONS = {
	    "page_title": "FinBERT Sentiment Analyzer",
	    "page_icon": "💰",
	    "layout": "wide",
	    "initial_sidebar_state": "expanded",
	}
	st.set_page_config(**_PAGE_OPTIONS)

	# Stylesheet for the page header, the per-sentiment result cards and the
	# metric container. Injected as raw HTML, hence unsafe_allow_html below.
	_CUSTOM_CSS = """
	<style>
	.main-header {
	text-align: center;
	color: #1f77b4;
	margin-bottom: 2rem;
	}
	.sentiment-card {
	padding: 1rem;
	border-radius: 10px;
	margin: 0.5rem 0;
	text-align: center;
	}
	.negative { background-color: #ffebee; border-left: 5px solid #f44336; }
	.neutral { background-color: #f3e5f5; border-left: 5px solid #9c27b0; }
	.positive { background-color: #e8f5e8; border-left: 5px solid #4caf50; }
	.metric-container {
	background-color: #f8f9fa;
	padding: 1rem;
	border-radius: 10px;
	margin: 1rem 0;
	}
	</style>
	"""
	st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

	# Page title, styled through the .main-header rule defined above.
	_TITLE_HTML = '<h1 class="main-header">💰 FinBERT: Financial Sentiment Analysis</h1>'
	st.markdown(_TITLE_HTML, unsafe_allow_html=True)

	# Sidebar: model description followed by the user-adjustable settings.
	# The three settings are module-level names read by the rest of the script.
	_sidebar = st.sidebar

	_sidebar.header("ℹ️ About")
	_sidebar.markdown("""
	Model: `yiyanghkust/finbert-tone`
	Trained specifically on financial texts for accurate sentiment analysis of:
	- Financial news
	- Earnings reports
	- Market analysis
	- Investment research
	""")

	_sidebar.header("⚙️ Settings")
	confidence_threshold = _sidebar.slider(
	    "Confidence Threshold",
	    min_value=0.0,
	    max_value=1.0,
	    value=0.5,
	    help="Minimum confidence for sentiment classification",
	)
	show_probabilities = _sidebar.checkbox("Show All Probabilities", value=True)
	batch_analysis = _sidebar.checkbox(
	    "Enable Batch Analysis",
	    help="Analyze multiple texts at once",
	)

	@st.cache_resource(show_spinner=False)
	def _load_finbert():
	    """Download (or read from the local cache) the FinBERT tokenizer and model.

	    Raises whatever ``from_pretrained`` raises. Letting the exception escape
	    matters: ``st.cache_resource`` stores return values only, so a failed
	    attempt is not cached and the next rerun retries the download.

	    Returns:
	        tuple: ``(tokenizer, model)``.
	    """
	    cache_dir = "/tmp/huggingface"
	    os.makedirs(cache_dir, exist_ok=True)

	    with st.spinner("Loading FinBERT model... This may take a moment."):
	        tokenizer = AutoTokenizer.from_pretrained(
	            "yiyanghkust/finbert-tone",
	            cache_dir=cache_dir,
	        )
	        model = AutoModelForSequenceClassification.from_pretrained(
	            "yiyanghkust/finbert-tone",
	            cache_dir=cache_dir,
	        )
	    return tokenizer, model


	def load_model():
	    """Load FinBERT model and tokenizer with error handling.

	    Returns:
	        tuple: ``(tokenizer, model, None)`` on success, or
	        ``(None, None, message)`` where ``message`` is the text of the
	        exception raised while loading.
	    """
	    try:
	        tokenizer, model = _load_finbert()
	    except Exception as e:  # UI boundary: report the failure instead of crashing
	        return None, None, str(e)
	    return tokenizer, model, None


	# The function used to be the cached object itself; keep its .clear() handle.
	load_model.clear = _load_finbert.clear

	def analyze_sentiment(text, tokenizer, model):
	    """Score the sentiment of ``text`` with the given tokenizer and model.

	    Class names are taken from ``model.config.id2label`` rather than assumed
	    from the logit position, because ``yiyanghkust/finbert-tone`` orders its
	    classes Neutral, Positive, Negative.

	    Args:
	        text: Text to classify; runs of whitespace are collapsed first.
	        tokenizer: Callable returning model inputs as PyTorch tensors.
	        model: Callable returning an object with a ``logits`` tensor.

	    Returns:
	        tuple: ``(sentiment_scores, primary_sentiment, max_prob, None)`` on
	        success, where ``sentiment_scores`` maps "Negative", "Neutral" and
	        "Positive" (in that order) to probabilities. On failure, including
	        empty input, ``(None, None, None, message)``.
	    """
	    # Order in which scores are reported to callers.
	    report_order = ("Negative", "Neutral", "Positive")
	    # Class order documented for finbert-tone; used when the model config
	    # does not provide exactly the three expected names.
	    fallback_order = ("Neutral", "Positive", "Negative")
	    try:
	        # Preprocess text
	        text = re.sub(r'\s+', ' ', text.strip())
	        if not text:
	            return None, None, None, "Input text is empty"

	        inputs = tokenizer(
	            text,
	            return_tensors="pt",
	            truncation=True,
	            padding=True,
	            max_length=512,
	        )

	        with torch.no_grad():
	            outputs = model(**inputs)
	        # One input sequence -> take row 0 of the (1, num_classes) softmax.
	        probs = F.softmax(outputs.logits, dim=-1)[0].tolist()

	        id2label = getattr(getattr(model, "config", None), "id2label", None) or {}
	        names = [
	            str(id2label.get(index, id2label.get(str(index), ""))).capitalize()
	            for index in range(len(probs))
	        ]
	        if sorted(names) != sorted(report_order):
	            names = list(fallback_order)

	        by_name = dict(zip(names, probs))
	        sentiment_scores = {label: by_name[label] for label in report_order}

	        # Determine primary sentiment
	        primary_sentiment = max(sentiment_scores, key=sentiment_scores.get)
	        max_prob = sentiment_scores[primary_sentiment]

	        return sentiment_scores, primary_sentiment, max_prob, None
	    except Exception as e:  # reported to the UI by the caller
	        return None, None, None, str(e)

	def create_sentiment_chart(sentiment_scores):
	    """Create an interactive bar chart of the sentiment scores.

	    Bar colours are looked up by label, so they stay correct whatever the
	    order or number of entries in ``sentiment_scores``; unknown labels are
	    drawn in grey.

	    Args:
	        sentiment_scores: Mapping of sentiment label to score.

	    Returns:
	        A ``plotly.graph_objects.Figure`` with one bar per label.
	    """
	    palette = {
	        "Negative": '#f44336',
	        "Neutral": '#9c27b0',
	        "Positive": '#4caf50',
	    }
	    labels = list(sentiment_scores)
	    values = [sentiment_scores[label] for label in labels]
	    colors = [palette.get(label, '#9e9e9e') for label in labels]

	    fig = go.Figure(data=[
	        go.Bar(
	            x=labels,
	            y=values,
	            marker_color=colors,
	            text=[f'{v:.3f}' for v in values],
	            textposition='auto',
	        )
	    ])

	    fig.update_layout(
	        title="Sentiment Analysis Results",
	        xaxis_title="Sentiment",
	        yaxis_title="Confidence Score",
	        yaxis=dict(range=[0, 1]),  # scores are plotted on a fixed 0..1 axis
	        height=400,
	        showlegend=False,
	    )

	    return fig

	# Emoji and progress-bar colour used for each sentiment label in the result cards.
	_SENTIMENT_EMOJIS = {"Negative": "📉", "Neutral": "😐", "Positive": "📈"}
	_SENTIMENT_COLORS = {"Negative": "red", "Neutral": "gray", "Positive": "green"}
	# Widget key of the single-text input, so the Clear callback can reset it.
	_SINGLE_TEXT_KEY = "single_text_input"


	def _clear_single_text():
	    """Empty the single-text input.

	    Used as a button ``on_click`` callback: callbacks run before the script
	    reruns, which is when a widget's session-state value may be overwritten.
	    """
	    st.session_state[_SINGLE_TEXT_KEY] = ""


	def _sentiment_card_html(sentiment, score):
	    """Return the HTML card (label, score, proportional bar) for one sentiment."""
	    emoji = _SENTIMENT_EMOJIS[sentiment]
	    css_class = sentiment.lower()  # matches the .negative/.neutral/.positive rules
	    bar_color = _SENTIMENT_COLORS[sentiment]
	    # The template lines are deliberately left unindented so the emitted
	    # markup is exactly what the page rendered before.
	    return f"""
	<div class="sentiment-card {css_class}">
	<h4>{emoji} {sentiment}</h4>
	<h2>{score:.3f}</h2>
	<div style="width: 100%; background-color: #ddd; border-radius: 25px;">
	<div style="width: {score*100}%; height: 10px; background-color: {bar_color}; border-radius: 25px;"></div>
	</div>
	</div>
	"""


	def _score_texts(texts):
	    """Analyze every text in ``texts`` while updating a progress bar.

	    Blank texts are not sent to the model. Returns ``(outcomes, failures)``:
	    ``outcomes`` holds one ``(position, scores, primary, confidence)`` tuple
	    per analyzed text, ``failures`` counts blank texts and failed analyses.
	    """
	    outcomes = []
	    failures = 0
	    progress_bar = st.progress(0)

	    for position, text in enumerate(texts):
	        if text.strip():
	            scores, primary, confidence, err = analyze_sentiment(text, tokenizer, model)
	        else:
	            err = "empty text"

	        if err:
	            failures += 1
	        else:
	            outcomes.append((position, scores, primary, confidence))

	        # Positional progress: independent of any DataFrame index labels.
	        progress_bar.progress((position + 1) / len(texts))

	    return outcomes, failures


	def _report_failures(failures):
	    """Tell the user how many texts were left out of the results, if any."""
	    if failures:
	        st.warning(f"{failures} text(s) were empty or could not be analyzed and were skipped.")


	def _render_single_result(text, sentiment_scores, primary_sentiment, confidence):
	    """Show metrics, optional per-sentiment cards and the chart for one text."""
	    st.header("📊 Analysis Results")

	    # Primary sentiment with confidence
	    col1, col2, col3 = st.columns(3)

	    with col1:
	        st.metric(
	            "Primary Sentiment",
	            f"{_SENTIMENT_EMOJIS[primary_sentiment]} {primary_sentiment}",
	            delta=f"{confidence:.1%} confidence",
	        )

	    with col2:
	        st.metric(
	            "Text Length",
	            f"{len(text)} characters",
	            delta=f"{len(text.split())} words",
	        )

	    with col3:
	        reliability = "High" if confidence > 0.7 else "Medium" if confidence > 0.5 else "Low"
	        st.metric("Reliability", reliability)

	    # Apply the sidebar's confidence threshold, which was previously ignored.
	    if confidence < confidence_threshold:
	        st.warning(
	            f"Top score {confidence:.1%} is below the confidence threshold "
	            f"of {confidence_threshold:.1%}; treat this classification as uncertain."
	        )

	    # Detailed probabilities
	    if show_probabilities:
	        st.subheader("Detailed Sentiment Scores")
	        for sentiment, score in sentiment_scores.items():
	            st.markdown(_sentiment_card_html(sentiment, score), unsafe_allow_html=True)

	    # Visualization
	    st.subheader("📈 Sentiment Visualization")
	    fig = create_sentiment_chart(sentiment_scores)
	    st.plotly_chart(fig, use_container_width=True)


	def _render_single_analysis():
	    """Single-text mode: input area, Analyze/Clear buttons and the results."""
	    st.header("📝 Single Text Analysis")
	    text = st.text_area(
	        "Enter financial news, report, or analysis:",
	        height=150,
	        placeholder="Example: The company reported strong quarterly earnings with revenue growth of 15% year-over-year...",
	        key=_SINGLE_TEXT_KEY,
	    )

	    analyze_col, clear_col, _ = st.columns([1, 1, 2])
	    with analyze_col:
	        analyze_button = st.button("🔍 Analyze Sentiment", type="primary")
	    with clear_col:
	        # A bare st.rerun() leaves the widget value untouched, so the reset
	        # is done in the callback; the click itself triggers the rerun.
	        st.button("🗑️ Clear", on_click=_clear_single_text)

	    if not analyze_button:
	        return
	    if not text.strip():
	        st.warning("Please enter some text to analyze.")
	        return

	    with st.spinner("Analyzing sentiment..."):
	        sentiment_scores, primary_sentiment, confidence, error = analyze_sentiment(text, tokenizer, model)

	    if error:
	        st.error(f"Analysis failed: {error}")
	    else:
	        _render_single_result(text, sentiment_scores, primary_sentiment, confidence)


	def _render_batch_texts():
	    """Batch mode for pasted texts (one per line): summary, table, CSV download."""
	    texts_input = st.text_area(
	        "Enter multiple texts (one per line):",
	        height=200,
	        placeholder="Text 1: Company reports strong earnings...\nText 2: Market volatility increases...\nText 3: New regulations impact sector...",
	    )

	    if not (st.button("🔍 Analyze All Texts") and texts_input.strip()):
	        return

	    texts = [line.strip() for line in texts_input.split('\n') if line.strip()]
	    outcomes, failures = _score_texts(texts)
	    _report_failures(failures)

	    results = []
	    for position, scores, primary, confidence in outcomes:
	        text = texts[position]
	        results.append({
	            # Long texts are shortened to keep the table readable.
	            'Text': text[:100] + '...' if len(text) > 100 else text,
	            'Primary Sentiment': primary,
	            'Confidence': confidence,
	            'Negative': scores['Negative'],
	            'Neutral': scores['Neutral'],
	            'Positive': scores['Positive'],
	        })

	    if not results:
	        return

	    df = pd.DataFrame(results)

	    # Summary statistics
	    st.subheader("📈 Batch Analysis Summary")
	    counts = df['Primary Sentiment'].value_counts()
	    for column, label in zip(st.columns(3), ("Positive", "Neutral", "Negative")):
	        count = int(counts.get(label, 0))
	        with column:
	            st.metric(f"{label} Texts", count, f"{count/len(df)*100:.1f}%")

	    # Results table
	    st.subheader("📋 Detailed Results")
	    st.dataframe(df, use_container_width=True)

	    # Download results
	    csv = df.to_csv(index=False)
	    st.download_button(
	        "📥 Download Results (CSV)",
	        csv,
	        f"sentiment_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
	        "text/csv",
	    )


	def _render_batch_csv():
	    """Batch mode for an uploaded CSV: score its ``text`` column and offer a download."""
	    uploaded_file = st.file_uploader(
	        "Choose a CSV file with a 'text' column",
	        type=['csv'],
	    )

	    if uploaded_file is None:
	        return

	    try:
	        df = pd.read_csv(uploaded_file)

	        if 'text' not in df.columns:
	            st.error("CSV file must contain a 'text' column")
	            return

	        st.write(f"Loaded {len(df)} texts from CSV file")
	        st.dataframe(df.head(), use_container_width=True)

	        if not st.button("🔍 Analyze CSV Data"):
	            return

	        records = df.to_dict('records')
	        # Missing cells become "" (and are skipped) instead of the string "nan".
	        texts = ["" if pd.isna(record['text']) else str(record['text']) for record in records]
	        outcomes, failures = _score_texts(texts)
	        _report_failures(failures)

	        results = []
	        for position, scores, primary, confidence in outcomes:
	            result_row = dict(records[position])
	            result_row.update({
	                'Primary Sentiment': primary,
	                'Confidence': confidence,
	                'Negative Score': scores['Negative'],
	                'Neutral Score': scores['Neutral'],
	                'Positive Score': scores['Positive'],
	            })
	            results.append(result_row)

	        if results:
	            results_df = pd.DataFrame(results)

	            # Display results
	            st.subheader("📋 Analysis Results")
	            st.dataframe(results_df, use_container_width=True)

	            # Download enhanced results
	            csv = results_df.to_csv(index=False)
	            st.download_button(
	                "📥 Download Enhanced Results (CSV)",
	                csv,
	                f"enhanced_sentiment_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
	                "text/csv",
	            )

	    except Exception as e:  # UI boundary: show the problem instead of a traceback
	        st.error(f"Error processing CSV file: {str(e)}")


	# Load model
	tokenizer, model, error = load_model()

	if error:
	    st.error(f"Failed to load model: {error}")
	    st.stop()

	if tokenizer is not None and model is not None:
	    st.success("✅ FinBERT model loaded successfully!")

	    # Main analysis interface
	    if not batch_analysis:
	        _render_single_analysis()
	    else:
	        # Batch analysis mode
	        st.header("📊 Batch Analysis")

	        # Option to upload file or enter multiple texts
	        analysis_method = st.radio(
	            "Choose analysis method:",
	            ["Enter multiple texts", "Upload CSV file"],
	        )

	        if analysis_method == "Enter multiple texts":
	            _render_batch_texts()
	        elif analysis_method == "Upload CSV file":
	            _render_batch_csv()

	# Footer: a horizontal rule followed by a centred usage tip and credits.
	_FOOTER_HTML = """
	<div style='text-align: center; color: #666; margin-top: 2rem;'>
	<p>💡 <strong>Tip:</strong> For best results, use complete sentences and financial context</p>
	<p>Built with Streamlit • Powered by FinBERT</p>
	</div>
	"""
	st.markdown("---")
	st.markdown(_FOOTER_HTML, unsafe_allow_html=True)