ResearchEngineering committed on
Commit
200ce84
·
verified ·
1 Parent(s): 596bb4b

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +367 -1
src/streamlit_app.py CHANGED
@@ -43,6 +43,8 @@ st.altair_chart(alt.Chart(df, height=700, width=700)
43
 
44
  '''
45
 
 
 
46
  import streamlit as st
47
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
48
  import torch
@@ -75,4 +77,368 @@ if st.button("Анализировать тональность") and text.strip
75
 
76
  labels = ["📉 Negative", "😐 Neutral", "📈 Positive"]
77
  for label, prob in zip(labels, probs):
78
- st.write(f"**{label}:** {prob.item():.3f}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  '''
45
 
46
+
47
+ '''
48
  import streamlit as st
49
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
50
  import torch
 
77
 
78
  labels = ["📉 Negative", "😐 Neutral", "📈 Positive"]
79
  for label, prob in zip(labels, probs):
80
+ st.write(f"**{label}:** {prob.item():.3f}")
81
+
82
+ '''
83
+
84
+
85
+ import streamlit as st
86
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
87
+ import torch
88
+ import torch.nn.functional as F
89
+ import os
90
+ import pandas as pd
91
+ import plotly.express as px
92
+ import plotly.graph_objects as go
93
+ from datetime import datetime
94
+ import re
95
+
96
# Page configuration
st.set_page_config(
    page_title="FinBERT Sentiment Analyzer",
    page_icon="💰",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS for better styling; the class names defined here are referenced
# by the HTML snippets rendered further down the script.
st.markdown("""
<style>
.main-header {
    text-align: center;
    color: #1f77b4;
    margin-bottom: 2rem;
}
.sentiment-card {
    padding: 1rem;
    border-radius: 10px;
    margin: 0.5rem 0;
    text-align: center;
}
.negative { background-color: #ffebee; border-left: 5px solid #f44336; }
.neutral { background-color: #f3e5f5; border-left: 5px solid #9c27b0; }
.positive { background-color: #e8f5e8; border-left: 5px solid #4caf50; }
.metric-container {
    background-color: #f8f9fa;
    padding: 1rem;
    border-radius: 10px;
    margin: 1rem 0;
}
</style>
""", unsafe_allow_html=True)

st.markdown('<h1 class="main-header">💰 FinBERT: Financial Sentiment Analysis</h1>', unsafe_allow_html=True)

# Sidebar: static model description plus the settings read by the rest of the
# script (confidence_threshold, show_probabilities, batch_analysis).
with st.sidebar:
    st.header("ℹ️ About")
    st.markdown("""
**Model:** `yiyanghkust/finbert-tone`
Trained specifically on financial texts for accurate sentiment analysis of:
- Financial news
- Earnings reports
- Market analysis
- Investment research
""")

    st.header("⚙️ Settings")
    confidence_threshold = st.slider("Confidence Threshold", 0.0, 1.0, 0.5, help="Minimum confidence for sentiment classification")
    show_probabilities = st.checkbox("Show All Probabilities", value=True)
    batch_analysis = st.checkbox("Enable Batch Analysis", help="Analyze multiple texts at once")
148
+
149
@st.cache_resource(show_spinner=False)
def _load_finbert(model_name, cache_dir):
    """Download/load the tokenizer and model; raises on failure.

    Kept separate from ``load_model`` so that only a successful load is
    stored by ``st.cache_resource``: a raised exception is not cached, so a
    transient failure can be retried on the next script run.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        cache_dir=cache_dir
    )
    return tokenizer, model


def load_model():
    """Load FinBERT model and tokenizer with error handling.

    Returns:
        A ``(tokenizer, model, error)`` tuple. On success ``error`` is
        ``None``; on failure ``tokenizer`` and ``model`` are ``None`` and
        ``error`` is a non-empty description of the exception.
    """
    try:
        cache_dir = "/tmp/huggingface"
        os.makedirs(cache_dir, exist_ok=True)

        with st.spinner("Loading FinBERT model... This may take a moment."):
            tokenizer, model = _load_finbert("yiyanghkust/finbert-tone", cache_dir)
        return tokenizer, model, None
    except Exception as e:
        # str(e) can be empty (e.g. a bare Exception()); fall back to repr so
        # callers that test `if error:` still see the failure.
        return None, None, str(e) or repr(e)
168
+
169
def analyze_sentiment(text, tokenizer, model):
    """Classify the sentiment of ``text`` and return per-class probabilities.

    Args:
        text: Input string; runs of whitespace are collapsed before tokenizing.
        tokenizer: Callable returning the keyword inputs for ``model``.
        model: Callable returning an object with a ``logits`` tensor. If it
            exposes ``config.id2label`` naming exactly negative / neutral /
            positive (any letter case), that mapping decides which logit
            belongs to which label.

    Returns:
        ``(sentiment_scores, primary_sentiment, max_prob, error)``.
        ``sentiment_scores`` maps "Negative", "Neutral", "Positive" (in that
        order) to softmax probabilities. On failure the first three items are
        ``None`` and ``error`` is a non-empty message; on success ``error``
        is ``None``.
    """
    labels = ["Negative", "Neutral", "Positive"]
    try:
        # Preprocess text
        text = re.sub(r'\s+', ' ', text.strip())

        inputs = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=512
        )

        with torch.no_grad():
            outputs = model(**inputs)
            # A single text is passed in, so row 0 holds its class probabilities.
            probs = F.softmax(outputs.logits, dim=1)[0]
        values = [prob.item() for prob in probs]

        # Resolve label names from the model's own metadata instead of
        # assuming an output order: the finbert-tone model card lists
        # 0=Neutral, 1=Positive, 2=Negative, which a positional
        # Negative/Neutral/Positive assignment gets wrong.
        id2label = getattr(getattr(model, "config", None), "id2label", None) or {}
        named = {}
        for index, value in enumerate(values):
            raw_name = id2label.get(index, id2label.get(str(index), ""))
            named[str(raw_name).strip().capitalize()] = value

        if set(named) == set(labels):
            sentiment_scores = {label: named[label] for label in labels}
        else:
            # No usable label metadata: keep the historical positional order.
            sentiment_scores = dict(zip(labels, values))

        # Determine primary sentiment
        primary_sentiment = max(sentiment_scores, key=sentiment_scores.get)
        max_prob = sentiment_scores[primary_sentiment]

        return sentiment_scores, primary_sentiment, max_prob, None
    except Exception as e:
        # Guarantee a truthy error string even for message-less exceptions.
        return None, None, None, str(e) or repr(e)
197
+
198
def create_sentiment_chart(sentiment_scores):
    """Create an interactive bar chart of sentiment scores.

    Args:
        sentiment_scores: Mapping of sentiment label to score in [0, 1].

    Returns:
        A plotly ``go.Figure`` with one bar per label.
    """
    # Colours are looked up by label name so a bar keeps its colour whatever
    # the order of the mapping; labels outside the three known ones get grey.
    label_colors = {
        'Negative': '#f44336',
        'Neutral': '#9c27b0',
        'Positive': '#4caf50',
    }
    labels = list(sentiment_scores.keys())
    values = list(sentiment_scores.values())
    colors = [label_colors.get(label, '#9e9e9e') for label in labels]

    fig = go.Figure(data=[
        go.Bar(
            x=labels,
            y=values,
            marker_color=colors,
            text=[f'{v:.3f}' for v in values],
            textposition='auto',
        )
    ])

    fig.update_layout(
        title="Sentiment Analysis Results",
        xaxis_title="Sentiment",
        yaxis_title="Confidence Score",
        yaxis=dict(range=[0, 1]),
        height=400,
        showlegend=False
    )

    return fig
224
+
225
# Load model
tokenizer, model, error = load_model()

# Compare against None rather than relying on truthiness: an exception with an
# empty message yields error == "", which must still halt the script instead
# of falling through to a page with no analysis UI.
if error is not None or tokenizer is None or model is None:
    st.error(f"Failed to load model: {error or 'unknown error'}")
    st.stop()
231
+
232
def _clear_single_text():
    """Reset the single-text input box (on_click callback of the Clear button)."""
    st.session_state["single_text_input"] = ""


if tokenizer is not None and model is not None:
    st.success("✅ FinBERT model loaded successfully!")

    # Main analysis interface
    if not batch_analysis:
        st.header("📝 Single Text Analysis")
        text = st.text_area(
            "Enter financial news, report, or analysis:",
            height=150,
            placeholder="Example: The company reported strong quarterly earnings with revenue growth of 15% year-over-year...",
            key="single_text_input",
        )

        col1, col2, col3 = st.columns([1, 1, 2])
        with col1:
            analyze_button = st.button("🔍 Analyze Sentiment", type="primary")
        with col2:
            # A bare rerun leaves the widget value untouched; the callback
            # resets the keyed session-state entry so the box really empties.
            st.button("🗑️ Clear", on_click=_clear_single_text)

        if analyze_button and not text.strip():
            st.warning("Please enter some text to analyze.")

        if analyze_button and text.strip():
            with st.spinner("Analyzing sentiment..."):
                sentiment_scores, primary_sentiment, confidence, error = analyze_sentiment(text, tokenizer, model)

            if error is not None or sentiment_scores is None:
                st.error(f"Analysis failed: {error}")
            else:
                # Results section
                st.header("📊 Analysis Results")

                # Primary sentiment with confidence
                col1, col2, col3 = st.columns(3)

                sentiment_emojis = {"Negative": "📉", "Neutral": "😐", "Positive": "📈"}
                sentiment_colors = {"Negative": "red", "Neutral": "gray", "Positive": "green"}

                with col1:
                    st.metric(
                        "Primary Sentiment",
                        f"{sentiment_emojis[primary_sentiment]} {primary_sentiment}",
                        delta=f"{confidence:.1%} confidence"
                    )

                with col2:
                    st.metric(
                        "Text Length",
                        f"{len(text)} characters",
                        delta=f"{len(text.split())} words"
                    )

                with col3:
                    reliability = "High" if confidence > 0.7 else "Medium" if confidence > 0.5 else "Low"
                    st.metric("Reliability", reliability)

                # Honour the sidebar threshold, which was otherwise unused.
                if confidence < confidence_threshold:
                    st.warning(
                        f"Confidence {confidence:.1%} is below the selected threshold "
                        f"of {confidence_threshold:.0%}; treat this classification with caution."
                    )

                # Detailed probabilities
                if show_probabilities:
                    st.subheader("Detailed Sentiment Scores")

                    for sentiment, score in sentiment_scores.items():
                        emoji = sentiment_emojis[sentiment]
                        # CSS class names are the lower-cased labels.
                        color = "negative" if sentiment == "Negative" else "neutral" if sentiment == "Neutral" else "positive"

                        st.markdown(f"""
<div class="sentiment-card {color}">
<h4>{emoji} {sentiment}</h4>
<h2>{score:.3f}</h2>
<div style="width: 100%; background-color: #ddd; border-radius: 25px;">
<div style="width: {score*100}%; height: 10px; background-color: {sentiment_colors[sentiment]}; border-radius: 25px;"></div>
</div>
</div>
""", unsafe_allow_html=True)

                # Visualization
                st.subheader("📈 Sentiment Visualization")
                fig = create_sentiment_chart(sentiment_scores)
                st.plotly_chart(fig, use_container_width=True)

    else:
        # Batch analysis mode
        st.header("📊 Batch Analysis")

        # Option to upload file or enter multiple texts
        analysis_method = st.radio(
            "Choose analysis method:",
            ["Enter multiple texts", "Upload CSV file"]
        )

        if analysis_method == "Enter multiple texts":
            texts_input = st.text_area(
                "Enter multiple texts (one per line):",
                height=200,
                placeholder="Text 1: Company reports strong earnings...\nText 2: Market volatility increases...\nText 3: New regulations impact sector..."
            )

            if st.button("🔍 Analyze All Texts") and texts_input.strip():
                texts = [line.strip() for line in texts_input.split('\n') if line.strip()]

                if texts:
                    results = []
                    skipped = 0
                    progress_bar = st.progress(0)

                    for i, text in enumerate(texts):
                        sentiment_scores, primary_sentiment, confidence, error = analyze_sentiment(text, tokenizer, model)

                        if error is None and sentiment_scores is not None:
                            results.append({
                                'Text': text[:100] + '...' if len(text) > 100 else text,
                                'Primary Sentiment': primary_sentiment,
                                'Confidence': confidence,
                                'Negative': sentiment_scores['Negative'],
                                'Neutral': sentiment_scores['Neutral'],
                                'Positive': sentiment_scores['Positive']
                            })
                        else:
                            skipped += 1

                        progress_bar.progress((i + 1) / len(texts))

                    # Report failures instead of dropping them silently.
                    if skipped:
                        st.warning(f"{skipped} of {len(texts)} texts could not be analyzed and were skipped.")

                    if results:
                        df = pd.DataFrame(results)

                        # Summary statistics
                        st.subheader("📈 Batch Analysis Summary")
                        counts = df['Primary Sentiment'].value_counts()

                        for column, label in zip(st.columns(3), ("Positive", "Neutral", "Negative")):
                            count = int(counts.get(label, 0))
                            with column:
                                st.metric(f"{label} Texts", count, f"{count/len(df)*100:.1f}%")

                        # Results table
                        st.subheader("📋 Detailed Results")
                        st.dataframe(df, use_container_width=True)

                        # Download results
                        csv = df.to_csv(index=False)
                        st.download_button(
                            "📥 Download Results (CSV)",
                            csv,
                            f"sentiment_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
                            "text/csv"
                        )

        elif analysis_method == "Upload CSV file":
            uploaded_file = st.file_uploader(
                "Choose a CSV file with a 'text' column",
                type=['csv']
            )

            if uploaded_file is not None:
                try:
                    df = pd.read_csv(uploaded_file)

                    if 'text' not in df.columns:
                        st.error("CSV file must contain a 'text' column")
                    else:
                        st.write(f"Loaded {len(df)} texts from CSV file")
                        st.dataframe(df.head(), use_container_width=True)

                        if st.button("🔍 Analyze CSV Data"):
                            results = []
                            skipped = 0
                            progress_bar = st.progress(0)

                            # Count rows by position: the index label yielded
                            # by iterrows() is not guaranteed to run 0..n-1,
                            # and the progress value must stay within [0, 1].
                            for position, (_, row) in enumerate(df.iterrows(), start=1):
                                if pd.isna(row['text']):
                                    # An empty cell would otherwise be scored
                                    # as the literal string "nan".
                                    skipped += 1
                                    progress_bar.progress(position / len(df))
                                    continue

                                text = str(row['text'])
                                sentiment_scores, primary_sentiment, confidence, error = analyze_sentiment(text, tokenizer, model)

                                if error is None and sentiment_scores is not None:
                                    result_row = row.to_dict()
                                    result_row.update({
                                        'Primary Sentiment': primary_sentiment,
                                        'Confidence': confidence,
                                        'Negative Score': sentiment_scores['Negative'],
                                        'Neutral Score': sentiment_scores['Neutral'],
                                        'Positive Score': sentiment_scores['Positive']
                                    })
                                    results.append(result_row)
                                else:
                                    skipped += 1

                                progress_bar.progress(position / len(df))

                            if skipped:
                                st.warning(f"{skipped} of {len(df)} rows had missing text or could not be analyzed and were skipped.")

                            if results:
                                results_df = pd.DataFrame(results)

                                # Display results
                                st.subheader("📋 Analysis Results")
                                st.dataframe(results_df, use_container_width=True)

                                # Download enhanced results
                                csv = results_df.to_csv(index=False)
                                st.download_button(
                                    "📥 Download Enhanced Results (CSV)",
                                    csv,
                                    f"enhanced_sentiment_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
                                    "text/csv"
                                )

                except Exception as e:
                    st.error(f"Error processing CSV file: {str(e)}")
436
+
437
# Footer: horizontal rule followed by a centered usage tip and credits.
st.markdown("---")
st.markdown("""
<div style='text-align: center; color: #666; margin-top: 2rem;'>
<p>💡 <strong>Tip:</strong> For best results, use complete sentences and financial context</p>
<p>Built with Streamlit • Powered by FinBERT</p>
</div>
""", unsafe_allow_html=True)