SelmaNajih001 committed on
Commit
d8c3dc6
·
verified ·
1 Parent(s): 9567589

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -65
app.py CHANGED
@@ -8,11 +8,12 @@ import yfinance as yf
8
  df = pd.DataFrame(load_dataset("SelmaNajih001/NewsSentiment")["train"])
9
  df = df[df["Company"].isin(["Tesla", "Microsoft", "Apple", "Facebook", "Google"])]
10
 
11
- # --- CONVERT DATE TO DATETIME SAFELY ---
12
  df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
13
  df['Year'] = df['Date'].dt.year
14
  df['Month'] = df['Date'].dt.to_period('M')
15
  df['Day'] = df['Date'].dt.date
 
16
 
17
  # --- TICKERS ---
18
  TICKERS = {
@@ -24,49 +25,61 @@ TICKERS = {
24
  "NASDAQ": "^IXIC"
25
  }
26
 
27
- # --- FETCH STOCK PRICES ROBUSTLY ---
28
  prices = {}
29
  for company, ticker in TICKERS.items():
30
- start_date = df['Date'].min()
31
  end_date = pd.Timestamp.today()
32
  df_prices = yf.download(ticker, start=start_date, end=end_date)
33
-
34
- # Flatten MultiIndex columns if present
35
  if isinstance(df_prices.columns, pd.MultiIndex):
36
  df_prices.columns = ['_'.join([str(c) for c in col]).strip() for col in df_prices.columns]
37
-
38
- # Ensure 'Close' column exists
39
- if 'Close' not in df_prices.columns:
40
- if 'Adj Close' in df_prices.columns:
41
- df_prices['Close'] = df_prices['Adj Close']
42
- else:
43
- df_prices['Close'] = None
44
-
45
- df_prices = df_prices.reset_index()[['Date', 'Close']]
46
  prices[company] = df_prices
47
 
48
- # --- MERGE PRICES INTO DATASET ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  df_merged = df.copy()
50
- for company, df_price in prices.items():
51
- if company == "NASDAQ":
52
- continue # NASDAQ only for comparison
53
- mask = df_merged['Company'] == company
54
- temp = df_merged[mask].merge(df_price, on='Date', how='left', suffixes=('', '_tmp'))
55
- df_merged.loc[mask, 'Close'] = temp['Close'].values
56
 
57
  # --- GRADIO FUNCTION ---
58
- def show_sentiment(selected_companies, aggregation="Day"):
59
- # Filter selected companies
60
- if selected_companies:
61
- df_filtered = df_merged[df_merged['Company'].isin(selected_companies)].copy()
62
- else:
63
- df_filtered = df_merged.copy()
64
- # Add NASDAQ data for general sentiment
65
- df_nasdaq = prices['NASDAQ'].copy()
66
- df_nasdaq['Company'] = "NASDAQ"
67
- df_filtered = pd.concat([df_filtered, df_nasdaq], ignore_index=True, sort=False)
68
 
69
- # Determine aggregation column
 
 
 
 
 
70
  if aggregation == "Day":
71
  group_col = "Day"
72
  elif aggregation == "Month":
@@ -76,39 +89,67 @@ def show_sentiment(selected_companies, aggregation="Day"):
76
  group_col = "Year"
77
  else:
78
  group_col = "Day"
79
-
80
- # Aggregate sentiment
81
- if selected_companies:
82
- df_grouped = df_filtered.groupby([group_col, 'Company'])['Score'].sum().reset_index()
83
- else:
84
- df_grouped = df_filtered.groupby([group_col, 'Company']).agg({
85
- 'Score':'sum',
86
- 'Close':'last'
87
- }).reset_index()
88
-
89
- # --- PLOT ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  fig = px.line(df_grouped, x=group_col, y='Score', color='Company',
91
- title=f"Sentiment Score by {aggregation} per Company")
92
-
93
- # Add stock price as secondary y-axis if available
94
- if 'Close' in df_grouped.columns:
95
- for c in df_grouped['Company'].unique():
96
- df_c = df_grouped[df_grouped['Company'] == c]
97
- if df_c['Close'].notnull().any():
98
- fig.add_scatter(x=df_c[group_col], y=df_c['Close'], mode='lines',
99
- name=f"{c} Price", yaxis="y2", line=dict(dash='dot'))
100
- fig.update_layout(
101
- yaxis2=dict(
102
- title="Stock Price",
103
- overlaying="y",
104
- side="right"
105
- )
 
 
 
 
 
106
  )
 
107
 
108
  return df_grouped.tail(30), fig
109
 
110
  # --- GRADIO INTERFACE ---
111
- companies = sorted(df['Company'].unique().tolist())
 
112
 
113
  demo = gr.Interface(
114
  fn=show_sentiment,
@@ -116,13 +157,18 @@ demo = gr.Interface(
116
  gr.Dropdown(
117
  choices=companies,
118
  value=None,
119
- label="Select Companies (leave empty for general sentiment with NASDAQ)",
120
- multiselect=True
121
  ),
122
  gr.Radio(
123
- choices=["Day", "Month", "Year"],
124
- value="Day",
125
  label="Aggregation Level"
 
 
 
 
 
126
  )
127
  ],
128
  outputs=[
@@ -130,7 +176,7 @@ demo = gr.Interface(
130
  gr.Plot(label="Sentiment Trend"),
131
  ],
132
  title="Dynamic Sentiment Dashboard",
133
- description="Shows sentiment scores aggregated by day, month, or year. Positive = +score, Negative = -score, Neutral = 0."
134
  )
135
 
136
- demo.launch()
 
8
  df = pd.DataFrame(load_dataset("SelmaNajih001/NewsSentiment")["train"])
9
  df = df[df["Company"].isin(["Tesla", "Microsoft", "Apple", "Facebook", "Google"])]
10
 
11
# --- CONVERT DATE TO DATETIME ---
# Unparseable dates become NaT (errors='coerce'); their Year is NaN, so the
# ">= 2015" filter at the end of this section drops those rows as well.
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
date_parts = df['Date'].dt
df['Year'] = date_parts.year
df['Month'] = date_parts.to_period('M')
df['Day'] = date_parts.date
df = df.loc[df['Year'] >= 2015]  # year filter
17
 
18
  # --- TICKERS ---
19
  TICKERS = {
 
25
  "NASDAQ": "^IXIC"
26
  }
27
 
28
# --- FETCH STOCK PRICES ---
# Build `prices`: company name -> DataFrame with a 'Date' column and one
# close column named 'Close_<ticker>' ('Close_NASDAQ' for the index).
prices = {}
# The download window is the same for every ticker, so compute it once.
start_date = "2015-01-01"
end_date = pd.Timestamp.today()
for company, ticker in TICKERS.items():
    close_col = f'Close_{ticker}'
    df_prices = yf.download(ticker, start=start_date, end=end_date)
    if isinstance(df_prices.columns, pd.MultiIndex):
        # Flatten multi-level column labels into single "A_B" strings.
        df_prices.columns = ['_'.join([str(c) for c in col]).strip() for col in df_prices.columns]
    elif 'Close' in df_prices.columns:
        # Flat columns carry a plain 'Close'; give it the same
        # ticker-suffixed name so the selection below works for both shapes.
        df_prices = df_prices.rename(columns={'Close': close_col})
    df_prices = df_prices.reset_index()[['Date', close_col]]
    if company == "NASDAQ":
        # The index is referenced by a fixed column name everywhere else.
        df_prices = df_prices.rename(columns={close_col: 'Close_NASDAQ'})
    prices[company] = df_prices
40
 
41
# --- PRICES PER AGGREGATION LEVEL ---
def get_prices_for_agg(agg_col):
    """Return each company's closing prices keyed by one aggregation level.

    Reads the module-level ``prices`` mapping (company -> frame with a
    ``Date`` column and one close column) and ``TICKERS``. The stored
    frames are copied, never modified.

    Args:
        agg_col: ``"Day"``, ``"Month"`` or ``"Year"``. Any other value
            returns an unaggregated copy of each frame.

    Returns:
        dict mapping company name to a DataFrame with two columns: a key
        column named after ``agg_col`` and the close column
        (``Close_<ticker>``, or ``Close_NASDAQ`` for ``"NASDAQ"``).

        * ``"Day"``: one row per calendar day; days without a quote are
          filled by linear interpolation; keys are ``datetime.date``.
        * ``"Month"``: last close in frame order per month; keys are
          month-start timestamps.
        * ``"Year"``: last close in frame order per year; keys are
          integer years.
    """
    aggregated = {}
    for company, price_frame in prices.items():
        if company == "NASDAQ":
            close_col = "Close_NASDAQ"
        else:
            close_col = f"Close_{TICKERS[company]}"
        frame = price_frame.copy()

        if agg_col == "Day":
            frame = (
                frame.set_index('Date')
                .resample('D')
                .mean()
                .interpolate('linear')
                .reset_index()
            )
            # Name the key after the aggregation level and use date objects,
            # mirroring df['Day'] (built with .dt.date), so callers can
            # select and merge on "Day" like they do for "Month"/"Year".
            frame['Day'] = frame['Date'].dt.date
            frame = frame[['Day', close_col]]
        elif agg_col == "Month":
            # NOTE(review): the news frame stores Month as a Period while this
            # key is a Timestamp -- confirm the join on "Month" matches rows.
            frame['Month'] = frame['Date'].dt.to_period('M').dt.to_timestamp()
            frame = frame.groupby('Month')[close_col].last().reset_index()
        elif agg_col == "Year":
            frame['Year'] = frame['Date'].dt.year
            frame = frame.groupby('Year')[close_col].last().reset_index()
        aggregated[company] = frame
    return aggregated
62
+
63
# --- MERGE NEWS WITH PRICES ---
# Left-join one close column per company present in the news data onto every
# news row by Date, then the NASDAQ close (attached to all rows).
df_merged = df.copy()
price_columns = [
    prices[name][['Date', f"Close_{TICKERS[name]}"]]
    for name in df['Company'].unique()
]
price_columns.append(prices['NASDAQ'][['Date', 'Close_NASDAQ']])
for price_column in price_columns:
    df_merged = df_merged.merge(price_column, on='Date', how='left')
71
 
72
  # --- GRADIO FUNCTION ---
73
+ def show_sentiment(selected_companies=None, aggregation="Day", selected_year=None):
74
+ if not selected_companies:
75
+ selected_companies = ["NASDAQ"]
 
 
 
 
 
 
 
76
 
77
+ df_filtered = df_merged.copy()
78
+ if selected_year != "All":
79
+ df_filtered = df_filtered[df_filtered['Year'] == int(selected_year)]
80
+
81
+
82
+ # aggregation column
83
  if aggregation == "Day":
84
  group_col = "Day"
85
  elif aggregation == "Month":
 
89
  group_col = "Year"
90
  else:
91
  group_col = "Day"
92
+
93
+
94
+ # interpolated prices for the chosen aggregation level
95
+ prices_agg = get_prices_for_agg(group_col)
96
+
97
+ include_nasdaq = "NASDAQ" in selected_companies
98
+ companies_to_plot = [c for c in selected_companies if c != "NASDAQ"]
99
+
100
+ df_grouped_list = []
101
+
102
+ # selected companies
103
+ if companies_to_plot:
104
+ df_sent = df_filtered[df_filtered['Company'].isin(companies_to_plot)]
105
+ df_tmp = df_sent.groupby([group_col, 'Company']).agg({'Score':'sum'}).reset_index()
106
+ for c in companies_to_plot:
107
+ ticker_col = f"Close_{TICKERS[c]}"
108
+ df_price_col = prices_agg[c][[group_col, ticker_col]]
109
+ df_tmp = df_tmp.merge(df_price_col, on=group_col, how='left')
110
+ df_grouped_list.append(df_tmp)
111
+
112
+ # NASDAQ with overall sentiment
113
+ if include_nasdaq:
114
+ df_general = df_filtered.groupby(group_col).agg({'Score':'sum'}).reset_index()
115
+ df_general['Company'] = 'General'
116
+ df_general = df_general.merge(prices_agg['NASDAQ'].rename(columns={'Date':group_col}), on=group_col, how='left')
117
+ df_grouped_list.append(df_general)
118
+
119
+ # combine everything
120
+ df_grouped = pd.concat(df_grouped_list, ignore_index=True, sort=False)
121
+ df_grouped = df_grouped.sort_values([group_col, 'Company'])
122
+
123
+ # --- Plot ---
124
  fig = px.line(df_grouped, x=group_col, y='Score', color='Company',
125
+ title=f"Sentiment Trend ({aggregation} Aggregation)")
126
+
127
+ for c in companies_to_plot:
128
+ ticker_col = f"Close_{TICKERS[c]}"
129
+ df_c = df_grouped[df_grouped['Company'] == c]
130
+ if ticker_col in df_c.columns and df_c[ticker_col].notnull().any():
131
+ fig.add_scatter(x=df_c[group_col], y=df_c[ticker_col],
132
+ mode='lines', name=f"{c} Price", yaxis="y2", line=dict(dash='dot'))
133
+
134
+ if include_nasdaq:
135
+ df_c = df_grouped[df_grouped['Company'] == 'General']
136
+ if 'Close_NASDAQ' in df_c.columns and df_c['Close_NASDAQ'].notnull().any():
137
+ fig.add_scatter(x=df_c[group_col], y=df_c['Close_NASDAQ'],
138
+ mode='lines', name="NASDAQ Price", yaxis="y2", line=dict(dash='dot'))
139
+
140
+ fig.update_layout(
141
+ yaxis2=dict(
142
+ title="Stock Price",
143
+ overlaying="y",
144
+ side="right"
145
  )
146
+ )
147
 
148
  return df_grouped.tail(30), fig
149
 
150
  # --- GRADIO INTERFACE ---
151
+ companies = sorted(df['Company'].unique().tolist()) + ["NASDAQ"]
152
+ years = sorted(df['Year'].dropna().unique().tolist())
153
 
154
  demo = gr.Interface(
155
  fn=show_sentiment,
 
157
  gr.Dropdown(
158
  choices=companies,
159
  value=None,
160
+ label="Select Companies (NASDAQ compares with general sentiment)",
161
+ multiselect=False
162
  ),
163
  gr.Radio(
164
+ choices=["Month", "Year"],
165
+ value="Year",
166
  label="Aggregation Level"
167
+ ),
168
+ gr.Dropdown(
169
+ choices=["All"] + years,
170
+ value="All",
171
+ label="Select Year"
172
  )
173
  ],
174
  outputs=[
 
176
  gr.Plot(label="Sentiment Trend"),
177
  ],
178
  title="Dynamic Sentiment Dashboard",
179
+ description="Shows sentiment scores aggregated by day, month, or year. NASDAQ compares with general sentiment if selected."
180
  )
181
 
182
+ demo.launch()