Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,11 +8,12 @@ import yfinance as yf
|
|
| 8 |
df = pd.DataFrame(load_dataset("SelmaNajih001/NewsSentiment")["train"])
|
| 9 |
df = df[df["Company"].isin(["Tesla", "Microsoft", "Apple", "Facebook", "Google"])]
|
| 10 |
|
| 11 |
-
# --- CONVERT DATE TO DATETIME
|
| 12 |
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
|
| 13 |
df['Year'] = df['Date'].dt.year
|
| 14 |
df['Month'] = df['Date'].dt.to_period('M')
|
| 15 |
df['Day'] = df['Date'].dt.date
|
|
|
|
| 16 |
|
| 17 |
# --- TICKERS ---
|
| 18 |
TICKERS = {
|
|
@@ -24,49 +25,61 @@ TICKERS = {
|
|
| 24 |
"NASDAQ": "^IXIC"
|
| 25 |
}
|
| 26 |
|
| 27 |
-
# --- FETCH STOCK PRICES
|
| 28 |
prices = {}
|
| 29 |
for company, ticker in TICKERS.items():
|
| 30 |
-
start_date =
|
| 31 |
end_date = pd.Timestamp.today()
|
| 32 |
df_prices = yf.download(ticker, start=start_date, end=end_date)
|
| 33 |
-
|
| 34 |
-
# Flatten MultiIndex columns if present
|
| 35 |
if isinstance(df_prices.columns, pd.MultiIndex):
|
| 36 |
df_prices.columns = ['_'.join([str(c) for c in col]).strip() for col in df_prices.columns]
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
if 'Adj Close' in df_prices.columns:
|
| 41 |
-
df_prices['Close'] = df_prices['Adj Close']
|
| 42 |
-
else:
|
| 43 |
-
df_prices['Close'] = None
|
| 44 |
-
|
| 45 |
-
df_prices = df_prices.reset_index()[['Date', 'Close']]
|
| 46 |
prices[company] = df_prices
|
| 47 |
|
| 48 |
-
# ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
df_merged = df.copy()
|
| 50 |
-
for company
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
|
| 57 |
# --- GRADIO FUNCTION ---
|
| 58 |
-
def show_sentiment(selected_companies, aggregation="Day"):
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
df_filtered = df_merged[df_merged['Company'].isin(selected_companies)].copy()
|
| 62 |
-
else:
|
| 63 |
-
df_filtered = df_merged.copy()
|
| 64 |
-
# Add NASDAQ data for general sentiment
|
| 65 |
-
df_nasdaq = prices['NASDAQ'].copy()
|
| 66 |
-
df_nasdaq['Company'] = "NASDAQ"
|
| 67 |
-
df_filtered = pd.concat([df_filtered, df_nasdaq], ignore_index=True, sort=False)
|
| 68 |
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
if aggregation == "Day":
|
| 71 |
group_col = "Day"
|
| 72 |
elif aggregation == "Month":
|
|
@@ -76,39 +89,67 @@ def show_sentiment(selected_companies, aggregation="Day"):
|
|
| 76 |
group_col = "Year"
|
| 77 |
else:
|
| 78 |
group_col = "Day"
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
fig = px.line(df_grouped, x=group_col, y='Score', color='Company',
|
| 91 |
-
title=f"Sentiment
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
)
|
|
|
|
| 107 |
|
| 108 |
return df_grouped.tail(30), fig
|
| 109 |
|
| 110 |
# --- GRADIO INTERFACE ---
|
| 111 |
-
companies = sorted(df['Company'].unique().tolist())
|
|
|
|
| 112 |
|
| 113 |
demo = gr.Interface(
|
| 114 |
fn=show_sentiment,
|
|
@@ -116,13 +157,18 @@ demo = gr.Interface(
|
|
| 116 |
gr.Dropdown(
|
| 117 |
choices=companies,
|
| 118 |
value=None,
|
| 119 |
-
label="Select Companies (
|
| 120 |
-
multiselect=
|
| 121 |
),
|
| 122 |
gr.Radio(
|
| 123 |
-
choices=["
|
| 124 |
-
value="
|
| 125 |
label="Aggregation Level"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
)
|
| 127 |
],
|
| 128 |
outputs=[
|
|
@@ -130,7 +176,7 @@ demo = gr.Interface(
|
|
| 130 |
gr.Plot(label="Sentiment Trend"),
|
| 131 |
],
|
| 132 |
title="Dynamic Sentiment Dashboard",
|
| 133 |
-
description="Shows sentiment scores aggregated by day, month, or year.
|
| 134 |
)
|
| 135 |
|
| 136 |
-
demo.launch()
|
|
|
|
| 8 |
# Load the news-sentiment dataset and keep only the companies shown in the dashboard.
raw_news = load_dataset("SelmaNajih001/NewsSentiment")["train"]
df = pd.DataFrame(raw_news)
df = df[df["Company"].isin(["Tesla", "Microsoft", "Apple", "Facebook", "Google"])]

# --- CONVERT DATE TO DATETIME ---
# errors='coerce' turns unparseable dates into NaT instead of raising.
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
date_parts = df['Date'].dt
df['Year'] = date_parts.year
df['Month'] = date_parts.to_period('M')
df['Day'] = date_parts.date

# Year filter: keep news from 2015 onwards. Rows whose date is NaT have no
# year, fail the comparison, and are dropped as well.
df = df.loc[df['Year'] >= 2015]
|
| 17 |
|
| 18 |
# --- TICKERS ---
|
| 19 |
TICKERS = {
|
|
|
|
| 25 |
"NASDAQ": "^IXIC"
|
| 26 |
}
|
| 27 |
|
| 28 |
+
# --- FETCH STOCK PRICES ---
# Download the price history of every entry in TICKERS and keep only the date
# and the closing price. Each frame stores the close under "Close_<ticker>",
# except the NASDAQ index, which is stored under "Close_NASDAQ".
prices = {}
start_date = "2015-01-01"  # same lower bound as the Year >= 2015 news filter
end_date = pd.Timestamp.today()
for company, ticker in TICKERS.items():
    df_prices = yf.download(ticker, start=start_date, end=end_date)
    close_col = f'Close_{ticker}'

    if isinstance(df_prices.columns, pd.MultiIndex):
        # Flatten (field, ticker) column tuples into "field_ticker" names.
        df_prices.columns = ['_'.join([str(c) for c in col]).strip() for col in df_prices.columns]
    else:
        # Flat columns (single-level result): the close is just "Close", so
        # give it the ticker-specific name the rest of the script expects.
        df_prices = df_prices.rename(columns={'Close': close_col})

    df_prices = df_prices.reset_index()[['Date', close_col]]
    if company == "NASDAQ":
        df_prices = df_prices.rename(columns={close_col: 'Close_NASDAQ'})
    prices[company] = df_prices
|
| 40 |
|
| 41 |
+
# --- FUNCTION FOR INTERPOLATED / AGGREGATED PRICES ---
def get_prices_for_agg(agg_col, price_frames=None, tickers=None):
    """Return each company's closing prices keyed at the requested granularity.

    For a recognised ``agg_col`` the key column of every returned frame is
    named after ``agg_col`` and holds the same kind of values as the column of
    that name on the news frame, so the result can be merged on ``agg_col``:

    * ``"Day"``   -> ``datetime.date`` values, one row per calendar day; days
      without a quote are filled by linear interpolation.
    * ``"Month"`` -> monthly ``Period`` values; last available close of the month.
    * ``"Year"``  -> integer years; last available close of the year.

    Any other ``agg_col`` returns plain copies of the input frames.

    Args:
        agg_col: ``"Day"``, ``"Month"`` or ``"Year"``.
        price_frames: Mapping of company name to a DataFrame with a datetime
            ``Date`` column and one close column. Defaults to the module-level
            ``prices`` dict.
        tickers: Mapping of company name to ticker symbol, used to derive the
            close column name ``Close_<ticker>``. Defaults to the module-level
            ``TICKERS`` dict.

    Returns:
        dict mapping each company name to its aggregated DataFrame.
    """
    if price_frames is None:
        price_frames = prices
    if tickers is None:
        tickers = TICKERS

    aggregated = {}
    for company, df_price in price_frames.items():
        # The NASDAQ frame uses a fixed column name; all others use the ticker.
        col = "Close_NASDAQ" if company == "NASDAQ" else f"Close_{tickers[company]}"
        df_temp = df_price.copy()  # never mutate the cached download

        if agg_col == "Day":
            daily = df_temp.set_index('Date').resample('D').mean().interpolate('linear').reset_index()
            # Expose the key as a 'Day' column of date objects so it can be
            # selected and merged by the aggregation column name.
            daily['Day'] = daily['Date'].dt.date
            df_temp = daily[['Day', col]]
        elif agg_col == "Month":
            # Keep the key as a Period: merging a Period key against a
            # Timestamp key would not match any row.
            df_temp['Month'] = df_temp['Date'].dt.to_period('M')
            df_temp = df_temp.groupby('Month')[col].last().reset_index()
        elif agg_col == "Year":
            df_temp['Year'] = df_temp['Date'].dt.year
            df_temp = df_temp.groupby('Year')[col].last().reset_index()

        aggregated[company] = df_temp
    return aggregated
|
| 62 |
+
|
| 63 |
+
# --- MERGE NEWS WITH PRICES ---
# Left-join every company's close column onto the news rows by date, so each
# row carries one "Close_<ticker>" column per company present in the news.
df_merged = df.copy()
for name in df['Company'].unique():
    close_col = f"Close_{TICKERS[name]}"
    company_close = prices[name][['Date', close_col]]
    df_merged = df_merged.merge(company_close, on='Date', how='left')

# NASDAQ close for all rows
nasdaq_close = prices['NASDAQ'][['Date', 'Close_NASDAQ']]
df_merged = df_merged.merge(nasdaq_close, on='Date', how='left')
|
| 71 |
|
| 72 |
# --- GRADIO FUNCTION ---
|
| 73 |
+
def show_sentiment(selected_companies=None, aggregation="Day", selected_year=None):
|
| 74 |
+
if not selected_companies:
|
| 75 |
+
selected_companies = ["NASDAQ"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
+
df_filtered = df_merged.copy()
|
| 78 |
+
if selected_year != "All":
|
| 79 |
+
df_filtered = df_filtered[df_filtered['Year'] == int(selected_year)]
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
# colonna aggregazione
|
| 83 |
if aggregation == "Day":
|
| 84 |
group_col = "Day"
|
| 85 |
elif aggregation == "Month":
|
|
|
|
| 89 |
group_col = "Year"
|
| 90 |
else:
|
| 91 |
group_col = "Day"
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
# prezzi interpolati per il livello di aggregazione
|
| 95 |
+
prices_agg = get_prices_for_agg(group_col)
|
| 96 |
+
|
| 97 |
+
include_nasdaq = "NASDAQ" in selected_companies
|
| 98 |
+
companies_to_plot = [c for c in selected_companies if c != "NASDAQ"]
|
| 99 |
+
|
| 100 |
+
df_grouped_list = []
|
| 101 |
+
|
| 102 |
+
# aziende selezionate
|
| 103 |
+
if companies_to_plot:
|
| 104 |
+
df_sent = df_filtered[df_filtered['Company'].isin(companies_to_plot)]
|
| 105 |
+
df_tmp = df_sent.groupby([group_col, 'Company']).agg({'Score':'sum'}).reset_index()
|
| 106 |
+
for c in companies_to_plot:
|
| 107 |
+
ticker_col = f"Close_{TICKERS[c]}"
|
| 108 |
+
df_price_col = prices_agg[c][[group_col, ticker_col]]
|
| 109 |
+
df_tmp = df_tmp.merge(df_price_col, on=group_col, how='left')
|
| 110 |
+
df_grouped_list.append(df_tmp)
|
| 111 |
+
|
| 112 |
+
# NASDAQ con sentiment generale
|
| 113 |
+
if include_nasdaq:
|
| 114 |
+
df_general = df_filtered.groupby(group_col).agg({'Score':'sum'}).reset_index()
|
| 115 |
+
df_general['Company'] = 'General'
|
| 116 |
+
df_general = df_general.merge(prices_agg['NASDAQ'].rename(columns={'Date':group_col}), on=group_col, how='left')
|
| 117 |
+
df_grouped_list.append(df_general)
|
| 118 |
+
|
| 119 |
+
# unisci tutto
|
| 120 |
+
df_grouped = pd.concat(df_grouped_list, ignore_index=True, sort=False)
|
| 121 |
+
df_grouped = df_grouped.sort_values([group_col, 'Company'])
|
| 122 |
+
|
| 123 |
+
# --- Plot ---
|
| 124 |
fig = px.line(df_grouped, x=group_col, y='Score', color='Company',
|
| 125 |
+
title=f"Sentiment Trend ({aggregation} Aggregation)")
|
| 126 |
+
|
| 127 |
+
for c in companies_to_plot:
|
| 128 |
+
ticker_col = f"Close_{TICKERS[c]}"
|
| 129 |
+
df_c = df_grouped[df_grouped['Company'] == c]
|
| 130 |
+
if ticker_col in df_c.columns and df_c[ticker_col].notnull().any():
|
| 131 |
+
fig.add_scatter(x=df_c[group_col], y=df_c[ticker_col],
|
| 132 |
+
mode='lines', name=f"{c} Price", yaxis="y2", line=dict(dash='dot'))
|
| 133 |
+
|
| 134 |
+
if include_nasdaq:
|
| 135 |
+
df_c = df_grouped[df_grouped['Company'] == 'General']
|
| 136 |
+
if 'Close_NASDAQ' in df_c.columns and df_c['Close_NASDAQ'].notnull().any():
|
| 137 |
+
fig.add_scatter(x=df_c[group_col], y=df_c['Close_NASDAQ'],
|
| 138 |
+
mode='lines', name="NASDAQ Price", yaxis="y2", line=dict(dash='dot'))
|
| 139 |
+
|
| 140 |
+
fig.update_layout(
|
| 141 |
+
yaxis2=dict(
|
| 142 |
+
title="Stock Price",
|
| 143 |
+
overlaying="y",
|
| 144 |
+
side="right"
|
| 145 |
)
|
| 146 |
+
)
|
| 147 |
|
| 148 |
return df_grouped.tail(30), fig
|
| 149 |
|
| 150 |
# --- GRADIO INTERFACE ---
|
| 151 |
+
# Dropdown choices: the companies present in the news data plus the NASDAQ index.
companies = sorted(df['Company'].unique().tolist()) + ["NASDAQ"]
# The Year column is float-typed whenever a date failed to parse (NaT), which
# would make the choices read "2015.0"; cast to int for clean labels.
years = sorted(int(y) for y in df['Year'].dropna().unique())
|
| 153 |
|
| 154 |
demo = gr.Interface(
|
| 155 |
fn=show_sentiment,
|
|
|
|
| 157 |
gr.Dropdown(
|
| 158 |
choices=companies,
|
| 159 |
value=None,
|
| 160 |
+
label="Select Companies (NASDAQ compares with general sentiment)",
|
| 161 |
+
multiselect=False
|
| 162 |
),
|
| 163 |
gr.Radio(
|
| 164 |
+
choices=["Month", "Year"],
|
| 165 |
+
value="Year",
|
| 166 |
label="Aggregation Level"
|
| 167 |
+
),
|
| 168 |
+
gr.Dropdown(
|
| 169 |
+
choices=["All"] + years,
|
| 170 |
+
value="All",
|
| 171 |
+
label="Select Year"
|
| 172 |
)
|
| 173 |
],
|
| 174 |
outputs=[
|
|
|
|
| 176 |
gr.Plot(label="Sentiment Trend"),
|
| 177 |
],
|
| 178 |
title="Dynamic Sentiment Dashboard",
|
| 179 |
+
description="Shows sentiment scores aggregated by day, month, or year. NASDAQ compares with general sentiment if selected."
|
| 180 |
)
|
| 181 |
|
| 182 |
+
demo.launch()
|