Spaces:
Running
Running
| import pandas as pd | |
| import plotly.express as px | |
| from datasets import load_dataset | |
| import gradio as gr | |
| import yfinance as yf | |
# --- LOAD DATASET ---
# Load the news-sentiment training split and keep only the tracked companies.
df = pd.DataFrame(load_dataset("SelmaNajih001/NewsSentiment")["train"])
# .copy() gives an independent frame, so the column assignments below do not
# write into a filtered view of the full dataset (SettingWithCopyWarning).
df = df[df["Company"].isin(["Tesla", "Microsoft", "Apple", "Facebook", "Google"])].copy()

# --- CONVERT DATE TO DATETIME ---
# Unparseable dates become NaT instead of raising.
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.to_period('M')
df['Day'] = df['Date'].dt.date
# Rows with a NaT date have a NaN year and are dropped by this comparison too.
df = df[df['Year'] >= 2015]
# --- TICKERS ---
# Display name -> market symbol passed to yfinance; "NASDAQ" maps to the
# ^IXIC index symbol rather than to a single company.
TICKERS = dict([
    ("Tesla", "TSLA"),
    ("Microsoft", "MSFT"),
    ("Apple", "AAPL"),
    ("Facebook", "META"),
    ("Google", "GOOGL"),
    ("NASDAQ", "^IXIC"),
])
# --- FETCH STOCK PRICES ---
# The download window is identical for every ticker, so compute it once.
start_date = "2015-01-01"
end_date = pd.Timestamp.today()

# company name -> DataFrame with columns ['Date', 'Close_<ticker>']
# (the NASDAQ entry uses 'Close_NASDAQ' instead of 'Close_^IXIC').
prices = {}
for company, ticker in TICKERS.items():
    df_prices = yf.download(ticker, start=start_date, end=end_date)
    close_col = f'Close_{ticker}'
    if isinstance(df_prices.columns, pd.MultiIndex):
        # Flatten multi-level columns by joining the levels with '_'
        # (presumably (field, ticker) -> 'Close_TSLA'; verify against the
        # installed yfinance version).
        df_prices.columns = [
            '_'.join(str(level) for level in col).strip()
            for col in df_prices.columns
        ]
    elif 'Close' in df_prices.columns:
        # Flat column layout: give the close column the same
        # 'Close_<ticker>' name the rest of the script expects.
        df_prices = df_prices.rename(columns={'Close': close_col})
    df_prices = df_prices.reset_index()[['Date', close_col]]
    if company == "NASDAQ":
        df_prices = df_prices.rename(columns={close_col: 'Close_NASDAQ'})
    prices[company] = df_prices
# --- INTERPOLATE PRICES FOR DAY/MONTH ---
def get_prices_for_agg(agg_col):
    """Reshape every downloaded price frame to the requested granularity.

    Args:
        agg_col: "Day", "Month" or "Year". Any other value leaves each
            frame as a plain copy.

    Returns:
        dict mapping company name to a DataFrame:
        - "Day": a 'Date' column on a daily grid, with gaps filled by
          linear interpolation.
        - "Month": a 'Month' column (month-start timestamps) and the last
          close of each month.
        - "Year": a 'Year' column and the last close of each year.
    """
    reshaped = {}
    for name, source in prices.items():
        price_col = f"Close_{TICKERS[name]}" if name != 'NASDAQ' else 'Close_NASDAQ'
        frame = source.copy()
        # The second column is the close price; force its canonical name.
        frame = frame.rename(columns={frame.columns[1]: price_col})

        if agg_col == "Day":
            daily = frame.set_index('Date').resample('D').mean()
            frame = daily.interpolate('linear').reset_index()
        else:
            if agg_col == "Month":
                frame['Month'] = frame['Date'].dt.to_period('M').dt.to_timestamp()
            elif agg_col == "Year":
                frame['Year'] = frame['Date'].dt.year
            if agg_col in ("Month", "Year"):
                # Keep the last close within each period.
                frame = frame.groupby(agg_col)[price_col].last().reset_index()

        reshaped[name] = frame
    return reshaped
# --- MERGE NEWS WITH PRICES ---
# One ('Date', 'Close_<ticker>') frame per company present in the news data,
# followed by the NASDAQ index frame, which applies to every row.
price_frames = [
    prices[name][['Date', f"Close_{TICKERS[name]}"]]
    for name in df['Company'].unique()
]
price_frames.append(prices['NASDAQ'][['Date', 'Close_NASDAQ']])

# Left-join each price frame on the date so every news row is kept.
df_merged = df.copy()
for frame in price_frames:
    df_merged = df_merged.merge(frame, on='Date', how='left')
# --- GRADIO FUNCTION ---
def _add_price_trace(fig, frame, x_col, price_col, label):
    """Overlay a dotted price line on the secondary y-axis when data exists."""
    if price_col in frame.columns and frame[price_col].notnull().any():
        fig.add_scatter(
            x=frame[x_col], y=frame[price_col],
            mode='lines', name=label, yaxis="y2",
            line=dict(dash='dot'),
        )


def show_sentiment(selected_companies=None, aggregation="Day", selected_year="All"):
    """Aggregate sentiment scores and plot them against closing prices.

    Args:
        selected_companies: A company name, a list of names, or None/empty.
            None/empty falls back to ["NASDAQ"]. "NASDAQ" selects the
            sentiment summed over all companies (labelled 'General'),
            plotted against the NASDAQ close.
        aggregation: Column to group by: "Day", "Month" or "Year".
        selected_year: "All"/None for no filter, otherwise a year
            (int, float, or numeric string).

    Returns:
        (table, fig): the last 30 rows of the aggregated frame and a Plotly
        figure with sentiment on the left axis and prices on the right axis.
    """
    if not selected_companies:
        selected_companies = ["NASDAQ"]
    if isinstance(selected_companies, str):
        selected_companies = [selected_companies]

    df_filtered = df_merged
    if selected_year != "All" and selected_year is not None:
        # int(float(...)) also accepts values such as 2015.0 or "2015.0".
        df_filtered = df_filtered[df_filtered['Year'] == int(float(selected_year))]
    # Copy after filtering so the column assignments below write to a private
    # frame, not to a slice of df_merged.
    df_filtered = df_filtered.copy()

    # Aggregation column
    group_col = aggregation
    if aggregation == "Month":
        # Periods -> month-start timestamps, matching the price frames.
        df_filtered['Month'] = df_filtered['Month'].dt.to_timestamp()
    elif aggregation == "Day":
        # Midnight timestamps so the keys line up with the daily price grid.
        df_filtered['Day'] = df_filtered['Date'].dt.normalize()

    # Prices reshaped to the same granularity
    prices_agg = get_prices_for_agg(aggregation)
    include_nasdaq = "NASDAQ" in selected_companies
    companies_to_plot = [c for c in selected_companies if c != "NASDAQ"]
    # Only companies with a known ticker have a price series to attach/plot.
    known_companies = [c for c in companies_to_plot if c in TICKERS]

    df_grouped_list = []

    # Selected companies
    if companies_to_plot:
        df_sent = df_filtered[df_filtered['Company'].isin(companies_to_plot)]
        df_tmp = df_sent.groupby([group_col, 'Company']).agg({'Score': 'sum'}).reset_index()
        for c in known_companies:
            ticker_col = f"Close_{TICKERS[c]}"
            # The daily price frame keys on 'Date'; rename it to the grouping
            # column (a no-op for Month/Year), as the NASDAQ branch does.
            df_price_col = prices_agg[c].rename(columns={'Date': group_col})
            df_tmp = df_tmp.merge(df_price_col[[group_col, ticker_col]], on=group_col, how='left')
        df_grouped_list.append(df_tmp)

    # NASDAQ with overall sentiment
    if include_nasdaq:
        df_general = df_filtered.groupby(group_col).agg({'Score': 'sum'}).reset_index()
        df_general['Company'] = 'General'
        df_general = df_general.merge(
            prices_agg['NASDAQ'].rename(columns={'Date': group_col}),
            on=group_col, how='left',
        )
        df_grouped_list.append(df_general)

    # Combine everything
    df_grouped = pd.concat(df_grouped_list, ignore_index=True, sort=False)
    df_grouped = df_grouped.sort_values([group_col, 'Company'])

    # --- Plot ---
    fig = px.line(df_grouped, x=group_col, y='Score', color='Company',
                  title=f"Sentiment Trend ({aggregation} Aggregation)")
    for c in known_companies:
        _add_price_trace(
            fig, df_grouped[df_grouped['Company'] == c],
            group_col, f"Close_{TICKERS[c]}", f"{c} Price",
        )
    if include_nasdaq:
        _add_price_trace(
            fig, df_grouped[df_grouped['Company'] == 'General'],
            group_col, 'Close_NASDAQ', "NASDAQ Price",
        )
    fig.update_layout(
        yaxis2=dict(
            title="Stock Price",
            overlaying="y",
            side="right",
        )
    )
    return df_grouped.tail(30), fig
| import gradio as gr | |
| # Markdown descrittivo adattato al tuo dashboard | |
| import gradio as gr | |
# --- Descriptive Markdown ---
# Shown at the top of the dashboard. The "Visual insights" bullet describes
# the plot as it is built: scores are summed per period and there is a
# single graph.
description_text = """
### Dynamic Sentiment Dashboard
This dashboard allows you to explore the sentiment of news articles related to major tech companies (Apple, Tesla, Microsoft, Meta, Alphabet) and compare it with their stock prices.
- **Dataset structure**: The dataset includes a company column; each row corresponds to a news item for a specific company.
- **Sentiment aggregation**: Select a time aggregation level (Month or Year) to see how sentiment evolves over time.
- **NASDAQ comparison**: Selecting "NASDAQ" shows the general market sentiment alongside the company-specific sentiment.
- **Visual insights**: The graph shows the summed sentiment score and closing price for the selected company.
"""
# --- Findings from thesis (specific companies and years) ---
# Shown below the outputs.
findings_text = """
### Key Findings
- Some news articles refer to multiple companies, e.g., the same article may mention Apple and Tesla.
- Merging news with stock prices allows analyzing correlations between sentiment and stock movements for each company.
- **Apple (2018, 2019, 2022):** Sentiment trends generally align with closing prices, showing similar monthly patterns.
- **Tesla (2018, 2019, 2022):** More volatility observed; sentiment aligns with stock movement but is more sensitive to news on Elon Musk’s actions.
- **Microsoft, Meta, Alphabet:** Across periods, sentiment trends follow stock prices with moderate correlation.
- The custom sentiment model is more aligned with actual stock movements compared to FinBERT, which is more influenced by word positivity/negativity.
- Aggregating sentiment by month or year helps identify broader trends while reducing noise from daily fluctuations.
- Including “NASDAQ” as a general market reference allows comparison of individual companies’ sentiment with overall market sentiment.
"""
# --- Input options ---
# Companies present in the news data, plus the NASDAQ pseudo-company.
companies = sorted(df['Company'].unique().tolist()) + ["NASDAQ"]
# 'Year' comes from .dt.year on dates parsed with errors='coerce', so it can
# be float (2015.0). Cast to int so the dropdown lists plain years.
years = sorted({int(year) for year in df['Year'].dropna().unique()})
# --- Build Gradio Blocks ---
# Layout, top to bottom: title and description, one row of inputs, the
# submit button, the table and plot outputs, then the findings text.
with gr.Blocks() as demo:
    # Heading and description at the top
    gr.Markdown("# Dynamic Sentiment Dashboard")
    gr.Markdown(description_text)

    # Inputs, side by side under the description
    with gr.Row():
        company_input = gr.Dropdown(
            label="Select Companies",
            choices=companies,
            value=None,
            multiselect=False,
        )
        aggregation_input = gr.Radio(
            label="Aggregation Level",
            choices=["Month", "Year"],
            value="Month",
        )
        year_input = gr.Dropdown(
            label="Select Year",
            choices=["All"] + years,
            value="All",
        )

    # Submit button
    run_button = gr.Button("Submit")

    # Outputs
    table_output = gr.Dataframe(label="Sentiment Table", type="pandas")
    plot_output = gr.Plot(label="Sentiment Trend")

    # Findings section
    gr.Markdown(findings_text)

    # Recompute the table and plot whenever the button is pressed.
    run_button.click(
        fn=show_sentiment,
        inputs=[company_input, aggregation_input, year_input],
        outputs=[table_output, plot_output],
    )

demo.launch()