"""Portfolio strategy dashboard.

Pipeline executed at start-up:
1. Load CNBC headlines for the tracked companies from the Hugging Face Hub.
2. Score only the headlines not yet present in the private results dataset
   with three models (custom sentiment, FinBERT, per-company regression).
3. Push the merged results back to the Hub.
4. Download prices with yfinance and compute three cumulative strategies.
5. Serve a Gradio dashboard comparing the strategies and the prices.
"""
import os

import gradio as gr
import pandas as pd
import plotly.graph_objects as go
import yfinance as yf
from datasets import Dataset, load_dataset
from huggingface_hub import login
from transformers import pipeline

# Hugging Face login
token = os.getenv("HF_TOKEN")
login(token=token)

# --- Constants ---
HF_DATASET = "SelmaNajih001/Cnbc_MultiCompany"
HF_PRIVATE_DATASET = "SelmaNajih001/portfolio_strategy_data2"
MODEL_SENTIMENT = "SelmaNajih001/SentimentBasedOnPriceVariation"
MODEL_PRICE_TESLA = "SelmaNajih001/PricePredictionForTesla"
MODEL_PRICE_MICROSOFT = "SelmaNajih001/PricePredictionForMicrosoft"
MODEL_FINBERT = "ProsusAI/finbert"
TICKERS = {
    "Tesla": "TSLA",  # Tesla, Inc.
    "Microsoft": "MSFT",
}
companies = list(TICKERS.keys())

# Minimum classifier confidence required before a label is traded on.
CONFIDENCE_THRESHOLD = 0.8

# Legend labels for the strategy columns, in plotting order.
STRATEGY_LABELS = {
    'StrategyA_Cumulative': "Custom Sentiment",
    'StrategyB_Cumulative': "Regression",
    'StrategyC_Cumulative': "FinBERT",
}

# --- Pipelines ---
sentiment_pipeline = pipeline("sentiment-analysis", model=MODEL_SENTIMENT)
price_pipeline_tesla = pipeline("text-classification", model=MODEL_PRICE_TESLA)
price_pipeline_msft = pipeline("text-classification", model=MODEL_PRICE_MICROSOFT)
finbert_pipeline = pipeline("sentiment-analysis", model=MODEL_FINBERT)

# Regression pipeline to use for each company name.
PRICE_PIPELINES = {
    "Tesla": price_pipeline_tesla,
    "Microsoft": price_pipeline_msft,
}


def _classify(pipe, text):
    """Return ``(LABEL, score)`` from *pipe* for *text*.

    Any failure of the pipeline call is mapped to ``('ERROR', 0.0)`` so that
    one bad headline does not abort the whole batch.
    """
    try:
        result = pipe(text)[0]
        return result['label'].upper().strip(), result['score']
    except Exception:
        return 'ERROR', 0.0


def _predict_price(company, text):
    """Return the regression value for *text*, or 0.0 when unavailable.

    Companies without a dedicated pipeline, and pipeline failures, yield 0.0.
    """
    pipe = PRICE_PIPELINES.get(company)
    if pipe is None:
        return 0.0
    try:
        val = pipe(text)[0]['score']
    except Exception:
        return 0.0
    # NOTE(review): max(val, 1.0) floors the value at 1.0, so the
    # `predicted < -1` sell branch of Strategy B can never fire and the buy
    # branch fires only when val > 1. Kept as-is because the intent is not
    # visible here — confirm whether a clamp or the raw value was meant.
    return max(val, 1.0)


def _label_signals(labels, confidences, buy_label, sell_label):
    """Map classifier output to trade directions (+1 buy, -1 sell, 0 none).

    A label is traded on only when its confidence is strictly greater than
    ``CONFIDENCE_THRESHOLD``.
    """
    signals = []
    for label, confidence in zip(labels, confidences):
        if label == buy_label and confidence > CONFIDENCE_THRESHOLD:
            signals.append(1)
        elif label == sell_label and confidence > CONFIDENCE_THRESHOLD:
            signals.append(-1)
        else:
            signals.append(0)
    return signals


def _threshold_signals(values, upper=1, lower=-1):
    """Map regression values to directions: > upper buy, < lower sell."""
    signals = []
    for value in values:
        if value > upper:
            signals.append(1)
        elif value < lower:
            signals.append(-1)
        else:
            signals.append(0)
    return signals


def _cumulative_pnl(df, price_col, signals):
    """Return the running P&L of a strategy as a list aligned with *df* rows.

    Row 0 is always 0.0. For every later row ``i`` the step is
    ``price[i-1] * pct[i]`` (previous row's close times this row's
    ``PctChangeDaily``, missing values counted as 0); it is added on a buy
    signal, subtracted on a sell signal and skipped otherwise.
    """
    pct = df['PctChangeDaily'].fillna(0).tolist()
    price = df[price_col].tolist()
    cumulative = [0.0] * len(df)
    for i in range(1, len(df)):
        step = price[i - 1] * pct[i]
        if signals[i] > 0:
            cumulative[i] = cumulative[i - 1] + step
        elif signals[i] < 0:
            cumulative[i] = cumulative[i - 1] - step
        else:
            cumulative[i] = cumulative[i - 1]
    return cumulative


# --- Dataset loading ---
df_multi = pd.DataFrame(load_dataset(HF_DATASET)["train"])
df_multi['date'] = pd.to_datetime(df_multi['Date'], errors='coerce')
df_multi['date_merge'] = df_multi['date'].dt.normalize()
df_multi.sort_values('date', inplace=True)

try:
    ds_existing = load_dataset(HF_PRIVATE_DATASET)["train"]
    df_existing = pd.DataFrame(ds_existing)
except Exception as exc:
    # Best effort: with no readable previous results, every row is rescored.
    print(f"Could not load {HF_PRIVATE_DATASET} ({exc!r}); recomputing all rows.")
    df_existing = pd.DataFrame()

# --- Determine the new rows ---
# NOTE(review): rows are matched on 'Date' alone; if two companies can share
# the same 'Date' value a new row could be skipped — confirm the granularity.
if not df_existing.empty:
    # .copy() so the column assignments below write to an independent frame
    # rather than to a slice of df_multi.
    df_to_add = df_multi[~df_multi['Date'].isin(df_existing['Date'])].copy()
else:
    df_to_add = df_multi.copy()

# --- Score only the new rows ---
df_to_add['Sentiment'] = ""
df_to_add['Confidence'] = 0.0
df_to_add['Predicted'] = 0.0
df_to_add['FinBERT_Sentiment'] = ""
df_to_add['FinBERT_Confidence'] = 0.0

for idx, row in df_to_add.iterrows():
    title = row['Title']

    # Custom sentiment
    label, score = _classify(sentiment_pipeline, title)
    df_to_add.at[idx, 'Sentiment'] = label
    df_to_add.at[idx, 'Confidence'] = score

    # FinBERT
    label, score = _classify(finbert_pipeline, title)
    df_to_add.at[idx, 'FinBERT_Sentiment'] = label
    df_to_add.at[idx, 'FinBERT_Confidence'] = score

    # Regression
    df_to_add.at[idx, 'Predicted'] = _predict_price(row['Company'], title)

# --- Update the existing dataset ---
if not df_existing.empty:
    df_updated = pd.concat([df_existing, df_to_add], ignore_index=True)
else:
    df_updated = df_to_add.copy()

# --- Push to Hugging Face ---
hf_dataset_updated = Dataset.from_pandas(df_updated)
hf_dataset_updated.push_to_hub(HF_PRIVATE_DATASET, private=True)
print(f"Dataset aggiornato su Hugging Face: {HF_PRIVATE_DATASET}")

# --- Prices ---
df_multi = df_updated.copy()

prices = {}
for company, ticker in TICKERS.items():
    start_date = df_multi[df_multi['Company'] == company]['date'].min()
    end_date = pd.Timestamp.today()
    df_prices = yf.download(ticker, start=start_date, end=end_date)[['Close']].reset_index()
    df_prices.columns = ['Date_', f'Close_{ticker}']
    df_prices['date_merge'] = pd.to_datetime(df_prices['Date_']).dt.normalize()
    # shift(-1): each row carries the change from its close to the next close.
    df_prices['PctChangeDaily'] = df_prices[f'Close_{ticker}'].pct_change().shift(-1)
    prices[company] = df_prices

# --- Strategies ---
dfs_final = {}
for company in companies:
    df_c = df_multi[df_multi['Company'] == company].copy()
    if company in prices:
        df_c = pd.merge(df_c, prices[company], on='date_merge', how='inner')

    # The running totals below depend on row order, and rows concatenated
    # from the existing and the new data are not guaranteed to be
    # chronological. The stable sort keeps same-timestamp rows in their
    # original relative order.
    df_c = df_c.sort_values('date', kind='stable').reset_index(drop=True)

    df_c['Day'] = df_c['date'].dt.date
    df_c['Month'] = df_c['date'].dt.to_period('M').dt.to_timestamp()
    df_c['Year'] = df_c['date'].dt.year

    price_col = f'Close_{TICKERS[company]}'

    # Strategy A: custom sentiment model (UP / DOWN labels)
    df_c['StrategyA_Cumulative'] = _cumulative_pnl(
        df_c,
        price_col,
        _label_signals(df_c['Sentiment'], df_c['Confidence'], "UP", "DOWN"),
    )

    # Strategy B: regression model (thresholds +1 / -1)
    df_c['StrategyB_Cumulative'] = _cumulative_pnl(
        df_c,
        price_col,
        _threshold_signals(df_c['Predicted']),
    )

    # Strategy C: FinBERT (POSITIVE / NEGATIVE labels)
    df_c['StrategyC_Cumulative'] = _cumulative_pnl(
        df_c,
        price_col,
        _label_signals(
            df_c['FinBERT_Sentiment'],
            df_c['FinBERT_Confidence'],
            "POSITIVE",
            "NEGATIVE",
        ),
    )

    dfs_final[company] = df_c.drop(columns=["date", "date_merge"], errors="ignore")


# --- Gradio callback ---
def show_company_data(selected_companies, aggregation="Day"):
    """Build the strategy and price figures for the selected companies.

    Args:
        selected_companies: Company names to plot; names missing from
            ``dfs_final`` are skipped.
        aggregation: ``"Day"``, ``"Month"`` or ``"Year"``; any other value
            falls back to ``"Day"``. Within each period the last row's value
            is plotted.

    Returns:
        ``(strategies_figure, prices_figure)``, or ``(None, None)`` when
        nothing is selected. The tuple length must match the ``outputs``
        list of the click handler.
    """
    if not selected_companies:
        # Two values, matching the two gr.Plot outputs.
        return None, None

    agg_col = {"Day": "Day", "Month": "Month", "Year": "Year"}.get(aggregation, "Day")

    fig_strat = go.Figure()
    fig_price = go.Figure()

    for c in selected_companies:
        if c not in dfs_final:
            continue

        close_col = f'Close_{TICKERS[c]}'
        agg_spec = {strat: 'last' for strat in STRATEGY_LABELS}
        agg_spec[close_col] = 'last'
        df_grouped = dfs_final[c].groupby(agg_col).agg(agg_spec).reset_index()

        for strat, label in STRATEGY_LABELS.items():
            fig_strat.add_trace(go.Scatter(
                x=df_grouped[agg_col],
                y=df_grouped[strat],
                mode="lines",
                name=f"{c} - {label}",
            ))

        fig_price.add_trace(go.Scatter(
            x=df_grouped[agg_col],
            y=df_grouped[close_col],
            mode="lines",
            name=f"{c} Price",
        ))

    fig_strat.update_layout(
        title="Strategies Comparison (Custom Sentiment, Regression, FinBERT)",
        xaxis_title=aggregation,
        yaxis_title="Cumulative Value",
        template="plotly_dark",
        hovermode="x unified",
    )
    fig_price.update_layout(
        title="Stock Prices",
        xaxis_title=aggregation,
        yaxis_title="Price",
        template="plotly_dark",
        hovermode="x unified",
    )

    return fig_strat, fig_price


# --- Gradio interface ---
# NOTE(review): the text below says "score below -0.8 → sell", while the code
# sells on a DOWN / NEGATIVE label with confidence above 0.8 — confirm wording.
description_text = """
### Portfolio Strategy Comparison Dashboard
This dashboard allows you to compare the performance of three sentiment models in driving trading strategies for Microsoft and Tesla.
- **Strategy logic**: Each model's score (or regression value) is used as a buy/sell signal.
  - If the score exceeds 0.8 → buy
  - If the score is below -0.8 → sell
  - Otherwise → no trade
  - For the regression model, thresholds are +1 and -1.
"""

# Display order for the dropdown (differs from the TICKERS insertion order).
companies = ["Microsoft", "Tesla"]

with gr.Blocks() as demo:
    gr.Markdown("# Portfolio Strategy Dashboard")
    gr.Markdown(description_text)
    with gr.Row():
        dropdown_companies = gr.Dropdown(
            choices=companies,
            value=["Microsoft", "Tesla"],
            multiselect=True,
            label="Select Companies",
        )
        radio_aggregation = gr.Radio(
            choices=["Day", "Month", "Year"],
            value="Day",
            label="Aggregation Level",
        )
    submit_btn = gr.Button("Submit")
    strategies_plot = gr.Plot(label="Strategies")
    prices_plot = gr.Plot(label="Prices")

    # If a data-preview table is added later, add it to `outputs` here and to
    # both return statements of show_company_data in the same change.
    submit_btn.click(
        fn=show_company_data,
        inputs=[dropdown_companies, radio_aggregation],
        outputs=[strategies_plot, prices_plot],
    )

demo.launch()