Starberry15 committed on
Commit
c627e4b
·
verified ·
1 Parent(s): 081976e

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +36 -31
src/streamlit_app.py CHANGED
@@ -13,8 +13,8 @@ from io import StringIO
13
  # βš™οΈ APP CONFIGURATION
14
  # ======================================================
15
  st.set_page_config(page_title="📊 Smart Data Analyst Pro", layout="wide")
16
- st.title("📊 Smart Data Analyst Pro")
17
- st.caption("AI that cleans, analyzes, and visualizes your data — Hugging Face + Gemini compatible.")
18
 
19
  # ======================================================
20
  # πŸ” Load Environment Variables
@@ -29,7 +29,7 @@ else:
29
  login(token=HF_TOKEN)
30
 
31
  if GEMINI_API_KEY:
32
- genai.api_key = GEMINI_API_KEY
33
  else:
34
  st.warning("⚠️ Gemini API key missing. Gemini 2.5 Flash will not work.")
35
 
@@ -51,6 +51,7 @@ with st.sidebar:
51
  ANALYST_MODEL = st.selectbox(
52
  "Select Analysis Model:",
53
  [
 
54
  "Qwen/Qwen2.5-14B-Instruct",
55
  "mistralai/Mistral-7B-Instruct-v0.3",
56
  "HuggingFaceH4/zephyr-7b-beta"
@@ -61,7 +62,6 @@ with st.sidebar:
61
  temperature = st.slider("Temperature", 0.0, 1.0, 0.3)
62
  max_tokens = st.slider("Max Tokens", 128, 4096, 1024)
63
 
64
- # Initialize HF clients
65
  hf_cleaner_client = InferenceClient(model=CLEANER_MODEL, token=HF_TOKEN)
66
  hf_analyst_client = None
67
  if ANALYST_MODEL != "Gemini 2.5 Flash (Google)":
@@ -80,7 +80,7 @@ def safe_hf_generate(client, prompt, temperature=0.3, max_tokens=512):
80
  )
81
  return resp.strip()
82
  except Exception as e:
83
- if "Supported task: conversational" in str(e) or "not supported" in str(e):
84
  chat_resp = client.chat_completion(
85
  messages=[{"role": "user", "content": prompt}],
86
  max_tokens=max_tokens,
@@ -91,7 +91,7 @@ def safe_hf_generate(client, prompt, temperature=0.3, max_tokens=512):
91
  raise e
92
 
93
  # ======================================================
94
- # 🧩 SMART DATA CLEANING
95
  # ======================================================
96
  def fallback_clean(df: pd.DataFrame) -> pd.DataFrame:
97
  df = df.copy()
@@ -109,7 +109,6 @@ def fallback_clean(df: pd.DataFrame) -> pd.DataFrame:
109
  return df
110
 
111
  def ai_clean_dataset(df: pd.DataFrame) -> (pd.DataFrame, str):
112
- """Returns cleaned df and a status message"""
113
  if len(df) > 50:
114
  return df, "⚠️ AI cleaning skipped: dataset has more than 50 rows."
115
  csv_text = df.to_csv(index=False)
@@ -135,7 +134,7 @@ Dataset:
135
  return df, f"⚠️ AI cleaning failed: {str(e)}"
136
 
137
  # ======================================================
138
- # 🧩 DATA SUMMARY FOR TOKEN-EFFICIENT ANALYSIS
139
  # ======================================================
140
  def summarize_for_analysis(df: pd.DataFrame, sample_rows=10) -> str:
141
  summary = [f"Rows: {len(df)}, Columns: {len(df.columns)}"]
@@ -147,14 +146,13 @@ def summarize_for_analysis(df: pd.DataFrame, sample_rows=10) -> str:
147
  else:
148
  top = df[col].value_counts().head(3).to_dict()
149
  summary.append(f"- {col}: top_values={top}, non_null={non_null}")
150
- # Include a small sample for context
151
  sample = df.head(sample_rows).to_csv(index=False)
152
  summary.append("--- Sample Data ---")
153
  summary.append(sample)
154
  return "\n".join(summary)
155
 
156
  # ======================================================
157
- # 🧩 DATA ANALYSIS
158
  # ======================================================
159
  def query_analysis_model(df: pd.DataFrame, user_query: str, dataset_name: str) -> str:
160
  prompt_summary = summarize_for_analysis(df)
@@ -176,44 +174,51 @@ Respond with:
176
  """
177
  try:
178
  if ANALYST_MODEL == "Gemini 2.5 Flash (Google)":
179
- if GEMINI_API_KEY is None:
180
- return "⚠️ Gemini API key missing."
181
- response = genai.models.generate(
182
- model="gemini-2.5-flash",
183
- messages=[{"author": "user", "content": prompt}],
184
- temperature=temperature,
185
- max_output_tokens=max_tokens
186
  )
187
- return response.candidates[0].content if response.candidates else "No response from Gemini."
188
  else:
189
  return safe_hf_generate(hf_analyst_client, prompt, temperature=temperature, max_tokens=max_tokens)
190
  except Exception as e:
191
  return f"⚠️ Analysis failed: {str(e)}"
192
 
193
  # ======================================================
194
- # 🚀 MAIN APP LOGIC
195
  # ======================================================
196
  uploaded = st.file_uploader("📎 Upload CSV or Excel file", type=["csv", "xlsx"])
 
 
197
 
198
  if uploaded:
199
  df = pd.read_csv(uploaded) if uploaded.name.endswith(".csv") else pd.read_excel(uploaded)
200
 
201
- with st.spinner("🧼 AI Cleaning your dataset..."):
202
  cleaned_df, cleaning_status = ai_clean_dataset(df)
203
 
204
- st.subheader("✅ Data Cleaning Status")
205
  st.info(cleaning_status)
206
-
207
  st.subheader("📊 Dataset Preview")
208
  st.dataframe(cleaned_df.head(), use_container_width=True)
209
 
210
- st.subheader("💬 Ask AI About Your Data")
211
- user_query = st.text_area("Enter your question:", placeholder="e.g. What factors influence sales the most?")
212
- if st.button("Analyze with AI", use_container_width=True) and user_query:
213
- with st.spinner("🤖 Interpreting data..."):
214
- # Analyst can work with original or cleaned dataset
215
- result = query_analysis_model(cleaned_df, user_query, uploaded.name)
216
- st.markdown("### 💡 Insights")
217
- st.markdown(result)
 
 
 
 
 
 
 
218
  else:
219
- st.info("📥 Upload a dataset to begin smart analysis.")
 
13
  # βš™οΈ APP CONFIGURATION
14
  # ======================================================
15
  st.set_page_config(page_title="📊 Smart Data Analyst Pro", layout="wide")
16
+ st.title("📊 Smart Data Analyst Pro (Chat Mode)")
17
+ st.caption("Chat with your dataset — AI cleans, analyzes, and visualizes data. Hugging Face + Gemini compatible.")
18
 
19
  # ======================================================
20
  # πŸ” Load Environment Variables
 
29
  login(token=HF_TOKEN)
30
 
31
  if GEMINI_API_KEY:
32
+ genai.configure(api_key=GEMINI_API_KEY)
33
  else:
34
  st.warning("⚠️ Gemini API key missing. Gemini 2.5 Flash will not work.")
35
 
 
51
  ANALYST_MODEL = st.selectbox(
52
  "Select Analysis Model:",
53
  [
54
+ "Gemini 2.5 Flash (Google)",
55
  "Qwen/Qwen2.5-14B-Instruct",
56
  "mistralai/Mistral-7B-Instruct-v0.3",
57
  "HuggingFaceH4/zephyr-7b-beta"
 
62
  temperature = st.slider("Temperature", 0.0, 1.0, 0.3)
63
  max_tokens = st.slider("Max Tokens", 128, 4096, 1024)
64
 
 
65
  hf_cleaner_client = InferenceClient(model=CLEANER_MODEL, token=HF_TOKEN)
66
  hf_analyst_client = None
67
  if ANALYST_MODEL != "Gemini 2.5 Flash (Google)":
 
80
  )
81
  return resp.strip()
82
  except Exception as e:
83
+ if "Supported task: conversational" in str(e):
84
  chat_resp = client.chat_completion(
85
  messages=[{"role": "user", "content": prompt}],
86
  max_tokens=max_tokens,
 
91
  raise e
92
 
93
  # ======================================================
94
+ # 🧩 DATA CLEANING
95
  # ======================================================
96
  def fallback_clean(df: pd.DataFrame) -> pd.DataFrame:
97
  df = df.copy()
 
109
  return df
110
 
111
  def ai_clean_dataset(df: pd.DataFrame) -> (pd.DataFrame, str):
 
112
  if len(df) > 50:
113
  return df, "⚠️ AI cleaning skipped: dataset has more than 50 rows."
114
  csv_text = df.to_csv(index=False)
 
134
  return df, f"⚠️ AI cleaning failed: {str(e)}"
135
 
136
  # ======================================================
137
+ # 🧩 DATA SUMMARY (Token-efficient)
138
  # ======================================================
139
  def summarize_for_analysis(df: pd.DataFrame, sample_rows=10) -> str:
140
  summary = [f"Rows: {len(df)}, Columns: {len(df.columns)}"]
 
146
  else:
147
  top = df[col].value_counts().head(3).to_dict()
148
  summary.append(f"- {col}: top_values={top}, non_null={non_null}")
 
149
  sample = df.head(sample_rows).to_csv(index=False)
150
  summary.append("--- Sample Data ---")
151
  summary.append(sample)
152
  return "\n".join(summary)
153
 
154
  # ======================================================
155
+ # 🧠 ANALYSIS FUNCTION
156
  # ======================================================
157
  def query_analysis_model(df: pd.DataFrame, user_query: str, dataset_name: str) -> str:
158
  prompt_summary = summarize_for_analysis(df)
 
174
  """
175
  try:
176
  if ANALYST_MODEL == "Gemini 2.5 Flash (Google)":
177
+ response = genai.GenerativeModel("gemini-2.5-flash").generate_content(
178
+ prompt,
179
+ generation_config={
180
+ "temperature": temperature,
181
+ "max_output_tokens": max_tokens
182
+ }
 
183
  )
184
+ return response.text if hasattr(response, "text") else "No valid text response."
185
  else:
186
  return safe_hf_generate(hf_analyst_client, prompt, temperature=temperature, max_tokens=max_tokens)
187
  except Exception as e:
188
  return f"⚠️ Analysis failed: {str(e)}"
189
 
190
  # ======================================================
191
+ # 🚀 MAIN CHATBOT LOGIC
192
  # ======================================================
193
  uploaded = st.file_uploader("📎 Upload CSV or Excel file", type=["csv", "xlsx"])
194
+ if "messages" not in st.session_state:
195
+ st.session_state.messages = []
196
 
197
  if uploaded:
198
  df = pd.read_csv(uploaded) if uploaded.name.endswith(".csv") else pd.read_excel(uploaded)
199
 
200
+ with st.spinner("🧼 Cleaning your dataset..."):
201
  cleaned_df, cleaning_status = ai_clean_dataset(df)
202
 
203
+ st.subheader("✅ Cleaning Status")
204
  st.info(cleaning_status)
 
205
  st.subheader("📊 Dataset Preview")
206
  st.dataframe(cleaned_df.head(), use_container_width=True)
207
 
208
+ st.subheader("💬 Chat with Your Dataset")
209
+ for msg in st.session_state.messages:
210
+ with st.chat_message(msg["role"]):
211
+ st.markdown(msg["content"])
212
+
213
+ if user_query := st.chat_input("Ask something about your dataset..."):
214
+ st.session_state.messages.append({"role": "user", "content": user_query})
215
+ with st.chat_message("user"):
216
+ st.markdown(user_query)
217
+
218
+ with st.chat_message("assistant"):
219
+ with st.spinner("🤖 Analyzing..."):
220
+ result = query_analysis_model(cleaned_df, user_query, uploaded.name)
221
+ st.markdown(result)
222
+ st.session_state.messages.append({"role": "assistant", "content": result})
223
  else:
224
+ st.info("📥 Upload a dataset to begin chatting with your AI analyst.")