Starberry15 committed on
Commit
c73dde2
·
verified ·
1 Parent(s): 34f26fc

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +7 -7
src/streamlit_app.py CHANGED
@@ -114,7 +114,7 @@ def fallback_clean(df: pd.DataFrame) -> pd.DataFrame:
114
  def ai_clean_dataset(df: pd.DataFrame) -> (pd.DataFrame, str):
115
  """Returns cleaned df and a status message"""
116
  if len(df) > 50:
117
- return df, "AI cleaning skipped: dataset has more than 50 rows."
118
  csv_text = df.to_csv(index=False)
119
  prompt = f"""
120
  You are a professional data cleaning assistant.
@@ -133,9 +133,9 @@ Dataset:
133
  cleaned_str = cleaned_str.replace("```csv", "").replace("```", "").replace("###", "").strip()
134
  cleaned_df = pd.read_csv(StringIO(cleaned_str), on_bad_lines="skip")
135
  cleaned_df.columns = [c.strip().replace(" ", "_").lower() for c in cleaned_df.columns]
136
- return cleaned_df, "AI cleaning completed successfully."
137
  except Exception as e:
138
- return df, f"AI cleaning failed: {str(e)}"
139
 
140
  # ======================================================
141
  # 🧩 DATA SUMMARY FOR TOKEN-EFFICIENT ANALYSIS
@@ -150,7 +150,7 @@ def summarize_for_analysis(df: pd.DataFrame, sample_rows=10) -> str:
150
  else:
151
  top = df[col].value_counts().head(3).to_dict()
152
  summary.append(f"- {col}: top_values={top}, non_null={non_null}")
153
- # Include a small sample
154
  sample = df.head(sample_rows).to_csv(index=False)
155
  summary.append("--- Sample Data ---")
156
  summary.append(sample)
@@ -181,13 +181,13 @@ Respond with:
181
  if ANALYST_MODEL == "Gemini 2.5 Flash (Google)":
182
  if GEMINI_API_KEY is None:
183
  return "⚠️ Gemini API key missing."
184
- response = genai.generate_text(
185
  model="gemini-2.5-flash",
186
- prompt=prompt,
187
  temperature=temperature,
188
  max_output_tokens=max_tokens
189
  )
190
- return getattr(response, "candidates", [{"content": "No response from Gemini."}])[0]["content"]
191
  else:
192
  return safe_hf_generate(hf_analyst_client, prompt, temperature=temperature, max_tokens=max_tokens)
193
  except Exception as e:
 
114
  def ai_clean_dataset(df: pd.DataFrame) -> (pd.DataFrame, str):
115
  """Returns cleaned df and a status message"""
116
  if len(df) > 50:
117
+ return df, "⚠️ AI cleaning skipped: dataset has more than 50 rows."
118
  csv_text = df.to_csv(index=False)
119
  prompt = f"""
120
  You are a professional data cleaning assistant.
 
133
  cleaned_str = cleaned_str.replace("```csv", "").replace("```", "").replace("###", "").strip()
134
  cleaned_df = pd.read_csv(StringIO(cleaned_str), on_bad_lines="skip")
135
  cleaned_df.columns = [c.strip().replace(" ", "_").lower() for c in cleaned_df.columns]
136
+ return cleaned_df, "AI cleaning completed successfully."
137
  except Exception as e:
138
+ return df, f"⚠️ AI cleaning failed: {str(e)}"
139
 
140
  # ======================================================
141
  # 🧩 DATA SUMMARY FOR TOKEN-EFFICIENT ANALYSIS
 
150
  else:
151
  top = df[col].value_counts().head(3).to_dict()
152
  summary.append(f"- {col}: top_values={top}, non_null={non_null}")
153
+ # Include a small sample for context
154
  sample = df.head(sample_rows).to_csv(index=False)
155
  summary.append("--- Sample Data ---")
156
  summary.append(sample)
 
181
  if ANALYST_MODEL == "Gemini 2.5 Flash (Google)":
182
  if GEMINI_API_KEY is None:
183
  return "⚠️ Gemini API key missing."
184
+ response = genai.models.generate(
185
  model="gemini-2.5-flash",
186
+ messages=[{"author": "user", "content": prompt}],
187
  temperature=temperature,
188
  max_output_tokens=max_tokens
189
  )
190
+ return response.candidates[0].content if response.candidates else "No response from Gemini."
191
  else:
192
  return safe_hf_generate(hf_analyst_client, prompt, temperature=temperature, max_tokens=max_tokens)
193
  except Exception as e: