Spaces:

DrishtiSharma
/

sql-rag

Sleeping

App Files Files Community

DrishtiSharma committed on Jan 14

Commit

7fac3e3

verified ·

1 Parent(s): 5d45f1f

Update dummy_funcs.py

Browse files

Files changed (1) hide show

dummy_funcs.py +137 -0

dummy_funcs.py CHANGED Viewed

@@ -214,3 +214,140 @@ def handle_visualization_suggestions(suggestions, df):
     # Display all generated visualizations
     for fig in visualizations:
         st.plotly_chart(fig, use_container_width=True)

     # Display all generated visualizations
     for fig in visualizations:
         st.plotly_chart(fig, use_container_width=True)
+-----------------
def ask_gpt4o_for_visualization(query, df, llm, retries=2):
    """Ask the model for chart suggestions that fit ``query`` and ``df``.

    A prompt is built from the query plus the numeric and non-numeric column
    names of ``df`` (via ``df.select_dtypes``) and passed to
    ``llm.generate(prompt)``. The reply is parsed with ``json.loads`` and
    filtered down to suggestions carrying the ``chart_type``, ``x_axis`` and
    ``y_axis`` keys.

    Args:
        query: The user's question; interpolated verbatim into the prompt.
        df: Object exposing ``select_dtypes(...).columns.tolist()``
            (presumably a pandas DataFrame -- confirm against the caller).
        llm: Object whose ``generate(prompt)`` returns the model reply in a
            form ``json.loads`` accepts (normally a ``str``).
        retries: Number of additional attempts after the first one when the
            call fails or the reply is not usable JSON.

    Returns:
        A list of suggestion dicts, or ``None`` when no valid suggestion was
        obtained. A reply that parses but lacks the required keys returns
        ``None`` immediately, without retrying.

    Progress and failures are reported through ``st`` (expected to be
    available at module level, as elsewhere in this file).
    """
    import json

    # Identify numeric and categorical columns
    numeric_columns = df.select_dtypes(include='number').columns.tolist()
    categorical_columns = df.select_dtypes(exclude='number').columns.tolist()
    # Enhanced Prompt with More Examples
    prompt = f"""
    Analyze the following query and suggest the most suitable visualization(s) using the dataset.
    **Query:** "{query}"
    **Numeric Columns (for Y-axis):** {', '.join(numeric_columns) if numeric_columns else 'None'}
    **Categorical Columns (for X-axis or grouping):** {', '.join(categorical_columns) if categorical_columns else 'None'}
    Suggest visualizations in this exact JSON format:
    [
      {{
        "chart_type": "bar/box/line/scatter/pie/heatmap",
        "x_axis": "categorical_or_time_column",
        "y_axis": "numeric_column",
        "group_by": "optional_column_for_grouping",
        "title": "Title of the chart",
        "description": "Why this chart is suitable"
      }}
    ]
    **Examples:**
    - For salary distribution:
      {{
        "chart_type": "box",
        "x_axis": "job_title",
        "y_axis": "salary_in_usd",
        "group_by": "experience_level",
        "title": "Salary Distribution by Job Title and Experience",
        "description": "A box plot showing salary ranges across job titles and experience levels."
      }}
    - For company size comparison:
      {{
        "chart_type": "bar",
        "x_axis": "company_size",
        "y_axis": "salary_in_usd",
        "group_by": null,
        "title": "Average Salary by Company Size",
        "description": "A bar chart comparing the average salaries across different company sizes."
      }}
    - For revenue trends over time:
      {{
        "chart_type": "line",
        "x_axis": "year",
        "y_axis": "revenue",
        "group_by": null,
        "title": "Revenue Growth Over Years",
        "description": "A line chart showing the trend of revenue over the years."
      }}
    - For market share breakdown:
      {{
        "chart_type": "pie",
        "x_axis": "market_segment",
        "y_axis": null,
        "group_by": null,
        "title": "Market Share by Segment",
        "description": "A pie chart showing the distribution of market share across various segments."
      }}
    - For correlation analysis:
      {{
        "chart_type": "scatter",
        "x_axis": "years_of_experience",
        "y_axis": "salary_in_usd",
        "group_by": "job_title",
        "title": "Experience vs Salary by Job Title",
        "description": "A scatter plot showing the relationship between years of experience and salary across job titles."
      }}
    - For data density:
      {{
        "chart_type": "heatmap",
        "x_axis": "department",
        "y_axis": "region",
        "group_by": null,
        "title": "Employee Distribution by Department and Region",
        "description": "A heatmap showing the concentration of employees across departments and regions."
      }}
    Only suggest visualizations that make sense for the data and the query.
    """
    for attempt in range(retries + 1):
        # Keep the try body limited to the model call and the parse so that
        # validation problems are not misreported as call errors.
        try:
            response = llm.generate(prompt)
            if isinstance(response, str):
                # Chat models often wrap JSON in a Markdown code fence, which
                # json.loads rejects; unwrap it before parsing.
                response = _strip_code_fence(response)
            suggestions = json.loads(response)
        except json.JSONDecodeError:
            st.warning(f"⚠️ Attempt {attempt + 1}: GPT-4o returned invalid JSON.")
        except Exception as e:
            # Boundary with an external service: report and fall through to retry.
            st.error(f"⚠️ Error during GPT-4o call: {e}")
        else:
            if isinstance(suggestions, list):
                valid_suggestions = [
                    s for s in suggestions if _has_required_keys(s)
                ]
                if valid_suggestions:
                    return valid_suggestions
                st.warning("⚠️ GPT-4o did not suggest valid visualizations.")
                return None
            if isinstance(suggestions, dict):
                if _has_required_keys(suggestions):
                    return [suggestions]
                st.warning("⚠️ GPT-4o's suggestion is incomplete.")
                return None
            # Valid JSON, but a bare string/number/bool/null: say so and retry
            # instead of silently looping.
            st.warning(
                f"⚠️ Attempt {attempt + 1}: GPT-4o returned JSON that is not a list or object."
            )
        # Retry if necessary
        if attempt < retries:
            st.info("🔄 Retrying visualization suggestion...")
    st.error("❌ Failed to generate a valid visualization after multiple attempts.")
    return None


def _strip_code_fence(text):
    """Return ``text`` without a surrounding Markdown code fence, if it has one."""
    import re

    # Optional language tag after the opening fence (e.g. ``json``), then the
    # payload, then the closing fence; anything else is returned untouched.
    fenced = re.match(r"\A```[\w-]*\s*(.*?)\s*```\Z", text.strip(), re.DOTALL)
    return fenced.group(1) if fenced else text


def _has_required_keys(item):
    """Return True when ``item`` is a dict carrying every mandatory chart key."""
    # The isinstance guard matters: ``key in "some string"`` is a substring
    # test and ``key in 5`` raises TypeError.
    return isinstance(item, dict) and all(
        key in item for key in ("chart_type", "x_axis", "y_axis")
    )