Spaces:

Ekimetrics
/

climate-question-answering

Running

App Files Files Community

timeki

armanddemasson committed on Apr 23

Commit

e9c2c00

1 Parent(s): de89cf7

feature/drias_parallelization (#25)

Browse files

- log to huggingface (f9c4c84a71d320c6db05ee099b9e35492ba7b184)
- Merged in feat/logs_on_huggingface (pull request #5) (261632833e7d939c321f208edfd6576e68947b4b)
- feat: added multithreading to run sql queries in talk to drias (705ccece7775c65a9c7b73b091cdddbc4246f2e7)
- chore: remove prints in talk to drias workflow (a967134f90c70d87bb3d786f2feb81f2e56fdb9f)
- Merged in feat/improve_drias_exeuction_time (pull request #6) (7c38528636ac19c1efa240a122e523ed0c34706a)
- fix import (05b8df9c9b74926459da70797b6852ff07a4d838)
- Merge branch 'main' into dev (8fb231c8beabf8a6406f05cf4cac564c5d81c7ce)
- Merged in dev (pull request #7) (6b9f71b1cf216eef0fd7973412f675d8633a5f4a)
- fix import (b35df2a8160723e43f74a040aa94983069066213)
- Merge branch 'main' of https://bitbucket.org/ekimetrics/climate_qa (f96cfd0715ec2b1ed7a78775ea7f8722f5793d8f)

Co-authored-by: Armand Demasson <armanddemasson@users.noreply.huggingface.co>

Files changed (11) hide show

climateqa/chat.py +5 -54
climateqa/engine/talk_to_data/main.py +5 -2
climateqa/engine/talk_to_data/sql_query.py +3 -2
climateqa/engine/talk_to_data/{workflow.py → talk_to_drias.py} +126 -96
climateqa/handle_stream_events.py +1 -1
climateqa/logging.py +194 -0
data/drias/drias.db +0 -3
front/tabs/chat_interface.py +1 -1
front/tabs/tab_drias.py +6 -31
front/utils.py +0 -11
requirements.txt +1 -0

climateqa/chat.py CHANGED Viewed

@@ -12,15 +12,11 @@ from .handle_stream_events import (
     convert_to_docs_to_html,
     stream_answer,
     handle_retrieved_owid_graphs,
-    serialize_docs,
 )
-# Function to log data on Azure
-def log_on_azure(file, logs, share_client):
-    logs = json.dumps(logs)
-    file_client = share_client.get_file_client(file)
-    file_client.upload_file(logs)
 # Chat functions
 def start_chat(query, history, search_only):
     history = history + [ChatMessage(role="user", content=query)]
@@ -32,28 +28,6 @@ def start_chat(query, history, search_only):
 def finish_chat():
     return gr.update(interactive=True, value="")
-def log_interaction_to_azure(history, output_query, sources, docs, share_client, user_id):
-    try:
-        # Log interaction to Azure if not in local environment
-        if os.getenv("GRADIO_ENV") != "local":
-            timestamp = str(datetime.now().timestamp())
-            prompt = history[1]["content"]
-            logs = {
-                "user_id": str(user_id),
-                "prompt": prompt,
-                "query": prompt,
-                "question": output_query,
-                "sources": sources,
-                "docs": serialize_docs(docs),
-                "answer": history[-1].content,
-                "time": timestamp,
-            }
-            log_on_azure(f"{timestamp}.json", logs, share_client)
-    except Exception as e:
-        print(f"Error logging on Azure Blob Storage: {e}")
-        error_msg = f"ClimateQ&A Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)"
-        raise gr.Error(error_msg)
 def handle_numerical_data(event):
     if event["name"] == "retrieve_drias_data" and event["event"] == "on_chain_end":
         numerical_data = event["data"]["output"]["drias_data"]
@@ -61,27 +35,6 @@ def handle_numerical_data(event):
         return numerical_data, sql_query
     return None, None
-def log_drias_interaction_to_azure(query, sql_query, data, share_client, user_id):
-    try:
-        # Log interaction to Azure if not in local environment
-        if os.getenv("GRADIO_ENV") != "local":
-            timestamp = str(datetime.now().timestamp())
-            logs = {
-                "user_id": str(user_id),
-                "query": query,
-                "sql_query": sql_query,
-                # "data": data.to_dict() if data is not None else None,
-                "time": timestamp,
-            }
-            log_on_azure(f"drias_{timestamp}.json", logs, share_client)
-            print(f"Logged Drias interaction to Azure Blob Storage: {logs}")
-        else:
-            print("share_client or user_id is None, or GRADIO_ENV is local")
-    except Exception as e:
-        print(f"Error logging Drias interaction on Azure Blob Storage: {e}")
-        error_msg = f"Drias Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)"
-        raise gr.Error(error_msg)
 # Main chat function
 async def chat_stream(
     agent : CompiledStateGraph,
@@ -235,9 +188,7 @@ async def chat_stream(
         print(f"Event {event} has failed")
         raise gr.Error(str(e))
     # Call the function to log interaction
-    log_interaction_to_azure(history, output_query, sources, docs, share_client, user_id)
     yield history, docs_html, output_query, output_language, related_contents, graphs_html, follow_up_examples#, vanna_data

     convert_to_docs_to_html,
     stream_answer,
     handle_retrieved_owid_graphs,
 )
+from .logging import (
+    log_interaction_to_huggingface
+)
 # Chat functions
 def start_chat(query, history, search_only):
     history = history + [ChatMessage(role="user", content=query)]
 def finish_chat():
     """Return a Gradio update that sets ``interactive=True`` and clears the value to an empty string.
     NOTE(review): presumably applied to the chat input box once a chat turn ends - confirm against the caller.
     """
     return gr.update(interactive=True, value="")
 def handle_numerical_data(event):
     if event["name"] == "retrieve_drias_data" and event["event"] == "on_chain_end":
         numerical_data = event["data"]["output"]["drias_data"]
         return numerical_data, sql_query
     return None, None
 # Main chat function
 async def chat_stream(
     agent : CompiledStateGraph,
         print(f"Event {event} has failed")
         raise gr.Error(str(e))
     # Call the function to log interaction
+    log_interaction_to_huggingface(history, output_query, sources, docs, share_client, user_id)
     yield history, docs_html, output_query, output_language, related_contents, graphs_html, follow_up_examples#, vanna_data

climateqa/engine/talk_to_data/main.py CHANGED Viewed

@@ -1,5 +1,6 @@
-from climateqa.engine.talk_to_data.workflow import drias_workflow
 from climateqa.engine.llm import get_llm
 import ast
 llm = get_llm(provider="openai")
@@ -37,7 +38,7 @@ def ask_llm_column_names(sql_query: str, llm) -> list[str]:
     columns_list = ast.literal_eval(columns.strip("```python\n").strip())
     return columns_list
-async def ask_drias(query: str, index_state: int = 0) -> tuple:
     """Main function to process a DRIAS query and return results.
     This function orchestrates the DRIAS workflow, processing a user query to generate
@@ -85,6 +86,8 @@ async def ask_drias(query: str, index_state: int = 0) -> tuple:
     sql_query = sql_queries[index_state]
     dataframe = result_dataframes[index_state]
     figure = figures[index_state](dataframe)
     return sql_query, dataframe, figure, sql_queries, result_dataframes, figures, index_state, table_list, ""

+from climateqa.engine.talk_to_data.talk_to_drias import drias_workflow
 from climateqa.engine.llm import get_llm
+from climateqa.logging import log_drias_interaction_to_huggingface
 import ast
 llm = get_llm(provider="openai")
     columns_list = ast.literal_eval(columns.strip("```python\n").strip())
     return columns_list
+async def ask_drias(query: str, index_state: int = 0, user_id: str = None) -> tuple:
     """Main function to process a DRIAS query and return results.
     This function orchestrates the DRIAS workflow, processing a user query to generate
     sql_query = sql_queries[index_state]
     dataframe = result_dataframes[index_state]
     figure = figures[index_state](dataframe)
+    log_drias_interaction_to_huggingface(query, sql_query, user_id)
     return sql_query, dataframe, figure, sql_queries, result_dataframes, figures, index_state, table_list, ""

climateqa/engine/talk_to_data/sql_query.py CHANGED Viewed

@@ -22,9 +22,10 @@ async def execute_sql_query(sql_query: str) -> pd.DataFrame:
     """
     def _execute_query():
         # Execute the query
-        results = duckdb.sql(sql_query)
         # return fetched data
-        return results.fetchdf()
     # Run the query in a thread pool to avoid blocking
     loop = asyncio.get_event_loop()

     """
     def _execute_query():
+        # Open a connection for this call only and close it on exit,
+        # so repeated queries do not leave connections open.
+        with duckdb.connect() as con:
+            # fetchdf() materializes the result before the connection is closed
+            return con.sql(sql_query).fetchdf()
     # Run the query in a thread pool to avoid blocking
     loop = asyncio.get_event_loop()

climateqa/engine/talk_to_data/{workflow.py → talk_to_drias.py} RENAMED Viewed

@@ -1,10 +1,12 @@
 import os
 from typing import Any, Callable, TypedDict, Optional
 import pandas as pd
 from plotly.graph_objects import Figure
 from climateqa.engine.llm import get_llm
 from climateqa.engine.talk_to_data.config import INDICATOR_COLUMNS_PER_TABLE
 from climateqa.engine.talk_to_data.plot import PLOTS, Plot
 from climateqa.engine.talk_to_data.sql_query import execute_sql_query
@@ -17,6 +19,7 @@ from climateqa.engine.talk_to_data.utils import (
     detect_relevant_tables,
 )
 ROOT_PATH = os.path.dirname(os.path.dirname(os.getcwd()))
 class TableState(TypedDict):
@@ -61,101 +64,6 @@ class State(TypedDict):
     plot_states: dict[str, PlotState]
     error: Optional[str]
-async def drias_workflow(user_input: str) -> State:
-    """Performs the complete workflow of Talk To Drias : from user input to sql queries, dataframes and figures generated
-    Args:
-        user_input (str): initial user input
-    Returns:
-        State: Final state with all the results
-    """
-    state: State = {
-        'user_input': user_input,
-        'plots': [],
-        'plot_states': {}
-    }
-    llm = get_llm(provider="openai")
-    plots = await find_relevant_plots(state, llm)
-    state['plots'] = plots
-    if not state['plots']:
-        state['error'] = 'There is no plot to answer to the question'
-        return state
-    have_relevant_table = False
-    have_sql_query = False
-    have_dataframe = False
-    for plot_name in state['plots']:
-        plot = next((p for p in PLOTS if p['name'] == plot_name), None) # Find the associated plot object
-        if plot is None:
-            continue
-        plot_state: PlotState = {
-            'plot_name': plot_name,
-            'tables': [],
-            'table_states': {}
-        }
-        plot_state['plot_name'] = plot_name
-        relevant_tables = await find_relevant_tables_per_plot(state, plot, llm)
-        if len(relevant_tables) > 0 :
-            have_relevant_table = True
-        plot_state['tables'] = relevant_tables
-        params = {}
-        for param_name in plot['params']:
-            param = await find_param(state, param_name, relevant_tables[0])
-            if param:
-                params.update(param)
-        for n, table in enumerate(plot_state['tables']):
-            if n > 2:
-                break
-            table_state: TableState = {
-                'table_name': table,
-                'params': params,
-                'status': 'OK'
-            }
-            table_state["params"]['indicator_column'] = find_indicator_column(table)
-            sql_query = plot['sql_query'](table, table_state['params'])
-            if sql_query == "":
-                table_state['status'] = 'ERROR'
-                continue
-            else :
-                have_sql_query = True
-            table_state['sql_query'] = sql_query
-            df = await execute_sql_query(sql_query)
-            if len(df) > 0:
-                have_dataframe = True
-            figure = plot['plot_function'](table_state['params'])
-            table_state['dataframe'] = df
-            table_state['figure'] = figure
-            plot_state['table_states'][table] = table_state
-        state['plot_states'][plot_name] = plot_state
-    if not have_relevant_table:
-        state['error'] = "There is no relevant table in the our database to answer your question"
-    elif not have_sql_query:
-        state['error'] = "There is no relevant sql query on our database that can help to answer your question"
-    elif not have_dataframe:
-        state['error'] = "There is no data in our table that can answer to your question"
-    return state
 async def find_relevant_plots(state: State, llm) -> list[str]:
     print("---- Find relevant plots ----")
     relevant_plots = await detect_relevant_plots(state['user_input'], llm)
@@ -238,6 +146,128 @@ def find_indicator_column(table: str) -> str:
     return INDICATOR_COLUMNS_PER_TABLE[table]
 # def make_write_query_node():
 #     def write_query(state):

 import os
 from typing import Any, Callable, TypedDict, Optional
+from numpy import sort
 import pandas as pd
+import asyncio
 from plotly.graph_objects import Figure
 from climateqa.engine.llm import get_llm
+from climateqa.engine.talk_to_data import sql_query
 from climateqa.engine.talk_to_data.config import INDICATOR_COLUMNS_PER_TABLE
 from climateqa.engine.talk_to_data.plot import PLOTS, Plot
 from climateqa.engine.talk_to_data.sql_query import execute_sql_query
     detect_relevant_tables,
 )
 ROOT_PATH = os.path.dirname(os.path.dirname(os.getcwd()))
 class TableState(TypedDict):
     plot_states: dict[str, PlotState]
     error: Optional[str]
 async def find_relevant_plots(state: State, llm) -> list[str]:
     print("---- Find relevant plots ----")
     relevant_plots = await detect_relevant_plots(state['user_input'], llm)
     return INDICATOR_COLUMNS_PER_TABLE[table]
+async def process_table(
+    table: str,
+    params: dict[str, Any],
+    plot: Plot,
+) -> TableState:
+    """Build the SQL query for one table, run it and create the matching figure.
+    The incoming parameters are copied before the indicator column is added,
+    so the caller's dictionary is left untouched.
+    Args:
+        table (str): The name of the table to process
+        params (dict[str, Any]): Parameters used for querying the table
+        plot (Plot): The plot object providing the ``sql_query`` and ``plot_function`` callables
+    Returns:
+        TableState: The state of the processed table; its status is 'ERROR' and
+            dataframe / sql_query / figure stay None when the generated query is empty
+    """
+    query_params = params.copy()
+    query_params['indicator_column'] = find_indicator_column(table)
+    table_state: TableState = {
+        'table_name': table,
+        'params': query_params,
+        'status': 'OK',
+        'dataframe': None,
+        'sql_query': None,
+        'figure': None
+    }
+    query = plot['sql_query'](table, query_params)
+    if query == "":
+        # No query could be generated for this table
+        table_state['status'] = 'ERROR'
+        return table_state
+    table_state['sql_query'] = query
+    table_state['dataframe'] = await execute_sql_query(query)
+    table_state['figure'] = plot['plot_function'](query_params)
+    return table_state
+async def drias_workflow(user_input: str) -> State:
+    """Performs the complete workflow of Talk To Drias : from user input to sql queries, dataframes and figures generated
+    For every relevant plot, the relevant tables are detected, the plot parameters are
+    extracted once from the first table, and at most three tables are processed concurrently.
+    Args:
+        user_input (str): initial user input
+    Returns:
+        State: Final state with all the results; ``state['error']`` holds a message
+            when no plot, no table, no sql query or no data was found
+    """
+    state: State = {
+        'user_input': user_input,
+        'plots': [],
+        'plot_states': {},
+        'error': ''
+    }
+    llm = get_llm(provider="openai")
+    state['plots'] = await find_relevant_plots(state, llm)
+    if not state['plots']:
+        state['error'] = 'There is no plot to answer to the question'
+        return state
+    # These flags describe the whole run: they are raised as soon as any plot/table
+    # succeeds and are never reset, so an empty later plot cannot hide earlier results.
+    have_relevant_table = False
+    have_sql_query = False
+    have_dataframe = False
+    for plot_name in state['plots']:
+        plot = next((p for p in PLOTS if p['name'] == plot_name), None) # Find the associated plot object
+        if plot is None:
+            continue
+        relevant_tables = await find_relevant_tables_per_plot(state, plot, llm)
+        plot_state: PlotState = {
+            'plot_name': plot_name,
+            'tables': relevant_tables,
+            'table_states': {}
+        }
+        state['plot_states'][plot_name] = plot_state
+        if not relevant_tables:
+            # Nothing to query: parameter extraction below needs a first table
+            continue
+        have_relevant_table = True
+        params = {}
+        for param_name in plot['params']:
+            param = await find_param(state, param_name, relevant_tables[0])
+            if param:
+                params.update(param)
+        # Only the first three tables are processed; they are awaited together
+        tasks = [process_table(table, params, plot) for table in relevant_tables[:3]]
+        results = await asyncio.gather(*tasks)
+        # Store results back in plot_state
+        for table_state in results:
+            if table_state['sql_query']:
+                have_sql_query = True
+            if table_state['dataframe'] is not None and len(table_state['dataframe']) > 0:
+                have_dataframe = True
+            plot_state['table_states'][table_state['table_name']] = table_state
+    if not have_relevant_table:
+        state['error'] = "There is no relevant table in our database to answer your question"
+    elif not have_sql_query:
+        state['error'] = "There is no relevant sql query on our database that can help to answer your question"
+    elif not have_dataframe:
+        state['error'] = "There is no data in our table that can answer to your question"
+    return state
 # def make_write_query_node():
 #     def write_query(state):

climateqa/handle_stream_events.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from langchain_core.runnables.schema import StreamEvent
 from gradio import ChatMessage
 from climateqa.engine.chains.prompts import audience_prompts
-from front.utils import make_html_source,parse_output_llm_with_sources,serialize_docs,make_toolbox,generate_html_graphs
 import numpy as np
 def init_audience(audience :str) -> str:

 from langchain_core.runnables.schema import StreamEvent
 from gradio import ChatMessage
 from climateqa.engine.chains.prompts import audience_prompts
+from front.utils import make_html_source,parse_output_llm_with_sources
 import numpy as np
 def init_audience(audience :str) -> str:

climateqa/logging.py ADDED Viewed

	@@ -0,0 +1,194 @@

+import os
+from datetime import datetime
+import json
+from huggingface_hub import HfApi
+import gradio as gr
+import csv
+def serialize_docs(docs:list)->list:
+    """Turn document objects into plain dictionaries.
+    Args:
+        docs (list): Objects exposing ``page_content`` and ``metadata`` attributes
+    Returns:
+        list: One dict with the keys "page_content" and "metadata" per document, in input order
+    """
+    return [
+        {"page_content": doc.page_content, "metadata": doc.metadata}
+        for doc in docs
+    ]
+## AZURE LOGGING - DEPRECATED
+# def log_on_azure(file, logs, share_client):
+#     """Log data to Azure Blob Storage.
+#     Args:
+#         file (str): Name of the file to store logs
+#         logs (dict): Log data to store
+#         share_client: Azure share client instance
+#     """
+#     logs = json.dumps(logs)
+#     file_client = share_client.get_file_client(file)
+#     file_client.upload_file(logs)
+# def log_interaction_to_azure(history, output_query, sources, docs, share_client, user_id):
+#     """Log chat interaction to Azure and Hugging Face.
+#     Args:
+#         history (list): Chat message history
+#         output_query (str): Processed query
+#         sources (list): Knowledge base sources used
+#         docs (list): Retrieved documents
+#         share_client: Azure share client instance
+#         user_id (str): User identifier
+#     """
+#     try:
+#         # Log interaction to Azure if not in local environment
+#         if os.getenv("GRADIO_ENV") != "local":
+#             timestamp = str(datetime.now().timestamp())
+#             prompt = history[1]["content"]
+#             logs = {
+#                 "user_id": str(user_id),
+#                 "prompt": prompt,
+#                 "query": prompt,
+#                 "question": output_query,
+#                 "sources": sources,
+#                 "docs": serialize_docs(docs),
+#                 "answer": history[-1].content,
+#                 "time": timestamp,
+#             }
+#             # Log to Azure
+#             log_on_azure(f"{timestamp}.json", logs, share_client)
+#     except Exception as e:
+#         print(f"Error logging on Azure Blob Storage: {e}")
+#         error_msg = f"ClimateQ&A Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)"
+#         raise gr.Error(error_msg)
+# def log_drias_interaction_to_azure(query, sql_query, data, share_client, user_id):
+#     """Log Drias data interaction to Azure and Hugging Face.
+#     Args:
+#         query (str): User query
+#         sql_query (str): SQL query used
+#         data: Retrieved data
+#         share_client: Azure share client instance
+#         user_id (str): User identifier
+#     """
+#     try:
+#         # Log interaction to Azure if not in local environment
+#         if os.getenv("GRADIO_ENV") != "local":
+#             timestamp = str(datetime.now().timestamp())
+#             logs = {
+#                 "user_id": str(user_id),
+#                 "query": query,
+#                 "sql_query": sql_query,
+#                 "time": timestamp,
+#             }
+#             log_on_azure(f"drias_{timestamp}.json", logs, share_client)
+#             print(f"Logged Drias interaction to Azure Blob Storage: {logs}")
+#         else:
+#             print("share_client or user_id is None, or GRADIO_ENV is local")
+#     except Exception as e:
+#         print(f"Error logging Drias interaction on Azure Blob Storage: {e}")
+#         error_msg = f"Drias Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)"
+#         raise gr.Error(error_msg)
+## HUGGING FACE LOGGING
+def log_on_huggingface(log_filename, logs):
+    """Upload a log record as a JSON file to a Hugging Face dataset repository.
+    The upload is best-effort: any failure is printed and swallowed here.
+    Nothing is uploaded when the ``HF_LOGS_TOKEN`` environment variable is missing.
+    Args:
+        log_filename (str): Path of the file to create inside the repository
+        logs (dict): Log data to store; it is left unmodified
+    """
+    try:
+        # Get Hugging Face token from environment
+        hf_token = os.getenv("HF_LOGS_TOKEN")
+        if not hf_token:
+            print("HF_LOGS_TOKEN not found in environment variables")
+            return
+        # Get repository name from environment or use default
+        repo_id = os.getenv("HF_DATASET_REPO", "timeki/climateqa_logs")
+        # Initialize HfApi
+        api = HfApi(token=hf_token)
+        # Add the timestamp on a copy so the caller's dict is not mutated
+        payload = {**logs, "timestamp": datetime.now().strftime("%Y%m%d_%H%M%S_%f")}
+        # Upload the JSON document directly from memory
+        api.upload_file(
+            path_or_fileobj=json.dumps(payload).encode('utf-8'),
+            path_in_repo=log_filename,
+            repo_id=repo_id,
+            repo_type="dataset"
+        )
+    except Exception as e:
+        print(f"Error logging to Hugging Face: {e}")
+def log_interaction_to_huggingface(history, output_query, sources, docs, share_client, user_id):
+    """Record one chat exchange in the Hugging Face log repository.
+    Nothing is logged when the ``GRADIO_ENV`` environment variable equals "local".
+    Args:
+        history (list): Chat message history; the entry at index 1 supplies the prompt
+            and the last entry supplies the answer
+        output_query (str): Processed query, stored under "question"
+        sources (list): Knowledge base sources used
+        docs (list): Retrieved documents, serialized with ``serialize_docs``
+        share_client: Unused here; still part of the signature for callers that pass it
+        user_id (str): User identifier
+    Raises:
+        gr.Error: If preparing the log record or calling ``log_on_huggingface`` raises
+    """
+    try:
+        # Local runs are never logged
+        if os.getenv("GRADIO_ENV") == "local":
+            return
+        timestamp = str(datetime.now().timestamp())
+        prompt = history[1]["content"]
+        record = {
+            "user_id": str(user_id),
+            "prompt": prompt,
+            "query": prompt,
+            "question": output_query,
+            "sources": sources,
+            "docs": serialize_docs(docs),
+            "answer": history[-1].content,
+            "time": timestamp,
+        }
+        # Each interaction gets its own file under chat/
+        log_on_huggingface(f"chat/{timestamp}.json", record)
+    except Exception as e:
+        print(f"Error logging to Hugging Face: {e}")
+        error_msg = f"ClimateQ&A Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)"
+        raise gr.Error(error_msg)
+def log_drias_interaction_to_huggingface(query, sql_query, user_id):
+    """Log Drias data interaction to Hugging Face.
+    Nothing is logged when the ``GRADIO_ENV`` environment variable equals "local".
+    Args:
+        query (str): User query
+        sql_query (str): SQL query used
+        user_id (str): User identifier
+    Raises:
+        gr.Error: If preparing the log record or calling ``log_on_huggingface`` raises
+    """
+    try:
+        if os.getenv("GRADIO_ENV") != "local":
+            timestamp = str(datetime.now().timestamp())
+            logs = {
+                "user_id": str(user_id),
+                "query": query,
+                "sql_query": sql_query,
+                "time": timestamp,
+            }
+            log_on_huggingface(f"drias/drias_{timestamp}.json", logs)
+            print(f"Logged Drias interaction to Hugging Face: {logs}")
+        else:
+            # The environment is the only condition checked here, so the message says only that
+            print("GRADIO_ENV is local, Drias interaction not logged")
+    except Exception as e:
+        print(f"Error logging Drias interaction to Hugging Face: {e}")
+        error_msg = f"Drias Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)"
+        raise gr.Error(error_msg)

data/drias/drias.db DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1e29ba55d0122dc034b76113941769b44214355d4528bcc5b3d8f71f3c50bf59
-size 280621056

front/tabs/chat_interface.py CHANGED Viewed

@@ -39,7 +39,7 @@ What do you want to learn ?
 # """
 init_prompt_poc = """
-Hello, I am ClimateQ&A, a conversational assistant designed to help you understand climate change and biodiversity loss. I will answer your questions by **sifting through the IPCC and IPBES scientific reports, the Paris Climate Action Plan (PCAET), the Biodiversity Plan 2018-2024, and the Acclimaterra reports from the Nouvelle-Aquitaine Region**.
 ❓ How to use
 - **Language**: You can ask me your questions in any language.

 # """
 init_prompt_poc = """
+Hello, I am ClimateQ&A, a conversational assistant designed to help you understand climate change and biodiversity loss. I will answer your questions by **sifting through the IPCC and IPBES scientific reports, the Paris Climate Action Plan (PCAET), the Paris Biodiversity Plan 2018-2024, and the Acclimaterra reports from the Nouvelle-Aquitaine Region**.
 ❓ How to use
 - **Language**: You can ask me your questions in any language.

front/tabs/tab_drias.py CHANGED Viewed

@@ -5,8 +5,6 @@ import pandas as pd
 from climateqa.engine.talk_to_data.main import ask_drias
 from climateqa.engine.talk_to_data.config import DRIAS_MODELS, DRIAS_UI_TEXT
-from climateqa.chat import log_drias_interaction_to_azure
 class DriasUIElements(TypedDict):
     tab: gr.Tab
@@ -28,8 +26,8 @@ class DriasUIElements(TypedDict):
     next_button: gr.Button
-async def ask_drias_query(query: str, index_state: int):
-    result = await ask_drias(query, index_state)
     return result
@@ -196,19 +194,7 @@ def create_drias_ui() -> DriasUIElements:
             next_button=next_button
         )
-def log_drias_to_azure(query: str, sql_query: str, data, share_client, user_id):
-    """Log Drias interaction to Azure storage."""
-    print("log_drias_to_azure")
-    if share_client is not None and user_id is not None:
-        log_drias_interaction_to_azure(
-            query=query,
-            sql_query=sql_query,
-            data=data,
-            share_client=share_client,
-            user_id=user_id
-        )
-    else:
-        print("share_client or user_id is None")
 def setup_drias_events(ui_elements: DriasUIElements, share_client=None, user_id=None) -> None:
     """Set up all event handlers for the DRIAS tab."""
@@ -218,10 +204,7 @@ def setup_drias_events(ui_elements: DriasUIElements, share_client=None, user_id=
     plots_state = gr.State([])
     index_state = gr.State(0)
     table_names_list = gr.State([])
-    def log_drias_interaction(query: str, sql_query: str, data: pd.DataFrame):
-        log_drias_to_azure(query, sql_query, data, share_client, user_id)
     # Handle example selection
     ui_elements["examples_hidden"].change(
@@ -230,7 +213,7 @@ def setup_drias_events(ui_elements: DriasUIElements, share_client=None, user_id=
         outputs=[ui_elements["details_accordion"], ui_elements["drias_direct_question"]]
     ).then(
         ask_drias_query,
-        inputs=[ui_elements["examples_hidden"], index_state],
         outputs=[
             ui_elements["drias_sql_query"],
             ui_elements["drias_table"],
@@ -242,10 +225,6 @@ def setup_drias_events(ui_elements: DriasUIElements, share_client=None, user_id=
             table_names_list,
             ui_elements["result_text"],
         ],
-    ).then(
-        log_drias_interaction,
-        inputs=[ui_elements["examples_hidden"], ui_elements["drias_sql_query"], ui_elements["drias_table"]],
-        outputs=[],
     ).then(
         show_results,
         inputs=[sql_queries_state, dataframes_state, plots_state],
@@ -276,7 +255,7 @@ def setup_drias_events(ui_elements: DriasUIElements, share_client=None, user_id=
         outputs=[ui_elements["details_accordion"]]
     ).then(
         ask_drias_query,
-        inputs=[ui_elements["drias_direct_question"], index_state],
         outputs=[
             ui_elements["drias_sql_query"],
             ui_elements["drias_table"],
@@ -288,10 +267,6 @@ def setup_drias_events(ui_elements: DriasUIElements, share_client=None, user_id=
             table_names_list,
             ui_elements["result_text"],
         ],
-    ).then(
-        log_drias_interaction,
-        inputs=[ui_elements["drias_direct_question"], ui_elements["drias_sql_query"], ui_elements["drias_table"]],
-        outputs=[],
     ).then(
         show_results,
         inputs=[sql_queries_state, dataframes_state, plots_state],

 from climateqa.engine.talk_to_data.main import ask_drias
 from climateqa.engine.talk_to_data.config import DRIAS_MODELS, DRIAS_UI_TEXT
 class DriasUIElements(TypedDict):
     tab: gr.Tab
     next_button: gr.Button
+async def ask_drias_query(query: str, index_state: int, user_id: str):
+    """Await ``ask_drias`` with the query, the index state and the user id, and return its result unchanged."""
+    return await ask_drias(query, index_state, user_id)
             next_button=next_button
         )
 def setup_drias_events(ui_elements: DriasUIElements, share_client=None, user_id=None) -> None:
     """Set up all event handlers for the DRIAS tab."""
     plots_state = gr.State([])
     index_state = gr.State(0)
     table_names_list = gr.State([])
+    user_id = gr.State(user_id)
     # Handle example selection
     ui_elements["examples_hidden"].change(
         outputs=[ui_elements["details_accordion"], ui_elements["drias_direct_question"]]
     ).then(
         ask_drias_query,
+        inputs=[ui_elements["examples_hidden"], index_state, user_id],
         outputs=[
             ui_elements["drias_sql_query"],
             ui_elements["drias_table"],
             table_names_list,
             ui_elements["result_text"],
         ],
     ).then(
         show_results,
         inputs=[sql_queries_state, dataframes_state, plots_state],
         outputs=[ui_elements["details_accordion"]]
     ).then(
         ask_drias_query,
+        inputs=[ui_elements["drias_direct_question"], index_state, user_id],
         outputs=[
             ui_elements["drias_sql_query"],
             ui_elements["drias_table"],
             table_names_list,
             ui_elements["result_text"],
         ],
     ).then(
         show_results,
         inputs=[sql_queries_state, dataframes_state, plots_state],

front/utils.py CHANGED Viewed

@@ -13,17 +13,6 @@ def make_pairs(lst:list)->list:
     return [(lst[i], lst[i + 1]) for i in range(0, len(lst), 2)]
-def serialize_docs(docs:list)->list:
-    new_docs = []
-    for doc in docs:
-        new_doc = {}
-        new_doc["page_content"] = doc.page_content
-        new_doc["metadata"] = doc.metadata
-        new_docs.append(new_doc)
-    return new_docs
 def parse_output_llm_with_sources(output:str)->str:
     # Split the content into a list of text and "[Doc X]" references
     content_parts = re.split(r'\[(Doc\s?\d+(?:,\s?Doc\s?\d+)*)\]', output)

     return [(lst[i], lst[i + 1]) for i in range(0, len(lst), 2)]
 def parse_output_llm_with_sources(output:str)->str:
     # Split the content into a list of text and "[Doc X]" references
     content_parts = re.split(r'\[(Doc\s?\d+(?:,\s?Doc\s?\d+)*)\]', output)

requirements.txt CHANGED Viewed

@@ -8,6 +8,7 @@ langgraph==0.2.70
 pinecone-client==4.1.0
 sentence-transformers==2.6.0
 huggingface-hub==0.25.2
 pyalex==0.13
 networkx==3.2.1
 pyvis==0.3.2

 pinecone-client==4.1.0
 sentence-transformers==2.6.0
 huggingface-hub==0.25.2
+datasets==3.5.0
 pyalex==0.13
 networkx==3.2.1
 pyvis==0.3.2