Merge remote-tracking branch 'origin' into feature/talk_to_data
Files changed:
- app.py +1 -1
- climateqa/chat.py +2 -2
- climateqa/constants.py +36 -1
- climateqa/logging.py +172 -83
 
    	
app.py CHANGED

@@ -81,7 +81,7 @@ llm = get_llm(provider="openai", max_tokens=1024, temperature=0.0)
     if os.environ["GRADIO_ENV"] == "local":
         reranker = get_reranker("nano")
     else:
-        reranker = get_reranker("
+        reranker = get_reranker("nano")
 
     agent = make_graph_agent(
         llm=llm,
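The merge resolves the conflicting line by selecting the "nano" reranker in both branches, so the GRADIO_ENV check no longer changes which reranker is loaded. A minimal sketch of the resulting selection logic (the get_reranker stub below is an assumption standing in for the project's real helper, and the os.environ.get default is added only to keep the sketch runnable when the variable is unset):

import os

def get_reranker(size: str):
    # Stand-in for climateqa's real get_reranker helper (assumption).
    return f"<reranker:{size}>"

# After the merge, both branches request the same model, so the environment
# check is effectively a no-op until the branches diverge again.
if os.environ.get("GRADIO_ENV", "local") == "local":
    reranker = get_reranker("nano")
else:
    reranker = get_reranker("nano")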
    	
climateqa/chat.py CHANGED

@@ -14,7 +14,7 @@ from .handle_stream_events import (
     handle_retrieved_owid_graphs,
 )
 from .logging import (
-
+    log_interaction
 )
 
 # Chat functions

@@ -189,6 +189,6 @@ async def chat_stream(
         raise gr.Error(str(e))
 
     # Call the function to log interaction
-
+    log_interaction(history, output_query, sources, docs, share_client, user_id)
 
     yield history, docs_html, output_query, output_language, related_contents, graphs_html, follow_up_examples#, vanna_data
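chat_stream now delegates logging to the new log_interaction wrapper just before the final yield. A hedged, self-contained sketch of an equivalent call is below; the Msg dataclass and every argument value are placeholders (the real app passes Gradio chat messages, retrieved langchain Documents and an Azure share client), and the wrapper is written to swallow logging failures rather than interrupt the response stream.

from dataclasses import dataclass
from climateqa.logging import log_interaction

@dataclass
class Msg:
    # Placeholder chat message; the logger reads both msg["content"] and msg.content.
    role: str
    content: str
    def __getitem__(self, key):
        return getattr(self, key)

history = [
    Msg("assistant", "Hello, ask me about climate science."),
    Msg("user", "How fast is sea level rising?"),
    Msg("assistant", "Global mean sea level has risen roughly 20 cm since 1900 ..."),
]

# docs=[] makes serialize_docs fall back to its "No documents found" placeholder;
# share_client=None means only the Hugging Face path is exercised.
log_interaction(history, "sea level rise rate", ["IPCC"], [], None, "demo-user")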
    	
climateqa/constants.py CHANGED

@@ -65,4 +65,39 @@ OWID_CATEGORIES = ['Access to Energy', 'Agricultural Production',
       'Oil Spills', 'Outdoor Air Pollution', 'Ozone Layer', 'Pandemics',
       'Pesticides', 'Plastic Pollution', 'Renewable Energy', 'Soil',
       'Transport', 'Urbanization', 'Waste Management', 'Water Pollution',
-      'Water Use & Stress', 'Wildfires']
+      'Water Use & Stress', 'Wildfires']
+
+
+DOCUMENT_METADATA_DEFAULT_VALUES = {
+    "chunk_type": "",
+    "document_id": "",
+    "document_number": 0.0,
+    "element_id": "",
+    "figure_code": "",
+    "file_size": "",
+    "image_path": "",
+    "n_pages": 0.0,
+    "name": "",
+    "num_characters": 0.0,
+    "num_tokens": 0.0,
+    "num_tokens_approx": 0.0,
+    "num_words": 0.0,
+    "page_number": 0,
+    "release_date": 0.0,
+    "report_type": "",
+    "section_header": "",
+    "short_name": "",
+    "source": "",
+    "toc_level0": "",
+    "toc_level1": "",
+    "toc_level2": "",
+    "toc_level3": "",
+    "url": "",
+    "similarity_score": 0.0,
+    "content": "",
+    "reranking_score": 0.0,
+    "query_used_for_retrieval": "",
+    "sources_used": [""],
+    "question_used": "",
+    "index_used": ""
+}
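DOCUMENT_METADATA_DEFAULT_VALUES doubles as a type schema: serialize_docs in climateqa/logging.py coerces each metadata value to the type of its default and falls back to the default when coercion fails. A minimal sketch of that normalisation; the helper name normalize_metadata is chosen here for illustration and is not part of the codebase.

from climateqa.constants import DOCUMENT_METADATA_DEFAULT_VALUES

def normalize_metadata(raw: dict) -> dict:
    """Return a metadata dict containing every expected field, typed like its default."""
    cleaned = {}
    for field, default in DOCUMENT_METADATA_DEFAULT_VALUES.items():
        value = raw.get(field, default)
        try:
            # Coerce to the default's type (str, float, int, list, ...).
            cleaned[field] = type(default)(value)
        except (TypeError, ValueError):
            cleaned[field] = default
    return cleaned

# Missing fields get defaults; string numbers are coerced ("12" -> 12).
print(normalize_metadata({"name": "IPCC AR6 WGI", "page_number": "12"}))

Anchoring the value types to the defaults is what keeps the logged documents compatible with a fixed-schema Hugging Face dataset.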
    	
climateqa/logging.py CHANGED

@@ -4,111 +4,157 @@ import json
 from huggingface_hub import HfApi
 import gradio as gr
 import csv
+import pandas as pd
+import io
+from typing import TypedDict, List
+from climateqa.constants import DOCUMENT_METADATA_DEFAULT_VALUES
+from langchain_core.documents import Document
 
-def serialize_docs(docs:list)->list:
+def serialize_docs(docs:list[Document])->list:
+    """Convert document objects to a simplified format compatible with Hugging Face datasets.
+
+    This function processes document objects by extracting their page content and metadata,
+    normalizing the metadata structure to ensure consistency. It applies default values
+    from DOCUMENT_METADATA_DEFAULT_VALUES for any missing metadata fields.
+
+    Args:
+        docs (list): List of document objects, each with page_content and metadata attributes
+
+    Returns:
+        list: List of dictionaries with standardized "page_content" and "metadata" fields
+    """
     new_docs = []
     for doc in docs:
-        new_doc 
+        # Make sure we have a clean doc format
+        new_doc = {
+            "page_content": doc.page_content,
+            "metadata": {}
+        }
+
+        # Ensure all metadata fields exist with defaults if missing
+        for field, default_value in DOCUMENT_METADATA_DEFAULT_VALUES.items():
+            new_value = doc.metadata.get(field, default_value)
+            try:
+                new_doc["metadata"][field] = type(default_value)(new_value)
+            except:
+                new_doc["metadata"][field] = default_value
 
         new_docs.append(new_doc)
+
+    if new_docs == []:
+        new_docs = [{"page_content": "No documents found", "metadata": DOCUMENT_METADATA_DEFAULT_VALUES}]
     return new_docs
 
 ## AZURE LOGGING - DEPRECATED
 
-[removed block of commented-out Azure logging code, not shown in this view]
+def log_on_azure(file, logs, share_client):
+    """Log data to Azure Blob Storage.
+
+    Args:
+        file (str): Name of the file to store logs
+        logs (dict): Log data to store
+        share_client: Azure share client instance
+    """
+    logs = json.dumps(logs)
+    file_client = share_client.get_file_client(file)
+    file_client.upload_file(logs)
+
+
+def log_interaction_to_azure(history, output_query, sources, docs, share_client, user_id):
+    """Log chat interaction to Azure and Hugging Face.
+
+    Args:
+        history (list): Chat message history
+        output_query (str): Processed query
+        sources (list): Knowledge base sources used
+        docs (list): Retrieved documents
+        share_client: Azure share client instance
+        user_id (str): User identifier
+    """
+    try:
+        # Log interaction to Azure if not in local environment
+        if os.getenv("GRADIO_ENV") != "local":
+            timestamp = str(datetime.now().timestamp())
+            prompt = history[1]["content"]
+            logs = {
+                "user_id": str(user_id),
+                "prompt": prompt,
+                "query": prompt,
+                "question": output_query,
+                "sources": sources,
+                "docs": serialize_docs(docs),
+                "answer": history[-1].content,
+                "time": timestamp,
+            }
+            # Log to Azure
+            log_on_azure(f"{timestamp}.json", logs, share_client)
+    except Exception as e:
+        print(f"Error logging on Azure Blob Storage: {e}")
+        error_msg = f"ClimateQ&A Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)"
+        raise gr.Error(error_msg)
+
+def log_drias_interaction_to_azure(query, sql_query, data, share_client, user_id):
+    """Log Drias data interaction to Azure and Hugging Face.
+
+    Args:
+        query (str): User query
+        sql_query (str): SQL query used
+        data: Retrieved data
+        share_client: Azure share client instance
+        user_id (str): User identifier
+    """
+    try:
+        # Log interaction to Azure if not in local environment
+        if os.getenv("GRADIO_ENV") != "local":
+            timestamp = str(datetime.now().timestamp())
+            logs = {
+                "user_id": str(user_id),
+                "query": query,
+                "sql_query": sql_query,
+                "time": timestamp,
+            }
+            log_on_azure(f"drias_{timestamp}.json", logs, share_client)
+            print(f"Logged Drias interaction to Azure Blob Storage: {logs}")
+        else:
+            print("share_client or user_id is None, or GRADIO_ENV is local")
+    except Exception as e:
+        print(f"Error logging Drias interaction on Azure Blob Storage: {e}")
+        error_msg = f"Drias Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)"
+        raise gr.Error(error_msg)
 
 ## HUGGING FACE LOGGING
 
-def log_on_huggingface(log_filename, logs):
+def log_on_huggingface(log_filename, logs, log_type="chat"):
     """Log data to Hugging Face dataset repository.
 
     Args:
         log_filename (str): Name of the file to store logs
         logs (dict): Log data to store
+        log_type (str): Type of log to store
     """
     try:
-        [removed token and repository lookup, not shown in this view]
+        if log_type =="chat":
+            # Get Hugging Face token from environment
+            hf_token = os.getenv("HF_LOGS_TOKEN")
+            if not hf_token:
+                print("HF_LOGS_TOKEN not found in environment variables")
+                return
+
+            # Get repository name from environment or use default
+            repo_id = os.getenv("HF_DATASET_REPO", "Ekimetrics/climateqa_logs")
+
+        elif log_type =="drias":
+            # Get Hugging Face token from environment
+            hf_token = os.getenv("HF_LOGS_DRIAS_TOKEN")
+            if not hf_token:
+                print("HF_LOGS_DRIAS_TOKEN not found in environment variables")
+                return
+
+            # Get repository name from environment or use default
+            repo_id = os.getenv("HF_DATASET_REPO_DRIAS", "Ekimetrics/climateqa_logs_talk_to_data")
+
+        else:
+            raise ValueError(f"Invalid log type: {log_type}")
 
         # Initialize HfApi
         api = HfApi(token=hf_token)

@@ -158,10 +204,13 @@ def log_interaction_to_huggingface(history, output_query, sources, docs, share_client, user_id):
                 "time": timestamp,
             }
             # Log to Hugging Face
-            log_on_huggingface(f"chat/{timestamp}.json", logs)
+            log_on_huggingface(f"chat/{timestamp}.json", logs, log_type="chat")
+            print(f"Logged interaction to Hugging Face")
+        else:
+            print("Did not log to Hugging Face because GRADIO_ENV is local")
     except Exception as e:
         print(f"Error logging to Hugging Face: {e}")
-        error_msg = f"ClimateQ&A Error: {str(e)[:100]} 
+        error_msg = f"ClimateQ&A Error: {str(e)[:100]})"
         raise gr.Error(error_msg)
 
 def log_drias_interaction_to_huggingface(query, sql_query, user_id):

@@ -182,7 +231,7 @@ def log_drias_interaction_to_huggingface(query, sql_query, user_id):
                 "sql_query": sql_query,
                 "time": timestamp,
             }
-            log_on_huggingface(f"drias/drias_{timestamp}.json", logs)
+            log_on_huggingface(f"drias/drias_{timestamp}.json", logs, log_type="drias")
             print(f"Logged Drias interaction to Hugging Face: {logs}")
         else:
             print("share_client or user_id is None, or GRADIO_ENV is local")

@@ -191,4 +240,44 @@ def log_drias_interaction_to_huggingface(query, sql_query, user_id):
         error_msg = f"Drias Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)"
         raise gr.Error(error_msg)
 
+def log_interaction(history, output_query, sources, docs, share_client, user_id):
+    """Log chat interaction to Hugging Face, and fall back to Azure if that fails.
+
+    Args:
+        history (list): Chat message history
+        output_query (str): Processed query
+        sources (list): Knowledge base sources used
+        docs (list): Retrieved documents
+        share_client: Azure share client instance
+        user_id (str): User identifier
+    """
+    try:
+        # First try to log to Hugging Face
+        log_interaction_to_huggingface(history, output_query, sources, docs, share_client, user_id)
+    except Exception as e:
+        print(f"Failed to log to Hugging Face, falling back to Azure: {e}")
+        try:
+            # Fall back to Azure logging
+            if os.getenv("GRADIO_ENV") != "local":
+                timestamp = str(datetime.now().timestamp())
+                prompt = history[1]["content"]
+                logs = {
+                    "user_id": str(user_id),
+                    "prompt": prompt,
+                    "query": prompt,
+                    "question": output_query,
+                    "sources": sources,
+                    "docs": serialize_docs(docs),
+                    "answer": history[-1].content,
+                    "time": timestamp,
+                }
+                # Log to Azure
+                log_on_azure(f"{timestamp}.json", logs, share_client)
+                print("Successfully logged to Azure as fallback")
+        except Exception as azure_error:
+            print(f"Error in Azure fallback logging: {azure_error}")
+            error_msg = f"ClimateQ&A Logging Error: {str(azure_error)[:100]})"
+            # Don't raise error to avoid disrupting user experience
+            print(error_msg)
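The new Hugging Face logging path is configured entirely through environment variables, with the repository IDs above as defaults. A minimal configuration sketch follows; the token values are placeholders, and only the variable names and default repositories come from the diff.

import os

# Chat logs (written by log_on_huggingface with log_type="chat")
os.environ["HF_LOGS_TOKEN"] = "hf_xxx"                        # placeholder token
os.environ["HF_DATASET_REPO"] = "Ekimetrics/climateqa_logs"   # default repository

# Drias / talk-to-data logs (log_type="drias")
os.environ["HF_LOGS_DRIAS_TOKEN"] = "hf_yyy"                  # placeholder token
os.environ["HF_DATASET_REPO_DRIAS"] = "Ekimetrics/climateqa_logs_talk_to_data"

If a token is missing, log_on_huggingface prints a warning and returns without raising; log_interaction adds a second safety net by falling back to Azure and, if that also fails, only printing the error so the chat stream is never interrupted.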