feature/add_graphs_on_separate_tab (#15)
Browse files- add graphs in a separate panel (a34da49e065e56c6b3f9784912148039bade982a)
- Merge branch 'feature/add_graphs_in_separate_pannel' into pr/15 (735963f8348ce3e530d8856d7b9f48ea356cd6f8)
- app.py +65 -31
- front/utils.py +50 -2
- sandbox/20240310 - CQA - Semantic Routing 1.ipynb +0 -0
- style.css +6 -1
app.py
CHANGED
|
@@ -32,7 +32,7 @@ from utils import create_user_id
|
|
| 32 |
# ClimateQ&A imports
|
| 33 |
from climateqa.engine.llm import get_llm
|
| 34 |
from climateqa.engine.vectorstore import get_pinecone_vectorstore
|
| 35 |
-
from climateqa.knowledge.retriever import ClimateQARetriever
|
| 36 |
from climateqa.engine.reranker import get_reranker
|
| 37 |
from climateqa.engine.embeddings import get_embeddings_function
|
| 38 |
from climateqa.engine.chains.prompts import audience_prompts
|
|
@@ -43,7 +43,7 @@ from climateqa.engine.keywords import make_keywords_chain
|
|
| 43 |
# from climateqa.engine.chains.answer_rag import make_rag_papers_chain
|
| 44 |
from climateqa.engine.graph import make_graph_agent,display_graph
|
| 45 |
|
| 46 |
-
from front.utils import make_html_source,parse_output_llm_with_sources,serialize_docs,make_toolbox
|
| 47 |
|
| 48 |
# Load environment variables in local mode
|
| 49 |
try:
|
|
@@ -133,6 +133,7 @@ async def chat(query,history,audience,sources,reports):
|
|
| 133 |
output_keywords = ""
|
| 134 |
gallery = []
|
| 135 |
start_streaming = False
|
|
|
|
| 136 |
|
| 137 |
steps_display = {
|
| 138 |
"categorize_intent":("ποΈ Analyzing user message",True),
|
|
@@ -151,10 +152,12 @@ async def chat(query,history,audience,sources,reports):
|
|
| 151 |
try:
|
| 152 |
docs = event["data"]["output"]["documents"]
|
| 153 |
docs_html = []
|
| 154 |
-
|
| 155 |
-
|
|
|
|
|
|
|
| 156 |
|
| 157 |
-
used_documents = used_documents + [d.metadata[
|
| 158 |
history[-1].content = "Adding sources :\n\n - " + "\n - ".join(np.unique(used_documents))
|
| 159 |
|
| 160 |
docs_html = "".join(docs_html)
|
|
@@ -184,7 +187,7 @@ async def chat(query,history,audience,sources,reports):
|
|
| 184 |
if event["name"] == "categorize_intent" and event["event"] == "on_chain_start":
|
| 185 |
print("X")
|
| 186 |
|
| 187 |
-
yield history,docs_html,output_query,output_language,gallery #,output_query,output_keywords
|
| 188 |
|
| 189 |
except Exception as e:
|
| 190 |
print(event, "has failed")
|
|
@@ -212,12 +215,49 @@ async def chat(query,history,audience,sources,reports):
|
|
| 212 |
print(f"Error logging on Azure Blob Storage: {e}")
|
| 213 |
raise gr.Error(f"ClimateQ&A Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)")
|
| 214 |
|
| 215 |
-
|
| 216 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
|
|
|
|
|
|
|
| 218 |
if doc.metadata["chunk_type"] == "image":
|
| 219 |
try:
|
| 220 |
key = f"Image {i+1}"
|
|
|
|
| 221 |
image_path = doc.metadata["image_path"].split("documents/")[1]
|
| 222 |
img = get_image_from_azure_blob_storage(image_path)
|
| 223 |
|
|
@@ -225,31 +265,18 @@ async def chat(query,history,audience,sources,reports):
|
|
| 225 |
buffered = BytesIO()
|
| 226 |
img.save(buffered, format="PNG")
|
| 227 |
img_str = base64.b64encode(buffered.getvalue()).decode()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
|
| 229 |
-
# Embedding the base64 string in Markdown
|
| 230 |
-
markdown_image = f""
|
| 231 |
-
image_dict[key] = {"img":img,"md":markdown_image,"caption":doc.page_content,"key":key,"figure_code":doc.metadata["figure_code"]}
|
| 232 |
except Exception as e:
|
| 233 |
print(f"Skipped adding image {i} because of {e}")
|
|
|
|
|
|
|
|
|
|
| 234 |
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
gallery = [x["img"] for x in list(image_dict.values())]
|
| 238 |
-
img = list(image_dict.values())[0]
|
| 239 |
-
img_md = img["md"]
|
| 240 |
-
img_caption = img["caption"]
|
| 241 |
-
img_code = img["figure_code"]
|
| 242 |
-
if img_code != "N/A":
|
| 243 |
-
img_name = f"{img['key']} - {img['figure_code']}"
|
| 244 |
-
else:
|
| 245 |
-
img_name = f"{img['key']}"
|
| 246 |
-
|
| 247 |
-
history.append(ChatMessage(role="assistant", content = f"\n\n{img_md}\n<p class='chatbot-caption'><b>{img_name}</b> - {img_caption}</p>"))
|
| 248 |
-
# answer_yet = history[-1][1] + f"\n\n{img_md}\n<p class='chatbot-caption'><b>{img_name}</b> - {img_caption}</p>"
|
| 249 |
-
# history[-1] = (history[-1][0],answer_yet)
|
| 250 |
-
# history = [tuple(x) for x in history]
|
| 251 |
-
|
| 252 |
-
yield history,docs_html,output_query,output_language,gallery#,output_query,output_keywords
|
| 253 |
|
| 254 |
|
| 255 |
def save_feedback(feed: str, user_id):
|
|
@@ -382,6 +409,9 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
| 382 |
with gr.Tab("Sources",elem_id = "tab-citations",id = 1):
|
| 383 |
sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
|
| 384 |
docs_textbox = gr.State("")
|
|
|
|
|
|
|
|
|
|
| 385 |
|
| 386 |
# with Modal(visible = False) as config_modal:
|
| 387 |
with gr.Tab("Configuration",elem_id = "tab-config",id = 2):
|
|
@@ -415,6 +445,10 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
| 415 |
output_language = gr.Textbox(label="Language",show_label = True,elem_id = "language",lines = 1,interactive = False)
|
| 416 |
|
| 417 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 418 |
|
| 419 |
#---------------------------------------------------------------------------------------
|
| 420 |
# OTHER TABS
|
|
@@ -463,13 +497,13 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
| 463 |
|
| 464 |
(textbox
|
| 465 |
.submit(start_chat, [textbox,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_textbox")
|
| 466 |
-
.then(chat, [textbox,chatbot,dropdown_audience, dropdown_sources,dropdown_reports], [chatbot,sources_textbox,output_query,output_language,gallery_component],concurrency_limit = 8,api_name = "chat_textbox")
|
| 467 |
.then(finish_chat, None, [textbox],api_name = "finish_chat_textbox")
|
| 468 |
)
|
| 469 |
|
| 470 |
(examples_hidden
|
| 471 |
.change(start_chat, [examples_hidden,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_examples")
|
| 472 |
-
.then(chat, [examples_hidden,chatbot,dropdown_audience, dropdown_sources,dropdown_reports], [chatbot,sources_textbox,output_query,output_language,gallery_component],concurrency_limit = 8,api_name = "chat_examples")
|
| 473 |
.then(finish_chat, None, [textbox],api_name = "finish_chat_examples")
|
| 474 |
)
|
| 475 |
|
|
|
|
| 32 |
# ClimateQ&A imports
|
| 33 |
from climateqa.engine.llm import get_llm
|
| 34 |
from climateqa.engine.vectorstore import get_pinecone_vectorstore
|
| 35 |
+
# from climateqa.knowledge.retriever import ClimateQARetriever
|
| 36 |
from climateqa.engine.reranker import get_reranker
|
| 37 |
from climateqa.engine.embeddings import get_embeddings_function
|
| 38 |
from climateqa.engine.chains.prompts import audience_prompts
|
|
|
|
| 43 |
# from climateqa.engine.chains.answer_rag import make_rag_papers_chain
|
| 44 |
from climateqa.engine.graph import make_graph_agent,display_graph
|
| 45 |
|
| 46 |
+
from front.utils import make_html_source, make_html_figure_sources,parse_output_llm_with_sources,serialize_docs,make_toolbox
|
| 47 |
|
| 48 |
# Load environment variables in local mode
|
| 49 |
try:
|
|
|
|
| 133 |
output_keywords = ""
|
| 134 |
gallery = []
|
| 135 |
start_streaming = False
|
| 136 |
+
figures = '<div class="figures-container"> <p> Go to the "Figures" tab at the top of the page to see full size images </p> </div>'
|
| 137 |
|
| 138 |
steps_display = {
|
| 139 |
"categorize_intent":("ποΈ Analyzing user message",True),
|
|
|
|
| 152 |
try:
|
| 153 |
docs = event["data"]["output"]["documents"]
|
| 154 |
docs_html = []
|
| 155 |
+
textual_docs = [d for d in docs if d.metadata["chunk_type"] == "text"]
|
| 156 |
+
for i, d in enumerate(textual_docs, 1):
|
| 157 |
+
if d.metadata["chunk_type"] == "text":
|
| 158 |
+
docs_html.append(make_html_source(d, i))
|
| 159 |
|
| 160 |
+
used_documents = used_documents + [f"{d.metadata['short_name']} - {d.metadata['name']}" for d in docs]
|
| 161 |
history[-1].content = "Adding sources :\n\n - " + "\n - ".join(np.unique(used_documents))
|
| 162 |
|
| 163 |
docs_html = "".join(docs_html)
|
|
|
|
| 187 |
if event["name"] == "categorize_intent" and event["event"] == "on_chain_start":
|
| 188 |
print("X")
|
| 189 |
|
| 190 |
+
yield history,docs_html,output_query,output_language,gallery, figures #,output_query,output_keywords
|
| 191 |
|
| 192 |
except Exception as e:
|
| 193 |
print(event, "has failed")
|
|
|
|
| 215 |
print(f"Error logging on Azure Blob Storage: {e}")
|
| 216 |
raise gr.Error(f"ClimateQ&A Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)")
|
| 217 |
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
# image_dict = {}
|
| 222 |
+
# for i,doc in enumerate(docs):
|
| 223 |
+
|
| 224 |
+
# if doc.metadata["chunk_type"] == "image":
|
| 225 |
+
# try:
|
| 226 |
+
# key = f"Image {i+1}"
|
| 227 |
+
# image_path = doc.metadata["image_path"].split("documents/")[1]
|
| 228 |
+
# img = get_image_from_azure_blob_storage(image_path)
|
| 229 |
+
|
| 230 |
+
# # Convert the image to a byte buffer
|
| 231 |
+
# buffered = BytesIO()
|
| 232 |
+
# img.save(buffered, format="PNG")
|
| 233 |
+
# img_str = base64.b64encode(buffered.getvalue()).decode()
|
| 234 |
+
|
| 235 |
+
# # Embedding the base64 string in Markdown
|
| 236 |
+
# markdown_image = f""
|
| 237 |
+
# image_dict[key] = {"img":img,"md":markdown_image,"short_name": doc.metadata["short_name"],"figure_code":doc.metadata["figure_code"],"caption":doc.page_content,"key":key,"figure_code":doc.metadata["figure_code"], "img_str" : img_str}
|
| 238 |
+
# except Exception as e:
|
| 239 |
+
# print(f"Skipped adding image {i} because of {e}")
|
| 240 |
+
|
| 241 |
+
# if len(image_dict) > 0:
|
| 242 |
+
|
| 243 |
+
# gallery = [x["img"] for x in list(image_dict.values())]
|
| 244 |
+
# img = list(image_dict.values())[0]
|
| 245 |
+
# img_md = img["md"]
|
| 246 |
+
# img_caption = img["caption"]
|
| 247 |
+
# img_code = img["figure_code"]
|
| 248 |
+
# if img_code != "N/A":
|
| 249 |
+
# img_name = f"{img['key']} - {img['figure_code']}"
|
| 250 |
+
# else:
|
| 251 |
+
# img_name = f"{img['key']}"
|
| 252 |
+
|
| 253 |
+
# history.append(ChatMessage(role="assistant", content = f"\n\n{img_md}\n<p class='chatbot-caption'><b>{img_name}</b> - {img_caption}</p>"))
|
| 254 |
|
| 255 |
+
docs_figures = [d for d in docs if d.metadata["chunk_type"] == "image"]
|
| 256 |
+
for i, doc in enumerate(docs_figures):
|
| 257 |
if doc.metadata["chunk_type"] == "image":
|
| 258 |
try:
|
| 259 |
key = f"Image {i+1}"
|
| 260 |
+
|
| 261 |
image_path = doc.metadata["image_path"].split("documents/")[1]
|
| 262 |
img = get_image_from_azure_blob_storage(image_path)
|
| 263 |
|
|
|
|
| 265 |
buffered = BytesIO()
|
| 266 |
img.save(buffered, format="PNG")
|
| 267 |
img_str = base64.b64encode(buffered.getvalue()).decode()
|
| 268 |
+
|
| 269 |
+
figures = figures + make_html_figure_sources(doc, i, img_str)
|
| 270 |
+
|
| 271 |
+
gallery.append(img)
|
| 272 |
|
|
|
|
|
|
|
|
|
|
| 273 |
except Exception as e:
|
| 274 |
print(f"Skipped adding image {i} because of {e}")
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
|
| 278 |
|
| 279 |
+
yield history,docs_html,output_query,output_language,gallery, figures#,output_query,output_keywords
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
|
| 281 |
|
| 282 |
def save_feedback(feed: str, user_id):
|
|
|
|
| 409 |
with gr.Tab("Sources",elem_id = "tab-citations",id = 1):
|
| 410 |
sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
|
| 411 |
docs_textbox = gr.State("")
|
| 412 |
+
|
| 413 |
+
|
| 414 |
+
|
| 415 |
|
| 416 |
# with Modal(visible = False) as config_modal:
|
| 417 |
with gr.Tab("Configuration",elem_id = "tab-config",id = 2):
|
|
|
|
| 445 |
output_language = gr.Textbox(label="Language",show_label = True,elem_id = "language",lines = 1,interactive = False)
|
| 446 |
|
| 447 |
|
| 448 |
+
with gr.Tab("Figures",elem_id = "tab-figures",id = 3):
|
| 449 |
+
figures_cards = gr.HTML(show_label=False, elem_id="sources-figures")
|
| 450 |
+
|
| 451 |
+
|
| 452 |
|
| 453 |
#---------------------------------------------------------------------------------------
|
| 454 |
# OTHER TABS
|
|
|
|
| 497 |
|
| 498 |
(textbox
|
| 499 |
.submit(start_chat, [textbox,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_textbox")
|
| 500 |
+
.then(chat, [textbox,chatbot,dropdown_audience, dropdown_sources,dropdown_reports], [chatbot,sources_textbox,output_query,output_language,gallery_component,figures_cards],concurrency_limit = 8,api_name = "chat_textbox")
|
| 501 |
.then(finish_chat, None, [textbox],api_name = "finish_chat_textbox")
|
| 502 |
)
|
| 503 |
|
| 504 |
(examples_hidden
|
| 505 |
.change(start_chat, [examples_hidden,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_examples")
|
| 506 |
+
.then(chat, [examples_hidden,chatbot,dropdown_audience, dropdown_sources,dropdown_reports], [chatbot,sources_textbox,output_query,output_language,gallery_component, figures_cards],concurrency_limit = 8,api_name = "chat_examples")
|
| 507 |
.then(finish_chat, None, [textbox],api_name = "finish_chat_examples")
|
| 508 |
)
|
| 509 |
|
front/utils.py
CHANGED
|
@@ -55,7 +55,7 @@ def make_html_source(source,i):
|
|
| 55 |
score = meta['reranking_score']
|
| 56 |
if score > 0.8:
|
| 57 |
color = "score-green"
|
| 58 |
-
elif score > 0.
|
| 59 |
color = "score-orange"
|
| 60 |
else:
|
| 61 |
color = "score-red"
|
|
@@ -91,8 +91,9 @@ def make_html_source(source,i):
|
|
| 91 |
<div class="card card-image">
|
| 92 |
<div class="card-content">
|
| 93 |
<h2>Image {i} - {title} - Page {int(meta['page_number'])}</h2>
|
| 94 |
-
<p>{content}</p>
|
| 95 |
<p class='ai-generated'>AI-generated description</p>
|
|
|
|
|
|
|
| 96 |
{relevancy_score}
|
| 97 |
</div>
|
| 98 |
<div class="card-footer">
|
|
@@ -107,6 +108,53 @@ def make_html_source(source,i):
|
|
| 107 |
return card
|
| 108 |
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
def make_toolbox(tool_name,description = "",checked = False,elem_id = "toggle"):
|
| 112 |
|
|
|
|
| 55 |
score = meta['reranking_score']
|
| 56 |
if score > 0.8:
|
| 57 |
color = "score-green"
|
| 58 |
+
elif score > 0.5:
|
| 59 |
color = "score-orange"
|
| 60 |
else:
|
| 61 |
color = "score-red"
|
|
|
|
| 91 |
<div class="card card-image">
|
| 92 |
<div class="card-content">
|
| 93 |
<h2>Image {i} - {title} - Page {int(meta['page_number'])}</h2>
|
|
|
|
| 94 |
<p class='ai-generated'>AI-generated description</p>
|
| 95 |
+
<p>{content}</p>
|
| 96 |
+
|
| 97 |
{relevancy_score}
|
| 98 |
</div>
|
| 99 |
<div class="card-footer">
|
|
|
|
| 108 |
return card
|
| 109 |
|
| 110 |
|
| 111 |
+
def make_html_figure_sources(source,i,img_str):
    """Build an HTML card for a figure (image) source document.

    Parameters
    ----------
    source : document object with ``.metadata`` (dict) and ``.page_content`` (str)
        Expected metadata keys: ``reranking_score``, ``name``, ``figure_code``,
        ``short_name``, ``page_number``, ``url``.
    i : int
        1-based display index of the image ("Image {i}" in the card header).
    img_str : str
        Base64-encoded PNG payload, embedded inline via a data: URI.

    Returns
    -------
    str
        An HTML snippet (card) showing the image, its AI-generated caption,
        a relevancy score badge, and a link to the source PDF page.
    """
    meta = source.metadata
    content = source.page_content.strip()

    # Map the reranking score to a traffic-light CSS class (see style.css).
    score = meta['reranking_score']
    if score > 0.8:
        color = "score-green"
    elif score > 0.5:
        color = "score-orange"
    else:
        color = "score-red"

    # NOTE(review): toc_levels is always empty here, so the first branch is
    # currently dead; kept for parity with make_html_source, which fills it
    # from metadata — TODO confirm whether figures should carry TOC levels too.
    toc_levels = []
    if len(toc_levels) > 0:
        name = f"<b>{toc_levels}</b><br/>{meta['name']}"
    else:
        name = meta['name']

    relevancy_score = f"<p class=relevancy-score>Relevancy score: <span class='{color}'>{score:.1%}</span></p>"

    # Prefer the official figure code (e.g. "Figure 2.1") when available.
    if meta["figure_code"] != "N/A":
        title = f"{meta['figure_code']} - {meta['short_name']}"
    else:
        title = f"{meta['short_name']}"

    card = f"""
    <div class="card card-image">
        <div class="card-content">
            <h2>Image {i} - {title} - Page {int(meta['page_number'])}</h2>
            <p class='ai-generated'>AI-generated description</p>
            <img src="data:image/png;base64, {img_str}" alt="Alt text" />
            <p>{content}</p>
            {relevancy_score}
        </div>
        <div class="card-footer">
            <span>{name}</span>
            <a href="{meta['url']}#page={int(meta['page_number'])}" target="_blank" class="pdf-link">
                <span role="img" aria-label="Open PDF">π</span>
            </a>
        </div>
    </div>
    """
    return card
|
| 156 |
+
|
| 157 |
+
|
| 158 |
|
| 159 |
def make_toolbox(tool_name,description = "",checked = False,elem_id = "toggle"):
|
| 160 |
|
sandbox/20240310 - CQA - Semantic Routing 1.ipynb
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
style.css
CHANGED
|
@@ -206,6 +206,11 @@ label.selected{
|
|
| 206 |
overflow-y: auto !important;
|
| 207 |
}
|
| 208 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
div#tab-config{
|
| 210 |
height:calc(100vh - 190px) !important;
|
| 211 |
overflow-y: auto !important;
|
|
@@ -475,7 +480,7 @@ span.chatbot > p > img{
|
|
| 475 |
color:orange !important;
|
| 476 |
}
|
| 477 |
|
| 478 |
-
.score-
|
| 479 |
color:red !important;
|
| 480 |
}
|
| 481 |
.message-buttons-left.panel.message-buttons.with-avatar {
|
|
|
|
| 206 |
overflow-y: auto !important;
|
| 207 |
}
|
| 208 |
|
| 209 |
+
div#sources-figures{
|
| 210 |
+
height:calc(100vh - 190px) !important;
|
| 211 |
+
overflow-y: auto !important;
|
| 212 |
+
}
|
| 213 |
+
|
| 214 |
div#tab-config{
|
| 215 |
height:calc(100vh - 190px) !important;
|
| 216 |
overflow-y: auto !important;
|
|
|
|
| 480 |
color:orange !important;
|
| 481 |
}
|
| 482 |
|
| 483 |
+
.score-red{
|
| 484 |
color:red !important;
|
| 485 |
}
|
| 486 |
.message-buttons-left.panel.message-buttons.with-avatar {
|