feature/add_graphs_on_separate_tab (#15)
Browse files- add graphs in a separate panel (a34da49e065e56c6b3f9784912148039bade982a)
- Merge branch 'feature/add_graphs_in_separate_pannel' into pr/15 (735963f8348ce3e530d8856d7b9f48ea356cd6f8)
- app.py +65 -31
- front/utils.py +50 -2
- sandbox/20240310 - CQA - Semantic Routing 1.ipynb +0 -0
- style.css +6 -1
app.py
CHANGED
|
@@ -32,7 +32,7 @@ from utils import create_user_id
|
|
| 32 |
# ClimateQ&A imports
|
| 33 |
from climateqa.engine.llm import get_llm
|
| 34 |
from climateqa.engine.vectorstore import get_pinecone_vectorstore
|
| 35 |
-
from climateqa.knowledge.retriever import ClimateQARetriever
|
| 36 |
from climateqa.engine.reranker import get_reranker
|
| 37 |
from climateqa.engine.embeddings import get_embeddings_function
|
| 38 |
from climateqa.engine.chains.prompts import audience_prompts
|
|
@@ -43,7 +43,7 @@ from climateqa.engine.keywords import make_keywords_chain
|
|
| 43 |
# from climateqa.engine.chains.answer_rag import make_rag_papers_chain
|
| 44 |
from climateqa.engine.graph import make_graph_agent,display_graph
|
| 45 |
|
| 46 |
-
from front.utils import make_html_source,parse_output_llm_with_sources,serialize_docs,make_toolbox
|
| 47 |
|
| 48 |
# Load environment variables in local mode
|
| 49 |
try:
|
|
@@ -133,6 +133,7 @@ async def chat(query,history,audience,sources,reports):
|
|
| 133 |
output_keywords = ""
|
| 134 |
gallery = []
|
| 135 |
start_streaming = False
|
|
|
|
| 136 |
|
| 137 |
steps_display = {
|
| 138 |
"categorize_intent":("ποΈ Analyzing user message",True),
|
|
@@ -151,10 +152,12 @@ async def chat(query,history,audience,sources,reports):
|
|
| 151 |
try:
|
| 152 |
docs = event["data"]["output"]["documents"]
|
| 153 |
docs_html = []
|
| 154 |
-
|
| 155 |
-
|
|
|
|
|
|
|
| 156 |
|
| 157 |
-
used_documents = used_documents + [d.metadata[
|
| 158 |
history[-1].content = "Adding sources :\n\n - " + "\n - ".join(np.unique(used_documents))
|
| 159 |
|
| 160 |
docs_html = "".join(docs_html)
|
|
@@ -184,7 +187,7 @@ async def chat(query,history,audience,sources,reports):
|
|
| 184 |
if event["name"] == "categorize_intent" and event["event"] == "on_chain_start":
|
| 185 |
print("X")
|
| 186 |
|
| 187 |
-
yield history,docs_html,output_query,output_language,gallery #,output_query,output_keywords
|
| 188 |
|
| 189 |
except Exception as e:
|
| 190 |
print(event, "has failed")
|
|
@@ -212,12 +215,49 @@ async def chat(query,history,audience,sources,reports):
|
|
| 212 |
print(f"Error logging on Azure Blob Storage: {e}")
|
| 213 |
raise gr.Error(f"ClimateQ&A Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)")
|
| 214 |
|
| 215 |
-
|
| 216 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
|
|
|
|
|
|
|
| 218 |
if doc.metadata["chunk_type"] == "image":
|
| 219 |
try:
|
| 220 |
key = f"Image {i+1}"
|
|
|
|
| 221 |
image_path = doc.metadata["image_path"].split("documents/")[1]
|
| 222 |
img = get_image_from_azure_blob_storage(image_path)
|
| 223 |
|
|
@@ -225,31 +265,18 @@ async def chat(query,history,audience,sources,reports):
|
|
| 225 |
buffered = BytesIO()
|
| 226 |
img.save(buffered, format="PNG")
|
| 227 |
img_str = base64.b64encode(buffered.getvalue()).decode()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
|
| 229 |
-
# Embedding the base64 string in Markdown
|
| 230 |
-
markdown_image = f""
|
| 231 |
-
image_dict[key] = {"img":img,"md":markdown_image,"caption":doc.page_content,"key":key,"figure_code":doc.metadata["figure_code"]}
|
| 232 |
except Exception as e:
|
| 233 |
print(f"Skipped adding image {i} because of {e}")
|
|
|
|
|
|
|
|
|
|
| 234 |
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
gallery = [x["img"] for x in list(image_dict.values())]
|
| 238 |
-
img = list(image_dict.values())[0]
|
| 239 |
-
img_md = img["md"]
|
| 240 |
-
img_caption = img["caption"]
|
| 241 |
-
img_code = img["figure_code"]
|
| 242 |
-
if img_code != "N/A":
|
| 243 |
-
img_name = f"{img['key']} - {img['figure_code']}"
|
| 244 |
-
else:
|
| 245 |
-
img_name = f"{img['key']}"
|
| 246 |
-
|
| 247 |
-
history.append(ChatMessage(role="assistant", content = f"\n\n{img_md}\n<p class='chatbot-caption'><b>{img_name}</b> - {img_caption}</p>"))
|
| 248 |
-
# answer_yet = history[-1][1] + f"\n\n{img_md}\n<p class='chatbot-caption'><b>{img_name}</b> - {img_caption}</p>"
|
| 249 |
-
# history[-1] = (history[-1][0],answer_yet)
|
| 250 |
-
# history = [tuple(x) for x in history]
|
| 251 |
-
|
| 252 |
-
yield history,docs_html,output_query,output_language,gallery#,output_query,output_keywords
|
| 253 |
|
| 254 |
|
| 255 |
def save_feedback(feed: str, user_id):
|
|
@@ -382,6 +409,9 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
| 382 |
with gr.Tab("Sources",elem_id = "tab-citations",id = 1):
|
| 383 |
sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
|
| 384 |
docs_textbox = gr.State("")
|
|
|
|
|
|
|
|
|
|
| 385 |
|
| 386 |
# with Modal(visible = False) as config_modal:
|
| 387 |
with gr.Tab("Configuration",elem_id = "tab-config",id = 2):
|
|
@@ -415,6 +445,10 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
| 415 |
output_language = gr.Textbox(label="Language",show_label = True,elem_id = "language",lines = 1,interactive = False)
|
| 416 |
|
| 417 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 418 |
|
| 419 |
#---------------------------------------------------------------------------------------
|
| 420 |
# OTHER TABS
|
|
@@ -463,13 +497,13 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
| 463 |
|
| 464 |
(textbox
|
| 465 |
.submit(start_chat, [textbox,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_textbox")
|
| 466 |
-
.then(chat, [textbox,chatbot,dropdown_audience, dropdown_sources,dropdown_reports], [chatbot,sources_textbox,output_query,output_language,gallery_component],concurrency_limit = 8,api_name = "chat_textbox")
|
| 467 |
.then(finish_chat, None, [textbox],api_name = "finish_chat_textbox")
|
| 468 |
)
|
| 469 |
|
| 470 |
(examples_hidden
|
| 471 |
.change(start_chat, [examples_hidden,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_examples")
|
| 472 |
-
.then(chat, [examples_hidden,chatbot,dropdown_audience, dropdown_sources,dropdown_reports], [chatbot,sources_textbox,output_query,output_language,gallery_component],concurrency_limit = 8,api_name = "chat_examples")
|
| 473 |
.then(finish_chat, None, [textbox],api_name = "finish_chat_examples")
|
| 474 |
)
|
| 475 |
|
|
|
|
| 32 |
# ClimateQ&A imports
|
| 33 |
from climateqa.engine.llm import get_llm
|
| 34 |
from climateqa.engine.vectorstore import get_pinecone_vectorstore
|
| 35 |
+
# from climateqa.knowledge.retriever import ClimateQARetriever
|
| 36 |
from climateqa.engine.reranker import get_reranker
|
| 37 |
from climateqa.engine.embeddings import get_embeddings_function
|
| 38 |
from climateqa.engine.chains.prompts import audience_prompts
|
|
|
|
| 43 |
# from climateqa.engine.chains.answer_rag import make_rag_papers_chain
|
| 44 |
from climateqa.engine.graph import make_graph_agent,display_graph
|
| 45 |
|
| 46 |
+
from front.utils import make_html_source, make_html_figure_sources,parse_output_llm_with_sources,serialize_docs,make_toolbox
|
| 47 |
|
| 48 |
# Load environment variables in local mode
|
| 49 |
try:
|
|
|
|
| 133 |
output_keywords = ""
|
| 134 |
gallery = []
|
| 135 |
start_streaming = False
|
| 136 |
+
figures = '<div class="figures-container"> <p> Go to the "Figures" tab at the top of the page to see full size images </p> </div>'
|
| 137 |
|
| 138 |
steps_display = {
|
| 139 |
"categorize_intent":("ποΈ Analyzing user message",True),
|
|
|
|
| 152 |
try:
|
| 153 |
docs = event["data"]["output"]["documents"]
|
| 154 |
docs_html = []
|
| 155 |
+
textual_docs = [d for d in docs if d.metadata["chunk_type"] == "text"]
|
| 156 |
+
for i, d in enumerate(textual_docs, 1):
|
| 157 |
+
if d.metadata["chunk_type"] == "text":
|
| 158 |
+
docs_html.append(make_html_source(d, i))
|
| 159 |
|
| 160 |
+
used_documents = used_documents + [f"{d.metadata['short_name']} - {d.metadata['name']}" for d in docs]
|
| 161 |
history[-1].content = "Adding sources :\n\n - " + "\n - ".join(np.unique(used_documents))
|
| 162 |
|
| 163 |
docs_html = "".join(docs_html)
|
|
|
|
| 187 |
if event["name"] == "categorize_intent" and event["event"] == "on_chain_start":
|
| 188 |
print("X")
|
| 189 |
|
| 190 |
+
yield history,docs_html,output_query,output_language,gallery, figures #,output_query,output_keywords
|
| 191 |
|
| 192 |
except Exception as e:
|
| 193 |
print(event, "has failed")
|
|
|
|
| 215 |
print(f"Error logging on Azure Blob Storage: {e}")
|
| 216 |
raise gr.Error(f"ClimateQ&A Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)")
|
| 217 |
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
# image_dict = {}
|
| 222 |
+
# for i,doc in enumerate(docs):
|
| 223 |
+
|
| 224 |
+
# if doc.metadata["chunk_type"] == "image":
|
| 225 |
+
# try:
|
| 226 |
+
# key = f"Image {i+1}"
|
| 227 |
+
# image_path = doc.metadata["image_path"].split("documents/")[1]
|
| 228 |
+
# img = get_image_from_azure_blob_storage(image_path)
|
| 229 |
+
|
| 230 |
+
# # Convert the image to a byte buffer
|
| 231 |
+
# buffered = BytesIO()
|
| 232 |
+
# img.save(buffered, format="PNG")
|
| 233 |
+
# img_str = base64.b64encode(buffered.getvalue()).decode()
|
| 234 |
+
|
| 235 |
+
# # Embedding the base64 string in Markdown
|
| 236 |
+
# markdown_image = f""
|
| 237 |
+
# image_dict[key] = {"img":img,"md":markdown_image,"short_name": doc.metadata["short_name"],"figure_code":doc.metadata["figure_code"],"caption":doc.page_content,"key":key,"figure_code":doc.metadata["figure_code"], "img_str" : img_str}
|
| 238 |
+
# except Exception as e:
|
| 239 |
+
# print(f"Skipped adding image {i} because of {e}")
|
| 240 |
+
|
| 241 |
+
# if len(image_dict) > 0:
|
| 242 |
+
|
| 243 |
+
# gallery = [x["img"] for x in list(image_dict.values())]
|
| 244 |
+
# img = list(image_dict.values())[0]
|
| 245 |
+
# img_md = img["md"]
|
| 246 |
+
# img_caption = img["caption"]
|
| 247 |
+
# img_code = img["figure_code"]
|
| 248 |
+
# if img_code != "N/A":
|
| 249 |
+
# img_name = f"{img['key']} - {img['figure_code']}"
|
| 250 |
+
# else:
|
| 251 |
+
# img_name = f"{img['key']}"
|
| 252 |
+
|
| 253 |
+
# history.append(ChatMessage(role="assistant", content = f"\n\n{img_md}\n<p class='chatbot-caption'><b>{img_name}</b> - {img_caption}</p>"))
|
| 254 |
|
| 255 |
+
docs_figures = [d for d in docs if d.metadata["chunk_type"] == "image"]
|
| 256 |
+
for i, doc in enumerate(docs_figures):
|
| 257 |
if doc.metadata["chunk_type"] == "image":
|
| 258 |
try:
|
| 259 |
key = f"Image {i+1}"
|
| 260 |
+
|
| 261 |
image_path = doc.metadata["image_path"].split("documents/")[1]
|
| 262 |
img = get_image_from_azure_blob_storage(image_path)
|
| 263 |
|
|
|
|
| 265 |
buffered = BytesIO()
|
| 266 |
img.save(buffered, format="PNG")
|
| 267 |
img_str = base64.b64encode(buffered.getvalue()).decode()
|
| 268 |
+
|
| 269 |
+
figures = figures + make_html_figure_sources(doc, i, img_str)
|
| 270 |
+
|
| 271 |
+
gallery.append(img)
|
| 272 |
|
|
|
|
|
|
|
|
|
|
| 273 |
except Exception as e:
|
| 274 |
print(f"Skipped adding image {i} because of {e}")
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
|
| 278 |
|
| 279 |
+
yield history,docs_html,output_query,output_language,gallery, figures#,output_query,output_keywords
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
|
| 281 |
|
| 282 |
def save_feedback(feed: str, user_id):
|
|
|
|
| 409 |
with gr.Tab("Sources",elem_id = "tab-citations",id = 1):
|
| 410 |
sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
|
| 411 |
docs_textbox = gr.State("")
|
| 412 |
+
|
| 413 |
+
|
| 414 |
+
|
| 415 |
|
| 416 |
# with Modal(visible = False) as config_modal:
|
| 417 |
with gr.Tab("Configuration",elem_id = "tab-config",id = 2):
|
|
|
|
| 445 |
output_language = gr.Textbox(label="Language",show_label = True,elem_id = "language",lines = 1,interactive = False)
|
| 446 |
|
| 447 |
|
| 448 |
+
with gr.Tab("Figures",elem_id = "tab-figures",id = 3):
|
| 449 |
+
figures_cards = gr.HTML(show_label=False, elem_id="sources-figures")
|
| 450 |
+
|
| 451 |
+
|
| 452 |
|
| 453 |
#---------------------------------------------------------------------------------------
|
| 454 |
# OTHER TABS
|
|
|
|
| 497 |
|
| 498 |
(textbox
|
| 499 |
.submit(start_chat, [textbox,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_textbox")
|
| 500 |
+
.then(chat, [textbox,chatbot,dropdown_audience, dropdown_sources,dropdown_reports], [chatbot,sources_textbox,output_query,output_language,gallery_component,figures_cards],concurrency_limit = 8,api_name = "chat_textbox")
|
| 501 |
.then(finish_chat, None, [textbox],api_name = "finish_chat_textbox")
|
| 502 |
)
|
| 503 |
|
| 504 |
(examples_hidden
|
| 505 |
.change(start_chat, [examples_hidden,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_examples")
|
| 506 |
+
.then(chat, [examples_hidden,chatbot,dropdown_audience, dropdown_sources,dropdown_reports], [chatbot,sources_textbox,output_query,output_language,gallery_component, figures_cards],concurrency_limit = 8,api_name = "chat_examples")
|
| 507 |
.then(finish_chat, None, [textbox],api_name = "finish_chat_examples")
|
| 508 |
)
|
| 509 |
|
front/utils.py
CHANGED
|
@@ -55,7 +55,7 @@ def make_html_source(source,i):
|
|
| 55 |
score = meta['reranking_score']
|
| 56 |
if score > 0.8:
|
| 57 |
color = "score-green"
|
| 58 |
-
elif score > 0.
|
| 59 |
color = "score-orange"
|
| 60 |
else:
|
| 61 |
color = "score-red"
|
|
@@ -91,8 +91,9 @@ def make_html_source(source,i):
|
|
| 91 |
<div class="card card-image">
|
| 92 |
<div class="card-content">
|
| 93 |
<h2>Image {i} - {title} - Page {int(meta['page_number'])}</h2>
|
| 94 |
-
<p>{content}</p>
|
| 95 |
<p class='ai-generated'>AI-generated description</p>
|
|
|
|
|
|
|
| 96 |
{relevancy_score}
|
| 97 |
</div>
|
| 98 |
<div class="card-footer">
|
|
@@ -107,6 +108,53 @@ def make_html_source(source,i):
|
|
| 107 |
return card
|
| 108 |
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
def make_toolbox(tool_name,description = "",checked = False,elem_id = "toggle"):
|
| 112 |
|
|
|
|
| 55 |
score = meta['reranking_score']
|
| 56 |
if score > 0.8:
|
| 57 |
color = "score-green"
|
| 58 |
+
elif score > 0.5:
|
| 59 |
color = "score-orange"
|
| 60 |
else:
|
| 61 |
color = "score-red"
|
|
|
|
| 91 |
<div class="card card-image">
|
| 92 |
<div class="card-content">
|
| 93 |
<h2>Image {i} - {title} - Page {int(meta['page_number'])}</h2>
|
|
|
|
| 94 |
<p class='ai-generated'>AI-generated description</p>
|
| 95 |
+
<p>{content}</p>
|
| 96 |
+
|
| 97 |
{relevancy_score}
|
| 98 |
</div>
|
| 99 |
<div class="card-footer">
|
|
|
|
| 108 |
return card
|
| 109 |
|
| 110 |
|
| 111 |
+
def make_html_figure_sources(source,i,img_str):
    """Build an HTML card for a figure (image) source document.

    Parameters
    ----------
    source : document object with ``.metadata`` (dict) and ``.page_content`` (str)
        Expected metadata keys: ``reranking_score``, ``name``, ``figure_code``,
        ``short_name``, ``page_number``, ``url``.
    i : int
        1-based display index of the image ("Image {i}" in the card header).
    img_str : str
        Base64-encoded PNG payload, embedded inline via a data: URI.

    Returns
    -------
    str
        An HTML snippet (card) showing the image, its AI-generated caption,
        a relevancy score badge, and a link to the source PDF page.
    """
    meta = source.metadata
    content = source.page_content.strip()

    # Map the reranking score to a traffic-light CSS class (see style.css).
    score = meta['reranking_score']
    if score > 0.8:
        color = "score-green"
    elif score > 0.5:
        color = "score-orange"
    else:
        color = "score-red"

    # NOTE(review): toc_levels is always empty here, so the first branch is
    # currently dead; kept for parity with make_html_source, which fills it
    # from metadata — TODO confirm whether figures should carry TOC levels too.
    toc_levels = []
    if len(toc_levels) > 0:
        name = f"<b>{toc_levels}</b><br/>{meta['name']}"
    else:
        name = meta['name']

    relevancy_score = f"<p class=relevancy-score>Relevancy score: <span class='{color}'>{score:.1%}</span></p>"

    # Prefer the official figure code (e.g. "Figure 2.1") when available.
    if meta["figure_code"] != "N/A":
        title = f"{meta['figure_code']} - {meta['short_name']}"
    else:
        title = f"{meta['short_name']}"

    card = f"""
    <div class="card card-image">
        <div class="card-content">
            <h2>Image {i} - {title} - Page {int(meta['page_number'])}</h2>
            <p class='ai-generated'>AI-generated description</p>
            <img src="data:image/png;base64, {img_str}" alt="Alt text" />
            <p>{content}</p>
            {relevancy_score}
        </div>
        <div class="card-footer">
            <span>{name}</span>
            <a href="{meta['url']}#page={int(meta['page_number'])}" target="_blank" class="pdf-link">
                <span role="img" aria-label="Open PDF">π</span>
            </a>
        </div>
    </div>
    """
    return card
|
| 156 |
+
|
| 157 |
+
|
| 158 |
|
| 159 |
def make_toolbox(tool_name,description = "",checked = False,elem_id = "toggle"):
|
| 160 |
|
sandbox/20240310 - CQA - Semantic Routing 1.ipynb
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
style.css
CHANGED
|
@@ -206,6 +206,11 @@ label.selected{
|
|
| 206 |
overflow-y: auto !important;
|
| 207 |
}
|
| 208 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
div#tab-config{
|
| 210 |
height:calc(100vh - 190px) !important;
|
| 211 |
overflow-y: auto !important;
|
|
@@ -475,7 +480,7 @@ span.chatbot > p > img{
|
|
| 475 |
color:orange !important;
|
| 476 |
}
|
| 477 |
|
| 478 |
-
.score-
|
| 479 |
color:red !important;
|
| 480 |
}
|
| 481 |
.message-buttons-left.panel.message-buttons.with-avatar {
|
|
|
|
| 206 |
overflow-y: auto !important;
|
| 207 |
}
|
| 208 |
|
| 209 |
+
div#sources-figures{
|
| 210 |
+
height:calc(100vh - 190px) !important;
|
| 211 |
+
overflow-y: auto !important;
|
| 212 |
+
}
|
| 213 |
+
|
| 214 |
div#tab-config{
|
| 215 |
height:calc(100vh - 190px) !important;
|
| 216 |
overflow-y: auto !important;
|
|
|
|
| 480 |
color:orange !important;
|
| 481 |
}
|
| 482 |
|
| 483 |
+
.score-red{
|
| 484 |
color:red !important;
|
| 485 |
}
|
| 486 |
.message-buttons-left.panel.message-buttons.with-avatar {
|