Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -53,7 +53,6 @@ file_extractor = {
|
|
| 53 |
# Markdown content definitions
|
| 54 |
description = """
|
| 55 |
## Welcome to DocBot 📚🤖
|
| 56 |
-
|
| 57 |
DocBot is an intelligent document analysis tool that can help you extract insights from various document formats including:
|
| 58 |
- PDF documents
|
| 59 |
- Word documents (.docx, .doc)
|
|
@@ -63,19 +62,16 @@ DocBot is an intelligent document analysis tool that can help you extract insigh
|
|
| 63 |
- PowerPoint presentations
|
| 64 |
- HTML files
|
| 65 |
- Images with text (JPG, PNG, WebP, SVG)
|
| 66 |
-
|
| 67 |
Simply upload your document, select your preferred embedding model and LLM, then start asking questions!
|
| 68 |
"""
|
| 69 |
|
| 70 |
guide = """
|
| 71 |
### How to Use DocBot:
|
| 72 |
-
|
| 73 |
1. **Upload Document**: Choose any supported file format
|
| 74 |
2. **Select Embedding Model**: Choose from available embedding models (BAAI/bge-small-en-v1.5 is recommended for most cases)
|
| 75 |
3. **Submit**: Click submit to process your document
|
| 76 |
4. **Select LLM**: Choose your preferred language model
|
| 77 |
5. **Ask Questions**: Start chatting with your document!
|
| 78 |
-
|
| 79 |
### Tips:
|
| 80 |
- Smaller embedding models (like bge-small-en-v1.5) are faster but may be less accurate
|
| 81 |
- Larger models provide better understanding but take more time
|
|
@@ -165,6 +161,10 @@ def encode_image_safe(image_path):
|
|
| 165 |
pass
|
| 166 |
return ""
|
| 167 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
# Encode the images (with fallback for missing images)
|
| 169 |
github_logo_encoded = encode_image_safe("Images/github-logo.png")
|
| 170 |
linkedin_logo_encoded = encode_image_safe("Images/linkedin-logo.png")
|
|
@@ -208,7 +208,7 @@ with gr.Blocks(
|
|
| 208 |
|
| 209 |
with gr.Row():
|
| 210 |
btn = gr.Button("π Process Document", variant='primary', size="lg")
|
| 211 |
-
|
| 212 |
|
| 213 |
output = gr.Textbox(
|
| 214 |
label='Processing Status',
|
|
@@ -246,7 +246,7 @@ with gr.Blocks(
|
|
| 246 |
|
| 247 |
with gr.Row():
|
| 248 |
submit_btn = gr.Button("Send", variant="primary")
|
| 249 |
-
|
| 250 |
|
| 251 |
# Add footer if images exist
|
| 252 |
if any([github_logo_encoded, linkedin_logo_encoded, website_logo_encoded]):
|
|
@@ -261,6 +261,9 @@ with gr.Blocks(
|
|
| 261 |
history.append([message, response])
|
| 262 |
return history, ""
|
| 263 |
|
|
|
|
|
|
|
|
|
|
| 264 |
# Event bindings
|
| 265 |
llm_model_dropdown.change(
|
| 266 |
fn=set_llm_model,
|
|
@@ -286,10 +289,15 @@ with gr.Blocks(
|
|
| 286 |
outputs=[chatbot, msg]
|
| 287 |
)
|
| 288 |
|
| 289 |
-
|
| 290 |
-
|
| 291 |
outputs=[file_input, embed_model_dropdown, output]
|
| 292 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
|
| 294 |
# Launch the demo
|
| 295 |
if __name__ == "__main__":
|
|
|
|
| 53 |
# Markdown content definitions
|
| 54 |
description = """
|
| 55 |
## Welcome to DocBot 📚🤖
|
|
|
|
| 56 |
DocBot is an intelligent document analysis tool that can help you extract insights from various document formats including:
|
| 57 |
- PDF documents
|
| 58 |
- Word documents (.docx, .doc)
|
|
|
|
| 62 |
- PowerPoint presentations
|
| 63 |
- HTML files
|
| 64 |
- Images with text (JPG, PNG, WebP, SVG)
|
|
|
|
| 65 |
Simply upload your document, select your preferred embedding model and LLM, then start asking questions!
|
| 66 |
"""
|
| 67 |
|
| 68 |
guide = """
|
| 69 |
### How to Use DocBot:
|
|
|
|
| 70 |
1. **Upload Document**: Choose any supported file format
|
| 71 |
2. **Select Embedding Model**: Choose from available embedding models (BAAI/bge-small-en-v1.5 is recommended for most cases)
|
| 72 |
3. **Submit**: Click submit to process your document
|
| 73 |
4. **Select LLM**: Choose your preferred language model
|
| 74 |
5. **Ask Questions**: Start chatting with your document!
|
|
|
|
| 75 |
### Tips:
|
| 76 |
- Smaller embedding models (like bge-small-en-v1.5) are faster but may be less accurate
|
| 77 |
- Larger models provide better understanding but take more time
|
|
|
|
| 161 |
pass
|
| 162 |
return ""
|
| 163 |
|
| 164 |
+
# Clear function for file processing components
|
| 165 |
+
def clear_file_components():
|
| 166 |
+
return None, embed_models[0], ""
|
| 167 |
+
|
| 168 |
# Encode the images (with fallback for missing images)
|
| 169 |
github_logo_encoded = encode_image_safe("Images/github-logo.png")
|
| 170 |
linkedin_logo_encoded = encode_image_safe("Images/linkedin-logo.png")
|
|
|
|
| 208 |
|
| 209 |
with gr.Row():
|
| 210 |
btn = gr.Button("π Process Document", variant='primary', size="lg")
|
| 211 |
+
clear_btn = gr.Button("🗑️ Clear", size="lg")
|
| 212 |
|
| 213 |
output = gr.Textbox(
|
| 214 |
label='Processing Status',
|
|
|
|
| 246 |
|
| 247 |
with gr.Row():
|
| 248 |
submit_btn = gr.Button("Send", variant="primary")
|
| 249 |
+
clear_chat_btn = gr.Button("Clear Chat")
|
| 250 |
|
| 251 |
# Add footer if images exist
|
| 252 |
if any([github_logo_encoded, linkedin_logo_encoded, website_logo_encoded]):
|
|
|
|
| 261 |
history.append([message, response])
|
| 262 |
return history, ""
|
| 263 |
|
| 264 |
+
def clear_chat():
|
| 265 |
+
return [], ""
|
| 266 |
+
|
| 267 |
# Event bindings
|
| 268 |
llm_model_dropdown.change(
|
| 269 |
fn=set_llm_model,
|
|
|
|
| 289 |
outputs=[chatbot, msg]
|
| 290 |
)
|
| 291 |
|
| 292 |
+
clear_btn.click(
|
| 293 |
+
fn=clear_file_components,
|
| 294 |
outputs=[file_input, embed_model_dropdown, output]
|
| 295 |
)
|
| 296 |
+
|
| 297 |
+
clear_chat_btn.click(
|
| 298 |
+
fn=clear_chat,
|
| 299 |
+
outputs=[chatbot, msg]
|
| 300 |
+
)
|
| 301 |
|
| 302 |
# Launch the demo
|
| 303 |
if __name__ == "__main__":
|