Update app.py
app.py CHANGED
@@ -10,16 +10,15 @@ import torch
 def load_models():
     RAG = RAGMultiModalModel.from_pretrained("vidore/colpali")
     model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct",
-                                                            trust_remote_code=True, torch_dtype=torch.float32) #
+                                                            trust_remote_code=True, torch_dtype=torch.float32) # float32 for CPU
     processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_remote_code=True)
     return RAG, model, processor
 
 RAG, model, processor = load_models()
 
 # Function for OCR and search
-# Skip RAG search and use Qwen2VL for direct OCR
 def ocr_and_search(image, keyword):
-
+
     text_query = "Extract all the text in Sanskrit and English from the image."
 
     # Prepare message for Qwen model
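The one substantive edit in this hunk fills in the empty trailing comment: torch.float32 is the safe dtype on CPU-only Space hardware, where half-precision inference is poorly supported. The body of ocr_and_search between this hunk and the next is not part of the diff; a minimal sketch of the standard Qwen2-VL prepare-and-generate pattern it presumably follows (the helper name, the use of the qwen-vl-utils package, and the token budget are assumptions, not the file's actual code):

# Sketch only: the elided lines of app.py are not shown in this diff.
from qwen_vl_utils import process_vision_info

def run_qwen_ocr(image, text_query, model, processor):
    messages = [{
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": text_query},
        ],
    }]
    # Render the chat template, then build tensor inputs from the image.
    prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(text=[prompt], images=image_inputs, videos=video_inputs,
                       padding=True, return_tensors="pt")
    output_ids = model.generate(**inputs, max_new_tokens=1024)  # budget is a guess
    # Drop the prompt tokens so only the generated answer is decoded.
    trimmed = [out[len(inp):] for inp, out in zip(inputs.input_ids, output_ids)]
    return processor.batch_decode(trimmed, skip_special_tokens=True,
                                  clean_up_tokenization_spaces=False)[0]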
@@ -64,9 +63,9 @@ def ocr_and_search(image, keyword):
     return extracted_text, matched_sentences, json_output
 
 
-# Gradio App
+# Gradio App
 def app(image, keyword):
-
+
     extracted_text, search_results, json_output = ocr_and_search(image, keyword)
 
     search_results_str = "\n".join(search_results) if search_results else "No matches found."
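This hunk shows ocr_and_search returning matched_sentences and json_output, but the code that builds them is elided. A plausible shape for that step, with the sentence splitting and the JSON layout entirely assumed:

import json
import re

def search_keyword(extracted_text, keyword):
    # Hypothetical helper: split on sentence-ending punctuation and keep
    # sentences containing the keyword, case-insensitively. \u0964 is the
    # Devanagari danda, relevant for the Sanskrit text this app targets.
    sentences = re.split(r"(?<=[.!?\u0964])\s+", extracted_text)
    matched = [s for s in sentences if keyword.lower() in s.lower()]
    json_output = json.dumps({"keyword": keyword, "matches": matched},
                             ensure_ascii=False, indent=2)
    return matched, json_output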
@@ -77,7 +76,7 @@ def app(image, keyword):
 iface = gr.Interface(
     fn=app,
     inputs=[
-        gr.Image(type="pil", label="Upload an Image"),
+        gr.Image(type="pil", label="Upload an Image"),
         gr.Textbox(label="Enter keyword to search in extracted text", placeholder="Keyword")
     ],
     outputs=[
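The diff is cut off inside the outputs list. Since app returns three values, a plausible completion of the truncated gr.Interface call (the output components and labels are assumptions, not the file's actual contents):

import gradio as gr

# Hypothetical completion; the real outputs list is not visible in this diff.
iface = gr.Interface(
    fn=app,
    inputs=[
        gr.Image(type="pil", label="Upload an Image"),
        gr.Textbox(label="Enter keyword to search in extracted text", placeholder="Keyword"),
    ],
    outputs=[
        gr.Textbox(label="Extracted Text"),
        gr.Textbox(label="Search Results"),
        gr.JSON(label="JSON Output"),
    ],
)

iface.launch()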