Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,6 +8,7 @@ import torch
|
|
| 8 |
from PIL import Image
|
| 9 |
import uuid
|
| 10 |
import io
|
|
|
|
| 11 |
|
| 12 |
# Fine-tuned for OCR-based tasks from Qwen's [ Qwen/Qwen2-VL-2B-Instruct ]
|
| 13 |
MODEL_ID = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
|
|
@@ -93,10 +94,19 @@ def model_inference(input_dict, history):
|
|
| 93 |
|
| 94 |
# Apply chat template and process inputs
|
| 95 |
prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
inputs = processor(
|
| 97 |
text=[prompt],
|
| 98 |
-
images=
|
| 99 |
-
videos=
|
| 100 |
return_tensors="pt",
|
| 101 |
padding=True,
|
| 102 |
).to("cuda")
|
|
@@ -121,7 +131,6 @@ def model_inference(input_dict, history):
|
|
| 121 |
|
| 122 |
# Example inputs
|
| 123 |
examples = [
|
| 124 |
-
[{"text": "Describe the video.", "files": ["examples/demo.mp4"]}],
|
| 125 |
[{"text": "Extract JSON from the image", "files": ["example_images/document.jpg"]}],
|
| 126 |
[{"text": "summarize the letter", "files": ["examples/1.png"]}],
|
| 127 |
[{"text": "Describe the photo", "files": ["examples/3.png"]}],
|
|
@@ -132,6 +141,7 @@ examples = [
|
|
| 132 |
[{"text": "Can you describe this image?", "files": ["example_images/newyork.jpg"]}],
|
| 133 |
[{"text": "Can you describe this image?", "files": ["example_images/dogs.jpg"]}],
|
| 134 |
[{"text": "Where do the severe droughts happen according to this diagram?", "files": ["example_images/examples_weather_events.png"]}],
|
|
|
|
| 135 |
]
|
| 136 |
|
| 137 |
demo = gr.ChatInterface(
|
|
|
|
| 8 |
from PIL import Image
|
| 9 |
import uuid
|
| 10 |
import io
|
| 11 |
+
import os
|
| 12 |
|
| 13 |
# Fine-tuned for OCR-based tasks from Qwen's [ Qwen/Qwen2-VL-2B-Instruct ]
|
| 14 |
MODEL_ID = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
|
|
|
|
| 94 |
|
| 95 |
# Apply chat template and process inputs
|
| 96 |
prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
| 97 |
+
|
| 98 |
+
# Prepare inputs for the processor
|
| 99 |
+
image_inputs = [load_image(path) for path, media_type in zip(media_paths, media_types) if media_type == "image"]
|
| 100 |
+
video_inputs = [path for path, media_type in zip(media_paths, media_types) if media_type == "video"]
|
| 101 |
+
|
| 102 |
+
# Use None instead of an empty list when no videos were provided
|
| 103 |
+
if not video_inputs:
|
| 104 |
+
video_inputs = None
|
| 105 |
+
|
| 106 |
inputs = processor(
|
| 107 |
text=[prompt],
|
| 108 |
+
images=image_inputs if image_inputs else None,
|
| 109 |
+
videos=video_inputs if video_inputs else None,
|
| 110 |
return_tensors="pt",
|
| 111 |
padding=True,
|
| 112 |
).to("cuda")
|
|
|
|
| 131 |
|
| 132 |
# Example inputs
|
| 133 |
examples = [
|
|
|
|
| 134 |
[{"text": "Extract JSON from the image", "files": ["example_images/document.jpg"]}],
|
| 135 |
[{"text": "summarize the letter", "files": ["examples/1.png"]}],
|
| 136 |
[{"text": "Describe the photo", "files": ["examples/3.png"]}],
|
|
|
|
| 141 |
[{"text": "Can you describe this image?", "files": ["example_images/newyork.jpg"]}],
|
| 142 |
[{"text": "Can you describe this image?", "files": ["example_images/dogs.jpg"]}],
|
| 143 |
[{"text": "Where do the severe droughts happen according to this diagram?", "files": ["example_images/examples_weather_events.png"]}],
|
| 144 |
+
[{"text": "Describe the video.", "files": ["example_videos/sample.mp4"]}],
|
| 145 |
]
|
| 146 |
|
| 147 |
demo = gr.ChatInterface(
|