Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -16,6 +16,7 @@ import cv2
 from transformers import (
     Qwen2VLForConditionalGeneration,
     Qwen2_5_VLForConditionalGeneration,
+    AutoModelForCausalLM,
     AutoModelForVision2Seq,
     AutoProcessor,
     TextIteratorStreamer,
@@ -77,6 +78,16 @@ model_x = AutoModelForVision2Seq.from_pretrained(
     torch_dtype=torch.float16
 ).to(device).eval()
 
+# Load dots.ocr
+MODEL_ID_D = "rednote-hilab/dots.ocr"
+processor_d = AutoProcessor.from_pretrained(MODEL_ID_D, trust_remote_code=True)
+model_d = AutoModelForCausalLM.from_pretrained(
+    MODEL_ID_D,
+    attn_implementation="flash_attention_2",
+    trust_remote_code=True,
+    torch_dtype=torch.float16
+).to(device).eval()
+
 # Preprocessing functions for SmolDocling-256M
 def add_random_padding(image, min_percent=0.1, max_percent=0.10):
     """Add random padding to an image based on its size."""
@@ -143,6 +154,9 @@ def generate_image(model_name: str, text: str, image: Image.Image,
     elif model_name == "Typhoon-OCR-7B":
         processor = processor_l
         model = model_l
+    elif model_name == "rednote-dots.ocr":
+        processor = processor_d
+        model = model_d
     else:
         yield "Invalid model selected.", "Invalid model selected."
         return
@@ -221,6 +235,9 @@ def generate_video(model_name: str, text: str, video_path: str,
     elif model_name == "Typhoon-OCR-7B":
         processor = processor_l
         model = model_l
+    elif model_name == "rednote-dots.ocr":
+        processor = processor_d
+        model = model_d
     else:
         yield "Invalid model selected.", "Invalid model selected."
         return
@@ -351,7 +368,7 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
         formatted_output = gr.Markdown(label="(Result.md)")
 
         model_choice = gr.Radio(
-            choices=["Nanonets-OCR-s", "MonkeyOCR-Recognition", "Typhoon-OCR-7B", "SmolDocling-256M-preview"],
+            choices=["Nanonets-OCR-s", "MonkeyOCR-Recognition", "rednote-dots.ocr", "Typhoon-OCR-7B", "SmolDocling-256M-preview"],
             label="Select Model",
             value="Nanonets-OCR-s"
         )
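For context, here is a minimal usage sketch (not part of this commit) of how the newly loaded processor_d / model_d pair could be called on a single image inside app.py. It assumes dots.ocr's remote-code processor exposes the same chat-template interface the app already uses for its Qwen2-VL-style models; the prompt text, function name, and max_new_tokens value are placeholders.

# Hypothetical sketch, not part of the commit: running dots.ocr on one image.
# Relies on torch, processor_d and model_d as defined in app.py above, and
# assumes the remote-code processor follows the Qwen2-VL chat-template API.
def run_dots_ocr(image, prompt="Extract the text from this document."):
    messages = [{
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": prompt},
        ],
    }]
    chat_text = processor_d.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = processor_d(
        text=[chat_text], images=[image], return_tensors="pt", padding=True
    ).to(model_d.device)
    with torch.inference_mode():
        output_ids = model_d.generate(**inputs, max_new_tokens=1024)
    # Drop the prompt tokens before decoding so only the generated answer remains.
    trimmed = output_ids[:, inputs["input_ids"].shape[1]:]
    return processor_d.batch_decode(trimmed, skip_special_tokens=True)[0]

One note on the loading change itself: attn_implementation="flash_attention_2" requires the flash-attn package and a compatible GPU on the Space; if that dependency is unavailable, omitting the argument lets the model fall back to the default attention implementation.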