Spaces:
Running
on
Zero
Commit
·
beca8ab
1
Parent(s):
c7a30f7
parse olmo output
Browse files
app.py
CHANGED
|
@@ -3,6 +3,7 @@ from PIL import Image
|
|
| 3 |
import xml.etree.ElementTree as ET
|
| 4 |
import os
|
| 5 |
import torch
|
|
|
|
| 6 |
from transformers import AutoProcessor, AutoModelForImageTextToText, pipeline, Qwen2VLForConditionalGeneration
|
| 7 |
import spaces
|
| 8 |
|
|
@@ -259,6 +260,14 @@ def run_hf_ocr(image_path, model_name="RolmOCR"):
|
|
| 259 |
generated_content = ocr_results[0]["generated_text"]
|
| 260 |
|
| 261 |
if isinstance(generated_content, str):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
return generated_content
|
| 263 |
|
| 264 |
if isinstance(generated_content, list) and generated_content:
|
|
@@ -272,6 +281,14 @@ def run_hf_ocr(image_path, model_name="RolmOCR"):
|
|
| 272 |
),
|
| 273 |
None,
|
| 274 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
return assistant_message
|
| 276 |
|
| 277 |
# Fallback if the specific assistant message structure isn't found but there's content
|
|
@@ -284,11 +301,27 @@ def run_hf_ocr(image_path, model_name="RolmOCR"):
|
|
| 284 |
and isinstance(generated_content[1], dict)
|
| 285 |
and "content" in generated_content[1]
|
| 286 |
):
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
else:
|
| 291 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 292 |
|
| 293 |
print(f"Unexpected OCR output structure from HF model: {ocr_results}")
|
| 294 |
return "Error: Could not parse OCR model output. Check console."
|
|
|
|
| 3 |
import xml.etree.ElementTree as ET
|
| 4 |
import os
|
| 5 |
import torch
|
| 6 |
+
import json
|
| 7 |
from transformers import AutoProcessor, AutoModelForImageTextToText, pipeline, Qwen2VLForConditionalGeneration
|
| 8 |
import spaces
|
| 9 |
|
|
|
|
| 260 |
generated_content = ocr_results[0]["generated_text"]
|
| 261 |
|
| 262 |
if isinstance(generated_content, str):
|
| 263 |
+
# Check if it's JSON format from olmOCR
|
| 264 |
+
if model_name == "olmOCR":
|
| 265 |
+
try:
|
| 266 |
+
json_data = json.loads(generated_content)
|
| 267 |
+
if "natural_text" in json_data:
|
| 268 |
+
return json_data["natural_text"]
|
| 269 |
+
except (json.JSONDecodeError, KeyError, TypeError):
|
| 270 |
+
pass
|
| 271 |
return generated_content
|
| 272 |
|
| 273 |
if isinstance(generated_content, list) and generated_content:
|
|
|
|
| 281 |
),
|
| 282 |
None,
|
| 283 |
):
|
| 284 |
+
# Check if it's JSON format from olmOCR
|
| 285 |
+
if model_name == "olmOCR":
|
| 286 |
+
try:
|
| 287 |
+
json_data = json.loads(assistant_message)
|
| 288 |
+
if "natural_text" in json_data:
|
| 289 |
+
return json_data["natural_text"]
|
| 290 |
+
except (json.JSONDecodeError, KeyError, TypeError):
|
| 291 |
+
pass
|
| 292 |
return assistant_message
|
| 293 |
|
| 294 |
# Fallback if the specific assistant message structure isn't found but there's content
|
|
|
|
| 301 |
and isinstance(generated_content[1], dict)
|
| 302 |
and "content" in generated_content[1]
|
| 303 |
):
|
| 304 |
+
content = generated_content[1]["content"]
|
| 305 |
+
# Check if it's JSON format from olmOCR
|
| 306 |
+
if model_name == "olmOCR":
|
| 307 |
+
try:
|
| 308 |
+
json_data = json.loads(content)
|
| 309 |
+
if "natural_text" in json_data:
|
| 310 |
+
return json_data["natural_text"]
|
| 311 |
+
except (json.JSONDecodeError, KeyError, TypeError):
|
| 312 |
+
pass
|
| 313 |
+
return content # Assuming second part is assistant
|
| 314 |
else:
|
| 315 |
+
content = generated_content[0]["content"]
|
| 316 |
+
# Check if it's JSON format from olmOCR
|
| 317 |
+
if model_name == "olmOCR":
|
| 318 |
+
try:
|
| 319 |
+
json_data = json.loads(content)
|
| 320 |
+
if "natural_text" in json_data:
|
| 321 |
+
return json_data["natural_text"]
|
| 322 |
+
except (json.JSONDecodeError, KeyError, TypeError):
|
| 323 |
+
pass
|
| 324 |
+
return content
|
| 325 |
|
| 326 |
print(f"Unexpected OCR output structure from HF model: {ocr_results}")
|
| 327 |
return "Error: Could not parse OCR model output. Check console."
|