Spaces:

akhaliq
/

DeepSeek-OCR

Running on Zero

App Files Files Community

akhaliq HF Staff committed 7 days ago

Commit

b245a85

verified ·

1 Parent(s): d44e05d

Update Gradio app with multiple files

Browse files

Files changed (1) hide show

app.py +65 -67

app.py CHANGED Viewed

@@ -5,8 +5,9 @@ from PIL import Image
 import os
 import spaces
 import tempfile
-import json
-from pathlib import Path
 # Set CUDA device
 os.environ["CUDA_VISIBLE_DEVICES"] = '0'
@@ -23,6 +24,17 @@ model = AutoModel.from_pretrained(
 model = model.eval()
 @spaces.GPU(duration=120)
 def ocr_process(
     image_input: Image.Image,
@@ -79,81 +91,67 @@ def ocr_process(
         else:
             prompt = "<image>\nFree OCR. "
-        # Run inference with save_results=True to save output
-        result = model.infer(
-            tokenizer,
-            prompt=prompt,
-            image_file=temp_image_path,
-            output_path=temp_dir,
-            base_size=config["base_size"],
-            image_size=config["image_size"],
-            crop_mode=config["crop_mode"],
-            save_results=True,
-            test_compress=True,
-        )
-        # Try to read the saved results
         extracted_text = ""
-        # Check for saved JSON results
-        json_path = Path(temp_dir) / "input_image_outputs.json"
-        if json_path.exists():
-            try:
-                with open(json_path, 'r', encoding='utf-8') as f:
-                    data = json.load(f)
-                    # Extract text from the JSON structure
-                    if isinstance(data, dict):
-                        if 'text' in data:
-                            extracted_text = data['text']
-                        elif 'output' in data:
-                            extracted_text = data['output']
-                        elif 'result' in data:
-                            extracted_text = data['result']
-                        else:
-                            # If the structure is different, try to get the first string value
-                            for key, value in data.items():
-                                if isinstance(value, str) and len(value) > 10:
-                                    extracted_text = value
-                                    break
-                    elif isinstance(data, list) and len(data) > 0:
-                        extracted_text = str(data[0])
-                    else:
-                        extracted_text = str(data)
-            except Exception as e:
-                print(f"Error reading JSON: {e}")
-        # If no JSON, check for text file
-        if not extracted_text:
-            txt_path = Path(temp_dir) / "input_image_outputs.txt"
-            if txt_path.exists():
-                try:
-                    with open(txt_path, 'r', encoding='utf-8') as f:
-                        extracted_text = f.read()
-                except Exception as e:
-                    print(f"Error reading text file: {e}")
-        # If still no text, check for any output files
-        if not extracted_text:
-            output_files = list(Path(temp_dir).glob("*output*"))
-            for file_path in output_files:
-                if file_path.suffix in ['.txt', '.json', '.md']:
-                    try:
-                        with open(file_path, 'r', encoding='utf-8') as f:
-                            content = f.read()
-                            if content.strip():
-                                extracted_text = content
-                                break
-                    except Exception as e:
-                        print(f"Error reading {file_path}: {e}")
-        # If we still don't have text but result is not None, use result directly
         if not extracted_text and result is not None:
             if isinstance(result, str):
                 extracted_text = result
             elif isinstance(result, (list, tuple)) and len(result) > 0:
-                extracted_text = str(result[0])
-            else:
-                extracted_text = str(result)
     # Move model back to CPU to free GPU memory
     model.to("cpu")

 import os
 import spaces
 import tempfile
+import sys
+from io import StringIO
+from contextlib import contextmanager
 # Set CUDA device
 os.environ["CUDA_VISIBLE_DEVICES"] = '0'
 model = model.eval()
+@contextmanager
+def capture_stdout():  # context manager: swaps sys.stdout for an in-memory StringIO while the with-block runs
+    """Capture stdout to get printed output from model"""
+    old_stdout = sys.stdout  # remember the current stream so it can be put back afterwards
+    sys.stdout = StringIO()  # anything printed from here on is written into this buffer
+    try:
+        yield sys.stdout  # hand the buffer to the caller, who reads it with .getvalue()
+    finally:
+        sys.stdout = old_stdout  # always restore the saved stream, even if the with-body raised
 @spaces.GPU(duration=120)
 def ocr_process(
     image_input: Image.Image,
         else:
             prompt = "<image>\nFree OCR. "
+        # Capture stdout while running inference
+        captured_output = ""
+        with capture_stdout() as output:
+            result = model.infer(
+                tokenizer,
+                prompt=prompt,
+                image_file=temp_image_path,
+                output_path=temp_dir,
+                base_size=config["base_size"],
+                image_size=config["image_size"],
+                crop_mode=config["crop_mode"],
+                save_results=True,
+                test_compress=True,
+            )
+            captured_output = output.getvalue()
+        # Extract the text from captured output
         extracted_text = ""
+        # Look for the actual OCR result in the captured output
+        # The model prints the extracted text between certain markers
+        lines = captured_output.split('\n')
+        capture_text = False
+        text_lines = []
+        for line in lines:
+            # Start capturing after seeing certain patterns
+            if "# " in line or line.strip().startswith("**"):
+                capture_text = True
+            if capture_text:
+                # Stop at the separator lines
+                if line.startswith("====") or line.startswith("---") and len(line) > 10:
+                    if text_lines:  # Only stop if we've captured something
+                        break
+                # Add non-empty lines that aren't debug output
+                elif line.strip() and not line.startswith("image size:") and not line.startswith("valid image") and not line.startswith("output texts") and not line.startswith("compression"):
+                    text_lines.append(line)
+        if text_lines:
+            extracted_text = '\n'.join(text_lines)
+        # If we didn't get text from stdout, check if result contains text
         if not extracted_text and result is not None:
             if isinstance(result, str):
                 extracted_text = result
             elif isinstance(result, (list, tuple)) and len(result) > 0:
+                # Try to extract text from the result
+                if isinstance(result[0], str):
+                    extracted_text = result[0]
+                elif hasattr(result[0], 'text'):
+                    extracted_text = result[0].text
+        # Clean up any remaining markers from the text
+        if extracted_text:
+            # Remove any remaining debug output patterns
+            clean_lines = []
+            for line in extracted_text.split('\n'):
+                if not any(pattern in line.lower() for pattern in ['image size:', 'valid image', 'compression ratio', 'save results:', 'output texts']):
+                    clean_lines.append(line)
+            extracted_text = '\n'.join(clean_lines).strip()
     # Move model back to CPU to free GPU memory
     model.to("cpu")