akhaliq (HF Staff) committed · Commit d44e05d · verified · Parent(s): 15cf5b6

Update Gradio app with multiple files

Files changed (1): app.py (+68 −3)
app.py CHANGED
@@ -5,6 +5,8 @@ from PIL import Image
 import os
 import spaces
 import tempfile
+import json
+from pathlib import Path
 
 # Set CUDA device
 os.environ["CUDA_VISIBLE_DEVICES"] = '0'
@@ -77,7 +79,7 @@ def ocr_process(
     else:
         prompt = "<image>\nFree OCR. "
 
-    # Run inference - return the result directly
+    # Run inference with save_results=True to save output
     result = model.infer(
         tokenizer,
         prompt=prompt,
@@ -89,13 +91,76 @@ def ocr_process(
         save_results=True,
         test_compress=True,
     )
+
+    # Try to read the saved results
+    extracted_text = ""
+
+    # Check for saved JSON results
+    json_path = Path(temp_dir) / "input_image_outputs.json"
+    if json_path.exists():
+        try:
+            with open(json_path, 'r', encoding='utf-8') as f:
+                data = json.load(f)
+            # Extract text from the JSON structure
+            if isinstance(data, dict):
+                if 'text' in data:
+                    extracted_text = data['text']
+                elif 'output' in data:
+                    extracted_text = data['output']
+                elif 'result' in data:
+                    extracted_text = data['result']
+                else:
+                    # If the structure is different, try to get the first string value
+                    for key, value in data.items():
+                        if isinstance(value, str) and len(value) > 10:
+                            extracted_text = value
+                            break
+            elif isinstance(data, list) and len(data) > 0:
+                extracted_text = str(data[0])
+            else:
+                extracted_text = str(data)
+        except Exception as e:
+            print(f"Error reading JSON: {e}")
+
+    # If no JSON, check for text file
+    if not extracted_text:
+        txt_path = Path(temp_dir) / "input_image_outputs.txt"
+        if txt_path.exists():
+            try:
+                with open(txt_path, 'r', encoding='utf-8') as f:
+                    extracted_text = f.read()
+            except Exception as e:
+                print(f"Error reading text file: {e}")
+
+    # If still no text, check for any output files
+    if not extracted_text:
+        output_files = list(Path(temp_dir).glob("*output*"))
+        for file_path in output_files:
+            if file_path.suffix in ['.txt', '.json', '.md']:
+                try:
+                    with open(file_path, 'r', encoding='utf-8') as f:
+                        content = f.read()
+                    if content.strip():
+                        extracted_text = content
+                        break
+                except Exception as e:
+                    print(f"Error reading {file_path}: {e}")
+
+    # If we still don't have text but result is not None, use result directly
+    if not extracted_text and result is not None:
+        if isinstance(result, str):
+            extracted_text = result
+        elif isinstance(result, (list, tuple)) and len(result) > 0:
+            extracted_text = str(result[0])
+        else:
+            extracted_text = str(result)
 
     # Move model back to CPU to free GPU memory
     model.to("cpu")
     torch.cuda.empty_cache()
 
-    # Return the result directly - the model returns the extracted text
-    return result
+    # Return the extracted text
+    return extracted_text if extracted_text else "No text could be extracted from the image. Please try a different preset or check if the image contains readable text."
 
 
 # Create Gradio interface
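
For context, the fallback chain this commit adds is easiest to follow when pulled out of ocr_process into a standalone helper. The sketch below is a minimal refactor of the logic in the diff, assuming only the output file names ("input_image_outputs.json" / ".txt") and the "*output*" glob that appear above; the helper name read_saved_results, its fallback_result parameter, and the synthetic file in the quick check are illustrative, not part of the app.

import json
import tempfile
from pathlib import Path

def read_saved_results(temp_dir, fallback_result=None):
    """Mirror the commit's fallback chain: saved JSON first, then a
    plain-text file, then any *output* file, then the raw infer() result."""
    # 1. Saved JSON results (same keys the commit probes: text/output/result)
    json_path = Path(temp_dir) / "input_image_outputs.json"
    if json_path.exists():
        try:
            data = json.loads(json_path.read_text(encoding='utf-8'))
            if isinstance(data, dict):
                for key in ('text', 'output', 'result'):
                    if key in data:
                        return data[key]
                for value in data.values():  # unknown shape: first long string
                    if isinstance(value, str) and len(value) > 10:
                        return value
            elif isinstance(data, list) and data:
                return str(data[0])
            else:
                return str(data)
        except Exception as e:
            print(f"Error reading JSON: {e}")
    # 2. Plain-text output file
    txt_path = Path(temp_dir) / "input_image_outputs.txt"
    if txt_path.exists():
        try:
            return txt_path.read_text(encoding='utf-8')
        except Exception as e:
            print(f"Error reading text file: {e}")
    # 3. Any other *output* file with a readable suffix
    for file_path in Path(temp_dir).glob("*output*"):
        if file_path.suffix in ('.txt', '.json', '.md'):
            try:
                content = file_path.read_text(encoding='utf-8')
                if content.strip():
                    return content
            except Exception as e:
                print(f"Error reading {file_path}: {e}")
    # 4. Fall back to whatever infer() returned
    if fallback_result is not None:
        if isinstance(fallback_result, str):
            return fallback_result
        if isinstance(fallback_result, (list, tuple)) and fallback_result:
            return str(fallback_result[0])
        return str(fallback_result)
    return ""

# Quick check with a synthetic results file:
with tempfile.TemporaryDirectory() as d:
    (Path(d) / "input_image_outputs.json").write_text(
        json.dumps({"text": "hello world from OCR"}), encoding='utf-8')
    assert read_saved_results(d) == "hello world from OCR"

Written as an early-return helper, the priority order is explicit and each fallback can be tested in isolation; in the app itself the same logic runs inline, before the model is moved back to the CPU.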