akhaliq HF Staff committed on
Commit
bd0cfb9
·
verified ·
1 Parent(s): 35d8939

Update Gradio app with multiple files

Browse files
Files changed (2) hide show
  1. app.py +12 -5
  2. requirements.txt +2 -2
app.py CHANGED
@@ -7,6 +7,9 @@ import os
7
  from typing import Optional
8
  import spaces
9
 
 
 
 
10
  # Load model and tokenizer
11
  model_name = "deepseek-ai/DeepSeek-OCR"
12
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
@@ -45,11 +48,15 @@ def ocr_process(
45
 
46
  try:
47
  # Move model to GPU and set dtype
48
- model.to("cuda")
49
- model.to(torch.bfloat16)
50
  # Save image temporarily
51
  temp_image_path = "/tmp/temp_ocr_image.jpg"
52
  image_input.save(temp_image_path)
 
 
 
 
53
 
54
  # Set prompt based on task type
55
  if task_type == "markdown":
@@ -62,7 +69,7 @@ def ocr_process(
62
  tokenizer,
63
  prompt=prompt,
64
  image_file=temp_image_path,
65
- output_path="",
66
  base_size=base_size,
67
  image_size=image_size,
68
  crop_mode=crop_mode,
@@ -122,7 +129,7 @@ with gr.Blocks(title="DeepSeek OCR") as demo:
122
  step=128,
123
  value=1024,
124
  label="Base Size",
125
- info="Model processing size (larger = better quality, slower)",
126
  )
127
 
128
  image_size = gr.Slider(
@@ -131,7 +138,7 @@ with gr.Blocks(title="DeepSeek OCR") as demo:
131
  step=128,
132
  value=640,
133
  label="Image Size",
134
- info="Target image size",
135
  )
136
 
137
  crop_mode = gr.Checkbox(
 
7
  from typing import Optional
8
  import spaces
9
 
10
+ # Set CUDA device
11
+ os.environ["CUDA_VISIBLE_DEVICES"] = '0'
12
+
13
  # Load model and tokenizer
14
  model_name = "deepseek-ai/DeepSeek-OCR"
15
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 
48
 
49
  try:
50
  # Move model to GPU and set dtype
51
+ model.cuda().to(torch.bfloat16)
52
+
53
  # Save image temporarily
54
  temp_image_path = "/tmp/temp_ocr_image.jpg"
55
  image_input.save(temp_image_path)
56
+
57
+ # Create output directory
58
+ output_path = "/tmp/ocr_output"
59
+ os.makedirs(output_path, exist_ok=True)
60
 
61
  # Set prompt based on task type
62
  if task_type == "markdown":
 
69
  tokenizer,
70
  prompt=prompt,
71
  image_file=temp_image_path,
72
+ output_path=output_path,
73
  base_size=base_size,
74
  image_size=image_size,
75
  crop_mode=crop_mode,
 
129
  step=128,
130
  value=1024,
131
  label="Base Size",
132
+ info="Model processing size - Tiny: 512, Small: 640, Base: 1024, Large: 1280",
133
  )
134
 
135
  image_size = gr.Slider(
 
138
  step=128,
139
  value=640,
140
  label="Image Size",
141
+ info="Target image size - Gundam mode: 640 with crop, others match base_size",
142
  )
143
 
144
  crop_mode = gr.Checkbox(
requirements.txt CHANGED
@@ -2,7 +2,7 @@ torch==2.6.0
2
  transformers==4.46.3
3
  tokenizers==0.20.3
4
  einops
5
- addict
6
  easydict
7
  gradio>=4.0.0
8
  spaces>=0.20.0
@@ -12,4 +12,4 @@ accelerate>=0.24.0
12
  sentencepiece>=0.1.99
13
  protobuf>=3.20.0
14
  torchvision
15
- flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
 
2
  transformers==4.46.3
3
  tokenizers==0.20.3
4
  einops
5
+ addict
6
  easydict
7
  gradio>=4.0.0
8
  spaces>=0.20.0
 
12
  sentencepiece>=0.1.99
13
  protobuf>=3.20.0
14
  torchvision
15
+ flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl