OCR-DEMO

Running on Zero

App Files Community

erow committed on Oct 23

Commit

51c0d3d

1 Parent(s): 20b35e8

two tabs

Browse files

Files changed (1) hide show

app.py +43 -39

app.py CHANGED Viewed

@@ -34,7 +34,7 @@ def find_result_image(path):
 # --- 2. Main Processing Function (UPDATED for multi-bbox drawing) ---
 @spaces.GPU
-def process_ocr_task(image, model_size, task_type, ref_text):
     """
     Processes an image with DeepSeek-OCR for all supported tasks.
     Now draws ALL detected bounding boxes for ANY task.
@@ -129,55 +129,59 @@ def process_ocr_task(image, model_size, task_type, ref_text):
 # --- 3. Build the Gradio Interface (UPDATED) ---
-with gr.Blocks(title="🐳DeepSeek-OCR🐳", theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         """
         # 🐳 Full Demo of DeepSeek-OCR 🐳
-        **💡 How to use:**
-        1.  **Upload an image** using the upload box.
-        2.  Select a **Resolution**. `Gundam` is recommended for most documents.
-        3.  Choose a **Task Type**:
-            - **📝 Free OCR**: Extracts raw text from the image.
-            - **📄 Convert to Markdown**: Converts the document into Markdown, preserving structure.
-            - **📈 Parse Figure**: Extracts structured data from charts and figures.
-            - **🔍 Locate Object by Reference**: Finds a specific object/text.
-        4. If this helpful, please give it a like! 🙏 ❤️
         """
     )
-    with gr.Row():
-        with gr.Column(scale=1):
-            image_input = gr.Image(type="pil", label="🖼️ Upload Image", sources=["upload", "clipboard"])
-            model_size = gr.Dropdown(choices=["Tiny", "Small", "Base", "Large", "Gundam (Recommended)"], value="Gundam (Recommended)", label="⚙️ Resolution Size")
-            task_type = gr.Dropdown(choices=["📝 Free OCR", "📄 Convert to Markdown", "📈 Parse Figure", "🔍 Locate Object by Reference"], value="📄 Convert to Markdown", label="🚀 Task Type")
-            ref_text_input = gr.Textbox(label="📝 Reference Text (for Locate task)", placeholder="e.g., the teacher, 20-10, a red car...", visible=False)
-            submit_btn = gr.Button("Process Image", variant="primary")
-        with gr.Column(scale=2):
-            output_text = gr.Textbox(label="📄 Text Result", lines=15, show_copy_button=True)
-            output_image = gr.Image(label="🖼️ Image Result (if any)", type="pil")
-    # --- UI Interaction Logic ---
-    def toggle_ref_text_visibility(task):
-        return gr.Textbox(visible=True) if task == "🔍 Locate Object by Reference" else gr.Textbox(visible=False)
-    task_type.change(fn=toggle_ref_text_visibility, inputs=task_type, outputs=ref_text_input)
-    submit_btn.click(fn=process_ocr_task, inputs=[image_input, model_size, task_type, ref_text_input], outputs=[output_text, output_image])
-    # --- UPDATED Example Images and Tasks ---
     gr.Examples(
         examples=[
-            ["doc_markdown.png", "Gundam (Recommended)", "📄 Convert to Markdown", ""],
-            ["chart.png", "Gundam (Recommended)", "📈 Parse Figure", ""],
-            ["teacher.jpg", "Base", "🔍 Locate Object by Reference", "the teacher"],
-            ["math_locate.jpg", "Small", "🔍 Locate Object by Reference", "20-10"],
-            ["receipt.jpg", "Base", "📝 Free OCR", ""],
         ],
-        inputs=[image_input, model_size, task_type, ref_text_input],
-        outputs=[output_text, output_image],
         fn=process_ocr_task,
-        cache_examples=False, # Disable caching to ensure examples run every time
     )
 # --- 4. Launch the App ---

 # --- 2. Main Processing Function (UPDATED for multi-bbox drawing) ---
 @spaces.GPU
+def process_ocr_task(image, model_size, ref_text, task_type="📝 Free OCR"):
     """
     Processes an image with DeepSeek-OCR for all supported tasks.
     Now draws ALL detected bounding boxes for ANY task.
 # --- 3. Build the Gradio Interface (UPDATED) ---
+with gr.Blocks(title="Text Extraction Demo", theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         """
         # 🐳 Full Demo of DeepSeek-OCR 🐳
+        Use the tabs below to switch between Free OCR and Locate modes.
         """
     )
+    with gr.Tabs():
+        with gr.TabItem("Free OCR"):
+            with gr.Row():
+                with gr.Column(scale=1):
+                    free_image = gr.Image(type="pil", label="🖼️ Upload Image", sources=["upload", "clipboard"])
+                    free_model_size = gr.Dropdown(choices=["Tiny", "Small", "Base", "Large", "Gundam (Recommended)"], value="Base", label="⚙️ Resolution Size")
+                    free_btn = gr.Button("Run Free OCR", variant="primary")
+                with gr.Column(scale=2):
+                    free_output_text = gr.Textbox(label="📄 Text Result", lines=15, show_copy_button=True)
+                    free_output_image = gr.Image(label="🖼️ Image Result (if any)", type="pil")
+            # Wire Free OCR button
+            free_btn.click(fn=process_ocr_task, inputs=[free_image, free_model_size, gr.Textbox(value="", visible=False), "Free OCR"], outputs=[free_output_text, free_output_image])
+        with gr.TabItem("Locate"):
+            with gr.Row():
+                with gr.Column(scale=1):
+                    loc_image = gr.Image(type="pil", label="🖼️ Upload Image", sources=["upload", "clipboard"])
+                    loc_model_size = gr.Dropdown(choices=["Tiny", "Small", "Base", "Large", "Gundam (Recommended)"], value="Base", label="⚙️ Resolution Size")
+                    # ref_text_input = gr.Textbox(label="📝 Reference Text (what to locate)", placeholder="e.g., the teacher, 20-10, a red car...")
+                    ref_text_input = "pets"
+                    loc_btn = gr.Button("Locate", variant="primary")
+                with gr.Column(scale=2):
+                    loc_output_text = gr.Textbox(label="📄 Text Result", lines=15, show_copy_button=True)
+                    loc_output_image = gr.Image(label="🖼️ Image Result (if any)", type="pil")
+            # Wire Locate button
+            loc_btn.click(fn=process_ocr_task, inputs=[loc_image, loc_model_size, ref_text_input, "🔍 Locate Object by Reference"], outputs=[loc_output_text, loc_output_image])
+    # Keep examples (they'll run process_ocr_task directly) - provide a compact examples widget pointing to the free tab inputs
     gr.Examples(
         examples=[
+            ["doc_markdown.png", "Gundam (Recommended)", "", "📄 Convert to Markdown"],
+            ["chart.png", "Gundam (Recommended)", "", "📈 Parse Figure"],
+            ["teacher.jpg", "Base", "the teacher", "🔍 Locate Object by Reference"],
+            ["math_locate.jpg", "Small", "20-10", "🔍 Locate Object by Reference"],
+            ["receipt.jpg", "Base", "", "📝 Free OCR"],
         ],
+        inputs=[free_image, free_model_size, ref_text_input,],
+        outputs=[free_output_text, free_output_image],
         fn=process_ocr_task,
+        cache_examples=False,
     )
 # --- 4. Launch the App ---