erow committed on
Commit
51c0d3d
·
1 Parent(s): 20b35e8
Files changed (1) hide show
  1. app.py +43 -39
app.py CHANGED
@@ -34,7 +34,7 @@ def find_result_image(path):
34
 
35
  # --- 2. Main Processing Function (UPDATED for multi-bbox drawing) ---
36
  @spaces.GPU
37
- def process_ocr_task(image, model_size, task_type, ref_text):
38
  """
39
  Processes an image with DeepSeek-OCR for all supported tasks.
40
  Now draws ALL detected bounding boxes for ANY task.
@@ -129,55 +129,59 @@ def process_ocr_task(image, model_size, task_type, ref_text):
129
 
130
 
131
  # --- 3. Build the Gradio Interface (UPDATED) ---
132
- with gr.Blocks(title="🐳DeepSeek-OCR🐳", theme=gr.themes.Soft()) as demo:
133
  gr.Markdown(
134
  """
135
  # 🐳 Full Demo of DeepSeek-OCR 🐳
136
 
137
- **💡 How to use:**
138
- 1. **Upload an image** using the upload box.
139
- 2. Select a **Resolution**. `Gundam` is recommended for most documents.
140
- 3. Choose a **Task Type**:
141
- - **πŸ“ Free OCR**: Extracts raw text from the image.
142
- - **πŸ“„ Convert to Markdown**: Converts the document into Markdown, preserving structure.
143
- - **πŸ“ˆ Parse Figure**: Extracts structured data from charts and figures.
144
- - **πŸ” Locate Object by Reference**: Finds a specific object/text.
145
- 4. If this helpful, please give it a like! πŸ™ ❀️
146
  """
147
  )
148
 
149
- with gr.Row():
150
- with gr.Column(scale=1):
151
- image_input = gr.Image(type="pil", label="🖼️ Upload Image", sources=["upload", "clipboard"])
152
- model_size = gr.Dropdown(choices=["Tiny", "Small", "Base", "Large", "Gundam (Recommended)"], value="Gundam (Recommended)", label="⚙️ Resolution Size")
153
- task_type = gr.Dropdown(choices=["📝 Free OCR", "📄 Convert to Markdown", "📈 Parse Figure", "🔍 Locate Object by Reference"], value="📄 Convert to Markdown", label="🚀 Task Type")
154
- ref_text_input = gr.Textbox(label="📝 Reference Text (for Locate task)", placeholder="e.g., the teacher, 20-10, a red car...", visible=False)
155
- submit_btn = gr.Button("Process Image", variant="primary")
156
-
157
- with gr.Column(scale=2):
158
- output_text = gr.Textbox(label="📄 Text Result", lines=15, show_copy_button=True)
159
- output_image = gr.Image(label="🖼️ Image Result (if any)", type="pil")
160
-
161
- # --- UI Interaction Logic ---
162
- def toggle_ref_text_visibility(task):
163
- return gr.Textbox(visible=True) if task == "🔍 Locate Object by Reference" else gr.Textbox(visible=False)
164
-
165
- task_type.change(fn=toggle_ref_text_visibility, inputs=task_type, outputs=ref_text_input)
166
- submit_btn.click(fn=process_ocr_task, inputs=[image_input, model_size, task_type, ref_text_input], outputs=[output_text, output_image])
167
-
168
- # --- UPDATED Example Images and Tasks ---
 
 
 
 
 
 
 
 
 
 
 
 
169
  gr.Examples(
170
  examples=[
171
- ["doc_markdown.png", "Gundam (Recommended)", "📄 Convert to Markdown", ""],
172
- ["chart.png", "Gundam (Recommended)", "📈 Parse Figure", ""],
173
- ["teacher.jpg", "Base", "🔍 Locate Object by Reference", "the teacher"],
174
- ["math_locate.jpg", "Small", "🔍 Locate Object by Reference", "20-10"],
175
- ["receipt.jpg", "Base", "📝 Free OCR", ""],
176
  ],
177
- inputs=[image_input, model_size, task_type, ref_text_input],
178
- outputs=[output_text, output_image],
179
  fn=process_ocr_task,
180
- cache_examples=False, # Disable caching to ensure examples run every time
181
  )
182
 
183
  # --- 4. Launch the App ---
 
34
 
35
  # --- 2. Main Processing Function (UPDATED for multi-bbox drawing) ---
36
  @spaces.GPU
37
+ def process_ocr_task(image, model_size, ref_text, task_type="📝 Free OCR"):
38
  """
39
  Processes an image with DeepSeek-OCR for all supported tasks.
40
  Now draws ALL detected bounding boxes for ANY task.
 
129
 
130
 
131
  # --- 3. Build the Gradio Interface (UPDATED) ---
132
+ with gr.Blocks(title="Text Extraction Demo", theme=gr.themes.Soft()) as demo:
133
  gr.Markdown(
134
  """
135
  # 🐳 Full Demo of DeepSeek-OCR 🐳
136
 
137
+ Use the tabs below to switch between Free OCR and Locate modes.
 
 
 
 
 
 
 
 
138
  """
139
  )
140
 
141
+ with gr.Tabs():
142
+ with gr.TabItem("Free OCR"):
143
+ with gr.Row():
144
+ with gr.Column(scale=1):
145
+ free_image = gr.Image(type="pil", label="🖼️ Upload Image", sources=["upload", "clipboard"])
146
+ free_model_size = gr.Dropdown(choices=["Tiny", "Small", "Base", "Large", "Gundam (Recommended)"], value="Base", label="⚙️ Resolution Size")
147
+ free_btn = gr.Button("Run Free OCR", variant="primary")
148
+
149
+ with gr.Column(scale=2):
150
+ free_output_text = gr.Textbox(label="📄 Text Result", lines=15, show_copy_button=True)
151
+ free_output_image = gr.Image(label="🖼️ Image Result (if any)", type="pil")
152
+
153
+ # Wire Free OCR button
154
+ free_btn.click(fn=process_ocr_task, inputs=[free_image, free_model_size, gr.Textbox(value="", visible=False), "Free OCR"], outputs=[free_output_text, free_output_image])
155
+
156
+ with gr.TabItem("Locate"):
157
+ with gr.Row():
158
+ with gr.Column(scale=1):
159
+ loc_image = gr.Image(type="pil", label="🖼️ Upload Image", sources=["upload", "clipboard"])
160
+ loc_model_size = gr.Dropdown(choices=["Tiny", "Small", "Base", "Large", "Gundam (Recommended)"], value="Base", label="⚙️ Resolution Size")
161
+ # ref_text_input = gr.Textbox(label="📝 Reference Text (what to locate)", placeholder="e.g., the teacher, 20-10, a red car...")
162
+ ref_text_input = "pets"
163
+ loc_btn = gr.Button("Locate", variant="primary")
164
+
165
+ with gr.Column(scale=2):
166
+ loc_output_text = gr.Textbox(label="📄 Text Result", lines=15, show_copy_button=True)
167
+ loc_output_image = gr.Image(label="🖼️ Image Result (if any)", type="pil")
168
+
169
+ # Wire Locate button
170
+ loc_btn.click(fn=process_ocr_task, inputs=[loc_image, loc_model_size, ref_text_input, "🔍 Locate Object by Reference"], outputs=[loc_output_text, loc_output_image])
171
+
172
+ # Keep examples (they'll run process_ocr_task directly) - provide a compact examples widget pointing to the free tab inputs
173
  gr.Examples(
174
  examples=[
175
+ ["doc_markdown.png", "Gundam (Recommended)", "", "📄 Convert to Markdown"],
176
+ ["chart.png", "Gundam (Recommended)", "", "📈 Parse Figure"],
177
+ ["teacher.jpg", "Base", "the teacher", "🔍 Locate Object by Reference"],
178
+ ["math_locate.jpg", "Small", "20-10", "🔍 Locate Object by Reference"],
179
+ ["receipt.jpg", "Base", "", "📝 Free OCR"],
180
  ],
181
+ inputs=[free_image, free_model_size, ref_text_input,],
182
+ outputs=[free_output_text, free_output_image],
183
  fn=process_ocr_task,
184
+ cache_examples=False,
185
  )
186
 
187
  # --- 4. Launch the App ---