two tabs
Browse files
app.py
CHANGED
|
@@ -34,7 +34,7 @@ def find_result_image(path):
|
|
| 34 |
|
| 35 |
# --- 2. Main Processing Function (UPDATED for multi-bbox drawing) ---
|
| 36 |
@spaces.GPU
|
| 37 |
-
def process_ocr_task(image, model_size,
|
| 38 |
"""
|
| 39 |
Processes an image with DeepSeek-OCR for all supported tasks.
|
| 40 |
Now draws ALL detected bounding boxes for ANY task.
|
|
@@ -129,55 +129,59 @@ def process_ocr_task(image, model_size, task_type, ref_text):
|
|
| 129 |
|
| 130 |
|
| 131 |
# --- 3. Build the Gradio Interface (UPDATED) ---
|
| 132 |
-
with gr.Blocks(title="
|
| 133 |
gr.Markdown(
|
| 134 |
"""
|
| 135 |
# π³ Full Demo of DeepSeek-OCR π³
|
| 136 |
|
| 137 |
-
|
| 138 |
-
1. **Upload an image** using the upload box.
|
| 139 |
-
2. Select a **Resolution**. `Gundam` is recommended for most documents.
|
| 140 |
-
3. Choose a **Task Type**:
|
| 141 |
-
- **π Free OCR**: Extracts raw text from the image.
|
| 142 |
-
- **π Convert to Markdown**: Converts the document into Markdown, preserving structure.
|
| 143 |
-
- **π Parse Figure**: Extracts structured data from charts and figures.
|
| 144 |
-
- **π Locate Object by Reference**: Finds a specific object/text.
|
| 145 |
-
4. If this helpful, please give it a like! π β€οΈ
|
| 146 |
"""
|
| 147 |
)
|
| 148 |
|
| 149 |
-
with gr.
|
| 150 |
-
with gr.
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
gr.Examples(
|
| 170 |
examples=[
|
| 171 |
-
["doc_markdown.png", "Gundam (Recommended)", "π Convert to Markdown"
|
| 172 |
-
["chart.png", "Gundam (Recommended)", "π Parse Figure"
|
| 173 |
-
["teacher.jpg", "Base", "π Locate Object by Reference"
|
| 174 |
-
["math_locate.jpg", "Small", "π Locate Object by Reference"
|
| 175 |
-
["receipt.jpg", "Base", "π Free OCR"
|
| 176 |
],
|
| 177 |
-
inputs=[
|
| 178 |
-
outputs=[
|
| 179 |
fn=process_ocr_task,
|
| 180 |
-
cache_examples=False,
|
| 181 |
)
|
| 182 |
|
| 183 |
# --- 4. Launch the App ---
|
|
|
|
| 34 |
|
| 35 |
# --- 2. Main Processing Function (UPDATED for multi-bbox drawing) ---
|
| 36 |
@spaces.GPU
|
| 37 |
+
def process_ocr_task(image, model_size, ref_text, task_type="π Free OCR"):
|
| 38 |
"""
|
| 39 |
Processes an image with DeepSeek-OCR for all supported tasks.
|
| 40 |
Now draws ALL detected bounding boxes for ANY task.
|
|
|
|
| 129 |
|
| 130 |
|
| 131 |
# --- 3. Build the Gradio Interface (UPDATED) ---
with gr.Blocks(title="Text Extraction Demo", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # π³ Full Demo of DeepSeek-OCR π³

        Use the tabs below to switch between Free OCR and Locate modes.
        """
    )

    with gr.Tabs():
        # --- Tab 1: plain OCR over the whole image ---
        with gr.TabItem("Free OCR"):
            with gr.Row():
                with gr.Column(scale=1):
                    free_image = gr.Image(type="pil", label="πΌοΈ Upload Image", sources=["upload", "clipboard"])
                    free_model_size = gr.Dropdown(
                        choices=["Tiny", "Small", "Base", "Large", "Gundam (Recommended)"],
                        value="Base",
                        label="βοΈ Resolution Size",
                    )
                    free_btn = gr.Button("Run Free OCR", variant="primary")

                with gr.Column(scale=2):
                    free_output_text = gr.Textbox(label="π Text Result", lines=15, show_copy_button=True)
                    free_output_image = gr.Image(label="πΌοΈ Image Result (if any)", type="pil")

            # Hidden components carrying the constant ref_text / task_type
            # arguments for this tab. Gradio event `inputs` must be components:
            # the previous code passed the bare string "Free OCR" (which also
            # mismatched the "π Free OCR" task label used elsewhere) and
            # constructed a throwaway Textbox inline inside the inputs list.
            free_ref_text = gr.Textbox(value="", visible=False)
            free_task_type = gr.Textbox(value="π Free OCR", visible=False)

            # Wire Free OCR button
            free_btn.click(
                fn=process_ocr_task,
                inputs=[free_image, free_model_size, free_ref_text, free_task_type],
                outputs=[free_output_text, free_output_image],
            )

        # --- Tab 2: locate a referenced object/text in the image ---
        with gr.TabItem("Locate"):
            with gr.Row():
                with gr.Column(scale=1):
                    loc_image = gr.Image(type="pil", label="πΌοΈ Upload Image", sources=["upload", "clipboard"])
                    loc_model_size = gr.Dropdown(
                        choices=["Tiny", "Small", "Base", "Large", "Gundam (Recommended)"],
                        value="Base",
                        label="βοΈ Resolution Size",
                    )
                    # Editable reference text. The previous code replaced this
                    # component with the bare string "pets", which is not a
                    # valid event input and removed the user's ability to type
                    # a reference; the string is kept as the default value.
                    ref_text_input = gr.Textbox(
                        label="π Reference Text (what to locate)",
                        value="pets",
                        placeholder="e.g., the teacher, 20-10, a red car...",
                    )
                    loc_btn = gr.Button("Locate", variant="primary")

                with gr.Column(scale=2):
                    loc_output_text = gr.Textbox(label="π Text Result", lines=15, show_copy_button=True)
                    loc_output_image = gr.Image(label="πΌοΈ Image Result (if any)", type="pil")

            # Hidden constant holding this tab's task type.
            loc_task_type = gr.Textbox(value="π Locate Object by Reference", visible=False)

            # Wire Locate button
            loc_btn.click(
                fn=process_ocr_task,
                inputs=[loc_image, loc_model_size, ref_text_input, loc_task_type],
                outputs=[loc_output_text, loc_output_image],
            )

    # Keep examples (they run process_ocr_task directly). Each example row
    # supplies all four positional arguments (image, model_size, ref_text,
    # task_type), so `inputs` must name four components as well — the previous
    # code listed only three, one of them a plain string.
    gr.Examples(
        examples=[
            ["doc_markdown.png", "Gundam (Recommended)", "", "π Convert to Markdown"],
            ["chart.png", "Gundam (Recommended)", "", "π Parse Figure"],
            ["teacher.jpg", "Base", "the teacher", "π Locate Object by Reference"],
            ["math_locate.jpg", "Small", "20-10", "π Locate Object by Reference"],
            ["receipt.jpg", "Base", "", "π Free OCR"],
        ],
        inputs=[free_image, free_model_size, free_ref_text, free_task_type],
        outputs=[free_output_text, free_output_image],
        fn=process_ocr_task,
        cache_examples=False,
    )

# --- 4. Launch the App ---
|