lucacadalora committed on
Commit
8aaa52d
·
verified ·
1 Parent(s): 604587a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +158 -90
app.py CHANGED
@@ -94,23 +94,35 @@ def extract_images_from_pdf(pdf_path):
94
  return images_by_page
95
 
96
 
97
- def image_to_base64(pil_image, format='PNG'):
98
  """
99
  Convert PIL Image to base64 string for markdown embedding
 
100
  """
 
 
 
 
 
101
  buffered = BytesIO()
102
- pil_image.save(buffered, format=format)
 
 
 
 
 
 
 
 
103
  img_str = base64.b64encode(buffered.getvalue()).decode()
104
  return f"data:image/{format.lower()};base64,{img_str}"
105
 
106
 
107
  def detect_figure_regions(text_result, original_image):
108
  """
109
- Detect figure regions from OCR output and crop them
110
  Returns list of cropped figure images
111
  """
112
- # Look for figure-related patterns in the text
113
- # This is a heuristic approach - adjust based on your needs
114
  figure_images = []
115
 
116
  # Pattern to detect bounding boxes (if model returns them)
@@ -200,40 +212,40 @@ def process_single_page(image, model_runtime, tokenizer, model_size, task_type,
200
  # ===== Embed images if requested =====
201
  if embed_images and markdown_content:
202
  # Check if markdown mentions figures/charts/images
203
- figure_keywords = ['figure', 'chart', 'graph', 'diagram', 'image', 'plot', 'illustration']
204
  has_figure_mention = any(keyword in markdown_content.lower() for keyword in figure_keywords)
205
 
206
  if has_figure_mention:
207
- # Try to detect figure regions
208
  figure_images = detect_figure_regions(plain_text_result, image)
209
 
210
- # If no figures detected by bounding boxes, embed the whole page as figure
211
- if not figure_images and has_figure_mention:
212
- # Embed full page image where figures are mentioned
213
- base64_img = image_to_base64(image, format='JPEG')
214
- figure_markdown = f"\n\n![Page {page_num} Visual Content]({base64_img})\n\n"
215
-
216
- # Insert image after first mention of figure/chart
217
- for keyword in figure_keywords:
218
- if keyword in markdown_content.lower():
219
- # Find the line with the keyword
220
- lines = markdown_content.split('\n')
221
- for i, line in enumerate(lines):
222
- if keyword in line.lower():
223
- # Insert image after this line
224
- lines.insert(i + 1, figure_markdown)
225
- markdown_content = '\n'.join(lines)
226
- break
227
- break
228
-
229
- # If we found specific figure regions, embed them
230
- elif figure_images:
231
  for idx, fig_img in enumerate(figure_images):
232
- base64_img = image_to_base64(fig_img, format='PNG')
233
- fig_markdown = f"\n\n![Figure {idx+1} from Page {page_num}]({base64_img})\n\n"
234
- markdown_content += fig_markdown
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
 
236
- return markdown_content
237
 
238
 
239
  # ===== Main Processing Function =====
@@ -241,9 +253,10 @@ def process_single_page(image, model_runtime, tokenizer, model_size, task_type,
241
  def process_pdf(pdf_file, model_size, task_type, ref_text, is_eval_mode, embed_images, progress=gr.Progress()):
242
  """
243
  Process PDF with DeepSeek-OCR and return combined markdown from all pages.
 
244
  """
245
  if pdf_file is None:
246
- return "Please upload a PDF file first."
247
 
248
  # handle CPU/GPU
249
  if torch.cuda.is_available():
@@ -258,13 +271,17 @@ def process_pdf(pdf_file, model_size, task_type, ref_text, is_eval_mode, embed_i
258
  total_pages = len(images)
259
 
260
  if total_pages == 0:
261
- return "No pages found in the PDF."
262
 
263
  # Extract embedded images if needed
264
  embedded_images = {}
265
  if embed_images:
266
- progress(0.05, desc="Extracting embedded images...")
267
- embedded_images = extract_images_from_pdf(pdf_file.name)
 
 
 
 
268
 
269
  progress(0.1, desc=f"Found {total_pages} pages. Starting OCR...")
270
 
@@ -279,7 +296,7 @@ def process_pdf(pdf_file, model_size, task_type, ref_text, is_eval_mode, embed_i
279
  desc=f"Processing page {page_num}/{total_pages}..."
280
  )
281
 
282
- markdown_content = process_single_page(
283
  image,
284
  model_runtime,
285
  tokenizer,
@@ -294,12 +311,15 @@ def process_pdf(pdf_file, model_size, task_type, ref_text, is_eval_mode, embed_i
294
 
295
  # Add embedded images from PDF if any
296
  if embed_images and (page_num - 1) in embedded_images:
297
- markdown_content += "\n\n### Embedded Images from this Page\n\n"
298
  for idx, img in enumerate(embedded_images[page_num - 1]):
299
- base64_img = image_to_base64(img, format='PNG')
300
- markdown_content += f"![Embedded Image {idx+1}]({base64_img})\n\n"
 
 
 
301
 
302
- # Add page separator
303
  page_header = f"\n\n---\n\n# Page {page_num}\n\n"
304
  all_markdown_results.append(page_header + markdown_content)
305
 
@@ -309,24 +329,61 @@ def process_pdf(pdf_file, model_size, task_type, ref_text, is_eval_mode, embed_i
309
  gc.collect()
310
 
311
  except Exception as e:
312
- error_msg = f"\n\n---\n\n# Page {page_num}\n\n**Error processing this page:** {str(e)}\n\n"
313
  all_markdown_results.append(error_msg)
314
  print(f"Error on page {page_num}: {str(e)}")
 
 
 
 
 
315
  continue
316
 
317
  # Combine all results
318
- progress(1.0, desc="Finalizing...")
319
  combined_markdown = "\n\n".join(all_markdown_results)
320
 
321
- # Add document header
322
- final_output = f"# Document OCR Results\n\n**Total Pages:** {total_pages}\n\n{combined_markdown}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
 
324
- return final_output
325
 
326
  except Exception as e:
327
- error_message = f"Error processing PDF: {str(e)}\n\nPlease try:\n- Using a smaller model size\n- Processing fewer pages\n- Checking if the PDF is corrupted"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
328
  print(f"Fatal error: {str(e)}")
329
- return error_message
 
 
330
 
331
 
332
  # ===== Theme and UI =====
@@ -334,6 +391,7 @@ theme = Soft(
334
  font=fonts.GoogleFont("Inter"),
335
  font_mono=fonts.GoogleFont("JetBrains Mono"),
336
  )
 
337
  custom_css = """
338
  .gradio-container, body {
339
  font-family: 'Inter', ui-sans-serif, system-ui, -apple-system, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, 'Noto Sans', 'Apple Color Emoji','Segoe UI Emoji','Segoe UI Symbol','Noto Color Emoji' !important;
@@ -341,6 +399,7 @@ custom_css = """
341
  .prose h1 { font-weight: 800; letter-spacing: -0.02em; }
342
  .prose h2, .prose h3 { font-weight: 700; letter-spacing: -0.01em; }
343
  .gr-button { border-radius: 12px; font-weight: 600; }
 
344
  """
345
 
346
 
@@ -357,27 +416,36 @@ with gr.Blocks(
357
  Upload a PDF to extract text and convert to Markdown using **DeepSeek-OCR**.
358
  Each page is processed sequentially and combined into a single markdown document.
359
 
360
- **NEW:** Now supports embedding images/charts directly in markdown output!
 
 
 
 
 
 
 
 
361
 
362
- **Model Sizes:**
363
- - **Tiny** — Fastest, lower accuracy (512×512) - Best for large PDFs
364
- - **Small** — Fast, good accuracy (640×640) - Good for 20+ pages
365
  - **Base** — Balanced performance (1024×1024) - Good for 10-20 pages
366
  - **Large** — Best accuracy, slower (1280×1280) - Best for <10 pages
367
  - **Gundam (Recommended)** — Optimized for documents (1024 base, 640 image, crop mode)
368
 
369
- **Tips for large PDFs:**
370
- - Use Tiny or Small model for 20+ pages
371
- - Enable "Embed Images" to include charts/figures in markdown
372
- - Processing time: ~2-5 seconds per page depending on model size
 
373
  - Maximum recommended: 50 pages at once
 
374
  """
375
  )
376
 
377
  with gr.Row():
378
  with gr.Column(scale=1):
379
  pdf_input = gr.File(
380
- label="Upload PDF",
381
  file_types=[".pdf"],
382
  type="filepath"
383
  )
@@ -385,7 +453,7 @@ with gr.Blocks(
385
  model_size = gr.Dropdown(
386
  choices=["Tiny", "Small", "Base", "Large", "Gundam (Recommended)"],
387
  value="Small",
388
- label="Model Size",
389
  info="Use Tiny/Small for large PDFs (20+ pages)"
390
  )
391
 
@@ -397,58 +465,65 @@ with gr.Blocks(
397
  "🔍 Locate Object by Reference",
398
  ],
399
  value="📄 Convert to Markdown",
400
- label="Task Type",
401
  )
402
 
403
  ref_text_input = gr.Textbox(
404
- label="Reference Text (for Locate task)",
405
  placeholder="e.g., 'the teacher', '20-10', 'a red car'...",
406
  visible=False,
407
  )
408
 
409
- eval_mode_checkbox = gr.Checkbox(
410
- value=False,
411
- label="Enable Evaluation Mode",
412
- info="Returns only plain text (faster).",
413
- )
414
-
415
- embed_images_checkbox = gr.Checkbox(
416
- value=True,
417
- label="🖼️ Embed Images/Charts in Markdown",
418
- info="Include images and charts as base64 in the markdown output"
419
- )
 
420
 
421
  submit_btn = gr.Button("🚀 Process PDF", variant="primary", size="lg")
422
 
423
  gr.Markdown(
424
  """
425
  ---
426
- **Processing Status:**
427
- Watch the progress bar above for real-time updates.
428
 
429
- **Note:** Embedding images will increase output size and processing time.
 
 
 
 
 
 
 
 
430
  """
431
  )
432
 
433
  with gr.Column(scale=2):
434
- gr.Markdown("### 📝 Markdown Output")
435
  output_markdown_preview = gr.Markdown(
436
  label="Rendered Markdown",
437
- value="*Upload a PDF and click 'Process PDF' to see results here.*"
438
  )
439
 
440
- gr.Markdown("### 📄 Markdown Source (Copy/Download)")
441
  output_text = gr.Textbox(
442
  label="Raw Markdown",
443
  lines=25,
444
  show_copy_button=True,
445
  interactive=False,
446
- placeholder="Markdown source will appear here..."
447
  )
448
 
449
  # show/hide reference text box based on selected task
450
  def toggle_ref_text_visibility(task):
451
- return gr.Textbox(visible=True) if task == "🔍 Locate Object by Reference" else gr.Textbox(visible=False)
452
 
453
  task_type.change(
454
  fn=toggle_ref_text_visibility,
@@ -456,18 +531,10 @@ with gr.Blocks(
456
  outputs=ref_text_input,
457
  )
458
 
459
- def update_outputs(markdown_text):
460
- """Update both markdown preview and raw text"""
461
- return markdown_text, markdown_text
462
-
463
  submit_btn.click(
464
  fn=process_pdf,
465
  inputs=[pdf_input, model_size, task_type, ref_text_input, eval_mode_checkbox, embed_images_checkbox],
466
- outputs=output_text,
467
- ).then(
468
- fn=update_outputs,
469
- inputs=output_text,
470
- outputs=[output_markdown_preview, output_text]
471
  )
472
 
473
 
@@ -479,6 +546,7 @@ if __name__ == "__main__":
479
  default_concurrency_limit=2
480
  )
481
  demo.launch(
482
- max_threads=40, # Increase thread limit
483
- show_error=True # Show errors in UI
 
484
  )
 
94
  return images_by_page
95
 
96
 
97
def image_to_base64(pil_image, format='JPEG', max_size=(1200, 1200)):
    """
    Convert a PIL Image to a base64 data URI for markdown embedding.

    Downscales the image (preserving aspect ratio) when either dimension
    exceeds ``max_size``, and converts alpha/palette modes to RGB when
    saving as JPEG, which supports neither.

    Args:
        pil_image: PIL.Image.Image to encode.
        format: Pillow format name, e.g. ``'JPEG'`` or ``'PNG'``.
        max_size: (width, height) ceiling before downscaling kicks in.

    Returns:
        str: ``data:image/<fmt>;base64,<payload>`` data URI.
    """
    # Resize if image is too large. thumbnail() mutates in place, so work
    # on a copy to avoid clobbering the caller's image.
    if pil_image.size[0] > max_size[0] or pil_image.size[1] > max_size[1]:
        pil_image = pil_image.copy()
        pil_image.thumbnail(max_size, Image.Resampling.LANCZOS)

    # JPEG cannot store alpha or palette data: Pillow raises OSError for
    # mode 'P'/'RGBA'/'LA' images, which are common in PDF extractions.
    # Flatten transparency onto a white background, else plain-convert.
    if format == 'JPEG' and pil_image.mode != 'RGB':
        if pil_image.mode in ('RGBA', 'LA', 'P'):
            rgba = pil_image.convert('RGBA')
            rgb_image = Image.new('RGB', rgba.size, (255, 255, 255))
            # Use the alpha channel as the paste mask to composite.
            rgb_image.paste(rgba, mask=rgba.split()[3])
            pil_image = rgb_image
        else:
            pil_image = pil_image.convert('RGB')

    buffered = BytesIO()
    # Only JPEG honours `quality`; omit the kwarg entirely for other
    # formats rather than passing quality=None into save().
    save_kwargs = {'quality': 85} if format == 'JPEG' else {}
    pil_image.save(buffered, format=format, **save_kwargs)

    img_str = base64.b64encode(buffered.getvalue()).decode()
    return f"data:image/{format.lower()};base64,{img_str}"
119
 
120
 
121
  def detect_figure_regions(text_result, original_image):
122
  """
123
+ Detect figure regions from OCR output using bounding boxes
124
  Returns list of cropped figure images
125
  """
 
 
126
  figure_images = []
127
 
128
  # Pattern to detect bounding boxes (if model returns them)
 
212
  # ===== Embed images if requested =====
213
  if embed_images and markdown_content:
214
  # Check if markdown mentions figures/charts/images
215
+ figure_keywords = ['figure', 'chart', 'graph', 'diagram', 'image', 'plot', 'illustration', 'table', 'screenshot']
216
  has_figure_mention = any(keyword in markdown_content.lower() for keyword in figure_keywords)
217
 
218
  if has_figure_mention:
219
+ # Try to detect figure regions from bounding boxes
220
  figure_images = detect_figure_regions(plain_text_result, image)
221
 
222
+ # If specific figures detected, embed them
223
+ if figure_images:
224
+ figures_markdown = "\n\n### Detected Figures\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  for idx, fig_img in enumerate(figure_images):
226
+ try:
227
+ base64_img = image_to_base64(fig_img, format='PNG')
228
+ figures_markdown += f"![Figure {idx+1} from Page {page_num}]({base64_img})\n\n"
229
+ except Exception as e:
230
+ print(f"Error embedding figure {idx+1}: {e}")
231
+ markdown_content += figures_markdown
232
+ else:
233
+ # No specific regions detected, but figures mentioned
234
+ # Embed full page image for context
235
+ try:
236
+ base64_img = image_to_base64(image, format='JPEG')
237
+ page_image_markdown = f"\n\n### Page {page_num} Visual Content\n\n![Page {page_num} Full View]({base64_img})\n\n"
238
+
239
+ # Insert image after first paragraph or at the beginning
240
+ lines = markdown_content.split('\n\n', 1)
241
+ if len(lines) > 1:
242
+ markdown_content = lines[0] + page_image_markdown + lines[1]
243
+ else:
244
+ markdown_content = page_image_markdown + markdown_content
245
+ except Exception as e:
246
+ print(f"Error embedding page image: {e}")
247
 
248
+ return markdown_content, plain_text_result
249
 
250
 
251
  # ===== Main Processing Function =====
 
253
  def process_pdf(pdf_file, model_size, task_type, ref_text, is_eval_mode, embed_images, progress=gr.Progress()):
254
  """
255
  Process PDF with DeepSeek-OCR and return combined markdown from all pages.
256
+ Includes both visual images and extracted text content.
257
  """
258
  if pdf_file is None:
259
+ return "Please upload a PDF file first.", "Please upload a PDF file first."
260
 
261
  # handle CPU/GPU
262
  if torch.cuda.is_available():
 
271
  total_pages = len(images)
272
 
273
  if total_pages == 0:
274
+ return "No pages found in the PDF.", "No pages found in the PDF."
275
 
276
  # Extract embedded images if needed
277
  embedded_images = {}
278
  if embed_images:
279
+ progress(0.05, desc="Extracting embedded images from PDF...")
280
+ try:
281
+ embedded_images = extract_images_from_pdf(pdf_file.name)
282
+ print(f"Found embedded images on {len(embedded_images)} pages")
283
+ except Exception as e:
284
+ print(f"Could not extract embedded images: {e}")
285
 
286
  progress(0.1, desc=f"Found {total_pages} pages. Starting OCR...")
287
 
 
296
  desc=f"Processing page {page_num}/{total_pages}..."
297
  )
298
 
299
+ markdown_content, plain_text = process_single_page(
300
  image,
301
  model_runtime,
302
  tokenizer,
 
311
 
312
  # Add embedded images from PDF if any
313
  if embed_images and (page_num - 1) in embedded_images:
314
+ markdown_content += "\n\n### Embedded Images from PDF\n\n"
315
  for idx, img in enumerate(embedded_images[page_num - 1]):
316
+ try:
317
+ base64_img = image_to_base64(img, format='PNG')
318
+ markdown_content += f"![Embedded Image {idx+1} - Page {page_num}]({base64_img})\n\n"
319
+ except Exception as e:
320
+ print(f"Error embedding PDF image {idx+1}: {e}")
321
 
322
+ # Add page separator and content
323
  page_header = f"\n\n---\n\n# Page {page_num}\n\n"
324
  all_markdown_results.append(page_header + markdown_content)
325
 
 
329
  gc.collect()
330
 
331
  except Exception as e:
332
+ error_msg = f"\n\n---\n\n# Page {page_num}\n\n**⚠️ Error processing this page:** {str(e)}\n\n"
333
  all_markdown_results.append(error_msg)
334
  print(f"Error on page {page_num}: {str(e)}")
335
+
336
+ # Clear memory even on error
337
+ if torch.cuda.is_available():
338
+ torch.cuda.empty_cache()
339
+ gc.collect()
340
  continue
341
 
342
  # Combine all results
343
+ progress(1.0, desc="Finalizing document...")
344
  combined_markdown = "\n\n".join(all_markdown_results)
345
 
346
+ # Add document header with metadata
347
+ image_status = " Enabled" if embed_images else "❌ Disabled"
348
+ final_output = f"""# 📄 Document OCR Results
349
+
350
+ **Total Pages:** {total_pages}
351
+ **Model Size:** {model_size}
352
+ **Task Type:** {task_type}
353
+ **Image Embedding:** {image_status}
354
+
355
+ ---
356
+
357
+ {combined_markdown}
358
+
359
+ ---
360
+
361
+ **End of Document** - Processed {total_pages} pages successfully.
362
+ """
363
 
364
+ return final_output, final_output # Return twice: once for preview, once for raw text
365
 
366
  except Exception as e:
367
+ error_message = f"""# Error Processing PDF
368
+
369
+ **Error:** {str(e)}
370
+
371
+ **Troubleshooting Tips:**
372
+ - Try using a smaller model size (Tiny or Small)
373
+ - Disable image embedding for faster processing
374
+ - Check if the PDF is corrupted or password-protected
375
+ - For very large PDFs (50+ pages), consider processing in batches
376
+ - Ensure you have enough GPU memory available
377
+
378
+ **Technical Details:**
379
+ ```
380
+ {str(e)}
381
+ ```
382
+ """
383
  print(f"Fatal error: {str(e)}")
384
+ import traceback
385
+ traceback.print_exc()
386
+ return error_message, error_message # Return twice: once for preview, once for raw text
387
 
388
 
389
  # ===== Theme and UI =====
 
391
  font=fonts.GoogleFont("Inter"),
392
  font_mono=fonts.GoogleFont("JetBrains Mono"),
393
  )
394
+
395
  custom_css = """
396
  .gradio-container, body {
397
  font-family: 'Inter', ui-sans-serif, system-ui, -apple-system, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, 'Noto Sans', 'Apple Color Emoji','Segoe UI Emoji','Segoe UI Symbol','Noto Color Emoji' !important;
 
399
  .prose h1 { font-weight: 800; letter-spacing: -0.02em; }
400
  .prose h2, .prose h3 { font-weight: 700; letter-spacing: -0.01em; }
401
  .gr-button { border-radius: 12px; font-weight: 600; }
402
+ .prose img { max-width: 100%; height: auto; border-radius: 8px; margin: 1rem 0; }
403
  """
404
 
405
 
 
416
  Upload a PDF to extract text and convert to Markdown using **DeepSeek-OCR**.
417
  Each page is processed sequentially and combined into a single markdown document.
418
 
419
+ ## Features
420
+
421
+ - 🖼️ **Image Embedding** - Charts, graphs, and figures embedded directly in markdown
422
+ - 📝 **Text Extraction** - All text content from images and charts extracted
423
+ - 📊 **Table Support** - Tables converted to markdown format
424
+ - 🔍 **Object Detection** - Locate specific elements in documents
425
+ - 🎯 **Multiple Models** - Choose speed vs. accuracy trade-off
426
+
427
+ ## 📏 Model Sizes
428
 
429
+ - **Tiny** — Fastest, lower accuracy (512×512) - Best for large PDFs (30+ pages)
430
+ - **Small** — Fast, good accuracy (640×640) - Good for 15-30 pages
 
431
  - **Base** — Balanced performance (1024×1024) - Good for 10-20 pages
432
  - **Large** — Best accuracy, slower (1280×1280) - Best for <10 pages
433
  - **Gundam (Recommended)** — Optimized for documents (1024 base, 640 image, crop mode)
434
 
435
+ ## 💡 Tips
436
+
437
+ - Enable **"Embed Images"** to include charts/figures (recommended)
438
+ - Use **Tiny or Small** model for large PDFs (20+ pages)
439
+ - Processing time: ~2-5 seconds per page depending on model
440
  - Maximum recommended: 50 pages at once
441
+ - Image embedding increases file size (~1-2MB per page with images)
442
  """
443
  )
444
 
445
  with gr.Row():
446
  with gr.Column(scale=1):
447
  pdf_input = gr.File(
448
+ label="📎 Upload PDF",
449
  file_types=[".pdf"],
450
  type="filepath"
451
  )
 
453
  model_size = gr.Dropdown(
454
  choices=["Tiny", "Small", "Base", "Large", "Gundam (Recommended)"],
455
  value="Small",
456
+ label="🎯 Model Size",
457
  info="Use Tiny/Small for large PDFs (20+ pages)"
458
  )
459
 
 
465
  "🔍 Locate Object by Reference",
466
  ],
467
  value="📄 Convert to Markdown",
468
+ label="📋 Task Type",
469
  )
470
 
471
  ref_text_input = gr.Textbox(
472
+ label="🔍 Reference Text (for Locate task)",
473
  placeholder="e.g., 'the teacher', '20-10', 'a red car'...",
474
  visible=False,
475
  )
476
 
477
+ with gr.Row():
478
+ eval_mode_checkbox = gr.Checkbox(
479
+ value=False,
480
+ label=" Evaluation Mode",
481
+ info="Plain text only (faster)",
482
+ )
483
+
484
+ embed_images_checkbox = gr.Checkbox(
485
+ value=True,
486
+ label="🖼️ Embed Images",
487
+ info="Include charts/figures in output",
488
+ )
489
 
490
  submit_btn = gr.Button("🚀 Process PDF", variant="primary", size="lg")
491
 
492
  gr.Markdown(
493
  """
494
  ---
 
 
495
 
496
+ ### 📊 Processing Status
497
+
498
+ Watch the progress bar for real-time updates.
499
+
500
+ **Note:** Image embedding provides both:
501
+ - 👁️ Visual image (embedded as base64)
502
+ - 📝 Extracted text content (OCR'd from image)
503
+
504
+ You get the best of both worlds!
505
  """
506
  )
507
 
508
  with gr.Column(scale=2):
509
+ gr.Markdown("### 📝 Markdown Output Preview")
510
  output_markdown_preview = gr.Markdown(
511
  label="Rendered Markdown",
512
+ value="*Upload a PDF and click 'Process PDF' to see results here.*\n\n*The output will include both images and extracted text.*"
513
  )
514
 
515
+ gr.Markdown("### 📄 Raw Markdown Source (Copy/Download)")
516
  output_text = gr.Textbox(
517
  label="Raw Markdown",
518
  lines=25,
519
  show_copy_button=True,
520
  interactive=False,
521
+ placeholder="Markdown source will appear here... You can copy/paste this into any markdown editor."
522
  )
523
 
524
  # show/hide reference text box based on selected task
525
  def toggle_ref_text_visibility(task):
526
+ return gr.Textbox(visible=(task == "🔍 Locate Object by Reference"))
527
 
528
  task_type.change(
529
  fn=toggle_ref_text_visibility,
 
531
  outputs=ref_text_input,
532
  )
533
 
 
 
 
 
534
  submit_btn.click(
535
  fn=process_pdf,
536
  inputs=[pdf_input, model_size, task_type, ref_text_input, eval_mode_checkbox, embed_images_checkbox],
537
+ outputs=[output_markdown_preview, output_text],
 
 
 
 
538
  )
539
 
540
 
 
546
  default_concurrency_limit=2
547
  )
548
  demo.launch(
549
+ max_threads=40, # Increase thread limit for better concurrency
550
+ show_error=True, # Show errors in UI for debugging
551
+ share=False # Set to True to create a public link
552
  )