Gabriel committed on
Commit
2c99aea
·
verified ·
1 Parent(s): 07b7c89

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +231 -31
app.py CHANGED
@@ -2,8 +2,10 @@ import gradio as gr
2
  import json
3
  import tempfile
4
  import os
5
- from typing import List, Optional, Literal, Tuple
6
  from PIL import Image
 
 
7
 
8
  import spaces
9
  from pathlib import Path
@@ -142,50 +144,140 @@ PIPELINE_CONFIGS = {
142
  }
143
 
144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  @spaces.GPU
146
  def _process_htr_pipeline(
147
- image_path: str, document_type: FormatChoices, custom_settings: Optional[str] = None
 
 
 
148
  ) -> Collection:
149
  """Process HTR pipeline and return the processed collection."""
150
-
151
- if not image_path:
152
- raise ValueError("No image provided")
153
 
154
  if custom_settings:
155
  try:
156
  config = json.loads(custom_settings)
157
  except json.JSONDecodeError:
158
- raise ValueError("Invalid JSON in custom_settings parameter")
159
  else:
160
  config = PIPELINE_CONFIGS[document_type]
161
 
 
 
 
162
  collection = Collection([image_path])
163
  pipeline = Pipeline.from_config(config)
164
 
165
  try:
 
 
 
 
 
 
 
166
  processed_collection = pipeline.run(collection)
 
 
 
 
167
  return processed_collection
168
  except Exception as pipeline_error:
169
  raise RuntimeError(f"Pipeline execution failed: {str(pipeline_error)}")
 
 
 
 
 
 
 
170
 
171
 
172
  def htr_text(
173
  image_path: str,
174
  document_type: FormatChoices = "letter_swedish",
175
  custom_settings: Optional[str] = None,
 
176
  ) -> str:
177
- """Extract text from handwritten documents using HTR.
178
-
179
- returns:
180
- str: Extracted text from the image.
 
 
 
 
 
 
 
 
 
181
  """
182
  try:
 
 
183
  processed_collection = _process_htr_pipeline(
184
- image_path, document_type, custom_settings
185
  )
 
 
186
  extracted_text = extract_text_from_collection(processed_collection)
 
 
187
  return extracted_text
188
 
 
 
189
  except Exception as e:
190
  return f"HTR text extraction failed: {str(e)}"
191
 
@@ -196,20 +288,35 @@ def htrflow_file(
196
  output_format: FileChoices = DEFAULT_OUTPUT,
197
  custom_settings: Optional[str] = None,
198
  server_name: str = "https://gabriel-htrflow-mcp.hf.space",
 
199
  ) -> str:
200
  """
201
- Process HTR and return a formatted file for download.
202
-
 
 
 
 
 
 
 
 
 
 
203
  Returns:
204
- str: File path for direct download via gr.File (server_name/gradio_api/file=/tmp/gradio/{temp_folder}/{file_name})
205
  """
206
  try:
207
- original_filename = Path(image_path).stem or "output"
 
 
208
 
209
  processed_collection = _process_htr_pipeline(
210
- image_path, document_type, custom_settings
211
  )
212
 
 
 
213
  temp_dir = Path(tempfile.mkdtemp())
214
  export_dir = temp_dir / output_format
215
  processed_collection.save(directory=str(export_dir), serializer=output_format)
@@ -229,16 +336,79 @@ def htrflow_file(
229
  output_file_path = new_path
230
  break
231
 
 
 
232
  if output_file_path and os.path.exists(output_file_path):
233
  return output_file_path
234
  else:
235
  return None
236
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  except Exception as e:
238
- return None
 
 
 
 
 
 
 
 
 
 
 
239
 
240
 
241
  def extract_text_from_collection(collection: Collection) -> str:
 
242
  text_lines = []
243
  for page in collection.pages:
244
  for node in page.traverse():
@@ -248,70 +418,94 @@ def extract_text_from_collection(collection: Collection) -> str:
248
 
249
 
250
  def create_htrflow_mcp_server():
 
251
  htr_text_interface = gr.Interface(
252
  fn=htr_text,
253
  inputs=[
254
  gr.Image(type="filepath", label="Upload Image or Enter URL"),
255
  gr.Dropdown(
256
- choices=FORMAT_CHOICES, value="letter_swedish", label="Document Type"
 
 
 
257
  ),
258
  gr.Textbox(
259
  label="Custom Settings (JSON)",
260
- placeholder="Optional custom pipeline settings",
261
  value="",
 
262
  ),
263
  ],
264
- outputs=[gr.Textbox(label="Extracted Text", lines=10)],
265
- description="Extract plain text from handwritten documents using HTR",
 
266
  api_name="htr_text",
 
267
  )
268
 
 
269
  htrflow_file_interface = gr.Interface(
270
  fn=htrflow_file,
271
  inputs=[
272
  gr.Image(type="filepath", label="Upload Image or Enter URL"),
273
  gr.Dropdown(
274
- choices=FORMAT_CHOICES, value="letter_swedish", label="Document Type"
 
 
 
275
  ),
276
  gr.Dropdown(
277
- choices=FILE_CHOICES, value=DEFAULT_OUTPUT, label="Output Format"
 
 
 
278
  ),
279
  gr.Textbox(
280
  label="Custom Settings (JSON)",
281
- placeholder="Optional custom pipeline settings",
282
  value="",
 
283
  ),
284
  gr.Textbox(
285
  label="Server Name",
286
  value="https://gabriel-htrflow-mcp.hf.space",
287
  placeholder="Server URL for download links",
 
288
  ),
289
  ],
290
  outputs=[gr.File(label="Download HTR Output File")],
291
- description="Process handwritten text and get formatted file (ALTO XML, PAGE XML, JSON, or TXT)",
 
292
  api_name="htrflow_file",
 
293
  )
294
 
 
295
  htrflow_viz = gr.Interface(
296
- fn=htrflow_visualizer,
297
  inputs=[
298
  gr.Image(type="filepath", label="Upload Original Image"),
299
- gr.File(label="Upload ALTO/PAGE XML File"),
300
  gr.Textbox(
301
  label="Server Name",
302
  value="https://gabriel-htrflow-mcp.hf.space",
303
  placeholder="Server URL for download links",
 
304
  ),
305
  ],
306
  outputs=gr.File(label="Download Visualization Image"),
307
- description="Visualize HTR results by overlaying text regions and polygons on the original image",
 
308
  api_name="htrflow_visualizer",
 
309
  )
310
 
 
311
  demo = gr.TabbedInterface(
312
  [htr_text_interface, htrflow_file_interface, htrflow_viz],
313
- ["HTR Text", "HTR File", "HTR Visualizer"],
314
- title="HTRflow Handwritten Text Recognition",
 
315
  )
316
 
317
  return demo
@@ -319,4 +513,10 @@ def create_htrflow_mcp_server():
319
 
320
  if __name__ == "__main__":
321
  demo = create_htrflow_mcp_server()
322
- demo.launch(mcp_server=True, share=False, debug=False)
 
 
 
 
 
 
 
2
  import json
3
  import tempfile
4
  import os
5
+ from typing import List, Optional, Literal, Tuple, Union
6
  from PIL import Image
7
+ import requests
8
+ from io import BytesIO
9
 
10
  import spaces
11
  from pathlib import Path
 
144
  }
145
 
146
 
147
+ def handle_image_input(image_path: Union[str, None], progress: gr.Progress = None) -> str:
148
+ """
149
+ Handle image input from various sources (local file, URL, or uploaded file).
150
+
151
+ Args:
152
+ image_path: Path to image file or URL
153
+ progress: Progress tracker for UI updates
154
+
155
+ Returns:
156
+ Local file path to the image
157
+ """
158
+ if not image_path:
159
+ raise ValueError("No image provided. Please upload an image or provide a URL.")
160
+
161
+ if progress:
162
+ progress(0.1, desc="Processing image input...")
163
+
164
+ # If it's a URL, download the image
165
+ if isinstance(image_path, str) and (image_path.startswith("http://") or image_path.startswith("https://")):
166
+ try:
167
+ if progress:
168
+ progress(0.2, desc="Downloading image from URL...")
169
+ response = requests.get(image_path, timeout=30)
170
+ response.raise_for_status()
171
+
172
+ # Save to temporary file
173
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp_file:
174
+ tmp_file.write(response.content)
175
+ image_path = tmp_file.name
176
+
177
+ # Verify it's a valid image
178
+ try:
179
+ img = Image.open(image_path)
180
+ img.verify()
181
+ except Exception as e:
182
+ os.unlink(image_path)
183
+ raise ValueError(f"Downloaded file is not a valid image: {str(e)}")
184
+
185
+ except requests.RequestException as e:
186
+ raise ValueError(f"Failed to download image from URL: {str(e)}")
187
+
188
+ # Verify the file exists
189
+ if not os.path.exists(image_path):
190
+ raise ValueError(f"Image file not found: {image_path}")
191
+
192
+ return image_path
193
+
194
+
195
  @spaces.GPU
196
  def _process_htr_pipeline(
197
+ image_path: str,
198
+ document_type: FormatChoices,
199
+ custom_settings: Optional[str] = None,
200
+ progress: gr.Progress = None
201
  ) -> Collection:
202
  """Process HTR pipeline and return the processed collection."""
203
+
204
+ # Handle image input
205
+ image_path = handle_image_input(image_path, progress)
206
 
207
  if custom_settings:
208
  try:
209
  config = json.loads(custom_settings)
210
  except json.JSONDecodeError:
211
+ raise ValueError("Invalid JSON in custom_settings parameter. Please check your JSON syntax.")
212
  else:
213
  config = PIPELINE_CONFIGS[document_type]
214
 
215
+ if progress:
216
+ progress(0.3, desc="Initializing HTR pipeline...")
217
+
218
  collection = Collection([image_path])
219
  pipeline = Pipeline.from_config(config)
220
 
221
  try:
222
+ # Track pipeline steps
223
+ total_steps = len(config.get("steps", []))
224
+
225
+ if progress:
226
+ progress(0.4, desc=f"Running HTR pipeline with {total_steps} steps...")
227
+
228
+ # Run the pipeline (we could add more granular progress here if the pipeline supports it)
229
  processed_collection = pipeline.run(collection)
230
+
231
+ if progress:
232
+ progress(0.9, desc="Pipeline complete, preparing results...")
233
+
234
  return processed_collection
235
  except Exception as pipeline_error:
236
  raise RuntimeError(f"Pipeline execution failed: {str(pipeline_error)}")
237
+ finally:
238
+ # Clean up temporary file if it was downloaded
239
+ if image_path and image_path.startswith(tempfile.gettempdir()):
240
+ try:
241
+ os.unlink(image_path)
242
+ except:
243
+ pass
244
 
245
 
246
  def htr_text(
247
  image_path: str,
248
  document_type: FormatChoices = "letter_swedish",
249
  custom_settings: Optional[str] = None,
250
+ progress: gr.Progress = gr.Progress()
251
  ) -> str:
252
+ """
253
+ Extract text from handwritten documents using HTR (Handwritten Text Recognition).
254
+
255
+ This tool processes historical handwritten documents and extracts the text content.
256
+ Supports various document layouts including letters and book spreads in English and Swedish.
257
+
258
+ Args:
259
+ image_path: Path to the document image file or URL to download from
260
+ document_type: Type of document layout - choose based on your document's structure and language
261
+ custom_settings: Optional JSON configuration for advanced pipeline customization
262
+
263
+ Returns:
264
+ Extracted text from the handwritten document
265
  """
266
  try:
267
+ progress(0, desc="Starting HTR text extraction...")
268
+
269
  processed_collection = _process_htr_pipeline(
270
+ image_path, document_type, custom_settings, progress
271
  )
272
+
273
+ progress(0.95, desc="Extracting text from results...")
274
  extracted_text = extract_text_from_collection(processed_collection)
275
+
276
+ progress(1.0, desc="Text extraction complete!")
277
  return extracted_text
278
 
279
+ except ValueError as e:
280
+ return f"Input error: {str(e)}"
281
  except Exception as e:
282
  return f"HTR text extraction failed: {str(e)}"
283
 
 
288
  output_format: FileChoices = DEFAULT_OUTPUT,
289
  custom_settings: Optional[str] = None,
290
  server_name: str = "https://gabriel-htrflow-mcp.hf.space",
291
+ progress: gr.Progress = gr.Progress()
292
  ) -> str:
293
  """
294
+ Process handwritten document and generate a formatted output file.
295
+
296
+ This tool performs HTR on a document and exports the results in various formats
297
+ suitable for digital archiving, further processing, or integration with other systems.
298
+
299
+ Args:
300
+ image_path: Path to the document image file or URL to download from
301
+ document_type: Type of document layout - affects segmentation and reading order
302
+ output_format: Desired output format (txt for plain text, alto/page for XML with coordinates, json for structured data)
303
+ custom_settings: Optional JSON configuration for advanced pipeline customization
304
+ server_name: Base URL of the server (used for generating download links)
305
+
306
  Returns:
307
+ Path to the generated file for download
308
  """
309
  try:
310
+ progress(0, desc="Starting HTR file processing...")
311
+
312
+ original_filename = Path(image_path).stem if image_path else "output"
313
 
314
  processed_collection = _process_htr_pipeline(
315
+ image_path, document_type, custom_settings, progress
316
  )
317
 
318
+ progress(0.92, desc=f"Generating {output_format.upper()} file...")
319
+
320
  temp_dir = Path(tempfile.mkdtemp())
321
  export_dir = temp_dir / output_format
322
  processed_collection.save(directory=str(export_dir), serializer=output_format)
 
336
  output_file_path = new_path
337
  break
338
 
339
+ progress(1.0, desc="File generation complete!")
340
+
341
  if output_file_path and os.path.exists(output_file_path):
342
  return output_file_path
343
  else:
344
  return None
345
 
346
+ except ValueError as e:
347
+ # Create an error file with the error message
348
+ error_file = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt')
349
+ error_file.write(f"Error: {str(e)}")
350
+ error_file.close()
351
+ return error_file.name
352
+ except Exception as e:
353
+ # Create an error file with the error message
354
+ error_file = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt')
355
+ error_file.write(f"HTR file generation failed: {str(e)}")
356
+ error_file.close()
357
+ return error_file.name
358
+
359
+
360
+ def htrflow_visualizer_with_progress(
361
+ image_path: str,
362
+ htr_document_path: str,
363
+ server_name: str = "https://gabriel-htrflow-mcp.hf.space",
364
+ progress: gr.Progress = gr.Progress()
365
+ ) -> str:
366
+ """
367
+ Create a visualization of HTR results overlaid on the original document.
368
+
369
+ This tool generates an annotated image showing detected text regions, reading order,
370
+ and recognized text overlaid on the original document image. Useful for quality control
371
+ and understanding the HTR process.
372
+
373
+ Args:
374
+ image_path: Path to the original document image file or URL
375
+ htr_document_path: Path to the HTR output file (ALTO or PAGE XML format)
376
+ server_name: Base URL of the server (used for generating download links)
377
+
378
+ Returns:
379
+ Path to the generated visualization image for download
380
+ """
381
+ try:
382
+ progress(0, desc="Starting visualization generation...")
383
+
384
+ # Handle image input
385
+ image_path = handle_image_input(image_path, progress)
386
+
387
+ progress(0.5, desc="Creating visualization...")
388
+
389
+ # Call the original visualizer function
390
+ result = htrflow_visualizer(image_path, htr_document_path, server_name)
391
+
392
+ progress(1.0, desc="Visualization complete!")
393
+
394
+ return result
395
  except Exception as e:
396
+ # Create an error file
397
+ error_file = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt')
398
+ error_file.write(f"Visualization failed: {str(e)}")
399
+ error_file.close()
400
+ return error_file.name
401
+ finally:
402
+ # Clean up temporary file if it was downloaded
403
+ if image_path and image_path.startswith(tempfile.gettempdir()):
404
+ try:
405
+ os.unlink(image_path)
406
+ except:
407
+ pass
408
 
409
 
410
  def extract_text_from_collection(collection: Collection) -> str:
411
+ """Extract and combine text from all nodes in the collection."""
412
  text_lines = []
413
  for page in collection.pages:
414
  for node in page.traverse():
 
418
 
419
 
420
  def create_htrflow_mcp_server():
421
+ # HTR Text extraction interface with improved API description
422
  htr_text_interface = gr.Interface(
423
  fn=htr_text,
424
  inputs=[
425
  gr.Image(type="filepath", label="Upload Image or Enter URL"),
426
  gr.Dropdown(
427
+ choices=FORMAT_CHOICES,
428
+ value="letter_swedish",
429
+ label="Document Type",
430
+ info="Select the type that best matches your document's layout and language"
431
  ),
432
  gr.Textbox(
433
  label="Custom Settings (JSON)",
434
+ placeholder='{"steps": [...]} - Leave empty for default settings',
435
  value="",
436
+ lines=3
437
  ),
438
  ],
439
+ outputs=[gr.Textbox(label="Extracted Text", lines=15)],
440
+ title="Extract Text from Handwritten Documents",
441
+ description="Upload a handwritten document image to extract text using AI-powered HTR",
442
  api_name="htr_text",
443
+ api_description="Extract text from handwritten historical documents using advanced HTR models. Supports letters and book spreads in English and Swedish.",
444
  )
445
 
446
+ # HTR File generation interface
447
  htrflow_file_interface = gr.Interface(
448
  fn=htrflow_file,
449
  inputs=[
450
  gr.Image(type="filepath", label="Upload Image or Enter URL"),
451
  gr.Dropdown(
452
+ choices=FORMAT_CHOICES,
453
+ value="letter_swedish",
454
+ label="Document Type",
455
+ info="Select the type that best matches your document's layout and language"
456
  ),
457
  gr.Dropdown(
458
+ choices=FILE_CHOICES,
459
+ value=DEFAULT_OUTPUT,
460
+ label="Output Format",
461
+ info="ALTO/PAGE: XML with coordinates | JSON: Structured data | TXT: Plain text only"
462
  ),
463
  gr.Textbox(
464
  label="Custom Settings (JSON)",
465
+ placeholder='{"steps": [...]} - Leave empty for default settings',
466
  value="",
467
+ lines=3
468
  ),
469
  gr.Textbox(
470
  label="Server Name",
471
  value="https://gabriel-htrflow-mcp.hf.space",
472
  placeholder="Server URL for download links",
473
+ visible=False # Hide this from UI but keep for API
474
  ),
475
  ],
476
  outputs=[gr.File(label="Download HTR Output File")],
477
+ title="Generate HTR Output Files",
478
+ description="Process handwritten documents and export in various formats (XML, JSON, TXT)",
479
  api_name="htrflow_file",
480
+ api_description="Process handwritten documents and generate formatted output files. Outputs can be in ALTO XML (with text coordinates), PAGE XML, JSON (structured data), or plain text format.",
481
  )
482
 
483
+ # HTR Visualization interface
484
  htrflow_viz = gr.Interface(
485
+ fn=htrflow_visualizer_with_progress,
486
  inputs=[
487
  gr.Image(type="filepath", label="Upload Original Image"),
488
+ gr.File(label="Upload ALTO/PAGE XML File", file_types=[".xml"]),
489
  gr.Textbox(
490
  label="Server Name",
491
  value="https://gabriel-htrflow-mcp.hf.space",
492
  placeholder="Server URL for download links",
493
+ visible=False # Hide this from UI but keep for API
494
  ),
495
  ],
496
  outputs=gr.File(label="Download Visualization Image"),
497
+ title="Visualize HTR Results",
498
+ description="Create an annotated image showing detected text regions and recognized text",
499
  api_name="htrflow_visualizer",
500
+ api_description="Generate a visualization image showing HTR results overlaid on the original document. Shows detected text regions, reading order, and recognized text for quality control.",
501
  )
502
 
503
+ # Create tabbed interface with better organization
504
  demo = gr.TabbedInterface(
505
  [htr_text_interface, htrflow_file_interface, htrflow_viz],
506
+ ["Extract Text", "Generate Files", "Visualize Results"],
507
+ title="🖋️ HTRflow - Handwritten Text Recognition",
508
+ analytics_enabled=False,
509
  )
510
 
511
  return demo
 
513
 
514
  if __name__ == "__main__":
515
  demo = create_htrflow_mcp_server()
516
+ demo.launch(
517
+ mcp_server=True,
518
+ share=False,
519
+ debug=False,
520
+ show_api=True, # Ensure API is visible
521
+ favicon_path=None,
522
+ )