Spaces:
Running
on
Zero
Running
on
Zero
| import gradio as gr | |
| import json | |
| import tempfile | |
| import os | |
| from typing import List, Optional, Literal, Tuple, Union | |
| from PIL import Image | |
| import requests | |
| from io import BytesIO | |
| import spaces | |
| from pathlib import Path | |
| from visualizer import htrflow_visualizer | |
| from htrflow.volume.volume import Collection | |
| from htrflow.pipeline.pipeline import Pipeline | |
# Output format used when the caller does not choose one.
DEFAULT_OUTPUT = "alto"
# Document-type identifiers offered in the UI dropdowns. Each entry is also a
# key of PIPELINE_CONFIGS.
FORMAT_CHOICES = [
    "letter_english",
    "letter_swedish",
    "spread_english",
    "spread_swedish",
]
# Output formats offered in the UI; the selected value is handed to
# Collection.save() as the serializer name.
FILE_CHOICES = ["txt", "alto", "page", "json"]
# Literal aliases mirroring the two lists above, used in function signatures.
# They are maintained by hand: keep them in sync with FORMAT_CHOICES and
# FILE_CHOICES.
FormatChoices = Literal[
    "letter_english", "letter_swedish", "spread_english", "spread_swedish"
]
FileChoices = Literal["txt", "alto", "page", "json"]
# Hugging Face model identifiers shared by several pipeline configurations.
_REGION_MODEL = "Riksarkivet/yolov9-regions-1"
_LINE_MODEL = "Riksarkivet/yolov9-lines-within-regions-1"
_ENGLISH_TROCR_MODEL = "microsoft/trocr-base-handwritten"
_SWEDISH_TROCR_MODEL = "Riksarkivet/trocr-base-handwritten-hist-swe-2"


def _segmentation_step(model: str, batch_size: int) -> dict:
    """Return a new YOLO ``Segmentation`` step dict for ``model``."""
    return {
        "step": "Segmentation",
        "settings": {
            "model": "yolo",
            "model_settings": {"model": model},
            "generation_settings": {"batch_size": batch_size},
        },
    }


def _text_recognition_step(model: str, batch_size: int = 16) -> dict:
    """Return a new TrOCR ``TextRecognition`` step dict for ``model``."""
    return {
        "step": "TextRecognition",
        "settings": {
            "model": "TrOCR",
            "model_settings": {"model": model},
            "generation_settings": {"batch_size": batch_size},
        },
    }


def _letter_config(trocr_model: str) -> dict:
    """Return the single-page letter pipeline: lines, recognition, ordering."""
    return {
        "steps": [
            _segmentation_step(_LINE_MODEL, 8),
            _text_recognition_step(trocr_model),
            {"step": "OrderLines"},
        ]
    }


def _spread_config(trocr_model: str) -> dict:
    """Return the two-page spread pipeline: regions, lines, recognition, ordering."""
    return {
        "steps": [
            _segmentation_step(_REGION_MODEL, 4),
            _segmentation_step(_LINE_MODEL, 8),
            _text_recognition_step(trocr_model),
            {"step": "ReadingOrderMarginalia", "settings": {"two_page": True}},
        ]
    }


# Default pipeline configuration per document type. Every entry is built from
# fresh dicts (nothing is shared between entries), so a consumer that mutates
# one configuration cannot affect another.
PIPELINE_CONFIGS = {
    "letter_english": _letter_config(_ENGLISH_TROCR_MODEL),
    "letter_swedish": _letter_config(_SWEDISH_TROCR_MODEL),
    "spread_english": _spread_config(_ENGLISH_TROCR_MODEL),
    "spread_swedish": _spread_config(_SWEDISH_TROCR_MODEL),
}
def handle_image_input(image_path: Union[str, None], progress: gr.Progress = None) -> str:
    """
    Resolve an image reference (local path or http/https URL) to a local file path.

    A URL is downloaded into a new temporary ``.jpg`` file that is NOT deleted
    here; the caller owns it and is responsible for removing it. A local path
    is returned unchanged after checking that it exists.

    Args:
        image_path: Path to image file or URL
        progress: Progress tracker for UI updates
    Returns:
        Local file path to the image
    Raises:
        ValueError: If no image is given, the download fails, the downloaded
            data is not a valid image, or the local file does not exist.
    """
    if not image_path:
        raise ValueError("No image provided. Please upload an image or provide a URL.")

    # Compare against None explicitly instead of relying on truthiness.
    # NOTE(review): gr.Progress is iterable-like (it appears to define
    # __len__), so `if progress:` is not a dependable "tracker supplied" test.
    if progress is not None:
        progress(0.1, desc="Processing image input...")

    # If it's a URL, download the image
    if isinstance(image_path, str) and image_path.startswith(("http://", "https://")):
        if progress is not None:
            progress(0.2, desc="Downloading image from URL...")
        try:
            response = requests.get(image_path, timeout=30)
            response.raise_for_status()
        except requests.RequestException as e:
            raise ValueError(f"Failed to download image from URL: {str(e)}") from e

        # Save to temporary file
        with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp_file:
            tmp_file.write(response.content)
            image_path = tmp_file.name

        # Verify it's a valid image; the context manager closes the file
        # handle that Image.open() keeps open.
        try:
            with Image.open(image_path) as img:
                img.verify()
        except Exception as e:
            os.unlink(image_path)
            raise ValueError(f"Downloaded file is not a valid image: {str(e)}") from e

    # Verify the file exists
    if not os.path.exists(image_path):
        raise ValueError(f"Image file not found: {image_path}")
    return image_path
def _process_htr_pipeline(
    image_path: str,
    document_type: FormatChoices,
    custom_settings: Optional[str] = None,
    progress: gr.Progress = None
) -> Collection:
    """
    Run the HTR pipeline on one image and return the processed collection.

    Args:
        image_path: Local image path or http/https URL.
        document_type: Key of PIPELINE_CONFIGS; ignored when custom_settings
            is given.
        custom_settings: Optional JSON object (as a string) used as the
            pipeline configuration instead of the built-in one.
        progress: Optional progress tracker for UI updates.
    Returns:
        The collection returned by ``Pipeline.run``.
    Raises:
        ValueError: For a missing/invalid image, invalid custom JSON, or an
            unknown document type.
        RuntimeError: If running the pipeline fails.
    """
    # Handle image input
    local_path = handle_image_input(image_path, progress)
    # handle_image_input returns a local path unchanged, so a different path
    # means it downloaded the image into a temp file that we must remove.
    # User-supplied files are never deleted, even if they live in the temp dir.
    downloaded = local_path != image_path

    # Everything after the download sits inside try/finally so the temp file
    # is removed even when configuration or pipeline construction fails.
    try:
        if custom_settings:
            try:
                config = json.loads(custom_settings)
            except json.JSONDecodeError as exc:
                raise ValueError(
                    "Invalid JSON in custom_settings parameter. Please check your JSON syntax."
                ) from exc
            if not isinstance(config, dict):
                raise ValueError(
                    'custom_settings must be a JSON object, e.g. {"steps": [...]}.'
                )
        elif document_type in PIPELINE_CONFIGS:
            config = PIPELINE_CONFIGS[document_type]
        else:
            raise ValueError(
                f"Unknown document_type {document_type!r}. "
                f"Expected one of: {', '.join(PIPELINE_CONFIGS)}"
            )

        if progress is not None:
            progress(0.3, desc="Initializing HTR pipeline...")
        collection = Collection([local_path])
        pipeline = Pipeline.from_config(config)

        # Track pipeline steps
        total_steps = len(config.get("steps", []))
        if progress is not None:
            progress(0.4, desc=f"Running HTR pipeline with {total_steps} steps...")

        # Run the pipeline (we could add more granular progress here if the pipeline supports it)
        try:
            processed_collection = pipeline.run(collection)
        except Exception as pipeline_error:
            raise RuntimeError(
                f"Pipeline execution failed: {str(pipeline_error)}"
            ) from pipeline_error

        if progress is not None:
            progress(0.9, desc="Pipeline complete, preparing results...")
        return processed_collection
    finally:
        # Clean up temporary file if it was downloaded
        if downloaded:
            try:
                os.unlink(local_path)
            except OSError:
                # Best-effort cleanup: a vanished or locked temp file must
                # not mask the real result or error.
                pass
def htr_text(
    image_path: str,
    document_type: FormatChoices = "letter_swedish",
    custom_settings: Optional[str] = None,
    progress: gr.Progress = gr.Progress()
) -> str:
    """
    Extract text from handwritten documents using HTR (Handwritten Text Recognition).

    This tool processes historical handwritten documents and extracts the text content.
    Supports various document layouts including letters and book spreads in English and Swedish.

    Failures are not raised: they are reported as the returned string, prefixed
    with "Input error:" for invalid input and "HTR text extraction failed:"
    for anything else.

    Args:
        image_path: Path to the document image file or URL to download from
        document_type: Type of document layout - choose based on your document's structure and language
        custom_settings: Optional JSON configuration for advanced pipeline customization
    Returns:
        Extracted text from the handwritten document
    """
    try:
        progress(0, desc="Starting HTR text extraction...")
        collection = _process_htr_pipeline(
            image_path, document_type, custom_settings, progress
        )
        progress(0.95, desc="Extracting text from results...")
        text = extract_text_from_collection(collection)
        progress(1.0, desc="Text extraction complete!")
    except ValueError as exc:
        # Raised for bad input (missing image, invalid JSON, ...).
        return f"Input error: {exc}"
    except Exception as exc:
        return f"HTR text extraction failed: {exc}"
    return text
def _write_error_file(message: str) -> str:
    """Write ``message`` to a new temporary .txt file and return its path."""
    # UTF-8 is explicit so messages containing non-ASCII text (e.g. file
    # names) cannot fail on a restrictive locale encoding.
    with tempfile.NamedTemporaryFile(
        mode="w", delete=False, suffix=".txt", encoding="utf-8"
    ) as error_file:
        error_file.write(message)
    return error_file.name


def htrflow_file(
    image_path: str,
    document_type: FormatChoices = "letter_swedish",
    output_format: FileChoices = DEFAULT_OUTPUT,
    custom_settings: Optional[str] = None,
    server_name: str = "https://gabriel-htrflow-mcp.hf.space",
    progress: gr.Progress = gr.Progress()
) -> str:
    """
    Process handwritten document and generate a formatted output file.

    This tool performs HTR on a document and exports the results in various formats
    suitable for digital archiving, further processing, or integration with other systems.

    On failure no exception is raised; instead the path of a small .txt file
    containing the error message is returned. If the export produced no file,
    None is returned.

    Args:
        image_path: Path to the document image file or URL to download from
        document_type: Type of document layout - affects segmentation and reading order
        output_format: Desired output format (txt for plain text, alto/page for XML with coordinates, json for structured data)
        custom_settings: Optional JSON configuration for advanced pipeline customization
        server_name: Base URL of the server (used for generating download links)
    Returns:
        Path to the generated file for download
    """
    try:
        progress(0, desc="Starting HTR file processing...")
        # Captured before processing: the output file is named after the input.
        original_filename = Path(image_path).stem if image_path else "output"
        processed_collection = _process_htr_pipeline(
            image_path, document_type, custom_settings, progress
        )
        progress(0.92, desc=f"Generating {output_format.upper()} file...")

        # The directory is intentionally left on disk: the returned file
        # lives inside it and is served for download afterwards.
        export_dir = Path(tempfile.mkdtemp()) / output_format
        processed_collection.save(directory=str(export_dir), serializer=output_format)

        # Take the first exported file only and stop walking. Renaming is
        # done after the search so the directory is not modified mid-walk.
        exported_path = next(
            (
                os.path.join(root, name)
                for root, _, names in os.walk(export_dir)
                for name in names
            ),
            None,
        )

        output_file_path = None
        if exported_path is not None:
            file_ext = Path(exported_path).suffix
            new_filename = (
                f"{original_filename}.{output_format}"
                if not file_ext
                else f"{original_filename}{file_ext}"
            )
            output_file_path = os.path.join(os.path.dirname(exported_path), new_filename)
            os.rename(exported_path, output_file_path)

        progress(1.0, desc="File generation complete!")
        if output_file_path and os.path.exists(output_file_path):
            return output_file_path
        return None
    except ValueError as e:
        # Create an error file with the error message
        return _write_error_file(f"Error: {str(e)}")
    except Exception as e:
        # Create an error file with the error message
        return _write_error_file(f"HTR file generation failed: {str(e)}")
def htrflow_visualizer_with_progress(
    image_path: str,
    htr_document_path: str,
    server_name: str = "https://gabriel-htrflow-mcp.hf.space",
    progress: gr.Progress = gr.Progress()
) -> str:
    """
    Create a visualization of HTR results overlaid on the original document.

    This tool generates an annotated image showing detected text regions, reading order,
    and recognized text overlaid on the original document image. Useful for quality control
    and understanding the HTR process.

    On failure no exception is raised; instead the path of a small .txt file
    containing the error message is returned.

    Args:
        image_path: Path to the original document image file or URL
        htr_document_path: Path to the HTR output file (ALTO or PAGE XML format)
        server_name: Base URL of the server (used for generating download links)
    Returns:
        Path to the generated visualization image for download
    """
    # Set only when the image was downloaded into a temp file by this call,
    # so cleanup never deletes a file the user supplied (uploads may also
    # live in the temp directory).
    downloaded_path = None
    try:
        progress(0, desc="Starting visualization generation...")
        # Handle image input
        local_path = handle_image_input(image_path, progress)
        # A local path comes back unchanged; a different path is a download.
        if local_path != image_path:
            downloaded_path = local_path
        progress(0.5, desc="Creating visualization...")
        # Call the original visualizer function
        result = htrflow_visualizer(local_path, htr_document_path, server_name)
        progress(1.0, desc="Visualization complete!")
        return result
    except Exception as e:
        # Create an error file
        with tempfile.NamedTemporaryFile(
            mode="w", delete=False, suffix=".txt", encoding="utf-8"
        ) as error_file:
            error_file.write(f"Visualization failed: {str(e)}")
        return error_file.name
    finally:
        # Clean up temporary file if it was downloaded
        if downloaded_path is not None:
            try:
                os.unlink(downloaded_path)
            except OSError:
                # Best-effort cleanup; never mask the result or error file.
                pass
def extract_text_from_collection(collection: Collection) -> str:
    """Join the non-empty ``text`` of every traversed node, one per line.

    Pages are visited in ``collection.pages`` order and nodes in
    ``page.traverse()`` order; nodes without a ``text`` attribute or with an
    empty/None text are skipped.
    """
    candidate_texts = (
        getattr(node, "text", None)
        for page in collection.pages
        for node in page.traverse()
    )
    return "\n".join(text for text in candidate_texts if text)
def create_htrflow_mcp_server():
    """Build the tabbed Gradio app exposing the three HTR tools.

    One ``gr.Interface`` is created per tool (text extraction, file
    generation, visualization) and the three are combined in a
    ``gr.TabbedInterface``, which is returned unlaunched.
    """

    def document_type_dropdown():
        # Document-layout selector shared by the text and file tabs.
        return gr.Dropdown(
            choices=FORMAT_CHOICES,
            value="letter_swedish",
            label="Document Type",
            info="Select the type that best matches your document's layout and language"
        )

    def custom_settings_textbox():
        # Free-form JSON override for the pipeline configuration.
        return gr.Textbox(
            label="Custom Settings (JSON)",
            placeholder='{"steps": [...]} - Leave empty for default settings',
            value="",
            lines=3
        )

    def hidden_server_name_textbox():
        # Hide this from UI but keep for API
        return gr.Textbox(
            label="Server Name",
            value="https://gabriel-htrflow-mcp.hf.space",
            placeholder="Server URL for download links",
            visible=False
        )

    # HTR Text extraction interface with improved API description
    text_tab = gr.Interface(
        fn=htr_text,
        inputs=[
            gr.Image(type="filepath", label="Upload Image or Enter URL"),
            document_type_dropdown(),
            custom_settings_textbox(),
        ],
        outputs=[gr.Textbox(label="Extracted Text", lines=15)],
        title="Extract Text from Handwritten Documents",
        description="Upload a handwritten document image to extract text using AI-powered HTR",
        api_name="htr_text",
        api_description="Extract text from handwritten historical documents using advanced HTR models. Supports letters and book spreads in English and Swedish.",
    )

    # HTR File generation interface
    file_tab = gr.Interface(
        fn=htrflow_file,
        inputs=[
            gr.Image(type="filepath", label="Upload Image or Enter URL"),
            document_type_dropdown(),
            gr.Dropdown(
                choices=FILE_CHOICES,
                value=DEFAULT_OUTPUT,
                label="Output Format",
                info="ALTO/PAGE: XML with coordinates | JSON: Structured data | TXT: Plain text only"
            ),
            custom_settings_textbox(),
            hidden_server_name_textbox(),
        ],
        outputs=[gr.File(label="Download HTR Output File")],
        title="Generate HTR Output Files",
        description="Process handwritten documents and export in various formats (XML, JSON, TXT)",
        api_name="htrflow_file",
        api_description="Process handwritten documents and generate formatted output files. Outputs can be in ALTO XML (with text coordinates), PAGE XML, JSON (structured data), or plain text format.",
    )

    # HTR Visualization interface
    visualizer_tab = gr.Interface(
        fn=htrflow_visualizer_with_progress,
        inputs=[
            gr.Image(type="filepath", label="Upload Original Image"),
            gr.File(label="Upload ALTO/PAGE XML File", file_types=[".xml"]),
            hidden_server_name_textbox(),
        ],
        outputs=gr.File(label="Download Visualization Image"),
        title="Visualize HTR Results",
        description="Create an annotated image showing detected text regions and recognized text",
        api_name="htrflow_visualizer",
        api_description="Generate a visualization image showing HTR results overlaid on the original document. Shows detected text regions, reading order, and recognized text for quality control.",
    )

    # Create tabbed interface with better organization
    tabs = [text_tab, file_tab, visualizer_tab]
    tab_titles = ["Extract Text", "Generate Files", "Visualize Results"]
    return gr.TabbedInterface(
        tabs,
        tab_titles,
        title="🖋️ HTRflow - Handwritten Text Recognition",
        analytics_enabled=False,
    )
if __name__ == "__main__":
    # Launch settings for running the app directly as a script.
    launch_options = {
        "mcp_server": True,
        "share": False,
        "debug": False,
        "show_api": True,  # Ensure API is visible
        "favicon_path": None,
    }
    demo = create_htrflow_mcp_server()
    demo.launch(**launch_options)