htrflow_mcp

Running on Zero

App Files Files Community

Gabriel committed on Jun 8

Commit

6d382b7

verified ·

1 Parent(s): 8dcf777

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -10

app.py CHANGED Viewed

@@ -116,18 +116,44 @@ PIPELINE_CONFIGS = {
 }
 @spaces.GPU
-def process_htr(image_path: str, document_type: Literal["letter_english", "letter_swedish", "spread_english", "spread_swedish"] = "letter_swedish", output_format: Literal["txt", "alto", "page", "json"] = DEFAULT_OUTPUT, custom_settings: Optional[str] = None) -> str:
     """
-    Process handwritten text recognition and return extracted text with specified format file.
     Args:
-        image_path (str): Path to the image file to process
-        document_type (str): Type of document processing template to use
-        output_format (str): Output format for the processed file
-        custom_settings (str): Optional custom pipeline settings as JSON
     Returns:
-        str: The path to the output file or error message
     """
     if not image_path:
         return "Error: No image provided"
@@ -184,7 +210,7 @@ def extract_text_from_collection(collection: Collection) -> str:
 def create_htrflow_mcp_server():
     demo = gr.Interface(
-        fn=process_htr,
         inputs=[
             gr.Image(type="filepath", label="Upload Image or Enter URL"),
             gr.Dropdown(choices=["letter_english", "letter_swedish", "spread_english", "spread_swedish"], value="letter_swedish", label="Document Type"),
@@ -194,10 +220,10 @@ def create_htrflow_mcp_server():
         outputs=gr.File(label="Download Output File"),
         title="HTRflow MCP Server",
         description="Process handwritten text from uploaded file or URL and get output file in specified format",
-        api_name="process_htr",
     )
     return demo
 if __name__ == "__main__":
     demo = create_htrflow_mcp_server()
-    demo.launch(mcp_server=True, share=False, debug=True)

 }
 @spaces.GPU
+def htrflow_htr(image_path: str, document_type: Literal["letter_english", "letter_swedish", "spread_english", "spread_swedish"] = "letter_swedish", output_format: Literal["txt", "alto", "page", "json"] = DEFAULT_OUTPUT, custom_settings: Optional[str] = None) -> str:
     """
+    Perform handwritten text recognition (HTR) on uploaded images and return the extracted text in the specified format.
+    This function uses machine learning models to automatically detect, segment, and transcribe handwritten text
+    from historical documents. It supports different document types and languages, with specialized models
+    trained on historical handwriting from the Swedish National Archives (Riksarkivet).
     Args:
+        image_path (str): The file path or URL to the image containing handwritten text to be processed.
+                         Supports common image formats like JPG, PNG, TIFF.
+        document_type (Literal): The type of document and language processing template to use.
+                                Available options:
+                                - "letter_english": Single-page English handwritten letters
+                                - "letter_swedish": Single-page Swedish handwritten letters
+                                - "spread_english": Two-page spread English documents with marginalia
+                                - "spread_swedish": Two-page spread Swedish documents with marginalia
+                                Default: "letter_swedish"
+        output_format (Literal): The format for the output file containing the transcribed text.
+                                Available options:
+                                - "txt": Plain text format with line breaks
+                                - "alto": ALTO XML format with detailed layout and coordinate information
+                                - "page": PAGE XML format with structural markup and positioning data
+                                - "json": JSON format with structured text, layout information and metadata
+                                Default: "alto"
+                                Note: Both "alto" and "page" formats are XML-based with layout information.
+        custom_settings (Optional[str]): Advanced users can provide custom pipeline configuration as a
+                                        JSON string to override the default processing steps. This allows
+                                        fine-tuning of model parameters, batch sizes, and processing workflow.
+                                        Default: None (uses predefined configuration for document_type)
     Returns:
+        str: The file path to the generated output file containing the transcribed text in the requested format,
+             or an error message if processing fails. The output file will be named based on the original
+             image filename with the appropriate extension (.txt, .xml, or .json).
     """
     if not image_path:
         return "Error: No image provided"
 def create_htrflow_mcp_server():
     demo = gr.Interface(
+        fn=htrflow_htr,
         inputs=[
             gr.Image(type="filepath", label="Upload Image or Enter URL"),
             gr.Dropdown(choices=["letter_english", "letter_swedish", "spread_english", "spread_swedish"], value="letter_swedish", label="Document Type"),
         outputs=gr.File(label="Download Output File"),
         title="HTRflow MCP Server",
         description="Process handwritten text from uploaded file or URL and get output file in specified format",
+        api_name="htrflow_htr",
     )
     return demo
 if __name__ == "__main__":
     demo = create_htrflow_mcp_server()
+    demo.launch(mcp_server=True, share=False, debug=False)