htrflow_mcp

Running on Zero

App Files Files Community

Gabriel committed on Jun 8

Commit

c662fe8

verified ·

1 Parent(s): a987d91

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -225

app.py CHANGED Viewed

@@ -1,18 +1,17 @@
 import gradio as gr
 import json
-import base64
 import tempfile
 import os
-from typing import Dict, List, Optional, Literal
-from datetime import datetime
-from PIL import Image, ImageDraw, ImageFont
-import io
 import spaces
-import shutil
 from pathlib import Path
 from htrflow.volume.volume import Collection
 from htrflow.pipeline.pipeline import Pipeline
 PIPELINE_CONFIGS = {
     "letter_english": {
         "steps": [
@@ -117,10 +116,10 @@ PIPELINE_CONFIGS = {
 }
 @spaces.GPU
-def process_htr(image: Image.Image, document_type: Literal["letter_english", "letter_swedish", "spread_english", "spread_swedish"] = "letter_english", confidence_threshold: float = 0.8, custom_settings: Optional[str] = None) -> Dict:
-    """Process handwritten text recognition on uploaded images using HTRflow pipelines."""
     if image is None:
-        return {"success": False, "error": "No image provided", "results": None}
     with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
         image.save(temp_file.name, "PNG")
@@ -131,7 +130,7 @@ def process_htr(image: Image.Image, document_type: Literal["letter_english", "le
             try:
                 config = json.loads(custom_settings)
             except json.JSONDecodeError:
-                return {"success": False, "error": "Invalid JSON in custom_settings parameter", "results": None}
         else:
             config = PIPELINE_CONFIGS[document_type]
@@ -141,236 +140,53 @@ def process_htr(image: Image.Image, document_type: Literal["letter_english", "le
         try:
             processed_collection = pipeline.run(collection)
         except Exception as pipeline_error:
-            return {"success": False, "error": f"Pipeline execution failed: {str(pipeline_error)}", "results": None}
-        results = extract_text_results(processed_collection, confidence_threshold)
-        collection_data = serialize_collection_data(processed_collection)
-        processing_state = {
-            "collection_data": collection_data,
-            "document_type": document_type,
-            "confidence_threshold": confidence_threshold,
-            "timestamp": datetime.now().isoformat(),
-        }
-        return {
-            "success": True,
-            "results": results,
-            "processing_state": json.dumps(processing_state),
-            "metadata": {
-                "total_lines": len(results.get("text_lines", [])),
-                "average_confidence": results.get("average_confidence", 0),
-                "document_type": document_type,
-                "image_dimensions": image.size,
-            },
-        }
     except Exception as e:
-        return {"success": False, "error": f"HTR processing failed: {str(e)}", "results": None}
     finally:
         if os.path.exists(temp_image_path):
             os.unlink(temp_image_path)
-def visualize_results(processing_state: str, image: Image.Image, visualization_type: Literal["overlay", "confidence_heatmap", "text_regions"] = "overlay", show_confidence: bool = True, highlight_low_confidence: bool = True) -> Dict:
-    """Generate interactive visualizations of HTR processing results."""
-    try:
-        if image is None:
-            return {"success": False, "error": "Image is required for visualization", "visualization": None}
-        state = json.loads(processing_state)
-        collection_data = state["collection_data"]
-        viz_image = create_visualization(image, collection_data, visualization_type, show_confidence, highlight_low_confidence)
-        img_buffer = io.BytesIO()
-        viz_image.save(img_buffer, format="PNG")
-        img_base64 = base64.b64encode(img_buffer.getvalue()).decode("utf-8")
-        return {
-            "success": True,
-            "visualization": {
-                "image_base64": img_base64,
-                "image_format": "PNG",
-                "visualization_type": visualization_type,
-                "dimensions": viz_image.size,
-            },
-            "metadata": {"total_elements": len(collection_data.get("text_elements", []))},
-        }
-    except Exception as e:
-        return {"success": False, "error": f"Visualization generation failed: {str(e)}", "visualization": None}
-def export_results(processing_state: str, image: Image.Image, output_formats: List[Literal["txt", "json", "alto", "page"]] = ["txt"], confidence_filter: float = 0.0) -> Dict:
-    """Export HTR results to multiple formats using HTRflow's native export functionality."""
-    try:
-        if image is None:
-            return {"success": False, "error": "Image is required for export", "exports": None}
-        state = json.loads(processing_state)
-        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
-            image.save(temp_file.name, "PNG")
-            temp_image_path = temp_file.name
-        try:
-            collection = Collection([temp_image_path])
-            pipeline = Pipeline.from_config(PIPELINE_CONFIGS[state["document_type"]])
-            processed_collection = pipeline.run(collection)
-            temp_dir = Path(tempfile.mkdtemp())
-            exports = {}
-            for fmt in output_formats:
-                export_dir = temp_dir / fmt
-                processed_collection.save(directory=str(export_dir), serializer=fmt)
-                export_files = []
-                for root, _, files in os.walk(export_dir):
-                    for file in files:
-                        file_path = os.path.join(root, file)
-                        try:
-                            with open(file_path, 'r', encoding='utf-8') as f:
-                                content = f.read()
-                            export_files.append({"filename": file, "content": content})
-                        except UnicodeDecodeError:
-                            with open(file_path, 'rb') as f:
-                                content = base64.b64encode(f.read()).decode('utf-8')
-                            export_files.append({"filename": file, "content": content, "encoding": "base64"})
-                exports[fmt] = export_files
-            shutil.rmtree(temp_dir)
-            return {
-                "success": True,
-                "exports": exports,
-                "export_metadata": {
-                    "formats_generated": output_formats,
-                    "confidence_filter": confidence_filter,
-                    "timestamp": datetime.now().isoformat(),
-                },
-            }
-        finally:
-            if os.path.exists(temp_image_path):
-                os.unlink(temp_image_path)
-    except Exception as e:
-        return {"success": False, "error": f"Export generation failed: {str(e)}", "exports": None}
-def extract_text_results(collection: Collection, confidence_threshold: float) -> Dict:
-    results = {"extracted_text": "", "text_lines": [], "confidence_scores": []}
     for page in collection.pages:
         for node in page.traverse():
             if hasattr(node, "text") and node.text:
-                confidence = getattr(node, "confidence", 1.0)
-                if confidence >= confidence_threshold:
-                    results["text_lines"].append({
-                        "text": node.text,
-                        "confidence": confidence,
-                        "bbox": getattr(node, "bbox", None),
-                    })
-                    results["extracted_text"] += node.text + "\n"
-                    results["confidence_scores"].append(confidence)
-    results["average_confidence"] = sum(results["confidence_scores"]) / len(results["confidence_scores"]) if results["confidence_scores"] else 0
-    return results
-def serialize_collection_data(collection: Collection) -> Dict:
-    text_elements = []
-    for page in collection.pages:
-        for node in page.traverse():
-            if hasattr(node, "text") and node.text:
-                text_elements.append({
-                    "text": node.text,
-                    "confidence": getattr(node, "confidence", 1.0),
-                    "bbox": getattr(node, "bbox", None),
-                })
-    return {"text_elements": text_elements}
-def create_visualization(image, collection_data, visualization_type, show_confidence, highlight_low_confidence):
-    viz_image = image.copy()
-    draw = ImageDraw.Draw(viz_image)
-    try:
-        font = ImageFont.truetype("arial.ttf", 12)
-    except:
-        font = ImageFont.load_default()
-    for element in collection_data.get("text_elements", []):
-        if element.get("bbox"):
-            bbox = element["bbox"]
-            confidence = element.get("confidence", 1.0)
-            if visualization_type == "overlay":
-                color = (255, 165, 0) if highlight_low_confidence and confidence < 0.7 else (0, 255, 0)
-                draw.rectangle(bbox, outline=color, width=2)
-                if show_confidence:
-                    draw.text((bbox[0], bbox[1] - 15), f"{confidence:.2f}", fill=color, font=font)
-            elif visualization_type == "confidence_heatmap":
-                if confidence < 0.5:
-                    color = (255, 0, 0, 100)
-                elif confidence < 0.8:
-                    color = (255, 255, 0, 100)
-                else:
-                    color = (0, 255, 0, 100)
-                overlay = Image.new("RGBA", viz_image.size, (0, 0, 0, 0))
-                overlay_draw = ImageDraw.Draw(overlay)
-                overlay_draw.rectangle(bbox, fill=color)
-                viz_image = Image.alpha_composite(viz_image.convert("RGBA"), overlay)
-            elif visualization_type == "text_regions":
-                colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0)]
-                color = colors[hash(str(bbox)) % len(colors)]
-                draw.rectangle(bbox, outline=color, width=3)
-    return viz_image.convert("RGB") if visualization_type == "confidence_heatmap" else viz_image
 def create_htrflow_mcp_server():
-    demo = gr.TabbedInterface(
-        [
-            gr.Interface(
-                fn=process_htr,
-                inputs=[
-                    gr.Image(type="pil", label="Upload Image"),
-                    gr.Dropdown(choices=["letter_english", "letter_swedish", "spread_english", "spread_swedish"], value="letter_english", label="Document Type"),
-                    gr.Slider(0.0, 1.0, value=0.8, label="Confidence Threshold"),
-                    gr.Textbox(label="Custom Settings (JSON)", placeholder="Optional custom pipeline settings"),
-                ],
-                outputs=gr.JSON(label="Processing Results"),
-                title="HTR Processing Tool",
-                description="Process handwritten text using configurable HTRflow pipelines",
-                api_name="process_htr",
-            ),
-            gr.Interface(
-                fn=visualize_results,
-                inputs=[
-                    gr.Textbox(label="Processing State (JSON)", placeholder="Paste processing results from HTR tool"),
-                    gr.Image(type="pil", label="Image"),
-                    gr.Dropdown(choices=["overlay", "confidence_heatmap", "text_regions"], value="overlay", label="Visualization Type"),
-                    gr.Checkbox(value=True, label="Show Confidence Scores"),
-                    gr.Checkbox(value=True, label="Highlight Low Confidence"),
-                ],
-                outputs=gr.JSON(label="Visualization Results"),
-                title="Results Visualization Tool",
-                description="Generate interactive visualizations of HTR results",
-                api_name="visualize_results",
-            ),
-            gr.Interface(
-                fn=export_results,
-                inputs=[
-                    gr.Textbox(label="Processing State (JSON)", placeholder="Paste processing results from HTR tool"),
-                    gr.Image(type="pil", label="Image"),
-                    gr.CheckboxGroup(choices=["txt", "json", "alto", "page"], value=["txt"], label="Output Formats"),
-                    gr.Slider(0.0, 1.0, value=0.0, label="Confidence Filter"),
-                ],
-                outputs=gr.JSON(label="Export Results"),
-                title="Export Tool",
-                description="Export HTR results to multiple formats",
-                api_name="export_results",
-            ),
         ],
-        ["HTR Processing", "Results Visualization", "Export Results"],
         title="HTRflow MCP Server",
     )
     return demo

 import gradio as gr
 import json
 import tempfile
 import os
+from typing import List, Optional, Literal
+from PIL import Image
 import spaces
 from pathlib import Path
 from htrflow.volume.volume import Collection
 from htrflow.pipeline.pipeline import Pipeline
+DEFAULT_OUTPUT = "alto"
+CHOICES = ["txt", "alto", "page", "json"]
 PIPELINE_CONFIGS = {
     "letter_english": {
         "steps": [
 }
 @spaces.GPU
+def process_htr(image: Image.Image, document_type: Literal["letter_english", "letter_swedish", "spread_english", "spread_swedish"] = "letter_english", output_format: Literal["txt", "alto", "page", "json"] = DEFAULT_OUTPUT, custom_settings: Optional[str] = None):
+    """Process handwritten text recognition and return extracted text with specified format file."""
     if image is None:
+        return "Error: No image provided", None
     with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
         image.save(temp_file.name, "PNG")
             try:
                 config = json.loads(custom_settings)
             except json.JSONDecodeError:
+                return "Error: Invalid JSON in custom_settings parameter", None
         else:
             config = PIPELINE_CONFIGS[document_type]
         try:
             processed_collection = pipeline.run(collection)
         except Exception as pipeline_error:
+            return f"Error: Pipeline execution failed: {str(pipeline_error)}", None
+        temp_dir = Path(tempfile.mkdtemp())
+        export_dir = temp_dir / output_format
+        processed_collection.save(directory=str(export_dir), serializer=output_format)
+        output_file_path = None
+        for root, _, files in os.walk(export_dir):
+            for file in files:
+                output_file_path = os.path.join(root, file)
+                break
+        extracted_text = extract_text_from_collection(processed_collection)
+        return extracted_text, output_file_path
     except Exception as e:
+        return f"Error: HTR processing failed: {str(e)}", None
     finally:
         if os.path.exists(temp_image_path):
             os.unlink(temp_image_path)
+def extract_text_from_collection(collection: Collection) -> str:  # returns the text of every text-bearing node, joined with "\n"
+    """Extract plain text from processed collection."""
+    text_lines = []  # filled in the order page.traverse() yields nodes, page by page
     for page in collection.pages:
         for node in page.traverse():
             if hasattr(node, "text") and node.text:  # skip nodes that lack a `text` attribute or whose text is empty/None
+                text_lines.append(node.text)
+    return "\n".join(text_lines)  # yields "" when no node carried text
 def create_htrflow_mcp_server():  # builds and returns the single-tool Gradio interface wrapping process_htr
+    demo = gr.Interface(
+        fn=process_htr,
+        inputs=[  # order mirrors process_htr's parameters: image, document_type, output_format, custom_settings
+            gr.Image(type="pil", label="Upload Image"),  # type="pil" lines up with process_htr's Image.Image annotation
+            gr.Dropdown(choices=["letter_english", "letter_swedish", "spread_english", "spread_swedish"], value="letter_english", label="Document Type"),
+            gr.Dropdown(choices=CHOICES, value=DEFAULT_OUTPUT, label="Output Format"),  # choices and default come from the module-level constants
+            gr.Textbox(label="Custom Settings (JSON)", placeholder="Optional custom pipeline settings"),
+        ],
+        outputs=[  # process_htr returns a 2-tuple: (text or "Error: ..." message, output file path or None)
+            gr.Textbox(label="Extracted Text", lines=10),
+            gr.File(label="Download Output File")
         ],
         title="HTRflow MCP Server",
+        description="Process handwritten text and get extracted text with output file in specified format",
+        api_name="process_htr",
     )
     return demo  # the caller is responsible for launching the interface; nothing is launched here