htrflow_mcp

Running on Zero

App Files Files Community

Gabriel committed on Jun 7

Commit

a987d91

verified ·

1 Parent(s): d6e55c9

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -83

app.py CHANGED Viewed

@@ -143,16 +143,11 @@ def process_htr(image: Image.Image, document_type: Literal["letter_english", "le
         except Exception as pipeline_error:
             return {"success": False, "error": f"Pipeline execution failed: {str(pipeline_error)}", "results": None}
-        img_buffer = io.BytesIO()
-        image.save(img_buffer, format="PNG")
-        image_base64 = base64.b64encode(img_buffer.getvalue()).decode("utf-8")
         results = extract_text_results(processed_collection, confidence_threshold)
         processing_state = {
-            "processed_collection": processed_collection,
-            "image_base64": image_base64,
-            "image_size": image.size,
             "document_type": document_type,
             "confidence_threshold": confidence_threshold,
             "timestamp": datetime.now().isoformat(),
@@ -161,7 +156,7 @@ def process_htr(image: Image.Image, document_type: Literal["letter_english", "le
         return {
             "success": True,
             "results": results,
-            "processing_state": json.dumps(processing_state, default=str),
             "metadata": {
                 "total_lines": len(results.get("text_lines", [])),
                 "average_confidence": results.get("average_confidence", 0),
@@ -175,58 +170,44 @@ def process_htr(image: Image.Image, document_type: Literal["letter_english", "le
         if os.path.exists(temp_image_path):
             os.unlink(temp_image_path)
-def visualize_results(processing_state: str, visualization_type: Literal["overlay", "confidence_heatmap", "text_regions"] = "overlay", show_confidence: bool = True, highlight_low_confidence: bool = True, image: Optional[Image.Image] = None) -> Dict:
     """Generate interactive visualizations of HTR processing results."""
     try:
         state = json.loads(processing_state)
-        if image is not None:
-            original_image = image
-        else:
-            image_data = base64.b64decode(state["image_base64"])
-            original_image = Image.open(io.BytesIO(image_data))
-        # Recreate the collection from the stored image
-        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
-            original_image.save(temp_file.name, "PNG")
-            temp_image_path = temp_file.name
-        try:
-            collection = Collection([temp_image_path])
-            pipeline = Pipeline.from_config(PIPELINE_CONFIGS[state["document_type"]])
-            processed_collection = pipeline.run(collection)
-            viz_image = create_visualization(original_image, processed_collection, visualization_type, show_confidence, highlight_low_confidence)
-            img_buffer = io.BytesIO()
-            viz_image.save(img_buffer, format="PNG")
-            img_base64 = base64.b64encode(img_buffer.getvalue()).decode("utf-8")
-            return {
-                "success": True,
-                "visualization": {
-                    "image_base64": img_base64,
-                    "image_format": "PNG",
-                    "visualization_type": visualization_type,
-                    "dimensions": viz_image.size,
-                },
-                "metadata": {"visualization_type": visualization_type},
-            }
-        finally:
-            if os.path.exists(temp_image_path):
-                os.unlink(temp_image_path)
     except Exception as e:
         return {"success": False, "error": f"Visualization generation failed: {str(e)}", "visualization": None}
-def export_results(processing_state: str, output_formats: List[Literal["txt", "json", "alto", "page"]] = ["txt"], confidence_filter: float = 0.0) -> Dict:
     """Export HTR results to multiple formats using HTRflow's native export functionality."""
     try:
         state = json.loads(processing_state)
         with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
-            image_data = base64.b64decode(state["image_base64"])
-            image = Image.open(io.BytesIO(image_data))
             image.save(temp_file.name, "PNG")
             temp_image_path = temp_file.name
@@ -279,19 +260,33 @@ def extract_text_results(collection: Collection, confidence_threshold: float) ->
     results = {"extracted_text": "", "text_lines": [], "confidence_scores": []}
     for page in collection.pages:
         for node in page.traverse():
-            if hasattr(node, "text") and node.text and hasattr(node, "confidence") and node.confidence >= confidence_threshold:
-                results["text_lines"].append({
-                    "text": node.text,
-                    "confidence": node.confidence,
-                    "bbox": getattr(node, "bbox", None),
-                })
-                results["extracted_text"] += node.text + "\n"
-                results["confidence_scores"].append(node.confidence)
     results["average_confidence"] = sum(results["confidence_scores"]) / len(results["confidence_scores"]) if results["confidence_scores"] else 0
     return results
-def create_visualization(image, collection, visualization_type, show_confidence, highlight_low_confidence):
     viz_image = image.copy()
     draw = ImageDraw.Draw(viz_image)
@@ -300,34 +295,33 @@ def create_visualization(image, collection, visualization_type, show_confidence,
     except:
         font = ImageFont.load_default()
-    for page in collection.pages:
-        for node in page.traverse():
-            if hasattr(node, "bbox") and hasattr(node, "text") and node.bbox and node.text:
-                bbox = node.bbox
-                confidence = getattr(node, "confidence", 1.0)
-                if visualization_type == "overlay":
-                    color = (255, 165, 0) if highlight_low_confidence and confidence < 0.7 else (0, 255, 0)
-                    draw.rectangle(bbox, outline=color, width=2)
-                    if show_confidence:
-                        draw.text((bbox[0], bbox[1] - 15), f"{confidence:.2f}", fill=color, font=font)
-                elif visualization_type == "confidence_heatmap":
-                    if confidence < 0.5:
-                        color = (255, 0, 0, 100)
-                    elif confidence < 0.8:
-                        color = (255, 255, 0, 100)
-                    else:
-                        color = (0, 255, 0, 100)
-                    overlay = Image.new("RGBA", viz_image.size, (0, 0, 0, 0))
-                    overlay_draw = ImageDraw.Draw(overlay)
-                    overlay_draw.rectangle(bbox, fill=color)
-                    viz_image = Image.alpha_composite(viz_image.convert("RGBA"), overlay)
-                elif visualization_type == "text_regions":
-                    colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0)]
-                    color = colors[hash(str(bbox)) % len(colors)]
-                    draw.rectangle(bbox, outline=color, width=3)
     return viz_image.convert("RGB") if visualization_type == "confidence_heatmap" else viz_image
@@ -351,10 +345,10 @@ def create_htrflow_mcp_server():
                 fn=visualize_results,
                 inputs=[
                     gr.Textbox(label="Processing State (JSON)", placeholder="Paste processing results from HTR tool"),
                     gr.Dropdown(choices=["overlay", "confidence_heatmap", "text_regions"], value="overlay", label="Visualization Type"),
                     gr.Checkbox(value=True, label="Show Confidence Scores"),
                     gr.Checkbox(value=True, label="Highlight Low Confidence"),
-                    gr.Image(type="pil", label="Image (optional)"),
                 ],
                 outputs=gr.JSON(label="Visualization Results"),
                 title="Results Visualization Tool",
@@ -365,6 +359,7 @@ def create_htrflow_mcp_server():
                 fn=export_results,
                 inputs=[
                     gr.Textbox(label="Processing State (JSON)", placeholder="Paste processing results from HTR tool"),
                     gr.CheckboxGroup(choices=["txt", "json", "alto", "page"], value=["txt"], label="Output Formats"),
                     gr.Slider(0.0, 1.0, value=0.0, label="Confidence Filter"),
                 ],

         except Exception as pipeline_error:
             return {"success": False, "error": f"Pipeline execution failed: {str(pipeline_error)}", "results": None}
         results = extract_text_results(processed_collection, confidence_threshold)
+        collection_data = serialize_collection_data(processed_collection)
         processing_state = {
+            "collection_data": collection_data,
             "document_type": document_type,
             "confidence_threshold": confidence_threshold,
             "timestamp": datetime.now().isoformat(),
         return {
             "success": True,
             "results": results,
+            "processing_state": json.dumps(processing_state),
             "metadata": {
                 "total_lines": len(results.get("text_lines", [])),
                 "average_confidence": results.get("average_confidence", 0),
         if os.path.exists(temp_image_path):
             os.unlink(temp_image_path)
+def visualize_results(processing_state: str, image: Image.Image, visualization_type: Literal["overlay", "confidence_heatmap", "text_regions"] = "overlay", show_confidence: bool = True, highlight_low_confidence: bool = True) -> Dict:
     """Generate interactive visualizations of HTR processing results."""
     try:
+        if image is None:
+            return {"success": False, "error": "Image is required for visualization", "visualization": None}
         state = json.loads(processing_state)
+        collection_data = state["collection_data"]
+        viz_image = create_visualization(image, collection_data, visualization_type, show_confidence, highlight_low_confidence)
+        img_buffer = io.BytesIO()
+        viz_image.save(img_buffer, format="PNG")
+        img_base64 = base64.b64encode(img_buffer.getvalue()).decode("utf-8")
+        return {
+            "success": True,
+            "visualization": {
+                "image_base64": img_base64,
+                "image_format": "PNG",
+                "visualization_type": visualization_type,
+                "dimensions": viz_image.size,
+            },
+            "metadata": {"total_elements": len(collection_data.get("text_elements", []))},
+        }
     except Exception as e:
         return {"success": False, "error": f"Visualization generation failed: {str(e)}", "visualization": None}
+def export_results(processing_state: str, image: Image.Image, output_formats: List[Literal["txt", "json", "alto", "page"]] = ["txt"], confidence_filter: float = 0.0) -> Dict:
     """Export HTR results to multiple formats using HTRflow's native export functionality."""
     try:
+        if image is None:
+            return {"success": False, "error": "Image is required for export", "exports": None}
         state = json.loads(processing_state)
         with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
             image.save(temp_file.name, "PNG")
             temp_image_path = temp_file.name
     results = {"extracted_text": "", "text_lines": [], "confidence_scores": []}
     for page in collection.pages:
         for node in page.traverse():
+            if hasattr(node, "text") and node.text:
+                confidence = getattr(node, "confidence", 1.0)
+                if confidence >= confidence_threshold:
+                    results["text_lines"].append({
+                        "text": node.text,
+                        "confidence": confidence,
+                        "bbox": getattr(node, "bbox", None),
+                    })
+                    results["extracted_text"] += node.text + "\n"
+                    results["confidence_scores"].append(confidence)
     results["average_confidence"] = sum(results["confidence_scores"]) / len(results["confidence_scores"]) if results["confidence_scores"] else 0
     return results
+def serialize_collection_data(collection: Collection) -> Dict:
+    text_elements = []
+    for page in collection.pages:
+        for node in page.traverse():
+            if hasattr(node, "text") and node.text:
+                text_elements.append({
+                    "text": node.text,
+                    "confidence": getattr(node, "confidence", 1.0),
+                    "bbox": getattr(node, "bbox", None),
+                })
+    return {"text_elements": text_elements}
+def create_visualization(image, collection_data, visualization_type, show_confidence, highlight_low_confidence):
     viz_image = image.copy()
     draw = ImageDraw.Draw(viz_image)
     except:
         font = ImageFont.load_default()
+    for element in collection_data.get("text_elements", []):
+        if element.get("bbox"):
+            bbox = element["bbox"]
+            confidence = element.get("confidence", 1.0)
+            if visualization_type == "overlay":
+                color = (255, 165, 0) if highlight_low_confidence and confidence < 0.7 else (0, 255, 0)
+                draw.rectangle(bbox, outline=color, width=2)
+                if show_confidence:
+                    draw.text((bbox[0], bbox[1] - 15), f"{confidence:.2f}", fill=color, font=font)
+            elif visualization_type == "confidence_heatmap":
+                if confidence < 0.5:
+                    color = (255, 0, 0, 100)
+                elif confidence < 0.8:
+                    color = (255, 255, 0, 100)
+                else:
+                    color = (0, 255, 0, 100)
+                overlay = Image.new("RGBA", viz_image.size, (0, 0, 0, 0))
+                overlay_draw = ImageDraw.Draw(overlay)
+                overlay_draw.rectangle(bbox, fill=color)
+                viz_image = Image.alpha_composite(viz_image.convert("RGBA"), overlay)
+            elif visualization_type == "text_regions":
+                colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0)]
+                color = colors[hash(str(bbox)) % len(colors)]
+                draw.rectangle(bbox, outline=color, width=3)
     return viz_image.convert("RGB") if visualization_type == "confidence_heatmap" else viz_image
                 fn=visualize_results,
                 inputs=[
                     gr.Textbox(label="Processing State (JSON)", placeholder="Paste processing results from HTR tool"),
+                    gr.Image(type="pil", label="Image"),
                     gr.Dropdown(choices=["overlay", "confidence_heatmap", "text_regions"], value="overlay", label="Visualization Type"),
                     gr.Checkbox(value=True, label="Show Confidence Scores"),
                     gr.Checkbox(value=True, label="Highlight Low Confidence"),
                 ],
                 outputs=gr.JSON(label="Visualization Results"),
                 title="Results Visualization Tool",
                 fn=export_results,
                 inputs=[
                     gr.Textbox(label="Processing State (JSON)", placeholder="Paste processing results from HTR tool"),
+                    gr.Image(type="pil", label="Image"),
                     gr.CheckboxGroup(choices=["txt", "json", "alto", "page"], value=["txt"], label="Output Formats"),
                     gr.Slider(0.0, 1.0, value=0.0, label="Confidence Filter"),
                 ],