Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -117,66 +117,68 @@ PIPELINE_CONFIGS = {
|
|
| 117 |
}
|
| 118 |
|
| 119 |
@spaces.GPU
|
| 120 |
-
def process_htr(image: Image.Image, document_type: Literal["letter_english", "letter_swedish", "spread_english", "spread_swedish"] = "
|
| 121 |
"""Process handwritten text recognition on uploaded images using HTRflow pipelines."""
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
return {"success": False, "error": "No image provided", "results": None}
|
| 125 |
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
|
| 139 |
-
|
| 140 |
-
|
|
|
|
|
|
|
| 141 |
processed_collection = pipeline.run(collection)
|
|
|
|
|
|
|
| 142 |
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
finally:
|
| 170 |
-
if os.path.exists(temp_image_path):
|
| 171 |
-
os.unlink(temp_image_path)
|
| 172 |
except Exception as e:
|
| 173 |
return {"success": False, "error": f"HTR processing failed: {str(e)}", "results": None}
|
|
|
|
|
|
|
|
|
|
| 174 |
|
| 175 |
def visualize_results(processing_state: str, visualization_type: Literal["overlay", "confidence_heatmap", "text_regions"] = "overlay", show_confidence: bool = True, highlight_low_confidence: bool = True, image: Optional[Image.Image] = None) -> Dict:
|
| 176 |
"""Generate interactive visualizations of HTR processing results."""
|
| 177 |
try:
|
| 178 |
state = json.loads(processing_state)
|
| 179 |
-
collection_data = state["collection_data"]
|
| 180 |
|
| 181 |
if image is not None:
|
| 182 |
original_image = image
|
|
@@ -184,22 +186,36 @@ def visualize_results(processing_state: str, visualization_type: Literal["overla
|
|
| 184 |
image_data = base64.b64decode(state["image_base64"])
|
| 185 |
original_image = Image.open(io.BytesIO(image_data))
|
| 186 |
|
| 187 |
-
|
|
|
|
|
|
|
|
|
|
| 188 |
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
|
|
|
|
|
|
|
|
|
| 192 |
|
| 193 |
-
|
| 194 |
-
"
|
| 195 |
-
"
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
"
|
| 199 |
-
"
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
except Exception as e:
|
| 204 |
return {"success": False, "error": f"Visualization generation failed: {str(e)}", "visualization": None}
|
| 205 |
|
|
@@ -230,9 +246,14 @@ def export_results(processing_state: str, output_formats: List[Literal["txt", "j
|
|
| 230 |
for root, _, files in os.walk(export_dir):
|
| 231 |
for file in files:
|
| 232 |
file_path = os.path.join(root, file)
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
|
| 237 |
exports[fmt] = export_files
|
| 238 |
|
|
@@ -270,19 +291,7 @@ def extract_text_results(collection: Collection, confidence_threshold: float) ->
|
|
| 270 |
results["average_confidence"] = sum(results["confidence_scores"]) / len(results["confidence_scores"]) if results["confidence_scores"] else 0
|
| 271 |
return results
|
| 272 |
|
| 273 |
-
def
|
| 274 |
-
text_elements = []
|
| 275 |
-
for page in collection.pages:
|
| 276 |
-
for node in page.traverse():
|
| 277 |
-
if hasattr(node, "text") and node.text:
|
| 278 |
-
text_elements.append({
|
| 279 |
-
"text": node.text,
|
| 280 |
-
"confidence": getattr(node, "confidence", 1.0),
|
| 281 |
-
"bbox": getattr(node, "bbox", None),
|
| 282 |
-
})
|
| 283 |
-
return {"text_elements": text_elements}
|
| 284 |
-
|
| 285 |
-
def create_visualization(image, collection_data, visualization_type, show_confidence, highlight_low_confidence):
|
| 286 |
viz_image = image.copy()
|
| 287 |
draw = ImageDraw.Draw(viz_image)
|
| 288 |
|
|
@@ -291,33 +300,34 @@ def create_visualization(image, collection_data, visualization_type, show_confid
|
|
| 291 |
except:
|
| 292 |
font = ImageFont.load_default()
|
| 293 |
|
| 294 |
-
for
|
| 295 |
-
|
| 296 |
-
bbox
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
|
|
|
| 321 |
|
| 322 |
return viz_image.convert("RGB") if visualization_type == "confidence_heatmap" else viz_image
|
| 323 |
|
|
|
|
| 117 |
}
|
| 118 |
|
| 119 |
@spaces.GPU
def process_htr(
    image: Image.Image,
    document_type: Literal["letter_english", "letter_swedish", "spread_english", "spread_swedish"] = "letter_english",
    confidence_threshold: float = 0.8,
    custom_settings: Optional[str] = None,
) -> Dict:
    """Process handwritten text recognition on uploaded images using HTRflow pipelines.

    Args:
        image: Input PIL image containing the handwritten document.
        document_type: Key into PIPELINE_CONFIGS selecting a predefined
            pipeline; ignored when ``custom_settings`` is provided.
        confidence_threshold: Threshold forwarded to extract_text_results().
        custom_settings: Optional JSON string with a full pipeline config
            that overrides the predefined one.

    Returns:
        On success: ``{"success": True, "results": ..., "processing_state": <JSON str>,
        "metadata": ...}`` where ``processing_state`` is consumed by
        visualize_results()/export_results(). On failure: ``{"success": False,
        "error": <message>, "results": None}`` (this function does not raise).
    """
    if image is None:
        return {"success": False, "error": "No image provided", "results": None}

    # HTRflow's Collection is constructed from file paths, so persist the
    # uploaded image to a temporary PNG (deleted in the finally block below).
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
        image.save(temp_file.name, "PNG")
        temp_image_path = temp_file.name

    try:
        if custom_settings:
            try:
                config = json.loads(custom_settings)
            except json.JSONDecodeError:
                return {"success": False, "error": "Invalid JSON in custom_settings parameter", "results": None}
        else:
            config = PIPELINE_CONFIGS[document_type]

        collection = Collection([temp_image_path])
        pipeline = Pipeline.from_config(config)

        try:
            processed_collection = pipeline.run(collection)
        except Exception as pipeline_error:
            return {"success": False, "error": f"Pipeline execution failed: {str(pipeline_error)}", "results": None}

        # Embed the original image in the state so downstream steps can work
        # from the JSON string alone (no server-side session needed).
        img_buffer = io.BytesIO()
        image.save(img_buffer, format="PNG")
        image_base64 = base64.b64encode(img_buffer.getvalue()).decode("utf-8")

        results = extract_text_results(processed_collection, confidence_threshold)

        # FIX: the Collection object is no longer stored in the JSON state.
        # json.dumps(..., default=str) would only record its repr string,
        # which cannot be deserialized back into a Collection; the consumer
        # (visualize_results) rebuilds the collection from image_base64 +
        # document_type instead, so the key was dead weight.
        processing_state = {
            "image_base64": image_base64,
            "image_size": image.size,
            "document_type": document_type,
            "confidence_threshold": confidence_threshold,
            "timestamp": datetime.now().isoformat(),
        }

        return {
            "success": True,
            "results": results,
            "processing_state": json.dumps(processing_state, default=str),
            "metadata": {
                "total_lines": len(results.get("text_lines", [])),
                "average_confidence": results.get("average_confidence", 0),
                "document_type": document_type,
                "image_dimensions": image.size,
            },
        }
    except Exception as e:
        return {"success": False, "error": f"HTR processing failed: {str(e)}", "results": None}
    finally:
        # Always remove the temp PNG, even on early returns above.
        if os.path.exists(temp_image_path):
            os.unlink(temp_image_path)
|
| 177 |
|
| 178 |
def visualize_results(processing_state: str, visualization_type: Literal["overlay", "confidence_heatmap", "text_regions"] = "overlay", show_confidence: bool = True, highlight_low_confidence: bool = True, image: Optional[Image.Image] = None) -> Dict:
    """Generate interactive visualizations of HTR processing results.

    Args:
        processing_state: JSON string produced by process_htr() — must contain
            at least ``image_base64`` and ``document_type``.
        visualization_type: "overlay" (boxes + confidence labels),
            "confidence_heatmap" (translucent color bands) or "text_regions"
            (colored region outlines).
        show_confidence: Forwarded to create_visualization().
        highlight_low_confidence: Forwarded to create_visualization().
        image: Optional PIL image; when given it takes precedence over the
            base64 image embedded in the state.

    Returns:
        ``{"success": True, "visualization": {...}, "metadata": {...}}`` with a
        base64-encoded PNG, or ``{"success": False, "error": ..., "visualization": None}``.
    """
    try:
        state = json.loads(processing_state)

        if image is not None:
            original_image = image
        else:
            # Fall back to the image embedded in the processing state.
            image_data = base64.b64decode(state["image_base64"])
            original_image = Image.open(io.BytesIO(image_data))

        # Recreate the collection from the stored image
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
            original_image.save(temp_file.name, "PNG")
            temp_image_path = temp_file.name

        try:
            # NOTE(review): this re-runs the entire HTR pipeline on every
            # visualization call, and unlike process_htr() it is NOT decorated
            # with @spaces.GPU — confirm this is intentional (it is expensive).
            collection = Collection([temp_image_path])
            pipeline = Pipeline.from_config(PIPELINE_CONFIGS[state["document_type"]])
            processed_collection = pipeline.run(collection)

            viz_image = create_visualization(original_image, processed_collection, visualization_type, show_confidence, highlight_low_confidence)

            # Return the rendered image as base64 PNG for the client.
            img_buffer = io.BytesIO()
            viz_image.save(img_buffer, format="PNG")
            img_base64 = base64.b64encode(img_buffer.getvalue()).decode("utf-8")

            return {
                "success": True,
                "visualization": {
                    "image_base64": img_base64,
                    "image_format": "PNG",
                    "visualization_type": visualization_type,
                    "dimensions": viz_image.size,
                },
                "metadata": {"visualization_type": visualization_type},
            }
        finally:
            # Clean up the temp PNG regardless of pipeline success.
            if os.path.exists(temp_image_path):
                os.unlink(temp_image_path)

    except Exception as e:
        return {"success": False, "error": f"Visualization generation failed: {str(e)}", "visualization": None}
|
| 221 |
|
|
|
|
| 246 |
for root, _, files in os.walk(export_dir):
|
| 247 |
for file in files:
|
| 248 |
file_path = os.path.join(root, file)
|
| 249 |
+
try:
|
| 250 |
+
with open(file_path, 'r', encoding='utf-8') as f:
|
| 251 |
+
content = f.read()
|
| 252 |
+
export_files.append({"filename": file, "content": content})
|
| 253 |
+
except UnicodeDecodeError:
|
| 254 |
+
with open(file_path, 'rb') as f:
|
| 255 |
+
content = base64.b64encode(f.read()).decode('utf-8')
|
| 256 |
+
export_files.append({"filename": file, "content": content, "encoding": "base64"})
|
| 257 |
|
| 258 |
exports[fmt] = export_files
|
| 259 |
|
|
|
|
| 291 |
results["average_confidence"] = sum(results["confidence_scores"]) / len(results["confidence_scores"]) if results["confidence_scores"] else 0
|
| 292 |
return results
|
| 293 |
|
| 294 |
+
def create_visualization(image, collection, visualization_type, show_confidence, highlight_low_confidence):
    """Render HTR results from *collection* onto a copy of *image*.

    Args:
        image: PIL image to annotate; the input is never modified in place.
        collection: Processed HTRflow collection whose traversed page nodes
            may carry ``bbox``, ``text`` and ``confidence`` attributes; nodes
            lacking a bbox or text are skipped.
        visualization_type: "overlay", "confidence_heatmap" or "text_regions".
        show_confidence: In overlay mode, draw the numeric confidence above each box.
        highlight_low_confidence: In overlay mode, draw boxes with
            confidence < 0.7 in orange instead of green.

    Returns:
        A new PIL image (converted to RGB for the heatmap, since compositing
        produces RGBA; other modes keep the copy's original mode).
    """
    viz_image = image.copy()
    draw = ImageDraw.Draw(viz_image)

    try:
        # NOTE(review): the original font setup lines are not visible in this
        # diff — confirm the actual truetype font/size used here.
        font = ImageFont.truetype("arial.ttf", 12)
    except Exception:  # FIX: was a bare `except:`, which also swallowed SystemExit/KeyboardInterrupt
        font = ImageFont.load_default()

    for page in collection.pages:
        for node in page.traverse():
            # Only draw nodes that actually have both a region and recognized text.
            if hasattr(node, "bbox") and hasattr(node, "text") and node.bbox and node.text:
                bbox = node.bbox  # assumed (x0, y0, x1, y1) in PIL rectangle order — TODO confirm
                confidence = getattr(node, "confidence", 1.0)

                if visualization_type == "overlay":
                    # Orange = suspect line, green = confident line.
                    color = (255, 165, 0) if highlight_low_confidence and confidence < 0.7 else (0, 255, 0)
                    draw.rectangle(bbox, outline=color, width=2)
                    if show_confidence:
                        # Label sits 15px above the box; may clip at the top edge.
                        draw.text((bbox[0], bbox[1] - 15), f"{confidence:.2f}", fill=color, font=font)

                elif visualization_type == "confidence_heatmap":
                    # Translucent red / yellow / green fill by confidence band.
                    if confidence < 0.5:
                        color = (255, 0, 0, 100)
                    elif confidence < 0.8:
                        color = (255, 255, 0, 100)
                    else:
                        color = (0, 255, 0, 100)
                    overlay = Image.new("RGBA", viz_image.size, (0, 0, 0, 0))
                    overlay_draw = ImageDraw.Draw(overlay)
                    overlay_draw.rectangle(bbox, fill=color)
                    # Rebinds viz_image; the outer `draw` still targets the
                    # pre-composite image, which is harmless because this
                    # branch never uses `draw` again.
                    viz_image = Image.alpha_composite(viz_image.convert("RGBA"), overlay)

                elif visualization_type == "text_regions":
                    colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0)]
                    # NOTE(review): hash() of a str is randomized per process
                    # (PYTHONHASHSEED), so region colors differ between runs —
                    # confirm that is acceptable.
                    color = colors[hash(str(bbox)) % len(colors)]
                    draw.rectangle(bbox, outline=color, width=3)

    return viz_image.convert("RGB") if visualization_type == "confidence_heatmap" else viz_image
|
| 333 |
|