"""Gradio demo that extracts text from an uploaded PDF or image with MinerU2.5."""

import os

import fitz  # PyMuPDF
import gradio as gr
import torch
from mineru_vl_utils.mineru_client import MinerUClient
from PIL import Image

# Init client
model_path = "opendatalab/MinerU2.5-2509-1.2B"
# NOTE(review): `device` is only reported in the log line below; it is not
# passed to MinerUClient, so actual placement is decided by the backend.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"⚡ Loading MinerU on device: {device}")
client = MinerUClient(backend="transformers", model_path=model_path)

# Separator inserted between the extracted text of consecutive pages.
PAGE_SEPARATOR = "\n\n--- PAGE ---\n\n"

# Field names probed, in order, when looking for a block's text payload.
# `text` is what this script originally read; `content` is presumably the
# field mineru-vl-utils uses for block payloads — TODO confirm against the
# installed library version.
_BLOCK_TEXT_FIELDS = ("text", "content")


def _resolve_path(file):
    """Return the filesystem path of a Gradio upload.

    Accepts either a plain path string (what ``gr.File(type="filepath")``
    passes) or a file-like object exposing the path as ``.name``.
    """
    return getattr(file, "name", file)


def _load_images(path):
    """Load the document at *path* as a list of PIL images, one per page.

    A ``.pdf`` file is rasterised page by page with PyMuPDF; anything else is
    opened as a single image with Pillow.
    """
    ext = os.path.splitext(path)[-1].lower()
    if ext != ".pdf":
        # Copy the pixel data so the underlying file handle can be closed
        # before the image is handed to the model.
        with Image.open(path) as img:
            return [img.copy()]

    images = []
    # The context manager closes the document even if rendering fails.
    with fitz.open(path) as doc:
        for i, page in enumerate(doc):
            pix = page.get_pixmap()
            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            images.append(img)
            print(f"✅ Converted page {i+1} to image")
    return images


def _block_text(block):
    """Return the non-empty text carried by one extracted block, or ``""``.

    Each name in ``_BLOCK_TEXT_FIELDS`` is looked up first as an attribute and
    then, for mapping-style blocks, as a key.
    """
    for field in _BLOCK_TEXT_FIELDS:
        value = getattr(block, field, None)
        if not value and isinstance(block, dict):
            value = block.get(field)
        if value and isinstance(value, str):
            return value
    return ""


def _extract_page_text(img, page_number):
    """Run MinerU on one page image and return its text or a placeholder."""
    print(f"🔍 Extracting text from page {page_number}")
    blocks = client.two_step_extract(img)
    if not blocks:
        print(f"⚠️ No blocks found on page {page_number}")
        return "[EMPTY PAGE]"

    text_blocks = [text for text in map(_block_text, blocks) if text]
    page_text = "\n".join(text_blocks) if text_blocks else "[NO TEXT FOUND]"
    print(f"✅ Page {page_number} extracted")
    return page_text


def extract_from_file(file):
    """Extract text from an uploaded PDF or image.

    Args:
        file: The upload handed over by Gradio — a path string, or an object
            whose ``.name`` is the path. ``None`` means nothing was uploaded.

    Returns:
        The text of every page joined by ``PAGE_SEPARATOR``. Pages without
        blocks or without text contribute ``"[EMPTY PAGE]"`` or
        ``"[NO TEXT FOUND]"``. On any failure a string starting with
        ``"Error during extraction: "`` is returned instead of raising, so the
        message is shown in the UI textbox.
    """
    if file is None:
        return "Error during extraction: no file was uploaded."

    # Deliberately broad: this is the UI boundary, and every failure should
    # surface as a readable message rather than a stack trace in the browser.
    try:
        path = _resolve_path(file)
        print(f"📄 Processing file: {path}")
        images = _load_images(path)
        results = [
            _extract_page_text(img, page_number)
            for page_number, img in enumerate(images, start=1)
        ]
        return PAGE_SEPARATOR.join(results)
    except Exception as e:
        print(f"❌ ERROR: {e}")
        return f"Error during extraction: {str(e)}"


demo = gr.Interface(
    fn=extract_from_file,
    inputs=gr.File(type="filepath", label="Upload PDF or Image"),
    outputs=gr.Textbox(label="Extracted Text", lines=20),
    title="MinerU2.5 Document Extractor",
    description="Upload a PDF or Image to extract structured text using MinerU2.5.",
)

# Only start the web server when run as a script; importing the module still
# exposes `demo` for tooling that launches it itself.
if __name__ == "__main__":
    demo.launch()