"""Gradio demo that runs MinerU2.5 document extraction on an uploaded image."""

import gradio as gr
from PIL import Image
from mineru_vl_utils import MinerUClient
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

# Load the MinerU model once at import time so every request reuses it.
model_path = "opendatalab/MinerU2.5-2509-1.2B"
# NOTE(review): newer transformers releases reportedly prefer `dtype=` over
# `torch_dtype=`; confirm against the pinned transformers version.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    model_path,
    torch_dtype="auto",
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(model_path, use_fast=True)
client = MinerUClient(
    backend="transformers",
    model=model,
    processor=processor,
)

# Field names probed on each extracted block, in priority order. "text" is
# kept first for backward compatibility; "content" is presumably what
# mineru_vl_utils blocks actually expose -- verify against the installed
# version.
_TEXT_FIELDS = ("text", "content")


def _block_text(block):
    """Return the string payload of one extracted block, or None if absent."""
    for field in _TEXT_FIELDS:
        value = getattr(block, field, None)
        # Blocks may also be plain mappings without attribute access.
        if value is None and isinstance(block, dict):
            value = block.get(field)
        if isinstance(value, str):
            return value
    return None


def extract_from_image(image) -> str:
    """Extract text from an image with MinerU and return it as one string.

    Args:
        image: A ``PIL.Image.Image``, an array accepted by
            ``Image.fromarray``, or ``None`` (no image supplied).

    Returns:
        The text of every extracted block that carries a string payload,
        joined with newlines. Returns an empty string when ``image`` is
        ``None`` or when no block carries text.
    """
    # Submitting the form without an upload yields None; nothing to extract.
    if image is None:
        return ""

    # Convert array inputs to a PIL image if necessary.
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    # Run the extraction.
    blocks = client.two_step_extract(image)

    # Concatenate the text of the blocks that carry any; blocks without a
    # string payload are skipped.
    texts = (_block_text(block) for block in blocks)
    return "\n".join(text for text in texts if text is not None)


# Gradio interface
demo = gr.Interface(
    fn=extract_from_image,
    inputs=gr.Image(type="pil"),
    outputs="text",
    title="MinerU2.5 - Document Extract",
    description="Upload an image or PDF page and extract structured text with MinerU2.5",
)

if __name__ == "__main__":
    demo.launch()