Instantnewdesign committed on
Commit
87cc209
·
verified ·
1 Parent(s): 489af81

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -20
app.py CHANGED
@@ -1,26 +1,41 @@
1
  import gradio as gr
2
- import logging
3
- logging.basicConfig(level=logging.DEBUG)
 
 
4
 
5
- try:
6
- from mineru_vl_utils import MinerUClient
7
- print(" mineru-vl-utils importé")
8
- except Exception as e:
9
- print("❌ Erreur import mineru-vl-utils:", e)
10
 
11
- try:
12
- client = MinerUClient(
13
- backend="transformers",
14
- model_path="opendatalab/MinerU2.5-2509-1.2B"
15
- )
16
- print("✅ Client MinerU initialisé")
17
- except Exception as e:
18
- print("❌ Erreur init MinerUClient:", e)
19
 
20
- def test_fn():
21
- return "Space lancé avec succès ✅"
 
 
 
 
 
 
22
 
23
- demo = gr.Interface(fn=test_fn, inputs=[], outputs="text")
 
 
 
 
24
 
25
- if __name__ == "__main__":
26
- demo.launch()
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from mineru_vl_utils.mineru_client import MinerUClient
3
+ from PIL import Image
4
+ import fitz # PyMuPDF pour lire les PDFs
5
+ import os
6
 
7
# Build the MinerU 2.5 client once at start-up so that every request reuses
# the same loaded model instead of re-initialising it per upload.
model_path = "opendatalab/MinerU2.5-2509-1.2B"
client = MinerUClient(backend="transformers", model_path=model_path)

# Resolution used to rasterise PDF pages. PyMuPDF renders at 72 dpi unless a
# scaling matrix is given, which leaves small print hard to recognise.
PDF_RENDER_DPI = 200


def _resolve_path(file):
    """Return the filesystem path of an uploaded file.

    Gradio 4+ passes a plain path string to the callback, while Gradio 3.x
    passes a temp-file wrapper exposing ``.name``; both forms are accepted.
    """
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    return file.name


def _render_pdf_pages(path):
    """Rasterise every page of the PDF at *path* into an RGB PIL image."""
    zoom = PDF_RENDER_DPI / 72  # PDF user space is 72 units per inch
    matrix = fitz.Matrix(zoom, zoom)
    pages = []
    # The context manager closes the document even if a page fails to render.
    with fitz.open(path) as doc:
        for page in doc:
            # alpha=False guarantees 3 bytes per pixel, matching mode "RGB".
            pix = page.get_pixmap(matrix=matrix, alpha=False)
            pages.append(
                Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            )
    return pages


def _load_image(path):
    """Open the image at *path* and return a detached RGB copy."""
    # convert() forces the pixel data to load, so the file handle can be
    # released right away; it also normalises palette/alpha/greyscale input.
    with Image.open(path) as img:
        return img.convert("RGB")


def _block_text(block):
    """Return the text carried by one extracted block, or None if it has none.

    The original ``text`` attribute is tried first so existing behaviour is
    kept whenever it is present.
    NOTE(review): mineru-vl-utils blocks appear to be dict-like objects that
    expose their text as ``content`` rather than ``text`` - confirm against
    the installed library version.
    """
    for key in ("text", "content"):
        value = getattr(block, key, None)
        if value is None and isinstance(block, dict):
            value = block.get(key)
        if isinstance(value, str) and value:
            return value
    return None


def extract_from_file(file):
    """Extract the text of an uploaded PDF or image with MinerU.

    Args:
        file: The upload handed over by ``gr.File`` - either a path string or
            an object with a ``.name`` path attribute. ``None`` means nothing
            was uploaded.

    Returns:
        The text of every page, each page's blocks joined by newlines and the
        pages separated by a ``--- PAGE ---`` marker line.

    Raises:
        gr.Error: If no file was uploaded.
    """
    if file is None:
        raise gr.Error("Please upload a PDF or an image first.")

    path = _resolve_path(file)

    # Decide between PDF and image handling from the file extension.
    ext = os.path.splitext(path)[-1].lower()
    if ext == ".pdf":
        images = _render_pdf_pages(path)
    else:
        images = [_load_image(path)]

    results = []
    for img in images:
        blocks = client.two_step_extract(img)
        text_blocks = [text for text in map(_block_text, blocks) if text]
        results.append("\n".join(text_blocks))

    return "\n\n--- PAGE ---\n\n".join(results)


demo = gr.Interface(
    fn=extract_from_file,
    # No explicit ``type``: the value "file" is rejected by Gradio 4+, and the
    # default of each major version is handled by extract_from_file.
    inputs=gr.File(label="Upload PDF or Image"),
    outputs=gr.Textbox(label="Extracted Text", lines=20),
    title="MinerU2.5 Document Extractor",
    description="Upload a PDF or Image to extract structured text using MinerU2.5.",
)

# Launch only when run as a script, so importing this module does not start a
# server as a side effect.
if __name__ == "__main__":
    demo.launch()