File size: 5,202 Bytes
2210de6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import os, tempfile, traceback
import gradio as gr
import spaces
import requests

# ---------- Cache & HF Hub settings (before importing Docling) ----------
# Route every HF cache to persistent storage (/data) and disable xet
# (works around PermissionDenied errors seen in some Spaces).
_ENV_DEFAULTS = {
    "HF_HOME": "/data/.cache/huggingface",
    "HF_HUB_CACHE": "/data/.cache/huggingface/hub",
    "TRANSFORMERS_CACHE": "/data/.cache/huggingface/transformers",
    "HF_HUB_ENABLE_XET": "0",          # avoid xet write issues
    "HF_HUB_ENABLE_HF_TRANSFER": "1",  # faster downloads
    "TOKENIZERS_PARALLELISM": "false",
}
for _key, _value in _ENV_DEFAULTS.items():
    os.environ.setdefault(_key, _value)

# Ensure the cache directories exist before any hub download runs.
for _var in ("HF_HOME", "HF_HUB_CACHE", "TRANSFORMERS_CACHE"):
    os.makedirs(os.environ[_var], exist_ok=True)

# ---------- Imports after env is set ----------
from docling.datamodel.base_models import InputFormat
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline

# Detect CUDA (ZeroGPU will make this true on first decorated call)
try:
    import torch

    HAS_CUDA = torch.cuda.is_available()
    # Keep CPU thread usage modest on shared infrastructure.
    _requested_threads = int(os.environ.get("OMP_NUM_THREADS", "2"))
    torch.set_num_threads(max(1, _requested_threads))
except Exception:
    # torch missing or misconfigured — fall back to CPU-only behavior.
    HAS_CUDA = False

# Build converters once (lifetime of app)
# Standard = text-first (faster, good when PDF has text layer)
_standard_pdf_option = PdfFormatOption()
std_converter = DocumentConverter(
    format_options={InputFormat.PDF: _standard_pdf_option}
)
# VLM = Granite Docling (better for scans/tables/math)
_vlm_pdf_option = PdfFormatOption(pipeline_cls=VlmPipeline)
vlm_converter = DocumentConverter(
    format_options={InputFormat.PDF: _vlm_pdf_option}
)

# ---------- Helpers ----------
def _success(md: str, html: str):
    """Persist converted output to disk and build the UI result tuple.

    Writes *md* and *html* into a fresh per-call temporary directory.
    The previous version used fixed names ("output.md"/"output.html")
    in the shared system temp dir, so two concurrent conversions would
    clobber each other's download files; a unique directory per call
    fixes that race.

    Args:
        md: Markdown text of the converted document.
        html: HTML text of the converted document.

    Returns:
        (markdown_text, markdown_file_path, html_file_path) — the shape
        expected by the Gradio outputs [preview, md download, html download].
    """
    out_dir = tempfile.mkdtemp(prefix="docling_")
    md_path = os.path.join(out_dir, "output.md")
    html_path = os.path.join(out_dir, "output.html")
    with open(md_path, "w", encoding="utf-8") as f:
        f.write(md)
    with open(html_path, "w", encoding="utf-8") as f:
        f.write(html)
    return md, md_path, html_path

def _fail(msg: str):
    """Render *msg* as a readable error in the Markdown preview panel.

    Returns the same 3-tuple shape as a successful conversion, with
    ``None`` for both download-file slots.
    """
    error_md = f"**Conversion failed**:\n```\n{msg}\n```"
    return error_md, None, None

def _convert_local_path(path: str, use_vlm: bool):
    """Convert one local document and return (markdown, md_path, html_path).

    Dispatches to the VLM (Granite) converter when *use_vlm* is true,
    otherwise the standard text-first converter. Every exception is caught
    and rendered through _fail so the UI always receives a well-formed
    result tuple instead of a raw traceback.
    """
    try:
        converter = vlm_converter if use_vlm else std_converter
        document = converter.convert(source=path).document
        markdown = document.export_to_markdown()
        html = document.export_to_html()
        return _success(markdown, html)
    except Exception as exc:
        return _fail(f"{exc}\n\n{traceback.format_exc()}")

# ---------- GPU-decorated endpoints (ZeroGPU requirement) ----------
@spaces.GPU(duration=600)  # up to 10 minutes
def run_convert_file(file, mode):
    """Convert an uploaded PDF; GPU-decorated for ZeroGPU allocation.

    Depending on the Gradio version, ``gr.File`` delivers either a
    tempfile-like wrapper (with a ``.name`` attribute) or a plain filepath
    string (Gradio 4 default ``type="filepath"``); the original only
    handled the wrapper and crashed with AttributeError on a str. Both
    shapes are accepted now.

    Args:
        file: Upload from gr.File, or None when nothing was uploaded.
        mode: Radio label; a "VLM" prefix selects the Granite pipeline.

    Returns:
        (markdown_text, md_file_path, html_file_path) — or the _fail tuple.
    """
    if file is None:
        return _fail("No file provided.")
    path = file if isinstance(file, str) else file.name
    use_vlm = mode.startswith("VLM")
    return _convert_local_path(path, use_vlm)

@spaces.GPU(duration=600)
def run_convert_url(url, mode):
    """Download a PDF from *url*, convert it, and clean up; ZeroGPU endpoint.

    Docling always reads a local path, so the URL is first streamed to a
    temporary file. The streamed HTTP response is context-managed so the
    connection is released even on errors (the original never closed it),
    and the temp file is removed both on conversion completion and on a
    failed/partial download.

    Args:
        url: Public PDF URL; empty/None yields a _fail tuple.
        mode: Radio label; a "VLM" prefix selects the Granite pipeline.

    Returns:
        (markdown_text, md_file_path, html_file_path) — or the _fail tuple.
    """
    if not url:
        return _fail("No URL provided.")
    tmp_path = None
    try:
        # stream=True keeps large PDFs out of memory; 1 MiB chunks.
        with requests.get(url, stream=True, timeout=60) as r:
            r.raise_for_status()
            fd, tmp_path = tempfile.mkstemp(suffix=".pdf")
            with os.fdopen(fd, "wb") as tmp:
                for chunk in r.iter_content(chunk_size=1 << 20):
                    if chunk:
                        tmp.write(chunk)
    except Exception as e:
        # Remove a partially written file before reporting the failure.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass
        return _fail(f"Failed to download URL: {e}")
    try:
        return _convert_local_path(tmp_path, mode.startswith("VLM"))
    finally:
        try:
            os.remove(tmp_path)
        except Exception:
            pass

# ---------- UI ----------
# NOTE(review): HAS_CUDA is sampled once at import time; under ZeroGPU the GPU
# only attaches inside @spaces.GPU calls, so this banner usually reports CPU at
# startup — confirm that is the intended message.
subtitle = "Device: **CUDA (ZeroGPU)**" if HAS_CUDA else "Device: **CPU** (GPU warms on first call)"

with gr.Blocks(title="Granite-Docling 258M β€” PDF β†’ Markdown/HTML") as demo:
    # Header: title, device banner, and a short guide to the two modes.
    gr.Markdown(
        f"""# Granite-Docling 258M β€” PDF β†’ Markdown / HTML  
{subtitle}

**Modes**
- **Standard (faster)** β†’ PDFs with a text layer  
- **VLM (Granite – better for complex/scanned)** β†’ scans / heavy tables / formulas

_First call may be slow while models download and ZeroGPU warms. Cache lives in `/data`._
"""
    )

    # Mode selector shared by both tabs; the run_convert_* endpoints dispatch
    # on the "VLM" label prefix.
    mode = gr.Radio(
        ["Standard (faster)", "VLM (Granite – better for complex/scanned)"],
        value="Standard (faster)", label="Mode"
    )

    # Tab 1: convert a locally uploaded PDF.
    with gr.Tab("Upload PDF"):
        fi = gr.File(file_types=[".pdf"], label="PDF")
        out_md = gr.Markdown(label="Markdown Preview")
        dl_md = gr.File(label="Download Markdown")
        dl_html = gr.File(label="Download HTML")
        gr.Button("Convert").click(run_convert_file, [fi, mode], [out_md, dl_md, dl_html])

    # Tab 2: convert a PDF fetched from a public URL.
    with gr.Tab("Convert from URL"):
        url = gr.Textbox(label="Public PDF URL", placeholder="https://.../file.pdf")
        out2_md = gr.Markdown(label="Markdown Preview")
        dl2_md = gr.File(label="Download Markdown")
        dl2_html = gr.File(label="Download HTML")
        gr.Button("Convert").click(run_convert_url, [url, mode], [out2_md, dl2_md, dl2_html])

# Explicit bind & queue
# queue() serializes requests through Gradio's queue (needed for ZeroGPU);
# the host-injected PORT env var overrides the default 7860 when present.
demo.queue().launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))