"""Gradio app that converts PDFs to Markdown and HTML with Docling.

Two conversion modes are exposed: the standard Docling PDF pipeline and the
VLM pipeline (``VlmPipeline``). A PDF can be uploaded or fetched from a URL.
"""

import contextlib
import os
import tempfile
import traceback
from html import escape as _escape_html

import gradio as gr
import spaces
import requests

# ---------- Cache & HF Hub settings ----------
# ``setdefault`` keeps any value that the environment already provides.
os.environ.setdefault("HF_HOME", "/data/.cache/huggingface")
os.environ.setdefault("HF_HUB_CACHE", "/data/.cache/huggingface/hub")
os.environ.setdefault("TRANSFORMERS_CACHE", "/data/.cache/huggingface/transformers")
os.environ.setdefault("HF_HUB_ENABLE_XET", "0")
os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
for p in (os.environ["HF_HOME"], os.environ["HF_HUB_CACHE"], os.environ["TRANSFORMERS_CACHE"]):
    os.makedirs(p, exist_ok=True)

# ---------- Docling imports ----------
# Deliberately placed after the environment defaults above are in place.
from docling.datamodel.base_models import InputFormat  # noqa: E402
from docling.document_converter import DocumentConverter, PdfFormatOption  # noqa: E402
from docling.pipeline.vlm_pipeline import VlmPipeline  # noqa: E402

# CUDA info (informational)
try:
    import torch

    HAS_CUDA = torch.cuda.is_available()
    torch.set_num_threads(max(1, int(os.environ.get("OMP_NUM_THREADS", "2"))))
except Exception:
    # Any failure here (torch missing, bad OMP_NUM_THREADS, ...) only affects
    # the subtitle shown in the UI.
    HAS_CUDA = False

# Converters
std_converter = DocumentConverter(format_options={InputFormat.PDF: PdfFormatOption()})
vlm_converter = DocumentConverter(
    format_options={InputFormat.PDF: PdfFormatOption(pipeline_cls=VlmPipeline)}
)


# ---------- Helpers ----------
def _success(md: str, html: str):
    """Write the converted document to disk and build the UI result tuple.

    Each call writes into its own fresh temporary directory so that
    concurrent requests cannot overwrite each other's download files.

    Args:
        md: Markdown export of the document.
        html: HTML export of the document.

    Returns:
        ``(md, html, md_path, html_path)`` where the paths point at
        ``output.md`` / ``output.html`` containing the same text.
    """
    out_dir = tempfile.mkdtemp(prefix="docling_")
    md_path = os.path.join(out_dir, "output.md")
    html_path = os.path.join(out_dir, "output.html")
    for path, text in ((md_path, md), (html_path, html)):
        with open(path, "w", encoding="utf-8") as f:
            f.write(text)
    return md, html, md_path, html_path


def _fail(msg: str):
    """Build the UI result tuple for a failed conversion.

    Args:
        msg: Human-readable error text (may include a traceback).

    Returns:
        ``(markdown_error, html_error, None, None)``; the two ``None`` values
        leave the download slots empty.
    """
    err = f"**Conversion failed**:\n```\n{msg}\n```"
    # The second value is rendered by gr.HTML: escape it so text such as
    # "<module>" in a traceback is shown literally instead of being parsed
    # as markup, and wrap it in <pre> so line breaks are kept.
    return err, "<pre>" + _escape_html(err) + "</pre>", None, None


def _wants_vlm(mode) -> bool:
    """Return True when the selected mode label starts with ``"VLM"``."""
    return bool(mode) and mode.startswith("VLM")


def _convert_local_path(path: str, use_vlm: bool):
    """Convert the PDF at ``path`` and return the UI result tuple.

    Args:
        path: Filesystem path of the PDF to convert.
        use_vlm: Use ``vlm_converter`` when true, ``std_converter`` otherwise.

    Returns:
        The tuple from ``_success`` on success, or from ``_fail`` (message
        plus traceback) if anything raises.
    """
    try:
        conv = vlm_converter if use_vlm else std_converter
        doc = conv.convert(source=path).document
        md = doc.export_to_markdown()
        html = doc.export_to_html()
        return _success(md, html)
    except Exception as e:
        # UI boundary: report the failure to the user instead of raising.
        return _fail(f"{e}\n\n{traceback.format_exc()}")


# ---------- GPU-decorated endpoints ----------
@spaces.GPU(duration=600)
def run_convert_file(file, mode):
    """Convert an uploaded PDF.

    Args:
        file: Value of the ``gr.File`` input; either a path string or an
            object exposing the path as ``.name``. ``None`` if nothing was
            uploaded.
        mode: Selected mode label from the radio input.

    Returns:
        The four-element result tuple consumed by the output components.
    """
    if file is None:
        return _fail("No file provided.")
    # gr.File may hand over a plain filepath string or a file wrapper.
    path = file if isinstance(file, str) else file.name
    return _convert_local_path(path, _wants_vlm(mode))


@spaces.GPU(duration=600)
def run_convert_url(url, mode):
    """Download a PDF from ``url`` into a temporary file and convert it.

    Args:
        url: Address of the PDF to download.
        mode: Selected mode label from the radio input.

    Returns:
        The four-element result tuple consumed by the output components.
        Download problems are reported through ``_fail``.
    """
    url = (url or "").strip()
    if not url:
        return _fail("No URL provided.")

    tmp_path = None
    try:
        try:
            fd, tmp_path = tempfile.mkstemp(suffix=".pdf")
            # Both context managers are closed before conversion starts, so
            # the file is fully flushed and the connection is released.
            with os.fdopen(fd, "wb") as tmp, requests.get(url, stream=True, timeout=60) as r:
                r.raise_for_status()
                for chunk in r.iter_content(chunk_size=1 << 20):
                    if chunk:
                        tmp.write(chunk)
        except Exception as e:
            return _fail(f"Failed to download URL: {e}")
        return _convert_local_path(tmp_path, _wants_vlm(mode))
    finally:
        # Best-effort cleanup on every path, including failed downloads.
        if tmp_path is not None:
            with contextlib.suppress(OSError):
                os.remove(tmp_path)


# ---------- UI ----------
subtitle = "Device: **CUDA (ZeroGPU)**" if HAS_CUDA else "Device: **CPU** (GPU warms on first call)"

with gr.Blocks(title="Granite-Docling 258M — PDF → Markdown/HTML") as demo:
    gr.Markdown(
        f"""# Granite-Docling 258M — PDF → Markdown / HTML
{subtitle}

**Modes**
- **Standard (faster)** → PDFs with a text layer
- **VLM (Granite – better for complex/scanned)** → scans / heavy tables / formulas

_First call may be slow while models download and ZeroGPU warms.
Cache lives in `/data`._
"""
    )

    mode = gr.Radio(
        ["Standard (faster)", "VLM (Granite – better for complex/scanned)"],
        value="Standard (faster)",
        label="Mode",
    )

    with gr.Tab("Upload PDF"):
        fi = gr.File(file_types=[".pdf"], label="PDF")
        md_preview = gr.Markdown(label="Markdown Preview")
        html_preview = gr.HTML(label="HTML Preview")  # <— rendered HTML
        dl_md = gr.File(label="Download Markdown (.md)")
        dl_html = gr.File(label="Download HTML (.html)")
        gr.Button("Convert").click(
            fn=run_convert_file,
            inputs=[fi, mode],
            outputs=[md_preview, html_preview, dl_md, dl_html],
        )

    with gr.Tab("Convert from URL"):
        url = gr.Textbox(label="Public PDF URL", placeholder="https://.../file.pdf")
        md_preview2 = gr.Markdown(label="Markdown Preview")
        html_preview2 = gr.HTML(label="HTML Preview")
        dl_md2 = gr.File(label="Download Markdown (.md)")
        dl_html2 = gr.File(label="Download HTML (.html)")
        gr.Button("Convert").click(
            fn=run_convert_url,
            inputs=[url, mode],
            outputs=[md_preview2, html_preview2, dl_md2, dl_html2],
        )

# Bind & queue
demo.queue().launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))