Javedalam committed on
Commit
84635b6
·
verified ·
1 Parent(s): f5fbc8e

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -135
app.py DELETED
@@ -1,135 +0,0 @@
1
- import os, tempfile, traceback
2
- import gradio as gr
3
- import spaces
4
- import requests
5
-
6
# ---------- Cache & HF Hub settings (before importing Docling) ----------
# Persistent storage (/data) keeps model downloads across restarts; disabling
# xet avoids PermissionDenied errors seen on some Spaces.
_CACHE_DEFAULTS = {
    "HF_HOME": "/data/.cache/huggingface",
    "HF_HUB_CACHE": "/data/.cache/huggingface/hub",
    "TRANSFORMERS_CACHE": "/data/.cache/huggingface/transformers",
    "HF_HUB_ENABLE_XET": "0",          # avoid xet write issues
    "HF_HUB_ENABLE_HF_TRANSFER": "1",  # faster downloads
    "TOKENIZERS_PARALLELISM": "false",
}
for _key, _value in _CACHE_DEFAULTS.items():
    os.environ.setdefault(_key, _value)

# Make sure the cache folders exist before any model download starts.
for _dir_var in ("HF_HOME", "HF_HUB_CACHE", "TRANSFORMERS_CACHE"):
    os.makedirs(os.environ[_dir_var], exist_ok=True)
18
-
19
- # ---------- Imports after env is set ----------
20
- from docling.datamodel.base_models import InputFormat
21
- from docling.document_converter import DocumentConverter, PdfFormatOption
22
- from docling.pipeline.vlm_pipeline import VlmPipeline
23
-
24
# Detect CUDA (ZeroGPU will make this true on first decorated call)
try:
    import torch

    HAS_CUDA = torch.cuda.is_available()
    # Keep CPU thread usage modest on shared infrastructure.
    _thread_budget = int(os.environ.get("OMP_NUM_THREADS", "2"))
    torch.set_num_threads(max(1, _thread_budget))
except Exception:
    # torch missing or unusable: report CPU-only.
    HAS_CUDA = False
32
-
33
# Build converters once (lifetime of app).
def _make_pdf_converter(pipeline_cls=None):
    """Return a DocumentConverter for PDFs, optionally using a custom pipeline."""
    if pipeline_cls is None:
        option = PdfFormatOption()
    else:
        option = PdfFormatOption(pipeline_cls=pipeline_cls)
    return DocumentConverter(format_options={InputFormat.PDF: option})

# Standard = text-first (faster, good when PDF has text layer)
std_converter = _make_pdf_converter()
# VLM = Granite Docling (better for scans/tables/math)
vlm_converter = _make_pdf_converter(pipeline_cls=VlmPipeline)
42
-
43
- # ---------- Helpers ----------
44
- def _success(md: str, html: str):
45
- tmpdir = tempfile.gettempdir()
46
- md_path = os.path.join(tmpdir, "output.md")
47
- html_path = os.path.join(tmpdir, "output.html")
48
- with open(md_path, "w", encoding="utf-8") as f:
49
- f.write(md)
50
- with open(html_path, "w", encoding="utf-8") as f:
51
- f.write(html)
52
- return md, md_path, html_path
53
-
54
- def _fail(msg: str):
55
- # show readable error in the preview panel
56
- return f"**Conversion failed**:\n```\n{msg}\n```", None, None
57
-
58
def _convert_local_path(path: str, use_vlm: bool):
    """Convert the PDF at *path*; return (preview_md, md_file, html_file).

    Picks the VLM (Granite) pipeline when *use_vlm* is true, otherwise the
    standard text-first pipeline. Any exception is rendered via _fail so the
    UI shows a readable error instead of crashing.
    """
    converter = vlm_converter if use_vlm else std_converter
    try:
        result = converter.convert(source=path)
        document = result.document
        markdown_out = document.export_to_markdown()
        html_out = document.export_to_html()
    except Exception as exc:
        return _fail(f"{exc}\n\n{traceback.format_exc()}")
    return _success(markdown_out, html_out)
67
-
68
# ---------- GPU-decorated endpoints (ZeroGPU requirement) ----------
@spaces.GPU(duration=600)  # up to 10 minutes
def run_convert_file(file, mode):
    """Gradio handler: convert an uploaded PDF.

    *mode* is the Radio label text; any label starting with "VLM" selects the
    Granite pipeline. Returns (preview_md, md_file, html_file).
    """
    if file is None:
        return _fail("No file provided.")
    wants_vlm = mode.startswith("VLM")
    return _convert_local_path(file.name, wants_vlm)
75
-
76
@spaces.GPU(duration=600)
def run_convert_url(url, mode):
    """Gradio handler: download a PDF from *url*, convert it, then clean up.

    The document is first saved to a local temp file because Docling always
    reads from a local path. Returns (preview_md, md_file, html_file).
    """
    if not url:
        return _fail("No URL provided.")
    # download to a temp file so Docling always reads a local path
    try:
        response = requests.get(url, stream=True, timeout=60)
        response.raise_for_status()
        handle, tmp_path = tempfile.mkstemp(suffix=".pdf")
        with os.fdopen(handle, "wb") as tmp_file:
            for piece in response.iter_content(chunk_size=1 << 20):
                if piece:
                    tmp_file.write(piece)
    except Exception as exc:
        return _fail(f"Failed to download URL: {exc}")
    try:
        return _convert_local_path(tmp_path, mode.startswith("VLM"))
    finally:
        # Best-effort cleanup of the downloaded file.
        try:
            os.remove(tmp_path)
        except Exception:
            pass
98
-
99
# ---------- UI ----------
# NOTE: the original headline/bullet strings contained mojibake ("β€”", "β†’")
# from UTF-8 double-encoding; replaced with the intended "—" and "→".
subtitle = "Device: **CUDA (ZeroGPU)**" if HAS_CUDA else "Device: **CPU** (GPU warms on first call)"

with gr.Blocks(title="Granite-Docling 258M — PDF → Markdown/HTML") as demo:
    gr.Markdown(
        f"""# Granite-Docling 258M — PDF → Markdown / HTML
{subtitle}

**Modes**
- **Standard (faster)** → PDFs with a text layer
- **VLM (Granite – better for complex/scanned)** → scans / heavy tables / formulas

_First call may be slow while models download and ZeroGPU warms. Cache lives in `/data`._
"""
    )

    # Shared mode selector; handlers dispatch on the "VLM" prefix of the label.
    mode = gr.Radio(
        ["Standard (faster)", "VLM (Granite – better for complex/scanned)"],
        value="Standard (faster)", label="Mode"
    )

    with gr.Tab("Upload PDF"):
        fi = gr.File(file_types=[".pdf"], label="PDF")
        out_md = gr.Markdown(label="Markdown Preview")
        dl_md = gr.File(label="Download Markdown")
        dl_html = gr.File(label="Download HTML")
        gr.Button("Convert").click(run_convert_file, [fi, mode], [out_md, dl_md, dl_html])

    with gr.Tab("Convert from URL"):
        url = gr.Textbox(label="Public PDF URL", placeholder="https://.../file.pdf")
        out2_md = gr.Markdown(label="Markdown Preview")
        dl2_md = gr.File(label="Download Markdown")
        dl2_html = gr.File(label="Download HTML")
        gr.Button("Convert").click(run_convert_url, [url, mode], [out2_md, dl2_md, dl2_html])

# Explicit bind & queue
demo.queue().launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))