# Granite-Docling 258M Space — PDF to Markdown/HTML (Gradio app for Hugging Face Spaces)
import os, tempfile, traceback
import gradio as gr
import spaces
import requests
# ---------- Cache & HF Hub settings (before importing Docling) ----------
# Use persistent storage (/data) and disable xet (fixes PermissionDenied in some Spaces)
# NOTE: setdefault() means any value already set in the Space's environment wins;
# these are fallbacks only. Must run BEFORE the docling/transformers imports below,
# since those libraries read the cache env vars at import time.
os.environ.setdefault("HF_HOME", "/data/.cache/huggingface")
os.environ.setdefault("HF_HUB_CACHE", "/data/.cache/huggingface/hub")
os.environ.setdefault("TRANSFORMERS_CACHE", "/data/.cache/huggingface/transformers")
os.environ.setdefault("HF_HUB_ENABLE_XET", "0")  # avoid xet write issues
os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")  # faster downloads
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
# Make sure the folders exist
# (hub downloads fail if the cache root is missing and not creatable by the runtime user)
for p in (os.environ["HF_HOME"], os.environ["HF_HUB_CACHE"], os.environ["TRANSFORMERS_CACHE"]):
    os.makedirs(p, exist_ok=True)
# ---------- Imports after env is set ----------
from docling.datamodel.base_models import InputFormat
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline
# Detect CUDA (ZeroGPU will make this true on first decorated call)
# At import time on ZeroGPU, no GPU is attached yet, so HAS_CUDA is typically
# False here; it only reflects the device state when this module loads.
try:
    import torch
    HAS_CUDA = torch.cuda.is_available()
    # keep threads modest on shared infra
    torch.set_num_threads(max(1, int(os.environ.get("OMP_NUM_THREADS", "2"))))
except Exception:
    # torch missing or broken — fall back to advertising CPU mode.
    HAS_CUDA = False
# Build converters once (lifetime of app)
# Both are module-level singletons shared by every request; Docling model
# weights download lazily on the first conversion that needs them.
# Standard = text-first (faster, good when PDF has text layer)
std_converter = DocumentConverter(
    format_options={InputFormat.PDF: PdfFormatOption()}
)
# VLM = Granite Docling (better for scans/tables/math)
vlm_converter = DocumentConverter(
    format_options={InputFormat.PDF: PdfFormatOption(pipeline_cls=VlmPipeline)}
)
# ---------- Helpers ----------
def _success(md: str, html: str):
tmpdir = tempfile.gettempdir()
md_path = os.path.join(tmpdir, "output.md")
html_path = os.path.join(tmpdir, "output.html")
with open(md_path, "w", encoding="utf-8") as f:
f.write(md)
with open(html_path, "w", encoding="utf-8") as f:
f.write(html)
return md, md_path, html_path
def _fail(msg: str):
# show readable error in the preview panel
return f"**Conversion failed**:\n```\n{msg}\n```", None, None
def _convert_local_path(path: str, use_vlm: bool):
    """Convert a local PDF and return the (markdown, md_path, html_path) triple.

    Selects the VLM (Granite) pipeline when *use_vlm* is true, otherwise the
    standard text-first pipeline. Any exception — from conversion or export —
    is caught and rendered via _fail() so the UI never sees a raw traceback.
    """
    converter = vlm_converter if use_vlm else std_converter
    try:
        document = converter.convert(source=path).document
        markdown = document.export_to_markdown()
        html = document.export_to_html()
        return _success(markdown, html)
    except Exception as exc:
        return _fail(f"{exc}\n\n{traceback.format_exc()}")
# ---------- GPU-decorated endpoints (ZeroGPU requirement) ----------
@spaces.GPU(duration=600)  # up to 10 minutes
def run_convert_file(file, mode):
    """Gradio handler: convert an uploaded PDF.

    *file* comes from a gr.File input and *mode* from the Mode radio; returns
    the (markdown, md_path, html_path) triple for the three output widgets.

    Robustness fix: depending on the Gradio version, gr.File delivers either
    a filepath string (type="filepath", the Gradio 4 default) or a tempfile
    wrapper exposing ``.name`` — the old ``file.name``-only code crashed on
    the string form. Accept both.
    """
    if file is None:
        return _fail("No file provided.")
    path = file if isinstance(file, str) else file.name
    use_vlm = mode.startswith("VLM")
    return _convert_local_path(path, use_vlm)
@spaces.GPU(duration=600)
def run_convert_url(url, mode):
    """Gradio handler: fetch a public PDF URL, convert it, clean up.

    Streams the response into a temp file first so Docling always reads a
    local path; the temp file is removed after conversion regardless of
    outcome. Download failures are reported via _fail() instead of raising.
    """
    if not url:
        return _fail("No URL provided.")
    # download to a temp file so Docling always reads a local path
    try:
        response = requests.get(url, stream=True, timeout=60)
        response.raise_for_status()
        handle, local_pdf = tempfile.mkstemp(suffix=".pdf")
        with os.fdopen(handle, "wb") as sink:
            # 1 MiB chunks; skip keep-alive chunks, which arrive empty.
            for piece in response.iter_content(chunk_size=1 << 20):
                if piece:
                    sink.write(piece)
    except Exception as exc:
        return _fail(f"Failed to download URL: {exc}")
    try:
        return _convert_local_path(local_pdf, mode.startswith("VLM"))
    finally:
        # Best-effort cleanup of the downloaded file.
        try:
            os.remove(local_pdf)
        except Exception:
            pass
# ---------- UI ----------
# Device label shown in the header; HAS_CUDA reflects import-time state only,
# so on ZeroGPU this usually reads CPU until the first GPU-decorated call.
subtitle = "Device: **CUDA (ZeroGPU)**" if HAS_CUDA else "Device: **CPU** (GPU warms on first call)"
with gr.Blocks(title="Granite-Docling 258M β PDF β Markdown/HTML") as demo:
    gr.Markdown(
        f"""# Granite-Docling 258M β PDF β Markdown / HTML
{subtitle}
**Modes**
- **Standard (faster)** β PDFs with a text layer
- **VLM (Granite β better for complex/scanned)** β scans / heavy tables / formulas
_First call may be slow while models download and ZeroGPU warms. Cache lives in `/data`._
"""
    )
    # NOTE(review): the "β" characters above and below look like mojibake of an
    # arrow/dash glyph — confirm the intended character before editing labels.
    # Handlers only test mode.startswith("VLM"), so the text after that prefix
    # is display-only.
    mode = gr.Radio(
        ["Standard (faster)", "VLM (Granite β better for complex/scanned)"],
        value="Standard (faster)", label="Mode"
    )
    # Tab 1: local upload → run_convert_file fills preview + two download slots.
    with gr.Tab("Upload PDF"):
        fi = gr.File(file_types=[".pdf"], label="PDF")
        out_md = gr.Markdown(label="Markdown Preview")
        dl_md = gr.File(label="Download Markdown")
        dl_html = gr.File(label="Download HTML")
        gr.Button("Convert").click(run_convert_file, [fi, mode], [out_md, dl_md, dl_html])
    # Tab 2: remote URL → run_convert_url downloads first, then converts.
    with gr.Tab("Convert from URL"):
        url = gr.Textbox(label="Public PDF URL", placeholder="https://.../file.pdf")
        out2_md = gr.Markdown(label="Markdown Preview")
        dl2_md = gr.File(label="Download Markdown")
        dl2_html = gr.File(label="Download HTML")
        gr.Button("Convert").click(run_convert_url, [url, mode], [out2_md, dl2_md, dl2_html])
# Explicit bind & queue
# Bind to all interfaces (required inside the Space container) on the port the
# platform assigns via $PORT, defaulting to Gradio's usual 7860.
# Fix: removed a stray trailing "|" artifact that made this line a SyntaxError.
demo.queue().launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))