Spaces:

Javedalam
/

ibm_granite_docling

Runtime error

App Files Files Community

ibm_granite_docling / app.py

Javedalam

Create app.py

2210de6 verified 3 months ago

raw

history blame

5.2 kB

	import os, tempfile, traceback
	import gradio as gr
	import spaces
	import requests

	# ---------- Cache & HF Hub settings (before importing Docling) ----------
	# Everything here only touches os.environ and the filesystem; it runs before
	# the Docling imports below so those libraries see the final values.
	import importlib.util

	# Cache-related environment variable -> sub-directory under the cache root
	# ("" means the root itself).
	_CACHE_SUBDIRS = {
	    "HF_HOME": "",
	    "HF_HUB_CACHE": "hub",
	    "TRANSFORMERS_CACHE": "transformers",
	}


	def _point_caches_at(root, *, override):
	    """Point the HF cache variables at ``root`` and create the folders.

	    Args:
	        root: Directory that should hold the Hugging Face caches.
	        override: When False, variables that are already set are respected
	            (``setdefault``); when True they are overwritten.

	    Raises:
	        OSError: If a cache directory cannot be created or is not writable.
	    """
	    for var, sub in _CACHE_SUBDIRS.items():
	        path = os.path.join(root, sub) if sub else root
	        if override:
	            os.environ[var] = path
	        else:
	            os.environ.setdefault(var, path)
	        target = os.environ[var]
	        os.makedirs(target, exist_ok=True)
	        if not os.access(target, os.W_OK):
	            raise PermissionError(f"cache directory is not writable: {target}")


	# Prefer persistent storage (/data). If it is missing or read-only, fall back
	# to the system temp dir instead of crashing the app at import time.
	try:
	    _point_caches_at("/data/.cache/huggingface", override=False)
	except OSError as exc:
	    _fallback_root = os.path.join(tempfile.gettempdir(), "huggingface")
	    print(f"[cache] {exc}; falling back to {_fallback_root}", flush=True)
	    _point_caches_at(_fallback_root, override=True)

	# Disable xet (fixes PermissionDenied in some Spaces). HF_HUB_DISABLE_XET is
	# the switch documented by huggingface_hub; the original variable is kept so
	# anything that relied on it still sees the same value.
	os.environ.setdefault("HF_HUB_ENABLE_XET", "0")
	os.environ.setdefault("HF_HUB_DISABLE_XET", "1")

	# Faster downloads, but only when the optional hf_transfer package is
	# installed; enabling the flag without it would break downloads.
	if importlib.util.find_spec("hf_transfer") is not None:
	    os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")

	os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")

	# ---------- Imports after env is set ----------
	from docling.datamodel.base_models import InputFormat
	from docling.document_converter import DocumentConverter, PdfFormatOption
	from docling.pipeline.vlm_pipeline import VlmPipeline

	# Detect CUDA once, at import time. NOTE(review): the original comment says
	# ZeroGPU makes CUDA available on the first decorated call; HAS_CUDA is not
	# re-evaluated after import, so it only reflects the state at startup.
	try:
	    import torch

	    HAS_CUDA = torch.cuda.is_available()
	except Exception:
	    # Best effort: torch missing or failing to initialise means "no CUDA".
	    HAS_CUDA = False
	else:
	    # Keep threads modest on shared infra. This is handled separately from
	    # the CUDA probe so a malformed OMP_NUM_THREADS cannot flip HAS_CUDA.
	    try:
	        torch.set_num_threads(max(1, int(os.environ.get("OMP_NUM_THREADS", "2"))))
	    except (ValueError, RuntimeError):
	        # Leave torch's default thread count in place.
	        pass

	# Converters are created a single time and reused for the lifetime of the app.

	# Standard = text-first (faster, good when the PDF has a text layer):
	# a PDF format option with no pipeline override.
	_std_pdf_option = PdfFormatOption()
	std_converter = DocumentConverter(format_options={InputFormat.PDF: _std_pdf_option})

	# VLM = Granite Docling (better for scans/tables/math):
	# the same option type, with the pipeline class switched to VlmPipeline.
	_vlm_pdf_option = PdfFormatOption(pipeline_cls=VlmPipeline)
	vlm_converter = DocumentConverter(format_options={InputFormat.PDF: _vlm_pdf_option})

	# ---------- Helpers ----------
	def _success(md: str, html: str):
	tmpdir = tempfile.gettempdir()
	md_path = os.path.join(tmpdir, "output.md")
	html_path = os.path.join(tmpdir, "output.html")
	with open(md_path, "w", encoding="utf-8") as f:
	f.write(md)
	with open(html_path, "w", encoding="utf-8") as f:
	f.write(html)
	return md, md_path, html_path

	def _fail(msg: str):
	# show readable error in the preview panel
	return f"Conversion failed:\n```\n{msg}\n```", None, None

	def _convert_local_path(path: str, use_vlm: bool):
	    """Convert the document at ``path`` and return a UI result tuple.

	    Args:
	        path: Local filesystem path handed to the converter as ``source``.
	        use_vlm: Pick ``vlm_converter`` when true, ``std_converter`` otherwise.

	    Returns:
	        The tuple from ``_success`` on success; on any exception, the tuple
	        from ``_fail`` carrying the error text followed by the traceback.
	    """
	    try:
	        if use_vlm:
	            converter = vlm_converter
	        else:
	            converter = std_converter
	        document = converter.convert(source=path).document
	        markdown_text = document.export_to_markdown()
	        html_text = document.export_to_html()
	        return _success(markdown_text, html_text)
	    except Exception as exc:
	        # Surface the failure in the preview panel rather than raising.
	        details = traceback.format_exc()
	        return _fail(f"{exc}\n\n{details}")

	# ---------- GPU-decorated endpoints (ZeroGPU requirement) ----------
	@spaces.GPU(duration=600)  # up to 10 minutes
	def run_convert_file(file, mode):
	    """Convert an uploaded PDF.

	    Args:
	        file: Value of the upload component: ``None`` when nothing was
	            uploaded, otherwise either a path string or an object exposing
	            the path as ``.name``.
	        mode: Selected mode label; labels starting with ``"VLM"`` select the
	            VLM converter. ``None`` is treated as the standard mode.

	    Returns:
	        The ``(preview, markdown_path, html_path)`` tuple produced by
	        ``_convert_local_path``, or a ``_fail`` tuple when no file was given.
	    """
	    if file is None:
	        return _fail("No file provided.")
	    # Accept both a plain path string and a file-like wrapper with `.name`.
	    path = getattr(file, "name", file)
	    # A cleared radio yields None; avoid AttributeError on .startswith.
	    use_vlm = str(mode or "").startswith("VLM")
	    return _convert_local_path(path, use_vlm)

	@spaces.GPU(duration=600)
	def run_convert_url(url, mode):
	    """Download a PDF from ``url`` and convert it.

	    The document is first streamed to a temporary ``.pdf`` file so the
	    converter always reads a local path. The temporary file is removed
	    afterwards, including when the download fails part-way through.

	    Args:
	        url: Address of the PDF; surrounding whitespace is ignored.
	        mode: Selected mode label; labels starting with ``"VLM"`` select the
	            VLM converter. ``None`` is treated as the standard mode.

	    Returns:
	        The ``(preview, markdown_path, html_path)`` tuple produced by
	        ``_convert_local_path``, or a ``_fail`` tuple when the URL is empty
	        or the download fails.
	    """
	    url = (url or "").strip()
	    if not url:
	        return _fail("No URL provided.")

	    # NOTE(security): the URL comes from the user and is fetched server-side
	    # with no host allow-list or size cap; review before exposing this on a
	    # network with reachable internal services.
	    tmp_path = None
	    try:
	        try:
	            # The context manager releases the streamed connection.
	            with requests.get(url, stream=True, timeout=60) as resp:
	                resp.raise_for_status()
	                fd, tmp_path = tempfile.mkstemp(suffix=".pdf")
	                with os.fdopen(fd, "wb") as tmp:
	                    for chunk in resp.iter_content(chunk_size=1 << 20):
	                        if chunk:
	                            tmp.write(chunk)
	        except Exception as e:
	            return _fail(f"Failed to download URL: {e}")
	        return _convert_local_path(tmp_path, str(mode or "").startswith("VLM"))
	    finally:
	        # Best-effort cleanup; a file that is already gone is not an error.
	        if tmp_path is not None:
	            try:
	                os.remove(tmp_path)
	            except OSError:
	                pass

	# ---------- UI ----------
	# Device line shown under the title; based on HAS_CUDA as computed at import.
	if HAS_CUDA:
	    subtitle = "Device: CUDA (ZeroGPU)"
	else:
	    subtitle = "Device: CPU (GPU warms on first call)"

	with gr.Blocks(title="Granite-Docling 258M — PDF → Markdown/HTML") as demo:
	    # Header / usage notes.
	    gr.Markdown(
	        f"""# Granite-Docling 258M — PDF → Markdown / HTML
	{subtitle}

	Modes
	- Standard (faster) → PDFs with a text layer
	- VLM (Granite – better for complex/scanned) → scans / heavy tables / formulas

	_First call may be slow while models download and ZeroGPU warms. Cache lives in `/data`._
	"""
	    )

	    # One mode selector shared by both tabs; the handlers check whether the
	    # chosen label starts with "VLM".
	    mode = gr.Radio(
	        choices=["Standard (faster)", "VLM (Granite – better for complex/scanned)"],
	        value="Standard (faster)",
	        label="Mode",
	    )

	    # Tab 1: convert an uploaded file.
	    with gr.Tab("Upload PDF"):
	        fi = gr.File(file_types=[".pdf"], label="PDF")
	        out_md = gr.Markdown(label="Markdown Preview")
	        dl_md = gr.File(label="Download Markdown")
	        dl_html = gr.File(label="Download HTML")
	        _upload_button = gr.Button("Convert")
	        _upload_button.click(
	            fn=run_convert_file,
	            inputs=[fi, mode],
	            outputs=[out_md, dl_md, dl_html],
	        )

	    # Tab 2: convert a document fetched from a URL.
	    with gr.Tab("Convert from URL"):
	        url = gr.Textbox(label="Public PDF URL", placeholder="https://.../file.pdf")
	        out2_md = gr.Markdown(label="Markdown Preview")
	        dl2_md = gr.File(label="Download Markdown")
	        dl2_html = gr.File(label="Download HTML")
	        _url_button = gr.Button("Convert")
	        _url_button.click(
	            fn=run_convert_url,
	            inputs=[url, mode],
	            outputs=[out2_md, dl2_md, dl2_html],
	        )

	# Explicit bind & queue: listen on all interfaces, on $PORT when it is set
	# and on 7860 otherwise.
	_server_port = int(os.environ.get("PORT", 7860))
	demo.queue().launch(server_name="0.0.0.0", server_port=_server_port)