# kybocr / Dockerfile
# tommulder: Changed python location and transformer version (commit aa1ab1e)
# Base: official vLLM OpenAI-compatible server (tested version family)
# Tag is pinned for reproducible builds; the image ships CUDA + PyTorch.
FROM vllm/vllm-openai:v0.9.1
# Work as root during build
# NOTE(review): no later USER directive appears below, so the container also
# runs as root at runtime — confirm this is acceptable for the deployment target.
USER root
# Speed up HF downloads and avoid interactive git prompts.
# HF_HUB_ENABLE_HF_TRANSFER=1 tells huggingface_hub to use the Rust-based
# hf_transfer downloader; huggingface_hub raises an error at download time
# when the flag is set but the package is missing, so hf_transfer is
# installed in the pip layer below.
ENV HF_HUB_ENABLE_HF_TRANSFER=1 \
GIT_LFS_SKIP_SMUDGE=1 \
PIP_DISABLE_PIP_VERSION_CHECK=1
# Install compatible Transformers version for vLLM 0.9.1 (>=4.51.1 required)
# plus minimal utils. vLLM image already includes CUDA/PyTorch.
RUN pip install --no-cache-dir \
        "hf_transfer>=0.1.4" \
        "huggingface_hub>=0.24.0" \
        "transformers>=4.51.1"
# Pre-download the model repo into /weights/DotsOCR so the Space starts
# without a cold-start download.
# Note: dots.ocr requires the directory name to avoid '.' (see model card);
# downloading straight into the '.'-free name "DotsOCR" satisfies that, and
# snapshot_download(local_dir=...) places files exactly where asked, so no
# temp-dir-then-move step is needed.
ARG MODEL_ID=rednote-hilab/dots.ocr
RUN python3 - <<'PY'
import os, shutil
from huggingface_hub import snapshot_download

dst = "/weights/DotsOCR"
os.makedirs(dst, exist_ok=True)
# ARG values are exported into the environment of subsequent RUN steps,
# so MODEL_ID is visible here; fall back to the default repo id otherwise.
snapshot_download(
    repo_id=os.environ.get("MODEL_ID", "rednote-hilab/dots.ocr"),
    local_dir=dst,
    allow_patterns=["*"],
)
# Remove huggingface_hub's local_dir bookkeeping directory to keep the
# baked-in layer lean; harmless if it does not exist.
shutil.rmtree(os.path.join(dst, ".cache"), ignore_errors=True)
PY
# Ensure the model code is importable as a top-level package "DotsOCR"
ENV HF_MODEL_PATH=/weights/DotsOCR
ENV PYTHONPATH="/weights:${PYTHONPATH}"
# vLLM needs the custom model to be registered before main() runs.
# The model authors recommend importing their vLLM adapter into the vllm CLI module.
# The trailing grep makes the build fail loudly if the sed anchor ever stops
# matching (e.g. after a vLLM upgrade); plain sed exits 0 on a no-op edit and
# would otherwise produce a silently broken image.
RUN sed -i '/^from vllm\.entrypoints\.cli\.main import main$/a\
from DotsOCR import modeling_dots_ocr_vllm' "$(which vllm)" \
 && grep -q 'from DotsOCR import modeling_dots_ocr_vllm' "$(which vllm)"
# Expose the Space port and launch the OpenAI-compatible server
EXPOSE 7860
ENV PORT=7860
# Notes:
# --chat-template-content-format string per dots.ocr README
# --served-model-name model so clients can use model="model"
# --trust-remote-code load repo's custom code
# --host 0.0.0.0 --port $PORT bind to Space port
# Shell-form CMD is kept for ${VAR} expansion, but `exec` makes vllm replace
# /bin/sh so it runs as PID 1 and receives SIGTERM directly on `docker stop`
# (the previous bash -lc wrapper left a shell chain in front of the server).
CMD exec vllm serve ${HF_MODEL_PATH} \
    --host 0.0.0.0 --port ${PORT} \
    --served-model-name model \
    --gpu-memory-utilization 0.95 \
    --chat-template-content-format string \
    --trust-remote-code