# kybocr / Dockerfile
# tommulder: Changed python location and transformer version (commit aa1ab1e)
# Base: official vLLM OpenAI-compatible server (tested version family)
# Tag is pinned for reproducible builds; the image ships CUDA + PyTorch.
FROM vllm/vllm-openai:v0.9.1
# Work as root during build
# NOTE(review): no later USER directive appears below, so the container also
# runs as root at runtime — confirm this is acceptable for the deployment target.
USER root
# Speed up HF downloads and avoid interactive git prompts.
# HF_HUB_ENABLE_HF_TRANSFER=1 tells huggingface_hub to use the Rust-based
# hf_transfer downloader; huggingface_hub raises an error at download time
# when the flag is set but the package is missing, so hf_transfer is
# installed in the pip layer below.
ENV HF_HUB_ENABLE_HF_TRANSFER=1 \
GIT_LFS_SKIP_SMUDGE=1 \
PIP_DISABLE_PIP_VERSION_CHECK=1
# Install compatible Transformers version for vLLM 0.9.1 (>=4.51.1 required)
# plus minimal utils. vLLM image already includes CUDA/PyTorch.
RUN pip install --no-cache-dir \
        "hf_transfer>=0.1.4" \
        "huggingface_hub>=0.24.0" \
        "transformers>=4.51.1"
# Pre-download the model repo into /weights/DotsOCR so the Space starts
# without a cold-start download.
# Note: dots.ocr requires the directory name to avoid '.' (see model card);
# downloading straight into the '.'-free name "DotsOCR" satisfies that, and
# snapshot_download(local_dir=...) places files exactly where asked, so no
# temp-dir-then-move step is needed.
ARG MODEL_ID=rednote-hilab/dots.ocr
RUN python3 - <<'PY'
import os, shutil
from huggingface_hub import snapshot_download

dst = "/weights/DotsOCR"
os.makedirs(dst, exist_ok=True)
# ARG values are exported into the environment of subsequent RUN steps,
# so MODEL_ID is visible here; fall back to the default repo id otherwise.
snapshot_download(
    repo_id=os.environ.get("MODEL_ID", "rednote-hilab/dots.ocr"),
    local_dir=dst,
    allow_patterns=["*"],
)
# Remove huggingface_hub's local_dir bookkeeping directory to keep the
# baked-in layer lean; harmless if it does not exist.
shutil.rmtree(os.path.join(dst, ".cache"), ignore_errors=True)
PY
# Ensure the model code is importable as a top-level package "DotsOCR"
ENV HF_MODEL_PATH=/weights/DotsOCR
ENV PYTHONPATH="/weights:${PYTHONPATH}"
# vLLM needs the custom model to be registered before main() runs.
# The model authors recommend importing their vLLM adapter into the vllm CLI module.
# The trailing grep makes the build fail loudly if the sed anchor ever stops
# matching (e.g. after a vLLM upgrade); plain sed exits 0 on a no-op edit and
# would otherwise produce a silently broken image.
RUN sed -i '/^from vllm\.entrypoints\.cli\.main import main$/a\
from DotsOCR import modeling_dots_ocr_vllm' "$(which vllm)" \
 && grep -q 'from DotsOCR import modeling_dots_ocr_vllm' "$(which vllm)"
# Expose the Space port and launch the OpenAI-compatible server
EXPOSE 7860
ENV PORT=7860
# Notes:
# --chat-template-content-format string per dots.ocr README
# --served-model-name model so clients can use model="model"
# --trust-remote-code load repo's custom code
# --host 0.0.0.0 --port $PORT bind to Space port
# Shell-form CMD is kept for ${VAR} expansion, but `exec` makes vllm replace
# /bin/sh so it runs as PID 1 and receives SIGTERM directly on `docker stop`
# (the previous bash -lc wrapper left a shell chain in front of the server).
CMD exec vllm serve ${HF_MODEL_PATH} \
    --host 0.0.0.0 --port ${PORT} \
    --served-model-name model \
    --gpu-memory-utilization 0.95 \
    --chat-template-content-format string \
    --trust-remote-code