# syntax=docker/dockerfile:1
# Base image: the official vLLM OpenAI-compatible server, pinned to the
# tested release family (v0.9.1).
FROM vllm/vllm-openai:v0.9.1

# The build steps below (pip install, weight download, CLI patch) need root.
# NOTE(review): there is no later USER switch, so the container also runs
# as root at runtime — confirm this is acceptable for the target platform.
USER root

# Build/runtime knobs:
#   GIT_LFS_SKIP_SMUDGE=1            don't fetch LFS blobs via git checkouts
#   HF_HUB_ENABLE_HF_TRANSFER=1      fast parallel Hugging Face downloads
#   PIP_DISABLE_PIP_VERSION_CHECK=1  quieter, slightly faster pip
ENV GIT_LFS_SKIP_SMUDGE=1 \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# Python deps for vLLM 0.9.1 (the image already ships CUDA/PyTorch):
#   - transformers >=4.51.1: required by vLLM 0.9.1 for this model family
#   - huggingface_hub:       used by the snapshot_download step below
#   - hf_transfer:           REQUIRED because HF_HUB_ENABLE_HF_TRANSFER=1 is
#     set above; with the flag on and the package missing, snapshot_download
#     raises at build time instead of falling back to the slow path.
RUN pip install --no-cache-dir \
      "hf_transfer>=0.1.6" \
      "huggingface_hub>=0.24.0" \
      "transformers>=4.51.1"

# Pre-download the model into /weights/DotsOCR at build time so the container
# can start without Hub access. dots.ocr requires the directory name to avoid
# '.' (see model card), hence "DotsOCR" instead of the raw repo name.
ARG MODEL_ID=rednote-hilab/dots.ocr
RUN python3 - <<'PY'
import os
from huggingface_hub import snapshot_download

# ARG values are exported into the environment of same-stage RUN steps,
# so MODEL_ID is visible here; keep a default for safety.
repo_id = os.environ.get("MODEL_ID", "rednote-hilab/dots.ocr")
dst = "/weights/DotsOCR"
os.makedirs(dst, exist_ok=True)
# Download straight into the final location — no temp dir + move needed;
# local_dir materializes real files with huggingface_hub >= 0.24.
snapshot_download(repo_id=repo_id, local_dir=dst)
PY

# Make the model code importable as a top-level package "DotsOCR"
# (i.e. /weights/DotsOCR/... on the Python path via its parent dir).
ENV HF_MODEL_PATH=/weights/DotsOCR
# Only append ":$PYTHONPATH" when it is already set. The previous form
# "/weights:${PYTHONPATH}" left a trailing empty entry when PYTHONPATH was
# unset, which makes Python add the current working directory to sys.path.
ENV PYTHONPATH=/weights${PYTHONPATH:+:${PYTHONPATH}}

# vLLM needs the custom model registered before main() runs; the model
# authors recommend injecting an import of their vLLM adapter into the
# `vllm` CLI entrypoint script, right after its own main import.
# The grep afterwards makes the build FAIL LOUDLY if the sed anchor line
# was not found (e.g. after a base-image bump changes the script) —
# `sed -i` alone succeeds silently even when it matches nothing.
RUN VLLM_BIN="$(which vllm)" \
 && sed -i '/^from vllm\.entrypoints\.cli\.main import main$/a\
from DotsOCR import modeling_dots_ocr_vllm' "$VLLM_BIN" \
 && grep -q '^from DotsOCR import modeling_dots_ocr_vllm$' "$VLLM_BIN"

# Expose the Space port and launch the OpenAI-compatible server.
# EXPOSE is documentation only; the platform routes traffic to $PORT.
EXPOSE 7860
ENV PORT=7860

# Cheap liveness probe against vLLM's built-in /health endpoint.
# Long start period: loading the weights onto the GPU can take minutes.
# python3 is used as the probe client so we don't depend on curl/wget.
HEALTHCHECK --interval=30s --timeout=5s --start-period=300s --retries=3 \
  CMD python3 -c 'import urllib.request; urllib.request.urlopen("http://127.0.0.1:7860/health", timeout=4)' || exit 1

# Exec-form CMD with `exec` so vllm replaces the shell, runs as PID 1,
# and receives SIGTERM from `docker stop` (shell-form CMD would not).
# Flags:
#   --chat-template-content-format string   per dots.ocr README
#   --served-model-name model               so clients can use model="model"
#   --trust-remote-code                     load the repo's custom code
#   --host 0.0.0.0 --port $PORT             bind to the Space port
CMD ["/bin/bash", "-lc", "exec vllm serve ${HF_MODEL_PATH} --host 0.0.0.0 --port ${PORT} --served-model-name model --gpu-memory-utilization 0.95 --chat-template-content-format string --trust-remote-code"]