tommulder committed
Commit 15a1cdd · 0 Parent(s)
Files changed (3):
  1. .gitignore +0 -0
  2. Dockerfile +60 -0
  3. README.md +0 -0
.gitignore ADDED
File without changes
Dockerfile ADDED
@@ -0,0 +1,60 @@
# Base: official vLLM OpenAI-compatible server (tested version family)
FROM vllm/vllm-openai:v0.9.1

# Work as root during build
USER root

# Speed up HF downloads and avoid interactive git prompts
ENV HF_HUB_ENABLE_HF_TRANSFER=1 \
    GIT_LFS_SKIP_SMUDGE=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# Install exact Transformers version dots.ocr expects (qwen2_vl APIs),
# plus minimal utils. vLLM image already includes CUDA/PyTorch.
RUN pip install --no-cache-dir "transformers==4.48.3" "huggingface_hub>=0.24.0"
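
Since dots.ocr's remote code is built against the qwen2_vl-era Transformers APIs, the pin above is load-bearing. A tiny runtime check like the following (a hypothetical sketch, not part of the build) can fail fast if a later layer or upgrade ever drifts the version:

    # Hypothetical sanity check: confirm the pinned Transformers version survived.
    import transformers

    assert transformers.__version__ == "4.48.3", transformers.__version__
    print("transformers pin intact:", transformers.__version__)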

# Pre-download the model repo into /weights/DotsOCR
# Note: dots.ocr requires the directory name to avoid '.' (see model card),
# since /weights/DotsOCR must later be importable as a Python package name.
ARG MODEL_ID=rednote-hilab/dots.ocr
RUN python - <<'PY'
from huggingface_hub import snapshot_download
import os, shutil

root = "/weights"
os.makedirs(root, exist_ok=True)
# Download into a temp dir, then move everything under /weights/DotsOCR
tmp = snapshot_download(repo_id=os.environ.get("MODEL_ID", "rednote-hilab/dots.ocr"),
                        local_dir=os.path.join(root, "_tmp"), allow_patterns=["*"])
dst = os.path.join(root, "DotsOCR")
os.makedirs(dst, exist_ok=True)
for name in os.listdir(tmp):
    if name == "DotsOCR":  # defensive: never move a pre-existing target dir into itself
        continue
    shutil.move(os.path.join(tmp, name), os.path.join(dst, name))
shutil.rmtree(tmp, ignore_errors=True)
PY
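
For what it's worth, the same layout could presumably be produced by pointing snapshot_download at the target directly; this one-liner (an alternative sketch, not what the build above does) skips the temp-dir shuffle:

    # Alternative sketch: download straight into the final package directory.
    from huggingface_hub import snapshot_download

    snapshot_download(repo_id="rednote-hilab/dots.ocr",
                      local_dir="/weights/DotsOCR", allow_patterns=["*"])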

# Ensure the model code is importable as a top-level package "DotsOCR"
ENV HF_MODEL_PATH=/weights/DotsOCR
ENV PYTHONPATH="/weights:${PYTHONPATH}"
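
With /weights on PYTHONPATH, the repo's custom modules resolve as DotsOCR.*. A quick spot check inside the built image (hypothetical, not part of the build) is to import the very module the patch below injects:

    # Hypothetical spot check: the adapter module must resolve via PYTHONPATH,
    # otherwise the patched vllm entrypoint below would crash at startup.
    import importlib

    mod = importlib.import_module("DotsOCR.modeling_dots_ocr_vllm")
    print("resolved:", mod.__name__)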

# vLLM needs the custom model to be registered before main() runs.
# The model authors recommend importing their vLLM adapter into the vllm CLI module.
RUN sed -i '/^from vllm\.entrypoints\.cli\.main import main$/a\
from DotsOCR import modeling_dots_ocr_vllm' $(which vllm)
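
For reference, the vllm executable that sed edits here is an ordinary pip console-script stub; after the patch it should read roughly like this (a sketch; the exact stub varies by pip version):

    #!/usr/bin/env python
    # Sketch of $(which vllm) after the sed patch above.
    import sys
    from vllm.entrypoints.cli.main import main
    from DotsOCR import modeling_dots_ocr_vllm  # injected: import side effect registers dots.ocr

    if __name__ == "__main__":
        sys.exit(main())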

# Expose the Space port and launch the OpenAI-compatible server
EXPOSE 7860
ENV PORT=7860

# Notes:
#   --chat-template-content-format string   per dots.ocr README
#   --served-model-name model               so clients can use model="model"
#   --trust-remote-code                     load the repo's custom code
#   --host 0.0.0.0 --port $PORT             bind to the Space port
CMD bash -lc '\
vllm serve ${HF_MODEL_PATH} \
  --host 0.0.0.0 --port ${PORT} \
  --served-model-name model \
  --gpu-memory-utilization 0.95 \
  --chat-template-content-format string \
  --trust-remote-code \
'
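
Once the Space is up, any OpenAI-compatible client can talk to it. A minimal sketch with the openai Python package (the localhost URL, the page.png path, and the prompt text are illustrative assumptions; dots.ocr's README defines the exact task prompts):

    # Minimal client sketch against the server started by the CMD above.
    import base64
    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:7860/v1", api_key="EMPTY")

    with open("page.png", "rb") as f:  # hypothetical test image
        image_b64 = base64.b64encode(f.read()).decode()

    resp = client.chat.completions.create(
        model="model",  # matches --served-model-name above
        messages=[{
            "role": "user",
            "content": [
                {"type": "image_url",
                 "image_url": {"url": f"data:image/png;base64,{image_b64}"}},
                {"type": "text", "text": "Extract the text from this page."},
            ],
        }],
    )
    print(resp.choices[0].message.content)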
README.md ADDED
File without changes