update dockerfile and app.py

- Dockerfile  +16 -16
- app.py      +6 -24
Dockerfile
CHANGED

@@ -5,7 +5,7 @@ FROM ubuntu:22.04
 ENV DEBIAN_FRONTEND=noninteractive
 ENV TZ=Etc/UTC
 
-# Configure
+# Configure cache paths to a writable /tmp location
 ENV XDG_CACHE_HOME=/tmp/.cache
 ENV HF_HOME=/tmp/.cache/huggingface
 
@@ -17,7 +17,7 @@ RUN echo "tzdata tzdata/Areas select Etc" > /tmp/tzdata.seed && \
     echo "tzdata tzdata/Zones/Etc select UTC" >> /tmp/tzdata.seed && \
     debconf-set-selections /tmp/tzdata.seed
 
-# 1
+# STEP 1: Install OS-level dependencies
 RUN echo "### STEP 1: Installing OS-level dependencies" && \
     apt-get update && \
     apt-get install -y --no-install-recommends \
@@ -32,28 +32,28 @@ RUN echo "### STEP 1: Installing OS-level dependencies" && \
         python3-opencv && \
     rm -rf /var/lib/apt/lists/*
 
-# 2
+# STEP 2: Prepare application directory and copy source code
 WORKDIR /app
 COPY requirements.txt ./
 COPY app.py ./
-#
+# COPY any other source files or directories needed by your app
 
-# 3
-RUN echo "### STEP
-
-
+# STEP 3: Install Python dependencies (ensure huggingface_hub is listed)
+RUN echo "### STEP 3: Installing Python dependencies" && \
+    python3 -m pip install --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt
 
-# 4
-RUN echo "### STEP
+# STEP 4: Ensure cache directories are writable
+RUN echo "### STEP 4: Creating and permissioning cache directories" && \
     mkdir -p "$XDG_CACHE_HOME" "$HF_HOME" && \
-    chmod -R a+rwX "$XDG_CACHE_HOME"
+    chmod -R a+rwX "$XDG_CACHE_HOME" "$HF_HOME"
 
-# 5
-RUN echo "### STEP
+# STEP 5: Build and install llama-cpp-python from source with OpenBLAS
+RUN echo "### STEP 5: Building llama-cpp-python with OpenBLAS" && \
     export CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" && \
-
+    pip install --no-cache-dir --force-reinstall --no-binary llama-cpp-python llama-cpp-python==0.2.0
 
-# 6
-RUN echo "### STEP
+# STEP 6: Finalize and launch the application
+RUN echo "### STEP 6: Finalizing Docker image"
 EXPOSE 7860
 CMD ["python3", "app.py"]
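The Dockerfile changes center on one point: the default Hugging Face cache under the home directory is not reliably writable inside the Space container, so the image now routes every download through XDG_CACHE_HOME/HF_HOME in /tmp and pre-creates those directories with open permissions (STEP 4), while STEP 5 rebuilds llama-cpp-python from source against OpenBLAS via CMAKE_ARGS. A minimal sketch of the cache effect, assuming huggingface_hub is listed in requirements.txt as the STEP 3 comment requires; the repo and file names below are placeholders, not the ones this Space uses:

    import os

    # Mirror the Dockerfile's ENV lines; they must be set before importing
    # huggingface_hub, which reads HF_HOME when it is first imported.
    os.environ.setdefault("XDG_CACHE_HOME", "/tmp/.cache")
    os.environ.setdefault("HF_HOME", "/tmp/.cache/huggingface")

    from huggingface_hub import hf_hub_download

    # Placeholder repo_id/filename, for illustration only.
    path = hf_hub_download(repo_id="some-org/some-model-gguf",
                           filename="model.Q4_K_M.gguf")
    print(path)  # resolves to a file under /tmp/.cache/huggingface/hub/, writable for the Space user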
app.py
CHANGED

@@ -12,7 +12,6 @@ import gc
 import io
 from contextlib import redirect_stdout, redirect_stderr
 import sys, llama_cpp
-import shutil
 
 # ----------------------------------------
 # Model configurations: per-size prefixes and repos
@@ -53,27 +52,12 @@ model_cache = {
     'llm': None
 }
 
-# Helper to download
+# Helper to download weights and return their cache paths
 def ensure_weights(cfg, model_file, clip_file):
-    #
-
-
-
-    try:
-        os.symlink(path, model_file)
-    except (PermissionError, OSError):
-        print(f"⚠️ symlink failed, copying {path} → {model_file}")
-        shutil.copy2(path, model_file)
-
-    # repeat for clip_file…
-    clip_path = hf_hub_download(repo_id=cfg['clip_repo'], filename=clip_file)
-    try:
-        os.symlink(clip_path, clip_file)
-    except (PermissionError, OSError):
-        print(f"⚠️ symlink failed, copying {clip_path} → {clip_file}")
-        shutil.copy2(clip_path, clip_file)
-
-    return model_file, clip_file
+    # Download model and clip into HF cache (writable, e.g. /tmp/.cache)
+    model_path = hf_hub_download(repo_id=cfg['model_repo'], filename=model_file)
+    clip_path = hf_hub_download(repo_id=cfg['clip_repo'], filename=clip_file)
+    return model_path, clip_path
 
 # Custom chat handler
 class SmolVLM2ChatHandler(Llava15ChatHandler):
@@ -102,7 +86,7 @@ class SmolVLM2ChatHandler(Llava15ChatHandler):
 # Load and cache LLM (only on dropdown or verbose change)
 def update_llm(size, model_file, clip_file, verbose_mode):
     if (model_cache['size'], model_cache['model_file'], model_cache['clip_file'], model_cache['verbose']) != (size, model_file, clip_file, verbose_mode):
-        mf, cf = ensure_weights(size, model_file, clip_file)
+        mf, cf = ensure_weights(MODELS[size], model_file, clip_file)
         handler = SmolVLM2ChatHandler(clip_model_path=cf, verbose=verbose_mode)
         llm = Llama(
             model_path=mf,
@@ -173,7 +157,6 @@ def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, u
     debug_msgs.append(f"[{timestamp}] CPU count = {os.cpu_count()}")
 
     t_start = time.time()
-    # right before you call the Llama API:
     buf = io.StringIO()
     with redirect_stdout(buf), redirect_stderr(buf):
         resp = model_cache['llm'].create_chat_completion(
@@ -182,7 +165,6 @@ def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, u
             temperature=0.1,
            stop=["<end_of_utterance>"]
         )
-    # grab every line the Llama client printed
    for line in buf.getvalue().splitlines():
        timestamp = time.strftime('%H:%M:%S')
        debug_msgs.append(f"[{timestamp}] {line}")
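On the app.py side, the symlink-or-copy fallback is dropped entirely: ensure_weights now just downloads into the HF cache (which the Dockerfile made writable) and returns the cached paths, and update_llm passes MODELS[size] instead of the bare size string, since ensure_weights indexes cfg['model_repo'] and cfg['clip_repo']. A hedged sketch of the shape MODELS must have for that call to work; the size labels, repo IDs, and filenames below are placeholders, the real ones live in the unchanged top of app.py:

    # Hypothetical shape only; keys and values are illustrative placeholders.
    MODELS = {
        "256M": {"model_repo": "org/SmolVLM2-256M-GGUF",
                 "clip_repo":  "org/SmolVLM2-256M-mmproj-GGUF"},
        "500M": {"model_repo": "org/SmolVLM2-500M-GGUF",
                 "clip_repo":  "org/SmolVLM2-500M-mmproj-GGUF"},
    }

    # With the new call signature, update_llm resolves the per-size config first,
    # then receives back paths inside the HF cache rather than symlinks in /app.
    cfg = MODELS["500M"]
    mf, cf = ensure_weights(cfg, "model.gguf", "mmproj.gguf")  # placeholder filenames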