File size: 2,476 Bytes
5cd2388
e31322d
5cd2388
e31322d
5cd2388
 
e31322d
 
 
 
 
 
 
5cd2388
e31322d
 
 
5cd2388
 
 
 
 
 
 
 
e31322d
20a7b41
 
 
 
 
 
e31322d
20a7b41
5cd2388
 
 
 
 
 
 
e31322d
5cd2388
 
e31322d
 
5cd2388
 
 
cc55064
5cd2388
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e31322d
5cd2388
 
20a7b41
 
 
e31322d
5cd2388
20a7b41
 
5cd2388
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# Dockerfile
# Single-stage image bundling an Ollama server plus a Python/Gunicorn app
# in one container; a generated startup script launches both (see below).
FROM python:3.11-slim

# Set environment variables for Python optimization and Ollama
# PYTHONUNBUFFERED=1         — stream stdout/stderr unbuffered (container logs)
# PYTHONDONTWRITEBYTECODE=1  — don't write .pyc files into the image
# OLLAMA_HOST=0.0.0.0        — presumably makes `ollama serve` listen on all
#                              interfaces — confirm against Ollama docs
# OLLAMA_MODELS=/data/ollama — model store under /data (created/chowned below)
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    OLLAMA_HOST=0.0.0.0 \
    OLLAMA_MODELS=/data/ollama

# Create an unprivileged runtime user and the writable model/data directory.
# UID is fixed at 1000 so orchestrators can verify the non-root identity.
RUN useradd --create-home --uid 1000 ollama \
    && mkdir -p /data/ollama \
    && chown -R ollama:ollama /data

# Application home; WORKDIR creates the directory if it does not exist.
WORKDIR /app

# OS packages: curl (Ollama installer + healthcheck) and wget.
# `update` and `install` share one layer so the package index is never stale;
# list files are removed in the same layer to keep the image small.
RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
        curl \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install Ollama.
# Download to a file and execute it instead of `curl … | sh`: with the piped
# form (and no pipefail) a failed download leaves `sh` reading empty input
# and the step "succeeds" silently (hadolint DL4006). Here a curl failure
# fails the build immediately.
RUN curl -fsSL -o /tmp/ollama-install.sh https://ollama.com/install.sh \
    && sh /tmp/ollama-install.sh \
    && rm -f /tmp/ollama-install.sh

# Copy requirements and install Python dependencies.
# requirements.txt is copied on its own so the dependency layer stays cached
# when only application code changes. --chown sets ownership at copy time
# instead of leaving root-owned files for a later recursive chown (which
# would duplicate them into an extra layer).
COPY --chown=ollama:ollama requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code (gunicorn below serves app:app from this module)
COPY --chown=ollama:ollama app.py .

# Set proper ownership and permissions
# NOTE(review): a chown/chmod in a separate RUN duplicates every affected
# file into a new layer; `COPY --chown=ollama:ollama` at copy time would
# avoid this. 755 leaves /app world-readable/executable — confirm nothing
# sensitive is copied into /app.
RUN chown -R ollama:ollama /app && \
    chmod -R 755 /app

# Switch to ollama user
# Everything after this line (including writing /app/start.sh) runs as the
# unprivileged `ollama` user; /app and /data are already owned by it.
USER ollama

# Create a startup script with configurable model pulling and logging.
# printf '%s\n' emits one script line per argument portably — the previous
# echo-with-\n form depended on /bin/sh's `echo` builtin interpreting
# backslash escapes (dash does; bash by default does not), so it would
# silently produce a broken script on a bash-/bin/sh base.
# Single-quoted arguments keep $… from being expanded at build time.
# Script behavior:
#   - starts `ollama serve` in the background, logging to /data/startup.log
#   - polls the Ollama API (default port 11434) until ready, max 60s,
#     instead of a blind fixed sleep
#   - pulls each comma-separated model in $MODELS_TO_PULL with up to 3
#     attempts (10s backoff between attempts, no pointless sleep after the
#     last one), exiting non-zero on persistent failure
#   - finally exec's gunicorn so it inherits PID 1 and receives SIGTERM
# NOTE(review): two processes share this container and gunicorn output goes
# to $LOG_FILE rather than docker logs — kept as in the original design.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'set -e' \
        'LOG_FILE=/data/startup.log' \
        'mkdir -p /data && touch $LOG_FILE && chmod 644 $LOG_FILE' \
        'echo "Starting Ollama server at $(date)" >> $LOG_FILE' \
        'ollama serve >> $LOG_FILE 2>&1 &' \
        '# Wait for the Ollama API instead of a fixed sleep; give up after 60s.' \
        'for i in {1..60}; do' \
        '    curl -sf http://localhost:11434/api/version >/dev/null 2>&1 && break' \
        '    sleep 1' \
        'done' \
        'MODELS_TO_PULL="${MODELS_TO_PULL:-hf.co/ggml-org/gemma-3-270m-GGUF:Q8_0}"' \
        'echo "Pulling models: $MODELS_TO_PULL" | tee -a $LOG_FILE' \
        "IFS=',' read -ra MODEL_ARRAY <<< \"\$MODELS_TO_PULL\"" \
        'for model in "${MODEL_ARRAY[@]}"; do' \
        '    echo "Pulling model $model..." | tee -a $LOG_FILE' \
        '    pulled=""' \
        '    for attempt in {1..3}; do' \
        '        if ollama pull "$model" >> $LOG_FILE 2>&1; then' \
        '            echo "Model $model pulled successfully" | tee -a $LOG_FILE' \
        '            pulled=1' \
        '            break' \
        '        fi' \
        '        if [ $attempt -lt 3 ]; then' \
        '            echo "Attempt $attempt: Failed to pull model $model, retrying in 10 seconds..." | tee -a $LOG_FILE' \
        '            sleep 10' \
        '        fi' \
        '    done' \
        '    if [ -z "$pulled" ]; then' \
        '        echo "Error: Failed to pull model $model after 3 attempts" | tee -a $LOG_FILE' \
        '        exit 1' \
        '    fi' \
        'done' \
        'echo "Starting Gunicorn server at $(date)" | tee -a $LOG_FILE' \
        'exec gunicorn --bind 0.0.0.0:7860 --workers 1 --timeout 120 --log-level info app:app >> $LOG_FILE 2>&1' \
        > /app/start.sh \
    && chmod +x /app/start.sh

# Expose port (documentation only — does not publish the port)
EXPOSE 7860

# Health check against the app's own /health endpoint.
# -sS silences the progress meter in healthcheck output while still printing
# errors; -f keeps the non-zero exit on HTTP >= 400 (original behavior).
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -fsS http://localhost:7860/health || exit 1

# Run the startup script (exec form: start.sh runs as PID 1 and, via its
# final `exec gunicorn`, hands PID 1 to gunicorn so SIGTERM is delivered)
CMD ["/app/start.sh"]