Spaces:
Sleeping
Sleeping
| FROM python:3.10-slim | |
| # Set working directory | |
| WORKDIR /app | |
| # Install system dependencies | |
| RUN apt-get update && apt-get install -y --no-install-recommends \ | |
| build-essential \ | |
| wget \ | |
| curl \ | |
| gnupg \ | |
| && rm -rf /var/lib/apt/lists/* | |
| # Install MongoDB | |
| RUN wget -qO - https://www.mongodb.org/static/pgp/server-6.0.asc | apt-key add - \ | |
| && echo "deb http://repo.mongodb.org/apt/debian buster/mongodb-org/6.0 main" | tee /etc/apt/sources.list.d/mongodb-org-6.0.list \ | |
| && apt-get update \ | |
| && apt-get install -y mongodb-org \ | |
| && mkdir -p /data/db \ | |
| && apt-get clean \ | |
| && rm -rf /var/lib/apt/lists/* | |
| # Install Redis | |
| RUN apt-get update && apt-get install -y --no-install-recommends \ | |
| redis-server \ | |
| && apt-get clean \ | |
| && rm -rf /var/lib/apt/lists/* | |
| # Copy requirements.txt | |
| COPY requirements.txt . | |
| # Install Python dependencies | |
| RUN pip install --no-cache-dir -r requirements.txt | |
| # Copy the crawler code | |
| COPY . . | |
| # Create necessary directories | |
| RUN mkdir -p /data/storage/html_pages \ | |
| && mkdir -p /data/storage/logs \ | |
| && mkdir -p /data/storage/exports | |
| # Expose ports | |
| # Prometheus metrics port | |
| EXPOSE 9100 | |
| # MongoDB port | |
| EXPOSE 27017 | |
| # Redis port | |
| EXPOSE 6379 | |
| # Set environment variables | |
| ENV MONGODB_URI=mongodb://localhost:27017/ | |
| ENV REDIS_URI=redis://localhost:6379/0 | |
| ENV PYTHONUNBUFFERED=1 | |
| # Create entrypoint script | |
| RUN echo '#!/bin/bash\n\ | |
| # Start MongoDB\n\ | |
| mongod --fork --logpath /var/log/mongodb.log\n\ | |
| \n\ | |
| # Start Redis\n\ | |
| redis-server --daemonize yes\n\ | |
| \n\ | |
| # Check if services are running\n\ | |
| echo "Waiting for MongoDB to start..."\n\ | |
| until mongo --eval "print(\"MongoDB is ready\")" > /dev/null 2>&1; do\n\ | |
| sleep 1\n\ | |
| done\n\ | |
| \n\ | |
| echo "Waiting for Redis to start..."\n\ | |
| until redis-cli ping > /dev/null 2>&1; do\n\ | |
| sleep 1\n\ | |
| done\n\ | |
| \n\ | |
| echo "All services are running!"\n\ | |
| \n\ | |
| # Execute the provided command or default to help\n\ | |
| if [ $# -eq 0 ]; then\n\ | |
| python crawl.py --help\n\ | |
| else\n\ | |
| exec "$@"\n\ | |
| fi' > /app/entrypoint.sh \ | |
| && chmod +x /app/entrypoint.sh | |
| # Set entrypoint | |
| ENTRYPOINT ["/app/entrypoint.sh"] | |
| # Default command is to show help | |
| CMD ["python", "crawl.py", "--help"] |