# Container image that runs extract_img_pdf.py.

# Base image: slim CPython 3.10 (apt-based, see the apt-get step in this file).
FROM python:3.10-slim

# Relative paths in later COPY/CMD instructions resolve against /app.
WORKDIR /app

# Build-time only: stops apt/debconf from prompting during `RUN apt-get ...`.
# Declared as ARG instead of ENV so that RUN steps in this stage still see it,
# but it is not baked into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime Python settings:
#   PYTHONUNBUFFERED=1         stdout/stderr are not buffered, so logs show up immediately.
#   PYTHONDONTWRITEBYTECODE=1  the interpreter does not write .pyc files.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

# --- OS packages -------------------------------------------------------------
# Installed before any project file is copied, so this layer is rebuilt only
# when the package list changes, not on every source edit.
#   build-essential              compiler toolchain (presumably for pip packages
#                                that build native extensions - confirm)
#   libgl1, libglib2.0-0, libsm6,
#   libxext6, libxrender-dev     shared libraries (presumably required by an
#                                imaging dependency such as OpenCV - confirm)
#   poppler-utils, tesseract-ocr PDF tooling and OCR engine
# --no-install-recommends keeps the layer to hard dependencies only.
# NOTE(review): if the app relied on a package that previously arrived only as
# a "Recommends" (possibly poppler-data for CJK PDFs - verify), list it here.
# The apt package index is deleted in the same layer that downloaded it.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        libgl1 \
        libglib2.0-0 \
        libsm6 \
        libxext6 \
        libxrender-dev \
        poppler-utils \
        tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*

# --- Python dependencies -----------------------------------------------------
# Only the manifest is copied before installing, so the pip layer stays cached
# until requirements.txt itself changes. --no-cache-dir keeps pip's download
# cache out of the image.
COPY requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# --- Application files -------------------------------------------------------
# Copied last among these steps because they change most often.
COPY extract_img_pdf.py extract_img_pdf.py
COPY templates/ /app/templates

# NOTE(review): this stores .env inside an image layer, where anyone who can
# pull the image can read it. If it holds credentials, drop this line, add
# .env to .dockerignore and pass the values at run time
# (`docker run --env-file .env ...`). Kept for now because the application may
# expect the file to exist at /app/.env - confirm before removing.
COPY .env .env

# Fontconfig lookup locations: the configuration directory and the main
# configuration file inside it.
ENV FONTCONFIG_PATH=/etc/fonts \
    FONTCONFIG_FILE=/etc/fonts/fonts.conf

# NLTK resources are fetched at build time into /app/nltk_data, and NLTK_DATA
# points the library at that directory when the container runs.
ENV NLTK_DATA=/app/nltk_data

# Create the target directory and download the tokenizer and POS-tagger data.
# NOTE(review): recent NLTK releases look up `punkt_tab` instead of `punkt`;
# check this list against the NLTK version pinned in requirements.txt.
RUN mkdir -p /app/nltk_data \
    && python -m nltk.downloader -d /app/nltk_data \
        punkt \
        averaged_perceptron_tagger \
        averaged_perceptron_tagger_eng

# Create the cache, data and output directories, then open up permissions on
# the whole /app tree. The recursive chmod on /app already covers cache/ and
# data/, so everything is done in a single layer.
# NOTE(review): mode 777 makes the application code world-writable, and no USER
# instruction is set, so the container runs as the base image's default user.
# Consider a dedicated non-root user plus `COPY --chown=...` once the UID the
# deployment platform runs the container with is confirmed.
RUN mkdir -p /app/cache /app/data /app/OUTPUTS \
    && chmod -R 777 /app

# Documents the port the container is meant to be reached on (presumably the
# port extract_img_pdf.py listens on - confirm). EXPOSE does not publish it;
# use `docker run -p 7860:7860` for that.
EXPOSE 7860/tcp

# Exec (JSON-array) form: python is started directly rather than through
# /bin/sh, so it receives stop signals itself. The script path is relative to
# WORKDIR (/app).
CMD ["python", "extract_img_pdf.py"]