File size: 1,789 Bytes
7ec6a6a
 
 
0d08e31
 
 
7ec6a6a
3585865
 
 
 
8298144
6eec827
7ae3bc4
e887626
 
 
 
 
 
529f08a
6eec827
7ec6a6a
 
 
 
 
 
 
 
 
 
79fc65e
 
ed4b853
 
d2bf321
ed4b853
 
 
7ec6a6a
e168ec6
 
 
 
 
 
0d08e31
e168ec6
3760b8e
 
 
7ec6a6a
e168ec6
 
26e9409
e168ec6
529f08a
 
7ec6a6a
 
 
 
8fbe5d2
47f1456
e6d03b6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# Base image: slim Debian-based Python 3.10 runtime.
FROM python:3.10-slim

# Relative paths in every later instruction resolve against /app.
WORKDIR /app

# Build-time only: stops apt/debconf from prompting during package installs.
# Declared as ARG (not ENV) so later RUN steps still see it, but it is not
# left behind in the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment:
#   PYTHONUNBUFFERED=1        - unbuffered stdout/stderr, so logs show up immediately
#   PYTHONDONTWRITEBYTECODE=1 - do not write .pyc files
#   TRANSFORMERS_CACHE, HF_HOME - both point the Hugging Face cache at /app/cache
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    TRANSFORMERS_CACHE=/app/cache \
    HF_HOME=/app/cache

# Layers are ordered from least to most frequently changing (OS packages ->
# Python dependencies -> application source) so that editing a source file
# does not force apt and pip to run again.

# System packages: compiler toolchain, OpenCV/GL/X11 runtime libraries,
# Tesseract OCR, Poppler PDF tools, ffmpeg, curl and CA certificates.
# --no-install-recommends keeps optional extras out of the image; the apt
# package index is removed in the same layer so it never reaches the image.
# NOTE(review): poppler-data is listed explicitly because it is normally only
# pulled in as a recommended package of Poppler - confirm it is still needed.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    curl \
    ffmpeg \
    libgl1 \
    libglib2.0-0 \
    libopencv-dev \
    libsm6 \
    libxext6 \
    libxrender-dev \
    poppler-data \
    poppler-utils \
    tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies: copy only the manifest first so this layer is rebuilt
# only when requirements.txt changes. --no-cache-dir keeps pip's download
# cache out of the layer.
COPY requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Application source and templates (copied last: they change most often).
COPY extract_img_pdf.py live_streaming_flask.py test_streaming.py app_main.py ./
COPY templates/ /app/templates

# NOTE(review): copying .env stores its contents permanently in an image layer,
# where anyone who can pull the image can read them. Kept because the
# application may read this file at runtime; prefer injecting the values as
# runtime environment variables / platform secrets and then drop this line.
COPY .env .env

# Library configuration via environment:
#   FONTCONFIG_PATH / FONTCONFIG_FILE - point fontconfig at the system config
#                                       (optional; suppresses fontconfig warnings)
#   NLTK_DATA                         - directory where NLTK looks for its data
ENV FONTCONFIG_PATH=/etc/fonts \
    FONTCONFIG_FILE=/etc/fonts/fonts.conf \
    NLTK_DATA=/app/nltk_data

# Create the NLTK data directory and download the required resources into it
# at build time, so they are already present when the container starts.
RUN mkdir -p /app/nltk_data \
    && python -m nltk.downloader -d /app/nltk_data \
        punkt \
        averaged_perceptron_tagger \
        averaged_perceptron_tagger_eng

# Runtime-writable directories: the cache directory, a data directory, and the
# output trees (both /app/OUTPUTS and /app/outputs are created, as before).
#
# Previously only cache/ and data/ were made writable (via chmod -R 777); the
# output directories stayed root-owned, so a non-root runtime user could not
# write results. Hugging Face Spaces runs the container as UID 1000, so create
# that user, hand /app over to it, and drop the world-writable permissions.
RUN mkdir -p \
        /app/cache \
        /app/data \
        /app/OUTPUTS \
        /app/outputs/DETECTED_IMAGE \
        /app/outputs/SCANNED_IMAGE \
        /app/outputs/EXTRACTED_JSON \
    && useradd --create-home --uid 1000 user \
    && chown -R user:user /app

# Everything from here on, including the container's main process, runs
# unprivileged.
USER user

# Document the listening port (7860 is the port required for HF Spaces).
# EXPOSE does not publish the port by itself.
EXPOSE 7860/tcp

# Default container process: start the application via app_main.py.
# Alternative entry points, kept for reference:
#   CMD ["python", "extract_img_pdf.py"]
#   CMD ["python", "live_streaming_flask.py"]
CMD ["python", "app_main.py"]