feat: docker compose
Browse files
- .dockerignore +20 -0
- .github/workflows/ci.yml +34 -5
- CHANGELOG.md +16 -0
- Makefile +66 -9
- app.py +0 -6
- docker-compose.yml +34 -0
- nginx.conf +58 -0
    	
        .dockerignore
    ADDED
    
    | @@ -0,0 +1,20 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            .git
         | 
| 2 | 
            +
            .gitignore
         | 
| 3 | 
            +
            .env
         | 
| 4 | 
            +
            .venv
         | 
| 5 | 
            +
            __pycache__
         | 
| 6 | 
            +
            *.pyc
         | 
| 7 | 
            +
            *.pyo
         | 
| 8 | 
            +
            *.pyd
         | 
| 9 | 
            +
            .Python
         | 
| 10 | 
            +
            *.py[cod]
         | 
| 11 | 
            +
            *$py.class
         | 
| 12 | 
            +
            .pytest_cache
         | 
| 13 | 
            +
            .coverage
         | 
| 14 | 
            +
            htmlcov
         | 
| 15 | 
            +
            .mypy_cache
         | 
| 16 | 
            +
            .ruff_cache
         | 
| 17 | 
            +
            .DS_Store
         | 
| 18 | 
            +
            notebooks/
         | 
| 19 | 
            +
            tests/
         | 
| 20 | 
            +
            docs/
         | 
    	
        .github/workflows/ci.yml
    CHANGED
    
    | @@ -9,13 +9,18 @@ on: | |
| 9 | 
             
            jobs:
         | 
| 10 | 
             
              test:
         | 
| 11 | 
             
                runs-on: ubuntu-latest
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 12 | 
             
                steps:
         | 
| 13 | 
             
                  - uses: actions/checkout@v4
         | 
| 14 |  | 
| 15 | 
            -
                  - name: Set up Python  | 
| 16 | 
             
                    uses: actions/setup-python@v5
         | 
| 17 | 
             
                    with:
         | 
| 18 | 
            -
                      python-version:  | 
| 19 | 
             
                      cache: "pip"
         | 
| 20 |  | 
| 21 | 
             
                  - name: Install Poetry
         | 
| @@ -27,9 +32,14 @@ jobs: | |
| 27 | 
             
                      poetry config virtualenvs.create true
         | 
| 28 | 
             
                      poetry config virtualenvs.in-project true
         | 
| 29 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 30 | 
             
                  - name: Install dependencies
         | 
| 31 | 
             
                    run: |
         | 
| 32 | 
            -
                      poetry lock --no-update
         | 
| 33 | 
             
                      poetry install
         | 
| 34 |  | 
| 35 | 
             
                  - name: Run pre-commit hooks
         | 
| @@ -39,5 +49,24 @@ jobs: | |
| 39 | 
             
                    env:
         | 
| 40 | 
             
                      PRE_COMMIT_CACHE_KEY: ${{ hashFiles('.pre-commit-config.yaml', 'pyproject.toml') }}
         | 
| 41 |  | 
| 42 | 
            -
                  - name: Run tests
         | 
| 43 | 
            -
                    run:  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 9 | 
             
            jobs:
         | 
| 10 | 
             
              test:
         | 
| 11 | 
             
                runs-on: ubuntu-latest
         | 
| 12 | 
            +
                strategy:
         | 
| 13 | 
            +
                  matrix:
         | 
| 14 | 
            +
                    python-version: ["3.10", "3.11", "3.12"]
         | 
| 15 | 
            +
                  fail-fast: false
         | 
| 16 | 
            +
             | 
| 17 | 
             
                steps:
         | 
| 18 | 
             
                  - uses: actions/checkout@v4
         | 
| 19 |  | 
| 20 | 
            +
                  - name: Set up Python ${{ matrix.python-version }}
         | 
| 21 | 
             
                    uses: actions/setup-python@v5
         | 
| 22 | 
             
                    with:
         | 
| 23 | 
            +
                      python-version: ${{ matrix.python-version }}
         | 
| 24 | 
             
                      cache: "pip"
         | 
| 25 |  | 
| 26 | 
             
                  - name: Install Poetry
         | 
|  | |
| 32 | 
             
                      poetry config virtualenvs.create true
         | 
| 33 | 
             
                      poetry config virtualenvs.in-project true
         | 
| 34 |  | 
| 35 | 
            +
                  - name: Cache Poetry virtualenv
         | 
| 36 | 
            +
                    uses: actions/cache@v3
         | 
| 37 | 
            +
                    with:
         | 
| 38 | 
            +
                      path: ./.venv
         | 
| 39 | 
            +
                      key: venv-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}
         | 
| 40 | 
            +
             | 
| 41 | 
             
                  - name: Install dependencies
         | 
| 42 | 
             
                    run: |
         | 
|  | |
| 43 | 
             
                      poetry install
         | 
| 44 |  | 
| 45 | 
             
                  - name: Run pre-commit hooks
         | 
|  | |
| 49 | 
             
                    env:
         | 
| 50 | 
             
                      PRE_COMMIT_CACHE_KEY: ${{ hashFiles('.pre-commit-config.yaml', 'pyproject.toml') }}
         | 
| 51 |  | 
| 52 | 
            +
                  - name: Run tests with coverage
         | 
| 53 | 
            +
                    run: |
         | 
| 54 | 
            +
                      poetry run pytest --cov=llmdataparser --cov-report=xml
         | 
| 55 | 
            +
             | 
| 56 | 
            +
                  - name: Upload coverage to Codecov
         | 
| 57 | 
            +
                    uses: codecov/codecov-action@v3
         | 
| 58 | 
            +
                    with:
         | 
| 59 | 
            +
                      file: ./coverage.xml
         | 
| 60 | 
            +
                      fail_ci_if_error: true
         | 
| 61 | 
            +
             | 
| 62 | 
            +
                  - name: Build documentation
         | 
| 63 | 
            +
                    run: |
         | 
| 64 | 
            +
                      poetry run mkdocs build
         | 
| 65 | 
            +
                    if: matrix.python-version == '3.12'
         | 
| 66 | 
            +
             | 
| 67 | 
            +
                  - name: Upload documentation artifact
         | 
| 68 | 
            +
                    uses: actions/upload-artifact@v4  # v3 has been retired by GitHub and fails the job
         | 
| 69 | 
            +
                    with:
         | 
| 70 | 
            +
                      name: documentation
         | 
| 71 | 
            +
                      path: site/
         | 
| 72 | 
            +
                    if: matrix.python-version == '3.12'
         | 
    	
        CHANGELOG.md
    ADDED
    
    | @@ -0,0 +1,16 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # Changelog
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            All notable changes to this project will be documented in this file.
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
         | 
| 6 | 
            +
            and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            ## \[1.0.0\] - 2024-12-30
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            ### Added
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            - Initial release
         | 
| 13 | 
            +
            - Support for multiple benchmark datasets (MMLU, GSM8k, etc.)
         | 
| 14 | 
            +
            - Gradio interface for dataset exploration
         | 
| 15 | 
            +
            - Comprehensive test suite
         | 
| 16 | 
            +
            - Documentation and examples
         | 
    	
        Makefile
    CHANGED
    
    | @@ -1,8 +1,13 @@ | |
|  | |
| 1 | 
             
            # Variables
         | 
|  | |
| 2 | 
             
            IMAGE_NAME = llmdataparser
         | 
| 3 | 
             
            CONTAINER_NAME = llmdataparser
         | 
| 4 | 
             
            VERSION = latest
         | 
| 5 |  | 
|  | |
|  | |
|  | |
| 6 | 
             
            # Build the Docker image
         | 
| 7 | 
             
            build:
         | 
| 8 | 
             
            	docker build -t $(IMAGE_NAME):$(VERSION) .
         | 
| @@ -23,28 +28,80 @@ rm: | |
| 23 | 
             
            rmi:
         | 
| 24 | 
             
            	docker rmi $(IMAGE_NAME):$(VERSION)
         | 
| 25 |  | 
| 26 | 
            -
            #  | 
| 27 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 28 |  | 
| 29 | 
            -
            #  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 30 | 
             
            up: build run
         | 
| 31 |  | 
| 32 | 
             
            # Stop and remove container
         | 
| 33 | 
             
            down: stop rm
         | 
| 34 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 35 | 
             
            # Show container logs
         | 
| 36 | 
             
            logs:
         | 
| 37 | 
             
            	docker logs $(CONTAINER_NAME)
         | 
| 38 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 39 | 
             
            # Enter container shell
         | 
| 40 | 
             
            shell:
         | 
| 41 | 
             
            	docker exec -it $(CONTAINER_NAME) /bin/bash
         | 
| 42 |  | 
| 43 | 
            -
            #  | 
| 44 | 
            -
             | 
| 45 | 
            -
             | 
|  | |
|  | |
|  | |
| 46 |  | 
| 47 | 
            -
             | 
| 48 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 49 |  | 
| 50 | 
            -
             | 
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # -----------------------------
         | 
| 2 | 
             
            # Variables
         | 
| 3 | 
            +
            # -----------------------------
         | 
| 4 | 
             
            IMAGE_NAME = llmdataparser
         | 
| 5 | 
             
            CONTAINER_NAME = llmdataparser
         | 
| 6 | 
             
            VERSION = latest
         | 
| 7 |  | 
| 8 | 
            +
            # -----------------------------
         | 
| 9 | 
            +
            # Docker Basic Commands
         | 
| 10 | 
            +
            # -----------------------------
         | 
| 11 | 
             
            # Build the Docker image
         | 
| 12 | 
             
            build:
         | 
| 13 | 
             
            	docker build -t $(IMAGE_NAME):$(VERSION) .
         | 
|  | |
| 28 | 
             
            rmi:
         | 
| 29 | 
             
            	docker rmi $(IMAGE_NAME):$(VERSION)
         | 
| 30 |  | 
| 31 | 
            +
            # -----------------------------
         | 
| 32 | 
            +
            # Docker Compose Commands
         | 
| 33 | 
            +
            # -----------------------------
         | 
| 34 | 
            +
            # Start with docker-compose (development)
         | 
| 35 | 
            +
            compose-up:
         | 
| 36 | 
            +
            	docker compose up -d
         | 
| 37 | 
            +
             | 
| 38 | 
            +
            # Stop and remove containers
         | 
| 39 | 
            +
            compose-down:
         | 
| 40 | 
            +
            	docker compose down
         | 
| 41 | 
            +
             | 
| 42 | 
            +
            # View logs
         | 
| 43 | 
            +
            compose-logs:
         | 
| 44 | 
            +
            	docker compose logs -f
         | 
| 45 | 
            +
             | 
| 46 | 
            +
            # Rebuild containers
         | 
| 47 | 
            +
            compose-build:
         | 
| 48 | 
            +
            	docker compose build
         | 
| 49 |  | 
| 50 | 
            +
            # Restart containers
         | 
| 51 | 
            +
            compose-restart:
         | 
| 52 | 
            +
            	docker compose restart
         | 
| 53 | 
            +
             | 
| 54 | 
            +
            # -----------------------------
         | 
| 55 | 
            +
            # Convenience Commands
         | 
| 56 | 
            +
            # -----------------------------
         | 
| 57 | 
            +
            # Build and run with docker
         | 
| 58 | 
             
            up: build run
         | 
| 59 |  | 
| 60 | 
             
            # Stop and remove container
         | 
| 61 | 
             
            down: stop rm
         | 
| 62 |  | 
| 63 | 
            +
            # Clean everything
         | 
| 64 | 
            +
            clean: stop rm rmi
         | 
| 65 | 
            +
             | 
| 66 | 
            +
            # -----------------------------
         | 
| 67 | 
            +
            # Monitoring Commands
         | 
| 68 | 
            +
            # -----------------------------
         | 
| 69 | 
             
            # Show container logs
         | 
| 70 | 
             
            logs:
         | 
| 71 | 
             
            	docker logs $(CONTAINER_NAME)
         | 
| 72 |  | 
| 73 | 
            +
            # Follow container logs
         | 
| 74 | 
            +
            logs-follow:
         | 
| 75 | 
            +
            	docker logs -f $(CONTAINER_NAME)
         | 
| 76 | 
            +
             | 
| 77 | 
            +
            # Show container status
         | 
| 78 | 
            +
            status:
         | 
| 79 | 
            +
            	docker ps -a --filter "name=$(CONTAINER_NAME)"
         | 
| 80 | 
            +
             | 
| 81 | 
             
            # Enter container shell
         | 
| 82 | 
             
            shell:
         | 
| 83 | 
             
            	docker exec -it $(CONTAINER_NAME) /bin/bash
         | 
| 84 |  | 
| 85 | 
            +
            # -----------------------------
         | 
| 86 | 
            +
            # Production Commands
         | 
| 87 | 
            +
            # -----------------------------
         | 
| 88 | 
            +
            # Test nginx configuration (for production use)
         | 
| 89 | 
            +
            nginx-test:
         | 
| 90 | 
            +
            	docker compose run --rm nginx nginx -t
         | 
| 91 |  | 
| 92 | 
            +
            # Start with nginx test (for production use)
         | 
| 93 | 
            +
            compose-up-prod: nginx-test compose-up
         | 
| 94 | 
            +
             | 
| 95 | 
            +
            # -----------------------------
         | 
| 96 | 
            +
            # Security Commands
         | 
| 97 | 
            +
            # -----------------------------
         | 
| 98 | 
            +
            security-check:
         | 
| 99 | 
            +
            	@echo "Checking nginx configuration..."
         | 
| 100 | 
            +
            	docker compose run --rm nginx nginx -t
         | 
| 101 | 
            +
            	@echo "Checking exposed ports..."
         | 
| 102 | 
            +
            	docker compose config | grep -E "ports:|127.0.0.1"
         | 
| 103 |  | 
| 104 | 
            +
            # Ensure all targets are treated as commands, not files
         | 
| 105 | 
            +
            .PHONY: build run stop rm rmi clean up down logs shell \
         | 
| 106 | 
            +
                    compose-up compose-down compose-logs compose-build compose-restart \
         | 
| 107 | 
            +
                    nginx-test status logs-follow compose-up-prod security-check
         | 
    	
        app.py
    CHANGED
    
    | @@ -390,13 +390,7 @@ if __name__ == "__main__": | |
| 390 | 
             
                demo = create_interface()
         | 
| 391 | 
             
                try:
         | 
| 392 | 
             
                    demo.launch(
         | 
| 393 | 
            -
                        server_port=7860,
         | 
| 394 | 
            -
                        auth=None,
         | 
| 395 | 
            -
                        ssl_keyfile=None,
         | 
| 396 | 
            -
                        ssl_certfile=None,
         | 
| 397 | 
             
                        show_error=True,  # Changed to True for debugging
         | 
| 398 | 
            -
                        share=False,
         | 
| 399 | 
            -
                        max_threads=40,
         | 
| 400 | 
             
                    )
         | 
| 401 | 
             
                except Exception as e:
         | 
| 402 | 
             
                    print(f"Error launching Gradio: {e}")  # Add error logging
         | 
|  | |
| 390 | 
             
                demo = create_interface()
         | 
| 391 | 
             
                try:
         | 
| 392 | 
             
                    demo.launch(
         | 
|  | |
|  | |
|  | |
|  | |
| 393 | 
             
                        show_error=True,  # Changed to True for debugging
         | 
|  | |
|  | |
| 394 | 
             
                    )
         | 
| 395 | 
             
                except Exception as e:
         | 
| 396 | 
             
                    print(f"Error launching Gradio: {e}")  # Add error logging
         | 
    	
        docker-compose.yml
    ADDED
    
    | @@ -0,0 +1,34 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version: "3.8"
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            services:
         | 
| 4 | 
            +
              llmdataparser:
         | 
| 5 | 
            +
                build: .
         | 
| 6 | 
            +
                environment:
         | 
| 7 | 
            +
                  - GRADIO_SERVER_PORT=7860
         | 
| 8 | 
            +
                volumes:
         | 
| 9 | 
            +
                  - .:/app
         | 
| 10 | 
            +
                  - huggingface_cache:/app/.cache/huggingface
         | 
| 11 | 
            +
                healthcheck:
         | 
| 12 | 
            +
                  test: ["CMD", "curl", "-f", "http://127.0.0.1:7860"]
         | 
| 13 | 
            +
                  interval: 30s
         | 
| 14 | 
            +
                  timeout: 10s
         | 
| 15 | 
            +
                  retries: 3
         | 
| 16 | 
            +
                networks:
         | 
| 17 | 
            +
                  - internal
         | 
| 18 | 
            +
             | 
| 19 | 
            +
              nginx:
         | 
| 20 | 
            +
                image: nginx:alpine
         | 
| 21 | 
            +
                ports:
         | 
| 22 | 
            +
                  - "80:80"
         | 
| 23 | 
            +
                volumes:
         | 
| 24 | 
            +
                  - ./nginx.conf:/etc/nginx/nginx.conf:ro
         | 
| 25 | 
            +
                depends_on:
         | 
| 26 | 
            +
                  - llmdataparser
         | 
| 27 | 
            +
                networks:
         | 
| 28 | 
            +
                  - internal
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            networks:
         | 
| 31 | 
            +
              internal:
         | 
| 32 | 
            +
             | 
| 33 | 
            +
            volumes:
         | 
| 34 | 
            +
              huggingface_cache:
         | 
    	
        nginx.conf
    ADDED
    
    | @@ -0,0 +1,58 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            events {
         | 
| 2 | 
            +
                worker_connections 1024;
         | 
| 3 | 
            +
            }
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            http {
         | 
| 6 | 
            +
                # Basic security settings
         | 
| 7 | 
            +
                server_tokens off;  # Don't show nginx version
         | 
| 8 | 
            +
                client_max_body_size 10M;  # Limit request size
         | 
| 9 | 
            +
                client_body_timeout 12;
         | 
| 10 | 
            +
                client_header_timeout 12;
         | 
| 11 | 
            +
             | 
| 12 | 
            +
                upstream gradio_app {
         | 
| 13 | 
            +
                    server llmdataparser:7860;
         | 
| 14 | 
            +
                    keepalive 32;
         | 
| 15 | 
            +
                }
         | 
| 16 | 
            +
             | 
| 17 | 
            +
                server {
         | 
| 18 | 
            +
                    listen 80;
         | 
| 19 | 
            +
                    server_name localhost;
         | 
| 20 | 
            +
             | 
| 21 | 
            +
                    # Enhanced security headers
         | 
| 22 | 
            +
                    add_header X-Frame-Options "SAMEORIGIN" always;
         | 
| 23 | 
            +
                    add_header X-Content-Type-Options "nosniff" always;
         | 
| 24 | 
            +
                    add_header X-XSS-Protection "1; mode=block" always;
         | 
| 25 | 
            +
                    add_header Referrer-Policy "strict-origin-when-cross-origin" always;  # must be a valid policy token or browsers ignore the header
         | 
| 26 | 
            +
                    add_header Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline' 'unsafe-eval'; style-src 'self' 'unsafe-inline';" always;
         | 
| 27 | 
            +
                    add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
         | 
| 28 | 
            +
             | 
| 29 | 
            +
                    location / {
         | 
| 30 | 
            +
                        proxy_pass http://gradio_app;
         | 
| 31 | 
            +
                        proxy_set_header Host $host;
         | 
| 32 | 
            +
                        proxy_set_header X-Real-IP $remote_addr;
         | 
| 33 | 
            +
                        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
         | 
| 34 | 
            +
                        proxy_set_header X-Forwarded-Proto $scheme;
         | 
| 35 | 
            +
             | 
| 36 | 
            +
                        # WebSocket support
         | 
| 37 | 
            +
                        proxy_http_version 1.1;
         | 
| 38 | 
            +
                        proxy_set_header Upgrade $http_upgrade;
         | 
| 39 | 
            +
                        proxy_set_header Connection "upgrade";
         | 
| 40 | 
            +
             | 
| 41 | 
            +
                        # Timeouts
         | 
| 42 | 
            +
                        proxy_connect_timeout 60s;
         | 
| 43 | 
            +
                        proxy_send_timeout 60s;
         | 
| 44 | 
            +
                        proxy_read_timeout 60s;
         | 
| 45 | 
            +
             | 
| 46 | 
            +
                        # Security
         | 
| 47 | 
            +
                        proxy_buffering on;
         | 
| 48 | 
            +
                        proxy_buffer_size 8k;
         | 
| 49 | 
            +
                        proxy_buffers 8 8k;
         | 
| 50 | 
            +
                    }
         | 
| 51 | 
            +
             | 
| 52 | 
            +
                    # Deny access to hidden files
         | 
| 53 | 
            +
                    location ~ /\. {
         | 
| 54 | 
            +
                        deny all;
         | 
| 55 | 
            +
                        return 404;
         | 
| 56 | 
            +
                    }
         | 
| 57 | 
            +
                }
         | 
| 58 | 
            +
            }
         | 
