Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	Commit 
							
							·
						
						cf16f9f
	
1
								Parent(s):
							
							58a3663
								
Move to remote embedder API
Browse files
- .DS_Store +0 -0
- .dockerignore +1 -0
- .gitignore +1 -0
- Dockerfile +4 -17
- dw_model.py +0 -30
- requirements.txt +0 -1
- utils/rag/embeddings.py +22 -18
- warmup.py +0 -17
    	
        .DS_Store
    CHANGED
    
    | Binary files a/.DS_Store and b/.DS_Store differ | 
|  | 
    	
        .dockerignore
    CHANGED
    
    | @@ -25,6 +25,7 @@ coverage.xml | |
| 25 | 
             
            *.md
         | 
| 26 | 
             
            !README.md
         | 
| 27 | 
             
            ingestion_pipeline
         | 
|  | |
| 28 | 
             
            exefiles
         | 
| 29 | 
             
            diagram
         | 
| 30 | 
             
            diagram.svg
         | 
|  | |
| 25 | 
             
            *.md
         | 
| 26 | 
             
            !README.md
         | 
| 27 | 
             
            ingestion_pipeline
         | 
| 28 | 
            +
            embedder
         | 
| 29 | 
             
            exefiles
         | 
| 30 | 
             
            diagram
         | 
| 31 | 
             
            diagram.svg
         | 
    	
        .gitignore
    CHANGED
    
    | @@ -3,4 +3,5 @@ diagram | |
| 3 |  | 
| 4 | 
             
            # For ingestion pipeline
         | 
| 5 | 
             
            ingestion_pipeline
         | 
|  | |
| 6 | 
             
            exefiles
         | 
|  | |
| 3 |  | 
| 4 | 
             
            # For ingestion pipeline
         | 
| 5 | 
             
            ingestion_pipeline
         | 
| 6 | 
            +
            embedder 
         | 
| 7 | 
             
            exefiles
         | 
    	
        Dockerfile
    CHANGED
    
    | @@ -23,25 +23,12 @@ COPY . . | |
| 23 | 
             
            # Install Python dependencies
         | 
| 24 | 
             
            RUN pip install --upgrade pip && pip install --no-cache-dir -r requirements.txt
         | 
| 25 |  | 
| 26 | 
            -
            #  | 
| 27 | 
             
            ENV HF_HOME="/home/user/.cache/huggingface"
         | 
| 28 | 
            -
            ENV SENTENCE_TRANSFORMERS_HOME="/home/user/.cache/huggingface/sentence-transformers"
         | 
| 29 | 
            -
            ENV MEDGEMMA_HOME="/home/user/.cache/huggingface/sentence-transformers"
         | 
| 30 |  | 
| 31 | 
            -
            #  | 
| 32 | 
            -
            RUN mkdir -p / | 
| 33 | 
            -
                chown -R user:user / | 
| 34 | 
            -
             | 
| 35 | 
            -
            # Control preloading flags
         | 
| 36 | 
            -
            ENV PRELOAD_TRANSLATORS="0"
         | 
| 37 | 
            -
            ENV EMBEDDING_HALF="0"
         | 
| 38 | 
            -
             | 
| 39 | 
            -
            # Preload embedding model and warmup
         | 
| 40 | 
            -
            RUN test -f /app/dw_model.py && python /app/dw_model.py || true
         | 
| 41 | 
            -
            RUN test -f /app/warmup.py && python /app/warmup.py || true
         | 
| 42 | 
            -
             | 
| 43 | 
            -
            # Ensure ownership stays correct
         | 
| 44 | 
            -
            RUN chown -R user:user /app/model_cache
         | 
| 45 |  | 
| 46 | 
             
            # Expose port for HF Spaces
         | 
| 47 | 
             
            ENV PORT=7860
         | 
|  | |
| 23 | 
             
            # Install Python dependencies
         | 
| 24 | 
             
            RUN pip install --upgrade pip && pip install --no-cache-dir -r requirements.txt
         | 
| 25 |  | 
| 26 | 
            +
            # Optional: general HF cache directory (kept for other models like BLIP)
         | 
| 27 | 
             
            ENV HF_HOME="/home/user/.cache/huggingface"
         | 
|  | |
|  | |
| 28 |  | 
| 29 | 
            +
            # Ensure cache directory ownership
         | 
| 30 | 
            +
            RUN mkdir -p /home/user/.cache/huggingface && \
         | 
| 31 | 
            +
                chown -R user:user /home/user/.cache/huggingface
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 32 |  | 
| 33 | 
             
            # Expose port for HF Spaces
         | 
| 34 | 
             
            ENV PORT=7860
         | 
    	
        dw_model.py
    DELETED
    
    | @@ -1,30 +0,0 @@ | |
| 1 | 
            -
            # dw_model.py
         | 
| 2 | 
            -
            ### --- A. transformer and embedder ---
         | 
| 3 | 
            -
            import os
         | 
| 4 | 
            -
            import shutil
         | 
| 5 | 
            -
            from huggingface_hub import snapshot_download
         | 
| 6 | 
            -
             | 
| 7 | 
            -
            # Set up paths
         | 
| 8 | 
            -
            MODEL_REPO = "sentence-transformers/all-MiniLM-L6-v2"
         | 
| 9 | 
            -
            MODEL_CACHE_DIR = "/app/model_cache"
         | 
| 10 | 
            -
            HF_CACHE_DIR = os.getenv("HF_HOME", "/home/user/.cache/huggingface")
         | 
| 11 | 
            -
             | 
| 12 | 
            -
            print("⏳ Downloading the SentenceTransformer model...")
         | 
| 13 | 
            -
            # Download directly into /app/model_cache to avoid duplicating files from HF cache
         | 
| 14 | 
            -
            model_path = snapshot_download(
         | 
| 15 | 
            -
                repo_id=MODEL_REPO,
         | 
| 16 | 
            -
                cache_dir=HF_CACHE_DIR,              # Store HF cache in user cache dir
         | 
| 17 | 
            -
                local_dir=MODEL_CACHE_DIR,           # Place usable model here
         | 
| 18 | 
            -
                local_dir_use_symlinks=False         # Copy files into local_dir (no symlinks)
         | 
| 19 | 
            -
            )
         | 
| 20 | 
            -
             | 
| 21 | 
            -
            print("Model path: ", model_path)
         | 
| 22 | 
            -
            if not os.path.exists(MODEL_CACHE_DIR):
         | 
| 23 | 
            -
                os.makedirs(MODEL_CACHE_DIR)
         | 
| 24 | 
            -
             | 
| 25 | 
            -
            # Verify structure after moving
         | 
| 26 | 
            -
            print("\n📂 LLM Model Structure (Build Level):")
         | 
| 27 | 
            -
            for root, dirs, files in os.walk(MODEL_CACHE_DIR):
         | 
| 28 | 
            -
                print(f"📁 {root}/")
         | 
| 29 | 
            -
                for file in files:
         | 
| 30 | 
            -
                    print(f"  📄 {file}")
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
    	
        requirements.txt
    CHANGED
    
    | @@ -8,7 +8,6 @@ PyMuPDF==1.24.10 | |
| 8 | 
             
            pillow==10.4.0
         | 
| 9 | 
             
            transformers==4.44.2
         | 
| 10 | 
             
            torch==2.2.2
         | 
| 11 | 
            -
            sentence-transformers==3.1.1
         | 
| 12 | 
             
            sumy==0.11.0
         | 
| 13 | 
             
            numpy==1.26.4
         | 
| 14 | 
             
            reportlab==4.0.9
         | 
|  | |
| 8 | 
             
            pillow==10.4.0
         | 
| 9 | 
             
            transformers==4.44.2
         | 
| 10 | 
             
            torch==2.2.2
         | 
|  | |
| 11 | 
             
            sumy==0.11.0
         | 
| 12 | 
             
            numpy==1.26.4
         | 
| 13 | 
             
            reportlab==4.0.9
         | 
    	
        utils/rag/embeddings.py
    CHANGED
    
    | @@ -2,32 +2,36 @@ | |
| 2 | 
             
            import os
         | 
| 3 | 
             
            from typing import List
         | 
| 4 | 
             
            import numpy as np
         | 
|  | |
| 5 | 
             
            from ..logger import get_logger
         | 
| 6 |  | 
| 7 | 
            -
            try:
         | 
| 8 | 
            -
                from sentence_transformers import SentenceTransformer
         | 
| 9 | 
            -
            except Exception:
         | 
| 10 | 
            -
                SentenceTransformer = None
         | 
| 11 | 
            -
             | 
| 12 |  | 
| 13 | 
             
            logger = get_logger("EMBED", __name__)
         | 
| 14 |  | 
| 15 |  | 
| 16 | 
             
            class EmbeddingClient:
         | 
| 17 | 
            -
                def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
         | 
| 18 | 
             
                    self.model_name = model_name
         | 
| 19 | 
            -
                    self. | 
| 20 | 
            -
             | 
| 21 | 
            -
                def _lazy(self):
         | 
| 22 | 
            -
                    if self.model is None and SentenceTransformer is not None:
         | 
| 23 | 
            -
                        logger.info(f"Loading embedding model: {self.model_name}")
         | 
| 24 | 
            -
                        self.model = SentenceTransformer(self.model_name)
         | 
| 25 |  | 
| 26 | 
             
                def embed(self, texts: List[str]) -> List[list]:
         | 
| 27 | 
            -
                     | 
| 28 | 
            -
             | 
| 29 | 
            -
             | 
| 30 | 
            -
             | 
|  | |
| 31 | 
             
                        return [list(np.random.default_rng(hash(t) % (2**32)).normal(size=384).astype("float32")) for t in texts]
         | 
| 32 | 
            -
             | 
| 33 | 
            -
                     | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 2 | 
             
            import os
         | 
| 3 | 
             
            from typing import List
         | 
| 4 | 
             
            import numpy as np
         | 
| 5 | 
            +
            import httpx
         | 
| 6 | 
             
            from ..logger import get_logger
         | 
| 7 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 8 |  | 
| 9 | 
             
            logger = get_logger("EMBED", __name__)
         | 
| 10 |  | 
| 11 |  | 
| 12 | 
             
            class EmbeddingClient:
         | 
| 13 | 
            +
                def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2", api_url: str | None = None):
         | 
| 14 | 
             
                    self.model_name = model_name
         | 
| 15 | 
            +
                    self.api_url = api_url or os.getenv("EMBEDDER_URL")
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 16 |  | 
| 17 | 
             
                def embed(self, texts: List[str]) -> List[list]:
         | 
| 18 | 
            +
                    if not texts:
         | 
| 19 | 
            +
                        return []
         | 
| 20 | 
            +
             | 
| 21 | 
            +
                    if not self.api_url:
         | 
| 22 | 
            +
                        logger.warning("EMBEDDER_URL not set; using random fallback embeddings.")
         | 
| 23 | 
             
                        return [list(np.random.default_rng(hash(t) % (2**32)).normal(size=384).astype("float32")) for t in texts]
         | 
| 24 | 
            +
             | 
| 25 | 
            +
                    url = self.api_url.rstrip("/") + "/embed"
         | 
| 26 | 
            +
                    try:
         | 
| 27 | 
            +
                        with httpx.Client(timeout=30.0) as client:
         | 
| 28 | 
            +
                            resp = client.post(url, json={"texts": texts})
         | 
| 29 | 
            +
                            resp.raise_for_status()
         | 
| 30 | 
            +
                            data = resp.json()
         | 
| 31 | 
            +
                            vectors = data.get("vectors")
         | 
| 32 | 
            +
                            if not isinstance(vectors, list):
         | 
| 33 | 
            +
                                raise ValueError("Invalid response: 'vectors' field missing or not a list")
         | 
| 34 | 
            +
                            return vectors
         | 
| 35 | 
            +
                    except Exception as e:
         | 
| 36 | 
            +
                        logger.error(f"Embedding API call failed: {e}; falling back to random embeddings.")
         | 
| 37 | 
            +
                        return [list(np.random.default_rng(hash(t) % (2**32)).normal(size=384).astype("float32")) for t in texts]
         | 
    	
        warmup.py
    DELETED
    
    | @@ -1,17 +0,0 @@ | |
| 1 | 
            -
            from sentence_transformers import SentenceTransformer
         | 
| 2 | 
            -
            import torch
         | 
| 3 | 
            -
            import os
         | 
| 4 | 
            -
             | 
| 5 | 
            -
            print("🚀 Warming up model...")
         | 
| 6 | 
            -
            embedding_model = SentenceTransformer("/app/model_cache", device="cpu")
         | 
| 7 | 
            -
             | 
| 8 | 
            -
            # Some CPU backends on HF Spaces fail on .half(); make it configurable
         | 
| 9 | 
            -
            USE_HALF = os.getenv("EMBEDDING_HALF", "1") == "1"
         | 
| 10 | 
            -
            try:
         | 
| 11 | 
            -
                if USE_HALF and torch.cuda.is_available():
         | 
| 12 | 
            -
                    embedding_model = embedding_model.half()
         | 
| 13 | 
            -
            except Exception as e:
         | 
| 14 | 
            -
                print(f"⚠️ Skipping half precision due to: {e}")
         | 
| 15 | 
            -
             | 
| 16 | 
            -
            embedding_model.to(torch.device("cpu"))
         | 
| 17 | 
            -
            print("✅ Model warm-up complete!")
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 

