LiamKhoaLe committed on
Commit
cf16f9f
·
1 Parent(s): 58a3663

Move to remote embedder API

Browse files
Files changed (8) hide show
  1. .DS_Store +0 -0
  2. .dockerignore +1 -0
  3. .gitignore +1 -0
  4. Dockerfile +4 -17
  5. dw_model.py +0 -30
  6. requirements.txt +0 -1
  7. utils/rag/embeddings.py +22 -18
  8. warmup.py +0 -17
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
.dockerignore CHANGED
@@ -25,6 +25,7 @@ coverage.xml
25
  *.md
26
  !README.md
27
  ingestion_pipeline
 
28
  exefiles
29
  diagram
30
  diagram.svg
 
25
  *.md
26
  !README.md
27
  ingestion_pipeline
28
+ embedder
29
  exefiles
30
  diagram
31
  diagram.svg
.gitignore CHANGED
@@ -3,4 +3,5 @@ diagram
3
 
4
  # For ingestion pipeline
5
  ingestion_pipeline
 
6
  exefiles
 
3
 
4
  # For ingestion pipeline
5
  ingestion_pipeline
6
+ embedder
7
  exefiles
Dockerfile CHANGED
@@ -23,25 +23,12 @@ COPY . .
23
  # Install Python dependencies
24
  RUN pip install --upgrade pip && pip install --no-cache-dir -r requirements.txt
25
 
26
- # Hugging Face cache directories
27
  ENV HF_HOME="/home/user/.cache/huggingface"
28
- ENV SENTENCE_TRANSFORMERS_HOME="/home/user/.cache/huggingface/sentence-transformers"
29
- ENV MEDGEMMA_HOME="/home/user/.cache/huggingface/sentence-transformers"
30
 
31
- # Create cache directories and set permissions
32
- RUN mkdir -p /app/model_cache /home/user/.cache/huggingface/sentence-transformers && \
33
- chown -R user:user /app/model_cache /home/user/.cache/huggingface
34
-
35
- # Control preloading flags
36
- ENV PRELOAD_TRANSLATORS="0"
37
- ENV EMBEDDING_HALF="0"
38
-
39
- # Preload embedding model and warmup
40
- RUN test -f /app/dw_model.py && python /app/dw_model.py || true
41
- RUN test -f /app/warmup.py && python /app/warmup.py || true
42
-
43
- # Ensure ownership stays correct
44
- RUN chown -R user:user /app/model_cache
45
 
46
  # Expose port for HF Spaces
47
  ENV PORT=7860
 
23
  # Install Python dependencies
24
  RUN pip install --upgrade pip && pip install --no-cache-dir -r requirements.txt
25
 
26
+ # Optional: general HF cache directory (kept for other models like BLIP)
27
  ENV HF_HOME="/home/user/.cache/huggingface"
 
 
28
 
29
+ # Ensure cache directory ownership
30
+ RUN mkdir -p /home/user/.cache/huggingface && \
31
+ chown -R user:user /home/user/.cache/huggingface
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  # Expose port for HF Spaces
34
  ENV PORT=7860
dw_model.py DELETED
@@ -1,30 +0,0 @@
1
- # dw_model.py
2
- ### --- A. transformer and embedder ---
3
- import os
4
- import shutil
5
- from huggingface_hub import snapshot_download
6
-
7
- # Set up paths
8
- MODEL_REPO = "sentence-transformers/all-MiniLM-L6-v2"
9
- MODEL_CACHE_DIR = "/app/model_cache"
10
- HF_CACHE_DIR = os.getenv("HF_HOME", "/home/user/.cache/huggingface")
11
-
12
- print("⏳ Downloading the SentenceTransformer model...")
13
- # Download directly into /app/model_cache to avoid duplicating files from HF cache
14
- model_path = snapshot_download(
15
- repo_id=MODEL_REPO,
16
- cache_dir=HF_CACHE_DIR, # Store HF cache in user cache dir
17
- local_dir=MODEL_CACHE_DIR, # Place usable model here
18
- local_dir_use_symlinks=False # Copy files into local_dir (no symlinks)
19
- )
20
-
21
- print("Model path: ", model_path)
22
- if not os.path.exists(MODEL_CACHE_DIR):
23
- os.makedirs(MODEL_CACHE_DIR)
24
-
25
- # Verify structure after moving
26
- print("\n📂 LLM Model Structure (Build Level):")
27
- for root, dirs, files in os.walk(MODEL_CACHE_DIR):
28
- print(f"📁 {root}/")
29
- for file in files:
30
- print(f" 📄 {file}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -8,7 +8,6 @@ PyMuPDF==1.24.10
8
  pillow==10.4.0
9
  transformers==4.44.2
10
  torch==2.2.2
11
- sentence-transformers==3.1.1
12
  sumy==0.11.0
13
  numpy==1.26.4
14
  reportlab==4.0.9
 
8
  pillow==10.4.0
9
  transformers==4.44.2
10
  torch==2.2.2
 
11
  sumy==0.11.0
12
  numpy==1.26.4
13
  reportlab==4.0.9
utils/rag/embeddings.py CHANGED
@@ -2,32 +2,36 @@
2
  import os
3
  from typing import List
4
  import numpy as np
 
5
  from ..logger import get_logger
6
 
7
- try:
8
- from sentence_transformers import SentenceTransformer
9
- except Exception:
10
- SentenceTransformer = None
11
-
12
 
13
  logger = get_logger("EMBED", __name__)
14
 
15
 
16
  class EmbeddingClient:
17
- def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
18
  self.model_name = model_name
19
- self.model = None
20
-
21
- def _lazy(self):
22
- if self.model is None and SentenceTransformer is not None:
23
- logger.info(f"Loading embedding model: {self.model_name}")
24
- self.model = SentenceTransformer(self.model_name)
25
 
26
  def embed(self, texts: List[str]) -> List[list]:
27
- self._lazy()
28
- if self.model is None:
29
- # Fallback: extremely naive hashing -> NOT for production, but keeps code running without deps
30
- logger.warning("SentenceTransformer unavailable; using random fallback embeddings.")
 
31
  return [list(np.random.default_rng(hash(t) % (2**32)).normal(size=384).astype("float32")) for t in texts]
32
- vecs = self.model.encode(texts, show_progress_bar=False, normalize_embeddings=True)
33
- return [v.tolist() for v in vecs]
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import os
3
  from typing import List
4
  import numpy as np
5
+ import httpx
6
  from ..logger import get_logger
7
 
 
 
 
 
 
8
 
9
  logger = get_logger("EMBED", __name__)
10
 
11
 
12
  class EmbeddingClient:
13
+ def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2", api_url: str | None = None):
14
  self.model_name = model_name
15
+ self.api_url = api_url or os.getenv("EMBEDDER_URL")
 
 
 
 
 
16
 
17
  def embed(self, texts: List[str]) -> List[list]:
18
+ if not texts:
19
+ return []
20
+
21
+ if not self.api_url:
22
+ logger.warning("EMBEDDER_URL not set; using random fallback embeddings.")
23
  return [list(np.random.default_rng(hash(t) % (2**32)).normal(size=384).astype("float32")) for t in texts]
24
+
25
+ url = self.api_url.rstrip("/") + "/embed"
26
+ try:
27
+ with httpx.Client(timeout=30.0) as client:
28
+ resp = client.post(url, json={"texts": texts})
29
+ resp.raise_for_status()
30
+ data = resp.json()
31
+ vectors = data.get("vectors")
32
+ if not isinstance(vectors, list):
33
+ raise ValueError("Invalid response: 'vectors' field missing or not a list")
34
+ return vectors
35
+ except Exception as e:
36
+ logger.error(f"Embedding API call failed: {e}; falling back to random embeddings.")
37
+ return [list(np.random.default_rng(hash(t) % (2**32)).normal(size=384).astype("float32")) for t in texts]
warmup.py DELETED
@@ -1,17 +0,0 @@
1
- from sentence_transformers import SentenceTransformer
2
- import torch
3
- import os
4
-
5
- print("🚀 Warming up model...")
6
- embedding_model = SentenceTransformer("/app/model_cache", device="cpu")
7
-
8
- # Some CPU backends on HF Spaces fail on .half(); make it configurable
9
- USE_HALF = os.getenv("EMBEDDING_HALF", "1") == "1"
10
- try:
11
- if USE_HALF and torch.cuda.is_available():
12
- embedding_model = embedding_model.half()
13
- except Exception as e:
14
- print(f"⚠️ Skipping half precision due to: {e}")
15
-
16
- embedding_model.to(torch.device("cpu"))
17
- print("✅ Model warm-up complete!")