jocko committed · b9bdf95
1 Parent(s): c8b7285

fix image similarity detection

src/streamlit_app.py CHANGED (+15 -15)
@@ -41,19 +41,19 @@ os.environ["OPIK_WORKSPACE"] = os.getenv("OPIK_WORKSPACE")
 # ========== 📥 Load Models ==========
 @st.cache_resource(show_spinner=False)
 def load_models():
-    clip_model = CLIPModel.from_pretrained(
+    _clip_model = CLIPModel.from_pretrained(
         "openai/clip-vit-base-patch32",
         cache_dir=os.environ["TRANSFORMERS_CACHE"]
     )
-    clip_processor = CLIPProcessor.from_pretrained(
+    _clip_processor = CLIPProcessor.from_pretrained(
         "openai/clip-vit-base-patch32",
         cache_dir=os.environ["TRANSFORMERS_CACHE"]
     )
-    text_model = SentenceTransformer(
+    _text_model = SentenceTransformer(
         "all-MiniLM-L6-v2",
         cache_folder=os.environ["SENTENCE_TRANSFORMERS_HOME"]
     )
-    return clip_model, clip_processor, text_model
+    return _clip_model, _clip_processor, _text_model
 
 clip_model, clip_processor, text_model = load_models()
 
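The only change in this hunk is a rename: the locals built inside load_models() gain a leading underscore. Inside this function the underscores are purely cosmetic, since st.cache_resource hashes arguments, not locals. A plausible motivation for adopting the convention app-wide (an assumption, not stated in the commit) is Streamlit's caching rule: a parameter whose name starts with "_" is excluded from cache-key hashing, which is the standard way to pass unhashable objects such as models into cached helpers. A minimal sketch of that rule, using a hypothetical embed_cached helper rather than anything in this app:

# Sketch of Streamlit's underscore convention. `embed_cached` is hypothetical;
# st.cache_data hashes arguments to build a cache key, and a leading "_"
# tells it to skip that argument (models and clients are not hashable).
import streamlit as st

@st.cache_data(show_spinner=False)
def embed_cached(_model, text: str):
    # `_model` is excluded from the cache key; only `text` is hashed.
    return _model.encode([text], convert_to_tensor=True)[0]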
@@ -76,9 +76,9 @@ def embed_dataset_images(_dataset):
     for item in _dataset:
         # Load image from URL/path or raw bytes - adapt this if needed
         img = item["image"]
-        inputs = clip_processor(images=img, return_tensors="pt")
+        inputs_img = clip_processor(images=img, return_tensors="pt")
         with torch.no_grad():
-            feat = clip_model.get_image_features(**inputs)
+            feat = clip_model.get_image_features(**inputs_img)
         feat /= feat.norm(p=2, dim=-1, keepdim=True)
         features.append(feat.cpu())
     return torch.cat(features, dim=0)
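Each dataset image is encoded with CLIP and L2-normalised before stacking, so cosine similarity against an equally normalised query embedding reduces to a plain dot product. An illustrative sketch of the matching step the commit title refers to (not code from the diff; it reuses the app's clip_model and clip_processor globals and assumes query_img is a PIL image):

import torch

def most_similar_image(query_img, dataset_feats):
    # Embed and L2-normalise the query exactly as embed_dataset_images does.
    inputs = clip_processor(images=query_img, return_tensors="pt")
    with torch.no_grad():
        q = clip_model.get_image_features(**inputs)
    q /= q.norm(p=2, dim=-1, keepdim=True)
    # Unit vectors on both sides, so the dot product is cosine similarity.
    scores = dataset_feats @ q.squeeze(0)  # shape: (num_images,)
    return scores.argmax().item()          # index of the closest image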
@@ -113,27 +113,27 @@ combined_texts = prepare_combined_texts(data)
 def embed_dataset_texts(_texts):
     return text_model.encode(_texts, convert_to_tensor=True)
 
-def embed_query_text(query):
-    return text_model.encode([query], convert_to_tensor=True)[0]
+def embed_query_text(_query):
+    return text_model.encode([_query], convert_to_tensor=True)[0]
 
 @track
-def get_chat_completion_openai(client, prompt: str):
-    return client.chat.completions.create(
+def get_chat_completion_openai(_client, _prompt: str):
+    return _client.chat.completions.create(
         model="gpt-4o", # or "gpt-4" if you need the older GPT-4
-        messages=[{"role": "user", "content": prompt}],
+        messages=[{"role": "user", "content": _prompt}],
         temperature=0.5,
         max_tokens=150
     )
 
 @track
-def get_similar_prompt(query):
+def get_similar_prompt(_query):
     text_embeddings = embed_dataset_texts(combined_texts) # cached
-    query_embedding = embed_query_text(query)
+    query_embedding = embed_query_text(_query) # recalculated each time
 
     cos_scores = util.pytorch_cos_sim(query_embedding, text_embeddings)[0]
     top_result = torch.topk(cos_scores, k=1)
-    idx = top_result.indices[0].item()
-    return data[idx]
+    _idx = top_result.indices[0].item()
+    return data[_idx]
 
 
 # Pick which text column to use
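Together these helpers form a retrieve-then-generate loop: embed the query with the MiniLM text model, pick the highest-scoring dataset row by cosine similarity, then send a prompt to OpenAI. A hypothetical usage sketch; the client setup and prompt template are assumptions, not shown in this diff:

from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

user_query = "a red vintage car parked by the beach"
match = get_similar_prompt(user_query)  # nearest dataset row by cosine score

# Hypothetical template; the app's real prompt construction is not in this diff.
prompt = f"Using this similar example: {match}\nRespond to: {user_query}"
reply = get_chat_completion_openai(client, prompt)
print(reply.choices[0].message.content)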