Spaces:

fffiloni
/

Image-to-Fragrance

Running on Zero

App Files Community

fffiloni committed on Jul 4

Commit

9f95f2e

verified ·

1 Parent(s): 11e6a61

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -30

app.py CHANGED Viewed

@@ -293,13 +293,15 @@ def extract_field(data: Union[str, dict], field_name: str) -> str:
     return str(value).strip()
-# Load your perfume database once
 import pandas as pd
 df = pd.read_excel('perfume_database_cleaned.xlsx')
 def extract_notes_for_comparison(data: Union[str, dict]) -> list[str]:
     """
-    Extracts all notes from the Olfactory Pyramid section of a JSON string or dict.
     """
     if isinstance(data, str):
         try:
@@ -329,57 +331,64 @@ def extract_notes_for_comparison(data: Union[str, dict]) -> list[str]:
     return notes
-from rapidfuzz import fuzz
-def find_best_perfumes_from_json(data: Union[str, dict], top_n: int = 5):
     """
-    Finds top N matching perfumes using loose matching on notes.
-    Penalizes perfumes with many extra notes.
     """
     try:
         user_notes = extract_notes_for_comparison(data)
     except Exception as e:
         return pd.DataFrame([{
             'brand': 'N/A',
             'perfume': 'N/A',
-            'matching_notes': f'Error: {e}',
             'match_count': 0,
             'purity': 0,
             'adjusted_score': 0
         }])
-    user_notes_clean = [n.strip().lower() for n in user_notes]
     matches = []
     for _, row in df.iterrows():
-        perfume_notes = [n.strip().lower() for n in row['notes'].split(',')]
         matched = []
         for u_note in user_notes_clean:
             for p_note in perfume_notes:
-                if u_note in p_note or p_note in u_note:
                     matched.append(p_note)
         unique_matched_notes = sorted(set(matched))
-        match_count = len(unique_matched_notes)
-        total_perfume_notes = len(set(perfume_notes))
-        purity = match_count / total_perfume_notes if total_perfume_notes else 0
-        adjusted_score = match_count * purity if match_count > 0 else 0
-        matches.append({
-            'brand': row['brand'],
-            'perfume': row['perfume'],
-            'matching_notes': ', '.join(unique_matched_notes),
-            'match_count': match_count,
-            'purity': round(purity, 2),
-            'adjusted_score': round(adjusted_score, 2)
-        })
-    result = pd.DataFrame(matches)
-    result = result[result['match_count'] > 0]
-    result = result.sort_values(by='adjusted_score', ascending=False).head(top_n).reset_index(drop=True)
-    if result.empty:
-        result = pd.DataFrame([{
             'brand': 'N/A',
             'perfume': 'N/A',
             'matching_notes': 'No matches found',
@@ -388,6 +397,12 @@ def find_best_perfumes_from_json(data: Union[str, dict], top_n: int = 5):
             'adjusted_score': 0
         }])
     return result
 def infer(image_input):

     return str(value).strip()
 import pandas as pd
+from rapidfuzz import fuzz
+# Load the database once
 df = pd.read_excel('perfume_database_cleaned.xlsx')
 def extract_notes_for_comparison(data: Union[str, dict]) -> list[str]:
     """
+    Extracts all notes from the Olfactory Pyramid section.
     """
     if isinstance(data, str):
         try:
     return notes
+def find_best_perfumes_from_json(
+    data: Union[str, dict],
+    top_n: int = 5,
+    threshold: int = 80
+) -> pd.DataFrame:
     """
+    Finds top N matching perfumes with purity-based similarity.
     """
     try:
         user_notes = extract_notes_for_comparison(data)
     except Exception as e:
+        # Return fallback if extraction fails
         return pd.DataFrame([{
             'brand': 'N/A',
             'perfume': 'N/A',
+            'matching_notes': f'Error: {str(e)}',
             'match_count': 0,
             'purity': 0,
             'adjusted_score': 0
         }])
+    user_notes_clean = [n.strip().lower() for n in user_notes if n.strip()]
     matches = []
     for _, row in df.iterrows():
+        perfume_notes = [
+            n.strip().lower()
+            for n in row['notes'].split(',')
+            if n.strip()
+        ]
         matched = []
         for u_note in user_notes_clean:
             for p_note in perfume_notes:
+                ratio = fuzz.partial_ratio(u_note, p_note)
+                if ratio >= threshold:
                     matched.append(p_note)
         unique_matched_notes = sorted(set(matched))
+        unique_matched_notes = [n for n in unique_matched_notes if n]
+        total_notes = len(perfume_notes)
+        match_count = len(unique_matched_notes)
+        purity = match_count / total_notes if total_notes else 0
+        adjusted_score = match_count * purity
+        if match_count > 0:
+            matches.append({
+                'brand': row['brand'],
+                'perfume': row['perfume'],
+                'matching_notes': ', '.join(unique_matched_notes),
+                'match_count': match_count,
+                'purity': round(purity, 2),
+                'adjusted_score': round(adjusted_score, 2)
+            })
+    if not matches:
+        return pd.DataFrame([{
             'brand': 'N/A',
             'perfume': 'N/A',
             'matching_notes': 'No matches found',
             'adjusted_score': 0
         }])
+    result = pd.DataFrame(matches)
+    result = result.sort_values(
+        by=['adjusted_score', 'match_count'],
+        ascending=[False, False]
+    ).head(top_n).reset_index(drop=True)
     return result
 def infer(image_input):