Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -293,13 +293,15 @@ def extract_field(data: Union[str, dict], field_name: str) -> str:
|
|
| 293 |
return str(value).strip()
|
| 294 |
|
| 295 |
|
| 296 |
-
# Load your perfume database once
|
| 297 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
| 298 |
df = pd.read_excel('perfume_database_cleaned.xlsx')
|
| 299 |
|
| 300 |
def extract_notes_for_comparison(data: Union[str, dict]) -> list[str]:
|
| 301 |
"""
|
| 302 |
-
Extracts all notes from the Olfactory Pyramid section
|
| 303 |
"""
|
| 304 |
if isinstance(data, str):
|
| 305 |
try:
|
|
@@ -329,57 +331,64 @@ def extract_notes_for_comparison(data: Union[str, dict]) -> list[str]:
|
|
| 329 |
|
| 330 |
return notes
|
| 331 |
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
|
|
|
|
|
|
| 335 |
"""
|
| 336 |
-
Finds top N matching perfumes
|
| 337 |
-
Penalizes perfumes with many extra notes.
|
| 338 |
"""
|
| 339 |
try:
|
| 340 |
user_notes = extract_notes_for_comparison(data)
|
| 341 |
except Exception as e:
|
|
|
|
| 342 |
return pd.DataFrame([{
|
| 343 |
'brand': 'N/A',
|
| 344 |
'perfume': 'N/A',
|
| 345 |
-
'matching_notes': f'Error: {e}',
|
| 346 |
'match_count': 0,
|
| 347 |
'purity': 0,
|
| 348 |
'adjusted_score': 0
|
| 349 |
}])
|
| 350 |
|
| 351 |
-
user_notes_clean = [n.strip().lower() for n in user_notes]
|
| 352 |
|
| 353 |
matches = []
|
| 354 |
for _, row in df.iterrows():
|
| 355 |
-
perfume_notes = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 356 |
matched = []
|
| 357 |
for u_note in user_notes_clean:
|
| 358 |
for p_note in perfume_notes:
|
| 359 |
-
|
|
|
|
| 360 |
matched.append(p_note)
|
| 361 |
|
| 362 |
unique_matched_notes = sorted(set(matched))
|
| 363 |
-
|
| 364 |
-
total_perfume_notes = len(set(perfume_notes))
|
| 365 |
-
purity = match_count / total_perfume_notes if total_perfume_notes else 0
|
| 366 |
-
adjusted_score = match_count * purity if match_count > 0 else 0
|
| 367 |
-
|
| 368 |
-
matches.append({
|
| 369 |
-
'brand': row['brand'],
|
| 370 |
-
'perfume': row['perfume'],
|
| 371 |
-
'matching_notes': ', '.join(unique_matched_notes),
|
| 372 |
-
'match_count': match_count,
|
| 373 |
-
'purity': round(purity, 2),
|
| 374 |
-
'adjusted_score': round(adjusted_score, 2)
|
| 375 |
-
})
|
| 376 |
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 383 |
'brand': 'N/A',
|
| 384 |
'perfume': 'N/A',
|
| 385 |
'matching_notes': 'No matches found',
|
|
@@ -388,6 +397,12 @@ def find_best_perfumes_from_json(data: Union[str, dict], top_n: int = 5):
|
|
| 388 |
'adjusted_score': 0
|
| 389 |
}])
|
| 390 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 391 |
return result
|
| 392 |
|
| 393 |
def infer(image_input):
|
|
|
|
| 293 |
return str(value).strip()
|
| 294 |
|
| 295 |
|
|
|
|
| 296 |
import pandas as pd
|
| 297 |
+
from rapidfuzz import fuzz
|
| 298 |
+
|
| 299 |
+
# Load the database once
|
| 300 |
df = pd.read_excel('perfume_database_cleaned.xlsx')
|
| 301 |
|
| 302 |
def extract_notes_for_comparison(data: Union[str, dict]) -> list[str]:
|
| 303 |
"""
|
| 304 |
+
Extracts all notes from the Olfactory Pyramid section.
|
| 305 |
"""
|
| 306 |
if isinstance(data, str):
|
| 307 |
try:
|
|
|
|
| 331 |
|
| 332 |
return notes
|
| 333 |
|
| 334 |
+
def _placeholder_result(message: str) -> pd.DataFrame:
    """Build the single-row placeholder frame carrying *message* in 'matching_notes'."""
    return pd.DataFrame([{
        'brand': 'N/A',
        'perfume': 'N/A',
        'matching_notes': message,
        'match_count': 0,
        'purity': 0,
        'adjusted_score': 0,
    }])


def find_best_perfumes_from_json(
    data: Union[str, dict],
    top_n: int = 5,
    threshold: int = 80,
) -> pd.DataFrame:
    """
    Finds top N matching perfumes with purity-based similarity.

    The notes extracted from ``data`` are fuzzy-compared against the
    comma-separated 'notes' column of every row in the module-level ``df``.

    Args:
        data: Payload handed to ``extract_notes_for_comparison``.
        top_n: Maximum number of rows to return.
        threshold: Minimum ``fuzz.partial_ratio`` score for a user note and a
            perfume note to count as a match.

    Returns:
        A DataFrame with the columns 'brand', 'perfume', 'matching_notes',
        'match_count', 'purity' and 'adjusted_score', sorted by
        'adjusted_score' then 'match_count' (both descending). When note
        extraction fails or nothing matches, a single placeholder row is
        returned with 'N/A' for brand/perfume and zeroed scores.
    """
    try:
        user_notes = extract_notes_for_comparison(data)
    except Exception as e:
        # Return fallback if extraction fails
        return _placeholder_result(f'Error: {e}')

    user_notes_clean = [n.strip().lower() for n in user_notes if n.strip()]
    if not user_notes_clean:
        # Nothing to compare against, so skip scanning the whole database.
        return _placeholder_result('No matches found')

    matches = []
    for brand, perfume, raw_notes in zip(df['brand'], df['perfume'], df['notes']):
        # Empty spreadsheet cells are loaded as NaN (a float), which has no
        # .split(); such rows carry no notes and cannot match anything.
        if not isinstance(raw_notes, str):
            continue

        # De-duplicate so the purity denominator counts unique notes, the same
        # way match_count counts unique matched notes.
        perfume_notes = {
            n.strip().lower()
            for n in raw_notes.split(',')
            if n.strip()
        }

        # A perfume note counts once if any user note is similar enough to it.
        unique_matched_notes = sorted(
            p_note
            for p_note in perfume_notes
            if any(
                fuzz.partial_ratio(u_note, p_note) >= threshold
                for u_note in user_notes_clean
            )
        )

        match_count = len(unique_matched_notes)
        if match_count == 0:
            continue

        # purity: share of the perfume's notes that matched; multiplying by
        # match_count penalizes perfumes with many extra, unmatched notes.
        purity = match_count / len(perfume_notes)
        adjusted_score = match_count * purity

        matches.append({
            'brand': brand,
            'perfume': perfume,
            'matching_notes': ', '.join(unique_matched_notes),
            'match_count': match_count,
            'purity': round(purity, 2),
            'adjusted_score': round(adjusted_score, 2),
        })

    if not matches:
        return _placeholder_result('No matches found')

    result = pd.DataFrame(matches)
    result = result.sort_values(
        by=['adjusted_score', 'match_count'],
        ascending=[False, False],
    ).head(top_n).reset_index(drop=True)

    return result
|
| 407 |
|
| 408 |
def infer(image_input):
|