fffiloni committed on
Commit
bb4631d
·
verified ·
1 Parent(s): 3d97999

try to handle all note cases

Browse files
Files changed (1) hide show
  1. app.py +41 -28
app.py CHANGED
@@ -78,6 +78,46 @@ Here is the scene description to analyze:
78
  cleaned_text = re.sub(pattern, '', output_text, flags=re.DOTALL)
79
  return cleaned_text
80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  def parse_perfume_description(text: str) -> dict:
82
  # Perfume Name
83
  perfume_name = re.search(r'Perfume Name:\s*(.+)', text).group(1).strip()
@@ -113,34 +153,7 @@ def parse_perfume_description(text: str) -> dict:
113
  image_desc = image_desc_match.group(1).strip() if image_desc_match else ""
114
 
115
  # 🗂️ Smart bullet extractor
116
- def extract_notes(text, section_name):
117
- # Try block of bullets
118
- pattern_block = rf'{section_name}:\s*\n((?:\*.*(?:\n|$))+)'
119
- match_block = re.search(pattern_block, text, re.MULTILINE)
120
- if match_block:
121
- notes_text = match_block.group(1)
122
- notes = []
123
- for line in notes_text.strip().splitlines():
124
- bullet = line.strip().lstrip('*').strip()
125
- if ':' in bullet:
126
- note, desc = bullet.split(':', 1)
127
- notes.append({'note': note.strip(), 'description': desc.strip()})
128
- else:
129
- notes.append({'note': bullet, 'description': ''})
130
- return notes
131
-
132
- # Try inline bullet style: * Section: item1, item2, item3
133
- pattern_inline = rf'\* {section_name}:\s*(.+)'
134
- match_inline = re.search(pattern_inline, text)
135
- if match_inline:
136
- notes_raw = match_inline.group(1).strip()
137
- notes = []
138
- for item in notes_raw.split(','):
139
- notes.append({'note': item.strip(), 'description': ''})
140
- return notes
141
-
142
- return []
143
-
144
  top_notes = extract_notes(text, 'Top Notes')
145
  heart_notes = extract_notes(text, 'Heart Notes')
146
  base_notes = extract_notes(text, 'Base Notes')
 
78
  cleaned_text = re.sub(pattern, '', output_text, flags=re.DOTALL)
79
  return cleaned_text
80
 
81
def extract_notes(text, section_name):
    """Extract the notes listed under one section of a perfume description.

    Three layouts are tried in order and the first one that matches wins:

    1. A heading line followed by a block of ``*`` bullets, one note per
       line, each optionally written as ``note: description``.
    2. A single bullet holding a comma-separated list:
       ``* Section: item1, item2, item3``.
    3. A plain, unbulleted line: ``Section: item1, item2, item3``.

    Args:
        text: Full description text to search.
        section_name: Literal section heading, e.g. ``'Top Notes'``.

    Returns:
        A list of ``{'note': str, 'description': str}`` dicts. Comma-list
        layouts always yield an empty description. Blank entries are
        dropped. An empty list is returned when no layout matches.
    """
    import re

    # The heading is literal text, not a pattern: escape it so names that
    # contain regex metacharacters (parentheses, dots, ...) still match.
    heading = re.escape(section_name)

    # 1. Block of bullets under the heading.
    block = re.search(
        rf'{heading}:\s*\n((?:\*.*(?:\n|$))+)', text, re.MULTILINE
    )
    if block:
        notes = []
        for line in block.group(1).strip().splitlines():
            bullet = line.strip().lstrip('*').strip()
            if not bullet:
                # A bare "*" carries no note; skip it instead of emitting
                # an entry with an empty name.
                continue
            # Only the first colon separates the note from its description.
            name, _, description = bullet.partition(':')
            notes.append(
                {'note': name.strip(), 'description': description.strip()}
            )
        return notes

    # 2. Inline bullet, then 3. plain line. Both hold a comma-separated
    # list. ``\s*`` may span a line break, so a list written on the line
    # right after the heading is picked up as well.
    comma_list_layouts = (
        (rf'\* {heading}:\s*(.+)', 0),
        (rf'^{heading}:\s*(.+)$', re.MULTILINE),
    )
    for pattern, flags in comma_list_layouts:
        found = re.search(pattern, text, flags)
        if found:
            # Ignore empty items left behind by trailing or doubled commas.
            return [
                {'note': item.strip(), 'description': ''}
                for item in found.group(1).split(',')
                if item.strip()
            ]

    return []
120
+
121
  def parse_perfume_description(text: str) -> dict:
122
  # Perfume Name
123
  perfume_name = re.search(r'Perfume Name:\s*(.+)', text).group(1).strip()
 
153
  image_desc = image_desc_match.group(1).strip() if image_desc_match else ""
154
 
155
  # 🗂️ Smart bullet extractor
156
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  top_notes = extract_notes(text, 'Top Notes')
158
  heart_notes = extract_notes(text, 'Heart Notes')
159
  base_notes = extract_notes(text, 'Base Notes')