buildinqq committed on
Commit
e309bdd
·
verified ·
1 Parent(s): 372c744

Update app.py

Browse files

clean response (remove special characters from criteria response)

Files changed (1) hide show
  1. app.py +25 -15
app.py CHANGED
@@ -67,10 +67,13 @@ storage_context = StorageContext.from_defaults(persist_dir="malteos_scincl__CAR_
67
  # load index
68
  index_persisted = load_index_from_storage(storage_context, index_id="vector_index")
69
 
 
 
70
  async def clean_trial_text(text):
71
- """Removes intro text from references if present."""
72
- sections, cleaned_sections, in_references = text.split('\n'), [], False
73
- has_intro_text, found_numbers, reference_title_index = False, False, -1
 
74
 
75
  for i, line in enumerate(sections):
76
  if re.match(r'Reference Papers\s*$', line, re.IGNORECASE):
@@ -81,20 +84,26 @@ async def clean_trial_text(text):
81
  if in_references and not found_numbers:
82
  if re.match(r'\d+\.', line.strip()):
83
  found_numbers = True
84
- else:
85
- if line.strip():
86
- has_intro_text = True
87
  cleaned_sections.append(line)
88
- continue
89
 
90
- if not in_references:
91
- cleaned_sections.append(line)
92
 
93
- if in_references and not has_intro_text and reference_title_index != -1:
94
  cleaned_sections.pop(reference_title_index)
95
 
96
  return '\n'.join(cleaned_sections).strip()
97
 
 
 
 
 
 
 
 
 
 
98
  async def extract_criteria(text):
99
  """Extracts inclusion and exclusion criteria from text."""
100
  patterns = {
@@ -201,11 +210,12 @@ async def run_function_on_text(top_k, study_obj, study_type, phase, purpose, all
201
  .
202
  """
203
  )
204
-
205
- if query_response.response != "Empty Response":
 
206
  # Extract and format references
207
  pattern = r'Reference Papers\s*(.+)$'
208
- match = re.search(pattern, query_response.response, re.DOTALL | re.IGNORECASE)
209
  ext_ref = match.group(1) if match else ""
210
  split_ref = re.split(r'\n*\d+\.\s+', ext_ref)[1:]
211
 
@@ -230,9 +240,9 @@ async def run_function_on_text(top_k, study_obj, study_type, phase, purpose, all
230
  ])
231
 
232
  else:
233
- query_response.response, formatted_ref = "Empty Response", []
234
 
235
- return query_response.response, formatted_ref
236
 
237
  # Place holder
238
  place_holder = f"""Study Objectives
 
67
  # load index
68
  index_persisted = load_index_from_storage(storage_context, index_id="vector_index")
69
 
70
+ import re
71
+
72
  async def clean_trial_text(text):
73
+ """Cleans text by removing intro lines from the 'Reference Papers' section if present."""
74
+ sections = text.split('\n')
75
+ cleaned_sections = []
76
+ in_references, found_numbers, reference_title_index = False, False, -1
77
 
78
  for i, line in enumerate(sections):
79
  if re.match(r'Reference Papers\s*$', line, re.IGNORECASE):
 
84
  if in_references and not found_numbers:
85
  if re.match(r'\d+\.', line.strip()):
86
  found_numbers = True
87
+ elif line.strip():
 
 
88
  cleaned_sections.append(line)
89
+ continue
90
 
91
+ cleaned_sections.append(line)
 
92
 
93
+ if in_references and reference_title_index != -1 and not found_numbers:
94
  cleaned_sections.pop(reference_title_index)
95
 
96
  return '\n'.join(cleaned_sections).strip()
97
 
98
+ async def process_criteria(text):
99
+ """Processes the query response text and cleans it."""
100
+ text = re.sub(r'#+\s*', '', text)
101
+ text = re.sub(r'\*\*', '', text)
102
+ text = re.sub(r'(Criteria)\n\s*\n(\d+\.)', r'\1\n\2', text)
103
+ text = await clean_trial_text(text)
104
+ return text
105
+
106
+
107
  async def extract_criteria(text):
108
  """Extracts inclusion and exclusion criteria from text."""
109
  patterns = {
 
210
  .
211
  """
212
  )
213
+ response = query_response.response
214
+ if response != "Empty Response":
215
+ final_response = await process_criteria(response)
216
  # Extract and format references
217
  pattern = r'Reference Papers\s*(.+)$'
218
+ match = re.search(pattern, response, re.DOTALL | re.IGNORECASE)
219
  ext_ref = match.group(1) if match else ""
220
  split_ref = re.split(r'\n*\d+\.\s+', ext_ref)[1:]
221
 
 
240
  ])
241
 
242
  else:
243
+ final_response, formatted_ref = "Empty Response", []
244
 
245
+ return final_response, formatted_ref
246
 
247
  # Place holder
248
  place_holder = f"""Study Objectives