Spaces:
Sleeping
Sleeping
Update app.py
Browse files
clean response (remove special characters from criteria response)
app.py
CHANGED
|
@@ -67,10 +67,13 @@ storage_context = StorageContext.from_defaults(persist_dir="malteos_scincl__CAR_
|
|
| 67 |
# load index
|
| 68 |
index_persisted = load_index_from_storage(storage_context, index_id="vector_index")
|
| 69 |
|
|
|
|
|
|
|
| 70 |
async def clean_trial_text(text):
|
| 71 |
-
"""
|
| 72 |
-
sections
|
| 73 |
-
|
|
|
|
| 74 |
|
| 75 |
for i, line in enumerate(sections):
|
| 76 |
if re.match(r'Reference Papers\s*$', line, re.IGNORECASE):
|
|
@@ -81,20 +84,26 @@ async def clean_trial_text(text):
|
|
| 81 |
if in_references and not found_numbers:
|
| 82 |
if re.match(r'\d+\.', line.strip()):
|
| 83 |
found_numbers = True
|
| 84 |
-
|
| 85 |
-
if line.strip():
|
| 86 |
-
has_intro_text = True
|
| 87 |
cleaned_sections.append(line)
|
| 88 |
-
|
| 89 |
|
| 90 |
-
|
| 91 |
-
cleaned_sections.append(line)
|
| 92 |
|
| 93 |
-
if in_references and
|
| 94 |
cleaned_sections.pop(reference_title_index)
|
| 95 |
|
| 96 |
return '\n'.join(cleaned_sections).strip()
|
| 97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
async def extract_criteria(text):
|
| 99 |
"""Extracts inclusion and exclusion criteria from text."""
|
| 100 |
patterns = {
|
|
@@ -201,11 +210,12 @@ async def run_function_on_text(top_k, study_obj, study_type, phase, purpose, all
|
|
| 201 |
.
|
| 202 |
"""
|
| 203 |
)
|
| 204 |
-
|
| 205 |
-
if
|
|
|
|
| 206 |
# Extract and format references
|
| 207 |
pattern = r'Reference Papers\s*(.+)$'
|
| 208 |
-
match = re.search(pattern,
|
| 209 |
ext_ref = match.group(1) if match else ""
|
| 210 |
split_ref = re.split(r'\n*\d+\.\s+', ext_ref)[1:]
|
| 211 |
|
|
@@ -230,9 +240,9 @@ async def run_function_on_text(top_k, study_obj, study_type, phase, purpose, all
|
|
| 230 |
])
|
| 231 |
|
| 232 |
else:
|
| 233 |
-
|
| 234 |
|
| 235 |
-
return
|
| 236 |
|
| 237 |
# Place holder
|
| 238 |
place_holder = f"""Study Objectives
|
|
|
|
| 67 |
# load index
|
| 68 |
index_persisted = load_index_from_storage(storage_context, index_id="vector_index")
|
| 69 |
|
| 70 |
+
import re
|
| 71 |
+
|
| 72 |
async def clean_trial_text(text):
|
| 73 |
+
"""Cleans text by removing intro lines from the 'Reference Papers' section if present."""
|
| 74 |
+
sections = text.split('\n')
|
| 75 |
+
cleaned_sections = []
|
| 76 |
+
in_references, found_numbers, reference_title_index = False, False, -1
|
| 77 |
|
| 78 |
for i, line in enumerate(sections):
|
| 79 |
if re.match(r'Reference Papers\s*$', line, re.IGNORECASE):
|
|
|
|
| 84 |
if in_references and not found_numbers:
|
| 85 |
if re.match(r'\d+\.', line.strip()):
|
| 86 |
found_numbers = True
|
| 87 |
+
elif line.strip():
|
|
|
|
|
|
|
| 88 |
cleaned_sections.append(line)
|
| 89 |
+
continue
|
| 90 |
|
| 91 |
+
cleaned_sections.append(line)
|
|
|
|
| 92 |
|
| 93 |
+
if in_references and reference_title_index != -1 and not found_numbers:
|
| 94 |
cleaned_sections.pop(reference_title_index)
|
| 95 |
|
| 96 |
return '\n'.join(cleaned_sections).strip()
|
| 97 |
|
| 98 |
+
async def process_criteria(text):
|
| 99 |
+
"""Processes the query response text and cleans it."""
|
| 100 |
+
text = re.sub(r'#+\s*', '', text)
|
| 101 |
+
text = re.sub(r'\*\*', '', text)
|
| 102 |
+
text = re.sub(r'(Criteria)\n\s*\n(\d+\.)', r'\1\n\2', text)
|
| 103 |
+
text = await clean_trial_text(text)
|
| 104 |
+
return text
|
| 105 |
+
|
| 106 |
+
|
| 107 |
async def extract_criteria(text):
|
| 108 |
"""Extracts inclusion and exclusion criteria from text."""
|
| 109 |
patterns = {
|
|
|
|
| 210 |
.
|
| 211 |
"""
|
| 212 |
)
|
| 213 |
+
response = query_response.response
|
| 214 |
+
if response != "Empty Response":
|
| 215 |
+
final_response = await process_criteria(response)
|
| 216 |
# Extract and format references
|
| 217 |
pattern = r'Reference Papers\s*(.+)$'
|
| 218 |
+
match = re.search(pattern, response, re.DOTALL | re.IGNORECASE)
|
| 219 |
ext_ref = match.group(1) if match else ""
|
| 220 |
split_ref = re.split(r'\n*\d+\.\s+', ext_ref)[1:]
|
| 221 |
|
|
|
|
| 240 |
])
|
| 241 |
|
| 242 |
else:
|
| 243 |
+
final_response, formatted_ref = "Empty Response", []
|
| 244 |
|
| 245 |
+
return final_response, formatted_ref
|
| 246 |
|
| 247 |
# Place holder
|
| 248 |
place_holder = f"""Study Objectives
|