Spaces:

ravistech
/

Ravis-gemini

Sleeping

App Files Files

buildinqq committed on Nov 28, 2024

Commit

e309bdd

verified ·

1 Parent(s): 372c744

Update app.py

Browse files

clean response (remove special characters from criteria response)

Files changed (1) hide show

app.py +25 -15

app.py CHANGED Viewed

@@ -67,10 +67,13 @@ storage_context = StorageContext.from_defaults(persist_dir="malteos_scincl__CAR_
 # load index
 index_persisted = load_index_from_storage(storage_context, index_id="vector_index")
 async def clean_trial_text(text):
-    """Removes intro text from references if present."""
-    sections, cleaned_sections, in_references = text.split('\n'), [], False
-    has_intro_text, found_numbers, reference_title_index = False, False, -1
     for i, line in enumerate(sections):
         if re.match(r'Reference Papers\s*$', line, re.IGNORECASE):
@@ -81,20 +84,26 @@ async def clean_trial_text(text):
         if in_references and not found_numbers:
             if re.match(r'\d+\.', line.strip()):
                 found_numbers = True
-            else:
-                if line.strip():
-                    has_intro_text = True
                 cleaned_sections.append(line)
-                continue
-        if not in_references:
-            cleaned_sections.append(line)
-    if in_references and not has_intro_text and reference_title_index != -1:
         cleaned_sections.pop(reference_title_index)
     return '\n'.join(cleaned_sections).strip()
 async def extract_criteria(text):
     """Extracts inclusion and exclusion criteria from text."""
     patterns = {
@@ -201,11 +210,12 @@ async def run_function_on_text(top_k, study_obj, study_type, phase, purpose, all
     .
     """
     )
-    if query_response.response != "Empty Response":
         # Extract and format references
         pattern = r'Reference Papers\s*(.+)$'
-        match = re.search(pattern, query_response.response, re.DOTALL | re.IGNORECASE)
         ext_ref = match.group(1) if match else ""
         split_ref = re.split(r'\n*\d+\.\s+', ext_ref)[1:]
@@ -230,9 +240,9 @@ async def run_function_on_text(top_k, study_obj, study_type, phase, purpose, all
             ])
     else:
-        query_response.response, formatted_ref = "Empty Response", []
-    return query_response.response, formatted_ref
 # Place holder
 place_holder = f"""Study Objectives

 # load index
 index_persisted = load_index_from_storage(storage_context, index_id="vector_index")
+import re
 async def clean_trial_text(text):
+    """Cleans text by removing intro lines from the 'Reference Papers' section if present."""
+    sections = text.split('\n')
+    cleaned_sections = []
+    in_references, found_numbers, reference_title_index = False, False, -1
     for i, line in enumerate(sections):
         if re.match(r'Reference Papers\s*$', line, re.IGNORECASE):
         if in_references and not found_numbers:
             if re.match(r'\d+\.', line.strip()):
                 found_numbers = True
+            elif line.strip():
                 cleaned_sections.append(line)
+            continue
+        cleaned_sections.append(line)
+    if in_references and reference_title_index != -1 and not found_numbers:
         cleaned_sections.pop(reference_title_index)
     return '\n'.join(cleaned_sections).strip()
+async def process_criteria(text):
+    """Processes the query response text and cleans it."""
+    text = re.sub(r'#+\s*', '', text)  # drop every run of '#' plus any whitespace right after it (matches anywhere, not only at line starts)
+    text = re.sub(r'\*\*', '', text)  # drop every literal '**' pair
+    text = re.sub(r'(Criteria)\n\s*\n(\d+\.)', r'\1\n\2', text)  # collapse blank line(s) between "Criteria" and the following "<digits>." item into a single newline
+    text = await clean_trial_text(text)  # hand the stripped text to clean_trial_text and keep its result
+    return text
 async def extract_criteria(text):
     """Extracts inclusion and exclusion criteria from text."""
     patterns = {
     .
     """
     )
+    response = query_response.response
+    if response != "Empty Response":
+        final_response = await process_criteria(response)
         # Extract and format references
         pattern = r'Reference Papers\s*(.+)$'
+        match = re.search(pattern, response, re.DOTALL | re.IGNORECASE)
         ext_ref = match.group(1) if match else ""
         split_ref = re.split(r'\n*\d+\.\s+', ext_ref)[1:]
             ])
     else:
+        final_response, formatted_ref = "Empty Response", []
+    return final_response, formatted_ref
 # Place holder
 place_holder = f"""Study Objectives