Ravis-gemini

Sleeping

App Files Files

buildinqq committed on Nov 28, 2024

Commit

296b5a9

verified ·

1 Parent(s): e309bdd

Update app.py

Browse files

remove ref from criteria box

Files changed (1) hide show

app.py +18 -30

app.py CHANGED Viewed

@@ -67,43 +67,31 @@ storage_context = StorageContext.from_defaults(persist_dir="malteos_scincl__CAR_
 # load index
 index_persisted = load_index_from_storage(storage_context, index_id="vector_index")
-import re
 async def clean_trial_text(text):
-    """Cleans text by removing intro lines from the 'Reference Papers' section if present."""
-    sections = text.split('\n')
-    cleaned_sections = []
-    in_references, found_numbers, reference_title_index = False, False, -1
-    for i, line in enumerate(sections):
-        if re.match(r'Reference Papers\s*$', line, re.IGNORECASE):
-            in_references, reference_title_index = True, len(cleaned_sections)
-            cleaned_sections.append(line)
-            continue
-        if in_references and not found_numbers:
-            if re.match(r'\d+\.', line.strip()):
-                found_numbers = True
-            elif line.strip():
-                cleaned_sections.append(line)
-            continue
-        cleaned_sections.append(line)
-    if in_references and reference_title_index != -1 and not found_numbers:
-        cleaned_sections.pop(reference_title_index)
-    return '\n'.join(cleaned_sections).strip()
 async def process_criteria(text):
-    """Processes the query response text and cleans it."""
-    text = re.sub(r'#+\s*', '', text)
-    text = re.sub(r'\*\*', '', text)
-    text = re.sub(r'(Criteria)\n\s*\n(\d+\.)', r'\1\n\2', text)
-    text = await clean_trial_text(text)
     return text
 async def extract_criteria(text):
     """Extracts inclusion and exclusion criteria from text."""
     patterns = {

 # load index
 index_persisted = load_index_from_storage(storage_context, index_id="vector_index")
 async def clean_trial_text(text):
+    """
+    Cleans text by removing everything starting from the word 'Reference Papers'
+    and any special characters like '*'.
+    """
+    # Remove special characters like '*'
+    text = re.sub(r'\*+', '', text).strip()
+    # Find the position of 'Reference Papers' and truncate the text
+    reference_start = re.search(r'\bReference Papers\b', text, re.IGNORECASE)
+    if reference_start:
+        text = text[:reference_start.start()].strip()
+    return text
 async def process_criteria(text):
+    """
+    Processes the query response text, removing special characters and cleaning it
+    up to the word 'Reference Papers'.
+    """
+    text = re.sub(r'#+\s*', '', text)  # Remove headings like '###'
+    text = re.sub(r'(Criteria)\n\s*\n(\d+\.)', r'\1\n\2', text)  # Fix spacing issues
+    text = await clean_trial_text(text)  # Clean up text until 'Reference Papers'
     return text
 async def extract_criteria(text):
     """Extracts inclusion and exclusion criteria from text."""
     patterns = {