buildinqq committed on
Commit
296b5a9
·
verified ·
1 Parent(s): e309bdd

Update app.py

Browse files

remove ref from criteria box

Files changed (1) hide show
  1. app.py +18 -30
app.py CHANGED
@@ -67,43 +67,31 @@ storage_context = StorageContext.from_defaults(persist_dir="malteos_scincl__CAR_
67
  # load index
68
  index_persisted = load_index_from_storage(storage_context, index_id="vector_index")
69
 
70
- import re
71
-
72
  async def clean_trial_text(text):
73
- """Cleans text by removing intro lines from the 'Reference Papers' section if present."""
74
- sections = text.split('\n')
75
- cleaned_sections = []
76
- in_references, found_numbers, reference_title_index = False, False, -1
77
-
78
- for i, line in enumerate(sections):
79
- if re.match(r'Reference Papers\s*$', line, re.IGNORECASE):
80
- in_references, reference_title_index = True, len(cleaned_sections)
81
- cleaned_sections.append(line)
82
- continue
83
-
84
- if in_references and not found_numbers:
85
- if re.match(r'\d+\.', line.strip()):
86
- found_numbers = True
87
- elif line.strip():
88
- cleaned_sections.append(line)
89
- continue
90
-
91
- cleaned_sections.append(line)
92
 
93
- if in_references and reference_title_index != -1 and not found_numbers:
94
- cleaned_sections.pop(reference_title_index)
 
 
95
 
96
- return '\n'.join(cleaned_sections).strip()
97
 
98
  async def process_criteria(text):
99
- """Processes the query response text and cleans it."""
100
- text = re.sub(r'#+\s*', '', text)
101
- text = re.sub(r'\*\*', '', text)
102
- text = re.sub(r'(Criteria)\n\s*\n(\d+\.)', r'\1\n\2', text)
103
- text = await clean_trial_text(text)
 
 
104
  return text
105
 
106
-
107
  async def extract_criteria(text):
108
  """Extracts inclusion and exclusion criteria from text."""
109
  patterns = {
 
67
  # load index
68
  index_persisted = load_index_from_storage(storage_context, index_id="vector_index")
69
 
 
 
70
  async def clean_trial_text(text):
71
+ """
72
+ Cleans text by removing everything starting from the word 'Reference Papers'
73
+ and any special characters like '*'.
74
+ """
75
+ # Remove special characters like '*'
76
+ text = re.sub(r'\*+', '', text).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
+ # Find the position of 'Reference Papers' and truncate the text
79
+ reference_start = re.search(r'\bReference Papers\b', text, re.IGNORECASE)
80
+ if reference_start:
81
+ text = text[:reference_start.start()].strip()
82
 
83
+ return text
84
 
85
  async def process_criteria(text):
86
+ """
87
+ Processes the query response text, removing special characters and cleaning it
88
+ up to the word 'Reference Papers'.
89
+ """
90
+ text = re.sub(r'#+\s*', '', text) # Remove headings like '###'
91
+ text = re.sub(r'(Criteria)\n\s*\n(\d+\.)', r'\1\n\2', text) # Fix spacing issues
92
+ text = await clean_trial_text(text) # Clean up text until 'Reference Papers'
93
  return text
94
 
 
95
  async def extract_criteria(text):
96
  """Extracts inclusion and exclusion criteria from text."""
97
  patterns = {