Something

Sleeping

Pclanglais committed on Jul 8, 2024

Commit

efdd1b6

verified ·

1 Parent(s): 832ed77

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -152,8 +152,12 @@ def save_bibtex(bibtex_content):
 class CombinedProcessor:
     def process(self, user_message):
         editorial_text = re.sub("\n", " ¶ ", user_message)
-        #editorial_text = re.sub(r'\s*([;:,])\s*', r' \1 ', editorial_text)
         print(editorial_text)
         num_tokens = len(tokenizer.tokenize(editorial_text))
@@ -168,14 +172,8 @@ class CombinedProcessor:
         bibliography_entries = editorial_df[editorial_df['entity_group'] == 'bibliography']['word'].tolist()
         bibtex_entries = []
-        corrected_bibliography_entries = []
-        for entry in bibliography_entries:
-            entry = re.sub(r' +?¶ +?', r'¶', entry)
-            entry = entry.split("¶¶")
-            corrected_bibliography_entries.extend(entry)
-        for entry in corrected_bibliography_entries:
             print(entry)
             entry = re.sub(r'\s*([;:,\.])\s*', r' \1 ', entry)
             entry = re.sub(r'- ?[\n¶] ?', r'', entry)

 class CombinedProcessor:
     def process(self, user_message):
+        #Precaution to reinforce bibliography detection.
+        editorial_text = "Bibliography\n" + user_message
+        #Our fix for the lack of newline in deberta
         editorial_text = re.sub("\n", " ¶ ", user_message)
         print(editorial_text)
         num_tokens = len(tokenizer.tokenize(editorial_text))
         bibliography_entries = editorial_df[editorial_df['entity_group'] == 'bibliography']['word'].tolist()
         bibtex_entries = []
+        for entry in bibliography_entries:
             print(entry)
             entry = re.sub(r'\s*([;:,\.])\s*', r' \1 ', entry)
             entry = re.sub(r'- ?[\n¶] ?', r'', entry)