Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -110,15 +110,37 @@ def split_text(text, max_tokens=500):
|
|
| 110 |
|
| 111 |
return chunks
|
| 112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
def create_bibtex_entry(data):
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
for key, value in data.items():
|
| 116 |
-
if
|
| 117 |
bibtex += f" {key.lower()} = {{{value.strip()}}},\n"
|
| 118 |
bibtex = bibtex.rstrip(',\n') + "\n}"
|
| 119 |
return bibtex
|
| 120 |
|
| 121 |
-
|
| 122 |
def transform_chunks(marianne_segmentation):
|
| 123 |
marianne_segmentation = pd.DataFrame(marianne_segmentation)
|
| 124 |
marianne_segmentation = marianne_segmentation[marianne_segmentation['entity_group'] != 'separator']
|
|
@@ -153,8 +175,6 @@ def transform_chunks(marianne_segmentation):
|
|
| 153 |
|
| 154 |
final_html = '\n'.join(html_output)
|
| 155 |
return final_html, bibtex_entry
|
| 156 |
-
|
| 157 |
-
|
| 158 |
|
| 159 |
# Class to encapsulate the Falcon chatbot
|
| 160 |
class MistralChatBot:
|
|
|
|
| 110 |
|
| 111 |
return chunks
|
| 112 |
|
| 113 |
+
def extract_year(text):
|
| 114 |
+
year_match = re.search(r'\b(\d{4})\b', text)
|
| 115 |
+
return year_match.group(1) if year_match else None
|
| 116 |
+
|
| 117 |
def create_bibtex_entry(data):
|
| 118 |
+
# Determine the entry type
|
| 119 |
+
if 'Journal' in data:
|
| 120 |
+
entry_type = 'article'
|
| 121 |
+
elif 'Booktitle' in data:
|
| 122 |
+
entry_type = 'incollection'
|
| 123 |
+
else:
|
| 124 |
+
entry_type = 'book'
|
| 125 |
+
|
| 126 |
+
# Extract year from 'None' if it exists
|
| 127 |
+
none_content = data.pop('None', '')
|
| 128 |
+
year = extract_year(none_content)
|
| 129 |
+
if year and 'Year' not in data:
|
| 130 |
+
data['Year'] = year
|
| 131 |
+
|
| 132 |
+
# Create BibTeX ID
|
| 133 |
+
author_words = data.get('Author', '').split()
|
| 134 |
+
first_author = author_words[0] if author_words else 'Unknown'
|
| 135 |
+
bibtex_id = f"{first_author}{year}" if year else first_author
|
| 136 |
+
|
| 137 |
+
bibtex = f"@{entry_type}{{{bibtex_id},\n"
|
| 138 |
for key, value in data.items():
|
| 139 |
+
if value.strip():
|
| 140 |
bibtex += f" {key.lower()} = {{{value.strip()}}},\n"
|
| 141 |
bibtex = bibtex.rstrip(',\n') + "\n}"
|
| 142 |
return bibtex
|
| 143 |
|
|
|
|
| 144 |
def transform_chunks(marianne_segmentation):
|
| 145 |
marianne_segmentation = pd.DataFrame(marianne_segmentation)
|
| 146 |
marianne_segmentation = marianne_segmentation[marianne_segmentation['entity_group'] != 'separator']
|
|
|
|
| 175 |
|
| 176 |
final_html = '\n'.join(html_output)
|
| 177 |
return final_html, bibtex_entry
|
|
|
|
|
|
|
| 178 |
|
| 179 |
# Class to encapsulate the Falcon chatbot
|
| 180 |
class MistralChatBot:
|