Spaces:
Running
Running
improve latex parsing
Browse files
app.py
CHANGED
|
@@ -133,34 +133,43 @@ def update_inputs(task):
|
|
| 133 |
]
|
| 134 |
|
| 135 |
def parse_latex_output(res):
|
| 136 |
-
|
|
|
|
| 137 |
parsed_lines = []
|
| 138 |
in_latex = False
|
| 139 |
-
|
|
|
|
| 140 |
for line in lines:
|
| 141 |
-
line
|
| 142 |
-
|
|
|
|
|
|
|
|
|
|
| 143 |
continue
|
|
|
|
|
|
|
| 144 |
|
| 145 |
-
latex_patterns = [r'\{', r'\}', r'\[', r'\]', r'\\', r'\$', r'_', r'^']
|
| 146 |
contains_latex = any(re.search(pattern, line) for pattern in latex_patterns)
|
| 147 |
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
if contains_latex or is_key_value:
|
| 151 |
if not in_latex:
|
| 152 |
-
parsed_lines.append('$$')
|
| 153 |
in_latex = True
|
| 154 |
-
|
|
|
|
| 155 |
else:
|
| 156 |
if in_latex:
|
| 157 |
-
|
|
|
|
| 158 |
in_latex = False
|
|
|
|
| 159 |
parsed_lines.append(line)
|
|
|
|
| 160 |
if in_latex:
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
|
|
|
| 164 |
|
| 165 |
def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
|
| 166 |
res, html_content, unique_id = process_image(image, task, ocr_type, ocr_box, ocr_color)
|
|
|
|
| 133 |
]
|
| 134 |
|
| 135 |
# Matches an already-delimited display-math block; the capture group makes
# re.split() return the blocks themselves at the odd indices of its result.
_DISPLAY_MATH_RE = re.compile(r'(\$\$.*?\$\$)', re.DOTALL)

# Characters that mark a line as "probably LaTeX". '^' is placed away from the
# start of the class so it is a literal caret, not a negation or an anchor.
_LATEX_HINT_RE = re.compile(r'[{}\[\]\\$_^"]')


def parse_latex_output(res):
    """Wrap LaTeX-looking lines of OCR output in ``$$`` display-math blocks.

    The text is first split around existing ``$$ ... $$`` blocks, which are
    passed through untouched so they are never double-wrapped. The remaining
    text is classified line by line: consecutive lines that contain a LaTeX
    hint character (one of ``{ } [ ] \\ $ _ ^ "``) are grouped into a single
    block delimited by ``$$`` lines, and every other line is emitted as plain
    text. Lines are stripped of surrounding whitespace and blank lines are
    preserved; a blank line also closes any open LaTeX group.

    The detection is a heuristic: a prose line that merely contains one of the
    hint characters (for example inline ``$x$`` math or a quotation mark) is
    wrapped as well.

    Args:
        res: Raw recognition text. ``None`` or an empty string yields ``''``.

    Returns:
        The processed text, with output lines joined by ``'\\n'``.
    """
    if not res:
        return ''

    output = []
    pending = []  # consecutive LaTeX-looking lines waiting to share one block

    def flush():
        # Emit the buffered LaTeX lines as one $$-delimited block.
        if pending:
            output.append('$$')
            output.extend(pending)
            output.append('$$')
            pending.clear()

    for index, segment in enumerate(_DISPLAY_MATH_RE.split(res)):
        if index % 2:
            # Odd indices are the captured, already-delimited math blocks.
            flush()
            output.append(segment)
            continue

        lines = segment.splitlines()
        # The newline that directly follows a math block would otherwise show
        # up as a spurious empty line, because the block already ends its own
        # output line.
        if index and lines and not lines[0].strip():
            lines = lines[1:]

        for raw_line in lines:
            line = raw_line.strip()
            if line and _LATEX_HINT_RE.search(line):
                pending.append(line)
            else:
                flush()
                output.append(line)

    flush()
    return '\n'.join(output)
|
| 173 |
|
| 174 |
def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
|
| 175 |
res, html_content, unique_id = process_image(image, task, ocr_type, ocr_box, ocr_color)
|