[fix] update markdown utilities
Browse files
- app.py +34 -24
- utils/markdown_utils.py +10 -3
app.py
CHANGED
|
@@ -15,6 +15,7 @@ from PIL import Image
|
|
| 15 |
from transformers import AutoProcessor, VisionEncoderDecoderModel
|
| 16 |
|
| 17 |
from utils.utils import prepare_image, parse_layout_string, process_coordinates, ImageDimensions
|
|
|
|
| 18 |
|
| 19 |
# 读取外部CSS文件
|
| 20 |
def load_css():
|
|
@@ -428,15 +429,36 @@ def process_elements(layout_results, padded_image, dims, max_batch_size=16):
|
|
| 428 |
cropped = padded_image[y1:y2, x1:x2]
|
| 429 |
if cropped.size > 0:
|
| 430 |
if label == "fig":
|
| 431 |
-
#
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 440 |
else:
|
| 441 |
# 准备元素进行解析
|
| 442 |
pil_crop = Image.fromarray(cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB))
|
|
@@ -479,21 +501,9 @@ def process_elements(layout_results, padded_image, dims, max_batch_size=16):
|
|
| 479 |
|
| 480 |
def generate_markdown(recognition_results):
|
| 481 |
"""从识别结果生成Markdown内容"""
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
text = result.get("text", "").strip()
|
| 486 |
-
label = result.get("label", "")
|
| 487 |
-
|
| 488 |
-
if text:
|
| 489 |
-
if label == "tab":
|
| 490 |
-
# 表格内容
|
| 491 |
-
markdown_parts.append(f"\n{text}\n")
|
| 492 |
-
else:
|
| 493 |
-
# 普通文本内容
|
| 494 |
-
markdown_parts.append(text)
|
| 495 |
-
|
| 496 |
-
return "\n\n".join(markdown_parts)
|
| 497 |
|
| 498 |
# LaTeX 渲染配置
|
| 499 |
latex_delimiters = [
|
|
|
|
| 15 |
from transformers import AutoProcessor, VisionEncoderDecoderModel
|
| 16 |
|
| 17 |
from utils.utils import prepare_image, parse_layout_string, process_coordinates, ImageDimensions
|
| 18 |
+
from utils.markdown_utils import MarkdownConverter
|
| 19 |
|
| 20 |
# 读取外部CSS文件
|
| 21 |
def load_css():
|
|
|
|
| 429 |
cropped = padded_image[y1:y2, x1:x2]
|
| 430 |
if cropped.size > 0:
|
| 431 |
if label == "fig":
|
| 432 |
+
# 对于图像区域,提取图像的base64编码
|
| 433 |
+
try:
|
| 434 |
+
# 将裁剪的图像转换为PIL图像
|
| 435 |
+
pil_crop = Image.fromarray(cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB))
|
| 436 |
+
|
| 437 |
+
# 转换为base64
|
| 438 |
+
import io
|
| 439 |
+
import base64
|
| 440 |
+
buffered = io.BytesIO()
|
| 441 |
+
pil_crop.save(buffered, format="PNG")
|
| 442 |
+
img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
|
| 443 |
+
|
| 444 |
+
figure_results.append(
|
| 445 |
+
{
|
| 446 |
+
"label": label,
|
| 447 |
+
"bbox": [orig_x1, orig_y1, orig_x2, orig_y2],
|
| 448 |
+
"text": img_base64, # 存储base64编码而不是空字符串
|
| 449 |
+
"reading_order": reading_order,
|
| 450 |
+
}
|
| 451 |
+
)
|
| 452 |
+
except Exception as e:
|
| 453 |
+
logger.error(f"Error encoding figure to base64: {e}")
|
| 454 |
+
figure_results.append(
|
| 455 |
+
{
|
| 456 |
+
"label": label,
|
| 457 |
+
"bbox": [orig_x1, orig_y1, orig_x2, orig_y2],
|
| 458 |
+
"text": "", # 如果编码失败,使用空字符串
|
| 459 |
+
"reading_order": reading_order,
|
| 460 |
+
}
|
| 461 |
+
)
|
| 462 |
else:
|
| 463 |
# 准备元素进行解析
|
| 464 |
pil_crop = Image.fromarray(cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB))
|
|
|
|
| 501 |
|
| 502 |
def generate_markdown(recognition_results):
    """Generate Markdown content from the recognition results.

    The work is delegated to ``MarkdownConverter.convert``, which is used
    for every kind of content, figures included.
    """
    # A fresh converter is created per call and handed the results as-is.
    return MarkdownConverter().convert(recognition_results)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 507 |
|
| 508 |
# LaTeX 渲染配置
|
| 509 |
latex_delimiters = [
|
utils/markdown_utils.py
CHANGED
|
@@ -226,6 +226,10 @@ class MarkdownConverter:
|
|
| 226 |
Convert base64 encoded image to markdown image syntax
|
| 227 |
"""
|
| 228 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
# Determine image format (assuming PNG if not specified)
|
| 230 |
img_format = "png"
|
| 231 |
if text.startswith("data:image/"):
|
|
@@ -336,7 +340,12 @@ class MarkdownConverter:
|
|
| 336 |
label = result.get('label', '')
|
| 337 |
text = result.get('text', '').strip()
|
| 338 |
|
| 339 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 340 |
if not text:
|
| 341 |
continue
|
| 342 |
|
|
@@ -345,8 +354,6 @@ class MarkdownConverter:
|
|
| 345 |
markdown_content.append(self._handle_heading(text, label))
|
| 346 |
elif label == 'list':
|
| 347 |
markdown_content.append(self._handle_list_item(text))
|
| 348 |
-
elif label == 'fig':
|
| 349 |
-
markdown_content.append(self._handle_figure(text, section_count))
|
| 350 |
elif label == 'tab':
|
| 351 |
markdown_content.append(self._handle_table(text))
|
| 352 |
elif label == 'alg':
|
|
|
|
| 226 |
Convert base64 encoded image to markdown image syntax
|
| 227 |
"""
|
| 228 |
try:
|
| 229 |
+
# Check if text is empty (fallback case)
|
| 230 |
+
if not text.strip():
|
| 231 |
+
return f"\n\n"
|
| 232 |
+
|
| 233 |
# Determine image format (assuming PNG if not specified)
|
| 234 |
img_format = "png"
|
| 235 |
if text.startswith("data:image/"):
|
|
|
|
| 340 |
label = result.get('label', '')
|
| 341 |
text = result.get('text', '').strip()
|
| 342 |
|
| 343 |
+
# 处理图片,即使文本为空也要处理
|
| 344 |
+
if label == 'fig':
|
| 345 |
+
markdown_content.append(self._handle_figure(text, section_count))
|
| 346 |
+
continue
|
| 347 |
+
|
| 348 |
+
# Skip empty text for non-figure elements
|
| 349 |
if not text:
|
| 350 |
continue
|
| 351 |
|
|
|
|
| 354 |
markdown_content.append(self._handle_heading(text, label))
|
| 355 |
elif label == 'list':
|
| 356 |
markdown_content.append(self._handle_list_item(text))
|
|
|
|
|
|
|
| 357 |
elif label == 'tab':
|
| 358 |
markdown_content.append(self._handle_table(text))
|
| 359 |
elif label == 'alg':
|