taprosoft
committed on
Commit
·
44f7c24
1
Parent(s):
7aa7cc7
fix: update methods
Browse files
- app.py +4 -9
- backends/__init__.py +0 -6
- requirements.txt +1 -1
app.py
CHANGED
|
@@ -11,11 +11,10 @@ from gradio_pdf import PDF
|
|
| 11 |
from backends import ( # convert_zerox,
|
| 12 |
SUPPORTED_METHODS,
|
| 13 |
SUPPORTED_METHODS_METADATA,
|
| 14 |
-
convert_gemini,
|
| 15 |
convert_gmft,
|
| 16 |
-
convert_img2table,
|
| 17 |
convert_pypdf,
|
| 18 |
convert_smoldocling,
|
|
|
|
| 19 |
)
|
| 20 |
from backends.settings import ENABLE_DEBUG_MODE
|
| 21 |
from utils import remove_images_from_markdown, trim_pages
|
|
@@ -53,10 +52,8 @@ def convert_document(path, method, start_page=0, enabled=True):
|
|
| 53 |
path,
|
| 54 |
embed_images=True,
|
| 55 |
)
|
| 56 |
-
elif method == "Gemini (API)":
|
| 57 |
-
text, debug_image_paths = convert_gemini(path, file_name)
|
| 58 |
-
elif method == "Img2Table (table-only)":
|
| 59 |
-
text, debug_image_paths = convert_img2table(path, file_name)
|
| 60 |
elif method == "GMFT (table-only)":
|
| 61 |
text, debug_image_paths = convert_gmft(path, file_name)
|
| 62 |
elif method == "PyPDF":
|
|
@@ -183,9 +180,7 @@ with gr.Blocks(
|
|
| 183 |
with gr.Row():
|
| 184 |
methods = gr.Dropdown(
|
| 185 |
SUPPORTED_METHODS,
|
| 186 |
-
label=(
|
| 187 |
-
"Conversion methods " f"(select up-to {MAX_SELECTED_METHODS})"
|
| 188 |
-
),
|
| 189 |
value=SUPPORTED_METHODS[:2],
|
| 190 |
multiselect=True,
|
| 191 |
)
|
|
|
|
| 11 |
from backends import ( # convert_zerox,
|
| 12 |
SUPPORTED_METHODS,
|
| 13 |
SUPPORTED_METHODS_METADATA,
|
|
|
|
| 14 |
convert_gmft,
|
|
|
|
| 15 |
convert_pypdf,
|
| 16 |
convert_smoldocling,
|
| 17 |
+
convert_unstructured,
|
| 18 |
)
|
| 19 |
from backends.settings import ENABLE_DEBUG_MODE
|
| 20 |
from utils import remove_images_from_markdown, trim_pages
|
|
|
|
| 52 |
path,
|
| 53 |
embed_images=True,
|
| 54 |
)
|
| 55 |
+
elif method == "Unstructured":
|
| 56 |
+
text, debug_image_paths = convert_unstructured(path, file_name)
|
|
|
|
|
|
|
| 57 |
elif method == "GMFT (table-only)":
|
| 58 |
text, debug_image_paths = convert_gmft(path, file_name)
|
| 59 |
elif method == "PyPDF":
|
|
|
|
| 180 |
with gr.Row():
|
| 181 |
methods = gr.Dropdown(
|
| 182 |
SUPPORTED_METHODS,
|
| 183 |
+
label=("Conversion methods"),
|
|
|
|
|
|
|
| 184 |
value=SUPPORTED_METHODS[:2],
|
| 185 |
multiselect=True,
|
| 186 |
)
|
backends/__init__.py
CHANGED
|
@@ -1,7 +1,5 @@
|
|
| 1 |
# flake8: noqa
|
| 2 |
-
from .gemini import convert_gemini
|
| 3 |
from .gmft import convert_gmft
|
| 4 |
-
from .img2table import convert_img2table
|
| 5 |
from .pypdf import convert_pypdf
|
| 6 |
from .smoldocling import convert_smoldocling
|
| 7 |
from .unstructured import convert_unstructured
|
|
@@ -9,8 +7,6 @@ from .unstructured import convert_unstructured
|
|
| 9 |
__all__ = [
|
| 10 |
"convert_smoldocling",
|
| 11 |
"convert_unstructured",
|
| 12 |
-
"convert_gemini",
|
| 13 |
-
"convert_img2table",
|
| 14 |
"convert_gmft",
|
| 15 |
"convert_pypdf",
|
| 16 |
]
|
|
@@ -19,9 +15,7 @@ SUPPORTED_METHODS = [
|
|
| 19 |
"SmolDocling",
|
| 20 |
"PyMuPDF",
|
| 21 |
"PyPDF",
|
| 22 |
-
"Gemini (API)",
|
| 23 |
"Unstructured",
|
| 24 |
-
"Img2Table (table-only)",
|
| 25 |
"GMFT (table-only)",
|
| 26 |
]
|
| 27 |
SUPPORTED_METHODS_METADATA = {
|
|
|
|
| 1 |
# flake8: noqa
|
|
|
|
| 2 |
from .gmft import convert_gmft
|
|
|
|
| 3 |
from .pypdf import convert_pypdf
|
| 4 |
from .smoldocling import convert_smoldocling
|
| 5 |
from .unstructured import convert_unstructured
|
|
|
|
| 7 |
__all__ = [
|
| 8 |
"convert_smoldocling",
|
| 9 |
"convert_unstructured",
|
|
|
|
|
|
|
| 10 |
"convert_gmft",
|
| 11 |
"convert_pypdf",
|
| 12 |
]
|
|
|
|
| 15 |
"SmolDocling",
|
| 16 |
"PyMuPDF",
|
| 17 |
"PyPDF",
|
|
|
|
| 18 |
"Unstructured",
|
|
|
|
| 19 |
"GMFT (table-only)",
|
| 20 |
]
|
| 21 |
SUPPORTED_METHODS_METADATA = {
|
requirements.txt
CHANGED
|
@@ -10,4 +10,4 @@ transformers<5.0.0,>=4.45.2
|
|
| 10 |
pypdf
|
| 11 |
docling_core
|
| 12 |
opencv-contrib-python
|
| 13 |
-
|
|
|
|
| 10 |
pypdf
|
| 11 |
docling_core
|
| 12 |
opencv-contrib-python
|
| 13 |
+
flash_attn
|