Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,9 +1,3 @@
|
|
| 1 |
-
# app.py
|
| 2 |
-
# Remember to add 'PyMuPDF', 'pdf2image', and 'torch' to your requirements.txt or install them.
|
| 3 |
-
# For PDF processing, you might also need to install poppler:
|
| 4 |
-
# On Debian/Ubuntu: sudo apt-get install poppler-utils
|
| 5 |
-
# On macOS (using Homebrew): brew install poppler
|
| 6 |
-
|
| 7 |
import gradio as gr
|
| 8 |
from PIL import Image
|
| 9 |
from transformers import AutoModelForImageTextToText, AutoProcessor, AutoTokenizer, TextIteratorStreamer
|
|
@@ -167,7 +161,6 @@ def convert_to_markdown_stream(
|
|
| 167 |
else:
|
| 168 |
user_prompt = """Extract the text from the above document as if you were reading it naturally. Return the tables in html format. Watermarks should be wrapped in brackets. Ex: <watermark>OFFICIAL COPY</watermark>. Page numbers should be wrapped in brackets. Ex: <page_number>14</page_number> or <page_number>9/22</page_number>. Prefer using ☐ and ☑ for check boxes."""
|
| 169 |
|
| 170 |
-
|
| 171 |
# Accumulate results from all pages
|
| 172 |
full_markdown_content = ""
|
| 173 |
|
|
@@ -212,7 +205,7 @@ def convert_to_markdown_stream(
|
|
| 212 |
except Exception as e:
|
| 213 |
return f"Error: {e}"
|
| 214 |
|
| 215 |
-
def process_document(
|
| 216 |
"""
|
| 217 |
Process uploaded document (PDF or image) and convert to markdown.
|
| 218 |
|
|
@@ -223,33 +216,34 @@ def process_document(file_path, max_tokens, with_img_desc: bool = False):
|
|
| 223 |
Returns:
|
| 224 |
Generator yielding markdown content
|
| 225 |
"""
|
| 226 |
-
if
|
| 227 |
return "Please upload a file first."
|
| 228 |
-
|
| 229 |
try:
|
| 230 |
# Handle PDF files
|
| 231 |
-
if file_path.name.lower().endswith('.pdf'):
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
|
| 249 |
-
# Handle image files
|
| 250 |
-
else:
|
| 251 |
-
|
| 252 |
-
image = Image.open(file_path.name).convert("RGB")
|
|
|
|
|
|
|
| 253 |
image = image.resize((2048, 2048))
|
| 254 |
|
| 255 |
# Process single image
|
|
@@ -285,9 +279,8 @@ with gr.Blocks(title="PDF to Markdown Converter", theme=gr.themes.Soft()) as dem
|
|
| 285 |
|
| 286 |
with gr.Row():
|
| 287 |
with gr.Column(scale=1):
|
| 288 |
-
file_input = gr.
|
| 289 |
-
label="Upload
|
| 290 |
-
file_types=["pdf", "image"],
|
| 291 |
height=200
|
| 292 |
)
|
| 293 |
max_tokens_slider = gr.Slider(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from PIL import Image
|
| 3 |
from transformers import AutoModelForImageTextToText, AutoProcessor, AutoTokenizer, TextIteratorStreamer
|
|
|
|
| 161 |
else:
|
| 162 |
user_prompt = """Extract the text from the above document as if you were reading it naturally. Return the tables in html format. Watermarks should be wrapped in brackets. Ex: <watermark>OFFICIAL COPY</watermark>. Page numbers should be wrapped in brackets. Ex: <page_number>14</page_number> or <page_number>9/22</page_number>. Prefer using ☐ and ☑ for check boxes."""
|
| 163 |
|
|
|
|
| 164 |
# Accumulate results from all pages
|
| 165 |
full_markdown_content = ""
|
| 166 |
|
|
|
|
| 205 |
except Exception as e:
|
| 206 |
return f"Error: {e}"
|
| 207 |
|
| 208 |
+
def process_document(image, max_tokens, with_img_desc: bool = False):
|
| 209 |
"""
|
| 210 |
Process an uploaded image document and convert it to markdown (PDF handling is currently commented out).
|
| 211 |
|
|
|
|
| 216 |
Returns:
|
| 217 |
Generator yielding markdown content
|
| 218 |
"""
|
| 219 |
+
if image is None:
|
| 220 |
return "Please upload a file first."
|
|
|
|
| 221 |
try:
|
| 222 |
# Handle PDF files
|
| 223 |
+
# if file_path.name.lower().endswith('.pdf'):
|
| 224 |
+
# # Convert PDF to images
|
| 225 |
+
# with tempfile.TemporaryDirectory() as temp_dir:
|
| 226 |
+
# # Copy uploaded file to temp directory
|
| 227 |
+
# temp_pdf_path = os.path.join(temp_dir, "document.pdf")
|
| 228 |
+
# import shutil
|
| 229 |
+
# shutil.copy(file_path.name, temp_pdf_path)
|
| 230 |
|
| 231 |
+
# # Convert PDF pages to images
|
| 232 |
+
# images = convert_from_path(temp_pdf_path, dpi=150)
|
| 233 |
+
# images = [image.convert("RGB") for image in images]
|
| 234 |
+
# images = [image.resize((2048, 2048)) for image in images]
|
| 235 |
+
# # Process each page
|
| 236 |
+
# for result in convert_to_markdown_stream(
|
| 237 |
+
# images, "nanonets/Nanonets-OCR-s", max_tokens, with_img_desc
|
| 238 |
+
# ):
|
| 239 |
+
# yield process_tags(result)
|
| 240 |
|
| 241 |
+
# # Handle image files
|
| 242 |
+
# else:
|
| 243 |
+
# # Open image directly
|
| 244 |
+
# image = Image.open(file_path.name).convert("RGB")
|
| 245 |
+
# image = image.resize((2048, 2048))
|
| 246 |
+
image = Image.fromarray(image)
|
| 247 |
image = image.resize((2048, 2048))
|
| 248 |
|
| 249 |
# Process single image
|
|
|
|
| 279 |
|
| 280 |
with gr.Row():
|
| 281 |
with gr.Column(scale=1):
|
| 282 |
+
file_input = gr.Image(
|
| 283 |
+
label="Upload Image Document",
|
|
|
|
| 284 |
height=200
|
| 285 |
)
|
| 286 |
max_tokens_slider = gr.Slider(
|