|
|
import logging

import gradio as gr
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer
|
|
|
|
|
|
|
|
# Hugging Face Hub identifier of the checkpoint served by this demo.
model_name = "rednote-hilab/dots.ocr"

print("Loading dots.ocr model...")

# trust_remote_code=True allows transformers to run Python code shipped in the
# model repository; keep it enabled only for repositories you trust.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# NOTE(review): AutoModel is used here, while process_image() later calls
# model.generate(); confirm against the model card that this auto-class
# exposes the generation API for this checkpoint.
model = AutoModel.from_pretrained(
    model_name,
    # Let the library decide device placement (presumably needs the
    # `accelerate` package installed -- TODO confirm).
    device_map="auto",
    # Full precision without CUDA, half precision when a GPU is available.
    torch_dtype=torch.float32 if not torch.cuda.is_available() else torch.float16,
    trust_remote_code=True,
)

print("Model loaded!")
|
|
|
|
|
def process_image(image):
    """Run the dots.ocr model on an uploaded image.

    Args:
        image: The image handed over by the Gradio input component, or
            ``None`` when the user submitted without uploading anything.

    Returns:
        Whatever ``model.generate`` returns on success. Otherwise a
        human-readable string: an upload prompt when ``image`` is ``None``,
        or ``"Error: <message>"`` when inference raised.
    """
    if image is None:
        return "Please upload an image"

    try:
        # NOTE(review): this passes the raw image and the tokenizer
        # positionally; verify this call shape against the checkpoint's
        # remote-code API / model card.
        result = model.generate(image, tokenizer)
    except Exception as e:
        # UI boundary: show a message in the output box instead of failing
        # the request, but keep the full traceback in the server log so the
        # failure can actually be diagnosed.
        logging.getLogger(__name__).exception("dots.ocr inference failed")
        return f"Error: {e}"
    return result
|
|
|
|
|
|
|
|
# Gradio UI: a single image input wired to process_image, with the returned
# value shown in a multi-line text box.
iface = gr.Interface(
    title="dots.ocr - Multilingual OCR",
    description="Upload an image to extract text using dots.ocr. Supports 100+ languages, tables, formulas, and complex layouts.",
    fn=process_image,
    # type="pil" asks Gradio to pass the upload to the callback as a PIL image.
    inputs=gr.Image(label="Upload Image", type="pil"),
    outputs=gr.Textbox(lines=10, label="OCR Result"),
    # NOTE(review): these sample files are not visible from this script;
    # confirm they exist relative to the working directory.
    examples=[["examples/example1.jpg"], ["examples/example2.png"]],
    theme="soft",
)


# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()