import gradio as gr
from transformers import AutoModel, AutoTokenizer
from PIL import Image
import torch

# Model identifier handed to both from_pretrained() calls below.
model_name = "rednote-hilab/dots.ocr"

# Loading happens once at import time so every request reuses the same
# tokenizer and model objects.
print("Loading dots.ocr model...")
# NOTE(review): trust_remote_code=True lets transformers execute Python code
# shipped with the model repository — consider pinning a revision.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModel.from_pretrained(
    model_name,
    device_map="auto",
    # Half precision only when CUDA is available; full precision otherwise.
    torch_dtype=(
        torch.float32 if not torch.cuda.is_available() else torch.float16
    ),
    trust_remote_code=True,
)
print("Model loaded!")

def process_image(image):
    """Run dots.ocr on an uploaded image and return the text to display.

    Args:
        image: The image to recognise, or ``None`` when nothing was uploaded.

    Returns:
        Whatever ``model.generate`` produces on success; the string
        ``"Please upload an image"`` when ``image`` is ``None``; or an
        ``"Error: ..."`` string carrying the exception message when the
        OCR call raises.
    """
    if image is not None:
        try:
            # NOTE(review): passing the image and tokenizer positionally does
            # not match the usual transformers ``generate`` signature —
            # confirm against the model's remote code that this call is valid.
            ocr_output = model.generate(image, tokenizer)
        except Exception as exc:
            # Report failures as text so the UI shows a message instead of
            # propagating the exception.
            return "Error: " + str(exc)
        return ocr_output

    return "Please upload an image"

# Gradio UI: one image input routed through process_image, with the returned
# value shown in a multi-line text box.
iface = gr.Interface(
    title="dots.ocr - Multilingual OCR",
    description=(
        "Upload an image to extract text using dots.ocr. "
        "Supports 100+ languages, tables, formulas, and complex layouts."
    ),
    fn=process_image,
    # type="pil" asks Gradio to hand the callback a PIL image.
    inputs=gr.Image(label="Upload Image", type="pil"),
    outputs=gr.Textbox(lines=10, label="OCR Result"),
    # One single-element row per example, matching the single input component.
    examples=[
        [sample_path]
        for sample_path in ("examples/example1.jpg", "examples/example2.png")
    ],
    theme="soft",
)

# Start the web server only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()