"""Gradio demo: extract text from an uploaded image with PaddleOCR-VL."""

import gradio as gr
import spaces
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer

# Load PaddleOCR-VL model once at startup (module level, shared across requests).
MODEL_NAME = "PaddlePaddle/PaddleOCR-VL"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModel.from_pretrained(MODEL_NAME, trust_remote_code=True)
if torch.cuda.is_available():
    model = model.cuda()
# Inference-only: disable dropout/batch-norm training behavior.
model.eval()


@spaces.GPU
def ocr_inference(image):
    """Perform OCR on the input image using PaddleOCR-VL.

    Args:
        image: Uploaded image — a PIL.Image.Image (Gradio `type="pil"`) or a
            numpy array; None when no image was provided.

    Returns:
        The extracted text, or a human-readable error message. Errors are
        returned as strings (not raised) so the Gradio UI can display them.
    """
    if image is None:
        return "Please upload an image."

    try:
        # Convert to PIL Image if needed
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)
        # Normalize mode: RGBA/palette/grayscale uploads can break vision models.
        image = image.convert("RGB")

        # Run OCR inference without autograd bookkeeping (saves memory/time).
        with torch.inference_mode():
            result = model.chat(tokenizer, image, "Extract all text from this image.")
        return result
    except Exception as e:
        # Broad catch is deliberate: surface any failure in the UI textbox
        # instead of crashing the request.
        return f"Error during OCR: {str(e)}"


# Create Gradio interface
demo = gr.Interface(
    fn=ocr_inference,
    inputs=gr.Image(type="pil", label="Upload Image for OCR"),
    outputs=gr.Textbox(label="Extracted Text"),
    title="PaddleOCR-VL OCR Demo",
    description="Upload an image to extract text using PaddlePaddle/PaddleOCR-VL model",
)

# Guard launch so importing this module (e.g. for testing) doesn't start a server.
if __name__ == "__main__":
    demo.launch()