# OCRtest / app.py — Hugging Face Space entry point.
# Space configured to use the PaddlePaddle/PaddleOCR-VL model.
import gradio as gr
import spaces
from transformers import AutoModel, AutoTokenizer
from PIL import Image
import torch
# Load the PaddleOCR-VL vision-language OCR model once at startup.
# trust_remote_code=True is required: this repo ships custom model code
# (including the `chat` method used in ocr_inference below).
model_name = "PaddlePaddle/PaddleOCR-VL"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
if torch.cuda.is_available():
    model = model.cuda()  # move weights to GPU when one is present
@spaces.GPU
def ocr_inference(image):
    """Run OCR on the uploaded image using PaddleOCR-VL.

    Args:
        image: A ``PIL.Image.Image`` (Gradio ``type="pil"``) or a numpy
            array; ``None`` when nothing was uploaded.

    Returns:
        str: The extracted text, or a user-facing message/error string.
    """
    if image is None:
        return "Please upload an image."
    try:
        # Gradio delivers a PIL image, but tolerate raw numpy arrays too.
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)
        # Normalize to RGB: RGBA, palette, or grayscale uploads can break
        # the vision-language model's image preprocessing.
        if image.mode != "RGB":
            image = image.convert("RGB")
        # `chat` comes from the model's remote code (trust_remote_code=True).
        result = model.chat(tokenizer, image, "Extract all text from this image.")
        return result
    except Exception as e:
        # Surface the failure in the UI instead of crashing the Space.
        return f"Error during OCR: {str(e)}"
# Minimal Gradio UI: one image in, extracted text out.
# NOTE: the variable must be named `demo` — HF Spaces looks for it by name.
demo = gr.Interface(
    fn=ocr_inference,
    inputs=gr.Image(type="pil", label="Upload Image for OCR"),
    outputs=gr.Textbox(label="Extracted Text"),
    title="PaddleOCR-VL OCR Demo",
    description="Upload an image to extract text using PaddlePaddle/PaddleOCR-VL model",
)

demo.launch()