# OCRtest / app.py — Hugging Face Space (commit 2ed3b5f, verified; author: Eathprompt)
import gradio as gr
import spaces
from transformers import AutoModel, AutoTokenizer, AutoProcessor
from PIL import Image
import torch
# Initialize the PaddleOCR-VL vision-language model and its companion
# tokenizer/processor. trust_remote_code is required because the model
# ships custom modeling code on the Hub.
model_name = "PaddlePaddle/PaddleOCR-VL"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
# Keep weights on CPU unless a GPU is actually present (Spaces CPU tier).
if torch.cuda.is_available():
    model = model.to("cuda")
@spaces.GPU
def ocr_inference(image):
    """Run OCR on an uploaded image with PaddleOCR-VL.

    Args:
        image: A ``PIL.Image.Image`` or numpy array (Gradio may supply
            either depending on configuration).

    Returns:
        str: The extracted text, or a human-readable error message.
        Gradio renders the returned string, so failures are reported
        in-band rather than raised.
    """
    if image is None:
        return "Please upload an image."
    try:
        # The interface is configured with type="pil", but accept raw
        # numpy arrays defensively.
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)
        # Normalize to RGB: RGBA/palette/grayscale uploads can break
        # vision processors that expect 3-channel input.
        if image.mode != "RGB":
            image = image.convert("RGB")
        prompt = "Extract all text from this image."
        inputs = processor(images=image, text=prompt, return_tensors="pt")
        if torch.cuda.is_available():
            # Move only tensor entries; processors can also return
            # non-tensor metadata that has no .cuda().
            inputs = {
                k: v.cuda() if isinstance(v, torch.Tensor) else v
                for k, v in inputs.items()
            }
        # Greedy decoding is sufficient for OCR transcription.
        with torch.no_grad():
            outputs = model.generate(**inputs, max_new_tokens=512)
        generated = outputs[0]
        # NOTE(review): assumes generate() returns prompt + continuation
        # (standard HF causal-LM behavior) — strip the echoed prompt so
        # only newly generated text is decoded. Verify against the
        # model's remote code.
        input_ids = inputs.get("input_ids")
        if input_ids is not None and generated.shape[0] >= input_ids.shape[1]:
            generated = generated[input_ids.shape[1]:]
        return tokenizer.decode(generated, skip_special_tokens=True)
    except Exception as e:
        # Surface the failure to the UI instead of crashing the Space.
        return f"Error during OCR: {str(e)}"
# Build and launch the Gradio UI: one image input, one text output.
ocr_input = gr.Image(type="pil", label="Upload Image for OCR")
ocr_output = gr.Textbox(label="Extracted Text")
demo = gr.Interface(
    fn=ocr_inference,
    inputs=ocr_input,
    outputs=ocr_output,
    title="PaddleOCR-VL OCR Demo",
    description="Upload an image to extract text using PaddlePaddle/PaddleOCR-VL model",
)
demo.launch()