Eathprompt committed on
Commit
2ed3b5f
·
verified ·
1 Parent(s): 9cab613

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -2
app.py CHANGED
@@ -1,12 +1,13 @@
1
  import gradio as gr
2
  import spaces
3
- from transformers import AutoModel, AutoTokenizer
4
  from PIL import Image
5
  import torch
6
 
7
  # Load PaddleOCR-VL model
8
  model_name = "PaddlePaddle/PaddleOCR-VL"
9
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 
10
  model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
11
 
12
  if torch.cuda.is_available():
@@ -25,8 +26,19 @@ def ocr_inference(image):
25
  if not isinstance(image, Image.Image):
26
  image = Image.fromarray(image)
27
 
 
 
 
 
 
 
 
28
  # Run OCR inference
29
- result = model.chat(tokenizer, image, "Extract all text from this image.")
 
 
 
 
30
  return result
31
  except Exception as e:
32
  return f"Error during OCR: {str(e)}"
 
1
  import gradio as gr
2
  import spaces
3
+ from transformers import AutoModel, AutoTokenizer, AutoProcessor
4
  from PIL import Image
5
  import torch
6
 
7
  # Load PaddleOCR-VL model
8
  model_name = "PaddlePaddle/PaddleOCR-VL"
9
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
10
+ processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
11
  model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
12
 
13
  if torch.cuda.is_available():
 
26
  if not isinstance(image, Image.Image):
27
  image = Image.fromarray(image)
28
 
29
+ # Prepare inputs
30
+ prompt = "Extract all text from this image."
31
+ inputs = processor(images=image, text=prompt, return_tensors="pt")
32
+
33
+ if torch.cuda.is_available():
34
+ inputs = {k: v.cuda() for k, v in inputs.items()}
35
+
36
  # Run OCR inference
37
+ with torch.no_grad():
38
+ outputs = model.generate(**inputs, max_new_tokens=512)
39
+
40
+ # Decode the output
41
+ result = tokenizer.decode(outputs[0], skip_special_tokens=True)
42
  return result
43
  except Exception as e:
44
  return f"Error during OCR: {str(e)}"