akhaliq HF Staff committed on
Commit
eb29213
·
verified ·
1 Parent(s): ebec941

Update Gradio app with multiple files

Browse files
Files changed (1) hide show
  1. app.py +50 -78
app.py CHANGED
@@ -41,89 +41,61 @@ def ocr_process(
41
  if image_input is None:
42
  return "Please upload an image first."
43
 
44
- try:
45
- # Move model to GPU and set dtype
46
- model.cuda().to(torch.bfloat16)
47
-
48
- # Create temp directory for this session
49
- with tempfile.TemporaryDirectory() as temp_dir:
50
- # Save image with proper format
51
- temp_image_path = os.path.join(temp_dir, "input_image.jpg")
52
- # Convert RGBA to RGB if necessary
53
- if image_input.mode in ('RGBA', 'LA', 'P'):
54
- rgb_image = Image.new('RGB', image_input.size, (255, 255, 255))
55
- # Handle different image modes
56
- if image_input.mode == 'RGBA':
57
- rgb_image.paste(image_input, mask=image_input.split()[3])
58
- else:
59
- rgb_image.paste(image_input)
60
- rgb_image.save(temp_image_path, 'JPEG', quality=95)
61
- else:
62
- image_input.save(temp_image_path, 'JPEG', quality=95)
63
-
64
- # Verify image was saved
65
- if not os.path.exists(temp_image_path):
66
- return "Error: Failed to save image for processing."
67
-
68
- # Set parameters based on preset
69
- presets = {
70
- "tiny": {"base_size": 512, "image_size": 512, "crop_mode": False},
71
- "small": {"base_size": 640, "image_size": 640, "crop_mode": False},
72
- "base": {"base_size": 1024, "image_size": 1024, "crop_mode": False},
73
- "large": {"base_size": 1280, "image_size": 1280, "crop_mode": False},
74
- "gundam": {"base_size": 1024, "image_size": 640, "crop_mode": True},
75
- }
76
-
77
- config = presets[preset]
78
-
79
- # Set prompt based on task type
80
- if task_type == "markdown":
81
- prompt = "<image>\n<|grounding|>Convert the document to markdown. "
82
  else:
83
- prompt = "<image>\nFree OCR. "
84
-
85
- # Run inference - the model returns the text directly
86
- result = model.infer(
87
- tokenizer,
88
- prompt=prompt,
89
- image_file=temp_image_path,
90
- output_path=temp_dir,
91
- base_size=config["base_size"],
92
- image_size=config["image_size"],
93
- crop_mode=config["crop_mode"],
94
- save_results=False,
95
- test_compress=False,
96
- )
97
-
98
- # Move model back to CPU to free GPU memory
99
- model.to("cpu")
100
- torch.cuda.empty_cache()
101
-
102
- # Process the result
103
- if result is None:
104
- return "No text could be extracted. The image might be too blurry or contain no readable text."
105
-
106
- # Handle different result types
107
- if isinstance(result, str):
108
- output_text = result.strip()
109
- elif isinstance(result, (list, tuple)) and len(result) > 0:
110
- output_text = str(result[0]).strip()
111
- elif isinstance(result, dict):
112
- # Try to get text from common keys
113
- output_text = result.get('text', result.get('output', result.get('result', str(result))))
114
  else:
115
- output_text = str(result).strip()
116
 
117
- if not output_text or output_text == "None":
118
- return "No text detected. Try adjusting the preset or uploading a clearer image."
 
 
 
 
 
 
119
 
120
- return output_text
121
 
122
- except Exception as e:
123
- # Ensure model is moved back to CPU on error
124
- model.to("cpu")
125
- torch.cuda.empty_cache()
126
- return f"Error processing image: {str(e)}\n\nPlease try a different preset or check if the image is valid."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
 
129
  # Create Gradio interface
 
41
  if image_input is None:
42
  return "Please upload an image first."
43
 
44
+ # Move model to GPU and set dtype
45
+ model.cuda().to(torch.bfloat16)
46
+
47
+ # Create temp directory for this session
48
+ with tempfile.TemporaryDirectory() as temp_dir:
49
+ # Save image with proper format
50
+ temp_image_path = os.path.join(temp_dir, "input_image.jpg")
51
+ # Convert RGBA to RGB if necessary
52
+ if image_input.mode in ('RGBA', 'LA', 'P'):
53
+ rgb_image = Image.new('RGB', image_input.size, (255, 255, 255))
54
+ # Handle different image modes
55
+ if image_input.mode == 'RGBA':
56
+ rgb_image.paste(image_input, mask=image_input.split()[3])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  else:
58
+ rgb_image.paste(image_input)
59
+ rgb_image.save(temp_image_path, 'JPEG', quality=95)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  else:
61
+ image_input.save(temp_image_path, 'JPEG', quality=95)
62
 
63
+ # Set parameters based on preset
64
+ presets = {
65
+ "tiny": {"base_size": 512, "image_size": 512, "crop_mode": False},
66
+ "small": {"base_size": 640, "image_size": 640, "crop_mode": False},
67
+ "base": {"base_size": 1024, "image_size": 1024, "crop_mode": False},
68
+ "large": {"base_size": 1280, "image_size": 1280, "crop_mode": False},
69
+ "gundam": {"base_size": 1024, "image_size": 640, "crop_mode": True},
70
+ }
71
 
72
+ config = presets[preset]
73
 
74
+ # Set prompt based on task type
75
+ if task_type == "markdown":
76
+ prompt = "<image>\n<|grounding|>Convert the document to markdown. "
77
+ else:
78
+ prompt = "<image>\nFree OCR. "
79
+
80
+ # Run inference - return the result directly
81
+ result = model.infer(
82
+ tokenizer,
83
+ prompt=prompt,
84
+ image_file=temp_image_path,
85
+ output_path=temp_dir,
86
+ base_size=config["base_size"],
87
+ image_size=config["image_size"],
88
+ crop_mode=config["crop_mode"],
89
+ save_results=False,
90
+ test_compress=False,
91
+ )
92
+
93
+ # Move model back to CPU to free GPU memory
94
+ model.to("cpu")
95
+ torch.cuda.empty_cache()
96
+
97
+ # Return the result directly - the model returns the extracted text
98
+ return result
99
 
100
 
101
  # Create Gradio interface