akhaliq HF Staff committed on
Commit
b245a85
·
verified ·
1 Parent(s): d44e05d

Update Gradio app with multiple files

Browse files
Files changed (1) hide show
  1. app.py +65 -67
app.py CHANGED
@@ -5,8 +5,9 @@ from PIL import Image
5
  import os
6
  import spaces
7
  import tempfile
8
- import json
9
- from pathlib import Path
 
10
 
11
  # Set CUDA device
12
  os.environ["CUDA_VISIBLE_DEVICES"] = '0'
@@ -23,6 +24,17 @@ model = AutoModel.from_pretrained(
23
  model = model.eval()
24
 
25
 
 
 
 
 
 
 
 
 
 
 
 
26
  @spaces.GPU(duration=120)
27
  def ocr_process(
28
  image_input: Image.Image,
@@ -79,81 +91,67 @@ def ocr_process(
79
  else:
80
  prompt = "<image>\nFree OCR. "
81
 
82
- # Run inference with save_results=True to save output
83
- result = model.infer(
84
- tokenizer,
85
- prompt=prompt,
86
- image_file=temp_image_path,
87
- output_path=temp_dir,
88
- base_size=config["base_size"],
89
- image_size=config["image_size"],
90
- crop_mode=config["crop_mode"],
91
- save_results=True,
92
- test_compress=True,
93
- )
 
 
 
94
 
95
- # Try to read the saved results
96
  extracted_text = ""
97
 
98
- # Check for saved JSON results
99
- json_path = Path(temp_dir) / "input_image_outputs.json"
100
- if json_path.exists():
101
- try:
102
- with open(json_path, 'r', encoding='utf-8') as f:
103
- data = json.load(f)
104
- # Extract text from the JSON structure
105
- if isinstance(data, dict):
106
- if 'text' in data:
107
- extracted_text = data['text']
108
- elif 'output' in data:
109
- extracted_text = data['output']
110
- elif 'result' in data:
111
- extracted_text = data['result']
112
- else:
113
- # If the structure is different, try to get the first string value
114
- for key, value in data.items():
115
- if isinstance(value, str) and len(value) > 10:
116
- extracted_text = value
117
- break
118
- elif isinstance(data, list) and len(data) > 0:
119
- extracted_text = str(data[0])
120
- else:
121
- extracted_text = str(data)
122
- except Exception as e:
123
- print(f"Error reading JSON: {e}")
124
 
125
- # If no JSON, check for text file
126
- if not extracted_text:
127
- txt_path = Path(temp_dir) / "input_image_outputs.txt"
128
- if txt_path.exists():
129
- try:
130
- with open(txt_path, 'r', encoding='utf-8') as f:
131
- extracted_text = f.read()
132
- except Exception as e:
133
- print(f"Error reading text file: {e}")
 
 
 
 
134
 
135
- # If still no text, check for any output files
136
- if not extracted_text:
137
- output_files = list(Path(temp_dir).glob("*output*"))
138
- for file_path in output_files:
139
- if file_path.suffix in ['.txt', '.json', '.md']:
140
- try:
141
- with open(file_path, 'r', encoding='utf-8') as f:
142
- content = f.read()
143
- if content.strip():
144
- extracted_text = content
145
- break
146
- except Exception as e:
147
- print(f"Error reading {file_path}: {e}")
148
 
149
- # If we still don't have text but result is not None, use result directly
150
  if not extracted_text and result is not None:
151
  if isinstance(result, str):
152
  extracted_text = result
153
  elif isinstance(result, (list, tuple)) and len(result) > 0:
154
- extracted_text = str(result[0])
155
- else:
156
- extracted_text = str(result)
 
 
 
 
 
 
 
 
 
 
 
157
 
158
  # Move model back to CPU to free GPU memory
159
  model.to("cpu")
 
5
  import os
6
  import spaces
7
  import tempfile
8
+ import sys
9
+ from io import StringIO
10
+ from contextlib import contextmanager
11
 
12
  # Set CUDA device
13
  os.environ["CUDA_VISIBLE_DEVICES"] = '0'
 
24
  model = model.eval()
25
 
26
 
27
@contextmanager
def capture_stdout():
    """Temporarily redirect ``sys.stdout`` into an in-memory buffer.

    Used to collect text that a callee prints instead of returning.

    Yields:
        io.StringIO: the buffer receiving everything written to
        ``sys.stdout`` while the ``with`` block is active. The buffer stays
        readable (``getvalue()``) after the block exits.

    The previous ``sys.stdout`` is restored when the block exits, including
    when the body raises.
    """
    # Function-scope import keeps this block self-contained; the stdlib helper
    # performs the save/replace/restore of sys.stdout that was hand-rolled here.
    from contextlib import redirect_stdout

    buffer = StringIO()
    with redirect_stdout(buffer):
        yield buffer
36
+
37
+
38
  @spaces.GPU(duration=120)
39
  def ocr_process(
40
  image_input: Image.Image,
 
91
  else:
92
  prompt = "<image>\nFree OCR. "
93
 
94
+ # Capture stdout while running inference
95
+ captured_output = ""
96
+ with capture_stdout() as output:
97
+ result = model.infer(
98
+ tokenizer,
99
+ prompt=prompt,
100
+ image_file=temp_image_path,
101
+ output_path=temp_dir,
102
+ base_size=config["base_size"],
103
+ image_size=config["image_size"],
104
+ crop_mode=config["crop_mode"],
105
+ save_results=True,
106
+ test_compress=True,
107
+ )
108
+ captured_output = output.getvalue()
109
 
110
+ # Extract the text from captured output
111
  extracted_text = ""
112
 
113
+ # Look for the actual OCR result in the captured output
114
+ # The model prints the extracted text between certain markers
115
+ lines = captured_output.split('\n')
116
+ capture_text = False
117
+ text_lines = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
+ for line in lines:
120
+ # Start capturing after seeing certain patterns
121
+ if "# " in line or line.strip().startswith("**"):
122
+ capture_text = True
123
+
124
+ if capture_text:
125
+ # Stop at the separator lines
126
+ if line.startswith("====") or line.startswith("---") and len(line) > 10:
127
+ if text_lines: # Only stop if we've captured something
128
+ break
129
+ # Add non-empty lines that aren't debug output
130
+ elif line.strip() and not line.startswith("image size:") and not line.startswith("valid image") and not line.startswith("output texts") and not line.startswith("compression"):
131
+ text_lines.append(line)
132
 
133
+ if text_lines:
134
+ extracted_text = '\n'.join(text_lines)
 
 
 
 
 
 
 
 
 
 
 
135
 
136
+ # If we didn't get text from stdout, check if result contains text
137
  if not extracted_text and result is not None:
138
  if isinstance(result, str):
139
  extracted_text = result
140
  elif isinstance(result, (list, tuple)) and len(result) > 0:
141
+ # Try to extract text from the result
142
+ if isinstance(result[0], str):
143
+ extracted_text = result[0]
144
+ elif hasattr(result[0], 'text'):
145
+ extracted_text = result[0].text
146
+
147
+ # Clean up any remaining markers from the text
148
+ if extracted_text:
149
+ # Remove any remaining debug output patterns
150
+ clean_lines = []
151
+ for line in extracted_text.split('\n'):
152
+ if not any(pattern in line.lower() for pattern in ['image size:', 'valid image', 'compression ratio', 'save results:', 'output texts']):
153
+ clean_lines.append(line)
154
+ extracted_text = '\n'.join(clean_lines).strip()
155
 
156
  # Move model back to CPU to free GPU memory
157
  model.to("cpu")