akhaliq (HF Staff) committed · Commit d44e05d · verified · Parent(s): 15cf5b6

Update Gradio app with multiple files

Files changed (1): app.py (+68 −3)
app.py CHANGED
@@ -5,6 +5,8 @@ from PIL import Image
 import os
 import spaces
 import tempfile
+import json
+from pathlib import Path
 
 # Set CUDA device
 os.environ["CUDA_VISIBLE_DEVICES"] = '0'
@@ -77,7 +79,7 @@ def ocr_process(
     else:
         prompt = "<image>\nFree OCR. "
 
-    # Run inference - return the result directly
+    # Run inference with save_results=True to save output
     result = model.infer(
         tokenizer,
         prompt=prompt,
@@ -89,13 +91,76 @@ def ocr_process(
         save_results=True,
         test_compress=True,
     )
+
+    # Try to read the saved results
+    extracted_text = ""
+
+    # Check for saved JSON results
+    json_path = Path(temp_dir) / "input_image_outputs.json"
+    if json_path.exists():
+        try:
+            with open(json_path, 'r', encoding='utf-8') as f:
+                data = json.load(f)
+            # Extract text from the JSON structure
+            if isinstance(data, dict):
+                if 'text' in data:
+                    extracted_text = data['text']
+                elif 'output' in data:
+                    extracted_text = data['output']
+                elif 'result' in data:
+                    extracted_text = data['result']
+                else:
+                    # If the structure is different, try to get the first string value
+                    for key, value in data.items():
+                        if isinstance(value, str) and len(value) > 10:
+                            extracted_text = value
+                            break
+            elif isinstance(data, list) and len(data) > 0:
+                extracted_text = str(data[0])
+            else:
+                extracted_text = str(data)
+        except Exception as e:
+            print(f"Error reading JSON: {e}")
+
+    # If no JSON, check for text file
+    if not extracted_text:
+        txt_path = Path(temp_dir) / "input_image_outputs.txt"
+        if txt_path.exists():
+            try:
+                with open(txt_path, 'r', encoding='utf-8') as f:
+                    extracted_text = f.read()
+            except Exception as e:
+                print(f"Error reading text file: {e}")
+
+    # If still no text, check for any output files
+    if not extracted_text:
+        output_files = list(Path(temp_dir).glob("*output*"))
+        for file_path in output_files:
+            if file_path.suffix in ['.txt', '.json', '.md']:
+                try:
+                    with open(file_path, 'r', encoding='utf-8') as f:
+                        content = f.read()
+                    if content.strip():
+                        extracted_text = content
+                        break
+                except Exception as e:
+                    print(f"Error reading {file_path}: {e}")
+
+    # If we still don't have text but result is not None, use result directly
+    if not extracted_text and result is not None:
+        if isinstance(result, str):
+            extracted_text = result
+        elif isinstance(result, (list, tuple)) and len(result) > 0:
+            extracted_text = str(result[0])
+        else:
+            extracted_text = str(result)
 
     # Move model back to CPU to free GPU memory
     model.to("cpu")
     torch.cuda.empty_cache()
 
-    # Return the result directly - the model returns the extracted text
-    return result
+    # Return the extracted text
+    return extracted_text if extracted_text else "No text could be extracted from the image. Please try a different preset or check if the image contains readable text."
 
 
 # Create Gradio interface
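
For context, the fallback chain this commit adds is easiest to follow when pulled out of ocr_process into a standalone helper. The sketch below is a minimal refactor of the logic in the diff, assuming only the output file names ("input_image_outputs.json" / ".txt") and the "*output*" glob that appear above; the helper name read_saved_results, its fallback_result parameter, and the synthetic file in the quick check are illustrative, not part of the app.

import json
import tempfile
from pathlib import Path

def read_saved_results(temp_dir, fallback_result=None):
    """Mirror the commit's fallback chain: saved JSON first, then a
    plain-text file, then any *output* file, then the raw infer() result."""
    # 1. Saved JSON results (same keys the commit probes: text/output/result)
    json_path = Path(temp_dir) / "input_image_outputs.json"
    if json_path.exists():
        try:
            data = json.loads(json_path.read_text(encoding='utf-8'))
            if isinstance(data, dict):
                for key in ('text', 'output', 'result'):
                    if key in data:
                        return data[key]
                for value in data.values():  # unknown shape: first long string
                    if isinstance(value, str) and len(value) > 10:
                        return value
            elif isinstance(data, list) and data:
                return str(data[0])
            else:
                return str(data)
        except Exception as e:
            print(f"Error reading JSON: {e}")
    # 2. Plain-text output file
    txt_path = Path(temp_dir) / "input_image_outputs.txt"
    if txt_path.exists():
        try:
            return txt_path.read_text(encoding='utf-8')
        except Exception as e:
            print(f"Error reading text file: {e}")
    # 3. Any other *output* file with a readable suffix
    for file_path in Path(temp_dir).glob("*output*"):
        if file_path.suffix in ('.txt', '.json', '.md'):
            try:
                content = file_path.read_text(encoding='utf-8')
                if content.strip():
                    return content
            except Exception as e:
                print(f"Error reading {file_path}: {e}")
    # 4. Fall back to whatever infer() returned
    if fallback_result is not None:
        if isinstance(fallback_result, str):
            return fallback_result
        if isinstance(fallback_result, (list, tuple)) and fallback_result:
            return str(fallback_result[0])
        return str(fallback_result)
    return ""

# Quick check with a synthetic results file:
with tempfile.TemporaryDirectory() as d:
    (Path(d) / "input_image_outputs.json").write_text(
        json.dumps({"text": "hello world from OCR"}), encoding='utf-8')
    assert read_saved_results(d) == "hello world from OCR"

Written as an early-return helper, the priority order is explicit and each fallback can be tested in isolation; in the app itself the same logic runs inline, before the model is moved back to the CPU.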