Spaces:
Running
Running
Commit
·
e223e6f
1
Parent(s):
1cc9cdc
Major update. Support for 15 LLMs, World Flora Online taxonomy validation, geolocation, 2 OCR methods, significant UI changes, stability improvements, consistent JSON parsing
Browse files
app.py
CHANGED
|
@@ -249,6 +249,126 @@ def load_gallery(converted_files, uploaded_file):
|
|
| 249 |
file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], uploaded_file, img)
|
| 250 |
st.session_state['input_list_small'].append(file_path_small)
|
| 251 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
def content_input_images(col_left, col_right):
|
| 253 |
st.write('---')
|
| 254 |
# col1, col2 = st.columns([2,8])
|
|
@@ -272,120 +392,10 @@ def content_input_images(col_left, col_right):
|
|
| 272 |
|
| 273 |
with col_right:
|
| 274 |
if st.session_state.is_hf:
|
| 275 |
-
|
| 276 |
-
# Clear input image gallery and input list
|
| 277 |
-
clear_image_uploads()
|
| 278 |
-
|
| 279 |
-
for uploaded_file in uploaded_files:
|
| 280 |
-
# Determine the file type
|
| 281 |
-
if uploaded_file.name.lower().endswith('.pdf'):
|
| 282 |
-
# Handle PDF files
|
| 283 |
-
file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file)
|
| 284 |
-
# Convert each page of the PDF to an image
|
| 285 |
-
n_pages = convert_pdf_to_jpg(file_path, st.session_state['dir_uploaded_images'], dpi=200)#st.session_state.config['leafmachine']['project']['dir_images_local'])
|
| 286 |
-
# Update the input list for each page image
|
| 287 |
-
converted_files = os.listdir(st.session_state['dir_uploaded_images'])
|
| 288 |
-
for file_name in converted_files:
|
| 289 |
-
if file_name.split('.')[1].lower() in ['jpg','jpeg']:
|
| 290 |
-
jpg_file_path = os.path.join(st.session_state['dir_uploaded_images'], file_name)
|
| 291 |
-
st.session_state['input_list'].append(jpg_file_path)
|
| 292 |
-
|
| 293 |
-
# Optionally, create a thumbnail for the gallery
|
| 294 |
-
img = Image.open(jpg_file_path)
|
| 295 |
-
img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
|
| 296 |
-
try:
|
| 297 |
-
file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], file_name, img)
|
| 298 |
-
except:
|
| 299 |
-
file_path_small = save_uploaded_file_local(st.session_state['dir_uploaded_images_small'],st.session_state['dir_uploaded_images_small'], file_name, img)
|
| 300 |
-
st.session_state['input_list_small'].append(file_path_small)
|
| 301 |
-
|
| 302 |
-
else:
|
| 303 |
-
# Handle JPG/JPEG files (existing process)
|
| 304 |
-
file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file)
|
| 305 |
-
st.session_state['input_list'].append(file_path)
|
| 306 |
-
img = Image.open(file_path)
|
| 307 |
-
img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
|
| 308 |
-
file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], uploaded_file, img)
|
| 309 |
-
st.session_state['input_list_small'].append(file_path_small)
|
| 310 |
-
|
| 311 |
-
# After processing all files
|
| 312 |
-
st.info(f"Processing images from {st.session_state.config['leafmachine']['project']['dir_images_local']}")
|
| 313 |
-
|
| 314 |
-
if st.session_state['input_list_small']:
|
| 315 |
-
if len(st.session_state['input_list_small']) > MAX_GALLERY_IMAGES:
|
| 316 |
-
# Only take the first 100 images from the list
|
| 317 |
-
images_to_display = st.session_state['input_list_small'][:MAX_GALLERY_IMAGES]
|
| 318 |
-
else:
|
| 319 |
-
# If there are less than 100 images, take them all
|
| 320 |
-
images_to_display = st.session_state['input_list_small']
|
| 321 |
-
show_gallery_small_hf(images_to_display)
|
| 322 |
|
| 323 |
else:
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
if st.session_state['view_local_gallery'] and st.session_state['input_list_small'] and (st.session_state['dir_images_local_TEMP'] == st.session_state.config['leafmachine']['project']['dir_images_local']):
|
| 327 |
-
if MAX_GALLERY_IMAGES <= st.session_state['processing_add_on']:
|
| 328 |
-
info_txt = f"Showing {MAX_GALLERY_IMAGES} out of {st.session_state['processing_add_on']} images"
|
| 329 |
-
else:
|
| 330 |
-
info_txt = f"Showing {st.session_state['processing_add_on']} out of {st.session_state['processing_add_on']} images"
|
| 331 |
-
st.info(info_txt)
|
| 332 |
-
try:
|
| 333 |
-
show_gallery_small()
|
| 334 |
-
except:
|
| 335 |
-
pass
|
| 336 |
-
|
| 337 |
-
elif not st.session_state['view_local_gallery'] and st.session_state['input_list_small'] and (st.session_state['dir_images_local_TEMP'] == st.session_state.config['leafmachine']['project']['dir_images_local']):
|
| 338 |
-
pass
|
| 339 |
-
elif not st.session_state['view_local_gallery'] and not st.session_state['input_list_small'] and (st.session_state['dir_images_local_TEMP'] == st.session_state.config['leafmachine']['project']['dir_images_local']):
|
| 340 |
-
pass
|
| 341 |
-
# elif st.session_state['input_list_small'] and (st.session_state['dir_images_local_TEMP'] != st.session_state.config['leafmachine']['project']['dir_images_local']):
|
| 342 |
-
elif (st.session_state['dir_images_local_TEMP'] != st.session_state.config['leafmachine']['project']['dir_images_local']):
|
| 343 |
-
has_pdf = False
|
| 344 |
-
clear_image_uploads()
|
| 345 |
-
|
| 346 |
-
for input_file in os.listdir(st.session_state.config['leafmachine']['project']['dir_images_local']):
|
| 347 |
-
if input_file.split('.')[1].lower() in ['jpg','jpeg']:
|
| 348 |
-
pass
|
| 349 |
-
elif input_file.split('.')[1].lower() in ['pdf',]:
|
| 350 |
-
has_pdf = True
|
| 351 |
-
# Handle PDF files
|
| 352 |
-
file_path = save_uploaded_file_local(st.session_state.config['leafmachine']['project']['dir_images_local'], st.session_state['dir_uploaded_images'], input_file)
|
| 353 |
-
# Convert each page of the PDF to an image
|
| 354 |
-
n_pages = convert_pdf_to_jpg(file_path, st.session_state['dir_uploaded_images'], dpi=200)#st.session_state.config['leafmachine']['project']['dir_images_local'])
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
# pdf_files_pattern = os.path.join(st.session_state['dir_uploaded_images'], '*.pdf')
|
| 358 |
-
# for pdf_file in glob.glob(pdf_files_pattern):
|
| 359 |
-
# os.remove(pdf_file)
|
| 360 |
-
|
| 361 |
-
# # Update the input list for each page image
|
| 362 |
-
# converted_files = os.listdir(st.session_state['dir_uploaded_images'])
|
| 363 |
-
# for file_name in converted_files:
|
| 364 |
-
# if file_name.lower().endswith('.jpg'):
|
| 365 |
-
# jpg_file_path = os.path.join(st.session_state['dir_uploaded_images'], file_name)
|
| 366 |
-
# st.session_state['input_list'].append(jpg_file_path)
|
| 367 |
-
|
| 368 |
-
# # Optionally, create a thumbnail for the gallery
|
| 369 |
-
# img = Image.open(jpg_file_path)
|
| 370 |
-
# img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
|
| 371 |
-
# file_path_small = save_uploaded_file_local(st.session_state['dir_uploaded_images'], st.session_state['dir_uploaded_images_small'], file_name, img)
|
| 372 |
-
# st.session_state['input_list_small'].append(file_path_small)
|
| 373 |
-
|
| 374 |
-
# st.session_state.config['leafmachine']['project']['dir_images_local'] = st.session_state['dir_uploaded_images']
|
| 375 |
-
|
| 376 |
-
else:
|
| 377 |
-
pass
|
| 378 |
-
# st.warning("Inputs must be '.PDF' or '.jpg' or '.jpeg'")
|
| 379 |
-
if has_pdf:
|
| 380 |
-
st.session_state.config['leafmachine']['project']['dir_images_local'] = st.session_state['dir_uploaded_images']
|
| 381 |
-
|
| 382 |
-
dir_images_local = st.session_state.config['leafmachine']['project']['dir_images_local']
|
| 383 |
-
count_n_imgs = list_jpg_files(dir_images_local)
|
| 384 |
-
st.session_state['processing_add_on'] = count_n_imgs
|
| 385 |
-
# print(st.session_state['processing_add_on'])
|
| 386 |
-
st.session_state['dir_images_local_TEMP'] = st.session_state.config['leafmachine']['project']['dir_images_local']
|
| 387 |
-
print("rerun")
|
| 388 |
-
st.rerun()
|
| 389 |
|
| 390 |
def list_jpg_files(directory_path):
|
| 391 |
jpg_count = 0
|
|
|
|
| 249 |
file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], uploaded_file, img)
|
| 250 |
st.session_state['input_list_small'].append(file_path_small)
|
| 251 |
|
| 252 |
+
def handle_image_upload_and_gallery_hf(uploaded_files):
    """Store uploaded JPG/PDF files, build gallery thumbnails, and render the gallery.

    For every uploaded file:
      * a PDF is saved into ``st.session_state['dir_uploaded_images']`` and
        converted to JPG pages in that same directory; each JPG found there is
        added to ``st.session_state['input_list']`` and a thumbnail is added to
        ``st.session_state['input_list_small']``;
      * any other file is saved as-is and thumbnailed the same way.

    Afterwards, at most ``MAX_GALLERY_IMAGES`` thumbnails are passed to
    ``show_gallery_small_hf``.

    This function is intentionally NOT wrapped in ``st.cache_data``: its whole
    purpose is side effects (session-state mutation, writing files, drawing UI),
    which a cache hit would skip.

    NOTE(review): the nesting below was reconstructed from a diff whose
    indentation was stripped; in particular, confirm whether the gallery block
    belongs inside or outside the ``if uploaded_files:`` guard.

    Args:
        uploaded_files: iterable of uploaded file objects exposing ``.name``
            (presumably the list returned by ``st.file_uploader`` — confirm
            against the caller). May be empty or ``None``.
    """
    if uploaded_files:
        # Clear input image gallery and input list
        clear_image_uploads()

        uploads_dir = st.session_state['dir_uploaded_images']
        thumbs_dir = st.session_state['dir_uploaded_images_small']
        thumb_size = (GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE)

        # Paths already registered; the upload directory is re-listed after
        # every PDF, so without this an earlier image would be appended again.
        registered = set(st.session_state['input_list'])

        for uploaded_file in uploaded_files:
            if uploaded_file.name.lower().endswith('.pdf'):
                # Handle PDF files: save, then convert each page to an image.
                file_path = save_uploaded_file(uploads_dir, uploaded_file)
                convert_pdf_to_jpg(file_path, uploads_dir, dpi=200)

                # Update the input list for each page image
                for file_name in os.listdir(uploads_dir):
                    # splitext copes with names that have no dot or several dots,
                    # where ``split('.')[1]`` would raise or pick the wrong part.
                    if os.path.splitext(file_name)[1].lower() not in ('.jpg', '.jpeg'):
                        continue
                    jpg_file_path = os.path.join(uploads_dir, file_name)
                    if jpg_file_path in registered:
                        continue
                    registered.add(jpg_file_path)
                    st.session_state['input_list'].append(jpg_file_path)

                    # Create a thumbnail for the gallery
                    with Image.open(jpg_file_path) as img:
                        img.thumbnail(thumb_size, Image.Resampling.LANCZOS)
                        try:
                            file_path_small = save_uploaded_file(thumbs_dir, file_name, img)
                        except Exception:
                            # Fallback kept from the original code; presumably
                            # needed because ``file_name`` is a plain string
                            # rather than an uploaded-file object — confirm.
                            file_path_small = save_uploaded_file_local(thumbs_dir, thumbs_dir, file_name, img)
                    st.session_state['input_list_small'].append(file_path_small)
            else:
                # Handle JPG/JPEG files (existing process)
                file_path = save_uploaded_file(uploads_dir, uploaded_file)
                registered.add(file_path)
                st.session_state['input_list'].append(file_path)
                with Image.open(file_path) as img:
                    img.thumbnail(thumb_size, Image.Resampling.LANCZOS)
                    file_path_small = save_uploaded_file(thumbs_dir, uploaded_file, img)
                st.session_state['input_list_small'].append(file_path_small)

        # After processing all files
        st.info(f"Processing images from {st.session_state.config['leafmachine']['project']['dir_images_local']}")

    if st.session_state['input_list_small']:
        # Slicing caps the gallery at MAX_GALLERY_IMAGES and is a no-op for
        # shorter lists.
        images_to_display = st.session_state['input_list_small'][:MAX_GALLERY_IMAGES]
        show_gallery_small_hf(images_to_display)
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
def handle_image_upload_and_gallery():
    """Render the local-image gallery toggle and keep the image source in sync.

    Behaviour, driven entirely by ``st.session_state``:

    * If the configured ``dir_images_local`` differs from
      ``dir_images_local_TEMP`` (the directory handled on the previous run),
      the upload state is cleared, every PDF in the configured directory is
      copied into ``dir_uploaded_images`` and converted to JPG there, the
      configured directory is switched to ``dir_uploaded_images`` when at least
      one PDF was found, the JPG count is stored in ``processing_add_on``, and
      the script is rerun.
    * Otherwise, when the "View Image Gallery" toggle is on and thumbnails
      exist in ``input_list_small``, an info line and the gallery are shown.
    * In every other case nothing is drawn.

    This function is intentionally NOT wrapped in ``st.cache_data``: it takes no
    arguments (so it would only ever execute once), creates a widget, mutates
    session state and calls ``st.rerun()``.

    NOTE(review): nesting was reconstructed from a diff whose indentation was
    stripped — confirm against the real file.
    """
    st.session_state['view_local_gallery'] = st.toggle("View Image Gallery",)

    project_cfg = st.session_state.config['leafmachine']['project']

    if st.session_state['dir_images_local_TEMP'] != project_cfg['dir_images_local']:
        # The image directory changed since the last run: rebuild everything.
        clear_image_uploads()

        source_dir = project_cfg['dir_images_local']
        uploads_dir = st.session_state['dir_uploaded_images']
        has_pdf = False

        for input_file in os.listdir(source_dir):
            # Only PDFs need work here; JPG/JPEG and anything else is left
            # alone. splitext also copes with sub-directories and names without
            # an extension, where ``split('.')[1]`` raised IndexError.
            if os.path.splitext(input_file)[1].lower() != '.pdf':
                continue
            has_pdf = True
            # Copy the PDF next to the uploads, then convert each page to an image.
            file_path = save_uploaded_file_local(source_dir, uploads_dir, input_file)
            convert_pdf_to_jpg(file_path, uploads_dir, dpi=200)

        if has_pdf:
            # Converted pages live in the upload directory, so process from there.
            project_cfg['dir_images_local'] = uploads_dir

        st.session_state['processing_add_on'] = list_jpg_files(project_cfg['dir_images_local'])
        # Remember the directory just handled so the next run takes the display path.
        st.session_state['dir_images_local_TEMP'] = project_cfg['dir_images_local']
        print("rerun")
        st.rerun()

    elif st.session_state['view_local_gallery'] and st.session_state['input_list_small']:
        total = st.session_state['processing_add_on']
        shown = min(MAX_GALLERY_IMAGES, total)
        st.info(f"Showing {shown} out of {total} images")
        try:
            show_gallery_small()
        except Exception as exc:
            # Gallery rendering stays best-effort, but only ordinary errors are
            # swallowed; a bare ``except`` would also trap BaseException-derived
            # signals (KeyboardInterrupt, and presumably Streamlit's
            # rerun/stop control flow — confirm).
            print(f"show_gallery_small failed: {exc}")
|
| 370 |
+
|
| 371 |
+
|
| 372 |
def content_input_images(col_left, col_right):
|
| 373 |
st.write('---')
|
| 374 |
# col1, col2 = st.columns([2,8])
|
|
|
|
| 392 |
|
| 393 |
with col_right:
|
| 394 |
if st.session_state.is_hf:
|
| 395 |
+
handle_image_upload_and_gallery_hf(uploaded_files)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 396 |
|
| 397 |
else:
|
| 398 |
+
handle_image_upload_and_gallery()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 399 |
|
| 400 |
def list_jpg_files(directory_path):
|
| 401 |
jpg_count = 0
|