Spaces:

orasul
/

deki

Running

App Files Files Community

deki / wrapper.py

orasul

Load initial app

6ff22d6 5 months ago

raw

history blame contribute delete

5.68 kB

	import os
	import sys
	import argparse
	from ultralytics import YOLO
	from os.path import basename, splitext
	import time

	from yolo_script import process_yolo

	from script import process_image

	def process_image_description(
	    input_image: str,
	    weights_file: str,
	    output_dir: str,
	    model_to_use: str = 'llama',
	    save_images: bool = False,
	    icon_detection_path: str = None,
	    cache_directory: str = './models_cache',
	    huggingface_token: str = 'your_token',
	    no_captioning: bool = False,
	    output_json: bool = False,
	    json_mini: bool = False,
	    model_obj: YOLO = None,
	    sr=None,
	    reader=None,
	    spell=None,
	    skip_ocr=False,
	    skip_spell=False,
	) -> None:
	    """
	    Run YOLO detection on an image, then describe it via process_image().

	    The detection step is delegated to process_yolo(). Afterwards a label file
	    named ``<output_dir>/labels/<image stem>.txt`` must exist; its path is handed
	    to process_image() from script.py together with the remaining options.

	    Parameters:
	    - input_image: Path to the input image.
	    - weights_file: Path to the YOLO weights file.
	    - output_dir: Directory for YOLO output; also forwarded to process_image().
	    - model_to_use: Which model to use for captioning ('llama' or 'blip').
	    - save_images: Whether to save intermediate images.
	    - icon_detection_path: Optional path to an icon detection model.
	    - cache_directory: Cache directory for models.
	    - huggingface_token: Hugging Face token for model downloads.
	    - no_captioning: If True, disable image captioning.
	    - output_json: If True, output the results in JSON format.
	    - json_mini: Same as output_json but with a more compact JSON output.
	    - model_obj: YOLO object that was initialized at startup time (optional).
	    - sr: Super resolution object (optional).
	    - reader: EasyOCR object (optional).
	    - spell: Spell checker object (optional).
	    - skip_ocr: Forwarded to process_image(); per the CLI help, disables OCR
	      and spell-checking.
	    - skip_spell: Forwarded to process_image(); per the CLI help, runs OCR but
	      skips the spell-check.

	    Raises:
	    - FileNotFoundError: if the expected label file is missing after detection.
	    """
	    # The label file is looked up under the image's file name without extension.
	    image_stem, _extension = splitext(basename(input_image))

	    process_yolo(input_image, weights_file, output_dir, model_obj=model_obj)

	    # Detection output is expected at <output_dir>/labels/<stem>.txt.
	    label_file = os.path.join(output_dir, 'labels', image_stem + '.txt')
	    if not os.path.isfile(label_file):
	        raise FileNotFoundError(f"Labels file not found at expected path: {label_file}")

	    # Collect everything process_image() needs, then forward it in one call.
	    description_kwargs = {
	        'input_image_path': input_image,
	        'yolo_output_path': label_file,
	        'output_dir': output_dir,
	        'model_to_use': model_to_use,
	        'save_images': save_images,
	        'icon_model_path': icon_detection_path,
	        'cache_directory': cache_directory,
	        'huggingface_token': huggingface_token,
	        'no_captioning': no_captioning,
	        'output_json': output_json,
	        'json_mini': json_mini,
	        'sr': sr,
	        'reader': reader,
	        'spell': spell,
	        'skip_ocr': skip_ocr,
	        'skip_spell': skip_spell,
	    }
	    process_image(**description_kwargs)

	if __name__ == '__main__':
	    # Command-line entry point: parse the options, run YOLO detection, then run
	    # the image description step on the label file found under the output dir.
	    parser = argparse.ArgumentParser(
	        description='Wrapper script to run YOLO detection and image description in sequence.'
	    )
	    parser.add_argument('--input_image', required=True, help='Path to the input image.')
	    parser.add_argument('--weights_file', required=True, help='Path to the YOLO weights file.')
	    parser.add_argument('--output_dir', default='./output', help='Output directory for YOLO results.')
	    parser.add_argument('--model_to_use', choices=['llama', 'blip'], default='llama',
	                        help='Model for captioning.')
	    parser.add_argument('--save_images', action='store_true',
	                        help='Flag to save intermediate images.')
	    parser.add_argument('--icon_detection_path', help='Path to the icon detection model.')
	    parser.add_argument('--cache_directory', default='./models_cache',
	                        help='Cache directory for models.')
	    parser.add_argument('--huggingface_token', default='your_token',
	                        help='Hugging Face token for model downloads.')
	    parser.add_argument('--no-captioning', action='store_true',
	                        help='Disable any image captioning')
	    parser.add_argument('--json', dest='output_json', action='store_true',
	                        help='Output the image data in JSON format')
	    parser.add_argument('--json-mini', action='store_true',
	                        help='JSON output in a more condensed format')
	    parser.add_argument('--skip-ocr', action='store_true',
	                        help='Disable OCR & spell-checking (faster).')
	    parser.add_argument('--skip-spell', action='store_true', help='Run OCR but skip spell-check')

	    args = parser.parse_args()

	    try:
	        print("Running YOLO detection...")
	        yolo_output_dir = args.output_dir
	        # Make sure the output directory exists before detection runs.
	        os.makedirs(yolo_output_dir, exist_ok=True)
	        process_yolo(args.input_image, args.weights_file, yolo_output_dir)

	        # The label file is expected at <output_dir>/labels/<image stem>.txt.
	        base_name = splitext(basename(args.input_image))[0]
	        labels_dir = os.path.join(yolo_output_dir, 'labels')
	        label_file = os.path.join(labels_dir, base_name + '.txt')
	        if not os.path.isfile(label_file):
	            raise FileNotFoundError(f"Labels file not found: {label_file}")

	        print("Running image description...")
	        # NOTE(review): unlike process_image_description(), this call does not
	        # forward output_dir, so process_image() falls back to its own default.
	        # Confirm whether --output_dir should apply to this step as well.
	        process_image(
	            input_image_path=args.input_image,
	            yolo_output_path=label_file,
	            model_to_use=args.model_to_use,
	            save_images=args.save_images,
	            icon_model_path=args.icon_detection_path,
	            cache_directory=args.cache_directory,
	            huggingface_token=args.huggingface_token,
	            no_captioning=args.no_captioning,
	            output_json=args.output_json,
	            json_mini=args.json_mini,
	            skip_ocr=args.skip_ocr,
	            skip_spell=args.skip_spell
	        )
	    except Exception as e:
	        # Top-level boundary: report the failure on stderr so diagnostics are not
	        # mixed into normal stdout output, and signal failure via the exit code.
	        print(e, file=sys.stderr)
	        sys.exit(1)