#!/usr/bin/env python3
"""
Execute the complete 3-agent pipeline on all JSON files in mordor_dataset.
This runs:
1. Log Analysis Agent
2. Retrieval Supervisor (with Database Agent and Grader)
3. Response Agent
Outputs are saved to final_response/ folder.
Usage: python execute_pipeline.py [--model MODEL_NAME]
"""
import argparse
import subprocess
import sys
from pathlib import Path


def find_project_root(start: Path) -> Path:
"""Find the project root by looking for common markers."""
for p in [start] + list(start.parents):
if (p / 'mordor_dataset').exists() or (p / 'src').exists() or (p / '.git').exists():
return p
    # Fallback: assume this script lives one level below the project root
    return start.parent


def main():
"""Execute pipeline on all mordor dataset files"""
parser = argparse.ArgumentParser(
description="Execute pipeline on all mordor dataset files",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Run with default model (Gemini 2.0 Flash)
python execute_pipeline.py
# Run with specific model
python execute_pipeline.py --model google_genai:gemini-2.0-flash
python execute_pipeline.py --model groq:gpt-oss-120b
python execute_pipeline.py --model groq:llama-3.1-8b-instant
Available models:
- google_genai:gemini-2.0-flash (default)
- google_genai:gemini-1.5-flash
- groq:gpt-oss-120b
- groq:gpt-oss-20b
- groq:llama-3.1-8b-instant
- groq:llama-3.3-70b-versatile
"""
)
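    # Model ids follow the "provider:model" convention listed in the epilog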
parser.add_argument(
"--model",
default="google_genai:gemini-2.0-flash",
help="Model to use for analysis (default: google_genai:gemini-2.0-flash)"
)
args = parser.parse_args()
model_name = args.model
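
    # Locate the repository root by walking up from this file's location so
    # the script works regardless of the current working directory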
current_file = Path(__file__).resolve()
project_root = find_project_root(current_file.parent)
mordor_dir = project_root / 'mordor_dataset'
if not mordor_dir.exists():
print(f"[ERROR] mordor_dataset not found at {mordor_dir}")
sys.exit(1)
    # Recursively collect every JSON file under mordor_dataset
    files = sorted(mordor_dir.rglob('*.json'))
if not files:
print("[ERROR] No JSON files found in mordor_dataset")
sys.exit(1)
print("="*80)
print("EXECUTING FULL PIPELINE ON ALL MORDOR FILES")
print("="*80)
print(f"Model: {model_name}")
print(f"Found {len(files)} files to process\n")
    # Group files by their immediate parent folder for readable progress output
    files_by_folder = {}
    for f in files:
        files_by_folder.setdefault(f.parent.name, []).append(f)
    # The per-file runner is expected at src/scripts/run_simple_pipeline.py;
    # resolve and check it once instead of on every iteration
    pipeline_script = project_root / 'src' / 'scripts' / 'run_simple_pipeline.py'
    if not pipeline_script.exists():
        print(f"[ERROR] Pipeline script not found: {pipeline_script}")
        sys.exit(1)

    # Process files
    total_success = 0
    total_failed = 0
    for folder_name in sorted(files_by_folder.keys()):
        folder_files = files_by_folder[folder_name]
        print(f"\n{'='*80}")
        print(f"Processing folder: {folder_name} ({len(folder_files)} files)")
        print(f"{'='*80}")
        for f in folder_files:
            # Run each file in its own subprocess so a single failure
            # cannot abort the whole batch
            cmd = [sys.executable, str(pipeline_script), str(f), "--model", model_name]
            print(f"\n--- Processing: {f.relative_to(mordor_dir)}")
            print(f"    Model: {model_name}")
try:
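                # check=True makes subprocess.run raise CalledProcessError
                # on a non-zero exit status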
subprocess.run(cmd, check=True)
total_success += 1
except subprocess.CalledProcessError as e:
print(f"[ERROR] Pipeline failed for {f.name}: {e}")
total_failed += 1
# Summary
print('\n' + '='*80)
print('PIPELINE EXECUTION COMPLETE')
print('='*80)
print(f"Model used: {model_name}")
print(f"Total files processed: {len(files)}")
print(f"Successful: {total_success}")
print(f"Failed: {total_failed}")
print(f"Results saved to: {project_root / 'final_response'}/")
print('='*80 + '\n')
if __name__ == "__main__":
main()