#!/usr/bin/env python3
"""
Execute the complete 3-agent pipeline on all JSON files in mordor_dataset.
This runs:
1. Log Analysis Agent
2. Retrieval Supervisor (with Database Agent and Grader)
3. Response Agent
Outputs are saved to final_response/ folder.
Usage: python execute_pipeline.py [--model MODEL_NAME]
"""
import argparse
import subprocess
import sys
from pathlib import Path


def find_project_root(start: Path) -> Path:
"""Find the project root by looking for common markers."""
for p in [start] + list(start.parents):
if (p / 'mordor_dataset').exists() or (p / 'src').exists() or (p / '.git').exists():
return p
    # Fallback: assume this script lives one level below the project root
    return start.parent


def main():
"""Execute pipeline on all mordor dataset files"""
parser = argparse.ArgumentParser(
description="Execute pipeline on all mordor dataset files",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Run with default model (Gemini 2.0 Flash)
python execute_pipeline.py
# Run with specific model
python execute_pipeline.py --model google_genai:gemini-2.0-flash
python execute_pipeline.py --model groq:gpt-oss-120b
python execute_pipeline.py --model groq:llama-3.1-8b-instant
Available models:
- google_genai:gemini-2.0-flash (default)
- google_genai:gemini-1.5-flash
- groq:gpt-oss-120b
- groq:gpt-oss-20b
- groq:llama-3.1-8b-instant
- groq:llama-3.3-70b-versatile
"""
)
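    # Model ids follow the "provider:model" convention listed in the epilog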
parser.add_argument(
"--model",
default="google_genai:gemini-2.0-flash",
help="Model to use for analysis (default: google_genai:gemini-2.0-flash)"
)
args = parser.parse_args()
model_name = args.model
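
    # Locate the repository root by walking up from this file's location so
    # the script works regardless of the current working directory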
current_file = Path(__file__).resolve()
project_root = find_project_root(current_file.parent)
mordor_dir = project_root / 'mordor_dataset'
if not mordor_dir.exists():
print(f"[ERROR] mordor_dataset not found at {mordor_dir}")
sys.exit(1)
    # Recursively collect every JSON file under mordor_dataset
    files = sorted(mordor_dir.rglob('*.json'))
if not files:
print("[ERROR] No JSON files found in mordor_dataset")
sys.exit(1)
print("="*80)
print("EXECUTING FULL PIPELINE ON ALL MORDOR FILES")
print("="*80)
print(f"Model: {model_name}")
print(f"Found {len(files)} files to process\n")
    # Group files by their immediate parent folder for readable progress output
    files_by_folder = {}
    for f in files:
        files_by_folder.setdefault(f.parent.name, []).append(f)
    # The per-file runner is expected at src/scripts/run_simple_pipeline.py;
    # resolve and check it once instead of on every iteration
    pipeline_script = project_root / 'src' / 'scripts' / 'run_simple_pipeline.py'
    if not pipeline_script.exists():
        print(f"[ERROR] Pipeline script not found: {pipeline_script}")
        sys.exit(1)

    # Process files
    total_success = 0
    total_failed = 0
    for folder_name in sorted(files_by_folder.keys()):
        folder_files = files_by_folder[folder_name]
        print(f"\n{'='*80}")
        print(f"Processing folder: {folder_name} ({len(folder_files)} files)")
        print(f"{'='*80}")
        for f in folder_files:
            # Run each file in its own subprocess so a single failure
            # cannot abort the whole batch
            cmd = [sys.executable, str(pipeline_script), str(f), "--model", model_name]
            print(f"\n--- Processing: {f.relative_to(mordor_dir)}")
            print(f"    Model: {model_name}")
try:
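                # check=True makes subprocess.run raise CalledProcessError
                # on a non-zero exit status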
subprocess.run(cmd, check=True)
total_success += 1
except subprocess.CalledProcessError as e:
print(f"[ERROR] Pipeline failed for {f.name}: {e}")
total_failed += 1
# Summary
print('\n' + '='*80)
print('PIPELINE EXECUTION COMPLETE')
print('='*80)
print(f"Model used: {model_name}")
print(f"Total files processed: {len(files)}")
print(f"Successful: {total_success}")
print(f"Failed: {total_failed}")
print(f"Results saved to: {project_root / 'final_response'}/")
print('='*80 + '\n')
if __name__ == "__main__":
main()