#!/usr/bin/env python3
"""
Execute the complete 3-agent pipeline on all JSON files in mordor_dataset.

This runs:
    1. Log Analysis Agent
    2. Retrieval Supervisor (with Database Agent and Grader)
    3. Response Agent

Outputs are saved to the final_response/ folder.

Usage:
    python execute_pipeline.py [--model MODEL_NAME]
"""

import argparse
import subprocess
import sys
from pathlib import Path


def find_project_root(start: Path) -> Path:
    """Find the project root by looking for common markers."""
    for p in [start] + list(start.parents):
        if (p / 'mordor_dataset').exists() or (p / 'src').exists() or (p / '.git').exists():
            return p
    return start.parent


def main():
    """Execute the pipeline on all mordor dataset files."""
    parser = argparse.ArgumentParser(
        description="Execute pipeline on all mordor dataset files",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Run with the default model (Gemini 2.0 Flash)
  python execute_pipeline.py

  # Run with a specific model
  python execute_pipeline.py --model google_genai:gemini-2.0-flash
  python execute_pipeline.py --model groq:gpt-oss-120b
  python execute_pipeline.py --model groq:llama-3.1-8b-instant

Available models:
  - google_genai:gemini-2.0-flash (default)
  - google_genai:gemini-1.5-flash
  - groq:gpt-oss-120b
  - groq:gpt-oss-20b
  - groq:llama-3.1-8b-instant
  - groq:llama-3.3-70b-versatile
"""
    )
    parser.add_argument(
        "--model",
        default="google_genai:gemini-2.0-flash",
        help="Model to use for analysis (default: google_genai:gemini-2.0-flash)"
    )
    args = parser.parse_args()
    model_name = args.model

    current_file = Path(__file__).resolve()
    project_root = find_project_root(current_file.parent)
    mordor_dir = project_root / 'mordor_dataset'

    if not mordor_dir.exists():
        print(f"[ERROR] mordor_dataset not found at {mordor_dir}")
        sys.exit(1)

    # Find all JSON files (recursively)
    files = sorted(mordor_dir.rglob('*.json'))
    if not files:
        print("[ERROR] No JSON files found in mordor_dataset")
        sys.exit(1)

    # Fail fast if the pipeline script is missing (checked once, not per file).
    # The script is assumed to live at src/scripts/run_simple_pipeline.py.
    pipeline_script = project_root / 'src' / 'scripts' / 'run_simple_pipeline.py'
    if not pipeline_script.exists():
        print(f"[ERROR] Pipeline script not found: {pipeline_script}")
        sys.exit(1)

    print("=" * 80)
    print("EXECUTING FULL PIPELINE ON ALL MORDOR FILES")
    print("=" * 80)
    print(f"Model: {model_name}")
    print(f"Found {len(files)} files to process\n")

    # Group files by their parent folder name
    files_by_folder = {}
    for f in files:
        files_by_folder.setdefault(f.parent.name, []).append(f)

    # Process each folder's files in order, shelling out once per file
    total_success = 0
    total_failed = 0
    for folder_name in sorted(files_by_folder):
        folder_files = files_by_folder[folder_name]
        print(f"\n{'=' * 80}")
        print(f"Processing folder: {folder_name} ({len(folder_files)} files)")
        print(f"{'=' * 80}")

        for f in folder_files:
            cmd = [sys.executable, str(pipeline_script), str(f), "--model", model_name]
            print(f"\n--- Processing: {f.relative_to(mordor_dir)}")
            print(f"    Model: {model_name}")
            try:
                subprocess.run(cmd, check=True)
                total_success += 1
            except subprocess.CalledProcessError as e:
                print(f"[ERROR] Pipeline failed for {f.name}: {e}")
                total_failed += 1

    # Summary
    print('\n' + '=' * 80)
    print('PIPELINE EXECUTION COMPLETE')
    print('=' * 80)
    print(f"Model used: {model_name}")
    print(f"Total files processed: {len(files)}")
    print(f"Successful: {total_success}")
    print(f"Failed: {total_failed}")
    print(f"Results saved to: {project_root / 'final_response'}/")
    print('=' * 80 + '\n')


if __name__ == "__main__":
    main()
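

# --------------------------------------------------------------------------
# Optional: parallel execution sketch (not used by main() above).
# A minimal sketch, assuming the per-file run_simple_pipeline.py invocations
# are independent and the backing model API tolerates concurrent requests
# (rate limits may say otherwise). In a real script this helper would sit
# above the __main__ guard; `run_all_parallel` and `max_workers` are
# illustrative names, not part of the original pipeline.
from concurrent.futures import ThreadPoolExecutor, as_completed


def run_all_parallel(files, pipeline_script, model_name, max_workers=4):
    """Run the pipeline on each file concurrently; return (success, failed) counts."""
    def run_one(f):
        cmd = [sys.executable, str(pipeline_script), str(f), "--model", model_name]
        # check=False: report the exit code instead of raising on failure
        return f, subprocess.run(cmd, check=False).returncode

    success = failed = 0
    # Threads (not processes) suffice here: each worker just blocks on a
    # subprocess, so the GIL is not a bottleneck.
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        futures = [pool.submit(run_one, f) for f in files]
        for future in as_completed(futures):
            f, returncode = future.result()
            if returncode == 0:
                success += 1
            else:
                print(f"[ERROR] Pipeline failed for {f.name} (exit code {returncode})")
                failed += 1
    return success, failed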