|
|
|
|
|
"""
|
|
|
Run script for the simple integrated pipeline
|
|
|
|
|
|
Usage examples:
|
|
|
python run_simple.py sample_log.json
|
|
|
python run_simple.py /path/to/mordor_dataset/credential_access_log.json
|
|
|
python run_simple.py sample_log.json "Focus on lateral movement techniques"
|
|
|
"""
|
|
|
|
|
|
import os
|
|
|
import sys
|
|
|
from pathlib import Path
|
|
|
from dotenv import load_dotenv
|
|
|
from huggingface_hub import login as huggingface_login
|
|
|
|
|
|
|
|
|
|
|
|
# Resolve the repository root (three directory levels above this script) and
# prepend it to sys.path so `src.*` imports work regardless of the caller's
# current working directory.
project_root = Path(__file__).parent.parent.parent

sys.path.insert(0, str(project_root))
|
|
|
|
|
|
|
|
|
# Import the pipeline entry point; fail fast with diagnostics (cwd and script
# location) when the expected package layout is missing.
try:
    from src.full_pipeline.simple_pipeline import analyze_log_file
except ImportError as e:
    print(f"Import error: {e}")
    print("Make sure simple_pipeline.py is in src/full_pipeline/ directory")
    print(f"Current working directory: {os.getcwd()}")
    print(f"Script location: {Path(__file__).parent}")
    sys.exit(1)
|
|
|
|
|
|
|
|
|
def setup_environment(model_name: str = "google_genai:gemini-2.0-flash"):
    """
    Setup environment variables and check requirements.

    Loads variables from a .env file, logs in to Hugging Face when HF_TOKEN
    is present, and verifies that the API key required by *model_name* is
    set. Exits the process with status 1 when the required key is missing.

    Args:
        model_name: Name of the model to validate environment for
    """
    # load_dotenv() populates os.environ directly, so no manual re-assignment
    # of GOOGLE_API_KEY / GROQ_API_KEY / OPENAI_API_KEY is needed afterwards
    # (the previous self-assignments were no-ops and have been removed).
    load_dotenv()

    if os.getenv("HF_TOKEN"):
        huggingface_login(token=os.getenv("HF_TOKEN"))

    # Map the model name to the provider API key it requires.
    if "google_genai" in model_name or "gemini" in model_name:
        required_env_var = "GOOGLE_API_KEY"
    elif "groq" in model_name or "gpt-oss" in model_name or "llama" in model_name:
        required_env_var = "GROQ_API_KEY"
    elif "openai" in model_name or "gpt-" in model_name:
        required_env_var = "OPENAI_API_KEY"
    else:
        print(
            f"[WARNING] Unknown model '{model_name}', using default environment checks"
        )
        required_env_var = "GOOGLE_API_KEY"

    if not os.getenv(required_env_var):
        print(f"Error: {required_env_var} not found in environment variables")
        print(f"Required for model: {model_name}")
        # Plain string: the previous f-string here had no placeholders.
        print("Please set it in your .env file or environment.")
        print("\nAvailable models and their requirements:")
        print(" ✓ google_genai:gemini-2.0-flash: requires GOOGLE_API_KEY")
        print(" ✓ google_genai:gemini-1.5-flash: requires GOOGLE_API_KEY")
        print(" ✓ groq:gpt-oss-120b: requires GROQ_API_KEY")
        print(" ✓ groq:gpt-oss-20b: requires GROQ_API_KEY")
        print(" ✓ groq:llama-3.1-8b-instant: requires GROQ_API_KEY")
        print(" ✓ groq:llama-3.3-70b-versatile: requires GROQ_API_KEY")
        sys.exit(1)

    print(f"Environment setup complete. Using {required_env_var} for {model_name}")
|
|
|
|
|
|
|
|
|
def validate_inputs(log_file: str):
    """Validate input parameters."""
    if not os.path.exists(log_file):
        print(f"Error: Log file not found: {log_file}")

        # Probe the usual dataset locations (relative to the project root)
        # so we can suggest real candidates to the user.
        os.chdir(project_root)
        suggestions = [
            label
            for probe, label in (
                ("mordor_dataset", "./mordor_dataset/"),
                ("../mordor_dataset", "../mordor_dataset/"),
            )
            if Path(probe).exists()
        ]

        if suggestions:
            print("Try looking in these directories:")
            for candidate in suggestions:
                json_files = list(Path(candidate).glob("*.json"))
                if not json_files:
                    continue
                print(f" {candidate}")
                for sample in json_files[:3]:
                    print(f" - {sample.name}")
                remaining = len(json_files) - 3
                if remaining > 0:
                    print(f" ... and {remaining} more files")

        sys.exit(1)

    # Unusual extensions are allowed, but only with explicit confirmation.
    if not log_file.endswith(".json"):
        print(f"Warning: File doesn't have .json extension: {log_file}")
        if input("Continue anyway? (y/n): ").lower() != "y":
            sys.exit(1)
|
|
|
|
|
|
|
|
|
def main():
|
|
|
"""Main entry point."""
|
|
|
|
|
|
if len(sys.argv) < 2:
|
|
|
print("Cybersecurity Log Analysis Pipeline")
|
|
|
print("=" * 50)
|
|
|
print("Usage: python run_simple_pipeline.py <log_file> [options]")
|
|
|
print("")
|
|
|
print("Arguments:")
|
|
|
print(" log_file Path to the log file to analyze")
|
|
|
print("")
|
|
|
print("Options:")
|
|
|
print(' --query "TEXT" Optional query for additional context')
|
|
|
print(
|
|
|
" --model MODEL_NAME Model to use for analysis (default: google_genai:gemini-2.0-flash)"
|
|
|
)
|
|
|
print(" --temp TEMPERATURE Temperature for model generation (default: 0.1)")
|
|
|
print(
|
|
|
" --output-dir DIR Output directory for results (default: mordor_dataset/eval_output)"
|
|
|
)
|
|
|
print("")
|
|
|
print("Examples:")
|
|
|
print(" python run_simple_pipeline.py sample_log.json")
|
|
|
print(
|
|
|
" python run_simple_pipeline.py mordor_dataset/datasets/credential_access.json"
|
|
|
)
|
|
|
print(
|
|
|
" python run_simple_pipeline.py sample.json --query 'Focus on privilege escalation'"
|
|
|
)
|
|
|
print(" python run_simple_pipeline.py sample.json --model gpt-oss-120b")
|
|
|
print(
|
|
|
" python run_simple_pipeline.py sample.json --model llama-3.1-8b-instant --temp 0.2"
|
|
|
)
|
|
|
print(" python run_simple_pipeline.py sample.json --output-dir custom_output")
|
|
|
print("")
|
|
|
print("Available models:")
|
|
|
print(" - google_genai:gemini-2.0-flash")
|
|
|
print(" - google_genai:gemini-1.5-flash")
|
|
|
print(" - groq:gpt-oss-120b")
|
|
|
print(" - groq:gpt-oss-20b")
|
|
|
print(" - groq:llama-3.1-8b-instant")
|
|
|
print(" - groq:llama-3.3-70b-versatile")
|
|
|
print("")
|
|
|
|
|
|
|
|
|
os.chdir(project_root)
|
|
|
sample_files = []
|
|
|
for pattern in ["*.json", "mordor_dataset/*.json", "../mordor_dataset/*.json"]:
|
|
|
sample_files.extend(Path(".").glob(pattern))
|
|
|
|
|
|
if sample_files:
|
|
|
print("Available log files found:")
|
|
|
for f in sample_files[:5]:
|
|
|
print(f" {f}")
|
|
|
if len(sample_files) > 5:
|
|
|
print(f" ... and {len(sample_files) - 5} more files")
|
|
|
|
|
|
sys.exit(1)
|
|
|
|
|
|
|
|
|
log_file = sys.argv[1]
|
|
|
query = None
|
|
|
model_name = "google_genai:gemini-2.0-flash"
|
|
|
temperature = 0.1
|
|
|
output_dir = "mordor_dataset/eval_output"
|
|
|
|
|
|
i = 2
|
|
|
while i < len(sys.argv):
|
|
|
if sys.argv[i] == "--query" and i + 1 < len(sys.argv):
|
|
|
query = sys.argv[i + 1]
|
|
|
i += 2
|
|
|
elif sys.argv[i] == "--model" and i + 1 < len(sys.argv):
|
|
|
model_name = sys.argv[i + 1]
|
|
|
i += 2
|
|
|
elif sys.argv[i] == "--temp" and i + 1 < len(sys.argv):
|
|
|
try:
|
|
|
temperature = float(sys.argv[i + 1])
|
|
|
except ValueError:
|
|
|
print(f"Error: Invalid temperature value: {sys.argv[i + 1]}")
|
|
|
sys.exit(1)
|
|
|
i += 2
|
|
|
elif sys.argv[i] == "--output-dir" and i + 1 < len(sys.argv):
|
|
|
output_dir = sys.argv[i + 1]
|
|
|
i += 2
|
|
|
else:
|
|
|
|
|
|
if not query:
|
|
|
query = sys.argv[i]
|
|
|
i += 1
|
|
|
|
|
|
print("Cybersecurity Multi-Agent Pipeline")
|
|
|
print("=" * 50)
|
|
|
print(f"Log file: {log_file}")
|
|
|
print(f"Model: {model_name}")
|
|
|
print(f"Temperature: {temperature}")
|
|
|
print(f"Output directory: {output_dir}")
|
|
|
print(f"User query: {query or 'None'}")
|
|
|
print("")
|
|
|
|
|
|
|
|
|
setup_environment(model_name)
|
|
|
validate_inputs(log_file)
|
|
|
|
|
|
|
|
|
try:
|
|
|
print("Initializing pipeline...")
|
|
|
|
|
|
tactic = None
|
|
|
log_path = Path(log_file)
|
|
|
if log_path.parent.name != "mordor_dataset":
|
|
|
tactic = log_path.parent.name
|
|
|
|
|
|
|
|
|
analysis_dir = os.path.join(output_dir, "analysis")
|
|
|
final_response_dir = os.path.join(output_dir, "final_response")
|
|
|
|
|
|
|
|
|
os.makedirs(analysis_dir, exist_ok=True)
|
|
|
os.makedirs(final_response_dir, exist_ok=True)
|
|
|
|
|
|
final_state = analyze_log_file(
|
|
|
log_file,
|
|
|
query,
|
|
|
tactic,
|
|
|
model_name=model_name,
|
|
|
temperature=temperature,
|
|
|
log_agent_output_dir=analysis_dir,
|
|
|
response_agent_output_dir=final_response_dir,
|
|
|
)
|
|
|
print(final_state["markdown_report"])
|
|
|
print("\nPipeline execution completed successfully!")
|
|
|
|
|
|
except KeyboardInterrupt:
|
|
|
print("\nPipeline interrupted by user.")
|
|
|
sys.exit(0)
|
|
|
|
|
|
except Exception as e:
|
|
|
print(f"\nPipeline failed with error: {e}")
|
|
|
|
|
|
|
|
|
print("\nDebugging information:")
|
|
|
print(f" - Working directory: {os.getcwd()}")
|
|
|
print(f" - Log file exists: {os.path.exists(log_file)}")
|
|
|
print(f" - Python path: {sys.path[0]}")
|
|
|
|
|
|
|
|
|
if "knowledge base" in str(e).lower():
|
|
|
print("\nPossible solution:")
|
|
|
print(
|
|
|
" Make sure ./cyber_knowledge_base directory exists and is properly initialized"
|
|
|
)
|
|
|
elif "import" in str(e).lower():
|
|
|
print("\nPossible solution:")
|
|
|
print(
|
|
|
" Make sure you're running from the correct directory with access to src/"
|
|
|
)
|
|
|
|
|
|
sys.exit(1)
|
|
|
|
|
|
|
|
|
# Only run the pipeline when executed as a script, not when imported.
if __name__ == "__main__":
    main()