Spaces:

minhan6559
/

Log-Analysis-MultiAgent

Running

App Files Files Community

minhan6559 committed 10 days ago

Commit

223ef32

verified ·

1 Parent(s): 5af9ea8

Upload 126 files

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +1 -0
app.py +292 -0
cyber_knowledge_base/bm25_retriever.pkl +3 -0
cyber_knowledge_base/chroma/.gitignore +1 -0
cyber_knowledge_base/chroma/1ab81415-9731-4a9a-8d06-afc7fc190d32/.gitignore +1 -0
cyber_knowledge_base/chroma/1ab81415-9731-4a9a-8d06-afc7fc190d32/data_level0.bin +3 -0
cyber_knowledge_base/chroma/1ab81415-9731-4a9a-8d06-afc7fc190d32/header.bin +3 -0
cyber_knowledge_base/chroma/1ab81415-9731-4a9a-8d06-afc7fc190d32/length.bin +3 -0
cyber_knowledge_base/chroma/1ab81415-9731-4a9a-8d06-afc7fc190d32/link_lists.bin +3 -0
cyber_knowledge_base/chroma/76f221d1-5f9d-44f8-8c9c-f610482d9b15/data_level0.bin +3 -0
cyber_knowledge_base/chroma/76f221d1-5f9d-44f8-8c9c-f610482d9b15/header.bin +3 -0
cyber_knowledge_base/chroma/76f221d1-5f9d-44f8-8c9c-f610482d9b15/index_metadata.pickle +3 -0
cyber_knowledge_base/chroma/76f221d1-5f9d-44f8-8c9c-f610482d9b15/length.bin +3 -0
cyber_knowledge_base/chroma/76f221d1-5f9d-44f8-8c9c-f610482d9b15/link_lists.bin +3 -0
cyber_knowledge_base/chroma/chroma.sqlite3 +3 -0
requirements.txt +607 -3
run_app.py +53 -0
src/agents/__pycache__/llm_client.cpython-311.pyc +0 -0
src/agents/correlation_agent/correlation_logic.py +449 -0
src/agents/correlation_agent/input_converters.py +49 -0
src/agents/correlation_agent/test.py +176 -0
src/agents/correlation_agent/types.py +48 -0
src/agents/cti_agent/__pycache__/config.cpython-311.pyc +0 -0
src/agents/cti_agent/__pycache__/cti_agent.cpython-311.pyc +0 -0
src/agents/cti_agent/__pycache__/cti_tools.cpython-311.pyc +0 -0
src/agents/cti_agent/config.py +371 -0
src/agents/cti_agent/cti-bench/data/cti-ate.tsv +0 -0
src/agents/cti_agent/cti-bench/data/cti-mcq.tsv +0 -0
src/agents/cti_agent/cti-bench/data/cti-rcm-2021.tsv +0 -0
src/agents/cti_agent/cti-bench/data/cti-rcm.tsv +0 -0
src/agents/cti_agent/cti-bench/data/cti-taa.tsv +0 -0
src/agents/cti_agent/cti-bench/data/cti-vsp.tsv +0 -0
src/agents/cti_agent/cti-evaluator.py +708 -0
src/agents/cti_agent/cti_agent.py +920 -0
src/agents/cti_agent/cti_tools.py +263 -0
src/agents/cti_agent/testing_cti_agent.ipynb +573 -0
src/agents/cti_agent/tool_evaluation_results/extract_mitre_techniques_results.csv +230 -0
src/agents/cti_agent/tool_evaluation_results/extract_mitre_techniques_summary.json +12 -0
src/agents/cti_agent/tool_evaluation_results/identify_threat_actors_results.csv +173 -0
src/agents/cti_agent/tool_evaluation_results/identify_threat_actors_summary.json +9 -0
src/agents/database_agent/__pycache__/agent.cpython-311.pyc +0 -0
src/agents/database_agent/__pycache__/prompts.cpython-311.pyc +0 -0
src/agents/database_agent/agent.py +442 -0
src/agents/database_agent/prompts.py +71 -0
src/agents/global_supervisor/__pycache__/supervisor.cpython-311.pyc +0 -0
src/agents/log_analysis_agent/__pycache__/agent.cpython-311.pyc +0 -0
src/agents/log_analysis_agent/__pycache__/prompts.cpython-311.pyc +0 -0
src/agents/log_analysis_agent/__pycache__/state_models.cpython-311.pyc +0 -0
src/agents/log_analysis_agent/__pycache__/utils.cpython-311.pyc +0 -0
src/agents/log_analysis_agent/agent.py +1058 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+cyber_knowledge_base/chroma/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,292 @@

+#!/usr/bin/env python3
+"""
+Streamlit Web App for Cybersecurity Agent Pipeline
+A simple web interface for uploading log files and running the cybersecurity analysis pipeline
+with different LLM models.
+"""
+import os
+import sys
+import tempfile
+import shutil
+import streamlit as st
+from pathlib import Path
+from typing import Dict, Any, Optional
+from src.full_pipeline.simple_pipeline import analyze_log_file
+from dotenv import load_dotenv
+from huggingface_hub import login as huggingface_login
+load_dotenv()
+def get_model_providers() -> Dict[str, Dict[str, str]]:
+    """Return the selectable models, grouped by provider display name.
+    Each inner mapping goes from a model's display name to its
+    ``<provider-prefix>:<model>`` identifier string.
+    """
+    def _with_prefix(prefix: str, *model_names: str) -> Dict[str, str]:
+        # Every identifier is the display name behind a provider prefix.
+        return {name: f"{prefix}:{name}" for name in model_names}
+    catalog: Dict[str, Dict[str, str]] = {}
+    catalog["Google GenAI"] = _with_prefix(
+        "google_genai",
+        "gemini-2.0-flash",
+        "gemini-2.0-flash-lite",
+        "gemini-2.5-flash-lite",
+    )
+    catalog["Groq"] = _with_prefix(
+        "groq",
+        "openai/gpt-oss-120b",
+        "moonshotai/kimi-k2-instruct-0905",
+    )
+    catalog["OpenAI"] = _with_prefix("openai", "gpt-4o", "gpt-4.1")
+    return catalog
+def get_api_key_help() -> Dict[str, str]:
+    """Return, for each provider display name, the URL of its API-key page."""
+    key_pages = (
+        ("Google GenAI", "https://aistudio.google.com/app/apikey"),
+        ("Groq", "https://console.groq.com/keys"),
+        ("OpenAI", "https://platform.openai.com/api-keys"),
+    )
+    return dict(key_pages)
+def setup_temp_directories(temp_dir: str) -> Dict[str, str]:
+    """Create the pipeline's working sub-directories under ``temp_dir``.
+    Returns a dict mapping ``"log_files"``, ``"analysis"`` and
+    ``"final_response"`` to the path of the matching sub-directory.
+    Directories that already exist are left untouched.
+    """
+    created: Dict[str, str] = {}
+    for subdir in ("log_files", "analysis", "final_response"):
+        subdir_path = os.path.join(temp_dir, subdir)
+        os.makedirs(subdir_path, exist_ok=True)
+        created[subdir] = subdir_path
+    return created
+def save_uploaded_file(uploaded_file, temp_dir: str) -> str:
+    """Write an uploaded file into ``<temp_dir>/log_files`` and return its path.
+    Args:
+        uploaded_file: Object exposing ``name`` and ``getbuffer()``; in this
+            file it is the value returned by ``st.file_uploader``.
+        temp_dir: Working directory of the current analysis run.
+    Returns:
+        Path of the file that was written.
+    Raises:
+        ValueError: If the upload's name has no usable file-name component.
+    """
+    log_files_dir = os.path.join(temp_dir, "log_files")
+    # Do not rely on the directory having been created beforehand.
+    os.makedirs(log_files_dir, exist_ok=True)
+    # The name is supplied by the client: keep only its last path component so
+    # that "../" (or Windows-style "..\\") segments cannot escape log_files_dir.
+    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
+    if safe_name in ("", ".", ".."):
+        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")
+    file_path = os.path.join(log_files_dir, safe_name)
+    with open(file_path, "wb") as f:
+        f.write(uploaded_file.getbuffer())
+    return file_path
+def run_analysis(
+    log_file_path: str,
+    model_name: str,
+    query: str,
+    temp_dirs: Dict[str, str],
+    api_key: str,
+    provider: str,
+) -> Dict[str, Any]:
+    """Run the cybersecurity analysis pipeline.
+    The API key is exported through the provider's environment variable only
+    for the duration of the call. The variable's previous value (or absence)
+    is restored afterwards, so a key entered in one session does not linger
+    in the process-wide environment.
+    Args:
+        log_file_path: Path of the log file to analyze.
+        model_name: Model identifier forwarded to ``analyze_log_file``.
+        query: Optional extra context forwarded to ``analyze_log_file``.
+        temp_dirs: Must contain the ``"analysis"`` and ``"final_response"``
+            output directories.
+        api_key: API key for ``provider``.
+        provider: ``"Google GenAI"``, ``"Groq"`` or ``"OpenAI"``; for any other
+            value no environment variable is set.
+    Returns:
+        ``{"success": True, "result": <pipeline result>}`` on success,
+        otherwise ``{"success": False, "error": <exception message>}``.
+    """
+    env_var = {
+        "Google GenAI": "GOOGLE_API_KEY",
+        "Groq": "GROQ_API_KEY",
+        "OpenAI": "OPENAI_API_KEY",
+    }.get(provider)
+    previous_value = os.environ.get(env_var) if env_var else None
+    if env_var:
+        os.environ[env_var] = api_key
+    try:
+        # Run the analysis pipeline
+        result = analyze_log_file(
+            log_file=log_file_path,
+            query=query,
+            tactic=None,
+            model_name=model_name,
+            temperature=0.1,
+            log_agent_output_dir=temp_dirs["analysis"],
+            response_agent_output_dir=temp_dirs["final_response"],
+        )
+        return {"success": True, "result": result}
+    except Exception as e:
+        # UI boundary: report the failure to the caller instead of raising.
+        return {"success": False, "error": str(e)}
+    finally:
+        # Put the environment back exactly as it was before this call.
+        if env_var:
+            if previous_value is None:
+                os.environ.pop(env_var, None)
+            else:
+                os.environ[env_var] = previous_value
+def main():
+    """Main Streamlit app.
+    Renders the sidebar configuration (provider, model, API key, optional
+    query), the log-file uploader with a status panel, and a "Run Analysis"
+    button. When the button is pressed, the upload is saved into a fresh
+    temporary directory, ``run_analysis`` is called, its result is rendered,
+    and the temporary directory is removed again.
+    """
+    # Log in to the Hugging Face Hub only when a token is configured.
+    if os.getenv("HF_TOKEN"):
+        huggingface_login(token=os.getenv("HF_TOKEN"))
+    st.set_page_config(
+        page_title="Cybersecurity Agent Pipeline", page_icon="🛡️", layout="wide"
+    )
+    st.title("Cybersecurity Agent Pipeline")
+    st.markdown(
+        "Upload a log file and analyze it using advanced LLM-based cybersecurity agents."
+    )
+    # Sidebar for configuration
+    with st.sidebar:
+        st.header("Configuration")
+        # Model selection
+        providers = get_model_providers()
+        selected_provider = st.selectbox(
+            "Select Model Provider", list(providers.keys())
+        )
+        # The model list depends on the provider chosen above.
+        available_models = providers[selected_provider]
+        selected_model_display = st.selectbox(
+            "Select Model", list(available_models.keys())
+        )
+        # Translate the display name into the identifier handed to the pipeline.
+        selected_model = available_models[selected_model_display]
+        # API Key input with help
+        st.subheader("API Key")
+        api_key_help = get_api_key_help()
+        with st.expander("How to get API key", expanded=False):
+            st.markdown(f"**{selected_provider}**:")
+            st.markdown(f"[Get API Key]({api_key_help[selected_provider]})")
+        api_key = st.text_input(
+            f"Enter {selected_provider} API Key",
+            type="password",
+            help=f"Your {selected_provider} API key",
+        )
+        # Additional query
+        st.subheader("Additional Context")
+        user_query = st.text_area(
+            "Optional Query",
+            placeholder="e.g., 'Focus on credential access attacks'",
+            help="Provide additional context or specific focus areas for the analysis",
+        )
+    # Main content area
+    col1, col2 = st.columns([2, 1])
+    with col1:
+        st.header("Upload Log File")
+        uploaded_file = st.file_uploader(
+            "Choose a JSON log file",
+            type=["json"],
+            help="Upload a JSON log file from the Mordor dataset or similar security logs",
+        )
+    with col2:
+        st.header("Analysis Status")
+        if uploaded_file is not None:
+            st.success(f"File uploaded: {uploaded_file.name}")
+            st.info(f"Size: {uploaded_file.size:,} bytes")
+        else:
+            st.warning("Please upload a log file")
+    # Run analysis button
+    # The button stays disabled until both a file and an API key are present.
+    if st.button(
+        "Run Analysis", type="primary", disabled=not (uploaded_file and api_key)
+    ):
+        # Defensive re-checks of the same two conditions. Note that returning
+        # here also skips the footer rendered at the end of this function.
+        if not uploaded_file:
+            st.error("Please upload a log file first.")
+            return
+        if not api_key:
+            st.error("Please enter your API key.")
+            return
+        # Create temporary directory
+        temp_dir = tempfile.mkdtemp(prefix="cyber_agent_")
+        try:
+            # Setup directories
+            temp_dirs = setup_temp_directories(temp_dir)
+            # Save uploaded file
+            log_file_path = save_uploaded_file(uploaded_file, temp_dir)
+            # Show progress
+            # The percentages below are fixed milestones, not measured progress:
+            # run_analysis is one blocking call between the 50 and 90 marks.
+            progress_bar = st.progress(0)
+            status_text = st.empty()
+            status_text.text("Initializing analysis...")
+            progress_bar.progress(10)
+            # Run analysis
+            status_text.text("Running cybersecurity analysis...")
+            progress_bar.progress(50)
+            analysis_result = run_analysis(
+                log_file_path=log_file_path,
+                model_name=selected_model,
+                query=user_query,
+                temp_dirs=temp_dirs,
+                api_key=api_key,
+                provider=selected_provider,
+            )
+            progress_bar.progress(90)
+            status_text.text("Finalizing results...")
+            if analysis_result["success"]:
+                progress_bar.progress(100)
+                status_text.text("Analysis completed successfully!")
+                # Display results
+                st.header("Analysis Results")
+                result = analysis_result["result"]
+                # Show key metrics
+                # NOTE: this rebinds col1/col2 from the two-column layout above.
+                col1, col2, col3 = st.columns(3)
+                with col1:
+                    assessment = result.get("log_analysis_result", {}).get(
+                        "overall_assessment", "Unknown"
+                    )
+                    st.metric("Overall Assessment", assessment)
+                with col2:
+                    abnormal_events = result.get("log_analysis_result", {}).get(
+                        "abnormal_events", []
+                    )
+                    st.metric("Abnormal Events", len(abnormal_events))
+                with col3:
+                    execution_time = result.get("execution_time", "N/A")
+                    # Numeric times are formatted to two decimals; anything
+                    # else (including the "N/A" default) is shown as-is.
+                    st.metric(
+                        "Execution Time",
+                        (
+                            f"{execution_time:.2f}s"
+                            if isinstance(execution_time, (int, float))
+                            else execution_time
+                        ),
+                    )
+                # Show markdown report
+                markdown_report = result.get("markdown_report", "")
+                if markdown_report:
+                    st.header("Detailed Report")
+                    st.markdown(markdown_report)
+                else:
+                    st.warning("No detailed report generated.")
+            else:
+                st.error(f"Analysis failed: {analysis_result['error']}")
+                # NOTE(review): "error" is a str here (run_analysis returns
+                # str(e)), while st.exception is documented as taking an
+                # exception object - confirm this renders as intended.
+                st.exception(analysis_result["error"])
+        finally:
+            # Cleanup temporary directory
+            # Best effort: a failed clean-up is reported as a warning only.
+            try:
+                shutil.rmtree(temp_dir)
+            except Exception as e:
+                st.warning(f"Could not clean up temporary directory: {e}")
+    # Footer
+    st.markdown("---")
+    st.markdown(
+        "**Cybersecurity Agent Pipeline** - Powered by LangGraph and LangChain | "
+        "Built for educational purposes demonstrating LLM-based multi-agent systems"
+    )
+# Run the app when this file is executed as a script.
+if __name__ == "__main__":
+    main()

cyber_knowledge_base/bm25_retriever.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0988976cad39234f7fab73e71ab0c9d8c6d5c609c556ae9751fde7730e903f0b
+size 5110282

cyber_knowledge_base/chroma/.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ *.sqlite3

cyber_knowledge_base/chroma/1ab81415-9731-4a9a-8d06-afc7fc190d32/.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ *.bin

cyber_knowledge_base/chroma/1ab81415-9731-4a9a-8d06-afc7fc190d32/data_level0.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:95e2ea8a0724a2545afa94c485a99b5fde88d7dc842a137705ea87b74c477d35
+size 321200

cyber_knowledge_base/chroma/1ab81415-9731-4a9a-8d06-afc7fc190d32/header.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:03cb3ac86f3e5bcb15e88b9bf99f760ec6b33e31d64a699e129b49868db6d733
+size 100

cyber_knowledge_base/chroma/1ab81415-9731-4a9a-8d06-afc7fc190d32/length.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:854a86c445127b39997823da48a8556580ebaa06cc7c1289151300c1b9115efc
+size 400

cyber_knowledge_base/chroma/1ab81415-9731-4a9a-8d06-afc7fc190d32/link_lists.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
+size 0

cyber_knowledge_base/chroma/76f221d1-5f9d-44f8-8c9c-f610482d9b15/data_level0.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0f4fa01cff4dbc86c1cd162ee63e986ce0f9083e3bda7c73f04ed671c38f4dcd
+size 2180948

cyber_knowledge_base/chroma/76f221d1-5f9d-44f8-8c9c-f610482d9b15/header.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1703590965d586b54b7ec6768894f349c76690c5916512cb679891d0b644d6f0
+size 100

cyber_knowledge_base/chroma/76f221d1-5f9d-44f8-8c9c-f610482d9b15/index_metadata.pickle ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:975f009a15eeea6560c1c8c00180ede3b6074c2cad6d4a900fc38cc91a35390a
+size 62596

cyber_knowledge_base/chroma/76f221d1-5f9d-44f8-8c9c-f610482d9b15/length.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:add85c66e3321d0c9e1c84557ba1f784ed8551326ea3e6e666bfd1711d0bee9d
+size 2716

cyber_knowledge_base/chroma/76f221d1-5f9d-44f8-8c9c-f610482d9b15/link_lists.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:46fd6dec85beb221c57b3ed5bc46356e172229d9efcf1d9e67d53fb861d46cdb
+size 5776

cyber_knowledge_base/chroma/chroma.sqlite3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8d37398e05987708a52fd527d0a7dfeba9b060d12385c7cd7df264f2fc6b66c7
+size 12853248

requirements.txt CHANGED Viewed

@@ -1,3 +1,607 @@
-altair
-pandas
-streamlit

+#
+# This file is autogenerated by pip-compile with Python 3.11
+# by the following command:
+#
+#    pip-compile --output-file=-
+#
+aiohappyeyeballs==2.6.1
+    # via aiohttp
+aiohttp==3.13.1
+    # via
+    #   langchain
+    #   langchain-community
+    #   langchain-tavily
+aiosignal==1.4.0
+    # via aiohttp
+annotated-types==0.7.0
+    # via pydantic
+antlr4-python3-runtime==4.9.3
+    # via stix2-patterns
+anyio==4.11.0
+    # via
+    #   groq
+    #   httpx
+    #   openai
+    #   watchfiles
+argparse==1.4.0
+    # via -r requirements.in
+attrs==25.4.0
+    # via
+    #   aiohttp
+    #   jsonschema
+    #   referencing
+backoff==2.2.1
+    # via posthog
+bcrypt==5.0.0
+    # via chromadb
+build==1.3.0
+    # via chromadb
+cachetools==6.2.1
+    # via google-auth
+certifi==2025.10.5
+    # via
+    #   httpcore
+    #   httpx
+    #   kubernetes
+    #   requests
+charset-normalizer==3.4.4
+    # via
+    #   -r requirements.in
+    #   requests
+chromadb==1.2.2
+    # via
+    #   -r requirements.in
+    #   langchain-chroma
+click==8.3.0
+    # via
+    #   nltk
+    #   typer
+    #   uvicorn
+colorama==0.4.6
+    # via
+    #   build
+    #   click
+    #   loguru
+    #   tqdm
+    #   uvicorn
+coloredlogs==15.0.1
+    # via onnxruntime
+colour==0.1.5
+    # via mitreattack-python
+dataclasses-json==0.6.7
+    # via
+    #   langchain
+    #   langchain-community
+deepdiff==8.6.1
+    # via mitreattack-python
+distro==1.9.0
+    # via
+    #   groq
+    #   openai
+    #   posthog
+drawsvg==2.4.0
+    # via mitreattack-python
+durationpy==0.10
+    # via kubernetes
+et-xmlfile==2.0.0
+    # via openpyxl
+filelock==3.20.0
+    # via
+    #   huggingface-hub
+    #   torch
+    #   transformers
+filetype==1.2.0
+    # via langchain-google-genai
+flatbuffers==25.9.23
+    # via onnxruntime
+frozenlist==1.8.0
+    # via
+    #   aiohttp
+    #   aiosignal
+fsspec==2025.9.0
+    # via
+    #   huggingface-hub
+    #   torch
+google-ai-generativelanguage==0.9.0
+    # via langchain-google-genai
+google-api-core[grpc]==2.27.0
+    # via google-ai-generativelanguage
+google-auth==2.41.1
+    # via
+    #   google-ai-generativelanguage
+    #   google-api-core
+    #   kubernetes
+googleapis-common-protos==1.71.0
+    # via
+    #   -r requirements.in
+    #   google-api-core
+    #   grpcio-status
+    #   opentelemetry-exporter-otlp-proto-grpc
+greenlet==3.2.4
+    # via sqlalchemy
+groq==0.33.0
+    # via langchain-groq
+grpcio==1.76.0
+    # via
+    #   chromadb
+    #   google-ai-generativelanguage
+    #   google-api-core
+    #   grpcio-status
+    #   opentelemetry-exporter-otlp-proto-grpc
+grpcio-status==1.76.0
+    # via google-api-core
+h11==0.16.0
+    # via
+    #   httpcore
+    #   uvicorn
+httpcore==1.0.9
+    # via httpx
+httptools==0.7.1
+    # via uvicorn
+httpx==0.28.1
+    # via
+    #   chromadb
+    #   groq
+    #   langgraph-sdk
+    #   langsmith
+    #   ollama
+    #   openai
+httpx-sse==0.4.3
+    # via langchain-community
+huggingface-hub==0.36.0
+    # via
+    #   -r requirements.in
+    #   langchain-huggingface
+    #   sentence-transformers
+    #   tokenizers
+    #   transformers
+humanfriendly==10.0
+    # via coloredlogs
+idna==3.11
+    # via
+    #   anyio
+    #   httpx
+    #   requests
+    #   yarl
+importlib-metadata==8.7.0
+    # via opentelemetry-api
+importlib-resources==6.5.2
+    # via chromadb
+jinja2==3.1.6
+    # via torch
+jiter==0.11.1
+    # via openai
+joblib==1.5.2
+    # via
+    #   nltk
+    #   scikit-learn
+jsonpatch==1.33
+    # via langchain-core
+jsonpointer==3.0.0
+    # via jsonpatch
+jsonschema==4.25.1
+    # via chromadb
+jsonschema-specifications==2025.9.1
+    # via jsonschema
+kubernetes==34.1.0
+    # via chromadb
+langchain==0.3.27
+    # via
+    #   -r requirements.in
+    #   langchain-community
+    #   langchain-tavily
+langchain-chroma==0.2.6
+    # via -r requirements.in
+langchain-community==0.3.31
+    # via -r requirements.in
+langchain-core==0.3.79
+    # via
+    #   -r requirements.in
+    #   langchain
+    #   langchain-chroma
+    #   langchain-community
+    #   langchain-google-genai
+    #   langchain-groq
+    #   langchain-huggingface
+    #   langchain-ollama
+    #   langchain-openai
+    #   langchain-tavily
+    #   langchain-text-splitters
+    #   langgraph
+    #   langgraph-checkpoint
+    #   langgraph-prebuilt
+    #   langgraph-supervisor
+langchain-google-genai==2.1.12
+    # via -r requirements.in
+langchain-groq==0.3.8
+    # via -r requirements.in
+langchain-huggingface==0.3.1
+    # via -r requirements.in
+langchain-ollama==0.3.10
+    # via -r requirements.in
+langchain-openai==0.3.35
+    # via -r requirements.in
+langchain-tavily==0.2.12
+    # via -r requirements.in
+langchain-text-splitters==0.3.11
+    # via
+    #   -r requirements.in
+    #   langchain
+langgraph==0.6.11
+    # via
+    #   -r requirements.in
+    #   langgraph-supervisor
+langgraph-checkpoint==3.0.0
+    # via
+    #   langgraph
+    #   langgraph-prebuilt
+langgraph-prebuilt==0.6.5
+    # via
+    #   -r requirements.in
+    #   langgraph
+langgraph-sdk==0.2.9
+    # via langgraph
+langgraph-supervisor==0.0.29
+    # via -r requirements.in
+langsmith==0.4.38
+    # via
+    #   -r requirements.in
+    #   langchain
+    #   langchain-community
+    #   langchain-core
+loguru==0.7.3
+    # via mitreattack-python
+markdown==3.9
+    # via mitreattack-python
+markdown-it-py==4.0.0
+    # via rich
+markupsafe==3.0.3
+    # via jinja2
+marshmallow==3.26.1
+    # via dataclasses-json
+mdurl==0.1.2
+    # via markdown-it-py
+mitreattack-python==5.1.0
+    # via -r requirements.in
+mmh3==5.2.0
+    # via chromadb
+mpmath==1.3.0
+    # via sympy
+multidict==6.7.0
+    # via
+    #   aiohttp
+    #   yarl
+mypy-extensions==1.1.0
+    # via typing-inspect
+networkx==3.5
+    # via torch
+nltk==3.9.2
+    # via -r requirements.in
+numpy==2.3.4
+    # via
+    #   chromadb
+    #   langchain-chroma
+    #   langchain-community
+    #   mitreattack-python
+    #   onnxruntime
+    #   pandas
+    #   rank-bm25
+    #   scikit-learn
+    #   scipy
+    #   transformers
+oauthlib==3.3.1
+    # via requests-oauthlib
+ollama==0.6.0
+    # via langchain-ollama
+onnxruntime==1.23.2
+    # via chromadb
+openai==2.6.1
+    # via langchain-openai
+openpyxl==3.1.5
+    # via mitreattack-python
+opentelemetry-api==1.38.0
+    # via
+    #   chromadb
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-sdk
+    #   opentelemetry-semantic-conventions
+opentelemetry-exporter-otlp-proto-common==1.38.0
+    # via opentelemetry-exporter-otlp-proto-grpc
+opentelemetry-exporter-otlp-proto-grpc==1.38.0
+    # via chromadb
+opentelemetry-proto==1.38.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-common
+    #   opentelemetry-exporter-otlp-proto-grpc
+opentelemetry-sdk==1.38.0
+    # via
+    #   chromadb
+    #   opentelemetry-exporter-otlp-proto-grpc
+opentelemetry-semantic-conventions==0.59b0
+    # via opentelemetry-sdk
+orderly-set==5.5.0
+    # via deepdiff
+orjson==3.11.4
+    # via
+    #   chromadb
+    #   langgraph-sdk
+    #   langsmith
+ormsgpack==1.11.0
+    # via langgraph-checkpoint
+overrides==7.7.0
+    # via chromadb
+packaging==25.0
+    # via
+    #   build
+    #   huggingface-hub
+    #   langchain-core
+    #   langsmith
+    #   marshmallow
+    #   onnxruntime
+    #   pooch
+    #   transformers
+pandas==2.3.3
+    # via mitreattack-python
+pillow==12.0.0
+    # via
+    #   mitreattack-python
+    #   sentence-transformers
+platformdirs==4.5.0
+    # via pooch
+pooch==1.8.2
+    # via mitreattack-python
+posthog==5.4.0
+    # via chromadb
+propcache==0.4.1
+    # via
+    #   aiohttp
+    #   yarl
+proto-plus==1.26.1
+    # via
+    #   google-ai-generativelanguage
+    #   google-api-core
+protobuf==6.33.0
+    # via
+    #   -r requirements.in
+    #   google-ai-generativelanguage
+    #   google-api-core
+    #   googleapis-common-protos
+    #   grpcio-status
+    #   onnxruntime
+    #   opentelemetry-proto
+    #   proto-plus
+pyasn1==0.6.1
+    # via
+    #   pyasn1-modules
+    #   rsa
+pyasn1-modules==0.4.2
+    # via google-auth
+pybase64==1.4.2
+    # via chromadb
+pydantic==2.12.3
+    # via
+    #   -r requirements.in
+    #   chromadb
+    #   groq
+    #   langchain
+    #   langchain-core
+    #   langchain-google-genai
+    #   langgraph
+    #   langsmith
+    #   ollama
+    #   openai
+    #   pydantic-settings
+pydantic-core==2.41.4
+    # via pydantic
+pydantic-settings==2.11.0
+    # via langchain-community
+pygments==2.19.2
+    # via rich
+pypdf2==3.0.1
+    # via -r requirements.in
+pypika==0.48.9
+    # via chromadb
+pyproject-hooks==1.2.0
+    # via build
+pyreadline3==3.5.4
+    # via humanfriendly
+python-dateutil==2.9.0.post0
+    # via
+    #   kubernetes
+    #   mitreattack-python
+    #   pandas
+    #   posthog
+python-dotenv==1.2.1
+    # via
+    #   -r requirements.in
+    #   pydantic-settings
+    #   uvicorn
+pytz==2025.2
+    # via
+    #   pandas
+    #   stix2
+pyyaml==6.0.3
+    # via
+    #   chromadb
+    #   huggingface-hub
+    #   kubernetes
+    #   langchain
+    #   langchain-community
+    #   langchain-core
+    #   transformers
+    #   uvicorn
+rank-bm25==0.2.2
+    # via -r requirements.in
+referencing==0.37.0
+    # via
+    #   jsonschema
+    #   jsonschema-specifications
+regex==2025.10.23
+    # via
+    #   nltk
+    #   tiktoken
+    #   transformers
+requests==2.32.5
+    # via
+    #   -r requirements.in
+    #   google-api-core
+    #   huggingface-hub
+    #   kubernetes
+    #   langchain
+    #   langchain-community
+    #   langchain-tavily
+    #   langsmith
+    #   mitreattack-python
+    #   pooch
+    #   posthog
+    #   requests-oauthlib
+    #   requests-toolbelt
+    #   stix2
+    #   tiktoken
+    #   transformers
+requests-oauthlib==2.0.0
+    # via kubernetes
+requests-toolbelt==1.0.0
+    # via langsmith
+rich==14.2.0
+    # via
+    #   chromadb
+    #   mitreattack-python
+    #   typer
+rpds-py==0.28.0
+    # via
+    #   jsonschema
+    #   referencing
+rsa==4.9.1
+    # via google-auth
+safetensors==0.6.2
+    # via transformers
+scikit-learn==1.7.2
+    # via sentence-transformers
+scipy==1.16.2
+    # via
+    #   scikit-learn
+    #   sentence-transformers
+sentence-transformers==5.1.2
+    # via -r requirements.in
+shellingham==1.5.4
+    # via typer
+simplejson==3.20.2
+    # via stix2
+six==1.17.0
+    # via
+    #   kubernetes
+    #   posthog
+    #   python-dateutil
+    #   stix2-patterns
+sniffio==1.3.1
+    # via
+    #   anyio
+    #   groq
+    #   openai
+sqlalchemy==2.0.44
+    # via
+    #   langchain
+    #   langchain-community
+stix2==3.0.1
+    # via mitreattack-python
+stix2-patterns==2.0.0
+    # via stix2
+sympy==1.14.0
+    # via
+    #   onnxruntime
+    #   torch
+tabulate==0.9.0
+    # via mitreattack-python
+tenacity==9.1.2
+    # via
+    #   chromadb
+    #   langchain-community
+    #   langchain-core
+threadpoolctl==3.6.0
+    # via scikit-learn
+tiktoken==0.12.0
+    # via langchain-openai
+tokenizers==0.22.1
+    # via
+    #   chromadb
+    #   langchain-huggingface
+    #   transformers
+torch==2.9.0
+    # via
+    #   -r requirements.in
+    #   sentence-transformers
+tqdm==4.67.1
+    # via
+    #   chromadb
+    #   huggingface-hub
+    #   mitreattack-python
+    #   nltk
+    #   openai
+    #   sentence-transformers
+    #   transformers
+transformers==4.57.1
+    # via
+    #   -r requirements.in
+    #   sentence-transformers
+typer==0.20.0
+    # via
+    #   chromadb
+    #   mitreattack-python
+typing-extensions==4.15.0
+    # via
+    #   aiosignal
+    #   anyio
+    #   chromadb
+    #   groq
+    #   grpcio
+    #   huggingface-hub
+    #   langchain-core
+    #   openai
+    #   opentelemetry-api
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-sdk
+    #   opentelemetry-semantic-conventions
+    #   pydantic
+    #   pydantic-core
+    #   referencing
+    #   sentence-transformers
+    #   sqlalchemy
+    #   torch
+    #   typer
+    #   typing-inspect
+    #   typing-inspection
+typing-inspect==0.9.0
+    # via dataclasses-json
+typing-inspection==0.4.2
+    # via
+    #   pydantic
+    #   pydantic-settings
+tzdata==2025.2
+    # via pandas
+urllib3==2.3.0
+    # via
+    #   kubernetes
+    #   requests
+uvicorn[standard]==0.38.0
+    # via chromadb
+watchfiles==1.1.1
+    # via uvicorn
+websocket-client==1.9.0
+    # via kubernetes
+websockets==15.0.1
+    # via uvicorn
+wheel==0.45.1
+    # via mitreattack-python
+win32-setctime==1.2.0
+    # via loguru
+xlsxwriter==3.2.9
+    # via mitreattack-python
+xxhash==3.6.0
+    # via langgraph
+yarl==1.22.0
+    # via aiohttp
+zipp==3.23.0
+    # via importlib-metadata
+zstandard==0.25.0
+    # via langsmith

run_app.py ADDED Viewed

	@@ -0,0 +1,53 @@

+"""
+Simple script to run the Streamlit cybersecurity agent web app.
+"""
+import subprocess
+import sys
+import os
+from pathlib import Path
+def main():
+    """Launch ``app.py`` (located next to this script) with Streamlit.
+    Streamlit is started through the current interpreter on localhost:8501.
+    Exits with status 1 if ``app.py`` is missing or Streamlit returns a
+    non-zero exit code, and with status 0 on Ctrl+C.
+    """
+    app_path = Path(__file__).parent / "app.py"
+    if not app_path.exists():
+        print(f"Error: app.py not found at {app_path}")
+        sys.exit(1)
+    separator = "=" * 50
+    banner = (
+        "Starting Cybersecurity Agent Web App...",
+        separator,
+        "The app will open in your default web browser.",
+        "If it doesn't open automatically, go to: http://localhost:8501",
+        separator,
+        "",
+    )
+    for banner_line in banner:
+        print(banner_line)
+    # Equivalent to `python -m streamlit run app.py` with a fixed host and port.
+    command = [sys.executable, "-m", "streamlit", "run", str(app_path)]
+    command += ["--server.port", "8501"]
+    command += ["--server.address", "localhost"]
+    try:
+        subprocess.run(command, check=True)
+    except subprocess.CalledProcessError as e:
+        print(f"Error running Streamlit: {e}")
+        sys.exit(1)
+    except KeyboardInterrupt:
+        print("\nApp stopped by user.")
+        sys.exit(0)
+if __name__ == "__main__":
+    main()

src/agents/__pycache__/llm_client.cpython-311.pyc ADDED Viewed

Binary file (11.7 kB). View file

src/agents/correlation_agent/correlation_logic.py ADDED Viewed

	@@ -0,0 +1,449 @@

+from typing import Dict, Any, List, Optional
+from datetime import datetime
+import json
+from langchain_core.messages import HumanMessage, AIMessage
+from langchain_core.tools import tool
+from langgraph.prebuilt import create_react_agent
+from langgraph.graph import StateGraph, END
+from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+from pydantic import BaseModel, Field, ConfigDict
+from sklearn.metrics.pairwise import cosine_similarity
+import numpy as np
+from .types import LogInput, MitreInput, CorrelationOutput, ThreatLevel, ConfidenceLevel, MatchedTechnique
+embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
+# State schema for correlation workflow
+class CorrelationState(BaseModel):
+    """State schema for correlation workflow."""
+    # Text of the analysis request; defaults to an empty string.
+    analysis_request: str = ""
+    # Unset (None) by default. Presumably the agent's raw text output -
+    # confirm against the code that fills this state.
+    agent_output: Optional[str] = None
+    # Unset (None) by default. Presumably the parsed structured result as a
+    # plain dict - confirm against the code that fills this state.
+    structured_response: Optional[Dict[str, Any]] = None
+    # Let pydantic accept field types it has no built-in validation for.
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+# Structured response schemas
+class MatchedTechniqueItem(BaseModel):
+    """Schema for individual matched technique."""
+    # NOTE: pydantic carries the class docstring and the Field descriptions
+    # into the generated JSON schema, so treat that text as runtime data.
+    # All four fields are required (`...` means "no default").
+    technique_id: str = Field(..., description="MITRE technique ID")
+    # The only field with an enforced range: values outside [0.0, 1.0] fail validation.
+    match_confidence: float = Field(..., ge=0.0, le=1.0, description="Correlation confidence 0-1")
+    evidence: str = Field(..., description="Concise evidence string")
+    # Plain str: the three values named in the description are not enforced.
+    validation_result: str = Field(..., description="correlated | weak | false_positive")
+    # Reject input that carries fields not declared above.
+    model_config = ConfigDict(extra='forbid')
+class CorrelationStructuredResponse(BaseModel):
+    """Structured response schema for correlation analysis output."""
+    # NOTE(review): unlike MatchedTechniqueItem.match_confidence, no ge/le bounds are enforced here.
+    correlation_score: float = Field(..., description="Overall aggregate correlation score (0-1)")
+    # Plain strings; CorrelationLogic.correlate maps them onto ThreatLevel / ConfidenceLevel.
+    threat_level: str = Field(..., description="low | medium | high | critical")
+    confidence: str = Field(..., description="low | medium | high")
+    matched_techniques: List[MatchedTechniqueItem] = Field(
+        default_factory=list,
+        description="Top 3-5 matched techniques"
+    )
+    reasoning: str = Field(..., description="Concise synthesis / justification of assessment (<150 words)")
+    # Unknown keys are rejected.
+    model_config = ConfigDict(extra='forbid')
+# Helper function for JSON parsing
+def parse_json_input(data: Any) -> Any:
+    """Parse JSON string or return as-is if already parsed."""
+    return json.loads(data) if isinstance(data, str) else data
+@tool
+def correlate_log_with_technique(technique_data: str, log_processes: List[str], log_anomalies: List[str]) -> str:
+    """Semantic correlation between log data and MITRE technique using embeddings."""
+    try:
+        technique = parse_json_input(technique_data)
+    except:
+        return json.dumps({"error": "Invalid technique data format"})
+    technique_id = technique.get('attack_id', 'Unknown')
+    technique_name = technique.get('name', 'Unknown')
+    technique_description = technique.get('description', '')
+    if not technique_description:
+        return json.dumps({
+            "technique_id": technique_id,
+            "technique_name": technique_name,
+            "correlation_score": 0.0,
+            "process_matches": [],
+            "anomaly_matches": [],
+            "match_quality": "weak"
+        })
+    # Semantic matching using embeddings
+    try:
+        technique_embedding = np.array(embeddings.embed_query(technique_description)).reshape(1, -1)
+        # Process matching
+        process_matches = []
+        process_scores = []
+        for process in log_processes:
+            if not process.strip():
+                continue
+            process_embedding = np.array(embeddings.embed_query(process)).reshape(1, -1)
+            similarity = cosine_similarity(technique_embedding, process_embedding)[0][0]
+            if similarity > 0.6:
+                process_matches.append(process)
+                process_scores.append(float(similarity))
+        # Anomaly matching
+        anomaly_matches = []
+        anomaly_scores = []
+        for anomaly in log_anomalies:
+            if not str(anomaly).strip():
+                continue
+            anomaly_embedding = np.array(embeddings.embed_query(str(anomaly))).reshape(1, -1)
+            similarity = cosine_similarity(technique_embedding, anomaly_embedding)[0][0]
+            if similarity > 0.5:
+                anomaly_matches.append(anomaly)
+                anomaly_scores.append(float(similarity))
+        # Calculate correlation score
+        avg_process_score = np.mean(process_scores) if process_scores else 0.0
+        avg_anomaly_score = np.mean(anomaly_scores) if anomaly_scores else 0.0
+        correlation_score = (avg_process_score + avg_anomaly_score) / 2
+    except Exception as e:
+        # Fallback to keyword matching
+        print(f"[WARN] Semantic correlation failed: {e}, using fallback")
+        keywords = technique_description.lower().split()[:5]
+        process_matches = [p for p in log_processes if any(k in p.lower() for k in keywords)]
+        anomaly_matches = [a for a in log_anomalies if any(k in str(a).lower() for k in keywords)]
+        correlation_score = 0.3 if (process_matches or anomaly_matches) else 0.1
+    return json.dumps({
+        "technique_id": technique_id,
+        "technique_name": technique_name,
+        "correlation_score": round(float(correlation_score), 3),
+        "process_matches": process_matches,
+        "anomaly_matches": anomaly_matches,
+        "match_quality": "strong" if correlation_score > 0.7 else "moderate" if correlation_score > 0.5 else "weak"
+    })
+@tool
+def correlate_all_techniques(techniques: str, log_processes: str, log_anomalies: str) -> str:
+    """Correlate all MITRE techniques with log data."""
+    try:
+        technique_list = parse_json_input(techniques)
+        processes = parse_json_input(log_processes)
+        anomalies = parse_json_input(log_anomalies)
+    except:
+        return json.dumps({"error": "Invalid input format"})
+    correlations = [
+        json.loads(correlate_log_with_technique(tech, processes, anomalies))
+        for tech in technique_list
+    ]
+    correlations.sort(key=lambda x: x['correlation_score'], reverse=True)
+    return json.dumps({
+        "correlations": correlations,
+        "top_matches": correlations[:3],
+        "total_techniques": len(correlations),
+        "strong_matches": len([c for c in correlations if c['correlation_score'] > 0.7])
+    })
+@tool
+def calculate_confidence(
+    correlation_score: float,
+    log_severity: str = "medium",
+    mitre_confidence: float = 0.5,
+    num_matched_techniques: int = 1,
+    match_quality: str = "moderate"
+) -> str:
+    """
+    Sophisticated confidence calculation using Bayesian-inspired weighted scoring.
+    """
+    # Returns a JSON string with the keys confidence_score, confidence_level, reasoning
+    # and methodology. The score is a fixed-weight linear blend of the four inputs below.
+    # Weight distribution based on cybersecurity research
+    WEIGHTS = {
+        'correlation': 0.50,   # Primary indicator - semantic match quality
+        'evidence': 0.25,      # Evidence strength (quality + quantity)
+        'mitre_prior': 0.15,   # Bayesian prior from MITRE analysis
+        'severity': 0.10       # Contextual severity adjustment
+    }
+    # Quality scores based on semantic similarity thresholds
+    quality_scores = {'strong': 1.0, 'moderate': 0.7, 'weak': 0.4}
+    # Unrecognised quality labels are scored like 'moderate' (0.7).
+    quality_score = quality_scores.get(match_quality.lower(), 0.7)
+    # Quantity factor with diminishing returns
+    # 0.65 for one technique, rising by 0.15 per technique and saturating at 1.0 from four upward.
+    quantity_factor = min(1.0, 0.5 + (num_matched_techniques * 0.15))
+    evidence_component = quality_score * quantity_factor
+    # Severity scores based on CVSS principles
+    severity_scores = {'critical': 1.0, 'high': 0.85, 'medium': 0.6, 'low': 0.35}
+    # Case-insensitive lookup; unrecognised severities are scored like 'medium' (0.6).
+    severity_component = severity_scores.get(log_severity.lower(), 0.6)
+    # Weighted combination
+    # NOTE(review): correlation_score and mitre_confidence are used as given; nothing clamps them to 0-1.
+    overall_confidence = (
+        WEIGHTS['correlation'] * correlation_score +
+        WEIGHTS['evidence'] * evidence_component +
+        WEIGHTS['mitre_prior'] * mitre_confidence +
+        WEIGHTS['severity'] * severity_component
+    )
+    # Cap at 0.95 to avoid overconfidence bias
+    overall_confidence = min(overall_confidence, 0.95)
+    # Uncertainty penalty for weak single matches
+    # Applied after the cap, so a single weak match can reach at most 0.95 * 0.8 = 0.76.
+    if num_matched_techniques == 1 and match_quality.lower() == 'weak':
+        overall_confidence *= 0.8
+    # Determine confidence level (FIRST/NIST guidelines)
+    if overall_confidence >= 0.75:
+        level = "high"
+    elif overall_confidence >= 0.50:
+        level = "medium"
+    else:
+        level = "low"
+    # Human-readable trace of each input next to the weight it was given.
+    reasoning = (
+        f"Correlation: {correlation_score:.2f} ({WEIGHTS['correlation']}) | "
+        f"Evidence: {num_matched_techniques} {match_quality} ({WEIGHTS['evidence']}) | "
+        f"MITRE: {mitre_confidence:.2f} ({WEIGHTS['mitre_prior']}) | "
+        f"Severity: {log_severity} ({WEIGHTS['severity']})"
+    )
+    # NOTE(review): the methodology string is a fixed label; the code does not derive the
+    # weights from the cited works - confirm the citation before relying on it.
+    return json.dumps({
+        "confidence_score": round(overall_confidence, 3),
+        "confidence_level": level,
+        "reasoning": reasoning,
+        "methodology": "Bayesian weighted scoring (Hutchins 2011, NIST SP 800-150)"
+    })
+# Constants for default fallback
+# Neutral assessment used by the structure_output node when structured extraction fails.
+# It is copied with .copy(), which is shallow: the "matched_techniques" list is shared
+# between copies, so callers should not mutate that list in place.
+DEFAULT_CORRELATION_DATA = {
+    "correlation_score": 0.5,
+    "threat_level": "medium",
+    "confidence": "medium",
+    "matched_techniques": [],
+    "reasoning": "Workflow failed to produce correlation data"
+}
+def create_correlation_workflow() -> Optional[Any]:
+    """
+    Create correlation workflow with structured output using StateGraph.
+    Workflow: START → correlation_agent → structure_output → END
+    Returns the compiled graph, or None when ChatOpenAI is unavailable.
+    """
+    # NOTE(review): ChatOpenAI is imported unconditionally at module top, so this guard
+    # only fires if that import is ever made optional - confirm whether it is still needed.
+    if ChatOpenAI is None:
+        print("[WARN] Missing ChatOpenAI dependency. Returning None.")
+        return None
+    # ReAct agent with all tools
+    correlation_agent = create_react_agent(
+        model="openai:gpt-4o",
+        tools=[correlate_all_techniques, correlate_log_with_technique, calculate_confidence],
+        name="correlation_agent",
+    )
+    # LLM for structured output extraction
+    structured_llm = ChatOpenAI(model="gpt-4o").with_structured_output(
+        CorrelationStructuredResponse,
+        method="json_schema"
+    )
+    def agent_node(state: CorrelationState) -> CorrelationState:
+        """Execute the correlation agent with tools."""
+        # The request is followed by step-by-step tool-usage instructions for the agent.
+        agent_prompt = (
+            f"{state.analysis_request}\n\n"
+            "Instructions:\n"
+            "1. Use correlate_all_techniques to get correlation scores for all techniques.\n"
+            "2. Analyze the top matches and their correlation scores.\n"
+            "3. Use calculate_confidence with:\n"
+            "   - correlation_score: highest or average correlation score\n"
+            "   - log_severity: from log data\n"
+            "   - mitre_confidence: from MITRE data\n"
+            "   - num_matched_techniques: count of techniques with score > 0.5\n"
+            "   - match_quality: 'strong' (>0.7), 'moderate' (0.5-0.7), or 'weak' (<0.5)\n"
+            "4. Summarize findings with evidence and reasoning.\n"
+        )
+        result = correlation_agent.invoke({"messages": [HumanMessage(content=agent_prompt)]})
+        # Extract agent's final message
+        # Walk backwards so the most recent AIMessage wins; agent_output stays None if there is none.
+        for msg in reversed(result.get("messages", [])):
+            if isinstance(msg, AIMessage):
+                state.agent_output = msg.content
+                break
+        return state
+    def structure_output_node(state: CorrelationState) -> CorrelationState:
+        """Extract structured output from agent's analysis."""
+        structure_prompt = f"""Based on the correlation analysis, provide a structured assessment.
+Analysis:
+{state.agent_output}
+Original Request:
+{state.analysis_request}
+Extract:
+- correlation_score: Overall score (0-1)
+- threat_level: low/medium/high/critical
+- confidence: low/medium/high from confidence calculation
+- matched_techniques: Top 3-5 with IDs, confidence, evidence, validation
+- reasoning: Concise synthesis (max 150 words)"""
+        try:
+            structured_result = structured_llm.invoke(structure_prompt)
+            # Store a plain dict when a model instance comes back; anything else is stored as-is.
+            if isinstance(structured_result, CorrelationStructuredResponse):
+                state.structured_response = structured_result.model_dump()
+            else:
+                state.structured_response = structured_result
+        except Exception as e:
+            # Best effort: fall back to the neutral defaults and record why structuring failed.
+            print(f"[ERROR] Failed to create structured output: {e}")
+            state.structured_response = DEFAULT_CORRELATION_DATA.copy()
+            state.structured_response["reasoning"] = f"Structuring failed: {str(e)}"
+        return state
+    # Build workflow graph
+    workflow = StateGraph(CorrelationState)
+    workflow.add_node("correlation_agent", agent_node)
+    workflow.add_node("structure_output", structure_output_node)
+    workflow.set_entry_point("correlation_agent")
+    workflow.add_edge("correlation_agent", "structure_output")
+    workflow.add_edge("structure_output", END)
+    return workflow.compile()
+class CorrelationLogic:
+    """Correlation analysis using ReAct agent workflow with structured output."""
+    def __init__(self):
+        try:
+            self.workflow = create_correlation_workflow()
+            if self.workflow:
+                print("[INFO] Correlation workflow initialized")
+            else:
+                print("[WARN] Workflow initialization failed")
+        except Exception as e:
+            print(f"[WARN] Workflow initialization error: {e}")
+            self.workflow = None
+    def correlate(self, log_input: LogInput, mitre_input: MitreInput) -> CorrelationOutput:
+        """Main correlation method."""
+        correlation_data = self.run_workflow(log_input, mitre_input)
+        correlation_id = f"CORR_{log_input.analysis_id}_{mitre_input.analysis_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+        # Use defaults if workflow failed
+        if not correlation_data:
+            print("[WARN] Using default correlation data")
+            return CorrelationOutput(
+                correlation_id=correlation_id,
+                correlation_score=0.5,
+                threat_level=ThreatLevel.MEDIUM,
+                confidence=ConfidenceLevel.MEDIUM,
+                matched_techniques=[],
+                reasoning="Workflow failed",
+                timestamp=datetime.now().isoformat()
+            )
+        # Parse matched techniques
+        matched_techniques = []
+        for mt_data in correlation_data.get("matched_techniques", []):
+            try:
+                matched_techniques.append(
+                    MatchedTechnique(
+                        technique_id=mt_data.get("technique_id", "Unknown"),
+                        match_confidence=mt_data.get("match_confidence", 0.0),
+                        evidence=mt_data.get("evidence", ""),
+                        validation_result=None
+                    )
+                )
+            except Exception as e:
+                print(f"[WARN] Skipping malformed technique: {e}")
+        # Helper to convert string to enum
+        def to_enum(enum_cls, value: str, default):
+            try:
+                return enum_cls(value.lower())
+            except:
+                return default
+        return CorrelationOutput(
+            correlation_id=correlation_id,
+            correlation_score=correlation_data.get("correlation_score", 0.5),
+            threat_level=to_enum(ThreatLevel, correlation_data.get("threat_level", "medium"), ThreatLevel.MEDIUM),
+            confidence=to_enum(ConfidenceLevel, correlation_data.get("confidence", "medium"), ConfidenceLevel.MEDIUM),
+            matched_techniques=matched_techniques,
+            reasoning=correlation_data.get("reasoning", "No reasoning provided"),
+            timestamp=datetime.now().isoformat()
+        )
+    def run_workflow(self, log_input: LogInput, mitre_input: MitreInput) -> Optional[Dict[str, Any]]:
+        """Execute the correlation workflow and return structured data."""
+        if not self.workflow:
+            print("[ERROR] Workflow not initialized")
+            return None
+        analysis_request = (
+            f"Perform correlation analysis for this security event.\n\n"
+            f"LOG DATA:\n"
+            f"- ID: {log_input.analysis_id}\n"
+            f"- Severity: {log_input.severity}\n"
+            f"- Systems: {', '.join(log_input.affected_systems)}\n"
+            f"- Anomalies: {', '.join(log_input.anomalies)}\n"
+            f"- Processes: {', '.join(log_input.processes)}\n"
+            f"- Summary: {log_input.raw_summary}\n\n"
+            f"MITRE DATA:\n"
+            f"- Techniques: {json.dumps(mitre_input.techniques)}\n"
+            f"- Coverage: {mitre_input.coverage_score}\n"
+            f"- Confidence: {mitre_input.confidence}\n"
+            f"- Analysis: {mitre_input.analysis_text}\n"
+        )
+        try:
+            initial_state = CorrelationState(analysis_request=analysis_request)
+            result = self.workflow.invoke(initial_state)
+            if isinstance(result, dict) and "structured_response" in result:
+                return result.get("structured_response")
+            print("[WARN] No structured_response in result")
+            return None
+        except Exception as e:
+            print(f"[ERROR] Workflow failed: {type(e).__name__}: {e}")
+            import traceback
+            traceback.print_exc()
+            return None
+class CorrelationAgent:
+    """Multi-Agent Correlation Agent with StateGraph workflow."""
+    def __init__(self):
+        self.correlation_logic = CorrelationLogic()
+        print("[INFO] CorrelationAgent initialized")
+    def process(self, log_input: LogInput, mitre_input: MitreInput) -> CorrelationOutput:
+        """Process correlation analysis using multi-agent system."""
+        print(f"[INFO] Processing correlation: {log_input.analysis_id}")
+        try:
+            result = self.correlation_logic.correlate(log_input, mitre_input)
+            print(f"[INFO] Completed: {result.correlation_id}")
+            print(f"[INFO] Threat: {result.threat_level.value.upper()} | "
+                  f"Confidence: {result.confidence.value.upper()} | "
+                  f"Score: {result.correlation_score:.3f} | "
+                  f"Techniques: {len(result.matched_techniques)}")
+            return result
+        except Exception as e:
+            print(f"[ERROR] Correlation processing failed: {e}")
+            raise

src/agents/correlation_agent/input_converters.py ADDED Viewed

	@@ -0,0 +1,49 @@

+from typing import Dict, Any
+from .types import LogInput, MitreInput
+def convert_mitre_agent_input(mitre_agent_input) -> LogInput:
+    """Convert MitreAgentInput to LogInput format for correlation testing"""
+    # Extract anomaly descriptions
+    anomalies = [anomaly["description"] for anomaly in mitre_agent_input.detected_anomalies]
+    # Map severity level to string
+    severity_str = mitre_agent_input.severity.value.upper()
+    return LogInput(
+        analysis_id=mitre_agent_input.analysis_id,
+        severity=severity_str,
+        affected_systems=mitre_agent_input.affected_systems,
+        anomalies=anomalies,
+        processes=mitre_agent_input.processes,
+        raw_summary=mitre_agent_input.raw_summary
+    )
+def convert_mitre_analysis_output(mitre_analysis_result, original_input) -> MitreInput:
+    """Convert MitreAgent analysis result to MitreInput format for correlation"""
+    # Extract top techniques from analysis result safely
+    techniques = []
+    technique_details = mitre_analysis_result.get('technique_details', [])
+    for tech_detail in technique_details[:10]:  # Top 10 techniques
+        techniques.append({
+            "attack_id": tech_detail.get('attack_id', 'Unknown'),
+            "name": tech_detail.get('name', 'Unknown Technique'),
+            "relevance_score": tech_detail.get('relevance_score', 0.5)
+        })
+    # Default techniques if none found
+    if not techniques:
+        techniques = [
+            {"attack_id": "T1059.001", "name": "PowerShell", "relevance_score": 0.7},
+            {"attack_id": "T1566.001", "name": "Spearphishing Attachment", "relevance_score": 0.6}
+        ]
+    return MitreInput(
+        analysis_id=f"MITRE_{getattr(original_input, 'analysis_id', 'UNKNOWN')}",
+        techniques=techniques,
+        coverage_score=mitre_analysis_result.get('coverage_score', 0.5),
+        confidence=mitre_analysis_result.get('confidence', 0.5),
+        analysis_text=mitre_analysis_result.get('analysis', 'MITRE analysis completed')
+    )

src/agents/correlation_agent/test.py ADDED Viewed

	@@ -0,0 +1,176 @@

+from dotenv import load_dotenv
+load_dotenv()
+from src.agents.correlation_agent.correlation_logic import CorrelationAgent
+from src.agents.correlation_agent.types import LogInput, MitreInput
+from src.agents.mitre_retriever_agent.mitre_example_input import create_sample_log_input, create_elaborate_mockup_incident
+from src.agents.mitre_retriever_agent.mitre_agent import MitreAgent
+from src.agents.correlation_agent.input_converters import convert_mitre_agent_input, convert_mitre_analysis_output
+def test_sample_correlation():
+    """Test basic correlation functionality using real MITRE agent output"""
+    # End-to-end script rather than a unit test: it runs MitreAgent and CorrelationAgent
+    # for real and prints the outcome without asserting on it.
+    # NOTE(review): presumably needs OpenAI credentials loaded by load_dotenv() - confirm.
+    print("="*60)
+    print("TESTING BASIC CORRELATION WITH MITRE AGENT")
+    print("="*60)
+    # Create sample input from mitre_example_input
+    mitre_agent_input = create_sample_log_input()
+    log_input = convert_mitre_agent_input(mitre_agent_input)
+    print(f"\nSAMPLE INPUT CREATED:")
+    print(f"- Analysis ID: {log_input.analysis_id}")
+    print(f"- Severity: {log_input.severity}")
+    print(f"- Affected Systems: {log_input.affected_systems}")
+    print(f"- Anomalies: {len(log_input.anomalies)} detected")
+    print(f"- Processes: {log_input.processes}")
+    print("\nRUNNING MITRE AGENT ANALYSIS...")
+    mitre_agent = MitreAgent(
+        llm_provider="openai",
+        model_name="gpt-4o",
+        max_iterations=3
+    )
+    mitre_analysis_result = mitre_agent.analyze_threat(mitre_agent_input)
+    print(f"✓ MITRE analysis completed")
+    print(f"  - Techniques found: {len(mitre_analysis_result.get('technique_details', []))}")
+    print(f"  - Coverage score: {mitre_analysis_result.get('coverage_score', 0):.3f}")
+    print(f"  - Confidence: {mitre_analysis_result.get('confidence', 0):.3f}")
+    # Convert MITRE analysis to MitreInput format
+    mitre_input = convert_mitre_analysis_output(mitre_analysis_result, mitre_agent_input)
+    print(f"\n✓ MITRE INPUT CONVERTED:")
+    print(f"  - Top {min(5, len(mitre_input.techniques))} techniques:")
+    for i, tech in enumerate(mitre_input.techniques[:5], 1):
+        print(f"    {i}. {tech['attack_id']}: {tech['name']} (Score: {tech['relevance_score']:.3f})")
+    # Run correlation analysis
+    print("\nRUNNING CORRELATION ANALYSIS...")
+    correlation_agent = CorrelationAgent()
+    result = correlation_agent.process(log_input, mitre_input)
+    # Display results
+    print(f"\n{'='*60}")
+    print(f"CORRELATION RESULTS:")
+    print(f"{'='*60}")
+    print(f"ID: {result.correlation_id}")
+    print(f"Score: {result.correlation_score:.3f}")
+    print(f"Threat Level: {result.threat_level.value.upper()}")
+    print(f"Confidence: {result.confidence.value.upper()}")
+    print(f"Timestamp: {result.timestamp}")
+    print(f"\nMATCHED TECHNIQUES ({len(result.matched_techniques)}):")
+    for i, tech in enumerate(result.matched_techniques, 1):
+        print(f"{i}. {tech.technique_id} - Confidence: {tech.match_confidence:.3f}")
+        # Evidence is truncated to 100 characters for display only.
+        print(f"   Evidence: {tech.evidence[:100]}{'...' if len(tech.evidence) > 100 else ''}")
+    print(f"\nREASONING:")
+    print(f"{result.reasoning}")
+    # Returned so main() can print a combined summary.
+    return result
+def test_elaborate_correlation():
+    """Test correlation using elaborate mockup incident with MITRE agent"""
+    # Same end-to-end flow as test_sample_correlation, driven by the elaborate mock incident;
+    # results are printed, not asserted.
+    print("\n" + "="*60)
+    print("TESTING CORRELATION - ELABORATE INCIDENT")
+    print("="*60)
+    # Create elaborate incident input
+    mitre_agent_input = create_elaborate_mockup_incident()
+    log_input = convert_mitre_agent_input(mitre_agent_input)
+    print(f"\nELABORATE INCIDENT INPUT:")
+    print(f"- Analysis ID: {log_input.analysis_id}")
+    print(f"- Severity: {log_input.severity}")
+    print(f"- Affected Systems: {len(log_input.affected_systems)} systems")
+    print(f"- Anomalies: {len(log_input.anomalies)} detected")
+    print(f"- Processes: {len(log_input.processes)} processes")
+    # Run MITRE agent analysis for elaborate incident
+    print("\nRUNNING MITRE AGENT ANALYSIS FOR ELABORATE INCIDENT...")
+    mitre_agent = MitreAgent(
+        llm_provider="openai",
+        model_name="gpt-4o",
+        max_iterations=3
+    )
+    mitre_analysis_result = mitre_agent.analyze_threat(mitre_agent_input)
+    print(f"✓ MITRE analysis completed")
+    print(f"  - Techniques found: {len(mitre_analysis_result.get('technique_details', []))}")
+    print(f"  - Coverage score: {mitre_analysis_result.get('coverage_score', 0):.3f}")
+    print(f"  - Confidence: {mitre_analysis_result.get('confidence', 0):.3f}")
+    # Convert to MitreInput
+    mitre_input = convert_mitre_analysis_output(mitre_analysis_result, mitre_agent_input)
+    print(f"\n✓ TOP TECHNIQUES FROM MITRE ANALYSIS:")
+    for i, tech in enumerate(mitre_input.techniques[:5], 1):
+        # Names are cut to 50 characters; the trailing "..." is printed unconditionally.
+        print(f"  {i}. {tech['attack_id']}: {tech['name'][:50]}... (Score: {tech['relevance_score']:.3f})")
+    # Run correlation analysis
+    print("\nRUNNING ELABORATE CORRELATION ANALYSIS...")
+    correlation_agent = CorrelationAgent()
+    result = correlation_agent.process(log_input, mitre_input)
+    print(f"\n{'='*60}")
+    print(f"ELABORATE CORRELATION RESULTS:")
+    print(f"{'='*60}")
+    print(f"ID: {result.correlation_id}")
+    print(f"Score: {result.correlation_score:.3f}")
+    print(f"Threat Level: {result.threat_level.value.upper()}")
+    print(f"Confidence: {result.confidence.value.upper()}")
+    print(f"Matched Techniques: {len(result.matched_techniques)}")
+    print(f"\nTOP CORRELATED TECHNIQUES:")
+    for i, tech in enumerate(result.matched_techniques[:5], 1):
+        print(f"{i}. {tech.technique_id} - Confidence: {tech.match_confidence:.3f}")
+        # Evidence is truncated to 80 characters for display only.
+        print(f"   Evidence: {tech.evidence[:80]}{'...' if len(tech.evidence) > 80 else ''}")
+    print(f"\nREASONING:")
+    print(f"{result.reasoning}")
+    # Returned so main() can print a combined summary.
+    return result
+def main():
+    """Main test function"""
+    # Runs both scenarios in order and prints a side-by-side summary; any exception is
+    # reported with its traceback and then re-raised so the process exits non-zero.
+    print("╔" + "="*58 + "╗")
+    print("║" + " "*10 + "CORRELATION AGENT TEST SUITE" + " "*20 + "║")
+    print("╚" + "="*58 + "╝")
+    print()
+    try:
+        # Test sample correlation with MITRE agent
+        result1 = test_sample_correlation()
+        # Test elaborate correlation with elaborate incident
+        result2 = test_elaborate_correlation()
+        print("\n" + "="*60)
+        print("✓ ALL TESTS COMPLETED SUCCESSFULLY")
+        print("="*60)
+        # Summary
+        print(f"\nTEST SUMMARY:")
+        print(f"\n1. Sample Input Test:")
+        print(f"   - Threat Level: {result1.threat_level.value.upper()}")
+        print(f"   - Confidence: {result1.confidence.value.upper()}")
+        print(f"   - Correlation Score: {result1.correlation_score:.3f}")
+        print(f"   - Matched Techniques: {len(result1.matched_techniques)}")
+        print(f"\n2. Elaborate Incident Test:")
+        print(f"   - Threat Level: {result2.threat_level.value.upper()}")
+        print(f"   - Confidence: {result2.confidence.value.upper()}")
+        print(f"   - Correlation Score: {result2.correlation_score:.3f}")
+        print(f"   - Matched Techniques: {len(result2.matched_techniques)}")
+        print("\n" + "="*60)
+    except Exception as e:
+        print(f"\n❌ TEST FAILED: {e}")
+        import traceback
+        traceback.print_exc()
+        raise
+if __name__ == "__main__":
+    main()

src/agents/correlation_agent/types.py ADDED Viewed

	@@ -0,0 +1,48 @@

+from typing import Dict, List, Any, Optional
+from dataclasses import dataclass
+from enum import Enum
+class ThreatLevel(Enum):
+    """Overall threat rating of a correlation result; values are the lower-case labels."""
+    LOW = "low"
+    MEDIUM = "medium"
+    HIGH = "high"
+    CRITICAL = "critical"
+class ConfidenceLevel(Enum):
+    """Confidence rating of a correlation result; values are the lower-case labels."""
+    LOW = "low"
+    MEDIUM = "medium"
+    HIGH = "high"
+@dataclass
+class LogInput:
+    """Log-analysis side of a correlation request."""
+    analysis_id: str
+    # Severity label as a string; convert_mitre_agent_input supplies it upper-cased.
+    severity: str
+    affected_systems: List[str]
+    # Anomaly descriptions (plain text).
+    anomalies: List[str]
+    processes: List[str]
+    raw_summary: str
+@dataclass
+class MitreInput:
+    """MITRE-analysis side of a correlation request."""
+    analysis_id: str
+    # One dict per technique; convert_mitre_analysis_output fills 'attack_id', 'name'
+    # and 'relevance_score'.
+    techniques: List[Dict[str, Any]]
+    coverage_score: float
+    confidence: float
+    analysis_text: str
+@dataclass
+class MatchedTechnique:
+    """One technique that the correlation step matched against the log data."""
+    technique_id: str
+    match_confidence: float
+    evidence: str
+    # Optional extra validation payload; CorrelationLogic.correlate currently always passes None.
+    validation_result: Optional[Dict[str, Any]] = None
+@dataclass
+class CorrelationOutput:
+    """Final result returned by the correlation agent."""
+    correlation_id: str
+    correlation_score: float
+    threat_level: ThreatLevel
+    confidence: ConfidenceLevel
+    matched_techniques: List[MatchedTechnique]
+    reasoning: str = ""
+    # ISO-8601 string as produced by datetime.now().isoformat() in CorrelationLogic.correlate.
+    timestamp: str = ""

src/agents/cti_agent/__pycache__/config.cpython-311.pyc ADDED Viewed

Binary file (13.5 kB). View file

src/agents/cti_agent/__pycache__/cti_agent.cpython-311.pyc ADDED Viewed

Binary file (40.8 kB). View file

src/agents/cti_agent/__pycache__/cti_tools.cpython-311.pyc ADDED Viewed

Binary file (12.2 kB). View file

src/agents/cti_agent/config.py ADDED Viewed

	@@ -0,0 +1,371 @@

+# Search configuration
+# NOTE(review): the consumer of this dict is not visible here; the keys look like options
+# for a web-search tool restricted to the listed CTI publishers - confirm against the caller.
+CTI_SEARCH_CONFIG = {
+    "max_results": 5,
+    "search_depth": "advanced",
+    "include_raw_content": True,
+    "include_domains": [
+        "*.cisa.gov",  # US Cybersecurity and Infrastructure Security Agency
+        "*.us-cert.gov",  # US-CERT advisories
+        "*.crowdstrike.com",  # CrowdStrike threat intelligence
+        "*.mandiant.com",  # Mandiant (Google) threat reports
+        "*.trendmicro.com",  # Trend Micro research
+        "*.securelist.com",  # Kaspersky SecureList blog
+        "*.cert.europa.eu",  # European CERT
+        "*.ncsc.gov.uk",  # UK National Cyber Security Centre
+    ],
+}
+# Model configuration
+# "<provider>:<model>" identifier string.
+MODEL_NAME = "google_genai:gemini-2.0-flash"
+# CTI Planner Prompt
+CTI_PLANNER_PROMPT = """You are a Cyber Threat Intelligence (CTI) researcher planning
+to retrieve actual threat intelligence from CTI reports.
+Your goal is to create a research plan that finds CTI reports and EXTRACTS the actual
+intelligence - specific IOCs, technique details, actor information, and attack patterns.
+IMPORTANT GUIDELINES:
+1. Search for actual CTI reports from reputable sources
+2. Prioritize recent reports (2024-2025)
+3. ALWAYS fetch full report content to extract intelligence
+4. Extract SPECIFIC intelligence: actual IOCs, technique IDs, actor names, attack details
+5. Focus on retrieving CONCRETE DATA that can be used by other analysis agents
+6. Maximum 4 tasks with only one time of web searching
+Available tools:
+(1) SearchCTIReports[query]: Searches for CTI reports, threat analyses, and security advisories.
+    - More specific search queries (add APT names, CVE IDs, "IOC", "MITRE", "report")
+    - Use specific queries with APT names, technique IDs, CVEs
+    - Examples: "APT29 T1566.002 report 2025", "Scattered Spider IOCs"
+(2) ExtractURL[search_result, index]: Extract a specific URL from search results JSON.
+    - search_result: JSON string from SearchCTIReports
+    - index: Which report URL to extract (default: 0 for first)
+    - ALWAYS use this to get the actual report URL from search results
+(3) FetchReport[url]: Retrieves the full content of a CTI report using real url.
+    - ALWAYS use this to get actual report content for intelligence extraction
+    - Essential for retrieving specific IOCs and details
+(4) ExtractIOCs[report_content]: Extracts actual Indicators of Compromise from reports.
+    - Returns specific IPs, domains, hashes, URLs, file names
+    - Provides concrete IOCs that can be used for detection
+(5) IdentifyThreatActors[report_content]: Extracts threat actor details from reports.
+    - Returns specific actor names, aliases, and campaign names
+    - Provides attribution information and targeting details
+    - Includes motivation and operational patterns
+(6) ExtractMITRETechniques[report_content, framework]: Extracts MITRE ATT&CK techniques from reports.
+    - framework: "Enterprise", "Mobile", or "ICS" (default: "Enterprise")
+    - Returns specific technique IDs (T1234) with descriptions
+    - Maps malware behaviors to MITRE framework
+    - Provides structured technique analysis
+(7) LLM[instruction]: Synthesis and correlation of extracted intelligence.
+    - Combine intelligence from multiple sources
+    - DON'T USE FOR ANY OTHER PURPOSES
+    - Identify patterns across findings
+    - Correlate IOCs with techniques and actors
+PLAN STRUCTURE:
+Each plan step should be: Plan: [description] #E[N] = Tool[input]
+Example for task "Find threat intelligence about APT29 using T1566.002":
+Plan: Search for recent APT29 campaign reports with IOCs
+#E1 = SearchCTIReports[APT29 T1566.002 spearphishing IOCs 2025]
+Plan: Search for detailed technical analysis of APT29 spearphishing
+#E2 = SearchCTIReports[APT29 spearphishing technical analysis filetype:pdf]
+Plan: Fetch the most detailed technical report for intelligence extraction
+#E3 = FetchReport[top ranked URL from #E1 with most technical detail]
+Plan: Extract all specific IOCs from the fetched report
+#E4 = ExtractIOCs[#E3]
+Plan: Extract threat actor details and campaign information from the report
+#E5 = IdentifyThreatActors[#E3]
+Plan: If first report lacks detail, fetch second report for additional intelligence
+#E6 = FetchReport[second best URL from #E1]
+Plan: Extract IOCs from second report to enrich intelligence
+#E7 = ExtractIOCs[#E6]
+Plan: Correlate and consolidate all extracted intelligence
+#E8 = LLM[Consolidate intelligence from #E4, #E5, #E6, and #E7. Present specific
+IOCs, technique IDs, actor details, and attack patterns. Identify overlaps and unique findings.]
+Now create a detailed plan for the following task:
+Task: {task}"""
+# CTI Solver Prompt
+CTI_SOLVER_PROMPT = """You are a Cyber Threat Intelligence analyst creating a final intelligence report.
+Below are the COMPLETE results from your CTI research. Each section contains the full output from extraction tools.
+{structured_results}
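+================================================================================
+EXECUTION PLAN OVERVIEW:
+================================================================================
+{plan}
+================================================================================
+ORIGINAL TASK: {task}
+================================================================================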
+Create a comprehensive threat intelligence report with the following structure:
+## Intelligence Sources
+[List reports analyzed with titles and sources]
+## Threat Actors & Attribution
+[Names, aliases, campaigns, and attribution details from IdentifyThreatActors results]
+## MITRE ATT&CK Techniques Identified
+[All technique IDs from ExtractMITRETechniques results, with descriptions]
+## Indicators of Compromise (IOCs) Retrieved
+[All IOCs from ExtractIOCs results, organized by type]
+### IP Addresses
+### Domains
+### File Hashes
+### URLs
+### Email Addresses
+### File Names
+### Other Indicators
+## Attack Patterns & Campaign Details
+[Specific attack flows, timeline, targeting from reports]
+## Key Findings Summary
+[3-5 critical bullet points]
+## Intelligence Gaps
+[What information was not available]
+**INSTRUCTIONS:**
+- Extract ALL data from results above - don't summarize, list actual values
+- Parse JSON if present in results
+- If Q&A format, extract all answers
+- Be comprehensive and specific
+"""
+# Regex pattern for parsing CTI plans
+CTI_REGEX_PATTERN = r"Plan:\s*(.+)\s*(#E\d+)\s*=\s*(\w+)\s*\[([^\]]+)\]"
+# Tool-specific prompts
+IOC_EXTRACTION_PROMPT = """Extract all Indicators of Compromise (IOCs) from the content below.
+**Instructions:** List ONLY the actual IOCs found. No explanations, no summaries - just the indicators.
+**Content:**
+{content}
+**Extract and list:**
+**IP Addresses:**
+[List IPs, or write "None found"]
+**Domains:**
+[List domains, or write "None found"]
+**URLs:**
+[List malicious URLs, or write "None found"]
+**File Hashes:**
+[List hashes with type (MD5/SHA1/SHA256), or write "None found"]
+**Email Addresses:**
+[List emails, or write "None found"]
+**File Names:**
+[List malicious files/paths, or write "None found"]
+**Registry Keys:**
+[List registry keys, or write "None found"]
+**Other Indicators:**
+[List mutexes, user agents, etc., or write "None found"]
+If no specific IOCs found, respond: "No extractable IOCs in content."
+"""
+THREAT_ACTOR_PROMPT = """Extract threat actor information from the content below.
+**Instructions:** Provide concise answers. Include brief descriptions where relevant.
+**Content:**
+{content}
+**Answer these questions:**
+**Q: What threat actor/APT group is discussed?**
+A: [Name and aliases, e.g., "APT29 (Cozy Bear, The Dukes)" or "None identified"]
+**Q: What is this actor known for?**
+A: [1-2 sentence description of their typical activities/focus, or "No attribution details"]
+**Q: What campaigns/operations are mentioned?**
+A: [List campaign names with timeframes, e.g., "NobleBaron (2024-Q2)" or "None mentioned"]
+**Q: What is their suspected origin/attribution?**
+A: [Nation-state/origin and confidence level, e.g., "Russian state-sponsored (High confidence)" or "Unknown"]
+**Q: Who/what do they target?**
+A: [Industries and regions, e.g., "Government agencies in Europe, Defense sector in North America" or "Not specified"]
+**Q: What is their motivation?**
+A: [Primary objective, e.g., "Espionage and intelligence collection" or "Not specified"]
+If no specific threat actor information found, respond: "No threat actor attribution in content."
+"""
+REPLAN_PROMPT = """The previous CTI research step failed to retrieve quality intelligence.
+ORIGINAL TASK: {task}
+FAILED STEP:
+Plan: {failed_step}
+{step_name} = {tool}[{tool_input}]
+RESULT: {results}
+PROBLEM: {problem}
+COMPLETED STEPS SO FAR:
+{completed_steps}
+Create an IMPROVED plan for this specific step that will retrieve ACTUAL CTI intelligence.
+Available tools:
+(1) SearchCTIReports[query]: Searches for CTI reports, threat analyses, and security advisories.
+    - Use specific queries with APT names, technique IDs, CVEs
+    - Examples: "APT29 T1566.002 report 2024", "Scattered Spider IOCs"
+(2) ExtractURL[search_result, index]: Extract a specific URL from search results JSON.
+    - search_result: JSON string from SearchCTIReports
+    - index: Which report URL to extract (default: 0 for first)
+    - ALWAYS use this to get the actual report URL from search results
+(3) FetchReport[url]: Retrieves the full content of a CTI report.
+    - ALWAYS use this to get actual report content for intelligence extraction
+    - Essential for retrieving specific IOCs and details
+(4) ExtractIOCs[report_content]: Extracts actual Indicators of Compromise from reports.
+    - Returns specific IPs, domains, hashes, URLs, file names
+    - Provides concrete IOCs that can be used for detection
+(5) IdentifyThreatActors[report_content]: Extracts threat actor details from reports.
+    - Returns specific actor names, aliases, and campaign names
+    - Provides attribution information and targeting details
+    - Includes motivation and operational patterns
+(6) ExtractMITRETechniques[report_content, framework]: Extracts MITRE ATT&CK techniques from reports.
+    - framework: "Enterprise", "Mobile", or "ICS" (default: "Enterprise")
+    - Returns specific technique IDs (T1234) with descriptions
+    - Maps malware behaviors to MITRE framework
+    - Provides structured technique analysis
+(7) LLM[instruction]: Synthesis and correlation of extracted intelligence.
+    - Combine intelligence from multiple sources
+    - Identify patterns across findings
+    - Correlate IOCs with techniques and actors
+Consider:
+1. More specific search queries (add APT names, CVE IDs, "IOC", "MITRE", "report")
+2. Alternative CTI sources (CISA advisories, vendor reports, not news articles)
+3. Different tool combinations (search → extract URL → fetch → extract IOCs)
+Provide ONLY the corrected step in this format:
+Plan: [improved description]
+#E{step} = Tool[improved input]"""
+MITRE_EXTRACTION_PROMPT = """Extract MITRE ATT&CK {framework} techniques from the content below.
+**Instructions:**
+1. Identify behaviors described in the content
+2. Map to MITRE technique IDs (main techniques only: T#### not T####.###)
+3. Provide brief description of what each technique means
+4. List final technique IDs on the last line
+**Content:**
+{content}
+**Identified Techniques:**
+[For each technique found, format as:]
+**T####** - [Technique Name]: [1 sentence: what this technique is and why it was identified in the content]
+[Continue for all techniques...]
+**Final Answer - Technique IDs:**
+T####, T####, T####
+[If no valid techniques found, respond: "No MITRE {framework} techniques identified in content."]
+"""
+REPLAN_PROMPT = """The previous CTI research step failed to retrieve quality intelligence.
+ORIGINAL TASK: {task}
+FAILED STEP:
+Plan: {failed_step}
+{step_name} = {tool}[{tool_input}]
+RESULT: {results}
+PROBLEM: {problem}
+COMPLETED STEPS SO FAR:
+{completed_steps}
+Create an IMPROVED plan for this specific step that will retrieve ACTUAL CTI intelligence.
+Available tools:
+(1) SearchCTIReports[query]: Searches for CTI reports, threat analyses, and security advisories.
+    - Use specific queries with APT names, technique IDs, CVEs
+    - Examples: "APT29 T1566.002 report 2024", "Scattered Spider IOCs"
+(2) ExtractURL[search_result, index]: Extract a specific URL from search results JSON.
+    - search_result: JSON string from SearchCTIReports
+    - index: Which report URL to extract (default: 0 for first)
+    - ALWAYS use this to get the actual report URL from search results
+(3) FetchReport[url]: Retrieves the full content of a CTI report.
+    - ALWAYS use this to get actual report content for intelligence extraction
+    - Essential for retrieving specific IOCs and details
+(4) ExtractIOCs[report_content]: Extracts actual Indicators of Compromise from reports.
+    - Returns specific IPs, domains, hashes, URLs, file names
+    - Provides concrete IOCs that can be used for detection
+(5) IdentifyThreatActors[report_content]: Extracts threat actor details from reports.
+    - Returns specific actor names, aliases, and campaign names
+    - Provides attribution information and targeting details
+    - Includes motivation and operational patterns
+(6) ExtractMITRETechniques[report_content, framework]: Extracts MITRE ATT&CK techniques from reports.
+    - framework: "Enterprise", "Mobile", or "ICS" (default: "Enterprise")
+    - Returns specific technique IDs (T1234) with descriptions
+    - Maps malware behaviors to MITRE framework
+(7) LLM[instruction]: Synthesis and correlation of extracted intelligence.
+    - Combine intelligence from multiple sources
+    - Identify patterns across findings
+    - Correlate IOCs with techniques and actors
+Consider:
+1. More specific search queries (add APT names, CVE IDs, "IOC", "MITRE", "report")
+2. Alternative CTI sources (CISA advisories, vendor reports, not news articles)
+3. Different tool combinations (search → extract URL → fetch → extract IOCs/techniques)
+Provide ONLY the corrected step in this format:
+Plan: [improved description]
+#E{step} = Tool[improved input]"""

src/agents/cti_agent/cti-bench/data/cti-ate.tsv ADDED Viewed