minhan6559 committed on
Commit
223ef32
·
verified ·
1 Parent(s): 5af9ea8

Upload 126 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. app.py +292 -0
  3. cyber_knowledge_base/bm25_retriever.pkl +3 -0
  4. cyber_knowledge_base/chroma/.gitignore +1 -0
  5. cyber_knowledge_base/chroma/1ab81415-9731-4a9a-8d06-afc7fc190d32/.gitignore +1 -0
  6. cyber_knowledge_base/chroma/1ab81415-9731-4a9a-8d06-afc7fc190d32/data_level0.bin +3 -0
  7. cyber_knowledge_base/chroma/1ab81415-9731-4a9a-8d06-afc7fc190d32/header.bin +3 -0
  8. cyber_knowledge_base/chroma/1ab81415-9731-4a9a-8d06-afc7fc190d32/length.bin +3 -0
  9. cyber_knowledge_base/chroma/1ab81415-9731-4a9a-8d06-afc7fc190d32/link_lists.bin +3 -0
  10. cyber_knowledge_base/chroma/76f221d1-5f9d-44f8-8c9c-f610482d9b15/data_level0.bin +3 -0
  11. cyber_knowledge_base/chroma/76f221d1-5f9d-44f8-8c9c-f610482d9b15/header.bin +3 -0
  12. cyber_knowledge_base/chroma/76f221d1-5f9d-44f8-8c9c-f610482d9b15/index_metadata.pickle +3 -0
  13. cyber_knowledge_base/chroma/76f221d1-5f9d-44f8-8c9c-f610482d9b15/length.bin +3 -0
  14. cyber_knowledge_base/chroma/76f221d1-5f9d-44f8-8c9c-f610482d9b15/link_lists.bin +3 -0
  15. cyber_knowledge_base/chroma/chroma.sqlite3 +3 -0
  16. requirements.txt +607 -3
  17. run_app.py +53 -0
  18. src/agents/__pycache__/llm_client.cpython-311.pyc +0 -0
  19. src/agents/correlation_agent/correlation_logic.py +449 -0
  20. src/agents/correlation_agent/input_converters.py +49 -0
  21. src/agents/correlation_agent/test.py +176 -0
  22. src/agents/correlation_agent/types.py +48 -0
  23. src/agents/cti_agent/__pycache__/config.cpython-311.pyc +0 -0
  24. src/agents/cti_agent/__pycache__/cti_agent.cpython-311.pyc +0 -0
  25. src/agents/cti_agent/__pycache__/cti_tools.cpython-311.pyc +0 -0
  26. src/agents/cti_agent/config.py +371 -0
  27. src/agents/cti_agent/cti-bench/data/cti-ate.tsv +0 -0
  28. src/agents/cti_agent/cti-bench/data/cti-mcq.tsv +0 -0
  29. src/agents/cti_agent/cti-bench/data/cti-rcm-2021.tsv +0 -0
  30. src/agents/cti_agent/cti-bench/data/cti-rcm.tsv +0 -0
  31. src/agents/cti_agent/cti-bench/data/cti-taa.tsv +0 -0
  32. src/agents/cti_agent/cti-bench/data/cti-vsp.tsv +0 -0
  33. src/agents/cti_agent/cti-evaluator.py +708 -0
  34. src/agents/cti_agent/cti_agent.py +920 -0
  35. src/agents/cti_agent/cti_tools.py +263 -0
  36. src/agents/cti_agent/testing_cti_agent.ipynb +573 -0
  37. src/agents/cti_agent/tool_evaluation_results/extract_mitre_techniques_results.csv +230 -0
  38. src/agents/cti_agent/tool_evaluation_results/extract_mitre_techniques_summary.json +12 -0
  39. src/agents/cti_agent/tool_evaluation_results/identify_threat_actors_results.csv +173 -0
  40. src/agents/cti_agent/tool_evaluation_results/identify_threat_actors_summary.json +9 -0
  41. src/agents/database_agent/__pycache__/agent.cpython-311.pyc +0 -0
  42. src/agents/database_agent/__pycache__/prompts.cpython-311.pyc +0 -0
  43. src/agents/database_agent/agent.py +442 -0
  44. src/agents/database_agent/prompts.py +71 -0
  45. src/agents/global_supervisor/__pycache__/supervisor.cpython-311.pyc +0 -0
  46. src/agents/log_analysis_agent/__pycache__/agent.cpython-311.pyc +0 -0
  47. src/agents/log_analysis_agent/__pycache__/prompts.cpython-311.pyc +0 -0
  48. src/agents/log_analysis_agent/__pycache__/state_models.cpython-311.pyc +0 -0
  49. src/agents/log_analysis_agent/__pycache__/utils.cpython-311.pyc +0 -0
  50. src/agents/log_analysis_agent/agent.py +1058 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ cyber_knowledge_base/chroma/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Streamlit Web App for Cybersecurity Agent Pipeline
4
+
5
+ A simple web interface for uploading log files and running the cybersecurity analysis pipeline
6
+ with different LLM models.
7
+ """
8
+
9
+ import os
10
+ import sys
11
+ import tempfile
12
+ import shutil
13
+ import streamlit as st
14
+ from pathlib import Path
15
+ from typing import Dict, Any, Optional
16
+
17
+ from src.full_pipeline.simple_pipeline import analyze_log_file
18
+
19
+ from dotenv import load_dotenv
20
+ from huggingface_hub import login as huggingface_login
21
+
22
+ load_dotenv()
23
+
24
+
25
def get_model_providers() -> Dict[str, Dict[str, str]]:
    """Return the selectable LLM catalogue, keyed by provider display name.

    Each provider maps a model display name to an identifier string of the
    form ``"<prefix>:<model>"``. A fresh dictionary is built on every call.
    """
    # (provider display name, identifier prefix, model names)
    catalogue = (
        (
            "Google GenAI",
            "google_genai",
            (
                "gemini-2.0-flash",
                "gemini-2.0-flash-lite",
                "gemini-2.5-flash-lite",
            ),
        ),
        (
            "Groq",
            "groq",
            ("openai/gpt-oss-120b", "moonshotai/kimi-k2-instruct-0905"),
        ),
        ("OpenAI", "openai", ("gpt-4o", "gpt-4.1")),
    )
    return {
        provider: {model: f"{prefix}:{model}" for model in models}
        for provider, prefix, models in catalogue
    }
39
+
40
+
41
def get_api_key_help() -> Dict[str, str]:
    """Return, for each provider display name, the URL of its API-key page."""
    help_links = (
        ("Google GenAI", "https://aistudio.google.com/app/apikey"),
        ("Groq", "https://console.groq.com/keys"),
        ("OpenAI", "https://platform.openai.com/api-keys"),
    )
    return dict(help_links)
48
+
49
+
50
def setup_temp_directories(temp_dir: str) -> Dict[str, str]:
    """Create the pipeline's working sub-directories under *temp_dir*.

    Args:
        temp_dir: Directory that will hold the sub-directories.

    Returns:
        Mapping of ``"log_files"``, ``"analysis"`` and ``"final_response"``
        to the path of the directory created for each (existing directories
        are reused).
    """
    layout = {
        name: os.path.join(temp_dir, name)
        for name in ("log_files", "analysis", "final_response")
    }
    for directory in layout.values():
        os.makedirs(directory, exist_ok=True)
    return layout
65
+
66
+
67
def save_uploaded_file(uploaded_file, temp_dir: str) -> str:
    """Write an uploaded file into ``<temp_dir>/log_files`` and return its path.

    The file name comes from the client and is therefore untrusted: only its
    final path component is used, so a name such as ``"../../x.json"`` cannot
    place the file outside the ``log_files`` directory.

    Args:
        uploaded_file: Object exposing a ``name`` string and a ``getbuffer()``
            method returning the file's bytes.
        temp_dir: Directory containing (or to contain) the ``log_files``
            sub-directory.

    Returns:
        Path of the file that was written.
    """
    log_files_dir = os.path.join(temp_dir, "log_files")
    # Do not depend on the directory having been created by an earlier call.
    os.makedirs(log_files_dir, exist_ok=True)

    # Treat backslashes as separators too, so Windows-style paths are also
    # reduced to their last component when running on POSIX.
    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        # Nothing usable is left of the supplied name; fall back to a fixed one.
        safe_name = "uploaded_log.json"

    file_path = os.path.join(log_files_dir, safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    return file_path
76
+
77
+
78
def run_analysis(
    log_file_path: str,
    model_name: str,
    query: str,
    temp_dirs: Dict[str, str],
    api_key: str,
    provider: str,
) -> Dict[str, Any]:
    """Run the cybersecurity analysis pipeline on one log file.

    The API key is exported through the environment variable associated with
    *provider* before the pipeline is invoked; an unrecognised provider leaves
    the environment untouched.

    Args:
        log_file_path: Path of the log file to analyse.
        model_name: Model identifier passed to the pipeline.
        query: Optional extra context passed to the pipeline.
        temp_dirs: Must contain the ``"analysis"`` and ``"final_response"``
            output directories.
        api_key: API key for the selected provider.
        provider: Provider display name (``"Google GenAI"``, ``"Groq"`` or
            ``"OpenAI"``).

    Returns:
        ``{"success": True, "result": <pipeline result>}`` on success.
        On failure, ``{"success": False, "error": <message>,
        "exception": <the exception object>}``.
    """
    # Environment variable under which each provider's key is exported.
    env_var_by_provider = {
        "Google GenAI": "GOOGLE_API_KEY",
        "Groq": "GROQ_API_KEY",
        "OpenAI": "OPENAI_API_KEY",
    }
    env_var = env_var_by_provider.get(provider)
    if env_var is not None:
        os.environ[env_var] = api_key

    try:
        result = analyze_log_file(
            log_file=log_file_path,
            query=query,
            tactic=None,
            model_name=model_name,
            temperature=0.1,
            log_agent_output_dir=temp_dirs["analysis"],
            response_agent_output_dir=temp_dirs["final_response"],
        )
    except Exception as e:
        # UI boundary: report the failure instead of crashing the app. The
        # exception object is kept so a caller can show its traceback, and the
        # message falls back to the type name when the exception text is empty.
        return {
            "success": False,
            "error": str(e) or type(e).__name__,
            "exception": e,
        }
    return {"success": True, "result": result}
110
+
111
+
112
def _render_analysis_results(result: Dict[str, Any]) -> None:
    """Render the headline metrics and the markdown report of a pipeline result."""
    st.header("Analysis Results")

    # A section that is missing *or* null is treated as empty, so the
    # lookups and len() below cannot fail on None.
    log_analysis = result.get("log_analysis_result") or {}

    assessment_col, events_col, time_col = st.columns(3)

    with assessment_col:
        st.metric(
            "Overall Assessment",
            log_analysis.get("overall_assessment", "Unknown"),
        )

    with events_col:
        st.metric("Abnormal Events", len(log_analysis.get("abnormal_events") or []))

    with time_col:
        execution_time = result.get("execution_time", "N/A")
        st.metric(
            "Execution Time",
            (
                f"{execution_time:.2f}s"
                if isinstance(execution_time, (int, float))
                else execution_time
            ),
        )

    markdown_report = result.get("markdown_report", "")
    if markdown_report:
        st.header("Detailed Report")
        st.markdown(markdown_report)
    else:
        st.warning("No detailed report generated.")


def main():
    """Main Streamlit app.

    Builds the page (sidebar configuration, log-file upload, status panel)
    and, when "Run Analysis" is pressed, saves the upload into a fresh
    temporary directory, runs the analysis pipeline and renders the outcome.
    The temporary directory is removed afterwards whether or not the
    analysis succeeded.
    """
    hf_token = os.getenv("HF_TOKEN")
    if hf_token:
        huggingface_login(token=hf_token)

    st.set_page_config(
        page_title="Cybersecurity Agent Pipeline", page_icon="🛡️", layout="wide"
    )

    st.title("Cybersecurity Agent Pipeline")
    st.markdown(
        "Upload a log file and analyze it using advanced LLM-based cybersecurity agents."
    )

    # Sidebar for configuration
    with st.sidebar:
        st.header("Configuration")

        # Model selection
        providers = get_model_providers()
        selected_provider = st.selectbox(
            "Select Model Provider", list(providers.keys())
        )

        available_models = providers[selected_provider]
        selected_model_display = st.selectbox(
            "Select Model", list(available_models.keys())
        )
        selected_model = available_models[selected_model_display]

        # API Key input with help
        st.subheader("API Key")
        api_key_help = get_api_key_help()

        with st.expander("How to get API key", expanded=False):
            st.markdown(f"**{selected_provider}**:")
            st.markdown(f"[Get API Key]({api_key_help[selected_provider]})")

        api_key = st.text_input(
            f"Enter {selected_provider} API Key",
            type="password",
            help=f"Your {selected_provider} API key",
        )

        # Additional query
        st.subheader("Additional Context")
        user_query = st.text_area(
            "Optional Query",
            placeholder="e.g., 'Focus on credential access attacks'",
            help="Provide additional context or specific focus areas for the analysis",
        )

    # Main content area
    upload_col, status_col = st.columns([2, 1])

    with upload_col:
        st.header("Upload Log File")
        uploaded_file = st.file_uploader(
            "Choose a JSON log file",
            type=["json"],
            help="Upload a JSON log file from the Mordor dataset or similar security logs",
        )

    with status_col:
        st.header("Analysis Status")
        if uploaded_file is not None:
            st.success(f"File uploaded: {uploaded_file.name}")
            st.info(f"Size: {uploaded_file.size:,} bytes")
        else:
            st.warning("Please upload a log file")

    # Run analysis button
    if st.button(
        "Run Analysis", type="primary", disabled=not (uploaded_file and api_key)
    ):
        # Defensive re-checks; the button is already disabled in these cases.
        if not uploaded_file:
            st.error("Please upload a log file first.")
            return

        if not api_key:
            st.error("Please enter your API key.")
            return

        # Create temporary directory
        temp_dir = tempfile.mkdtemp(prefix="cyber_agent_")

        try:
            temp_dirs = setup_temp_directories(temp_dir)
            log_file_path = save_uploaded_file(uploaded_file, temp_dir)

            # Show progress
            progress_bar = st.progress(0)
            status_text = st.empty()

            status_text.text("Initializing analysis...")
            progress_bar.progress(10)

            status_text.text("Running cybersecurity analysis...")
            progress_bar.progress(50)

            analysis_result = run_analysis(
                log_file_path=log_file_path,
                model_name=selected_model,
                query=user_query,
                temp_dirs=temp_dirs,
                api_key=api_key,
                provider=selected_provider,
            )

            progress_bar.progress(90)
            status_text.text("Finalizing results...")

            if analysis_result["success"]:
                progress_bar.progress(100)
                status_text.text("Analysis completed successfully!")
                _render_analysis_results(analysis_result["result"])
            else:
                st.error(f"Analysis failed: {analysis_result['error']}")
                # "error" is a plain string, for which st.exception() cannot
                # show a stack trace; only render a traceback when an actual
                # exception object is available in the result.
                failure = analysis_result.get("exception")
                if isinstance(failure, BaseException):
                    st.exception(failure)

        finally:
            # Cleanup temporary directory
            try:
                shutil.rmtree(temp_dir)
            except Exception as e:
                st.warning(f"Could not clean up temporary directory: {e}")

    # Footer
    st.markdown("---")
    st.markdown(
        "**Cybersecurity Agent Pipeline** - Powered by LangGraph and LangChain | "
        "Built for educational purposes demonstrating LLM-based multi-agent systems"
    )
289
+
290
+
291
+ if __name__ == "__main__":
292
+ main()
cyber_knowledge_base/bm25_retriever.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0988976cad39234f7fab73e71ab0c9d8c6d5c609c556ae9751fde7730e903f0b
3
+ size 5110282
cyber_knowledge_base/chroma/.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ *.sqlite3
cyber_knowledge_base/chroma/1ab81415-9731-4a9a-8d06-afc7fc190d32/.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ *.bin
cyber_knowledge_base/chroma/1ab81415-9731-4a9a-8d06-afc7fc190d32/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95e2ea8a0724a2545afa94c485a99b5fde88d7dc842a137705ea87b74c477d35
3
+ size 321200
cyber_knowledge_base/chroma/1ab81415-9731-4a9a-8d06-afc7fc190d32/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03cb3ac86f3e5bcb15e88b9bf99f760ec6b33e31d64a699e129b49868db6d733
3
+ size 100
cyber_knowledge_base/chroma/1ab81415-9731-4a9a-8d06-afc7fc190d32/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:854a86c445127b39997823da48a8556580ebaa06cc7c1289151300c1b9115efc
3
+ size 400
cyber_knowledge_base/chroma/1ab81415-9731-4a9a-8d06-afc7fc190d32/link_lists.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
3
+ size 0
cyber_knowledge_base/chroma/76f221d1-5f9d-44f8-8c9c-f610482d9b15/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f4fa01cff4dbc86c1cd162ee63e986ce0f9083e3bda7c73f04ed671c38f4dcd
3
+ size 2180948
cyber_knowledge_base/chroma/76f221d1-5f9d-44f8-8c9c-f610482d9b15/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1703590965d586b54b7ec6768894f349c76690c5916512cb679891d0b644d6f0
3
+ size 100
cyber_knowledge_base/chroma/76f221d1-5f9d-44f8-8c9c-f610482d9b15/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:975f009a15eeea6560c1c8c00180ede3b6074c2cad6d4a900fc38cc91a35390a
3
+ size 62596
cyber_knowledge_base/chroma/76f221d1-5f9d-44f8-8c9c-f610482d9b15/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:add85c66e3321d0c9e1c84557ba1f784ed8551326ea3e6e666bfd1711d0bee9d
3
+ size 2716
cyber_knowledge_base/chroma/76f221d1-5f9d-44f8-8c9c-f610482d9b15/link_lists.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46fd6dec85beb221c57b3ed5bc46356e172229d9efcf1d9e67d53fb861d46cdb
3
+ size 5776
cyber_knowledge_base/chroma/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d37398e05987708a52fd527d0a7dfeba9b060d12385c7cd7df264f2fc6b66c7
3
+ size 12853248
requirements.txt CHANGED
@@ -1,3 +1,607 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # This file is autogenerated by pip-compile with Python 3.11
3
+ # by the following command:
4
+ #
5
+ # pip-compile --output-file=-
6
+ #
7
+ aiohappyeyeballs==2.6.1
8
+ # via aiohttp
9
+ aiohttp==3.13.1
10
+ # via
11
+ # langchain
12
+ # langchain-community
13
+ # langchain-tavily
14
+ aiosignal==1.4.0
15
+ # via aiohttp
16
+ annotated-types==0.7.0
17
+ # via pydantic
18
+ antlr4-python3-runtime==4.9.3
19
+ # via stix2-patterns
20
+ anyio==4.11.0
21
+ # via
22
+ # groq
23
+ # httpx
24
+ # openai
25
+ # watchfiles
26
+ argparse==1.4.0
27
+ # via -r requirements.in
28
+ attrs==25.4.0
29
+ # via
30
+ # aiohttp
31
+ # jsonschema
32
+ # referencing
33
+ backoff==2.2.1
34
+ # via posthog
35
+ bcrypt==5.0.0
36
+ # via chromadb
37
+ build==1.3.0
38
+ # via chromadb
39
+ cachetools==6.2.1
40
+ # via google-auth
41
+ certifi==2025.10.5
42
+ # via
43
+ # httpcore
44
+ # httpx
45
+ # kubernetes
46
+ # requests
47
+ charset-normalizer==3.4.4
48
+ # via
49
+ # -r requirements.in
50
+ # requests
51
+ chromadb==1.2.2
52
+ # via
53
+ # -r requirements.in
54
+ # langchain-chroma
55
+ click==8.3.0
56
+ # via
57
+ # nltk
58
+ # typer
59
+ # uvicorn
60
+ colorama==0.4.6
61
+ # via
62
+ # build
63
+ # click
64
+ # loguru
65
+ # tqdm
66
+ # uvicorn
67
+ coloredlogs==15.0.1
68
+ # via onnxruntime
69
+ colour==0.1.5
70
+ # via mitreattack-python
71
+ dataclasses-json==0.6.7
72
+ # via
73
+ # langchain
74
+ # langchain-community
75
+ deepdiff==8.6.1
76
+ # via mitreattack-python
77
+ distro==1.9.0
78
+ # via
79
+ # groq
80
+ # openai
81
+ # posthog
82
+ drawsvg==2.4.0
83
+ # via mitreattack-python
84
+ durationpy==0.10
85
+ # via kubernetes
86
+ et-xmlfile==2.0.0
87
+ # via openpyxl
88
+ filelock==3.20.0
89
+ # via
90
+ # huggingface-hub
91
+ # torch
92
+ # transformers
93
+ filetype==1.2.0
94
+ # via langchain-google-genai
95
+ flatbuffers==25.9.23
96
+ # via onnxruntime
97
+ frozenlist==1.8.0
98
+ # via
99
+ # aiohttp
100
+ # aiosignal
101
+ fsspec==2025.9.0
102
+ # via
103
+ # huggingface-hub
104
+ # torch
105
+ google-ai-generativelanguage==0.9.0
106
+ # via langchain-google-genai
107
+ google-api-core[grpc]==2.27.0
108
+ # via google-ai-generativelanguage
109
+ google-auth==2.41.1
110
+ # via
111
+ # google-ai-generativelanguage
112
+ # google-api-core
113
+ # kubernetes
114
+ googleapis-common-protos==1.71.0
115
+ # via
116
+ # -r requirements.in
117
+ # google-api-core
118
+ # grpcio-status
119
+ # opentelemetry-exporter-otlp-proto-grpc
120
+ greenlet==3.2.4
121
+ # via sqlalchemy
122
+ groq==0.33.0
123
+ # via langchain-groq
124
+ grpcio==1.76.0
125
+ # via
126
+ # chromadb
127
+ # google-ai-generativelanguage
128
+ # google-api-core
129
+ # grpcio-status
130
+ # opentelemetry-exporter-otlp-proto-grpc
131
+ grpcio-status==1.76.0
132
+ # via google-api-core
133
+ h11==0.16.0
134
+ # via
135
+ # httpcore
136
+ # uvicorn
137
+ httpcore==1.0.9
138
+ # via httpx
139
+ httptools==0.7.1
140
+ # via uvicorn
141
+ httpx==0.28.1
142
+ # via
143
+ # chromadb
144
+ # groq
145
+ # langgraph-sdk
146
+ # langsmith
147
+ # ollama
148
+ # openai
149
+ httpx-sse==0.4.3
150
+ # via langchain-community
151
+ huggingface-hub==0.36.0
152
+ # via
153
+ # -r requirements.in
154
+ # langchain-huggingface
155
+ # sentence-transformers
156
+ # tokenizers
157
+ # transformers
158
+ humanfriendly==10.0
159
+ # via coloredlogs
160
+ idna==3.11
161
+ # via
162
+ # anyio
163
+ # httpx
164
+ # requests
165
+ # yarl
166
+ importlib-metadata==8.7.0
167
+ # via opentelemetry-api
168
+ importlib-resources==6.5.2
169
+ # via chromadb
170
+ jinja2==3.1.6
171
+ # via torch
172
+ jiter==0.11.1
173
+ # via openai
174
+ joblib==1.5.2
175
+ # via
176
+ # nltk
177
+ # scikit-learn
178
+ jsonpatch==1.33
179
+ # via langchain-core
180
+ jsonpointer==3.0.0
181
+ # via jsonpatch
182
+ jsonschema==4.25.1
183
+ # via chromadb
184
+ jsonschema-specifications==2025.9.1
185
+ # via jsonschema
186
+ kubernetes==34.1.0
187
+ # via chromadb
188
+ langchain==0.3.27
189
+ # via
190
+ # -r requirements.in
191
+ # langchain-community
192
+ # langchain-tavily
193
+ langchain-chroma==0.2.6
194
+ # via -r requirements.in
195
+ langchain-community==0.3.31
196
+ # via -r requirements.in
197
+ langchain-core==0.3.79
198
+ # via
199
+ # -r requirements.in
200
+ # langchain
201
+ # langchain-chroma
202
+ # langchain-community
203
+ # langchain-google-genai
204
+ # langchain-groq
205
+ # langchain-huggingface
206
+ # langchain-ollama
207
+ # langchain-openai
208
+ # langchain-tavily
209
+ # langchain-text-splitters
210
+ # langgraph
211
+ # langgraph-checkpoint
212
+ # langgraph-prebuilt
213
+ # langgraph-supervisor
214
+ langchain-google-genai==2.1.12
215
+ # via -r requirements.in
216
+ langchain-groq==0.3.8
217
+ # via -r requirements.in
218
+ langchain-huggingface==0.3.1
219
+ # via -r requirements.in
220
+ langchain-ollama==0.3.10
221
+ # via -r requirements.in
222
+ langchain-openai==0.3.35
223
+ # via -r requirements.in
224
+ langchain-tavily==0.2.12
225
+ # via -r requirements.in
226
+ langchain-text-splitters==0.3.11
227
+ # via
228
+ # -r requirements.in
229
+ # langchain
230
+ langgraph==0.6.11
231
+ # via
232
+ # -r requirements.in
233
+ # langgraph-supervisor
234
+ langgraph-checkpoint==3.0.0
235
+ # via
236
+ # langgraph
237
+ # langgraph-prebuilt
238
+ langgraph-prebuilt==0.6.5
239
+ # via
240
+ # -r requirements.in
241
+ # langgraph
242
+ langgraph-sdk==0.2.9
243
+ # via langgraph
244
+ langgraph-supervisor==0.0.29
245
+ # via -r requirements.in
246
+ langsmith==0.4.38
247
+ # via
248
+ # -r requirements.in
249
+ # langchain
250
+ # langchain-community
251
+ # langchain-core
252
+ loguru==0.7.3
253
+ # via mitreattack-python
254
+ markdown==3.9
255
+ # via mitreattack-python
256
+ markdown-it-py==4.0.0
257
+ # via rich
258
+ markupsafe==3.0.3
259
+ # via jinja2
260
+ marshmallow==3.26.1
261
+ # via dataclasses-json
262
+ mdurl==0.1.2
263
+ # via markdown-it-py
264
+ mitreattack-python==5.1.0
265
+ # via -r requirements.in
266
+ mmh3==5.2.0
267
+ # via chromadb
268
+ mpmath==1.3.0
269
+ # via sympy
270
+ multidict==6.7.0
271
+ # via
272
+ # aiohttp
273
+ # yarl
274
+ mypy-extensions==1.1.0
275
+ # via typing-inspect
276
+ networkx==3.5
277
+ # via torch
278
+ nltk==3.9.2
279
+ # via -r requirements.in
280
+ numpy==2.3.4
281
+ # via
282
+ # chromadb
283
+ # langchain-chroma
284
+ # langchain-community
285
+ # mitreattack-python
286
+ # onnxruntime
287
+ # pandas
288
+ # rank-bm25
289
+ # scikit-learn
290
+ # scipy
291
+ # transformers
292
+ oauthlib==3.3.1
293
+ # via requests-oauthlib
294
+ ollama==0.6.0
295
+ # via langchain-ollama
296
+ onnxruntime==1.23.2
297
+ # via chromadb
298
+ openai==2.6.1
299
+ # via langchain-openai
300
+ openpyxl==3.1.5
301
+ # via mitreattack-python
302
+ opentelemetry-api==1.38.0
303
+ # via
304
+ # chromadb
305
+ # opentelemetry-exporter-otlp-proto-grpc
306
+ # opentelemetry-sdk
307
+ # opentelemetry-semantic-conventions
308
+ opentelemetry-exporter-otlp-proto-common==1.38.0
309
+ # via opentelemetry-exporter-otlp-proto-grpc
310
+ opentelemetry-exporter-otlp-proto-grpc==1.38.0
311
+ # via chromadb
312
+ opentelemetry-proto==1.38.0
313
+ # via
314
+ # opentelemetry-exporter-otlp-proto-common
315
+ # opentelemetry-exporter-otlp-proto-grpc
316
+ opentelemetry-sdk==1.38.0
317
+ # via
318
+ # chromadb
319
+ # opentelemetry-exporter-otlp-proto-grpc
320
+ opentelemetry-semantic-conventions==0.59b0
321
+ # via opentelemetry-sdk
322
+ orderly-set==5.5.0
323
+ # via deepdiff
324
+ orjson==3.11.4
325
+ # via
326
+ # chromadb
327
+ # langgraph-sdk
328
+ # langsmith
329
+ ormsgpack==1.11.0
330
+ # via langgraph-checkpoint
331
+ overrides==7.7.0
332
+ # via chromadb
333
+ packaging==25.0
334
+ # via
335
+ # build
336
+ # huggingface-hub
337
+ # langchain-core
338
+ # langsmith
339
+ # marshmallow
340
+ # onnxruntime
341
+ # pooch
342
+ # transformers
343
+ pandas==2.3.3
344
+ # via mitreattack-python
345
+ pillow==12.0.0
346
+ # via
347
+ # mitreattack-python
348
+ # sentence-transformers
349
+ platformdirs==4.5.0
350
+ # via pooch
351
+ pooch==1.8.2
352
+ # via mitreattack-python
353
+ posthog==5.4.0
354
+ # via chromadb
355
+ propcache==0.4.1
356
+ # via
357
+ # aiohttp
358
+ # yarl
359
+ proto-plus==1.26.1
360
+ # via
361
+ # google-ai-generativelanguage
362
+ # google-api-core
363
+ protobuf==6.33.0
364
+ # via
365
+ # -r requirements.in
366
+ # google-ai-generativelanguage
367
+ # google-api-core
368
+ # googleapis-common-protos
369
+ # grpcio-status
370
+ # onnxruntime
371
+ # opentelemetry-proto
372
+ # proto-plus
373
+ pyasn1==0.6.1
374
+ # via
375
+ # pyasn1-modules
376
+ # rsa
377
+ pyasn1-modules==0.4.2
378
+ # via google-auth
379
+ pybase64==1.4.2
380
+ # via chromadb
381
+ pydantic==2.12.3
382
+ # via
383
+ # -r requirements.in
384
+ # chromadb
385
+ # groq
386
+ # langchain
387
+ # langchain-core
388
+ # langchain-google-genai
389
+ # langgraph
390
+ # langsmith
391
+ # ollama
392
+ # openai
393
+ # pydantic-settings
394
+ pydantic-core==2.41.4
395
+ # via pydantic
396
+ pydantic-settings==2.11.0
397
+ # via langchain-community
398
+ pygments==2.19.2
399
+ # via rich
400
+ pypdf2==3.0.1
401
+ # via -r requirements.in
402
+ pypika==0.48.9
403
+ # via chromadb
404
+ pyproject-hooks==1.2.0
405
+ # via build
406
+ pyreadline3==3.5.4
407
+ # via humanfriendly
408
+ python-dateutil==2.9.0.post0
409
+ # via
410
+ # kubernetes
411
+ # mitreattack-python
412
+ # pandas
413
+ # posthog
414
+ python-dotenv==1.2.1
415
+ # via
416
+ # -r requirements.in
417
+ # pydantic-settings
418
+ # uvicorn
419
+ pytz==2025.2
420
+ # via
421
+ # pandas
422
+ # stix2
423
+ pyyaml==6.0.3
424
+ # via
425
+ # chromadb
426
+ # huggingface-hub
427
+ # kubernetes
428
+ # langchain
429
+ # langchain-community
430
+ # langchain-core
431
+ # transformers
432
+ # uvicorn
433
+ rank-bm25==0.2.2
434
+ # via -r requirements.in
435
+ referencing==0.37.0
436
+ # via
437
+ # jsonschema
438
+ # jsonschema-specifications
439
+ regex==2025.10.23
440
+ # via
441
+ # nltk
442
+ # tiktoken
443
+ # transformers
444
+ requests==2.32.5
445
+ # via
446
+ # -r requirements.in
447
+ # google-api-core
448
+ # huggingface-hub
449
+ # kubernetes
450
+ # langchain
451
+ # langchain-community
452
+ # langchain-tavily
453
+ # langsmith
454
+ # mitreattack-python
455
+ # pooch
456
+ # posthog
457
+ # requests-oauthlib
458
+ # requests-toolbelt
459
+ # stix2
460
+ # tiktoken
461
+ # transformers
462
+ requests-oauthlib==2.0.0
463
+ # via kubernetes
464
+ requests-toolbelt==1.0.0
465
+ # via langsmith
466
+ rich==14.2.0
467
+ # via
468
+ # chromadb
469
+ # mitreattack-python
470
+ # typer
471
+ rpds-py==0.28.0
472
+ # via
473
+ # jsonschema
474
+ # referencing
475
+ rsa==4.9.1
476
+ # via google-auth
477
+ safetensors==0.6.2
478
+ # via transformers
479
+ scikit-learn==1.7.2
480
+ # via sentence-transformers
481
+ scipy==1.16.2
482
+ # via
483
+ # scikit-learn
484
+ # sentence-transformers
485
+ sentence-transformers==5.1.2
486
+ # via -r requirements.in
487
+ shellingham==1.5.4
488
+ # via typer
489
+ simplejson==3.20.2
490
+ # via stix2
491
+ six==1.17.0
492
+ # via
493
+ # kubernetes
494
+ # posthog
495
+ # python-dateutil
496
+ # stix2-patterns
497
+ sniffio==1.3.1
498
+ # via
499
+ # anyio
500
+ # groq
501
+ # openai
502
+ sqlalchemy==2.0.44
503
+ # via
504
+ # langchain
505
+ # langchain-community
506
+ stix2==3.0.1
507
+ # via mitreattack-python
508
+ stix2-patterns==2.0.0
509
+ # via stix2
510
+ sympy==1.14.0
511
+ # via
512
+ # onnxruntime
513
+ # torch
514
+ tabulate==0.9.0
515
+ # via mitreattack-python
516
+ tenacity==9.1.2
517
+ # via
518
+ # chromadb
519
+ # langchain-community
520
+ # langchain-core
521
+ threadpoolctl==3.6.0
522
+ # via scikit-learn
523
+ tiktoken==0.12.0
524
+ # via langchain-openai
525
+ tokenizers==0.22.1
526
+ # via
527
+ # chromadb
528
+ # langchain-huggingface
529
+ # transformers
530
+ torch==2.9.0
531
+ # via
532
+ # -r requirements.in
533
+ # sentence-transformers
534
+ tqdm==4.67.1
535
+ # via
536
+ # chromadb
537
+ # huggingface-hub
538
+ # mitreattack-python
539
+ # nltk
540
+ # openai
541
+ # sentence-transformers
542
+ # transformers
543
+ transformers==4.57.1
544
+ # via
545
+ # -r requirements.in
546
+ # sentence-transformers
547
+ typer==0.20.0
548
+ # via
549
+ # chromadb
550
+ # mitreattack-python
551
+ typing-extensions==4.15.0
552
+ # via
553
+ # aiosignal
554
+ # anyio
555
+ # chromadb
556
+ # groq
557
+ # grpcio
558
+ # huggingface-hub
559
+ # langchain-core
560
+ # openai
561
+ # opentelemetry-api
562
+ # opentelemetry-exporter-otlp-proto-grpc
563
+ # opentelemetry-sdk
564
+ # opentelemetry-semantic-conventions
565
+ # pydantic
566
+ # pydantic-core
567
+ # referencing
568
+ # sentence-transformers
569
+ # sqlalchemy
570
+ # torch
571
+ # typer
572
+ # typing-inspect
573
+ # typing-inspection
574
+ typing-inspect==0.9.0
575
+ # via dataclasses-json
576
+ typing-inspection==0.4.2
577
+ # via
578
+ # pydantic
579
+ # pydantic-settings
580
+ tzdata==2025.2
581
+ # via pandas
582
+ urllib3==2.3.0
583
+ # via
584
+ # kubernetes
585
+ # requests
586
+ uvicorn[standard]==0.38.0
587
+ # via chromadb
588
+ watchfiles==1.1.1
589
+ # via uvicorn
590
+ websocket-client==1.9.0
591
+ # via kubernetes
592
+ websockets==15.0.1
593
+ # via uvicorn
594
+ wheel==0.45.1
595
+ # via mitreattack-python
596
+ win32-setctime==1.2.0
597
+ # via loguru
598
+ xlsxwriter==3.2.9
599
+ # via mitreattack-python
600
+ xxhash==3.6.0
601
+ # via langgraph
602
+ yarl==1.22.0
603
+ # via aiohttp
604
+ zipp==3.23.0
605
+ # via importlib-metadata
606
+ zstandard==0.25.0
607
+ # via langsmith
run_app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Simple script to run the Streamlit cybersecurity agent web app.
3
+ """
4
+
5
+ import subprocess
6
+ import sys
7
+ import os
8
+ from pathlib import Path
9
+
10
+
11
def main():
    """Launch ``app.py`` (located next to this script) with Streamlit.

    Exits with status 1 when ``app.py`` is missing or the Streamlit process
    returns a non-zero status, and with status 0 when interrupted from the
    keyboard.
    """
    app_path = Path(__file__).parent / "app.py"
    if not app_path.exists():
        print(f"Error: app.py not found at {app_path}")
        sys.exit(1)

    rule = "=" * 50
    banner = (
        "Starting Cybersecurity Agent Web App...",
        rule,
        "The app will open in your default web browser.",
        "If it doesn't open automatically, go to: http://localhost:8501",
        rule,
        "",
    )
    for line in banner:
        print(line)

    # Run Streamlit through the current interpreter so the same environment is used.
    command = [
        sys.executable,
        "-m",
        "streamlit",
        "run",
        str(app_path),
        "--server.port",
        "8501",
        "--server.address",
        "localhost",
    ]
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error running Streamlit: {e}")
        sys.exit(1)
    except KeyboardInterrupt:
        print("\nApp stopped by user.")
        sys.exit(0)
50
+
51
+
52
+ if __name__ == "__main__":
53
+ main()
src/agents/__pycache__/llm_client.cpython-311.pyc ADDED
Binary file (11.7 kB). View file
 
src/agents/correlation_agent/correlation_logic.py ADDED
@@ -0,0 +1,449 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any, List, Optional
2
+ from datetime import datetime
3
+ import json
4
+ from langchain_core.messages import HumanMessage, AIMessage
5
+ from langchain_core.tools import tool
6
+ from langgraph.prebuilt import create_react_agent
7
+ from langgraph.graph import StateGraph, END
8
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
9
+ from pydantic import BaseModel, Field, ConfigDict
10
+ from sklearn.metrics.pairwise import cosine_similarity
11
+ import numpy as np
12
+ from .types import LogInput, MitreInput, CorrelationOutput, ThreatLevel, ConfidenceLevel, MatchedTechnique
13
+
14
+ embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
15
+
16
+
17
+ # State schema for correlation workflow
18
class CorrelationState(BaseModel):
    """State schema for correlation workflow."""
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Prompt text describing the log and MITRE data to correlate.
    analysis_request: str = ""
    # Content of the agent's last AI message; None until the agent node runs.
    agent_output: Optional[str] = None
    # Dict form of the structured assessment; None until the structuring node runs.
    structured_response: Optional[Dict[str, Any]] = None
24
+
25
+
26
+ # Structured response schemas
27
class MatchedTechniqueItem(BaseModel):
    """Schema for individual matched technique."""
    # Closed schema: unknown keys are rejected.
    model_config = ConfigDict(extra='forbid')

    # The docstring and Field descriptions end up in the generated JSON
    # schema, so they are kept verbatim.
    technique_id: str = Field(
        ...,
        description="MITRE technique ID",
    )
    match_confidence: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Correlation confidence 0-1",
    )
    evidence: str = Field(
        ...,
        description="Concise evidence string",
    )
    validation_result: str = Field(
        ...,
        description="correlated | weak | false_positive",
    )
34
+
35
+
36
class CorrelationStructuredResponse(BaseModel):
    """Structured response schema for correlation analysis output."""
    # Closed schema: unknown keys are rejected.
    model_config = ConfigDict(extra='forbid')

    # The docstring and Field descriptions end up in the generated JSON
    # schema, so they are kept verbatim.
    correlation_score: float = Field(
        ...,
        description="Overall aggregate correlation score (0-1)",
    )
    threat_level: str = Field(
        ...,
        description="low | medium | high | critical",
    )
    confidence: str = Field(
        ...,
        description="low | medium | high",
    )
    matched_techniques: List[MatchedTechniqueItem] = Field(
        default_factory=list,
        description="Top 3-5 matched techniques",
    )
    reasoning: str = Field(
        ...,
        description="Concise synthesis / justification of assessment (<150 words)",
    )
47
+
48
+
49
+ # Helper function for JSON parsing
50
def parse_json_input(data: Any) -> Any:
    """Decode *data* when it is a JSON string; return anything else untouched.

    Raises:
        json.JSONDecodeError: if *data* is a string that is not valid JSON.
    """
    if not isinstance(data, str):
        return data
    return json.loads(data)
53
+
54
+
55
def _semantic_matches(technique_embedding, items, threshold):
    """Return ``(matches, scores)`` for items whose similarity exceeds *threshold*.

    Each item is stringified, blank items are skipped, and the item is
    embedded and compared to *technique_embedding* with cosine similarity.
    ``matches`` holds the original items, ``scores`` the matching floats.
    """
    matches, scores = [], []
    for item in items:
        text = str(item)
        if not text.strip():
            continue
        item_embedding = np.array(embeddings.embed_query(text)).reshape(1, -1)
        similarity = float(cosine_similarity(technique_embedding, item_embedding)[0][0])
        if similarity > threshold:
            matches.append(item)
            scores.append(similarity)
    return matches, scores


# NOTE: the docstring below is what @tool exposes as the tool description, so
# it is kept verbatim and the developer documentation lives in this comment.
#
# technique_data: JSON string (or already-parsed dict) with 'attack_id',
#     'name' and 'description' keys.
# log_processes / log_anomalies: items observed in the logs; None is treated
#     as an empty list.
# Returns a JSON string with technique_id, technique_name, correlation_score
#     (rounded to 3 places), process_matches, anomaly_matches and
#     match_quality ('strong' > 0.7, 'moderate' > 0.5, else 'weak'), or
#     {"error": ...} when technique_data is not a JSON object.
@tool
def correlate_log_with_technique(technique_data: str, log_processes: List[str], log_anomalies: List[str]) -> str:
    """Semantic correlation between log data and MITRE technique using embeddings."""
    try:
        technique = parse_json_input(technique_data)
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError; a bare except would also
        # swallow KeyboardInterrupt/SystemExit.
        return json.dumps({"error": "Invalid technique data format"})

    # Valid JSON that is not an object (a list, a number, ...) has no .get().
    if not isinstance(technique, dict):
        return json.dumps({"error": "Invalid technique data format"})

    technique_id = technique.get('attack_id', 'Unknown')
    technique_name = technique.get('name', 'Unknown')
    technique_description = technique.get('description', '')

    # Without a description there is nothing to embed: report a zero score.
    if not technique_description:
        return json.dumps({
            "technique_id": technique_id,
            "technique_name": technique_name,
            "correlation_score": 0.0,
            "process_matches": [],
            "anomaly_matches": [],
            "match_quality": "weak"
        })

    processes = log_processes or []
    anomalies = log_anomalies or []

    # Semantic matching using embeddings
    try:
        technique_embedding = np.array(embeddings.embed_query(technique_description)).reshape(1, -1)

        # Processes need a higher similarity (0.6) than anomalies (0.5).
        process_matches, process_scores = _semantic_matches(technique_embedding, processes, 0.6)
        anomaly_matches, anomaly_scores = _semantic_matches(technique_embedding, anomalies, 0.5)

        # Overall score is the mean of the two per-category averages.
        avg_process_score = np.mean(process_scores) if process_scores else 0.0
        avg_anomaly_score = np.mean(anomaly_scores) if anomaly_scores else 0.0
        correlation_score = (avg_process_score + avg_anomaly_score) / 2

    except Exception as e:
        # Best-effort fallback: substring match on the first five words of
        # the description, with a fixed low score.
        print(f"[WARN] Semantic correlation failed: {e}, using fallback")
        keywords = technique_description.lower().split()[:5]
        process_matches = [p for p in processes if any(k in str(p).lower() for k in keywords)]
        anomaly_matches = [a for a in anomalies if any(k in str(a).lower() for k in keywords)]
        correlation_score = 0.3 if (process_matches or anomaly_matches) else 0.1

    return json.dumps({
        "technique_id": technique_id,
        "technique_name": technique_name,
        "correlation_score": round(float(correlation_score), 3),
        "process_matches": process_matches,
        "anomaly_matches": anomaly_matches,
        "match_quality": "strong" if correlation_score > 0.7 else "moderate" if correlation_score > 0.5 else "weak"
    })
126
+
127
# NOTE: the docstring below is what @tool exposes as the tool description, so
# it is kept verbatim and the developer documentation lives in this comment.
#
# techniques: JSON array (string or already-parsed list) of technique dicts.
# log_processes / log_anomalies: JSON arrays (string or already-parsed list).
# Returns a JSON string with 'correlations' (sorted by score, descending),
#     'top_matches' (first three), 'total_techniques' and 'strong_matches'
#     (score > 0.7), or {"error": ...} when the inputs cannot be parsed.
@tool
def correlate_all_techniques(techniques: str, log_processes: str, log_anomalies: str) -> str:
    """Correlate all MITRE techniques with log data."""
    try:
        technique_list = parse_json_input(techniques)
        processes = parse_json_input(log_processes)
        anomalies = parse_json_input(log_anomalies)
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError; a bare except would also
        # swallow KeyboardInterrupt/SystemExit.
        return json.dumps({"error": "Invalid input format"})

    if not isinstance(technique_list, list):
        return json.dumps({"error": "Invalid input format"})

    # correlate_log_with_technique is wrapped by @tool, and the wrapper's
    # __call__ does not accept three positional arguments. Call the
    # underlying function instead; fall back to the name itself in case it
    # is ever left undecorated.
    correlate_one = getattr(correlate_log_with_technique, "func", None) or correlate_log_with_technique

    correlations = [
        json.loads(correlate_one(tech, processes, anomalies))
        for tech in technique_list
    ]

    # Per-technique error payloads carry no score; rank them last instead of
    # raising KeyError.
    correlations.sort(key=lambda entry: entry.get('correlation_score', 0.0), reverse=True)

    return json.dumps({
        "correlations": correlations,
        "top_matches": correlations[:3],
        "total_techniques": len(correlations),
        "strong_matches": sum(1 for entry in correlations if entry.get('correlation_score', 0.0) > 0.7)
    })
150
+
151
# NOTE: the docstring below is what @tool exposes as the tool description, so
# it is kept verbatim and the developer documentation lives in this comment.
#
# correlation_score / mitre_confidence: expected in [0, 1]; values outside
#     that range are clamped so the result stays a valid confidence.
# log_severity: 'critical' | 'high' | 'medium' | 'low' (case-insensitive;
#     unknown values score like 'medium').
# num_matched_techniques: negative values are treated as 0.
# match_quality: 'strong' | 'moderate' | 'weak' (case-insensitive; unknown
#     values score like 'moderate').
# Returns a JSON string with confidence_score, confidence_level
#     ('high' >= 0.75, 'medium' >= 0.50, else 'low'), reasoning, methodology.
@tool
def calculate_confidence(
    correlation_score: float,
    log_severity: str = "medium",
    mitre_confidence: float = 0.5,
    num_matched_techniques: int = 1,
    match_quality: str = "moderate"
) -> str:
    """
    Sophisticated confidence calculation using Bayesian-inspired weighted scoring.
    """

    def _unit(value: float) -> float:
        """Clamp *value* into the closed interval [0, 1]."""
        return max(0.0, min(1.0, value))

    # The arguments are chosen by the LLM, so normalise them before use.
    correlation_score = _unit(correlation_score)
    mitre_confidence = _unit(mitre_confidence)
    num_matched_techniques = max(0, num_matched_techniques)
    quality_key = str(match_quality).strip().lower()
    severity_key = str(log_severity).strip().lower()

    # Weight distribution based on cybersecurity research
    WEIGHTS = {
        'correlation': 0.50,  # Primary indicator - semantic match quality
        'evidence': 0.25,     # Evidence strength (quality + quantity)
        'mitre_prior': 0.15,  # Bayesian prior from MITRE analysis
        'severity': 0.10      # Contextual severity adjustment
    }

    # Quality scores based on semantic similarity thresholds
    quality_scores = {'strong': 1.0, 'moderate': 0.7, 'weak': 0.4}
    quality_score = quality_scores.get(quality_key, 0.7)

    # Quantity factor with diminishing returns (saturates at 4+ techniques)
    quantity_factor = min(1.0, 0.5 + (num_matched_techniques * 0.15))
    evidence_component = quality_score * quantity_factor

    # Severity scores based on CVSS principles
    severity_scores = {'critical': 1.0, 'high': 0.85, 'medium': 0.6, 'low': 0.35}
    severity_component = severity_scores.get(severity_key, 0.6)

    # Weighted combination
    overall_confidence = (
        WEIGHTS['correlation'] * correlation_score +
        WEIGHTS['evidence'] * evidence_component +
        WEIGHTS['mitre_prior'] * mitre_confidence +
        WEIGHTS['severity'] * severity_component
    )

    # Cap at 0.95 to avoid overconfidence bias
    overall_confidence = min(overall_confidence, 0.95)

    # Uncertainty penalty for weak single matches
    if num_matched_techniques == 1 and quality_key == 'weak':
        overall_confidence *= 0.8

    # Determine confidence level (FIRST/NIST guidelines)
    if overall_confidence >= 0.75:
        level = "high"
    elif overall_confidence >= 0.50:
        level = "medium"
    else:
        level = "low"

    reasoning = (
        f"Correlation: {correlation_score:.2f} ({WEIGHTS['correlation']}) | "
        f"Evidence: {num_matched_techniques} {match_quality} ({WEIGHTS['evidence']}) | "
        f"MITRE: {mitre_confidence:.2f} ({WEIGHTS['mitre_prior']}) | "
        f"Severity: {log_severity} ({WEIGHTS['severity']})"
    )

    return json.dumps({
        "confidence_score": round(overall_confidence, 3),
        "confidence_level": level,
        "reasoning": reasoning,
        "methodology": "Bayesian weighted scoring (Hutchins 2011, NIST SP 800-150)"
    })
219
+
220
+
221
+ # Constants for default fallback
222
# Neutral assessment used when no structured correlation data is produced.
# It is shallow-copied before use, so do not mutate the nested list in place.
DEFAULT_CORRELATION_DATA = dict(
    correlation_score=0.5,
    threat_level="medium",
    confidence="medium",
    matched_techniques=[],
    reasoning="Workflow failed to produce correlation data",
)
229
+
230
+
231
def create_correlation_workflow() -> Optional[Any]:
    """
    Build and compile the two-node correlation graph.

    Flow: START → correlation_agent → structure_output → END

    The first node runs a ReAct agent equipped with the correlation tools; the
    second asks a structured-output model to turn the agent's final message
    into a ``CorrelationStructuredResponse`` dict.

    Returns:
        The compiled graph, or None when ``ChatOpenAI`` is None.
    """
    if ChatOpenAI is None:
        print("[WARN] Missing ChatOpenAI dependency. Returning None.")
        return None

    # ReAct agent with all tools
    toolbox = [correlate_all_techniques, correlate_log_with_technique, calculate_confidence]
    react_agent = create_react_agent(
        model="openai:gpt-4o",
        tools=toolbox,
        name="correlation_agent",
    )

    # LLM for structured output extraction
    extractor = ChatOpenAI(model="gpt-4o").with_structured_output(
        CorrelationStructuredResponse,
        method="json_schema"
    )

    # Fixed instruction block appended to every analysis request.
    instructions = (
        "Instructions:\n"
        "1. Use correlate_all_techniques to get correlation scores for all techniques.\n"
        "2. Analyze the top matches and their correlation scores.\n"
        "3. Use calculate_confidence with:\n"
        " - correlation_score: highest or average correlation score\n"
        " - log_severity: from log data\n"
        " - mitre_confidence: from MITRE data\n"
        " - num_matched_techniques: count of techniques with score > 0.5\n"
        " - match_quality: 'strong' (>0.7), 'moderate' (0.5-0.7), or 'weak' (<0.5)\n"
        "4. Summarize findings with evidence and reasoning.\n"
    )

    def agent_node(state: CorrelationState) -> CorrelationState:
        """Run the ReAct agent and store its last AI message on the state."""
        agent_prompt = f"{state.analysis_request}\n\n" + instructions
        outcome = react_agent.invoke({"messages": [HumanMessage(content=agent_prompt)]})

        # Walk the transcript backwards to find the most recent AI message.
        last_ai_message = next(
            (message for message in reversed(outcome.get("messages", []))
             if isinstance(message, AIMessage)),
            None,
        )
        if last_ai_message is not None:
            state.agent_output = last_ai_message.content

        return state

    def structure_output_node(state: CorrelationState) -> CorrelationState:
        """Convert the agent's free-text analysis into the structured dict."""
        structure_prompt = f"""Based on the correlation analysis, provide a structured assessment.

Analysis:
{state.agent_output}

Original Request:
{state.analysis_request}

Extract:
- correlation_score: Overall score (0-1)
- threat_level: low/medium/high/critical
- confidence: low/medium/high from confidence calculation
- matched_techniques: Top 3-5 with IDs, confidence, evidence, validation
- reasoning: Concise synthesis (max 150 words)"""

        try:
            extracted = extractor.invoke(structure_prompt)
            state.structured_response = (
                extracted.model_dump()
                if isinstance(extracted, CorrelationStructuredResponse)
                else extracted
            )
        except Exception as e:
            print(f"[ERROR] Failed to create structured output: {e}")
            state.structured_response = DEFAULT_CORRELATION_DATA.copy()
            state.structured_response["reasoning"] = f"Structuring failed: {str(e)}"

        return state

    # Build workflow graph
    graph = StateGraph(CorrelationState)
    graph.add_node("correlation_agent", agent_node)
    graph.add_node("structure_output", structure_output_node)
    graph.set_entry_point("correlation_agent")
    graph.add_edge("correlation_agent", "structure_output")
    graph.add_edge("structure_output", END)

    return graph.compile()
321
+
322
class CorrelationLogic:
    """Correlation analysis using ReAct agent workflow with structured output."""

    def __init__(self):
        """Build the workflow; on any failure ``self.workflow`` is set to None."""
        try:
            self.workflow = create_correlation_workflow()
            if self.workflow:
                print("[INFO] Correlation workflow initialized")
            else:
                print("[WARN] Workflow initialization failed")
        except Exception as e:
            print(f"[WARN] Workflow initialization error: {e}")
            self.workflow = None

    @staticmethod
    def _to_enum(enum_cls, value, default):
        """Map *value* (case-insensitive) onto *enum_cls*, or return *default*.

        AttributeError covers a non-string value; ValueError covers a string
        that is not a member value.
        """
        try:
            return enum_cls(value.strip().lower())
        except (AttributeError, ValueError):
            return default

    def correlate(self, log_input: LogInput, mitre_input: MitreInput) -> CorrelationOutput:
        """Run the workflow and convert its dict result into a CorrelationOutput.

        Falls back to a neutral medium/medium result with score 0.5 when the
        workflow yields no data.
        """
        correlation_data = self.run_workflow(log_input, mitre_input)

        # Read the clock once so the id and the timestamp cannot disagree.
        now = datetime.now()
        correlation_id = f"CORR_{log_input.analysis_id}_{mitre_input.analysis_id}_{now.strftime('%Y%m%d_%H%M%S')}"
        timestamp = now.isoformat()

        # Use defaults if workflow failed
        if not correlation_data:
            print("[WARN] Using default correlation data")
            return CorrelationOutput(
                correlation_id=correlation_id,
                correlation_score=0.5,
                threat_level=ThreatLevel.MEDIUM,
                confidence=ConfidenceLevel.MEDIUM,
                matched_techniques=[],
                reasoning="Workflow failed",
                timestamp=timestamp
            )

        # Parse matched techniques; a malformed entry is skipped, not fatal.
        matched_techniques = []
        for mt_data in correlation_data.get("matched_techniques", []):
            try:
                matched_techniques.append(
                    MatchedTechnique(
                        technique_id=mt_data.get("technique_id", "Unknown"),
                        match_confidence=mt_data.get("match_confidence", 0.0),
                        evidence=mt_data.get("evidence", ""),
                        # NOTE(review): the model's validation_result string
                        # is discarded here, as in the original; confirm
                        # whether it should be kept.
                        validation_result=None
                    )
                )
            except Exception as e:
                print(f"[WARN] Skipping malformed technique: {e}")

        return CorrelationOutput(
            correlation_id=correlation_id,
            correlation_score=correlation_data.get("correlation_score", 0.5),
            threat_level=self._to_enum(ThreatLevel, correlation_data.get("threat_level", "medium"), ThreatLevel.MEDIUM),
            confidence=self._to_enum(ConfidenceLevel, correlation_data.get("confidence", "medium"), ConfidenceLevel.MEDIUM),
            matched_techniques=matched_techniques,
            reasoning=correlation_data.get("reasoning", "No reasoning provided"),
            timestamp=timestamp
        )

    def run_workflow(self, log_input: LogInput, mitre_input: MitreInput) -> Optional[Dict[str, Any]]:
        """Execute the correlation workflow and return structured data.

        Returns None when the workflow is not initialised, raises while
        running, or produces no ``structured_response``.
        """
        if not self.workflow:
            print("[ERROR] Workflow not initialized")
            return None

        # str() each item so a non-string entry cannot break the join.
        systems = ', '.join(map(str, log_input.affected_systems))
        anomalies = ', '.join(map(str, log_input.anomalies))
        processes = ', '.join(map(str, log_input.processes))

        analysis_request = (
            f"Perform correlation analysis for this security event.\n\n"
            f"LOG DATA:\n"
            f"- ID: {log_input.analysis_id}\n"
            f"- Severity: {log_input.severity}\n"
            f"- Systems: {systems}\n"
            f"- Anomalies: {anomalies}\n"
            f"- Processes: {processes}\n"
            f"- Summary: {log_input.raw_summary}\n\n"
            f"MITRE DATA:\n"
            f"- Techniques: {json.dumps(mitre_input.techniques)}\n"
            f"- Coverage: {mitre_input.coverage_score}\n"
            f"- Confidence: {mitre_input.confidence}\n"
            f"- Analysis: {mitre_input.analysis_text}\n"
        )

        try:
            initial_state = CorrelationState(analysis_request=analysis_request)
            result = self.workflow.invoke(initial_state)

            if isinstance(result, dict) and "structured_response" in result:
                return result.get("structured_response")

            print("[WARN] No structured_response in result")
            return None

        except Exception as e:
            print(f"[ERROR] Workflow failed: {type(e).__name__}: {e}")
            import traceback
            traceback.print_exc()
            return None
423
+
424
+
425
class CorrelationAgent:
    """Thin facade over CorrelationLogic that adds progress logging."""

    def __init__(self):
        """Create the underlying CorrelationLogic and announce readiness."""
        self.correlation_logic = CorrelationLogic()
        print("[INFO] CorrelationAgent initialized")

    def process(self, log_input: LogInput, mitre_input: MitreInput) -> CorrelationOutput:
        """Correlate *log_input* with *mitre_input* and print a summary line.

        Any exception is logged and then re-raised unchanged.
        """
        print(f"[INFO] Processing correlation: {log_input.analysis_id}")

        try:
            outcome = self.correlation_logic.correlate(log_input, mitre_input)

            summary = (
                f"[INFO] Threat: {outcome.threat_level.value.upper()} | "
                f"Confidence: {outcome.confidence.value.upper()} | "
                f"Score: {outcome.correlation_score:.3f} | "
                f"Techniques: {len(outcome.matched_techniques)}"
            )
            print(f"[INFO] Completed: {outcome.correlation_id}")
            print(summary)

            return outcome

        except Exception as e:
            print(f"[ERROR] Correlation processing failed: {e}")
            raise
src/agents/correlation_agent/input_converters.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any
2
+ from .types import LogInput, MitreInput
3
+
4
def convert_mitre_agent_input(mitre_agent_input) -> LogInput:
    """Build a LogInput for the correlation agent from a MITRE-agent input.

    Each detected anomaly contributes its ``"description"`` entry, and the
    severity enum is flattened to its upper-cased string value. The other
    fields are copied across unchanged.
    """
    descriptions = []
    for entry in mitre_agent_input.detected_anomalies:
        descriptions.append(entry["description"])

    return LogInput(
        analysis_id=mitre_agent_input.analysis_id,
        severity=mitre_agent_input.severity.value.upper(),
        affected_systems=mitre_agent_input.affected_systems,
        anomalies=descriptions,
        processes=mitre_agent_input.processes,
        raw_summary=mitre_agent_input.raw_summary,
    )
21
+
22
def convert_mitre_analysis_output(mitre_analysis_result, original_input) -> MitreInput:
    """Convert a MitreAgent analysis result into a MitreInput for correlation.

    Args:
        mitre_analysis_result: Mapping read via ``.get`` for the keys
            ``technique_details``, ``coverage_score``, ``confidence`` and
            ``analysis``.
        original_input: Object whose ``analysis_id`` attribute (if present)
            is embedded in the resulting id.

    Returns:
        MitreInput holding up to ten techniques. Each one carries
        ``attack_id``, ``name``, ``description`` and ``relevance_score``.
    """
    # `or []` also covers an explicit None stored under the key.
    technique_details = mitre_analysis_result.get('technique_details') or []

    techniques = [
        {
            "attack_id": tech_detail.get('attack_id', 'Unknown'),
            "name": tech_detail.get('name', 'Unknown Technique'),
            # The correlation tool embeds this text and scores a technique
            # 0.0 when it is empty, so it must be forwarded.
            "description": tech_detail.get('description', ''),
            "relevance_score": tech_detail.get('relevance_score', 0.5),
        }
        for tech_detail in technique_details[:10]  # Top 10 techniques
    ]

    # Default techniques if none found
    if not techniques:
        techniques = [
            {"attack_id": "T1059.001", "name": "PowerShell", "relevance_score": 0.7},
            {"attack_id": "T1566.001", "name": "Spearphishing Attachment", "relevance_score": 0.6}
        ]

    return MitreInput(
        analysis_id=f"MITRE_{getattr(original_input, 'analysis_id', 'UNKNOWN')}",
        techniques=techniques,
        coverage_score=mitre_analysis_result.get('coverage_score', 0.5),
        confidence=mitre_analysis_result.get('confidence', 0.5),
        analysis_text=mitre_analysis_result.get('analysis', 'MITRE analysis completed')
    )
src/agents/correlation_agent/test.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ load_dotenv()
3
+
4
+ from src.agents.correlation_agent.correlation_logic import CorrelationAgent
5
+ from src.agents.correlation_agent.types import LogInput, MitreInput
6
+ from src.agents.mitre_retriever_agent.mitre_example_input import create_sample_log_input, create_elaborate_mockup_incident
7
+ from src.agents.mitre_retriever_agent.mitre_agent import MitreAgent
8
+ from src.agents.correlation_agent.input_converters import convert_mitre_agent_input, convert_mitre_analysis_output
9
+
10
def test_sample_correlation():
    """End-to-end smoke run: sample log -> MITRE agent -> correlation agent.

    Prints every intermediate result and returns the final CorrelationOutput.
    """
    rule = "=" * 60
    print(rule)
    print("TESTING BASIC CORRELATION WITH MITRE AGENT")
    print(rule)

    # Sample input comes from the MITRE agent's example module.
    mitre_agent_input = create_sample_log_input()
    log_input = convert_mitre_agent_input(mitre_agent_input)

    print(f"\nSAMPLE INPUT CREATED:")
    print(f"- Analysis ID: {log_input.analysis_id}")
    print(f"- Severity: {log_input.severity}")
    print(f"- Affected Systems: {log_input.affected_systems}")
    print(f"- Anomalies: {len(log_input.anomalies)} detected")
    print(f"- Processes: {log_input.processes}")

    print("\nRUNNING MITRE AGENT ANALYSIS...")
    mitre_agent = MitreAgent(
        llm_provider="openai",
        model_name="gpt-4o",
        max_iterations=3
    )
    analysis = mitre_agent.analyze_threat(mitre_agent_input)

    print(f"✓ MITRE analysis completed")
    print(f" - Techniques found: {len(analysis.get('technique_details', []))}")
    print(f" - Coverage score: {analysis.get('coverage_score', 0):.3f}")
    print(f" - Confidence: {analysis.get('confidence', 0):.3f}")

    # Reshape the MITRE analysis into the correlation agent's input type.
    mitre_input = convert_mitre_analysis_output(analysis, mitre_agent_input)

    print(f"\n✓ MITRE INPUT CONVERTED:")
    print(f" - Top {min(5, len(mitre_input.techniques))} techniques:")
    for rank, technique in enumerate(mitre_input.techniques[:5], 1):
        print(f" {rank}. {technique['attack_id']}: {technique['name']} (Score: {technique['relevance_score']:.3f})")

    print("\nRUNNING CORRELATION ANALYSIS...")
    correlation_agent = CorrelationAgent()
    result = correlation_agent.process(log_input, mitre_input)

    print(f"\n{rule}")
    print(f"CORRELATION RESULTS:")
    print(f"{rule}")
    print(f"ID: {result.correlation_id}")
    print(f"Score: {result.correlation_score:.3f}")
    print(f"Threat Level: {result.threat_level.value.upper()}")
    print(f"Confidence: {result.confidence.value.upper()}")
    print(f"Timestamp: {result.timestamp}")

    print(f"\nMATCHED TECHNIQUES ({len(result.matched_techniques)}):")
    for rank, matched in enumerate(result.matched_techniques, 1):
        ellipsis = '...' if len(matched.evidence) > 100 else ''
        print(f"{rank}. {matched.technique_id} - Confidence: {matched.match_confidence:.3f}")
        print(f" Evidence: {matched.evidence[:100]}{ellipsis}")

    print(f"\nREASONING:")
    print(f"{result.reasoning}")

    return result
72
+
73
def test_elaborate_correlation():
    """End-to-end smoke run using the elaborate mock incident.

    Same pipeline as the basic test (MITRE agent, converter, correlation
    agent) but with the larger incident fixture. Returns the final
    CorrelationOutput.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("TESTING CORRELATION - ELABORATE INCIDENT")
    print(rule)

    # The elaborate incident fixture drives both agents.
    mitre_agent_input = create_elaborate_mockup_incident()
    log_input = convert_mitre_agent_input(mitre_agent_input)

    print(f"\nELABORATE INCIDENT INPUT:")
    print(f"- Analysis ID: {log_input.analysis_id}")
    print(f"- Severity: {log_input.severity}")
    print(f"- Affected Systems: {len(log_input.affected_systems)} systems")
    print(f"- Anomalies: {len(log_input.anomalies)} detected")
    print(f"- Processes: {len(log_input.processes)} processes")

    print("\nRUNNING MITRE AGENT ANALYSIS FOR ELABORATE INCIDENT...")
    mitre_agent = MitreAgent(
        llm_provider="openai",
        model_name="gpt-4o",
        max_iterations=3
    )
    analysis = mitre_agent.analyze_threat(mitre_agent_input)

    print(f"✓ MITRE analysis completed")
    print(f" - Techniques found: {len(analysis.get('technique_details', []))}")
    print(f" - Coverage score: {analysis.get('coverage_score', 0):.3f}")
    print(f" - Confidence: {analysis.get('confidence', 0):.3f}")

    mitre_input = convert_mitre_analysis_output(analysis, mitre_agent_input)

    print(f"\n✓ TOP TECHNIQUES FROM MITRE ANALYSIS:")
    for rank, technique in enumerate(mitre_input.techniques[:5], 1):
        print(f" {rank}. {technique['attack_id']}: {technique['name'][:50]}... (Score: {technique['relevance_score']:.3f})")

    print("\nRUNNING ELABORATE CORRELATION ANALYSIS...")
    correlation_agent = CorrelationAgent()
    result = correlation_agent.process(log_input, mitre_input)

    print(f"\n{rule}")
    print(f"ELABORATE CORRELATION RESULTS:")
    print(f"{rule}")
    print(f"ID: {result.correlation_id}")
    print(f"Score: {result.correlation_score:.3f}")
    print(f"Threat Level: {result.threat_level.value.upper()}")
    print(f"Confidence: {result.confidence.value.upper()}")
    print(f"Matched Techniques: {len(result.matched_techniques)}")

    print(f"\nTOP CORRELATED TECHNIQUES:")
    for rank, matched in enumerate(result.matched_techniques[:5], 1):
        ellipsis = '...' if len(matched.evidence) > 80 else ''
        print(f"{rank}. {matched.technique_id} - Confidence: {matched.match_confidence:.3f}")
        print(f" Evidence: {matched.evidence[:80]}{ellipsis}")

    print(f"\nREASONING:")
    print(f"{result.reasoning}")

    return result
134
+
135
def main():
    """Run both correlation scenarios and print a side-by-side summary.

    Any exception is reported with a traceback and then re-raised.
    """
    rule = "=" * 60

    print("╔" + "="*58 + "╗")
    print("║" + " "*10 + "CORRELATION AGENT TEST SUITE" + " "*20 + "║")
    print("╚" + "="*58 + "╝")
    print()

    try:
        # Order matters: the sample scenario runs before the elaborate one.
        result1 = test_sample_correlation()
        result2 = test_elaborate_correlation()

        print("\n" + rule)
        print("✓ ALL TESTS COMPLETED SUCCESSFULLY")
        print(rule)

        print(f"\nTEST SUMMARY:")
        scenarios = (
            ("1. Sample Input Test:", result1),
            ("2. Elaborate Incident Test:", result2),
        )
        for heading, outcome in scenarios:
            print(f"\n{heading}")
            print(f" - Threat Level: {outcome.threat_level.value.upper()}")
            print(f" - Confidence: {outcome.confidence.value.upper()}")
            print(f" - Correlation Score: {outcome.correlation_score:.3f}")
            print(f" - Matched Techniques: {len(outcome.matched_techniques)}")

        print("\n" + rule)

    except Exception as e:
        print(f"\n❌ TEST FAILED: {e}")
        import traceback
        traceback.print_exc()
        raise


if __name__ == "__main__":
    main()
src/agents/correlation_agent/types.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Any, Optional
2
+ from dataclasses import dataclass
3
+ from enum import Enum
4
+
5
class ThreatLevel(Enum):
    """Threat severity assigned to a correlation result, lowest to highest."""

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
10
+
11
class ConfidenceLevel(Enum):
    """Confidence attached to a correlation result, lowest to highest."""

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
15
+
16
@dataclass
class LogInput:
    """Log-analysis findings handed to the correlation agent."""

    analysis_id: str             # identifier of the log analysis
    severity: str                # severity label as a plain string
    affected_systems: List[str]  # systems named in the analysis
    anomalies: List[str]         # anomaly descriptions
    processes: List[str]         # processes seen in the logs
    raw_summary: str             # free-text summary of the analysis
24
+
25
@dataclass
class MitreInput:
    """MITRE ATT&CK analysis results handed to the correlation agent."""

    analysis_id: str                  # identifier of the MITRE analysis
    techniques: List[Dict[str, Any]]  # one dict per candidate technique
    coverage_score: float             # coverage score from the MITRE analysis
    confidence: float                 # numeric confidence from the MITRE analysis
    analysis_text: str                # free-text analysis
32
+
33
@dataclass
class MatchedTechnique:
    """A single MITRE technique matched against the log evidence."""

    technique_id: str       # MITRE technique identifier
    match_confidence: float # confidence of the match
    evidence: str           # short evidence string
    # Optional validation details; defaults to None.
    validation_result: Optional[Dict[str, Any]] = None
39
+
40
@dataclass
class CorrelationOutput:
    """Final result of correlating log findings with MITRE techniques."""

    correlation_id: str                         # identifier of this correlation run
    correlation_score: float                    # overall correlation score
    threat_level: ThreatLevel                   # assessed threat level
    confidence: ConfidenceLevel                 # assessed confidence level
    matched_techniques: List[MatchedTechnique]  # techniques matched to the logs
    reasoning: str = ""                         # free-text justification
    timestamp: str = ""                         # creation time as a string
src/agents/cti_agent/__pycache__/config.cpython-311.pyc ADDED
Binary file (13.5 kB). View file
 
src/agents/cti_agent/__pycache__/cti_agent.cpython-311.pyc ADDED
Binary file (40.8 kB). View file
 
src/agents/cti_agent/__pycache__/cti_tools.cpython-311.pyc ADDED
Binary file (12.2 kB). View file
 
src/agents/cti_agent/config.py ADDED
@@ -0,0 +1,371 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Search configuration
2
# Settings for the CTI report search, with results limited to the listed
# security sources.
CTI_SEARCH_CONFIG = {
    "max_results": 5,
    "search_depth": "advanced",
    "include_raw_content": True,
    "include_domains": [
        # Government and CERT sources
        "*.cisa.gov",         # US Cybersecurity and Infrastructure Security Agency
        "*.us-cert.gov",      # US-CERT advisories
        # Vendor threat-intelligence sources
        "*.crowdstrike.com",  # CrowdStrike threat intelligence
        "*.mandiant.com",     # Mandiant (Google) threat reports
        "*.trendmicro.com",   # Trend Micro research
        "*.securelist.com",   # Kaspersky SecureList blog
        # European and UK national bodies
        "*.cert.europa.eu",   # European CERT
        "*.ncsc.gov.uk",      # UK National Cyber Security Centre
    ],
}


# Model identifier in "<provider>:<model>" form.
MODEL_NAME = "google_genai:gemini-2.0-flash"
21
+
22
+ # CTI Planner Prompt
23
+ CTI_PLANNER_PROMPT = """You are a Cyber Threat Intelligence (CTI) researcher planning
24
+ to retrieve actual threat intelligence from CTI reports.
25
+
26
+ Your goal is to create a research plan that finds CTI reports and EXTRACTS the actual
27
+ intelligence - specific IOCs, technique details, actor information, and attack patterns.
28
+
29
+ IMPORTANT GUIDELINES:
30
+ 1. Search for actual CTI reports from reputable sources
31
+ 2. Prioritize recent reports (2024-2025)
32
+ 3. ALWAYS fetch full report content to extract intelligence
33
+ 4. Extract SPECIFIC intelligence: actual IOCs, technique IDs, actor names, attack details
34
+ 5. Focus on retrieving CONCRETE DATA that can be used by other analysis agents
35
+ 6. Maximum 4 tasks with only one time of web searching
36
+
37
+ Available tools:
38
+ (1) SearchCTIReports[query]: Searches for CTI reports, threat analyses, and security advisories.
39
+ - More specific search queries (add APT names, CVE IDs, "IOC", "MITRE", "report")
40
+ - Use specific queries with APT names, technique IDs, CVEs
41
+ - Examples: "APT29 T1566.002 report 2025", "Scattered Spider IOCs"
42
+
43
+ (2) ExtractURL[search_result, index]: Extract a specific URL from search results JSON.
44
+ - search_result: JSON string from SearchCTIReports
45
+ - index: Which report URL to extract (default: 0 for first)
46
+ - ALWAYS use this to get the actual report URL from search results
47
+
48
+ (3) FetchReport[url]: Retrieves the full content of a CTI report using real url.
49
+ - ALWAYS use this to get actual report content for intelligence extraction
50
+ - Essential for retrieving specific IOCs and details
51
+
52
+ (4) ExtractIOCs[report_content]: Extracts actual Indicators of Compromise from reports.
53
+ - Returns specific IPs, domains, hashes, URLs, file names
54
+ - Provides concrete IOCs that can be used for detection
55
+
56
+ (5) IdentifyThreatActors[report_content]: Extracts threat actor details from reports.
57
+ - Returns specific actor names, aliases, and campaign names
58
+ - Provides attribution information and targeting details
59
+ - Includes motivation and operational patterns
60
+
61
+ (6) ExtractMITRETechniques[report_content, framework]: Extracts MITRE ATT&CK techniques from reports.
62
+ - framework: "Enterprise", "Mobile", or "ICS" (default: "Enterprise")
63
+ - Returns specific technique IDs (T1234) with descriptions
64
+ - Maps malware behaviors to MITRE framework
65
+ - Provides structured technique analysis
66
+
67
+ (7) LLM[instruction]: Synthesis and correlation of extracted intelligence.
68
+ - Combine intelligence from multiple sources
69
+ - DON'T USE FOR ANY OTHER PURPOSES
70
+ - Identify patterns across findings
71
+ - Correlate IOCs with techniques and actors
72
+
73
+ PLAN STRUCTURE:
74
+ Each plan step should be: Plan: [description] #E[N] = Tool[input]
75
+
76
+ Example for task "Find threat intelligence about APT29 using T1566.002":
77
+
78
+ Plan: Search for recent APT29 campaign reports with IOCs
79
+ #E1 = SearchCTIReports[APT29 T1566.002 spearphishing IOCs 2025]
80
+
81
+ Plan: Search for detailed technical analysis of APT29 spearphishing
82
+ #E2 = SearchCTIReports[APT29 spearphishing technical analysis filetype:pdf]
83
+
84
+ Plan: Fetch the most detailed technical report for intelligence extraction
85
+ #E3 = FetchReport[top ranked URL from #E1 with most technical detail]
86
+
87
+ Plan: Extract all specific IOCs from the fetched report
88
+ #E4 = ExtractIOCs[#E3]
89
+
90
+ Plan: Extract threat actor details and campaign information from the report
91
+ #E5 = IdentifyThreatActors[#E3]
92
+
93
+ Plan: If first report lacks detail, fetch second report for additional intelligence
94
+ #E6 = FetchReport[second best URL from #E1]
95
+
96
+ Plan: Extract IOCs from second report to enrich intelligence
97
+ #E7 = ExtractIOCs[#E6]
98
+
99
+ Plan: Correlate and consolidate all extracted intelligence
100
+ #E8 = LLM[Consolidate intelligence from #E4, #E5, #E6, and #E7. Present specific
101
+ IOCs, technique IDs, actor details, and attack patterns. Identify overlaps and unique findings.]
102
+
103
+ Now create a detailed plan for the following task:
104
+ Task: {task}"""
105
+
106
+ # CTI Solver Prompt
107
+ CTI_SOLVER_PROMPT = """You are a Cyber Threat Intelligence analyst creating a final intelligence report.
108
+
109
+ Below are the COMPLETE results from your CTI research. Each section contains the full output from extraction tools.
110
+
111
+ {structured_results}
112
+
113
+ {'='*80}
114
+ EXECUTION PLAN OVERVIEW:
115
+ {'='*80}
116
+ {plan}
117
+
118
+ {'='*80}
119
+ ORIGINAL TASK: {task}
120
+ {'='*80}
121
+
122
+ Create a comprehensive threat intelligence report with the following structure:
123
+
124
+ ## Intelligence Sources
125
+ [List reports analyzed with titles and sources]
126
+
127
+ ## Threat Actors & Attribution
128
+ [Names, aliases, campaigns, and attribution details from IdentifyThreatActors results]
129
+
130
+ ## MITRE ATT&CK Techniques Identified
131
+ [All technique IDs from ExtractMITRETechniques results, with descriptions]
132
+
133
+ ## Indicators of Compromise (IOCs) Retrieved
134
+ [All IOCs from ExtractIOCs results, organized by type]
135
+
136
+ ### IP Addresses
137
+ ### Domains
138
+ ### File Hashes
139
+ ### URLs
140
+ ### Email Addresses
141
+ ### File Names
142
+ ### Other Indicators
143
+
144
+ ## Attack Patterns & Campaign Details
145
+ [Specific attack flows, timeline, targeting from reports]
146
+
147
+ ## Key Findings Summary
148
+ [3-5 critical bullet points]
149
+
150
+ ## Intelligence Gaps
151
+ [What information was not available]
152
+
153
+ **INSTRUCTIONS:**
154
+ - Extract ALL data from results above - don't summarize, list actual values
155
+ - Parse JSON if present in results
156
+ - If Q&A format, extract all answers
157
+ - Be comprehensive and specific
158
+ """
159
+
160
+ # Regex pattern for parsing CTI plans
161
+ CTI_REGEX_PATTERN = r"Plan:\s*(.+)\s*(#E\d+)\s*=\s*(\w+)\s*\[([^\]]+)\]"
162
+
163
+ # Tool-specific prompts
164
+ IOC_EXTRACTION_PROMPT = """Extract all Indicators of Compromise (IOCs) from the content below.
165
+
166
+ **Instructions:** List ONLY the actual IOCs found. No explanations, no summaries - just the indicators.
167
+
168
+ **Content:**
169
+ {content}
170
+
171
+ **Extract and list:**
172
+
173
+ **IP Addresses:**
174
+ [List IPs, or write "None found"]
175
+
176
+ **Domains:**
177
+ [List domains, or write "None found"]
178
+
179
+ **URLs:**
180
+ [List malicious URLs, or write "None found"]
181
+
182
+ **File Hashes:**
183
+ [List hashes with type (MD5/SHA1/SHA256), or write "None found"]
184
+
185
+ **Email Addresses:**
186
+ [List emails, or write "None found"]
187
+
188
+ **File Names:**
189
+ [List malicious files/paths, or write "None found"]
190
+
191
+ **Registry Keys:**
192
+ [List registry keys, or write "None found"]
193
+
194
+ **Other Indicators:**
195
+ [List mutexes, user agents, etc., or write "None found"]
196
+
197
+ If no specific IOCs found, respond: "No extractable IOCs in content."
198
+ """
199
+
200
+ THREAT_ACTOR_PROMPT = """Extract threat actor information from the content below.
201
+
202
+ **Instructions:** Provide concise answers. Include brief descriptions where relevant.
203
+
204
+ **Content:**
205
+ {content}
206
+
207
+ **Answer these questions:**
208
+
209
+ **Q: What threat actor/APT group is discussed?**
210
+ A: [Name and aliases, e.g., "APT29 (Cozy Bear, The Dukes)" or "None identified"]
211
+
212
+ **Q: What is this actor known for?**
213
+ A: [1-2 sentence description of their typical activities/focus, or "No attribution details"]
214
+
215
+ **Q: What campaigns/operations are mentioned?**
216
+ A: [List campaign names with timeframes, e.g., "NobleBaron (2024-Q2)" or "None mentioned"]
217
+
218
+ **Q: What is their suspected origin/attribution?**
219
+ A: [Nation-state/origin and confidence level, e.g., "Russian state-sponsored (High confidence)" or "Unknown"]
220
+
221
+ **Q: Who/what do they target?**
222
+ A: [Industries and regions, e.g., "Government agencies in Europe, Defense sector in North America" or "Not specified"]
223
+
224
+ **Q: What is their motivation?**
225
+ A: [Primary objective, e.g., "Espionage and intelligence collection" or "Not specified"]
226
+
227
+ If no specific threat actor information found, respond: "No threat actor attribution in content."
228
+ """
229
+
230
+ REPLAN_PROMPT = """The previous CTI research step failed to retrieve quality intelligence.
231
+
232
+ ORIGINAL TASK: {task}
233
+
234
+ FAILED STEP:
235
+ Plan: {failed_step}
236
+ {step_name} = {tool}[{tool_input}]
237
+
238
+ RESULT: {results}
239
+
240
+ PROBLEM: {problem}
241
+
242
+ COMPLETED STEPS SO FAR:
243
+ {completed_steps}
244
+
245
+ Create an IMPROVED plan for this specific step that will retrieve ACTUAL CTI intelligence.
246
+
247
+ Available tools:
248
+ (1) SearchCTIReports[query]: Searches for CTI reports, threat analyses, and security advisories.
249
+ - Use specific queries with APT names, technique IDs, CVEs
250
+ - Examples: "APT29 T1566.002 report 2024", "Scattered Spider IOCs"
251
+
252
+ (2) ExtractURL[search_result, index]: Extract a specific URL from search results JSON.
253
+ - search_result: JSON string from SearchCTIReports
254
+ - index: Which report URL to extract (default: 0 for first)
255
+ - ALWAYS use this to get the actual report URL from search results
256
+
257
+ (3) FetchReport[url]: Retrieves the full content of a CTI report.
258
+ - ALWAYS use this to get actual report content for intelligence extraction
259
+ - Essential for retrieving specific IOCs and details
260
+
261
+ (4) ExtractIOCs[report_content]: Extracts actual Indicators of Compromise from reports.
262
+ - Returns specific IPs, domains, hashes, URLs, file names
263
+ - Provides concrete IOCs that can be used for detection
264
+
265
+ (5) IdentifyThreatActors[report_content]: Extracts threat actor details from reports.
266
+ - Returns specific actor names, aliases, and campaign names
267
+ - Provides attribution information and targeting details
268
+ - Includes motivation and operational patterns
269
+
270
+ (6) ExtractMITRETechniques[report_content, framework]: Extracts MITRE ATT&CK techniques from reports.
271
+ - framework: "Enterprise", "Mobile", or "ICS" (default: "Enterprise")
272
+ - Returns specific technique IDs (T1234) with descriptions
273
+ - Maps malware behaviors to MITRE framework
274
+ - Provides structured technique analysis
275
+
276
+ (7) LLM[instruction]: Synthesis and correlation of extracted intelligence.
277
+ - Combine intelligence from multiple sources
278
+ - Identify patterns across findings
279
+ - Correlate IOCs with techniques and actors
280
+
281
+ Consider:
282
+ 1. More specific search queries (add APT names, CVE IDs, "IOC", "MITRE", "report")
283
+ 2. Alternative CTI sources (CISA advisories, vendor reports, not news articles)
284
+ 3. Different tool combinations (search → extract URL → fetch → extract IOCs)
285
+
286
+ Provide ONLY the corrected step in this format:
287
+ Plan: [improved description]
288
+ #E{step} = Tool[improved input]"""
289
+
290
+ MITRE_EXTRACTION_PROMPT = """Extract MITRE ATT&CK {framework} techniques from the content below.
291
+
292
+ **Instructions:**
293
+ 1. Identify behaviors described in the content
294
+ 2. Map to MITRE technique IDs (main techniques only: T#### not T####.###)
295
+ 3. Provide brief description of what each technique means
296
+ 4. List final technique IDs on the last line
297
+
298
+ **Content:**
299
+ {content}
300
+
301
+ **Identified Techniques:**
302
+
303
+ [For each technique found, format as:]
304
+ **T####** - [Technique Name]: [1 sentence: what this technique is and why it was identified in the content]
305
+
306
+ [Continue for all techniques...]
307
+
308
+ **Final Answer - Technique IDs:**
309
+ T####, T####, T####
310
+
311
+ [If no valid techniques found, respond: "No MITRE {framework} techniques identified in content."]
312
+ """
313
+
314
+ REPLAN_PROMPT = """The previous CTI research step failed to retrieve quality intelligence.
315
+
316
+ ORIGINAL TASK: {task}
317
+
318
+ FAILED STEP:
319
+ Plan: {failed_step}
320
+ {step_name} = {tool}[{tool_input}]
321
+
322
+ RESULT: {results}
323
+
324
+ PROBLEM: {problem}
325
+
326
+ COMPLETED STEPS SO FAR:
327
+ {completed_steps}
328
+
329
+ Create an IMPROVED plan for this specific step that will retrieve ACTUAL CTI intelligence.
330
+
331
+ Available tools:
332
+ (1) SearchCTIReports[query]: Searches for CTI reports, threat analyses, and security advisories.
333
+ - Use specific queries with APT names, technique IDs, CVEs
334
+ - Examples: "APT29 T1566.002 report 2024", "Scattered Spider IOCs"
335
+
336
+ (2) ExtractURL[search_result, index]: Extract a specific URL from search results JSON.
337
+ - search_result: JSON string from SearchCTIReports
338
+ - index: Which report URL to extract (default: 0 for first)
339
+ - ALWAYS use this to get the actual report URL from search results
340
+
341
+ (3) FetchReport[url]: Retrieves the full content of a CTI report.
342
+ - ALWAYS use this to get actual report content for intelligence extraction
343
+ - Essential for retrieving specific IOCs and details
344
+
345
+ (4) ExtractIOCs[report_content]: Extracts actual Indicators of Compromise from reports.
346
+ - Returns specific IPs, domains, hashes, URLs, file names
347
+ - Provides concrete IOCs that can be used for detection
348
+
349
+ (5) IdentifyThreatActors[report_content]: Extracts threat actor details from reports.
350
+ - Returns specific actor names, aliases, and campaign names
351
+ - Provides attribution information and targeting details
352
+ - Includes motivation and operational patterns
353
+
354
+ (6) ExtractMITRETechniques[report_content, framework]: Extracts MITRE ATT&CK techniques from reports.
355
+ - framework: "Enterprise", "Mobile", or "ICS" (default: "Enterprise")
356
+ - Returns specific technique IDs (T1234) with descriptions
357
+ - Maps malware behaviors to MITRE framework
358
+
359
+ (7) LLM[instruction]: Synthesis and correlation of extracted intelligence.
360
+ - Combine intelligence from multiple sources
361
+ - Identify patterns across findings
362
+ - Correlate IOCs with techniques and actors
363
+
364
+ Consider:
365
+ 1. More specific search queries (add APT names, CVE IDs, "IOC", "MITRE", "report")
366
+ 2. Alternative CTI sources (CISA advisories, vendor reports, not news articles)
367
+ 3. Different tool combinations (search → extract URL → fetch → extract IOCs/techniques)
368
+
369
+ Provide ONLY the corrected step in this format:
370
+ Plan: [improved description]
371
+ #E{step} = Tool[improved input]"""
src/agents/cti_agent/cti-bench/data/cti-ate.tsv ADDED
The diff for this file is too large to render. See raw diff
 
src/agents/cti_agent/cti-bench/data/cti-mcq.tsv ADDED
The diff for this file is too large to render. See raw diff
 
src/agents/cti_agent/cti-bench/data/cti-rcm-2021.tsv ADDED
The diff for this file is too large to render. See raw diff
 
src/agents/cti_agent/cti-bench/data/cti-rcm.tsv ADDED
The diff for this file is too large to render. See raw diff
 
src/agents/cti_agent/cti-bench/data/cti-taa.tsv ADDED
The diff for this file is too large to render. See raw diff
 
src/agents/cti_agent/cti-bench/data/cti-vsp.tsv ADDED
The diff for this file is too large to render. See raw diff
 
src/agents/cti_agent/cti-evaluator.py ADDED
@@ -0,0 +1,708 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import json
3
+ import os
4
+ from typing import List, Set, Dict, Tuple
5
+ from pathlib import Path
6
+ import pandas as pd
7
+ from dotenv import load_dotenv
8
+
9
+ # Import your CTI tools
10
+ from langchain.chat_models import init_chat_model
11
+ from langchain_tavily import TavilySearch
12
+ import sys
13
+
14
+ sys.path.append("src/agents/cti_agent")
15
+ from cti_tools import CTITools
16
+ from config import MODEL_NAME, CTI_SEARCH_CONFIG
17
+
18
+
19
+ class CTIToolsEvaluator:
20
+ """Evaluator for CTI tools on CTIBench benchmarks."""
21
+
22
def __init__(self):
    """Build the LLM, the search client and the CTI tools, and create empty result stores."""
    # The environment must be loaded before TAVILY_API_KEY is read below.
    load_dotenv()

    # Chat model handed to CTITools.
    self.llm = init_chat_model(MODEL_NAME, temperature=0.1)

    # CTITools needs a search client at construction time, even if the
    # evaluation itself does not search.
    tavily_kwargs = dict(CTI_SEARCH_CONFIG)
    tavily_kwargs["api_key"] = os.getenv("TAVILY_API_KEY")
    self.cti_search = TavilySearch(**tavily_kwargs)

    # Tool collection under evaluation.
    self.cti_tools = CTITools(self.llm, self.cti_search)

    # Per-benchmark result accumulators (one dict per evaluated sample).
    self.ate_results = []
    self.taa_results = []
39
+
40
+ # ==================== CTI-ATE: MITRE Technique Extraction Tool ====================
41
+
42
def extract_technique_ids(self, text: str) -> Set[str]:
    """
    Collect the distinct MITRE technique IDs mentioned in ``text``.

    Only the four-digit main-technique form (``T1234``) is captured. A
    sub-technique such as ``T1566.002`` contributes its parent ``T1566``,
    because the dot counts as a word boundary.

    Args:
        text: Text containing technique IDs

    Returns:
        Set of technique IDs (e.g., {'T1071', 'T1059'})
    """
    # Word boundaries on both sides reject longer tokens such as "XT1234" or "T12345".
    return {hit.group(0) for hit in re.finditer(r"\bT\d{4}\b", text)}
57
+
58
def calculate_ate_metrics(
    self, predicted: Set[str], ground_truth: Set[str]
) -> Dict[str, float]:
    """
    Score one sample's predicted technique set against its ground truth.

    Args:
        predicted: Set of predicted technique IDs
        ground_truth: Set of ground truth technique IDs

    Returns:
        Dictionary with precision, recall, f1, the tp/fp/fn counts and the
        sizes of both input sets. Any ratio whose denominator is zero is
        reported as 0.0.
    """
    n_predicted = len(predicted)
    n_truth = len(ground_truth)

    tp = len(predicted & ground_truth)  # predicted and correct
    fp = len(predicted - ground_truth)  # predicted but not in the ground truth
    fn = len(ground_truth - predicted)  # in the ground truth but not predicted

    precision = 0.0 if n_predicted == 0 else tp / n_predicted
    recall = 0.0 if n_truth == 0 else tp / n_truth

    if (precision + recall) > 0:
        f1 = 2 * (precision * recall) / (precision + recall)
    else:
        f1 = 0.0

    return {
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "tp": tp,
        "fp": fp,
        "fn": fn,
        "predicted_count": n_predicted,
        "ground_truth_count": n_truth,
    }
93
+
94
def evaluate_mitre_extraction_tool(
    self,
    sample_id: str,
    description: str,
    ground_truth: str,
    platform: str = "Enterprise",
) -> Dict:
    """
    Run extract_mitre_techniques on one sample and score the result.

    The result is appended to ``self.ate_results`` and also returned.

    Args:
        sample_id: Sample identifier (e.g., URL)
        description: Malware/report description to analyze
        ground_truth: Ground truth technique IDs (comma-separated)
        platform: MITRE platform (Enterprise, Mobile, ICS)

    Returns:
        Dictionary with the sample's identifiers, truncated description and
        tool output, sorted predicted/ground-truth/missing/extra ID lists,
        and the metrics from calculate_ate_metrics.
    """
    print(f"Evaluating {sample_id[:60]}...")

    # Ask the tool, then reduce its free-text answer to a set of IDs.
    raw_output = self.cti_tools.extract_mitre_techniques(description, platform)
    predicted_ids = self.extract_technique_ids(raw_output)

    # Ground truth arrives comma-separated; blank fragments are dropped.
    gt_ids = {piece.strip() for piece in ground_truth.split(",") if piece.strip()}

    result = {
        "sample_id": sample_id,
        "platform": platform,
        "description": description[:100] + "...",
        "tool_output": raw_output[:500] + "...",  # Truncate for storage
        "predicted": sorted(predicted_ids),
        "ground_truth": sorted(gt_ids),
        "missing": sorted(gt_ids - predicted_ids),  # False negatives
        "extra": sorted(predicted_ids - gt_ids),  # False positives
    }
    result.update(self.calculate_ate_metrics(predicted_ids, gt_ids))

    self.ate_results.append(result)
    return result
139
+
140
def evaluate_ate_from_tsv(
    self, filepath: str = "cti-bench/data/cti-ate.tsv", limit: int = None
) -> pd.DataFrame:
    """
    Run the MITRE extraction evaluation over every row of a CTI-ATE TSV file.

    Each row must provide the columns ``URL``, ``Description``, ``GT`` and
    ``Platform``. A row that raises is reported on stdout and skipped.

    Args:
        filepath: Path to CTI-ATE TSV file
        limit: Optional limit on number of samples to evaluate (a falsy
            value such as None or 0 means "no limit")

    Returns:
        DataFrame built from ``self.ate_results``. The list is not reset
        here, so results from earlier calls are included.
    """
    banner = "=" * 80
    print(f"\n{banner}")
    print("Evaluating extract_mitre_techniques tool on CTI-ATE benchmark")
    print(f"{banner}\n")

    # Load the benchmark and optionally keep only the first `limit` rows.
    benchmark = pd.read_csv(filepath, sep="\t")
    if limit:
        benchmark = benchmark.head(limit)

    print(f"Loaded {len(benchmark)} samples from {filepath}")
    print("Starting evaluation...\n")

    for row_label, sample in benchmark.iterrows():
        try:
            self.evaluate_mitre_extraction_tool(
                sample_id=sample["URL"],
                description=sample["Description"],
                ground_truth=sample["GT"],
                platform=sample["Platform"],
            )
        except Exception as exc:
            # Best effort: one bad sample must not abort the whole run.
            print(f"Error on sample {row_label}: {exc}")

    collected = pd.DataFrame(self.ate_results)

    print(f"\nCompleted evaluation of {len(self.ate_results)} samples")
    return collected
183
+
184
+ def get_ate_summary(self) -> Dict:
185
+ """
186
+ Get summary statistics for CTI-ATE evaluation.
187
+
188
+ Returns:
189
+ Dictionary with macro and micro averaged metrics
190
+ """
191
+ if not self.ate_results:
192
+ return {}
193
+
194
+ df = pd.DataFrame(self.ate_results)
195
+
196
+ # Macro averages (average of per-sample metrics)
197
+ macro_metrics = {
198
+ "macro_precision": df["precision"].mean(),
199
+ "macro_recall": df["recall"].mean(),
200
+ "macro_f1": df["f1"].mean(),
201
+ }
202
+
203
+ # Micro averages (calculated from total TP, FP, FN)
204
+ total_tp = df["tp"].sum()
205
+ total_fp = df["fp"].sum()
206
+ total_fn = df["fn"].sum()
207
+ total_predicted = df["predicted_count"].sum()
208
+ total_gt = df["ground_truth_count"].sum()
209
+
210
+ micro_precision = total_tp / total_predicted if total_predicted > 0 else 0.0
211
+ micro_recall = total_tp / total_gt if total_gt > 0 else 0.0
212
+ micro_f1 = (
213
+ 2 * (micro_precision * micro_recall) / (micro_precision + micro_recall)
214
+ if (micro_precision + micro_recall) > 0
215
+ else 0.0
216
+ )
217
+
218
+ micro_metrics = {
219
+ "micro_precision": micro_precision,
220
+ "micro_recall": micro_recall,
221
+ "micro_f1": micro_f1,
222
+ "total_samples": len(self.ate_results),
223
+ "total_tp": int(total_tp),
224
+ "total_fp": int(total_fp),
225
+ "total_fn": int(total_fn),
226
+ }
227
+
228
+ return {**macro_metrics, **micro_metrics}
229
+
230
+ # ==================== CTI-TAA: Threat Actor Attribution Tool ====================
231
+
232
def normalize_actor_name(self, name: str) -> str:
    """
    Normalize threat actor names for comparison.

    The name is lowercased and trimmed, then the leading decorations
    ``"the "``, ``"apt-"``, ``"apt"`` and ``"group"`` are removed repeatedly
    until none is left. Spellings of the same actor therefore collapse to
    one key: "APT29", "APT-29" and "The APT29" all become "29".

    Args:
        name: Threat actor name (None or empty is accepted)

    Returns:
        Normalized name, or an empty string when ``name`` is empty or
        consists only of the stripped prefixes.
    """
    if not name:
        return ""

    normalized = name.lower().strip()

    # "apt-" must be tried before "apt"; otherwise "apt-29" loses only "apt"
    # and keeps a dangling hyphen, so it never equals the form from "apt29".
    prefixes = ("the ", "apt-", "apt", "group")

    # Repeat until stable so the result does not depend on prefix order
    # (e.g. "the apt29" needs "the " removed before "apt" can match).
    changed = True
    while changed:
        changed = False
        for prefix in prefixes:
            if normalized.startswith(prefix):
                normalized = normalized[len(prefix):].strip()
                changed = True
                break

    return normalized
255
+
256
def extract_actor_from_output(self, text: str) -> str:
    """
    Pull the primary threat actor name out of the tool's text output.

    Three patterns are tried in order: the answer line of the
    "Q: What threat actor ..." question, then the phrases
    "threat actor ... is <Name>" and "attributed to <Name>". Matching is
    case-insensitive. Anything from the first "(" onward is discarded, and
    placeholder answers ("none", "none identified", "unknown",
    "not specified") are rejected so that the next pattern gets a chance.

    Args:
        text: Tool output text

    Returns:
        Extracted actor name or empty string
    """
    placeholders = ("none", "none identified", "unknown", "not specified")

    # Look for Q&A format from our updated prompt first, then free-text phrasing.
    candidate_patterns = (
        r"Q:\s*What threat actor.*?\n\s*A:\s*([^\n]+)",
        r"threat actor.*?is[:\s]+([A-Z][A-Za-z0-9\s\-]+?)(?:\s*\(|,|\.|$)",
        r"attributed to[:\s]+([A-Z][A-Za-z0-9\s\-]+?)(?:\s*\(|,|\.|$)",
    )

    for regex in candidate_patterns:
        found = re.search(regex, text, re.IGNORECASE | re.MULTILINE)
        if found is None:
            continue

        # Keep only the part before any parenthetical alias list.
        name = found.group(1).strip().split("(")[0].strip()
        if name and name.lower() not in placeholders:
            return name

    return ""
288
+
289
def check_actor_match(
    self, predicted: str, ground_truth: str, aliases: Dict[str, List[str]] = None
) -> bool:
    """
    Decide whether a predicted actor name denotes the ground-truth actor.

    Both names are passed through normalize_actor_name first. They match
    when the normalized forms are equal, or when one of them is a key of
    ``aliases`` whose alias list contains the other (after normalization).

    Args:
        predicted: Predicted threat actor name
        ground_truth: Ground truth threat actor name
        aliases: Optional dictionary mapping canonical names to aliases.
            Keys are looked up using the normalized names, so they must
            already be in normalized form.

    Returns:
        True if match, False otherwise (including when either name
        normalizes to an empty string)
    """
    pred_norm = self.normalize_actor_name(predicted)
    gt_norm = self.normalize_actor_name(ground_truth)

    # An empty side can never match anything.
    if not (pred_norm and gt_norm):
        return False

    if pred_norm == gt_norm:
        return True

    if not aliases:
        return False

    def _is_alias_of(canonical: str, candidate: str) -> bool:
        # True when `candidate` appears among the aliases listed for `canonical`.
        return canonical in aliases and any(
            candidate == self.normalize_actor_name(alias)
            for alias in aliases[canonical]
        )

    # Try both directions: prediction as alias of truth, then the reverse.
    return _is_alias_of(gt_norm, pred_norm) or _is_alias_of(pred_norm, gt_norm)
328
+
329
def evaluate_threat_actor_tool(
    self,
    sample_id: str,
    report_text: str,
    ground_truth: str,
    aliases: Dict[str, List[str]] = None,
) -> Dict:
    """
    Evaluate identify_threat_actors tool on a single labelled sample.

    The result is appended to ``self.taa_results`` and also returned. It
    carries the same ``is_correct`` / ``validator_notes`` / ``needs_review``
    keys that evaluate_taa_from_tsv writes, so get_taa_summary (which reads
    the ``is_correct`` column) can aggregate results from either entry
    point. The original ``correct`` key is kept for existing consumers.

    Args:
        sample_id: Sample identifier (e.g., URL)
        report_text: Threat report text to analyze
        ground_truth: Ground truth threat actor name
        aliases: Optional alias dictionary for matching

    Returns:
        Dictionary with evaluation result
    """
    print(f"Evaluating {sample_id[:60]}...")

    # Call the identify_threat_actors tool
    tool_output = self.cti_tools.identify_threat_actors(report_text)

    # Extract predicted actor
    predicted_actor = self.extract_actor_from_output(tool_output)

    # Check if match
    is_correct = self.check_actor_match(predicted_actor, ground_truth, aliases)

    result = {
        "sample_id": sample_id,
        "report_snippet": report_text[:100] + "...",
        "tool_output": tool_output[:500] + "...",  # Truncate for storage
        "predicted_actor": predicted_actor,
        "ground_truth": ground_truth,
        "correct": is_correct,
        # Same schema as the rows produced by evaluate_taa_from_tsv; without
        # "is_correct" the summary cannot score these rows.
        "is_correct": is_correct,
        "validator_notes": "",
        "needs_review": False,  # scored automatically against ground truth
    }

    self.taa_results.append(result)
    return result
370
+
371
def evaluate_taa_from_tsv(
    self,
    filepath: str = "cti-bench/data/cti-taa.tsv",
    limit: int = None,
    interactive: bool = True,
) -> pd.DataFrame:
    """
    Evaluate identify_threat_actors tool on CTI-TAA benchmark.

    Since CTI-TAA has no ground truth labels, this generates predictions
    that need manual validation. Each row must provide the columns ``URL``
    and ``Text``. One result dict per processed row is appended to
    ``self.taa_results``; a row that raises is reported on stdout and
    skipped.

    Args:
        filepath: Path to CTI-TAA TSV file
        limit: Optional limit on number of samples to evaluate (a falsy
            value such as None or 0 means "no limit")
        interactive: If True, prompts for manual validation after each prediction

    Returns:
        DataFrame built from ``self.taa_results``. The list is not reset
        here, so results from earlier calls are included. Answering "q"
        returns immediately, without recording the current sample.
    """
    print(f"\n{'='*80}")
    print(f"Evaluating identify_threat_actors tool on CTI-TAA benchmark")
    print(f"{'='*80}\n")

    if not interactive:
        print("NOTE: Running in non-interactive mode.")
        print("Predictions will be saved for manual review later.")
    else:
        print("NOTE: Running in interactive mode.")
        print("You will be asked to validate each prediction (y/n/s to skip).")

    # Load benchmark
    df = pd.read_csv(filepath, sep="\t")

    if limit:
        df = df.head(limit)

    print(f"\nLoaded {len(df)} samples from {filepath}")
    print(f"Starting evaluation...\n")

    # Evaluate each sample
    for idx, row in df.iterrows():
        try:
            print(f"\n{'-'*80}")
            print(f"Sample {idx + 1}/{len(df)}")
            print(f"URL: {row['URL']}")
            print(f"Report snippet: {row['Text'][:200]}...")
            print(f"{'-'*80}")

            # Call the identify_threat_actors tool
            tool_output = self.cti_tools.identify_threat_actors(row["Text"])

            # Extract predicted actor
            predicted_actor = self.extract_actor_from_output(tool_output)

            print(f"\nTOOL OUTPUT:")
            print(tool_output[:600])
            if len(tool_output) > 600:
                print("... (truncated)")

            print(
                f"\nEXTRACTED ACTOR: {predicted_actor if predicted_actor else '(none detected)'}"
            )

            # Manual validation
            # is_correct stays None when the sample is skipped or when
            # running non-interactively; such rows are flagged needs_review.
            is_correct = None
            validator_notes = ""

            if interactive:
                print(f"\nIs this attribution correct?")
                print(f"  y = Yes, correct")
                print(f"  n = No, incorrect")
                print(
                    f"  p = Partially correct (e.g., right family but wrong specific group)"
                )
                print(f"  s = Skip this sample")
                print(f"  q = Quit evaluation")

                # Keep asking until one of the five accepted answers is given.
                while True:
                    response = input("\nYour answer [y/n/p/s/q]: ").strip().lower()

                    if response == "y":
                        is_correct = True
                        break
                    elif response == "n":
                        is_correct = False
                        correct_actor = input(
                            "What is the correct actor? (optional): "
                        ).strip()
                        if correct_actor:
                            validator_notes = f"Correct actor: {correct_actor}"
                        break
                    elif response == "p":
                        is_correct = 0.5  # Partial credit
                        note = input("Explanation (optional): ").strip()
                        if note:
                            validator_notes = f"Partially correct: {note}"
                        break
                    elif response == "s":
                        # Skipped samples are still stored below, unvalidated.
                        print("Skipping this sample...")
                        break
                    elif response == "q":
                        # Early exit: the current sample is not stored.
                        print("Quitting evaluation...")
                        return pd.DataFrame(self.taa_results)
                    else:
                        print("Invalid response. Please enter y, n, p, s, or q.")

            # Store result
            result = {
                "sample_id": row["URL"],
                "report_snippet": row["Text"][:100] + "...",
                "tool_output": tool_output[:500] + "...",
                "predicted_actor": predicted_actor,
                "is_correct": is_correct,
                "validator_notes": validator_notes,
                "needs_review": is_correct is None,
            }

            self.taa_results.append(result)

        except Exception as e:
            # Best effort: one failing sample must not abort the run.
            print(f"Error on sample {idx}: {e}")
            continue

    results_df = pd.DataFrame(self.taa_results)

    print(f"\n{'='*80}")
    print(f"Completed evaluation of {len(self.taa_results)} samples")

    if interactive:
        # Count the rows that received a y/n/p verdict.
        validated = sum(1 for r in self.taa_results if r["is_correct"] is not None)
        print(f"Validated: {validated}/{len(self.taa_results)}")

    return results_df
505
+
506
+ def _extract_ground_truths_from_urls(self, urls: List[str]) -> Dict[str, str]:
507
+ """
508
+ Extract ground truth actor names from URLs.
509
+
510
+ Args:
511
+ urls: List of URLs from the benchmark
512
+
513
+ Returns:
514
+ Dictionary mapping URL to actor name
515
+ """
516
+ # Known threat actors and their URL patterns
517
+ actor_patterns = {
518
+ "sidecopy": "SideCopy",
519
+ "apt29": "APT29",
520
+ "apt36": "APT36",
521
+ "transparent-tribe": "Transparent Tribe",
522
+ "emotet": "Emotet",
523
+ "bandook": "Bandook",
524
+ "stately-taurus": "Stately Taurus",
525
+ "mustang-panda": "Mustang Panda",
526
+ "bronze-president": "Bronze President",
527
+ "cozy-bear": "APT29",
528
+ "nobelium": "APT29",
529
+ }
530
+
531
+ ground_truths = {}
532
+ for url in urls:
533
+ url_lower = url.lower()
534
+ for pattern, actor in actor_patterns.items():
535
+ if pattern in url_lower:
536
+ ground_truths[url] = actor
537
+ break
538
+
539
+ return ground_truths
540
+
541
def get_taa_summary(self) -> Dict:
    """
    Summarise the CTI-TAA results collected in ``self.taa_results``.

    Only rows whose ``is_correct`` value is not null count as validated.
    Accuracy is the sum of ``is_correct`` over validated rows divided by
    their count, so a partial verdict (0.5) contributes half a point.

    Returns:
        Empty dict when there are no results; a dict with counts and a
        ``message`` when nothing has been validated; otherwise a dict with
        accuracy plus correct / incorrect / partial counts.
    """
    records = self.taa_results
    if not records:
        return {}

    frame = pd.DataFrame(records)
    total = len(frame)

    # Keep only the verdicts a validator actually supplied.
    verdicts = frame.loc[frame["is_correct"].notna(), "is_correct"]
    validated = len(verdicts)

    if validated == 0:
        return {
            "total_samples": total,
            "validated_samples": 0,
            "needs_review": total,
            "message": "No samples have been validated yet",
        }

    # True counts as 1, False as 0, partial credit as 0.5.
    accuracy = verdicts.sum() / validated

    tally = {"correct": 0, "incorrect": 0, "partial": 0}
    for verdict in verdicts:
        if verdict == True:  # noqa: E712 - equality, not identity, on purpose
            tally["correct"] += 1
        elif verdict == False:  # noqa: E712
            tally["incorrect"] += 1
        elif verdict == 0.5:
            tally["partial"] += 1

    return {
        "accuracy": accuracy,
        "total_samples": total,
        "validated_samples": validated,
        "needs_review": total - validated,
        "correct": tally["correct"],
        "incorrect": tally["incorrect"],
        "partial": tally["partial"],
    }
582
+
583
+ # ==================== Utility Functions ====================
584
+
585
def export_results(self, output_dir: str = "./tool_evaluation_results"):
    """
    Export evaluation results to CSV and JSON files.

    For each tool that has results, writes a ``*_results.csv`` with the
    per-sample rows and a ``*_summary.json`` with the summary dict.
    Tools without results produce no files.

    Args:
        output_dir: Directory to save results. It is created if missing,
            including any missing parent directories.
    """
    output_path = Path(output_dir)
    # parents=True so a nested target such as "runs/today/results" works too.
    output_path.mkdir(parents=True, exist_ok=True)

    if self.ate_results:
        ate_df = pd.DataFrame(self.ate_results)
        ate_df.to_csv(
            output_path / "extract_mitre_techniques_results.csv", index=False
        )

        ate_summary = self.get_ate_summary()
        # Explicit encoding keeps the output independent of the locale default.
        with open(
            output_path / "extract_mitre_techniques_summary.json",
            "w",
            encoding="utf-8",
        ) as f:
            json.dump(ate_summary, f, indent=2)

        print(f"ATE results saved to {output_path}")

    if self.taa_results:
        taa_df = pd.DataFrame(self.taa_results)
        taa_df.to_csv(
            output_path / "identify_threat_actors_results.csv", index=False
        )

        taa_summary = self.get_taa_summary()
        with open(
            output_path / "identify_threat_actors_summary.json",
            "w",
            encoding="utf-8",
        ) as f:
            json.dump(taa_summary, f, indent=2)

        print(f"TAA results saved to {output_path}")
618
+
619
def print_summary(self):
    """
    Print summary of both tool evaluations.

    Reads ``get_ate_summary()`` and ``get_taa_summary()``. The TAA summary
    has no accuracy fields when no sample has been validated; in that case
    the sample count and the summary's message are printed instead of
    failing on the missing keys.
    """
    rule = "=" * 80

    print("\n" + rule)
    print("extract_mitre_techniques Tool Evaluation (CTI-ATE)")
    print(rule)

    ate_summary = self.get_ate_summary()
    if ate_summary:
        print(f"Total Samples: {ate_summary['total_samples']}")
        print("\nMacro Averages (per-sample average):")
        print(f" Precision: {ate_summary['macro_precision']:.4f}")
        print(f" Recall: {ate_summary['macro_recall']:.4f}")
        print(f" F1 Score: {ate_summary['macro_f1']:.4f}")
        print("\nMicro Averages (overall corpus):")
        print(f" Precision: {ate_summary['micro_precision']:.4f}")
        print(f" Recall: {ate_summary['micro_recall']:.4f}")
        print(f" F1 Score: {ate_summary['micro_f1']:.4f}")
        print("\nConfusion Matrix:")
        print(f" True Positives: {ate_summary['total_tp']}")
        print(f" False Positives: {ate_summary['total_fp']}")
        print(f" False Negatives: {ate_summary['total_fn']}")
    else:
        print("No results available.")

    print("\n" + rule)
    print("identify_threat_actors Tool Evaluation (CTI-TAA)")
    print(rule)

    taa_summary = self.get_taa_summary()
    if not taa_summary:
        print("No results available.")
    elif "accuracy" not in taa_summary:
        # Results exist but none were validated: there is no accuracy to show.
        print(f"Total Samples: {taa_summary['total_samples']}")
        print(taa_summary.get("message", "No samples have been validated yet"))
    else:
        print(f"Total Samples: {taa_summary['total_samples']}")
        print(
            f"Accuracy: {taa_summary['accuracy']:.4f} ({taa_summary['accuracy']*100:.2f}%)"
        )
        print(f"Correct: {taa_summary['correct']}")
        print(f"Incorrect: {taa_summary['incorrect']}")
        # Partial verdicts feed into accuracy, so show how many there were.
        print(f"Partially Correct: {taa_summary.get('partial', 0)}")

    print(rule + "\n")
659
+
660
+
661
+ # ==================== Main Evaluation Script ====================
662
+
663
if __name__ == "__main__":
    # Script entry point: run the evaluation on both CTI tools, print the
    # summary, then export the results.
    rule = "=" * 80

    # Initialize evaluator
    print("Initializing CTI Tools Evaluator...")
    evaluator = CTIToolsEvaluator()

    # Threat actor aliases for TAA evaluation.
    # NOTE(review): this mapping is not passed to any call below.
    aliases = {
        "apt29": ["cozy bear", "the dukes", "nobelium", "yttrium"],
        "apt36": ["transparent tribe", "mythic leopard"],
        "sidecopy": [],
        "emotet": [],
        "stately taurus": ["mustang panda", "bronze president"],
        "bandook": [],
    }

    # Part 1: extract_mitre_techniques tool (CTI-ATE)
    print("\n" + rule)
    print("PART 1: Evaluating extract_mitre_techniques tool")
    print(rule)
    try:
        ate_results = evaluator.evaluate_ate_from_tsv(
            filepath="cti-bench/data/cti-ate.tsv"
        )
    except Exception as exc:
        # A failure in one part is reported and must not stop the other part.
        print(f"Error evaluating ATE: {exc}")

    # Part 2: identify_threat_actors tool (CTI-TAA)
    print("\n" + rule)
    print("PART 2: Evaluating identify_threat_actors tool")
    print(rule)
    try:
        taa_results = evaluator.evaluate_taa_from_tsv(
            filepath="cti-bench/data/cti-taa.tsv", limit=25, interactive=True
        )
    except Exception as exc:
        print(f"Error evaluating TAA: {exc}")

    # Report whatever was collected, then write it to disk.
    evaluator.print_summary()
    evaluator.export_results("./tool_evaluation_results")

    print("\nEvaluation complete! Results saved to ./tool_evaluation_results/")
src/agents/cti_agent/cti_agent.py ADDED
@@ -0,0 +1,920 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import time
4
+ from typing import List, Dict, Any, Optional, Sequence, Annotated
5
+ from typing_extensions import TypedDict
6
+
7
+ from langchain.chat_models import init_chat_model
8
+ from langchain_core.prompts import ChatPromptTemplate
9
+ from langchain_tavily import TavilySearch
10
+ from langgraph.graph import END, StateGraph, START
11
+ from langgraph.graph.message import add_messages
12
+ from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
13
+ # from langsmith.integrations.otel import configure
14
+ from langsmith import traceable, Client, get_current_run_tree
15
+ from dotenv import load_dotenv
16
+
17
+ from src.agents.cti_agent.config import (
18
+ MODEL_NAME,
19
+ CTI_SEARCH_CONFIG,
20
+ CTI_PLANNER_PROMPT,
21
+ CTI_REGEX_PATTERN,
22
+ REPLAN_PROMPT,
23
+ )
24
+ from src.agents.cti_agent.cti_tools import CTITools
25
+
26
+ load_dotenv()
27
+
28
+ # configure(
29
+ # project_name=os.getenv("LANGSMITH_PROJECT", "cti-agent-project"),
30
+ # api_key=os.getenv("LANGSMITH_API_KEY")
31
+ # )
32
+
33
+ ls_client = Client(api_key=os.getenv("LANGSMITH_API_KEY"))
34
+
35
class CTIState(TypedDict):
    """State definition for CTI agent for ReWOO planning."""

    task: str  # The request the agent is working on
    plan_string: str  # Raw planner output
    steps: List  # Parsed plan steps (4-tuples matched by CTI_REGEX_PATTERN)
    results: dict  # Step name -> stringified tool output
    structured_intelligence: dict
    result: str  # Final report text produced by the solver
    replans: int  # Track number of replans
    last_step_quality: str  # "correct", "ambiguous", or "incorrect"
    correction_reason: str  # Why we need to replan
    # Returned by the replanner node. Declared here so they are part of the
    # state schema. NOTE(review): LangGraph appears to drop keys a node
    # returns that the schema does not declare - confirm against its docs.
    replan_context: dict  # Details of the failed step and its correction
    replan_status: str  # e.g. "max_attempts_reached"
47
+
48
+
49
+ # Messages-based state for supervisor compatibility
50
class CTIMessagesState(TypedDict):
    """Messages-only state used by the wrapper graph built for supervisor compatibility."""

    # Sequence of chat messages; the annotation attaches `add_messages` to this key.
    messages: Annotated[Sequence[BaseMessage], add_messages]
52
+
53
+
54
+ class CTIAgent:
55
+ """CTI Agent with specialized threat intelligence tools."""
56
+
57
def __init__(self):
    """Set up the LLM, the Tavily search tool, the CTI tools, and both graphs."""
    # Chat model shared by the planner, the tools, the evaluator and the solver.
    self.llm = init_chat_model(MODEL_NAME, temperature=0.1)

    # CTI-specific search: project search settings plus the API key from the env.
    tavily_kwargs = dict(CTI_SEARCH_CONFIG)
    tavily_kwargs["api_key"] = os.getenv("TAVILY_API_KEY")
    self.cti_search = TavilySearch(**tavily_kwargs)

    # Specialised CTI tools built on the same LLM and search client.
    self.cti_tools = CTITools(self.llm, self.cti_search)

    # Planner: a single user-message prompt piped into the LLM.
    planner_prompt = ChatPromptTemplate.from_messages(
        [("user", CTI_PLANNER_PROMPT)]
    )
    self.planner = planner_prompt | self.llm

    # Internal task-based CTI graph.
    self.app = self._build_graph()

    # Messages-based wrapper graph for supervisor compatibility.
    self.agent = self._build_messages_graph()
82
+
83
@traceable(name="cti_planner")
def _get_plan(self, state: CTIState) -> Dict[str, Any]:
    """
    Planner node: ask the planner chain for a step-by-step CTI research plan.

    Args:
        state: Current state containing the task

    Returns:
        Dictionary with "steps" (all CTI_REGEX_PATTERN matches found in the
        planner output) and "plan_string" (the planner output text itself)
    """
    response = self.planner.invoke({"task": state["task"]})

    # Chat models return a message object; fall back to str() for anything else.
    if hasattr(response, "content"):
        plan_text = response.content
    else:
        plan_text = str(response)

    parsed_steps = re.findall(CTI_REGEX_PATTERN, plan_text)
    return {"steps": parsed_steps, "plan_string": plan_text}
99
+
100
+ def _get_current_task(self, state: CTIState) -> Optional[int]:
101
+ """
102
+ Get the current task number to execute.
103
+
104
+ Args:
105
+ state: Current state
106
+
107
+ Returns:
108
+ Task number (1-indexed) or None if all tasks completed
109
+ """
110
+ if "results" not in state or state["results"] is None:
111
+ return 1
112
+ if len(state["results"]) == len(state["steps"]):
113
+ return None
114
+ else:
115
+ return len(state["results"]) + 1
116
+
117
def _log_tool_metrics(self, tool_name: str, execution_time: float, success: bool, result_quality: str = None):
    """
    Log custom tool metrics to LangSmith as "tool_performance" feedback.

    The feedback is attached to the current run when there is one, and is
    otherwise sent with the project taken from LANGSMITH_PROJECT. Any error
    is printed and not raised, so logging cannot break tool execution.

    Args:
        tool_name: Name of the tool that was executed
        execution_time: Execution time measured by the caller
        success: Whether the execution counts as successful (score 1.0 / 0.0)
        result_quality: Optional quality label stored with the feedback
    """
    try:
        # Fields shared by both feedback targets.
        feedback = {
            "key": "tool_performance",
            "score": 1.0 if success else 0.0,
            "value": {
                "tool": tool_name,
                "execution_time": execution_time,
                "success": success,
                "quality": result_quality,
            },
        }

        active_run = get_current_run_tree()
        if active_run:
            ls_client.create_feedback(run_id=active_run.id, **feedback)
        else:
            # Log as project-level feedback if no active run
            ls_client.create_feedback(
                project_id=os.getenv("LANGSMITH_PROJECT", "cti-agent-project"),
                **feedback,
            )
    except Exception as e:
        print(f"Failed to log metrics: {e}")
149
+
150
+
151
@traceable(name="cti_tool_execution")
def _tool_execution(self, state: CTIState) -> Dict[str, Any]:
    """
    Executor node: Executes the specialized CTI tool for the current step.

    References to earlier step names inside the tool input are replaced by
    the stored result of that step before the tool runs. The tool's output
    (or an error message) is stored under the step name as a string, and a
    metrics record is logged for the call.

    Args:
        state: Current state

    Returns:
        Dictionary with updated results
    """
    _step = self._get_current_task(state)
    _, step_name, tool, tool_input = state["steps"][_step - 1]

    # Work on a copy; a missing or None "results" entry means no results yet.
    _results = dict(state.get("results") or {})

    # Replace variables in tool input. A single regex pass with the longest
    # names first keeps "#E1" from matching inside "#E10", and inserted
    # result text is not scanned again for further references.
    original_tool_input = tool_input
    reference_keys = sorted((k for k in _results if k), key=len, reverse=True)
    if reference_keys:
        reference_pattern = "|".join(re.escape(k) for k in reference_keys)
        tool_input = re.sub(
            reference_pattern,
            lambda match: str(_results[match.group(0)]),
            tool_input,
        )

    start_time = time.time()
    success = True  # flipped to False on any error path below

    # Execute the appropriate specialized tool
    try:
        if tool == "SearchCTIReports":
            result = self.cti_tools.search_cti_reports(tool_input)
        elif tool == "ExtractURL":
            # Input form: "<search result reference>[, <which result>]"
            if "," in original_tool_input:
                parts = original_tool_input.split(",", 1)
                search_result_ref = parts[0].strip()
                index_part = parts[1].strip()
            else:
                search_result_ref = original_tool_input.strip()
                index_part = "0"

            # Extract index from index_part
            index = 0
            if "second" in index_part.lower():
                index = 1
            elif "third" in index_part.lower():
                index = 2
            elif index_part.isdigit():
                index = int(index_part)
            elif "1" in index_part:
                index = 1

            # Get the actual search result from previous results
            if search_result_ref in _results:
                search_result = _results[search_result_ref]
                result = self.cti_tools.extract_url_from_search(
                    search_result, index
                )
            else:
                result = f"Error: Could not find search result {search_result_ref} in previous results. Available keys: {list(_results.keys())}"
                success = False
        elif tool == "FetchReport":
            result = self.cti_tools.fetch_report(tool_input)
        elif tool == "ExtractIOCs":
            result = self.cti_tools.extract_iocs(tool_input)
        elif tool == "IdentifyThreatActors":
            result = self.cti_tools.identify_threat_actors(tool_input)
        elif tool == "ExtractMITRETechniques":
            # Parse framework parameter if provided
            if "," in original_tool_input:
                parts = original_tool_input.split(",", 1)
                content_ref = parts[0].strip()
                framework = parts[1].strip()
            else:
                content_ref = original_tool_input.strip()
                framework = "Enterprise"  # Default framework

            # Get content from previous results or use directly
            if content_ref in _results:
                content = _results[content_ref]
            else:
                content = tool_input

            result = self.cti_tools.extract_mitre_techniques(content, framework)
        elif tool == "LLM":
            llm_result = self.llm.invoke(tool_input)
            result = (
                llm_result.content
                if hasattr(llm_result, "content")
                else str(llm_result)
            )
        else:
            result = f"Unknown tool: {tool}"
            success = False
    except Exception as e:
        # Keep the graph running; the error text becomes the step's result.
        result = f"Error executing {tool}: {str(e)}"
        success = False

    _results[step_name] = str(result)

    execution_time = time.time() - start_time

    # Log metrics
    self._log_tool_metrics(tool, execution_time, success)

    return {"results": _results}
251
+
252
@traceable(name="cti_solver")
def _solve(self, state: CTIState) -> Dict[str, str]:
    """
    Solver node: turn the collected step results into the final CTI report.

    Two text sections are built from the plan: a short plan summary with an
    800-character preview of each result, and a section with every result in
    full. Both are embedded in the solver prompt sent to the LLM.

    Args:
        state: Current state with all execution results

    Returns:
        Dictionary with the final CTI intelligence report under "result"
    """
    rule = "=" * 80
    thin_rule = "─" * 80

    _results = state.get("results", {}) or {}

    plan_parts = []
    detail_parts = [
        "\n\n" + rule + "\n",
        "COMPLETE EXECUTION RESULTS FOR ANALYSIS:\n",
        rule + "\n\n",
    ]

    for idx, (plan_desc, step_name, tool, tool_input) in enumerate(
        state["steps"], 1
    ):
        # Show references to earlier steps as <name> instead of their content.
        display_input = tool_input
        for key in _results:
            display_input = display_input.replace(key, f"<{key}>")

        executed = step_name in _results

        # Plan summary entry (result truncated for readability).
        plan_parts.append(f"\nStep {idx}: {plan_desc}\n")
        plan_parts.append(f"{step_name} = {tool}[{display_input}]\n")
        if executed:
            result_preview = str(_results[step_name])[:800]
            plan_parts.append(f"Result Preview: {result_preview}...\n")
        else:
            plan_parts.append("Result: Not executed\n")

        # Full, untruncated result for the analysis section.
        if executed:
            detail_parts.append(f"\n{thin_rule}\n")
            detail_parts.append(f"STEP {idx}: {step_name} ({tool})\n")
            detail_parts.append(f"{thin_rule}\n")
            detail_parts.append(f"INPUT: {display_input}\n\n")
            detail_parts.append(f"FULL OUTPUT:\n{_results[step_name]}\n")

    plan = "".join(plan_parts)
    full_results_context = "".join(detail_parts)
    task = state["task"]

    # Create solver prompt with full context
    prompt = f"""You are a Cyber Threat Intelligence analyst creating a final report.

You have access to COMPLETE results from all CTI research steps below.

IMPORTANT:
- Use the FULL EXECUTION RESULTS section below - it contains complete, untruncated data
- Extract ALL specific IOCs, technique IDs, and actor details from the full results
- Do not say "Report contains X IOCs" - actually LIST them from the results
- If results contain structured data (JSON), parse and present it clearly

{full_results_context}

{rule}
RESEARCH PLAN SUMMARY:
{rule}
{plan}

{rule}
ORIGINAL TASK: {task}
{rule}

Now create a comprehensive threat intelligence report following this structure:

## Intelligence Sources
[List the specific reports analyzed with title, source, and date]

## Threat Actors & Attribution
[Present actual threat actor names, aliases, and campaign names found]
[Include specific attribution details and confidence levels]

## MITRE ATT&CK Techniques Identified
[List specific technique IDs (T####) and names found in the reports]
[Provide brief description of what each technique means and why it's relevant]

## Indicators of Compromise (IOCs) Retrieved
[Present actual IOCs extracted from reports - be specific and comprehensive]

### IP Addresses
[List all IPs found, or state "None identified"]

### Domains
[List all domains found, or state "None identified"]

### File Hashes
[List all hashes with types, or state "None identified"]

### URLs
[List all malicious URLs, or state "None identified"]

### Email Addresses
[List all email patterns, or state "None identified"]

### File Names
[List all malicious file names, or state "None identified"]

### Other Indicators
[List any other indicators like registry keys, mutexes, etc.]

## Attack Patterns & Campaign Details
[Describe specific attack flows and methods detailed in reports]
[Include timeline information if available]
[Note targeting information - industries, regions, etc.]

## Key Findings Summary
[Provide 3-5 bullet points of the most critical findings]

## Intelligence Gaps
[Note what information was NOT available in the reports]

---

**CRITICAL INSTRUCTIONS:**
1. Extract data from the FULL EXECUTION RESULTS section above
2. If ExtractIOCs results are in JSON format, parse and list all IOCs
3. If IdentifyThreatActors results contain Q&A format, extract all answers
4. If ExtractMITRETechniques results contain technique IDs, list ALL of them
5. Be comprehensive - don't summarize when you have specific data
6. If you cannot find specific data in results, clearly state what's missing
"""

    # Invoke LLM with context
    response = self.llm.invoke(prompt)
    if hasattr(response, "content"):
        report_text = response.content
    else:
        report_text = str(response)

    return {"result": report_text}
384
+
385
+ # Helper method to better structure results
386
+ def _structure_results_for_solver(self, state: CTIState) -> str:
387
+ """
388
+ Helper method to structure results in a more accessible format for the solver.
389
+
390
+ Returns:
391
+ Formatted string with categorized results
392
+ """
393
+ _results = state.get("results", {}) or {}
394
+
395
+ structured = {
396
+ "searches": [],
397
+ "reports": [],
398
+ "iocs": [],
399
+ "actors": [],
400
+ "techniques": [],
401
+ }
402
+
403
+ # Categorize results by tool type
404
+ for step_name, result in _results.items():
405
+ # Find which tool produced this result
406
+ for _, sname, tool, _ in state["steps"]:
407
+ if sname == step_name:
408
+ if tool == "SearchCTIReports":
409
+ structured["searches"].append(
410
+ {"step": step_name, "result": result}
411
+ )
412
+ elif tool == "FetchReport":
413
+ structured["reports"].append(
414
+ {"step": step_name, "result": result}
415
+ )
416
+ elif tool == "ExtractIOCs":
417
+ structured["iocs"].append({"step": step_name, "result": result})
418
+ elif tool == "IdentifyThreatActors":
419
+ structured["actors"].append(
420
+ {"step": step_name, "result": result}
421
+ )
422
+ elif tool == "ExtractMITRETechniques":
423
+ structured["techniques"].append(
424
+ {"step": step_name, "result": result}
425
+ )
426
+ break
427
+
428
+ # Format into readable sections
429
+ output = []
430
+
431
+ if structured["iocs"]:
432
+ output.append("\n" + "=" * 80)
433
+ output.append("EXTRACTED IOCs (Indicators of Compromise):")
434
+ output.append("=" * 80)
435
+ for item in structured["iocs"]:
436
+ output.append(f"\nFrom {item['step']}:")
437
+ output.append(str(item["result"]))
438
+
439
+ if structured["actors"]:
440
+ output.append("\n" + "=" * 80)
441
+ output.append("IDENTIFIED THREAT ACTORS:")
442
+ output.append("=" * 80)
443
+ for item in structured["actors"]:
444
+ output.append(f"\nFrom {item['step']}:")
445
+ output.append(str(item["result"]))
446
+
447
+ if structured["techniques"]:
448
+ output.append("\n" + "=" * 80)
449
+ output.append("EXTRACTED MITRE ATT&CK TECHNIQUES:")
450
+ output.append("=" * 80)
451
+ for item in structured["techniques"]:
452
+ output.append(f"\nFrom {item['step']}:")
453
+ output.append(str(item["result"]))
454
+
455
+ if structured["reports"]:
456
+ output.append("\n" + "=" * 80)
457
+ output.append("FETCHED REPORTS (for context):")
458
+ output.append("=" * 80)
459
+ for item in structured["reports"]:
460
+ output.append(f"\nFrom {item['step']}:")
461
+ # Truncate report content but keep IOC sections visible
462
+ report_text = str(item["result"])
463
+ output.append(
464
+ report_text[:2000] + "..."
465
+ if len(report_text) > 2000
466
+ else report_text
467
+ )
468
+
469
+ return "\n".join(output)
470
+
471
+ def _route(self, state: CTIState) -> str:
472
+ """
473
+ Routing function to determine next node.
474
+
475
+ Args:
476
+ state: Current state
477
+
478
+ Returns:
479
+ Next node name: "solve" or "tool"
480
+ """
481
+ _step = self._get_current_task(state)
482
+ if _step is None:
483
+ return "solve"
484
+ else:
485
+ return "tool"
486
+
487
@traceable(name="cti_evaluator")
def _evaluate_result(self, state: CTIState) -> Dict[str, Any]:
    """
    Evaluator node: Assesses quality of the last tool execution result.

    The last executed step is the one whose position equals the number of
    stored results. The LLM grades its output as CORRECT, AMBIGUOUS or
    INCORRECT. The first of those words in the reply is the verdict, and
    the text after it (at most 200 characters) is kept as the reason.

    Returns:
        Dictionary with "last_step_quality" ("correct", "ambiguous" or
        "incorrect") and, when a step was graded, "correction_reason"
    """
    completed = state.get("results") or {}
    _step = len(completed)
    if _step == 0:
        # Nothing has been executed yet, so there is nothing to grade.
        return {"last_step_quality": "correct"}

    current_step = state["steps"][_step - 1]
    _, step_name, tool, tool_input = current_step
    result = completed[step_name]

    # Evaluation prompt
    eval_prompt = f"""Evaluate if this CTI tool execution retrieved ACTUAL threat intelligence:

Tool: {tool}
Input: {tool_input}
Result: {result[:1000]}

Quality Criteria for Web Search:
- CORRECT: Retrieved specific IOCs, technique IDs, actor names. A website that doesn't have the name of the actor or IOCs is not sufficient.
- AMBIGUOUS: Retrieved general security content but lacks specific CTI details
- INCORRECT: Retrieved irrelevant content, errors, or marketing material

Quality Criteria for MITRE Extraction:
- CORRECT: Extracted valid MITRE ATT&CK technique IDs (e.g., T1234) or tactics (e.g., Initial Access)
- AMBIGUOUS: Extracted general security terms but no valid technique IDs or tactics
- INCORRECT: Extracted irrelevant content or no valid techniques/tactics

Respond with ONLY one word: CORRECT, AMBIGUOUS, or INCORRECT

If AMBIGUOUS or INCORRECT, also provide a brief reason (1 sentence).
Format: QUALITY: [reason if needed]"""

    eval_result = self.llm.invoke(eval_prompt)
    eval_text = (
        eval_result.content if hasattr(eval_result, "content") else str(eval_result)
    )

    # Parse evaluation. Whole-word, case-insensitive match: the first verdict
    # word decides, so a later "not incorrect" in the explanation cannot
    # override it. A reply with no verdict word counts as correct.
    quality = "correct"
    reason = ""

    verdict = re.search(
        r"\b(INCORRECT|AMBIGUOUS|CORRECT)\b", eval_text, flags=re.IGNORECASE
    )
    if verdict and verdict.group(1).upper() != "CORRECT":
        quality = verdict.group(1).lower()
        # The reason is whatever follows the verdict word and its separator.
        reason = eval_text[verdict.end():].lstrip(" :-\t\r\n").strip()[:200]

    return {"last_step_quality": quality, "correction_reason": reason}
542
+
543
+ def _replan(self, state: CTIState) -> Dict[str, Any]:
544
+ """
545
+ Replanner node: Creates corrected plan when results are inadequate.
546
+ """
547
+ replans = state.get("replans", 0)
548
+
549
+ # Limit replanning attempts
550
+ if replans >= 3:
551
+ return {"replans": replans, "replan_status": "max_attempts_reached"}
552
+
553
+ _step = len(state.get("results", {}))
554
+ failed_step = state["steps"][_step - 1]
555
+ _, step_name, tool, tool_input = failed_step
556
+
557
+ # Store replan context for display
558
+ replan_context = {
559
+ "failed_step_number": _step,
560
+ "failed_tool": tool,
561
+ "failed_input": tool_input[:100],
562
+ "problem": state.get("correction_reason", "Quality issues"),
563
+ "original_plan": failed_step[0],
564
+ }
565
+
566
+ replan_prompt = REPLAN_PROMPT.format(
567
+ task=state["task"],
568
+ failed_step=failed_step[0],
569
+ step_name=step_name,
570
+ tool=tool,
571
+ tool_input=tool_input,
572
+ results=state["results"][step_name][:500],
573
+ problem=state["correction_reason"],
574
+ completed_steps=self._format_completed_steps(state),
575
+ step=_step,
576
+ )
577
+
578
+ replan_result = self.llm.invoke(replan_prompt)
579
+ replan_text = (
580
+ replan_result.content
581
+ if hasattr(replan_result, "content")
582
+ else str(replan_result)
583
+ )
584
+
585
+ # Store the replan thinking for display
586
+ replan_context["replan_thinking"] = (
587
+ replan_text[:500] + "..." if len(replan_text) > 500 else replan_text
588
+ )
589
+
590
+ # Parse new step
591
+ import re
592
+
593
+ matches = re.findall(CTI_REGEX_PATTERN, replan_text)
594
+
595
+ if matches:
596
+ new_plan, new_step_name, new_tool, new_tool_input = matches[0]
597
+
598
+ # Store the correction details
599
+ replan_context["corrected_plan"] = new_plan
600
+ replan_context["corrected_tool"] = new_tool
601
+ replan_context["corrected_input"] = new_tool_input[:100]
602
+ replan_context["success"] = True
603
+
604
+ # Replace the failed step with corrected version
605
+ new_steps = state["steps"].copy()
606
+ new_steps[_step - 1] = matches[0]
607
+
608
+ # Remove the failed result so it gets re-executed
609
+ new_results = state["results"].copy()
610
+ del new_results[step_name]
611
+
612
+ return {
613
+ "steps": new_steps,
614
+ "results": new_results,
615
+ "replans": replans + 1,
616
+ "replan_context": replan_context,
617
+ }
618
+ else:
619
+ replan_context["success"] = False
620
+ replan_context["error"] = "Failed to parse corrected plan"
621
+
622
+ return {"replans": replans + 1, "replan_context": replan_context}
623
+
624
+ def _format_completed_steps(self, state: CTIState) -> str:
625
+ """Helper to format completed steps for replanning context."""
626
+ output = []
627
+ for step in state["steps"][: len(state.get("results", {}))]:
628
+ plan, step_name, tool, tool_input = step
629
+ if step_name in state["results"]:
630
+ output.append(f"{step_name} = {tool}[{tool_input}] ✓")
631
+ return "\n".join(output)
632
+
633
+ def _route_after_tool(self, state: CTIState) -> str:
634
+ """Route to evaluator only after specific tools that retrieve external content."""
635
+ _step = len(state.get("results", {}))
636
+ if _step == 0:
637
+ return "evaluate"
638
+
639
+ current_step = state["steps"][_step - 1]
640
+ _, step_name, tool, tool_input = current_step
641
+
642
+ tools_to_evaluate = ["SearchCTIReports", "ExtractMITRETechniques"]
643
+
644
+ if tool in tools_to_evaluate:
645
+ return "evaluate"
646
+ else:
647
+ # Skip evaluation for extraction/analysis tools
648
+ _next_step = self._get_current_task(state)
649
+ if _next_step is None:
650
+ return "solve"
651
+ else:
652
+ return "tool"
653
+
654
+ def _route_after_eval(self, state: CTIState) -> str:
655
+ """Route based on evaluation: replan, continue, or solve."""
656
+ quality = state.get("last_step_quality", "correct")
657
+
658
+ # Check if all steps are complete
659
+ _step = self._get_current_task(state)
660
+
661
+ if quality in ["ambiguous", "incorrect"]:
662
+ # Need to replan this step
663
+ return "replan"
664
+ elif _step is None:
665
+ # All steps complete and quality is good
666
+ return "solve"
667
+ else:
668
+ # Continue to next tool
669
+ return "tool"
670
+
671
def _build_graph(self) -> StateGraph:
    """
    Build graph with corrective feedback loop.

    Flow: plan -> tool -> (evaluate -> (replan ->))* ... -> solve.
    The executor is entered only while a plan step is still pending:

    - after "plan", an empty plan (no step could be parsed) goes straight
      to "solve" instead of indexing into an empty step list;
    - after "replan", routing uses ``_route``. The replanner can return
      without removing a result (attempt limit reached, or the corrected
      step could not be parsed). If that happens on the last step, no step
      is pending and the graph must continue with "solve".

    Returns:
        The compiled graph, named "cti_agent"
    """
    graph = StateGraph(CTIState)

    # Add nodes
    graph.add_node("plan", self._get_plan)
    graph.add_node("tool", self._tool_execution)
    graph.add_node("evaluate", self._evaluate_result)
    graph.add_node("replan", self._replan)
    graph.add_node("solve", self._solve)

    def _route_after_plan(state: CTIState) -> str:
        """Run the first step if the planner produced any, else solve."""
        return "tool" if state.get("steps") else "solve"

    # Add edges
    graph.add_edge(START, "plan")
    graph.add_edge("solve", END)

    # Conditional routing
    graph.add_conditional_edges("plan", _route_after_plan)
    graph.add_conditional_edges("tool", self._route_after_tool)
    graph.add_conditional_edges("evaluate", self._route_after_eval)
    graph.add_conditional_edges("replan", self._route)

    return graph.compile(name="cti_agent")
693
+
694
+ # --- Messages-based wrapper for supervisor ---
695
def _messages_node(self, state: CTIMessagesState) -> Dict[str, List[AIMessage]]:
    """Adapter node: take messages input, run CTI pipeline, return AI message.

    This allows the CTI agent to plug into a messages-based supervisor.

    The task is the content of the most recent ``HumanMessage``; failing
    that, the content of the last message; failing that, a generic default
    prompt. The report is read from the ``solve`` entry of the last streamed
    chunk. If that yields nothing, the pipeline is invoked once more and the
    ``result`` key of the returned state is used.

    Args:
        state: Messages state; ``messages`` may be missing, ``None`` or empty.

    Returns:
        ``{"messages": [AIMessage]}`` with a single message named
        ``"cti_agent"`` carrying the report, or a placeholder text when no
        report could be produced.
    """
    import logging  # local import: the module's import block is not part of this block

    messages = state.get("messages") or []

    # Find the latest human message content as the task
    task_text = next(
        (msg.content for msg in reversed(messages) if isinstance(msg, HumanMessage)),
        None,
    )
    if not task_text and messages:
        # Fallback: use the last message content
        task_text = messages[-1].content
    if not task_text:
        task_text = "Provide cyber threat intelligence based on the context."

    # Run the internal CTI graph; only the last streamed chunk is inspected.
    final_chunk = None
    for chunk in self.app.stream({"task": task_text}):
        final_chunk = chunk

    content = ""
    if isinstance(final_chunk, dict):
        solve_part = final_chunk.get("solve", {})
        if isinstance(solve_part, dict):
            content = solve_part.get("result", "") or ""

    if not content:
        # As a fallback, try a direct invoke to get final aggregated state.
        # This stays best-effort, but the failure is logged rather than hidden.
        try:
            agg_state = self.app.invoke({"task": task_text})
        except Exception:
            logging.getLogger(__name__).exception(
                "CTI fallback invoke failed; returning placeholder report"
            )
        else:
            if isinstance(agg_state, dict):
                content = agg_state.get("result", "") or ""

    if not content:
        content = "CTI agent completed, but no final report was produced."

    return {"messages": [AIMessage(content=content, name="cti_agent")]}
733
+
734
def _build_messages_graph(self):
    """Compile the single-node, messages-based wrapper graph.

    The graph is START -> ``cti_adapter`` -> END, where ``cti_adapter`` is
    ``_messages_node``; it is compiled under the name ``"cti_agent"``.
    """
    wrapper = StateGraph(CTIMessagesState)
    wrapper.add_node("cti_adapter", self._messages_node)

    # One node only: enter it from START and leave straight to END.
    for origin, destination in ((START, "cti_adapter"), ("cti_adapter", END)):
        wrapper.add_edge(origin, destination)

    return wrapper.compile(name="cti_agent")
741
+
742
@traceable(name="cti_agent_full_run")
def run(self, task: str) -> Dict[str, Any]:
    """
    Run the CTI agent on a given task.

    Args:
        task: The CTI research task/question to solve

    Returns:
        The last chunk yielded by ``self.app.stream`` (``None`` if the
        stream yields nothing).

    Raises:
        Exception: Whatever the pipeline raises is re-raised unchanged after
            a ``run_completion`` feedback entry with score 0.0 was attempted.
    """
    import logging  # local import: the module's import block is not part of this block

    def _record_completion(score: float, value: Dict[str, Any]) -> None:
        """Send the run_completion feedback without letting telemetry fail the run."""
        try:
            ls_client.create_feedback(
                run_id=None,
                key="run_completion",
                score=score,
                value=value,
            )
        except Exception:
            logging.getLogger(__name__).warning(
                "Could not record run_completion feedback", exc_info=True
            )

    # Only the pipeline is guarded here, so a feedback error can no longer
    # turn a successful run into a failure or mask the real pipeline error.
    try:
        final_state = None
        for state in self.app.stream({"task": task}):
            final_state = state
    except Exception as e:
        # Log failure
        _record_completion(0.0, {"status": "failed", "error": str(e)})
        raise

    # Log successful completion
    _record_completion(
        1.0,
        {"status": "completed", "final_result_length": len(str(final_state))},
    )
    return final_state
783
+
784
def stream(self, task: str):
    """
    Lazily run the CTI agent and hand back each update as it is produced.

    Args:
        task: The CTI research task/question to solve

    Yields:
        Every item produced by ``self.app.stream({"task": task})``, in order.
    """
    yield from self.app.stream({"task": task})
796
+
797
+
798
def _format_plan_phase(node_data: Dict[str, Any]) -> List[str]:
    """Render the planner update: the raw plan text and the parsed steps."""
    lines: List[str] = []

    if "plan_string" in node_data:
        lines.append("\n**Research Plan:**")
        lines.append(str(node_data["plan_string"]))

    if node_data.get("steps"):
        lines.append("\n**Planned Steps:**")
        for i, (plan, step_name, tool, tool_input) in enumerate(node_data["steps"], 1):
            tool_input = str(tool_input)
            # Mark the cut only when the input really was shortened.
            preview = tool_input[:100] + ("..." if len(tool_input) > 100 else "")
            lines.append(f"\n Step {i}: {plan}")
            lines.append(f" {step_name} = {tool}[{preview}]")

    return lines


def _format_tool_phase(node_data: Dict[str, Any]) -> List[str]:
    """Render every step result carried by a tool update."""
    results = node_data.get("results")
    if results is None:
        return []

    lines = ["\n**Tool Execution Results:**"]
    for step_name, result in results.items():
        lines.append(f"\n {step_name}:")
        lines.append(f" {result}")
    return lines


def _format_evaluate_phase(node_data: Dict[str, Any]) -> List[str]:
    """Render the evaluator verdict, its reason and the resulting decision."""
    # A missing or None verdict is shown as "unknown" instead of crashing on .upper().
    quality = str(node_data.get("last_step_quality") or "unknown")
    reason = node_data.get("correction_reason", "")

    lines = [f"**Quality Assessment:** {quality.upper()}"]
    if reason:
        lines.append(f"**Reason:** {reason}")

    # Determine next action based on quality
    if quality in ("ambiguous", "incorrect"):
        lines.append("**Decision:** Step needs correction - triggering replan")
    elif quality == "correct":
        lines.append("**Decision:** Step quality acceptable - continuing")
    else:
        lines.append(f"**Decision:** Quality assessment: {quality}")
    return lines


def _format_replan_phase(node_data: Dict[str, Any]) -> List[str]:
    """Render a replan update: attempt counter plus the replanner's context."""
    replans = node_data.get("replans") or 0
    replan_context = node_data.get("replan_context") or {}

    lines = [f"**Replan Attempt:** {replans}"]

    # NOTE(review): 3 presumably mirrors the agent's replan limit - confirm.
    if replans >= 3:
        lines.append("**Status:** Maximum replan attempts reached")
        lines.append("**Action:** Proceeding with current results")
    elif replan_context:
        # Show detailed replan thinking
        lines.append(
            f"**Failed Step:** {replan_context.get('failed_step_number', 'Unknown')}"
        )
        lines.append(f"**Problem:** {replan_context.get('problem', 'Quality issues')}")
        lines.append(
            f"**Original Tool:** {replan_context.get('failed_tool', 'Unknown')}[{replan_context.get('failed_input', '...')}]"
        )

        if "replan_thinking" in replan_context:
            lines.append("**Replan Analysis:**")
            lines.append(f" {replan_context['replan_thinking']}")

        if replan_context.get("success", False):
            lines.append(
                f"**Corrected Plan:** {replan_context.get('corrected_plan', 'Unknown')}"
            )
            lines.append(
                f"**New Tool:** {replan_context.get('corrected_tool', 'Unknown')}[{replan_context.get('corrected_input', '...')}]"
            )
            lines.append("**Status:** Successfully generated improved plan")
            lines.append("**Action:** Step will be re-executed with new approach")
        else:
            lines.append(f"**Error:** {replan_context.get('error', 'Unknown error')}")
            lines.append("**Status:** Failed to generate valid corrected plan")
    else:
        lines.append("**Status:** Generating improved plan...")
        lines.append("**Action:** Step will be re-executed with new approach")
    return lines


def _format_solve_phase(node_data: Dict[str, Any]) -> List[str]:
    """Render the final report produced by the solver."""
    if "result" not in node_data:
        return []
    return [
        "\n**FINAL THREAT INTELLIGENCE REPORT:**",
        "=" * 80,
        # str() keeps the final join from failing on a non-string result.
        str(node_data["result"]),
    ]


def format_cti_output(state: Dict[str, Any]) -> str:
    """Format the CTI agent output for better readability.

    Args:
        state: One streamed update, mapping a node name (``plan``, ``tool``,
            ``evaluate``, ``replan``, ``solve``) to the data that node
            produced. Unknown node names and ``None`` payloads only get the
            phase header.

    Returns:
        A newline-joined, markdown-flavoured text block with one section
        per node in ``state``.
    """
    formatters = {
        "plan": _format_plan_phase,
        "tool": _format_tool_phase,
        "evaluate": _format_evaluate_phase,
        "replan": _format_replan_phase,
        "solve": _format_solve_phase,
    }

    output: List[str] = []
    for node_name, node_data in state.items():
        output.append(f"\n **{node_name.upper()} PHASE**")
        output.append("-" * 80)

        formatter = formatters.get(node_name)
        if formatter is not None and node_data is not None:
            output.extend(formatter(node_data))

        output.append("")

    return "\n".join(output)
899
+
900
+
901
if __name__ == "__main__":
    # Demo run: stream one sample CTI task and print every update as it arrives.
    banner = "=" * 80
    task = """Find comprehensive threat intelligence about recent ransomware attacks targeting healthcare organizations"""

    print("\n" + banner)
    print("CTI AGENT - STARTING ANALYSIS")
    print(banner)
    print(f"\nTask: {task}\n")

    # Build the agent, then render each streamed update followed by a separator.
    agent = CTIAgent()
    for state in agent.stream(task):
        print(format_cti_output(state))
        print("\n" + "-" * 80 + "\n")

    print("\nCTI ANALYSIS COMPLETED!")
    print(banner + "\n")
src/agents/cti_agent/cti_tools.py ADDED
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
+ import requests
4
+ from langchain_tavily import TavilySearch
5
+ from langchain.chat_models import init_chat_model
6
+ from langsmith import traceable
7
+
8
+ from src.agents.cti_agent.config import (
9
+ IOC_EXTRACTION_PROMPT,
10
+ THREAT_ACTOR_PROMPT,
11
+ MITRE_EXTRACTION_PROMPT,
12
+ )
13
+
14
+
15
class CTITools:
    """Collection of specialized tools for CTI analysis.

    Every public tool returns a string (JSON or plain text). Failures are
    reported inside that string instead of being raised, so a failing tool
    still yields a value for the calling step.
    """

    # Size limits applied to tool outputs.
    MAX_SEARCH_RESULTS = 5
    MAX_SNIPPET_CHARS = 500
    MAX_PDF_PAGES = 10
    MAX_REPORT_CHARS = 3000
    REQUEST_TIMEOUT_SECONDS = 30

    # Boilerplate phrases stripped from fetched pages. The gap between the two
    # anchors is bounded: the text is collapsed to one line before matching,
    # so an unbounded ".*?" could erase report content up to a distant match.
    NOISE_PATTERNS = (
        r"cookie policy.{0,200}?accept",
        r"privacy policy",
        r"terms of service",
        r"subscribe.{0,200}?newsletter",
        r"follow us on",
        r"share this.{0,200}?social",
        r"back to top",
        r"skip to.{0,200}?content",
    )

    def __init__(self, llm, search: TavilySearch):
        """
        Initialize CTI tools.

        Args:
            llm: Language model for analysis
            search: Search tool for finding CTI reports
        """
        self.llm = llm
        self.search = search

    @staticmethod
    def _truncate(text: str, limit: int) -> str:
        """Cut ``text`` to ``limit`` characters, adding "..." only when cut."""
        return text if len(text) <= limit else text[:limit] + "..."

    def _invoke_llm(self, prompt: str) -> str:
        """Run ``prompt`` through the LLM and return the reply as text."""
        response = self.llm.invoke(prompt)
        # Some models return an object with .content, others a plain value.
        return response.content if hasattr(response, "content") else str(response)

    @traceable(name="cti_search_reports")
    def search_cti_reports(self, query: str) -> str:
        """
        Specialized search for CTI reports with enhanced queries.

        Args:
            query: Search query for CTI reports

        Returns:
            JSON string with ``query``, ``found`` and up to
            ``MAX_SEARCH_RESULTS`` ``reports`` (each with a 1-based
            ``index``), or ``{"error", "query"}`` on failure.
        """
        try:
            # Enhance query with CTI-specific terms if not already present
            enhanced_query = query
            lowered = query.lower()
            if "report" not in lowered and "analysis" not in lowered:
                enhanced_query = f"{query} threat intelligence report"

            results = self.search.invoke(enhanced_query)
            hits = results.get("results") or []

            # Format results for better parsing
            formatted_results = {
                "query": enhanced_query,
                "found": len(hits),
                "reports": [
                    {
                        "index": idx + 1,
                        "title": hit.get("title", "No title"),
                        "url": hit.get("url", ""),
                        # A hit whose content is None must not fail the whole search.
                        "snippet": (hit.get("content") or "")[: self.MAX_SNIPPET_CHARS],
                        "score": hit.get("score", 0),
                    }
                    for idx, hit in enumerate(hits[: self.MAX_SEARCH_RESULTS])
                ],
            }

            return json.dumps(formatted_results, indent=2)
        except Exception as e:
            return json.dumps({"error": str(e), "query": query})

    @traceable(name="cti_extract_url_from_search")
    def extract_url_from_search(self, search_result: str, index: int = 0) -> str:
        """
        Extract a specific URL from search results JSON.

        Args:
            search_result: JSON string from SearchCTIReports
            index: Zero-based position in the ``reports`` list (default: 0
                for first); this is not the 1-based ``index`` field stored
                inside each report

        Returns:
            Extracted URL string, or an ``"Error..."`` message
        """
        try:
            data = json.loads(search_result)
            reports = data.get("reports", []) if isinstance(data, dict) else []

            # Negative positions are rejected; Python would otherwise wrap
            # around and silently return a report from the end of the list.
            if 0 <= index < len(reports):
                return reports[index]["url"]

            return "Error: No URL found at specified index in search results"
        except Exception as e:
            return f"Error extracting URL: {str(e)}"

    @traceable(name="cti_fetch_report")
    def fetch_report(self, url: str) -> str:
        """Fetch with universal content cleaning.

        Downloads ``url`` and returns its cleaned text: PDF responses are
        read page by page (first ``MAX_PDF_PAGES`` pages), anything else is
        parsed as HTML. The text is limited to ``MAX_REPORT_CHARS``.

        Args:
            url: Address of the report to download

        Returns:
            Prefixed report text, or an error message string
        """
        try:
            # Parsing libraries are imported on first use.
            from bs4 import BeautifulSoup
            import PyPDF2
            import io

            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
            }

            response = requests.get(
                url, headers=headers, timeout=self.REQUEST_TIMEOUT_SECONDS
            )
            response.raise_for_status()

            content_type = response.headers.get("content-type", "").lower()

            # Handle PDF files
            if "pdf" in content_type or url.lower().endswith(".pdf"):
                try:
                    pdf_reader = PyPDF2.PdfReader(io.BytesIO(response.content))

                    text_content = []
                    # Extract text from the first pages only (to avoid excessive content)
                    max_pages = min(len(pdf_reader.pages), self.MAX_PDF_PAGES)

                    for page_num in range(max_pages):
                        # A page without extractable text must not abort the report.
                        page_text = pdf_reader.pages[page_num].extract_text() or ""
                        if page_text.strip():
                            text_content.append(page_text)

                    if not text_content:
                        return f"Could not extract readable text from PDF: {url}"

                    # Clean and truncate the text
                    cleaned_text = self._clean_content("\n\n".join(text_content))
                    body = self._truncate(cleaned_text, self.MAX_REPORT_CHARS)
                    return f"PDF Report Content from {url}:\n\n{body}"

                except Exception as pdf_error:
                    return f"Error processing PDF {url}: {str(pdf_error)}"

            # Handle web pages
            soup = BeautifulSoup(response.content, "html.parser")

            # Remove unwanted elements
            for element in soup(
                ["script", "style", "nav", "footer", "header", "aside"]
            ):
                element.decompose()

            # Try to find main content areas
            main_content = (
                soup.find("main")
                or soup.find("article")
                or soup.find("div", class_=["content", "main-content", "post-content"])
                or soup.find("body")
            )

            if main_content:
                text = main_content.get_text(separator=" ", strip=True)
            else:
                text = soup.get_text(separator=" ", strip=True)

            cleaned_text = self._clean_content(text)
            body = self._truncate(cleaned_text, self.MAX_REPORT_CHARS)
            return f"Report Content from {url}:\n\n{body}"

        except Exception as e:
            return f"Error fetching report from {url}: {str(e)}"

    def _clean_content(self, text: str) -> str:
        """Clean and normalize text content.

        Collapses whitespace runs to single spaces, removes the phrases in
        ``NOISE_PATTERNS`` (case-insensitive) and collapses whitespace again.
        """
        import re

        # Remove excessive whitespace
        text = re.sub(r"\s+", " ", text)

        # Remove common navigation/UI text
        for pattern in self.NOISE_PATTERNS:
            text = re.sub(pattern, "", text, flags=re.IGNORECASE)

        # Clean up extra spaces again
        return re.sub(r"\s+", " ", text).strip()

    @traceable(name="cti_extract_iocs")
    def extract_iocs(self, content: str) -> str:
        """
        Extract Indicators of Compromise from report content using LLM.

        Args:
            content: Report content to analyze

        Returns:
            The LLM reply, or ``{"error", "iocs": []}`` as JSON on failure
        """
        try:
            return self._invoke_llm(IOC_EXTRACTION_PROMPT.format(content=content))
        except Exception as e:
            return json.dumps({"error": str(e), "iocs": []})

    @traceable(name="cti_identify_threat_actors")
    def identify_threat_actors(self, content: str) -> str:
        """
        Identify threat actors, APT groups, and campaigns.

        Args:
            content: Report content to analyze

        Returns:
            The LLM reply, or an ``"Error..."`` message on failure
        """
        try:
            return self._invoke_llm(THREAT_ACTOR_PROMPT.format(content=content))
        except Exception as e:
            return f"Error identifying threat actors: {str(e)}"

    # Traced like the other public tools so every tool call shows up in traces.
    @traceable(name="cti_extract_mitre_techniques")
    def extract_mitre_techniques(
        self, content: str, framework: str = "Enterprise"
    ) -> str:
        """
        Extract MITRE ATT&CK techniques from report content using LLM.

        Args:
            content: Report content to analyze
            framework: MITRE framework (Enterprise, Mobile, ICS)

        Returns:
            The LLM reply, or ``{"error", "techniques": []}`` as JSON on failure
        """
        try:
            prompt = MITRE_EXTRACTION_PROMPT.format(
                content=content, framework=framework
            )
            return self._invoke_llm(prompt)
        except Exception as e:
            return json.dumps({"error": str(e), "techniques": []})
src/agents/cti_agent/testing_cti_agent.ipynb ADDED
@@ -0,0 +1,573 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "metadata": {},
5
+ "cell_type": "markdown",
6
+ "source": "## CTI Agent",
7
+ "id": "1e014677902bc4a2"
8
+ },
9
+ {
10
+ "metadata": {},
11
+ "cell_type": "markdown",
12
+ "source": "## Set up",
13
+ "id": "57d21ad42c51b7bb"
14
+ },
15
+ {
16
+ "metadata": {
17
+ "ExecuteTime": {
18
+ "end_time": "2025-09-24T14:09:48.553649Z",
19
+ "start_time": "2025-09-24T14:09:40.747722Z"
20
+ }
21
+ },
22
+ "cell_type": "code",
23
+ "source": [
24
+ "%%capture --no-stderr\n",
25
+ "%pip install --quiet -U langgraph langchain-community langchain-google-genai langchain-tavily"
26
+ ],
27
+ "id": "64e62b8be724effb",
28
+ "outputs": [
29
+ {
30
+ "name": "stderr",
31
+ "output_type": "stream",
32
+ "text": [
33
+ "WARNING: Ignoring invalid distribution ~umpy (D:\\Swinburne University of Technology\\2025\\Swinburne Semester 2 2025\\COS30018 - Intelligent Systems\\Assignment\\Cyber-Agent\\.venv\\Lib\\site-packages)\n",
34
+ "WARNING: Ignoring invalid distribution ~umpy (D:\\Swinburne University of Technology\\2025\\Swinburne Semester 2 2025\\COS30018 - Intelligent Systems\\Assignment\\Cyber-Agent\\.venv\\Lib\\site-packages)\n",
35
+ "WARNING: Ignoring invalid distribution ~umpy (D:\\Swinburne University of Technology\\2025\\Swinburne Semester 2 2025\\COS30018 - Intelligent Systems\\Assignment\\Cyber-Agent\\.venv\\Lib\\site-packages)\n",
36
+ "\n",
37
+ "[notice] A new release of pip is available: 25.0.1 -> 25.2\n",
38
+ "[notice] To update, run: python.exe -m pip install --upgrade pip\n"
39
+ ]
40
+ }
41
+ ],
42
+ "execution_count": 1
43
+ },
44
+ {
45
+ "metadata": {
46
+ "ExecuteTime": {
47
+ "end_time": "2025-09-24T14:09:59.629541Z",
48
+ "start_time": "2025-09-24T14:09:49.858591Z"
49
+ }
50
+ },
51
+ "cell_type": "code",
52
+ "source": [
53
+ "import getpass\n",
54
+ "import os\n",
55
+ "\n",
56
+ "def set_env_variable(var_name):\n",
57
+ " if var_name not in os.environ:\n",
58
+ " os.environ[var_name] = getpass.getpass(f\"{var_name}=\")\n",
59
+ "\n",
60
+ "set_env_variable(\"GEMINI_API_KEY\")\n",
61
+ "set_env_variable(\"TAVILY_API_KEY\")"
62
+ ],
63
+ "id": "b9b8036f5182062b",
64
+ "outputs": [],
65
+ "execution_count": 2
66
+ },
67
+ {
68
+ "metadata": {},
69
+ "cell_type": "markdown",
70
+ "source": "### CTI Agent",
71
+ "id": "b7ccb1c1f41b189"
72
+ },
73
+ {
74
+ "metadata": {
75
+ "ExecuteTime": {
76
+ "end_time": "2025-09-24T14:10:00.191781Z",
77
+ "start_time": "2025-09-24T14:10:00.135222Z"
78
+ }
79
+ },
80
+ "cell_type": "code",
81
+ "source": [
82
+ "from typing import List\n",
83
+ "from typing_extensions import TypedDict\n",
84
+ "\n",
85
+ "class ReWOO(TypedDict):\n",
86
+ " task: str\n",
87
+ " plan_string: str\n",
88
+ " steps: List\n",
89
+ " results: dict\n",
90
+ " result: str"
91
+ ],
92
+ "id": "1ff523d16a86a18c",
93
+ "outputs": [],
94
+ "execution_count": 3
95
+ },
96
+ {
97
+ "metadata": {},
98
+ "cell_type": "markdown",
99
+ "source": "#### Planner",
100
+ "id": "62b86e7dd440db74"
101
+ },
102
+ {
103
+ "metadata": {
104
+ "ExecuteTime": {
105
+ "end_time": "2025-09-24T14:10:30.386536Z",
106
+ "start_time": "2025-09-24T14:10:00.376586Z"
107
+ }
108
+ },
109
+ "cell_type": "code",
110
+ "source": [
111
+ "from langchain_google_genai import GoogleGenerativeAI\n",
112
+ "\n",
113
+ "llm = GoogleGenerativeAI(model=\"gemini-2.5-flash\", api_key=os.environ[\"GEMINI_API_KEY\"])"
114
+ ],
115
+ "id": "7ee558c30d4e1c2c",
116
+ "outputs": [
117
+ {
118
+ "name": "stderr",
119
+ "output_type": "stream",
120
+ "text": [
121
+ "D:\\Swinburne University of Technology\\2025\\Swinburne Semester 2 2025\\COS30018 - Intelligent Systems\\Assignment\\Cyber-Agent\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
122
+ " from .autonotebook import tqdm as notebook_tqdm\n"
123
+ ]
124
+ }
125
+ ],
126
+ "execution_count": 4
127
+ },
128
+ {
129
+ "metadata": {
130
+ "ExecuteTime": {
131
+ "end_time": "2025-09-24T14:10:30.432069Z",
132
+ "start_time": "2025-09-24T14:10:30.421360Z"
133
+ }
134
+ },
135
+ "cell_type": "code",
136
+ "source": [
137
+ "prompt = \"\"\"For the following task, make plans that can solve the problem step by step. For each plan, indicate \\\n",
138
+ "which external tool together with tool input to retrieve evidence. You can store the evidence into a \\\n",
139
+ "variable #E that can be called by later tools. (Plan, #E1, Plan, #E2, Plan, ...)\n",
140
+ "\n",
141
+ "Tools can be one of the following:\n",
142
+ "(1) Google[input]: Worker that searches results from Google. Useful when you need to find short\n",
143
+ "and succinct answers about a specific topic. The input should be a search query.\n",
144
+ "(2) LLM[input]: A pretrained LLM like yourself. Useful when you need to act with general\n",
145
+ "world knowledge and common sense. Prioritize it when you are confident in solving the problem\n",
146
+ "yourself. Input can be any instruction.\n",
147
+ "\n",
148
+ "For example,\n",
149
+ "Task: Thomas, Toby, and Rebecca worked a total of 157 hours in one week. Thomas worked x\n",
150
+ "hours. Toby worked 10 hours less than twice what Thomas worked, and Rebecca worked 8 hours\n",
151
+ "less than Toby. How many hours did Rebecca work?\n",
152
+ "Plan: Given Thomas worked x hours, translate the problem into algebraic expressions and solve\n",
153
+ "with Wolfram Alpha. #E1 = WolframAlpha[Solve x + (2x − 10) + ((2x − 10) − 8) = 157]\n",
154
+ "Plan: Find out the number of hours Thomas worked. #E2 = LLM[What is x, given #E1]\n",
155
+ "Plan: Calculate the number of hours Rebecca worked. #E3 = Calculator[(2 ∗ #E2 − 10) − 8]\n",
156
+ "\n",
157
+ "Begin!\n",
158
+ "Describe your plans with rich details. Each Plan should be followed by only one #E.\n",
159
+ "\n",
160
+ "Task: {task}\"\"\""
161
+ ],
162
+ "id": "320871448adc80c",
163
+ "outputs": [],
164
+ "execution_count": 5
165
+ },
166
+ {
167
+ "metadata": {
168
+ "ExecuteTime": {
169
+ "end_time": "2025-09-24T14:10:30.518680Z",
170
+ "start_time": "2025-09-24T14:10:30.508496Z"
171
+ }
172
+ },
173
+ "cell_type": "code",
174
+ "source": "task = \"What are the latest CTI reports of the ATP that uses the T1566.002: Spearphishing Links techniques?\"",
175
+ "id": "cfbfbc30cd1f2a2d",
176
+ "outputs": [],
177
+ "execution_count": 6
178
+ },
179
+ {
180
+ "metadata": {
181
+ "ExecuteTime": {
182
+ "end_time": "2025-09-24T14:10:36.513049Z",
183
+ "start_time": "2025-09-24T14:10:30.637595Z"
184
+ }
185
+ },
186
+ "cell_type": "code",
187
+ "source": "result = llm.invoke(prompt.format(task=task))",
188
+ "id": "cb8c925be339d309",
189
+ "outputs": [],
190
+ "execution_count": 7
191
+ },
192
+ {
193
+ "metadata": {
194
+ "ExecuteTime": {
195
+ "end_time": "2025-09-24T14:10:36.543369Z",
196
+ "start_time": "2025-09-24T14:10:36.536547Z"
197
+ }
198
+ },
199
+ "cell_type": "code",
200
+ "source": "print(result)",
201
+ "id": "77cfb38f9b210b50",
202
+ "outputs": [
203
+ {
204
+ "name": "stdout",
205
+ "output_type": "stream",
206
+ "text": [
207
+ "Plan: Search for the latest CTI reports that specifically mention ATP groups using the T1566.002: Spearphishing Links technique. I will prioritize recent publications.\n",
208
+ "#E1 = Google[latest CTI reports ATP T1566.002 Spearphishing Links]\n",
209
+ "Plan: Review the search results from #E1 to identify relevant reports from reputable cybersecurity intelligence sources. I will look for titles or snippets that indicate a focus on ATP activities and the specified MITRE ATT&CK technique. I will then extract the most pertinent information about the ATPs and their use of T1566.002.\n",
210
+ "#E2 = LLM[Analyze the search results from #E1 to identify specific CTI reports (title, source, date) that discuss ATPs using T1566.002: Spearphishing Links. Summarize the key findings from these reports, mentioning any specific ATP groups identified.]\n"
211
+ ]
212
+ }
213
+ ],
214
+ "execution_count": 8
215
+ },
216
+ {
217
+ "metadata": {},
218
+ "cell_type": "markdown",
219
+ "source": "#### Planner Node",
220
+ "id": "9e462bfcf2ec91f4"
221
+ },
222
+ {
223
+ "metadata": {
224
+ "ExecuteTime": {
225
+ "end_time": "2025-09-24T14:10:36.743644Z",
226
+ "start_time": "2025-09-24T14:10:36.631943Z"
227
+ }
228
+ },
229
+ "cell_type": "code",
230
+ "source": [
231
+ "import re\n",
232
+ "\n",
233
+ "from langchain_core.prompts import ChatPromptTemplate\n",
234
+ "\n",
235
+ "# Regex to match expressions of the form E#... = ...[...]\n",
236
+ "regex_pattern = r\"Plan:\\s*(.+)\\s*(#E\\d+)\\s*=\\s*(\\w+)\\s*\\[([^\\]]+)\\]\"\n",
237
+ "prompt_template = ChatPromptTemplate.from_messages([(\"user\", prompt)])\n",
238
+ "planner = prompt_template | llm\n",
239
+ "\n",
240
+ "\n",
241
+ "def get_plan(state: ReWOO):\n",
242
+ " task = state[\"task\"]\n",
243
+ " result = planner.invoke({\"task\": task})\n",
244
+ " # Find all matches in the sample text\n",
245
+ " matches = re.findall(regex_pattern, result)\n",
246
+ " return {\"steps\": matches, \"plan_string\": result}"
247
+ ],
248
+ "id": "5c3693b5fd44aefa",
249
+ "outputs": [],
250
+ "execution_count": 9
251
+ },
252
+ {
253
+ "metadata": {},
254
+ "cell_type": "markdown",
255
+ "source": "### Executor",
256
+ "id": "ca86ebf96a47fff6"
257
+ },
258
+ {
259
+ "metadata": {
260
+ "ExecuteTime": {
261
+ "end_time": "2025-09-24T14:10:36.918073Z",
262
+ "start_time": "2025-09-24T14:10:36.775677Z"
263
+ }
264
+ },
265
+ "cell_type": "code",
266
+ "source": [
267
+ "from langchain_tavily import TavilySearch\n",
268
+ "\n",
269
+ "search_config = {\n",
270
+ " \"api_key\": os.environ[\"TAVILY_API_KEY\"],\n",
271
+ " \"max_results\": 10,\n",
272
+ " \"search_depth\": \"advanced\",\n",
273
+ " \"include_raw_content\": True\n",
274
+ "}\n",
275
+ "\n",
276
+ "search = TavilySearch(**search_config)"
277
+ ],
278
+ "id": "b7367781aeac5c5",
279
+ "outputs": [],
280
+ "execution_count": 10
281
+ },
282
+ {
283
+ "metadata": {
284
+ "ExecuteTime": {
285
+ "end_time": "2025-09-24T14:10:36.964885Z",
286
+ "start_time": "2025-09-24T14:10:36.953023Z"
287
+ }
288
+ },
289
+ "cell_type": "code",
290
+ "source": [
291
+ "def _get_current_task(state: ReWOO):\n",
292
+ " if \"results\" not in state or state[\"results\"] is None:\n",
293
+ " return 1\n",
294
+ " if len(state[\"results\"]) == len(state[\"steps\"]):\n",
295
+ " return None\n",
296
+ " else:\n",
297
+ " return len(state[\"results\"]) + 1\n",
298
+ "\n",
299
+ "\n",
300
+ "def tool_execution(state: ReWOO):\n",
301
+ " \"\"\"Worker node that executes the tools of a given plan.\"\"\"\n",
302
+ " _step = _get_current_task(state)\n",
303
+ " _, step_name, tool, tool_input = state[\"steps\"][_step - 1]\n",
304
+ " _results = (state[\"results\"] or {}) if \"results\" in state else {}\n",
305
+ " for k, v in _results.items():\n",
306
+ " tool_input = tool_input.replace(k, v)\n",
307
+ " if tool == \"Google\":\n",
308
+ " result = search.invoke(tool_input)\n",
309
+ " elif tool == \"LLM\":\n",
310
+ " result = llm.invoke(tool_input)\n",
311
+ " else:\n",
312
+ " raise ValueError\n",
313
+ " _results[step_name] = str(result)\n",
314
+ " return {\"results\": _results}"
315
+ ],
316
+ "id": "efb45424fa750ce5",
317
+ "outputs": [],
318
+ "execution_count": 11
319
+ },
320
+ {
321
+ "metadata": {},
322
+ "cell_type": "markdown",
323
+ "source": "### Solver",
324
+ "id": "4cf82df72d40e9cd"
325
+ },
326
+ {
327
+ "metadata": {
328
+ "ExecuteTime": {
329
+ "end_time": "2025-09-24T14:10:37.018935Z",
330
+ "start_time": "2025-09-24T14:10:37.008762Z"
331
+ }
332
+ },
333
+ "cell_type": "code",
334
+ "source": [
335
+ "solve_prompt = \"\"\"Solve the following task or problem. To solve the problem, we have made step-by-step Plan and \\\n",
336
+ "retrieved corresponding Evidence to each Plan. Use them with caution since long evidence might \\\n",
337
+ "contain irrelevant information.\n",
338
+ "\n",
339
+ "{plan}\n",
340
+ "\n",
341
+ "Now solve the question or task according to provided Evidence above. Respond with the answer\n",
342
+ "directly with no extra words.\n",
343
+ "\n",
344
+ "Task: {task}\n",
345
+ "Response:\"\"\"\n",
346
+ "\n",
347
+ "\n",
348
+ "def solve(state: ReWOO):\n",
349
+ " plan = \"\"\n",
350
+ " for _plan, step_name, tool, tool_input in state[\"steps\"]:\n",
351
+ " _results = (state[\"results\"] or {}) if \"results\" in state else {}\n",
352
+ " for k, v in _results.items():\n",
353
+ " tool_input = tool_input.replace(k, v)\n",
354
+ " step_name = step_name.replace(k, v)\n",
355
+ " plan += f\"Plan: {_plan}\\n{step_name} = {tool}[{tool_input}]\"\n",
356
+ " prompt = solve_prompt.format(plan=plan, task=state[\"task\"])\n",
357
+ " result = llm.invoke(prompt)\n",
358
+ " return {\"result\": result}"
359
+ ],
360
+ "id": "b545c04c30414789",
361
+ "outputs": [],
362
+ "execution_count": 12
363
+ },
364
+ {
365
+ "metadata": {},
366
+ "cell_type": "markdown",
367
+ "source": "### Define Graph",
368
+ "id": "3b3fbec2f9880412"
369
+ },
370
+ {
371
+ "metadata": {
372
+ "ExecuteTime": {
373
+ "end_time": "2025-09-24T14:10:37.080389Z",
374
+ "start_time": "2025-09-24T14:10:37.071333Z"
375
+ }
376
+ },
377
+ "cell_type": "code",
378
+ "source": [
379
+ "def _route(state):\n",
380
+ " _step = _get_current_task(state)\n",
381
+ " if _step is None:\n",
382
+ " # We have executed all tasks\n",
383
+ " return \"solve\"\n",
384
+ " else:\n",
385
+ " # We are still executing tasks, loop back to the \"tool\" node\n",
386
+ " return \"tool\""
387
+ ],
388
+ "id": "6fee70503c849ab",
389
+ "outputs": [],
390
+ "execution_count": 13
391
+ },
392
+ {
393
+ "metadata": {
394
+ "ExecuteTime": {
395
+ "end_time": "2025-09-24T14:10:37.812966Z",
396
+ "start_time": "2025-09-24T14:10:37.134773Z"
397
+ }
398
+ },
399
+ "cell_type": "code",
400
+ "source": [
401
+ "from langgraph.graph import END, StateGraph, START\n",
402
+ "\n",
403
+ "graph = StateGraph(ReWOO)\n",
404
+ "graph.add_node(\"plan\", get_plan)\n",
405
+ "graph.add_node(\"tool\", tool_execution)\n",
406
+ "graph.add_node(\"solve\", solve)\n",
407
+ "graph.add_edge(\"plan\", \"tool\")\n",
408
+ "graph.add_edge(\"solve\", END)\n",
409
+ "graph.add_conditional_edges(\"tool\", _route)\n",
410
+ "graph.add_edge(START, \"plan\")\n",
411
+ "\n",
412
+ "app = graph.compile()"
413
+ ],
414
+ "id": "a10ad4abef949d17",
415
+ "outputs": [],
416
+ "execution_count": 14
417
+ },
418
+ {
419
+ "metadata": {
420
+ "ExecuteTime": {
421
+ "end_time": "2025-09-24T14:10:37.864440Z",
422
+ "start_time": "2025-09-24T14:10:37.849889Z"
423
+ }
424
+ },
425
+ "cell_type": "code",
426
+ "source": [
427
+ "from typing import Dict, Any\n",
428
+ "\n",
429
+ "def format_output(state: Dict[str, Any]) -> str:\n",
430
+ " \"\"\"Format the CTI agent output for better readability.\"\"\"\n",
431
+ " output = []\n",
432
+ "\n",
433
+ " for node_name, node_data in state.items():\n",
434
+ " output.append(f\"\\n🔹 **{node_name.upper()}**\")\n",
435
+ " output.append(\"=\" * 50)\n",
436
+ "\n",
437
+ " if node_name == \"plan\":\n",
438
+ " if \"plan_string\" in node_data:\n",
439
+ " output.append(\"📋 **Generated Plan:**\")\n",
440
+ " output.append(node_data[\"plan_string\"])\n",
441
+ "\n",
442
+ " if \"steps\" in node_data and node_data[\"steps\"]:\n",
443
+ " output.append(\"\\n📝 **Extracted Steps:**\")\n",
444
+ " for i, (plan, step_name, tool, tool_input) in enumerate(node_data[\"steps\"], 1):\n",
445
+ " output.append(f\" {i}. {plan}\")\n",
446
+ " output.append(f\" 🔧 {step_name} = {tool}[{tool_input}]\")\n",
447
+ "\n",
448
+ " elif node_name == \"tool\":\n",
449
+ " if \"results\" in node_data:\n",
450
+ " output.append(\"🔍 **Execution Results:**\")\n",
451
+ " for step_name, result in node_data[\"results\"].items():\n",
452
+ " output.append(f\" {step_name}:\")\n",
453
+ " # Truncate long results for readability\n",
454
+ " result_str = str(result)\n",
455
+ " if len(result_str) > 500:\n",
456
+ " result_str = result_str[:500] + \"... [truncated]\"\n",
457
+ " output.append(f\" {result_str}\")\n",
458
+ "\n",
459
+ " elif node_name == \"solve\":\n",
460
+ " if \"result\" in node_data:\n",
461
+ " output.append(\"✅ **Final Answer:**\")\n",
462
+ " output.append(node_data[\"result\"])\n",
463
+ "\n",
464
+ " output.append(\"\")\n",
465
+ "\n",
466
+ " return \"\\n\".join(output)\n"
467
+ ],
468
+ "id": "30f337a626e2fbf9",
469
+ "outputs": [],
470
+ "execution_count": 15
471
+ },
472
+ {
473
+ "metadata": {
474
+ "ExecuteTime": {
475
+ "end_time": "2025-09-24T14:11:24.978749Z",
476
+ "start_time": "2025-09-24T14:10:37.901866Z"
477
+ }
478
+ },
479
+ "cell_type": "code",
480
+ "source": [
481
+ "print(\"**CTI Agent Execution**\")\n",
482
+ "print(\"=\" * 60)\n",
483
+ "\n",
484
+ "for s in app.stream({\"task\": task}):\n",
485
+ " formatted_output = format_output(s)\n",
486
+ " print(formatted_output)\n",
487
+ " print(\"-\" * 60)"
488
+ ],
489
+ "id": "b45aa62c23719738",
490
+ "outputs": [
491
+ {
492
+ "name": "stdout",
493
+ "output_type": "stream",
494
+ "text": [
495
+ "**CTI Agent Execution**\n",
496
+ "============================================================\n",
497
+ "\n",
498
+ "🔹 **PLAN**\n",
499
+ "==================================================\n",
500
+ "📋 **Generated Plan:**\n",
501
+ "Plan: Search for the latest CTI reports that specifically mention ATPs and the MITRE ATT&CK technique T1566.002 (Spearphishing Links). I will use keywords to narrow down the search to recent publications.\n",
502
+ "#E1 = Google[latest CTI reports ATP T1566.002 \"Spearphishing Links\" 2023 2024]\n",
503
+ "Plan: Review the search results from #E1 to identify specific CTI reports from reputable sources (e.g., major cybersecurity vendors, government agencies) that discuss ATPs utilizing spearphishing links. Synthesize the key findings, including the names of ATPs and the context of their T1566.002 usage.\n",
504
+ "#E2 = LLM[Based on the search results in #E1, identify and summarize the latest CTI reports that detail ATPs using T1566.002: Spearphishing Links. Include the names of the ATPs and a brief description of their activities related to this technique.]\n",
505
+ "\n",
506
+ "📝 **Extracted Steps:**\n",
507
+ " 1. Search for the latest CTI reports that specifically mention ATPs and the MITRE ATT&CK technique T1566.002 (Spearphishing Links). I will use keywords to narrow down the search to recent publications.\n",
508
+ " 🔧 #E1 = Google[latest CTI reports ATP T1566.002 \"Spearphishing Links\" 2023 2024]\n",
509
+ " 2. Review the search results from #E1 to identify specific CTI reports from reputable sources (e.g., major cybersecurity vendors, government agencies) that discuss ATPs utilizing spearphishing links. Synthesize the key findings, including the names of ATPs and the context of their T1566.002 usage.\n",
510
+ " 🔧 #E2 = LLM[Based on the search results in #E1, identify and summarize the latest CTI reports that detail ATPs using T1566.002: Spearphishing Links. Include the names of the ATPs and a brief description of their activities related to this technique.]\n",
511
+ "\n",
512
+ "------------------------------------------------------------\n",
513
+ "\n",
514
+ "🔹 **TOOL**\n",
515
+ "==================================================\n",
516
+ "🔍 **Execution Results:**\n",
517
+ " #E1:\n",
518
+ " {'query': 'latest CTI reports ATP T1566.002 \"Spearphishing Links\" 2023 2024', 'follow_up_questions': None, 'answer': None, 'images': [], 'results': [{'url': 'https://attack.mitre.org/techniques/T1566/002/', 'title': 'Phishing: Spearphishing Link, Sub-technique T1566.002 - Enterprise', 'content': '| C0036 | Pikabot Distribution February 2024 | Pikabot Distribution February 2024 utilized emails with hyperlinks leading to malicious ZIP archive files containing scripts to download and install Pikabo... [truncated]\n",
519
+ "\n",
520
+ "------------------------------------------------------------\n",
521
+ "\n",
522
+ "🔹 **TOOL**\n",
523
+ "==================================================\n",
524
+ "🔍 **Execution Results:**\n",
525
+ " #E1:\n",
526
+ " {'query': 'latest CTI reports ATP T1566.002 \"Spearphishing Links\" 2023 2024', 'follow_up_questions': None, 'answer': None, 'images': [], 'results': [{'url': 'https://attack.mitre.org/techniques/T1566/002/', 'title': 'Phishing: Spearphishing Link, Sub-technique T1566.002 - Enterprise', 'content': '| C0036 | Pikabot Distribution February 2024 | Pikabot Distribution February 2024 utilized emails with hyperlinks leading to malicious ZIP archive files containing scripts to download and install Pikabo... [truncated]\n",
527
+ " #E2:\n",
528
+ " Based on the provided search results, the following CTI reports detail APTs and campaigns using T1566.002 (Spearphishing Link) in 2023 and 2024:\n",
529
+ "\n",
530
+ "* **Pikabot Distribution February 2024 (C0036):** This campaign, observed in **February 2024**, utilized emails with hyperlinks that led victims to malicious ZIP archive files. These archives contained scripts designed to download and install the Pikabot malware.\n",
531
+ "* **TA577 (G1037) / Latrodectus (S1160):** The threat group TA577, in campaigns report... [truncated]\n",
532
+ "\n",
533
+ "------------------------------------------------------------\n",
534
+ "\n",
535
+ "🔹 **SOLVE**\n",
536
+ "==================================================\n",
537
+ "✅ **Final Answer:**\n",
538
+ "The latest CTI reports of ATPs using the T1566.002 (Spearphishing Links) technique include:\n",
539
+ "\n",
540
+ "* **Pikabot Distribution February 2024 (C0036):** This campaign, observed in February 2024, used emails with hyperlinks leading to malicious ZIP archive files for Pikabot malware distribution.\n",
541
+ "* **TA577 (G1037) / Latrodectus (S1160):** In April 2024, TA577 sent emails with malicious links to distribute Latrodectus malware via malicious JavaScript files.\n",
542
+ "* **Storm-1811 (G1046):** In May 2024, Storm-1811 distributed malicious links that redirected victims to EvilProxy-based phishing sites to harvest credentials.\n",
543
+ "* **OilRig (G0049) / APT34 / Earth Simnavaz:** This group continues to use spearphishing links. Recent activity under the name \"Earth Simnavaz\" was reported in October 2024, and \"Crambus\" (an associated group name) in October 2023.\n",
544
+ "\n",
545
+ "------------------------------------------------------------\n"
546
+ ]
547
+ }
548
+ ],
549
+ "execution_count": 16
550
+ }
551
+ ],
552
+ "metadata": {
553
+ "kernelspec": {
554
+ "display_name": "Python 3",
555
+ "language": "python",
556
+ "name": "python3"
557
+ },
558
+ "language_info": {
559
+ "codemirror_mode": {
560
+ "name": "ipython",
561
+ "version": 2
562
+ },
563
+ "file_extension": ".py",
564
+ "mimetype": "text/x-python",
565
+ "name": "python",
566
+ "nbconvert_exporter": "python",
567
+ "pygments_lexer": "ipython2",
568
+ "version": "2.7.6"
569
+ }
570
+ },
571
+ "nbformat": 4,
572
+ "nbformat_minor": 5
573
+ }
src/agents/cti_agent/tool_evaluation_results/extract_mitre_techniques_results.csv ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ sample_id,platform,description,tool_output,predicted,ground_truth,missing,extra,precision,recall,f1,tp,fp,fn,predicted_count,ground_truth_count
2
+ https://attack.mitre.org/software/S0066/,Enterprise,3PARA RAT is a remote access tool (RAT) developed in C++ and associated with the group Putter Panda....,"**T1219** - Remote Access Software: This technique involves the use of legitimate or custom tools to remotely control a system, identified because 3PARA RAT is explicitly described as a ""remote access tool (RAT)"".
3
+
4
+ **T1071** - Application Layer Protocol: This technique involves using common application layer protocols for command and control communication, identified as 3PARA RAT ""communicates with its command and control (C2) servers via HTTP"".
5
+
6
+ **T1573** - Encrypted Channel: This technique des...","['T1070', 'T1071', 'T1083', 'T1219', 'T1573']","['T1070', 'T1071', 'T1083', 'T1573']",[],['T1219'],0.8,1.0,0.888888888888889,4,1,0,5,4
7
+ https://attack.mitre.org/software/S0065/,Enterprise,"4H RAT is a piece of malware linked to Putter Panda, with evidence of its use dating back to at leas...","**T1071** - Application Layer Protocol: Adversaries may communicate using application layer protocols to avoid detection, as 4H RAT relies on HTTP for its command and control communication.
8
+
9
+ **T1059** - Command and Scripting Interpreter: Adversaries may abuse command and scripting interpreters to execute commands, as 4H RAT has the ability to create a remote shell for direct command execution.
10
+
11
+ **T1001** - Data Obfuscation: Adversaries may obfuscate data to conceal its true intent or evade defen...","['T1001', 'T1057', 'T1059', 'T1071', 'T1082', 'T1083']","['T1057', 'T1059', 'T1071', 'T1082', 'T1083', 'T1573']",['T1573'],['T1001'],0.8333333333333334,0.8333333333333334,0.8333333333333334,5,1,1,6,6
12
+ https://attack.mitre.org/software/S0469/,Enterprise,"ABK is a downloader associated with BRONZE BUTLER, active since at least 2019. It communicates with ...","**T1071** - Application Layer Protocol: This technique involves adversaries using common application layer protocols for command and control, identified because ABK communicates with its C2 server via HTTP.
13
+ **T1059** - Command and Scripting Interpreter: This technique involves adversaries using command-line interfaces to execute commands or scripts, identified as ABK uses the command line to execute Portable Executables.
14
+ **T1140** - Deobfuscate/Decode Files or Information: This technique involve...","['T1027', 'T1055', 'T1059', 'T1071', 'T1105', 'T1140', 'T1518']","['T1027', 'T1055', 'T1059', 'T1071', 'T1105', 'T1140', 'T1518']",[],[],1.0,1.0,1.0,7,0,0,7,7
15
+ https://attack.mitre.org/software/S1061/,Mobile,AbstractEmu is mobile malware that was first detected in October 2021 on Google Play and other third...,"**T1477** - Compromise Application Store: This technique involves distributing malware through legitimate or third-party application stores, identified as AbstractEmu was found on Google Play and other app stores.
16
+
17
+ **T1404** - Exploitation for Privilege Escalation: This technique involves exploiting vulnerabilities to gain higher-level permissions, identified as AbstractEmu exploits known Android vulnerabilities to gain root permissions.
18
+
19
+ **T1405** - Rooting: This technique specifically refers t...","['T1400', 'T1401', 'T1402', 'T1403', 'T1404', 'T1405', 'T1407', 'T1424', 'T1425', 'T1436', 'T1437', 'T1475', 'T1476', 'T1477']","['T1404', 'T1406', 'T1407', 'T1418', 'T1422', 'T1426', 'T1429', 'T1430', 'T1437', 'T1512', 'T1517', 'T1533', 'T1544', 'T1623', 'T1626', 'T1629', 'T1633', 'T1636', 'T1646']","['T1406', 'T1418', 'T1422', 'T1426', 'T1429', 'T1430', 'T1512', 'T1517', 'T1533', 'T1544', 'T1623', 'T1626', 'T1629', 'T1633', 'T1636', 'T1646']","['T1400', 'T1401', 'T1402', 'T1403', 'T1405', 'T1424', 'T1425', 'T1436', 'T1475', 'T1476', 'T1477']",0.21428571428571427,0.15789473684210525,0.18181818181818182,3,11,16,14,19
20
+ https://attack.mitre.org/software/S1028/,Enterprise,Action RAT is a remote access tool developed in Delphi and has been employed by SideCopy since at le...,"**T1219** - Remote Access Software: This technique involves adversaries using legitimate or custom software to remotely control a system; Action RAT is explicitly described as a ""remote access tool.""
21
+
22
+ **T1071** - Application Layer Protocol: This technique involves adversaries using common application layer protocols for command and control; the malware communicates with C2 servers ""via HTTP.""
23
+
24
+ **T1059** - Command and Scripting Interpreter: This technique involves adversaries using command and sc...","['T1005', 'T1059', 'T1071', 'T1105', 'T1132', 'T1219']","['T1005', 'T1059', 'T1071', 'T1083', 'T1105', 'T1140']","['T1083', 'T1140']","['T1132', 'T1219']",0.6666666666666666,0.6666666666666666,0.6666666666666666,4,2,2,6,6
25
+ https://attack.mitre.org/software/S0202/,Enterprise,"adbupd is a backdoor utilized by PLATINUM, bearing similarities to Dipsind. It has the capability to...","**T1059** - Command and Scripting Interpreter: This technique involves adversaries abusing command and script interpreters to execute commands, identified because adbupd has the capability to execute a copy of `cmd.exe`.
26
+
27
+ **T1573** - Encrypted Channel: This technique describes adversaries communicating with C2 systems using encryption, identified because adbupd uses the OpenSSL library to encrypt its command and control (C2) traffic.
28
+
29
+ **T1546** - Event Triggered Execution: This technique involve...","['T1059', 'T1546', 'T1573']","['T1059', 'T1546', 'T1573']",[],[],1.0,1.0,1.0,3,0,0,3,3
30
+ https://attack.mitre.org/software/S0552/,Enterprise,AdFind is a free command-line query tool designed for extracting information from Active Directory. ...,"**T1087** - Account Discovery: This technique involves adversaries attempting to get a listing of local system or domain accounts, and it was identified because AdFind can enumerate domain users and domain groups.
31
+
32
+ **T1069** - Permission Groups Discovery: This technique describes adversaries attempting to find and identify the primary ways that permissions are managed in an environment, and it was identified as AdFind can enumerate domain groups and organizational units (OUs), which are often us...","['T1016', 'T1018', 'T1069', 'T1087', 'T1482']","['T1016', 'T1018', 'T1069', 'T1087', 'T1482']",[],[],1.0,1.0,1.0,5,0,0,5,5
33
+ https://attack.mitre.org/software/S0045/,Enterprise,"ADVSTORESHELL is a spying backdoor associated with APT28, active from at least 2012 to 2016. It is t...","**T1071** - Application Layer Protocol: This technique involves using common application layer protocols for command and control, identified as ADVSTORESHELL communicates with its C2 server via HTTP POST requests on port 80.
34
+
35
+ **T1560** - Archive Collected Data: This technique describes adversaries compressing or encrypting data prior to exfiltration, identified as ADVSTORESHELL encrypts data using 3DES before exfiltration.
36
+
37
+ **T1547** - Boot or Logon Autostart Execution: This technique involves a...","['T1001', 'T1059', 'T1071', 'T1074', 'T1547', 'T1560', 'T1573']","['T1059', 'T1071', 'T1074', 'T1132', 'T1547', 'T1560', 'T1573']",['T1132'],['T1001'],0.8571428571428571,0.8571428571428571,0.8571428571428571,6,1,1,7,7
38
+ https://attack.mitre.org/software/S0440/,Enterprise,Agent Smith is mobile malware that generates financial profit by replacing legitimate apps on infect...,"**T1036** - Masquerading: The malware disguises itself as legitimate applications (like a Google app or a Feng Shui Bundle) and files (like a JPG) to avoid detection and trick users.
39
+ **T1027** - Obfuscated Files or Information: The core malware is encrypted with an XOR cipher and disguised as a JPG file, making it harder for security tools to analyze and detect.
40
+ **T1210** - Exploitation for Privilege Escalation: Agent Smith exploits known OS vulnerabilities, such as Janus, to gain elevated privi...","['T1027', 'T1036', 'T1070', 'T1210', 'T1496', 'T1562', 'T1574']","['T1404', 'T1406', 'T1577', 'T1628', 'T1630', 'T1643', 'T1655']","['T1404', 'T1406', 'T1577', 'T1628', 'T1630', 'T1643', 'T1655']","['T1027', 'T1036', 'T1070', 'T1210', 'T1496', 'T1562', 'T1574']",0.0,0.0,0.0,0,7,7,7,7
41
+ https://attack.mitre.org/software/S0331/,Enterprise,"Agent Tesla is a spyware Trojan built on the .NET framework, active since at least 2014. It is capab...","**T1003** - OS Credential Dumping: Adversaries may attempt to dump credentials to obtain accounts and passwords, and Agent Tesla does this by extracting credentials from FTP clients and wireless profiles.
42
+ **T1071** - Application Layer Protocol: Adversaries may communicate over application layer protocols to avoid detection, and Agent Tesla uses HTTP for its command and control (C2) communications.
43
+ **T1573** - Encrypted Channel: Adversaries may encrypt C2 communications or exfiltrated data to hid...","['T1003', 'T1041', 'T1056', 'T1071', 'T1547', 'T1573']","['T1071', 'T1087', 'T1115', 'T1140', 'T1185', 'T1547', 'T1555', 'T1560']","['T1087', 'T1115', 'T1140', 'T1185', 'T1555', 'T1560']","['T1003', 'T1041', 'T1056', 'T1573']",0.3333333333333333,0.25,0.28571428571428575,2,4,6,6,8
44
+ https://attack.mitre.org/software/S0092/,Enterprise,Agent.btz is a worm known for spreading primarily through removable devices like USB drives. It gain...,"**T1091** - Replicating Removable Media: This technique involves an adversary spreading malware by copying it onto removable media, identified as Agent.btz drops copies of itself and an autorun.inf file onto USB drives for propagation.
45
+ **T1082** - System Information Discovery: This technique involves an adversary gathering basic information about the system, identified as Agent.btz collecting and saving general system details.
46
+ **T1027** - Obfuscated Files or Information: This technique involves ...","['T1016', 'T1027', 'T1033', 'T1052', 'T1082', 'T1091', 'T1105']","['T1016', 'T1033', 'T1052', 'T1091', 'T1105', 'T1560']",['T1560'],"['T1027', 'T1082']",0.7142857142857143,0.8333333333333334,0.7692307692307692,5,2,1,7,6
47
+ https://attack.mitre.org/software/S1095/,Mobile,AhRat is an Android remote access tool (RAT) derived from the open-source AhMyth RAT. It began sprea...,"**T1401** - Command and Control: Adversaries communicate with compromised systems to control them. Identified because AhRat communicates with its command and control (C2) server via HTTPS requests.
48
+
49
+ **T1415** - Audio Capture: Adversaries may record audio from a device's microphone. Identified because AhRat is capable of recording audio using the device’s microphone.
50
+
51
+ **T1402** - Boot or Reboot Autostart: Adversaries may configure their malware to automatically start when the device boots or rebo...","['T1401', 'T1402', 'T1403', 'T1409', 'T1413', 'T1415', 'T1416']","['T1398', 'T1420', 'T1429', 'T1430', 'T1437', 'T1521', 'T1533', 'T1624', 'T1646']","['T1398', 'T1420', 'T1429', 'T1430', 'T1437', 'T1521', 'T1533', 'T1624', 'T1646']","['T1401', 'T1402', 'T1403', 'T1409', 'T1413', 'T1415', 'T1416']",0.0,0.0,0.0,0,7,9,7,9
52
+ https://attack.mitre.org/software/S0319/,Mobile,Allwinner is a company that provides processors for Android tablets and various other devices. A Lin...,"**Identified Techniques:**
53
+
54
+ **T1403** - Backdoor: Adversaries may install a backdoor on a device to maintain persistent access or to provide a covert means of access. This technique was identified because the content explicitly states the Linux kernel ""contained a simple backdoor.""
55
+
56
+ **T1405** - Privilege Escalation: Adversaries may exploit vulnerabilities or misconfigurations to gain higher-level permissions on a device. This technique was identified because the backdoor ""could be exploited to g...","['T1403', 'T1405']",['T1474'],['T1474'],"['T1403', 'T1405']",0.0,0.0,0.0,0,2,1,2,1
57
+ https://attack.mitre.org/software/S1025/,Enterprise,Amadey is a Trojan bot that has been active since at least October 2018. It communicates with its co...,"**T1071** - Application Layer Protocol: Amadey communicates with its command and control (C2) servers via HTTP, which is a common application layer protocol.
58
+ **T1082** - System Information Discovery: Amadey collects information from compromised hosts and uses API calls like `GetComputerNameA` to gather system details.
59
+ **T1033** - System Owner/User Discovery: Amadey uses API calls like `GetUserNameA` to identify the current user of the compromised system.
60
+ **T1041** - Exfiltration Over C2 Channel:...","['T1027', 'T1033', 'T1041', 'T1059', 'T1071', 'T1082', 'T1105', 'T1518', 'T1547']","['T1005', 'T1027', 'T1041', 'T1071', 'T1083', 'T1105', 'T1106', 'T1112', 'T1140', 'T1547', 'T1568']","['T1005', 'T1083', 'T1106', 'T1112', 'T1140', 'T1568']","['T1033', 'T1059', 'T1082', 'T1518']",0.5555555555555556,0.45454545454545453,0.5,5,4,6,9,11
61
+ https://attack.mitre.org/software/S0504/,Enterprise,Anchor is a backdoor malware that has been deployed alongside TrickBot on select high-profile target...,"**T1105** - Ingress Tool Transfer: This technique involves transferring tools or files from an external system into a compromised environment, identified as Anchor is ""deployed alongside TrickBot"" and ""able to download additional payloads.""
62
+ **T1071** - Application Layer Protocol: This technique involves using common application layer protocols for command and control communication, identified as Anchor ""communicates with its command and control (C2) servers using HTTP, HTTPS, and in some variant...","['T1059', 'T1070', 'T1071', 'T1105', 'T1497', 'T1543', 'T1564']","['T1008', 'T1059', 'T1070', 'T1071', 'T1105', 'T1480', 'T1543', 'T1564']","['T1008', 'T1480']",['T1497'],0.8571428571428571,0.75,0.7999999999999999,6,1,2,7,8
63
+ https://attack.mitre.org/software/S0525/,Mobile,Android/AdDisplay.Ashas is a variant of adware that has been distributed through several apps on the...,"**T1475** - Compromise Application Store: Adversaries may distribute malicious applications by compromising legitimate application stores, as seen with Android/AdDisplay.Ashas being distributed via the Google Play Store.
64
+ **T1401** - Standard Application Layer Protocol: Adversaries may communicate with their command and control (C2) server using standard application layer protocols like HTTP, which Android/AdDisplay.Ashas uses for C2 communication.
65
+ **T1402** - Boot or Reboot Autostart: Adversarie...","['T1401', 'T1402', 'T1407', 'T1408', 'T1409', 'T1410', 'T1411', 'T1412', 'T1475']","['T1406', 'T1418', 'T1426', 'T1437', 'T1624', 'T1628', 'T1633', 'T1643', 'T1655']","['T1406', 'T1418', 'T1426', 'T1437', 'T1624', 'T1628', 'T1633', 'T1643', 'T1655']","['T1401', 'T1402', 'T1407', 'T1408', 'T1409', 'T1410', 'T1411', 'T1412', 'T1475']",0.0,0.0,0.0,0,9,9,9,9
66
+ https://attack.mitre.org/software/S0304/,Mobile,The Android malware known as Android/Chuli.A was distributed to activist groups through a spearphish...,"**T1447** - Phishing: The malware was distributed to activist groups through a spearphishing email containing an attachment, which is a common phishing technique for initial access.
67
+ **T1403** - Standard Application Layer Protocol: The malware utilized HTTP uploads for its command and control mechanism, which is a standard application layer protocol.
68
+ **T1418** - Location Tracking: The malware was capable of stealing geo-location information, indicating it tracked the device's location.
69
+ **T1410** ...","['T1403', 'T1406', 'T1410', 'T1411', 'T1412', 'T1418', 'T1420', 'T1447']","['T1426', 'T1430', 'T1437', 'T1636', 'T1644']","['T1426', 'T1430', 'T1437', 'T1636', 'T1644']","['T1403', 'T1406', 'T1410', 'T1411', 'T1412', 'T1418', 'T1420', 'T1447']",0.0,0.0,0.0,0,8,5,8,5
70
+ https://attack.mitre.org/software/S0524/,Mobile,AndroidOS/MalLocker.B is a variant of a ransomware family that targets Android devices by blocking u...,"**T1444** - UI Overlays: Adversaries may display content over other applications to trick users into performing actions or to obscure legitimate content, as seen when the malware displays a ransom note and a ""call"" notification screen over other windows.
71
+ **T1406** - Screen Lock: Adversaries may lock the screen of a mobile device to prevent user access, which is achieved by the malware blocking user interaction with the UI and continuously generating new notifications.
72
+ **T1400** - Broadcast Inten...","['T1400', 'T1406', 'T1444', 'T1476', 'T1477', 'T1478']","['T1406', 'T1624', 'T1629', 'T1655']","['T1624', 'T1629', 'T1655']","['T1400', 'T1444', 'T1476', 'T1477', 'T1478']",0.16666666666666666,0.25,0.2,1,5,3,6,4
73
+ https://attack.mitre.org/software/S0310/,Mobile,ANDROIDOS_ANSERVER.A is a distinctive Android malware known for utilizing encrypted content hosted o...,"**T1090** - Command and Control: The malware utilizes encrypted content hosted on a blog site and directs to additional servers to establish and maintain communication with external systems for command and control.
74
+ **T1112** - Device Information: The malware collects various device-specific details, including the OS version, build version, manufacturer, model, IMEI, and IMSI.
75
+
76
+ **Final Answer - Technique IDs:**
77
+ T1090, T1112...","['T1090', 'T1112']","['T1422', 'T1426', 'T1481']","['T1422', 'T1426', 'T1481']","['T1090', 'T1112']",0.0,0.0,0.0,0,2,3,2,3
78
+ https://attack.mitre.org/software/S1074/,Enterprise,ANDROMEDA is a widely recognized commodity malware that was prevalent in the early 2010s and continu...,"**T1568** - Dynamic Resolution: Threat actors re-registered expired command and control (C2) domains, which is a method to establish and resolve C2 infrastructure.
79
+ **T1071** - Standard Application Layer Protocol: ANDROMEDA uses GET requests over standard web protocols to communicate with its C2 server.
80
+ **T1105** - Ingress Tool Transfer: The malware downloads files and additional payloads from its C2 server onto the compromised system.
81
+ **T1547** - Boot or Logon Autostart Execution: ANDROMEDA esta...","['T1036', 'T1055', 'T1071', 'T1091', 'T1105', 'T1204', 'T1547', 'T1568']","['T1036', 'T1055', 'T1071', 'T1091', 'T1105', 'T1547']",[],"['T1204', 'T1568']",0.75,1.0,0.8571428571428571,6,2,0,8,6
82
+ https://attack.mitre.org/software/S0292/,Mobile,AndroRAT is an open-source remote access tool (RAT) designed for Android devices. It is capable of c...,"**T1105** - Location Tracking: This technique involves adversaries collecting the geographical location of a device, identified as AndroRAT can track the device’s location via GPS or network settings.
83
+ **T1124** - Call Log Collection: This technique involves adversaries collecting information about incoming and outgoing calls from the device, identified as AndroRAT is capable of collecting call logs.
84
+ **T1117** - SMS Manipulation: This technique involves adversaries sending, deleting, or modifying...","['T1100', 'T1105', 'T1113', 'T1115', 'T1117', 'T1123', 'T1124']","['T1422', 'T1429', 'T1430', 'T1512', 'T1582', 'T1616', 'T1636', 'T1655']","['T1422', 'T1429', 'T1430', 'T1512', 'T1582', 'T1616', 'T1636', 'T1655']","['T1100', 'T1105', 'T1113', 'T1115', 'T1117', 'T1123', 'T1124']",0.0,0.0,0.0,0,7,8,7,8
85
+ https://attack.mitre.org/software/S0422/,Mobile,Anubis is Android malware that was initially developed for cyber espionage but has since been repurp...,"**T1402** - Data Exfiltration: Anubis exfiltrates data, including encrypted files from its ransomware module, from the device.
86
+ **T1407** - Audio Capture: Anubis records phone calls and other audio from the device.
87
+ **T1408** - Make Phone Call: Anubis is capable of initiating phone calls from the device.
88
+ **T1476** - Data Encrypted for Impact: Anubis encrypts device data and holds it for ransom via its ransomware module.
89
+ **T1475** - Data Manipulation: Anubis modifies external storage on the device....","['T1402', 'T1405', 'T1406', 'T1407', 'T1408', 'T1418', 'T1444', 'T1446', 'T1475', 'T1476']","['T1407', 'T1417', 'T1429', 'T1430', 'T1471', 'T1532', 'T1533', 'T1616', 'T1629', 'T1655']","['T1417', 'T1429', 'T1430', 'T1471', 'T1532', 'T1533', 'T1616', 'T1629', 'T1655']","['T1402', 'T1405', 'T1406', 'T1408', 'T1418', 'T1444', 'T1446', 'T1475', 'T1476']",0.1,0.1,0.10000000000000002,1,9,9,10,10
90
+ https://attack.mitre.org/software/S0584/,Enterprise,"AppleJeus is a malware family of downloaders first discovered in 2018, embedded within trojanized cr...","**T1204** - User Execution: Adversaries may trick users into executing malicious code, as AppleJeus is embedded within trojanized cryptocurrency applications that users would run.
91
+ **T1105** - Ingress Tool Transfer: Adversaries may transfer tools or files from an external source into a compromised environment, which AppleJeus does by distributing the FALLCHILL Remote Access Trojan.
92
+ **T1548** - Abuse Elevation Control Mechanism: Adversaries may abuse elevation control mechanisms to bypass protecti...","['T1041', 'T1059', 'T1071', 'T1105', 'T1140', 'T1204', 'T1543', 'T1548']","['T1041', 'T1059', 'T1071', 'T1140', 'T1543', 'T1546', 'T1548']",['T1546'],"['T1105', 'T1204']",0.75,0.8571428571428571,0.7999999999999999,6,2,1,8,7
93
+ https://attack.mitre.org/software/S0622/,Enterprise,"AppleSeed is a backdoor used by the Kimsuky group to target South Korean government, academic, and c...","**T1134** - Access Token Manipulation: This technique involves modifying access tokens to gain higher privileges, identified as AppleSeed escalates privileges by manipulating `SeDebugPrivilege` via `AdjustTokenPrivilege`.
94
+ **T1071** - Application Layer Protocol: This technique describes using common application layer protocols for C2 communication, identified as AppleSeed communicates with its C2 server over HTTP.
95
+ **T1560** - Archive Collected Data: This technique involves compressing or encrypti...","['T1025', 'T1056', 'T1059', 'T1071', 'T1113', 'T1134', 'T1547', 'T1560']","['T1005', 'T1025', 'T1059', 'T1071', 'T1119', 'T1134', 'T1547', 'T1560']","['T1005', 'T1119']","['T1056', 'T1113']",0.75,0.75,0.75,6,2,2,8,8
96
+ https://attack.mitre.org/software/S0540/,Mobile,Asacub is a banking trojan designed to steal money from victims' bank accounts by initiating wire tr...,"**T1435** - SMS Messaging: This technique involves sending or receiving SMS messages, identified by Asacub initiating wire transfers and sending SMS messages from compromised devices.
97
+ **T1400** - Elevated Permissions: This technique involves an adversary gaining higher-level permissions on a device, identified by Asacub requesting device administrator permissions.
98
+ **T1432** - Standard Application Layer Protocol: This technique involves using common application layer protocols for command and con...","['T1400', 'T1406', 'T1407', 'T1409', 'T1413', 'T1414', 'T1432', 'T1435']","['T1406', 'T1422', 'T1426', 'T1437', 'T1532', 'T1575', 'T1582', 'T1626', 'T1636', 'T1655']","['T1422', 'T1426', 'T1437', 'T1532', 'T1575', 'T1582', 'T1626', 'T1636', 'T1655']","['T1400', 'T1407', 'T1409', 'T1413', 'T1414', 'T1432', 'T1435']",0.125,0.1,0.11111111111111112,1,7,9,8,10
99
+ https://attack.mitre.org/software/S0073/,Enterprise,ASPXSpy is a web shell that has been modified by Threat Group-3390 to create a variant known as ASPX...,"**T1505** - Server Software Component: Adversaries may abuse server software components to run malicious code, as identified by the deployment of the ASPXTool web shell on Internet Information Services (IIS) servers.
100
+
101
+ **T1588** - Obtain Capabilities: Adversaries may obtain capabilities, including developing custom tools or modifying existing ones, as seen with Threat Group-3390 modifying ASPXSpy to create the ASPXTool variant.
102
+
103
+ **Final Answer - Technique IDs:**
104
+ T1505, T1588...","['T1505', 'T1588']",['T1505'],[],['T1588'],0.5,1.0,0.6666666666666666,1,1,0,2,1
105
+ https://attack.mitre.org/software/S0110/,Enterprise,The `at` command is used to schedule tasks on a system to run at a specified date and time....,"**T1053** - Scheduled Task/Job: This technique involves adversaries using legitimate operating system utilities or features to schedule the execution of programs or scripts at a future time or on a recurring basis, and the `at` command is a direct example of such a utility used to schedule tasks.
106
+
107
+ **Final Answer - Technique IDs:**
108
+ T1053...",['T1053'],['T1053'],[],[],1.0,1.0,1.0,1,0,0,1,1
109
+ https://attack.mitre.org/software/S1029/,Enterprise,AuTo Stealer is malware written in C++ that has been used by SideCopy since at least December 2021 t...,"**T1071** - Application Layer Protocol: This technique describes the use of common application layer protocols like HTTP or TCP for command and control communication.
110
+ **T1547** - Boot or Logon Autostart Execution: This technique involves configuring a program to execute automatically when a system boots or a user logs in, such as by placing malicious executables in AutoRun registry keys or the StartUp directory.
111
+ **T1059** - Command and Scripting Interpreter: This technique involves the use of co...","['T1005', 'T1041', 'T1059', 'T1071', 'T1074', 'T1082', 'T1547']","['T1005', 'T1041', 'T1059', 'T1071', 'T1074', 'T1095', 'T1518', 'T1547']","['T1095', 'T1518']",['T1082'],0.8571428571428571,0.75,0.7999999999999999,6,1,2,7,8
112
+ https://attack.mitre.org/software/S0129/,Enterprise,The AutoIt backdoor is malware used by the threat actors behind the MONSOON campaign. It was frequen...,"**Identified Techniques:**
113
+
114
+ **T1566** - Phishing: The malware was frequently deployed via weaponized .pps files, which is a common method of delivering malicious content through phishing campaigns.
115
+ **T1204** - User Execution: The attack relies on a user opening the weaponized .pps file to initiate the infection chain.
116
+ **T1059** - Command and Scripting Interpreter: The malware leverages legitimate scripting languages like AutoIt and PowerShell for malicious execution.
117
+ **T1548** - Abuse Elevation ...","['T1005', 'T1027', 'T1059', 'T1071', 'T1083', 'T1105', 'T1204', 'T1548', 'T1566']","['T1059', 'T1083', 'T1132', 'T1548']",['T1132'],"['T1005', 'T1027', 'T1071', 'T1105', 'T1204', 'T1566']",0.3333333333333333,0.75,0.46153846153846156,3,6,1,9,4
118
+ https://attack.mitre.org/software/S0515/,Enterprise,"WellMail is a lightweight malware written in Golang used by APT29, similar in design and structure t...","**T1560** - Archive Collected Data: WellMail can archive files on the compromised host, which is a method adversaries use to compress or package data prior to exfiltration.
119
+ **T1041** - Exfiltration Over C2 Channel: WellMail can exfiltrate files from the victim machine, indicating it sends collected data back to the command and control server through its established communication channel.
120
+ **T1027** - Obfuscated Files or Information: WellMail can decompress scripts received from C2, demonstrating ...","['T1016', 'T1027', 'T1033', 'T1041', 'T1092', 'T1105', 'T1560', 'T1573']","['T1005', 'T1016', 'T1033', 'T1095', 'T1105', 'T1140', 'T1560', 'T1571', 'T1573']","['T1005', 'T1095', 'T1140', 'T1571']","['T1027', 'T1041', 'T1092']",0.625,0.5555555555555556,0.5882352941176471,5,3,4,8,9
121
+ https://attack.mitre.org/software/S1123/,Enterprise,PITSTOP is a backdoor deployed on compromised Ivanti Connect Secure VPNs during the Cutting Edge cam...,"**T1059** - Command and Scripting Interpreter: Adversaries may abuse command and scripting interpreters to execute commands, scripts, or binaries. This technique was identified because PITSTOP enables ""command execution"" and can ""receive shell commands"" over a Unix domain socket.
122
+
123
+ **T1083** - File and Directory Discovery: Adversaries may search network shares, remote systems, or local hosts for files and directories. This technique was identified because PITSTOP enables ""file read operations"" on...","['T1027', 'T1059', 'T1083', 'T1559', 'T1573']","['T1059', 'T1140', 'T1205', 'T1559', 'T1573']","['T1140', 'T1205']","['T1027', 'T1083']",0.6,0.6,0.6,3,2,2,5,5
124
+ https://attack.mitre.org/software/S1111/,Enterprise,"DarkGate, first identified in 2018, has evolved into a versatile tool used in various criminal cyber...","**T1555** - Stored Credentials: This technique involves adversaries attempting to gather credentials stored in various locations on a system, identified here as DarkGate performs ""credential theft"" and identifies ""stored credentials linked to cryptocurrency wallets"".
125
+
126
+ **T1496** - Resource Hijacking: This technique involves adversaries using a system's resources for purposes not intended by the owner, identified here as DarkGate engages in ""cryptomining"".
127
+
128
+ **T1083** - File and Directory Discovery...","['T1005', 'T1036', 'T1071', 'T1083', 'T1098', 'T1496', 'T1548', 'T1555']","['T1010', 'T1071', 'T1098', 'T1119', 'T1134', 'T1548', 'T1583']","['T1010', 'T1119', 'T1134', 'T1583']","['T1005', 'T1036', 'T1083', 'T1496', 'T1555']",0.375,0.42857142857142855,0.39999999999999997,3,5,4,8,7
129
+ https://attack.mitre.org/software/S1106/,Enterprise,NGLite is a backdoor Trojan designed to execute commands received through its command and control (C...,"**T1059** - Command and Scripting Interpreter: This technique was identified because NGLite is designed to ""execute commands received"" and specifically ""executes the 'whoami' command.""
130
+ **T1071** - Application Layer Protocol: NGLite uses the NKN protocol and HTTP POST requests for its command and control communication, which are application layer protocols.
131
+ **T1573** - Encrypted Channel: NGLite utilizes an ""AES-encrypted channel for C2 communication"" to protect its traffic.
132
+ **T1082** - System Inf...","['T1041', 'T1059', 'T1071', 'T1082', 'T1573']","['T1016', 'T1033', 'T1071', 'T1090', 'T1573']","['T1016', 'T1033', 'T1090']","['T1041', 'T1059', 'T1082']",0.4,0.4,0.4000000000000001,2,3,3,5,5
133
+ https://attack.mitre.org/software/S1124/,Enterprise,SocGholish is a JavaScript-based loader malware that has been active since at least 2017. It has bee...,"**T1189** - Drive-by Compromise: This technique involves adversaries compromising a legitimate website and injecting code that forces a user's browser to download malware, identified as SocGholish gains initial access through drive-by downloads.
134
+
135
+ **T1036** - Masquerading: This technique involves adversaries attempting to mimic legitimate files or processes to evade detection, identified as SocGholish being disguised as software updates and named `AutoUpdater.js`.
136
+
137
+ **T1105** - Ingress Tool Transf...","['T1027', 'T1036', 'T1041', 'T1059', 'T1082', 'T1105', 'T1189', 'T1482', 'T1566']","['T1027', 'T1036', 'T1048', 'T1059', 'T1074', 'T1105', 'T1189', 'T1482', 'T1566']","['T1048', 'T1074']","['T1041', 'T1082']",0.7777777777777778,0.7777777777777778,0.7777777777777778,7,2,2,9,9
138
+ https://attack.mitre.org/software/S1128/,Mobile,HilalRAT is a remote access Android malware developed and used by UNC788. It has the capability to c...,"**T1401** - Location Tracking: Adversaries may collect the device's current or historical location data, identified as HilalRAT collects ""device location.""
139
+ **T1400** - Call Log Collection: Adversaries may collect call logs from the device, identified as HilalRAT collects ""call logs.""
140
+ **T1403** - Audio/Video Recording: Adversaries may record audio or video from the device's microphone and camera, identified as HilalRAT can ""activate a device's camera and microphone.""
141
+ **T1402** - Contact List Coll...","['T1400', 'T1401', 'T1402', 'T1403', 'T1404', 'T1406', 'T1407']","['T1409', 'T1429', 'T1430', 'T1512', 'T1636']","['T1409', 'T1429', 'T1430', 'T1512', 'T1636']","['T1400', 'T1401', 'T1402', 'T1403', 'T1404', 'T1406', 'T1407']",0.0,0.0,0.0,0,7,5,7,5
142
+ https://attack.mitre.org/software/S1102/,Enterprise,Pcexter is an uploader used by ToddyCat since at least 2023 to exfiltrate stolen files. Pcexter can...,"**T1567** - Exfiltration Over Web Service: This technique involves adversaries exfiltrating data using a web service, identified because Pcexter uploads and exfiltrates files to ""OneDrive storage accounts"" via HTTP POST.
143
+ **T1083** - File and Directory Discovery: This technique involves adversaries searching for files and directories to identify data of interest, identified because Pcexter is capable of ""searching for files within specified directories"".
144
+ **T1574** - Hijack Execution Flow: This te...","['T1083', 'T1567', 'T1574']","['T1005', 'T1083', 'T1567', 'T1574']",['T1005'],[],1.0,0.75,0.8571428571428571,3,0,1,3,4
145
+ https://attack.mitre.org/software/S1110/,Enterprise,"SLIGHTPULSE is a web shell that has been used by APT5 since at least 2020, including in attacks agai...","**Identified Techniques:**
146
+
147
+ **T1505** - Server Software Component: This technique involves adversaries abusing server software components to run malicious code, and SLIGHTPULSE is explicitly identified as a ""web shell"" used for this purpose.
148
+
149
+ **T1059** - Command and Scripting Interpreter: This technique describes adversaries executing commands through a command-line interface, which is directly supported by SLIGHTPULSE's capability to ""execute arbitrary commands.""
150
+
151
+ **T1083** - File and Directory...","['T1001', 'T1059', 'T1071', 'T1083', 'T1105', 'T1505']","['T1005', 'T1059', 'T1071', 'T1074', 'T1132', 'T1140', 'T1573']","['T1005', 'T1074', 'T1132', 'T1140', 'T1573']","['T1001', 'T1083', 'T1105', 'T1505']",0.3333333333333333,0.2857142857142857,0.30769230769230765,2,4,5,6,7
152
+ https://attack.mitre.org/software/S1112/,Enterprise,STEADYPULSE is a web shell that targets Pulse Secure VPN servers by modifying a legitimate Perl scri...,"**T1505** - Server Software Component: This technique involves adversaries abusing server software components, such as web servers, to run arbitrary commands or maintain persistent access, which is precisely what a web shell like STEADYPULSE does by enabling arbitrary command execution on compromised web servers.
153
+
154
+ **T1574** - Hijack Execution Flow: This technique describes adversaries manipulating how programs are executed to run their own code, which is achieved by STEADYPULSE modifying legitim...","['T1071', 'T1505', 'T1574']","['T1071', 'T1105', 'T1132', 'T1140', 'T1505']","['T1105', 'T1132', 'T1140']",['T1574'],0.6666666666666666,0.4,0.5,2,1,3,3,5
155
+ https://attack.mitre.org/software/S1120/,Enterprise,FRAMESTING is a Python-based web shell used during the Cutting Edge campaign to infiltrate Ivanti Co...,"**T1505** - Server Software Component: Adversaries may abuse server software components to run arbitrary commands, enable a persistent backdoor, or access sensitive information, which is identified as FRAMESTING is explicitly described as a ""Python-based web shell.""
156
+ **T1059** - Command and Scripting Interpreter: Adversaries may abuse command and scripting interpreters to execute commands, scripts, or binaries, which is identified as FRAMESTING is Python-based and enables the ""execution of arbitr...","['T1001', 'T1059', 'T1071', 'T1505']","['T1001', 'T1059', 'T1071', 'T1140', 'T1505', 'T1554']","['T1140', 'T1554']",[],1.0,0.6666666666666666,0.8,4,0,2,4,6
157
+ https://attack.mitre.org/software/S1105/,Enterprise,COATHANGER is a remote access tool (RAT) designed to target FortiGate networking appliances. It was ...,"**T1219** - Remote Access Software: COATHANGER is explicitly described as a remote access tool (RAT), which is software used to remotely control a system.
158
+ **T1071** - Application Layer Protocol: The malware uses HTTP GET requests and TLS/SSL tunnels for command and control (C2) communication, leveraging common application layer protocols.
159
+ **T1059** - Command and Scripting Interpreter: COATHANGER provides a BusyBox reverse shell for C2 operations, allowing the adversary to execute commands on the...","['T1027', 'T1059', 'T1071', 'T1083', 'T1190', 'T1219', 'T1543', 'T1548', 'T1564', 'T1574']","['T1059', 'T1071', 'T1083', 'T1140', 'T1190', 'T1222', 'T1543', 'T1564', 'T1573', 'T1574']","['T1140', 'T1222', 'T1573']","['T1027', 'T1219', 'T1548']",0.7,0.7,0.7,7,3,3,10,10
160
+ https://attack.mitre.org/software/S1116/,Enterprise,WARPWIRE is a JavaScript-based credential stealer that targets plaintext usernames and passwords for...,"**T1056** - Input Capture: This technique involves adversaries collecting data from input devices, and it was identified because WARPWIRE intercepts credentials submitted during the web logon process.
161
+ **T1036** - Masquerading: This technique involves adversaries attempting to hide their malicious code or activity by making it appear legitimate, and it was identified because WARPWIRE embeds itself into legitimate files.
162
+ **T1027** - Obfuscated Files or Information: This technique involves adversar...","['T1027', 'T1036', 'T1041', 'T1056', 'T1071']","['T1048', 'T1056', 'T1059', 'T1132', 'T1554']","['T1048', 'T1059', 'T1132', 'T1554']","['T1027', 'T1036', 'T1041', 'T1071']",0.2,0.2,0.20000000000000004,1,4,4,5,5
163
+ https://attack.mitre.org/software/S1125/,Enterprise,AcidRain is an ELF binary designed to target modems and routers using MIPS architecture. It is linke...,"**T1485** - Data Destruction: Adversaries may destroy data and files on a system or network to interrupt availability or compromise integrity, which AcidRain achieves by conducting a comprehensive wipe of the target filesystem and connected storage devices through overwriting data or using IOCTL commands.
164
+
165
+ **T1529** - System Shutdown/Reboot: Adversaries may shut down or reboot systems to interrupt availability or to potentially cover their tracks, as AcidRain reboots the compromised system after...","['T1485', 'T1529']","['T1083', 'T1485', 'T1529', 'T1561']","['T1083', 'T1561']",[],1.0,0.5,0.6666666666666666,2,0,2,2,4
166
+ https://attack.mitre.org/software/S1101/,Enterprise,LoFiSe has been used by ToddyCat since at least 2023 to identify and collect files of interest on ta...,"**T1005** - Data from Local System: This technique was identified because the malware is described as identifying and collecting ""files of interest"" and targeting specific file types and extensions from the compromised system.
167
+ **T1074** - Data Staged: This technique was identified because the malware saves collected files in specific directories like `C:\ProgramData\Microsoft\` and `C:\Windows\Temp\` for ""further evaluation and exfiltration.""
168
+ **T1119** - Automated Collection: This technique was ...","['T1005', 'T1074', 'T1119', 'T1560', 'T1574']","['T1005', 'T1074', 'T1083', 'T1119', 'T1560', 'T1574']",['T1083'],[],1.0,0.8333333333333334,0.9090909090909091,5,0,1,5,6
169
+ https://attack.mitre.org/software/S1119/,Enterprise,LIGHTWIRE is a Perl-based web shell used during the Cutting Edge campaign to maintain access and ena...,"**T1505** - Server Software Component: The content describes LIGHTWIRE as a Perl-based web shell that embeds itself into a legitimate server component (`compcheckresult.cgi`) to maintain access and establish persistence.
170
+ **T1059** - Command and Scripting Interpreter: LIGHTWIRE is a Perl-based web shell designed to enable and facilitate command execution on the compromised system.
171
+ **T1071** - Application Layer Protocol: LIGHTWIRE communicates with its command and control server over HTTP, an appl...","['T1027', 'T1059', 'T1071', 'T1505']","['T1071', 'T1140', 'T1505', 'T1554', 'T1573']","['T1140', 'T1554', 'T1573']","['T1027', 'T1059']",0.5,0.4,0.4444444444444445,2,2,3,4,5
172
+ https://attack.mitre.org/software/S1122/,Enterprise,"Mispadu is a banking trojan written in Delphi, first observed in 2019, that operates under a Malware...","**T1547** - Boot or Logon Autostart Execution: Mispadu establishes persistence by creating a link in the startup folder and adding an entry to the registry key `HKCU\Software\Microsoft\Windows\CurrentVersion\Run` to ensure automatic execution.
173
+ **T1176** - Browser Extensions: Mispadu utilizes malicious Google Chrome extensions to steal financial data and monitor browser activity.
174
+ **T1056** - Input Capture: Mispadu monitors browser activity, displays full-screen overlays to prompt for additional d...","['T1027', 'T1041', 'T1056', 'T1059', 'T1176', 'T1547', 'T1555', 'T1573']","['T1041', 'T1059', 'T1115', 'T1140', 'T1176', 'T1217', 'T1547', 'T1555', 'T1573']","['T1115', 'T1140', 'T1217']","['T1027', 'T1056']",0.75,0.6666666666666666,0.7058823529411765,6,2,3,8,9
175
+ https://attack.mitre.org/software/S1115/,Enterprise,WIREFIRE is a web shell written in Python that exists as trojanized logic to the visits.py component...,"**T1505** - Server Software Component: This technique involves adversaries using or modifying legitimate server software components, such as a web server, to host malicious code like a web shell for remote access.
176
+ **T1105** - Ingress Tool Transfer: This technique involves adversaries transferring tools or files from an external system into a compromised environment, as WIREFIRE has the ability to download files to compromised devices.
177
+ **T1059** - Command and Scripting Interpreter: This technique...","['T1022', 'T1059', 'T1071', 'T1105', 'T1505', 'T1573']","['T1071', 'T1105', 'T1132', 'T1140', 'T1505', 'T1554', 'T1573']","['T1132', 'T1140', 'T1554']","['T1022', 'T1059']",0.6666666666666666,0.5714285714285714,0.6153846153846153,4,2,3,6,7
178
+ https://attack.mitre.org/software/S1121/,Enterprise,LITTLELAMB.WOOLTEA is a backdoor that was used by UNC5325 during Cutting Edge to deploy malware on t...,"**T1547** - Boot or Logon Autostart Execution: This technique involves adversaries establishing persistence by executing malicious content at system startup, identified as LITTLELAMB.WOOLTEA appends malicious components to a system archive in the factory reset partition to persist post-reset.
179
+
180
+ **T1543** - Create or Modify System Process: This technique involves adversaries creating or modifying system processes to establish persistence, identified as LITTLELAMB.WOOLTEA initializes itself as a da...","['T1071', 'T1082', 'T1083', 'T1090', 'T1543', 'T1547']","['T1082', 'T1083', 'T1090', 'T1095', 'T1543', 'T1554', 'T1573']","['T1095', 'T1554', 'T1573']","['T1071', 'T1547']",0.6666666666666666,0.5714285714285714,0.6153846153846153,4,2,3,6,7
181
+ https://attack.mitre.org/software/S1103/,Mobile,"FlixOnline is an Android malware, first detected in early 2021, believed to target users of WhatsApp...","**T1446** - Message Hooking: This technique involves intercepting or generating messages within a messaging application, which is evident as FlixOnline spreads and sends automatic replies via WhatsApp messages.
182
+ **T1404** - Notification Manipulation: This technique involves modifying or creating notifications to deceive users or gather information, which FlixOnline achieves by requesting NotificationListenerService access.
183
+ **T1401** - Boot Persistence: This technique involves configuring malware ...","['T1401', 'T1404', 'T1405', 'T1407', 'T1409', 'T1446']","['T1409', 'T1417', 'T1517', 'T1624', 'T1628', 'T1643']","['T1417', 'T1517', 'T1624', 'T1628', 'T1643']","['T1401', 'T1404', 'T1405', 'T1407', 'T1446']",0.16666666666666666,0.16666666666666666,0.16666666666666666,1,5,5,6,6
184
+ https://attack.mitre.org/software/S1109/,Enterprise,PACEMAKER is a credential stealer that was used by APT5 as early as 2020 including activity against ...,"**T1003** - OS Credential Dumping: This technique involves adversaries attempting to collect credentials from the operating system and software, identified by PACEMAKER's ability to ""extract credentials from OS memory"" and its description as a ""credential stealer.""
185
+
186
+ **T1057** - Process Discovery: This technique involves adversaries attempting to get information about running processes on a system, identified by PACEMAKER's actions to ""read /proc/ entries"" and ""parse /proc/""process_name""/cmdline""...","['T1003', 'T1055', 'T1057', 'T1059', 'T1074']","['T1003', 'T1055', 'T1059', 'T1074', 'T1083', 'T1119']","['T1083', 'T1119']",['T1057'],0.8,0.6666666666666666,0.7272727272727272,4,1,2,5,6
187
+ https://attack.mitre.org/software/S1114/,Enterprise,ZIPLINE is a passive backdoor that was used during Cutting Edge on compromised Secure Connect VPNs f...,"**T1090** - Proxy: ZIPLINE establishes a communication channel through a compromised host by creating a proxy server and using reverse shell functionality, which are both methods of proxying network traffic.
188
+ **T1059** - Command and Scripting Interpreter: ZIPLINE leverages `/bin/sh` to create reverse shells and execute commands, directly utilizing a command-line interpreter for execution.
189
+ **T1573** - Encrypted Channel: ZIPLINE encrypts data using AES-128-CBC for both upload and download, indicati...","['T1040', 'T1057', 'T1059', 'T1083', 'T1090', 'T1094', 'T1105', 'T1562', 'T1573']","['T1057', 'T1059', 'T1083', 'T1090', 'T1095', 'T1105', 'T1205', 'T1562', 'T1573']","['T1095', 'T1205']","['T1040', 'T1094']",0.7777777777777778,0.7777777777777778,0.7777777777777778,7,2,2,9,9
190
+ https://attack.mitre.org/software/S1100/,Enterprise,Ninja is a malware developed in C++ that has been used by ToddyCat to penetrate networks and control...,"**T1071** - Application Layer Protocol: This technique involves using common application layer protocols for command and control, identified as Ninja uses HTTP for C2 communications.
191
+ **T1543** - Create or Modify System Process: This technique involves adversaries creating or modifying system processes to achieve execution or persistence, identified as Ninja creates services for persistence.
192
+ **T1027** - Obfuscated Files or Information: This technique involves adversaries making their files or inf...","['T1027', 'T1036', 'T1070', 'T1071', 'T1083', 'T1112', 'T1543', 'T1573', 'T1574']","['T1001', 'T1070', 'T1071', 'T1083', 'T1132', 'T1140', 'T1480', 'T1543', 'T1573', 'T1574']","['T1001', 'T1132', 'T1140', 'T1480']","['T1027', 'T1036', 'T1112']",0.6666666666666666,0.6,0.631578947368421,6,3,4,9,10
193
+ https://attack.mitre.org/software/S1099/,Enterprise,Samurai is a passive backdoor that has been used by ToddyCat since at least 2020. Samurai allows arb...,"**T1219** - Remote Access Tool: Samurai functions as a backdoor for remote administration, allowing external control over compromised systems.
194
+ **T1059** - Command and Scripting Interpreter: Samurai executes arbitrary C# code and commands via the Windows command line, leveraging scripting interpreters for execution.
195
+ **T1071** - Application Layer Protocol: Samurai uses HTTP POST requests for command and control communications, utilizing a standard application layer protocol.
196
+ **T1543** - Create or ...","['T1041', 'T1059', 'T1071', 'T1074', 'T1083', 'T1105', 'T1106', 'T1132', 'T1219', 'T1543', 'T1573', 'T1574']","['T1005', 'T1036', 'T1059', 'T1071', 'T1083', 'T1105', 'T1106', 'T1112', 'T1132', 'T1543', 'T1573']","['T1005', 'T1036', 'T1112']","['T1041', 'T1074', 'T1219', 'T1574']",0.6666666666666666,0.7272727272727273,0.6956521739130435,8,4,3,12,11
197
+ https://attack.mitre.org/software/S1118/,Enterprise,BUSHWALK is a web shell written in Perl that was inserted into the legitimate querymanifest.cgi file...,"**T1505** - Server Software Component: BUSHWALK is described as a web shell inserted into a legitimate server file, which is a type of malicious server software component used by adversaries to maintain access and execute commands.
198
+ **T1036** - Masquerading: BUSHWALK is inserted into a legitimate file (`querymanifest.cgi`) and modifies a legitimate Perl module (`DSUserAgentCap.pm`), which are actions taken by adversaries to appear as legitimate system or application activity.
199
+ **T1140** - Deobfusc...","['T1036', 'T1059', 'T1140', 'T1505', 'T1564', 'T1573']","['T1027', 'T1105', 'T1140', 'T1205', 'T1505', 'T1554']","['T1027', 'T1105', 'T1205', 'T1554']","['T1036', 'T1059', 'T1564', 'T1573']",0.3333333333333333,0.3333333333333333,0.3333333333333333,2,4,4,6,6
200
+ https://attack.mitre.org/software/S1129/,Enterprise,"Akira ransomware, written in C++, is most prominently (but not exclusively) associated with the a ra...","**T1490** - Inhibit System Recovery: Akira deletes system volume shadow copies via PowerShell commands to prevent victims from restoring their systems, a common ransomware tactic.
201
+ **T1059** - Command and Scripting Interpreter: Akira executes PowerShell commands and runs from the Windows command line, leveraging native operating system interpreters to perform its actions.
202
+ **T1486** - Data Encrypted for Impact: Akira encrypts victim filesystems, rendering data inaccessible and demanding a ransom f...","['T1047', 'T1057', 'T1059', 'T1082', 'T1083', 'T1135', 'T1486', 'T1490']","['T1047', 'T1057', 'T1059', 'T1082', 'T1083', 'T1106', 'T1135', 'T1486', 'T1490']",['T1106'],[],1.0,0.8888888888888888,0.9411764705882353,8,0,1,8,9
203
+ https://attack.mitre.org/software/S1107/,Enterprise,"NKAbuse is a Go-based, multi-platform malware abusing NKN (New Kind of Network) technology for data ...","**T1498** - Network Denial of Service: Adversaries may flood a network with traffic to prevent legitimate users from accessing services, as NKAbuse is equipped with ""flooder"" and ""network denial of service capabilities across several protocols.""
204
+
205
+ **T1059** - Command and Scripting Interpreter: Adversaries may abuse command and scripting interpreters to execute commands, scripts, or binaries, which is evident as NKAbuse is ""initially installed and executed through an initial shell script.""
206
+
207
+ **T108...","['T1016', 'T1053', 'T1059', 'T1082', 'T1102', 'T1113', 'T1498']","['T1016', 'T1053', 'T1057', 'T1059', 'T1082', 'T1090', 'T1113', 'T1498']","['T1057', 'T1090']",['T1102'],0.8571428571428571,0.75,0.7999999999999999,6,1,2,7,8
208
+ https://attack.mitre.org/software/S1104/,Enterprise,SLOWPULSE is a malware that was used by APT5 as early as 2020 including against U.S. Defense Industr...,"**Identified Techniques:**
209
+
210
+ **T1574** - Hijack Execution Flow: SLOWPULSE modifies legitimate Pulse Secure VPN files, including `libdsplibs.so`, to insert malicious logic and alter the normal execution flow for logging credentials and bypassing authentication.
211
+ **T1564** - Hide Artifacts: SLOWPULSE hides its malicious code within the padding regions between legitimate functions in the Pulse Secure `libdsplibs.so` file to evade detection.
212
+ **T1056** - Input Capture: SLOWPULSE logs and inspects login...","['T1056', 'T1556', 'T1564', 'T1574']","['T1027', 'T1074', 'T1111', 'T1554', 'T1556']","['T1027', 'T1074', 'T1111', 'T1554']","['T1056', 'T1564', 'T1574']",0.25,0.2,0.22222222222222224,1,3,4,4,5
213
+ https://attack.mitre.org/software/S1113/,Enterprise,RAPIDPULSE is a web shell that exists as a modification to a legitimate Pulse Secure file that has b...,"**T1505** - Server Software Component: RAPIDPULSE is explicitly described as a ""web shell,"" which is a type of malicious server software component used by adversaries to maintain access and execute commands on a compromised server.
214
+ **T1071** - Application Layer Protocol: RAPIDPULSE uses HTTP query parameters for command and control, listening for specific parameters and responding via HTTP, which are characteristics of using application layer protocols for C2.
215
+ **T1005** - Data from Local System:...","['T1005', 'T1027', 'T1071', 'T1505', 'T1567']","['T1005', 'T1027', 'T1140', 'T1505']",['T1140'],"['T1071', 'T1567']",0.6,0.75,0.6666666666666665,3,2,1,5,4
216
+ https://attack.mitre.org/software/S1108/,Enterprise,PULSECHECK is a web shell written in Perl that was used by APT5 as early as 2020 including against P...,"**T1505** - Server Software Component: Adversaries may abuse server software components to run malicious code. PULSECHECK is explicitly identified as a web shell, which is a common example of this technique used for remote access.
217
+
218
+ **T1059** - Command and Scripting Interpreter: Adversaries may abuse command and scripting interpreters to execute commands, scripts, or binaries. PULSECHECK is described as using Unix shell scripts for command execution on compromised servers.
219
+
220
+ **T1071** - Applicatio...","['T1001', 'T1041', 'T1059', 'T1071', 'T1505']","['T1059', 'T1071', 'T1132', 'T1505']",['T1132'],"['T1001', 'T1041']",0.6,0.75,0.6666666666666665,3,2,1,5,4
221
+ https://attack.mitre.org/software/S1126/,Mobile,Phenakite is a mobile malware that is used by APT-C-23 to target iOS devices. According to several r...,"**Identified Techniques:**
222
+
223
+ **T1123** - Audio Capture: This technique involves capturing audio from the device's microphone, which Phenakite does by recording phone calls.
224
+
225
+ **T1119** - Data from Local System: This technique involves collecting specific files from the device's file system, as Phenakite collects WhatsApp media, photos, and documents.
226
+
227
+ **T1113** - Exfiltration: This technique involves transferring collected data from the compromised device to an external location, which Phenakite d...","['T1105', 'T1111', 'T1113', 'T1119', 'T1122', 'T1123', 'T1124', 'T1125', 'T1126', 'T1405', 'T1406']","['T1404', 'T1417', 'T1426', 'T1429', 'T1512', 'T1533', 'T1544', 'T1636', 'T1655']","['T1404', 'T1417', 'T1426', 'T1429', 'T1512', 'T1533', 'T1544', 'T1636', 'T1655']","['T1105', 'T1111', 'T1113', 'T1119', 'T1122', 'T1123', 'T1124', 'T1125', 'T1126', 'T1405', 'T1406']",0.0,0.0,0.0,0,11,9,11,9
228
+ https://attack.mitre.org/software/S1117/,Enterprise,GLASSTOKEN is a custom web shell used by threat actors during Cutting Edge to execute commands on co...,"**T1505** - Server Software Component: Adversaries may abuse server software components to run malicious code, which is identified by GLASSTOKEN being described as a custom web shell.
229
+ **T1059** - Command and Scripting Interpreter: Adversaries may abuse command and scripting interpreters to execute commands, scripts, or binaries, as GLASSTOKEN is used to execute commands and can specifically use PowerShell.
230
+ **T1001** - Data Obfuscation: Adversaries may obfuscate data to make it more difficult to ...","['T1001', 'T1059', 'T1505', 'T1572']","['T1059', 'T1132', 'T1140', 'T1505']","['T1132', 'T1140']","['T1001', 'T1572']",0.5,0.5,0.5,2,2,2,4,4
src/agents/cti_agent/tool_evaluation_results/extract_mitre_techniques_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "macro_precision": 0.5440542328042329,
3
+ "macro_recall": 0.5296301359459255,
4
+ "macro_f1": 0.5290994928728809,
5
+ "micro_precision": 0.5181347150259067,
6
+ "micro_recall": 0.5037783375314862,
7
+ "micro_f1": 0.5108556832694764,
8
+ "total_samples": 60,
9
+ "total_tp": 200,
10
+ "total_fp": 186,
11
+ "total_fn": 197
12
+ }
src/agents/cti_agent/tool_evaluation_results/identify_threat_actors_results.csv ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ sample_id,report_snippet,tool_output,predicted_actor,is_correct,validator_notes,needs_review
2
+ https://www.seqrite.com/blog/sidecopys-multi-platform-onslaught-leveraging-winrar-zero-day-and-linux-variant-of-ares-rat/,"SEQRITE Labs APT-Team has discovered multiple campaigns of APT [PLACEHOLDER], targeting Indian gover...","**Q: What threat actor/APT group is discussed?**
3
+ A: APT [PLACEHOLDER], identified as a sub-division of Transparent Tribe (APT36).
4
+
5
+ **Q: What is this actor known for?**
6
+ A: Exploiting vulnerabilities (e.g., WinRAR CVE-2023-38831), deploying various RATs (AllaKore, DRat, Ares, Action, Double Action, Reverse, Margulas, Crimson, Capra, Oblique, BackNet), conducting multi-platform attacks, sharing infrastructure and code with Transparent Tribe, using honey traps, and employing PowerShell remote execut...","APT [PLACEHOLDER], identified as a sub-division of Transparent Tribe",0.5,,False
7
+ https://csirt-cti.net/2024/01/23/stately-taurus-targets-myanmar/,The recent ethnic rebel attacks in Myanmar have put the Myanmar junta and surrounding countries on h...,"**Q: What threat actor/APT group is discussed?**
8
+ A: [PLACEHOLDER] (also referred to as Stately Taurus)
9
+
10
+ **Q: What is this actor known for?**
11
+ A: Performing cyberespionage activities since at least 2012, leveraging DLL sideloading with legitimate software, disguising C2 traffic as Microsoft update traffic, and deploying PlugX implants.
12
+
13
+ **Q: What campaigns/operations are mentioned?**
14
+ A:
15
+ * Analysis of the third meeting of NDSC.zip (November 9th, 2023)
16
+ * ASEAN Notes.iso (January 17th, 2024)
17
+
18
+ **Q...",[PLACEHOLDER],True,,False
19
+ https://unit42.paloaltonetworks.com/stately-taurus-attacks-se-asian-government/,An advanced persistent threat (APT) group suspected with moderate-high confidence to be [PLACEHOLDER...,"**Q: What threat actor/APT group is discussed?**
20
+ A: None identified (referred to as an APT group suspected with moderate-high confidence to be [PLACEHOLDER]).
21
+
22
+ **Q: What is this actor known for?**
23
+ A: Engaging in cyberespionage intrusions, gathering, and exfiltrating sensitive documents and files from compromised networks.
24
+
25
+ **Q: What campaigns/operations are mentioned?**
26
+ A: CL-STA-0044 (from Q2 2021 to Q3 2023).
27
+
28
+ **Q: What is their suspected origin/attribution?**
29
+ A: Chinese state-sponsored (moder...",,False,,False
30
+ https://medium.com/@zyadlzyatsoc/comprehensive-analysis-of-emotet-malware-part-1-by-zyad-elzyat-35d5cf33a3c0,"[PLACEHOLDER], a notorious name in the realm of cyber threats, has loomed large over the digital lan...",No threat actor attribution in content....,,False,,False
31
+ https://www.fortinet.com/blog/threat-research/bandook-persistent-threat-that-keeps-evolving,Bandook malware is a remote access trojan that has been continuously developed since it was first de...,No threat actor attribution in content....,,False,,False
32
+ https://research.checkpoint.com/2024/sharp-dragon-expands-towards-africa-and-the-caribbean/,"Since 2021, Check Point Research has been closely monitoring the activities of [PLACEHOLDER], a Chin...","**Q: What threat actor/APT group is discussed?**
33
+ A: [PLACEHOLDER] (Name redacted in content)
34
+
35
+ **Q: What is this actor known for?**
36
+ A: Historically known for highly-targeted phishing emails deploying VictoryDLL or Soul framework, primarily in South-East Asia. Recently shifted to targeting governmental organizations in Africa and the Caribbean, using compromised email accounts, RoyalRoad-weaponized documents, and Cobalt Strike Beacon. They show increasing operational security awareness and use com...",[PLACEHOLDER],0.5,,False
37
+ https://www.microsoft.com/en-us/security/blog/2024/05/15/threat-actors-misusing-quick-assist-in-social-engineering-attacks-leading-to-ransomware/,"Since mid-April 2024, Microsoft Threat Intelligence has observed the threat actor [PLACEHOLDER] misu...","**Q: What threat actor/APT group is discussed?**
38
+ A: None identified by a specific name. The primary actor is referred to as a financially motivated cybercriminal group. Storm-0303 is mentioned as a ransomware-linked emerging threat activity group and Qakbot distributor.
39
+
40
+ **Q: What is this actor known for?**
41
+ A: A financially motivated cybercriminal group known for deploying ransomware (specifically [PLACEHOLDER] ransomware). They gain initial access through social engineering, including vishing a...",None identified by a specific name. The primary actor is referred to as a financially motivated cybercriminal group. Storm-0303 is mentioned as a ransomware-linked emerging threat activity group and Qakbot distributor.,False,,False
42
+ https://blogs.blackberry.com/en/2023/02/blind-eagle-apt-c-36-targets-colombia,[PLACEHOLDER] has been actively targeting organizations in Colombia and Ecuador since at least 2019....,"**Q: What threat actor/APT group is discussed?**
43
+ A: None identified (referred to as `[PLACEHOLDER]`)
44
+
45
+ **Q: What is this actor known for?**
46
+ A: A South American cyber espionage group active since at least 2019, known for targeting Latin America-based entities, primarily in Colombia. They rely on spear-phishing emails impersonating government agencies (like the Colombian tax agency DIAN) and frequently use RATs such as AsyncRAT, njRAT, QuasarRAT, LimeRAT, and RemcosRAT, often leveraging Dynamic DNS...",,False,,False
47
+ https://research.checkpoint.com/2023/blindeagle-targeting-ecuador-with-sharpened-tools/,"ACTIVE CAMPAIGNS AGAINST COLOMBIAN TARGETS For the last few months, we have been observing the ongo...","**Q: What threat actor/APT group is discussed?**
48
+ A: The text refers to an APT group using the placeholder ""[PLACEHOLDER]"". No specific name or alias is provided.
49
+
50
+ **Q: What is this actor known for?**
51
+ A: This actor is known for orchestrating campaigns with a narrow geographical focus, primarily targeting a single country. They refine their tools, add features to leaked code bases (like QuasarRAT), and experiment with elaborate infection chains and ""Living off the Land"" techniques.
52
+
53
+ **Q: What camp...","The text refers to an APT group using the placeholder ""[PLACEHOLDER]"". No specific name or alias is provided.",True,,False
54
+ https://cloud.google.com/blog/topics/threat-intelligence/turla-galaxy-opportunity/,"USB Spreading As Mandiant recently wrote about in our blog post, Always Another Secret: Lifting the...","**Q: What threat actor/APT group is discussed?**
55
+ A: UNC4210. This group is suspected to be a cluster or team associated with a larger, unnamed (redacted) threat group.
56
+
57
+ **Q: What is this actor known for?**
58
+ A: Gaining initial access via USB spreading malware, extensive victim profiling, using older malware strains, re-registering expired C2 domains, and data collection/exfiltration. Their operational tactics are consistent with planning for advantageous positioning and leveraging USBs.
59
+
60
+ **Q: What...","UNC4210. This group is suspected to be a cluster or team associated with a larger, unnamed",True,,False
61
+ https://www.telsy.com/en/turla-venomous-bear-updates-its-arsenal-newpass-appears-on-the-apt-threat-scene/,Recently Telsy observed some artifacts related to an attack that occurred in June 2020 that is most ...,"**Q: What threat actor/APT group is discussed?**
62
+ A: None identified by name (placeholder used for a popular Russian APT).
63
+
64
+ **Q: What is this actor known for?**
65
+ A: Being a popular Russian Advanced Persistent Threat (APT) group.
66
+
67
+ **Q: What campaigns/operations are mentioned?**
68
+ A: An attack observed in June 2020. No specific campaign name.
69
+
70
+ **Q: What is their suspected origin/attribution?**
71
+ A: Russian state-sponsored (High confidence).
72
+
73
+ **Q: Who/what do they target?**
74
+ A: At least one European Union...",None identified by name,False,,False
75
+ https://cert.gov.ua/article/6276894,"During December 15-25, 2023, several cases of distribution of e-mails with links to ""documents"" were...",No threat actor attribution in content....,,False,,False
76
+ https://cert.gov.ua/article/4492467,"During April 2023, the government computer emergency response team of Ukraine CERT-UA recorded cases...",No threat actor attribution in content....,,False,,False
77
+ https://www.microsoft.com/en-us/security/blog/2024/01/25/midnight-blizzard-guidance-for-responders-on-nation-state-attack/,"The Microsoft security team detected a nation-state attack on our corporate systems on January 12, 2...","**Q: What threat actor/APT group is discussed?**
78
+ A: The content uses placeholders for the specific name and aliases, but identifies the actor as the Foreign Intelligence Service of the Russian Federation (SVR).
79
+
80
+ **Q: What is this actor known for?**
81
+ A: Primarily known for intelligence collection and espionage of foreign interests, leveraging techniques like account compromise, advanced authentication mechanism compromise, diverse initial access methods (stolen credentials, supply chain attacks, o...","The content uses placeholders for the specific name and aliases, but identifies the actor as the Foreign Intelligence Service of the Russian Federation",True,,False
82
+ https://www.rapid7.com/blog/post/2024/05/10/ongoing-social-engineering-campaign-linked-to-black-basta-ransomware-operators/,Rapid7 has identified an ongoing social engineering campaign that has been targeting multiple manage...,"**Q: What threat actor/APT group is discussed?**
83
+ A: The text refers to an unnamed ""threat actor"" conducting a social engineering campaign. Observed indicators of compromise (IoCs) are consistent with the ""[PLACEHOLDER]"" ransomware operators.
84
+
85
+ **Q: What is this actor known for?**
86
+ A: This actor is known for a social engineering campaign involving overwhelming users with spam, impersonating IT support, and tricking users into providing remote access to download payloads for credential harvesting an...","The text refers to an unnamed ""threat actor"" conducting a social engineering campaign. Observed indicators of compromise",0.5,,False
87
+ https://www.welivesecurity.com/en/eset-research/oilrigs-outer-space-juicy-mix-same-ol-rig-new-drill-pipes/,[PLACEHOLDER] is a cyberespionage group that has been active since at least 2014 and is commonly bel...,"**Q: What threat actor/APT group is discussed?**
88
+ A: [PLACEHOLDER]
89
+
90
+ **Q: What is this actor known for?**
91
+ A: A cyberespionage group active since at least 2014, known for targeting Middle Eastern governments and various business sectors, and for using custom backdoors and data dumpers in their campaigns.
92
+
93
+ **Q: What campaigns/operations are mentioned?**
94
+ A: DNSpionage (2018-2019), HardPass (2019-2020), Outer Space (2021), Out to Sea (no specific timeframe, linked to Outer Space), Juicy Mix (2022).
95
+
96
+ *...",[PLACEHOLDER],0.5,,False
97
+ https://www.trendmicro.com/en_us/research/23/b/new-apt34-malware-targets-the-middle-east.html,"On December 2022, we identified a suspicious executable (detected by Trend Micro as Trojan.MSIL.REDC...","**Q: What threat actor/APT group is discussed?**
98
+ A: None identified (referred to as APT group [PLACEHOLDER])
99
+
100
+ **Q: What is this actor known for?**
101
+ A: Cyberespionage, stealing credentials, and adapting tools and techniques, including custom malware and shifting exfiltration methods (e.g., from DNS-based C2 to SMTP mail traffic).
102
+
103
+ **Q: What campaigns/operations are mentioned?**
104
+ A: Current campaign (December 2022) using Trojan.MSIL.REDCAP.AD and Backdoor.MSIL.REDCAP.A. Previous attacks involved Kar...",,False,,False
105
+ https://www.trendmicro.com/en_us/research/24/d/earth-freybug.html,"In the past month, we investigated a cyberespionage attack that we have attributed to [PLACEHOLDER]....","**Q: What threat actor/APT group is discussed?**
106
+ A: [PLACEHOLDER]
107
+
108
+ **Q: What is this actor known for?**
109
+ A: Active since at least 2012, they focus on espionage and financially motivated activities. They use a diverse range of tools and techniques, including LOLBins and custom malware.
110
+
111
+ **Q: What campaigns/operations are mentioned?**
112
+ A: None specifically named, but the current incident is referred to as ""this campaign.""
113
+
114
+ **Q: What is their suspected origin/attribution?**
115
+ A: Unknown
116
+
117
+ **Q: Who/what ...",[PLACEHOLDER],0.5,,False
118
+ https://blog.talosintelligence.com/lazarus_new_rats_dlang_and_telegram/,"Operation Blacksmith involved the exploitation of CVE-2021-44228, also known as Log4Shell, and the u...","**Q: What threat actor/APT group is discussed?**
119
+ A: Andariel (Onyx Sleet, PLUTONIUM). It is described as a sub-group of a broader North Korean APT umbrella group.
120
+
121
+ **Q: What is this actor known for?**
122
+ A: Initial access, reconnaissance, establishing long-term access for espionage in support of North Korean government interests, and conducting ransomware attacks against healthcare organizations. They develop and deploy bespoke malware and engage in extensive hands-on-keyboard activity.
123
+
124
+ **Q: What ...",Andariel,True,,False
125
+ https://symantec-enterprise-blogs.security.com/threat-intelligence/springtail-kimsuky-backdoor-espionage,Symantec’s Threat Hunter Team has uncovered a new Linux backdoor developed by the North Korean [PLAC...,"**Q: What threat actor/APT group is discussed?**
126
+ A: None explicitly named. Referred to as ""the North Korean [PLACEHOLDER]"" and ""a tight-knit espionage group.""
127
+
128
+ **Q: What is this actor known for?**
129
+ A: Espionage, specializing in attacks on South Korean public sector organizations. Known for spear-phishing, exploiting DMARC policies, and a heavy focus on software supply chain attacks, Trojanized software installers, and fake software installers.
130
+
131
+ **Q: What campaigns/operations are mentioned?**
132
+ A:
133
+ *...","None explicitly named. Referred to as ""the North Korean [PLACEHOLDER]"" and ""a tight-knit espionage group.""",0.5,,False
134
+ https://www.securonix.com/blog/securonix-threat-research-security-advisory-new-deepgosu-attack-campaign/,The Securonix Threat Research (STR) team has been monitoring a new campaign tracked as DEEP#GOSU lik...,"**Q: What threat actor/APT group is discussed?**
135
+ A: [PLACEHOLDER] group
136
+
137
+ **Q: What is this actor known for?**
138
+ A: Targeting South Korean victims, using recycled code and TTPs, employing script-based attack chains (PowerShell, VBScript), leveraging legitimate services (Dropbox, Google Docs) for C2, and using RATs (including TutRAT and xRAT in the past), clipboard monitoring, and keylogging. They have also used disguised HWP files previously.
139
+
140
+ **Q: What campaigns/operations are mentioned?**
141
+ A: DEEP...",[PLACEHOLDER] group,0.5,,False
142
+ https://www.seqrite.com/blog/pakistani-apts-escalate-attacks-on-indian-gov-seqrite-labs-unveils-threats-and-connections/,"In the recent past, cyberattacks on Indian government entities by Pakistan-linked APTs have gained s...","**Q: What threat actor/APT group is discussed?**
143
+ A: SideCopy, [PLACEHOLDER] (parent APT group of SideCopy).
144
+
145
+ **Q: What is this actor known for?**
146
+ * **SideCopy:** Pakistan-linked APT group active since at least 2019. Known for deploying AllaKore RAT, Ares RAT, Action RAT, Reverse RAT, Margulas RAT. Uses spear-phishing with LNK files and HTA for initial access.
147
+ * **[PLACEHOLDER]:** Parent threat group of SideCopy, active since 2013. Known for continuously using Crimson RAT (often encoded/packe...","SideCopy, [PLACEHOLDER]",True,,False
148
+ https://www.microsoft.com/en-us/security/blog/2024/01/17/new-ttps-observed-in-mint-sandstorm-campaign-targeting-high-profile-individuals-at-universities-and-research-orgs/,"Since November 2023, Microsoft has observed a distinct subset of [PLACEHOLDER] targeting high-profil...","**Q: What threat actor/APT group is discussed?**
149
+ A: APT35 (Charming Kitten)
150
+
151
+ **Q: What is this actor known for?**
152
+ A: Gaining access to and stealing sensitive information from high-value targets, and conducting resource-intensive social engineering campaigns.
153
+
154
+ **Q: What campaigns/operations are mentioned?**
155
+ A: A campaign observed since November 2023, targeting individuals working on Middle Eastern affairs.
156
+
157
+ **Q: What is their suspected origin/attribution?**
158
+ A: Islamic Revolutionary Guard Corps (I...",APT35,True,,False
159
+ https://www.deepinstinct.com/blog/darkbeatc2-the-latest-muddywater-attack-framework,"Despite the large number of Iranian cyber attacks against Israeli organizations, which has significa...","**Q: What threat actor/APT group is discussed?**
160
+ A: [PLACEHOLDER] (DarkBit, DEV-1084, TA450). The content also discusses KarMa (DEV-0842, BanishedKitten), ScarredManticore (DEV-0861, ShroudedSnooper), MERCURY, Lord Nemesis (Nemesis Kitten, DEV-0270, Cobalt Mirage, TunnelVision), and Mint Sandstorm (PHOSPHORUS, APT35, Charming Kitten).
161
+
162
+ **Q: What is this actor known for?**
163
+ A: [PLACEHOLDER] is known for deploying ransomware and wiper malware, spear-phishing with remote administration tools (RATs),...",[PLACEHOLDER],True,,False
164
+ https://symantec-enterprise-blogs.security.com/threat-intelligence/iran-apt-seedworm-africa-telecoms,"[PLACEHOLDER] has been active since at least 2017, and has targeted organizations in many countries,...","**Q: What threat actor/APT group is discussed?**
165
+ A: Not explicitly named in content, referred to as [PLACEHOLDER].
166
+
167
+ **Q: What is this actor known for?**
168
+ A: A cyberespionage group active since at least 2017, known for targeting organizations in many countries, particularly in the Middle East. They leverage tools like MuddyC2Go, SimpleHelp, and Venom Proxy.
169
+
170
+ **Q: What campaigns/operations are mentioned?**
171
+ A:
172
+ * Activity in November 2023, targeting telecommunications and media organizations.
173
+ * A...","Not explicitly named in content, referred to as [PLACEHOLDER].",True,,False
src/agents/cti_agent/tool_evaluation_results/identify_threat_actors_summary.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "accuracy": 0.5,
3
+ "total_samples": 25,
4
+ "validated_samples": 25,
5
+ "needs_review": 0,
6
+ "correct": 9,
7
+ "incorrect": 9,
8
+ "partial": 7
9
+ }
src/agents/database_agent/__pycache__/agent.cpython-311.pyc ADDED
Binary file (19.6 kB). View file
 
src/agents/database_agent/__pycache__/prompts.cpython-311.pyc ADDED
Binary file (2.13 kB). View file
 
src/agents/database_agent/agent.py ADDED
@@ -0,0 +1,442 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Database Agent - A specialized ReAct agent for MITRE ATT&CK technique retrieval
3
+
4
+ This agent provides semantic search capabilities over the MITRE ATT&CK knowledge base
5
+ with support for filtered searches by tactics, platforms, and other metadata.
6
+ """
7
+
8
+ import os
9
+ import json
10
+ import sys
11
+ import time
12
+ from typing import List, Dict, Any, Optional, Literal
13
+ from pathlib import Path
14
+
15
+ # LangGraph and LangChain imports
16
+ from langchain_core.tools import tool
17
+ from langchain_core.messages import HumanMessage, AIMessage
18
+ from langchain.chat_models import init_chat_model
19
+ from langchain_core.language_models.chat_models import BaseChatModel
20
+ from langchain_text_splitters import TokenTextSplitter
21
+ from langgraph.prebuilt import create_react_agent
22
+
23
+ # LangSmith imports
24
+ from langsmith import traceable, Client, get_current_run_tree
25
+
26
+ # Import prompts from the separate file
27
+ from src.agents.database_agent.prompts import DATABASE_AGENT_SYSTEM_PROMPT
28
+
29
+ # Import the cyber knowledge base
30
+ try:
31
+ from src.knowledge_base.cyber_knowledge_base import CyberKnowledgeBase
32
+ except Exception as e:
33
+ print(
34
+ f"[WARNING] Could not import CyberKnowledgeBase. Please adjust import paths. {e}"
35
+ )
36
+ sys.exit(1)
37
+
38
+ ls_client = Client(api_key=os.getenv("LANGSMITH_API_KEY"))
39
+
40
+
41
def truncate_to_tokens(text: str, max_tokens: int) -> str:
    """
    Truncate text to a maximum number of tokens using LangChain's TokenTextSplitter.

    Newlines in the input are replaced with spaces before tokenizing, and
    tokens are counted with the ``cl100k_base`` encoding.

    Args:
        text: The text to truncate. An empty (or ``None``) value yields "".
        max_tokens: Maximum number of tokens to keep. A non-positive budget
            yields "" so the splitter is never configured with a
            non-positive chunk size.

    Returns:
        The leading part of the cleaned text that fits within ``max_tokens``
        tokens, or "" when there is nothing to return.
    """
    # Nothing to keep: either there is no input or there is no token budget.
    if not text or max_tokens <= 0:
        return ""

    # Clean the text by replacing newlines with spaces
    cleaned_text = text.replace("\n", " ")

    # The splitter cuts the text into consecutive chunks of at most
    # max_tokens tokens (no overlap); the first chunk is the truncation.
    splitter = TokenTextSplitter(
        encoding_name="cl100k_base", chunk_size=max_tokens, chunk_overlap=0
    )

    chunks = splitter.split_text(cleaned_text)
    return chunks[0] if chunks else ""
65
+
66
+
67
class DatabaseAgent:
    """
    A specialized ReAct agent for MITRE ATT&CK technique retrieval and search.

    The agent wraps a ``CyberKnowledgeBase`` loaded from disk and exposes it
    to an LLM through LangChain tools. Tool results are returned to the LLM
    as JSON strings; per-search and per-request metrics are sent to LangSmith
    as feedback on the current run when one is active.
    """

    # Upper bound applied to the ``top_k`` argument of every search tool.
    _MAX_TOP_K = 20

    def __init__(
        self,
        kb_path: str = "./cyber_knowledge_base",
        llm_client: Optional[BaseChatModel] = None,
    ):
        """
        Initialize the Database Agent.

        Args:
            kb_path: Path to the cyber knowledge base directory
            llm_client: LLM model to use for the agent. When omitted, a
                default ``google_genai:gemini-2.0-flash`` chat model is
                created with temperature 0.1.

        Raises:
            RuntimeError: If the knowledge base cannot be loaded from
                ``kb_path``.
        """
        self.kb_path = kb_path
        self.kb = self._init_knowledge_base()

        # Compare against None explicitly so that a supplied client is never
        # discarded just because it happens to evaluate as falsy.
        if llm_client is not None:
            self.llm = llm_client
        else:
            self.llm = init_chat_model(
                "google_genai:gemini-2.0-flash",
                temperature=0.1,
            )
            print(
                "[INFO] Database Agent: Using default LLM model: google_genai:gemini-2.0-flash"
            )

        # Create tools
        self.tools = self._create_tools()

        # Create ReAct agent
        self.agent = self._create_react_agent()

    @traceable(name="database_agent_init_kb")
    def _init_knowledge_base(self) -> CyberKnowledgeBase:
        """
        Initialize and load the cyber knowledge base from ``self.kb_path``.

        Returns:
            The loaded knowledge base.

        Raises:
            RuntimeError: If ``load_knowledge_base`` reports failure.
        """
        kb = CyberKnowledgeBase()

        if kb.load_knowledge_base(self.kb_path):
            print("[SUCCESS] Database Agent: Loaded existing knowledge base")
            return kb

        print(
            f"[ERROR] Database Agent: Could not load knowledge base from {self.kb_path}"
        )
        print("Please ensure the knowledge base is built and available.")
        raise RuntimeError("Knowledge base not available")

    @staticmethod
    def _split_metadata_list(value: Any) -> List[str]:
        """
        Turn a metadata field into a list of non-empty, stripped strings.

        Args:
            value: A comma-separated string, an existing list/tuple/set of
                items, or ``None``.

        Returns:
            The individual entries with surrounding whitespace removed and
            empty entries dropped.
        """
        if value is None:
            return []
        if isinstance(value, (list, tuple, set)):
            items = value
        else:
            items = str(value).split(",")
        stripped = (str(item).strip() for item in items)
        return [item for item in stripped if item]

    @traceable(name="database_agent_format_results")
    def _format_results_as_json(self, results) -> List[Dict[str, Any]]:
        """
        Format search results as a list of JSON-serializable dictionaries.

        Args:
            results: Iterable of documents exposing ``metadata`` (a dict) and
                ``page_content`` (the technique text).

        Returns:
            One dictionary per document with the technique id, name, tactics,
            platforms, a description truncated to 300 tokens, the scores found
            in the metadata (``None`` when absent) and the mitigation count.
        """
        output = []
        for doc in results:
            metadata = doc.metadata
            # Only the mitigation count is reported; the full "mitigations"
            # text from the metadata is not included in the output.
            output.append(
                {
                    "attack_id": metadata.get("attack_id", "Unknown"),
                    "name": metadata.get("name", "Unknown"),
                    "tactics": self._split_metadata_list(
                        metadata.get("tactics", "")
                    ),
                    "platforms": self._split_metadata_list(
                        metadata.get("platforms", "")
                    ),
                    "description": truncate_to_tokens(doc.page_content, 300),
                    "relevance_score": metadata.get("relevance_score", None),
                    "rrf_score": metadata.get("rrf_score", None),
                    "mitigation_count": metadata.get("mitigation_count", 0),
                }
            )
        return output

    def _log_search_metrics(
        self,
        search_type: str,
        query: str,
        results_count: int,
        execution_time: float,
        success: bool,
    ):
        """
        Log search performance metrics to LangSmith.

        Feedback is attached to the current run tree when there is one.
        Logging is best-effort: any failure is printed and swallowed so that
        it never affects the search itself.

        Args:
            search_type: Label of the kind of search that was run.
            query: The query string that was searched.
            results_count: Number of techniques returned.
            execution_time: Elapsed time in seconds.
            success: Whether the search completed without an exception.
        """
        try:
            current_run = get_current_run_tree()
            if current_run:
                ls_client.create_feedback(
                    run_id=current_run.id,
                    key="database_search_performance",
                    score=1.0 if success else 0.0,
                    value={
                        "search_type": search_type,
                        "query": query,
                        "results_count": results_count,
                        "execution_time": execution_time,
                        "success": success,
                    },
                )
        except Exception as e:
            print(f"Failed to log search metrics: {e}")

    def _log_agent_performance(
        self, query: str, message_count: int, execution_time: float, success: bool
    ):
        """
        Log overall agent performance metrics to LangSmith.

        Best-effort, like ``_log_search_metrics``: failures are printed and
        swallowed.

        Args:
            query: The user query handled by the agent.
            message_count: Number of messages in the agent's response.
            execution_time: Elapsed time in seconds.
            success: Whether the agent call completed without an exception.
        """
        try:
            current_run = get_current_run_tree()
            if current_run:
                ls_client.create_feedback(
                    run_id=current_run.id,
                    key="database_agent_performance",
                    score=1.0 if success else 0.0,
                    value={
                        "query": query,
                        "message_count": message_count,
                        "execution_time": execution_time,
                        "success": success,
                        "agent_type": "database_search",
                    },
                )
        except Exception as e:
            print(f"Failed to log agent metrics: {e}")

    def _run_search(
        self,
        query: str,
        top_k: int,
        *,
        metric_type: str,
        response_type: str,
        error_message: str,
        filters: Optional[Dict[str, Optional[List[str]]]] = None,
    ) -> str:
        """
        Run one knowledge-base search and serialize the outcome as JSON.

        Shared implementation behind the search tools.

        Args:
            query: Search query string.
            top_k: Requested number of results; clamped to 1.._MAX_TOP_K.
            metric_type: ``search_type`` label used for the LangSmith metric.
            response_type: ``search_type`` value placed in the JSON response.
            error_message: ``message`` value used in the JSON error response.
            filters: ``{"tactics": ..., "platforms": ...}`` for a filtered
                search, or ``None`` for an unfiltered one. When given, the
                values are forwarded to the knowledge base and echoed in the
                response under ``"filters"``.

        Returns:
            A JSON string. On success it holds the search type, query,
            optional filters, techniques and their count; on failure it holds
            the error text, an empty technique list and ``error_message``.
        """
        start_time = time.time()
        try:
            # Limit top_k for performance
            top_k = min(max(top_k, 1), self._MAX_TOP_K)

            search_kwargs: Dict[str, Any] = {"top_k": top_k}
            if filters is not None:
                search_kwargs["filter_tactics"] = filters["tactics"]
                search_kwargs["filter_platforms"] = filters["platforms"]

            results = self.kb.search(query, **search_kwargs)
            techniques = self._format_results_as_json(results)

            self._log_search_metrics(
                metric_type, query, len(techniques), time.time() - start_time, True
            )

            # Key order matters for the rendered JSON: filters (when present)
            # sit between the query and the techniques.
            payload: Dict[str, Any] = {"search_type": response_type, "query": query}
            if filters is not None:
                payload["filters"] = filters
            payload["techniques"] = techniques
            payload["total_results"] = len(techniques)
            return json.dumps(payload, indent=2)

        except Exception as e:
            # A tool must hand the LLM a result rather than raise, so every
            # failure is reported inside the JSON payload.
            self._log_search_metrics(
                metric_type, query, 0, time.time() - start_time, False
            )
            return json.dumps(
                {
                    "error": str(e),
                    "techniques": [],
                    "message": error_message,
                },
                indent=2,
            )

    def _create_tools(self):
        """
        Create the search tools for the Database Agent.

        Returns:
            The list of tools handed to the ReAct agent.
        """

        @tool
        @traceable(name="database_search_techniques")
        def search_techniques(query: str, top_k: int = 5) -> str:
            """
            Search for MITRE ATT&CK techniques using semantic search.

            Args:
                query: Search query string
                top_k: Number of results to return (default: 5, max: 20)

            Returns:
                JSON string with search results containing technique details
            """
            return self._run_search(
                query,
                top_k,
                metric_type="single_query",
                response_type="single_query",
                error_message="Error occurred during search",
            )

        @tool
        @traceable(name="database_search_techniques_filtered")
        def search_techniques_filtered(
            query: str,
            top_k: int = 5,
            filter_tactics: Optional[List[str]] = None,
            filter_platforms: Optional[List[str]] = None,
        ) -> str:
            """
            Search for MITRE ATT&CK techniques with metadata filters.

            Args:
                query: Search query string
                top_k: Number of results to return (default: 5, max: 20)
                filter_tactics: Filter by specific tactics (e.g., ['defense-evasion', 'privilege-escalation'])
                filter_platforms: Filter by platforms (e.g., ['Windows', 'Linux'])

            Returns:
                JSON string with filtered search results

            Examples of tactics: initial-access, execution, persistence, privilege-escalation,
            defense-evasion, credential-access, discovery, lateral-movement, collection,
            command-and-control, exfiltration, impact

            Examples of platforms: Windows, macOS, Linux, AWS, Azure, GCP, SaaS, Network,
            Containers, Android, iOS
            """
            return self._run_search(
                query,
                top_k,
                metric_type="filtered_query",
                response_type="single_query_filtered",
                error_message="Error occurred during filtered search",
                filters={"tactics": filter_tactics, "platforms": filter_platforms},
            )

        # NOTE(review): only the unfiltered tool is exposed to the agent,
        # although search_techniques_filtered is defined above and the system
        # prompt describes it. Confirm whether it should be registered too:
        # return [search_techniques, search_techniques_filtered]
        return [search_techniques]

    def _create_react_agent(self):
        """Create the ReAct agent with the search tools using the prompt from prompts.py."""
        return create_react_agent(
            model=self.llm,
            tools=self.tools,
            prompt=DATABASE_AGENT_SYSTEM_PROMPT,
            name="database_agent",
        )

    @traceable(name="database_agent_search")
    def search(self, query: str, **kwargs) -> Dict[str, Any]:
        """
        Search for techniques using the agent's capabilities.

        Args:
            query: The search query or question
            **kwargs: Additional parameters passed to the agent

        Returns:
            Dictionary with ``success``, ``messages`` and ``final_response``
            (the content of the last message, or "" when there are none).
            On failure ``success`` is False, ``error`` holds the exception
            text and ``messages`` is empty.
        """
        start_time = time.time()
        try:
            response = self.agent.invoke(
                {"messages": [HumanMessage(content=query)]}, **kwargs
            )

            # Read the messages once and build the whole result before
            # logging, so a malformed response cannot be recorded as a
            # success and then reported to the caller as a failure.
            messages = response.get("messages", [])
            final_response = messages[-1].content if messages else ""

            execution_time = time.time() - start_time
            self._log_agent_performance(query, len(messages), execution_time, True)

            return {
                "success": True,
                "messages": messages,
                "final_response": final_response,
            }
        except Exception as e:
            execution_time = time.time() - start_time
            self._log_agent_performance(query, 0, execution_time, False)

            return {
                "success": False,
                "error": str(e),
                "messages": [],
                "final_response": f"Error during search: {str(e)}",
            }

    @traceable(name="database_agent_stream_search")
    def stream_search(self, query: str, **kwargs):
        """
        Stream the agent's search process for real-time feedback.

        Args:
            query: The search query or question
            **kwargs: Additional parameters passed to the agent

        Yields:
            Streaming responses from the agent. If an exception occurs, a
            final ``{"error": <message>}`` dictionary is yielded instead of
            the exception being raised.
        """
        try:
            messages = [HumanMessage(content=query)]
            for chunk in self.agent.stream({"messages": messages}, **kwargs):
                yield chunk
        except Exception as e:
            yield {"error": str(e)}
399
+
400
+
401
@traceable(name="database_agent_test")
def test_database_agent():
    """
    Demonstrate the Database Agent by running a few sample queries.

    Builds a ``DatabaseAgent`` with default settings, runs each sample query
    through ``agent.search`` and prints either the first 300 characters of
    the agent's final answer or the reported error. Returns early (after
    printing the reason) if the agent cannot be initialized.
    """
    print("Testing Database Agent...")

    # Initialize agent
    try:
        agent = DatabaseAgent()
        print("Database Agent initialized successfully")
    except Exception as e:
        print(f"Failed to initialize Database Agent: {e}")
        return

    # Test queries
    test_queries = [
        "Find techniques related to credential dumping and LSASS memory access",
        "What are Windows-specific privilege escalation techniques?",
        "Search for defense evasion techniques that work on Linux platforms",
        "Find lateral movement techniques involving SMB or WMI",
        "What techniques are used for persistence on macOS systems?",
    ]

    for i, query in enumerate(test_queries, 1):
        print(f"\n--- Test Query {i} ---")
        print(f"Query: {query}")
        print("-" * 50)

        # Test regular search
        result = agent.search(query)
        if not result["success"]:
            print(f"Search failed: {result['error']}")
            continue

        print("Search completed successfully")
        # Print last AI message (the summary). An AIMessage always has a
        # ``tool_calls`` attribute, so check that it is empty rather than
        # whether it exists; otherwise no message would ever match.
        for msg in reversed(result["messages"]):
            if isinstance(msg, AIMessage) and not getattr(msg, "tool_calls", None):
                print(f"Response: {msg.content[:300]}...")
                break
439
+
440
+
441
+ if __name__ == "__main__":
442
+ test_database_agent()
src/agents/database_agent/prompts.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Database Agent Prompts
3
+
4
+ This module contains all prompts used by the Database Agent for MITRE ATT&CK technique retrieval
5
+ and knowledge base search operations.
6
+ """
7
+
8
# Default system prompt for the Database Agent. It names the two retrieval
# tools the agent may call (search_techniques, search_techniques_filtered),
# lists the tactic and platform values accepted as filters, and describes the
# expected response format.
# NOTE(review): the tactic list below has 12 entries; MITRE ATT&CK Enterprise
# also defines "reconnaissance" and "resource-development" - confirm whether
# the knowledge base indexes those tactics before adding them here.
DATABASE_AGENT_SYSTEM_PROMPT = """
You are a Database Agent specialized in retrieving MITRE ATT&CK techniques and cybersecurity knowledge.

Your primary capabilities:
1. **Semantic Search**: Use search_techniques for general technique searches
2. **Filtered Search**: Use search_techniques_filtered when you need to filter by specific tactics or platforms

**Search Strategy Guidelines:**
- For general queries: Use search_techniques with a single, well-crafted search query
- For platform-specific needs: Use search_techniques_filtered with appropriate platform filters
- For tactic-specific needs: Use search_techniques_filtered with tactic filters
- Craft focused, specific queries rather than broad terms for better results
- Up to 3 queries to get the most relevant techniques

**Available Tactics for Filtering:**
initial-access, execution, persistence, privilege-escalation, defense-evasion,
credential-access, discovery, lateral-movement, collection, command-and-control,
exfiltration, impact

**Available Platforms for Filtering:**
Windows, macOS, Linux, AWS, Azure, GCP, SaaS, Network, Containers, Android, iOS

**Response Guidelines:**
- Always explain your search strategy before using tools
- Summarize the most relevant techniques found, with detailed descriptions of the techniques

- When filtered searches return few results, suggest alternative approaches, and up to 3 queries to get the most relevant techniques
- Highlight high-relevance techniques and explain why they're relevant
- Format your final response clearly with technique IDs, names, and detailed descriptions

Remember: You are focused on retrieving and analyzing MITRE ATT&CK techniques. Always relate findings back to the user's specific cybersecurity question or scenario.
"""
40
+
41
+ ### Evaluation Database Agent Prompt - Turn on when evaluating ATE dataset
42
+ # DATABASE_AGENT_SYSTEM_PROMPT = """You are a Database Agent specialized in retrieving MITRE ATT&CK techniques and cybersecurity knowledge.
43
+
44
+ # **Vector Database Structure:**
45
+ # The knowledge base contains embeddings of MITRE ATT&CK technique descriptions with associated metadata including:
46
+ # - Technique names and descriptions (primary searchable content)
47
+ # - Platforms (Windows, macOS, Linux, etc.)
48
+ # - Tactics (initial-access, execution, persistence, etc.)
49
+ # - Mitigation information
50
+ # - Attack IDs and subtechnique relationships
51
+
52
+ # **Your primary capabilities:**
53
+ # 1. **Semantic Search**: Use search_techniques for general technique searches based on descriptions
54
+
55
+ # **Search Strategy Guidelines:**
56
+ # - **Focus on descriptions**: The vector database is optimized for semantic search of technique descriptions
57
+ # - For general queries: Use search_techniques with description-focused search queries
58
+ # - Craft focused, specific queries that describe attack behaviors rather than broad terms
59
+ # - Up to 3 queries to get the most relevant techniques
60
+ # - **Do NOT use tools for mitigation searches** - mitigation information is available as metadata in the retrieved techniques
61
+ # - **Do NOT use filtered searches** - filtered searches are not available in the vector database
62
+
63
+ # **Response Guidelines:**
64
+ # - Always explain your search strategy before using tools
65
+ # - Summarize the most relevant techniques found, with detailed descriptions of the techniques
66
+ # - Include mitigation information from the retrieved technique metadata when relevant
67
+ # - When filtered searches return few results, suggest alternative approaches, and up to 3 queries to get the most relevant techniques
68
+ # - Highlight high-relevance techniques and explain why they're relevant
69
+ # - Format your final response clearly with technique IDs, names, and detailed descriptions
70
+
71
+ # Remember: You are focused on retrieving and analyzing MITRE ATT&CK techniques. Always relate findings back to the user's specific cybersecurity question or scenario."""
src/agents/global_supervisor/__pycache__/supervisor.cpython-311.pyc ADDED
Binary file (14.6 kB). View file
 
src/agents/log_analysis_agent/__pycache__/agent.cpython-311.pyc ADDED
Binary file (51 kB). View file
 
src/agents/log_analysis_agent/__pycache__/prompts.cpython-311.pyc ADDED
Binary file (6.77 kB). View file
 
src/agents/log_analysis_agent/__pycache__/state_models.cpython-311.pyc ADDED
Binary file (1.14 kB). View file
 
src/agents/log_analysis_agent/__pycache__/utils.cpython-311.pyc ADDED
Binary file (2.56 kB). View file
 
src/agents/log_analysis_agent/agent.py ADDED
@@ -0,0 +1,1058 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ LogAnalysisAgent - Main orchestrator for cybersecurity log analysis
3
+ """
4
+
5
+ import os
6
+ import json
7
+ import time
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from typing import List, Dict, Optional
11
+
12
+ from langchain_core.messages import HumanMessage
13
+ from langgraph.prebuilt import create_react_agent
14
+ from langchain_core.tools import tool
15
+ from langgraph.graph import StateGraph, END
16
+ from langchain.chat_models import init_chat_model
17
+
18
+ from langsmith import traceable, Client, get_current_run_tree
19
+
20
+ from src.agents.log_analysis_agent.state_models import AnalysisState
21
+ from src.agents.log_analysis_agent.utils import (
22
+ get_llm,
23
+ get_tools,
24
+ format_execution_time,
25
+ )
26
+ from src.agents.log_analysis_agent.prompts import (
27
+ ANALYSIS_PROMPT,
28
+ CRITIC_FEEDBACK_TEMPLATE,
29
+ SELF_CRITIC_PROMPT,
30
+ )
31
+
32
+ ls_client = Client(api_key=os.getenv("LANGSMITH_API_KEY"))
33
+
34
+
35
+ class LogAnalysisAgent:
36
+ """
37
+ Main orchestrator for cybersecurity log analysis.
38
+ Coordinates the entire workflow: load → preprocess → analyze → save → display
39
+ """
40
+
41
+ def __init__(
42
+ self,
43
+ model_name: str = "google_genai:gemini-2.0-flash",
44
+ temperature: float = 0.1,
45
+ output_dir: str = "analysis",
46
+ max_iterations: int = 2,
47
+ llm_client = None,
48
+ ):
49
+ """
50
+ Initialize the Log Analysis Agent
51
+
52
+ Args:
53
+ model_name: Name of the model to use (e.g. "google_genai:gemini-2.0-flash")
54
+ temperature: Temperature for the model
55
+ output_dir: Directory name for saving outputs (relative to package directory)
56
+ max_iterations: Maximum number of iterations for the ReAct agent
57
+ llm_client: Optional pre-initialized LLM client (overrides model_name/temperature)
58
+ """
59
+ if llm_client:
60
+ self.llm = llm_client
61
+ print(f"[INFO] Log Analysis Agent: Using provided LLM client")
62
+ else:
63
+ self.llm = init_chat_model(model_name, temperature=temperature)
64
+ print(f"[INFO] Log Analysis Agent: Using default LLM model: {model_name}")
65
+
66
+ self.base_tools = get_tools()
67
+
68
+ self.output_root = Path(output_dir)
69
+ self.output_root.mkdir(exist_ok=True)
70
+
71
+ # Initialize helper components
72
+ self.log_processor = LogProcessor()
73
+ self.react_analyzer = ReactAnalyzer(
74
+ self.llm, self.base_tools, max_iterations=max_iterations
75
+ )
76
+ self.result_manager = ResultManager(self.output_root)
77
+
78
+ # Create workflow graph
79
+ self.workflow = self._create_workflow()
80
+
81
+ def _create_workflow(self) -> StateGraph:
82
+ """Create and configure the analysis workflow graph"""
83
+ workflow = StateGraph(AnalysisState)
84
+
85
+ # Add nodes using instance methods
86
+ workflow.add_node("load_logs", self.log_processor.load_logs)
87
+ workflow.add_node("preprocess_logs", self.log_processor.preprocess_logs)
88
+ workflow.add_node("react_agent_analysis", self.react_analyzer.analyze)
89
+ workflow.add_node("save_results", self.result_manager.save_results)
90
+ workflow.add_node("display_results", self.result_manager.display_results)
91
+
92
+ # Define workflow edges
93
+ workflow.set_entry_point("load_logs")
94
+ workflow.add_edge("load_logs", "preprocess_logs")
95
+ workflow.add_edge("preprocess_logs", "react_agent_analysis")
96
+ workflow.add_edge("react_agent_analysis", "save_results")
97
+ workflow.add_edge("save_results", "display_results")
98
+ workflow.add_edge("display_results", END)
99
+
100
+ return workflow.compile(name="log_analysis_agent")
101
+
102
+ def _log_workflow_metrics(self, workflow_step: str, execution_time: float, success: bool, details: dict = None):
103
+ """Log workflow step performance metrics to LangSmith."""
104
+ try:
105
+ current_run = get_current_run_tree()
106
+ if current_run:
107
+ ls_client.create_feedback(
108
+ run_id=current_run.id,
109
+ key="log_analysis_workflow_performance",
110
+ score=1.0 if success else 0.0,
111
+ value={
112
+ "workflow_step": workflow_step,
113
+ "execution_time": execution_time,
114
+ "success": success,
115
+ "details": details or {},
116
+ "agent_type": "log_analysis_workflow"
117
+ }
118
+ )
119
+ except Exception as e:
120
+ print(f"Failed to log workflow metrics: {e}")
121
+
122
+ def _log_security_analysis_results(self, analysis_result: dict):
123
+ """Log security analysis findings to LangSmith."""
124
+ try:
125
+ current_run = get_current_run_tree()
126
+ if current_run:
127
+ assessment = analysis_result.get("overall_assessment", "UNKNOWN")
128
+ abnormal_events = analysis_result.get("abnormal_events", [])
129
+ total_events = analysis_result.get("total_events_analyzed", 0)
130
+
131
+ # Calculate threat score
132
+ threat_score = 0.0
133
+ if assessment == "CRITICAL":
134
+ threat_score = 1.0
135
+ elif assessment == "HIGH":
136
+ threat_score = 0.8
137
+ elif assessment == "MEDIUM":
138
+ threat_score = 0.5
139
+ elif assessment == "LOW":
140
+ threat_score = 0.2
141
+
142
+ ls_client.create_feedback(
143
+ run_id=current_run.id,
144
+ key="security_analysis_results",
145
+ score=threat_score,
146
+ value={
147
+ "overall_assessment": assessment,
148
+ "abnormal_events_count": len(abnormal_events),
149
+ "total_events_analyzed": total_events,
150
+ "execution_time": analysis_result.get("execution_time_formatted", "Unknown"),
151
+ "iteration_count": analysis_result.get("iteration_count", 1),
152
+ "abnormal_events": abnormal_events[:5] # Limit to first 5 for logging
153
+ }
154
+ )
155
+ except Exception as e:
156
+ print(f"Failed to log security analysis results: {e}")
157
+
158
+ def _log_batch_analysis_metrics(self, total_files: int, successful: int, start_time: datetime, end_time: datetime):
159
+ """Log batch analysis performance metrics."""
160
+ try:
161
+ current_run = get_current_run_tree()
162
+ if current_run:
163
+ duration = (end_time - start_time).total_seconds()
164
+ success_rate = successful / total_files if total_files > 0 else 0
165
+
166
+ ls_client.create_feedback(
167
+ run_id=current_run.id,
168
+ key="batch_analysis_performance",
169
+ score=success_rate,
170
+ value={
171
+ "total_files": total_files,
172
+ "successful_files": successful,
173
+ "failed_files": total_files - successful,
174
+ "success_rate": success_rate,
175
+ "duration_seconds": duration,
176
+ "files_per_minute": (total_files / duration) * 60 if duration > 0 else 0
177
+ }
178
+ )
179
+ except Exception as e:
180
+ print(f"Failed to log batch analysis metrics: {e}")
181
+
182
+ @traceable(name="log_analysis_agent_full_workflow")
183
+ def analyze(self, log_file: str) -> Dict:
184
+ """
185
+ Analyze a single log file
186
+
187
+ Args:
188
+ log_file: Path to the log file to analyze
189
+
190
+ Returns:
191
+ Dictionary containing the analysis result
192
+ """
193
+ state = self._initialize_state(log_file)
194
+ result = self.workflow.invoke(state, config={"recursion_limit": 100})
195
+
196
+ analysis_result = result.get("analysis_result", {})
197
+ if analysis_result:
198
+ self._log_security_analysis_results(analysis_result)
199
+
200
+ return analysis_result
201
+
202
+ @traceable(name="log_analysis_agent_batch_workflow")
203
+ def analyze_batch(
204
+ self, dataset_dir: str, skip_existing: bool = False
205
+ ) -> List[Dict]:
206
+ """
207
+ Analyze all log files in a dataset directory
208
+
209
+ Args:
210
+ dataset_dir: Path to directory containing log files
211
+ skip_existing: Whether to skip already analyzed files
212
+
213
+ Returns:
214
+ List of result dictionaries for each file
215
+ """
216
+ print("=" * 60)
217
+ print("BATCH MODE: Analyzing all files in dataset")
218
+ print("=" * 60 + "\n")
219
+
220
+ files = self._find_dataset_files(dataset_dir)
221
+
222
+ if not files:
223
+ print("No JSON files found in dataset directory")
224
+ return []
225
+
226
+ print(f"Found {len(files)} files to analyze")
227
+ if skip_existing:
228
+ print("Skip mode enabled: Already analyzed files will be skipped")
229
+ print()
230
+
231
+ results = []
232
+ batch_start = datetime.now()
233
+
234
+ for idx, file_path in enumerate(files, 1):
235
+ filename = os.path.basename(file_path)
236
+ print(f"\n[{idx}/{len(files)}] Processing: {filename}")
237
+ print("-" * 60)
238
+
239
+ result = self._analyze_single_file(file_path, skip_existing)
240
+ results.append(result)
241
+
242
+ if result["success"]:
243
+ print(f"Status: {result['message']}")
244
+ else:
245
+ print(f"Status: FAILED - {result['message']}")
246
+
247
+ batch_end = datetime.now()
248
+
249
+ successful = sum(1 for r in results if r["success"])
250
+ self._log_batch_analysis_metrics(len(files), successful, batch_start, batch_end)
251
+
252
+ self.result_manager.display_batch_summary(results, batch_start, batch_end)
253
+
254
+ return results
255
+
256
+ def _initialize_state(self, log_file: str) -> Dict:
257
+ """Initialize the analysis state with default values"""
258
+ return {
259
+ "log_file": log_file,
260
+ "raw_logs": "",
261
+ "prepared_logs": "",
262
+ "analysis_result": {},
263
+ "messages": [],
264
+ "agent_reasoning": "",
265
+ "agent_observations": [],
266
+ "iteration_count": 0,
267
+ "critic_feedback": "",
268
+ "iteration_history": [],
269
+ "start_time": 0.0,
270
+ "end_time": 0.0,
271
+ }
272
+
273
+ def _analyze_single_file(self, log_file: str, skip_existing: bool = False) -> Dict:
274
+ """Analyze a single log file with error handling"""
275
+ try:
276
+ if skip_existing:
277
+ existing = self.result_manager.get_existing_output(log_file)
278
+ if existing:
279
+ return {
280
+ "success": True,
281
+ "log_file": log_file,
282
+ "message": "Skipped (already analyzed)",
283
+ "result": None,
284
+ }
285
+
286
+ state = self._initialize_state(log_file)
287
+ self.workflow.invoke(state, config={"recursion_limit": 100})
288
+
289
+ return {
290
+ "success": True,
291
+ "log_file": log_file,
292
+ "message": "Analysis completed",
293
+ "result": state.get("analysis_result"),
294
+ }
295
+
296
+ except Exception as e:
297
+ return {
298
+ "success": False,
299
+ "log_file": log_file,
300
+ "message": f"Error: {str(e)}",
301
+ "result": None,
302
+ }
303
+
304
+ def _find_dataset_files(self, dataset_dir: str) -> List[str]:
305
+ """Find all JSON files in the dataset directory"""
306
+ import glob
307
+
308
+ if not os.path.exists(dataset_dir):
309
+ print(f"Error: Dataset directory not found: {dataset_dir}")
310
+ return []
311
+
312
+ json_files = glob.glob(os.path.join(dataset_dir, "*.json"))
313
+ return sorted(json_files)
314
+
315
+
316
class LogProcessor:
    """
    Handles log loading and preprocessing operations
    """

    def __init__(self, max_size: int = 20000):
        """
        Initialize the log processor

        Args:
            max_size: Maximum character size before applying sampling
        """
        self.max_size = max_size

    @traceable(name="log_processor_load_logs")
    def load_logs(self, state: AnalysisState) -> AnalysisState:
        """Load logs from file and initialize state.

        Reads ``state["log_file"]`` as UTF-8 text into ``state["raw_logs"]``,
        records ``state["start_time"]`` and resets the per-run bookkeeping
        fields. If the file cannot be read, the error is printed and
        ``raw_logs`` is set to an "Error loading file: ..." message instead of
        raising.
        """
        filename = os.path.basename(state["log_file"])
        print(f"Loading logs from: {filename}")

        # Record start time (also used as the reference for loading metrics)
        start_time = time.time()
        state["start_time"] = start_time

        try:
            with open(state["log_file"], "r", encoding="utf-8") as f:
                raw = f.read()
            success = True
        except Exception as e:
            print(f"Error reading file: {e}")
            raw = f"Error loading file: {e}"
            success = False

        execution_time = time.time() - start_time
        self._log_loading_metrics(filename, len(raw), execution_time, success)

        state["raw_logs"] = raw
        state["messages"] = []
        state["agent_reasoning"] = ""
        state["agent_observations"] = []
        state["iteration_count"] = 0
        state["critic_feedback"] = ""
        state["iteration_history"] = []
        state["end_time"] = 0.0

        return state

    @traceable(name="log_processor_preprocess_logs")
    def preprocess_logs(self, state: AnalysisState) -> AnalysisState:
        """Preprocess logs for analysis - sample large files.

        Writes ``state["prepared_logs"]``: a "TOTAL LINES" header followed by
        either the full raw text or, when the raw text is longer than
        ``self.max_size`` characters, the sampled excerpt.
        """
        raw = state["raw_logs"]
        line_count = raw.count("\n")
        print(f"Loaded {line_count} lines, {len(raw)} characters")

        start_time = time.time()

        sampling_applied = len(raw) > self.max_size
        if sampling_applied:
            log_body = self._apply_sampling(raw)
            prepared = f"TOTAL LINES: {line_count}\nSAMPLED:\n{log_body}"
            print("Large file detected - using sampling strategy")
        else:
            # No sampling: the processed body is the raw text itself. Defining
            # it here keeps the metrics call below valid for small files.
            log_body = raw
            prepared = f"TOTAL LINES: {line_count}\n\n{raw}"

        execution_time = time.time() - start_time
        self._log_preprocessing_metrics(
            line_count, len(raw), len(log_body), sampling_applied, execution_time
        )

        # Store the header-prefixed text built above; do not overwrite it with
        # the bare sample, which would drop the line-count header.
        state["prepared_logs"] = prepared

        return state

    def _log_loading_metrics(self, filename: str, file_size: int, execution_time: float, success: bool):
        """Log file loading performance metrics.

        Best-effort: does nothing when there is no current run, and any
        exception is printed rather than raised.
        """
        try:
            current_run = get_current_run_tree()
            if current_run:
                ls_client.create_feedback(
                    run_id=current_run.id,
                    key="log_loading_performance",
                    score=1.0 if success else 0.0,
                    value={
                        "filename": filename,
                        "file_size_chars": file_size,
                        "execution_time": execution_time,
                        "success": success,
                    },
                )
        except Exception as e:
            print(f"Failed to log loading metrics: {e}")

    def _log_preprocessing_metrics(
        self,
        line_count: int,
        original_size: int,
        processed_size: int,
        sampling_applied: bool,
        execution_time: float,
    ):
        """Log preprocessing performance metrics.

        Best-effort: does nothing when there is no current run, and any
        exception is printed rather than raised.
        """
        try:
            current_run = get_current_run_tree()
            if current_run:
                ls_client.create_feedback(
                    run_id=current_run.id,
                    key="log_preprocessing_performance",
                    score=1.0,
                    value={
                        "line_count": line_count,
                        "original_size_chars": original_size,
                        "processed_size_chars": processed_size,
                        "sampling_applied": sampling_applied,
                        "size_reduction": (original_size - processed_size) / original_size if original_size > 0 else 0,
                        "execution_time": execution_time,
                    },
                )
        except Exception as e:
            print(f"Failed to log preprocessing metrics: {e}")

    def _apply_sampling(self, raw: str) -> str:
        """Apply sampling strategy to large log files.

        Returns the first 7000 characters, a 6000-character window centred on
        the midpoint, and the last 7000 characters, joined by marker lines.
        """
        first = raw[:7000]
        middle = raw[len(raw) // 2 - 3000 : len(raw) // 2 + 3000]
        last = raw[-7000:]
        return f"{first}\n...[MIDDLE]...\n{middle}\n...[END]...\n{last}"
434
+
435
+
436
+ class ReactAnalyzer:
437
+ """
438
+ Handles ReAct agent analysis with iterative refinement
439
+ Combines react_engine + criticism_engine logic
440
+ """
441
+
442
+ def __init__(self, llm, base_tools, max_iterations: int = 2):
443
+ """
444
+ Initialize the ReAct analyzer
445
+
446
+ Args:
447
+ llm: Language model instance
448
+ base_tools: List of base tools for the agent
449
+ max_iterations: Maximum refinement iterations
450
+ """
451
+ self.llm = llm
452
+ self.base_tools = base_tools
453
+ self.max_iterations = max_iterations
454
+
455
@traceable(name="react_analyzer_analysis")
def analyze(self, state: AnalysisState) -> AnalysisState:
    """Perform ReAct agent analysis with iterative refinement.

    Runs up to ``self.max_iterations`` rounds. Each round invokes a ReAct
    agent on the prepared logs, formats its final JSON answer, passes it to
    the self-critic, and adopts the critic's corrected analysis. The loop
    stops early when the critic reports the quality as acceptable.

    Args:
        state: Workflow state. Reads ``raw_logs`` (via the state-aware tools),
            ``prepared_logs`` and ``start_time``; writes ``messages``,
            ``iteration_count``, ``agent_reasoning``, ``analysis_result``,
            ``iteration_history``, ``critic_feedback`` and ``end_time``.

    Returns:
        The same state object. On any exception the error is printed with a
        traceback and ``analysis_result`` is replaced by a result whose
        ``overall_assessment`` is "ERROR"; the exception is not re-raised.
    """
    print("Starting ReAct agent analysis with iterative refinement...")

    # NOTE(review): this local is not read again in this method; elapsed time
    # below is measured from state["start_time"] instead.
    start_time = time.time()

    # Create state-aware tools
    tools = self._create_state_aware_tools(state)

    # Create ReAct agent
    agent_executor = create_react_agent(
        self.llm, tools, name="react_agent_analysis"
    )

    # System context (prepended to the analysis prompt by _prepare_messages)
    system_context = """You are Agent A, an autonomous cybersecurity analyst.

IMPORTANT CONTEXT - RAW LOGS AVAILABLE:
The complete raw logs are available for certain tools automatically.
When you call event_id_extractor_with_logs or timeline_builder_with_logs,
you only need to provide the required parameters - the tools will automatically
access the raw logs to perform their analysis.

"""

    try:
        # Iterative refinement loop
        for iteration in range(self.max_iterations):
            # Stored zero-based; user-facing output adds 1.
            state["iteration_count"] = iteration
            print(f"\n{'='*60}")
            print(f"ITERATION {iteration + 1}/{self.max_iterations}")
            print(f"{'='*60}")

            # Prepare prompt with optional feedback
            messages = self._prepare_messages(state, iteration, system_context)

            # Run ReAct agent
            print(f"Running agent analysis...")
            result = agent_executor.invoke(
                {"messages": messages},
                config={"recursion_limit": 100}
            )
            state["messages"] = result["messages"]

            # Extract and process final analysis
            final_analysis = self._extract_final_analysis(state["messages"])

            # Calculate execution time (measured from state["start_time"], so
            # it is cumulative across iterations)
            state["end_time"] = time.time()
            execution_time = format_execution_time(
                state["end_time"] - state["start_time"]
            )

            # Extract reasoning
            state["agent_reasoning"] = final_analysis.get("reasoning", "")

            # Format result
            state["analysis_result"] = self._format_analysis_result(
                final_analysis,
                execution_time,
                iteration + 1,
                state["agent_reasoning"],
            )

            # Run self-critic review
            print("Running self-critic review...")
            # Shallow copy taken before the critic's correction replaces
            # state["analysis_result"] below.
            original_analysis = state["analysis_result"].copy()
            critic_result = self._critic_review(state)

            # Store iteration in history
            state["iteration_history"].append(
                {
                    "iteration": iteration + 1,
                    "original_analysis": original_analysis,
                    "critic_evaluation": {
                        "quality_acceptable": critic_result["quality_acceptable"],
                        "issues": critic_result["issues"],
                        "feedback": critic_result["feedback"],
                    },
                    "corrected_analysis": critic_result["corrected_analysis"],
                }
            )

            # Use corrected analysis, carrying the timing and iteration fields
            # over from the pre-critic result.
            corrected = critic_result["corrected_analysis"]
            corrected["execution_time_seconds"] = original_analysis.get(
                "execution_time_seconds", 0
            )
            corrected["execution_time_formatted"] = original_analysis.get(
                "execution_time_formatted", "Unknown"
            )
            corrected["iteration_count"] = iteration + 1
            state["analysis_result"] = corrected

            # Check if refinement is needed
            if critic_result["quality_acceptable"]:
                print(
                    f"✓ Quality acceptable - stopping at iteration {iteration + 1}"
                )
                break
            elif iteration < self.max_iterations - 1:
                print(
                    f"✗ Quality needs improvement - proceeding to iteration {iteration + 2}"
                )
                # Feedback is consumed by _prepare_messages on the next round.
                state["critic_feedback"] = critic_result["feedback"]
            else:
                print(f"✗ Max iterations reached - using current analysis")

        print(
            f"\nAnalysis complete after {state['iteration_count'] + 1} iteration(s)"
        )
        print(f"Total messages: {len(state['messages'])}")

    except Exception as e:
        print(f"Error in analysis: {e}")
        import traceback

        traceback.print_exc()
        state["end_time"] = time.time()
        execution_time = format_execution_time(
            state["end_time"] - state["start_time"]
        )

        # Replace whatever was produced so far with an explicit ERROR result
        # that has the same keys as a normal analysis result.
        state["analysis_result"] = {
            "overall_assessment": "ERROR",
            "total_events_analyzed": 0,
            "execution_time_seconds": execution_time["total_seconds"],
            "execution_time_formatted": execution_time["formatted_time"],
            "analysis_summary": f"Analysis failed: {e}",
            "agent_reasoning": "",
            "abnormal_event_ids": [],
            "abnormal_events": [],
            "iteration_count": state.get("iteration_count", 0),
        }

    return state
592
+
593
def _create_state_aware_tools(self, state: AnalysisState):
    """Build the tool list for the agent, swapping in log-aware wrappers.

    The two wrappers defined here close over ``state`` and read
    ``state["raw_logs"]`` when they are called, so the agent does not have to
    pass the raw logs itself. Base tools named "event_id_extractor" and
    "timeline_builder" are dropped in favour of the wrappers.
    """

    @tool
    def event_id_extractor_with_logs(suspected_event_id: str) -> dict:
        """Validates and corrects Windows Event IDs identified in log analysis."""
        from .tools.event_id_extractor_tool import _event_id_extractor_tool

        payload = {
            "suspected_event_id": suspected_event_id,
            "raw_logs": state["raw_logs"],
        }
        return _event_id_extractor_tool.run(payload)

    @tool
    def timeline_builder_with_logs(
        pivot_entity: str, pivot_type: str, time_window_minutes: int = 5
    ) -> dict:
        """Build a focused timeline around suspicious events to understand attack sequences.

        Use this when you suspect coordinated activity or want to understand what happened
        before and after a suspicious event. Analyzes the sequence of events to identify patterns.

        Args:
            pivot_entity: The entity to build timeline around (e.g., "powershell.exe", "admin", "192.168.1.100")
            pivot_type: Type of entity - "user", "process", "ip", "file", "computer", "event_id", or "registry"
            time_window_minutes: Minutes before and after pivot events to include (default: 5)

        Returns:
            Timeline analysis showing events before and after the pivot, helping identify attack sequences.
        """
        from .tools.timeline_builder_tool import _timeline_builder_tool

        payload = {
            "pivot_entity": pivot_entity,
            "pivot_type": pivot_type,
            "time_window_minutes": time_window_minutes,
            "raw_logs": state["raw_logs"],
        }
        return _timeline_builder_tool.run(payload)

    # Keep every base tool except the two superseded by the wrappers above,
    # then add the wrappers at the end in a fixed order.
    superseded = ["event_id_extractor", "timeline_builder"]
    kept_tools = [
        base_tool for base_tool in self.base_tools if base_tool.name not in superseded
    ]
    return kept_tools + [event_id_extractor_with_logs, timeline_builder_with_logs]
648
+
649
def _prepare_messages(
    self, state: AnalysisState, iteration: int, system_context: str
):
    """Build the message list handed to the ReAct agent for one iteration.

    The first iteration gets a single human message: the system context
    followed by the analysis prompt filled with the prepared logs. Later
    iterations reuse the previous conversation (dict entries are dropped) and
    append the critic feedback as a new human message.
    """
    if iteration != 0:
        feedback_text = CRITIC_FEEDBACK_TEMPLATE.format(
            iteration=iteration + 1, feedback=state["critic_feedback"]
        )
        # Carry over only message objects; plain dicts are filtered out.
        conversation = [
            previous for previous in state["messages"] if not isinstance(previous, dict)
        ]
        conversation.append(HumanMessage(content=feedback_text))
        return conversation

    # First iteration - there is no critic feedback yet.
    analysis_prompt = ANALYSIS_PROMPT.format(
        logs=state["prepared_logs"],
        critic_feedback_section="",
    )
    return [HumanMessage(content=system_context + analysis_prompt)]
671
+
672
+ def _extract_final_analysis(self, messages):
673
+ """Extract the final analysis from agent messages"""
674
+ final_message = None
675
+ for msg in reversed(messages):
676
+ if (
677
+ hasattr(msg, "__class__")
678
+ and msg.__class__.__name__ == "AIMessage"
679
+ and hasattr(msg, "content")
680
+ and msg.content
681
+ and (not hasattr(msg, "tool_calls") or not msg.tool_calls)
682
+ ):
683
+ final_message = msg.content
684
+ break
685
+
686
+ if not final_message:
687
+ raise Exception("No final analysis message found")
688
+
689
+ return self._parse_agent_output(final_message)
690
+
691
+ def _parse_agent_output(self, content: str) -> dict:
692
+ """Parse agent's final output"""
693
+ try:
694
+ if "```json" in content:
695
+ json_str = content.split("```json")[1].split("```")[0].strip()
696
+ elif "```" in content:
697
+ json_str = content.split("```")[1].split("```")[0].strip()
698
+ else:
699
+ json_str = content.strip()
700
+
701
+ return json.loads(json_str)
702
+ except Exception as e:
703
+ print(f"Failed to parse agent output: {e}")
704
+ return {
705
+ "overall_assessment": "UNKNOWN",
706
+ "total_events_analyzed": 0,
707
+ "analysis_summary": content[:500],
708
+ "reasoning": "",
709
+ "abnormal_event_ids": [],
710
+ "abnormal_events": [],
711
+ }
712
+
713
+ def _format_analysis_result(
714
+ self, final_analysis, execution_time, iteration_count, agent_reasoning
715
+ ):
716
+ """Format the analysis result into the expected structure"""
717
+ abnormal_events = []
718
+ for event in final_analysis.get("abnormal_events", []):
719
+ event_with_tools = {
720
+ "event_id": event.get("event_id", ""),
721
+ "event_description": event.get("event_description", ""),
722
+ "why_abnormal": event.get("why_abnormal", ""),
723
+ "severity": event.get("severity", ""),
724
+ "indicators": event.get("indicators", []),
725
+ "potential_threat": event.get("potential_threat", ""),
726
+ "attack_category": event.get("attack_category", ""),
727
+ "tool_enrichment": event.get("tool_enrichment", {}),
728
+ }
729
+ abnormal_events.append(event_with_tools)
730
+
731
+ return {
732
+ "overall_assessment": final_analysis.get("overall_assessment", "UNKNOWN"),
733
+ "total_events_analyzed": final_analysis.get("total_events_analyzed", 0),
734
+ "execution_time_seconds": execution_time["total_seconds"],
735
+ "execution_time_formatted": execution_time["formatted_time"],
736
+ "analysis_summary": final_analysis.get("analysis_summary", ""),
737
+ "agent_reasoning": agent_reasoning,
738
+ "abnormal_event_ids": final_analysis.get("abnormal_event_ids", []),
739
+ "abnormal_events": abnormal_events,
740
+ "iteration_count": iteration_count,
741
+ }
742
+
743
+ # ========== CRITIC ENGINE METHODS ==========
744
+
745
def _critic_review(self, state: dict) -> dict:
    """Ask the LLM to critique the current analysis and parse its verdict.

    The prompt is ``SELF_CRITIC_PROMPT`` filled with the analysis result
    (as indented JSON), the newline-joined content of every message that
    has a ``content`` attribute, and the prepared logs. If the reply cannot
    be parsed, the failure is printed and the current analysis is accepted
    unchanged.

    Returns:
        Dict with keys ``quality_acceptable``, ``issues``, ``feedback``,
        ``corrected_analysis`` and ``full_response``.
    """
    analysis_json = json.dumps(state["analysis_result"], indent=2)

    transcript_lines = []
    for message in state["messages"]:
        if hasattr(message, "content"):
            transcript_lines.append(str(message.content))

    prompt = SELF_CRITIC_PROMPT.format(
        final_json=analysis_json,
        messages="\n".join(transcript_lines),
        logs=state["prepared_logs"],
    )

    full_response = self.llm.invoke(prompt).content

    try:
        verdict = self._parse_critic_response(full_response)
        quality_acceptable, issues, feedback, corrected_json = verdict
    except Exception as e:
        print(f"[Critic] Failed to parse review: {e}")
        # An unreadable critique must not block the pipeline: keep the
        # analysis as it stands and report no issues.
        quality_acceptable = True
        issues = []
        feedback = ""
        corrected_json = state["analysis_result"]

    return {
        "quality_acceptable": quality_acceptable,
        "issues": issues,
        "feedback": feedback,
        "corrected_analysis": corrected_json,
        "full_response": full_response,
    }
781
+
782
+ def _parse_critic_response(self, content: str) -> tuple:
783
+ """Parse critic response and evaluate quality"""
784
+
785
+ # Extract sections
786
+ issues_section = ""
787
+ feedback_section = ""
788
+
789
+ if "## ISSUES FOUND" in content:
790
+ parts = content.split("## ISSUES FOUND")
791
+ if len(parts) > 1:
792
+ issues_part = parts[1].split("##")[0].strip()
793
+ issues_section = issues_part
794
+
795
+ if "## FEEDBACK FOR AGENT" in content:
796
+ parts = content.split("## FEEDBACK FOR AGENT")
797
+ if len(parts) > 1:
798
+ feedback_part = parts[1].split("##")[0].strip()
799
+ feedback_section = feedback_part
800
+
801
+ # Extract corrected JSON
802
+ if "```json" in content:
803
+ json_str = content.split("```json")[1].split("```")[0].strip()
804
+ elif "```" in content:
805
+ json_str = content.split("```")[1].split("```")[0].strip()
806
+ else:
807
+ json_str = "{}"
808
+
809
+ corrected_json = json.loads(json_str)
810
+
811
+ # Evaluate quality based on issues
812
+ issues = self._extract_issues(issues_section)
813
+ quality_acceptable = self._evaluate_quality(issues, issues_section)
814
+
815
+ return quality_acceptable, issues, feedback_section, corrected_json
816
+
817
+ def _extract_issues(self, issues_text: str) -> list:
818
+ """Extract structured issues from text"""
819
+ issues = []
820
+
821
+ # Check for "None" or "no issues"
822
+ if (
823
+ "none" in issues_text.lower()
824
+ and "analysis is acceptable" in issues_text.lower()
825
+ ):
826
+ return issues
827
+
828
+ # Extract issue types
829
+ issue_types = {
830
+ "MISSING_EVENT_IDS": "missing_event_ids",
831
+ "SEVERITY_MISMATCH": "severity_mismatch",
832
+ "IGNORED_TOOLS": "ignored_tool_results",
833
+ "INCOMPLETE_EVENTS": "incomplete_abnormal_events",
834
+ "EVENT_ID_FORMAT": "event_id_format",
835
+ "SCHEMA_ISSUES": "schema_issues",
836
+ "UNDECODED_COMMANDS": "undecoded_commands",
837
+ }
838
+
839
+ for keyword, issue_type in issue_types.items():
840
+ if keyword in issues_text:
841
+ issues.append({"type": issue_type, "text": issues_text})
842
+
843
+ return issues
844
+
845
+ def _evaluate_quality(self, issues: list, issues_text: str) -> bool:
846
+ """Evaluate if quality is acceptable"""
847
+ # If no issues found
848
+ if not issues:
849
+ return True
850
+
851
+ # Critical issue types that trigger iteration
852
+ critical_types = {
853
+ "missing_event_ids",
854
+ "severity_mismatch",
855
+ "ignored_tool_results",
856
+ "incomplete_abnormal_events",
857
+ "undecoded_commands",
858
+ }
859
+
860
+ # Count critical issues
861
+ critical_count = sum(1 for issue in issues if issue["type"] in critical_types)
862
+
863
+ # Quality threshold: max 1 critical issue is acceptable
864
+ if critical_count >= 2:
865
+ return False
866
+
867
+ # Additional check: if issues_text indicates major problems
868
+ if any(
869
+ word in issues_text.lower() for word in ["critical", "major", "serious"]
870
+ ):
871
+ return False
872
+
873
+ return True
874
+
875
+
876
+ class ResultManager:
877
+ """
878
+ Handles saving results to disk and displaying to console
879
+ """
880
+
881
+ def __init__(self, output_root: Path):
882
+ """
883
+ Initialize the result manager
884
+
885
+ Args:
886
+ output_root: Root directory for saving outputs
887
+ """
888
+ self.output_root = output_root
889
+
890
@traceable(name="result_manager_save_results")
def save_results(self, state: AnalysisState) -> AnalysisState:
    """Save analysis results and messages to files.

    Three JSON files sharing one timestamp are written under
    ``<output_root>/<log file stem>/``: the analysis result, the iteration
    history and the serialized message history. A failure while writing is
    printed and reported as an unsuccessful save through
    ``_log_save_metrics`` instead of being raised.

    Args:
        state: Reads ``"log_file"`` and ``"analysis_result"``, plus the
            optional ``"iteration_history"`` and ``"messages"`` entries.

    Returns:
        ``state``, unchanged.
    """
    input_name = os.path.splitext(os.path.basename(state["log_file"]))[0]
    analysis_dir = self.output_root / input_name

    # parents=True: the output root itself may not exist yet, in which case
    # a plain mkdir would raise FileNotFoundError before anything is saved.
    analysis_dir.mkdir(parents=True, exist_ok=True)
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")

    start_time = time.time()
    success = True

    try:
        # Save main analysis result
        out_file = analysis_dir / f"{input_name}_analysis_{ts}.json"
        with open(out_file, "w", encoding="utf-8") as f:
            json.dump(state["analysis_result"], f, indent=2)

        # Save iteration history
        history_file = analysis_dir / f"{input_name}_iterations_{ts}.json"
        with open(history_file, "w", encoding="utf-8") as f:
            json.dump(state.get("iteration_history", []), f, indent=2)

        # Save messages history
        messages_file = analysis_dir / f"{input_name}_messages_{ts}.json"
        serializable_messages = self._serialize_messages(state.get("messages", []))
        with open(messages_file, "w", encoding="utf-8") as f:
            json.dump(serializable_messages, f, indent=2)

    except Exception as e:
        # Best-effort persistence: report the failure and carry on.
        print(f"Error saving results: {e}")
        success = False

    execution_time = time.time() - start_time
    self._log_save_metrics(input_name, execution_time, success)

    return state
927
+
928
def _log_save_metrics(self, input_name: str, execution_time: float, success: bool):
    """Attach a ``result_save_performance`` feedback entry to the active run.

    Nothing is sent when ``get_current_run_tree()`` returns a falsy value.
    Any exception raised while reporting is printed and swallowed.
    """
    try:
        active_run = get_current_run_tree()
        if not active_run:
            return

        if success:
            score = 1.0
        else:
            score = 0.0

        details = {
            "input_name": input_name,
            "execution_time": execution_time,
            "success": success,
        }
        ls_client.create_feedback(
            run_id=active_run.id,
            key="result_save_performance",
            score=score,
            value=details,
        )
    except Exception as e:
        print(f"Failed to log save metrics: {e}")
945
+
946
@traceable(name="result_manager_display_results")
def display_results(self, state: AnalysisState) -> AnalysisState:
    """Print a console report for ``state["analysis_result"]``.

    The report lists the assessment, iteration count, execution time,
    number of events analyzed, the tools invoked according to
    ``state["messages"]``, and one line per abnormal event. ``state`` is
    returned unchanged.
    """
    result = state["analysis_result"]
    rule = "=" * 60

    print("\n" + rule)
    print("ANALYSIS COMPLETE")
    print(rule)

    headline = (
        ("ASSESSMENT", result.get("overall_assessment", "UNKNOWN")),
        ("ITERATIONS", result.get("iteration_count", 1)),
        ("EXECUTION TIME", result.get("execution_time_formatted", "Unknown")),
        ("EVENTS ANALYZED", result.get("total_events_analyzed", "Unknown")),
    )
    for label, value in headline:
        print(f"{label}: {value}")

    # Tools Used
    tools_used = self._extract_tools_used(state.get("messages", []))
    if not tools_used:
        print("TOOLS USED: None")
    else:
        print(f"TOOLS USED: {len(tools_used)} tools")
        print(f" Types: {', '.join(sorted(tools_used))}")

    # Abnormal Events
    abnormal_events = result.get("abnormal_events", [])
    if not abnormal_events:
        print("\nNO ABNORMAL EVENTS")
    else:
        print(f"\nABNORMAL EVENTS: {len(abnormal_events)}")
        for event in abnormal_events:
            level = event.get("severity", "UNKNOWN")
            identifier = event.get("event_id", "N/A")
            print(f" EventID {identifier} [{level}]")

    print(rule)

    return state
986
+
987
def display_batch_summary(
    self, results: List[Dict], start_time: datetime, end_time: datetime
):
    """Print totals for a batch run, followed by the files that failed.

    Each entry of ``results`` is read through its ``"success"``,
    ``"message"`` and ``"log_file"`` keys. "Failed" is every entry whose
    ``"success"`` is falsy; "Skipped" counts entries whose message
    contains the word "Skipped".
    """
    rule = "=" * 60
    total = len(results)
    successful = len([r for r in results if r["success"]])
    skipped = len([r for r in results if "Skipped" in r["message"]])
    failed = total - successful
    duration = (end_time - start_time).total_seconds()

    print("\n" + rule)
    print("BATCH ANALYSIS SUMMARY")
    print(rule)
    for label, count in (
        ("Total files", total),
        ("Successful", successful),
        ("Skipped", skipped),
        ("Failed", failed),
    ):
        print(f"{label}: {count}")
    print(f"Total time: {duration:.2f} seconds ({duration/60:.2f} minutes)")

    if failed > 0:
        print("\nFailed files:")
        for entry in results:
            if entry["success"]:
                continue
            filename = os.path.basename(entry["log_file"])
            print(f" - {filename}: {entry['message']}")

    print(rule + "\n")
1015
+
1016
def get_existing_output(self, log_file: str) -> Optional[str]:
    """Return the newest saved analysis file for ``log_file``, if any.

    Looks in ``<output_root>/<stem>/`` for files named
    ``<stem>_analysis_*.json``, where ``<stem>`` is the log file's base
    name without its extension.

    Args:
        log_file: Path of the log file whose output is being looked up.

    Returns:
        The matching path, as a string, whose name sorts last, or ``None``
        when the directory or a matching file does not exist. Analysis
        files are written with a ``%Y%m%d_%H%M%S`` suffix, so the last
        name in sort order is the most recent one.
    """
    import glob  # function-scope import: needed here only for glob.escape

    input_name = os.path.splitext(os.path.basename(log_file))[0]
    analysis_dir = self.output_root / input_name

    if not analysis_dir.is_dir():
        return None

    # Escape the stem so a log name containing glob metacharacters
    # ("[", "*", "?") is matched literally instead of as a pattern.
    pattern = f"{glob.escape(input_name)}_analysis_*.json"

    # Sort so the choice does not depend on directory listing order.
    matches = sorted(analysis_dir.glob(pattern))
    return str(matches[-1]) if matches else None
1028
+
1029
+ def _serialize_messages(self, messages) -> List[dict]:
1030
+ """Serialize messages for JSON storage"""
1031
+ serializable_messages = []
1032
+ for msg in messages:
1033
+ if isinstance(msg, dict):
1034
+ serializable_messages.append(msg)
1035
+ else:
1036
+ msg_dict = {
1037
+ "type": msg.__class__.__name__,
1038
+ "content": msg.content if hasattr(msg, "content") else str(msg),
1039
+ }
1040
+ if hasattr(msg, "tool_calls") and msg.tool_calls:
1041
+ msg_dict["tool_calls"] = [
1042
+ {"name": tc.get("name", ""), "args": tc.get("args", {})}
1043
+ for tc in msg.tool_calls
1044
+ ]
1045
+ serializable_messages.append(msg_dict)
1046
+
1047
+ return serializable_messages
1048
+
1049
+ def _extract_tools_used(self, messages) -> set:
1050
+ """Extract set of tool names used during analysis"""
1051
+ tools_used = set()
1052
+ for msg in messages:
1053
+ if hasattr(msg, "tool_calls") and msg.tool_calls:
1054
+ for tc in msg.tool_calls:
1055
+ tool_name = tc.get("name", "")
1056
+ if tool_name:
1057
+ tools_used.add(tool_name)
1058
+ return tools_used