Ara Yeroyan committed on
Commit aafcd0d · 1 Parent(s): 85f1ebc

add single smart chatbot

Files changed (1)
  1. smart_chatbot.py +1098 -0
smart_chatbot.py ADDED
@@ -0,0 +1,1098 @@
"""
Intelligent RAG Chatbot with Smart Query Analysis and Conversation Management

This chatbot provides intelligent conversation flow with:
- Smart query analysis and expansion
- Single LangSmith conversation traces
- Local conversation logging
- Context-aware RAG retrieval
- Natural conversation without technical jargon
"""

import os
import json
import time
import logging
from pathlib import Path
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional, TypedDict

import re
from langgraph.graph import StateGraph, END
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage

from src.pipeline import PipelineManager
from src.config.loader import load_config


@dataclass
class QueryAnalysis:
    """Analysis result of a user query"""
    has_district: bool
    has_source: bool
    has_year: bool
    extracted_district: Optional[str]
    extracted_source: Optional[str]
    extracted_year: Optional[str]
    confidence_score: float
    can_answer_directly: bool
    missing_filters: List[str]
    suggested_follow_up: Optional[str]
    expanded_query: Optional[str] = None  # Query expansion for better RAG


class ConversationState(TypedDict):
    """State for the conversation flow"""
    conversation_id: str
    messages: List[Any]
    current_query: str
    query_analysis: Optional[QueryAnalysis]
    rag_query: Optional[str]
    rag_result: Optional[Any]
    final_response: Optional[str]
    conversation_context: Dict[str, Any]  # Store conversation context
    session_start_time: float
    last_ai_message_time: float


class IntelligentRAGChatbot:
    """Intelligent chatbot with smart query analysis and conversation management"""

    def __init__(self, suppress_logs=False):
        """Initialize the intelligent chatbot"""
        # Setup logger to avoid cluttering UI
        self.logger = logging.getLogger(__name__)
        if suppress_logs:
            self.logger.setLevel(logging.CRITICAL)  # Suppress all logs
        else:
            self.logger.setLevel(logging.INFO)
        if not self.logger.handlers:
            handler = logging.StreamHandler()
            formatter = logging.Formatter('%(message)s')
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)

        self.logger.info("🤖 INITIALIZING: Intelligent RAG Chatbot")

        # Load configuration first
        self.config = load_config()

        # Use the same LLM configuration as the existing system
        from auditqa.llm.adapters import get_llm_client

        # Get LLM client using the same configuration
        reader_config = self.config.get("reader", {})
        default_type = reader_config.get("default_type", "INF_PROVIDERS")

        # Convert to lowercase as that's how it's registered
        provider_name = default_type.lower()

        self.llm_adapter = get_llm_client(provider_name, self.config)

        # Create a simple wrapper for LangChain compatibility
        class LLMWrapper:
            def __init__(self, adapter):
                self.adapter = adapter

            def invoke(self, messages):
                # Convert LangChain messages to the format expected by the adapter
                if isinstance(messages, list):
                    # Convert LangChain messages to dict format
                    message_dicts = []
                    for msg in messages:
                        if hasattr(msg, 'content'):
                            # Map each LangChain message type to its chat role
                            if isinstance(msg, HumanMessage):
                                role = "user"
                            elif isinstance(msg, SystemMessage):
                                role = "system"
                            else:
                                role = "assistant"
                            message_dicts.append({"role": role, "content": msg.content})
                        else:
                            message_dicts.append({"role": "user", "content": str(msg)})
                else:
                    # Single message
                    message_dicts = [{"role": "user", "content": str(messages)}]

                # Use the adapter to generate response
                llm_response = self.adapter.generate(message_dicts)

                # Return in LangChain format
                class MockResponse:
                    def __init__(self, content):
                        self.content = content

                return MockResponse(llm_response.content)

        self.llm = LLMWrapper(self.llm_adapter)
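
        # Note: LLMWrapper only duck-types the small slice of the LangChain
        # chat-model interface this module actually uses (invoke() returning an
        # object with a .content attribute); it is not a full BaseChatModel.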

        # Initialize pipeline manager for RAG
        self.logger.info("🔧 PIPELINE: Initializing PipelineManager...")
        self.pipeline_manager = PipelineManager(self.config)

        # Ensure vectorstore is connected
        self.logger.info("🔗 VECTORSTORE: Connecting to Qdrant...")
        try:
            self.pipeline_manager.vectorstore_manager.connect_to_existing()
            self.logger.info("✅ VECTORSTORE: Connected successfully")
        except Exception as e:
            self.logger.error(f"❌ VECTORSTORE: Connection failed: {e}")

        # Fix LLM client to use the same provider as the chatbot
        self.logger.info("🔧 LLM: Fixing PipelineManager LLM client...")
        self.pipeline_manager.llm_client = self.llm_adapter
        self.logger.info("✅ LLM: PipelineManager now uses same LLM as chatbot")

        self.logger.info("✅ PIPELINE: PipelineManager initialized")

        # Available metadata for filtering
        self.available_metadata = {
            'sources': [
                'KCCA', 'MAAIF', 'MWTS', 'Gulu DLG', 'Kalangala DLG', 'Namutumba DLG',
                'Lwengo DLG', 'Kiboga DLG', 'Annual Consolidated OAG', 'Consolidated',
                'Hospital', 'Local Government', 'Ministry, Department and Agency',
                'Project', 'Thematic', 'Value for Money'
            ],
            'years': ['2018', '2019', '2020', '2021', '2022', '2023', '2024', '2025'],
            'districts': [
                'Gulu', 'Kalangala', 'Kampala', 'Namutumba', 'Lwengo', 'Kiboga',
                'Fort Portal', 'Arua', 'Kasese', 'Kabale', 'Masindi', 'Mbale',
                'Jinja', 'Masaka', 'Mbarara', 'KCCA'
            ]
        }

        # Try to load district whitelist from filter_options.json
        try:
            fo = Path("filter_options.json")
            if fo.exists():
                with open(fo) as f:
                    data = json.load(f)
                if isinstance(data, dict) and data.get("districts"):
                    self.district_whitelist = [d.strip() for d in data["districts"] if d]
                else:
                    self.district_whitelist = self.available_metadata['districts']
            else:
                self.district_whitelist = self.available_metadata['districts']
        except Exception:
            self.district_whitelist = self.available_metadata['districts']

        # Enrich whitelist from add_district_metadata.py if available
        try:
            from add_district_metadata import DistrictMetadataProcessor
            proc = DistrictMetadataProcessor()
            names = set()
            for key, mapping in proc.district_mappings.items():
                if getattr(mapping, 'is_district', True):
                    names.add(mapping.name)
            if names:
                # Merge while preserving order: existing first, then new ones not present
                merged = list(self.district_whitelist)
                for n in sorted(names):
                    if n not in merged:
                        merged.append(n)
                self.district_whitelist = merged
            self.logger.info(f"🧭 District whitelist enriched: {len(self.district_whitelist)} entries")
        except Exception as e:
            self.logger.info(f"ℹ️ Could not enrich districts from add_district_metadata: {e}")

        # Get dynamic year list from filter_options.json
        try:
            fo = Path("filter_options.json")
            if fo.exists():
                with open(fo) as f:
                    data = json.load(f)
                if isinstance(data, dict) and data.get("years"):
                    self.year_whitelist = [str(y).strip() for y in data["years"] if y]
                else:
                    self.year_whitelist = self.available_metadata['years']
            else:
                self.year_whitelist = self.available_metadata['years']
        except Exception:
            self.year_whitelist = self.available_metadata['years']

        # Calculate current year dynamically (datetime is imported at module level)
        self.current_year = str(datetime.now().year)
        self.previous_year = str(datetime.now().year - 1)

        # Data context for system prompt
        self.data_context = self._load_data_context()

        # Build the LangGraph
        self.graph = self._build_graph()

        # Conversation logging
        self.conversations_dir = Path("conversations")
        self.conversations_dir.mkdir(exist_ok=True)

    def _load_data_context(self) -> str:
        """Load and analyze data context for system prompt"""
        try:
            # Try to load from generated context file
            context_file = Path("data_context.md")
            if context_file.exists():
                with open(context_file) as f:
                    return f.read()

            # Fallback to basic analysis
            reports_dir = Path("reports")
            testset_dir = Path("outputs/datasets/testset")

            context_parts = []

            # Report analysis
            if reports_dir.exists():
                report_folders = [d for d in reports_dir.iterdir() if d.is_dir()]
                context_parts.append(f"📊 Available Reports: {len(report_folders)} audit report folders")

                # Get year range from folder names
                years = []
                for folder in report_folders:
                    match = re.search(r"\b(20\d{2})\b", folder.name)
                    if match:
                        years.append(match.group(1))

                if years:
                    context_parts.append(f"📅 Years covered: {', '.join(sorted(set(years)))}")

            # Test dataset analysis
            if testset_dir.exists():
                test_files = list(testset_dir.glob("*.json"))
                context_parts.append(f"🧪 Test dataset: {len(test_files)} files with sample questions")

            return "\n".join(context_parts) if context_parts else "📊 Audit report database with comprehensive coverage"

        except Exception as e:
            self.logger.warning(f"⚠️ Could not load data context: {e}")
            return "📊 Comprehensive audit report database"

    def _build_graph(self) -> StateGraph:
        """Build the LangGraph for intelligent conversation flow"""

        # Define the graph
        workflow = StateGraph(ConversationState)

        # Add nodes
        workflow.add_node("analyze_query", self._analyze_query)
        workflow.add_node("decide_action", self._decide_action)
        workflow.add_node("perform_rag", self._perform_rag)
        workflow.add_node("ask_follow_up", self._ask_follow_up)
        workflow.add_node("generate_response", self._generate_response)

        # Add edges
        workflow.add_edge("analyze_query", "decide_action")

        # Conditional edges from decide_action
        workflow.add_conditional_edges(
            "decide_action",
            self._should_perform_rag,
            {
                "rag": "perform_rag",
                "follow_up": "ask_follow_up"
            }
        )

        # From perform_rag, go to generate_response
        workflow.add_edge("perform_rag", "generate_response")

        # From ask_follow_up, end
        workflow.add_edge("ask_follow_up", END)

        # From generate_response, end
        workflow.add_edge("generate_response", END)

        # Set entry point
        workflow.set_entry_point("analyze_query")

        return workflow.compile()
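
    # Compiled graph topology (sketched from the edges registered above):
    #
    #   analyze_query -> decide_action --("rag")-------> perform_rag -> generate_response -> END
    #                                  \--("follow_up")-> ask_follow_up -> END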

    def _extract_districts_list(self, text: str) -> List[str]:
        """Extract one or more districts from free text using whitelist matching.

        - Case-insensitive substring match for each known district name
        - Handles multi-district inputs like "Lwengo Kiboga District & Namutumba"
        """
        if not text:
            return []
        q = text.lower()
        found: List[str] = []
        for name in self.district_whitelist:
            n = name.lower()
            if n in q:
                # Map Kampala -> KCCA canonical
                canonical = 'KCCA' if name.lower() == 'kampala' else name
                if canonical not in found:
                    found.append(canonical)
        return found
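
    # Example with the default whitelist (a sketch of expected behavior; note
    # the result order follows the whitelist, not the input text):
    #   _extract_districts_list("Lwengo Kiboga District & Namutumba")
    #       -> ['Namutumba', 'Lwengo', 'Kiboga']
    #   _extract_districts_list("reports for Kampala") -> ['KCCA']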

    def _extract_years_list(self, text: str) -> List[str]:
        """Extract year list from text, supporting forms like '2022 / 23', '2022-2023', '2022–23'."""
        if not text:
            return []
        years: List[str] = []
        q = text
        # Full 4-digit years
        for y in re.findall(r"\b(20\d{2})\b", q):
            if y not in years:
                years.append(y)
        # Shorthand like 2022/23 or 2022-23
        for m in re.finditer(r"\b(20\d{2})\s*[-/–]\s*(\d{2})\b", q):
            y1 = m.group(1)
            y2_short = int(m.group(2))
            y2 = f"20{y2_short:02d}"
            for y in [y1, y2]:
                if y not in years:
                    years.append(y)
        return years
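
    # Example, sketching the expected output:
    #   _extract_years_list("FY 2022/23 and 2024") -> ['2022', '2024', '2023']
    # (full 4-digit matches are collected first, then the '/23' shorthand
    # expands to '2023')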

    def _analyze_query(self, state: ConversationState) -> ConversationState:
        """Analyze the user query with conversation context"""

        query = state["current_query"]
        conversation_context = state.get("conversation_context", {})

        self.logger.info(f"🧠 QUERY ANALYSIS: Starting analysis for: '{query[:50]}...'")

        # Build conversation context for analysis
        context_info = ""
        if conversation_context:
            context_info = "\n\nConversation context:\n"
            for key, value in conversation_context.items():
                if value:
                    context_info += f"- {key}: {value}\n"

        # Also include recent conversation messages for better context
        recent_messages = state.get("messages", [])
        if recent_messages and len(recent_messages) > 1:
            context_info += "\nRecent conversation:\n"
            # Get last 3 messages for context
            for msg in recent_messages[-3:]:
                if hasattr(msg, 'content'):
                    role = "User" if isinstance(msg, HumanMessage) else "Assistant"
                    context_info += f"- {role}: {msg.content[:100]}...\n"

        # Create analysis prompt with data context
        analysis_prompt = ChatPromptTemplate.from_messages([
            SystemMessage(content=f"""You are an expert at analyzing audit report queries. Your job is to extract specific information and determine if a query can be answered directly.

{self.data_context}

DISTRICT RECOGNITION RULES:
- Kampala = KCCA (Kampala Capital City Authority)
- Available districts: {', '.join(self.district_whitelist[:15])}... (and {len(self.district_whitelist)-15} more)
- DLG = District Local Government
- Uganda has {len(self.district_whitelist)} districts - recognize common ones

SOURCE RECOGNITION RULES:
- KCCA = Kampala Capital City Authority
- MAAIF = Ministry of Agriculture, Animal Industry and Fisheries
- MWTS = Ministry of Works and Transport
- OAG = Office of the Auditor General
- Consolidated = Annual Consolidated reports

YEAR RECOGNITION RULES:
- Available years: {', '.join(self.year_whitelist)}
- Current year is {self.current_year} - use this to reason about relative years
- If user mentions "last year", "previous year" - infer {self.previous_year}
- If user mentions "this year", "current year" - infer {self.current_year}

Analysis rules:
1. Be SMART - if you have enough context to search, do it
2. Use conversation context to fill in missing information
3. For budget/expenditure queries, try to infer missing details from context
4. Current year is {self.current_year} - use this to reason about relative years
5. If user mentions "last year", "previous year" - infer {self.previous_year}
6. If user mentions "this year", "current year" - infer {self.current_year}
7. If user mentions a department/ministry, infer the source
8. If user is getting frustrated or asking for results, proceed with RAG even if not perfect
9. Recognize Kampala as a district (KCCA)

IMPORTANT: You must respond with ONLY valid JSON. No additional text.

Return your analysis as JSON with these exact fields:
{{
    "has_district": boolean,
    "has_source": boolean,
    "has_year": boolean,
    "extracted_district": "string or null",
    "extracted_source": "string or null",
    "extracted_year": "string or null",
    "confidence_score": 0.0-1.0,
    "can_answer_directly": boolean,
    "missing_filters": ["list", "of", "missing", "filters"],
    "suggested_follow_up": "string or null",
    "expanded_query": "string or null"
}}

The expanded_query should be a natural language query that combines the original question with any inferred context for better RAG retrieval."""),
            HumanMessage(content=f"Analyze this query: '{query}'{context_info}")
        ])

        # Get analysis from LLM
        response = self.llm.invoke(analysis_prompt.format_messages())

        try:
            # Clean the response content to extract JSON
            content = response.content.strip()

            # Try to find JSON in the response
            if content.startswith('{') and content.endswith('}'):
                json_content = content
            else:
                # Try to extract JSON from the response (re is imported at module level)
                json_match = re.search(r'\{.*\}', content, re.DOTALL)
                if json_match:
                    json_content = json_match.group()
                else:
                    raise json.JSONDecodeError("No JSON found in response", content, 0)

            # Parse JSON response
            analysis_data = json.loads(json_content)

            query_analysis = QueryAnalysis(
                has_district=analysis_data.get("has_district", False),
                has_source=analysis_data.get("has_source", False),
                has_year=analysis_data.get("has_year", False),
                extracted_district=analysis_data.get("extracted_district"),
                extracted_source=analysis_data.get("extracted_source"),
                extracted_year=analysis_data.get("extracted_year"),
                confidence_score=analysis_data.get("confidence_score", 0.0),
                can_answer_directly=analysis_data.get("can_answer_directly", False),
                missing_filters=analysis_data.get("missing_filters", []),
                suggested_follow_up=analysis_data.get("suggested_follow_up"),
                expanded_query=analysis_data.get("expanded_query")
            )

        except (json.JSONDecodeError, KeyError, AttributeError) as e:
            self.logger.info(f"⚠️ JSON parsing failed: {e}")
            # Fallback analysis - be more permissive
            query_lower = query.lower()

            # Simple keyword matching - improved district recognition
            has_district = any(district.lower() in query_lower for district in [
                'gulu', 'kalangala', 'kampala', 'namutumba', 'lwengo', 'kiboga', 'kcca', 'maaif', 'mwts'
            ])

            # Special case: Kampala = KCCA
            if 'kampala' in query_lower and not has_district:
                has_district = True

            has_source = any(source.lower() in query_lower for source in [
                'kcca', 'maaif', 'mwts', 'gulu', 'kalangala', 'consolidated', 'oag', 'government'
            ])

            # Check for year mentions using dynamic year list
            has_year = any(year in query_lower for year in self.year_whitelist)

            # Also check for explicit relative year terms
            has_year = has_year or any(term in query_lower for term in [
                'this year', 'last year', 'previous year', 'current year'
            ])

            # Extract specific values
            extracted_district = None
            extracted_source = None
            extracted_year = None

            # Extract districts using comprehensive whitelist
            for district_name in self.district_whitelist:
                if district_name.lower() in query_lower:
                    extracted_district = district_name
                    break

            # Also check common aliases
            district_aliases = {
                'kampala': 'Kampala',
                'kcca': 'Kampala',
                'gulu': 'Gulu',
                'kalangala': 'Kalangala'
            }
            for alias, full_name in district_aliases.items():
                if alias in query_lower and not extracted_district:
                    extracted_district = full_name
                    break

            for source in ['kcca', 'maaif', 'mwts', 'consolidated', 'oag']:
                if source in query_lower:
                    extracted_source = source.upper()
                    break

            # Extract year using dynamic year list
            for year in self.year_whitelist:
                if year in query_lower:
                    extracted_year = year
                    has_year = True
                    break

            # Only handle relative year terms if explicitly mentioned
            if not extracted_year:
                if 'last year' in query_lower or 'previous year' in query_lower:
                    extracted_year = self.previous_year
                    has_year = True
                elif 'this year' in query_lower or 'current year' in query_lower:
                    extracted_year = self.current_year
                    has_year = True
                elif 'recent' in query_lower and 'year' in query_lower:
                    # Use the most recent year from available data
                    extracted_year = max(self.year_whitelist) if self.year_whitelist else self.previous_year
                    has_year = True

            # Be more permissive - if we have some context, try to answer
            missing_filters = []
            if not has_district:
                missing_filters.append("district")
            if not has_source:
                missing_filters.append("source")
            if not has_year:
                missing_filters.append("year")

            # If user seems frustrated or asking for results, be more permissive
            frustration_indicators = ['already', 'just said', 'specified', 'provided', 'crazy', 'answer']
            is_frustrated = any(indicator in query_lower for indicator in frustration_indicators)

            can_answer_directly = len(missing_filters) <= 1 or is_frustrated  # More permissive
            confidence_score = 0.8 if can_answer_directly else 0.3

            # Generate follow-up suggestion
            if missing_filters and not is_frustrated:
                if "district" in missing_filters and "source" in missing_filters:
                    suggested_follow_up = "I'd be happy to help you with that information! Could you please specify which district and department/ministry you're asking about?"
                elif "district" in missing_filters:
                    suggested_follow_up = "Thanks for your question! Could you please specify which district you're asking about?"
                elif "source" in missing_filters:
                    suggested_follow_up = "I can help you with that! Could you please specify which department or ministry you're asking about?"
                elif "year" in missing_filters:
                    suggested_follow_up = "Great question! Could you please specify which year you're interested in?"
                else:
                    suggested_follow_up = "Could you please provide more specific details to help me give you a precise answer?"
            else:
                suggested_follow_up = None

            # Create expanded query
            expanded_query = query
            if extracted_district:
                expanded_query += f" for {extracted_district} district"
            if extracted_source:
                expanded_query += f" from {extracted_source}"
            if extracted_year:
                expanded_query += f" in {extracted_year}"

            query_analysis = QueryAnalysis(
                has_district=has_district,
                has_source=has_source,
                has_year=has_year,
                extracted_district=extracted_district,
                extracted_source=extracted_source,
                extracted_year=extracted_year,
                confidence_score=confidence_score,
                can_answer_directly=can_answer_directly,
                missing_filters=missing_filters,
                suggested_follow_up=suggested_follow_up,
                expanded_query=expanded_query
            )

        # Update conversation context
        if query_analysis.extracted_district:
            conversation_context["district"] = query_analysis.extracted_district
        if query_analysis.extracted_source:
            conversation_context["source"] = query_analysis.extracted_source
        if query_analysis.extracted_year:
            conversation_context["year"] = query_analysis.extracted_year

        state["query_analysis"] = query_analysis
        state["conversation_context"] = conversation_context

        self.logger.info(f"✅ ANALYSIS COMPLETE: district={query_analysis.has_district}, source={query_analysis.has_source}, year={query_analysis.has_year}")
        self.logger.info(f"📈 Confidence: {query_analysis.confidence_score:.2f}, Can answer directly: {query_analysis.can_answer_directly}")
        if query_analysis.expanded_query:
            self.logger.info(f"🔄 Expanded query: {query_analysis.expanded_query}")

        return state

    def _decide_action(self, state: ConversationState) -> ConversationState:
        """Decide what action to take based on query analysis"""

        analysis = state["query_analysis"]

        # Add decision reasoning
        if analysis.can_answer_directly and analysis.confidence_score > 0.7:
            self.logger.info("🚀 DECISION: Query is complete, proceeding with RAG")
            self.logger.info(f"📊 REASONING: Confidence={analysis.confidence_score:.2f}, Missing filters={len(analysis.missing_filters or [])}")
            if analysis.missing_filters:
                self.logger.info(f"📋 Missing: {', '.join(analysis.missing_filters)}")
            else:
                self.logger.info("✅ All required information available")
        else:
            self.logger.info("❓ DECISION: Query incomplete, asking follow-up")
            self.logger.info(f"📊 REASONING: Confidence={analysis.confidence_score:.2f}, Missing filters={len(analysis.missing_filters or [])}")
            if analysis.missing_filters:
                self.logger.info(f"📋 Missing: {', '.join(analysis.missing_filters)}")
            self.logger.info(f"💡 Follow-up needed: {analysis.suggested_follow_up}")

        return state

    def _should_perform_rag(self, state: ConversationState) -> str:
        """Determine whether to perform RAG or ask follow-up"""

        analysis = state["query_analysis"]
        conversation_context = state.get("conversation_context", {})
        recent_messages = state.get("messages", [])

        # Check if we have enough context from conversation history
        has_district_context = analysis.has_district or conversation_context.get("district")
        has_source_context = analysis.has_source or conversation_context.get("source")
        has_year_context = analysis.has_year or conversation_context.get("year")

        # Count how many context pieces we have
        context_count = sum([bool(has_district_context), bool(has_source_context), bool(has_year_context)])

        # For PDM queries, we need more specific information
        current_query = state["current_query"].lower()

        # Check if this is a PDM query by looking at current query OR recent conversation
        is_pdm_query = "pdm" in current_query or "parish development" in current_query

        # Also check recent messages for PDM context
        if not is_pdm_query and recent_messages:
            for msg in recent_messages[-3:]:  # Check last 3 messages
                if isinstance(msg, HumanMessage) and ("pdm" in msg.content.lower() or "parish development" in msg.content.lower()):
                    is_pdm_query = True
                    break

        if is_pdm_query:
            # For PDM queries, we need district AND year to be specific enough.
            # They must be explicitly provided in the current conversation, not just inferred.
            if has_district_context and has_year_context:
                # Check if both district and year are explicitly mentioned in recent messages
                explicit_district = False
                explicit_year = False

                for msg in recent_messages[-3:]:  # Check last 3 messages
                    if isinstance(msg, HumanMessage):
                        content = msg.content.lower()
                        if any(district in content for district in ["gulu", "kalangala", "kampala", "namutumba"]):
                            explicit_district = True
                        if any(year in content for year in ["2022", "2023", "2022/23", "2023/24"]):
                            explicit_year = True

                if explicit_district and explicit_year:
                    self.logger.info("🚀 DECISION: PDM query with explicit district and year, proceeding with RAG")
                    self.logger.info(f"📊 REASONING: PDM query - explicit_district={explicit_district}, explicit_year={explicit_year}")
                    return "rag"
                else:
                    self.logger.info("❓ DECISION: PDM query needs explicit district and year, asking follow-up")
                    self.logger.info(f"📊 REASONING: PDM query - explicit_district={explicit_district}, explicit_year={explicit_year}")
                    return "follow_up"
            else:
                self.logger.info("❓ DECISION: PDM query needs more specific info, asking follow-up")
                self.logger.info(f"📊 REASONING: PDM query - district={has_district_context}, year={has_year_context}")
                return "follow_up"

        # For general queries, be more conservative - need at least 2 pieces AND high confidence
        if context_count >= 2 and analysis.confidence_score > 0.8:
            self.logger.info("🚀 DECISION: Sufficient context with high confidence, proceeding with RAG")
            self.logger.info(f"📊 REASONING: Context pieces: district={has_district_context}, source={has_source_context}, year={has_year_context}, confidence={analysis.confidence_score}")
            return "rag"

        # If the user replies with a short acknowledgement (e.g., "no"), proceed with RAG
        if recent_messages and len(recent_messages) >= 3:  # Need more messages to detect frustration
            last_user_message = None
            for msg in reversed(recent_messages):
                if isinstance(msg, HumanMessage):
                    last_user_message = msg.content.lower().strip()
                    break

            if last_user_message and len(last_user_message) < 10 and any(word in last_user_message for word in ["no", "yes", "ok", "sure"]):
                self.logger.info("🚀 DECISION: User seems frustrated with short response, proceeding with RAG")
                return "rag"

        # Original logic for direct answers
        if analysis.can_answer_directly and analysis.confidence_score > 0.7:
            return "rag"
        else:
            return "follow_up"

    def _ask_follow_up(self, state: ConversationState) -> ConversationState:
        """Generate a follow-up question to clarify missing information"""

        analysis = state["query_analysis"]
        current_query = state["current_query"].lower()
        conversation_context = state.get("conversation_context", {})

        # Check if this is a PDM query
        is_pdm_query = "pdm" in current_query or "parish development" in current_query

        if is_pdm_query:
            # Generate PDM-specific follow-up questions
            missing_info = []

            if not analysis.has_district and not conversation_context.get("district"):
                missing_info.append("district (e.g., Gulu, Kalangala)")

            if not analysis.has_year and not conversation_context.get("year"):
                missing_info.append("year (e.g., 2022, 2023)")

            if missing_info:
                follow_up_message = f"For PDM administrative costs information, I need to know the {', '.join(missing_info)}. Could you please specify these details?"
            else:
                follow_up_message = "Could you please provide more specific details about the PDM administrative costs you're looking for?"
        else:
            # Use the original follow-up logic
            if analysis.suggested_follow_up:
                follow_up_message = analysis.suggested_follow_up
            else:
                follow_up_message = "Could you please provide more specific details to help me give you a precise answer?"

        state["final_response"] = follow_up_message
        state["last_ai_message_time"] = time.time()

        return state

    def _build_comprehensive_query(self, current_query: str, analysis, conversation_context: dict, recent_messages: list) -> str:
        """Build a better RAG query from the conversation.

        - If the latest message is a short modifier (e.g., "financial"), merge it into the last substantive question.
        - If the latest message looks like filters (district/year), keep the last question unchanged.
        - Otherwise, use the current message.
        """

        def is_interrogative(text: str) -> bool:
            t = text.lower().strip()
            return any(t.startswith(w) for w in ["what", "how", "why", "when", "where", "which", "who"]) or t.endswith("?")

        def is_filter_like(text: str) -> bool:
            t = text.lower()
            if "district" in t:
                return True
            if re.search(r"\b20\d{2}\b", t) or re.search(r"20\d{2}\s*[-/–]\s*\d{2}\b", t):
                return True
            if self._extract_districts_list(text):
                return True
            return False

        # Find last substantive user question
        last_question = None
        for msg in reversed(recent_messages[:-1] if recent_messages else []):
            if isinstance(msg, HumanMessage):
                if is_interrogative(msg.content) and len(msg.content.strip()) > 15:
                    last_question = msg.content.strip()
                    break

        cq = current_query.strip()
        words = cq.split()
        is_short_modifier = (not is_interrogative(cq)) and (len(words) <= 3)

        if is_filter_like(cq) and last_question:
            comprehensive_query = last_question
        elif is_short_modifier and last_question:
            modifier = cq
            if modifier.lower() in last_question.lower():
                comprehensive_query = last_question
            else:
                if last_question.endswith('?'):
                    comprehensive_query = last_question[:-1] + f" for {modifier}?"
                else:
                    comprehensive_query = last_question + f" for {modifier}"
        else:
            comprehensive_query = current_query

        self.logger.info(f"🔄 COMPREHENSIVE QUERY: '{comprehensive_query}'")
        return comprehensive_query

    def _rewrite_query_with_llm(self, recent_messages: list, draft_query: str) -> str:
        """Use the LLM to rewrite a clean, focused RAG query from the conversation.

        Rules enforced in the prompt:
        - Keep the user's main information need from the last substantive question
        - Integrate short modifiers (e.g., "financial") into that question when appropriate
        - Do NOT include filter text (years/districts/sources) in the query; those are handled separately
        - Return a single plain sentence only (no quotes, no markdown)
        """
        try:
            # Build a compact conversation transcript (last 6 messages max)
            convo_lines = []
            for msg in recent_messages[-6:]:
                if isinstance(msg, HumanMessage):
                    convo_lines.append(f"User: {msg.content}")
                elif isinstance(msg, AIMessage):
                    convo_lines.append(f"Assistant: {msg.content}")

            convo_text = "\n".join(convo_lines)

            # DECISION GUIDANCE (kept for reference; not sent to the LLM):
            # - If the latest user message looks like a modifier (e.g., 'financial'), merge it into the best prior question.
            # - If the latest message provides filters (e.g., districts, years), DO NOT embed them; keep the base question.
            # - If the latest message itself is a full, clear question, use it.
            # - If the draft query is already good, you may refine its clarity but keep the same intent.

            prompt = ChatPromptTemplate.from_messages([
                SystemMessage(content=(
                    "ROLE: Query Rewriter for a RAG system.\n\n"
                    "PRIMARY OBJECTIVE:\n- Produce ONE retrieval-focused sentence that best represents the user's information need.\n"
                    "- Maximize recall of relevant evidence; be specific but not overconstrained.\n\n"
                    "INPUTS:\n- Conversation with User and Assistant turns (latest last).\n- A draft query (heuristic).\n\n"
                    "OPERATING PRINCIPLES:\n"
                    "1) Use the last substantive USER question as the backbone of intent.\n"
                    "2) Merge helpful domain modifiers from any USER turns (financial, procurement, risk) when they sharpen focus; ignore if not helpful.\n"
                    "3) Treat Assistant messages as guidance only; if the user later provided filters (years, districts, sources), DO NOT embed them in the query (filters are applied separately).\n"
                    "4) Remove meta-verbs like 'summarize', 'list', 'explain', 'compare' from the query.\n"
                    "5) Prefer content-bearing terms (topics, programs, outcomes) over task phrasing.\n"
                    "6) If the latest user message is filters-only, keep the prior substantive question unchanged.\n"
                    "7) If the draft query is already strong, refine wording for clarity but keep the same intent.\n\n"
                    "EXAMPLES (multi-turn):\n"
                    "A)\nUser: What are the top 5 priorities for improving audit procedures?\nAssistant: Could you specify the scope (e.g., financial, procurement)?\nUser: Financial\n→ Output: Top priorities for improving financial audit procedures.\n\n"
                    "B)\nUser: How were PDM administrative costs utilized and what was the impact of shortfalls?\nAssistant: Please specify district/year for precision.\nUser: Namutumba and Lwengo Districts (2022/23)\n→ Output: How were PDM administrative costs utilized and what was the impact of shortfalls.\n(Exclude districts/years; they are filters.)\n\n"
                    "C)\nUser: Summarize risk management issues in audit reports.\n→ Output: Key risk management issues in audit reports.\n\n"
                    "CONSTRAINTS:\n- Do NOT include filters (years, districts, sources, filenames).\n- Do NOT include quotes/markdown/bullets or multiple sentences.\n- Return exactly one plain sentence."
                )),
                HumanMessage(content=(
                    f"Conversation (most recent last):\n{convo_text}\n\n"
                    f"Draft query: {draft_query}\n\n"
                    "Rewrite the single best retrieval query sentence now:"
                )),
            ])

            # Add a timeout for the LLM call.
            # Note: signal.SIGALRM is POSIX-only and must run in the main thread.
            import signal

            def timeout_handler(signum, frame):
                raise TimeoutError("LLM rewrite timeout")

            # Set 10 second timeout
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(10)

            try:
                resp = self.llm.invoke(prompt.format_messages())
                signal.alarm(0)  # Cancel timeout

                rewritten = getattr(resp, 'content', '').strip()
                # Basic sanitization: keep it one line
                rewritten = rewritten.replace('\n', ' ').strip()
                if rewritten and len(rewritten) > 5:  # Basic quality check
                    self.logger.info(f"🛠️ LLM REWRITER: '{rewritten}'")
                    return rewritten
                else:
                    self.logger.info("⚠️ LLM rewrite too short/empty, using draft query")
                    return draft_query
            except TimeoutError:
                signal.alarm(0)
                self.logger.info("⚠️ LLM rewrite timeout after 10s, using draft query")
                return draft_query
            except Exception as e:
                signal.alarm(0)
                self.logger.info(f"⚠️ LLM rewrite failed, using draft query. Error: {e}")
                return draft_query
        except Exception as e:
            self.logger.info(f"⚠️ LLM rewrite setup failed, using draft query. Error: {e}")
            return draft_query

    def _perform_rag(self, state: ConversationState) -> ConversationState:
        """Perform RAG retrieval with smart query expansion"""

        query = state["current_query"]
        analysis = state["query_analysis"]
        conversation_context = state.get("conversation_context", {})
        recent_messages = state.get("messages", [])

        # Build comprehensive query from conversation history
        draft_query = self._build_comprehensive_query(query, analysis, conversation_context, recent_messages)
        # Let the LLM rewrite a clean, focused search query
        search_query = self._rewrite_query_with_llm(recent_messages, draft_query)

        self.logger.info(f"🔍 RAG RETRIEVAL: Starting for query: '{search_query[:50]}...'")
        self.logger.info(f"📊 Analysis: district={analysis.has_district}, source={analysis.has_source}, year={analysis.has_year}")

        try:
            # Build filters from analysis and conversation context
            filters = {}

            # Use conversation context to fill in missing filters
            source = analysis.extracted_source or conversation_context.get("source")
            district = analysis.extracted_district or conversation_context.get("district")
            year = analysis.extracted_year or conversation_context.get("year")

            if source:
                filters["source"] = [source]  # Qdrant expects lists
                self.logger.info(f"🎯 Filter: source={source}")

            if year:
                filters["year"] = [year]
                self.logger.info(f"🎯 Filter: year={year}")

            if district:
                # Map district to source if needed
                if district.upper() == "KAMPALA":
                    filters["source"] = ["KCCA"]
                    self.logger.info(f"🎯 Filter: district={district} -> source=KCCA")
                elif district.upper() in ["GULU", "KALANGALA"]:
                    filters["source"] = [f"{district.upper()} DLG"]
                    self.logger.info(f"🎯 Filter: district={district} -> source={district.upper()} DLG")

            # Run RAG pipeline with correct parameters
            result = self.pipeline_manager.run(
                query=search_query,  # Use expanded query
                sources=filters.get("source") if filters.get("source") else None,
                auto_infer_filters=False,  # Our agent already handled filter inference
                filters=filters if filters else None
            )

            self.logger.info(f"✅ RAG completed: Found {len(result.sources)} sources")
            self.logger.info(f"⏱️ Execution time: {result.execution_time:.2f}s")

            # Store RAG result in state
            state["rag_result"] = result
            state["rag_query"] = search_query

        except Exception as e:
            self.logger.error(f"❌ RAG retrieval failed: {e}")
            state["rag_result"] = None

        return state
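
    # Example resulting filters (values are lists, which Qdrant expects):
    #   district="Gulu", year="2023"  -> {"source": ["GULU DLG"], "year": ["2023"]}
    #   district="Kampala"            -> {"source": ["KCCA"]}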

    def _generate_response(self, state: ConversationState) -> ConversationState:
        """Generate final response using RAG results"""

        rag_result = state["rag_result"]

        # Guard against a failed retrieval (rag_result is None on RAG errors)
        if rag_result is None:
            state["final_response"] = "I'm sorry, I couldn't retrieve that information right now. Please try again."
            state["last_ai_message_time"] = time.time()
            return state

        self.logger.info(f"📝 RESPONSE: Using RAG result ({len(rag_result.answer)} chars)")

        # Store the final response directly from RAG
        state["final_response"] = rag_result.answer
        state["last_ai_message_time"] = time.time()

        return state

    def chat(self, user_input: str, conversation_id: str = "default") -> Dict[str, Any]:
        """Main chat interface with conversation management"""

        self.logger.info(f"💬 CHAT: Processing user input: '{user_input[:50]}...'")
        self.logger.info(f"📊 Session: {conversation_id}")

        # Load conversation history
        conversation_file = self.conversations_dir / f"{conversation_id}.json"
        conversation = self._load_conversation(conversation_file)

        # Add user message to conversation
        conversation["messages"].append(HumanMessage(content=user_input))

        self.logger.info("🔄 LANGGRAPH: Starting graph execution")

        # Prepare state for LangGraph with conversation context
        state = ConversationState(
            conversation_id=conversation_id,
            messages=conversation["messages"],
            current_query=user_input,
            query_analysis=None,
            rag_query=None,
            conversation_context=conversation.get("context", {}),
            rag_result=None,
            final_response=None,
            session_start_time=conversation["session_start_time"],
            last_ai_message_time=conversation["last_ai_message_time"]
        )

        # Run the graph
        final_state = self.graph.invoke(state)

        # Add the AI response to conversation
        if final_state["final_response"]:
            conversation["messages"].append(AIMessage(content=final_state["final_response"]))

        # Update conversation state
        conversation["last_ai_message_time"] = final_state["last_ai_message_time"]
        conversation["context"] = final_state["conversation_context"]

        # Save conversation
        self._save_conversation(conversation_file, conversation)

        self.logger.info("✅ LANGGRAPH: Graph execution completed")
        self.logger.info("🎯 CHAT COMPLETE: Response ready")

        # Return both the response text and the RAG result for the UI
        return {
            'response': final_state["final_response"] or "I apologize, but I couldn't process your request.",
            'rag_result': final_state["rag_result"],
            'actual_rag_query': final_state.get("rag_query", "")
        }
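
    # Example return shape:
    #   {'response': '...answer or follow-up text...',
    #    'rag_result': <pipeline result object, or None if no RAG ran>,
    #    'actual_rag_query': 'the rewritten retrieval query (empty if no RAG ran)'}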

    def _load_conversation(self, conversation_file: Path) -> Dict[str, Any]:
        """Load conversation from file"""
        if conversation_file.exists():
            try:
                with open(conversation_file) as f:
                    data = json.load(f)
                # Convert message dicts back to LangChain messages
                messages = []
                for msg_data in data.get("messages", []):
                    if msg_data["type"] == "human":
                        messages.append(HumanMessage(content=msg_data["content"]))
                    elif msg_data["type"] == "ai":
                        messages.append(AIMessage(content=msg_data["content"]))
                data["messages"] = messages
                return data
            except Exception as e:
                self.logger.info(f"⚠️ Could not load conversation: {e}")

        # Return default conversation
        return {
            "messages": [],
            "session_start_time": time.time(),
            "last_ai_message_time": time.time(),
            "context": {}
        }

    def _save_conversation(self, conversation_file: Path, conversation: Dict[str, Any]):
        """Save conversation to file"""
        try:
            # Convert LangChain messages to serializable format
            messages_data = []
            for msg in conversation["messages"]:
                if isinstance(msg, HumanMessage):
                    messages_data.append({"type": "human", "content": msg.content})
                elif isinstance(msg, AIMessage):
                    messages_data.append({"type": "ai", "content": msg.content})

            data = {
                "messages": messages_data,
                "session_start_time": conversation["session_start_time"],
                "last_ai_message_time": conversation["last_ai_message_time"],
                "context": conversation.get("context", {}),
                "last_updated": datetime.now().isoformat()
            }

            with open(conversation_file, "w") as f:
                json.dump(data, f, indent=2)

        except Exception as e:
            self.logger.info(f"⚠️ Could not save conversation: {e}")


def get_chatbot():
    """Get chatbot instance"""
    return IntelligentRAGChatbot()


if __name__ == "__main__":
    # Test the chatbot
    chatbot = IntelligentRAGChatbot()

    # Test conversation
    test_queries = [
        "How much was the budget allocation for government salary payroll management?",
        "Namutumba district in 2023",
        "KCCA"
    ]

    for query in test_queries:
        # chat() returns a dict; print the response text
        # (module-level code has no self.logger)
        print(f"\n{'=' * 50}")
        print(f"User: {query}")
        result = chatbot.chat(query)
        print(f"Bot: {result['response']}")