KeenWoo committed
Commit 2192634 · verified · 1 Parent(s): 2c0ca80

Delete alz_companion/agent.py

Files changed (1)
  1. alz_companion/agent.py +0 -973
alz_companion/agent.py DELETED
@@ -1,973 +0,0 @@
- from __future__ import annotations
- import os
- import json
- import base64
- import time
- import tempfile
- import re
- import random  # for randomly selecting songs
- import shutil  # used to remove stale FAISS index directories before rebuilding
-
- from typing import List, Dict, Any, Optional
- from sentence_transformers import CrossEncoder
-
- try:
-     from openai import OpenAI
- except Exception:
-     OpenAI = None
-
- from langchain.schema import Document
- from langchain_community.vectorstores import FAISS
- from langchain_community.embeddings import HuggingFaceEmbeddings
-
- try:
-     from gtts import gTTS
- except Exception:
-     gTTS = None
-
- from .prompts import (
-     SYSTEM_TEMPLATE,
-     ROUTER_PROMPT,
-     SAFETY_GUARDRAILS, RISK_FOOTER, render_emotion_guidelines,
-     NLU_ROUTER_PROMPT, SPECIALIST_CLASSIFIER_PROMPT,
-     ANSWER_TEMPLATE_CALM,
-     ANSWER_TEMPLATE_ADQ, ANSWER_TEMPLATE_ADQ_MODERATE, ANSWER_TEMPLATE_ADQ_ADVANCED,
-     ANSWER_TEMPLATE_FACTUAL, ANSWER_TEMPLATE_FACTUAL_MULTI, ANSWER_TEMPLATE_SUMMARIZE,
-     ANSWER_TEMPLATE_GENERAL_KNOWLEDGE, ANSWER_TEMPLATE_GENERAL,
-     QUERY_EXPANSION_PROMPT,
-     MUSIC_PREAMBLE_PROMPT,
- )
-
-
- _BEHAVIOR_ALIASES = {
-     "repeating questions": "repetitive_questioning", "repetitive questions": "repetitive_questioning",
-     "confusion": "confusion", "wandering": "wandering", "agitation": "agitation",
-     "accusing people": "false_accusations", "false accusations": "false_accusations",
-     "memory loss": "address_memory_loss", "seeing things": "hallucinations_delusions",
-     "hallucinations": "hallucinations_delusions", "delusions": "hallucinations_delusions",
-     "trying to leave": "exit_seeking", "wanting to go home": "exit_seeking",
-     "aphasia": "aphasia", "word finding": "aphasia", "withdrawn": "withdrawal",
-     "apathy": "apathy", "affection": "affection", "sleep problems": "sleep_disturbance",
-     "anxiety": "anxiety", "sadness": "depression_sadness", "depression": "depression_sadness",
-     "checking orientation": "orientation_check", "misidentification": "misidentification",
-     "sundowning": "sundowning_restlessness", "restlessness": "sundowning_restlessness",
-     "losing things": "object_misplacement", "misplacing things": "object_misplacement",
-     "planning": "goal_breakdown", "reminiscing": "reminiscence_prompting",
-     "communication strategy": "caregiver_communication_template",
- }
-
- def _canon_behavior_list(xs: list[str] | None, opts: list[str]) -> list[str]:
-     out = []
-     for x in (xs or []):
-         y = _BEHAVIOR_ALIASES.get(x.strip().lower(), x.strip())
-         if y in opts and y not in out:
-             out.append(y)
-     return out
-
- _TOPIC_ALIASES = {
-     "home safety": "treatment_option:home_safety", "long-term care": "treatment_option:long_term_care",
-     "music": "treatment_option:music_therapy", "reassure": "treatment_option:reassurance",
-     "routine": "treatment_option:routine_structuring", "validation": "treatment_option:validation_therapy",
-     "caregiving advice": "caregiving_advice", "medical": "medical_fact",
-     "research": "research_update", "story": "personal_story",
- }
- _CONTEXT_ALIASES = {
-     "mild": "disease_stage_mild", "moderate": "disease_stage_moderate", "advanced": "disease_stage_advanced",
-     "care home": "setting_care_home", "hospital": "setting_clinic_or_hospital", "home": "setting_home_or_community",
-     "group": "interaction_mode_group_activity", "1:1": "interaction_mode_one_to_one", "one to one": "interaction_mode_one_to_one",
-     "family": "relationship_family", "spouse": "relationship_spouse", "staff": "relationship_staff_or_caregiver",
- }
-
- def _canon_topic(x: str, opts: list[str]) -> str:
-     if not x: return "None"
-     y = _TOPIC_ALIASES.get(x.strip().lower(), x.strip())
-     return y if y in opts else "None"
-
- def _canon_context_list(xs: list[str] | None, opts: list[str]) -> list[str]:
-     out = []
-     for x in (xs or []):
-         y = _CONTEXT_ALIASES.get(x.strip().lower(), x.strip())
-         if y in opts and y not in out: out.append(y)
-     return out
-
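For illustration only (not part of the deleted file): a minimal sketch of how the alias canonicalizers behave, assuming the hypothetical option lists below.

    # Hypothetical option lists, for illustration only.
    behavior_opts = ["repetitive_questioning", "wandering"]
    context_opts = ["disease_stage_mild", "setting_home_or_community"]

    assert _canon_behavior_list(["Repeating questions", "pacing"], behavior_opts) == ["repetitive_questioning"]
    assert _canon_topic("music", ["treatment_option:music_therapy"]) == "treatment_option:music_therapy"
    assert _canon_context_list(["Mild", "home"], context_opts) == ["disease_stage_mild", "setting_home_or_community"]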
-
- MULTI_HOP_KEYPHRASES = [
-     r"\bcompare\b", r"\bvs\.?\b", r"\bversus\b", r"\bdifference between\b",
-     r"\b(more|less|fewer) (than|visitors|agitated)\b", r"\bchange after\b",
-     r"\bafter.*(vs|before)\b", r"\bbefore.*(vs|after)\b", r"\b(who|which) .*(more|less)\b",
-     # Revised, more robust patterns:
-     r"\b(did|was|is)\b .*\b(where|when|who)\b",  # catches MH1_new ("Did X happen where Y happened?")
-     r"\bconsidering\b .*\bhow long\b",  # catches MH2_new
-     r"\b(but|and)\b who was the other person\b",  # catches MH3_new
-     r"what does the journal say about",  # catches MH4_new
- ]
- _MH_PATTERNS = [re.compile(p, re.IGNORECASE) for p in MULTI_HOP_KEYPHRASES]
-
-
- FACTUAL_KEYPHRASES = [
-     r"\b(what is|what was) my\b",
-     r"\b(who is|who was) my\b",
-     r"\b(where is|where was) my\b",
-     r"\b(how old am i)\b",
-     # r"\b(when did|what did) the journal say\b"
- ]
- _FQ_PATTERNS = [re.compile(p, re.IGNORECASE) for p in FACTUAL_KEYPHRASES]
-
- def _pre_router_factual(query: str) -> str | None:
-     """Checks for patterns common in direct factual questions about personal memory."""
-     q = (query or "")
-     for pat in _FQ_PATTERNS:
-         if re.search(pat, q):
-             return "factual_question"
-     return None
-
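Illustrative behaviour of the factual pre-router (example queries invented):

    assert _pre_router_factual("What was my wedding song?") == "factual_question"
    assert _pre_router_factual("How old am I?") == "factual_question"
    assert _pre_router_factual("Tell me about dementia") is None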
-
- # Summarization keyphrases, kept beside the other keyphrase lists.
- SUMMARIZATION_KEYPHRASES = [
-     r"^\b(summarize|summarise|recap)\b", r"^\b(give me a summary|create a short summary)\b"
- ]
- _SUM_PATTERNS = [re.compile(p, re.IGNORECASE) for p in SUMMARIZATION_KEYPHRASES]
-
- def _pre_router_summarization(query: str) -> str | None:
-     q = (query or "")
-     for pat in _SUM_PATTERNS:
-         if re.search(pat, q): return "summarization"
-     return None
-
-
- CARE_KEYPHRASES = [
-     r"\bwhere am i\b", r"\byou('?| ha)ve stolen my\b|\byou'?ve stolen my\b",
-     r"\bi lost (the )?word\b|\bword-finding\b|\bcan.?t find the word\b",
-     r"\bshe didn('?| no)t know me\b|\bhe didn('?| no)t know me\b",
-     r"\bdisorient(?:ed|ation)\b|\bagitation\b|\bconfus(?:ed|ion)\b",
-     r"\bcare home\b|\bnursing home\b|\bthe.*home\b",
-     r"\bplaylist\b|\bsongs?\b.*\b(memories?|calm|soothe|familiar)\b",
-     r"\bi want to keep teaching\b|\bi want to keep driving\b|\bi want to go home\b",
-     r"music therapy",
-     # Added to handle test cases. NOTE: each pattern needs its trailing comma;
-     # adjacent string literals would otherwise concatenate into one broken regex.
-     r"\bremembering the\b",  # catches P7
-     r"\bmissed you so much\b",  # catches P4
-     r"\b(i forgot my job|what did i work as|do you remember my job)\b",  # queries about a forgotten profession
- ]
- _CARE_PATTERNS = [re.compile(p) for p in CARE_KEYPHRASES]
-
-
- _STRIP_PATTERNS = [
-     (r'^\s*(your\s+(final\s+)?answer|your\s+response)\s+in\s+[A-Za-z\-]+\s*:?\s*', ''),
-     (r'\bbased on (?:the |any )?(?:provided )?(?:context|information|details)(?: provided)?(?:,|\.)?\s*', ''),
-     (r'^\s*as an ai\b.*?(?:,|\.)\s*', ''),
-     (r'\b(according to|from)\s+(the\s+)?(sources?|context)\b[:,]?\s*', ''),
-     (r'\bI hope this helps[.!]?\s*$', ''),
- ]
-
- def _clean_surface_text(text: str) -> str:
-     """Strips boilerplate phrasing from the model's surface text."""
-     out = text or ""
-     for pat, repl in _STRIP_PATTERNS:
-         out = re.sub(pat, repl, out, flags=re.IGNORECASE)
-     return re.sub(r'\n{3,}', '\n\n', out).strip()
-
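A hedged example of what _clean_surface_text removes (the input string is invented):

    raw = "Based on the context provided, try a calm routine.\n\n\n\nI hope this helps!"
    print(_clean_surface_text(raw))  # -> "try a calm routine."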
-
- # Utilities
- def _openai_client() -> Optional[OpenAI]:
-     api_key = os.getenv("OPENAI_API_KEY", "").strip()
-     return OpenAI(api_key=api_key) if api_key and OpenAI else None
-
- def describe_image(image_path: str) -> str:
-     """Describes an uploaded image with GPT-4o for inclusion in the memory journal."""
-     client = _openai_client()
-     if not client: return "(Image description failed: OpenAI API key not configured.)"
-     try:
-         extension = os.path.splitext(image_path)[1].lower()
-         mime_type = f"image/{'jpeg' if extension in ['.jpg', '.jpeg'] else extension.strip('.')}"
-         with open(image_path, "rb") as image_file:
-             base64_image = base64.b64encode(image_file.read()).decode('utf-8')
-         response = client.chat.completions.create(
-             model="gpt-4o",
-             messages=[{"role": "user", "content": [
-                 {"type": "text", "text": "Describe this image concisely for a memory journal. Focus on people, places, and key objects. Example: 'A photo of John and Mary smiling on a bench at the park.'"},
-                 {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}},
-             ]}],
-             max_tokens=100)
-         return response.choices[0].message.content or "No description available."
-     except Exception as e:
-         return f"[Image description error: {e}]"
-
- def detect_tags_from_query(
-     query: str,
-     nlu_vectorstore: FAISS,
-     behavior_options: list,
-     emotion_options: list,
-     topic_options: list,
-     context_options: list,
-     settings: dict = None
- ) -> Dict[str, Any]:
-     """Uses a dynamic two-step NLU process: Route -> Retrieve Examples -> Classify."""
-     result_dict = {"detected_behaviors": [], "detected_emotion": "None", "detected_topics": [], "detected_contexts": []}
-     router_prompt = NLU_ROUTER_PROMPT.format(query=query)
-     primary_goal_raw = call_llm([{"role": "user", "content": router_prompt}], temperature=0.0).strip().lower()
-     goal_for_filter = "practical_planning" if "practical" in primary_goal_raw else "emotional_support"
-     goal_for_prompt = "Practical Planning" if "practical" in primary_goal_raw else "Emotional Support"
-
-     if settings and settings.get("debug_mode"):
-         print(f"\n--- NLU Router ---\nGoal: {goal_for_prompt} (Filter: '{goal_for_filter}')\n------------------\n")
-
-     retriever = nlu_vectorstore.as_retriever(search_kwargs={"k": 2, "filter": {"primary_goal": goal_for_filter}})
-     retrieved_docs = retriever.invoke(query)
-     if not retrieved_docs:
-         retrieved_docs = nlu_vectorstore.as_retriever(search_kwargs={"k": 2}).invoke(query)
-
-     selected_examples = "\n".join(
-         f"User Query: \"{doc.page_content}\"\n{json.dumps(doc.metadata['classification'], indent=4)}"
-         for doc in retrieved_docs
-     )
-     if not selected_examples:
-         selected_examples = "(No relevant examples found)"
-         if settings and settings.get("debug_mode"):
-             print("WARNING: NLU retriever found no examples for this query.")
-
-     behavior_str = ", ".join(f'"{opt}"' for opt in behavior_options if opt != "None")
-     emotion_str = ", ".join(f'"{opt}"' for opt in emotion_options if opt != "None")
-     topic_str = ", ".join(f'"{opt}"' for opt in topic_options if opt != "None")
-     context_str = ", ".join(f'"{opt}"' for opt in context_options if opt != "None")
-
-     prompt = SPECIALIST_CLASSIFIER_PROMPT.format(
-         primary_goal=goal_for_prompt, examples=selected_examples,
-         behavior_options=behavior_str, emotion_options=emotion_str,
-         topic_options=topic_str, context_options=context_str, query=query
-     )
-
-     messages = [{"role": "system", "content": "You are a helpful NLU classification assistant."}, {"role": "user", "content": prompt}]
-     response_str = call_llm(messages, temperature=0.0, response_format={"type": "json_object"})
-
-     if settings and settings.get("debug_mode"):
-         print(f"\n--- NLU Specialist Full Response ---\n{response_str}\n----------------------------------\n")
-
-     try:
-         start_brace = response_str.find('{')
-         end_brace = response_str.rfind('}')
-         if start_brace == -1 or end_brace <= start_brace:
-             raise json.JSONDecodeError("No valid JSON object found in response.", response_str, 0)
-
-         json_str = response_str[start_brace : end_brace + 1]
-         result = json.loads(json_str)
-
-         result_dict["detected_emotion"] = result.get("detected_emotion") or "None"
-
-         behaviors_raw = result.get("detected_behaviors")
-         behaviors_canon = _canon_behavior_list(behaviors_raw, behavior_options)
-         if behaviors_canon:
-             result_dict["detected_behaviors"] = behaviors_canon
-
-         topics_raw = result.get("detected_topics") or result.get("detected_topic")
-         detected_topics = []
-         if isinstance(topics_raw, list):
-             for t in topics_raw:
-                 ct = _canon_topic(t, topic_options)
-                 if ct != "None": detected_topics.append(ct)
-         elif isinstance(topics_raw, str):
-             ct = _canon_topic(topics_raw, topic_options)
-             if ct != "None": detected_topics.append(ct)
-         result_dict["detected_topics"] = detected_topics
-
-         contexts_raw = result.get("detected_contexts")
-         contexts_canon = _canon_context_list(contexts_raw, context_options)
-         if contexts_canon:
-             result_dict["detected_contexts"] = contexts_canon
-
-         return result_dict
-
-     except (json.JSONDecodeError, AttributeError) as e:
-         print(f"ERROR parsing NLU Specialist JSON: {e}")
-         return result_dict
-
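A hedged sketch of a call and the returned shape (the vector store and option lists are assumed to exist; tag values are illustrative):

    tags = detect_tags_from_query("She keeps asking when lunch is", nlu_vs,
                                  behavior_opts, emotion_opts, topic_opts, context_opts)
    # e.g. {"detected_behaviors": ["repetitive_questioning"], "detected_emotion": "anxiety",
    #       "detected_topics": [], "detected_contexts": []}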
-
- def _default_embeddings():
-     model_name = os.getenv("EMBEDDINGS_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
-     return HuggingFaceEmbeddings(model_name=model_name)
-
-
- # The placeholder logic applies only to the personal memory bank; a general
- # knowledge index must never be seeded with a placeholder document.
- def build_or_load_vectorstore(docs: List[Document], index_path: str, is_personal: bool = False) -> FAISS:
-     os.makedirs(os.path.dirname(index_path), exist_ok=True)
-     if os.path.isdir(index_path) and os.path.exists(os.path.join(index_path, "index.faiss")):
-         try:
-             return FAISS.load_local(index_path, _default_embeddings(), allow_dangerous_deserialization=True)
-         except Exception: pass
-
-     if not docs:
-         if is_personal:
-             # Seed the personal index with a placeholder so it is never empty.
-             placeholder_text = "(This is the start of the personal memory journal.)"
-             docs = [Document(page_content=placeholder_text, metadata={"source": "placeholder"})]
-         else:
-             # For general indexes with no docs, build a truly empty FAISS index
-             # so the placeholder never shows up in vs_general.
-             empty_faiss = FAISS.from_texts([""], _default_embeddings())
-             empty_faiss.delete([empty_faiss.index_to_docstore_id[0]])
-             empty_faiss.save_local(index_path)
-             return empty_faiss
-
-     vs = FAISS.from_documents(docs, _default_embeddings())
-     vs.save_local(index_path)
-     return vs
-
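A usage sketch under the file's own semantics (index paths are hypothetical): an empty personal index is seeded with the placeholder, while an empty general index stays truly empty.

    vs_personal = build_or_load_vectorstore([], "data/personal_index", is_personal=True)   # holds the placeholder
    vs_general = build_or_load_vectorstore([], "data/general_index", is_personal=False)    # truly empty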
-
- def bootstrap_vectorstore(sample_paths: List[str] | None = None, index_path: str = "data/faiss_index") -> FAISS:
-     """
-     Loads documents from paths and builds or loads a FAISS vector store for the GENERAL knowledge base.
-     Correctly handles the case where no initial documents are found.
-     """
-     # Delete any stale index on disk first, so an old, empty index is never
-     # loaded in place of a freshly built one.
-     if os.path.exists(index_path):
-         shutil.rmtree(index_path)
-
-     docs: List[Document] = []
-     print(f"DEBUG (bootstrap_vectorstore): Bootstrapping index '{os.path.basename(index_path)}' with {len(sample_paths or [])} file(s).")
-
-     for p in (sample_paths or []):
-         try:
-             if p.lower().endswith(".jsonl"):
-                 docs.extend(texts_from_jsonl(p))
-             else:
-                 with open(p, "r", encoding="utf-8", errors="ignore") as fh:
-                     docs.append(Document(page_content=fh.read(), metadata={"source": os.path.basename(p)}))
-         except Exception as e:
-             print(f" - ERROR: Failed to bootstrap from file '{os.path.basename(p)}'. Reason: {e}")
-             continue
-
-     # The general knowledge base gets no placeholder: if no files are enabled or
-     # found, it stays truly empty. The placeholder logic lives in
-     # build_or_load_vectorstore and applies to the personal index only.
-     if not docs:
-         print(f" - WARNING: No documents were found for this index. It will be created empty.")
-
-     print(f"DEBUG (bootstrap_vectorstore): Total documents parsed for index '{os.path.basename(index_path)}': {len(docs)}.")
-
-     # is_personal=False ensures no placeholder is created downstream.
-     return build_or_load_vectorstore(docs, index_path=index_path, is_personal=False)
-
-
- def texts_from_jsonl(path: str) -> List[Document]:
-     """Parses a JSONL file into a list of Document objects."""
-     out: List[Document] = []
-     file_name = os.path.basename(path)
-     print(f"DEBUG (texts_from_jsonl): Attempting to read and parse '{file_name}'...")
-
-     try:
-         with open(path, "r", encoding="utf-8") as f:
-             for i, line in enumerate(f):
-                 try:
-                     obj = json.loads(line.strip())
-                 except json.JSONDecodeError:
-                     print(f" - WARNING: Skipping malformed JSON line {i+1} in '{file_name}'")
-                     continue
-
-                 txt = obj.get("text") or obj.get("content") or obj.get("query") or ""
-                 if not txt.strip():
-                     continue
-
-                 md = {"source": file_name, "chunk": i}
-                 for k in ("behaviors", "emotion", "topic_tags", "context_tags", "scene_id"):
-                     if obj.get(k):
-                         md[k] = obj.get(k)
-
-                 out.append(Document(page_content=txt, metadata=md))
-
-         print(f"DEBUG (texts_from_jsonl): ✅ SUCCESS: Parsed {len(out)} documents from '{file_name}'.")
-
-     except Exception as e:
-         print(f"DEBUG (texts_from_jsonl): ❌ CRITICAL ERROR while processing '{file_name}'. Reason: {e}")
-         return []
-
-     return out
-
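An example of the JSONL record shape texts_from_jsonl expects (field values and path invented):

    # {"text": "Mary visited and we looked at the garden.",
    #  "behaviors": ["reminiscence_prompting"], "emotion": "calm", "scene_id": "S12"}
    docs = texts_from_jsonl("data/journal.jsonl")
    # docs[0].metadata -> {"source": "journal.jsonl", "chunk": 0,
    #                      "behaviors": ["reminiscence_prompting"], "emotion": "calm", "scene_id": "S12"}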
-
- def rerank_documents(query: str, documents: list[tuple[Document, float]]) -> list[tuple[tuple[Document, float], float]]:
-     """
-     Re-ranks a list of retrieved documents against a query using a CrossEncoder model.
-     Returns the original document tuples along with their new re-ranker score.
-     """
-     if not documents or not query:
-         return []
-
-     model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
-
-     doc_contents = [doc.page_content for doc, score in documents]
-     query_doc_pairs = [[query, doc_content] for doc_content in doc_contents]
-
-     scores = model.predict(query_doc_pairs)
-
-     reranked_results = list(zip(documents, scores))
-     reranked_results.sort(key=lambda x: x[1], reverse=True)
-
-     print(f"\n[DEBUG] Re-ranked Top 3 Sources:")
-     for doc_tuple, score in reranked_results[:3]:
-         doc, _ = doc_tuple
-         print(f" - New Rank | Source: {doc.metadata.get('source')} | Score: {score:.4f}")
-
-     return reranked_results
-
-
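A hedged usage sketch (vs_general is assumed to be a populated FAISS store): FAISS supplies (Document, distance) pairs, and the CrossEncoder re-scores each (query, text) pair, higher meaning more relevant.

    candidates = vs_general.similarity_search_with_score("wandering at night", k=20)
    for (doc, faiss_distance), ce_score in rerank_documents("wandering at night", candidates)[:3]:
        print(f"{ce_score:.4f}", doc.metadata.get("source"))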
- # Some vectorstores can return duplicates. De-duplicating matters when a top-k
- # cutoff might otherwise include near-duplicates introduced by query expansion.
- def dedup_docs(scored_docs):
-     seen = set()
-     unique = []
-     for doc, score in scored_docs:
-         uid = doc.metadata.get("source", "") + "::" + doc.page_content.strip()
-         if uid not in seen:
-             unique.append((doc, score))
-             seen.add(uid)
-     return unique
-
-
- def call_llm(messages: List[Dict[str, str]], temperature: float = 0.6, stop: Optional[List[str]] = None, response_format: Optional[dict] = None) -> str:
-     client = _openai_client()
-     if client is None: raise RuntimeError("OpenAI client not configured (missing API key?).")
-     model = os.getenv("OPENAI_CHAT_MODEL", "gpt-4o-mini")
-     api_args = {"model": model, "messages": messages, "temperature": float(temperature if temperature is not None else 0.6)}
-     if stop: api_args["stop"] = stop
-     if response_format: api_args["response_format"] = response_format
-     resp = client.chat.completions.create(**api_args)
-     content = ""
-     try:
-         content = resp.choices[0].message.content or ""
-     except Exception:
-         msg = getattr(resp.choices[0], "message", None)
-         if isinstance(msg, dict): content = msg.get("content") or ""
-     return content.strip()
-
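Minimal call sketches, mirroring how the module itself uses call_llm (prompt strings invented):

    text = call_llm([{"role": "user", "content": "Say hello."}], temperature=0.0)
    # JSON mode, as used by the NLU specialist classifier:
    json_str = call_llm(
        [{"role": "user", "content": "Return {\"ok\": true} as JSON."}],
        temperature=0.0,
        response_format={"type": "json_object"},
    )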
- MUSIC_KEYPHRASES = [
-     r"\bplay\b.*\bsong\b",
-     r"\bplay\b.*\bmusic\b",
-     r"\blisten to music\b",
-     r"\bhear\b.*\bsong\b",
-     r"\bhear\b.*\bmusic\b",
- ]
- _MUSIC_PATTERNS = [re.compile(p, re.IGNORECASE) for p in MUSIC_KEYPHRASES]
-
-
- def _pre_router_music(query: str) -> str | None:
-     for pat in _MUSIC_PATTERNS:
-         if re.search(pat, query): return "play_music_request"
-     return None
-
- # NOTE: MULTI_HOP_KEYPHRASES and _MH_PATTERNS are defined once near the top of
- # this module; redefining them here would silently discard the revised patterns.
- def _pre_router_multi_hop(query: str) -> str | None:
-     q = (query or "")
-     for pat in _MH_PATTERNS:
-         if re.search(pat, q): return "multi_hop"
-     return None
-
- def _pre_router(query: str) -> str | None:
-     q = (query or "").lower()
-     for pat in _CARE_PATTERNS:
-         if re.search(pat, q): return "caregiving_scenario"
-     return None
-
- def _llm_route_with_prompt(query: str, temperature: float = 0.0) -> str:
-     router_messages = [{"role": "user", "content": ROUTER_PROMPT.format(query=query)}]
-     query_type = call_llm(router_messages, temperature=temperature).strip().lower()
-     return query_type
-
- # The severity override applies only to the moderate and advanced stages.
- def route_query_type(query: str, severity: str = "Normal / Unspecified"):
-     # Adaptive logic that ONLY applies when severity is moderate or advanced.
-     if severity in ["Moderate Stage", "Advanced Stage"]:
-         # Check first whether it is an obvious other type (e.g. summarization).
-         if not _pre_router_summarization(query) and not _pre_router_multi_hop(query):
-             print("Query classified as: caregiving_scenario (severity override)")
-             return "caregiving_scenario"
-
-     # For "Normal / Unspecified", continue with the original routing logic.
-     # Priority 1: Check for specific, structural queries first.
-     mh_hit = _pre_router_multi_hop(query)
-     if mh_hit:
-         print(f"Query classified as: {mh_hit} (multi-hop pre-router)")
-         return mh_hit
-
-     # Priority 2: Check for explicit commands like "summarize".
-     sum_hit = _pre_router_summarization(query)
-     if sum_hit:
-         print(f"Query classified as: {sum_hit} (summarization pre-router)")
-         return sum_hit
-
-     # Priority 3: Check for music requests. The specific "play music" checker
-     # must run before the general caregiving keyword checker (_pre_router),
-     # which would otherwise swallow music queries.
-     music_hit = _pre_router_music(query)
-     if music_hit:
-         print(f"Query classified as: {music_hit} (music pre-router)")
-         return music_hit
-
-     # Priority 4: Check for general caregiving keywords.
-     care_hit = _pre_router(query)
-     if care_hit:
-         print(f"Query classified as: {care_hit} (caregiving pre-router)")
-         return care_hit
-
-     # Fallback: If no pre-routers match, use the LLM for nuanced classification.
-     query_type = _llm_route_with_prompt(query, temperature=0.0)
-     print(f"Query classified as: {query_type} (LLM router)")
-     return query_type
-
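Illustrative routing outcomes (queries invented; see the pre-router priorities above):

    route_query_type("Compare how agitated I was before vs after the move")  # -> "multi_hop"
    route_query_type("Summarize my journal from last week")                  # -> "summarization"
    route_query_type("Please play some calming music")                       # -> "play_music_request"
    route_query_type("Where am I?", severity="Moderate Stage")               # -> "caregiving_scenario"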
- # Small helper for the evaluation harness; caps the source list at 3 entries.
- def _source_ids_for_eval(docs, cap=3):
-     out, seen = [], set()
-     for d in docs or []:
-         md = getattr(d, "metadata", {}) or {}
-         src = md.get("source")
-
-         if not src or src == 'placeholder':
-             continue
-
-         # Always use the filename as the key, regardless of file type.
-         key = src
-
-         if key and key not in seen:
-             seen.add(key)
-             out.append(str(key))
-             if len(out) >= cap:
-                 break
-     return out
-
-
-
- def make_rag_chain(
-     vs_general: FAISS,
-     vs_personal: FAISS,
-     vs_playbook: FAISS,  # caregiving playbook entries
-     *,
-     for_evaluation: bool = False,
-     role: str = "patient",
-     temperature: float = 0.6,
-     language: str = "English",
-     patient_name: str = "the patient",
-     caregiver_name: str = "the caregiver",
-     tone: str = "warm",
-     disease_stage: str = "Default: Mild Stage",
-     music_manifest_path: str = ""
- ):
-     """Returns a callable that performs the complete RAG process."""
-
-     RELEVANCE_THRESHOLD = 0.85
-     SCORE_MARGIN = 0.10  # Margin to decide if scores are "close enough" to blend.
-
-     # Helper used by the answer function below.
-     def _format_docs(docs: List[Document], default_msg: str) -> str:
-         if not docs: return default_msg
-         unique_docs = {doc.page_content: doc for doc in docs}.values()
-         return "\n".join([f"- {d.page_content.strip()}" for d in unique_docs])
-
-     def _answer_fn(query: str, query_type: str, chat_history: List[Dict[str, str]], **kwargs) -> Dict[str, Any]:
-
-         print(f"[DEBUG] The Query is: {query}")
-         print(f"[DEBUG] The Query Type is: {query_type}")
-         print(f"DEBUG: RAG chain received disease_stage = '{disease_stage}'")
-
-         # Sanity check on vs_general before any retrieval.
-         print("\n" + "#"*20 + " VS_GENERAL SANITY CHECK " + "#"*20)
-         if vs_general and hasattr(vs_general.docstore, '_dict'):
-             doc_count = len(vs_general.docstore._dict)
-             print(f" - ✅ vs_general is valid and contains {doc_count} document(s).")
-             # Optional: check whether it holds only the placeholder.
-             if doc_count == 1 and list(vs_general.docstore._dict.values())[0].metadata.get("source") == "placeholder":
-                 print(" - ⚠️ Warning: vs_general appears to contain only a placeholder document.")
-         else:
-             print(" - ❌ CRITICAL WARNING: vs_general is None or not a valid FAISS index.")
-         print("#"*67 + "\n")
-
-         # Local copy of the temperature to avoid an UnboundLocalError.
-         test_temperature = temperature
-
-         # --- MUSIC LIBRARY LISTING ---
-         if "list_music_request" in query_type:
-             if not music_manifest_path or not os.path.exists(music_manifest_path):
-                 return {"answer": "I don't see any music in your personal library yet.", "sources": ["Personal Music Library"], "audio_playback_url": None}
-
-             with open(music_manifest_path, "r") as f:
-                 manifest = json.load(f)
-
-             if not manifest:
-                 return {"answer": "Your personal music library is currently empty.", "sources": ["Personal Music Library"], "audio_playback_url": None}
-
-             song_list = []
-             for song_id, data in manifest.items():
-                 song_list.append(f"- '{data['title']}' by {data['artist']}")
-
-             formatted_songs = "\n".join(song_list)
-             answer = f"Based on your personal library, here is the music you like to listen to:\n{formatted_songs}"
-             return {"answer": answer, "sources": ["Personal Music Library"], "audio_playback_url": None}
-
-         # --- MUSIC PLAYBACK ---
-         if "play_music_request" in query_type:
-             # Manifest loading logic
-             if not music_manifest_path or not os.path.exists(music_manifest_path):
-                 return {"answer": "I'm sorry, there is no music in the library yet.", "sources": [], "audio_playback_url": None}
-             with open(music_manifest_path, "r") as f:
-                 manifest = json.load(f)
-             if not manifest:
-                 return {"answer": "I'm sorry, there is no music in the library yet.", "sources": [], "audio_playback_url": None}
-
-             found_song = None
-             query_lower = query.lower()
-
-             # 1. First, search for a specific title or artist mentioned in the query.
-             for song_id, data in manifest.items():
-                 if data["title"].lower() in query_lower or data["artist"].lower() in query_lower:
-                     found_song = data
-                     break
-
-             # Define the emotion tag here so it is also available for the preamble later.
-             detected_emotion_raw = kwargs.get("emotion_tag")
-             detected_emotion = detected_emotion_raw.lower() if detected_emotion_raw else ""
-
-             # 2. If not found, use the detected NLU tags to find the FIRST mood match.
-             if not found_song:
-                 detected_behavior_raw = kwargs.get("scenario_tag")
-                 detected_behavior = detected_behavior_raw.lower() if detected_behavior_raw else ""
-
-                 print(f"[DEBUG] Music Search: Using NLU tags. Behavior='{detected_behavior}', Emotion='{detected_emotion}'")
-
-                 search_tags = [detected_emotion, detected_behavior]
-
-                 for nlu_tag in search_tags:
-                     if not nlu_tag or nlu_tag == "none": continue
-
-                     core_nlu_word = nlu_tag.split('_')[0]
-                     print(f" [DEBUG] Music Search Loop: Using core_nlu_word='{core_nlu_word}' for matching.")
-
-                     for song_id, data in manifest.items():
-                         for mood_tag in data.get("moods", []):  # use .get for safety
-                             if not mood_tag or not isinstance(mood_tag, str): continue
-                             mood_words = re.split(r'[\s/]', mood_tag.lower())
-
-                             if core_nlu_word in mood_words:
-                                 found_song = data
-                                 break
-                         if found_song: break
-                     if found_song: break
-
-             # 3. If still not found, handle generic requests by playing a random song.
-             if not found_song:
-                 print("[DEBUG] Music Search: No specific song or NLU match found. Selecting a random song.")
-                 generic_keywords = ["music", "song", "something", "anything"]
-                 if any(keyword in query_lower for keyword in generic_keywords):
-                     random_song_id = random.choice(list(manifest.keys()))
-                     found_song = manifest[random_song_id]
-
-             # 4. Construct the final response, adding the empathetic preamble if a song was found.
-             if found_song:
-                 preamble_text = ""
-                 # Only generate a preamble if there was a clear emotional context.
-                 if detected_emotion and detected_emotion != "none":
-                     preamble_prompt = MUSIC_PREAMBLE_PROMPT.format(emotion=detected_emotion, query=query)
-                     preamble_text = call_llm([{"role": "user", "content": preamble_prompt}], temperature=0.7)
-                     preamble_text = preamble_text.strip() + " "
-
-                 action_text = f"Of course. Playing '{found_song['title']}' by {found_song['artist']} for you."
-                 final_answer = preamble_text + action_text
-
-                 return {"answer": final_answer, "sources": ["Personal Music Library"], "audio_playback_url": found_song['filepath']}
-             else:
-                 return {"answer": "I couldn't find a song matching your request in the library.", "sources": [], "audio_playback_url": None}
-         # --- END MUSIC PLAYBACK LOGIC ---
-
-         p_name = patient_name or "the patient"
-         c_name = caregiver_name or "the caregiver"
-         perspective_line = (f"You are speaking directly to {p_name}, who is the patient...") if role == "patient" else (f"You are communicating with {c_name}, the caregiver, about {p_name}.")
-         system_message = SYSTEM_TEMPLATE.format(tone=tone, language=language, perspective_line=perspective_line, guardrails=SAFETY_GUARDRAILS)
-         messages = [{"role": "system", "content": system_message}]
-         messages.extend(chat_history)
-
-         # Non-RAG routes: answer directly from the model.
-         if "general_knowledge_question" in query_type or "general_conversation" in query_type:
-             template = ANSWER_TEMPLATE_GENERAL_KNOWLEDGE if "general_knowledge" in query_type else ANSWER_TEMPLATE_GENERAL
-             user_prompt = template.format(question=query, language=language)
-             messages.append({"role": "user", "content": user_prompt})
-             raw_answer = call_llm(messages, temperature=test_temperature)
-             answer = _clean_surface_text(raw_answer)
-             sources = ["General Knowledge"] if "general_knowledge" in query_type else []
-             return {"answer": answer, "sources": sources, "source_documents": []}
-
-         all_retrieved_docs = []
-         is_personal_route = "factual" in query_type or "summarization" in query_type or "multi_hop" in query_type
-
-         # Personal memory route
-         if is_personal_route:
-             print("[DEBUG] Personal Memory Route Activated.")
-             print(" - Strategy: Retrieving ALL personal documents to provide full context for the LLM.")
-
-             # For BOTH summarization and factual questions, load every document;
-             # the LLM can pick the specific facts out of the full context.
-             if vs_personal and vs_personal.docstore and hasattr(vs_personal.docstore, '_dict'):
-                 all_docs = list(vs_personal.docstore._dict.values())
-
-                 # Filter out the placeholder document.
-                 all_retrieved_docs = [
-                     doc for doc in all_docs if doc.metadata.get("source") != "placeholder"
-                 ]
-                 print(f" -> Retrieved {len(all_retrieved_docs)} documents from the personal memory bank.")
-             else:
-                 print(" -> Personal vector store is empty or invalid.")
-
-         else:
-             # Hybrid retrieval for caregiving scenarios.
-             print("[DEBUG] Using HYBRID Retrieval for caregiving scenario...")
-
-             # Extract the primary behavior tag.
-             scenario_tags_raw = kwargs.get("scenario_tag")
-             primary_behavior = (scenario_tags_raw[0] if isinstance(scenario_tags_raw, list) and scenario_tags_raw else scenario_tags_raw) or "None"
-
-             # Stage 1: Search the playbook for expert advice (the metadata key is 'behaviors').
-             if primary_behavior != "None":
-                 print(f" - Stage 1: Searching Playbook for expert advice on '{primary_behavior}'...")
-                 playbook_results = vs_playbook.similarity_search(
-                     primary_behavior, k=1,
-                     filter={"behaviors": primary_behavior}
-                 )
-                 if playbook_results:
-                     print(" -> ✅ Success! Found expert advice in Playbook.")
-                     all_retrieved_docs.extend(playbook_results)
-
-             # Stage 2: Search general knowledge for illustrative stories.
-             print(" - Stage 2: Searching general knowledge for illustrative stories...")
-
-             search_queries = [query]
-             try:
-                 expanded = json.loads(call_llm([{"role": "user", "content": QUERY_EXPANSION_PROMPT.format(question=query)}], temperature=0.0))
-                 if isinstance(expanded, list): search_queries.extend(expanded)
-             except Exception as e:
-                 print(f" - Query expansion failed: {e}")
-
-             candidate_docs = []
-             if primary_behavior and primary_behavior != "None":
-                 print(f" - Stage 2a: High-precision search for behavior: '{primary_behavior}'")
-                 for q in search_queries:
-                     candidate_docs.extend(vs_general.similarity_search_with_score(q, k=10, filter={"behaviors": primary_behavior}))
-
-             print(" - Stage 2b: High-recall semantic search (k=20)")
-             for q in search_queries:
-                 candidate_docs.extend(vs_general.similarity_search_with_score(q, k=20))
-
-             unique_candidates = dedup_docs(candidate_docs)
-             reranked_docs_with_scores = rerank_documents(query, unique_candidates) if unique_candidates else []
-
-             print("\n" + "="*20 + " All Re-ranked Documents & Scores " + "="*20)
-             if reranked_docs_with_scores:
-                 for i, (doc_tuple, score) in enumerate(reranked_docs_with_scores):
-                     doc, _ = doc_tuple
-                     source = doc.metadata.get('source', 'N/A')
-                     # print(f" - Rank #{i+1}: Score={score:<9.4f}, Source='{source}'")
-             else:
-                 print(" - No documents were re-ranked.")
-             print("="*66)
-
-             final_docs_with_scores = []
-             if reranked_docs_with_scores:
-                 RELATIVE_SCORE_MARGIN = 3.0
-                 top_doc_tuple, top_score = reranked_docs_with_scores[0]
-
-                 print("\n" + "="*20 + " Filtering by Score Margin " + "="*20)
-                 threshold = top_score - RELATIVE_SCORE_MARGIN
-                 print(f" - Top Score: {top_score:.4f}")
-                 print(f" - Score Threshold (must be >): {threshold:.4f}")
-
-                 final_docs_with_scores.append(top_doc_tuple)
-                 print(f" - ✅ Kept (Top 1): Score={top_score:<9.4f}, Source='{top_doc_tuple[0].metadata.get('source', 'N/A')}'")
-                 for doc_tuple, score in reranked_docs_with_scores[1:]:
-                     if score > threshold:
-                         final_docs_with_scores.append(doc_tuple)
-                         print(f" - ✅ Kept: Score={score:<9.4f}, Source='{doc_tuple[0].metadata.get('source', 'N/A')}'")
-                     else:
-                         print(f" - ❌ Discarded (broke loop): Score={score:<9.4f}, Source='{doc_tuple[0].metadata.get('source', 'N/A')}'")
-                         break
-                 print("="*52)
-
-             limit = 5 if disease_stage in ["Moderate Stage", "Advanced Stage"] else 3
-             final_docs_with_scores = final_docs_with_scores[:limit]
-
-             # EXTEND the main list rather than overwriting it.
-             retrieved_general_docs = [doc for doc, score in final_docs_with_scores]
-             all_retrieved_docs.extend(retrieved_general_docs)
-             print(f" -> Retrieved {len(retrieved_general_docs)} general documents.")
-         # END caregiving scenario
-
-         # 'all_retrieved_docs' is now populated for ALL routes; de-duplicate it
-         # and use the result consistently from here on.
-         final_unique_docs = [doc for doc, score in dedup_docs([(d, 0) for d in all_retrieved_docs])]
-
-         # --- FINAL PROCESSING (applies to all RAG routes) ---
-         print("\n--- DEBUG: Final Selected Docs ---")
-         for doc in final_unique_docs:
-             print(f" - Source: {doc.metadata.get('source', 'N/A')}")
-         print("----------------------------------------------------------------")
-
-         # Use 'final_unique_docs' for all context formatting and the final return value.
-         personal_sources_set = {'1 Complaints of a Dutiful Daughter.txt', 'Saved Chat', 'Text Input'}
-         personal_context = _format_docs([d for d in final_unique_docs if d.metadata.get('source') in personal_sources_set], "(No relevant personal memories found.)")
-         general_context = _format_docs([d for d in final_unique_docs if d.metadata.get('source') not in personal_sources_set], "(No general guidance found.)")
-
-         if is_personal_route:
-             template = ANSWER_TEMPLATE_SUMMARIZE if "summarization" in query_type else \
-                        ANSWER_TEMPLATE_FACTUAL_MULTI if "multi_hop" in query_type else \
-                        ANSWER_TEMPLATE_FACTUAL
-             user_prompt = template.format(
-                 question=query, context=personal_context, personal_context=personal_context,
-                 general_context=general_context, language=language,
-                 patient_name=p_name, caregiver_name=c_name, role=role
-             )
-             print("[DEBUG] Personal Route Factual / Sum / Multi PROMPT")
-
-         else:  # caregiving_scenario
-             if disease_stage == "Advanced Stage": template = ANSWER_TEMPLATE_ADQ_ADVANCED
-             elif disease_stage == "Moderate Stage": template = ANSWER_TEMPLATE_ADQ_MODERATE
-             else: template = ANSWER_TEMPLATE_ADQ
-
-             emotions_context = render_emotion_guidelines(kwargs.get("emotion_tag"))
-             user_prompt = template.format(
-                 general_context=general_context, personal_context=personal_context,  # personal_context now carries stories
-                 question=query, scenario_tag=kwargs.get("scenario_tag"), emotions_context=emotions_context,
-                 role=role, language=language, patient_name=p_name, caregiver_name=c_name,
-                 emotion_tag=kwargs.get("emotion_tag")
-             )
-             print("[DEBUG] Caregiving Scenario PROMPT")
-
-         messages.append({"role": "user", "content": user_prompt})
-         raw_answer = call_llm(messages, temperature=0.0 if for_evaluation else temperature)
-         answer = _clean_surface_text(raw_answer)
-         print(f"[DEBUG] LLM Answer: {answer}")
-
-         # scenario_tag may arrive as a list from the NLU step; normalise before comparing.
-         scenario_tag_val = kwargs.get("scenario_tag")
-         if isinstance(scenario_tag_val, list):
-             scenario_tag_val = scenario_tag_val[0] if scenario_tag_val else ""
-         if (scenario_tag_val or "").lower() in ["exit_seeking", "wandering"]:
-             answer += f"\n\n---\n{RISK_FOOTER}"
-
-         # Use 'final_unique_docs' for the final source list and document return.
-         sources = _source_ids_for_eval(final_unique_docs)
-         print("DEBUG Sources (After Filtering):", sources)
-         return {"answer": answer, "sources": sources, "source_documents": final_unique_docs}
-
-     return _answer_fn
- # END of make_rag_chain
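An end-to-end usage sketch (stores, tags, and the query are hypothetical; answer_query is defined just below):

    chain = make_rag_chain(vs_general, vs_personal, vs_playbook,
                           role="caregiver", disease_stage="Moderate Stage",
                           music_manifest_path="data/music_manifest.json")
    result = answer_query(chain, "She keeps asking to go home every evening.",
                          query_type="caregiving_scenario", chat_history=[],
                          scenario_tag="exit_seeking", emotion_tag="anxiety")
    print(result["answer"], result["sources"])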
-
- def answer_query(chain, question: str, **kwargs) -> Dict[str, Any]:
-     if not callable(chain): return {"answer": "[Error: RAG chain is not callable]", "sources": []}
-     try:
-         return chain(question, **kwargs)
-     except Exception as e:
-         print(f"ERROR in answer_query: {e}")
-         return {"answer": f"[Error executing chain: {e}]", "sources": []}
-
- def synthesize_tts(text: str, lang: str = "en"):
-     if not text or gTTS is None: return None
-     try:
-         with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fp:
-             tts = gTTS(text=text, lang=(lang or "en"))
-             tts.save(fp.name)
-             return fp.name
-     except Exception:
-         return None
-
- def transcribe_audio(filepath: str, lang: str = "en"):
-     client = _openai_client()
-     if not client: return "[Transcription failed: API key not configured]"
-     model = os.getenv("TRANSCRIBE_MODEL", "whisper-1")
-     api_args = {"model": model}
-     if lang and lang != "auto": api_args["language"] = lang
-     with open(filepath, "rb") as audio_file:
-         transcription = client.audio.transcriptions.create(file=audio_file, **api_args)
-     return transcription.text