KeenWoo committed
Commit 2192634 · verified · 1 Parent(s): 2c0ca80

Delete alz_companion/agent.py

Files changed (1)
  1. alz_companion/agent.py +0 -973
alz_companion/agent.py DELETED
@@ -1,973 +0,0 @@
- from __future__ import annotations
- import os
- import json
- import base64
- import time
- import tempfile
- import re
- import random  # for randomly selecting songs
- import shutil  # used to remove stale FAISS index directories before rebuilding
-
- from typing import List, Dict, Any, Optional
- from sentence_transformers import CrossEncoder
-
- try:
-     from openai import OpenAI
- except Exception:
-     OpenAI = None
-
- from langchain.schema import Document
- from langchain_community.vectorstores import FAISS
- from langchain_community.embeddings import HuggingFaceEmbeddings
-
- try:
-     from gtts import gTTS
- except Exception:
-     gTTS = None
-
- from .prompts import (
-     SYSTEM_TEMPLATE,
-     ROUTER_PROMPT,
-     SAFETY_GUARDRAILS, RISK_FOOTER, render_emotion_guidelines,
-     NLU_ROUTER_PROMPT, SPECIALIST_CLASSIFIER_PROMPT,
-     ANSWER_TEMPLATE_CALM,
-     ANSWER_TEMPLATE_ADQ, ANSWER_TEMPLATE_ADQ_MODERATE, ANSWER_TEMPLATE_ADQ_ADVANCED,
-     ANSWER_TEMPLATE_FACTUAL, ANSWER_TEMPLATE_FACTUAL_MULTI, ANSWER_TEMPLATE_SUMMARIZE,
-     ANSWER_TEMPLATE_GENERAL_KNOWLEDGE, ANSWER_TEMPLATE_GENERAL,
-     QUERY_EXPANSION_PROMPT,
-     MUSIC_PREAMBLE_PROMPT,
- )
-
-
- _BEHAVIOR_ALIASES = {
-     "repeating questions": "repetitive_questioning", "repetitive questions": "repetitive_questioning",
-     "confusion": "confusion", "wandering": "wandering", "agitation": "agitation",
-     "accusing people": "false_accusations", "false accusations": "false_accusations",
-     "memory loss": "address_memory_loss", "seeing things": "hallucinations_delusions",
-     "hallucinations": "hallucinations_delusions", "delusions": "hallucinations_delusions",
-     "trying to leave": "exit_seeking", "wanting to go home": "exit_seeking",
-     "aphasia": "aphasia", "word finding": "aphasia", "withdrawn": "withdrawal",
-     "apathy": "apathy", "affection": "affection", "sleep problems": "sleep_disturbance",
-     "anxiety": "anxiety", "sadness": "depression_sadness", "depression": "depression_sadness",
-     "checking orientation": "orientation_check", "misidentification": "misidentification",
-     "sundowning": "sundowning_restlessness", "restlessness": "sundowning_restlessness",
-     "losing things": "object_misplacement", "misplacing things": "object_misplacement",
-     "planning": "goal_breakdown", "reminiscing": "reminiscence_prompting",
-     "communication strategy": "caregiver_communication_template",
- }
-
- def _canon_behavior_list(xs: list[str] | None, opts: list[str]) -> list[str]:
-     out = []
-     for x in (xs or []):
-         y = _BEHAVIOR_ALIASES.get(x.strip().lower(), x.strip())
-         if y in opts and y not in out:
-             out.append(y)
-     return out
-
- _TOPIC_ALIASES = {
-     "home safety": "treatment_option:home_safety", "long-term care": "treatment_option:long_term_care",
-     "music": "treatment_option:music_therapy", "reassure": "treatment_option:reassurance",
-     "routine": "treatment_option:routine_structuring", "validation": "treatment_option:validation_therapy",
-     "caregiving advice": "caregiving_advice", "medical": "medical_fact",
-     "research": "research_update", "story": "personal_story",
- }
- _CONTEXT_ALIASES = {
-     "mild": "disease_stage_mild", "moderate": "disease_stage_moderate", "advanced": "disease_stage_advanced",
-     "care home": "setting_care_home", "hospital": "setting_clinic_or_hospital", "home": "setting_home_or_community",
-     "group": "interaction_mode_group_activity", "1:1": "interaction_mode_one_to_one", "one to one": "interaction_mode_one_to_one",
-     "family": "relationship_family", "spouse": "relationship_spouse", "staff": "relationship_staff_or_caregiver",
- }
-
- def _canon_topic(x: str, opts: list[str]) -> str:
-     if not x: return "None"
-     y = _TOPIC_ALIASES.get(x.strip().lower(), x.strip())
-     return y if y in opts else "None"
-
- def _canon_context_list(xs: list[str] | None, opts: list[str]) -> list[str]:
-     out = []
-     for x in (xs or []):
-         y = _CONTEXT_ALIASES.get(x.strip().lower(), x.strip())
-         if y in opts and y not in out: out.append(y)
-     return out
-
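For illustration only (not part of the deleted file): a minimal sketch of how the alias canonicalizers behave, assuming the hypothetical option lists below.

    # Hypothetical option lists, for illustration only.
    behavior_opts = ["repetitive_questioning", "wandering"]
    context_opts = ["disease_stage_mild", "setting_home_or_community"]

    assert _canon_behavior_list(["Repeating questions", "pacing"], behavior_opts) == ["repetitive_questioning"]
    assert _canon_topic("music", ["treatment_option:music_therapy"]) == "treatment_option:music_therapy"
    assert _canon_context_list(["Mild", "home"], context_opts) == ["disease_stage_mild", "setting_home_or_community"]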
-
- MULTI_HOP_KEYPHRASES = [
-     r"\bcompare\b", r"\bvs\.?\b", r"\bversus\b", r"\bdifference between\b",
-     r"\b(more|less|fewer) (than|visitors|agitated)\b", r"\bchange after\b",
-     r"\bafter.*(vs|before)\b", r"\bbefore.*(vs|after)\b", r"\b(who|which) .*(more|less)\b",
-     # Revised, more robust patterns:
-     r"\b(did|was|is)\b .*\b(where|when|who)\b",  # catches MH1_new ("Did X happen where Y happened?")
-     r"\bconsidering\b .*\bhow long\b",  # catches MH2_new
-     r"\b(but|and)\b who was the other person\b",  # catches MH3_new
-     r"what does the journal say about",  # catches MH4_new
- ]
- _MH_PATTERNS = [re.compile(p, re.IGNORECASE) for p in MULTI_HOP_KEYPHRASES]
-
-
- FACTUAL_KEYPHRASES = [
-     r"\b(what is|what was) my\b",
-     r"\b(who is|who was) my\b",
-     r"\b(where is|where was) my\b",
-     r"\b(how old am i)\b",
-     # r"\b(when did|what did) the journal say\b"
- ]
- _FQ_PATTERNS = [re.compile(p, re.IGNORECASE) for p in FACTUAL_KEYPHRASES]
-
- def _pre_router_factual(query: str) -> str | None:
-     """Checks for patterns common in direct factual questions about personal memory."""
-     q = (query or "")
-     for pat in _FQ_PATTERNS:
-         if re.search(pat, q):
-             return "factual_question"
-     return None
-
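Illustrative behaviour of the factual pre-router (example queries invented):

    assert _pre_router_factual("What was my wedding song?") == "factual_question"
    assert _pre_router_factual("How old am I?") == "factual_question"
    assert _pre_router_factual("Tell me about dementia") is None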
-
- # Summarization keyphrases, kept beside the other keyphrase lists.
- SUMMARIZATION_KEYPHRASES = [
-     r"^\b(summarize|summarise|recap)\b", r"^\b(give me a summary|create a short summary)\b"
- ]
- _SUM_PATTERNS = [re.compile(p, re.IGNORECASE) for p in SUMMARIZATION_KEYPHRASES]
-
- def _pre_router_summarization(query: str) -> str | None:
-     q = (query or "")
-     for pat in _SUM_PATTERNS:
-         if re.search(pat, q): return "summarization"
-     return None
-
-
- CARE_KEYPHRASES = [
-     r"\bwhere am i\b", r"\byou('?| ha)ve stolen my\b|\byou'?ve stolen my\b",
-     r"\bi lost (the )?word\b|\bword-finding\b|\bcan.?t find the word\b",
-     r"\bshe didn('?| no)t know me\b|\bhe didn('?| no)t know me\b",
-     r"\bdisorient(?:ed|ation)\b|\bagitation\b|\bconfus(?:ed|ion)\b",
-     r"\bcare home\b|\bnursing home\b|\bthe.*home\b",
-     r"\bplaylist\b|\bsongs?\b.*\b(memories?|calm|soothe|familiar)\b",
-     r"\bi want to keep teaching\b|\bi want to keep driving\b|\bi want to go home\b",
-     r"music therapy",
-     # Added to handle test cases. NOTE: each pattern needs its trailing comma;
-     # adjacent string literals would otherwise concatenate into one broken regex.
-     r"\bremembering the\b",  # catches P7
-     r"\bmissed you so much\b",  # catches P4
-     r"\b(i forgot my job|what did i work as|do you remember my job)\b",  # queries about a forgotten profession
- ]
- _CARE_PATTERNS = [re.compile(p) for p in CARE_KEYPHRASES]
-
-
- _STRIP_PATTERNS = [
-     (r'^\s*(your\s+(final\s+)?answer|your\s+response)\s+in\s+[A-Za-z\-]+\s*:?\s*', ''),
-     (r'\bbased on (?:the |any )?(?:provided )?(?:context|information|details)(?: provided)?(?:,|\.)?\s*', ''),
-     (r'^\s*as an ai\b.*?(?:,|\.)\s*', ''),
-     (r'\b(according to|from)\s+(the\s+)?(sources?|context)\b[:,]?\s*', ''),
-     (r'\bI hope this helps[.!]?\s*$', ''),
- ]
-
- def _clean_surface_text(text: str) -> str:
-     """Strips boilerplate phrasing from the model's surface text."""
-     out = text or ""
-     for pat, repl in _STRIP_PATTERNS:
-         out = re.sub(pat, repl, out, flags=re.IGNORECASE)
-     return re.sub(r'\n{3,}', '\n\n', out).strip()
-
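A hedged example of what _clean_surface_text removes (the input string is invented):

    raw = "Based on the context provided, try a calm routine.\n\n\n\nI hope this helps!"
    print(_clean_surface_text(raw))  # -> "try a calm routine."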
-
- # Utilities
- def _openai_client() -> Optional[OpenAI]:
-     api_key = os.getenv("OPENAI_API_KEY", "").strip()
-     return OpenAI(api_key=api_key) if api_key and OpenAI else None
-
- def describe_image(image_path: str) -> str:
-     """Describes an uploaded image with GPT-4o for inclusion in the memory journal."""
-     client = _openai_client()
-     if not client: return "(Image description failed: OpenAI API key not configured.)"
-     try:
-         extension = os.path.splitext(image_path)[1].lower()
-         mime_type = f"image/{'jpeg' if extension in ['.jpg', '.jpeg'] else extension.strip('.')}"
-         with open(image_path, "rb") as image_file:
-             base64_image = base64.b64encode(image_file.read()).decode('utf-8')
-         response = client.chat.completions.create(
-             model="gpt-4o",
-             messages=[{"role": "user", "content": [
-                 {"type": "text", "text": "Describe this image concisely for a memory journal. Focus on people, places, and key objects. Example: 'A photo of John and Mary smiling on a bench at the park.'"},
-                 {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}},
-             ]}],
-             max_tokens=100)
-         return response.choices[0].message.content or "No description available."
-     except Exception as e:
-         return f"[Image description error: {e}]"
-
- def detect_tags_from_query(
-     query: str,
-     nlu_vectorstore: FAISS,
-     behavior_options: list,
-     emotion_options: list,
-     topic_options: list,
-     context_options: list,
-     settings: dict = None
- ) -> Dict[str, Any]:
-     """Uses a dynamic two-step NLU process: Route -> Retrieve Examples -> Classify."""
-     result_dict = {"detected_behaviors": [], "detected_emotion": "None", "detected_topics": [], "detected_contexts": []}
-     router_prompt = NLU_ROUTER_PROMPT.format(query=query)
-     primary_goal_raw = call_llm([{"role": "user", "content": router_prompt}], temperature=0.0).strip().lower()
-     goal_for_filter = "practical_planning" if "practical" in primary_goal_raw else "emotional_support"
-     goal_for_prompt = "Practical Planning" if "practical" in primary_goal_raw else "Emotional Support"
-
-     if settings and settings.get("debug_mode"):
-         print(f"\n--- NLU Router ---\nGoal: {goal_for_prompt} (Filter: '{goal_for_filter}')\n------------------\n")
-
-     retriever = nlu_vectorstore.as_retriever(search_kwargs={"k": 2, "filter": {"primary_goal": goal_for_filter}})
-     retrieved_docs = retriever.invoke(query)
-     if not retrieved_docs:
-         retrieved_docs = nlu_vectorstore.as_retriever(search_kwargs={"k": 2}).invoke(query)
-
-     selected_examples = "\n".join(
-         f"User Query: \"{doc.page_content}\"\n{json.dumps(doc.metadata['classification'], indent=4)}"
-         for doc in retrieved_docs
-     )
-     if not selected_examples:
-         selected_examples = "(No relevant examples found)"
-         if settings and settings.get("debug_mode"):
-             print("WARNING: NLU retriever found no examples for this query.")
-
-     behavior_str = ", ".join(f'"{opt}"' for opt in behavior_options if opt != "None")
-     emotion_str = ", ".join(f'"{opt}"' for opt in emotion_options if opt != "None")
-     topic_str = ", ".join(f'"{opt}"' for opt in topic_options if opt != "None")
-     context_str = ", ".join(f'"{opt}"' for opt in context_options if opt != "None")
-
-     prompt = SPECIALIST_CLASSIFIER_PROMPT.format(
-         primary_goal=goal_for_prompt, examples=selected_examples,
-         behavior_options=behavior_str, emotion_options=emotion_str,
-         topic_options=topic_str, context_options=context_str, query=query
-     )
-
-     messages = [{"role": "system", "content": "You are a helpful NLU classification assistant."}, {"role": "user", "content": prompt}]
-     response_str = call_llm(messages, temperature=0.0, response_format={"type": "json_object"})
-
-     if settings and settings.get("debug_mode"):
-         print(f"\n--- NLU Specialist Full Response ---\n{response_str}\n----------------------------------\n")
-
-     try:
-         start_brace = response_str.find('{')
-         end_brace = response_str.rfind('}')
-         if start_brace == -1 or end_brace <= start_brace:
-             raise json.JSONDecodeError("No valid JSON object found in response.", response_str, 0)
-
-         json_str = response_str[start_brace : end_brace + 1]
-         result = json.loads(json_str)
-
-         result_dict["detected_emotion"] = result.get("detected_emotion") or "None"
-
-         behaviors_raw = result.get("detected_behaviors")
-         behaviors_canon = _canon_behavior_list(behaviors_raw, behavior_options)
-         if behaviors_canon:
-             result_dict["detected_behaviors"] = behaviors_canon
-
-         topics_raw = result.get("detected_topics") or result.get("detected_topic")
-         detected_topics = []
-         if isinstance(topics_raw, list):
-             for t in topics_raw:
-                 ct = _canon_topic(t, topic_options)
-                 if ct != "None": detected_topics.append(ct)
-         elif isinstance(topics_raw, str):
-             ct = _canon_topic(topics_raw, topic_options)
-             if ct != "None": detected_topics.append(ct)
-         result_dict["detected_topics"] = detected_topics
-
-         contexts_raw = result.get("detected_contexts")
-         contexts_canon = _canon_context_list(contexts_raw, context_options)
-         if contexts_canon:
-             result_dict["detected_contexts"] = contexts_canon
-
-         return result_dict
-
-     except (json.JSONDecodeError, AttributeError) as e:
-         print(f"ERROR parsing NLU Specialist JSON: {e}")
-         return result_dict
-
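A hedged sketch of a call and the returned shape (the vector store and option lists are assumed to exist; tag values are illustrative):

    tags = detect_tags_from_query("She keeps asking when lunch is", nlu_vs,
                                  behavior_opts, emotion_opts, topic_opts, context_opts)
    # e.g. {"detected_behaviors": ["repetitive_questioning"], "detected_emotion": "anxiety",
    #       "detected_topics": [], "detected_contexts": []}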
-
- def _default_embeddings():
-     model_name = os.getenv("EMBEDDINGS_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
-     return HuggingFaceEmbeddings(model_name=model_name)
-
-
- # The placeholder logic applies only to the personal memory bank; a general
- # knowledge index must never be seeded with a placeholder document.
- def build_or_load_vectorstore(docs: List[Document], index_path: str, is_personal: bool = False) -> FAISS:
-     os.makedirs(os.path.dirname(index_path), exist_ok=True)
-     if os.path.isdir(index_path) and os.path.exists(os.path.join(index_path, "index.faiss")):
-         try:
-             return FAISS.load_local(index_path, _default_embeddings(), allow_dangerous_deserialization=True)
-         except Exception: pass
-
-     if not docs:
-         if is_personal:
-             # Seed the personal index with a placeholder so it is never empty.
-             placeholder_text = "(This is the start of the personal memory journal.)"
-             docs = [Document(page_content=placeholder_text, metadata={"source": "placeholder"})]
-         else:
-             # For general indexes with no docs, build a truly empty FAISS index
-             # so the placeholder never shows up in vs_general.
-             empty_faiss = FAISS.from_texts([""], _default_embeddings())
-             empty_faiss.delete([empty_faiss.index_to_docstore_id[0]])
-             empty_faiss.save_local(index_path)
-             return empty_faiss
-
-     vs = FAISS.from_documents(docs, _default_embeddings())
-     vs.save_local(index_path)
-     return vs
-
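A usage sketch under the file's own semantics (index paths are hypothetical): an empty personal index is seeded with the placeholder, while an empty general index stays truly empty.

    vs_personal = build_or_load_vectorstore([], "data/personal_index", is_personal=True)   # holds the placeholder
    vs_general = build_or_load_vectorstore([], "data/general_index", is_personal=False)    # truly empty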
-
- def bootstrap_vectorstore(sample_paths: List[str] | None = None, index_path: str = "data/faiss_index") -> FAISS:
-     """
-     Loads documents from paths and builds or loads a FAISS vector store for the GENERAL knowledge base.
-     Correctly handles the case where no initial documents are found.
-     """
-     # Delete any stale index on disk first, so an old, empty index is never
-     # loaded in place of a freshly built one.
-     if os.path.exists(index_path):
-         shutil.rmtree(index_path)
-
-     docs: List[Document] = []
-     print(f"DEBUG (bootstrap_vectorstore): Bootstrapping index '{os.path.basename(index_path)}' with {len(sample_paths or [])} file(s).")
-
-     for p in (sample_paths or []):
-         try:
-             if p.lower().endswith(".jsonl"):
-                 docs.extend(texts_from_jsonl(p))
-             else:
-                 with open(p, "r", encoding="utf-8", errors="ignore") as fh:
-                     docs.append(Document(page_content=fh.read(), metadata={"source": os.path.basename(p)}))
-         except Exception as e:
-             print(f" - ERROR: Failed to bootstrap from file '{os.path.basename(p)}'. Reason: {e}")
-             continue
-
-     # The general knowledge base gets no placeholder: if no files are enabled or
-     # found, it stays truly empty. The placeholder logic lives in
-     # build_or_load_vectorstore and applies to the personal index only.
-     if not docs:
-         print(f" - WARNING: No documents were found for this index. It will be created empty.")
-
-     print(f"DEBUG (bootstrap_vectorstore): Total documents parsed for index '{os.path.basename(index_path)}': {len(docs)}.")
-
-     # is_personal=False ensures no placeholder is created downstream.
-     return build_or_load_vectorstore(docs, index_path=index_path, is_personal=False)
-
-
- def texts_from_jsonl(path: str) -> List[Document]:
-     """Parses a JSONL file into a list of Document objects."""
-     out: List[Document] = []
-     file_name = os.path.basename(path)
-     print(f"DEBUG (texts_from_jsonl): Attempting to read and parse '{file_name}'...")
-
-     try:
-         with open(path, "r", encoding="utf-8") as f:
-             for i, line in enumerate(f):
-                 try:
-                     obj = json.loads(line.strip())
-                 except json.JSONDecodeError:
-                     print(f" - WARNING: Skipping malformed JSON line {i+1} in '{file_name}'")
-                     continue
-
-                 txt = obj.get("text") or obj.get("content") or obj.get("query") or ""
-                 if not txt.strip():
-                     continue
-
-                 md = {"source": file_name, "chunk": i}
-                 for k in ("behaviors", "emotion", "topic_tags", "context_tags", "scene_id"):
-                     if obj.get(k):
-                         md[k] = obj.get(k)
-
-                 out.append(Document(page_content=txt, metadata=md))
-
-         print(f"DEBUG (texts_from_jsonl): ✅ SUCCESS: Parsed {len(out)} documents from '{file_name}'.")
-
-     except Exception as e:
-         print(f"DEBUG (texts_from_jsonl): ❌ CRITICAL ERROR while processing '{file_name}'. Reason: {e}")
-         return []
-
-     return out
-
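An example of the JSONL record shape texts_from_jsonl expects (field values and path invented):

    # {"text": "Mary visited and we looked at the garden.",
    #  "behaviors": ["reminiscence_prompting"], "emotion": "calm", "scene_id": "S12"}
    docs = texts_from_jsonl("data/journal.jsonl")
    # docs[0].metadata -> {"source": "journal.jsonl", "chunk": 0,
    #                      "behaviors": ["reminiscence_prompting"], "emotion": "calm", "scene_id": "S12"}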
-
- def rerank_documents(query: str, documents: list[tuple[Document, float]]) -> list[tuple[tuple[Document, float], float]]:
-     """
-     Re-ranks a list of retrieved documents against a query using a CrossEncoder model.
-     Returns the original document tuples along with their new re-ranker score.
-     """
-     if not documents or not query:
-         return []
-
-     model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
-
-     doc_contents = [doc.page_content for doc, score in documents]
-     query_doc_pairs = [[query, doc_content] for doc_content in doc_contents]
-
-     scores = model.predict(query_doc_pairs)
-
-     reranked_results = list(zip(documents, scores))
-     reranked_results.sort(key=lambda x: x[1], reverse=True)
-
-     print(f"\n[DEBUG] Re-ranked Top 3 Sources:")
-     for doc_tuple, score in reranked_results[:3]:
-         doc, _ = doc_tuple
-         print(f" - New Rank | Source: {doc.metadata.get('source')} | Score: {score:.4f}")
-
-     return reranked_results
-
-
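A hedged usage sketch (vs_general is assumed to be a populated FAISS store): FAISS supplies (Document, distance) pairs, and the CrossEncoder re-scores each (query, text) pair, higher meaning more relevant.

    candidates = vs_general.similarity_search_with_score("wandering at night", k=20)
    for (doc, faiss_distance), ce_score in rerank_documents("wandering at night", candidates)[:3]:
        print(f"{ce_score:.4f}", doc.metadata.get("source"))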
- # Some vectorstores can return duplicates. De-duplicating matters when a top-k
- # cutoff might otherwise include near-duplicates introduced by query expansion.
- def dedup_docs(scored_docs):
-     seen = set()
-     unique = []
-     for doc, score in scored_docs:
-         uid = doc.metadata.get("source", "") + "::" + doc.page_content.strip()
-         if uid not in seen:
-             unique.append((doc, score))
-             seen.add(uid)
-     return unique
-
-
- def call_llm(messages: List[Dict[str, str]], temperature: float = 0.6, stop: Optional[List[str]] = None, response_format: Optional[dict] = None) -> str:
-     client = _openai_client()
-     if client is None: raise RuntimeError("OpenAI client not configured (missing API key?).")
-     model = os.getenv("OPENAI_CHAT_MODEL", "gpt-4o-mini")
-     api_args = {"model": model, "messages": messages, "temperature": float(temperature if temperature is not None else 0.6)}
-     if stop: api_args["stop"] = stop
-     if response_format: api_args["response_format"] = response_format
-     resp = client.chat.completions.create(**api_args)
-     content = ""
-     try:
-         content = resp.choices[0].message.content or ""
-     except Exception:
-         msg = getattr(resp.choices[0], "message", None)
-         if isinstance(msg, dict): content = msg.get("content") or ""
-     return content.strip()
-
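Minimal call sketches, mirroring how the module itself uses call_llm (prompt strings invented):

    text = call_llm([{"role": "user", "content": "Say hello."}], temperature=0.0)
    # JSON mode, as used by the NLU specialist classifier:
    json_str = call_llm(
        [{"role": "user", "content": "Return {\"ok\": true} as JSON."}],
        temperature=0.0,
        response_format={"type": "json_object"},
    )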
- MUSIC_KEYPHRASES = [
-     r"\bplay\b.*\bsong\b",
-     r"\bplay\b.*\bmusic\b",
-     r"\blisten to music\b",
-     r"\bhear\b.*\bsong\b",
-     r"\bhear\b.*\bmusic\b",
- ]
- _MUSIC_PATTERNS = [re.compile(p, re.IGNORECASE) for p in MUSIC_KEYPHRASES]
-
-
- def _pre_router_music(query: str) -> str | None:
-     for pat in _MUSIC_PATTERNS:
-         if re.search(pat, query): return "play_music_request"
-     return None
-
- # NOTE: MULTI_HOP_KEYPHRASES and _MH_PATTERNS are defined once near the top of
- # this module; redefining them here would silently discard the revised patterns.
- def _pre_router_multi_hop(query: str) -> str | None:
-     q = (query or "")
-     for pat in _MH_PATTERNS:
-         if re.search(pat, q): return "multi_hop"
-     return None
-
- def _pre_router(query: str) -> str | None:
-     q = (query or "").lower()
-     for pat in _CARE_PATTERNS:
-         if re.search(pat, q): return "caregiving_scenario"
-     return None
-
- def _llm_route_with_prompt(query: str, temperature: float = 0.0) -> str:
-     router_messages = [{"role": "user", "content": ROUTER_PROMPT.format(query=query)}]
-     query_type = call_llm(router_messages, temperature=temperature).strip().lower()
-     return query_type
-
- # The severity override applies only to the moderate and advanced stages.
- def route_query_type(query: str, severity: str = "Normal / Unspecified"):
-     # Adaptive logic that ONLY applies when severity is moderate or advanced.
-     if severity in ["Moderate Stage", "Advanced Stage"]:
-         # Check first whether it is an obvious other type (e.g. summarization).
-         if not _pre_router_summarization(query) and not _pre_router_multi_hop(query):
-             print("Query classified as: caregiving_scenario (severity override)")
-             return "caregiving_scenario"
-
-     # For "Normal / Unspecified", continue with the original routing logic.
-     # Priority 1: Check for specific, structural queries first.
-     mh_hit = _pre_router_multi_hop(query)
-     if mh_hit:
-         print(f"Query classified as: {mh_hit} (multi-hop pre-router)")
-         return mh_hit
-
-     # Priority 2: Check for explicit commands like "summarize".
-     sum_hit = _pre_router_summarization(query)
-     if sum_hit:
-         print(f"Query classified as: {sum_hit} (summarization pre-router)")
-         return sum_hit
-
-     # Priority 3: Check for music requests. The specific "play music" checker
-     # must run before the general caregiving keyword checker (_pre_router),
-     # which would otherwise swallow music queries.
-     music_hit = _pre_router_music(query)
-     if music_hit:
-         print(f"Query classified as: {music_hit} (music pre-router)")
-         return music_hit
-
-     # Priority 4: Check for general caregiving keywords.
-     care_hit = _pre_router(query)
-     if care_hit:
-         print(f"Query classified as: {care_hit} (caregiving pre-router)")
-         return care_hit
-
-     # Fallback: If no pre-routers match, use the LLM for nuanced classification.
-     query_type = _llm_route_with_prompt(query, temperature=0.0)
-     print(f"Query classified as: {query_type} (LLM router)")
-     return query_type
-
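Illustrative routing outcomes (queries invented; see the pre-router priorities above):

    route_query_type("Compare how agitated I was before vs after the move")  # -> "multi_hop"
    route_query_type("Summarize my journal from last week")                  # -> "summarization"
    route_query_type("Please play some calming music")                       # -> "play_music_request"
    route_query_type("Where am I?", severity="Moderate Stage")               # -> "caregiving_scenario"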
- # Small helper for the evaluation harness; caps the source list at 3 entries.
- def _source_ids_for_eval(docs, cap=3):
-     out, seen = [], set()
-     for d in docs or []:
-         md = getattr(d, "metadata", {}) or {}
-         src = md.get("source")
-
-         if not src or src == 'placeholder':
-             continue
-
-         # Always use the filename as the key, regardless of file type.
-         key = src
-
-         if key and key not in seen:
-             seen.add(key)
-             out.append(str(key))
-             if len(out) >= cap:
-                 break
-     return out
-
-
-
- def make_rag_chain(
-     vs_general: FAISS,
-     vs_personal: FAISS,
-     vs_playbook: FAISS,  # caregiving playbook entries
-     *,
-     for_evaluation: bool = False,
-     role: str = "patient",
-     temperature: float = 0.6,
-     language: str = "English",
-     patient_name: str = "the patient",
-     caregiver_name: str = "the caregiver",
-     tone: str = "warm",
-     disease_stage: str = "Default: Mild Stage",
-     music_manifest_path: str = ""
- ):
-     """Returns a callable that performs the complete RAG process."""
-
-     RELEVANCE_THRESHOLD = 0.85
-     SCORE_MARGIN = 0.10  # Margin to decide if scores are "close enough" to blend.
-
-     # Helper used by the answer function below.
-     def _format_docs(docs: List[Document], default_msg: str) -> str:
-         if not docs: return default_msg
-         unique_docs = {doc.page_content: doc for doc in docs}.values()
-         return "\n".join([f"- {d.page_content.strip()}" for d in unique_docs])
-
-     def _answer_fn(query: str, query_type: str, chat_history: List[Dict[str, str]], **kwargs) -> Dict[str, Any]:
-
-         print(f"[DEBUG] The Query is: {query}")
-         print(f"[DEBUG] The Query Type is: {query_type}")
-         print(f"DEBUG: RAG chain received disease_stage = '{disease_stage}'")
-
-         # Sanity check on vs_general before any retrieval.
-         print("\n" + "#"*20 + " VS_GENERAL SANITY CHECK " + "#"*20)
-         if vs_general and hasattr(vs_general.docstore, '_dict'):
-             doc_count = len(vs_general.docstore._dict)
-             print(f" - ✅ vs_general is valid and contains {doc_count} document(s).")
-             # Optional: check whether it holds only the placeholder.
-             if doc_count == 1 and list(vs_general.docstore._dict.values())[0].metadata.get("source") == "placeholder":
-                 print(" - ⚠️ Warning: vs_general appears to contain only a placeholder document.")
-         else:
-             print(" - ❌ CRITICAL WARNING: vs_general is None or not a valid FAISS index.")
-         print("#"*67 + "\n")
-
-         # Local copy of the temperature to avoid an UnboundLocalError.
-         test_temperature = temperature
-
-         # --- MUSIC LIBRARY LISTING ---
-         if "list_music_request" in query_type:
-             if not music_manifest_path or not os.path.exists(music_manifest_path):
-                 return {"answer": "I don't see any music in your personal library yet.", "sources": ["Personal Music Library"], "audio_playback_url": None}
-
-             with open(music_manifest_path, "r") as f:
-                 manifest = json.load(f)
-
-             if not manifest:
-                 return {"answer": "Your personal music library is currently empty.", "sources": ["Personal Music Library"], "audio_playback_url": None}
-
-             song_list = []
-             for song_id, data in manifest.items():
-                 song_list.append(f"- '{data['title']}' by {data['artist']}")
-
-             formatted_songs = "\n".join(song_list)
-             answer = f"Based on your personal library, here is the music you like to listen to:\n{formatted_songs}"
-             return {"answer": answer, "sources": ["Personal Music Library"], "audio_playback_url": None}
-
-         # --- MUSIC PLAYBACK ---
-         if "play_music_request" in query_type:
-             # Manifest loading logic
-             if not music_manifest_path or not os.path.exists(music_manifest_path):
-                 return {"answer": "I'm sorry, there is no music in the library yet.", "sources": [], "audio_playback_url": None}
-             with open(music_manifest_path, "r") as f:
-                 manifest = json.load(f)
-             if not manifest:
-                 return {"answer": "I'm sorry, there is no music in the library yet.", "sources": [], "audio_playback_url": None}
-
-             found_song = None
-             query_lower = query.lower()
-
-             # 1. First, search for a specific title or artist mentioned in the query.
-             for song_id, data in manifest.items():
-                 if data["title"].lower() in query_lower or data["artist"].lower() in query_lower:
-                     found_song = data
-                     break
-
-             # Define the emotion tag here so it is also available for the preamble later.
-             detected_emotion_raw = kwargs.get("emotion_tag")
-             detected_emotion = detected_emotion_raw.lower() if detected_emotion_raw else ""
-
-             # 2. If not found, use the detected NLU tags to find the FIRST mood match.
-             if not found_song:
-                 detected_behavior_raw = kwargs.get("scenario_tag")
-                 detected_behavior = detected_behavior_raw.lower() if detected_behavior_raw else ""
-
-                 print(f"[DEBUG] Music Search: Using NLU tags. Behavior='{detected_behavior}', Emotion='{detected_emotion}'")
-
-                 search_tags = [detected_emotion, detected_behavior]
-
-                 for nlu_tag in search_tags:
-                     if not nlu_tag or nlu_tag == "none": continue
-
-                     core_nlu_word = nlu_tag.split('_')[0]
-                     print(f" [DEBUG] Music Search Loop: Using core_nlu_word='{core_nlu_word}' for matching.")
-
-                     for song_id, data in manifest.items():
-                         for mood_tag in data.get("moods", []):  # use .get for safety
-                             if not mood_tag or not isinstance(mood_tag, str): continue
-                             mood_words = re.split(r'[\s/]', mood_tag.lower())
-
-                             if core_nlu_word in mood_words:
-                                 found_song = data
-                                 break
-                         if found_song: break
-                     if found_song: break
-
-             # 3. If still not found, handle generic requests by playing a random song.
-             if not found_song:
-                 print("[DEBUG] Music Search: No specific song or NLU match found. Selecting a random song.")
-                 generic_keywords = ["music", "song", "something", "anything"]
-                 if any(keyword in query_lower for keyword in generic_keywords):
-                     random_song_id = random.choice(list(manifest.keys()))
-                     found_song = manifest[random_song_id]
-
-             # 4. Construct the final response, adding the empathetic preamble if a song was found.
-             if found_song:
-                 preamble_text = ""
-                 # Only generate a preamble if there was a clear emotional context.
-                 if detected_emotion and detected_emotion != "none":
-                     preamble_prompt = MUSIC_PREAMBLE_PROMPT.format(emotion=detected_emotion, query=query)
-                     preamble_text = call_llm([{"role": "user", "content": preamble_prompt}], temperature=0.7)
-                     preamble_text = preamble_text.strip() + " "
-
-                 action_text = f"Of course. Playing '{found_song['title']}' by {found_song['artist']} for you."
-                 final_answer = preamble_text + action_text
-
-                 return {"answer": final_answer, "sources": ["Personal Music Library"], "audio_playback_url": found_song['filepath']}
-             else:
-                 return {"answer": "I couldn't find a song matching your request in the library.", "sources": [], "audio_playback_url": None}
-         # --- END MUSIC PLAYBACK LOGIC ---
-
-         p_name = patient_name or "the patient"
-         c_name = caregiver_name or "the caregiver"
-         perspective_line = (f"You are speaking directly to {p_name}, who is the patient...") if role == "patient" else (f"You are communicating with {c_name}, the caregiver, about {p_name}.")
-         system_message = SYSTEM_TEMPLATE.format(tone=tone, language=language, perspective_line=perspective_line, guardrails=SAFETY_GUARDRAILS)
-         messages = [{"role": "system", "content": system_message}]
-         messages.extend(chat_history)
-
-         # Non-RAG routes: answer directly from the model.
-         if "general_knowledge_question" in query_type or "general_conversation" in query_type:
-             template = ANSWER_TEMPLATE_GENERAL_KNOWLEDGE if "general_knowledge" in query_type else ANSWER_TEMPLATE_GENERAL
-             user_prompt = template.format(question=query, language=language)
-             messages.append({"role": "user", "content": user_prompt})
-             raw_answer = call_llm(messages, temperature=test_temperature)
-             answer = _clean_surface_text(raw_answer)
-             sources = ["General Knowledge"] if "general_knowledge" in query_type else []
-             return {"answer": answer, "sources": sources, "source_documents": []}
-
-         all_retrieved_docs = []
-         is_personal_route = "factual" in query_type or "summarization" in query_type or "multi_hop" in query_type
-
-         # Personal memory route
-         if is_personal_route:
-             print("[DEBUG] Personal Memory Route Activated.")
-             print(" - Strategy: Retrieving ALL personal documents to provide full context for the LLM.")
-
-             # For BOTH summarization and factual questions, load every document;
-             # the LLM can pick the specific facts out of the full context.
-             if vs_personal and vs_personal.docstore and hasattr(vs_personal.docstore, '_dict'):
-                 all_docs = list(vs_personal.docstore._dict.values())
-
-                 # Filter out the placeholder document.
-                 all_retrieved_docs = [
-                     doc for doc in all_docs if doc.metadata.get("source") != "placeholder"
-                 ]
-                 print(f" -> Retrieved {len(all_retrieved_docs)} documents from the personal memory bank.")
-             else:
-                 print(" -> Personal vector store is empty or invalid.")
-
-         else:
-             # Hybrid retrieval for caregiving scenarios.
-             print("[DEBUG] Using HYBRID Retrieval for caregiving scenario...")
-
-             # Extract the primary behavior tag.
-             scenario_tags_raw = kwargs.get("scenario_tag")
-             primary_behavior = (scenario_tags_raw[0] if isinstance(scenario_tags_raw, list) and scenario_tags_raw else scenario_tags_raw) or "None"
-
-             # Stage 1: Search the playbook for expert advice (the metadata key is 'behaviors').
-             if primary_behavior != "None":
-                 print(f" - Stage 1: Searching Playbook for expert advice on '{primary_behavior}'...")
-                 playbook_results = vs_playbook.similarity_search(
-                     primary_behavior, k=1,
-                     filter={"behaviors": primary_behavior}
-                 )
-                 if playbook_results:
-                     print(" -> ✅ Success! Found expert advice in Playbook.")
-                     all_retrieved_docs.extend(playbook_results)
-
-             # Stage 2: Search general knowledge for illustrative stories.
-             print(" - Stage 2: Searching general knowledge for illustrative stories...")
-
-             search_queries = [query]
-             try:
-                 expanded = json.loads(call_llm([{"role": "user", "content": QUERY_EXPANSION_PROMPT.format(question=query)}], temperature=0.0))
-                 if isinstance(expanded, list): search_queries.extend(expanded)
-             except Exception as e:
-                 print(f" - Query expansion failed: {e}")
-
-             candidate_docs = []
-             if primary_behavior and primary_behavior != "None":
-                 print(f" - Stage 2a: High-precision search for behavior: '{primary_behavior}'")
-                 for q in search_queries:
-                     candidate_docs.extend(vs_general.similarity_search_with_score(q, k=10, filter={"behaviors": primary_behavior}))
-
-             print(" - Stage 2b: High-recall semantic search (k=20)")
-             for q in search_queries:
-                 candidate_docs.extend(vs_general.similarity_search_with_score(q, k=20))
-
-             unique_candidates = dedup_docs(candidate_docs)
-             reranked_docs_with_scores = rerank_documents(query, unique_candidates) if unique_candidates else []
-
-             print("\n" + "="*20 + " All Re-ranked Documents & Scores " + "="*20)
-             if reranked_docs_with_scores:
-                 for i, (doc_tuple, score) in enumerate(reranked_docs_with_scores):
-                     doc, _ = doc_tuple
-                     source = doc.metadata.get('source', 'N/A')
-                     # print(f" - Rank #{i+1}: Score={score:<9.4f}, Source='{source}'")
-             else:
-                 print(" - No documents were re-ranked.")
-             print("="*66)
-
-             final_docs_with_scores = []
-             if reranked_docs_with_scores:
-                 RELATIVE_SCORE_MARGIN = 3.0
-                 top_doc_tuple, top_score = reranked_docs_with_scores[0]
-
-                 print("\n" + "="*20 + " Filtering by Score Margin " + "="*20)
-                 threshold = top_score - RELATIVE_SCORE_MARGIN
-                 print(f" - Top Score: {top_score:.4f}")
-                 print(f" - Score Threshold (must be >): {threshold:.4f}")
-
-                 final_docs_with_scores.append(top_doc_tuple)
-                 print(f" - ✅ Kept (Top 1): Score={top_score:<9.4f}, Source='{top_doc_tuple[0].metadata.get('source', 'N/A')}'")
-                 for doc_tuple, score in reranked_docs_with_scores[1:]:
-                     if score > threshold:
-                         final_docs_with_scores.append(doc_tuple)
-                         print(f" - ✅ Kept: Score={score:<9.4f}, Source='{doc_tuple[0].metadata.get('source', 'N/A')}'")
-                     else:
-                         print(f" - ❌ Discarded (broke loop): Score={score:<9.4f}, Source='{doc_tuple[0].metadata.get('source', 'N/A')}'")
-                         break
-                 print("="*52)
-
-             limit = 5 if disease_stage in ["Moderate Stage", "Advanced Stage"] else 3
-             final_docs_with_scores = final_docs_with_scores[:limit]
-
-             # EXTEND the main list rather than overwriting it.
-             retrieved_general_docs = [doc for doc, score in final_docs_with_scores]
-             all_retrieved_docs.extend(retrieved_general_docs)
-             print(f" -> Retrieved {len(retrieved_general_docs)} general documents.")
-         # END caregiving scenario
-
-         # 'all_retrieved_docs' is now populated for ALL routes; de-duplicate it
-         # and use the result consistently from here on.
-         final_unique_docs = [doc for doc, score in dedup_docs([(d, 0) for d in all_retrieved_docs])]
-
-         # --- FINAL PROCESSING (applies to all RAG routes) ---
-         print("\n--- DEBUG: Final Selected Docs ---")
-         for doc in final_unique_docs:
-             print(f" - Source: {doc.metadata.get('source', 'N/A')}")
-         print("----------------------------------------------------------------")
-
-         # Use 'final_unique_docs' for all context formatting and the final return value.
-         personal_sources_set = {'1 Complaints of a Dutiful Daughter.txt', 'Saved Chat', 'Text Input'}
-         personal_context = _format_docs([d for d in final_unique_docs if d.metadata.get('source') in personal_sources_set], "(No relevant personal memories found.)")
-         general_context = _format_docs([d for d in final_unique_docs if d.metadata.get('source') not in personal_sources_set], "(No general guidance found.)")
-
-         if is_personal_route:
-             template = ANSWER_TEMPLATE_SUMMARIZE if "summarization" in query_type else \
-                        ANSWER_TEMPLATE_FACTUAL_MULTI if "multi_hop" in query_type else \
-                        ANSWER_TEMPLATE_FACTUAL
-             user_prompt = template.format(
-                 question=query, context=personal_context, personal_context=personal_context,
-                 general_context=general_context, language=language,
-                 patient_name=p_name, caregiver_name=c_name, role=role
-             )
-             print("[DEBUG] Personal Route Factual / Sum / Multi PROMPT")
-
-         else:  # caregiving_scenario
-             if disease_stage == "Advanced Stage": template = ANSWER_TEMPLATE_ADQ_ADVANCED
-             elif disease_stage == "Moderate Stage": template = ANSWER_TEMPLATE_ADQ_MODERATE
-             else: template = ANSWER_TEMPLATE_ADQ
-
-             emotions_context = render_emotion_guidelines(kwargs.get("emotion_tag"))
-             user_prompt = template.format(
-                 general_context=general_context, personal_context=personal_context,  # personal_context now carries stories
-                 question=query, scenario_tag=kwargs.get("scenario_tag"), emotions_context=emotions_context,
-                 role=role, language=language, patient_name=p_name, caregiver_name=c_name,
-                 emotion_tag=kwargs.get("emotion_tag")
-             )
-             print("[DEBUG] Caregiving Scenario PROMPT")
-
-         messages.append({"role": "user", "content": user_prompt})
-         raw_answer = call_llm(messages, temperature=0.0 if for_evaluation else temperature)
-         answer = _clean_surface_text(raw_answer)
-         print(f"[DEBUG] LLM Answer: {answer}")
-
-         # scenario_tag may arrive as a list from the NLU step; normalise before comparing.
-         scenario_tag_val = kwargs.get("scenario_tag")
-         if isinstance(scenario_tag_val, list):
-             scenario_tag_val = scenario_tag_val[0] if scenario_tag_val else ""
-         if (scenario_tag_val or "").lower() in ["exit_seeking", "wandering"]:
-             answer += f"\n\n---\n{RISK_FOOTER}"
-
-         # Use 'final_unique_docs' for the final source list and document return.
-         sources = _source_ids_for_eval(final_unique_docs)
-         print("DEBUG Sources (After Filtering):", sources)
-         return {"answer": answer, "sources": sources, "source_documents": final_unique_docs}
-
-     return _answer_fn
- # END of make_rag_chain
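An end-to-end usage sketch (stores, tags, and the query are hypothetical; answer_query is defined just below):

    chain = make_rag_chain(vs_general, vs_personal, vs_playbook,
                           role="caregiver", disease_stage="Moderate Stage",
                           music_manifest_path="data/music_manifest.json")
    result = answer_query(chain, "She keeps asking to go home every evening.",
                          query_type="caregiving_scenario", chat_history=[],
                          scenario_tag="exit_seeking", emotion_tag="anxiety")
    print(result["answer"], result["sources"])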
-
- def answer_query(chain, question: str, **kwargs) -> Dict[str, Any]:
-     if not callable(chain): return {"answer": "[Error: RAG chain is not callable]", "sources": []}
-     try:
-         return chain(question, **kwargs)
-     except Exception as e:
-         print(f"ERROR in answer_query: {e}")
-         return {"answer": f"[Error executing chain: {e}]", "sources": []}
-
- def synthesize_tts(text: str, lang: str = "en"):
-     if not text or gTTS is None: return None
-     try:
-         with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fp:
-             tts = gTTS(text=text, lang=(lang or "en"))
-             tts.save(fp.name)
-             return fp.name
-     except Exception:
-         return None
-
- def transcribe_audio(filepath: str, lang: str = "en"):
-     client = _openai_client()
-     if not client: return "[Transcription failed: API key not configured]"
-     model = os.getenv("TRANSCRIBE_MODEL", "whisper-1")
-     api_args = {"model": model}
-     if lang and lang != "auto": api_args["language"] = lang
-     with open(filepath, "rb") as audio_file:
-         transcription = client.audio.transcriptions.create(file=audio_file, **api_args)
-     return transcription.text