Upload folder using huggingface_hub
- app.py +16 -26
- config.py +13 -1
- db.py +2 -2
- graph_helper.py +17 -265
- modules/__init__.py +0 -0
- modules/nodes/chat.py +73 -0
- modules/nodes/conditions.py +25 -0
- modules/nodes/dedup.py +43 -0
- modules/nodes/init.py +61 -0
- modules/nodes/state.py +11 -0
- modules/nodes/tool_calls.py +6 -0
- modules/nodes/validator.py +146 -0
- sanatan_assistant.py +17 -7
- tools.py +13 -4
app.py
CHANGED
@@ -149,7 +149,7 @@ async def chat_streaming(debug_mode: bool, message, history, thread_id):
     start_time = time.time()
     streamed_response = ""
     final_response = ""
-    final_node = "validator"
+    # final_node = "validator"

     MAX_CONTENT = 500

@@ -167,7 +167,7 @@ async def chat_streaming(debug_mode: bool, message, history, thread_id):
             node_icon = "🧠"
         else:
             node_icon = "⚙️"
-        node_label = f"
+        node_label = f"{node}"
         tool_label = f"{name or ''}"
         if tool_label:
             node_label = node_label + f":{tool_label}"
@@ -186,10 +186,6 @@ async def chat_streaming(debug_mode: bool, message, history, thread_id):
     def generate_processing_message():
         return (
             f"<div class='thinking-bubble'><em>🤔{random.choice(thinking_verbs)} ...</em></div>"
-            # f"<div style='opacity: 0.1' title='{full}'>"
-            # f"<span>{node}:{name or ''}:</span>"
-            # f"<strong>Looking for : [{message}]</strong> {truncated or '...'}"
-            # f"</div>"
         )

     if (
@@ -203,11 +199,6 @@ async def chat_streaming(debug_mode: bool, message, history, thread_id):

         html = (
             f"<div class='thinking-bubble'><em>🤔 {msg.name} tool: {random.choice(thinking_verbs)} ...</em></div>"
-            # f"<div style='opacity: 0.5'>"
-            # f"<strong>Looking for : [{message}]</strong><br>"
-            # f"<strong>Tool Args:</strong> {tooltip or '(no args)'}<br>"
-            # f"{truncated or '...'}"
-            # f"</div>"
         )
         yield f"### { ' → '.join(node_tree)}\n{html}"
     elif isinstance(msg, AIMessageChunk):
@@ -226,12 +217,12 @@ async def chat_streaming(debug_mode: bool, message, history, thread_id):
             yield f"### { " → ".join(node_tree)}\n{generate_processing_message()}\n<div class='intermediate-output'>{escape(truncate_middle(streamed_response))}</div>"
         else:
             # Stream intermediate messages with transparent style
-            if node != final_node:
-                streamed_response += msg.content
-                yield f"### { ' → '.join(node_tree) }\n<div class='intermediate-output'>{escape(truncate_middle(streamed_response))}</div>"
-            else:
-                # Buffer the final validated response instead of yielding
-                final_response += msg.content
+            # if node != final_node:
+            streamed_response += msg.content
+            yield f"### { ' → '.join(node_tree) }\n<div class='intermediate-output'>{escape(truncate_middle(streamed_response))}</div>"
+            # else:
+            # Buffer the final validated response instead of yielding
+            final_response += msg.content

         if msg.tool_call_chunks:
             for tool_call_chunk in msg.tool_call_chunks:
@@ -275,16 +266,15 @@ async def chat_streaming(debug_mode: bool, message, history, thread_id):
             node_tree[-1] = "✅"
             end_time = time.time()
             duration = end_time - start_time
-
-            final_response
-
-
-
+            final_response = (
+                f"\n{final_response}" f"\n\n⏱️ Processed in {duration:.2f} seconds"
+            )
+            buffer = f"### {' → '.join(node_tree)}\n"
+            yield buffer
+            for c in final_response:
+                buffer += c
             yield buffer
-
-            buffer += c
-            yield buffer
-            await asyncio.sleep(0.0005)
+                await asyncio.sleep(0.0005)

             logger.debug("************************************")
             # Now, you can process the complete tool calls from the buffer
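The rewritten ending buffers the validated answer and then replays it character by character, yielding a growing string so the chat UI renders a typing effect. A minimal standalone sketch of the same pattern (the header text and sleep interval are illustrative):

    import asyncio

    async def replay_final_response(final_response: str):
        # Mirror the new app.py ending: emit the node-tree header first,
        # then grow the buffer one character per yield so a streaming UI
        # (e.g. Gradio) re-renders progressively.
        buffer = "### init → llm → ✅\n"
        yield buffer
        for c in final_response:
            buffer += c
            yield buffer
            await asyncio.sleep(0.0005)  # tiny delay produces the typing effect

    async def main():
        async for chunk in replay_final_response("Answer text\n\n⏱️ Processed in 1.23 seconds"):
            last = chunk  # a UI would display `chunk` here
        print(last)

    asyncio.run(main())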
config.py
CHANGED
@@ -1,5 +1,5 @@
 from metadata import MetadataWhereClause
-
+from typing import List, Dict

 class SanatanConfig:
     # shuklaYajurVedamPdfPath: str = "./data/shukla-yajur-veda.pdf"
@@ -497,3 +497,15 @@ class SanatanConfig:
         if "collection_embedding_fn" in scripture:
             embedding_fn = scripture["collection_embedding_fn"]  # overridden in config
         return embedding_fn
+
+    def filter_scriptures_fields(
+        self,
+        fields_to_keep: List[str]
+    ) -> List[Dict]:
+        """
+        Return a list of scripture dicts containing only the specified fields.
+        """
+        filtered = []
+        for s in self.scriptures:
+            filtered.append({k: s[k] for k in fields_to_keep if k in s})
+        return filtered
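The new filter_scriptures_fields helper shrinks each scripture entry to a whitelist of keys before it is serialized into the system prompt. A self-contained sketch of the same dict-comprehension trick (the sample entries are made up):

    from typing import Dict, List

    scriptures: List[Dict] = [
        {"name": "thiruvaimozhi", "collection_name": "prabandham", "pdf_path": "./x.pdf"},
        {"name": "gita", "collection_name": "bhagavad_gita"},
    ]

    def filter_fields(items: List[Dict], fields_to_keep: List[str]) -> List[Dict]:
        # Absent keys are skipped rather than raising KeyError,
        # which is what the `if k in s` guard buys in the new method.
        return [{k: s[k] for k in fields_to_keep if k in s} for s in items]

    print(filter_fields(scriptures, ["name", "collection_name"]))
    # [{'name': 'thiruvaimozhi', 'collection_name': 'prabandham'}, {'name': 'gita', 'collection_name': 'bhagavad_gita'}]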
db.py
CHANGED
@@ -58,8 +58,8 @@ class SanatanDatabase:

         # 1. Try native contains
         response = collection.query(
-
-            [
+            query_embeddings=get_embedding(
+                [literal_to_search_for], SanatanConfig().get_embedding_for_collection(collection_name)
             ),
             where_document={"$contains": literal_to_search_for},
             n_results=n_results,
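The fix supplies explicit query_embeddings alongside the `$contains` document filter, so the "native contains" lookup is a vector query narrowed to documents containing the literal. A minimal chromadb sketch of that call shape (the hard-coded embedding stands in for get_embedding's output):

    import chromadb

    client = chromadb.Client()
    collection = client.get_or_create_collection("demo")
    collection.add(
        ids=["v1"], documents=["vada venkata mamalai"], embeddings=[[0.1, 0.2, 0.3]]
    )

    literal_to_search_for = "venkata"
    response = collection.query(
        query_embeddings=[[0.1, 0.2, 0.3]],  # db.py computes this via get_embedding(...)
        where_document={"$contains": literal_to_search_for},  # keep only docs containing the literal
        n_results=5,
    )
    print(response["documents"])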
graph_helper.py
CHANGED
@@ -1,289 +1,41 @@
-import json
-from typing import Annotated, TypedDict
-from httpx import Timeout
 from langgraph.graph import StateGraph, START, END
 from langgraph.checkpoint.memory import MemorySaver
-from langgraph.graph.message import add_messages
-from langchain_openai import ChatOpenAI
 from langgraph.graph.state import CompiledStateGraph
-from
-from
-from
-
-
-
-
-
-    tool_get_standardized_azhwar_names,
-    tool_search_db_by_metadata,
-    tool_get_standardized_divya_desam_names,
-    tool_search_db_for_literal,
-)
-from langgraph.prebuilt import ToolNode, tools_condition
-from langchain_core.messages import SystemMessage, ToolMessage, HumanMessage
+from modules.nodes.chat import chatNode, tools
+from modules.nodes.conditions import branching_condition
+from modules.nodes.dedup import dedup_tool_call
+from modules.nodes.init import init_system_prompt_node
+from modules.nodes.state import ChatState
+from modules.nodes.tool_calls import increment_tool_calls
+from modules.nodes.validator import validatorNode
+from langgraph.prebuilt import ToolNode
 import logging

 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)

-
-class ChatState(TypedDict):
-    debug_mode: bool = True
-    messages: Annotated[list[str], add_messages]
-
-
-def check_debug_condition(state: ChatState) -> str:
-    if state["debug_mode"]:
-        return "validator"
-    else:
-        return "__end__"
-
-
-def branching_condition(state: ChatState) -> str:
-    last_message = state["messages"][-1]
-    if hasattr(last_message, "tool_calls") and last_message.tool_calls:
-        return "tools"
-    else:
-        return check_debug_condition(state)
-
-
-from typing import List, Dict
-
-
-def truncate_messages_for_token_limit(messages, max_tokens=50000):
-    """
-    messages: list of dicts or LangChain messages
-    """
-    total_tokens = 0
-    result = []
-
-    # iterate from newest to oldest
-    for msg in reversed(messages):
-        content = getattr(msg, "content", "")
-        tokens = len(content) // 4
-
-        # handle tool calls
-        group = [msg]
-        tool_calls = getattr(msg, "additional_kwargs", {}).get("tool_calls", [])
-        for call in tool_calls:
-            # include corresponding tool messages
-            for m in messages:
-                if (
-                    getattr(m, "additional_kwargs", {}).get("tool_call_id")
-                    == call["id"]
-                ):
-                    group.append(m)
-
-        group_tokens = sum(len(getattr(m, "content", "")) // 4 for m in group)
-
-        if total_tokens + group_tokens > max_tokens:
-            break
-
-        total_tokens += group_tokens
-        result = group + result
-
-    return result
-
-
 def generate_graph() -> CompiledStateGraph:
     memory = MemorySaver()
-    tools = [
-        tool_search_web,
-        tool_push,
-        tool_search_db,
-        tool_format_scripture_answer,
-        tool_get_standardized_azhwar_names,
-        tool_get_standardized_prabandham_names,
-        tool_get_standardized_divya_desam_names,
-        tool_search_db_by_metadata,
-        tool_search_db_for_literal,
-    ]
-    llm = ChatOpenAI(
-        model="gpt-4o-mini", temperature=0.2, max_retries=0, timeout=Timeout(60.0)
-    ).bind_tools(tools)
-    llm_without_tools = ChatOpenAI(
-        model="gpt-4o-mini", temperature=0.1, max_retries=0, timeout=Timeout(60.0)
-    )
-
-    def chatNode(state: ChatState) -> ChatState:
-        # logger.info("messages before LLM: %s", str(state["messages"]))
-        state["messages"] = truncate_messages_for_token_limit(
-            messages=state["messages"]
-        )
-        response = llm.invoke(state["messages"])
-        # return {"messages": [response]}
-        return {"messages": state["messages"] + [response]}
-
-    def validatorNode(state: ChatState) -> ChatState:
-        messages = state["messages"] or []
-
-        # Step 1: Separate out last message
-        last_message = messages[-1]
-        trimmed_messages = messages[:-1]
-
-        # Step 2: Ensure last message is from LLM
-        if not isinstance(last_message, AIMessage):
-            return {"messages": messages}  # no-op if nothing to validate
-
-        # Step 3: Build validation input
-        validation_prompt = trimmed_messages + [
-            SystemMessage(
-                content=(
-                    """
-You are a strict validator for LLM responses to scripture queries. DO NOT USE any tools for this.
-
-Your tasks:
-0. Treat your input as `original_llm_response`.
-1. Compare the original user query to the LLM's answer.
-2. Identify the scripture context (e.g., Divya Prabandham, Bhagavad Gita, Upanishads, Ramayana, etc.).
-3. Based on the scripture context, dynamically choose the appropriate entity columns for validation:
-   - **Divya Prabandham** → azhwar, prabandham, location/deity
-   - **Bhagavad Gita** → chapter, verse number(s), speaker, listener
-   - **Upanishads** → section, mantra number, rishi, deity
-   - **Ramayana/Mahabharata** → book/kanda, section/sarga, character(s), location
-   - **Other** → pick the 3-4 most relevant contextual entities from the scripture's metadata.
-4. Verify (from `original_llm_response`):
-   - Correct verse number(s)
-   - Keyword/context match
-   - All scripture-specific entity fields
-   - Native verse text quality
-   - Relevance of the response with respect to the question asked by the user.
-5. **Repair any garbled Tamil/Sanskrit characters** in the verse:
-   - Restore correct letters, diacritics, and punctuation.
-   - Replace broken Unicode with proper characters.
-   - Correct vowel signs, consonants, and pulli markers.
-   - Preserve original spacing and line breaks.
-   The repaired version is `fixed_llm_response`.
-
-6. Evaluate your `Confidence` as an integer between 0 to 100 (no percentage sign). Confidence-based display rule:
-   - If `Confidence` < 75:
-     - Show this message upfront:
-       #### Confidence score: {{Confidence}}%
-7. Formatting rules for output:
-
-   <!-- **Step 1 – Repaired LLM Response in Markdown:** -->
-   <!-- BEGIN_MARKDOWN -->
-   {{fixed_llm_response}}
-   <!-- END_MARKDOWN -->
-
-   <!-- **Step 2 – Validation Table:** -->
-   <div style="font-size: small; opacity: 0.6;">
-   <hr>
-   <b>Original user query:</b> {{original_user_query}}
-
-   <table border="1" cellpadding="4" cellspacing="0" style="border-collapse: collapse; width: 100%;">
-     <tr>
-       <th>Parameter</th>
-       <th>Expected</th>
-       <th>Found</th>
-       <th>Match?</th>
-     </tr>
-     <tr>
-       <td>verse number(s)</td>
-       <td>{{requested_verse_numbers}}</td>
-       <td>{{found_verse_numbers}}</td>
-       <td>{{match_status_for_verse}}</td>
-     </tr>
-     <tr>
-       <td>keyword/context</td>
-       <td>{{requested_keywords}}</td>
-       <td>{{found_keywords}}</td>
-       <td>{{match_status_for_keyword}}</td>
-     </tr>
-     {{dynamic_entity_rows}}
-     <tr>
-       <td>native verse text</td>
-       <td style="white-space: normal; word-break: break-word; word-wrap: break-word;">{{original_native_text_100_characters}}</td>
-       <td style="white-space: normal; word-break: break-word; word-wrap: break-word;">{{cleaned_native_text_100_characters}}</td>
-       <td>{{garbled_fix_status}}</td>
-     </tr>
-   </table>
-
-   <p><b>Verdict:</b> {{Verdict}}<br>
-   <b>Confidence score:</b> {{Confidence}}% – {{Justification}}<br>
-   <span style="background-color:{{badge_color_code}}; color:white; padding:2px 6px; border-radius:4px;">{{badge_emoji}}</span></p>
-   </div>
-
-   ---
-
-Where:
-- `{{dynamic_entity_rows}}` is context-specific entity rows.
-- `{{cleaned_native_text}}` must be from the repaired `fixed_llm_response` (if Confidence ≥ 75).
-- ✅, ❌, ⚠️ remain for matches.
-- Hidden markers (`<!-- BEGIN_MARKDOWN -->`) prevent them from rendering as visible text.
-- Always wrap verse text so it doesn't overflow horizontally.
-
-"""
-                )
-            ),
-            HumanMessage(
-                content=last_message.content
-            ),  # 🟢 convert AI output to Human input
-        ]
-
-        # Step 4: Invoke LLM
-        response = llm_without_tools.invoke(validation_prompt)
-
-        # Step 5: Replace old AI message with validated one
-        return {"messages": trimmed_messages + [response]}
-
-    def init_system_prompt_node(state: ChatState) -> ChatState:
-        messages = state["messages"] or []
-
-        # Check if system prompts were already added
-        already_has_prompt = any(
-            isinstance(m, SystemMessage) and "format_scripture_answer" in m.content
-            for m in messages
-        )
-
-        if not already_has_prompt:
-            messages += [
-                SystemMessage(
-                    content=f"Here is the list of all scriptures along with their metadata configurations:\n{json.dumps(SanatanConfig.scriptures, indent=1)}\n"
-                ),
-                SystemMessage(
-                    content="⚠️ Do NOT summarize or compress the output from the `query` tool. It will be passed directly to `format_scripture_answer` tool that formats the answer **AS IS**. DO NOT REMOVE SANSKRIT/TAMIL TEXTS"
-                ),
-                SystemMessage(
-                    content="You MUST call the `format_scripture_answer` tool if the user question is about scripture content and the `query` tool has returned a result."
-                ),
-                SystemMessage(
-                    content="For general scripture queries, always prefer semantic search (tool_search_db). Use metadata or literal search only if the user specifies an exact verse number, azhwar, divya desam or phrase."
-                ),
-                SystemMessage(
-                    content="you must ALWAYS call one of the standardization tools available to get the correct entity name before using the `tool_search_db_by_metadata` tool."
-                ),
-                SystemMessage(
-                    content="""
-When using tools, you may call the same tool multiple times in a single task ONLY if:
-1. Each call has materially different arguments or targets a different piece of missing information.
-2. You have a clear reason for another call that is explicitly based on the new results you just received.
-3. You must stop calling the tool if:
-   - The results do not change meaningfully from the previous call, OR
-   - You have already resolved enough information to produce the required output.
-
-NEVER recall the tool if it fails execution.
-Before each new call to the same tool, compare the planned arguments with your previous call(s).
-If they are essentially the same, do NOT call it again – instead, proceed to generate the final validated output.
-"""
-                ),
-            ]
-
-        return {"messages": messages}
-
     graph = StateGraph(ChatState)
     graph.add_node("init", init_system_prompt_node)
     graph.add_node("llm", chatNode)
+    graph.add_node("dedup", dedup_tool_call)
     graph.add_node("tools", ToolNode(tools))
+    graph.add_node("count_tools", increment_tool_calls)
     graph.add_node("validator", validatorNode)
+
     graph.add_edge(START, "init")
     graph.add_edge("init", "llm")
-
+
+    # branching happens *after* dedup
     graph.add_conditional_edges(
         "llm",
         branching_condition,
         {"tools": "tools", "validator": "validator", "__end__": END},
     )
-
+
+    graph.add_edge("tools", "count_tools")
+    graph.add_edge("count_tools", "llm")
     graph.add_edge("validator", END)
+
     return graph.compile(checkpointer=memory)
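With the node logic moved into modules/nodes, graph_helper.py is reduced to wiring. A hedged sketch of driving the compiled graph (the thread id and question are illustrative, and a configured OPENAI_API_KEY is assumed):

    from langchain_core.messages import HumanMessage
    from graph_helper import generate_graph

    graph = generate_graph()
    # MemorySaver checkpoints per thread_id, so reusing the id resumes the chat.
    config = {"configurable": {"thread_id": "demo-thread"}}

    state = graph.invoke(
        {"messages": [HumanMessage(content="Who sang about Thiruvengadam?")],
         "debug_mode": True},
        config=config,
    )
    print(state["messages"][-1].content)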
modules/__init__.py
ADDED
File without changes
modules/nodes/chat.py
ADDED
@@ -0,0 +1,73 @@
+from httpx import Timeout
+from langchain_openai import ChatOpenAI
+from modules.nodes.state import ChatState
+from tools import (
+    tool_format_scripture_answer,
+    tool_get_standardized_prabandham_names,
+    tool_search_db,
+    tool_search_web,
+    tool_push,
+    tool_get_standardized_azhwar_names,
+    tool_search_db_by_metadata,
+    tool_get_standardized_divya_desam_names,
+    tool_search_db_for_literal,
+)
+
+tools = [
+    tool_search_db_by_metadata,
+    tool_search_db,
+    tool_search_db_for_literal,
+    tool_get_standardized_azhwar_names,
+    tool_get_standardized_prabandham_names,
+    tool_get_standardized_divya_desam_names,
+    tool_format_scripture_answer,
+    tool_search_web,
+    tool_push,
+]
+llm = ChatOpenAI(
+    model="gpt-4o-mini", temperature=0.2, max_retries=0, timeout=Timeout(60.0)
+).bind_tools(tools)
+
+def _truncate_messages_for_token_limit(messages, max_tokens=50000):
+    """
+    Truncate messages to stay under token limit while preserving assistant-tool_call integrity.
+    """
+    return messages
+    total_tokens = 0
+    result = []
+
+    # iterate oldest → newest to preserve conversation
+    for msg in messages:
+        content = getattr(msg, "content", "")
+        msg_tokens = len(content) // 4
+
+        # gather tool responses if any
+        tool_calls = getattr(msg, "additional_kwargs", {}).get("tool_calls", [])
+        group = [msg]
+        for call in tool_calls:
+            for m in messages:
+                if (
+                    getattr(m, "additional_kwargs", {}).get("tool_call_id")
+                    == call["id"]
+                ):
+                    group.append(m)
+
+        group_tokens = sum(len(getattr(m, "content", "")) // 4 for m in group)
+
+        # if this whole group would exceed the limit, stop
+        if total_tokens + group_tokens > max_tokens:
+            break
+
+        total_tokens += group_tokens
+        result.extend(group)
+
+    return result
+
+def chatNode(state: ChatState) -> ChatState:
+    # logger.info("messages before LLM: %s", str(state["messages"]))
+    state["messages"] = _truncate_messages_for_token_limit(
+        messages=state["messages"]
+    )
+    response = llm.invoke(state["messages"])
+    state["messages"] = state["messages"] + [response]
+    return state
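Note that _truncate_messages_for_token_limit returns messages on its first line, so the grouping logic below it is dead code in this commit; when re-enabled it walks oldest to newest with a roughly four-characters-per-token estimate and keeps each assistant message together with its tool responses. A stripped-down sketch of that budget walk (plain strings stand in for LangChain messages):

    def truncate_oldest_first(contents: list[str], max_tokens: int = 50000) -> list[str]:
        # ~4 characters per token, the same heuristic the module uses
        total, kept = 0, []
        for text in contents:
            cost = len(text) // 4
            if total + cost > max_tokens:
                break  # budget exhausted: drop this and everything newer
            total += cost
            kept.append(text)
        return kept

    print(truncate_oldest_first(["a" * 8, "b" * 8, "c" * 8], max_tokens=5))
    # ['aaaaaaaa', 'bbbbbbbb']  (2 + 2 tokens fit; the third would exceed 5)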
modules/nodes/conditions.py
ADDED
@@ -0,0 +1,25 @@
+from modules.nodes.state import ChatState
+
+def _check_debug_condition(state: ChatState) -> str:
+    if state["debug_mode"]:
+        return "validator"
+    else:
+        return "__end__"
+
+MAX_TOOL_CALLS = 3
+
+def branching_condition(state: ChatState) -> str:
+    # hard stop after MAX_TOOL_CALLS
+    if state.get("tool_calls", 0) >= MAX_TOOL_CALLS:
+        return _check_debug_condition(state)
+
+    last_msg = state["messages"][-1]
+
+    if hasattr(last_msg, "tool_calls") and last_msg.tool_calls:
+        if state.get("skip_tool", False):
+            # remove tool_calls from the last assistant message
+            last_msg.tool_calls = []
+            return _check_debug_condition(state)
+        return "tools"
+    else:
+        return _check_debug_condition(state)
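branching_condition is a plain function over the state dict, so its routing can be exercised without building a graph. A small sketch (assumes modules/nodes/conditions.py is importable):

    from modules.nodes.conditions import branching_condition

    # At the MAX_TOOL_CALLS cap the router skips tools entirely and falls back
    # to the debug check: "validator" when debug_mode is on, "__end__" otherwise.
    state = {"debug_mode": True, "messages": [], "tool_calls": 3}
    print(branching_condition(state))  # -> "validator"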
modules/nodes/dedup.py
ADDED
@@ -0,0 +1,43 @@
+import hashlib
+import json
+
+from langchain_core.messages import ToolMessage
+from modules.nodes.state import ChatState
+
+def dedup_tool_call(state: ChatState):
+    # ensure seen_tool_calls exists
+    if state.get("seen_tool_calls") is None:
+        state["seen_tool_calls"] = set()
+    state["skip_tool"] = False  # reset every time
+
+    if not state.get("messages"):
+        return state
+
+    last_msg = state["messages"][-1]
+
+    # only process messages that have tool_calls
+    if hasattr(last_msg, "tool_calls") and last_msg.tool_calls:
+        call = last_msg.tool_calls[0]
+        tool_name = call["name"]
+        raw_args = call.get("arguments") or {}
+        tool_args = json.dumps(raw_args, sort_keys=True)
+        sig = (tool_name, hashlib.md5(tool_args.encode()).hexdigest())
+
+        if sig in state["seen_tool_calls"]:
+            # Duplicate detected → append a proper ToolMessage instead of a system message
+            state["messages"].append(
+                ToolMessage(
+                    content=f"Duplicate tool call skipped: {tool_name}({tool_args})",
+                    tool_call_id=call["id"],
+                    name=tool_name,
+                    additional_kwargs={},
+                )
+            )
+            state["skip_tool"] = True
+            # remove the tool_calls from the last assistant message to prevent validation error
+            last_msg.tool_calls = []
+        else:
+            state["seen_tool_calls"].add(sig)
+            state["skip_tool"] = False
+
+    return state
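The dedup signature is the tool name plus an MD5 of the JSON-canonicalized arguments, so argument order cannot defeat the check. A self-contained sketch of the same idea:

    import hashlib
    import json

    def call_signature(tool_name: str, raw_args: dict) -> tuple[str, str]:
        # sort_keys=True canonicalizes the args before hashing, so
        # {"a": 1, "b": 2} and {"b": 2, "a": 1} collide on purpose.
        tool_args = json.dumps(raw_args, sort_keys=True)
        return (tool_name, hashlib.md5(tool_args.encode()).hexdigest())

    seen = set()
    for args in ({"query": "gita", "n": 3}, {"n": 3, "query": "gita"}):
        sig = call_signature("tool_search_db", args)
        print("duplicate" if sig in seen else "new")
        seen.add(sig)
    # prints: new, duplicate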
modules/nodes/init.py
ADDED
@@ -0,0 +1,61 @@
+import json
+from langchain_core.messages import SystemMessage
+from config import SanatanConfig
+from modules.nodes.state import ChatState
+
+
+def init_system_prompt_node(state: ChatState) -> ChatState:
+    messages = state["messages"] or []
+
+    # Check if system prompts were already added
+    already_has_prompt = any(
+        isinstance(m, SystemMessage) and "format_scripture_answer" in m.content
+        for m in messages
+    )
+
+    if not already_has_prompt:
+        scriptures = SanatanConfig().filter_scriptures_fields(
+            fields_to_keep=[
+                "name",
+                "title",
+                "collection_name",
+                "metadata_fields",
+                "llm_hints",
+            ]
+        )
+        messages += [
+            SystemMessage(
+                content=f"Here is the list of all scriptures along with their metadata configurations:\n{json.dumps(scriptures, indent=1)}\n"
+            ),
+            SystemMessage(
+                content="⚠️ Do NOT summarize or compress the output from the `query` tool. It will be passed directly to `format_scripture_answer` tool that formats the answer **AS IS**. DO NOT REMOVE SANSKRIT/TAMIL TEXTS"
+            ),
+            SystemMessage(
+                content="You MUST call the `format_scripture_answer` tool if the user question is about scripture content and the `query` tool has returned a result."
+            ),
+            SystemMessage(
+                content="For general scripture queries, always prefer semantic search (tool_search_db). Use metadata or literal search only if the user specifies an exact verse number, azhwar, divya desam or phrase."
+            ),
+            SystemMessage(
+                content="you must ALWAYS call one of the standardization tools available to get the correct entity name before using the `tool_search_db_by_metadata` tool."
+            ),
+            SystemMessage(
+                content="""
+When using tools, you may call the same tool multiple times in a single task ONLY if:
+1. Each call has materially different arguments or targets a different piece of missing information.
+2. You have a clear reason for another call that is explicitly based on the new results you just received.
+3. You must stop calling the tool if:
+   - The results do not change meaningfully from the previous call, OR
+   - You have already resolved enough information to produce the required output.
+
+NEVER recall the tool if it fails execution.
+Before each new call to the same tool, compare the planned arguments with your previous call(s).
+If they are essentially the same, do NOT call it again – instead, proceed to generate the final validated output.
+"""
+            ),
+        ]
+    state["tool_calls"] = 0
+    state["seen_tool_calls"] = set()
+    state["skip_tool"] = False
+
+    return state
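The already_has_prompt guard keys off a marker substring so reruns of the init node stay idempotent. A minimal sketch of the same check:

    from langchain_core.messages import SystemMessage

    messages = [SystemMessage(content="You MUST call the `format_scripture_answer` tool ...")]
    already_has_prompt = any(
        isinstance(m, SystemMessage) and "format_scripture_answer" in m.content
        for m in messages
    )
    print(already_has_prompt)  # True, so the prompt block is not appended again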
modules/nodes/state.py
ADDED
@@ -0,0 +1,11 @@
+from typing import Annotated, TypedDict
+
+from langgraph.graph.message import add_messages
+
+
+class ChatState(TypedDict):
+    debug_mode: bool = True
+    messages: Annotated[list[str], add_messages]
+    tool_calls: int
+    seen_tool_calls: set[tuple[str, str]]  # (tool_name, params_hash)
+    skip_tool: bool
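TypedDict annotations carry no runtime defaults (the `= True` on debug_mode has no effect on instances, which are plain dicts), so callers populate every field explicitly. A sketch of an initial state matching these fields:

    initial_state = {
        "debug_mode": True,
        "messages": [],
        "tool_calls": 0,
        "seen_tool_calls": set(),  # filled with (tool_name, params_hash) tuples
        "skip_tool": False,
    }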
modules/nodes/tool_calls.py
ADDED
@@ -0,0 +1,6 @@
+from modules.nodes.state import ChatState
+
+
+def increment_tool_calls(state: ChatState):
+    state["tool_calls"] = state.get("tool_calls", 0) + 1
+    return state
modules/nodes/validator.py
ADDED
@@ -0,0 +1,146 @@
+from httpx import Timeout
+from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
+from langchain_openai import ChatOpenAI
+from modules.nodes.state import ChatState
+
+
+_llm_without_tools = ChatOpenAI(
+    model="gpt-4o-mini", temperature=0.1, max_retries=0, timeout=Timeout(60.0)
+)
+
+_validator_prompt_text = """
+You are a strict validator for LLM responses to scripture queries. DO NOT USE any tools for this.
+
+Your tasks:
+0. Treat your input as `original_llm_response`.
+1. Compare the original user query to the LLM's answer.
+2. Identify the scripture context (e.g., Divya Prabandham, Bhagavad Gita, Upanishads, Ramayana, etc.).
+3. Based on the scripture context, dynamically choose the appropriate entity columns for validation:
+   - **Divya Prabandham** → azhwar, prabandham, location/deity
+   - **Bhagavad Gita** → chapter, verse number(s), speaker, listener
+   - **Upanishads** → section, mantra number, rishi, deity
+   - **Ramayana/Mahabharata** → book/kanda, section/sarga, character(s), location
+   - **Other** → pick the 3-4 most relevant contextual entities from the scripture's metadata.
+4. Verify (from `original_llm_response`):
+   - Correct verse number(s)
+   - Keyword/context match
+   - All scripture-specific entity fields
+   - Native verse text quality
+   - Relevance of the response with respect to the question asked by the user.
+5. **Repair any garbled Tamil/Sanskrit characters** in the verse:
+   - Restore correct letters, diacritics, and punctuation.
+   - Replace broken Unicode with proper characters.
+   - Correct vowel signs, consonants, and pulli markers.
+   - Preserve original spacing and line breaks.
+   The repaired version is `fixed_llm_response`.
+
+6. Evaluate your `Confidence` as an integer between 0 to 100 (no percentage sign). Confidence-based display rule:
+   - If `Confidence` < 75:
+     - Show this message upfront:
+       #### Confidence score: {{Confidence}}%
+7. Formatting rules for output:
+
+   <!-- **Step 1 – Repaired LLM Response in Markdown:** -->
+   <!-- BEGIN_MARKDOWN -->
+   {{fixed_llm_response}}
+   <!-- END_MARKDOWN -->
+
+   <!-- **Step 2 – Validation Table:** -->
+   <div style="font-size: small; opacity: 0.6;">
+   <hr>
+   <b>Original user query:</b> {{original_user_query}}
+
+   <table border="1" cellpadding="4" cellspacing="0" style="border-collapse: collapse; width: 100%;">
+     <tr>
+       <th>Parameter</th>
+       <th>Expected</th>
+       <th>Found</th>
+       <th>Match?</th>
+     </tr>
+     <tr>
+       <td>verse number(s)</td>
+       <td>{{requested_verse_numbers}}</td>
+       <td>{{found_verse_numbers}}</td>
+       <td>{{match_status_for_verse}}</td>
+     </tr>
+     <tr>
+       <td>keyword/context</td>
+       <td>{{requested_keywords}}</td>
+       <td>{{found_keywords}}</td>
+       <td>{{match_status_for_keyword}}</td>
+     </tr>
+     {{dynamic_entity_rows}}
+     <tr>
+       <td>native verse text</td>
+       <td style="white-space: normal; word-break: break-word; word-wrap: break-word;">{{original_native_text_100_characters}}</td>
+       <td style="white-space: normal; word-break: break-word; word-wrap: break-word;">{{cleaned_native_text_100_characters}}</td>
+       <td>{{garbled_fix_status}}</td>
+     </tr>
+   </table>
+
+   <p><b>Verdict:</b> {{Verdict}}<br>
+   <b>Confidence score:</b> {{Confidence}}% – {{Justification}}<br>
+   <span style="background-color:{{badge_color_code}}; color:white; padding:2px 6px; border-radius:4px;">{{badge_emoji}}</span></p>
+   </div>
+
+   ---
+Where:
+- `{{dynamic_entity_rows}}` is context-specific entity rows.
+- `{{cleaned_native_text}}` must be from the repaired `fixed_llm_response` (if Confidence ≥ 75).
+- ✅, ❌, ⚠️ remain for matches.
+- Hidden markers (`<!-- BEGIN_MARKDOWN -->`) prevent them from rendering as visible text.
+- Always wrap verse text so it doesn't overflow horizontally.
+"""
+
+
+def _get_ai_message_text(ai_msg):
+    # Try different common attributes where LangChain might store text
+    if hasattr(ai_msg, "content") and ai_msg.content:
+        return ai_msg.content
+    if hasattr(ai_msg, "message") and getattr(ai_msg, "message"):
+        return ai_msg.message
+    # fallback to additional_kwargs
+    return ai_msg.additional_kwargs.get("content", "")
+
+
+def validatorNode(state: ChatState) -> ChatState:
+    # Dummy: just pass through, but create a final AIMessage for streaming
+    # state["messages"] = state.get("messages", [])
+    # last_ai_msg = state["messages"][-1] if state["messages"] else AIMessage(content="")
+    # # tag it so chat_streaming sees it as final_node
+    # last_ai_msg.langgraph_node = "validator"
+    return state
+
+    messages = state.get("messages", [])
+    if not messages:
+        print("No messages. Returning state as-is")
+        return state
+
+    # Step 1: Last LLM message content
+    last_message = messages[-1]
+    if not isinstance(last_message, AIMessage):
+        print("Last message was not AI message. Returning state as-is")
+        return state
+
+    llm_text = _get_ai_message_text(last_message)
+
+    # Step 2: Find the original user query content
+    original_user_message = next(
+        (m for m in reversed(messages[:-1]) if isinstance(m, HumanMessage)), None
+    )
+    user_text = original_user_message.content if original_user_message else ""
+
+    # Step 3: Build validation prompt (only SystemMessage + pure text)
+    validation_prompt = [
+        SystemMessage(content=_validator_prompt_text),
+        HumanMessage(
+            content=f"Original user query:\n{user_text}\n\nLLM response:\n{llm_text}"
+        ),
+    ]
+
+    # Step 4: Invoke LLM without tools
+    response = _llm_without_tools.invoke(validation_prompt)
+
+    # Step 5: Replace old AI message with validated one
+    state["messages"] = messages[:-1] + [response]
+    return state
sanatan_assistant.py
CHANGED
@@ -32,12 +32,10 @@ You must answer the question using **only** the content from *{collection_name}*
 - Do **not** bring in information from **any other scripture or source**, or from prior knowledge, even if the answer seems obvious or well-known.
 - Do **not** quote any Sanskrit/Tamil verses unless they appear **explicitly** in the provided context.
 - Do **not** use verse numbers or line references unless clearly mentioned in the context.
-- If the answer cannot be found in the context, clearly say:
-  **"I do not have enough information from the {collection_name} to answer this."**

 If the answer is not directly stated in the verses but is present in explanatory notes within the context, you may interpret – but **explicitly mention that it is an interpretation**.

-If the
+If the answer WAS indeed found in the context, use the following response format (in Markdown), otherwise clearly state **"I do not have enough information from the {collection_name} to answer this."**

 ### 🧾 Answer
 - Present a brief summary of your response in concise **English**.
@@ -54,10 +52,22 @@ If the user query is not small talk, use the following response format (in Markdown):
 ### 🔗 Reference Link(s)
 - Provide reference link(s) (`html_url`) if one is available in the context.

-### 📜 Native Verse(s)
--
-
-
+### 📜 Native Verse(s) - Original
+- Include the original native verses as-is
+
+### 📜 Native Verse(s) - Sanitized
+
+- Task: Sanitize the native verses **without adding, removing, or inventing text**. Only fix obvious encoding or typographical errors.
+- Sanitization rules:
+  1. Correct garbled Unicode characters.
+  2. Fix broken diacritics, pulli markers, vowel signs, and punctuation.
+  3. Preserve **original spacing, line breaks, and character order**.
+- Do not translate, transliterate, or interpret.
+- Do not hallucinate or generate new verses.
+- Output should only be the **cleaned, original verses**.
+- The output in this section **MUST** be in native script, not English or transliterated English.
+> If you are unsure about a character, leave it as it is rather than guessing.
+

 ### 📝 English Transliteration
 - For each verse above, provide the **matching English transliteration**.
tools.py
CHANGED
@@ -25,11 +25,9 @@ allowed_collections = [s["collection_name"] for s in SanatanConfig.scriptures]
 tool_search_db = StructuredTool.from_function(
     query,
     description=(
-        f"Do **NOT** use this tool if the user asks for specific verse numbers or pasuram numbers."
-        "This is the **PRIMARY** tool to use for most user queries about scripture."
+        f"Do **NOT** use this tool if the user asks for specific verse numbers or pasuram numbers."
         " Use this when the user asks **about themes, stories, ideas, emotions, or meanings** in the scriptures."
         " This tool uses semantic vector search and can understand context and meaning beyond keywords."
-        f" Only use other tools like metadata or literal search if the user explicitly asks for them."
         f" The collection_name must be one of: {', '.join(allowed_collections)}."
     ),
 )
@@ -70,7 +68,18 @@ tool_search_web = Tool(
     name="search_web", description="Search the web for information", func=search_web
 )

-tool_format_scripture_answer = StructuredTool.from_function(
+tool_format_scripture_answer = StructuredTool.from_function(
+    format_scripture_answer,
+    name="tool_format_scripture_answer",
+    description=(
+        """
+        Use this tool to generate a custom system prompt based on the scripture title, question, and query_tool_output.
+        This is especially useful when the user has asked a question about a scripture, and the relevant context has been fetched using the `query` tool.
+        The generated prompt will guide the assistant to respond using only that scripture's content, with a clear format including Sanskrit/Tamil verses, English explanations, and source chapters.
+        Include a sanitized version of the original native text. Do ensure you don't lose any words from the native text when sanitizing.
+        """
+    ),
+)

 tool_get_standardized_azhwar_names = StructuredTool.from_function(
     get_standardized_azhwar_names,
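StructuredTool.from_function infers the argument schema from the wrapped function's signature; the name and description are what the LLM sees when deciding whether to call the tool. A tiny self-contained example of the same construction (the add function is illustrative):

    from langchain_core.tools import StructuredTool

    def add(a: int, b: int) -> int:
        """Add two integers."""
        return a + b

    tool_add = StructuredTool.from_function(
        add,
        name="tool_add",
        description="Adds two integers and returns their sum.",
    )
    print(tool_add.invoke({"a": 2, "b": 3}))  # 5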