from httpx import Timeout
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI

from modules.nodes.state import ChatState

_llm_without_tools = ChatOpenAI(
    model="gpt-4o-mini", temperature=0.1, max_retries=0, timeout=Timeout(60.0)
)

_validator_prompt_text = """
You are a strict validator for LLM responses to scripture queries. DO NOT USE any tools for this.

Your tasks:
0. Treat your input as `original_llm_response`.
1. Compare the original user query to the LLM’s answer.
2. Identify the scripture context (e.g., Divya Prabandham, Bhagavad Gita, Upanishads, Ramayana, etc.).
3. Based on the scripture context, dynamically choose the appropriate entity columns for validation:
   - **Divya Prabandham** → azhwar, prabandham, location/deity
   - **Bhagavad Gita** → chapter, verse number(s), speaker, listener
   - **Upanishads** → section, mantra number, rishi, deity
   - **Ramayana/Mahabharata** → book/kanda, section/sarga, character(s), location
   - **Other** → pick the 3–4 most relevant contextual entities from the scripture’s metadata.
4. Verify (from `original_llm_response`):
   - Correct verse number(s)
   - Keyword/context match
   - All scripture-specific entity fields
   - Native verse text quality
   - Relevance of the response to the question the user asked
5. **Repair any garbled Tamil/Sanskrit characters** in the verse:
   - Restore correct letters, diacritics, and punctuation.
   - Replace broken Unicode with proper characters.
   - Correct vowel signs, consonants, and pulli markers.
   - Preserve original spacing and line breaks.
   The repaired version is `fixed_llm_response`.
6. Report your `Confidence` as an integer between 0 and 100 (no percentage sign). Confidence-based display rule:
   - If `Confidence` < 75, show this message upfront:
     #### Confidence score: {{Confidence}}%
7. Formatting rules for output:

<!-- **Step 1 – Repaired LLM Response in Markdown:** -->
<!-- BEGIN_MARKDOWN -->
{{fixed_llm_response}}
<!-- END_MARKDOWN -->

<!-- **Step 2 – Validation Table:** -->
<div style="font-size: small; opacity: 0.6;">
<hr>
<b>Original user query:</b> {{original_user_query}}
<table border="1" cellpadding="4" cellspacing="0" style="border-collapse: collapse; width: 100%;">
  <tr>
    <th>Parameter</th>
    <th>Expected</th>
    <th>Found</th>
    <th>Match?</th>
  </tr>
  <tr>
    <td>verse number(s)</td>
    <td>{{requested_verse_numbers}}</td>
    <td>{{found_verse_numbers}}</td>
    <td>{{match_status_for_verse}}</td>
  </tr>
  <tr>
    <td>keyword/context</td>
    <td>{{requested_keywords}}</td>
    <td>{{found_keywords}}</td>
    <td>{{match_status_for_keyword}}</td>
  </tr>
  {{dynamic_entity_rows}}
  <tr>
    <td>native verse text</td>
    <td style="white-space: normal; word-break: break-word; word-wrap: break-word;">{{original_native_text_100_characters}}</td>
    <td style="white-space: normal; word-break: break-word; word-wrap: break-word;">{{cleaned_native_text_100_characters}}</td>
    <td>{{garbled_fix_status}}</td>
  </tr>
</table>
<p><b>Verdict:</b> {{Verdict}}<br>
<b>Confidence score:</b> {{Confidence}}% – {{Justification}}<br>
<span style="background-color:{{badge_color_code}}; color:white; padding:2px 6px; border-radius:4px;">{{badge_emoji}}</span></p>
</div>

---

Where:
- `{{dynamic_entity_rows}}` stands for the context-specific entity rows chosen in step 3.
- `{{cleaned_native_text_100_characters}}` must come from the repaired `fixed_llm_response` (if Confidence ≥ 75).
- Use ✅, ❌, and ⚠️ for match statuses.
- Hidden markers (`<!-- BEGIN_MARKDOWN -->`) prevent the step labels from rendering as visible text.
- Always wrap verse text so it doesn’t overflow horizontally.
"""


def _get_ai_message_text(ai_msg):
    # Try the common attributes where LangChain might store the text.
    if getattr(ai_msg, "content", None):
        return ai_msg.content
    if getattr(ai_msg, "message", None):
        return ai_msg.message
    # Fall back to additional_kwargs.
    return ai_msg.additional_kwargs.get("content", "")
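
# Quick illustration (hypothetical values): the helper prefers .content, then
# .message, then additional_kwargs["content"].
#   _get_ai_message_text(AIMessage(content="hello"))                                  -> "hello"
#   _get_ai_message_text(AIMessage(content="", additional_kwargs={"content": "hi"}))  -> "hi"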


def validatorNode(state: ChatState) -> ChatState:
    messages = state.get("messages", [])
    if not messages:
        print("No messages. Returning state as-is")
        return state

    # Step 1: Get the last LLM message content
    last_message = messages[-1]
    if not isinstance(last_message, AIMessage):
        print("Last message was not an AI message. Returning state as-is")
        return state
    llm_text = _get_ai_message_text(last_message)

    # Step 2: Find the original user query content
    original_user_message = next(
        (m for m in reversed(messages[:-1]) if isinstance(m, HumanMessage)), None
    )
    user_text = original_user_message.content if original_user_message else ""

    # Step 3: Build the validation prompt (SystemMessage + plain text only)
    validation_prompt = [
        SystemMessage(content=_validator_prompt_text),
        HumanMessage(
            content=f"Original user query:\n{user_text}\n\nLLM response:\n{llm_text}"
        ),
    ]

    # Step 4: Invoke the LLM without tools
    response = _llm_without_tools.invoke(validation_prompt)

    # Step 5: Replace the old AI message with the validated one
    state["messages"] = messages[:-1] + [response]
    return state
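

if __name__ == "__main__":
    # Minimal wiring sketch, not part of the original module: validatorNode has
    # the shape of a LangGraph node, so it can be registered in a StateGraph over
    # ChatState (assumed here to be a messages-style state dict). The node name,
    # the single-node topology, and the sample messages are illustrative
    # assumptions; running the invoke call requires a valid OPENAI_API_KEY.
    from langgraph.graph import END, START, StateGraph

    graph = StateGraph(ChatState)
    graph.add_node("validator", validatorNode)
    graph.add_edge(START, "validator")
    graph.add_edge("validator", END)
    app = graph.compile()

    result = app.invoke(
        {
            "messages": [
                HumanMessage(content="Quote Bhagavad Gita chapter 2, verse 47"),
                AIMessage(content="Verse 2.47: karmaṇy evādhikāras te mā phaleṣu kadācana"),
            ]
        }
    )
    print(_get_ai_message_text(result["messages"][-1]))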