Spaces:

vikramvasudevan
/

sanatan_ai

Running on CPU Upgrade

App Files Files Community

sanatan_ai / sanatan_assistant.py

vikramvasudevan

Upload folder using huggingface_hub

e8bcdf0 verified 2 months ago

raw

history blame

6.94 kB

	import logging
	from typing import Any, Literal
	from dotenv import load_dotenv
	from config import SanatanConfig
	from db import MetadataWhereClause, SanatanDatabase

	# Load .env values before the database/config objects below are constructed.
	# override=True lets values from the .env file replace variables that are
	# already present in the process environment.
	load_dotenv(override=True)

	logger = logging.getLogger(__name__)
	logger.setLevel(logging.INFO)

	# Module-level singletons shared by the tool functions in this file.
	sanatanDatabase = SanatanDatabase()
	sanatanConfig = SanatanConfig()

	# Literal types built at import time from the configured scriptures, so the
	# set of accepted values follows the configuration. Subscripting with a
	# tuple is equivalent to listing the members one by one.
	allowedCollections = Literal[
	    tuple(entry["collection_name"] for entry in sanatanConfig.scriptures)
	]
	allowedScriptureTitles = Literal[
	    tuple(entry["title"] for entry in sanatanConfig.scriptures)
	]

	def format_scripture_answer(
	    collection_name: allowedCollections, question: str, query_tool_output: str
	) -> str:
	    """
	    Use this tool to generate a custom system prompt based on the scripture collection, question, and query_tool_output.

	    This is especially useful when the user has asked a question about a scripture, and the relevant context has been fetched using the `query` tool.

	    The generated prompt will guide the assistant to respond using only that scripture's content, with a clear format including Sanskrit/Tamil verses, English explanations, and source chapters.

	    Parameters:
	    - collection_name: The scripture collection the context was fetched from. It is interpolated into the prompt and used to look up the scripture's display title in the configuration.
	    - question (str): The user's question, inserted verbatim into the prompt.
	    - query_tool_output (str): The context returned by the `query` tool, inserted verbatim into the prompt.

	    Returns:
	    - The fully formatted prompt string.
	    """
	    # Resolve the human-readable title once, up front, so a failed lookup
	    # surfaces here rather than in the middle of the prompt template.
	    scripture_title = sanatanConfig.get_scripture_by_collection(
	        collection_name=collection_name
	    )["title"]

	    prompt = f"""You are a knowledgeable assistant on the scripture {collection_name}, well-versed in Sanskrit , English and Tamil.

	You must answer the question using only the content from {collection_name} provided in the context below.
	- Do not bring in information from any other scripture or source, or from prior knowledge, even if the answer seems obvious or well-known.
	- Do not quote any Sanskrit/Tamil verses unless they appear explicitly in the provided context.
	- Do not use verse numbers or line references unless clearly mentioned in the context.

	If the answer is not directly stated in the verses but is present in explanatory notes within the context, you may interpret — but explicitly mention that it is an interpretation.

	If the answer WAS indeed found in the context, use the following response format (in Markdown) otherwise clearly state "I do not have enough information from the {collection_name} to answer this."

	### 🧾 Answer
	- Present a brief summary of your response in concise English.

	### 🕉️ Scripture
	- {scripture_title}

	### 🕮 Chapter Title
	- Mention the chapter(s) from which the references were taken. Use the field title here from the context if available. For example `TVM 1.8.3`

	### 🕮 Verse Number
	- Mention the verse number from which the references were taken.

	### 🔗 Reference Link(s)
	- Provide reference link(s) (`html_url`) if one is available in the context.

	### 📜 Native Verse(s) - Original
	- Include the original native verses as-is

	### 📜 Native Verse(s) - Sanitized

	- Task: Sanitize the native verses without adding, removing, or inventing text. Only fix obvious encoding or typographical errors.
	- Sanitization rules:
	1. Correct garbled Unicode characters.
	2. Fix broken diacritics, pulli markers, vowel signs, and punctuation.
	3. Preserve original spacing, line breaks, and character order.
	- Do not translate, transliterate, or interpret.
	- Do not hallucinate or generate new verses.
	- Output should only be the cleaned, original verses.
	- The output in this section MUST be in native script not english or transliterated english.
	> If you are unsure about a character, leave it as it is rather than guessing.


	### 📜 English Transliteration
	- For each verse above, provide the matching English transliteration.
	- Maintain the same order as the verses listed above.

	### 📜 English Translation
	- Provide the English meaning for each verse listed above.
	- Again, follow the same order.
	- Do not repeat the original verse here — just the translation.

	### 📜 Notes
	- Bullet any extra points or cross-references from explanatory notes only if present in the context.
	- Do not include anything that is not supported or implied in the context.

	⚠️ Do not duplicate content across sections.
	- Each section has a distinct purpose.
	- If a verse is shown in the `📜 Native Verse(s)` sections, do not repeat it in the Translation section.
	- Only transliterations and meanings should appear in their respective sections.


	Question:
	{question}

	---

	Context:
	{query_tool_output}

	---

	Respond in Markdown format only. Ensure Sanskrit/Tamil verses are always clearly shown and translated. If a section does not apply (e.g. no verses), you may omit it.
	"""

	    return prompt


	def query(
	    collection_name: allowedCollections,
	    query: str | None = None,
	    metadata_where_clause: MetadataWhereClause | None = None,
	    n_results: int = 3,
	    search_type: Literal["semantic", "literal", "random"] = "semantic",
	) -> str:
	    """
	    Search a scripture collection.

	    Parameters:
	    - collection_name (str): The name of the scripture collection to search (use the exact name from the metadata configuration).
	    - query (str): The search query - this is the semantic or literal query you want to search for. If you want to perform a random search or just want to search by metadata only, this can be passed as None.
	    - metadata_where_clause: MetadataWhereClause - Set to None if no metadata filters are requested. Always set when user mentions a specific prabandham, azhwar, or any other known field from the configuration. Example: {"prabandham_name": "Thiruvaimozhi"}. Use the `conditional_operator` to filter based on $and or $or conditions. Use `groups` to combine multiple queries into one.
	    - n_results (int): Number of results to return. Default is 3.
	    - search_type: can be one of semantic, literal or random.

	    Returns:
	    - The matching results as a single string: one "Document / Metadata / ID" entry per result, with entries separated by a blank line.

	    Raises:
	    - ValueError: if search_type is not "random" and neither query nor metadata_where_clause is provided.
	    - Any exception raised by the metadata field validation or by the database search is propagated unchanged.
	    """
	    logger.info(
	        "%s Search: collection [%s] for [%s] | metadata_where_clause=%s",
	        search_type,
	        collection_name,
	        query,
	        metadata_where_clause,
	    )

	    # Only a random search may run with no criteria at all.
	    if search_type != "random" and query is None and metadata_where_clause is None:
	        raise ValueError(
	            "Invalid input: when search type is not random, either metadata_where_clause or query should be provided"
	        )

	    if metadata_where_clause is not None:
	        # The return value is not consulted; this call is presumably expected
	        # to raise when the filter uses a field the collection does not
	        # allow. NOTE(review): confirm against SanatanConfig.
	        sanatanConfig.is_metadata_field_allowed(
	            collection_name=collection_name,
	            metadata_where_clause=metadata_where_clause,
	        )

	    response = sanatanDatabase.search(
	        collection_name=collection_name,
	        query=query,
	        metadata_where_clause=metadata_where_clause,
	        n_results=n_results,
	        search_type=search_type,
	    )

	    # Pair each document with its metadata and id and render one text entry
	    # per result.
	    return "\n\n".join(
	        f"Document: {doc}\nMetadata: {meta}\nID: {id_}"
	        for doc, meta, id_ in zip(
	            response["documents"], response["metadatas"], response["ids"]
	        )
	    )