import logging from typing import Any, Literal from dotenv import load_dotenv from config import SanatanConfig from db import MetadataWhereClause, SanatanDatabase load_dotenv(override=True) logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) sanatanDatabase = SanatanDatabase() sanatanConfig = SanatanConfig() allowedCollections = Literal[ *[scripture["collection_name"] for scripture in sanatanConfig.scriptures] ] allowedScriptureTitles = Literal[ *[scripture["title"] for scripture in sanatanConfig.scriptures] ] def format_scripture_answer( collection_name: allowedCollections, question: str, query_tool_output: str ): """ Use this tool to generate a custom system prompt based on the scripture title, question, and query_tool_output. This is especially useful when the user has asked a question about a scripture, and the relevant context has been fetched using the `query` tool. The generated prompt will guide the assistant to respond using only that scriptureโ€™s content, with a clear format including Sanskrit/Tamil verses, English explanations, and source chapters. """ prompt = f"""You are a knowledgeable assistant on the scripture *{collection_name}*, well-versed in **Sanskrit** , **English** and **Tamil**. You must answer the question using **only** the content from *{collection_name}* provided in the context below. - Do **not** bring in information from **any other scripture or source**, or from prior knowledge, even if the answer seems obvious or well-known. - Do **not** quote any Sanskrit/Tamil verses unless they appear **explicitly** in the provided context. - Do **not** use verse numbers or line references unless clearly mentioned in the context. If the answer is not directly stated in the verses but is present in explanatory notes within the context, you may interpret โ€” but **explicitly mention that it is an interpretation**. If the answer WAS indeed found in the context, use the following response format (in Markdown) othereise clearly state **"I do not have enough information from the {collection_name} to answer this."** ### ๐Ÿงพ Answer - Present a brief summary of your response in concise **English**. ### ๐Ÿ•‰๏ธ Scripture - {sanatanConfig.get_scripture_by_collection(collection_name=collection_name)["title"]} ### ๐Ÿ•ฎ Chapter Title - Mention the chapter(s) from which the references were taken. Use the field *title* here from the context if available. For example `TVM 1.8.3` ### ๐Ÿ•ฎ Verse Number - Mention the *verse number* from which the references were taken. ### ๐Ÿ”— Reference Link(s) - Provide reference link(s) (`html_url`) if one is available in the context. ### ๐Ÿ“œ Native Verse(s) - Original - Include the original native verses as-is ### ๐Ÿ“œ Native Verse(s) - Sanitized - Task: Sanitize the native verses **without adding, removing, or inventing text**. Only fix obvious encoding or typographical errors. - Sanitization rules: 1. Correct garbled Unicode characters. 2. Fix broken diacritics, pulli markers, vowel signs, and punctuation. 3. Preserve **original spacing, line breaks, and character order**. - Do not translate, transliterate, or interpret. - Do not hallucinate or generate new verses. - Output should only be the **cleaned, original verses**. - The output in this section **MUST** be in native script not english or transliterated english. > If you are unsure about a character, leave it as it is rather than guessing. ### ๐Ÿ“œ English Transliteration - For each verse above, provide the **matching English transliteration**. - Maintain the **same order** as the verses listed above. ### ๐Ÿ“œ English Translation - Provide the **English meaning** for each verse listed above. - Again, follow the **same order**. - Do **not** repeat the original verse here โ€” just the translation. ### ๐Ÿ“œ Notes - Bullet any extra points or cross-references from explanatory notes **only if present in the context**. - Do **not** include anything that is not supported or implied in the context. โš ๏ธ Do **not duplicate content** across sections. - Each section has a distinct purpose. - If a verse is shown in `๐Ÿ“œ Supporting Verse(s)`, do **not** repeat it in the Translation section. - Only transliterations and meanings should appear in their respective sections. **Question:** {question} --- **Context:** {query_tool_output} --- Respond in **Markdown** format only. Ensure Sanskrit/Tamil verses are always clearly shown and translated. If a section does not apply (e.g. no verses), you may omit it. """ return prompt def query( collection_name: allowedCollections, query: str | None = None, metadata_where_clause: MetadataWhereClause | None = None, n_results=3, search_type: Literal["semantic", "literal", "random"] = "semantic", ): """ Search a scripture collection. Parameters: - collection_name (str): The name of the scripture collection to search (use the exact name from the metadata configuration. ... - query (str): The search query - this is the semantic or literal query you want to search for. if you want to perform a random search or just want to search by metadata only, can be passed as None .. - metadata_where_clause: MetadataWhereClause - Set to None if no metadata filters are requested. Always set when user mentions a specific prabandham, azhwar, or any other known field from the configuration. Example: {\"prabandham_name\": \"Thiruvaimozhi\"}. use the `conditional_operator` to filter based on $and or $or conditions. use `groups` to combine multiple queries into one. - n_results (int): Number of results to return. Default is 3. - search_type: can be one of semantic, literal or random. Returns: - A list of matching results. """ logger.info( "%s Search: collection [%s] for [%s] | metadata_where_clause=%s", search_type, collection_name, query, metadata_where_clause, ) if search_type != "random" and metadata_where_clause is None and query is None: raise Exception( "Invalid input: when search type is not random, either metadata_where_clause or query should be provided" ) try: if metadata_where_clause is not None: sanatanConfig.is_metadata_field_allowed( collection_name=collection_name, metadata_where_clause=metadata_where_clause, ) except: raise response = sanatanDatabase.search( collection_name=collection_name, query=query, metadata_where_clause=metadata_where_clause, n_results=n_results, search_type=search_type, ) return "\n\n".join( f"Document: {doc}\nMetadata: {meta}\nID: {id_}" for doc, meta, id_ in zip( response["documents"], response["metadatas"], response["ids"] ) )