Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
| import logging | |
| from typing import Any, Literal | |
| from dotenv import load_dotenv | |
| from config import SanatanConfig | |
| from db import MetadataWhereClause, SanatanDatabase | |
# Load variables from a .env file before anything else reads the environment.
load_dotenv(override=True)

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Module-wide singletons shared by the tool functions in this file.
sanatanDatabase = SanatanDatabase()
sanatanConfig = SanatanConfig()

# Literal aliases derived from the configured scriptures, so that tool
# signatures advertise exactly the collection names / titles that exist.
# Subscripting Literal with a tuple is the same as listing the values.
allowedCollections = Literal[
    tuple(scripture["collection_name"] for scripture in sanatanConfig.scriptures)
]
allowedScriptureTitles = Literal[
    tuple(scripture["title"] for scripture in sanatanConfig.scriptures)
]
def format_scripture_answer(
    collection_name: allowedCollections, question: str, query_tool_output: str
) -> str:
    """
    Use this tool to generate a custom system prompt based on the scripture collection name, question, and query_tool_output.
    This is especially useful when the user has asked a question about a scripture, and the relevant context has been fetched using the `query` tool.
    The generated prompt will guide the assistant to respond using only that scripture's content, with a clear format including Sanskrit/Tamil verses, English explanations, and source chapters.

    Parameters:
    - collection_name: Collection name of the scripture; it is interpolated into the prompt and used to look up the scripture title in the configuration.
    - question (str): The user's question, embedded verbatim in the prompt.
    - query_tool_output (str): The context text to answer from, embedded verbatim in the prompt.

    Returns:
    - The complete prompt as a single string.
    """
    # Resolve the display title once, outside the f-string, so the template
    # below stays readable.
    scripture_title = sanatanConfig.get_scripture_by_collection(
        collection_name=collection_name
    )["title"]

    # NOTE(review): the non-ASCII characters in this template arrived
    # mis-encoded. The receipt and warning glyphs, the apostrophe and the
    # dashes were restored from their byte patterns; the remaining heading
    # emoji could not be recovered and were re-chosen -- confirm them.
    prompt = f"""You are a knowledgeable assistant on the scripture *{collection_name}*, well-versed in **Sanskrit**, **English** and **Tamil**.
You must answer the question using **only** the content from *{collection_name}* provided in the context below.
- Do **not** bring in information from **any other scripture or source**, or from prior knowledge, even if the answer seems obvious or well-known.
- Do **not** quote any Sanskrit/Tamil verses unless they appear **explicitly** in the provided context.
- Do **not** use verse numbers or line references unless clearly mentioned in the context.
If the answer is not directly stated in the verses but is present in explanatory notes within the context, you may interpret — but **explicitly mention that it is an interpretation**.
If the answer WAS indeed found in the context, use the following response format (in Markdown) otherwise clearly state **"I do not have enough information from the {collection_name} to answer this."**
### 🧾 Answer
- Present a brief summary of your response in concise **English**.
### 🕉️ Scripture
- {scripture_title}
### 🔮 Chapter Title
- Mention the chapter(s) from which the references were taken. Use the field *title* here from the context if available. For example `TVM 1.8.3`
### 🔮 Verse Number
- Mention the *verse number* from which the references were taken.
### 🔗 Reference Link(s)
- Provide reference link(s) (`html_url`) if one is available in the context.
### 📜 Native Verse(s) - Original
- Include the original native verses as-is
### 📜 Native Verse(s) - Sanitized
- Task: Sanitize the native verses **without adding, removing, or inventing text**. Only fix obvious encoding or typographical errors.
- Sanitization rules:
1. Correct garbled Unicode characters.
2. Fix broken diacritics, pulli markers, vowel signs, and punctuation.
3. Preserve **original spacing, line breaks, and character order**.
- Do not translate, transliterate, or interpret.
- Do not hallucinate or generate new verses.
- Output should only be the **cleaned, original verses**.
- The output in this section **MUST** be in native script not english or transliterated english.
> If you are unsure about a character, leave it as it is rather than guessing.
### 📜 English Transliteration
- For each verse above, provide the **matching English transliteration**.
- Maintain the **same order** as the verses listed above.
### 📜 English Translation
- Provide the **English meaning** for each verse listed above.
- Again, follow the **same order**.
- Do **not** repeat the original verse here — just the translation.
### 📝 Notes
- Bullet any extra points or cross-references from explanatory notes **only if present in the context**.
- Do **not** include anything that is not supported or implied in the context.
⚠️ Do **not duplicate content** across sections.
- Each section has a distinct purpose.
- If a verse is shown in the `📜 Native Verse(s)` sections, do **not** repeat it in the Translation section.
- Only transliterations and meanings should appear in their respective sections.
**Question:**
{question}
---
**Context:**
{query_tool_output}
---
Respond in **Markdown** format only. Ensure Sanskrit/Tamil verses are always clearly shown and translated. If a section does not apply (e.g. no verses), you may omit it.
"""
    return prompt
def query(
    collection_name: allowedCollections,
    query: str | None = None,
    metadata_where_clause: MetadataWhereClause | None = None,
    n_results: int = 3,
    search_type: Literal["semantic", "literal", "random"] = "semantic",
) -> str:
    """
    Search a scripture collection.

    Parameters:
    - collection_name (str): The name of the scripture collection to search (use the exact name from the metadata configuration).
    - query (str): The search query - this is the semantic or literal query you want to search for. If you want to perform a random search or just want to search by metadata only, it can be passed as None.
    - metadata_where_clause: MetadataWhereClause - Set to None if no metadata filters are requested. Always set when user mentions a specific prabandham, azhwar, or any other known field from the configuration. Example: {"prabandham_name": "Thiruvaimozhi"}. Use the `conditional_operator` to filter based on $and or $or conditions. Use `groups` to combine multiple queries into one.
    - n_results (int): Number of results to return. Default is 3.
    - search_type: can be one of semantic, literal or random.

    Returns:
    - A single string containing one "Document / Metadata / ID" entry per result, with entries separated by a blank line.

    Raises:
    - ValueError: when search_type is not "random" and neither query nor metadata_where_clause is provided.
    """
    logger.info(
        "%s Search: collection [%s] for [%s] | metadata_where_clause=%s",
        search_type,
        collection_name,
        query,
        metadata_where_clause,
    )

    # A non-random search needs something to search by.
    if search_type != "random" and metadata_where_clause is None and query is None:
        raise ValueError(
            "Invalid input: when search type is not random, either metadata_where_clause or query should be provided"
        )

    if metadata_where_clause is not None:
        # The return value is not inspected, so any rejection must surface as
        # an exception, which propagates to the caller unchanged.
        # NOTE(review): presumably raises on a disallowed field -- confirm.
        sanatanConfig.is_metadata_field_allowed(
            collection_name=collection_name,
            metadata_where_clause=metadata_where_clause,
        )

    response = sanatanDatabase.search(
        collection_name=collection_name,
        query=query,
        metadata_where_clause=metadata_where_clause,
        n_results=n_results,
        search_type=search_type,
    )

    # Render each (document, metadata, id) triple as one text entry.
    return "\n\n".join(
        f"Document: {doc}\nMetadata: {meta}\nID: {id_}"
        for doc, meta, id_ in zip(
            response["documents"], response["metadatas"], response["ids"]
        )
    )