Spaces:

vikramvasudevan
/

sanatan_ai

Running on CPU Upgrade

File size: 8,023 Bytes

import logging
from typing import Any, Literal
from dotenv import load_dotenv
from config import SanatanConfig
from db import MetadataWhereClause, SanatanDatabase

# Load variables from a .env file at import time; override=True lets values
# from the file replace variables already present in the process environment.
load_dotenv(override=True)
logger = logging.getLogger(__name__)
# Pin this module's logger to INFO so the search log lines below are emitted
# regardless of the level inherited from the parent logger.
logger.setLevel(logging.INFO)

# Module-level instances shared by every tool function in this file.
sanatanDatabase = SanatanDatabase()
sanatanConfig = SanatanConfig()
# Literal type built dynamically from the configured scriptures' collection
# names; it is used as the annotation for every `collection_name` parameter.
# NOTE: star-unpacking inside a subscript requires Python 3.11+.
allowedCollections = Literal[
    *[scripture["collection_name"] for scripture in sanatanConfig.scriptures]
]


def format_scripture_answer(
    collection_name: allowedCollections, question: str, query_tool_output: str
) -> str:
    """
    Use this tool to generate a custom system prompt based on the scripture title, question, and query_tool_output.

    This is especially useful when the user has asked a question about a scripture, and the relevant context has been fetched using the `query` tool.

    The generated prompt will guide the assistant to respond using only that scripture’s content, with a clear format including Sanskrit/Tamil verses, English explanations, and source chapters.

    Parameters:
    - collection_name (str): The name of the scripture collection the question is about. It is interpolated into the prompt and used to look up the scripture title in the configuration.
    - question (str): The user's question, embedded as-is in the prompt.
    - query_tool_output (str): The context fetched for the question, embedded as-is in the prompt.

    Returns:
    - The fully rendered prompt as a single Markdown string.
    """

    # Resolve the display title once, outside the template, so the f-string
    # below contains only plain name substitutions.
    # NOTE(review): assumes the lookup returns a mapping with a "title" key for
    # every allowed collection -- confirm against SanatanConfig.
    scripture_title = sanatanConfig.get_scripture_by_collection(
        collection_name=collection_name
    )["title"]

    return f"""You are a knowledgeable assistant on the scripture *{collection_name}*, well-versed in **Sanskrit** , **English** and **Tamil**.

You must answer the question using **only** the content from *{collection_name}* provided in the context below.  
- Do **not** bring in information from **any other scripture or source**, or from prior knowledge, even if the answer seems obvious or well-known.  
- Do **not** quote any Sanskrit/Tamil verses unless they appear **explicitly** in the provided context.  
- Do **not** use verse numbers or line references unless clearly mentioned in the context.  

If the answer is not directly stated in the verses but is present in explanatory notes within the context, you may interpret — but **explicitly mention that it is an interpretation**.

If the answer WAS indeed found in the context, use the following response format (in Markdown); otherwise clearly state **"I do not have enough information from the {collection_name} to answer this."**

### 🧾 Answer  
- Present a brief summary of your response in concise **English**.  

### 🕉️ Scripture  
- {scripture_title}

### 🕮 Chapter Title  
- Mention the chapter(s) from which the references were taken.  Use the field *title* here from the context if available. For example `TVM 1.8.3`

### 🕮 Verse Number
- Mention the *verse number* from which the references were taken.  

### 🔗 Reference Link(s)
- Provide reference link(s) (`html_url`) if one is available in the context.

### 📜 Native Verse(s) - Original
- Include the original native verses as-is

### 📜 Native Verse(s) - Sanitized

- Task: Sanitize the native verses **without adding, removing, or inventing text**. Only fix obvious encoding or typographical errors.
- Sanitization rules:
  1. Correct garbled Unicode characters.
  2. Fix broken diacritics, pulli markers, vowel signs, and punctuation.
  3. Preserve **original spacing, line breaks, and character order**.
- Do not translate, transliterate, or interpret.
- Do not hallucinate or generate new verses.
- Output should only be the **cleaned, original verses**.
- The output in this section **MUST** be in native script not english or transliterated english.
> If you are unsure about a character, leave it as it is rather than guessing.


### 📜 English Transliteration  
- For each verse above, provide the **matching English transliteration**.  
- Maintain the **same order** as the verses listed above.

### 📜 English Translation  
- Provide the **English meaning** for each verse listed above.  
- Again, follow the **same order**.  
- Do **not** repeat the original verse here — just the translation.

### 📜 Notes  
- Bullet any extra points or cross-references from explanatory notes **only if present in the context**.  
- Do **not** include anything that is not supported or implied in the context.

⚠️ Do **not duplicate content** across sections.  
- Each section has a distinct purpose.  
- If a verse is shown in the `📜 Native Verse(s)` sections, do **not** repeat it in the Translation section.  
- Only transliterations and meanings should appear in their respective sections.


**Question:**  
{question}

---

**Context:**  
{query_tool_output}

---

Respond in **Markdown** format only. Ensure Sanskrit/Tamil verses are always clearly shown and translated. If a section does not apply (e.g. no verses), you may omit it.
"""


def query(collection_name: allowedCollections, query: str, n_results=3):
    """
    Search a scripture collection.

    Parameters:
    - collection_name (str): The name of the scripture collection to search. ...
    - query (str): The search query.
    - n_results (int): Number of results to return. Default is 3.

    Returns:
    - A single string containing one `Document / Metadata / ID` block for each
      entry of the response's `documents`, `metadatas` and `ids`, with blocks
      separated by a blank line.
    """
    logger.info(
        "Semantic Search: Searching collection [%s] for [%s]",
        collection_name,
        query,
    )
    hits = sanatanDatabase.search(
        collection_name=collection_name, query=query, n_results=n_results
    )

    # Walk the three parallel sequences in lockstep and render one text block
    # per entry.
    rendered = []
    for document, metadata, record_id in zip(
        hits["documents"], hits["metadatas"], hits["ids"]
    ):
        rendered.append(
            f"Document: {document}\nMetadata: {metadata}\nID: {record_id}"
        )
    return "\n\n".join(rendered)

def query_by_metadata_field(
    collection_name: allowedCollections,
    query: str,
    metadata_where_clause : MetadataWhereClause,
    n_results=3,
) -> str:
    """
    Search a scripture collection by metadata. Do NOT use this for semantic search. Only use when a specific metadata field is provided.

    Parameters:
    - collection_name (str): The name of the scripture collection to search. ...
    - query (str): The search query.
    - metadata_where_clause: the filter which is an array of the following type
        - metadata_field (str) : The name of the metadata field. e.g. azhwar_name
        - metadata_search_operator (str) : The search operator e.g. $eq or $in. DO NOT use $regex.
        - metadata_value : Value to search for can be any primitive datatype like str or int (or a list[str] if metadata_search_operator = '$in'). for e.g. Thirumangai Azhwar or '2233' or 2233
    - n_results (int): Number of results to return. Default is 3.

    Returns:
    - A single string containing one `Document / Metadata / ID` block for each
      entry of the response's `documents`, `metadatas` and `ids`, with blocks
      separated by a blank line.

    Raises:
    - Whatever the configuration's metadata-field validation or the database
      search raises; nothing is caught here.
    """
    logger.info("Searching collection [%s] for [%s]", collection_name, query)

    # Validate the filter before touching the database. Any exception raised
    # here propagates to the caller unchanged.
    # NOTE(review): the return value is ignored, so this only guards the search
    # if the method raises on a disallowed field -- confirm in SanatanConfig.
    sanatanConfig.is_metadata_field_allowed(
        collection_name=collection_name,
        metadata_where_clause=metadata_where_clause,
    )

    response = sanatanDatabase.search_by_metadata(
        collection_name=collection_name,
        query=query,
        metadata_where_clause=metadata_where_clause,
        n_results=n_results,
    )

    return "\n\n".join(
        f"Document: {doc}\nMetadata: {meta}\nID: {id_}"
        for doc, meta, id_ in zip(
            response["documents"], response["metadatas"], response["ids"]
        )
    )


def query_by_literal_text(
    collection_name: allowedCollections,
    literal_to_search_for: str,
    n_results=3,
):
    """
    Search a scripture collection by a literal. Do NOT use this for semantic search. Only use when the user specifically asks for literal search.

    Parameters:
    - collection_name (str): The name of the scripture collection to search. ...
    - literal_to_search_for (str): The search query.
    - n_results (int): Number of results to return. Default is 3.

    Returns:
    - A single string containing one `Document / Metadata / ID` block for each
      entry of the response's `documents`, `metadatas` and `ids`, with blocks
      separated by a blank line.
    """
    logger.info(
        "Performing literal search in collection [%s] for [%s]",
        collection_name,
        literal_to_search_for,
    )

    matches = sanatanDatabase.search_for_literal(
        collection_name=collection_name,
        literal_to_search_for=literal_to_search_for,
        n_results=n_results,
    )

    # Pair up the parallel result sequences, then render each triple as text.
    rows = zip(matches["documents"], matches["metadatas"], matches["ids"])
    blocks = [
        f"Document: {text}\nMetadata: {metadata}\nID: {record_id}"
        for text, metadata, record_id in rows
    ]
    return "\n\n".join(blocks)