File size: 8,023 Bytes
fd1b271
bbb5184
fd1b271
 
d434239
fd1b271
 
 
 
 
 
6a252fb
fd1b271
6a252fb
fd1b271
 
 
210796c
d023803
210796c
fd1b271
 
 
 
 
210796c
fd1b271
 
d023803
fd1b271
d023803
fd1b271
210796c
fd1b271
 
 
 
63d1774
fd1b271
 
06f448a
fd1b271
d4b0094
6a252fb
fd1b271
9879992
06f448a
fd1b271
9879992
d023803
 
9879992
d023803
 
63d1774
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d023803
2919500
 
 
fd1b271
9879992
2919500
 
 
fd1b271
9879992
fd1b271
 
 
2919500
 
 
 
 
 
fd1b271
 
 
 
 
 
 
 
 
 
210796c
fd1b271
 
 
 
 
f3f0477
fd1b271
 
 
 
 
 
f3f0477
fd1b271
 
 
 
7290ba6
fd1b271
 
 
210796c
 
 
 
 
 
 
bbb5184
845dddf
 
 
d434239
f3f0477
845dddf
 
 
 
 
 
 
d434239
 
 
 
f3f0477
845dddf
 
 
 
 
d023803
 
d434239
d023803
 
 
845dddf
 
 
d434239
845dddf
 
 
 
 
 
 
 
 
c6893be
 
 
 
 
f3f0477
c6893be
 
 
 
 
 
 
f3f0477
c6893be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
import logging
from typing import Any, Literal
from dotenv import load_dotenv
from config import SanatanConfig
from db import MetadataWhereClause, SanatanDatabase

# Load variables from a .env file. override=True asks python-dotenv to let
# values from the file replace variables already set in the environment.
load_dotenv(override=True)
# Module-level logger with its level set to INFO.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Shared instances used by every function in this module.
sanatanDatabase = SanatanDatabase()
sanatanConfig = SanatanConfig()
# Literal type whose members are the "collection_name" values of the
# configured scriptures; used to annotate the collection_name parameters below.
# NOTE: star-unpacking inside a subscript requires Python 3.11+.
allowedCollections = Literal[
    *[scripture["collection_name"] for scripture in sanatanConfig.scriptures]
]


def format_scripture_answer(
    collection_name: allowedCollections, question: str, query_tool_output: str
):
    """
    Use this tool to generate a custom system prompt based on the scripture title, question, and query_tool_output.

    This is especially useful when the user has asked a question about a scripture, and the relevant context has been fetched using the `query` tool.

    The generated prompt will guide the assistant to respond using only that scripture's content, with a clear format including Sanskrit/Tamil verses, English explanations, and source chapters.

    Parameters:
    - collection_name (str): The scripture collection the answer must be restricted to.
    - question (str): The user's question, embedded verbatim in the prompt.
    - query_tool_output (str): The retrieved context, embedded verbatim in the prompt.

    Returns:
    - The complete prompt as a single string.

    The scripture title shown in the prompt is read from
    `sanatanConfig.get_scripture_by_collection(...)["title"]`; any error raised
    by that lookup propagates to the caller.
    """

    # Resolve the display title once, up front, so the template below stays readable.
    scripture_title = sanatanConfig.get_scripture_by_collection(
        collection_name=collection_name
    )["title"]

    # NOTE: many template lines end in two spaces on purpose (Markdown hard line breaks).
    prompt = f"""You are a knowledgeable assistant on the scripture *{collection_name}*, well-versed in **Sanskrit** , **English** and **Tamil**.

You must answer the question using **only** the content from *{collection_name}* provided in the context below.  
- Do **not** bring in information from **any other scripture or source**, or from prior knowledge, even if the answer seems obvious or well-known.  
- Do **not** quote any Sanskrit/Tamil verses unless they appear **explicitly** in the provided context.  
- Do **not** use verse numbers or line references unless clearly mentioned in the context.  

If the answer is not directly stated in the verses but is present in explanatory notes within the context, you may interpret — but **explicitly mention that it is an interpretation**.

If the answer WAS indeed found in the context, use the following response format (in Markdown) otherwise clearly state **"I do not have enough information from the {collection_name} to answer this."**

### 🧾 Answer  
- Present a brief summary of your response in concise **English**.  

### 🕉️ Scripture  
- {scripture_title}

### 🕮 Chapter Title  
- Mention the chapter(s) from which the references were taken.  Use the field *title* here from the context if available. For example `TVM 1.8.3`

### 🕮 Verse Number
- Mention the *verse number* from which the references were taken.  

### 🔗 Reference Link(s)
- Provide reference link(s) (`html_url`) if one is available in the context.

### 📜 Native Verse(s) - Original
- Include the original native verses as-is

### 📜 Native Verse(s) - Sanitized

- Task: Sanitize the native verses **without adding, removing, or inventing text**. Only fix obvious encoding or typographical errors.
- Sanitization rules:
  1. Correct garbled Unicode characters.
  2. Fix broken diacritics, pulli markers, vowel signs, and punctuation.
  3. Preserve **original spacing, line breaks, and character order**.
- Do not translate, transliterate, or interpret.
- Do not hallucinate or generate new verses.
- Output should only be the **cleaned, original verses**.
- The output in this section **MUST** be in native script not english or transliterated english.
> If you are unsure about a character, leave it as it is rather than guessing.


### 📜 English Transliteration  
- For each verse above, provide the **matching English transliteration**.  
- Maintain the **same order** as the verses listed above.

### 📜 English Translation  
- Provide the **English meaning** for each verse listed above.  
- Again, follow the **same order**.  
- Do **not** repeat the original verse here — just the translation.

### 📜 Notes  
- Bullet any extra points or cross-references from explanatory notes **only if present in the context**.  
- Do **not** include anything that is not supported or implied in the context.

⚠️ Do **not duplicate content** across sections.  
- Each section has a distinct purpose.  
- If a verse is shown in the `📜 Native Verse(s)` sections, do **not** repeat it in the Translation section.  
- Only transliterations and meanings should appear in their respective sections.


**Question:**  
{question}

---

**Context:**  
{query_tool_output}

---

Respond in **Markdown** format only. Ensure Sanskrit/Tamil verses are always clearly shown and translated. If a section does not apply (e.g. no verses), you may omit it.
"""

    return prompt


def query(collection_name: allowedCollections, query: str, n_results=3):
    """
    Run a semantic search against a scripture collection.

    Parameters:
    - collection_name (str): The scripture collection to search; one of the configured collection names.
    - query (str): The search query.
    - n_results (int): Number of results to request. Default is 3.

    Returns:
    - A single string containing one "Document / Metadata / ID" entry per
      result, with entries separated by a blank line. Empty string when there
      are no results.
    """
    logger.info("Semantic Search: Searching collection [%s] for [%s]", collection_name, query)
    hits = sanatanDatabase.search(
        collection_name=collection_name, query=query, n_results=n_results
    )

    # Presumably the three lists are parallel (one entry per hit) — confirm
    # against SanatanDatabase.search; zip() stops at the shortest of them.
    entries = []
    for text, metadata, doc_id in zip(
        hits["documents"], hits["metadatas"], hits["ids"]
    ):
        entries.append(f"Document: {text}\nMetadata: {metadata}\nID: {doc_id}")
    return "\n\n".join(entries)

def query_by_metadata_field(
    collection_name: allowedCollections,
    query: str,
    metadata_where_clause : MetadataWhereClause,
    n_results=3,
):
    """
    Search a scripture collection by metadata. Do NOT use this for semantic search. Only use when a specific metadata field is provided.

    Parameters:
    - collection_name (str): The scripture collection to search; one of the configured collection names.
    - query (str): The search query.
    - metadata_where_clause: the filter which is an array of the following type
        - metadata_field (str) : The name of the metadata field. e.g. azhwar_name
        - metadata_search_operator (str) : The search operator e.g. $eq or $in. DO NOT use $regex.
        - metadata_value : Value to search for can be any primitive datatype like str or int (or a list[str] if metadata_search_operator = '$in'). for e.g. Thirumangai Azhwar or '2233' or 2233
    - n_results (int): Number of results to request. Default is 3.

    Returns:
    - A single string containing one "Document / Metadata / ID" entry per
      result, with entries separated by a blank line. Empty string when there
      are no results.

    Raises:
    - Whatever `sanatanConfig.is_metadata_field_allowed` raises when it rejects
      the filter; the exception is propagated unchanged.
    """
    logger.info("Searching collection [%s] for [%s]", collection_name, query)

    # Validate the filter before touching the database. Any exception raised
    # here propagates as-is (the former `try/except: raise` wrapper was a no-op).
    # NOTE(review): the return value is ignored, so this only guards the search
    # if the validator signals rejection by raising — confirm in SanatanConfig.
    sanatanConfig.is_metadata_field_allowed(
        collection_name=collection_name,
        metadata_where_clause=metadata_where_clause,
    )

    response = sanatanDatabase.search_by_metadata(
        collection_name=collection_name,
        query=query,
        metadata_where_clause=metadata_where_clause,
        n_results=n_results,
    )

    # Presumably the three lists are parallel (one entry per hit) — confirm
    # against SanatanDatabase.search_by_metadata.
    return "\n\n".join(
        f"Document: {doc}\nMetadata: {meta}\nID: {id_}"
        for doc, meta, id_ in zip(
            response["documents"], response["metadatas"], response["ids"]
        )
    )


def query_by_literal_text(
    collection_name: allowedCollections,
    literal_to_search_for: str,
    n_results=3,
):
    """
    Search a scripture collection for a literal piece of text. Do NOT use this for semantic search. Only use when the user specifically asks for literal search.

    Parameters:
    - collection_name (str): The scripture collection to search; one of the configured collection names.
    - literal_to_search_for (str): The literal text to look for.
    - n_results (int): Number of results to request. Default is 3.

    Returns:
    - A single string containing one "Document / Metadata / ID" entry per
      result, with entries separated by a blank line. Empty string when there
      are no results.
    """
    logger.info("Performing literal search in collection [%s] for [%s]", collection_name, literal_to_search_for)

    matches = sanatanDatabase.search_for_literal(
        collection_name=collection_name,
        literal_to_search_for=literal_to_search_for,
        n_results=n_results,
    )

    # Presumably the three lists are parallel (one entry per match) — confirm
    # against SanatanDatabase.search_for_literal; zip() stops at the shortest.
    rows = zip(matches["documents"], matches["metadatas"], matches["ids"])
    blocks = [
        f"Document: {text}\nMetadata: {metadata}\nID: {doc_id}"
        for text, metadata, doc_id in rows
    ]
    return "\n\n".join(blocks)