sanatan_ai / config.py
vikramvasudevan's picture
Upload folder using huggingface_hub
12446b3 verified
raw
history blame
25.6 kB
from metadata import MetadataWhereClause
from typing import List, Dict
class SanatanConfig:
    """Static configuration describing every scripture collection.

    ``scriptures`` is a list of plain dicts, one per collection. Each entry
    carries the keys ``name``, ``title``, ``output_dir``, ``collection_name``,
    ``unit``, ``metadata_fields``, ``pdf_path``, ``source``, ``language``,
    ``example_labels``, ``examples`` and ``llm_hints``; the key
    ``collection_embedding_fn`` is optional (see
    ``get_embedding_for_collection``).

    ``example_labels`` and ``examples`` are parallel lists and must have the
    same length in every entry.

    NOTE(review): metadata field dicts use either ``desc`` or ``description``
    for their help text. The keys are left as they are because consumers
    outside this file may read either one -- confirm before unifying.
    """

    # shuklaYajurVedamPdfPath: str = "./data/shukla-yajur-veda.pdf"
    # shuklaYajurVedamSmallPdfPath: str = "./data/shukla-yajur-veda-small.pdf"
    # vishnuPuranamPdfPath = "./data/vishnu_puranam.pdf"
    # datastores = [{"name": "sanskrit_001", "dbStorePath": "./chromadb-store"}, {"name": "nalayiram", "dbStorePath": "./chromadb-store-4000"}]
    dbStorePath: str = "./chromadb-store"
    # shuklaYajurVedamCollectionName: str = "shukla_yajur_vedam"
    # vishnuPuranamCollectionName: str = "vishnu_puranam"
    # shuklaYajurVedamOutputDir = "./output/shukla_yajur_vedam"
    # vishnuPuranamOutputDir = "./output/vishnu_puranam"
    scriptures = [
        {
            "name": "vishnu_puranam",
            "title": "Sri Vishnu Puranam",
            "output_dir": "./output/vishnu_puranam",
            "collection_name": "vishnu_puranam_openai",
            "collection_embedding_fn": "openai",
            "unit": "page",
            "metadata_fields": [
                {
                    "name": "file",
                    "datatype": "str",
                    "desc": "name of the file from which the information was extracted",
                },
                {"name": "num_chars", "datatype": "str"},
                {"name": "page", "datatype": "int"},
            ],
            "pdf_path": "./data/vishnu_puranam.pdf",
            "source": "https://dn720005.ca.archive.org/0/items/vishnu-purana-sanskrit-english-ocr/VISHNU-PURANA-Sanskrit-English-OCR.pdf",
            "language": "san+eng",
            "example_labels": [
                "Vishnu's form",
                "About the five elements",
                "About Garuda",
                "Weapons of Vishnu",
                "Vishnu's form (all scriptures)",
            ],
            "examples": [
                "describe Vishnu's form as defined in vishnu puranam",
                "five elements and their significance as per vishnu puranam",
                "What is the significance of Garuda? Show some verses from vishnu puranam that describe him.",
                "What weapons does Vishnu hold as mentioned in vishnu puranam?",
                "How is the form of Vishnu described across the scriptures?",
            ],
            "llm_hints": [],
        },
        {
            "name": "shukla_yajur_vedam",
            "title": "Shukla Yajur Vedam",
            "output_dir": "./output/shukla_yajur_vedam",
            "collection_name": "shukla_yajur_vedam",
            "unit": "page",
            "metadata_fields": [
                {
                    "name": "file",
                    "datatype": "str",
                    "desc": "name of the file from which the information was extracted",
                },
                {"name": "num_chars", "datatype": "str"},
                {"name": "page", "datatype": "int"},
            ],
            "pdf_path": "./data/shukla-yajur-veda.pdf",
            "source": "https://www.thearyasamaj.org/uploads/book/2014/04/R1sSjG_eLb_sub_406_yajurveda.pdf",
            "language": "san+eng",
            "example_labels": [
                "About Vedam",
                "About the five elements",
                "About Brahma",
            ],
            "examples": [
                "Gist of Shukla Yajur Vedam. Give me some sanskrit verses.",
                "What is the significance of fire and water. show some sanskrit verses",
                "Brahma",
            ],
            "llm_hints": [],
        },
        {
            "name": "bhagavat_gita",
            "title": "Bhagavat Gita",
            "output_dir": "./output/bhagavat_gita",
            "collection_name": "bhagavat_gita_openai",
            "collection_embedding_fn": "openai",
            "unit": "page",
            "metadata_fields": [
                {
                    "name": "file",
                    "datatype": "str",
                    "desc": "name of the file from which the information was extracted",
                },
                {"name": "num_chars", "datatype": "str"},
                {"name": "page", "datatype": "int"},
            ],
            "pdf_path": "./data/bhagavat_gita.pdf",
            "source": "https://dn790006.ca.archive.org/0/items/in.gov.ignca.279/279_text.pdf",
            "language": "san+eng",
            "example_labels": [
                "About Arjuna",
                "About Karma",
                "About birth and death",
                "About the battle field",
                "About Krishna's form",
                "Krishna's Teachings",
            ],
            "examples": [
                "Show some verses where Krishna advises Arjuna",
                "What does Krishna say about Karma",
                "What does Krishna say about birth and death",
                "describe the battle field",
                # The comma after this entry matters: without it Python joins
                # this string with the next one and the list no longer lines
                # up with example_labels.
                "How did Arjuna respond upon witnessing Krishna’s Vishwarupa?",
                "What teachings did Krishna share in the Gita?",
            ],
            "llm_hints": [],
        },
        {
            "name": "valmiki_ramayanam",
            "title": "Valmiki Ramayanam",
            "output_dir": "./output/valmiki_ramayanam",
            "collection_name": "valmiki_ramayanam_openai",
            "collection_embedding_fn": "openai",
            "unit": "page",
            "metadata_fields": [
                {
                    "name": "file",
                    "datatype": "str",
                    "desc": "name of the file from which the information was extracted",
                },
                {"name": "num_chars", "datatype": "str"},
                {"name": "page", "datatype": "int"},
            ],
            "pdf_path": "./data/valmiki_ramayanam.pdf",
            "source": "https://ia800509.us.archive.org/28/items/valmiki-ramayana-gita-press-english/Valmiki%20Ramayana%20Gita%20Press%20English.pdf",
            "language": "san+eng",
            "example_labels": [
                "About Jatayu",
                "About Hanuman",
                "About Vali",
                "About Sita",
                "About Ravana",
                "A slokam by name",
                "Vibheeshana sharanagathi slokam",
            ],
            "examples": [
                "What is the significance of Jatayu? show some sanskrit verses to support the argument",
                "Show some verses where Hanuman is mentioned",
                "How did Rama kill Vali",
                "How was Sita abducted",
                "How did Rama kill Ravana?",
                "explain sakrudeva prapannaaya shlokam in ramayana",
                "give the shlokam in ramayanam that vibheeshana uses to perform sharanagathi to rama, give the sanskrit shlokam and its meaning",
            ],
            "llm_hints": [],
        },
        {
            "name": "vishnu_sahasranamam",
            "title": "Vishnu Sahasranamam",
            "output_dir": "./output/vishnu_sahasranamam",
            "collection_name": "vishnu_sahasranamam_openai",
            "collection_embedding_fn": "openai",
            "unit": "verse",
            "metadata_fields": [
                {"name": "chapter", "datatype": "str"},
                {"name": "page_number", "datatype": "int"},
                {
                    "name": "sanskrit",
                    "datatype": "str",
                    "desc": "The original sloka in sanskrit.",
                },
                {
                    "name": "translation",
                    "datatype": "str",
                    "desc": "The english translation.",
                },
                {
                    "name": "transliteration",
                    "datatype": "str",
                    "desc": "The english transliteration.",
                },
                {
                    "name": "verse",
                    "datatype": "int",
                    "desc": "The verse number of the sloka.",
                },
            ],
            "pdf_path": "./data/vishnu_sahasranamam.pdf",
            "source": "https://www.swami-krishnananda.org/vishnu/Sri_Vishnu_Sahasranama_Stotram.pdf",
            "language": "san+eng",
            "example_labels": ["Vanamali", "1000 names", "Sanskrit text search"],
            "examples": [
                "Vanamali",
                "Show some of the 1000 names of Vishnu along with their meaning",
                "show the verse that begins with शुक्लाम्बरधरं",
            ],
            "llm_hints": [],
        },
        {
            "name": "divya_prabandham",
            "title": "4000 Divya Prabandham",
            "output_dir": "./output/divya_prabandham",
            "collection_name": "divya_prabandham",
            "collection_embedding_fn": "openai",
            "unit": "verse",
            "metadata_fields": [
                {
                    "name": "prabandham_code",
                    "datatype": "str",
                    "description": "contains the short prabandham_code. e.g. `TPL` for `Thiruppallandu`",
                },
                {
                    "name": "prabandham_name",
                    "datatype": "str",
                    "description": "contains the prabandham name. e.g. `Thiruppallandu`",
                },
                {
                    "name": "azhwar_name",
                    "datatype": "str",
                    "description": "contains the azhwar name. e.g. `Thirumangai Azhwar`",
                },
                {
                    "name": "divya_desams",
                    "datatype": "str",
                    "description": "comma separated list of divya desams. e.g. Thiruneermalai,Thiruvallikkeni.",
                },
                # {"name": "html_url", "datatype": "str", "description" : "Reference link for the source"},
                # {"name": "pasuram_en", "datatype": "str", "description" : "Transliteration of pasuram in english"},
                # {"name": "pasuram_ta", "datatype": "str", "description" : "Pasuram lyrics in tamil"},
                {
                    "name": "title",
                    "datatype": "str",
                    "description": (
                        "Exact title of a pasuram in one of the following formats:\n"
                        "1. '{prabandham_code} {decade}.{chapter}.{pasuram}' — use when the prabandham has decades.\n"
                        "2. '{prabandham_code} {chapter}.{pasuram}' — use when the prabandham does not have decades.\n\n"
                        "⚠️ Use this field ONLY when the user provides a specific prabandham and a relative verse number.\n"
                        "Examples of valid usage:\n"
                        "- User query: '3rd pasuram in the 8th Thiruvaimozhi of the 1st decade.'\n"
                        " → Convert to: '{prabandham_code} 1.8.3' and pass as `title` filter.\n"
                        "- User query: '2nd pasuram of chapter 5 in [Prabandham with no decades].'\n"
                        " → Convert to: '{prabandham_code} 5.2' and pass as `title` filter.\n"
                        "Do NOT use `title` for general queries or keyword searches — leave it empty in those cases."
                    ),
                },
                {
                    "name": "verse",
                    "datatype": "int",
                    "is_unique": True,
                    "description": (
                        # Fragments are joined by implicit concatenation, so
                        # each one ends with a space to keep sentences apart.
                        "Absolute verse number or pasuram number. Each verse has a unique number. "
                        # "Use it only when a specific prabandham name is NOT mentioned in the user query."
                        "For e.g. 'Give me pasuram 1176'"
                    ),
                },
                # {"name": "wbw_ta", "datatype": "str", "description" : "Word by word meaning in tamil."},
                {
                    "name": "decade",
                    "datatype": "int",
                    "description": (
                        "The decade (or `pathu` in Tamil) that this pasuram belongs to. decade is -1 when there is no associated decade."
                    ),
                },
                {
                    "name": "chapter",
                    "datatype": "int",
                    "description": (
                        "chapter number of this pasuram. is -1 when there is no associated chapter number"
                    ),
                },
                {
                    "name": "position_in_chapter",
                    "datatype": "int",
                    "description": (
                        # Trailing spaces separate the concatenated sentences.
                        "Relative verse number or pasuram number within a chapter. "
                        "Use it only when a specific prabandham name is mentioned in the user query. "
                        "For e.g. 'Give me the 5th pasuram from Thirupavai'"
                    ),
                },
            ],
            "pdf_path": "./data/divya_prabandham.pdf",
            "source": "https://uveda.org",
            "language": "tamil",
            "example_labels": [
                "About the five elements",
                "About Garuda",
                "Pasuram about Krishna's Flute",
                "Andal's pasuram",
                "Specific Pasuram (absolute)",
                "Pasuram by Azhwar",
                "Specific pasuram(relative)",
                "Decade and Chapter Search",
            ],
            "examples": [
                "five elements and their significance as defined in divya_prabandham",
                "What is the significance of Garuda? Show some verses from divya prabandham that describe him.",
                "Show me a pasuram that talks about how the animals and birds enjoy Krishna's flute playing.",
                "Give me a pasuram by Andal",
                "Show me Pasuram 1187 ",
                "Show me a pasuram by Thondaradippodi azhwar",
                "Give me the 2nd pasuram in the 3rd Thiruvaimozhi from the 2nd decade",
                "Give me just a few words from the starting lines and reference links of all 11 pasurams from thiruvaimozhi 5th decade 4th chapter.",
            ],
            "llm_hints": [
                "If the user wishes to query at a decade or chapter level for a given prabandham, use the direct metadata query on the appropriate fields once instead of querying the tool multiple times for each pasuram from the chapter."
            ],
        },
        {
            "name": "bhagavata_purana",
            "title": "Bhagavatha Puranam",
            "output_dir": "./output/bhagavata_purana",
            "collection_name": "bhagavata_purana",
            "unit": "page",
            "metadata_fields": [
                {
                    "name": "file",
                    "datatype": "str",
                    "desc": "name of the file from which the information was extracted",
                },
                {"name": "num_chars", "datatype": "str"},
                {"name": "page", "datatype": "int"},
            ],
            "pdf_path": "./data/bhagavata_purana.pdf",
            "source": "https://dn790003.ca.archive.org/0/items/bhagavatapuranagitapress_201907/Bhagavata%20Purana%20-%20Gita%20Press_text.pdf",
            "language": "san+eng",
            "example_labels": ["Gajendra Moksham", "Prahalad"],
            "examples": [
                "State some verses that showcase the devotion of Gajendra the elephant",
                "State some verses that showcase the devotion of Prahlada",
            ],
            "llm_hints": [],
        },
        {
            "name": "kamba_ramayanam_en",
            "title": "Kamba Ramayanam (English)",
            "output_dir": "./output/kamba_ramayanam",
            "collection_name": "kamba_ramayanam_en",
            "unit": "verse",
            "metadata_fields": [
                {
                    "name": "kandam",
                    "datatype": "str",
                    "description": "The name of the Kandam or the chapter.",
                },
                {
                    "name": "padalam_en",
                    "datatype": "str",
                    "description": "The name of the Padalam (Episode) in English.",
                },
                {
                    "name": "padalam_ta",
                    "datatype": "str",
                    "description": "The name of the Padalam (Episode) in Tamil.",
                },
                {"name": "page", "datatype": "int"},
                {"name": "verse_number", "datatype": "int"},
            ],
            "pdf_path": "./data/kamba_ramayanam.pdf",
            "source": "https://www.hindupedia.com/images/1/13/Kamba_Ramayanam_I.pdf",
            "language": "tamil",
            "example_labels": [
                "About Jatayu",
                "About Hanuman",
                "About Vali",
                "About Sita",
                "About Ravana",
            ],
            "examples": [
                "What is the significance of Jatayu? show some sanskrit verses to support the argument",
                "Show some verses where Hanuman is mentioned",
                "How did Rama kill Vali",
                "How was Sita abducted",
                "How did Rama kill Ravana?",
            ],
            "llm_hints": [],
        },
        {
            "name": "kamba_ramayanam",
            "title": "Kamba Ramayanam (Tamil)",
            "output_dir": "./output/kamba_ramayanam",
            "collection_name": "kamba_ramayanam",
            "unit": "chunk",
            "metadata_fields": [
                {
                    "name": "chunk_index",
                    "datatype": "int",
                    "description": "The index of the chunk",
                },
                {
                    "name": "filename",
                    "datatype": "str",
                    "description": "The name of the file.",
                },
            ],
            "pdf_path": "./data/kamba_ramayanam.pdf",
            "source": "https://archive.org/details/vrajeshkumar_gmail_061/01-%E0%AE%AA%E0%AE%BE%E0%AE%B2%20%E0%AE%95%E0%AE%BE%E0%AE%A3%E0%AF%8D%E0%AE%9F%E0%AE%AE%E0%AF%8D/page/n15/mode/2up",
            "language": "tamil",
            "example_labels": [
                "About Jatayu",
                "About Hanuman",
                "About Vali",
                "About Sita",
                "About Ravana",
            ],
            "examples": [
                "What is the significance of Jatayu? show some sanskrit verses to support the argument",
                "Show some verses where Hanuman is mentioned",
                "How did Rama kill Vali",
                "How was Sita abducted",
                "How did Rama kill Ravana?",
            ],
            "llm_hints": [],
        },
        {
            "name": "chathusloki",
            "title": "Chathusloki by Sri Alavandar",
            "output_dir": "./output/chathusloki",
            "collection_name": "chathusloki",
            "unit": "slokam",
            "metadata_fields": [
                {
                    "name": "sloka_number",
                    "datatype": "int",
                    "description": "The index of the sloka or verse",
                },
                {
                    "name": "meaning_short",
                    "datatype": "str",
                    "description": "A short meaning of the sanskrit verse in English.",
                },
            ],
            "pdf_path": "./data/chathusloki.pdf",
            "source": "https://www.sadagopan.org/ebook/pdf/Chatusloki%20-%20VS.pdf",
            "language": "san+eng",
            "example_labels": ["Recite a sloka", "Commentary", "Role of Sridevi"],
            "examples": [
                "Recite the 1st sloka from Chathusloki",
                "Show detailed commentary for sloka 2 from Chathusloki",
                "What is the role of Sri Devi in the universe according to the Chathusloki?",
            ],
            "llm_hints": [],
        },
        {
            "name": "sri_stavam",
            "title": "Sri Stavam by Sri Koorathazhwar",
            "output_dir": "./output/sri_stavam",
            "collection_name": "sri_stavam",
            "unit": "slokam",
            "metadata_fields": [
                {
                    "name": "sloka_number",
                    "datatype": "int",
                    "description": "The index of the sloka or verse",
                },
                {
                    "name": "meaning_short",
                    "datatype": "str",
                    "description": "A short meaning of the sanskrit verse in English.",
                },
                {
                    "name": "sanskrit",
                    "datatype": "str",
                    "description": "Verse in sanskrit",
                },
                {
                    "name": "transliteration",
                    "datatype": "str",
                    "description": "Verse transliterated to English",
                },
            ],
            "pdf_path": "./data/sri_stavam.pdf",
            "source": "https://www.sadagopan.org/ebook/pdf/Sri%20Stavam.pdf",
            "language": "san+eng",
            "example_labels": ["Recite a sloka", "Commentary", "Role of Sridevi"],
            "examples": [
                "Recite the 1st sloka from Sri Stavam",
                "Show detailed commentary for sloka 2 from Sri Stavam",
                "What is the role of Sri Devi in the universe according to the Sri Stavam?",
            ],
            "llm_hints": [
                # Must name a field declared in metadata_fields above; this
                # collection has no `verse` field, only `sloka_number`.
                "if the user asks for nth sloka, do a metadata search on the `sloka_number` field."
            ],
        },
        {
            "name": "yt_metadata",
            "title": "Sampradayam in YouTube",
            "output_dir": "./output/yt_metadata",
            "collection_name": "yt_metadata",
            "collection_embedding_fn": "openai",
            "unit": "video",
            "metadata_fields": [
                {
                    "name": "video_id",
                    "datatype": "str",
                    "description": "The video id as in YouTube",
                },
                {
                    "name": "video_title",
                    "datatype": "str",
                    "description": "The title of the video as in YouTube",
                },
                {
                    "name": "description",
                    "datatype": "str",
                    "description": "Description as in YouTube",
                },
                {
                    "name": "channel_url",
                    "datatype": "str",
                    "description": "URL of the YouTube Channel",
                },
                {
                    "name": "channel_title",
                    "datatype": "str",
                    "description": "Title of the YouTube Channel",
                },
            ],
            "pdf_path": "./data/none.pdf",
            "source": "https://youtube.com",
            "language": "san+eng+tam",
            "example_labels": ["Srirangam", "Pasuram video"],
            "examples": [
                "Show me YouTube videos that talk about Srirangam",
                "Show me lyrics of 1st pasuram of 1st decade in the 4st Thiruvaimozhi. Also show the related youtube videos.",
            ],
            "llm_hints": [
                "if the user asks for YouTube videos, DO NOT do a web search, instead do a search on this collection."
            ],
        },
    ]

    def get_scripture_by_collection(self, collection_name: str):
        """Return the scripture entry whose ``collection_name`` matches.

        Args:
            collection_name: Value to compare against each entry's
                ``"collection_name"`` key.

        Returns:
            The first matching dict from ``self.scriptures``.

        Raises:
            IndexError: If no entry matches. The exception type is the one
                the previous ``[...][0]`` lookup produced, so existing
                handlers keep working; only the message is more descriptive.
        """
        match = next(
            (
                scripture
                for scripture in self.scriptures
                if scripture["collection_name"] == collection_name
            ),
            None,
        )
        if match is None:
            raise IndexError(
                f"No scripture is configured for collection [{collection_name}]"
            )
        return match

    def is_metadata_field_allowed(
        self, collection_name: str, metadata_where_clause: "MetadataWhereClause"
    ):
        """Check that a where-clause only filters on declared metadata fields.

        The clause is walked recursively: every entry of ``clause.filters``
        must have a ``metadata_field`` that is declared in the collection's
        ``metadata_fields``, and every entry of ``clause.groups`` is checked
        the same way.

        The ``MetadataWhereClause`` annotations are quoted so they are not
        evaluated when the class or the nested function is defined.

        Args:
            collection_name: Collection whose declared fields are used.
            metadata_where_clause: Root clause to validate.

        Returns:
            ``True`` when every referenced field is allowed.

        Raises:
            ValueError: On the first field that is not declared for the
                collection. ``ValueError`` subclasses ``Exception``, so
                callers that catch ``Exception`` are unaffected.
            IndexError: If ``collection_name`` is not configured.
        """
        scripture = self.get_scripture_by_collection(collection_name=collection_name)
        # A set gives constant-time membership checks while walking the clause.
        allowed_fields = {field["name"] for field in scripture["metadata_fields"]}

        def validate_clause(clause: "MetadataWhereClause"):
            # validate direct filters
            if clause.filters:
                for f in clause.filters:
                    if f.metadata_field not in allowed_fields:
                        raise ValueError(
                            f"metadata_field: [{f.metadata_field}] not allowed in collection [{collection_name}]. "
                            f"Here are the allowed fields with their descriptions: {scripture['metadata_fields']}"
                        )
            # recurse into groups
            if clause.groups:
                for g in clause.groups:
                    validate_clause(g)

        validate_clause(metadata_where_clause)
        return True

    def get_embedding_for_collection(self, collection_name: str):
        """Return the embedding-function key configured for a collection.

        Args:
            collection_name: Collection to look up.

        Returns:
            The entry's ``"collection_embedding_fn"`` value when present,
            otherwise ``"hf"`` (the HuggingFace sentence-transformers
            default).

        Raises:
            IndexError: If ``collection_name`` is not configured.
        """
        scripture = self.get_scripture_by_collection(collection_name)
        # "hf" is the default unless the entry overrides it in the config.
        return scripture.get("collection_embedding_fn", "hf")

    def filter_scriptures_fields(self, fields_to_keep: List[str]) -> List[Dict]:
        """
        Return a list of scripture dicts containing only the specified fields.

        Keys listed in ``fields_to_keep`` that an entry does not have are
        skipped for that entry; the order of ``fields_to_keep`` is preserved.
        """
        return [
            {key: scripture[key] for key in fields_to_keep if key in scripture}
            for scripture in self.scriptures
        ]