Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
| from metadata import MetadataWhereClause | |
| from typing import List, Dict | |
class SanatanConfig:
    """Hard-coded registry of the scripture collections and their metadata schemas.

    ``scriptures`` holds one dict per collection. Every entry defines the keys
    ``name``, ``title``, ``output_dir``, ``collection_name``, ``unit``,
    ``metadata_fields``, ``pdf_path``, ``source``, ``language``,
    ``example_labels``, ``examples`` and ``llm_hints``. The optional
    ``collection_embedding_fn`` key overrides the default embedding function
    (see ``get_embedding_for_collection``).
    """

    # shuklaYajurVedamPdfPath: str = "./data/shukla-yajur-veda.pdf"
    # shuklaYajurVedamSmallPdfPath: str = "./data/shukla-yajur-veda-small.pdf"
    # vishnuPuranamPdfPath = "./data/vishnu_puranam.pdf"
    # datastores = [{"name": "sanskrit_001", "dbStorePath": "./chromadb-store"}, {"name": "nalayiram", "dbStorePath": "./chromadb-store-4000"}]
    dbStorePath: str = "./chromadb-store"
    # shuklaYajurVedamCollectionName: str = "shukla_yajur_vedam"
    # vishnuPuranamCollectionName: str = "vishnu_puranam"
    # shuklaYajurVedamOutputDir = "./output/shukla_yajur_vedam"
    # vishnuPuranamOutputDir = "./output/vishnu_puranam"

    # NOTE(review): `example_labels` and `examples` have the same length in every
    # entry; presumably label i is the short caption for prompt i - confirm
    # against the UI code that consumes them.
    # NOTE(review): field documentation is stored under "desc" in some entries and
    # under "description" in others; left as-is because the consumers are not
    # visible here - confirm which key they read before unifying.
    scriptures: List[Dict] = [
        {
            "name": "vishnu_puranam",
            "title": "Sri Vishnu Puranam",
            "output_dir": "./output/vishnu_puranam",
            "collection_name": "vishnu_puranam_openai",
            "collection_embedding_fn": "openai",
            "unit": "page",
            "metadata_fields": [
                {
                    "name": "file",
                    "datatype": "str",
                    "desc": "name of the file from which the information was extracted",
                },
                {"name": "num_chars", "datatype": "str"},
                {"name": "page", "datatype": "int"},
            ],
            "pdf_path": "./data/vishnu_puranam.pdf",
            "source": "https://dn720005.ca.archive.org/0/items/vishnu-purana-sanskrit-english-ocr/VISHNU-PURANA-Sanskrit-English-OCR.pdf",
            "language": "san+eng",
            "example_labels": [
                "Vishnu's form",
                "About the five elements",
                "About Garuda",
                "Weapons of Vishnu",
                "Vishnu's form (all scriptures)",
            ],
            "examples": [
                "describe Vishnu's form as defined in vishnu puranam",
                "five elements and their significance as per vishnu puranam",
                "What is the significance of Garuda? Show some verses from vishnu puranam that describe him.",
                "What weapons does Vishnu hold as mentioned in vishnu puranam?",
                "How is the form of Vishnu described across the scriptures?",
            ],
            "llm_hints": [],
        },
        {
            "name": "shukla_yajur_vedam",
            "title": "Shukla Yajur Vedam",
            "output_dir": "./output/shukla_yajur_vedam",
            "collection_name": "shukla_yajur_vedam",
            "unit": "page",
            "metadata_fields": [
                {
                    "name": "file",
                    "datatype": "str",
                    "desc": "name of the file from which the information was extracted",
                },
                {"name": "num_chars", "datatype": "str"},
                {"name": "page", "datatype": "int"},
            ],
            "pdf_path": "./data/shukla-yajur-veda.pdf",
            "source": "https://www.thearyasamaj.org/uploads/book/2014/04/R1sSjG_eLb_sub_406_yajurveda.pdf",
            "language": "san+eng",
            "example_labels": [
                "About Vedam",
                "About the five elements",
                "About Brahma",
            ],
            "examples": [
                "Gist of Shukla Yajur Vedam. Give me some sanskrit verses.",
                "What is the significance of fire and water. show some sanskrit verses",
                "Brahma",
            ],
            "llm_hints": [],
        },
        {
            "name": "bhagavat_gita",
            "title": "Bhagavat Gita",
            "output_dir": "./output/bhagavat_gita",
            "collection_name": "bhagavat_gita_openai",
            "collection_embedding_fn": "openai",
            "unit": "page",
            "metadata_fields": [
                {
                    "name": "file",
                    "datatype": "str",
                    "desc": "name of the file from which the information was extracted",
                },
                {"name": "num_chars", "datatype": "str"},
                {"name": "page", "datatype": "int"},
            ],
            "pdf_path": "./data/bhagavat_gita.pdf",
            "source": "https://dn790006.ca.archive.org/0/items/in.gov.ignca.279/279_text.pdf",
            "language": "san+eng",
            "example_labels": [
                "About Arjuna",
                "About Karma",
                "About birth and death",
                "About the battle field",
                "About Krishna's form",
                "Krishna's Teachings",
            ],
            "examples": [
                "Show some verses where Krishna advises Arjuna",
                "What does Krishna say about Karma",
                "What does Krishna say about birth and death",
                "describe the battle field",
                # The comma after this entry matters: without it the next
                # literal is silently concatenated onto this one.
                "How did Arjuna respond upon witnessing Krishna’s Vishwarupa?",
                "What teachings did Krishna share in the Gita?",
            ],
            "llm_hints": [],
        },
        {
            "name": "valmiki_ramayanam",
            "title": "Valmiki Ramayanam",
            "output_dir": "./output/valmiki_ramayanam",
            "collection_name": "valmiki_ramayanam_openai",
            "collection_embedding_fn": "openai",
            "unit": "page",
            "metadata_fields": [
                {
                    "name": "file",
                    "datatype": "str",
                    "desc": "name of the file from which the information was extracted",
                },
                {"name": "num_chars", "datatype": "str"},
                {"name": "page", "datatype": "int"},
            ],
            "pdf_path": "./data/valmiki_ramayanam.pdf",
            "source": "https://ia800509.us.archive.org/28/items/valmiki-ramayana-gita-press-english/Valmiki%20Ramayana%20Gita%20Press%20English.pdf",
            "language": "san+eng",
            "example_labels": [
                "About Jatayu",
                "About Hanuman",
                "About Vali",
                "About Sita",
                "About Ravana",
                "A slokam by name",
                "Vibheeshana sharanagathi slokam",
            ],
            "examples": [
                "What is the significance of Jatayu? show some sanskrit verses to support the argument",
                "Show some verses where Hanuman is mentioned",
                "How did Rama kill Vali",
                "How was Sita abducted",
                "How did Rama kill Ravana?",
                "explain sakrudeva prapannaaya shlokam in ramayana",
                "give the shlokam in ramayanam that vibheeshana uses to perform sharanagathi to rama, give the sanskrit shlokam and its meaning",
            ],
            "llm_hints": [],
        },
        {
            "name": "vishnu_sahasranamam",
            "title": "Vishnu Sahasranamam",
            "output_dir": "./output/vishnu_sahasranamam",
            "collection_name": "vishnu_sahasranamam_openai",
            "collection_embedding_fn": "openai",
            "unit": "verse",
            "metadata_fields": [
                {"name": "chapter", "datatype": "str"},
                {"name": "page_number", "datatype": "int"},
                {
                    "name": "sanskrit",
                    "datatype": "str",
                    "desc": "The original sloka in sanskrit.",
                },
                {
                    "name": "translation",
                    "datatype": "str",
                    "desc": "The english translation.",
                },
                {
                    "name": "transliteration",
                    "datatype": "str",
                    "desc": "The english transliteration.",
                },
                {
                    "name": "verse",
                    "datatype": "int",
                    "desc": "The verse number of the sloka.",
                },
            ],
            "pdf_path": "./data/vishnu_sahasranamam.pdf",
            "source": "https://www.swami-krishnananda.org/vishnu/Sri_Vishnu_Sahasranama_Stotram.pdf",
            "language": "san+eng",
            "example_labels": ["Vanamali", "1000 names", "Sanskrit text search"],
            "examples": [
                "Vanamali",
                "Show some of the 1000 names of Vishnu along with their meaning",
                "show the verse that begins with शुक्लाम्बरधरं",
            ],
            "llm_hints": [],
        },
        {
            "name": "divya_prabandham",
            "title": "4000 Divya Prabandham",
            "output_dir": "./output/divya_prabandham",
            "collection_name": "divya_prabandham",
            "collection_embedding_fn": "openai",
            "unit": "verse",
            "metadata_fields": [
                {
                    "name": "prabandham_code",
                    "datatype": "str",
                    "description": "contains the short prabandham_code. e.g. `TPL` for `Thiruppallandu`",
                },
                {
                    "name": "prabandham_name",
                    "datatype": "str",
                    "description": "contains the prabandham name. e.g. `Thiruppallandu`",
                },
                {
                    "name": "azhwar_name",
                    "datatype": "str",
                    "description": "contains the azhwar name. e.g. `Thirumangai Azhwar`",
                },
                {
                    "name": "divya_desams",
                    "datatype": "str",
                    "description": "comma separated list of divya desams. e.g. Thiruneermalai,Thiruvallikkeni.",
                },
                # {"name": "html_url", "datatype": "str", "description" : "Reference link for the source"},
                # {"name": "pasuram_en", "datatype": "str", "description" : "Transliteration of pasuram in english"},
                # {"name": "pasuram_ta", "datatype": "str", "description" : "Pasuram lyrics in tamil"},
                {
                    "name": "title",
                    "datatype": "str",
                    "description": (
                        "Exact title of a pasuram in one of the following formats:\n"
                        "1. '{prabandham_code} {decade}.{chapter}.{pasuram}' — use when the prabandham has decades.\n"
                        "2. '{prabandham_code} {chapter}.{pasuram}' — use when the prabandham does not have decades.\n\n"
                        "⚠️ Use this field ONLY when the user provides a specific prabandham and a relative verse number.\n"
                        "Examples of valid usage:\n"
                        "- User query: '3rd pasuram in the 8th Thiruvaimozhi of the 1st decade.'\n"
                        " → Convert to: '{prabandham_code} 1.8.3' and pass as `title` filter.\n"
                        "- User query: '2nd pasuram of chapter 5 in [Prabandham with no decades].'\n"
                        " → Convert to: '{prabandham_code} 5.2' and pass as `title` filter.\n"
                        "Do NOT use `title` for general queries or keyword searches — leave it empty in those cases."
                    ),
                },
                {
                    "name": "verse",
                    "datatype": "int",
                    "is_unique": True,
                    "description": (
                        # Adjacent literals are concatenated verbatim, so each
                        # sentence carries its own trailing space.
                        "Absolute verse number or pasuram number. Each verse has a unique number. "
                        # "Use it only when a specific prabandham name is NOT mentioned in the user query."
                        "For e.g. 'Give me pasuram 1176'"
                    ),
                },
                # {"name": "wbw_ta", "datatype": "str", "description" : "Word by word meaning in tamil."},
                {
                    "name": "decade",
                    "datatype": "int",
                    "description": (
                        "The decade (or `pathu` in Tamil) that this pasuram belongs to. decade is -1 when there is no associated decade."
                    ),
                },
                {
                    "name": "chapter",
                    "datatype": "int",
                    "description": (
                        "chapter number of this pasuram. is -1 when there is no associated chapter number"
                    ),
                },
                {
                    "name": "position_in_chapter",
                    "datatype": "int",
                    "description": (
                        "Relative verse number or pasuram number within a chapter. "
                        "Use it only when a specific prabandham name is mentioned in the user query. "
                        "For e.g. 'Give me the 5th pasuram from Thirupavai'"
                    ),
                },
            ],
            "pdf_path": "./data/divya_prabandham.pdf",
            "source": "https://uveda.org",
            "language": "tamil",
            "example_labels": [
                "About the five elements",
                "About Garuda",
                "Pasuram about Krishna's Flute",
                "Andal's pasuram",
                "Specific Pasuram (absolute)",
                "Pasuram by Azhwar",
                "Specific pasuram(relative)",
                "Decade and Chapter Search",
            ],
            "examples": [
                "five elements and their significance as defined in divya_prabandham",
                "What is the significance of Garuda? Show some verses from divya prabandham that describe him.",
                "Show me a pasuram that talks about how the animals and birds enjoy Krishna's flute playing.",
                "Give me a pasuram by Andal",
                "Show me Pasuram 1187 ",
                "Show me a pasuram by Thondaradippodi azhwar",
                "Give me the 2nd pasuram in the 3rd Thiruvaimozhi from the 2nd decade",
                "Give me just a few words from the starting lines and reference links of all 11 pasurams from thiruvaimozhi 5th decade 4th chapter.",
            ],
            "llm_hints": [
                "If the user wishes to query at a decade or chapter level for a given prabandham, use the direct metadata query on the appropriate fields once instead of querying the tool multiple times for each pasuram from the chapter."
            ],
        },
        {
            "name": "bhagavata_purana",
            "title": "Bhagavatha Puranam",
            "output_dir": "./output/bhagavata_purana",
            "collection_name": "bhagavata_purana",
            "unit": "page",
            "metadata_fields": [
                {
                    "name": "file",
                    "datatype": "str",
                    "desc": "name of the file from which the information was extracted",
                },
                {"name": "num_chars", "datatype": "str"},
                {"name": "page", "datatype": "int"},
            ],
            "pdf_path": "./data/bhagavata_purana.pdf",
            "source": "https://dn790003.ca.archive.org/0/items/bhagavatapuranagitapress_201907/Bhagavata%20Purana%20-%20Gita%20Press_text.pdf",
            "language": "san+eng",
            "example_labels": ["Gajendra Moksham", "Prahalad"],
            "examples": [
                "State some verses that showcase the devotion of Gajendra the elephant",
                "State some verses that showcase the devotion of Prahlada",
            ],
            "llm_hints": [],
        },
        {
            "name": "kamba_ramayanam_en",
            "title": "Kamba Ramayanam (English)",
            "output_dir": "./output/kamba_ramayanam",
            "collection_name": "kamba_ramayanam_en",
            "unit": "verse",
            "metadata_fields": [
                {
                    "name": "kandam",
                    "datatype": "str",
                    "description": "The name of the Kandam or the chapter.",
                },
                {
                    "name": "padalam_en",
                    "datatype": "str",
                    "description": "The name of the Padalam (Episode) in English.",
                },
                {
                    "name": "padalam_ta",
                    "datatype": "str",
                    "description": "The name of the Padalam (Episode) in Tamil.",
                },
                {"name": "page", "datatype": "int"},
                {"name": "verse_number", "datatype": "int"},
            ],
            "pdf_path": "./data/kamba_ramayanam.pdf",
            "source": "https://www.hindupedia.com/images/1/13/Kamba_Ramayanam_I.pdf",
            "language": "tamil",
            "example_labels": [
                "About Jatayu",
                "About Hanuman",
                "About Vali",
                "About Sita",
                "About Ravana",
            ],
            "examples": [
                "What is the significance of Jatayu? show some sanskrit verses to support the argument",
                "Show some verses where Hanuman is mentioned",
                "How did Rama kill Vali",
                "How was Sita abducted",
                "How did Rama kill Ravana?",
            ],
            "llm_hints": [],
        },
        {
            "name": "kamba_ramayanam",
            "title": "Kamba Ramayanam (Tamil)",
            "output_dir": "./output/kamba_ramayanam",
            "collection_name": "kamba_ramayanam",
            "unit": "chunk",
            "metadata_fields": [
                {
                    "name": "chunk_index",
                    "datatype": "int",
                    "description": "The index of the chunk",
                },
                {
                    "name": "filename",
                    "datatype": "str",
                    "description": "The name of the file.",
                },
            ],
            "pdf_path": "./data/kamba_ramayanam.pdf",
            "source": "https://archive.org/details/vrajeshkumar_gmail_061/01-%E0%AE%AA%E0%AE%BE%E0%AE%B2%20%E0%AE%95%E0%AE%BE%E0%AE%A3%E0%AF%8D%E0%AE%9F%E0%AE%AE%E0%AF%8D/page/n15/mode/2up",
            "language": "tamil",
            "example_labels": [
                "About Jatayu",
                "About Hanuman",
                "About Vali",
                "About Sita",
                "About Ravana",
            ],
            "examples": [
                "What is the significance of Jatayu? show some sanskrit verses to support the argument",
                "Show some verses where Hanuman is mentioned",
                "How did Rama kill Vali",
                "How was Sita abducted",
                "How did Rama kill Ravana?",
            ],
            "llm_hints": [],
        },
        {
            "name": "chathusloki",
            "title": "Chathusloki by Sri Alavandar",
            "output_dir": "./output/chathusloki",
            "collection_name": "chathusloki",
            "unit": "slokam",
            "metadata_fields": [
                {
                    "name": "sloka_number",
                    "datatype": "int",
                    "description": "The index of the sloka or verse",
                },
                {
                    "name": "meaning_short",
                    "datatype": "str",
                    "description": "A short meaning of the sanskrit verse in English.",
                },
            ],
            "pdf_path": "./data/chathusloki.pdf",
            "source": "https://www.sadagopan.org/ebook/pdf/Chatusloki%20-%20VS.pdf",
            "language": "san+eng",
            "example_labels": ["Recite a sloka", "Commentary", "Role of Sridevi"],
            "examples": [
                "Recite the 1st sloka from Chathusloki",
                "Show detailed commentary for sloka 2 from Chathusloki",
                "What is the role of Sri Devi in the universe according to the Chathusloki?",
            ],
            "llm_hints": [],
        },
        {
            "name": "sri_stavam",
            "title": "Sri Stavam by Sri Koorathazhwar",
            "output_dir": "./output/sri_stavam",
            "collection_name": "sri_stavam",
            "unit": "slokam",
            "metadata_fields": [
                {
                    "name": "sloka_number",
                    "datatype": "int",
                    "description": "The index of the sloka or verse",
                },
                {
                    "name": "meaning_short",
                    "datatype": "str",
                    "description": "A short meaning of the sanskrit verse in English.",
                },
                {
                    "name": "sanskrit",
                    "datatype": "str",
                    "description": "Verse in sanskrit",
                },
                {
                    "name": "transliteration",
                    "datatype": "str",
                    "description": "Verse transliterated to English",
                },
            ],
            "pdf_path": "./data/sri_stavam.pdf",
            "source": "https://www.sadagopan.org/ebook/pdf/Sri%20Stavam.pdf",
            "language": "san+eng",
            "example_labels": ["Recite a sloka", "Commentary", "Role of Sridevi"],
            "examples": [
                "Recite the 1st sloka from Sri Stavam",
                "Show detailed commentary for sloka 2 from Sri Stavam",
                "What is the role of Sri Devi in the universe according to the Sri Stavam?",
            ],
            "llm_hints": [
                # The hint must name a field declared in `metadata_fields` above;
                # any other name is rejected by `is_metadata_field_allowed`.
                "if the user asks for nth sloka, do a metadata search on the `sloka_number` field."
            ],
        },
        {
            "name": "yt_metadata",
            "title": "Sampradayam in YouTube",
            "output_dir": "./output/yt_metadata",
            "collection_name": "yt_metadata",
            "collection_embedding_fn": "openai",
            "unit": "video",
            "metadata_fields": [
                {
                    "name": "video_id",
                    "datatype": "str",
                    "description": "The video id as in YouTube",
                },
                {
                    "name": "video_title",
                    "datatype": "str",
                    "description": "The title of the video as in YouTube",
                },
                {
                    "name": "description",
                    "datatype": "str",
                    "description": "Description as in YouTube",
                },
                {
                    "name": "channel_url",
                    "datatype": "str",
                    "description": "URL of the YouTube Channel",
                },
                {
                    "name": "channel_title",
                    "datatype": "str",
                    "description": "Title of the YouTube Channel",
                },
            ],
            "pdf_path": "./data/none.pdf",
            "source": "https://youtube.com",
            "language": "san+eng+tam",
            "example_labels": ["Srirangam", "Pasuram video"],
            "examples": [
                "Show me YouTube videos that talk about Srirangam",
                "Show me lyrics of 1st pasuram of 1st decade in the 4th Thiruvaimozhi. Also show the related youtube videos.",
            ],
            "llm_hints": [
                "if the user asks for YouTube videos, DO NOT do a web search, instead do a search on this collection."
            ],
        },
    ]

    def get_scripture_by_collection(self, collection_name: str):
        """Return the scripture entry whose ``collection_name`` matches.

        Args:
            collection_name: Value to compare against each entry's
                ``"collection_name"`` key.

        Returns:
            The first matching dict from ``self.scriptures`` (the same object,
            not a copy).

        Raises:
            IndexError: If no entry uses ``collection_name``. The exception type
                is unchanged from the previous ``[...][0]`` lookup; only the
                message is more descriptive.
        """
        for scripture in self.scriptures:
            if scripture["collection_name"] == collection_name:
                return scripture
        known = ", ".join(s["collection_name"] for s in self.scriptures)
        raise IndexError(
            f"No scripture is configured for collection [{collection_name}]. "
            f"Known collections: {known}"
        )

    def is_metadata_field_allowed(
        self, collection_name: str, metadata_where_clause: "MetadataWhereClause"
    ) -> bool:
        """Check that a where-clause only references fields of the collection.

        Every filter in ``metadata_where_clause.filters`` and, recursively, in
        each clause under ``.groups`` must name a field listed in the
        collection's ``metadata_fields``.

        Args:
            collection_name: Collection whose schema is used for validation.
            metadata_where_clause: Clause exposing ``filters`` (items with a
                ``metadata_field`` attribute) and ``groups`` (nested clauses).
                Either may be empty or ``None``.

        Returns:
            ``True`` when every referenced field is allowed.

        Raises:
            IndexError: If ``collection_name`` is not configured.
            Exception: If a filter references a field that is not declared for
                the collection; the message lists the allowed fields.
        """
        scripture = self.get_scripture_by_collection(collection_name=collection_name)
        allowed_fields = {field["name"] for field in scripture["metadata_fields"]}

        def validate_clause(clause: "MetadataWhereClause") -> None:
            # Direct filters are checked before descending into nested groups.
            for f in clause.filters or ():
                if f.metadata_field not in allowed_fields:
                    raise Exception(
                        f"metadata_field: [{f.metadata_field}] not allowed in collection [{collection_name}]. "
                        f"Here are the allowed fields with their descriptions: {scripture['metadata_fields']}"
                    )
            for g in clause.groups or ():
                validate_clause(g)

        validate_clause(metadata_where_clause)
        return True

    def get_embedding_for_collection(self, collection_name: str) -> str:
        """Return the embedding-function key configured for a collection.

        Returns the entry's ``collection_embedding_fn`` when present, otherwise
        the default ``"hf"`` (Hugging Face sentence transformers).

        Raises:
            IndexError: If ``collection_name`` is not configured.
        """
        scripture = self.get_scripture_by_collection(collection_name)
        return scripture.get("collection_embedding_fn", "hf")

    def filter_scriptures_fields(self, fields_to_keep: List[str]) -> List[Dict]:
        """Return one dict per scripture containing only the requested keys.

        Keys in ``fields_to_keep`` that a scripture does not define are skipped
        for that scripture. Output order follows ``self.scriptures``; the values
        are the original objects, not copies.
        """
        return [
            {key: scripture[key] for key in fields_to_keep if key in scripture}
            for scripture in self.scriptures
        ]