vikramvasudevan committed on
Commit
a116972
·
verified ·
1 Parent(s): eff6b9e

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. config.py +57 -0
  2. tools.py +8 -5
config.py CHANGED
@@ -14,6 +14,11 @@ class SanatanConfig:
14
  "title": "Sri Vishnu Puranam",
15
  "output_dir": "./output/vishnu_puranam",
16
  "collection_name": "vishnu_puranam",
 
 
 
 
 
17
  "pdf_path": "./data/vishnu_puranam.pdf",
18
  "source": "https://dn720005.ca.archive.org/0/items/vishnu-purana-sanskrit-english-ocr/VISHNU-PURANA-Sanskrit-English-OCR.pdf",
19
  "language": "san+eng",
@@ -35,6 +40,11 @@ class SanatanConfig:
35
  "title": "Shukla Yajur Vedam",
36
  "output_dir": "./output/shukla_yajur_vedam",
37
  "collection_name": "shukla_yajur_vedam",
 
 
 
 
 
38
  "pdf_path": "./data/shukla-yajur-veda.pdf",
39
  "source": "https://www.thearyasamaj.org/uploads/book/2014/04/R1sSjG_eLb_sub_406_yajurveda.pdf",
40
  "language": "san+eng",
@@ -54,6 +64,11 @@ class SanatanConfig:
54
  "title": "Bhagavat Gita",
55
  "output_dir": "./output/bhagavat_gita",
56
  "collection_name": "bhagavat_gita",
 
 
 
 
 
57
  "pdf_path": "./data/bhagavat_gita.pdf",
58
  "source": "https://dn790006.ca.archive.org/0/items/in.gov.ignca.279/279_text.pdf",
59
  "language": "san+eng",
@@ -77,6 +92,11 @@ class SanatanConfig:
77
  "title": "Valmiki Ramayanam",
78
  "output_dir": "./output/valmiki_ramayanam",
79
  "collection_name": "valmiki_ramayanam",
 
 
 
 
 
80
  "pdf_path": "./data/valmiki_ramayanam.pdf",
81
  "source": "https://ia800509.us.archive.org/28/items/valmiki-ramayana-gita-press-english/Valmiki%20Ramayana%20Gita%20Press%20English.pdf",
82
  "language": "san+eng",
@@ -100,6 +120,14 @@ class SanatanConfig:
100
  "title": "Vishnu Sahasranamam",
101
  "output_dir": "./output/vishnu_sahasranamam",
102
  "collection_name": "vishnu_sahasranamam",
 
 
 
 
 
 
 
 
103
  "pdf_path": "./data/vishnu_sahasranamam.pdf",
104
  "source": "https://www.swami-krishnananda.org/vishnu/Sri_Vishnu_Sahasranama_Stotram.pdf",
105
  "language": "san+eng",
@@ -115,6 +143,18 @@ class SanatanConfig:
115
  "title": "4000 Divya Prabandham",
116
  "output_dir": "./output/divya_prabandham",
117
  "collection_name": "divya_prabandham",
 
 
 
 
 
 
 
 
 
 
 
 
118
  "pdf_path": "./data/divya_prabandham.pdf",
119
  "source": "https://uveda.org",
120
  "language": "tam",
@@ -136,6 +176,11 @@ class SanatanConfig:
136
  "title": "Bhagavatha Puranam",
137
  "output_dir": "./output/bhagavata_purana",
138
  "collection_name": "bhagavata_purana",
 
 
 
 
 
139
  "pdf_path": "./data/bhagavata_purana.pdf",
140
  "source": "https://dn790003.ca.archive.org/0/items/bhagavatapuranagitapress_201907/Bhagavata%20Purana%20-%20Gita%20Press_text.pdf",
141
  "language": "san+eng",
@@ -150,6 +195,14 @@ class SanatanConfig:
150
  "title": "Kamba Ramayanam (English)",
151
  "output_dir": "./output/kamba_ramayanam",
152
  "collection_name": "kamba_ramayanam_en",
 
 
 
 
 
 
 
 
153
  "pdf_path": "./data/kamba_ramayanam.pdf",
154
  "source": "https://www.hindupedia.com/images/1/13/Kamba_Ramayanam_I.pdf",
155
  "language": "tam",
@@ -173,6 +226,10 @@ class SanatanConfig:
173
  "title": "Kamba Ramayanam (Tamil)",
174
  "output_dir": "./output/kamba_ramayanam",
175
  "collection_name": "kamba_ramayanam",
 
 
 
 
176
  "pdf_path": "./data/kamba_ramayanam.pdf",
177
  "source": "https://archive.org/details/vrajeshkumar_gmail_061/01-%E0%AE%AA%E0%AE%BE%E0%AE%B2%20%E0%AE%95%E0%AE%BE%E0%AE%A3%E0%AF%8D%E0%AE%9F%E0%AE%AE%E0%AF%8D/page/n15/mode/2up",
178
  "language": "tam",
 
14
  "title": "Sri Vishnu Puranam",
15
  "output_dir": "./output/vishnu_puranam",
16
  "collection_name": "vishnu_puranam",
17
+ "metadata_fields" : [
18
+ {"name": "file", "datatype": "str"},
19
+ {"name": "num_chars", "datatype": "str"},
20
+ {"name": "page", "datatype": "int"},
21
+ ],
22
  "pdf_path": "./data/vishnu_puranam.pdf",
23
  "source": "https://dn720005.ca.archive.org/0/items/vishnu-purana-sanskrit-english-ocr/VISHNU-PURANA-Sanskrit-English-OCR.pdf",
24
  "language": "san+eng",
 
40
  "title": "Shukla Yajur Vedam",
41
  "output_dir": "./output/shukla_yajur_vedam",
42
  "collection_name": "shukla_yajur_vedam",
43
+ "metadata_fields" : [
44
+ {"name": "file", "datatype": "str"},
45
+ {"name": "num_chars", "datatype": "str"},
46
+ {"name": "page", "datatype": "int"},
47
+ ],
48
  "pdf_path": "./data/shukla-yajur-veda.pdf",
49
  "source": "https://www.thearyasamaj.org/uploads/book/2014/04/R1sSjG_eLb_sub_406_yajurveda.pdf",
50
  "language": "san+eng",
 
64
  "title": "Bhagavat Gita",
65
  "output_dir": "./output/bhagavat_gita",
66
  "collection_name": "bhagavat_gita",
67
+ "metadata_fields" : [
68
+ {"name": "file", "datatype": "str"},
69
+ {"name": "num_chars", "datatype": "str"},
70
+ {"name": "page", "datatype": "int"},
71
+ ],
72
  "pdf_path": "./data/bhagavat_gita.pdf",
73
  "source": "https://dn790006.ca.archive.org/0/items/in.gov.ignca.279/279_text.pdf",
74
  "language": "san+eng",
 
92
  "title": "Valmiki Ramayanam",
93
  "output_dir": "./output/valmiki_ramayanam",
94
  "collection_name": "valmiki_ramayanam",
95
+ "metadata_fields" : [
96
+ {"name": "file", "datatype": "str"},
97
+ {"name": "num_chars", "datatype": "str"},
98
+ {"name": "page", "datatype": "int"},
99
+ ],
100
  "pdf_path": "./data/valmiki_ramayanam.pdf",
101
  "source": "https://ia800509.us.archive.org/28/items/valmiki-ramayana-gita-press-english/Valmiki%20Ramayana%20Gita%20Press%20English.pdf",
102
  "language": "san+eng",
 
120
  "title": "Vishnu Sahasranamam",
121
  "output_dir": "./output/vishnu_sahasranamam",
122
  "collection_name": "vishnu_sahasranamam",
123
+ "metadata_fields" : [
124
+ {"name": "chapter", "datatype": "str"},
125
+ {"name": "page_number", "datatype": "int"},
126
+ {"name": "sanskrit", "datatype": "str"},
127
+ {"name": "translation", "datatype": "str"},
128
+ {"name": "transliteration", "datatype": "str"},
129
+ {"name": "verse", "datatype": "int"},
130
+ ],
131
  "pdf_path": "./data/vishnu_sahasranamam.pdf",
132
  "source": "https://www.swami-krishnananda.org/vishnu/Sri_Vishnu_Sahasranama_Stotram.pdf",
133
  "language": "san+eng",
 
143
  "title": "4000 Divya Prabandham",
144
  "output_dir": "./output/divya_prabandham",
145
  "collection_name": "divya_prabandham",
146
+ "metadata_fields" : [
147
+ {"name": "prabandham", "datatype": "str", "description" : "contains the prabandham code. e.g. `TPL` for `Thiruppallandu`"},
148
+ {"name": "prabandham_name", "datatype": "str", "description" : "contains the prabandham name. e.g. `Thiruppallandu`"},
149
+ {"name": "azhwar_name", "datatype": "str", "description" : "contains the azhwar name. e.g. `Thirumangai Azhwar`"},
150
+ {"name": "divya_desams", "datatype": "str", "description" : "comma separated list of divya desams. e.g. Thiruneermalai,Thiruvallikkeni"},
151
+ {"name": "html_url", "datatype": "str", "description" : "Reference link for the source"},
152
+ {"name": "pasuram_en", "datatype": "str", "description" : "Transliteration of pasuram in english"},
153
+ {"name": "pasuram_ta", "datatype": "str", "description" : "Pasuram lyrics in tamil"},
154
+ {"name": "title", "datatype": "str", "description" : "Title of this pasuram. for example `TVM 1.8.3`"},
155
+ {"name": "verse", "datatype": "int", "description" : "Verse number or pasuram number."},
156
+ {"name": "wbw_ta", "datatype": "str", "description" : "Word by word meaning in tamil."},
157
+ ],
158
  "pdf_path": "./data/divya_prabandham.pdf",
159
  "source": "https://uveda.org",
160
  "language": "tam",
 
176
  "title": "Bhagavatha Puranam",
177
  "output_dir": "./output/bhagavata_purana",
178
  "collection_name": "bhagavata_purana",
179
+ "metadata_fields" : [
180
+ {"name": "file", "datatype": "str"},
181
+ {"name": "num_chars", "datatype": "str"},
182
+ {"name": "page", "datatype": "int"},
183
+ ],
184
  "pdf_path": "./data/bhagavata_purana.pdf",
185
  "source": "https://dn790003.ca.archive.org/0/items/bhagavatapuranagitapress_201907/Bhagavata%20Purana%20-%20Gita%20Press_text.pdf",
186
  "language": "san+eng",
 
195
  "title": "Kamba Ramayanam (English)",
196
  "output_dir": "./output/kamba_ramayanam",
197
  "collection_name": "kamba_ramayanam_en",
198
+ "metadata_fields" : [
199
+ {"name": "file", "datatype": "str", "description" : "The name of the Kandam or the chapter."},
200
+ {"name": "padalam_en", "datatype": "str", "description" : "The name of the Padalam (Episode) in English."},
201
+ {"name": "padalam_ta", "datatype": "str", "description" : "The name of the Padalam (Episode) in Tamil."},
202
+ {"name": "page", "datatype": "int"},
203
+ {"name": "verse_number", "datatype": "int"},
204
+ ],
205
+
206
  "pdf_path": "./data/kamba_ramayanam.pdf",
207
  "source": "https://www.hindupedia.com/images/1/13/Kamba_Ramayanam_I.pdf",
208
  "language": "tam",
 
226
  "title": "Kamba Ramayanam (Tamil)",
227
  "output_dir": "./output/kamba_ramayanam",
228
  "collection_name": "kamba_ramayanam",
229
+ "metadata_fields" : [
230
+ {"name": "chunk_index", "datatype": "int", "description" : "The index of the chunk"},
231
+ {"name": "filename", "datatype": "str", "description" : "The name of the file."},
232
+ ],
233
  "pdf_path": "./data/kamba_ramayanam.pdf",
234
  "source": "https://archive.org/details/vrajeshkumar_gmail_061/01-%E0%AE%AA%E0%AE%BE%E0%AE%B2%20%E0%AE%95%E0%AE%BE%E0%AE%A3%E0%AF%8D%E0%AE%9F%E0%AE%AE%E0%AF%8D/page/n15/mode/2up",
235
  "language": "tam",
tools.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from langchain.agents import Tool
2
  from langchain_core.tools import StructuredTool
3
 
@@ -29,11 +30,13 @@ tool_search_db_by_metadata = StructuredTool.from_function(
29
  f"The collection_name must be one of: {', '.join(allowed_collections)}."
30
  " Use this to find relevant scripture verses or explanations."
31
  "if the user asks for a specific azhwar, use the `tool_get_standardized_azhwar_names` tool to get the standard name first and then pass to this tool to filter pasurams based on azhwar_name."
32
- "be aware that verse numbers are sometimes stored as strings and sometimes as mumbers, so if str search does not yield results, try passing in the metadata_value as a number instead"
33
- "in the context of divya_prabandham, the verse/pasuram number is stored in metadata as the field `verse` and it is stored as an int."
34
- "in the context of sahasranamam, the verse/pasuram number is stored in metadata as the field `verse` and it is stored as an int."
35
- "in the context of kamba_ramayanam, the verse number is stored in metadata as the field `verse_number` and it is stored as a string datatype."
36
- "for other scriptures, the verse number is stored either as `verse` or `verse_number` fields and it can be either str or int so check for both whichever yields results."
 
 
37
  ),
38
  )
39
 
 
1
+ import json
2
  from langchain.agents import Tool
3
  from langchain_core.tools import StructuredTool
4
 
 
30
  f"The collection_name must be one of: {', '.join(allowed_collections)}."
31
  " Use this to find relevant scripture verses or explanations."
32
  "if the user asks for a specific azhwar, use the `tool_get_standardized_azhwar_names` tool to get the standard name first and then pass to this tool to filter pasurams based on azhwar_name."
33
+ "if the user asks for a specific prabandham name, use the `tool_get_standardized_azhwar_names` tool to get the standard prabandham name first and then pass to this tool to filter pasurams based on prabandham_name."
34
+ f"use this configuration for reference :\n{json.dumps(SanatanConfig.scriptures, indent=1)}\n"
35
+ # "be aware that verse numbers are sometimes stored as strings and sometimes as mumbers, so if str search does not yield results, try passing in the metadata_value as a number instead"
36
+ # "in the context of divya_prabandham, the verse/pasuram number is stored in metadata as the field `verse` and it is stored as an int."
37
+ # "in the context of sahasranamam, the verse/pasuram number is stored in metadata as the field `verse` and it is stored as an int."
38
+ # "in the context of kamba_ramayanam, the verse number is stored in metadata as the field `verse_number` and it is stored as a string datatype."
39
+ # "for other scriptures, the verse number is stored either as `verse` or `verse_number` fields and it can be either str or int so check for both whichever yields results."
40
  ),
41
  )
42