vikramvasudevan committed on
Commit
90a01cc
·
verified ·
1 Parent(s): 2cfcf56

Upload folder using huggingface_hub

Browse files
config.py CHANGED
@@ -1,6 +1,10 @@
1
  from metadata import MetadataWhereClause
2
  from typing import List, Dict
3
 
 
 
 
 
4
  from modules.scripture_helpers.kamba_ramayanam_helper import (
5
  get_list_of_kandams,
6
  get_list_of_padalams,
@@ -101,19 +105,54 @@ class SanatanConfig:
101
  "output_dir": "./output/bhagavat_gita",
102
  "collection_name": "bhagavat_gita_openai",
103
  "collection_embedding_fn": "openai",
104
- "unit": "page",
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  "metadata_fields": [
106
  {
107
- "name": "file",
108
- "label": "File Name",
109
  "datatype": "str",
110
- "description": "name of the file from which the information was extracted",
 
 
 
 
 
111
  },
112
  {
113
- "name": "page",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  "datatype": "int",
115
- "label": "Page Number",
116
- "description": "Page number from the source",
117
  "show_as_filter": True,
118
  "is_unique": True,
119
  },
@@ -192,7 +231,7 @@ class SanatanConfig:
192
  "collection_name": "vishnu_sahasranamam_openai",
193
  "collection_embedding_fn": "openai",
194
  "unit": "verse",
195
- "chapter_order" : lambda: get_chapter_order_from_sahasranamam(),
196
  "field_mapping": {
197
  "text": "sanskrit",
198
  "title": lambda doc: f"Verse {doc.get('verse','')}",
@@ -200,7 +239,7 @@ class SanatanConfig:
200
  "transliteration": "transliteration",
201
  "word_by_word_native": "translation",
202
  "author": lambda doc: f"Sage Vyasa",
203
- "chapter_name" : "chapter",
204
  "relative_path": lambda doc: f"Vishnu Sahasranamam-{doc.get("chapter","")}-{doc.get("verse","")}",
205
  },
206
  "metadata_fields": [
@@ -791,9 +830,11 @@ class SanatanConfig:
791
  embedding_fn = scripture["collection_embedding_fn"] # overridden in config
792
  return embedding_fn
793
 
794
- def remove_callables(self,obj):
795
  if isinstance(obj, dict):
796
- return {k: self.remove_callables(v) for k, v in obj.items() if not callable(v)}
 
 
797
  elif isinstance(obj, list):
798
  return [self.remove_callables(v) for v in obj if not callable(v)]
799
  else:
@@ -825,6 +866,7 @@ class SanatanConfig:
825
  "unit",
826
  "unit_index",
827
  "word_by_word_native",
 
828
  "transliteration",
829
  "reference_link",
830
  "author",
 
1
  from metadata import MetadataWhereClause
2
  from typing import List, Dict
3
 
4
+ from modules.scripture_helpers.bhagavat_gita_helper import (
5
+ get_bhagavat_gita_chapters,
6
+ get_chapter_order_from_bhagavat_gita,
7
+ )
8
  from modules.scripture_helpers.kamba_ramayanam_helper import (
9
  get_list_of_kandams,
10
  get_list_of_padalams,
 
105
  "output_dir": "./output/bhagavat_gita",
106
  "collection_name": "bhagavat_gita_openai",
107
  "collection_embedding_fn": "openai",
108
+ "unit": "verse",
109
+ "unit_field": "verse_number",
110
+ "chapter_order": lambda: get_chapter_order_from_bhagavat_gita(),
111
+ "field_mapping": {
112
+ "text": "sanskrit",
113
+ "title": "verse_title",
114
+ "unit_index": "relative_verse_number",
115
+ "transliteration": "transliteration",
116
+ "word_by_word_native": "word_by_word_meaning",
117
+ "translation": "translation",
118
+ "author": lambda doc: f"Maharishi Vedvyas Ji",
119
+ "chapter_name": "chapter_title",
120
+ "relative_path": lambda doc: f"{doc.get("chapter_title","")}-{doc.get("relative_verse_number","")}",
121
+ },
122
  "metadata_fields": [
123
  {
124
+ "name": "chapter",
 
125
  "datatype": "str",
126
+ "label": "Chapter Name",
127
+ "description": "Name of the Chapter",
128
+ "show_as_filter": True,
129
+ "component": "dropdown",
130
+ "lov": lambda: get_bhagavat_gita_chapters(),
131
+ "is_unique": True,
132
  },
133
  {
134
+ "name": "sanskrit",
135
+ "label": "Lyrics in sanskrit",
136
+ "datatype": "str",
137
+ "description": "The original sloka in sanskrit.",
138
+ },
139
+ {
140
+ "name": "translation",
141
+ "label": "English Translation",
142
+ "datatype": "str",
143
+ "description": "The english translation.",
144
+ },
145
+ {
146
+ "name": "transliteration",
147
+ "label": "English Transliteration",
148
+ "datatype": "str",
149
+ "description": "The english transliteration.",
150
+ },
151
+ {
152
+ "name": "relative_verse_number",
153
+ "label": "Relative Verse Number",
154
  "datatype": "int",
155
+ "description": "The relative verse number of the sloka inside the chapter.",
 
156
  "show_as_filter": True,
157
  "is_unique": True,
158
  },
 
231
  "collection_name": "vishnu_sahasranamam_openai",
232
  "collection_embedding_fn": "openai",
233
  "unit": "verse",
234
+ "chapter_order": lambda: get_chapter_order_from_sahasranamam(),
235
  "field_mapping": {
236
  "text": "sanskrit",
237
  "title": lambda doc: f"Verse {doc.get('verse','')}",
 
239
  "transliteration": "transliteration",
240
  "word_by_word_native": "translation",
241
  "author": lambda doc: f"Sage Vyasa",
242
+ "chapter_name": "chapter",
243
  "relative_path": lambda doc: f"Vishnu Sahasranamam-{doc.get("chapter","")}-{doc.get("verse","")}",
244
  },
245
  "metadata_fields": [
 
830
  embedding_fn = scripture["collection_embedding_fn"] # overridden in config
831
  return embedding_fn
832
 
833
+ def remove_callables(self, obj):
834
  if isinstance(obj, dict):
835
+ return {
836
+ k: self.remove_callables(v) for k, v in obj.items() if not callable(v)
837
+ }
838
  elif isinstance(obj, list):
839
  return [self.remove_callables(v) for v in obj if not callable(v)]
840
  else:
 
866
  "unit",
867
  "unit_index",
868
  "word_by_word_native",
869
+ "translation",
870
  "transliteration",
871
  "reference_link",
872
  "author",
copy_chromadb.py CHANGED
@@ -18,6 +18,11 @@ db_config = {
18
  "source_collection_name": "vishnu_sahasranamam",
19
  "destination_collection_name": "vishnu_sahasranamam_openai",
20
  },
 
 
 
 
 
21
  }
22
 
23
  parser = argparse.ArgumentParser(description="My app with database parameter")
 
18
  "source_collection_name": "vishnu_sahasranamam",
19
  "destination_collection_name": "vishnu_sahasranamam_openai",
20
  },
21
+ "bhagavat_gita": {
22
+ "source_db_path": "../bhagavat_gita_chat/chromadb_store",
23
+ "source_collection_name": "bhagavat_gita",
24
+ "destination_collection_name": "bhagavat_gita_openai",
25
+ },
26
  }
27
 
28
  parser = argparse.ArgumentParser(description="My app with database parameter")
data/bhagavat_gita_chapters.json ADDED
The diff for this file is too large to render. See raw diff
 
modules/scripture_helpers/bhagavat_gita_helper.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
def get_chapter_order_from_bhagavat_gita() -> dict:
    """
    Build a lookup from chapter title to chapter number for bhagavat_gita.

    Reads ``./data/bhagavat_gita_chapters.json`` (resolved against the current
    working directory) and maps each entry's ``chapter_title`` to its
    ``chapter_number``. If a title occurs more than once, the last entry wins.

    Raises:
        FileNotFoundError: if the JSON file is not present under ``./data``.
        KeyError: if an entry lacks ``chapter_title`` or ``chapter_number``.
    """
    # NOTE(review): the file is assumed to hold a JSON array of objects —
    # confirm against data/bhagavat_gita_chapters.json.
    with open("./data/bhagavat_gita_chapters.json", "r", encoding="utf-8") as f:
        chapters = json.load(f)

    return {chapter["chapter_title"]: chapter["chapter_number"] for chapter in chapters}
9
+
10
def get_bhagavat_gita_chapters() -> list[str]:
    """
    Get a list of chapter names from bhagavat_gita.

    Reads ``./data/bhagavat_gita_chapters.json`` (resolved against the current
    working directory), collects every entry's ``chapter_title``, removes
    duplicates and returns the titles in ascending sorted order.

    Raises:
        FileNotFoundError: if the JSON file is not present under ``./data``.
        KeyError: if an entry lacks ``chapter_title``.
    """
    with open("./data/bhagavat_gita_chapters.json", "r", encoding="utf-8") as f:
        chapters = json.load(f)

    # A set comprehension de-duplicates without building a throwaway list.
    return sorted({chapter["chapter_title"] for chapter in chapters})