Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Upload folder using huggingface_hub
Browse files
- config.py +53 -11
- copy_chromadb.py +5 -0
- data/bhagavat_gita_chapters.json +0 -0
- modules/scripture_helpers/bhagavat_gita_helper.py +18 -0
config.py
CHANGED
|
@@ -1,6 +1,10 @@
|
|
| 1 |
from metadata import MetadataWhereClause
|
| 2 |
from typing import List, Dict
|
| 3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
from modules.scripture_helpers.kamba_ramayanam_helper import (
|
| 5 |
get_list_of_kandams,
|
| 6 |
get_list_of_padalams,
|
|
@@ -101,19 +105,54 @@ class SanatanConfig:
|
|
| 101 |
"output_dir": "./output/bhagavat_gita",
|
| 102 |
"collection_name": "bhagavat_gita_openai",
|
| 103 |
"collection_embedding_fn": "openai",
|
| 104 |
-
"unit": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
"metadata_fields": [
|
| 106 |
{
|
| 107 |
-
"name": "
|
| 108 |
-
"label": "File Name",
|
| 109 |
"datatype": "str",
|
| 110 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
},
|
| 112 |
{
|
| 113 |
-
"name": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
"datatype": "int",
|
| 115 |
-
"
|
| 116 |
-
"description": "Page number from the source",
|
| 117 |
"show_as_filter": True,
|
| 118 |
"is_unique": True,
|
| 119 |
},
|
|
@@ -192,7 +231,7 @@ class SanatanConfig:
|
|
| 192 |
"collection_name": "vishnu_sahasranamam_openai",
|
| 193 |
"collection_embedding_fn": "openai",
|
| 194 |
"unit": "verse",
|
| 195 |
-
"chapter_order"
|
| 196 |
"field_mapping": {
|
| 197 |
"text": "sanskrit",
|
| 198 |
"title": lambda doc: f"Verse {doc.get('verse','')}",
|
|
@@ -200,7 +239,7 @@ class SanatanConfig:
|
|
| 200 |
"transliteration": "transliteration",
|
| 201 |
"word_by_word_native": "translation",
|
| 202 |
"author": lambda doc: f"Sage Vyasa",
|
| 203 |
-
"chapter_name"
|
| 204 |
"relative_path": lambda doc: f"Vishnu Sahasranamam-{doc.get("chapter","")}-{doc.get("verse","")}",
|
| 205 |
},
|
| 206 |
"metadata_fields": [
|
|
@@ -791,9 +830,11 @@ class SanatanConfig:
|
|
| 791 |
embedding_fn = scripture["collection_embedding_fn"] # overridden in config
|
| 792 |
return embedding_fn
|
| 793 |
|
| 794 |
-
def remove_callables(self,obj):
|
| 795 |
if isinstance(obj, dict):
|
| 796 |
-
return {
|
|
|
|
|
|
|
| 797 |
elif isinstance(obj, list):
|
| 798 |
return [self.remove_callables(v) for v in obj if not callable(v)]
|
| 799 |
else:
|
|
@@ -825,6 +866,7 @@ class SanatanConfig:
|
|
| 825 |
"unit",
|
| 826 |
"unit_index",
|
| 827 |
"word_by_word_native",
|
|
|
|
| 828 |
"transliteration",
|
| 829 |
"reference_link",
|
| 830 |
"author",
|
|
|
|
| 1 |
from metadata import MetadataWhereClause
|
| 2 |
from typing import List, Dict
|
| 3 |
|
| 4 |
+
from modules.scripture_helpers.bhagavat_gita_helper import (
|
| 5 |
+
get_bhagavat_gita_chapters,
|
| 6 |
+
get_chapter_order_from_bhagavat_gita,
|
| 7 |
+
)
|
| 8 |
from modules.scripture_helpers.kamba_ramayanam_helper import (
|
| 9 |
get_list_of_kandams,
|
| 10 |
get_list_of_padalams,
|
|
|
|
| 105 |
"output_dir": "./output/bhagavat_gita",
|
| 106 |
"collection_name": "bhagavat_gita_openai",
|
| 107 |
"collection_embedding_fn": "openai",
|
| 108 |
+
"unit": "verse",
|
| 109 |
+
"unit_field": "verse_number",
|
| 110 |
+
"chapter_order": lambda: get_chapter_order_from_bhagavat_gita(),
|
| 111 |
+
"field_mapping": {
|
| 112 |
+
"text": "sanskrit",
|
| 113 |
+
"title": "verse_title",
|
| 114 |
+
"unit_index": "relative_verse_number",
|
| 115 |
+
"transliteration": "transliteration",
|
| 116 |
+
"word_by_word_native": "word_by_word_meaning",
|
| 117 |
+
"translation": "translation",
|
| 118 |
+
"author": lambda doc: f"Maharishi Vedvyas Ji",
|
| 119 |
+
"chapter_name": "chapter_title",
|
| 120 |
+
"relative_path": lambda doc: f"{doc.get("chapter_title","")}-{doc.get("relative_verse_number","")}",
|
| 121 |
+
},
|
| 122 |
"metadata_fields": [
|
| 123 |
{
|
| 124 |
+
"name": "chapter",
|
|
|
|
| 125 |
"datatype": "str",
|
| 126 |
+
"label": "Chapter Name",
|
| 127 |
+
"description": "Name of the Chapter",
|
| 128 |
+
"show_as_filter": True,
|
| 129 |
+
"component": "dropdown",
|
| 130 |
+
"lov": lambda: get_bhagavat_gita_chapters(),
|
| 131 |
+
"is_unique": True,
|
| 132 |
},
|
| 133 |
{
|
| 134 |
+
"name": "sanskrit",
|
| 135 |
+
"label": "Lyrics in sanskrit",
|
| 136 |
+
"datatype": "str",
|
| 137 |
+
"description": "The original sloka in sanskrit.",
|
| 138 |
+
},
|
| 139 |
+
{
|
| 140 |
+
"name": "translation",
|
| 141 |
+
"label": "English Translation",
|
| 142 |
+
"datatype": "str",
|
| 143 |
+
"description": "The english translation.",
|
| 144 |
+
},
|
| 145 |
+
{
|
| 146 |
+
"name": "transliteration",
|
| 147 |
+
"label": "English Transliteration",
|
| 148 |
+
"datatype": "str",
|
| 149 |
+
"description": "The english transliteration.",
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"name": "relative_verse_number",
|
| 153 |
+
"label": "Relative Verse Number",
|
| 154 |
"datatype": "int",
|
| 155 |
+
"description": "The relative verse number of the sloka inside the chapter.",
|
|
|
|
| 156 |
"show_as_filter": True,
|
| 157 |
"is_unique": True,
|
| 158 |
},
|
|
|
|
| 231 |
"collection_name": "vishnu_sahasranamam_openai",
|
| 232 |
"collection_embedding_fn": "openai",
|
| 233 |
"unit": "verse",
|
| 234 |
+
"chapter_order": lambda: get_chapter_order_from_sahasranamam(),
|
| 235 |
"field_mapping": {
|
| 236 |
"text": "sanskrit",
|
| 237 |
"title": lambda doc: f"Verse {doc.get('verse','')}",
|
|
|
|
| 239 |
"transliteration": "transliteration",
|
| 240 |
"word_by_word_native": "translation",
|
| 241 |
"author": lambda doc: f"Sage Vyasa",
|
| 242 |
+
"chapter_name": "chapter",
|
| 243 |
"relative_path": lambda doc: f"Vishnu Sahasranamam-{doc.get("chapter","")}-{doc.get("verse","")}",
|
| 244 |
},
|
| 245 |
"metadata_fields": [
|
|
|
|
| 830 |
embedding_fn = scripture["collection_embedding_fn"] # overridden in config
|
| 831 |
return embedding_fn
|
| 832 |
|
| 833 |
+
def remove_callables(self, obj):
|
| 834 |
if isinstance(obj, dict):
|
| 835 |
+
return {
|
| 836 |
+
k: self.remove_callables(v) for k, v in obj.items() if not callable(v)
|
| 837 |
+
}
|
| 838 |
elif isinstance(obj, list):
|
| 839 |
return [self.remove_callables(v) for v in obj if not callable(v)]
|
| 840 |
else:
|
|
|
|
| 866 |
"unit",
|
| 867 |
"unit_index",
|
| 868 |
"word_by_word_native",
|
| 869 |
+
"translation",
|
| 870 |
"transliteration",
|
| 871 |
"reference_link",
|
| 872 |
"author",
|
copy_chromadb.py
CHANGED
|
@@ -18,6 +18,11 @@ db_config = {
|
|
| 18 |
"source_collection_name": "vishnu_sahasranamam",
|
| 19 |
"destination_collection_name": "vishnu_sahasranamam_openai",
|
| 20 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
}
|
| 22 |
|
| 23 |
parser = argparse.ArgumentParser(description="My app with database parameter")
|
|
|
|
| 18 |
"source_collection_name": "vishnu_sahasranamam",
|
| 19 |
"destination_collection_name": "vishnu_sahasranamam_openai",
|
| 20 |
},
|
| 21 |
+
"bhagavat_gita": {
|
| 22 |
+
"source_db_path": "../bhagavat_gita_chat/chromadb_store",
|
| 23 |
+
"source_collection_name": "bhagavat_gita",
|
| 24 |
+
"destination_collection_name": "bhagavat_gita_openai",
|
| 25 |
+
},
|
| 26 |
}
|
| 27 |
|
| 28 |
parser = argparse.ArgumentParser(description="My app with database parameter")
|
data/bhagavat_gita_chapters.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
modules/scripture_helpers/bhagavat_gita_helper.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
|
| 3 |
+
# Location of the chapter metadata file. The path is relative, so it is
# resolved against the working directory of the running process.
_CHAPTERS_PATH = "./data/bhagavat_gita_chapters.json"


def _load_chapters():
    """Parse the chapters JSON file and return its contents unchanged."""
    with open(_CHAPTERS_PATH, "r", encoding="utf-8") as f:
        return json.load(f)


def get_chapter_order_from_bhagavat_gita() -> dict:
    """
    Map each chapter title to its chapter number.

    The chapters file is re-read on every call. Every entry must provide the
    "chapter_title" and "chapter_number" keys (a missing key raises KeyError).
    When several entries share a title, the last one read determines the
    number stored for that title.
    """
    return {
        chapter["chapter_title"]: chapter["chapter_number"]
        for chapter in _load_chapters()
    }


def get_bhagavat_gita_chapters() -> list[str]:
    """
    Get a list of chapter names from bhagavat_gita

    The chapters file is re-read on every call. Titles are de-duplicated and
    returned in sorted order of the title strings, not in chapter-number order.
    """
    return sorted({chapter["chapter_title"] for chapter in _load_chapters()})
|