vikramvasudevan committed on
Commit
7b33394
·
verified ·
1 Parent(s): 0412bab

Upload folder using huggingface_hub

Browse files
app.py CHANGED
@@ -58,6 +58,8 @@ def init():
58
  )
59
  downloader.unzip(zip_path, extract_to="./")
60
 
 
 
61
 
62
  def render_message_with_tooltip(content: str, max_chars=200):
63
  short = escape(content[:max_chars]) + ("…" if len(content) > max_chars else "")
 
58
  )
59
  downloader.unzip(zip_path, extract_to="./")
60
 
61
+ # add global index
62
+ SanatanDatabase().build_global_index_for_all_scriptures()
63
 
64
  def render_message_with_tooltip(content: str, max_chars=200):
65
  short = escape(content[:max_chars]) + ("…" if len(content) > max_chars else "")
config.py CHANGED
@@ -1,11 +1,15 @@
1
  from metadata import MetadataWhereClause
2
  from typing import List, Dict
3
 
4
- from modules.kamba_ramayanam_helper import (
5
  get_list_of_kandams,
6
  get_list_of_padalams,
7
  get_list_of_padalams_in_tamil,
8
  )
 
 
 
 
9
  import nalayiram_helper
10
 
11
 
@@ -32,6 +36,7 @@ class SanatanConfig:
32
  "label": "Page Number",
33
  "description": "Page number from the source",
34
  "show_as_filter": True,
 
35
  },
36
  ],
37
  "pdf_path": "./data/vishnu_puranam.pdf",
@@ -72,6 +77,7 @@ class SanatanConfig:
72
  "label": "Page Number",
73
  "description": "Page number from the source",
74
  "show_as_filter": True,
 
75
  },
76
  ],
77
  "pdf_path": "./data/shukla-yajur-veda.pdf",
@@ -109,6 +115,7 @@ class SanatanConfig:
109
  "label": "Page Number",
110
  "description": "Page number from the source",
111
  "show_as_filter": True,
 
112
  },
113
  ],
114
  "pdf_path": "./data/bhagavat_gita.pdf",
@@ -152,6 +159,7 @@ class SanatanConfig:
152
  "label": "Page Number",
153
  "description": "Page number from the source",
154
  "show_as_filter": True,
 
155
  },
156
  ],
157
  "pdf_path": "./data/valmiki_ramayanam.pdf",
@@ -184,6 +192,7 @@ class SanatanConfig:
184
  "collection_name": "vishnu_sahasranamam_openai",
185
  "collection_embedding_fn": "openai",
186
  "unit": "verse",
 
187
  "field_mapping": {
188
  "text": "sanskrit",
189
  "title": lambda doc: f"Verse {doc.get('verse','')}",
@@ -191,6 +200,7 @@ class SanatanConfig:
191
  "transliteration": "transliteration",
192
  "word_by_word_native": "translation",
193
  "author": lambda doc: f"Sage Vyasa",
 
194
  "relative_path": lambda doc: f"Vishnu Sahasranamam-{doc.get("chapter","")}-{doc.get("verse","")}",
195
  },
196
  "metadata_fields": [
@@ -199,6 +209,10 @@ class SanatanConfig:
199
  "datatype": "str",
200
  "label": "Chapter Name",
201
  "description": "Name of the Chapter",
 
 
 
 
202
  },
203
  {
204
  "name": "page_number",
@@ -211,28 +225,26 @@ class SanatanConfig:
211
  "label": "Lyrics in sanskrit",
212
  "datatype": "str",
213
  "description": "The original sloka in sanskrit.",
214
- "show_as_filter": True,
215
  },
216
  {
217
  "name": "translation",
218
  "label": "English Translation",
219
  "datatype": "str",
220
  "description": "The english translation.",
221
- "show_as_filter": True,
222
  },
223
  {
224
  "name": "transliteration",
225
  "label": "English Transliteration",
226
  "datatype": "str",
227
  "description": "The english transliteration.",
228
- "show_as_filter": True,
229
  },
230
  {
231
  "name": "verse",
232
- "labek": "Verse Number",
233
  "datatype": "int",
234
  "description": "The verse number of the sloka.",
235
  "show_as_filter": True,
 
236
  },
237
  ],
238
  "pdf_path": "./data/vishnu_sahasranamam.pdf",
@@ -427,6 +439,7 @@ class SanatanConfig:
427
  "label": "Page Number",
428
  "description": "Page number from the source",
429
  "show_as_filter": True,
 
430
  },
431
  ],
432
  "pdf_path": "./data/bhagavata_purana.pdf",
@@ -495,6 +508,7 @@ class SanatanConfig:
495
  "label": "Verse Number",
496
  "description": "Verse Number",
497
  "show_as_filter": True,
 
498
  },
499
  ],
500
  "pdf_path": "./data/kamba_ramayanam.pdf",
@@ -538,6 +552,7 @@ class SanatanConfig:
538
  "datatype": "int",
539
  "description": "The index of the chunk",
540
  "show_as_filter": True,
 
541
  },
542
  {
543
  "name": "filename",
@@ -588,6 +603,7 @@ class SanatanConfig:
588
  "datatype": "int",
589
  "description": "The index of the sloka or verse",
590
  "show_as_filter": True,
 
591
  },
592
  {
593
  "name": "meaning_short",
@@ -631,6 +647,7 @@ class SanatanConfig:
631
  "datatype": "int",
632
  "description": "The index of the sloka or verse",
633
  "show_as_filter": True,
 
634
  },
635
  {
636
  "name": "meaning_short",
@@ -686,6 +703,7 @@ class SanatanConfig:
686
  "datatype": "str",
687
  "description": "The video id as in YouTube",
688
  "show_as_filter": True,
 
689
  },
690
  {
691
  "name": "video_title",
@@ -792,6 +810,8 @@ class SanatanConfig:
792
  Only allows keys from the allowed canonical fields list.
793
  """
794
  allowed_keys = {
 
 
795
  "verse",
796
  "text",
797
  "title",
@@ -843,6 +863,8 @@ class SanatanConfig:
843
  canonical_doc["document"] = "-"
844
  verse = resolve_field(config.get("unit_field", config.get("unit")))
845
  canonical_doc["verse"] = int(verse) if verse else 0
 
 
846
  return canonical_doc
847
 
848
  def get_collection_name(self, scripture_name):
 
1
  from metadata import MetadataWhereClause
2
  from typing import List, Dict
3
 
4
+ from modules.scripture_helpers.kamba_ramayanam_helper import (
5
  get_list_of_kandams,
6
  get_list_of_padalams,
7
  get_list_of_padalams_in_tamil,
8
  )
9
+ from modules.scripture_helpers.vishnu_sahasranamam_helper import (
10
+ get_chapter_order_from_sahasranamam,
11
+ get_chapters_from_sahasranamam,
12
+ )
13
  import nalayiram_helper
14
 
15
 
 
36
  "label": "Page Number",
37
  "description": "Page number from the source",
38
  "show_as_filter": True,
39
+ "is_unique": True,
40
  },
41
  ],
42
  "pdf_path": "./data/vishnu_puranam.pdf",
 
77
  "label": "Page Number",
78
  "description": "Page number from the source",
79
  "show_as_filter": True,
80
+ "is_unique": True,
81
  },
82
  ],
83
  "pdf_path": "./data/shukla-yajur-veda.pdf",
 
115
  "label": "Page Number",
116
  "description": "Page number from the source",
117
  "show_as_filter": True,
118
+ "is_unique": True,
119
  },
120
  ],
121
  "pdf_path": "./data/bhagavat_gita.pdf",
 
159
  "label": "Page Number",
160
  "description": "Page number from the source",
161
  "show_as_filter": True,
162
+ "is_unique": True,
163
  },
164
  ],
165
  "pdf_path": "./data/valmiki_ramayanam.pdf",
 
192
  "collection_name": "vishnu_sahasranamam_openai",
193
  "collection_embedding_fn": "openai",
194
  "unit": "verse",
195
+ "chapter_order" : lambda: get_chapter_order_from_sahasranamam(),
196
  "field_mapping": {
197
  "text": "sanskrit",
198
  "title": lambda doc: f"Verse {doc.get('verse','')}",
 
200
  "transliteration": "transliteration",
201
  "word_by_word_native": "translation",
202
  "author": lambda doc: f"Sage Vyasa",
203
+ "chapter_name" : "chapter",
204
  "relative_path": lambda doc: f"Vishnu Sahasranamam-{doc.get("chapter","")}-{doc.get("verse","")}",
205
  },
206
  "metadata_fields": [
 
209
  "datatype": "str",
210
  "label": "Chapter Name",
211
  "description": "Name of the Chapter",
212
+ "show_as_filter": True,
213
+ "component": "dropdown",
214
+ "lov": lambda: get_chapters_from_sahasranamam(),
215
+ "is_unique": True,
216
  },
217
  {
218
  "name": "page_number",
 
225
  "label": "Lyrics in sanskrit",
226
  "datatype": "str",
227
  "description": "The original sloka in sanskrit.",
 
228
  },
229
  {
230
  "name": "translation",
231
  "label": "English Translation",
232
  "datatype": "str",
233
  "description": "The english translation.",
 
234
  },
235
  {
236
  "name": "transliteration",
237
  "label": "English Transliteration",
238
  "datatype": "str",
239
  "description": "The english transliteration.",
 
240
  },
241
  {
242
  "name": "verse",
243
+ "label": "Verse Number",
244
  "datatype": "int",
245
  "description": "The verse number of the sloka.",
246
  "show_as_filter": True,
247
+ "is_unique": True,
248
  },
249
  ],
250
  "pdf_path": "./data/vishnu_sahasranamam.pdf",
 
439
  "label": "Page Number",
440
  "description": "Page number from the source",
441
  "show_as_filter": True,
442
+ "is_unique": True,
443
  },
444
  ],
445
  "pdf_path": "./data/bhagavata_purana.pdf",
 
508
  "label": "Verse Number",
509
  "description": "Verse Number",
510
  "show_as_filter": True,
511
+ "is_unique": True,
512
  },
513
  ],
514
  "pdf_path": "./data/kamba_ramayanam.pdf",
 
552
  "datatype": "int",
553
  "description": "The index of the chunk",
554
  "show_as_filter": True,
555
+ "is_unique": True,
556
  },
557
  {
558
  "name": "filename",
 
603
  "datatype": "int",
604
  "description": "The index of the sloka or verse",
605
  "show_as_filter": True,
606
+ "is_unique": True,
607
  },
608
  {
609
  "name": "meaning_short",
 
647
  "datatype": "int",
648
  "description": "The index of the sloka or verse",
649
  "show_as_filter": True,
650
+ "is_unique": True,
651
  },
652
  {
653
  "name": "meaning_short",
 
703
  "datatype": "str",
704
  "description": "The video id as in YouTube",
705
  "show_as_filter": True,
706
+ "is_unique": True,
707
  },
708
  {
709
  "name": "video_title",
 
810
  Only allows keys from the allowed canonical fields list.
811
  """
812
  allowed_keys = {
813
+ "_global_index",
814
+ "id",
815
  "verse",
816
  "text",
817
  "title",
 
863
  canonical_doc["document"] = "-"
864
  verse = resolve_field(config.get("unit_field", config.get("unit")))
865
  canonical_doc["verse"] = int(verse) if verse else 0
866
+ canonical_doc["id"] = resolve_field("id")
867
+ canonical_doc["_global_index"] = resolve_field("_global_index")
868
  return canonical_doc
869
 
870
  def get_collection_name(self, scripture_name):
db.py CHANGED
@@ -1,4 +1,5 @@
1
- import json
 
2
  import random
3
  from typing import Literal
4
  import chromadb
@@ -51,6 +52,7 @@ class SanatanDatabase:
51
  )
52
  collection = self.chroma_client.get_or_create_collection(name=collection_name)
53
  data = collection.get(
 
54
  where=(
55
  metadata_where_clause.to_chroma_where()
56
  if metadata_where_clause is not None
@@ -74,6 +76,39 @@ class SanatanDatabase:
74
  metadatas=[metas[i] for i in indices],
75
  )
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  def search(
78
  self,
79
  collection_name: str,
@@ -112,7 +147,7 @@ class SanatanDatabase:
112
  n_results=n_results,
113
  )
114
 
115
- def fetch_document_by_index(self, collection_name: str, index: int, unit_name: str):
116
  """
117
  Fetch one document at a time from a ChromaDB collection using pagination (index = 0-based).
118
 
@@ -129,7 +164,7 @@ class SanatanDatabase:
129
  }
130
  Or a dict with "error" key if something went wrong.
131
  """
132
- logger.info("fetching %s #%d from [%s]", unit_name, index, collection_name)
133
  collection = self.chroma_client.get_or_create_collection(name=collection_name)
134
 
135
  try:
@@ -137,31 +172,27 @@ class SanatanDatabase:
137
  limit=1,
138
  # offset=index, # pagination via offset
139
  include=["metadatas", "documents"],
140
- where={"$or": [{unit_name: index}, {unit_name: str(index)}]},
141
  )
142
  except Exception as e:
143
- logger.error("Error fetching document: %s", e)
144
  return {"error": f"There was an error fetching the document: {str(e)}"}
145
 
146
  documents = response.get("documents", [])
147
  metadatas = response.get("metadatas", [])
 
148
 
149
  if documents:
150
  # merge document text with metadata
151
  result = {"document": documents[0]}
152
  if metadatas:
153
  result.update(metadatas[0])
154
- # print("raw data = ", result)
 
 
155
  return result
156
  else:
157
  print("No data available")
158
- if index == 1:
159
- # there should be atleast one row in the collection?
160
- # add index
161
- self.add_unit_index_to_collection(
162
- collection_name=collection_name, unit_field=unit_name
163
- )
164
-
165
  # show a sample data record
166
  response1 = collection.get(
167
  limit=2,
@@ -449,7 +480,10 @@ class SanatanDatabase:
449
  print(
450
  f"✅ Finished adding {unit_field} to {unit_counter-1} documents in {collection_name}."
451
  )
452
- def get_list_of_values(self, collection_name: str, metadata_field_name: str) -> list:
 
 
 
453
  """
454
  Returns the unique values for a given metadata field in a collection.
455
  """
@@ -477,3 +511,146 @@ class SanatanDatabase:
477
  values.add(md[metadata_field_name])
478
 
479
  return sorted(list(values))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
  import random
4
  from typing import Literal
5
  import chromadb
 
52
  )
53
  collection = self.chroma_client.get_or_create_collection(name=collection_name)
54
  data = collection.get(
55
+ include=["metadatas", "documents"],
56
  where=(
57
  metadata_where_clause.to_chroma_where()
58
  if metadata_where_clause is not None
 
76
  metadatas=[metas[i] for i in indices],
77
  )
78
 
79
def fetch_first_match(
    self,
    collection_name: str,
    metadata_where_clause: MetadataWhereClause = None,
):
    """Return the first record in a collection that matches a metadata filter.

    Created to support the browse module.

    Args:
        collection_name: Name of the ChromaDB collection to query.
        metadata_where_clause: Optional filter; when given, its
            ``to_chroma_where()`` output is passed as the ``where`` argument.
            ``None`` means no filtering.

    Returns:
        The raw result of ``collection.get`` (at most one record, including
        metadatas and documents), or an empty ``chromadb.GetResult`` when
        nothing matched.
    """
    logger.info(
        "getting first matching verses from [%s] | metadata_where_clause = %s",
        collection_name,
        metadata_where_clause,
    )
    collection = self.chroma_client.get_or_create_collection(name=collection_name)

    where = (
        metadata_where_clause.to_chroma_where()
        if metadata_where_clause is not None
        else None
    )
    data = collection.get(
        limit=1,  # hardcoded to 1 by design
        include=["metadatas", "documents"],
        where=where,
    )

    if not data["documents"]:
        logger.warning("No data found! - data=%s", data)
        return chromadb.GetResult(ids=[], documents=[], metadatas=[])

    return data
111
+
112
  def search(
113
  self,
114
  collection_name: str,
 
147
  n_results=n_results,
148
  )
149
 
150
+ def fetch_document_by_index(self, collection_name: str, index: int):
151
  """
152
  Fetch one document at a time from a ChromaDB collection using pagination (index = 0-based).
153
 
 
164
  }
165
  Or a dict with "error" key if something went wrong.
166
  """
167
+ logger.info("fetching index %d from [%s]", index, collection_name)
168
  collection = self.chroma_client.get_or_create_collection(name=collection_name)
169
 
170
  try:
 
172
  limit=1,
173
  # offset=index, # pagination via offset
174
  include=["metadatas", "documents"],
175
+ where={"_global_index": index},
176
  )
177
  except Exception as e:
178
+ logger.error("Error fetching document: %s", e, exc_info=True)
179
  return {"error": f"There was an error fetching the document: {str(e)}"}
180
 
181
  documents = response.get("documents", [])
182
  metadatas = response.get("metadatas", [])
183
+ ids = response.get("ids", [])
184
 
185
  if documents:
186
  # merge document text with metadata
187
  result = {"document": documents[0]}
188
  if metadatas:
189
  result.update(metadatas[0])
190
+ if ids:
191
+ result["id"] = ids[0]
192
+ print("raw data = ", result)
193
  return result
194
  else:
195
  print("No data available")
 
 
 
 
 
 
 
196
  # show a sample data record
197
  response1 = collection.get(
198
  limit=2,
 
480
  print(
481
  f"✅ Finished adding {unit_field} to {unit_counter-1} documents in {collection_name}."
482
  )
483
+
484
+ def get_list_of_values(
485
+ self, collection_name: str, metadata_field_name: str
486
+ ) -> list:
487
  """
488
  Returns the unique values for a given metadata field in a collection.
489
  """
 
511
  values.add(md[metadata_field_name])
512
 
513
  return sorted(list(values))
514
+
515
def build_global_index_for_all_scriptures(self, force: bool = False):
    """Assign a 1-based ``_global_index`` metadata value to every record.

    For each scripture in ``SanatanConfig().scriptures`` the records of its
    collection are ordered by the metadata fields flagged ``is_unique`` (or by
    the first metadata field when none is flagged) and then upserted back with
    ``_global_index`` set to the record's position in that ordering.

    Ordering rules for a single field value:
      * numbers and all-decimal strings sort numerically, before any text;
      * other strings sort case-insensitively after the numbers;
      * missing values (``None``/NaN) sort last;
      * a field named ``chapter`` uses the scripture's ``chapter_order``
        mapping when one is configured; unknown chapters sort last.
    Records that tie keep the order in which the collection returned them.

    Args:
        force: Rebuild the index even if the first record of a collection
            already carries ``_global_index``.
    """

    def sort_key(value, chapter_positions=None):
        # Always returns a (rank, number, text) tuple so that values of
        # different types never get compared with each other directly.
        missing = (2, 0, "")
        if chapter_positions:
            position = chapter_positions.get(value)
            return missing if position is None else (0, position, "")
        if value is None:
            return missing
        if isinstance(value, float) and value != value:  # NaN
            return missing
        if isinstance(value, (int, float)):
            return (0, value, "")
        if isinstance(value, str):
            text = value.strip()
            # isdecimal() (unlike isdigit()) only accepts what int() can parse
            if text.isdecimal():
                return (0, int(text), "")
            return (1, 0, text.lower())
        return (1, 0, str(value))

    logger.info("build_global_index_for_all_scriptures: started")
    config = SanatanConfig()

    for scripture in config.scriptures:
        scripture_name = scripture["name"]
        logger.info(
            "build_global_index_for_all_scriptures:%s: Processing", scripture_name
        )
        collection_name = scripture["collection_name"]
        collection = self.chroma_client.get_or_create_collection(
            name=collection_name
        )
        metadata_fields = scripture.get("metadata_fields", [])

        # Fields marked as unique define the ordering; fall back to the first
        # metadata field when none is marked.
        unique_fields = [f["name"] for f in metadata_fields if f.get("is_unique")]
        if not unique_fields:
            if not metadata_fields:
                logger.warning(
                    "No metadata fields defined for %s, skipping", collection_name
                )
                continue
            unique_fields = [metadata_fields[0]["name"]]

        logger.info(
            "build_global_index_for_all_scriptures:%s:unique fields: %s",
            scripture_name,
            unique_fields,
        )

        # Resolve the optional chapter ordering exactly once per scripture.
        chapter_order = scripture.get("chapter_order", None)
        chapter_order_mapping = {}
        if callable(chapter_order):
            chapter_order_mapping = chapter_order()
            logger.info(
                "build_global_index_for_all_scriptures:%s:chapter_order_mapping: %s",
                scripture_name,
                chapter_order_mapping,
            )

        # Fetch all records (keep embeddings so the upsert does not re-embed)
        try:
            results = collection.get(
                include=["metadatas", "documents", "embeddings"]
            )
        except Exception:
            logger.error(
                "build_global_index_for_all_scriptures:%s Error getting data from chromadb",
                scripture_name,
                exc_info=True,
            )
            continue

        ids = results["ids"]
        metadatas = results["metadatas"]
        documents = results["documents"]
        embeddings = results.get("embeddings")
        if embeddings is None:
            embeddings = [None] * len(ids)

        if not ids:
            logger.warning(
                "build_global_index_for_all_scriptures:%s: collection is empty. skipping collection",
                scripture_name,
            )
            continue

        if not force and metadatas and "_global_index" in (metadatas[0] or {}):
            logger.warning(
                "build_global_index_for_all_scriptures:%s: global index already available. skipping collection",
                scripture_name,
            )
            continue

        # One composite key per record, in the order the records were fetched.
        record_keys = [
            tuple(
                sort_key(
                    (meta or {}).get(field_name),
                    chapter_order_mapping
                    if field_name.lower() == "chapter"
                    else None,
                )
                for field_name in unique_fields
            )
            for meta in metadatas
        ]
        # `order` holds ORIGINAL positions in sorted order, so ids, documents,
        # metadatas and embeddings can all be read with the same index and
        # stay aligned. sorted() is stable, so ties keep their fetch order.
        order = sorted(range(len(ids)), key=record_keys.__getitem__)

        logger.info(
            "build_global_index_for_all_scriptures:%s: updating database",
            scripture_name,
        )

        # Batch upsert
        BATCH_SIZE = 5000  # safely below max batch size
        for start in range(0, len(order), BATCH_SIZE):
            positions = order[start : start + BATCH_SIZE]
            batch_metas = [
                # copy the record's own metadata untouched, then add the index
                {**(metadatas[pos] or {}), "_global_index": start + offset}
                for offset, pos in enumerate(positions, start=1)
            ]
            collection.upsert(
                ids=[ids[pos] for pos in positions],
                documents=[documents[pos] for pos in positions],
                metadatas=batch_metas,
                embeddings=[embeddings[pos] for pos in positions],
            )

        logger.info(
            "build_global_index_for_all_scriptures:%s: ✅ Updated with %d records",
            scripture_name,
            len(ids),
        )
main.py CHANGED
@@ -1,6 +1,7 @@
1
  from fastapi.responses import RedirectResponse
2
  import uvicorn
3
  from fastapi import FastAPI
 
4
  from server import router as mobile_router
5
  from app import gradio_app # your Blocks object
6
  import gradio as gr
@@ -31,4 +32,4 @@ async def log_requests(request: Request, call_next):
31
  return response
32
 
33
  if __name__ == "__main__":
34
- uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=True)
 
1
  from fastapi.responses import RedirectResponse
2
  import uvicorn
3
  from fastapi import FastAPI
4
+ from db import SanatanDatabase
5
  from server import router as mobile_router
6
  from app import gradio_app # your Blocks object
7
  import gradio as gr
 
32
  return response
33
 
34
  if __name__ == "__main__":
35
+ uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=False)
modules/scripture_helpers/kamba_ramayanam_helper.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def get_list_of_kandams():
    """Return the hard-coded kandam names as a new list in ascending order."""
    # NOTE(review): entries are kept verbatim, including the odd-looking
    # "1Bala Kandam" and "Kamba Ramayanam- Bala Kandam" - presumably they
    # mirror values stored with the data; confirm before normalizing.
    kandam_names = [
        "Yudha Kandam",
        "Ayodhya Kandam",
        "Kishkinda Kandam",
        "Aranya Kandam",
        "Sundara Kandam",
        "Kamba Ramayanam- Bala Kandam",
        "1Bala Kandam",
    ]
    kandam_names.sort()
    return kandam_names
11
+
12
+
13
def get_list_of_padalams():
    """Return the hard-coded English padalam (chapter) titles, sorted ascending.

    The empty string is one of the entries and therefore sorts first.
    """
    padalam_titles = [
        "",
        "The chapter on moon mountain",
        "The chapter on rainy season",
        "The chapter on river",
        "The chapter on lake at Pambai",
        "The chapter on death of Sarabanga",
        "The chapter on floating in oil",
        "The chapter on fire sacrifice",
        "The chapter on handing over",
        "The chapter on killing of Viradha",
        "The chapter on seeing ganges",
        "The chapter on Ahalya",
        "The chapter on picturesque description",
        "The chapter on the great marriage",
        "The chapter on going away for search",
        "The chapter on narration about family of Rama.",
        "The chapter on dancing after drinking",
        "The chapter on conspiracy of Kaikeyi",
        "The chapter on divine incarnation",
        "The chapter on friendship",
        "The chapter on the country",
        "The chapter on Kabandha",
        "The chapter on governance",
        "The chapter on Agasthya",
        "The chapter on looks of MIthila",
    ]
    padalam_titles.sort()
    return padalam_titles
41
+
42
def get_list_of_padalams_in_tamil():
    """Return the hard-coded transliterated Tamil padalam names, sorted ascending.

    The empty string is one of the entries and therefore sorts first.
    """
    # NOTE(review): several entries carry stray prefixes ("8,", ".22. ",
    # "10") or differ only in letter case; they are reproduced verbatim -
    # presumably to match stored values, confirm before cleaning them up.
    tamil_titles = [
        "",
        "Vidai Kodu Padalam",
        "Kakshi Padalam",
        "Yethir kol Padalam",
        "Soorpankai padalam",
        "Kaarkala Padalam",
        "Pini Veetu padalam",
        "Thiru avathara Padalam",
        "Choodamani padalam",
        "Ezhuchi Padalam",
        "Pambai Vaavi padalam",
        "Poo Koi padalam",
        "Ravanan Kalam Kaan Padalam",
        "Dundhubhi padalam",
        "Arasiyar Padalam",
        "Varai kakshi padalam",
        "Makara Kannan Vadhai padalam",
        "Maya Sithai Padalam",
        "Angadhan Dhoothu Padalam",
        "Thadagai vadhai padalam",
        "Chitrakoota Padalam",
        "KItkindhai Padalam",
        "Raman ther yerum Padalam",
        "Ulaviyar Padalam",
        "Meekshi Padalam",
        "Nikumbalai Yaga Padalam",
        "Mahendra Padalam",
        "Kula murai kilathu Padalam",
        "Kaliyattu Padalam",
        "8,Jambu mali vadhai padalam",
        "Nattu Padalam",
        "Nagara Padalam",
        "Moola Bala Vadhai padalam",
        "Agathiya Padalam",
        "Ahaligai padalam",
        "Oor thedu padalam",
        "Indirachithu vadhai padalam",
        "Natpu Kot Padalam",
        "10Ravanan Vanara thanai kaan Padalam",
        "Padai Kakshi Padalam",
        "Velvi Padalam",
        ".22. Seethai Kalam Kan padalam",
        "Nagar neengu padalam",
        "Ravanan ther yeru padalam",
        "Palli padai padalam",
        "Anuma Padalam",
        "Kharan Vadha Padalam",
        "Vanam pugu Padalam",
        "Kaikeyi choozhvinai Padalam",
        "Kalan Kaan Padalam",
        "Ayomukhi Padalam",
        "Kumbhakarunan Vadhai Padalam",
        "Ravananan Vadhai Padalam",
        "Ani Vakuppu Padalam",
        "Naada vita Padalam",
        "Sampathi Padalam",
        "Karmukha padalam",
        "Sarabangan pirappu neengu padalam",
        "Savari pirappu neengu padalam",
        "Chandra Saila Padalam",
        "Arasiyal padalam",
        "Gangai Padalam",
        "Thailam aatu Padalam",
        "Hiranyan Vadhai Padalam",
        "Ravanana Mandira Padalam",
        "8,Otthu Kelvi Padalam",
        "Ilangai kaan Padalam",
        "Athikayan vadhai Padalam",
        "Ravanan choozhchi Padalam",
        "Mandhira Padalam",
        "Maya Janaka Padalam",
        "Mudal por puri Padalam",
        "Kolam kaan padalam",
        "Varunanai vazhi vendu padalam",
        "Parasurama Padalam",
        "Vel Yetha Padalam",
        "Mareechan vadhai padalam",
        "Naga Pasa Padalam",
        "Mithilai Kakshi Padalam",
        "PIlam pugu padalam",
        "Thirumudi chootu Padalam",
        "Undattu Padalam",
        "Kadal kaan Padalam",
        "23,Maruthu malai Padalam",
        "Sethu Bandhana Padalam",
        "Ilankai eriyuttu padalam",
        "4,Veedanan adaikkala Padalam",
        "Vali Vadhai Padalam",
        "AAthu Padalam",
        "Neer Vilayattu Padalam",
        "Padai thalaivar Vadhai Padalam",
        "Kinkarar vadhai padalam",
        "Kavandhan Padalam",
        "Pozhil irutha Padalam",
        "Chadayu kaan Padalam",
        "Kayadai padalam",
        "Ilangai kelvi Padalam",
        "Dandakaranya Padalam",
        "11,Maguda Banga Padalam",
        "AAru chel padalam",
        "Kadal thavu Padalam",
        "Brahmathira Padalam",
        "Gangai kaan padalam",
        "Ravanan Soka Padalam",
        "Guha Padalam",
        "Soorpanakai Choozhchi padalam",
        "Katimana Padalam",
        "Maramara Padalam",
        "32,Vanarar kalam kaan Padalam",
        "14,Thiruvadi thozhutha Padalam",
        "Thiruvadi Chootu Padalam",
        "Aaru chel padalam",
        "Viradhan Vadhai padalam",
        "Jatayu uyir neetha padalam",
    ]
    tamil_titles.sort()
    return tamil_titles
modules/scripture_helpers/vishnu_sahasranamam_helper.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def get_chapters_from_sahasranamam():
    """Return the Vishnu Sahasranamam chapter names as a new sorted list."""
    chapter_names = [
        "MEDITATION ON VISHNU",
        "ATHA DHYĀNAM",
        "STOTRAM — 1000 NAMES",
        "PHALASHRUTHI",
    ]
    chapter_names.sort()
    return chapter_names
5
+
6
+
7
def get_chapter_order_from_sahasranamam():
    """Return a new dict mapping each chapter name to its 1-based position."""
    chapters_in_order = (
        "MEDITATION ON VISHNU",
        "ATHA DHYĀNAM",
        "STOTRAM — 1000 NAMES",
        "PHALASHRUTHI",
    )
    return {
        chapter: position
        for position, chapter in enumerate(chapters_in_order, start=1)
    }
server.py CHANGED
@@ -15,6 +15,11 @@ from metadata import MetadataWhereClause
15
  from modules.quiz.answer_validator import validate_answer
16
  from modules.quiz.models import Question
17
  from modules.quiz.quiz_helper import generate_question
 
 
 
 
 
18
 
19
  router = APIRouter()
20
 
@@ -201,8 +206,9 @@ async def get_scripture(req: ScriptureRequest):
201
  """
202
  Return a scripture unit (page or verse, based on config),
203
  including all metadata fields separately.
 
204
  """
205
- print("received request to fetch scripture.", req)
206
 
207
  # find config entry for the scripture
208
  config = next(
@@ -215,10 +221,10 @@ async def get_scripture(req: ScriptureRequest):
215
  raw_doc = SanatanDatabase().fetch_document_by_index(
216
  collection_name=config["collection_name"],
217
  index=req.unit_index,
218
- unit_name=config.get("unit_field", config.get("unit")),
219
  )
220
 
221
- if not raw_doc or isinstance(raw_doc, str):
222
  return {"error": f"No data available for unit {req.unit_index}"}
223
 
224
  # canonicalize it
@@ -229,7 +235,6 @@ async def get_scripture(req: ScriptureRequest):
229
  )
230
 
231
  # add unit index & total units (so Flutter can paginate)
232
- canonical_doc["unit_index"] = req.unit_index
233
  canonical_doc["total"] = SanatanDatabase().count(config["collection_name"])
234
 
235
  print("canonical_doc = ", canonical_doc)
@@ -275,7 +280,6 @@ async def get_scripture_configs():
275
  async def search_scripture(
276
  scripture_name: str,
277
  filter_obj: Optional[MetadataWhereClause] = None,
278
- n_results: int = 1,
279
  ):
280
  """
281
  Search scripture collection with optional filters.
@@ -285,17 +289,23 @@ async def search_scripture(
285
  """
286
  try:
287
  db = SanatanDatabase()
288
- results = db.fetch_random_data(
289
- collection_name=scripture_name,
 
 
 
 
290
  metadata_where_clause=filter_obj,
291
- n_results=n_results,
292
  )
293
 
294
  print("results = ", results)
295
  # Flatten + canonicalize results
296
  formatted_results = []
297
  for i in range(len(results["metadatas"])):
 
298
  metadata_doc = results["metadatas"][i]
 
 
299
  document_text = (
300
  results["documents"][i] if results.get("documents") else None
301
  )
@@ -305,7 +315,7 @@ async def search_scripture(
305
  )
306
  formatted_results.append(canonical_doc)
307
 
308
- # print("formatted_results = ", formatted_results)
309
  return {"results": formatted_results}
310
 
311
  except Exception as e:
 
15
  from modules.quiz.answer_validator import validate_answer
16
  from modules.quiz.models import Question
17
  from modules.quiz.quiz_helper import generate_question
18
+ import logging
19
+
20
+ logging.basicConfig()
21
+ logger = logging.getLogger(__name__)
22
+ logger.setLevel(logging.INFO)
23
 
24
  router = APIRouter()
25
 
 
206
  """
207
  Return a scripture unit (page or verse, based on config),
208
  including all metadata fields separately.
209
+ used for page view to fetch by global index.
210
  """
211
+ logger.info("get_scripture: received request to fetch scripture: %s", req)
212
 
213
  # find config entry for the scripture
214
  config = next(
 
221
  raw_doc = SanatanDatabase().fetch_document_by_index(
222
  collection_name=config["collection_name"],
223
  index=req.unit_index,
224
+ # unit_name=config.get("unit_field", config.get("unit")),
225
  )
226
 
227
+ if not raw_doc or isinstance(raw_doc, str) or "error" in raw_doc:
228
  return {"error": f"No data available for unit {req.unit_index}"}
229
 
230
  # canonicalize it
 
235
  )
236
 
237
  # add unit index & total units (so Flutter can paginate)
 
238
  canonical_doc["total"] = SanatanDatabase().count(config["collection_name"])
239
 
240
  print("canonical_doc = ", canonical_doc)
 
280
  async def search_scripture(
281
  scripture_name: str,
282
  filter_obj: Optional[MetadataWhereClause] = None,
 
283
  ):
284
  """
285
  Search scripture collection with optional filters.
 
289
  """
290
  try:
291
  db = SanatanDatabase()
292
+ config = next(
293
+ (s for s in SanatanConfig().scriptures if s["name"] == scripture_name), None
294
+ )
295
+
296
+ results = db.fetch_first_match(
297
+ collection_name=config["collection_name"],
298
  metadata_where_clause=filter_obj,
 
299
  )
300
 
301
  print("results = ", results)
302
  # Flatten + canonicalize results
303
  formatted_results = []
304
  for i in range(len(results["metadatas"])):
305
+ id = results["ids"][i]
306
  metadata_doc = results["metadatas"][i]
307
+ metadata_doc["id"] = id
308
+ print("metadata_doc = ",metadata_doc)
309
  document_text = (
310
  results["documents"][i] if results.get("documents") else None
311
  )
 
315
  )
316
  formatted_results.append(canonical_doc)
317
 
318
+ print("formatted_results = ", formatted_results)
319
  return {"results": formatted_results}
320
 
321
  except Exception as e:
tests/test_db.py CHANGED
@@ -36,7 +36,7 @@ if __name__ == "__main__":
36
  ),
37
  ]
38
  )
39
- response = database.search_by_metadata(
40
  collection_name=collection_name,
41
  query=query,
42
  metadata_where_clause=metadata_where_clause,
 
36
  ),
37
  ]
38
  )
39
+ response = database.search(
40
  collection_name=collection_name,
41
  query=query,
42
  metadata_where_clause=metadata_where_clause,
tests/test_gen_global_index.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+
4
+ from db import SanatanDatabase
5
+ from metadata import MetadataFilter, MetadataWhereClause
6
+
7
+
8
if __name__ == "__main__":
    # Manual maintenance script: rebuilds `_global_index` for every configured
    # scripture, even for collections that already have one (force=True).
    logging.basicConfig()
    database = SanatanDatabase()
    database.build_global_index_for_all_scriptures(force=True)