vikramvasudevan committed on
Commit
7b420fa
·
verified ·
1 Parent(s): 7b33394

Upload folder using huggingface_hub

Browse files
data/azhwars.json CHANGED
@@ -42,7 +42,7 @@
42
  [
43
  "PAT",
44
  "Periyaazhwar",
45
- "periyazvar thirumozhi"
46
  ],
47
  [
48
  "TP",
@@ -57,7 +57,7 @@
57
  [
58
  "MLT",
59
  "Poigaiazhwar",
60
- "muthal thiruvanthathi"
61
  ],
62
  [
63
  "IT",
 
42
  [
43
  "PAT",
44
  "Periyaazhwar",
45
+ "Periyazvar Thirumozhi"
46
  ],
47
  [
48
  "TP",
 
57
  [
58
  "MLT",
59
  "Poigaiazhwar",
60
+ "Muthal Thiruvanthathi"
61
  ],
62
  [
63
  "IT",
db.py CHANGED
@@ -242,7 +242,7 @@ class SanatanDatabase:
242
  include=["metadatas", "documents", "distances"],
243
  )
244
  except Exception as e:
245
- logger.error("Error in search: %s", e)
246
  return chromadb.QueryResult(
247
  documents=[],
248
  ids=[],
@@ -642,11 +642,11 @@ class SanatanDatabase:
642
  ]
643
  batch_embeds = [embeddings[idx] for idx in batch_df.index]
644
 
645
- collection.upsert(
646
  ids=batch_ids,
647
- documents=batch_docs,
648
  metadatas=batch_metas,
649
- embeddings=batch_embeds,
650
  )
651
 
652
  logger.info(
 
242
  include=["metadatas", "documents", "distances"],
243
  )
244
  except Exception as e:
245
+ logger.error("Error in search: %s", e, exc_info=True)
246
  return chromadb.QueryResult(
247
  documents=[],
248
  ids=[],
 
642
  ]
643
  batch_embeds = [embeddings[idx] for idx in batch_df.index]
644
 
645
+ collection.update(
646
  ids=batch_ids,
647
+ # documents=batch_docs,
648
  metadatas=batch_metas,
649
+ # embeddings=batch_embeds,
650
  )
651
 
652
  logger.info(
nalayiram_helper.py CHANGED
@@ -10,12 +10,17 @@ class Pasuram:
10
 
11
  def get_standardized_prabandham_names() -> list[Pasuram]:
12
  """
13
- Get a list of prabandham names along with the azhwars who authored them in divya_prabandham
 
14
  """
15
  with open("./data/azhwars.json", "r", encoding="utf-8") as f:
16
  azhwars = json.load(f)
17
  header = azhwars[0]
18
  rows = azhwars[1:]
 
 
 
 
19
  final_azhwars = [Pasuram(**dict(zip(header, row))) for row in rows]
20
 
21
  return final_azhwars
 
10
 
11
  def get_standardized_prabandham_names() -> list[Pasuram]:
12
  """
13
+ Get a list of prabandham names along with the azhwars who authored them in divya_prabandham,
14
+ sorted by the prabandham name (3rd field, index 2).
15
  """
16
  with open("./data/azhwars.json", "r", encoding="utf-8") as f:
17
  azhwars = json.load(f)
18
  header = azhwars[0]
19
  rows = azhwars[1:]
20
+
21
+ # Sort by 3rd field (index 2)
22
+ rows.sort(key=lambda row: row[2])
23
+
24
  final_azhwars = [Pasuram(**dict(zip(header, row))) for row in rows]
25
 
26
  return final_azhwars
tests/test_list_global_index.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+
4
+ from db import SanatanDatabase
5
+ from metadata import MetadataFilter, MetadataWhereClause
6
+
7
+
8
+ if __name__ == "__main__":
9
+ logging.basicConfig()
10
+ collection_name = "divya_prabandham"
11
+ database = SanatanDatabase()
12
+ result = database.chroma_client.get_collection(collection_name).get(include=["metadatas","documents"],where={"verse" : 21})
13
+
14
+ for index, metadata in enumerate(result["metadatas"]):
15
+ global_index = metadata.get("_global_index",-1)
16
+ verse = metadata["verse"]
17
+ print(f"global_index: {global_index} | verse: {verse} | metadata = {metadata}")