Spaces:

vikramvasudevan
/

sanatan_ai

Running on CPU Upgrade

vikramvasudevan committed on Sep 18

Commit

7b420fa

verified ·

1 Parent(s): 7b33394

Upload folder using huggingface_hub

Files changed (4) hide show

data/azhwars.json CHANGED Viewed

@@ -42,7 +42,7 @@
     [
       "PAT",
       "Periyaazhwar",
-      "periyazvar thirumozhi"
     ],
     [
       "TP",
@@ -57,7 +57,7 @@
     [
       "MLT",
       "Poigaiazhwar",
-      "muthal thiruvanthathi"
     ],
     [
       "IT",

     [
       "PAT",
       "Periyaazhwar",
+      "Periyazvar Thirumozhi"
     ],
     [
       "TP",
     [
       "MLT",
       "Poigaiazhwar",
+      "Muthal Thiruvanthathi"
     ],
     [
       "IT",

db.py CHANGED Viewed

@@ -242,7 +242,7 @@ class SanatanDatabase:
                     include=["metadatas", "documents", "distances"],
                 )
         except Exception as e:
-            logger.error("Error in search: %s", e)
             return chromadb.QueryResult(
                 documents=[],
                 ids=[],
@@ -642,11 +642,11 @@ class SanatanDatabase:
                 ]
                 batch_embeds = [embeddings[idx] for idx in batch_df.index]
-                collection.upsert(
                     ids=batch_ids,
-                    documents=batch_docs,
                     metadatas=batch_metas,
-                    embeddings=batch_embeds,
                 )
             logger.info(

                     include=["metadatas", "documents", "distances"],
                 )
         except Exception as e:
+            logger.error("Error in search: %s", e, exc_info=True)
             return chromadb.QueryResult(
                 documents=[],
                 ids=[],
                 ]
                 batch_embeds = [embeddings[idx] for idx in batch_df.index]
+                collection.update(
                     ids=batch_ids,
+                    # documents=batch_docs,
                     metadatas=batch_metas,
+                    # embeddings=batch_embeds,
                 )
             logger.info(

nalayiram_helper.py CHANGED Viewed

@@ -10,12 +10,17 @@ class Pasuram:
 def get_standardized_prabandham_names() -> list[Pasuram]:
     """
-    Get a list of prabandham names along with the azhwars who authored them in divya_prabandham
     """
     with open("./data/azhwars.json", "r", encoding="utf-8") as f:
         azhwars = json.load(f)
         header = azhwars[0]
         rows = azhwars[1:]
         final_azhwars = [Pasuram(**dict(zip(header, row))) for row in rows]
     return final_azhwars

 def get_standardized_prabandham_names() -> list[Pasuram]:
     """
+    Get a list of prabandham names along with the azhwars who authored them in divya_prabandham,
+    sorted by the prabandham name (3rd field, index 2).
     """
     with open("./data/azhwars.json", "r", encoding="utf-8") as f:
         azhwars = json.load(f)
         header = azhwars[0]
         rows = azhwars[1:]
+        # Sort by 3rd field (index 2)
+        rows.sort(key=lambda row: row[2])
         final_azhwars = [Pasuram(**dict(zip(header, row))) for row in rows]
     return final_azhwars

tests/test_list_global_index.py ADDED Viewed

+import json
+import logging
+from db import SanatanDatabase
+from metadata import MetadataFilter, MetadataWhereClause
+if __name__ == "__main__":
+    logging.basicConfig()
+    collection_name = "divya_prabandham"
+    database = SanatanDatabase()
+    result = database.chroma_client.get_collection(collection_name).get(include=["metadatas","documents"],where={"verse" : 21})
+    for index, metadata in enumerate(result["metadatas"]):
+        global_index = metadata.get("_global_index",-1)
+        verse = metadata["verse"]
+        print(f"global_index: {global_index} | verse: {verse} | metadata = {metadata}")