Spaces:

vikramvasudevan
/

sanatan_ai

Running on CPU Upgrade

App Files Files Community

vikramvasudevan committed on Aug 6

Commit

4646386

verified ·

1 Parent(s): 7dfb891

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

app.py +148 -122
db.py +12 -3

app.py CHANGED Viewed

@@ -1,14 +1,20 @@
 import os
 import random
-import time
 import gradio as gr
 from config import SanatanConfig
 from drive_downloader import ZipDownloader
 from graph_helper import generate_graph
-import uuid
-import logging
-from dotenv import load_dotenv
 logging.basicConfig()
 logger = logging.getLogger()
 logger.setLevel(logging.INFO)
@@ -18,42 +24,81 @@ graph = generate_graph()
 def init():
     load_dotenv(override=True)
-    # if(os.path.isdir("./chromadb-store")):
-    #     logger.warning("database exists locally. not downloading.")
-    #     return
-    downloader = ZipDownloader(
-        service_account_json=os.getenv("GOOGLE_SERVICE_ACCOUNT_JSON")
-    )
-    # 1. Download zip file
-    zip_path = downloader.download_zip_from_drive(
-        file_id=os.getenv("CHROMADB_FILE_ID"),
-        output_path=SanatanConfig.dbStorePath,
-    )
-    # 2. Extract it
-    downloader.unzip(zip_path, extract_to="./")
 def init_session():
-    # Generate a unique ID per browser session
     return str(uuid.uuid4())
 async def chat_wrapper(message, history, thread_id, debug):
     if debug:
-        # Yield each message as it's generated
         async for chunk in chat_streaming(message, history, thread_id):
             yield chunk
     else:
-        # Yield full response once
         response = chat(message, history, thread_id)
         yield response
 def chat(message, history, thread_id):
-    print("received chat message for thread:", thread_id)
     config = {"configurable": {"thread_id": thread_id}}
     response = graph.invoke(
         {"messages": [{"role": "user", "content": message}]}, config=config
@@ -61,107 +106,74 @@ def chat(message, history, thread_id):
     return response["messages"][-1].content
-import uuid
-import asyncio
-from html import escape
 async def chat_streaming(message, history, thread_id):
     state = {"messages": (history or []) + [{"role": "user", "content": message}]}
     config = {"configurable": {"thread_id": thread_id}}
-    buffered_final = None
-    step_counter = 0
-    thinking_verbs = [
-        "thinking",
-        "processing",
-        "crunching data",
-        "please wait",
-        "just a few more seconds",
-        "closing in",
-        "analyzing",
-        "reasoning",
-        "computing",
-        "synthesizing insight",
-        "searching through the cosmos",
-        "decoding ancient knowledge",
-        "scanning the scriptures",
-        "accessing divine memory",
-        "gathering wisdom",
-        "consulting the rishis",
-        "listening to the ātmā",
-        "channeling sacred energy",
-        "unfolding the divine word",
-        "meditating on the meaning",
-        "reciting from memory",
-        "traversing the Vedas",
-        "seeking the inner light",
-        "invoking paramārtha",
-        "putting it all together",
-        "digging deeper",
-        "making sense of it",
-        "connecting the dots",
-        "almost there",
-        "getting closer",
-        "wrapping it up",
-        "piecing it together",
-        "swirling through verses",
-        "diving into the ocean of knowledge",
-        "lighting the lamp of understanding",
-        "walking the path of inquiry",
-        "aligning stars of context",
-    ]
-    async for step in graph.astream(state, config=config):
-        for node_name, node_output in step.items():
-            step_counter += 1
-            messages = node_output.get("messages", [])
-            if not messages:
-                continue
-            last_message = messages[-1]
-            content = getattr(last_message, "content", "") or ""
-            full = escape(content)
             truncated = (full[:300] + "…") if len(full) > 300 else full
-            buffered_final = full  # Save latest response in case it's the final one
-            # Unique ID for each step to help frontend match animation + content
-            thinking_id = str(uuid.uuid4())
-            # Yield animated thinking bubble BEFORE actual message
-            yield f"<div id='{thinking_id}'><em>💭 {random.choice(thinking_verbs)}...</em></div>"
-            await asyncio.sleep(0.5)  # let the thinking bubble show up slightly before
-            # Yield actual message in transparent font with node name
-            # Instead of two yields: one for thinking, one for message
-            # Combine both in one yield so frontend shows them as one stream block
-            html = (
-                f"<div><em>💭 {random.choice(thinking_verbs)} ...</em></div>"
-                f"<div style='opacity: 0.5' title='{full}'>"
-                f"<strong>[{node_name}]</strong> {truncated or '...'}"
                 f"</div>"
             )
-            yield html
-    # ✅ Final answer: normal font, no node label
-    if buffered_final:
-        temp = ""
-        for char in buffered_final:
-            temp = temp + char
-            await asyncio.sleep(0.001)  # ✅ non-blocking!
-            yield temp
 thread_id = gr.State(init_session)
 supported_scriptures = "\n - ".join(
     [
         f"📖 **{scripture['title']}** [source]({scripture['source']})"
         for scripture in SanatanConfig.scriptures
     ]
 )
 intro_messages = [
     {
         "role": "assistant",
@@ -170,19 +182,20 @@ intro_messages = [
     {
         "role": "assistant",
         "content": """
-    #### Example questions you can ask:
-      - How is the form of Vishnu described across the scriptures?
-      - What teachings did Krishna share in the Gita?
-      - How did Arjuna respond upon witnessing Krishna’s Vishwarupa?
-      - What are some names of Vishnu from the Sahasranamam?
-      - Give me a pasuram by Andal
-      - explain sakrudeva prapannaaya shlokam in ramayana
-      - give the shlokam in ramayanam that vibheeshana uses to perform sharanagathi to rama, give the sanskrit shlokam and its meaning
-      - give one name in vishnu sahasranamam related to "cause"
-      - explain name of vishnu in sahasranamam related to wheel
-      """,
     },
 ]
 chatbot = gr.Chatbot(
     elem_id="chatbot",
     avatar_images=("assets/avatar_user.png", "assets/adiyen_bot.png"),
@@ -202,9 +215,22 @@ chatInterface = gr.ChatInterface(
     fn=chat_wrapper,
     additional_inputs=[thread_id, debug_checkbox],
     chatbot=chatbot,
 )
-# initializze database
 init()
 chatInterface.launch()

 import os
 import random
+import asyncio
+import logging
+import uuid
+from html import escape
 import gradio as gr
+from dotenv import load_dotenv
+from langchain_core.messages.ai import AIMessage, AIMessageChunk
+from langchain_core.messages.tool import ToolMessage
 from config import SanatanConfig
+from db import SanatanDatabase
 from drive_downloader import ZipDownloader
 from graph_helper import generate_graph
+# Logging
 logging.basicConfig()
 logger = logging.getLogger()
 logger.setLevel(logging.INFO)
 def init():
+    """Load environment variables and make sure the database is usable.
+
+    Calls ``load_dotenv(override=True)`` and then runs
+    ``SanatanDatabase().test_sanity()``. If the sanity check raises, the
+    failure is logged and the database archive is fetched with
+    ``ZipDownloader`` and unzipped into the current directory.
+    """
     load_dotenv(override=True)
+    try:
+        SanatanDatabase().test_sanity()
+    except Exception as e:
+        # Any exception from the sanity check triggers a fresh download.
+        logger.warning("Sanity Test Failed - %s", e)
+        logger.info("Downloading database ...")
+        downloader = ZipDownloader(
+            service_account_json=os.getenv("GOOGLE_SERVICE_ACCOUNT_JSON")
+        )
+        zip_path = downloader.download_zip_from_drive(
+            file_id=os.getenv("CHROMADB_FILE_ID"),
+            output_path=SanatanConfig.dbStorePath,
+        )
+        downloader.unzip(zip_path, extract_to="./")
 def init_session():
+    """Return a fresh random UUID string; used as the initial ``thread_id`` state."""
     return str(uuid.uuid4())
+def render_message_with_tooltip(content: str, max_chars=200):
+    """Render ``content`` as a ``<div>`` that shows a truncated preview.
+
+    The visible text is the first ``max_chars`` characters of ``content``,
+    HTML-escaped, followed by "…" when the content is longer than that. The
+    complete escaped content is placed in the ``title`` attribute.
+    """
+    preview = escape(content[:max_chars])
+    if len(content) > max_chars:
+        preview += "…"
+    tooltip = escape(content)
+    return f"<div title='{tooltip}'>{preview}</div>"
+# Status phrases for the progress bubbles; chat_streaming picks one with
+# random.choice each time it builds a bubble.
+thinking_verbs = [
+    "thinking",
+    "processing",
+    "crunching data",
+    "please wait",
+    "just a few more seconds",
+    "closing in",
+    "analyzing",
+    "reasoning",
+    "computing",
+    "synthesizing insight",
+    "searching through the cosmos",
+    "decoding ancient knowledge",
+    "scanning the scriptures",
+    "accessing divine memory",
+    "gathering wisdom",
+    "consulting the rishis",
+    "listening to the ātmā",
+    "channeling sacred energy",
+    "unfolding the divine word",
+    "meditating on the meaning",
+    "reciting from memory",
+    "traversing the Vedas",
+    "seeking the inner light",
+    "invoking paramārtha",
+    "putting it all together",
+    "digging deeper",
+    "making sense of it",
+    "connecting the dots",
+    "almost there",
+    "getting closer",
+    "wrapping it up",
+    "piecing it together",
+    "swirling through verses",
+    "diving into the ocean of knowledge",
+    "lighting the lamp of understanding",
+    "walking the path of inquiry",
+    "aligning stars of context",
+]
 async def chat_wrapper(message, history, thread_id, debug):
+    """Dispatch a chat request to the streaming or the one-shot handler.
+
+    When ``debug`` is truthy, every chunk produced by ``chat_streaming`` is
+    re-yielded; otherwise ``chat`` is called once and its result is yielded.
+    """
     if debug:
         async for chunk in chat_streaming(message, history, thread_id):
             yield chunk
     else:
+        # NOTE(review): chat() is a synchronous call made from inside this
+        # async generator -- confirm that blocking here is acceptable.
         response = chat(message, history, thread_id)
         yield response
 def chat(message, history, thread_id):
     config = {"configurable": {"thread_id": thread_id}}
     response = graph.invoke(
         {"messages": [{"role": "user", "content": message}]}, config=config
     return response["messages"][-1].content
 async def chat_streaming(message, history, thread_id):
+    """Stream the graph's progress and answer for one user message.
+
+    Iterates ``graph.astream(..., stream_mode="messages")`` and yields, for
+    each streamed message:
+
+    * ``ToolMessage`` -- an HTML progress bubble with a truncated preview;
+    * ``AIMessageChunk`` with content -- the answer text accumulated so far;
+    * ``AIMessageChunk`` without content -- a faint HTML placeholder;
+    * anything else -- a faint "Telling myself" HTML bubble.
+
+    After the stream ends, the accumulated answer is yielded once more. Any
+    exception is logged and reported to the caller as a final error string.
+
+    Args:
+        message: The user's text for this turn.
+        history: Earlier messages (may be empty or ``None``); they are
+            prepended to the state sent to the graph.
+        thread_id: Value passed to the graph as ``configurable.thread_id``.
+    """
     state = {"messages": (history or []) + [{"role": "user", "content": message}]}
     config = {"configurable": {"thread_id": thread_id}}
+    streamed_response = ""
+    # The user's text is interpolated into HTML below, so escape it once here.
+    safe_message = escape(str(message))
+    try:
+        async for msg, metadata in graph.astream(
+            state, config=config, stream_mode="messages"
+        ):
+            node = metadata.get("langgraph_node", "?")
+            name = getattr(msg, "name", "-")
+            full: str = escape(msg.content)
             truncated = (full[:300] + "…") if len(full) > 300 else full
+            processing_message = (
+                f"<div style='opacity: 0.1' title='{full}'>"
+                f"<div class='thinking-bubble'><em>🤔{random.choice(thinking_verbs)} ...</em></div>"
+                f"<span>{node}:{name or ''}:</span>"
+                f"<strong>Looking for : [{safe_message}]</strong> {truncated or '...'}"
                 f"</div>"
             )
+            if isinstance(msg, ToolMessage):
+                # Bubble is prefixed with the message's ``name`` attribute.
+                html = (
+                    f"<div class='thinking-bubble'><em>🤔{name} : {random.choice(thinking_verbs)} ...</em></div>"
+                    f"<div style='opacity: 0.5'>"
+                    f"<strong>Looking for : [{safe_message}]</strong> {truncated or '...'}"
+                    f"</div>"
+                )
+                yield html
+            elif isinstance(msg, AIMessageChunk):
+                if not msg.content:
+                    # Chunk carries no text: show the placeholder instead.
+                    yield processing_message
+                else:
+                    # Yield the whole answer so far, not just the new piece.
+                    streamed_response += msg.content
+                    yield streamed_response
+            else:
+                # Lazy %-style arguments; the placeholders must match the args.
+                logger.debug("message = %s %s", type(msg), msg.content[:100])
+                html = (
+                    f"<div class='thinking-bubble'><em>🤔 {random.choice(thinking_verbs)} ...</em></div>"
+                    f"<div style='opacity: 0.1'>"
+                    f"<strong>Telling myself:</strong> {truncated or '...'}"
+                    f"</div>"
+                )
+                yield html
+        yield streamed_response
+    except Exception as e:
+        # Boundary for the whole stream: keep the error visible to the user,
+        # but also record the traceback rather than discarding it.
+        logger.exception("Error while streaming chat response")
+        yield f"Error processing request {str(e)}"
+# UI Elements
 thread_id = gr.State(init_session)
 supported_scriptures = "\n - ".join(
     [
         f"📖 **{scripture['title']}** [source]({scripture['source']})"
         for scripture in SanatanConfig.scriptures
     ]
 )
 intro_messages = [
     {
         "role": "assistant",
     {
         "role": "assistant",
         "content": """
+#### Example questions you can ask:
+  - How is the form of Vishnu described across the scriptures?
+  - What teachings did Krishna share in the Gita?
+  - How did Arjuna respond upon witnessing Krishna’s Vishwarupa?
+  - What are some names of Vishnu from the Sahasranamam?
+  - Give me a pasuram by Andal
+  - explain sakrudeva prapannaaya shlokam in ramayana
+  - give the shlokam in ramayanam that vibheeshana uses to perform sharanagathi to rama, give the sanskrit shlokam and its meaning
+  - give one name in vishnu sahasranamam related to "cause"
+  - explain name of vishnu in sahasranamam related to wheel
+    """,
     },
 ]
 chatbot = gr.Chatbot(
     elem_id="chatbot",
     avatar_images=("assets/avatar_user.png", "assets/adiyen_bot.png"),
     fn=chat_wrapper,
     additional_inputs=[thread_id, debug_checkbox],
     chatbot=chatbot,
+    css="""
+<style>
+.thinking-bubble {
+  opacity: 0.5;
+  font-style: italic;
+  animation: pulse 1.5s infinite;
+  margin-bottom: 5px;
+}
+@keyframes pulse {
+  0% { opacity: 0.3; }
+  50% { opacity: 1; }
+  100% { opacity: 0.3; }
+}
+</style>
+""",
 )
 init()
 chatInterface.launch()

db.py CHANGED Viewed

@@ -43,7 +43,9 @@ class SanatanDatabase:
         collection_name: str,
         query: str,
         metadata_field: str,
-        metadata_search_operator: Literal["$in", "$eq", "$gt", "$gte", "$lt", "$lte", "$ne", "$nin"],
         metadata_value: str,
         n_results=2,
     ):
@@ -59,9 +61,16 @@ class SanatanDatabase:
         return response
     def count(self, collection_name: str):
-        logger.info("Getting total records in [%s]", collection_name)
         collection = self.chroma_client.get_or_create_collection(name=collection_name)
-        return collection.count()
 if __name__ == "__main__":

         collection_name: str,
         query: str,
         metadata_field: str,
+        metadata_search_operator: Literal[
+            "$in", "$eq", "$gt", "$gte", "$lt", "$lte", "$ne", "$nin"
+        ],
         metadata_value: str,
         n_results=2,
     ):
         return response
     def count(self, collection_name: str):
+        """Return the number of records in ``collection_name`` and log it.
+
+        NOTE(review): ``get_or_create_collection`` presumably creates the
+        collection when it does not exist, so this call may have a side
+        effect -- confirm against the client documentation.
+        """
         collection = self.chroma_client.get_or_create_collection(name=collection_name)
+        total_count = collection.count()
+        logger.info("Total records in [%s] = %d", collection_name, total_count)
+        return total_count
+    def test_sanity(self):
+        """Check that every configured scripture collection contains data.
+
+        Raises:
+            Exception: If any collection listed in
+                ``SanatanConfig().scriptures`` reports a count of zero.
+        """
+        for scripture in SanatanConfig().scriptures:
+            # Hoisting the lookup avoids nesting double quotes inside the
+            # f-string below, which is a SyntaxError before Python 3.12.
+            collection_name = scripture["collection_name"]
+            if self.count(collection_name=collection_name) == 0:
+                raise Exception(f"No data in collection {collection_name}")
 if __name__ == "__main__":