Upload folder using huggingface_hub
Files changed:
- embeddings.py +11 -6
- graph_helper.py +17 -4
embeddings.py
CHANGED
@@ -51,14 +51,19 @@ def _get_openai_embedding(texts: list[str]) -> list[list[float]]:

     return final_embeddings

+embedding_cache = {}
+
 def get_embedding(texts: list[str], backend: Literal["hf","openai"] = "hf") -> list[list[float]]:
-
-
-
-
+    key = (backend, tuple(texts))  # tuple is hashable
+    if key in embedding_cache:
+        return embedding_cache[key]
+
     if backend == "hf":
-
-
+        embedding_cache[key] = _get_hf_embedding(texts)
+    else:
+        embedding_cache[key] = _get_openai_embedding(texts)
+
+    return embedding_cache[key]

 # -------------------------------
 # Example
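The new module-level embedding_cache memoizes whole batches of texts, keyed by (backend, tuple(texts)) so the key is hashable. A minimal self-contained sketch of the same memoization pattern, with stub backends standing in for the real HF and OpenAI calls (the stub functions and their return values are invented for illustration):

from typing import Literal

# Stub backends for illustration only; the real module calls the HF / OpenAI APIs.
def _get_hf_embedding(texts: list[str]) -> list[list[float]]:
    print("computing HF embeddings for", texts)
    return [[float(len(t))] for t in texts]

def _get_openai_embedding(texts: list[str]) -> list[list[float]]:
    print("computing OpenAI embeddings for", texts)
    return [[2.0 * len(t)] for t in texts]

embedding_cache = {}

def get_embedding(texts: list[str], backend: Literal["hf", "openai"] = "hf") -> list[list[float]]:
    key = (backend, tuple(texts))  # lists are unhashable, so the batch is frozen into a tuple
    if key in embedding_cache:
        return embedding_cache[key]
    if backend == "hf":
        embedding_cache[key] = _get_hf_embedding(texts)
    else:
        embedding_cache[key] = _get_openai_embedding(texts)
    return embedding_cache[key]

get_embedding(["hello", "world"])            # computes and caches
get_embedding(["hello", "world"])            # served from the cache, no recomputation
get_embedding(["hello", "world"], "openai")  # different backend, so it computes again

Because the key covers the entire batch, the same text embedded as part of a different batch is recomputed, and the dict grows for the life of the process; a bounded cache would need eviction on top of this.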
graph_helper.py
CHANGED
@@ -1,5 +1,6 @@
 import json
 from typing import Annotated, TypedDict
+from httpx import Timeout
 from langgraph.graph import StateGraph, START, END
 from langgraph.checkpoint.memory import MemorySaver
 from langgraph.graph.message import add_messages
@@ -45,8 +46,10 @@ def branching_condition(state: ChatState) -> str:
     else:
         return check_debug_condition(state)

+
 from typing import List, Dict

+
 def truncate_messages_for_token_limit(messages, max_tokens=50000):
     """
     messages: list of dicts or LangChain messages
@@ -65,7 +68,10 @@ def truncate_messages_for_token_limit(messages, max_tokens=50000):
     for call in tool_calls:
         # include corresponding tool messages
         for m in messages:
-            if
+            if (
+                getattr(m, "additional_kwargs", {}).get("tool_call_id")
+                == call["id"]
+            ):
                 group.append(m)

     group_tokens = sum(len(getattr(m, "content", "")) // 4 for m in group)
@@ -78,6 +84,7 @@ def truncate_messages_for_token_limit(messages, max_tokens=50000):

     return result

+
 def generate_graph() -> CompiledStateGraph:
     memory = MemorySaver()
     tools = [
@@ -91,12 +98,18 @@ def generate_graph() -> CompiledStateGraph:
         tool_search_db_by_metadata,
         tool_search_db_for_literal,
     ]
-    llm = ChatOpenAI(
-
+    llm = ChatOpenAI(
+        model="gpt-4o-mini", temperature=0.2, max_retries=0, timeout=Timeout(60.0)
+    ).bind_tools(tools)
+    llm_without_tools = ChatOpenAI(
+        model="gpt-4o-mini", temperature=0.1, max_retries=0, timeout=Timeout(60.0)
+    )

     def chatNode(state: ChatState) -> ChatState:
         # logger.info("messages before LLM: %s", str(state["messages"]))
-        state["messages"] = truncate_messages_for_token_limit(
+        state["messages"] = truncate_messages_for_token_limit(
+            messages=state["messages"]
+        )
         response = llm.invoke(state["messages"])
         # return {"messages": [response]}
         return {"messages": state["messages"] + [response]}
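In truncate_messages_for_token_limit, the fleshed-out condition pairs an assistant message's tool calls with the tool messages that carry the matching tool_call_id in additional_kwargs, and the resulting group is sized with a rough four-characters-per-token estimate. A small self-contained sketch of that matching and sizing logic, using plain stand-in objects instead of LangChain message classes (the ids and contents below are invented for illustration):

from types import SimpleNamespace

# Stand-in messages; the real helper receives LangChain message objects.
assistant = SimpleNamespace(
    content="",
    tool_calls=[{"id": "call_1", "name": "tool_search_db_for_literal", "args": {}}],
    additional_kwargs={},
)
tool_result = SimpleNamespace(
    content="rows: 12",
    additional_kwargs={"tool_call_id": "call_1"},
)
unrelated = SimpleNamespace(content="hi", additional_kwargs={})

messages = [assistant, tool_result, unrelated]

# Group an assistant message with the tool messages that answer its tool calls,
# matching on tool_call_id, as the updated helper does.
group = [assistant]
for call in assistant.tool_calls:
    for m in messages:
        if getattr(m, "additional_kwargs", {}).get("tool_call_id") == call["id"]:
            group.append(m)

# Rough size estimate used by the helper: about 4 characters per token.
group_tokens = sum(len(getattr(m, "content", "")) // 4 for m in group)
print(len(group), group_tokens)  # 2 messages in the group, ~2 "tokens"

The len(content) // 4 figure is only a heuristic; a tokenizer such as tiktoken would give exact counts at the cost of an extra dependency.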
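The new LLM setup imports httpx's Timeout, caps each request at 60 seconds, and disables automatic retries (max_retries=0), so a hung call surfaces an error after a minute rather than being retried silently. httpx also accepts per-phase budgets; a hedged sketch of that variant, which is an alternative and not what this commit configures:

from httpx import Timeout

# The commit passes a flat budget: Timeout(60.0).
flat = Timeout(60.0)

# Alternative (not used here): split the budget, e.g. 5 s to establish the
# connection while keeping 60 s for read, write, and pool waits.
split = Timeout(60.0, connect=5.0)

# Either object can be handed to ChatOpenAI(..., timeout=...) as in the diff;
# constructing the model itself requires langchain-openai and an OpenAI API key.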