FernAI

Running

App Files Files Community

redfernstech committed on Apr 25

Commit

bae8b12

verified ·

1 Parent(s): 9643dce

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -8

app.py CHANGED Viewed

@@ -236,6 +236,8 @@ from fastapi.templating import Jinja2Templates
 from simple_salesforce import Salesforce, SalesforceLogin
 from langchain_groq import ChatGroq
 from langchain_core.prompts import ChatPromptTemplate
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -262,7 +264,7 @@ app.mount("/static", StaticFiles(directory="static"), name="static")
 templates = Jinja2Templates(directory="static")
 # Validate environment variables
-required_env_vars = ["CHATGROQ_API_KEY", "username", "password", "security_token", "domain"]
 for var in required_env_vars:
     if not os.getenv(var):
         logger.error(f"Environment variable {var} is not set")
@@ -282,10 +284,13 @@ except Exception as e:
     logger.error(f"Failed to initialize Groq model: {e}")
     raise HTTPException(status_code=500, detail="Failed to initialize Groq model")
 # Salesforce credentials
 username = os.getenv("username")
 password = os.getenv("password")
-security_token = os.getenv("security_token")
 domain = os.getenv("domain")  # e.g., 'test' for sandbox
 # Initialize Salesforce connection
@@ -303,27 +308,71 @@ chat_history = []
 current_chat_history = []
 MAX_HISTORY_SIZE = 100  # Limit chat history size
 def handle_query(query):
     # Prepare context from chat history
-    context_str = ""
     for past_query, response in reversed(current_chat_history[-10:]):  # Limit context to last 10 exchanges
         if past_query.strip():
-            context_str += f"User asked: '{past_query}'\nBot answered: '{response}'\n"
-    # Construct the prompt
     prompt_template = ChatPromptTemplate.from_messages([
         ("system", """
         You are the Clara Redfernstech chatbot. Provide accurate, professional answers in 10-15 words.
-        Context:
-        {context}
         Question:
         {query}
         """),
     ])
-    prompt = prompt_template.format(context=context_str, query=query)
     try:
         response = llm.invoke(prompt)
         response_text = response.content.strip()

 from simple_salesforce import Salesforce, SalesforceLogin
 from langchain_groq import ChatGroq
 from langchain_core.prompts import ChatPromptTemplate
+from llama_index.core import StorageContext, VectorStoreIndex, SimpleDirectoryReader, Settings, load_index_from_storage
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 templates = Jinja2Templates(directory="static")
 # Validate environment variables
+required_env_vars = ["CHATGROQ_API_KEY", "username", "password", "security_token", "domain", "HF_TOKEN"]
 for var in required_env_vars:
     if not os.getenv(var):
         logger.error(f"Environment variable {var} is not set")
     logger.error(f"Failed to initialize Groq model: {e}")
     raise HTTPException(status_code=500, detail="Failed to initialize Groq model")
+# Configure LlamaIndex settings
+Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
 # Salesforce credentials
 username = os.getenv("username")
 password = os.getenv("password")
+# The variable name must match the "security_token" entry checked in
+# required_env_vars; any other name makes os.getenv return None.
+security_token = os.getenv("security_token")
 domain = os.getenv("domain")  # e.g., 'test' for sandbox
 # Initialize Salesforce connection
 current_chat_history = []
 MAX_HISTORY_SIZE = 100  # Limit chat history size
+# Directories for data ingestion
+PDF_DIRECTORY = "data"
+PERSIST_DIR = "db"
+# Ensure directories exist
+os.makedirs(PDF_DIRECTORY, exist_ok=True)
+os.makedirs(PERSIST_DIR, exist_ok=True)
+def data_ingestion_from_directory():
+    """Build a vector index from the files in PDF_DIRECTORY and persist it.
+
+    Loads the documents that SimpleDirectoryReader finds in PDF_DIRECTORY,
+    builds a VectorStoreIndex from them, and writes the index's storage
+    context to PERSIST_DIR.
+
+    Raises:
+        HTTPException: status 500 if loading, indexing, or persisting fails;
+            the underlying error is attached as the explicit cause.
+    """
+    try:
+        # NOTE(review): SimpleDirectoryReader presumably raises when
+        # PDF_DIRECTORY contains no readable files - confirm; if so, a fresh
+        # deployment with an empty data directory fails here.
+        documents = SimpleDirectoryReader(PDF_DIRECTORY).load_data()
+        # The index exposes its own storage context (used for persisting
+        # below), so no separate StorageContext is constructed here.
+        index = VectorStoreIndex.from_documents(documents)
+        index.storage_context.persist(persist_dir=PERSIST_DIR)
+        logger.info("Data ingestion completed successfully")
+    except Exception as e:
+        logger.error(f"Error during data ingestion: {e}")
+        # Chain the original error so the traceback shows the real cause.
+        raise HTTPException(status_code=500, detail=f"Data ingestion failed: {str(e)}") from e
+def initialize():
+    """Run startup tasks; currently this only triggers document ingestion.
+
+    Raises:
+        HTTPException: status 500 with detail "Initialization failed" if
+            ingestion raises; the original error is attached as the cause.
+    """
+    try:
+        data_ingestion_from_directory()  # Process PDF ingestion at startup
+    except Exception as e:
+        logger.error(f"Initialization failed: {e}")
+        # Keep the underlying failure as the explicit cause of this error.
+        raise HTTPException(status_code=500, detail="Initialization failed") from e
+initialize()  # Run initialization tasks
 def handle_query(query):
     # Prepare context from chat history
+    chat_context = ""
     for past_query, response in reversed(current_chat_history[-10:]):  # Limit context to last 10 exchanges
         if past_query.strip():
+            chat_context += f"User asked: '{past_query}'\nBot answered: '{response}'\n"
+    # Load vector index and retrieve relevant documents
+    try:
+        storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
+        index = load_index_from_storage(storage_context)
+        query_engine = index.as_query_engine(similarity_top_k=2)  # Retrieve top 2 relevant chunks
+        retrieved = query_engine.query(query)
+        doc_context = retrieved.response if hasattr(retrieved, 'response') else "No relevant documents found."
+    except Exception as e:
+        logger.error(f"Error retrieving documents: {e}")
+        doc_context = "Failed to retrieve documents."
+    # Construct the prompt with both chat and document context
     prompt_template = ChatPromptTemplate.from_messages([
         ("system", """
         You are the Clara Redfernstech chatbot. Provide accurate, professional answers in 10-15 words.
+        Use the document context and chat history to inform your response.
+        Document Context:
+        {doc_context}
+        Chat History:
+        {chat_context}
         Question:
         {query}
         """),
     ])
+    prompt = prompt_template.format(doc_context=doc_context, chat_context=chat_context, query=query)
+    # Query Groq model
     try:
         response = llm.invoke(prompt)
         response_text = response.content.strip()