Document-RAG-System / chatbot.py
Wills17's picture
Upload 9 files
8b017a0 verified
""" Script to chat with a Gemini model about the content of an uploaded file."""
import os
import tempfile
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import TextLoader, PyPDFLoader
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableParallel, RunnableLambda, RunnablePassthrough
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain_huggingface import HuggingFaceEmbeddings
# Shared settings referenced by the rest of the script.
# chunk_size / chunk_overlap are handed to the text splitter; model_name
# selects the embedding model.
chunk_size, chunk_overlap = 1000, 100
model_name = "sentence-transformers/all-MiniLM-L6-v2"

# System prompt text. The value starts and ends with a newline, exactly as a
# triple-quoted literal with the text on its own lines would.
System_Message = (
    "\n"
    "You are RAG Assistant for the provided document.\n"
    "Your role is to help users understand and explore the content of uploaded documents.\n"
    "Follow these rules:\n"
    "1. Always prioritize the document context when answering questions.\n"
    "2. If the answer is not in the document, clearly say you don't know.\n"
    "3. Keep responses friendly, clear, and concise.\n"
)
# Read the .env file first, then look up the Gemini key in the environment.
# The script cannot talk to the model without it, so a missing or empty
# value stops execution immediately.
load_dotenv()
api_key = os.environ.get("GEMINI_API_KEY")
if api_key is None or api_key == "":
    raise ValueError("GEMINI_API_KEY not found. Please add it to your .env file.")
# Ask the user which document to chat about.
# Surrounding quotes are removed as well as whitespace, because paths pasted
# from a file manager or dragged into a terminal often arrive quoted.
file_path = input("\nEnter path to your document (PDF or TXT): ").strip().strip("\"'")

# isfile() rather than exists(): a directory path is rejected here with a
# clear message instead of failing later inside the loader.
if not os.path.isfile(file_path):
    print(f"\nThe file path '{file_path}' does not exist. Please check the path and try again.\n")
    raise FileNotFoundError(f"File not found: {file_path}")

# Load document.
# The extension check ignores case, so ".pdf", ".PDF" and ".Pdf" all use the
# PDF loader. Anything else is read as plain text.
if file_path.lower().endswith(".pdf"):
    loader = PyPDFLoader(file_path)
else:
    # If decoding with the default encoding fails, let the loader try to
    # detect the file's encoding instead of aborting.
    loader = TextLoader(file_path, autodetect_encoding=True)
documents = loader.load()
# Split text into chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
chunks = splitter.split_documents(documents)

# A document with no extractable text (for example a scanned PDF without a
# text layer) produces no chunks. Building the vector index from an empty list
# fails with an obscure error, so stop here with an explicit one.
if not chunks:
    raise ValueError("No text could be extracted from the document, so there is nothing to index.")

# embeddings + retriever
embeds = HuggingFaceEmbeddings(model_name=model_name)
vector_store = FAISS.from_documents(chunks, embeds)
# Similarity search returning the 5 closest chunks per query.
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})
# Gemini chat model, authenticated with the key read from the environment.
LLM = ChatGoogleGenerativeAI(google_api_key=api_key, model="gemini-2.5-flash")

# Conversation history: starts out holding only the system prompt.
messages = []
messages.append(SystemMessage(content=System_Message))
print("\n Your document is ready. Ask anything (type 'exit' to quit).\n")

# Built once, outside the loop: neither the prompt wording nor the
# model -> string pipeline changes between turns.
prompt_template = PromptTemplate(
    template="""You are answering based on this document:
{context}
Question: {question}""",
    input_variables=["context", "question"],
)
answer_chain = LLM | StrOutputParser()

# Chat loop: read a question, retrieve matching chunks, ask the model, and
# record the exchange in `messages`.
while True:
    try:
        user_input = input("\nYou: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C at the prompt ends the session without a traceback.
        print("\nChat ended.")
        break

    # Nothing to ask: prompt again rather than sending an empty question.
    if not user_input:
        continue
    if user_input.lower() in ("exit", "quit"):
        print("Chat ended.")
        break

    # Retrieve relevant documents (once per turn) and fold them into the prompt.
    retrieved_documents = retriever.invoke(user_input)
    context_text = "\n\n".join(doc.page_content for doc in retrieved_documents)
    grounded_question = prompt_template.format(context=context_text, question=user_input)

    # Send the system prompt and all earlier turns along with the new,
    # context-grounded question so the model follows the assistant rules and
    # can resolve follow-up questions.
    request = [*messages, HumanMessage(content=grounded_question)]

    # respond to user
    response = "".join(answer_chain.stream(request)).strip()
    print("AI Bot:", response)

    # History stores the raw question, not the context-expanded prompt, so the
    # retrieved chunks are not re-sent on every later turn.
    messages.append(HumanMessage(content=user_input))
    messages.append(AIMessage(content=response))