"""Chat with a Gemini model about the content of a local PDF or text file.

The script asks for a file path, splits the document into overlapping chunks,
indexes them in an in-memory FAISS store using a HuggingFace sentence
embedding model, and then runs an interactive question/answer loop.  For each
question the most similar chunks are retrieved and handed to the model
together with the system instructions and the conversation so far.

Requires ``GEMINI_API_KEY`` in the environment (or in a ``.env`` file).
"""
import os
import tempfile

from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import TextLoader, PyPDFLoader
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableParallel, RunnableLambda, RunnablePassthrough
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain_huggingface import HuggingFaceEmbeddings

# Variables for global use
chunk_size = 1000      # maximum characters per chunk
chunk_overlap = 100    # characters shared between neighbouring chunks
model_name = "sentence-transformers/all-MiniLM-L6-v2"  # embedding model
System_Message = """
You are RAG Assistant for the provided document.
Your role is to help users understand and explore the content of uploaded documents.
Follow these rules:
1. Always prioritize the document context when answering questions.
2. If the answer is not in the document, clearly say you don't know.
3. Keep responses friendly, clear, and concise.
"""


def _join_page_content(docs):
    """Concatenate the text of the retrieved documents, blank-line separated."""
    return "\n\n".join(doc.page_content for doc in docs)


# Load environment variables from .env file
load_dotenv()
api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
    raise ValueError("GEMINI_API_KEY not found. Please add it to your .env file.")

# Input file path
file_path = input("\nEnter path to your document (PDF or TXT): ").strip()
# isfile (not exists) so that a directory path is rejected here instead of
# failing later inside the loader.
if not os.path.isfile(file_path):
    print(f"\nThe file path '{file_path}' does not exist. Please check the path and try again.\n")
    raise FileNotFoundError(f"File not found: {file_path}")

# Load document (extension check is case-insensitive: .pdf, .PDF, .Pdf, ...)
if file_path.lower().endswith(".pdf"):
    loader = PyPDFLoader(file_path)
else:
    loader = TextLoader(file_path)
documents = loader.load()

# Split text into chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
chunks = splitter.split_documents(documents)
if not chunks:
    # Building a FAISS index from zero documents fails with an obscure error;
    # report the actual problem instead.
    raise ValueError(f"No text could be extracted from: {file_path}")

# embeddings + retriever
embeds = HuggingFaceEmbeddings(model_name=model_name)
vector_store = FAISS.from_documents(chunks, embeds)
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})

# Initialize chat model
LLM = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    google_api_key=api_key,
)

# The chains do not depend on the user's question, so build them once rather
# than on every loop iteration.
prompt_template = PromptTemplate(
    template="""You are answering based on this document:
{context}

Question: {question}""",
    input_variables=["context", "question"],
)
# question -> {"context": retrieved text, "question": question} -> prompt text
prompt_chain = (
    RunnableParallel(
        {
            "context": retriever | RunnableLambda(_join_page_content),
            "question": RunnablePassthrough(),
        }
    )
    | prompt_template
)
# list of chat messages -> streamed answer text
answer_chain = LLM | StrOutputParser()

# Initialize chat history
messages = [SystemMessage(content=System_Message)]

print("\n Your document is ready. Ask anything (type 'exit' to quit).\n")

# Chat loop
while True:
    try:
        user_input = input("\nYou: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C: leave the chat cleanly instead of with a traceback.
        print("\nChat ended.")
        break

    if not user_input:
        continue
    if user_input.lower() in {"exit", "quit"}:
        print("Chat ended.")
        break

    # Retrieve relevant chunks (once) and render the grounded prompt.
    grounded_question = HumanMessage(
        content=prompt_chain.invoke(user_input).to_string()
    )

    # Send the system message and prior turns along with the grounded
    # question so the model actually sees the rules and the conversation.
    response = "".join(
        answer_chain.stream([*messages, grounded_question])
    ).strip()
    print("AI Bot:", response)

    # Record the turn only after a successful answer, and store the bare
    # question (not the context-stuffed prompt) to keep the history compact.
    messages.append(HumanMessage(content=user_input))
    messages.append(AIMessage(content=response))