import gradio as gr
import chromadb
from langchain_chroma import Chroma
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.documents import Document
from langchain_community.llms import HuggingFacePipeline
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from huggingface_hub import login

# Initialize an in-memory ChromaDB client
client = chromadb.Client()

# Load the embeddings model
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-distilroberta-base-v1",
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Initialize the vector store using the in-memory client
vectorDB = Chroma(
    client=client,
    collection_name="embeddings",
    embedding_function=embeddings,
)


# Process a PDF file and ingest it into the vector store
def process_pdf(file_path):
    # Use PyPDFLoader to load the PDF
    loader = PyPDFLoader(file_path)
    documents = loader.load()

    # Split the documents into smaller chunks for better retrieval
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=30)
    texts = text_splitter.split_documents(documents)

    # Wrap each chunk in a Document carrying a shared docId in its metadata,
    # so all chunks from this upload can be filtered together at query time
    metadata_chunks = []
    for i, chunk in enumerate(texts):
        metadata = {"source": f"example_source_{i}", "docId": "42"}
        doc_with_metadata = Document(
            page_content=chunk.page_content,
            metadata=metadata,
            id=str(i),
        )
        metadata_chunks.append(doc_with_metadata)

    # Add the documents to the vector database
    try:
        vectorDB.add_documents(metadata_chunks)
    except Exception as e:
        raise RuntimeError(f"Failed to ingest documents into the vector store: {e}") from e

    gr.Info("PDF processed and ready for questions!")


# Build the prompt, load the LLM, and generate an answer from the retrieved context
def using_ollama_model(retriever, query, results, conversation_history, token):
    try:
        if token:
            gr.Info("Attempting to log in to Hugging Face...")
            login(token=token)
            gr.Info("Login successful!")
        else:
            gr.Warning("No Hugging Face token provided. Gated models may not be accessible.")
    except Exception as e:
        gr.Error(f"Hugging Face login failed: {e}")
        return "An error occurred during authentication. Please check your token and try again."

    # Flatten the conversation history into a plain-text transcript
    history_text = ""
    for item in conversation_history:
        if "question" in item and item["question"]:
            history_text += f"User: {item['question']}\n"
        if "answer" in item and item["answer"]:
            history_text += f"Assistant: {item['answer']}\n"

    prompt_template = """
    You are a helpful assistant. Answer the following question using the provided context and the previous conversation history. Only if the context does not contain the answer, reply with: "Sorry, I don't have enough information."
    Conversation History: {history}
    Context: {results}
    Question: {query}
    """
    template = PromptTemplate(
        input_variables=["history", "results", "query"],
        template=prompt_template,
    )

    # Join the retrieved chunks into a single context string
    doc_texts = "\n".join([doc.page_content for doc in results])

    # Load the instruction-tuned Llama model behind a text-generation pipeline
    model_id = "meta-llama/Llama-3.2-3B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id)
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        repetition_penalty=1.2,
    )
    llm = HuggingFacePipeline(pipeline=pipe)

    # Prompt -> LLM -> string output
    rag_chain = template | llm | StrOutputParser()
    answer = rag_chain.invoke({"history": history_text, "results": doc_texts, "query": query})
    return answer


# Retrieve relevant chunks for the query and hand them to the LLM
def retrievingResponse(docId, query, conversation_history, token):
    retriever = vectorDB.as_retriever(
        search_type="similarity",
        search_kwargs={
            "k": 4,
            "filter": {"docId": docId},
        },
    )
    results = retriever.invoke(query)

    # Drop duplicate chunks before sending the context to the LLM
    unique_results = []
    seen_texts = set()
    for result in results:
        if result.page_content not in seen_texts:
            unique_results.append(result)
            seen_texts.add(result.page_content)

    llm_result = using_ollama_model(retriever, query, unique_results, conversation_history, token)
    return llm_result


# Gradio wrapper: handles PDF uploads and text queries
def gradio_rag_wrapper(message, history, token):
    # 'message' is a dictionary because the input textbox is multimodal
    uploaded_files = message.get("files", [])

    # Ingest any uploaded PDFs first
    if uploaded_files:
        for file_path in uploaded_files:
            process_pdf(file_path)
        # Confirm the upload to the user
        return "PDF uploaded and processed. You can now ask questions about the content."

    # Otherwise treat the input as a text query
    text_query = message.get("text", "")
    if not text_query.strip():
        return "Please upload a document or enter a text query."

    # Convert the Gradio chat history into the question/answer format used above
    rag_history = []
    for user_msg, bot_msg in history:
        # User messages may be dictionaries when they contain files
        user_text = user_msg.get("text", "") if isinstance(user_msg, dict) else user_msg
        rag_history.append({"question": user_text, "answer": bot_msg})

    docId = "42"  # Hardcoded to match the docId assigned during ingestion
    response = retrievingResponse(docId, text_query, rag_history, token)
    return response


# Create the Gradio interface with multimodal input
with gr.Blocks(title="Contextual RAG Chatbot on Hugging Face Spaces") as demo:
    gr.Markdown("## Contextual RAG Chatbot")
    gr.Markdown(
        "Please enter your Hugging Face access token to use gated models such as Llama 3.2. "
        "You can generate a token from your [Hugging Face settings](https://huggingface.co/settings/tokens)."
    )

    hf_token_textbox = gr.Textbox(
        label="Hugging Face Access Token",
        type="password",
        interactive=True,
    )

    # Use gr.Chatbot and gr.MultimodalTextbox for more control over the layout
    chatbot = gr.Chatbot(label="Chatbot")
    msg = gr.MultimodalTextbox(
        placeholder="Upload a PDF file or enter your query...",
        file_types=[".pdf"],
        interactive=True,
    )

    # Submit handler: process user input and update the chatbot
    def respond(message, chat_history, hf_token_from_textbox):
        # The wrapper receives the token from the password textbox
        user_message_text = message.get("text") or "File uploaded."
        response = gradio_rag_wrapper(message, chat_history, hf_token_from_textbox)
        chat_history.append((user_message_text, response))
        # Clear the textbox and return the updated chat history
        return "", chat_history

    # Wire the submit event to the respond function
    msg.submit(
        respond,
        inputs=[msg, chatbot, hf_token_textbox],
        outputs=[msg, chatbot],
    )

if __name__ == "__main__":
    demo.launch()