import gradio as gr
import chromadb
from langchain_chroma import Chroma
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.documents import Document
from langchain_community.llms import HuggingFacePipeline
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from huggingface_hub import login
# Initialize in-memory ChromaDB client
client = chromadb.Client()
# Load your embeddings model
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
embeddings = HuggingFaceEmbeddings(
model_name="sentence-transformers/paraphrase-distilroberta-base-v1",
model_kwargs=model_kwargs,
encode_kwargs=encode_kwargs,
)
# Initialize the vector DB using the in-memory client
vectorDB = Chroma(
client=client,
collection_name="embeddings",
embedding_function=embeddings,
)
# Function to process and ingest a PDF file
def process_pdf(file_path):
# Use PyPDFLoader to load the PDF
loader = PyPDFLoader(file_path)
documents = loader.load()
# Split the documents for better retrieval
text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=30)
texts = text_splitter.split_documents(documents)
# Ingest into the vector store
# Note: A docId is added to group these documents
metadata_chunks = []
    # Attach metadata (including the grouping docId) to each chunk
    for i, chunk in enumerate(texts):
        metadata = {"source": f"example_source_{i}", "docId": "42"}
        doc_with_metadata = Document(
            page_content=chunk.page_content, metadata=metadata, id=str(i)
        )
metadata_chunks.append(doc_with_metadata)
print("Done")
# Add the documents to the vector database
    try:
        vectorDB.add_documents(metadata_chunks)
    except Exception as e:
        raise RuntimeError(f"Failed to add documents to the vector store: {e}") from e
gr.Info("PDF processed and ready for questions!")
# RAG helper functions
def generate_answer(retriever, query, results, conversation_history, token):
    # Authenticate with Hugging Face when a token is provided (required for gated models)
try:
if token:
gr.Info("Attempting to log in to Hugging Face...")
login(token=token)
gr.Info("Login successful!")
else:
gr.Warning("No Hugging Face token provided. Gated models may not be accessible.")
except Exception as e:
gr.Error(f"Hugging Face login failed: {e}")
return "An error occurred during authentication. Please check your token and try again."
history_text = ""
for item in conversation_history:
if "question" in item and item["question"]:
history_text += f"User: {item['question']}\n"
if "answer" in item and item["answer"]:
history_text += f"Assistant: {item['answer']}\n"
prompt_template = """
You are a helpful assistant. Answer the following question using the provided context and previous conversation history.
If the context does not contain the answer, only then reply with: "Sorry, I don't have enough information."
Conversation History :{history}
Context:{results}
Question:{query}
"""
template = PromptTemplate(
input_variables=["history", "results", "query"], template=prompt_template,
)
    doc_texts = "\n".join([doc.page_content for doc in results])
model_id = "meta-llama/Llama-3.2-3B-Instruct"
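    # Note: the model and tokenizer are loaded on every call; caching them at
    # module level would avoid repeating this expensive step for each query.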
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
pipe = pipeline(
"text-generation",
model=model,
tokenizer=tokenizer,
max_new_tokens=256,
do_sample=True,
temperature=0.7,
top_p=0.95,
repetition_penalty=1.2
)
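    # Wrap the transformers pipeline so it can be used as a LangChain-compatible LLM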
llm = HuggingFacePipeline(pipeline=pipe)
rag_chain = template | llm | StrOutputParser()
answer = rag_chain.invoke({"history": history_text, "results": doc_texts, "query": query})
return answer
def retrievingResponse(docId, query, conversation_history, token):
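    # Similarity retriever restricted to chunks whose metadata docId matches this document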
retriever = vectorDB.as_retriever(
search_type="similarity",
search_kwargs={
"k": 4,
"filter": {"docId": docId}
}
)
results = retriever.invoke(query)
unique_results = []
seen_texts = set()
for result in results:
if result.page_content not in seen_texts:
ans = result.page_content.replace("\n", "")
unique_results.append(ans)
seen_texts.add(result.page_content)
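    # Note: unique_results holds the de-duplicated text, but the raw `results`
    # documents are what get passed to the LLM below.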
    llm_result = generate_answer(retriever, query, results, conversation_history, token)
return llm_result
# Gradio wrapper that routes chat messages (text and/or PDF uploads) through the RAG pipeline
def gradio_rag_wrapper(message, history, token):
print(history)
# Check if a file has been uploaded
# 'message' is a dictionary due to `multimodal=True`
uploaded_files = message.get("files", [])
# Process the PDF if it exists
if uploaded_files:
for file_path in uploaded_files:
process_pdf(file_path)
# Return a message to confirm the upload
return "PDF uploaded and processed. You can now ask questions about the content."
# Process the text query
text_query = message.get("text", "")
if not text_query.strip():
# Handle cases where only a file was uploaded
return "Please upload a document or enter a text query."
rag_history = []
for user_msg, bot_msg in history:
        # The user turn may be a dict (when a file was attached), so extract just the text
user_text = user_msg.get("text", "") if isinstance(user_msg, dict) else user_msg
rag_history.append({"question": user_text, "answer": bot_msg})
    docId = "42"  # All ingested chunks are tagged with this fixed docId
    response = retrievingResponse(docId, text_query, rag_history, token)
return response
# Create the Gradio interface with multimodal input
with gr.Blocks(title="Contextual RAG Chatbot on Hugging Face Spaces") as demo:
gr.Markdown("## Contextual RAG Chatbot")
gr.Markdown("Please enter your Hugging Face Access Token to access gated models like Llama 3.2. You can generate a token from your [Hugging Face settings](https://huggingface.co/settings/tokens).")
hf_token_textbox = gr.Textbox(
label="Hugging Face Access Token",
type="password",
interactive=True
)
# Use gr.Chatbot and gr.MultimodalTextbox for more control
chatbot = gr.Chatbot(label="Chatbot")
msg = gr.MultimodalTextbox(
placeholder="Upload a PDF file or enter your query...",
file_types=[".pdf"],
interactive=True
)
# Submit handler to process user input and update the chatbot
def respond(message, chat_history, hf_token_from_textbox):
        # Pass the token from the text box through to the RAG pipeline
user_message_text = message.get("text") or "File uploaded."
response = gradio_rag_wrapper(message, chat_history, hf_token_from_textbox)
chat_history.append((user_message_text, response))
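        # First output clears the MultimodalTextbox; second output refreshes the chatbot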
return "", chat_history
# Define the submit event to call the respond function
msg.submit(
respond,
inputs=[msg, chatbot, hf_token_textbox],
outputs=[msg, chatbot],
)
if __name__ == "__main__":
    demo.launch()