import gradio as gr
import os
import chromadb
from langchain_chroma import Chroma
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.documents import Document
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from huggingface_hub import login

# Initialize an in-memory ChromaDB client
client = chromadb.Client()

# Load the embeddings model (CPU, with L2-normalized vectors)
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-distilroberta-base-v1",
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)
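
# A quick sanity-check sketch (not called by the app): with
# normalize_embeddings=True the vectors come back unit-length, so cosine
# similarity reduces to a dot product.
def _example_embedding_norm():
    vec = embeddings.embed_query("hello world")
    print(len(vec), sum(v * v for v in vec))  # dimension, squared norm ~= 1.0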

# Initialize the vector store using the in-memory client
vectorDB = Chroma(
    client=client,
    collection_name="embeddings",
    embedding_function=embeddings,
)

# Process a PDF file and ingest it into the vector store
def process_pdf(file_path):
    # Use PyPDFLoader to load the PDF
    loader = PyPDFLoader(file_path)
    documents = loader.load()

    # Split the documents into smaller chunks for better retrieval
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=30)
    texts = text_splitter.split_documents(documents)

    # Attach metadata to each chunk; the "docId" groups all chunks of this
    # upload so retrieval can later be filtered to a single document
    metadata_chunks = []
    for i, chunk in enumerate(texts):
        metadata = {"source": f"example_source_{i}", "docId": "42"}
        doc_with_metadata = Document(
            page_content=chunk.page_content, metadata=metadata, id=str(i)
        )
        metadata_chunks.append(doc_with_metadata)

    # Add the documents to the vector database
    vectorDB.add_documents(metadata_chunks)
    gr.Info("PDF processed and ready for questions!")
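
# A minimal sketch (not wired into the app) of exercising ingestion and a
# filtered search directly; "sample.pdf" is a hypothetical local file, and
# "docId": "42" matches the metadata assigned in process_pdf.
def _example_ingest_and_search():
    process_pdf("sample.pdf")
    hits = vectorDB.similarity_search(
        "What is this document about?", k=2, filter={"docId": "42"}
    )
    for hit in hits:
        print(hit.metadata["source"], hit.page_content[:80])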

# Build the prompt, load the LLM, and generate an answer.
# Note: the model is loaded on every call; caching the pipeline at module
# level would speed up repeat queries.
def generate_answer(query, results, conversation_history, token):
    try:
        if token:
            gr.Info("Attempting to log in to Hugging Face...")
            login(token=token)
            gr.Info("Login successful!")
        else:
            gr.Warning("No Hugging Face token provided. Gated models may not be accessible.")
    except Exception as e:
        # gr.Error is an exception class and only displays when raised, so
        # surface the failure as a warning toast plus a chat message
        gr.Warning(f"Hugging Face login failed: {e}")
        return "An error occurred during authentication. Please check your token and try again."

    # Flatten the conversation history into plain text for the prompt
    history_text = ""
    for item in conversation_history:
        if "question" in item and item["question"]:
            history_text += f"User: {item['question']}\n"
        if "answer" in item and item["answer"]:
            history_text += f"Assistant: {item['answer']}\n"

    prompt_template = """
You are a helpful assistant. Answer the following question using the provided context and previous conversation history.
If the context does not contain the answer, only then reply with: "Sorry, I don't have enough information."

Conversation History:
{history}

Context:
{results}

Question:
{query}
"""
    template = PromptTemplate(
        input_variables=["history", "results", "query"], template=prompt_template,
    )

    doc_texts = "\n".join([doc.page_content for doc in results])

    # Load the model and wrap it in a text-generation pipeline
    model_id = "meta-llama/Llama-3.2-3B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id)
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        repetition_penalty=1.2,
    )
    llm = HuggingFacePipeline(pipeline=pipe)

    # Prompt -> LLM -> plain-string output
    rag_chain = template | llm | StrOutputParser()
    answer = rag_chain.invoke({"history": history_text, "results": doc_texts, "query": query})
    return answer
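
# A small standalone sketch showing what a prompt template renders to before
# it reaches the model; handy when debugging context formatting. The template
# here is a simplified stand-in, not the one used above.
def _example_render_prompt():
    t = PromptTemplate(
        input_variables=["history", "results", "query"],
        template="History:\n{history}\nContext:\n{results}\nQ: {query}",
    )
    print(t.format(history="User: hi", results="Some chunk text.", query="What is X?"))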

# Retrieve the relevant chunks for a query (filtered by docId) and ask the LLM
def retrievingResponse(docId, query, conversation_history, token):
    retriever = vectorDB.as_retriever(
        search_type="similarity",
        search_kwargs={
            "k": 4,
            "filter": {"docId": docId},
        },
    )
    results = retriever.invoke(query)

    # Drop duplicate chunks so the prompt context stays compact
    unique_results = []
    seen_texts = set()
    for result in results:
        if result.page_content not in seen_texts:
            unique_results.append(result)
            seen_texts.add(result.page_content)

    llm_result = generate_answer(query, unique_results, conversation_history, token)
    return llm_result
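
# A minimal sketch (not called by the app) of running the full RAG path
# outside the UI. HF_TOKEN is an assumed environment variable holding a valid
# Hugging Face access token, and a PDF is assumed to have been ingested.
def _example_direct_query():
    token = os.environ.get("HF_TOKEN", "")
    return retrievingResponse("42", "Summarize the document.", [], token)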

# Gradio wrapper: route uploads to ingestion and text queries to retrieval
def gradio_rag_wrapper(message, history, token):
    # 'message' is a dictionary because the input textbox is multimodal
    uploaded_files = message.get("files", [])

    # Process the PDF if one was attached
    if uploaded_files:
        for file_path in uploaded_files:
            process_pdf(file_path)
        # Return a message to confirm the upload
        return "PDF uploaded and processed. You can now ask questions about the content."

    # Process the text query
    text_query = message.get("text", "")
    if not text_query.strip():
        # Handle the case where neither a file nor a query was provided
        return "Please upload a document or enter a text query."

    # Convert the chat history into the question/answer dicts the prompt
    # expects; user messages may be dicts when they contained files
    rag_history = []
    for user_msg, bot_msg in history:
        user_text = user_msg.get("text", "") if isinstance(user_msg, dict) else user_msg
        rag_history.append({"question": user_text, "answer": bot_msg})

    docId = "42"  # Matches the docId assigned during ingestion
    response = retrievingResponse(docId, text_query, rag_history, token)
    return response
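
# For reference, tuple-style Chatbot history arrives as pairs like
# [("first question", "first answer"), ({"text": "...", "files": [...]}, "answer")],
# which gradio_rag_wrapper flattens into {"question": ..., "answer": ...} dicts.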

# Build the Gradio interface with multimodal input
with gr.Blocks(title="Contextual RAG Chatbot on Hugging Face Spaces") as demo:
    gr.Markdown("## Contextual RAG Chatbot")
    gr.Markdown(
        "Please enter your Hugging Face access token to use gated models such as Llama 3.2. "
        "You can generate a token from your [Hugging Face settings](https://huggingface.co/settings/tokens)."
    )
    hf_token_textbox = gr.Textbox(
        label="Hugging Face Access Token",
        type="password",
        interactive=True,
    )

    # Use gr.Chatbot and gr.MultimodalTextbox for more control over the layout
    chatbot = gr.Chatbot(label="Chatbot")
    msg = gr.MultimodalTextbox(
        placeholder="Upload a PDF file or enter your query...",
        file_types=[".pdf"],
        interactive=True,
    )

    # Submit handler: process the user input and update the chatbot
    def respond(message, chat_history, hf_token_from_textbox):
        # The wrapper receives the token from the text box above
        user_message_text = message.get("text") or "File uploaded."
        response = gradio_rag_wrapper(message, chat_history, hf_token_from_textbox)
        chat_history.append((user_message_text, response))
        # Clear the textbox and return the updated history
        return "", chat_history

    # Wire the submit event to the respond function
    msg.submit(
        respond,
        inputs=[msg, chatbot, hf_token_textbox],
        outputs=[msg, chatbot],
    )

if __name__ == "__main__":
    demo.launch()