File size: 7,757 Bytes
4a68d7a
 
 
 
 
 
d3935d3
4a68d7a
 
 
 
 
d3935d3
1d93192
 
52e00f2
 
4a68d7a
 
 
 
 
1d93192
4a68d7a
 
 
 
 
 
 
 
 
 
 
 
 
 
1d93192
 
 
 
 
 
 
 
 
 
 
 
d272c6c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d93192
 
 
75c55bc
59a7048
75c55bc
 
 
 
 
 
 
 
 
 
 
4a68d7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52e00f2
6c68252
 
d3935d3
 
 
 
 
 
 
 
 
 
 
 
 
4a68d7a
 
 
 
 
 
75c55bc
4a68d7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75c55bc
4a68d7a
 
1d93192
75c55bc
70d6f98
 
1d93192
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a68d7a
 
1d93192
 
 
 
4a68d7a
fd7e9f9
75c55bc
4a68d7a
 
 
1d93192
52e00f2
 
 
1ee6c81
7bd0b80
9d39028
7bd0b80
 
9d39028
52e00f2
7bd0b80
 
 
 
 
 
 
 
 
 
 
 
c4f2134
 
7bd0b80
c4f2134
7bd0b80
 
 
 
 
 
 
52e00f2
4a68d7a
 
fd7e9f9
1d93192
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
import gradio as gr
import os
import time
import chromadb
from langchain_chroma import Chroma
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama import ChatOllama
from langchain_core.documents import Document
from langchain_community.llms import HuggingFacePipeline
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from huggingface_hub import login


# Initialize in-memory ChromaDB client
client = chromadb.Client()

# Load your embeddings model
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-distilroberta-base-v1",
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Initialize the vector DB using the in-memory client
vectorDB = Chroma(
    client=client,
    collection_name="embeddings",
    embedding_function=embeddings,
)

# Function to process and ingest a PDF file
def process_pdf(file_path):
    # Use PyPDFLoader to load the PDF
    loader = PyPDFLoader(file_path)
    documents = loader.load()
    
    # Split the documents for better retrieval
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=30)
    texts = text_splitter.split_documents(documents)
    
    # Ingest into the vector store
    # Note: A docId is added to group these documents
    metadata_chunks = []
    # Concatenate all chunks into a single string
    for i, chunk in enumerate(texts):
        # Add metadata to each chunk
        metadata = {"source": f"example_source_{i}", "docId":str("42")}
        id = str(i)
        doc_with_metadata = Document(
            page_content=chunk.page_content, metadata=metadata, id=id,docId="42"
        )
        metadata_chunks.append(doc_with_metadata)
 
    print("Done")

    # Add the documents to the vector database
    try:
        vectorDB.add_documents(metadata_chunks)
    except:
        raise Exception()
    gr.Info("PDF processed and ready for questions!")

# Your existing functions
def using_ollama_model(retriever, query, results, conversation_history, token):
    print("toekn----------->", token)
    try:
        if token:
            gr.Info("Attempting to log in to Hugging Face...")
            login(token=token)
            gr.Info("Login successful!")
        else:
            gr.Warning("No Hugging Face token provided. Gated models may not be accessible.")
    except Exception as e:
        gr.Error(f"Hugging Face login failed: {e}")
        return "An error occurred during authentication. Please check your token and try again."

    history_text = ""
    for item in conversation_history:
        if "question" in item and item["question"]:
            history_text += f"User: {item['question']}\n"
        if "answer" in item and item["answer"]:
            history_text += f"Assistant: {item['answer']}\n"
    
    prompt_template = """
    You are a helpful assistant. Answer the following question using the provided context and previous conversation history.
    If the context does not contain the answer, only then reply with: "Sorry, I don't have enough information."
    Conversation History :{history} 
    Context:{results}
    Question:{query}
    """
    
    template = PromptTemplate(
        input_variables=["history", "results", "query"], template=prompt_template,
    )
    
    doc_texts = "\\n".join([doc.page_content for doc in results])
    model_id = "meta-llama/Llama-3.2-3B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id)
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        repetition_penalty=1.2
    )

    llm = HuggingFacePipeline(pipeline=pipe)

    rag_chain = template | llm | StrOutputParser()
    
    answer = rag_chain.invoke({"history": history_text, "results": doc_texts, "query": query})
    
    return answer

def retrievingReponse(docId, query, conversation_history, token):
    retriever = vectorDB.as_retriever(
        search_type="similarity",
        search_kwargs={
            "k": 4,
            "filter": {"docId": docId}
        }
    )
    
    results = retriever.invoke(query)
    
    unique_results = []
    seen_texts = set()
    for result in results:
        if result.page_content not in seen_texts:
            ans = result.page_content.replace("\n", "")
            unique_results.append(ans)
            seen_texts.add(result.page_content)

    llm_result = using_ollama_model(retriever, query, results, conversation_history, token)
    return llm_result

# The revised Gradio wrapper function
def gradio_rag_wrapper(message, history, token):

    print(history)
    # Check if a file has been uploaded
    # 'message' is a dictionary due to `multimodal=True`
    uploaded_files = message.get("files", [])
    
    # Process the PDF if it exists
    if uploaded_files:
        for file_path in uploaded_files:
            process_pdf(file_path)
            # Return a message to confirm the upload
            return "PDF uploaded and processed. You can now ask questions about the content."

    # Process the text query
    text_query = message.get("text", "")
    if not text_query.strip():
        # Handle cases where only a file was uploaded
        return "Please upload a document or enter a text query."

    rag_history = []
    for user_msg, bot_msg in history:
        # Note: You need to extract the text from user messages which may contain files
        user_text = user_msg.get("text", "") if isinstance(user_msg, dict) else user_msg
        
        rag_history.append({"question": user_text, "answer": bot_msg})

    docId = "42" # Use the docId from the uploaded file
    response = retrievingReponse(docId, text_query, rag_history, token)
    
    return response

# Create the Gradio interface with multimodal input
with gr.Blocks(title="Contextual RAG Chatbot on Hugging Face Spaces") as demo:
    gr.Markdown("## Contextual RAG Chatbot")
    gr.Markdown("Please enter your Hugging Face Access Token to access gated models like Llama 3.2. You can generate a token from your [Hugging Face settings](https://huggingface.co/settings/tokens).")

    hf_token_textbox = gr.Textbox(
        label="Hugging Face Access Token",
        type="password",
        interactive=True
    )
    
    # Use gr.Chatbot and gr.MultimodalTextbox for more control
    chatbot = gr.Chatbot(label="Chatbot")
    
    msg = gr.MultimodalTextbox(
        placeholder="Upload a PDF file or enter your query...", 
        file_types=[".pdf"],
        interactive=True
    )
    
    # Submit handler to process user input and update the chatbot
    def respond(message, chat_history, hf_token_from_textbox):
        # The wrapper function now correctly receives the token from the text box
        user_message_text = message.get("text") or "File uploaded."

        response = gradio_rag_wrapper(message, chat_history, hf_token_from_textbox)
        chat_history.append((user_message_text, response))
        return "", chat_history
    
    # Define the submit event to call the respond function
    msg.submit(
        respond,
        inputs=[msg, chatbot, hf_token_textbox],
        outputs=[msg, chatbot],
    )

if __name__ == "__main__":
    # Create a dummy doc for initial testing if no PDF is uploaded
    
    demo.launch()