import gradio as gr
import chromadb
from langchain_chroma import Chroma
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.documents import Document
from langchain_community.llms import HuggingFacePipeline
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Initialize in-memory ChromaDB client
client = chromadb.Client()

# Load your embeddings model
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-distilroberta-base-v1",
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Initialize the vector DB using the in-memory client
vectorDB = Chroma(
    client=client,
    collection_name="embeddings",
    embedding_function=embeddings,
)
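
# The in-memory client above loses all embeddings whenever the app restarts. A minimal
# sketch of a persistent alternative (an assumption, not used by this app; the
# "./chroma_db" path is a hypothetical example):
#
#     persistent_client = chromadb.PersistentClient(path="./chroma_db")
#     vectorDB = Chroma(
#         client=persistent_client,
#         collection_name="embeddings",
#         embedding_function=embeddings,
#     )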

# Function to process and ingest a PDF file
def process_pdf(file_path):
    # Use PyPDFLoader to load the PDF
    loader = PyPDFLoader(file_path)
    documents = loader.load()
    
    # Split the documents for better retrieval
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=30)
    texts = text_splitter.split_documents(documents)
    
    # Ingest into the vector store.
    # Note: a "docId" is attached to every chunk so retrieval can be filtered to this
    # document; it must match the docId used when querying ("uploaded_doc").
    metadata_chunks = []
    # Attach metadata and a stable id to each chunk
    for i, chunk in enumerate(texts):
        metadata = {"source": f"example_source_{i}", "docId": "uploaded_doc"}
        doc_with_metadata = Document(
            page_content=chunk.page_content, metadata=metadata, id=str(i)
        )
        metadata_chunks.append(doc_with_metadata)
 
    print("Done")

    # Add the documents to the vector database
    try:
        vectorDB.add_documents(metadata_chunks)
    except:
        raise Exception()
    gr.Info("PDF processed and ready for questions!")
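
# The "docId" metadata attached above is what lets retrieval be restricted to a single
# document. A minimal sketch of a filtered query (assuming a PDF has already been
# ingested; the query string is just an example):
#
#     hits = vectorDB.similarity_search("What is this document about?", k=4,
#                                       filter={"docId": "uploaded_doc"})
#     print([h.metadata for h in hits])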

# Generate an answer from the retrieved chunks and the conversation history
# using a locally loaded Hugging Face text-generation pipeline.
def generate_answer(query, results, conversation_history):
    history_text = ""
    for item in conversation_history:
        if "question" in item and item["question"]:
            history_text += f"User: {item['question']}\n"
        if "answer" in item and item["answer"]:
            history_text += f"Assistant: {item['answer']}\n"
    
    prompt_template = """
    You are a helpful assistant. Answer the following question using the provided context and the previous conversation history.
    If the context does not contain the answer, only then reply with: "Sorry, I don't have enough information."
    Conversation History: {history}
    Context: {results}
    Question: {query}
    """
    
    template = PromptTemplate(
        input_variables=["history", "results", "query"], template=prompt_template,
    )
    
    doc_texts = "\n".join([doc.page_content for doc in results])

    # Load the model and tokenizer (note: this runs on every call; see the caching sketch below)
    model_id = "meta-llama/Llama-3.2-1B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id)
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        repetition_penalty=1.2
    )

    llm = HuggingFacePipeline(pipeline=pipe)

    rag_chain = template | llm | StrOutputParser()
    
    answer = rag_chain.invoke({"history": history_text, "results": doc_texts, "query": query})
    
    return answer
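
# The pipeline above is rebuilt on every question, which makes each reply slow. A
# minimal caching sketch (an assumption about how one might restructure it, not part
# of the original app):
#
#     from functools import lru_cache
#
#     @lru_cache(maxsize=1)
#     def get_llm(model_id="meta-llama/Llama-3.2-1B-Instruct"):
#         tokenizer = AutoTokenizer.from_pretrained(model_id)
#         model = AutoModelForCausalLM.from_pretrained(model_id)
#         pipe = pipeline("text-generation", model=model, tokenizer=tokenizer,
#                         max_new_tokens=256, do_sample=True, temperature=0.7,
#                         top_p=0.95, repetition_penalty=1.2)
#         return HuggingFacePipeline(pipeline=pipe)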

def retrievingResponse(docId, query, conversation_history):
    retriever = vectorDB.as_retriever(
        search_type="similarity",
        search_kwargs={
            "k": 4,
            "filter": {"docId": docId}
        }
    )
    
    results = retriever.invoke(query)
    
    # Drop duplicate chunks before sending them to the LLM
    unique_results = []
    seen_texts = set()
    for result in results:
        if result.page_content not in seen_texts:
            unique_results.append(result)
            seen_texts.add(result.page_content)

    llm_result = generate_answer(query, unique_results, conversation_history)
    return llm_result

# Gradio wrapper: route uploads to PDF ingestion and text messages to the RAG chain
def gradio_rag_wrapper(message, history):
    # Check if a file has been uploaded
    # 'message' is a dictionary due to `multimodal=True`
    uploaded_files = message.get("files", [])
    
    # Process the PDF if it exists
    if uploaded_files:
        for file_path in uploaded_files:
            process_pdf(file_path)
        # Confirm the upload once every file has been ingested
        return "PDF uploaded and processed. You can now ask questions about the content."

    # Process the text query
    text_query = message.get("text", "")
    if not text_query.strip():
        # Handle cases where only a file was uploaded
        return "Please upload a document or enter a text query."

    rag_history = []
    for user_msg, bot_msg in history:
        # User turns may be dicts or tuples (when they contain files) or plain strings;
        # keep only the text portion for the conversation history
        user_text = user_msg.get("text", "") if isinstance(user_msg, dict) else user_msg
        if not isinstance(user_text, str):
            user_text = ""
        rag_history.append({"question": user_text, "answer": bot_msg})

    docId = "uploaded_doc"  # Must match the docId assigned in process_pdf()
    response = retrievingResponse(docId, text_query, rag_history)
    
    return response

# Create the Gradio interface with multimodal input
demo = gr.ChatInterface(
    fn=gradio_rag_wrapper,
    multimodal=True, # This enables file upload
    title="Contextual RAG Chatbot on Hugging Face Spaces",
    description="Upload a PDF file to start chatting!",
    textbox=gr.MultimodalTextbox(file_types=[".pdf"]), # Restrict file types
)
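
# A minimal way to exercise the pipeline without the Gradio UI (assumed usage; the
# PDF path is a hypothetical example):
#
#     process_pdf("example.pdf")
#     print(retrievingResponse("uploaded_doc", "What is this document about?", []))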

if __name__ == "__main__":
    # Seed the vector store with a dummy document so questions work before any PDF is uploaded
    vectorDB.add_documents([
        Document(
            page_content=(
                "This is a sample document about the history of artificial intelligence. "
                "It was created to demonstrate the RAG pipeline."
            ),
            metadata={"docId": "uploaded_doc"},
        )
    ])
    
    demo.launch()