Pranjal Gupta committed on
Commit
1d93192
·
1 Parent(s): 6c68252

pdf uploader

Browse files
Files changed (1) hide show
  1. app.py +53 -20
app.py CHANGED
@@ -11,14 +11,14 @@ from langchain_core.output_parsers import StrOutputParser
11
  from langchain_ollama import ChatOllama
12
  from langchain_core.documents import Document
13
  from langchain_community.llms import HuggingFacePipeline
14
-
 
15
 
16
  # Initialize in-memory ChromaDB client
17
- # This client runs entirely within the app.py script.
18
  client = chromadb.Client()
19
 
20
  # Load your embeddings model
21
- model_kwargs = {"device": "cpu"} # Hugging Face Spaces typically use CPU for free tiers
22
  encode_kwargs = {"normalize_embeddings": True}
23
  embeddings = HuggingFaceEmbeddings(
24
  model_name="sentence-transformers/paraphrase-distilroberta-base-v1",
@@ -27,18 +27,28 @@ embeddings = HuggingFaceEmbeddings(
27
  )
28
 
29
  # Initialize the vector DB using the in-memory client
30
- # You'll need to embed your documents here. In a real-world app, you'd load them from a file.
31
- # For a demo, let's create a dummy document.
32
  vectorDB = Chroma(
33
  client=client,
34
  collection_name="embeddings",
35
  embedding_function=embeddings,
36
  )
37
- # Example of adding a document. You would replace this with your actual documents.
38
- sample_doc = "This is a sample document about the history of artificial intelligence. It was created to demonstrate the RAG pipeline."
39
- vectorDB.add_documents([Document(page_content=sample_doc, metadata={"docId": "my_doc_id"})])
40
 
41
- # Your existing functions without the HttpClient call
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  def using_ollama_model(retriever, query, results, conversation_history):
43
  history_text = ""
44
  for item in conversation_history:
@@ -60,8 +70,7 @@ def using_ollama_model(retriever, query, results, conversation_history):
60
  )
61
 
62
  doc_texts = "\\n".join([doc.page_content for doc in results])
63
- model_id = "meta-llama/Llama-3.2-1B-Instruct" # was llama3.2
64
- # llm = ChatOllama(model="llama3.2", temperature=0.4, num_predict=512)
65
  tokenizer = AutoTokenizer.from_pretrained(model_id)
66
  model = AutoModelForCausalLM.from_pretrained(model_id)
67
  pipe = pipeline(
@@ -75,10 +84,8 @@ def using_ollama_model(retriever, query, results, conversation_history):
75
  repetition_penalty=1.2
76
  )
77
 
78
- # Use the pipeline with LangChain's HuggingFacePipeline
79
  llm = HuggingFacePipeline(pipeline=pipe)
80
 
81
-
82
  rag_chain = template | llm | StrOutputParser()
83
 
84
  answer = rag_chain.invoke({"history": history_text, "results": doc_texts, "query": query})
@@ -107,22 +114,48 @@ def retrievingReponse(docId, query, conversation_history):
107
  llm_result = using_ollama_model(retriever, query, results, conversation_history)
108
  return llm_result
109
 
110
- # Gradio interface
111
- def gradio_rag_wrapper(query, history):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  rag_history = []
113
  for user_msg, bot_msg in history:
114
- rag_history.append({"question": user_msg, "answer": bot_msg})
 
 
 
115
 
116
- docId = "my_doc_id"
117
- response = retrievingReponse(docId, query, rag_history)
118
 
119
  return response
120
 
 
121
  demo = gr.ChatInterface(
122
  fn=gradio_rag_wrapper,
 
123
  title="Contextual RAG Chatbot on Hugging Face Spaces",
124
- description="Ask questions about the document to get answers.",
 
125
  )
126
 
127
  if __name__ == "__main__":
128
- demo.launch()
 
 
 
 
11
  from langchain_ollama import ChatOllama
12
  from langchain_core.documents import Document
13
  from langchain_community.llms import HuggingFacePipeline
14
+ from langchain_community.document_loaders import PyPDFLoader
15
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
16
 
17
  # Initialize in-memory ChromaDB client
 
18
  client = chromadb.Client()
19
 
20
  # Load your embeddings model
21
+ model_kwargs = {"device": "cpu"}
22
  encode_kwargs = {"normalize_embeddings": True}
23
  embeddings = HuggingFaceEmbeddings(
24
  model_name="sentence-transformers/paraphrase-distilroberta-base-v1",
 
27
  )
28
 
29
  # Initialize the vector DB using the in-memory client
 
 
30
  vectorDB = Chroma(
31
  client=client,
32
  collection_name="embeddings",
33
  embedding_function=embeddings,
34
  )
 
 
 
35
 
36
+ # Function to process and ingest a PDF file
37
def process_pdf(file_path):
    """Load a PDF, split it into chunks, and index the chunks in ``vectorDB``.

    Every chunk is tagged with ``docId="uploaded_doc"`` in its metadata; the
    chat wrapper in this file passes that same docId when asking for a
    response.

    Args:
        file_path: Path of the PDF file to ingest.

    Returns:
        None. Progress is reported to the user through Gradio notifications.
    """
    # Use PyPDFLoader to load the PDF.
    loader = PyPDFLoader(file_path)
    documents = loader.load()

    # Split the documents into small overlapping chunks for better retrieval.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=30)
    texts = text_splitter.split_documents(documents)

    if not texts:
        # Nothing to index (e.g. the loader extracted no text); skip the
        # vector-store call rather than sending it an empty batch.
        gr.Warning("No extractable text was found in the uploaded PDF.")
        return

    # add_documents() takes only the document list positionally, so the
    # grouping docId must live on each chunk's own metadata instead of being
    # passed as a second positional argument.
    for chunk in texts:
        chunk.metadata["docId"] = "uploaded_doc"

    vectorDB.add_documents(texts)
    gr.Info("PDF processed and ready for questions!")
50
+
51
+ # Your existing functions
52
  def using_ollama_model(retriever, query, results, conversation_history):
53
  history_text = ""
54
  for item in conversation_history:
 
70
  )
71
 
72
  doc_texts = "\\n".join([doc.page_content for doc in results])
73
+ model_id = "meta-llama/Llama-3.2-1B-Instruct"
 
74
  tokenizer = AutoTokenizer.from_pretrained(model_id)
75
  model = AutoModelForCausalLM.from_pretrained(model_id)
76
  pipe = pipeline(
 
84
  repetition_penalty=1.2
85
  )
86
 
 
87
  llm = HuggingFacePipeline(pipeline=pipe)
88
 
 
89
  rag_chain = template | llm | StrOutputParser()
90
 
91
  answer = rag_chain.invoke({"history": history_text, "results": doc_texts, "query": query})
 
114
  llm_result = using_ollama_model(retriever, query, results, conversation_history)
115
  return llm_result
116
 
117
+ # The revised Gradio wrapper function
118
def gradio_rag_wrapper(message, history):
    """Handle one multimodal chat turn: ingest uploaded PDFs, then answer.

    Args:
        message: Dict produced by the multimodal textbox; the keys read here
            are ``"files"`` (uploaded files) and ``"text"`` (the typed query).
        history: Previous turns, iterated as ``(user_message, bot_message)``
            pairs.

    Returns:
        The reply string: a confirmation when only files were sent, a prompt
        to provide input when nothing usable was sent, or the RAG answer for
        the typed query.
    """
    uploaded_files = message.get("files") or []
    text_query = message.get("text") or ""

    # Ingest every uploaded PDF first so a question sent in the same turn can
    # already be answered from the new content.
    for uploaded in uploaded_files:
        # NOTE(review): some Gradio versions appear to deliver file entries as
        # dicts carrying a "path" key instead of plain path strings; confirm.
        file_path = uploaded.get("path") if isinstance(uploaded, dict) else uploaded
        process_pdf(file_path)

    if not text_query.strip():
        if uploaded_files:
            # File-only turn: just confirm the upload.
            return "PDF uploaded and processed. You can now ask questions about the content."
        return "Please upload a document or enter a text query."

    rag_history = []
    for user_msg, bot_msg in history:
        # User messages may be dicts (text plus files) or plain strings.
        user_text = user_msg.get("text", "") if isinstance(user_msg, dict) else user_msg
        # Skip turns without typed text (presumably file-only uploads, which
        # are not strings here) so they do not become bogus "questions".
        if not isinstance(user_text, str) or not user_text.strip():
            continue
        rag_history.append({"question": user_text, "answer": bot_msg or ""})

    doc_id = "uploaded_doc"  # Must match the docId that process_pdf assigns.
    return retrievingReponse(doc_id, text_query, rag_history)
147
 
148
+ # Create the Gradio interface with multimodal input
149
  demo = gr.ChatInterface(
150
  fn=gradio_rag_wrapper,
151
+ multimodal=True, # This enables file upload
152
  title="Contextual RAG Chatbot on Hugging Face Spaces",
153
+ description="Upload a PDF file to start chatting!",
154
+ textbox=gr.MultimodalTextbox(file_types=[".pdf"]), # Restrict file types
155
  )
156
 
157
  if __name__ == "__main__":
158
+ # Create a dummy doc for initial testing if no PDF is uploaded
159
+ vectorDB.add_documents([Document(page_content="This is a sample document about the history of artificial intelligence. It was created to demonstrate the RAG pipeline.", metadata={"docId": "uploaded_doc"})])
160
+
161
+ demo.launch()