riteshraut committed on
Commit
27481ac
·
1 Parent(s): becc8f7

fix/update in speed

Browse files
Files changed (1) hide show
  1. app.py +13 -13
app.py CHANGED
@@ -142,7 +142,7 @@ def upload_files():
142
  print("Starting RAG pipeline setup...")
143
 
144
  parent_splitter =RecursiveCharacterTextSplitter(chunk_size=1500,chunk_overlap=300)
145
- child_splitter = SemanticChunker(EMBEDDING_MODEL,breakpoint_threshold_type='percentile',breakpoint_threshold_amount=80)
146
 
147
  parent_docs = parent_splitter.split_documents(all_docs)
148
  doc_ids = [str(uuid.uuid4()) for _ in parent_docs]
@@ -216,25 +216,25 @@ def chat():
216
  print("="*50 + "\n")
217
 
218
  rewritten_query = chain_components["rewriter"].invoke({"question": question, "chat_history": get_session_history(session_id).messages})
219
- print(f"--- 1. Rewritten Query ---\n{rewritten_query}\n")
220
 
221
  hyde_doc = chain_components["hyde"].invoke({"question": rewritten_query})
222
- print(f"--- 2. HyDE Document ---\n{hyde_doc}\n")
223
 
224
  final_retrieved_docs = chain_components["base_retriever"].get_relevant_documents(hyde_doc)
225
- print(f"--- 3. Retrieved Top {len(final_retrieved_docs)} Child Docs ---")
226
- for i, doc in enumerate(final_retrieved_docs):
227
- print(f" Doc {i+1}: {doc.page_content[:150]}... (Source: {doc.metadata.get('source')})")
228
- print("\n")
229
 
230
  final_context_docs = chain_components["parent_fetcher"].invoke(final_retrieved_docs)
231
- print(f"--- 4. Final {len(final_context_docs)} Parent Docs for LLM ---")
232
- for i, doc in enumerate(final_context_docs):
233
- print(f" Final Doc {i+1} (Source: {doc.metadata.get('source')}, Page: {doc.metadata.get('page')}):\n '{doc.page_content[:300]}...'\n---")
234
 
235
- print("="*50)
236
- print("--- INVOKING FINAL CHAIN ---")
237
- print("="*50 + "\n")
238
 
239
  answer_string = chain_components["final_chain"].invoke({"question": question}, config=config)
240
 
 
142
  print("Starting RAG pipeline setup...")
143
 
144
  parent_splitter =RecursiveCharacterTextSplitter(chunk_size=1500,chunk_overlap=300)
145
+ child_splitter = RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=50)
146
 
147
  parent_docs = parent_splitter.split_documents(all_docs)
148
  doc_ids = [str(uuid.uuid4()) for _ in parent_docs]
 
216
  print("="*50 + "\n")
217
 
218
  rewritten_query = chain_components["rewriter"].invoke({"question": question, "chat_history": get_session_history(session_id).messages})
219
+ #print(f"--- 1. Rewritten Query ---\n{rewritten_query}\n")
220
 
221
  hyde_doc = chain_components["hyde"].invoke({"question": rewritten_query})
222
+ #print(f"--- 2. HyDE Document ---\n{hyde_doc}\n")
223
 
224
  final_retrieved_docs = chain_components["base_retriever"].get_relevant_documents(hyde_doc)
225
+ #print(f"--- 3. Retrieved Top {len(final_retrieved_docs)} Child Docs ---")
226
+ #for i, doc in enumerate(final_retrieved_docs):
227
+ #print(f" Doc {i+1}: {doc.page_content[:150]}... (Source: {doc.metadata.get('source')})")
228
+ #print("\n")
229
 
230
  final_context_docs = chain_components["parent_fetcher"].invoke(final_retrieved_docs)
231
+ #print(f"--- 4. Final {len(final_context_docs)} Parent Docs for LLM ---")
232
+ #for i, doc in enumerate(final_context_docs):
233
+ #print(f" Final Doc {i+1} (Source: {doc.metadata.get('source')}, Page: {doc.metadata.get('page')}):\n '{doc.page_content[:300]}...'\n---")
234
 
235
+ #print("="*50)
236
+ #print("--- INVOKING FINAL CHAIN ---")
237
+ #print("="*50 + "\n")
238
 
239
  answer_string = chain_components["final_chain"].invoke({"question": question}, config=config)
240