Cogni-Chat-document-reader-v2 / test_dependencies.py
riteshraut
fix/new update
becc8f7
import os
import sys
print("CogniChat Dependencies & PDF Handling Test")

# Core import check: the names bound here (Document, BM25Retriever, ...) are
# used by the checks further down, so an ImportError aborts the whole run
# with exit status 1 instead of continuing.
try:
    print("\nTesting core imports...")
    from langchain_community.embeddings import HuggingFaceEmbeddings
    from langchain_community.retrievers import BM25Retriever
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain_core.documents import Document
except ImportError as import_error:
    print(f"Import error: {import_error}")
    # Give a concrete install hint when the failure mentions rank_bm25.
    if "rank_bm25" in str(import_error):
        print("Missing dependency: pip install rank-bm25==0.2.2")
    sys.exit(1)
else:
    # Reached only when every import above succeeded.
    print("Core LangChain imports successful!")
# Optional PDF backends: each one is probed independently and only reported;
# a missing backend never stops the script. The outer handler catches
# anything other than ImportError that a probe raises, and reports it
# without exiting.
try:
    print("\nTesting PDF loading capabilities...")

    # Probe 1: LangChain's PyPDFLoader.
    try:
        from langchain_community.document_loaders import PyPDFLoader
    except ImportError:
        print("PyPDFLoader not available")
    else:
        print("PyPDFLoader available")

    # Probe 2: PyMuPDF, imported under its module name `fitz`.
    try:
        import fitz
    except ImportError:
        print("PyMuPDF (fitz) not available")
    else:
        print("PyMuPDF (fitz) available - can handle corrupted PDFs")

    # Probe 3: pdfplumber.
    try:
        import pdfplumber
    except ImportError:
        print("pdfplumber not available")
    else:
        print("pdfplumber available - additional PDF parsing method")
except Exception as probe_error:
    print(f"Error testing PDF capabilities: {probe_error}")
# BM25 smoke test: build a retriever over three tiny in-memory documents and
# run a single query against it. Any failure prints the error and a
# traceback, then exits with status 1.
try:
    print("\nTesting BM25 Retriever...")
    test_docs = [
        Document(page_content="This is the first test document about machine learning."),
        Document(page_content="This is the second document discussing natural language processing."),
        Document(page_content="The third document covers artificial intelligence topics."),
    ]
    bm25_retriever = BM25Retriever.from_documents(test_docs)
    bm25_retriever.k = 2
    query = "machine learning"
    # invoke() is the supported retriever entry point; the older
    # get_relevant_documents() is deprecated in langchain-core.
    results = bm25_retriever.invoke(query)
    print("BM25 retriever created and tested successfully!")
    print(f"Retrieved {len(results)} documents for query: '{query}'")
except Exception as e:
    # Keep this message plain ASCII: a non-encodable glyph here could raise
    # UnicodeEncodeError on non-UTF-8 consoles and hide the real failure.
    print(f"Error testing BM25 retriever: {e}")
    # NOTE(review): BM25Retriever appears to import rank_bm25 only when the
    # index is built, so a missing package would surface here rather than in
    # the import check - confirm against the installed langchain-community.
    if isinstance(e, ImportError) and "rank_bm25" in str(e):
        print("Missing dependency: pip install rank-bm25==0.2.2")
    import traceback
    traceback.print_exc()
    sys.exit(1)
# Closing summary, emitted as one newline-joined print so the output stream
# is the same as printing each line separately.
print(
    "\n".join(
        [
            # Overall status.
            "\nAll tests completed successfully!",
            # What the application is expected to handle.
            "\nThe application should now handle:",
            " • Regular file uploads and processing",
            " • Corrupted PDF files with multiple fallback methods",
            " • BM25 and FAISS hybrid retrieval",
            " • Proper error messages for failed file processing",
            # Installation reminder.
            "\nMake sure to install all dependencies with:",
            " pip install -r requirements.txt",
            # Pinned packages called out by this script.
            "\nKey Dependencies Added/Updated",
            " • rank-bm25==0.2.2 (for BM25 retrieval)",
            " • pymupdf==1.23.26 (PDF fallback method)",
            " • pdfplumber==0.10.3 (additional PDF parsing)",
        ]
    )
)