Spaces:
Sleeping
Sleeping
# Sparse lexical scorer (Okapi BM25) — the sparse half of hybrid search.
from rank_bm25 import BM25Okapi
# NOTE(review): `Qdrant` is imported but never referenced in this file as
# shown — the store is obtained via chatbot.retrieval.get_vector_db().
# `langchain.vectorstores` is also a deprecated import path; confirm before
# removing or upgrading.
from langchain.vectorstores import Qdrant
from chatbot.retrieval import get_vector_db
from langchain_huggingface import HuggingFaceEmbeddings
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Eagerly loads the sentence-transformer at import time (downloads weights on
# first run). NOTE(review): not referenced by hybrid_search below —
# presumably consumed elsewhere (e.g. by the vector store) — confirm.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
def _minmax(scores):
    """Scale a 1-D numpy score array into [0, 1].

    Mirrors sklearn's MinMaxScaler on a single column — an all-equal input
    maps to all zeros — without pulling sklearn in for a trivial transform.
    """
    lo = scores.min()
    span = scores.max() - lo
    if span == 0.0:
        # Constant scores carry no ranking information; match MinMaxScaler,
        # which scales a zero-range column to 0.
        return np.zeros_like(scores)
    return (scores - lo) / span


def hybrid_search(query, k=6):
    """Hybrid dense + sparse retrieval over the chatbot vector store.

    Fetches the top-k candidates from Qdrant (dense), re-scores those same
    candidates with BM25 (sparse re-ranking — NOT a full-corpus BM25
    search), min-max normalizes both score sets, and ranks by their
    equally-weighted sum.

    Args:
        query: Free-text search query.
        k: Number of candidates to retrieve and return (default 6).

    Returns:
        The re-ranked document texts joined with newlines; "" when the
        store yields no matches.
    """
    vector_db = get_vector_db()

    # Dense retrieval; each item is (Document, score).
    # NOTE(review): the score direction depends on the collection's distance
    # metric (higher-is-better for cosine similarity in Qdrant) — confirm the
    # collection config before trusting this ranking.
    dense_results = vector_db.similarity_search_with_score(query, k=k)
    if not dense_results:
        # BM25Okapi raises ZeroDivisionError on an empty corpus; bail early.
        return ""

    documents = [doc.page_content for doc, _ in dense_results]

    # Sparse scoring restricted to the dense candidates (whitespace tokenized).
    bm25 = BM25Okapi([doc.split() for doc in documents])
    bm25_scores = np.asarray(bm25.get_scores(query.split()), dtype=float)

    dense_scores = np.array([score for _, score in dense_results], dtype=float)

    # Equal-weight fusion of the two normalized score sets.
    combined_scores = 0.5 * _minmax(dense_scores) + 0.5 * _minmax(bm25_scores)

    # Highest combined score first.
    sorted_indices = np.argsort(combined_scores)[::-1]
    final_results = [documents[i] for i in sorted_indices[:k]]
    return "\n".join(final_results)
# Example usage. Guarded so importing this module does not trigger a live
# search (and the model/DB setup it implies) as a side effect.
if __name__ == "__main__":
    query = "What is AI?"
    results = hybrid_search(query)
    print(results)