Spaces:
Sleeping
Sleeping
“vinit5112”
committed on
Commit
·
65726e0
1
Parent(s):
5b65de2
changes
Browse files
- backend/backend_api.py +1 -0
- backend/rag.py +5 -3
- backend/vector_store.py +4 -3
backend/backend_api.py
CHANGED
|
@@ -50,6 +50,7 @@ async def lifespan(app: FastAPI):
|
|
| 50 |
|
| 51 |
collection_name = os.getenv("COLLECTION_NAME", "ca-documents")
|
| 52 |
rag_system = RAG(google_api_key, collection_name)
|
|
|
|
| 53 |
logger.info("RAG system initialized successfully")
|
| 54 |
|
| 55 |
except Exception as e:
|
|
|
|
| 50 |
|
| 51 |
collection_name = os.getenv("COLLECTION_NAME", "ca-documents")
|
| 52 |
rag_system = RAG(google_api_key, collection_name)
|
| 53 |
+
await rag_system.initialize()
|
| 54 |
logger.info("RAG system initialized successfully")
|
| 55 |
|
| 56 |
except Exception as e:
|
backend/rag.py
CHANGED
|
@@ -17,9 +17,6 @@ class RAG:
|
|
| 17 |
# Setup Vector Store (Qdrant configuration is handled via environment variables)
|
| 18 |
self.vector_store = VectorStore()
|
| 19 |
|
| 20 |
-
# Verify vector store is properly initialized
|
| 21 |
-
asyncio.run(self.vector_store.verify_collection_health())
|
| 22 |
-
|
| 23 |
# Setup Text Splitter
|
| 24 |
self.text_splitter = RecursiveCharacterTextSplitter(
|
| 25 |
chunk_size=1000,
|
|
@@ -28,6 +25,11 @@ class RAG:
|
|
| 28 |
separators=["\n\n", "\n", ". ", " ", ""]
|
| 29 |
)
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
def process_pdf(self, file_path: str) -> List[str]:
|
| 32 |
"""Extract text from PDF and split into chunks using RecursiveTextSplitter"""
|
| 33 |
full_text = ""
|
|
|
|
| 17 |
# Setup Vector Store (Qdrant configuration is handled via environment variables)
|
| 18 |
self.vector_store = VectorStore()
|
| 19 |
|
|
|
|
|
|
|
|
|
|
| 20 |
# Setup Text Splitter
|
| 21 |
self.text_splitter = RecursiveCharacterTextSplitter(
|
| 22 |
chunk_size=1000,
|
|
|
|
| 25 |
separators=["\n\n", "\n", ". ", " ", ""]
|
| 26 |
)
|
| 27 |
|
| 28 |
+
async def initialize(self):
|
| 29 |
+
"""Asynchronous initialization to be called after object creation."""
|
| 30 |
+
await self.vector_store.initialize()
|
| 31 |
+
await self.vector_store.verify_collection_health()
|
| 32 |
+
|
| 33 |
def process_pdf(self, file_path: str) -> List[str]:
|
| 34 |
"""Extract text from PDF and split into chunks using RecursiveTextSplitter"""
|
| 35 |
full_text = ""
|
backend/vector_store.py
CHANGED
|
@@ -36,10 +36,11 @@ class VectorStore:
|
|
| 36 |
|
| 37 |
# Initialize embedding model with offline support
|
| 38 |
self.embedding_model = self._initialize_embedding_model()
|
| 39 |
-
|
| 40 |
-
# Create collection with proper indices
|
| 41 |
-
asyncio.run(self._ensure_collection_exists())
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
def _initialize_embedding_model(self):
|
| 44 |
"""Initialize the embedding model from a local directory"""
|
| 45 |
try:
|
|
|
|
| 36 |
|
| 37 |
# Initialize embedding model with offline support
|
| 38 |
self.embedding_model = self._initialize_embedding_model()
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
+
async def initialize(self):
|
| 41 |
+
"""Asynchronous initialization to be called after object creation."""
|
| 42 |
+
await self._ensure_collection_exists()
|
| 43 |
+
|
| 44 |
def _initialize_embedding_model(self):
|
| 45 |
"""Initialize the embedding model from a local directory"""
|
| 46 |
try:
|