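# NOTE(review): the six lines below look like repository-viewer page text that
# was captured together with the file; they are kept as comments so the module
# parses as Python.
# LiamKhoaLe's picture
# Merge commit 'e34edc7cd55f292dd0b192dc00b782c22208fde6' as 'ingestion_python'
# ee39cc9
# raw
# history blame
# 1.77 kB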
"""
Ingestion Pipeline Service
A dedicated service for processing file uploads and storing them in MongoDB Atlas.
This service mirrors the main system's file processing functionality while
running as a separate service to share the processing load.
"""
import os
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
# Import shared utilities (now local)
from utils.logger import get_logger
from utils.rag.rag import RAGStore, ensure_indexes
from utils.embedding import RemoteEmbeddingClient
from services.maverick_captioner import NvidiaMaverickCaptioner
from api.routes import router, initialize_services
# Module-level logger obtained from the shared logging helper.
logger = get_logger("INGESTION_PIPELINE", __name__)

# CORS policy handed to the middleware: any origin, method and header is
# accepted, and credentialed requests are allowed.
# NOTE(review): FastAPI's CORS documentation says wildcard origins/methods/
# headers should not be combined with allow_credentials=True — confirm this
# combination is intended, or list the allowed origins explicitly.
_CORS_SETTINGS = dict(
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# FastAPI app
app = FastAPI(title="Ingestion Pipeline", version="1.0.0")
app.add_middleware(CORSMiddleware, **_CORS_SETTINGS)

# In-memory job tracker (same as main system); starts empty and lives on the
# application state object.
app.state.jobs = {}
def _connect_rag_store() -> "RAGStore | None":
    """Create the RAG store and verify its MongoDB connection.

    Reads ``MONGO_URI`` and ``MONGO_DB`` (default ``"studybuddy"``) from the
    environment, builds a ``RAGStore``, sends a ``ping`` admin command through
    the store's client, and then calls ``ensure_indexes`` on the store.

    Returns:
        The ready store, or ``None`` when any step above raises. The failure
        is logged and not re-raised, so startup continues without a store.
    """
    try:
        store = RAGStore(
            mongo_uri=os.getenv("MONGO_URI"),
            db_name=os.getenv("MONGO_DB", "studybuddy"),
        )
        # Round-trip to the server so connection problems surface here.
        store.client.admin.command('ping')
        logger.info("[INGESTION_PIPELINE] MongoDB connection successful")
        ensure_indexes(store)
        logger.info("[INGESTION_PIPELINE] MongoDB indexes ensured")
        return store
    except Exception as e:
        # Best-effort startup: report the problem and fall back to no store.
        logger.error(f"[INGESTION_PIPELINE] Failed to initialize MongoDB: {e}")
        return None


# Global clients (same as main system)
rag = _connect_rag_store()
embedder = RemoteEmbeddingClient()
captioner = NvidiaMaverickCaptioner()

# Initialize services with the shared clients; rag is None when the MongoDB
# setup above failed.
initialize_services(rag, embedder, captioner)
# Mount the API routes on the application.
app.include_router(router)

if __name__ == "__main__":
    # Run directly as a script: serve the app with uvicorn. The import is kept
    # local so it is only needed on this path.
    import uvicorn

    # Listen on all interfaces; INGESTION_PORT overrides the default of 7860.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=int(os.getenv("INGESTION_PORT", "7860")),
    )