File size: 1,765 Bytes
ee39cc9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
"""
Ingestion Pipeline Service

A dedicated service for processing file uploads and storing them in MongoDB Atlas.
This service mirrors the main system's file processing functionality while
running as a separate service to share the processing load.
"""

import os
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

# Import shared utilities (now local)
from utils.logger import get_logger
from utils.rag.rag import RAGStore, ensure_indexes
from utils.embedding import RemoteEmbeddingClient
from services.maverick_captioner import NvidiaMaverickCaptioner
from api.routes import router, initialize_services

logger = get_logger("INGESTION_PIPELINE", __name__)

# FastAPI app
app = FastAPI(title="Ingestion Pipeline", version="1.0.0")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# In-memory job tracker (same as main system)
app.state.jobs = {}

# Global clients (same as main system)
try:
    rag = RAGStore(mongo_uri=os.getenv("MONGO_URI"), db_name=os.getenv("MONGO_DB", "studybuddy"))
    rag.client.admin.command('ping')
    logger.info("[INGESTION_PIPELINE] MongoDB connection successful")
    ensure_indexes(rag)
    logger.info("[INGESTION_PIPELINE] MongoDB indexes ensured")
except Exception as e:
    logger.error(f"[INGESTION_PIPELINE] Failed to initialize MongoDB: {e}")
    rag = None

embedder = RemoteEmbeddingClient()
captioner = NvidiaMaverickCaptioner()

# Initialize services
initialize_services(rag, embedder, captioner)

# Include API routes
app.include_router(router)

if __name__ == "__main__":
    import uvicorn
    port = int(os.getenv("INGESTION_PORT", "7860"))
    uvicorn.run(app, host="0.0.0.0", port=port)