|
|
import os |
|
|
from typing import Optional |
|
|
from dotenv import load_dotenv |
|
|
|
|
|
load_dotenv() |
|
|
|
|
|
|
|
|
class Config: |
|
|
|
|
|
NEBIUS_API_KEY: Optional[str] = os.getenv("NEBIUS_API_KEY") |
|
|
MISTRAL_API_KEY: Optional[str] = os.getenv("MISTRAL_API_KEY") |
|
|
HUGGINGFACE_API_KEY: Optional[str] = os.getenv("HUGGINGFACE_API_KEY", os.getenv("HF_TOKEN")) |
|
|
|
|
|
|
|
|
NEBIUS_BASE_URL: str = os.getenv("NEBIUS_BASE_URL", "https://api.studio.nebius.com/v1/") |
|
|
NEBIUS_MODEL: str = os.getenv("NEBIUS_MODEL", "openai/gpt-oss-120b") |
|
|
|
|
|
|
|
|
EMBEDDING_MODEL: str = os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2") |
|
|
MISTRAL_MODEL: str = os.getenv("MISTRAL_MODEL", "mistral-small-latest") |
|
|
|
|
|
|
|
|
VECTOR_STORE_PATH: str = os.getenv("VECTOR_STORE_PATH", "./data/vector_store") |
|
|
DOCUMENT_STORE_PATH: str = os.getenv("DOCUMENT_STORE_PATH", "./data/documents") |
|
|
INDEX_NAME: str = os.getenv("INDEX_NAME", "content_index") |
|
|
|
|
|
|
|
|
CHUNK_SIZE: int = int(os.getenv("CHUNK_SIZE", "500")) |
|
|
CHUNK_OVERLAP: int = int(os.getenv("CHUNK_OVERLAP", "50")) |
|
|
MAX_CONCURRENT_REQUESTS: int = int(os.getenv("MAX_CONCURRENT_REQUESTS", "5")) |
|
|
|
|
|
|
|
|
DEFAULT_TOP_K: int = int(os.getenv("DEFAULT_TOP_K", "5")) |
|
|
SIMILARITY_THRESHOLD: float = float(os.getenv("SIMILARITY_THRESHOLD", "0.1")) |
|
|
|
|
|
|
|
|
TESSERACT_PATH: Optional[str] = os.getenv("TESSERACT_PATH") |
|
|
OCR_LANGUAGE: str = os.getenv("OCR_LANGUAGE", "eng") |
|
|
|
|
|
@classmethod |
|
|
def validate(cls) -> bool: |
|
|
"""Validate that required configuration is present""" |
|
|
|
|
|
return True |
|
|
|
|
|
|
|
|
config = Config() |
|
|
|
|
|
|
|
|
import pathlib |
|
|
pathlib.Path(config.VECTOR_STORE_PATH).mkdir(parents=True, exist_ok=True) |
|
|
pathlib.Path(config.DOCUMENT_STORE_PATH).mkdir(parents=True, exist_ok=True) |