Nihal2000 committed on
Commit
2589ed0
·
verified ·
1 Parent(s): c242ff0

Update config.py

Browse files
Files changed (1) hide show
  1. config.py +39 -66
config.py CHANGED
@@ -1,73 +1,46 @@
1
-
2
  import os
3
- from dotenv import load_dotenv
4
 
5
- # Load environment variables
6
- load_dotenv()
7
 
8
  class Config:
9
- """Configuration management for API keys and settings"""
10
-
11
- # API Keys - Only 2 needed, both with free tiers!
12
- MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY", "")
13
- ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
14
-
15
- # ChromaDB Settings (completely free local storage)
16
- CHROMA_DB_PATH = os.getenv("CHROMA_DB_PATH", "./chroma_db")
17
- CHROMA_COLLECTION_NAME = "knowledge_base"
18
-
19
- # MCP Server Settings
20
- MCP_SERVER_NAME = "intelligent-content-organizer"
21
- MCP_SERVER_VERSION = "1.0.0"
22
- MCP_SERVER_DESCRIPTION = "AI-powered knowledge management with automatic tagging and semantic search"
23
-
24
- # Processing Settings
25
- MAX_FILE_SIZE_MB = 50
26
- SUPPORTED_FILE_TYPES = [
27
- ".pdf", ".txt", ".docx", ".doc", ".html", ".htm",
28
- ".md", ".csv", ".json", ".xml", ".rtf"
29
- ]
30
-
31
- # Model Settings
32
- MISTRAL_MODEL = "mistral-small-latest" # Free tier available
33
- CLAUDE_MODEL = "claude-3-haiku-20240307" # Free tier available
34
- EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2" # Completely free
35
-
36
- # Feature Flags - Enable/disable based on API availability
37
- USE_MISTRAL_FOR_TAGS = bool(MISTRAL_API_KEY)
38
- USE_CLAUDE_FOR_SUMMARY = bool(ANTHROPIC_API_KEY)
39
-
40
- # Free alternatives settings
41
- ENABLE_FREE_FALLBACKS = True # Always use free methods when APIs fail
42
 
43
  @classmethod
44
- def validate(cls):
45
- """Validate configuration - now more flexible"""
46
- warnings = []
47
-
48
- if not cls.MISTRAL_API_KEY:
49
- warnings.append("MISTRAL_API_KEY not set - will use free tag generation")
50
-
51
- if not cls.ANTHROPIC_API_KEY:
52
- warnings.append("ANTHROPIC_API_KEY not set - will use free summarization")
53
-
54
- if warnings:
55
- print("⚠️ Configuration warnings:")
56
- for warning in warnings:
57
- print(f" - {warning}")
58
- print("\n✅ The app will still work using free alternatives!")
59
- else:
60
- print("✅ All API keys configured")
61
-
62
  return True
63
-
64
- @classmethod
65
- def get_status(cls):
66
- """Get configuration status for display"""
67
- return {
68
- "mistral_configured": bool(cls.MISTRAL_API_KEY),
69
- "anthropic_configured": bool(cls.ANTHROPIC_API_KEY),
70
- "free_fallbacks_enabled": cls.ENABLE_FREE_FALLBACKS,
71
- "supported_formats": cls.SUPPORTED_FILE_TYPES,
72
- "embedding_model": cls.EMBEDDING_MODEL
73
- }
 
 
1
  import os
2
+ from typing import Optional
3
 
 
 
4
 
5
  class Config:
6
+ # API Keys
7
+ ANTHROPIC_API_KEY: Optional[str] = os.getenv("ANTHROPIC_API_KEY")
8
+ MISTRAL_API_KEY: Optional[str] = os.getenv("MISTRAL_API_KEY")
9
+ HUGGINGFACE_API_KEY: Optional[str] = os.getenv("HUGGINGFACE_API_KEY", os.getenv("HF_TOKEN"))
10
+
11
+ # Model Configuration
12
+ EMBEDDING_MODEL: str = os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
13
+ ANTHROPIC_MODEL: str = os.getenv("ANTHROPIC_MODEL", "claude-3-haiku-20240307") # Using faster model
14
+ MISTRAL_MODEL: str = os.getenv("MISTRAL_MODEL", "mistral-small-latest") # Using smaller model
15
+
16
+ # Vector Store Configuration
17
+ VECTOR_STORE_PATH: str = os.getenv("VECTOR_STORE_PATH", "./data/vector_store")
18
+ DOCUMENT_STORE_PATH: str = os.getenv("DOCUMENT_STORE_PATH", "./data/documents")
19
+ INDEX_NAME: str = os.getenv("INDEX_NAME", "content_index")
20
+
21
+ # Processing Configuration
22
+ CHUNK_SIZE: int = int(os.getenv("CHUNK_SIZE", "500"))
23
+ CHUNK_OVERLAP: int = int(os.getenv("CHUNK_OVERLAP", "50"))
24
+ MAX_CONCURRENT_REQUESTS: int = int(os.getenv("MAX_CONCURRENT_REQUESTS", "5"))
25
+
26
+ # Search Configuration
27
+ DEFAULT_TOP_K: int = int(os.getenv("DEFAULT_TOP_K", "5"))
28
+ SIMILARITY_THRESHOLD: float = float(os.getenv("SIMILARITY_THRESHOLD", "0.1"))
29
+
30
+ # OCR Configuration
31
+ TESSERACT_PATH: Optional[str] = os.getenv("TESSERACT_PATH")
32
+ OCR_LANGUAGE: str = os.getenv("OCR_LANGUAGE", "eng")
 
 
 
 
 
 
33
 
34
  @classmethod
35
+ def validate(cls) -> bool:
36
+ """Validate that required configuration is present"""
37
+ # Make API keys optional for testing
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  return True
39
+
40
+ # Global config instance
41
+ config = Config()
42
+
43
+ # Create data directories
44
+ import pathlib
45
+ pathlib.Path(config.VECTOR_STORE_PATH).mkdir(parents=True, exist_ok=True)
46
+ pathlib.Path(config.DOCUMENT_STORE_PATH).mkdir(parents=True, exist_ok=True)