rdune71 committed on
Commit 5b5f50c · 1 Parent(s): 7fe839d

Implement Core LLM Factory Module with provider abstraction and factory pattern

$envOLLAMA_HOST=httpsf943b91f0a0c.n.txt ADDED
@@ -0,0 +1,3 @@
+ # Ollama Configuration
+ $env:OLLAMA_HOST="https://f943b91f0a0c.ngrok-free.app"
+ $env:LOCAL_MODEL_NAME="mistral:latest"
core/llm.py CHANGED
@@ -1,68 +1,51 @@
- import openai
- import requests
- import time
  from typing import List, Dict, Optional
- from utils.config import config
-
- class LLMProvider:
-     def __init__(self, model_name: str, timeout: int = 30, retries: int = 3):
-         self.model_name = model_name
-         self.timeout = timeout
-         self.retries = retries
-
- class OllamaProvider(LLMProvider):
-     def generate_response(self, prompt: str, conversation_history: List[Dict]) -> Optional[str]:
-         url = f"{config.ollama_host}/api/chat"
-         messages = conversation_history
-
-         payload = {
-             "model": self.model_name,
-             "messages": messages,
-             "stream": False
-         }
-
-         for attempt in range(self.retries):
-             try:
-                 response = requests.post(url, json=payload, timeout=self.timeout)
-                 response.raise_for_status()
-                 return response.json()["message"]["content"]
-             except Exception as e:
-                 if attempt == self.retries - 1:
-                     print(f"Error after {self.retries} attempts: {e}")
-                     return None
-                 time.sleep(2 ** attempt)  # Exponential backoff
-         return None
-
- class HuggingFaceProvider(LLMProvider):
-     def __init__(self, model_name: str, timeout: int = 30, retries: int = 3):
-         super().__init__(model_name, timeout, retries)
-         if not config.hf_token:
-             raise ValueError("HF_TOKEN not set - required for Hugging Face provider")
-
-         self.client = openai.OpenAI(
-             base_url=config.hf_api_url,
-             api_key=config.hf_token
-         )

-     def generate_response(self, prompt: str, conversation_history: List[Dict]) -> Optional[str]:
          try:
-             response = self.client.chat.completions.create(
-                 model=self.model_name,
-                 messages=conversation_history,
-                 max_tokens=500,
-                 temperature=0.7
-             )
-             return response.choices[0].message.content
          except Exception as e:
-             print(f"Hugging Face API error: {e}")
-             return None

  def send_to_ollama(prompt: str, conversation_history: List[Dict], ollama_url: str, model: str) -> Optional[str]:
-     config.ollama_host = ollama_url
-     provider = OllamaProvider(model)
-     return provider.generate_response(prompt, conversation_history)

  def send_to_hf(prompt: str, conversation_history: List[Dict]) -> Optional[str]:
-     # Using a common model that works well for coaching
-     provider = HuggingFaceProvider("meta-llama/Llama-2-7b-chat-hf")
-     return provider.generate_response(prompt, conversation_history)

+ import logging
  from typing import List, Dict, Optional
+ from core.llm_factory import llm_factory, ProviderNotAvailableError

+ logger = logging.getLogger(__name__)

+ class LLMClient:
+     """High-level LLM client that uses the factory pattern"""
+
+     def __init__(self, provider: Optional[str] = None):
+         self.provider_name = provider
+         try:
+             self.provider = llm_factory.get_provider(provider)
+         except ProviderNotAvailableError:
+             self.provider = None
+             logger.error("No LLM providers available")

+     def generate(self, prompt: str, conversation_history: List[Dict], stream: bool = False):
+         """Generate a response"""
+         if not self.provider:
+             raise ProviderNotAvailableError("No LLM provider available")
+
          try:
+             if stream:
+                 return self.provider.stream_generate(prompt, conversation_history)
+             else:
+                 return self.provider.generate(prompt, conversation_history)
          except Exception as e:
+             logger.error(f"LLM generation failed: {e}")
+             raise

  def send_to_ollama(prompt: str, conversation_history: List[Dict], ollama_url: str, model: str) -> Optional[str]:
+     """Legacy function for backward compatibility"""
+     try:
+         from core.providers.ollama import OllamaProvider
+         provider = OllamaProvider(model)
+         return provider.generate(prompt, conversation_history)
+     except Exception as e:
+         logger.error(f"Ollama call failed: {e}")
+         return None

  def send_to_hf(prompt: str, conversation_history: List[Dict]) -> Optional[str]:
+     """Legacy function for backward compatibility"""
+     try:
+         from utils.config import config
+         from core.providers.huggingface import HuggingFaceProvider
+         provider = HuggingFaceProvider("meta-llama/Llama-2-7b-chat-hf")
+         return provider.generate(prompt, conversation_history)
+     except Exception as e:
+         logger.error(f"Hugging Face call failed: {e}")
+         return None
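The rewritten core/llm.py keeps the legacy send_to_ollama/send_to_hf helpers but routes new code through LLMClient. A minimal usage sketch (illustrative only, not part of this commit; it assumes the OpenAI-style role/content message format used throughout the diff):

from core.llm import LLMClient

# With no argument the factory picks the first healthy provider;
# pass provider="ollama" (or "huggingface", "openai") to state a preference.
client = LLMClient()

history = [
    {"role": "system", "content": "You are a helpful life coach."},
    {"role": "user", "content": "How do I build a morning routine?"},
]

reply = client.generate("How do I build a morning routine?", history)
print(reply)

# stream=True returns the provider's chunk list instead of a single string.
for chunk in client.generate("Give me one quick tip.", history, stream=True):
    print(chunk, end="")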
core/llm_factory.py ADDED
@@ -0,0 +1,140 @@
+ import logging
+ from typing import Optional, List
+ from core.providers.base import LLMProvider
+ from core.providers.ollama import OllamaProvider
+ from core.providers.huggingface import HuggingFaceProvider
+ from core.providers.openai import OpenAIProvider
+ from utils.config import config
+
+ logger = logging.getLogger(__name__)
+
+ class ProviderNotAvailableError(Exception):
+     """Raised when no provider is available"""
+     pass
+
+ class LLMFactory:
+     """Factory for creating LLM providers with fallback support"""
+
+     _instance = None
+     _providers = {}
+
+     def __new__(cls):
+         if cls._instance is None:
+             cls._instance = super(LLMFactory, cls).__new__(cls)
+             cls._instance._initialized = False
+         return cls._instance
+
+     def __init__(self):
+         if self._initialized:
+             return
+
+         self._initialized = True
+         self._provider_chain = []
+         self._circuit_breakers = {}
+         self._initialize_providers()
+
+     def _initialize_providers(self):
+         """Initialize all available providers based on configuration"""
+         # Define provider priority order
+         provider_configs = [
+             {
+                 'name': 'ollama',
+                 'class': OllamaProvider,
+                 'enabled': bool(config.ollama_host),
+                 'model': config.local_model_name
+             },
+             {
+                 'name': 'huggingface',
+                 'class': HuggingFaceProvider,
+                 'enabled': bool(config.hf_token),
+                 'model': "meta-llama/Llama-2-7b-chat-hf"  # Default HF model
+             },
+             {
+                 'name': 'openai',
+                 'class': OpenAIProvider,
+                 'enabled': bool(config.openai_api_key),
+                 'model': "gpt-3.5-turbo"  # Default OpenAI model
+             }
+         ]
+
+         # Initialize providers in priority order
+         for provider_config in provider_configs:
+             if provider_config['enabled']:
+                 try:
+                     provider = provider_config['class'](
+                         model_name=provider_config['model']
+                     )
+                     self._providers[provider_config['name']] = provider
+                     self._provider_chain.append(provider_config['name'])
+                     self._circuit_breakers[provider_config['name']] = {
+                         'failures': 0,
+                         'last_failure': None,
+                         'tripped': False
+                     }
+                     logger.info(f"Initialized {provider_config['name']} provider")
+                 except Exception as e:
+                     logger.warning(f"Failed to initialize {provider_config['name']} provider: {e}")
+
+     def get_provider(self, preferred_provider: Optional[str] = None) -> LLMProvider:
+         """
+         Get an LLM provider based on preference and availability
+
+         Args:
+             preferred_provider: Preferred provider name (ollama, huggingface, openai)
+
+         Returns:
+             LLMProvider instance
+
+         Raises:
+             ProviderNotAvailableError: When no providers are available
+         """
+         # Check preferred provider first
+         if preferred_provider and preferred_provider in self._providers:
+             provider = self._providers[preferred_provider]
+             if self._is_provider_available(preferred_provider) and provider.validate_model():
+                 logger.info(f"Using preferred provider: {preferred_provider}")
+                 return provider
+
+         # Fallback through provider chain
+         for provider_name in self._provider_chain:
+             if self._is_provider_available(provider_name):
+                 provider = self._providers[provider_name]
+                 try:
+                     if provider.validate_model():
+                         logger.info(f"Using fallback provider: {provider_name}")
+                         return provider
+                 except Exception as e:
+                     logger.warning(f"Provider {provider_name} model validation failed: {e}")
+                     self._record_provider_failure(provider_name)
+
+         raise ProviderNotAvailableError("No LLM providers are available")
+
+     def get_all_providers(self) -> List[LLMProvider]:
+         """Get all initialized providers"""
+         return list(self._providers.values())
+
+     def _is_provider_available(self, provider_name: str) -> bool:
+         """Check if a provider is available (not tripped by circuit breaker)"""
+         if provider_name not in self._circuit_breakers:
+             return False
+
+         breaker = self._circuit_breakers[provider_name]
+         if not breaker['tripped']:
+             return True
+
+         # Check if enough time has passed to reset the circuit breaker
+         # In a real implementation, you might want more sophisticated logic here
+         return False
+
+     def _record_provider_failure(self, provider_name: str):
+         """Record a provider failure for circuit breaker logic"""
+         if provider_name in self._circuit_breakers:
+             breaker = self._circuit_breakers[provider_name]
+             breaker['failures'] += 1
+             # Trip the circuit breaker after 3 failures
+             if breaker['failures'] >= 3:
+                 breaker['tripped'] = True
+                 logger.warning(f"Circuit breaker tripped for provider: {provider_name}")
+
+ # Global factory instance
+ llm_factory = LLMFactory()
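Because LLMFactory overrides __new__, the module-level llm_factory is a process-wide singleton: every import shares the same provider chain and circuit-breaker state. A short consumption sketch (not part of the commit; it assumes at least one of OLLAMA_HOST, HF_TOKEN, or OPENAI_API_KEY is configured):

from core.llm_factory import llm_factory, ProviderNotAvailableError

try:
    # Prefer Ollama; if it is missing, unhealthy, or its circuit breaker has
    # tripped, get_provider() falls back through huggingface and then openai.
    provider = llm_factory.get_provider("ollama")
    history = [{"role": "user", "content": "Summarise my goals for this week."}]
    print(provider.generate("Summarise my goals for this week.", history))
except ProviderNotAvailableError:
    print("No LLM backend is configured or reachable.")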
core/providers/base.py ADDED
@@ -0,0 +1,61 @@
+ import time
+ import logging
+ from abc import ABC, abstractmethod
+ from typing import List, Dict, Optional, Union
+ from utils.config import config
+
+ logger = logging.getLogger(__name__)
+
+ class LLMProvider(ABC):
+     """Abstract base class for all LLM providers"""
+
+     def __init__(self, model_name: str, timeout: int = 30, max_retries: int = 3):
+         self.model_name = model_name
+         self.timeout = timeout
+         self.max_retries = max_retries
+         self.is_available = True
+
+     @abstractmethod
+     def generate(self, prompt: str, conversation_history: List[Dict]) -> Optional[str]:
+         """Generate a response synchronously"""
+         pass
+
+     @abstractmethod
+     def stream_generate(self, prompt: str, conversation_history: List[Dict]) -> Optional[Union[str, List[str]]]:
+         """Generate a response with streaming support"""
+         pass
+
+     @abstractmethod
+     def validate_model(self) -> bool:
+         """Validate if the model is available"""
+         pass
+
+     def _retry_with_backoff(self, func, *args, **kwargs):
+         """Retry logic with exponential backoff"""
+         last_exception = None
+
+         for attempt in range(self.max_retries):
+             try:
+                 return func(*args, **kwargs)
+             except Exception as e:
+                 last_exception = e
+                 if attempt < self.max_retries - 1:  # Don't sleep on last attempt
+                     sleep_time = (2 ** attempt) * 0.5  # Exponential backoff starting at 0.5s
+                     logger.warning(f"Attempt {attempt + 1} failed: {str(e)}. Retrying in {sleep_time}s...")
+                     time.sleep(sleep_time)
+                 else:
+                     logger.error(f"All {self.max_retries} attempts failed. Last error: {str(e)}")
+
+         raise last_exception
+
+     def _is_rate_limited(self, error: Exception) -> bool:
+         """Check if the error is related to rate limiting"""
+         error_str = str(error).lower()
+         rate_limit_indicators = [
+             "rate limit",
+             "too many requests",
+             "quota exceeded",
+             "429",
+             "limit exceeded"
+         ]
+         return any(indicator in error_str for indicator in rate_limit_indicators)
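Adding another backend only requires subclassing LLMProvider and implementing the three abstract methods; the retry and rate-limit helpers come with the base class. A hypothetical echo provider (for wiring tests, not part of the commit):

from typing import List, Dict, Optional, Union

from core.providers.base import LLMProvider

class EchoProvider(LLMProvider):
    """Toy provider that echoes the prompt back; handy for smoke tests."""

    def generate(self, prompt: str, conversation_history: List[Dict]) -> Optional[str]:
        # Reuse the inherited exponential-backoff helper, as the real providers do.
        return self._retry_with_backoff(lambda: f"echo({self.model_name}): {prompt}")

    def stream_generate(self, prompt: str, conversation_history: List[Dict]) -> Optional[Union[str, List[str]]]:
        # Mirror the real providers' streaming shape: a list of text chunks.
        return [f"{word} " for word in prompt.split()]

    def validate_model(self) -> bool:
        # Nothing to check for a local echo.
        return True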
core/providers/huggingface.py ADDED
@@ -0,0 +1,138 @@
+ import time
+ import logging
+ from typing import List, Dict, Optional, Union
+ from core.providers.base import LLMProvider
+ from utils.config import config
+
+ logger = logging.getLogger(__name__)
+
+ try:
+     from openai import OpenAI
+     HUGGINGFACE_SDK_AVAILABLE = True
+ except ImportError:
+     HUGGINGFACE_SDK_AVAILABLE = False
+     OpenAI = None
+
+ class HuggingFaceProvider(LLMProvider):
+     """Hugging Face LLM provider implementation"""
+
+     def __init__(self, model_name: str, timeout: int = 30, max_retries: int = 3):
+         super().__init__(model_name, timeout, max_retries)
+
+         if not HUGGINGFACE_SDK_AVAILABLE:
+             raise ImportError("Hugging Face provider requires 'openai' package")
+
+         if not config.hf_token:
+             raise ValueError("HF_TOKEN not set - required for Hugging Face provider")
+
+         self.client = OpenAI(
+             base_url=config.hf_api_url,
+             api_key=config.hf_token
+         )
+
+     def generate(self, prompt: str, conversation_history: List[Dict]) -> Optional[str]:
+         """Generate a response synchronously"""
+         try:
+             return self._retry_with_backoff(self._generate_impl, prompt, conversation_history)
+         except Exception as e:
+             logger.error(f"Hugging Face generation failed: {e}")
+             return None
+
+     def stream_generate(self, prompt: str, conversation_history: List[Dict]) -> Optional[Union[str, List[str]]]:
+         """Generate a response with streaming support"""
+         try:
+             return self._retry_with_backoff(self._stream_generate_impl, prompt, conversation_history)
+         except Exception as e:
+             logger.error(f"Hugging Face stream generation failed: {e}")
+             return None
+
+     def validate_model(self) -> bool:
+         """Validate if the model is available"""
+         # For Hugging Face endpoints, we'll assume the model is valid if we can connect
+         # In production, you might want to ping the endpoint specifically
+         try:
+             # Simple connectivity check
+             self.client.models.list()
+             return True
+         except Exception as e:
+             logger.warning(f"Hugging Face model validation failed: {e}")
+             return False
+
+     def _generate_impl(self, prompt: str, conversation_history: List[Dict]) -> str:
+         """Implementation of synchronous generation"""
+         try:
+             response = self.client.chat.completions.create(
+                 model=self.model_name,
+                 messages=conversation_history,
+                 max_tokens=500,
+                 temperature=0.7
+             )
+             return response.choices[0].message.content
+         except Exception as e:
+             # Handle scale-to-zero behavior
+             if self._is_scale_to_zero_error(e):
+                 logger.info("Hugging Face endpoint is scaling up, waiting...")
+                 time.sleep(60)  # Wait for endpoint to initialize
+                 # Retry once after waiting
+                 response = self.client.chat.completions.create(
+                     model=self.model_name,
+                     messages=conversation_history,
+                     max_tokens=500,
+                     temperature=0.7
+                 )
+                 return response.choices[0].message.content
+             else:
+                 raise
+
+     def _stream_generate_impl(self, prompt: str, conversation_history: List[Dict]) -> List[str]:
+         """Implementation of streaming generation"""
+         try:
+             response = self.client.chat.completions.create(
+                 model=self.model_name,
+                 messages=conversation_history,
+                 max_tokens=500,
+                 temperature=0.7,
+                 stream=True
+             )
+
+             chunks = []
+             for chunk in response:
+                 content = chunk.choices[0].delta.content
+                 if content:
+                     chunks.append(content)
+
+             return chunks
+         except Exception as e:
+             # Handle scale-to-zero behavior
+             if self._is_scale_to_zero_error(e):
+                 logger.info("Hugging Face endpoint is scaling up, waiting...")
+                 time.sleep(60)  # Wait for endpoint to initialize
+                 # Retry once after waiting
+                 response = self.client.chat.completions.create(
+                     model=self.model_name,
+                     messages=conversation_history,
+                     max_tokens=500,
+                     temperature=0.7,
+                     stream=True
+                 )
+
+                 chunks = []
+                 for chunk in response:
+                     content = chunk.choices[0].delta.content
+                     if content:
+                         chunks.append(content)
+
+                 return chunks
+             else:
+                 raise
+
+     def _is_scale_to_zero_error(self, error: Exception) -> bool:
+         """Check if the error is related to scale-to-zero initialization"""
+         error_str = str(error).lower()
+         scale_to_zero_indicators = [
+             "503",
+             "service unavailable",
+             "initializing",
+             "cold start"
+         ]
+         return any(indicator in error_str for indicator in scale_to_zero_indicators)
core/providers/ollama.py ADDED
@@ -0,0 +1,127 @@
+ import json
+ import requests
+ import logging
+ from typing import List, Dict, Optional, Union
+ from core.providers.base import LLMProvider
+ from utils.config import config
+
+ logger = logging.getLogger(__name__)
+
+ class OllamaProvider(LLMProvider):
+     """Ollama LLM provider implementation"""
+
+     def __init__(self, model_name: str, timeout: int = 30, max_retries: int = 3):
+         super().__init__(model_name, timeout, max_retries)
+         self.host = config.ollama_host or "http://localhost:11434"
+         # Headers to skip ngrok browser warning
+         self.headers = {
+             "ngrok-skip-browser-warning": "true",
+             "User-Agent": "AI-Life-Coach-Ollama"
+         }
+
+     def generate(self, prompt: str, conversation_history: List[Dict]) -> Optional[str]:
+         """Generate a response synchronously"""
+         try:
+             return self._retry_with_backoff(self._generate_impl, prompt, conversation_history)
+         except Exception as e:
+             logger.error(f"Ollama generation failed: {e}")
+             return None
+
+     def stream_generate(self, prompt: str, conversation_history: List[Dict]) -> Optional[Union[str, List[str]]]:
+         """Generate a response with streaming support"""
+         try:
+             return self._retry_with_backoff(self._stream_generate_impl, prompt, conversation_history)
+         except Exception as e:
+             logger.error(f"Ollama stream generation failed: {e}")
+             return None
+
+     def validate_model(self) -> bool:
+         """Validate if the model is available"""
+         try:
+             response = requests.get(
+                 f"{self.host}/api/tags",
+                 headers=self.headers,
+                 timeout=self.timeout
+             )
+
+             if response.status_code == 200:
+                 models = response.json().get("models", [])
+                 model_names = [model.get("name") for model in models]
+                 return self.model_name in model_names
+             elif response.status_code == 404:
+                 # Try the root endpoint as a basic reachability check
+                 response2 = requests.get(
+                     f"{self.host}",
+                     headers=self.headers,
+                     timeout=self.timeout
+                 )
+                 return response2.status_code == 200
+
+             return False
+         except Exception as e:
+             logger.error(f"Model validation failed: {e}")
+             return False
+
+     def _generate_impl(self, prompt: str, conversation_history: List[Dict]) -> str:
+         """Implementation of synchronous generation"""
+         url = f"{self.host}/api/chat"
+         messages = conversation_history.copy()
+
+         # Add the current prompt if not already in history
+         if not messages or messages[-1].get("content") != prompt:
+             messages.append({"role": "user", "content": prompt})
+
+         payload = {
+             "model": self.model_name,
+             "messages": messages,
+             "stream": False
+         }
+
+         response = requests.post(
+             url,
+             json=payload,
+             headers=self.headers,
+             timeout=self.timeout
+         )
+         response.raise_for_status()
+
+         result = response.json()
+         return result["message"]["content"]
+
+     def _stream_generate_impl(self, prompt: str, conversation_history: List[Dict]) -> List[str]:
+         """Implementation of streaming generation"""
+         url = f"{self.host}/api/chat"
+         messages = conversation_history.copy()
+
+         # Add the current prompt if not already in history
+         if not messages or messages[-1].get("content") != prompt:
+             messages.append({"role": "user", "content": prompt})
+
+         payload = {
+             "model": self.model_name,
+             "messages": messages,
+             "stream": True
+         }
+
+         response = requests.post(
+             url,
+             json=payload,
+             headers=self.headers,
+             timeout=self.timeout,
+             stream=True
+         )
+         response.raise_for_status()
+
+         chunks = []
+         for line in response.iter_lines():
+             if line:
+                 chunk = line.decode('utf-8')
+                 try:
+                     data = json.loads(chunk)  # Each streamed line is a JSON object
+                     content = data.get("message", {}).get("content", "")
+                     if content:
+                         chunks.append(content)
+                 except json.JSONDecodeError:
+                     continue
+
+         return chunks
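For context on the streaming parser above: Ollama's /api/chat endpoint streams newline-delimited JSON objects, which is why each line is decoded with json.loads rather than eval. A standalone illustration (field values are made up; real responses also carry model, created_at, and timing fields):

import json

# One line as Ollama streams it, abridged for illustration.
line = '{"message": {"role": "assistant", "content": "Hello"}, "done": false}'

data = json.loads(line)
print(data["message"]["content"])  # -> Hello
print(data["done"])                # -> False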
core/providers/openai.py ADDED
@@ -0,0 +1,82 @@
+ import time
+ import logging
+ from typing import List, Dict, Optional, Union
+ from core.providers.base import LLMProvider
+ from utils.config import config
+
+ logger = logging.getLogger(__name__)
+
+ try:
+     from openai import OpenAI
+     OPENAI_SDK_AVAILABLE = True
+ except ImportError:
+     OPENAI_SDK_AVAILABLE = False
+     OpenAI = None
+
+ class OpenAIProvider(LLMProvider):
+     """OpenAI LLM provider implementation"""
+
+     def __init__(self, model_name: str, timeout: int = 30, max_retries: int = 3):
+         super().__init__(model_name, timeout, max_retries)
+
+         if not OPENAI_SDK_AVAILABLE:
+             raise ImportError("OpenAI provider requires 'openai' package")
+
+         if not config.openai_api_key:
+             raise ValueError("OPENAI_API_KEY not set - required for OpenAI provider")
+
+         self.client = OpenAI(api_key=config.openai_api_key)
+
+     def generate(self, prompt: str, conversation_history: List[Dict]) -> Optional[str]:
+         """Generate a response synchronously"""
+         try:
+             return self._retry_with_backoff(self._generate_impl, prompt, conversation_history)
+         except Exception as e:
+             logger.error(f"OpenAI generation failed: {e}")
+             return None
+
+     def stream_generate(self, prompt: str, conversation_history: List[Dict]) -> Optional[Union[str, List[str]]]:
+         """Generate a response with streaming support"""
+         try:
+             return self._retry_with_backoff(self._stream_generate_impl, prompt, conversation_history)
+         except Exception as e:
+             logger.error(f"OpenAI stream generation failed: {e}")
+             return None
+
+     def validate_model(self) -> bool:
+         """Validate if the model is available"""
+         try:
+             models = self.client.models.list()
+             model_ids = [model.id for model in models.data]
+             return self.model_name in model_ids
+         except Exception as e:
+             logger.warning(f"OpenAI model validation failed: {e}")
+             return False
+
+     def _generate_impl(self, prompt: str, conversation_history: List[Dict]) -> str:
+         """Implementation of synchronous generation"""
+         response = self.client.chat.completions.create(
+             model=self.model_name,
+             messages=conversation_history,
+             max_tokens=500,
+             temperature=0.7
+         )
+         return response.choices[0].message.content
+
+     def _stream_generate_impl(self, prompt: str, conversation_history: List[Dict]) -> List[str]:
+         """Implementation of streaming generation"""
+         response = self.client.chat.completions.create(
+             model=self.model_name,
+             messages=conversation_history,
+             max_tokens=500,
+             temperature=0.7,
+             stream=True
+         )
+
+         chunks = []
+         for chunk in response:
+             content = chunk.choices[0].delta.content
+             if content:
+                 chunks.append(content)
+
+         return chunks
ngrok.yml CHANGED
@@ -1,9 +1,9 @@
  version: "2"
  authtoken: 32HaXMF3tuRxfas1siT3CIhLjH4_2AXbGGma38NnCF1tjyJNZ
  tunnels:
-   ai-coach-api:
-     addr: 8000
-     proto: http
-   ai-coach-ui:
+   web:
      addr: 8501
+     proto: http
+   api:
+     addr: 11434
      proto: http
ngrok.yml.txt DELETED
@@ -1,9 +0,0 @@
- version: "2"
- authtoken: 32HaXMF3tuRxfas1siT3CIhLjH4_2AXbGGma38NnCF1tjyJNZ
- tunnels:
-   ai-coach-api:
-     addr: 8000
-     proto: http
-   ai-coach-ui:
-     addr: 8501
-     proto: http
utils/config.py CHANGED
@@ -8,8 +8,14 @@ class Config:
          # Detect if running on HF Spaces
          self.is_hf_space = bool(os.getenv("SPACE_ID"))

+         # API tokens
          self.hf_token = os.getenv("HF_TOKEN")
+         self.openai_api_key = os.getenv("OPENAI_API_KEY")
+
+         # API endpoints
          self.hf_api_url = os.getenv("HF_API_ENDPOINT_URL", "https://api-inference.huggingface.co/v1/")
+
+         # Fallback settings
          self.use_fallback = os.getenv("USE_FALLBACK", "true").lower() == "true"

          # Redis configuration (optional for HF)
@@ -23,6 +29,9 @@ class Config:
          # Local model configuration
          self.local_model_name = os.getenv("LOCAL_MODEL_NAME", "mistral:latest")
          self.ollama_host = os.getenv("OLLAMA_HOST", "")
+
+         # OpenWeather API
+         self.openweather_api_key = os.getenv("OPENWEATHER_API_KEY")

  # Global config instance
  config = Config()
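A hedged sketch of the environment the updated Config expects; the variable names are the ones read above, the values are placeholders, and because config = Config() runs at import time the variables must be set before utils.config is first imported:

import os

# Placeholders only - in practice these come from the shell, a .env file,
# or HF Space secrets.
os.environ.setdefault("OLLAMA_HOST", "https://<your-ngrok-id>.ngrok-free.app")
os.environ.setdefault("LOCAL_MODEL_NAME", "mistral:latest")
os.environ.setdefault("OPENAI_API_KEY", "<optional - enables OpenAIProvider>")
os.environ.setdefault("OPENWEATHER_API_KEY", "<optional>")

from utils.config import config
print(config.ollama_host, config.local_model_name, bool(config.openai_api_key))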