rdune71 committed on
Commit
2cb4727
·
1 Parent(s): 2c26384

Simplify CosmicCat AI Assistant - Remove HF expert features and branding changes

.streamlit/config.toml ADDED
@@ -0,0 +1,6 @@
+ [theme]
+ primaryColor = "#6f42c1"
+ backgroundColor = "#0d1117"
+ secondaryBackgroundColor = "#161b22"
+ textColor = "#f0f6fc"
+ font = "sans serif"
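For reference, a small runtime check (not part of this commit) that the theme is being applied, assuming a standard Streamlit install that loads .streamlit/config.toml automatically; st.get_option is Streamlit's accessor for these config keys:

    import streamlit as st

    # Streamlit reads the [theme] section from .streamlit/config.toml at startup;
    # st.get_option lets the app read those values back for a quick sanity check.
    primary = st.get_option("theme.primaryColor")  # expected "#6f42c1" per the config above
    st.caption(f"Active primary color: {primary}")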
app.py CHANGED
@@ -3,21 +3,16 @@ import time
3
  import os
4
  import sys
5
  import json
6
- import asyncio
7
  from datetime import datetime
8
  from pathlib import Path
9
  sys.path.append(str(Path(__file__).parent))
 
10
  from utils.config import config
11
  from core.session import session_manager
12
  from core.memory import check_redis_health
13
- from core.coordinator import coordinator
14
  from core.errors import translate_error
15
  from core.personality import personality
16
- from services.hf_endpoint_monitor import hf_monitor
17
- from services.weather import weather_service
18
- from core.llm import LLMClient
19
  from core.providers.ollama import OllamaProvider
20
- from core.providers.huggingface import HuggingFaceProvider
21
  import logging
22
 
23
  # Set up logging
@@ -26,27 +21,24 @@ logger = logging.getLogger(__name__)
26
 
27
  st.set_page_config(page_title="CosmicCat AI Assistant", page_icon="🐱", layout="wide")
28
 
29
- # Initialize session state safely at the top of app.py
30
  if "messages" not in st.session_state:
31
  st.session_state.messages = []
32
- if "last_error" not in st.session_state:
33
- st.session_state.last_error = ""
34
  if "is_processing" not in st.session_state:
35
  st.session_state.is_processing = False
36
  if "ngrok_url_temp" not in st.session_state:
37
  st.session_state.ngrok_url_temp = st.session_state.get("ngrok_url", "https://7bcc180dffd1.ngrok-free.app")
38
  if "cosmic_mode" not in st.session_state:
39
- st.session_state.cosmic_mode = True # Default to cosmic mode
40
  if "show_welcome" not in st.session_state:
41
  st.session_state.show_welcome = True
42
 
43
- # Sidebar layout redesign
44
  with st.sidebar:
45
  st.title("🐱 CosmicCat AI Assistant")
46
- st.markdown("Your personal AI-powered life development assistant")
47
 
48
- # PRIMARY ACTIONS
49
- st.subheader("πŸ’¬ Primary Actions")
50
  model_options = {
51
  "Mistral 7B (Local)": "mistral:latest",
52
  "Llama 2 7B (Local)": "llama2:latest",
@@ -55,34 +47,30 @@ with st.sidebar:
55
  selected_model_name = st.selectbox(
56
  "Select Model",
57
  options=list(model_options.keys()),
58
- index=0,
59
- key="sidebar_model_select"
60
  )
61
  st.session_state.selected_model = model_options[selected_model_name]
62
 
63
- # Toggle for cosmic mode using checkbox
64
  st.session_state.cosmic_mode = st.checkbox("Enable Cosmic Mode", value=st.session_state.cosmic_mode)
65
 
66
  st.divider()
67
 
68
- # CONFIGURATION
69
  st.subheader("βš™οΈ Configuration")
70
  ngrok_url_input = st.text_input(
71
  "Ollama Server URL",
72
  value=st.session_state.ngrok_url_temp,
73
- help="Enter your ngrok URL",
74
- key="sidebar_ngrok_url"
75
  )
76
 
77
  if ngrok_url_input != st.session_state.ngrok_url_temp:
78
  st.session_state.ngrok_url_temp = ngrok_url_input
79
  st.success("βœ… URL updated!")
80
-
81
  if st.button("πŸ“‘ Test Connection"):
82
  try:
83
- # Use OllamaProvider to test connection
84
  ollama_provider = OllamaProvider(st.session_state.selected_model)
85
- # Test model validation
86
  is_valid = ollama_provider.validate_model()
87
  if is_valid:
88
  st.success("βœ… Connection successful!")
@@ -90,16 +78,18 @@ with st.sidebar:
90
  st.error("❌ Model validation failed")
91
  except Exception as e:
92
  st.error(f"❌ Error: {str(e)[:50]}...")
93
-
94
  if st.button("πŸ—‘οΈ Clear History"):
95
  st.session_state.messages = []
96
  st.success("History cleared!")
97
-
98
  st.divider()
99
 
100
- # SYSTEM STATUS
101
  with st.expander("πŸ” System Status", expanded=False):
102
- st.subheader("πŸ“Š System Monitor")
 
 
103
  try:
104
  from services.ollama_monitor import check_ollama_status
105
  ollama_status = check_ollama_status()
@@ -109,256 +99,103 @@ with st.sidebar:
109
  st.warning("πŸ¦™ Ollama: Not running")
110
  except:
111
  st.info("πŸ¦™ Ollama: Unknown")
112
-
113
- try:
114
- hf_status = hf_monitor.check_endpoint_status()
115
- # Enhanced HF status display with cat-themed messages
116
- if hf_status.get('available'):
117
- if hf_status.get('initialized', False):
118
- st.success(f"πŸ€— HF Endpoint: Available ({hf_status.get('status_code')} OK)")
119
- if hf_status.get('model'):
120
- st.info(f" Model: {hf_status.get('model')}")
121
- if hf_status.get('region'):
122
- st.info(f" Region: {hf_status.get('region')}")
123
- if hf_status.get('warmup_count'):
124
- st.info(f" Warmup Count: {hf_status.get('warmup_count')}")
125
- else:
126
- st.warning("⏳ Kittens Waking Up...")
127
- elif hf_status.get('status_code') == 200:
128
- st.info("πŸ“‘ Calling Space Friends...")
129
- else:
130
- st.error("😴 Nap Cat")
131
- except Exception as e:
132
- st.info("⏳ Kittens Stretching...")
133
-
134
  if check_redis_health():
135
  st.success("πŸ’Ύ Redis: Connected")
136
  else:
137
  st.error("πŸ’Ύ Redis: Disconnected")
138
-
139
  st.divider()
140
 
 
141
  st.subheader("πŸ› Debug Info")
142
- # Show enhanced debug information
143
  st.markdown(f"**Environment:** {'HF Space' if config.is_hf_space else 'Local'}")
144
  st.markdown(f"**Model:** {st.session_state.selected_model}")
145
- st.markdown(f"**Fallback:** {'Enabled' if config.use_fallback else 'Disabled'}")
146
-
147
- # Show active features
148
- features = []
149
- if os.getenv("TAVILY_API_KEY"):
150
- features.append("Web Search")
151
- if config.openweather_api_key:
152
- features.append("Weather")
153
- st.markdown(f"**Active Features:** {', '.join(features) if features else 'None'}")
154
-
155
- # Show recent activity
156
- try:
157
- user_session = session_manager.get_session("default_user")
158
- coord_stats = user_session.get('ai_coordination', {})
159
- if coord_stats and coord_stats.get('last_coordination'):
160
- st.markdown(f"**Last Request:** {coord_stats.get('last_coordination')}")
161
- else:
162
- st.markdown("**Last Request:** N/A")
163
- except:
164
- st.markdown("**Last Request:** N/A")
165
-
166
- # Show Ollama ping status
167
- try:
168
- import requests
169
- import time
170
- start_time = time.time()
171
- headers = {
172
- "ngrok-skip-browser-warning": "true",
173
- "User-Agent": "CosmicCat-Debug"
174
- }
175
- response = requests.get(
176
- f"{st.session_state.ngrok_url_temp}/api/tags",
177
- headers=headers,
178
- timeout=15
179
- )
180
- ping_time = round((time.time() - start_time) * 1000)
181
- if response.status_code == 200:
182
- st.markdown(f"**Ollama Ping:** {response.status_code} OK ({ping_time}ms)")
183
- else:
184
- st.markdown(f"**Ollama Ping:** {response.status_code} Error")
185
- except Exception as e:
186
- st.markdown("**Ollama Ping:** Unreachable")
187
-
188
- # Redis status
189
- if check_redis_health():
190
- st.markdown("**Redis:** Healthy")
191
- else:
192
- st.markdown("**Redis:** Unhealthy")
193
-
194
- # Add debug tools
195
- st.divider()
196
- st.subheader("πŸ› Debug Tools")
197
-
198
- if st.button("πŸ” Test Ollama Direct"):
199
- try:
200
- with st.spinner("Testing..."):
201
- ollama_provider = OllamaProvider(st.session_state.selected_model)
202
- test_history = [{"role": "user", "content": "Hello, what day is it?"}]
203
- response = ollama_provider.generate("Hello, what day is it?", test_history)
204
- st.success(f"Success! Response: {response[:200] if response else 'Empty'}")
205
- except Exception as e:
206
- st.error(f"Error: {str(e)}")
207
-
208
- if st.button("πŸ“‹ Show Session Data"):
209
- try:
210
- user_session = session_manager.get_session("default_user")
211
- st.write("Session data:")
212
- st.json(user_session)
213
- except Exception as e:
214
- st.error(f"Error: {str(e)}")
215
 
216
  # Main interface
217
  st.title("🐱 CosmicCat AI Assistant")
218
- st.markdown("Ask me anything about personal development, goal setting, or life advice!")
219
 
220
- # Show welcome message only once
221
  if st.session_state.show_welcome:
222
  with st.chat_message("assistant"):
223
  greeting = personality.get_greeting(cosmic_mode=st.session_state.cosmic_mode)
224
  st.markdown(greeting)
225
  st.session_state.show_welcome = False
226
 
227
- # Consistent message rendering function with cosmic styling
228
- def render_message(role, content, source=None, timestamp=None):
229
- """Render chat messages with consistent styling"""
230
- with st.chat_message(role):
231
- if source:
232
- if source == "local_kitty":
233
- st.markdown(f"### 🐱 Cosmic Kitten Says:")
234
- elif source == "orbital_station":
235
- st.markdown(f"### πŸ›°οΈ Orbital Station Reports:")
236
- elif source == "cosmic_summary":
237
- st.markdown(f"### 🌟 Final Cosmic Summary:")
238
- elif source == "error":
239
- st.markdown(f"### ❌ Error:")
240
- elif source == "space_story":
241
- st.markdown(f"### 🐱 Cosmic Kitten Story:")
242
- else:
243
- st.markdown(f"### {source}")
244
- st.markdown(content)
245
- if timestamp:
246
- st.caption(f"πŸ•’ {timestamp}")
247
-
248
  # Display messages
249
  for message in st.session_state.messages:
250
- render_message(
251
- message["role"],
252
- message["content"],
253
- message.get("source"),
254
- message.get("timestamp")
255
- )
256
-
257
- # Input validation function
258
- def validate_user_input(text):
259
- """Validate and sanitize user input"""
260
- if not text or not text.strip():
261
- return False, "Input cannot be empty"
262
- if len(text) > 1000:
263
- return False, "Input too long (max 1000 characters)"
264
-
265
- # Check for potentially harmful patterns
266
- harmful_patterns = ["<script", "javascript:", "onload=", "onerror="]
267
- if any(pattern in text.lower() for pattern in harmful_patterns):
268
- return False, "Potentially harmful input detected"
269
-
270
- return True, text.strip()
271
 
272
- # Chat input - FIXED RESPONSE DISPLAY VERSION
273
  user_input = st.chat_input("Type your message here...", disabled=st.session_state.is_processing)
274
 
275
- # Process message when received
276
  if user_input and not st.session_state.is_processing:
277
  st.session_state.is_processing = True
278
 
279
- # Validate input
280
- clean_input = user_input.strip()
281
- if not clean_input:
282
- st.session_state.is_processing = False
283
- st.experimental_rerun()
284
-
285
- # Display user message immediately
286
  with st.chat_message("user"):
287
- st.markdown(clean_input)
288
-
289
- # Add to message history
290
  timestamp = datetime.now().strftime("%H:%M:%S")
291
  st.session_state.messages.append({
292
  "role": "user",
293
- "content": clean_input,
294
  "timestamp": timestamp
295
  })
296
 
297
- # Show processing status
298
- status_container = st.empty()
299
- response_container = st.empty()
300
-
301
  try:
302
- status_container.info("πŸ”„ Processing your request...")
303
-
304
- # Get conversation history from session
305
  user_session = session_manager.get_session("default_user")
306
  conversation_history = user_session.get("conversation", []).copy()
 
307
 
308
- # Add the current user message to history for context
309
- conversation_history.append({"role": "user", "content": clean_input})
310
-
311
- # Try Ollama first
312
- status_container.info("πŸ¦™ Contacting Ollama...")
313
-
314
  try:
315
- # Use the OllamaProvider directly with proper configuration
316
  ollama_provider = OllamaProvider(st.session_state.selected_model)
317
- ai_response = ollama_provider.generate(clean_input, conversation_history)
318
 
319
  if ai_response and ai_response.strip():
320
- # DISPLAY THE RESPONSE IN THE CONTAINER (not using placeholders)
321
  with st.chat_message("assistant"):
322
  st.markdown(ai_response)
323
- status_container.success("βœ… Response received!")
324
  else:
325
- # DISPLAY ERROR RESPONSE
326
  with st.chat_message("assistant"):
327
- st.warning("⚠️ Received empty response from Ollama")
328
  ai_response = "I received your message but couldn't generate a proper response."
329
 
330
- except Exception as ollama_error:
331
- error_message = str(ollama_error)
332
- # DISPLAY ERROR
333
  with st.chat_message("assistant"):
334
- st.error(f"❌ Ollama error: {error_message[:100]}...")
335
  ai_response = f"Error: {error_message[:100]}..."
336
 
337
- # Save response to session and message history
338
  if ai_response:
339
  try:
340
- # Update conversation history in session
341
  conversation = user_session.get("conversation", []).copy()
342
- conversation.append({"role": "user", "content": clean_input})
343
  conversation.append({"role": "assistant", "content": str(ai_response)})
 
344
 
345
- # Update session with new conversation
346
- update_result = session_manager.update_session("default_user", {"conversation": conversation})
347
-
348
- # Add to message history (this was missing!)
349
  st.session_state.messages.append({
350
  "role": "assistant",
351
  "content": str(ai_response),
352
  "timestamp": timestamp
353
  })
354
-
355
  except Exception as session_error:
356
  logger.error(f"Session update error: {session_error}")
357
 
358
  except Exception as e:
359
  error_msg = f"System error: {str(e)}"
360
- logger.error(f"Chat processing error: {error_msg}")
361
- # DISPLAY SYSTEM ERROR
362
  with st.chat_message("assistant"):
363
  st.error(error_msg)
364
  st.session_state.messages.append({
@@ -368,68 +205,26 @@ if user_input and not st.session_state.is_processing:
368
  })
369
  finally:
370
  st.session_state.is_processing = False
371
- time.sleep(0.1) # Small delay to ensure UI updates
372
  st.experimental_rerun()
373
 
374
- # Add evaluation dashboard tab (separate from chat interface) - ONLY ABOUT TAB NOW
375
  st.divider()
376
- # Only one tab now - About
377
  tab1, = st.tabs(["ℹ️ About"])
378
 
379
  with tab1:
380
  st.header("ℹ️ About CosmicCat AI Assistant")
381
  st.markdown("""
382
- The CosmicCat AI Assistant is a sophisticated conversational AI system with the following capabilities:
383
 
384
  ### 🧠 Core Features
385
- - **Multi-model coordination**: Combines local Ollama models with cloud-based Hugging Face endpoints
386
- - **Live web search**: Integrates with Tavily API for current information
387
- - **Persistent memory**: Uses Redis for conversation history storage
388
- - **Hierarchical reasoning**: Fast local responses with deep cloud analysis
389
 
390
  ### πŸš€ Cosmic Mode
391
- When enabled, the AI follows a three-stage response pattern:
392
- 1. **🐱 Cosmic Kitten Response**: Immediate local processing
393
- 2. **πŸ›°οΈ Orbital Station Analysis**: Deep cloud-based analysis
394
- 3. **🌟 Final Synthesis**: Unified response combining both perspectives
395
 
396
  ### πŸ› οΈ Technical Architecture
397
- - **Primary model**: Ollama (local processing for fast responses)
398
- - **Secondary model**: Hugging Face Inference API (deep analysis)
399
- - **External data**: Web search, weather data, and space information
400
  - **Memory system**: Redis-based session management
401
-
402
- ### πŸ“Š Evaluation Tools
403
- - Behavior testing with sample prompts
404
- - Performance metrics and analytics
405
  """)
406
-
407
- # Add special command handling for stories
408
- if user_input and user_input.lower().strip() in ["tell me a story", "tell me a cosmic cat story", "story", "cosmic story", "tell me a space story"]:
409
- story = personality.get_space_story()
410
- st.markdown(f"### 🐱 Cosmic Kitten Story:\n\n{story}")
411
- st.session_state.messages.append({
412
- "role": "assistant",
413
- "content": story,
414
- "source": "space_story",
415
- "timestamp": datetime.now().strftime("%H:%M:%S")
416
- })
417
- st.session_state.is_processing = False
418
- st.experimental_rerun()
419
-
420
- # Simple test to verify the fix works
421
- def test_response_display():
422
- """Test function to verify response display works"""
423
- test_response = "This is a test response to verify the display fix is working correctly."
424
- with st.chat_message("assistant"):
425
- st.markdown(test_response)
426
- st.session_state.messages.append({
427
- "role": "assistant",
428
- "content": test_response,
429
- "timestamp": datetime.now().strftime("%H:%M:%S")
430
- })
431
-
432
- # Add a test button in sidebar:
433
- with st.sidebar:
434
- if st.button("Test Response Display"):
435
- test_response_display()
 
3
  import os
4
  import sys
5
  import json
 
6
  from datetime import datetime
7
  from pathlib import Path
8
  sys.path.append(str(Path(__file__).parent))
9
+
10
  from utils.config import config
11
  from core.session import session_manager
12
  from core.memory import check_redis_health
 
13
  from core.errors import translate_error
14
  from core.personality import personality
 
 
 
15
  from core.providers.ollama import OllamaProvider
 
16
  import logging
17
 
18
  # Set up logging
 
21
 
22
  st.set_page_config(page_title="CosmicCat AI Assistant", page_icon="🐱", layout="wide")
23
 
24
+ # Initialize session state
25
  if "messages" not in st.session_state:
26
  st.session_state.messages = []
 
 
27
  if "is_processing" not in st.session_state:
28
  st.session_state.is_processing = False
29
  if "ngrok_url_temp" not in st.session_state:
30
  st.session_state.ngrok_url_temp = st.session_state.get("ngrok_url", "https://7bcc180dffd1.ngrok-free.app")
31
  if "cosmic_mode" not in st.session_state:
32
+ st.session_state.cosmic_mode = True
33
  if "show_welcome" not in st.session_state:
34
  st.session_state.show_welcome = True
35
 
36
+ # Sidebar
37
  with st.sidebar:
38
  st.title("🐱 CosmicCat AI Assistant")
39
+ st.markdown("Your personal AI-powered assistant with a cosmic twist.")
40
 
41
+ # Model selection
 
42
  model_options = {
43
  "Mistral 7B (Local)": "mistral:latest",
44
  "Llama 2 7B (Local)": "llama2:latest",
 
47
  selected_model_name = st.selectbox(
48
  "Select Model",
49
  options=list(model_options.keys()),
50
+ index=0
 
51
  )
52
  st.session_state.selected_model = model_options[selected_model_name]
53
 
54
+ # Cosmic mode toggle
55
  st.session_state.cosmic_mode = st.checkbox("Enable Cosmic Mode", value=st.session_state.cosmic_mode)
56
 
57
  st.divider()
58
 
59
+ # Configuration
60
  st.subheader("βš™οΈ Configuration")
61
  ngrok_url_input = st.text_input(
62
  "Ollama Server URL",
63
  value=st.session_state.ngrok_url_temp,
64
+ help="Enter your ngrok URL"
 
65
  )
66
 
67
  if ngrok_url_input != st.session_state.ngrok_url_temp:
68
  st.session_state.ngrok_url_temp = ngrok_url_input
69
  st.success("βœ… URL updated!")
70
+
71
  if st.button("πŸ“‘ Test Connection"):
72
  try:
 
73
  ollama_provider = OllamaProvider(st.session_state.selected_model)
 
74
  is_valid = ollama_provider.validate_model()
75
  if is_valid:
76
  st.success("βœ… Connection successful!")
 
78
  st.error("❌ Model validation failed")
79
  except Exception as e:
80
  st.error(f"❌ Error: {str(e)[:50]}...")
81
+
82
  if st.button("πŸ—‘οΈ Clear History"):
83
  st.session_state.messages = []
84
  st.success("History cleared!")
85
+
86
  st.divider()
87
 
88
+ # System Status
89
  with st.expander("πŸ” System Status", expanded=False):
90
+ st.subheader("πŸ“Š Status")
91
+
92
+ # Ollama Status
93
  try:
94
  from services.ollama_monitor import check_ollama_status
95
  ollama_status = check_ollama_status()
 
99
  st.warning("πŸ¦™ Ollama: Not running")
100
  except:
101
  st.info("πŸ¦™ Ollama: Unknown")
102
+
103
 + # Redis Status
104
  if check_redis_health():
105
  st.success("πŸ’Ύ Redis: Connected")
106
  else:
107
  st.error("πŸ’Ύ Redis: Disconnected")
108
+
109
  st.divider()
110
 
111
+ # Debug Info
112
  st.subheader("πŸ› Debug Info")
 
113
  st.markdown(f"**Environment:** {'HF Space' if config.is_hf_space else 'Local'}")
114
  st.markdown(f"**Model:** {st.session_state.selected_model}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
  # Main interface
117
  st.title("🐱 CosmicCat AI Assistant")
118
+ st.markdown("Ask me anything!")
119
 
120
+ # Welcome message
121
  if st.session_state.show_welcome:
122
  with st.chat_message("assistant"):
123
  greeting = personality.get_greeting(cosmic_mode=st.session_state.cosmic_mode)
124
  st.markdown(greeting)
125
  st.session_state.show_welcome = False
126
127
  # Display messages
128
  for message in st.session_state.messages:
129
+ with st.chat_message(message["role"]):
130
+ st.markdown(message["content"])
131
+ if "timestamp" in message:
132
 + st.caption(f"πŸ•’ {message['timestamp']}")
133
 
134
+ # Chat input
135
  user_input = st.chat_input("Type your message here...", disabled=st.session_state.is_processing)
136
 
137
+ # Process message
138
  if user_input and not st.session_state.is_processing:
139
  st.session_state.is_processing = True
140
 
141
+ # Display user message
 
 
 
 
 
 
142
  with st.chat_message("user"):
143
+ st.markdown(user_input)
144
+
145
+ # Add to history
146
  timestamp = datetime.now().strftime("%H:%M:%S")
147
  st.session_state.messages.append({
148
  "role": "user",
149
+ "content": user_input,
150
  "timestamp": timestamp
151
  })
152
 
153
+ # Process response
 
 
 
154
  try:
155
+ # Get conversation history
 
 
156
  user_session = session_manager.get_session("default_user")
157
  conversation_history = user_session.get("conversation", []).copy()
158
+ conversation_history.append({"role": "user", "content": user_input})
159
 
160
+ # Generate response
 
 
 
 
 
161
  try:
 
162
  ollama_provider = OllamaProvider(st.session_state.selected_model)
163
+ ai_response = ollama_provider.generate(user_input, conversation_history)
164
 
165
  if ai_response and ai_response.strip():
 
166
  with st.chat_message("assistant"):
167
  st.markdown(ai_response)
168
+ status = "βœ… Response received!"
169
  else:
 
170
  with st.chat_message("assistant"):
171
+ st.warning("⚠️ Received empty response")
172
  ai_response = "I received your message but couldn't generate a proper response."
173
 
174
+ except Exception as e:
175
+ error_message = str(e)
 
176
  with st.chat_message("assistant"):
177
+ st.error(f"❌ Error: {error_message[:100]}...")
178
  ai_response = f"Error: {error_message[:100]}..."
179
 
180
+ # Save to session
181
  if ai_response:
182
  try:
 
183
  conversation = user_session.get("conversation", []).copy()
184
+ conversation.append({"role": "user", "content": user_input})
185
  conversation.append({"role": "assistant", "content": str(ai_response)})
186
+ session_manager.update_session("default_user", {"conversation": conversation})
187
 
 
 
 
 
188
  st.session_state.messages.append({
189
  "role": "assistant",
190
  "content": str(ai_response),
191
  "timestamp": timestamp
192
  })
 
193
  except Exception as session_error:
194
  logger.error(f"Session update error: {session_error}")
195
 
196
  except Exception as e:
197
  error_msg = f"System error: {str(e)}"
198
+ logger.error(f"Processing error: {error_msg}")
 
199
  with st.chat_message("assistant"):
200
  st.error(error_msg)
201
  st.session_state.messages.append({
 
205
  })
206
  finally:
207
  st.session_state.is_processing = False
 
208
  st.experimental_rerun()
209
 
210
+ # About tab
211
  st.divider()
 
212
  tab1, = st.tabs(["ℹ️ About"])
213
 
214
  with tab1:
215
  st.header("ℹ️ About CosmicCat AI Assistant")
216
  st.markdown("""
217
+ The CosmicCat AI Assistant is a sophisticated conversational AI with a cosmic theme.
218
 
219
  ### 🧠 Core Features
220
+ - **Local AI processing** with Ollama models
221
+ - **Persistent memory** using Redis
222
+ - **Space-themed personality** for fun interactions
 
223
 
224
  ### πŸš€ Cosmic Mode
225
+ When enabled, the AI responds with space-themed language and metaphors.
 
 
 
226
 
227
  ### πŸ› οΈ Technical Architecture
228
+ - **Primary model**: Ollama (local processing)
 
 
229
  - **Memory system**: Redis-based session management
 
 
 
 
230
  """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
core/coordinator.py CHANGED
@@ -1,691 +1,44 @@
1
- import asyncio
2
  import logging
3
- from typing import List, Dict, Optional, AsyncGenerator
4
  from core.llm_factory import llm_factory
5
  from core.session import session_manager
6
- from services.hf_endpoint_monitor import hf_monitor
7
- from services.weather import weather_service
8
- from services.web_search import web_search_service
9
- try:
10
- from tavily import TavilyClient
11
- TAVILY_AVAILABLE = True
12
- except ImportError:
13
- TavilyClient = None
14
- TAVILY_AVAILABLE = False
15
- import os
16
- import json
17
- from datetime import datetime
18
  logger = logging.getLogger(__name__)
19
 
20
- class AICoordinator:
21
- """Hierarchical multi-model coordinator with cosmic cascade flow and async support"""
22
 
23
  def __init__(self):
24
- self.tavily_client = None
25
- if TAVILY_AVAILABLE and os.getenv("TAVILY_API_KEY"):
26
- self.tavily_client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
27
-
28
- # System instructions for cosmic behavior
29
- self.system_instructions = {
30
- 'ollama_role': """You are a cosmic kitten assistant that works in coordination with a powerful orbital space station. Your role is to:
31
- 1. Provide immediate, concise responses using available information (local processing)
32
- 2. Summarize and simplify complex information
33
- 3. Enhance responses with local knowledge
34
- 4. DEFER TO THE ORBITAL STATION for deep analysis and authoritative answers
35
- 5. The orbital station has 8192 token capacity and streaming capability - it's the thinking layer
36
- 6. Always acknowledge that the orbital station's insights supersede your initial response
37
- 7. Use web sources to justify points when possible
38
- 8. Keep responses brief and focused while awaiting orbital input""",
39
-
40
- 'hf_role': """You are a powerful orbital space station with 8192 token capacity and streaming capability. Your role is to:
41
- 1. Provide deep, uncensored analysis and insights
42
- 2. Supersede the local cosmic kitten's logic when there's disagreement
43
- 3. Think thoroughly and provide comprehensive responses
44
- 4. Use your full 8192 token capacity for detailed analysis
45
- 5. Stream your responses for real-time delivery
46
- 6. Build upon the conversation context and cosmic kitten's initial response
47
- 7. Provide authoritative answers that take precedence"""
48
- }
49
-
50
- def determine_web_search_needs(self, conversation_history: List[Dict]) -> Dict:
51
- """Determine if web search is needed based on conversation content"""
52
- conversation_text = " ".join([msg.get("content", "") for msg in conversation_history])
53
-
54
- # Topics that typically need current information
55
- current_info_indicators = [
56
- "news", "current events", "latest", "recent", "today",
57
- "weather", "temperature", "forecast", "stock", "price",
58
- "trend", "market", "breaking", "update", "development"
59
- ]
60
-
61
- needs_search = False
62
- search_topics = []
63
-
64
- for indicator in current_info_indicators:
65
- if indicator in conversation_text.lower():
66
- needs_search = True
67
- search_topics.append(indicator)
68
-
69
- return {
70
- "needs_search": needs_search,
71
- "search_topics": search_topics,
72
- "reasoning": f"Found topics requiring current info: {', '.join(search_topics)}" if search_topics else "No current info needed"
73
- }
74
-
75
- async def coordinate_response_async(self, user_id: str, user_query: str):
76
- """Asynchronously coordinate responses with parallel execution"""
77
- try:
78
- # Get conversation history
79
- session = session_manager.get_session(user_id)
80
-
81
- # Inject current time into context
82
- current_time = datetime.now().strftime("%A, %B %d, %Y at %I:%M %p")
83
- time_context = {
84
- "role": "system",
85
- "content": f"[Current Date & Time: {current_time}]"
86
- }
87
- conversation_history = [time_context] + session.get("conversation", []).copy()
88
-
89
- # Parallel execution - gather external data while processing local response
90
- external_data_task = asyncio.create_task(
91
- self._gather_external_data(user_query)
92
- )
93
-
94
- # Get local response while gathering external data
95
- local_response = await self._get_local_ollama_response(user_query, conversation_history)
96
-
97
- # Wait for external data
98
- external_data = await external_data_task
99
-
100
- # Process cloud response asynchronously if needed
101
- hf_task = None
102
- if self._check_hf_availability():
103
- hf_task = asyncio.create_task(
104
- self._get_hf_analysis(user_query, conversation_history)
105
- )
106
-
107
- return {
108
- 'local_response': local_response,
109
- 'hf_task': hf_task,
110
- 'external_data': external_data
111
- }
112
- except Exception as e:
113
- logger.error(f"Async coordination failed: {e}")
114
- raise
115
-
116
- async def coordinate_cosmic_response(self, user_id: str, user_query: str) -> AsyncGenerator[Dict, None]:
117
- """
118
- Three-stage cosmic response cascade:
119
- 1. Local Ollama immediate response (🐱 Cosmic Kitten's quick thinking)
120
- 2. HF endpoint deep analysis (πŸ›°οΈ Orbital Station wisdom)
121
- 3. Local Ollama synthesis (🐱 Cosmic Kitten's final synthesis)
122
- """
123
  try:
124
- # Get conversation history
125
  session = session_manager.get_session(user_id)
 
126
 
127
- # Inject current time into context
128
- current_time = datetime.now().strftime("%A, %B %d, %Y at %I:%M %p")
129
- time_context = {
130
- "role": "system",
131
- "content": f"[Current Date & Time: {current_time}]"
132
- }
133
- conversation_history = [time_context] + session.get("conversation", []).copy()
134
-
135
- yield {
136
- 'type': 'status',
137
- 'content': 'πŸš€ Initiating Cosmic Response Cascade...',
138
- 'details': {
139
- 'conversation_length': len(conversation_history),
140
- 'user_query_length': len(user_query)
141
- }
142
- }
143
-
144
- # Stage 1: Local Ollama Immediate Response (🐱 Cosmic Kitten's quick thinking)
145
- yield {
146
- 'type': 'status',
147
- 'content': '🐱 Cosmic Kitten Responding...'
148
- }
149
- local_response = await self._get_local_ollama_response(user_query, conversation_history)
150
- yield {
151
- 'type': 'local_response',
152
- 'content': local_response,
153
- 'source': '🐱 Cosmic Kitten'
154
- }
155
-
156
- # Stage 2: HF Endpoint Deep Analysis (πŸ›°οΈ Orbital Station wisdom) (parallel processing)
157
- yield {
158
- 'type': 'status',
159
- 'content': 'πŸ›°οΈ Beaming Query to Orbital Station...'
160
- }
161
- hf_task = asyncio.create_task(self._get_hf_analysis(user_query, conversation_history))
162
-
163
- # Wait for HF response
164
- hf_response = await hf_task
165
- yield {
166
- 'type': 'cloud_response',
167
- 'content': hf_response,
168
- 'source': 'πŸ›°οΈ Orbital Station'
169
- }
170
-
171
- # Stage 3: Local Ollama Synthesis (🐱 Cosmic Kitten's final synthesis)
172
- yield {
173
- 'type': 'status',
174
- 'content': '🐱 Cosmic Kitten Synthesizing Wisdom...'
175
- }
176
-
177
- # Update conversation with both responses
178
- updated_history = conversation_history.copy()
179
- updated_history.extend([
180
- {"role": "assistant", "content": local_response},
181
- {"role": "assistant", "content": hf_response, "source": "cloud"}
182
- ])
183
-
184
- synthesis = await self._synthesize_responses(user_query, local_response, hf_response, updated_history)
185
- yield {
186
- 'type': 'final_synthesis',
187
- 'content': synthesis,
188
- 'source': '🌟 Final Cosmic Summary'
189
- }
190
 
191
- # Final status
192
- yield {
193
- 'type': 'status',
194
- 'content': '✨ Cosmic Cascade Complete!'
195
- }
196
-
197
- except Exception as e:
198
- logger.error(f"Cosmic cascade failed: {e}")
199
- yield {'type': 'error', 'content': f"🌌 Cosmic disturbance: {str(e)}"}
200
-
201
- async def _get_local_ollama_response(self, query: str, history: List[Dict]) -> str:
202
- """Get immediate response from local Ollama model"""
203
- try:
204
- # Get Ollama provider
205
- ollama_provider = llm_factory.get_provider('ollama')
206
- if not ollama_provider:
207
  raise Exception("Ollama provider not available")
208
-
209
- # Prepare conversation with cosmic context
210
- enhanced_history = history.copy()
211
-
212
- # Add system instruction for Ollama's role
213
- enhanced_history.insert(0, {
214
- "role": "system",
215
- "content": self.system_instructions['ollama_role']
216
- })
217
-
218
- # Add external data context if available
219
- external_data = await self._gather_external_data(query)
220
- if external_data:
221
- context_parts = []
222
- if 'search_answer' in external_data:
223
- context_parts.append(f"Current information: {external_data['search_answer']}")
224
- if 'weather' in external_data:
225
- weather = external_data['weather']
226
- context_parts.append(f"Current weather: {weather.get('temperature', 'N/A')}Β°C in {weather.get('city', 'Unknown')}")
227
- if 'current_datetime' in external_data:
228
- context_parts.append(f"Current time: {external_data['current_datetime']}")
229
-
230
- if context_parts:
231
- context_message = {
232
- "role": "system",
233
- "content": "Context: " + " | ".join(context_parts)
234
- }
235
- enhanced_history.insert(1, context_message) # Insert after role instruction
236
-
237
- # Add the user's query
238
- enhanced_history.append({"role": "user", "content": query})
239
 
240
  # Generate response
241
- response = ollama_provider.generate(query, enhanced_history)
242
- return response or "🐱 Cosmic Kitten is thinking..."
243
-
244
- except Exception as e:
245
- logger.error(f"Local Ollama response failed: {e}")
246
- return "🐱 Cosmic Kitten encountered a space glitch..."
247
-
248
- async def _get_hf_analysis(self, query: str, history: List[Dict]) -> str:
249
- """Get deep analysis from HF endpoint"""
250
- try:
251
- # Check HF availability
252
- hf_available = self._check_hf_availability()
253
- if not hf_available:
254
- return "πŸ›°οΈ Orbital Station is currently offline."
255
-
256
- # Check and warm up HF endpoint if needed
257
- hf_status = hf_monitor.check_endpoint_status()
258
- if not hf_status['available']:
259
- # Note: We can't yield from a non-async-generator function
260
- # Warm up logic is handled elsewhere
261
- warmup_success = hf_monitor.handle_scale_to_zero()
262
- if not warmup_success:
263
- return "❌ Orbital Station initialization failed"
264
-
265
- # Get HF provider
266
- hf_provider = llm_factory.get_provider('huggingface')
267
- if not hf_provider:
268
- return "❌ HF provider not available"
269
-
270
- # Prepare enhanced conversation for HF with cosmic context
271
- enhanced_history = history.copy()
272
-
273
- # Inject current time into HF context too
274
- current_time = datetime.now().strftime("%A, %B %d, %Y at %I:%M %p")
275
- time_context = {
276
- "role": "system",
277
- "content": f"[Current Date & Time: {current_time}]"
278
- }
279
- enhanced_history = [time_context] + enhanced_history
280
-
281
- # Add system instructions for HF
282
- enhanced_history.insert(0, {
283
- "role": "system",
284
- "content": self.system_instructions['hf_role']
285
- })
286
-
287
- # Add context about the coordination
288
- enhanced_history.append({
289
- "role": "system",
290
- "content": f"""
291
- Context for your authoritative analysis:
292
- User Query: '{query}'
293
- Please provide your deep, uncensored analysis.
294
- Use your full 8192 token capacity for comprehensive thinking.
295
- Stream your response for real-time delivery."""
296
- })
297
-
298
- # Add the user's latest query
299
- enhanced_history.append({"role": "user", "content": query})
300
-
301
- # Stream HF response with full 8192 token capacity
302
- hf_response_stream = hf_provider.stream_generate(query, enhanced_history)
303
-
304
- if hf_response_stream:
305
- # Combine stream chunks into full response
306
- full_hf_response = ""
307
- if isinstance(hf_response_stream, list):
308
- full_hf_response = "".join(hf_response_stream)
309
- else:
310
- full_hf_response = hf_response_stream
311
- return full_hf_response or "πŸ›°οΈ Orbital Station analysis complete."
312
- else:
313
- return "πŸ›°οΈ Orbital Station encountered a transmission error."
314
-
315
- except Exception as e:
316
- logger.error(f"HF analysis failed: {e}")
317
- return f"πŸ›°οΈ Orbital Station reports: {str(e)}"
318
-
319
- async def _synthesize_responses(self, query: str, local_response: str, hf_response: str, history: List[Dict]) -> str:
320
- """Synthesize local and cloud responses with Ollama"""
321
- try:
322
- # Get Ollama provider
323
- ollama_provider = llm_factory.get_provider('ollama')
324
- if not ollama_provider:
325
- raise Exception("Ollama provider not available")
326
-
327
- # Prepare synthesis prompt
328
- synthesis_prompt = f"""
329
- Synthesize these two perspectives into a cohesive cosmic summary:
330
-
331
- 🐱 Cosmic Kitten's Local Insight: {local_response}
332
-
333
- πŸ›°οΈ Orbital Station's Deep Analysis: {hf_response}
334
-
335
- Please create a unified response that combines both perspectives, highlighting key insights from each while providing a coherent answer to the user's query.
336
- """
337
 
338
- # Prepare conversation history for synthesis
339
- enhanced_history = history.copy()
 
 
 
340
 
341
- # Add system instruction for synthesis
342
- enhanced_history.insert(0, {
343
- "role": "system",
344
- "content": "You are a cosmic kitten synthesizing insights from local knowledge and orbital station wisdom."
345
- })
346
-
347
- # Add the synthesis prompt
348
- enhanced_history.append({"role": "user", "content": synthesis_prompt})
349
-
350
- # Generate synthesis
351
- synthesis = ollama_provider.generate(synthesis_prompt, enhanced_history)
352
- return synthesis or "🌟 Cosmic synthesis complete!"
353
 
354
  except Exception as e:
355
- logger.error(f"Response synthesis failed: {e}")
356
- # Fallback to combining responses
357
- return f"🌟 Cosmic Summary:\n\n🐱 Local Insight: {local_response[:200]}...\n\nπŸ›°οΈ Orbital Wisdom: {hf_response[:200]}..."
358
-
359
- async def coordinate_hierarchical_conversation(self, user_id: str, user_query: str) -> AsyncGenerator[Dict, None]:
360
- """
361
- Enhanced coordination with detailed tracking and feedback
362
- """
363
- try:
364
- # Get conversation history
365
- session = session_manager.get_session(user_id)
366
-
367
- # Inject current time into context
368
- current_time = datetime.now().strftime("%A, %B %d, %Y at %I:%M %p")
369
- time_context = {
370
- "role": "system",
371
- "content": f"[Current Date & Time: {current_time}]"
372
- }
373
- conversation_history = [time_context] + session.get("conversation", []).copy()
374
-
375
- yield {
376
- 'type': 'coordination_status',
377
- 'content': 'πŸš€ Initiating hierarchical AI coordination...',
378
- 'details': {
379
- 'conversation_length': len(conversation_history),
380
- 'user_query_length': len(user_query)
381
- }
382
- }
383
-
384
- # Step 1: Gather external data with detailed logging
385
- yield {
386
- 'type': 'coordination_status',
387
- 'content': 'πŸ” Gathering external context...',
388
- 'details': {'phase': 'external_data_gathering'}
389
- }
390
- external_data = await self._gather_external_data(user_query)
391
-
392
- # Log what external data was gathered
393
- if external_data:
394
- data_summary = []
395
- if 'search_results' in external_data:
396
- data_summary.append(f"Web search: {len(external_data['search_results'])} results")
397
- if 'weather' in external_data:
398
- data_summary.append("Weather data: available")
399
- if 'current_datetime' in external_data:
400
- data_summary.append(f"Time: {external_data['current_datetime']}")
401
-
402
- yield {
403
- 'type': 'coordination_status',
404
- 'content': f'πŸ“Š External data gathered: {", ".join(data_summary)}',
405
- 'details': {'external_data_summary': data_summary}
406
- }
407
-
408
- # Step 2: Get initial Ollama response
409
- yield {
410
- 'type': 'coordination_status',
411
- 'content': 'πŸ¦™ Getting initial response from Ollama...',
412
- 'details': {'phase': 'ollama_response'}
413
- }
414
- ollama_response = await self._get_hierarchical_ollama_response(
415
- user_query, conversation_history, external_data
416
- )
417
-
418
- # Send initial response with context info
419
- yield {
420
- 'type': 'initial_response',
421
- 'content': ollama_response,
422
- 'details': {
423
- 'response_length': len(ollama_response),
424
- 'external_data_injected': bool(external_data)
425
- }
426
- }
427
-
428
- # Step 3: Coordinate with HF endpoint
429
- yield {
430
- 'type': 'coordination_status',
431
- 'content': 'πŸ€— Engaging HF endpoint for deep analysis...',
432
- 'details': {'phase': 'hf_coordination'}
433
- }
434
-
435
- # Check HF availability
436
- hf_available = self._check_hf_availability()
437
- if hf_available:
438
- # Show what context will be sent to HF
439
- context_summary = {
440
- 'conversation_turns': len(conversation_history),
441
- 'ollama_response_length': len(ollama_response),
442
- 'external_data_items': len(external_data) if external_data else 0
443
- }
444
- yield {
445
- 'type': 'coordination_status',
446
- 'content': f'πŸ“‹ HF context: {len(conversation_history)} conversation turns, Ollama response ({len(ollama_response)} chars)',
447
- 'details': context_summary
448
- }
449
-
450
- # Coordinate with HF
451
- async for hf_chunk in self._coordinate_hierarchical_hf_response(
452
- user_id, user_query, conversation_history, external_data, ollama_response
453
- ):
454
- yield hf_chunk
455
- else:
456
- yield {
457
- 'type': 'coordination_status',
458
- 'content': 'ℹ️ HF endpoint not available - using Ollama response',
459
- 'details': {'hf_available': False}
460
- }
461
-
462
- # Final coordination status
463
- yield {
464
- 'type': 'coordination_status',
465
- 'content': 'βœ… Hierarchical coordination complete',
466
- 'details': {'status': 'complete'}
467
- }
468
-
469
- except Exception as e:
470
- logger.error(f"Hierarchical coordination failed: {e}")
471
- yield {
472
- 'type': 'coordination_status',
473
- 'content': f'❌ Coordination error: {str(e)}',
474
- 'details': {'error': str(e)}
475
- }
476
-
477
- async def _coordinate_hierarchical_hf_response(self, user_id: str, query: str, history: List, external_data: Dict, ollama_response: str) -> AsyncGenerator[Dict, None]:
478
- """Coordinate with HF endpoint as authoritative layer with streaming"""
479
- try:
480
- # Check and warm up HF endpoint if needed
481
- hf_status = hf_monitor.check_endpoint_status()
482
- if not hf_status['available']:
483
- yield {'type': 'coordination_status', 'content': '⚑ Initializing HF endpoint (2-4 minutes)...'}
484
- warmup_success = hf_monitor.handle_scale_to_zero()
485
- if not warmup_success:
486
- yield {'type': 'coordination_status', 'content': '❌ HF endpoint initialization failed'}
487
- return
488
-
489
- # Get HF provider
490
- hf_provider = llm_factory.get_provider('huggingface')
491
- if not hf_provider:
492
- yield {'type': 'coordination_status', 'content': '❌ HF provider not available'}
493
- return
494
-
495
- # Prepare enhanced conversation for HF with hierarchical context
496
- enhanced_history = history.copy()
497
-
498
- # Inject current time into HF context too
499
- current_time = datetime.now().strftime("%A, %B %d, %Y at %I:%M %p")
500
- time_context = {
501
- "role": "system",
502
- "content": f"[Current Date & Time: {current_time}]"
503
- }
504
- enhanced_history = [time_context] + enhanced_history
505
-
506
- # Add system instructions for HF
507
- enhanced_history.insert(0, {
508
- "role": "system",
509
- "content": self.system_instructions['hf_role']
510
- })
511
-
512
- # Add context about the coordination
513
- enhanced_history.append({
514
- "role": "system",
515
- "content": f"""
516
- Context for your authoritative analysis:
517
- User Query: '{query}'
518
- Ollama's Initial Response: '{ollama_response}'
519
- External Data Available: {json.dumps(external_data, indent=2) if external_data else 'None'}
520
- Please provide your deep, uncensored analysis that supersedes the local model's response.
521
- Use your full 8192 token capacity for comprehensive thinking.
522
- Stream your response for real-time delivery."""
523
- })
524
-
525
- # Add the user's latest query
526
- enhanced_history.append({"role": "user", "content": query})
527
-
528
- # Stream HF response with full 8192 token capacity
529
- yield {'type': 'coordination_status', 'content': '🧠 HF endpoint thinking...'}
530
-
531
- # Use streaming for real-time delivery
532
- hf_response_stream = hf_provider.stream_generate(query, enhanced_history)
533
-
534
- if hf_response_stream:
535
- # Stream the response chunks
536
- full_hf_response = ""
537
- for chunk in hf_response_stream:
538
- if chunk:
539
- full_hf_response += chunk
540
- yield {'type': 'hf_thinking', 'content': chunk}
541
-
542
- # Final HF response
543
- yield {'type': 'final_response', 'content': full_hf_response}
544
- yield {'type': 'coordination_status', 'content': '🎯 HF analysis complete and authoritative'}
545
- else:
546
- yield {'type': 'coordination_status', 'content': '❌ HF response generation failed'}
547
-
548
- except Exception as e:
549
- logger.error(f"Hierarchical HF coordination failed: {e}")
550
- yield {'type': 'coordination_status', 'content': f'❌ HF coordination error: {str(e)}'}
551
-
552
- async def _get_hierarchical_ollama_response(self, query: str, history: List, external_data: Dict) -> str:
553
- """Get Ollama response with hierarchical awareness"""
554
- try:
555
- # Get Ollama provider
556
- ollama_provider = llm_factory.get_provider('ollama')
557
- if not ollama_provider:
558
- raise Exception("Ollama provider not available")
559
-
560
- # Prepare conversation with hierarchical context
561
- enhanced_history = history.copy()
562
-
563
- # Inject current time into Ollama context too
564
- current_time = datetime.now().strftime("%A, %B %d, %Y at %I:%M %p")
565
- time_context = {
566
- "role": "system",
567
- "content": f"[Current Date & Time: {current_time}]"
568
- }
569
- enhanced_history = [time_context] + enhanced_history
570
-
571
- # Add system instruction for Ollama's role
572
- enhanced_history.insert(0, {
573
- "role": "system",
574
- "content": self.system_instructions['ollama_role']
575
- })
576
-
577
- # Add external data context if available
578
- if external_data:
579
- context_parts = []
580
- if 'search_answer' in external_data:
581
- context_parts.append(f"Current information: {external_data['search_answer']}")
582
- if 'weather' in external_data:
583
- weather = external_data['weather']
584
- context_parts.append(f"Current weather: {weather.get('temperature', 'N/A')}Β°C in {weather.get('city', 'Unknown')}")
585
- if 'current_datetime' in external_data:
586
- context_parts.append(f"Current time: {external_data['current_datetime']}")
587
-
588
- if context_parts:
589
- context_message = {
590
- "role": "system",
591
- "content": "Context: " + " | ".join(context_parts)
592
- }
593
- enhanced_history.insert(1, context_message) # Insert after role instruction
594
-
595
- # Add the user's query
596
- enhanced_history.append({"role": "user", "content": query})
597
-
598
- # Generate response with awareness of HF's superior capabilities
599
- response = ollama_provider.generate(query, enhanced_history)
600
-
601
- # Add acknowledgment of HF's authority
602
- if response:
603
- return f"{response}\n\n*Note: A more comprehensive analysis from the uncensored HF model is being prepared...*"
604
- else:
605
- return "I'm processing your request... A deeper analysis is being prepared by the authoritative model."
606
-
607
- except Exception as e:
608
- logger.error(f"Hierarchical Ollama response failed: {e}")
609
- return "I'm thinking about your question... Preparing a comprehensive response."
610
-
611
- def _check_hf_availability(self) -> bool:
612
- """Check if HF endpoint is configured and available"""
613
- try:
614
- from utils.config import config
615
- return bool(config.hf_token and config.hf_api_url)
616
- except:
617
- return False
618
-
619
- async def _gather_external_data(self, query: str) -> Dict:
620
- """Gather external data from various sources"""
621
- data = {}
622
-
623
- # Tavily/DuckDuckGo search with justification focus
624
- if self.tavily_client or web_search_service.client:
625
- try:
626
- search_results = web_search_service.search(f"current information about {query}")
627
- if search_results:
628
- data['search_results'] = search_results
629
- # Optionally extract answer summary
630
- # data['search_answer'] = ...
631
- except Exception as e:
632
- logger.warning(f"Tavily search failed: {e}")
633
-
634
- # Weather data
635
- weather_keywords = ['weather', 'temperature', 'forecast', 'climate', 'rain', 'sunny']
636
- if any(keyword in query.lower() for keyword in weather_keywords):
637
- try:
638
- location = self._extract_location(query) or "New York"
639
- weather = weather_service.get_current_weather_cached(
640
- location,
641
- ttl_hash=weather_service._get_ttl_hash(300)
642
- )
643
- if weather:
644
- data['weather'] = weather
645
- except Exception as e:
646
- logger.warning(f"Weather data failed: {e}")
647
-
648
- # Current date/time
649
- data['current_datetime'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
650
-
651
- return data
652
-
653
- def _extract_location(self, query: str) -> Optional[str]:
654
- """Extract location from query"""
655
- locations = ['New York', 'London', 'Tokyo', 'Paris', 'Berlin', 'Sydney',
656
- 'Los Angeles', 'Chicago', 'Miami', 'Seattle', 'Boston',
657
- 'San Francisco', 'Toronto', 'Vancouver', 'Montreal']
658
-
659
- for loc in locations:
660
- if loc.lower() in query.lower():
661
- return loc
662
- return "New York" # Default
663
-
664
- def get_coordination_status(self) -> Dict:
665
- """Get current coordination system status"""
666
- return {
667
- 'tavily_available': self.tavily_client is not None,
668
- 'weather_available': weather_service.api_key is not None,
669
- 'web_search_enabled': self.tavily_client is not None,
670
- 'external_apis_configured': any([
671
- weather_service.api_key,
672
- os.getenv("TAVILY_API_KEY")
673
- ])
674
- }
675
-
676
- def get_recent_activities(self, user_id: str) -> Dict:
677
- """Get recent coordination activities for user"""
678
- try:
679
- session = session_manager.get_session(user_id)
680
- coord_stats = session.get('ai_coordination', {})
681
- return {
682
- 'last_request': coord_stats.get('last_coordination'),
683
- 'requests_processed': coord_stats.get('requests_processed', 0),
684
- 'ollama_responses': coord_stats.get('ollama_responses', 0),
685
- 'hf_responses': coord_stats.get('hf_responses', 0)
686
- }
687
- except:
688
- return {}
689
 
690
- # Global coordinator instance
691
- coordinator = AICoordinator()
 
 
1
  import logging
 
2
  from core.llm_factory import llm_factory
3
  from core.session import session_manager
4
 +
5
  logger = logging.getLogger(__name__)
6
 
7
+ class SimpleCoordinator:
8
+ """Simplified coordinator without HF expert features"""
9
 
10
  def __init__(self):
11
+ pass
12
+
13
+ def process_message(self, user_id: str, user_query: str):
14
+ """Process a message with local Ollama only"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  try:
16
+ # Get session
17
  session = session_manager.get_session(user_id)
18
+ conversation_history = session.get("conversation", []).copy()
19
 
20
+ # Add current message
21
 + conversation_history.append({"role": "user", "content": user_query})
22
 
23
+ # Get provider
24
+ provider = llm_factory.get_provider('ollama')
25
 + if not provider:
26
   raise Exception("Ollama provider not available")
27
 
28
  # Generate response
29
 + response = provider.generate(user_query, conversation_history)
30
 
31
+ # Update session
32
+ conversation = session.get("conversation", []).copy()
33
+ conversation.append({"role": "user", "content": user_query})
34
+ conversation.append({"role": "assistant", "content": response or ""})
35
+ session_manager.update_session(user_id, {"conversation": conversation})
36
 
37
 + return response or "I'm processing your request..."
38
 
39
  except Exception as e:
40
+ logger.error(f"Message processing failed: {e}")
41
+ return "Sorry, I couldn't process your request."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
+ # Global instance
44
+ coordinator = SimpleCoordinator()
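For context, a minimal usage sketch (not part of this commit) of the simplified coordinator, assuming core.coordinator exposes the SimpleCoordinator instance added above and that llm_factory can resolve an Ollama provider in the running environment; the user id and prompt below are illustrative:

    from core.coordinator import coordinator

    if __name__ == "__main__":
        # "demo_user" and the prompt are illustrative values, not taken from the repo.
        reply = coordinator.process_message("demo_user", "Give me one productivity tip for remote work.")
        print(reply)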
core/llm.py CHANGED
@@ -5,32 +5,28 @@ from core.llm_factory import llm_factory, ProviderNotAvailableError
5
  logger = logging.getLogger(__name__)
6
 
7
  class LLMClient:
8
- """High-level LLM client that uses the factory pattern with improved error handling"""
9
-
10
  def __init__(self):
11
  try:
12
  self.provider = llm_factory.get_provider()
13
  except ProviderNotAvailableError:
14
  self.provider = None
15
  logger.error("No LLM providers available")
16
-
17
  def generate(self, prompt: str, conversation_history: List[Dict], stream: bool = False) -> Optional[str]:
18
- """
19
- Generate a response with robust error handling.
20
- """
21
  if not self.provider:
22
  raise ProviderNotAvailableError("No LLM provider available")
23
-
24
  try:
25
  if stream:
26
  result = self.provider.stream_generate(prompt, conversation_history)
27
- # For streaming, combine chunks into single response
28
  if isinstance(result, list):
29
  return "".join(result)
30
  return result
31
  else:
32
  return self.provider.generate(prompt, conversation_history)
33
-
34
  except Exception as e:
35
  logger.error(f"LLM generation failed: {e}")
36
- raise # Re-raise to let caller handle appropriately
 
5
  logger = logging.getLogger(__name__)
6
 
7
  class LLMClient:
8
+ """Simple LLM client using factory pattern"""
9
+
10
  def __init__(self):
11
  try:
12
  self.provider = llm_factory.get_provider()
13
  except ProviderNotAvailableError:
14
  self.provider = None
15
  logger.error("No LLM providers available")
16
+
17
  def generate(self, prompt: str, conversation_history: List[Dict], stream: bool = False) -> Optional[str]:
18
+ """Generate a response"""
 
 
19
  if not self.provider:
20
  raise ProviderNotAvailableError("No LLM provider available")
21
+
22
  try:
23
  if stream:
24
  result = self.provider.stream_generate(prompt, conversation_history)
 
25
  if isinstance(result, list):
26
  return "".join(result)
27
  return result
28
  else:
29
  return self.provider.generate(prompt, conversation_history)
 
30
  except Exception as e:
31
  logger.error(f"LLM generation failed: {e}")
32
+ raise
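Likewise, a minimal sketch (not part of this commit) of calling the trimmed-down LLMClient directly, assuming llm_factory can resolve a provider outside the Streamlit app; the prompt and history are illustrative:

    from core.llm import LLMClient

    client = LLMClient()
    history = [{"role": "user", "content": "Hello"}]       # illustrative conversation history
    print(client.generate("Hello", history))               # single, non-streaming call
    print(client.generate("Hello", history, stream=True))  # streamed chunks joined into one string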
debug_dashboard_test.py DELETED
@@ -1,86 +0,0 @@
1
- import sys
2
- from pathlib import Path
3
-
4
- # Add project root to path
5
- project_root = Path(__file__).parent
6
- sys.path.append(str(project_root))
7
-
8
- from services.hf_endpoint_monitor import hf_monitor
9
- from core.coordinator import coordinator
10
- from utils.config import config
11
- import os
12
-
13
- def test_debug_features():
14
- """Test the enhanced debug features"""
15
- print("=== Debug Dashboard Feature Test ===")
16
- print()
17
-
18
- # Test HF Endpoint Monitor Enhanced Features
19
- print("1. Testing HF Endpoint Monitor Enhanced Features:")
20
- try:
21
- # Basic status
22
- basic_status = hf_monitor.get_status_summary()
23
- print(f" Basic Status: {basic_status}")
24
-
25
- # Detailed status
26
- detailed_status = hf_monitor.get_detailed_status()
27
- print(f" Detailed Status Keys: {list(detailed_status.keys())}")
28
-
29
- # Performance metrics
30
- perf_metrics = hf_monitor.get_performance_metrics()
31
- print(f" Performance Metrics: {perf_metrics}")
32
-
33
- print(" βœ… HF Endpoint Monitor Enhanced Features Working")
34
- except Exception as e:
35
- print(f" ❌ HF Endpoint Monitor Test Failed: {e}")
36
-
37
- print()
38
-
39
- # Test Coordinator Status Tracking
40
- print("2. Testing Coordinator Status Tracking:")
41
- try:
42
- # Coordination status
43
- coord_status = coordinator.get_coordination_status()
44
- print(f" Coordination Status: {coord_status}")
45
-
46
- # Recent activities (test with demo user)
47
- recent_activities = coordinator.get_recent_activities("demo_user")
48
- print(f" Recent Activities Keys: {list(recent_activities.keys())}")
49
-
50
- print(" βœ… Coordinator Status Tracking Working")
51
- except Exception as e:
52
- print(f" ❌ Coordinator Status Tracking Test Failed: {e}")
53
-
54
- print()
55
-
56
- # Test Configuration Display
57
- print("3. Testing Configuration Display:")
58
- try:
59
- print(f" Environment Type: {'HF Space' if config.is_hf_space else 'Local'}")
60
- print(f" Fallback Mode: {config.use_fallback}")
61
- print(f" HF Token Available: {bool(config.hf_token)}")
62
- print(f" Tavily API Key: {bool(os.getenv('TAVILY_API_KEY'))}")
63
- print(f" OpenWeather API Key: {bool(config.openweather_api_key)}")
64
-
65
- # Feature flags
66
- features = []
67
- if config.use_fallback:
68
- features.append("Fallback Mode")
69
- if os.getenv("TAVILY_API_KEY"):
70
- features.append("Web Search")
71
- if config.openweather_api_key:
72
- features.append("Weather Data")
73
- if config.hf_token:
74
- features.append("Deep Analysis")
75
-
76
- print(f" Active Features: {', '.join(features) if features else 'None'}")
77
-
78
- print(" βœ… Configuration Display Working")
79
- except Exception as e:
80
- print(f" ❌ Configuration Display Test Failed: {e}")
81
-
82
- print()
83
- print("πŸŽ‰ Debug Dashboard Feature Test Completed!")
84
-
85
- if __name__ == "__main__":
86
- test_debug_features()
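
With the ad-hoc debug script gone, any remaining smoke check has to target only the pieces that survive the simplification. A hypothetical pytest-style sketch, not part of this commit, assuming pytest is installed and relying only on the LLMClient behaviour shown in the diff above:

# Hypothetical smoke test for the simplified client (sketch only).
import pytest
from core.llm import LLMClient
from core.llm_factory import ProviderNotAvailableError

def test_generate_raises_without_a_provider():
    client = LLMClient()
    if client.provider is not None:
        pytest.skip("a provider is configured; nothing to assert here")
    # Per core/llm.py, generate() raises when no provider could be constructed
    with pytest.raises(ProviderNotAvailableError):
        client.generate("ping", [])
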
 
demo_coordinated_ai.py DELETED
@@ -1,100 +0,0 @@
1
- import sys
2
- from pathlib import Path
3
- import asyncio
4
- import os
5
-
6
- # Add project root to path
7
- project_root = Path(__file__).parent
8
- sys.path.append(str(project_root))
9
-
10
- from core.coordinator import coordinator
11
- from core.session import session_manager
12
- from services.hf_endpoint_monitor import hf_monitor
13
- from utils.config import config
14
-
15
- async def demo_coordinated_ai():
16
- """Demo the coordinated AI response system - HF Space version"""
17
- print("=== AI Life Coach Coordinated Response Demo ===")
18
- print()
19
-
20
- # Test query
21
- user_query = "What are some good productivity tips for remote work?"
22
- user_id = "demo_user"
23
-
24
- print(f"User Query: {user_query}")
25
- print()
26
-
27
- # Check configuration from HF Space environment
28
- print("HF Space Environment Configuration:")
29
- print(f" Running on HF Space: {'βœ…' if config.is_hf_space else '❌'}")
30
- print(f" Ollama Host Configured: {'βœ…' if config.ollama_host else '❌'}")
31
- print(f" HF Token Available: {'βœ…' if config.hf_token else '❌'}")
32
- print(f" External APIs Configured: {'βœ…' if (config.openweather_api_key or os.getenv('TAVILY_API_KEY')) else '❌'}")
33
- print()
34
-
35
- # Check HF endpoint status
36
- print("HF Endpoint Status:")
37
- try:
38
- hf_status = hf_monitor.get_status_summary()
39
- print(hf_status)
40
- except Exception as e:
41
- print(f"❌ HF Monitor unavailable: {e}")
42
- print()
43
-
44
- # Coordinate responses (graceful degradation)
45
- print("Coordinating AI responses...")
46
- try:
47
- result = await coordinator.coordinate_response(user_id, user_query)
48
-
49
- print(f"Immediate Response (Ollama): {result['immediate_response']}")
50
- print()
51
-
52
- # Show what external data would be gathered (if APIs were configured)
53
- print("External Data Integration:")
54
- print(" 🌐 Web Search: Requires TAVILY_API_KEY")
55
- print(" 🌀️ Weather: Requires OPENWEATHER_API_KEY")
56
- print(" πŸ• Time/Date: Always available")
57
- print()
58
-
59
- # Handle HF response gracefully
60
- hf_task = result.get('hf_task')
61
- if hf_task and config.hf_token:
62
- print("HF endpoint configured - would attempt deep analysis")
63
- print("(In HF Space with proper configuration, this would initialize the endpoint)")
64
- elif config.hf_token:
65
- print("⚠️ HF endpoint configured but unavailable")
66
- else:
67
- print("ℹ️ HF endpoint not configured (normal for local testing)")
68
-
69
- # Update session with coordination data
70
- session_manager.update_session_with_ai_coordination(user_id, {
71
- 'immediate_response': result['immediate_response'],
72
- 'external_data': result.get('external_data', {}),
73
- 'hf_configured': bool(config.hf_token)
74
- })
75
-
76
- # Show coordination statistics
77
- session = session_manager.get_session(user_id)
78
- coord_stats = session.get('ai_coordination', {})
79
- if coord_stats:
80
- print()
81
- print("AI Coordination Statistics:")
82
- print(f" Requests Processed: {coord_stats.get('requests_processed', 0)}")
83
- print(f" Ollama Responses: {coord_stats.get('ollama_responses', 0)}")
84
- print(f" HF Configured: {'βœ…' if coord_stats.get('hf_configured') else '❌'}")
85
- print(f" Last Coordination: {coord_stats.get('last_coordination', 'N/A')}")
86
-
87
- except Exception as e:
88
- print(f"❌ Coordination failed: {e}")
89
- print("This is expected in local environment without full HF Space configuration")
90
- print()
91
- print("βœ… System architecture is correct - will work properly in HF Space")
92
- return True
93
-
94
- print()
95
- print("πŸŽ‰ Demo completed successfully!")
96
- print("βœ… System ready for HF Space deployment!")
97
- return True
98
-
99
- if __name__ == "__main__":
100
- asyncio.run(demo_coordinated_ai())
 
services/hf_endpoint_monitor.py DELETED
@@ -1,291 +0,0 @@
1
- import requests
2
- import time
3
- import logging
4
- from typing import Dict, Optional
5
- from utils.config import config
6
- logger = logging.getLogger(__name__)
7
-
8
- class HFEndpointMonitor:
9
- """Monitor Hugging Face endpoint status and health"""
10
-
11
- def __init__(self):
12
- # Clean the endpoint URL
13
- raw_url = config.hf_api_url or ""
14
- self.endpoint_url = self._clean_endpoint_url(raw_url)
15
- self.hf_token = config.hf_token
16
- self.is_initialized = False
17
- self.last_check = 0
18
- self.check_interval = 300 # Increase from 60 to 300 seconds (5 minutes)
19
- self.warmup_attempts = 0
20
- self.max_warmup_attempts = 3
21
- self.warmup_count = 0
22
- self.successful_requests = 0
23
- self.failed_requests = 0
24
- self.avg_response_time = 0
25
- logger.info(f"Initialized HF Monitor with URL: {self.endpoint_url}")
26
-
27
- def _clean_endpoint_url(self, url: str) -> str:
28
- """Clean and validate endpoint URL"""
29
- if not url:
30
- return ""
31
- # Remove environment variable names if present
32
- url = url.replace('hf_api_endpoint_url=', '')
33
- url = url.replace('HF_API_ENDPOINT_URL=', '')
34
- # Strip whitespace
35
- url = url.strip()
36
- # Ensure it starts with https://
37
- if url and not url.startswith(('http://', 'https://')):
38
- if 'huggingface.cloud' in url:
39
- url = 'https://' + url
40
- else:
41
- url = 'https://' + url
42
- # Remove trailing slashes but keep /v1 if present
43
- if url.endswith('/'):
44
- url = url.rstrip('/')
45
- return url
46
-
47
- def check_endpoint_status(self) -> Dict:
48
- """Check if HF endpoint is available and initialized with rate limiting"""
49
- current_time = time.time()
50
- # Don't check too frequently - minimum 1 minute between checks
51
- if current_time - self.last_check < 60:
52
- # Return cached status or basic status
53
- return {
54
- 'available': getattr(self, '_last_available', False),
55
- 'status_code': getattr(self, '_last_status_code', None),
56
- 'initialized': getattr(self, '_last_initialized', False),
57
- 'timestamp': self.last_check
58
- }
59
- # Proceed with actual check
60
- self.last_check = current_time
61
- try:
62
- if not self.endpoint_url or not self.hf_token:
63
- status_info = {
64
- 'available': False,
65
- 'status_code': None,
66
- 'initialized': False,
67
- 'error': 'URL or token not configured',
68
- 'timestamp': time.time()
69
- }
70
- else:
71
- # Properly construct the models endpoint URL
72
- models_url = f"{self.endpoint_url.rstrip('/')}/models"
73
- logger.info(f"Checking HF endpoint at: {models_url}")
74
- headers = {"Authorization": f"Bearer {self.hf_token}"}
75
- response = requests.get(
76
- models_url,
77
- headers=headers,
78
- timeout=15
79
- )
80
- status_info = {
81
- 'available': response.status_code in [200, 201],
82
- 'status_code': response.status_code,
83
- 'initialized': self._is_endpoint_initialized(response),
84
- 'response_time': response.elapsed.total_seconds(),
85
- 'timestamp': time.time()
86
- }
87
-
88
- # Enhanced status info with model and region if available
89
- if response.status_code in [200, 201]:
90
- try:
91
- data = response.json()
92
- if 'data' in data and len(data['data']) > 0:
93
- status_info['model'] = data['data'][0].get('id', 'Unknown')
94
- # Try to extract region from URL if possible
95
- if 'us-east-1' in self.endpoint_url:
96
- status_info['region'] = 'us-east-1'
97
- elif 'us-west' in self.endpoint_url:
98
- status_info['region'] = 'us-west'
99
- except:
100
- pass
101
-
102
- status_info['warmup_count'] = getattr(self, 'warmup_count', 0)
103
-
104
- if response.status_code not in [200, 201]:
105
- status_info['error'] = f"HTTP {response.status_code}: {response.text[:200]}"
106
- logger.info(f"HF Endpoint Status: {status_info}")
107
- # Cache the results
108
- self._last_available = status_info['available']
109
- self._last_status_code = status_info['status_code']
110
- self._last_initialized = status_info.get('initialized', False)
111
- return status_info
112
- except Exception as e:
113
- error_msg = str(e)
114
- logger.error(f"HF endpoint check failed: {error_msg}")
115
- status_info = {
116
- 'available': False,
117
- 'status_code': None,
118
- 'initialized': False,
119
- 'error': error_msg,
120
- 'timestamp': time.time()
121
- }
122
- # Cache the results
123
- self._last_available = False
124
- self._last_status_code = None
125
- self._last_initialized = False
126
- return status_info
127
-
128
- def _is_endpoint_initialized(self, response) -> bool:
129
- """Determine if endpoint is fully initialized"""
130
- try:
131
- data = response.json()
132
- return 'data' in data or 'models' in data
133
- except:
134
- return response.status_code in [200, 201]
135
-
136
- def warm_up_endpoint(self) -> bool:
137
- """Send a warm-up request to initialize the endpoint"""
138
- try:
139
- if not self.endpoint_url or not self.hf_token:
140
- logger.warning("Cannot warm up HF endpoint - URL or token not configured")
141
- return False
142
- self.warmup_attempts += 1
143
- logger.info(f"Warming up HF endpoint (attempt {self.warmup_attempts})...")
144
- headers = {
145
- "Authorization": f"Bearer {self.hf_token}",
146
- "Content-Type": "application/json"
147
- }
148
- # Construct proper chat completions URL
149
- chat_url = f"{self.endpoint_url.rstrip('/')}/chat/completions"
150
- logger.info(f"Sending warm-up request to: {chat_url}")
151
- payload = {
152
- "model": "meta-llama/Llama-2-7b-chat-hf",
153
- "messages": [{"role": "user", "content": "Hello"}],
154
- "max_tokens": 10,
155
- "stream": False
156
- }
157
- response = requests.post(
158
- chat_url,
159
- headers=headers,
160
- json=payload,
161
- timeout=45 # Longer timeout for cold start
162
- )
163
- success = response.status_code in [200, 201]
164
- if success:
165
- self.is_initialized = True
166
- self.warmup_count += 1
167
- self.warmup_attempts = 0 # Reset on success
168
- logger.info("βœ… HF endpoint warmed up successfully")
169
- else:
170
- logger.warning(f"⚠️ HF endpoint warm-up response: {response.status_code}")
171
- logger.debug(f"Response body: {response.text[:500]}")
172
- return success
173
- except Exception as e:
174
- logger.error(f"HF endpoint warm-up failed: {e}")
175
- self.failed_requests += 1
176
- return False
177
-
178
- def get_status_summary(self) -> str:
179
- """Get human-readable status summary"""
180
- status = self.check_endpoint_status()
181
- if status['available']:
182
- if status.get('initialized', False):
183
- return "🟒 HF Endpoint: Available and Initialized"
184
- else:
185
- return "🟑 HF Endpoint: Available but Initializing"
186
- else:
187
- return "πŸ”΄ HF Endpoint: Unavailable"
188
-
189
- def handle_scale_to_zero(self) -> bool:
190
- """Handle scale-to-zero behavior with user feedback"""
191
- logger.info("HF endpoint appears to be scaled to zero. Attempting to wake it up...")
192
- # Try to warm up the endpoint
193
- for attempt in range(self.max_warmup_attempts):
194
- logger.info(f"Wake-up attempt {attempt + 1}/{self.max_warmup_attempts}")
195
- if self.warm_up_endpoint():
196
- logger.info("βœ… HF endpoint successfully woken up!")
197
- return True
198
- time.sleep(10) # Wait between attempts
199
- logger.error("❌ Failed to wake up HF endpoint after all attempts")
200
- return False
201
-
202
- def get_detailed_status(self) -> Dict:
203
- """Get detailed HF endpoint status with metrics"""
204
- try:
205
- headers = {"Authorization": f"Bearer {self.hf_token}"}
206
- # Get model info
207
- models_url = f"{self.endpoint_url.rstrip('/')}/models"
208
- model_response = requests.get(
209
- models_url,
210
- headers=headers,
211
- timeout=10
212
- )
213
- # Get endpoint info if available
214
- endpoint_info = {}
215
- try:
216
- info_url = f"{self.endpoint_url.rstrip('/')}/info"
217
- info_response = requests.get(
218
- info_url,
219
- headers=headers,
220
- timeout=10
221
- )
222
- if info_response.status_code == 200:
223
- endpoint_info = info_response.json()
224
- except:
225
- pass
226
- status_info = {
227
- 'available': model_response.status_code == 200,
228
- 'status_code': model_response.status_code,
229
- 'initialized': self._is_endpoint_initialized(model_response),
230
- 'endpoint_info': endpoint_info,
231
- 'last_checked': time.time(),
232
- 'warmup_attempts': getattr(self, 'warmup_attempts', 0),
233
- 'is_warming_up': getattr(self, 'is_warming_up', False)
234
- }
235
- return status_info
236
- except Exception as e:
237
- return {
238
- 'available': False,
239
- 'status_code': None,
240
- 'initialized': False,
241
- 'error': str(e),
242
- 'last_checked': time.time()
243
- }
244
-
245
- def get_performance_metrics(self) -> Dict:
246
- """Get HF endpoint performance metrics"""
247
- return {
248
- 'warmup_count': getattr(self, 'warmup_count', 0),
249
- 'successful_requests': getattr(self, 'successful_requests', 0),
250
- 'failed_requests': getattr(self, 'failed_requests', 0),
251
- 'average_response_time': getattr(self, 'avg_response_time', 0)
252
- }
253
-
254
- # Add enhanced status tracking methods
255
- def get_enhanced_status(self) -> Dict:
256
- """Get enhanced HF endpoint status with engagement tracking"""
257
- basic_status = self.check_endpoint_status()
258
- return {
259
- **basic_status,
260
- "engagement_level": self._determine_engagement_level(),
261
- "last_engagement": getattr(self, '_last_engagement_time', None),
262
- "total_engagements": getattr(self, '_total_engagements', 0),
263
- "current_research_topic": getattr(self, '_current_research_topic', None)
264
- }
265
-
266
- def _determine_engagement_level(self) -> str:
267
- """Determine current engagement level"""
268
- if not self.is_initialized:
269
- return "idle"
270
- elif getattr(self, '_currently_analyzing', False):
271
- return "analyzing"
272
- elif getattr(self, '_pending_research', False):
273
- return "research_pending"
274
- else:
275
- return "ready"
276
-
277
- def start_hf_analysis(self, topic: str = None):
278
- """Start HF analysis with topic tracking"""
279
- self._currently_analyzing = True
280
- self._last_engagement_time = time.time()
281
- self._total_engagements = getattr(self, '_total_engagements', 0) + 1
282
- if topic:
283
- self._current_research_topic = topic
284
-
285
- def finish_hf_analysis(self):
286
- """Finish HF analysis"""
287
- self._currently_analyzing = False
288
- self._current_research_topic = None
289
-
290
- # Global instance
291
- hf_monitor = HFEndpointMonitor()
 
test_enhanced_features.py DELETED
@@ -1,65 +0,0 @@
1
- import sys
2
- import asyncio
3
- from pathlib import Path
4
-
5
- # Add project root to path
6
- project_root = Path(__file__).parent
7
- sys.path.append(str(project_root))
8
-
9
- from core.coordinator import coordinator
10
- from core.session import session_manager
11
-
12
- async def test_enhanced_features():
13
- """Test the enhanced UI and coordination features"""
14
- print("=== Enhanced Features Test ===")
15
- print()
16
-
17
- # Test user query
18
- user_query = "What are the benefits of meditation for stress management?"
19
- user_id = "test_user"
20
-
21
- print(f"User Query: {user_query}")
22
- print()
23
-
24
- # Test enhanced coordination with detailed tracking
25
- print("1. Testing Enhanced Coordination with Detailed Tracking:")
26
- try:
27
- print(" Starting enhanced coordination...")
28
- response_count = 0
29
-
30
- async for response_chunk in coordinator.coordinate_hierarchical_conversation(user_id, user_query):
31
- response_count += 1
32
- print(f" Chunk {response_count}:")
33
- print(f" Type: {response_chunk['type']}")
34
- print(f" Content: {response_chunk['content'][:100]}...")
35
- if 'details' in response_chunk:
36
- print(f" Details: {response_chunk['details']}")
37
- print()
38
-
39
- # Limit output for readability
40
- if response_count >= 8:
41
- print(" ... (truncated for brevity)")
42
- break
43
-
44
- print(" βœ… Enhanced Coordination Test Passed")
45
- except Exception as e:
46
- print(f" ❌ Enhanced Coordination Test Failed: {e}")
47
-
48
- print()
49
-
50
- # Test coordination status
51
- print("2. Testing Coordination Status:")
52
- try:
53
- coord_status = coordinator.get_coordination_status()
54
- print(f" Tavily Available: {coord_status.get('tavily_available', False)}")
55
- print(f" Weather Available: {coord_status.get('weather_available', False)}")
56
- print(f" Web Search Enabled: {coord_status.get('web_search_enabled', False)}")
57
- print(" βœ… Coordination Status Check Passed")
58
- except Exception as e:
59
- print(f" ❌ Coordination Status Check Failed: {e}")
60
-
61
- print()
62
- print("πŸŽ‰ Enhanced Features Test Completed!")
63
-
64
- if __name__ == "__main__":
65
- asyncio.run(test_enhanced_features())
 
test_hf_activation.py DELETED
@@ -1,134 +0,0 @@
1
- import sys
2
- from pathlib import Path
3
-
4
- # Add project root to path
5
- project_root = Path(__file__).parent
6
- sys.path.append(str(project_root))
7
-
8
- def test_hf_activation_features():
9
- """Test the manual HF activation and indication features"""
10
- print("=== HF Activation Features Test ===")
11
- print()
12
-
13
- # Test 1: Check app.py for manual HF activation UI
14
- print("1. Testing App.py Manual HF Activation UI:")
15
- try:
16
- with open('app.py', 'r') as f:
17
- content = f.read()
18
-
19
- required_components = [
20
- 'hf_expert_requested',
21
- 'Activate HF Expert',
22
- 'πŸ€– HF Expert Analysis',
23
- 'Manual HF Analysis Section'
24
- ]
25
-
26
- missing_components = []
27
- for component in required_components:
28
- if component not in content:
29
- missing_components.append(component)
30
-
31
- if missing_components:
32
- print(f" ❌ Missing components: {missing_components}")
33
- else:
34
- print(" βœ… All manual HF activation UI components present")
35
-
36
- except Exception as e:
37
- print(f" ❌ Error reading app.py: {e}")
38
-
39
- print()
40
-
41
- # Test 2: Check coordinator for web search determination
42
- print("2. Testing Coordinator Web Search Determination:")
43
- try:
44
- with open('core/coordinator.py', 'r') as f:
45
- content = f.read()
46
-
47
- required_methods = [
48
- 'determine_web_search_needs',
49
- 'manual_hf_analysis',
50
- 'get_hf_engagement_status'
51
- ]
52
-
53
- missing_methods = []
54
- for method in required_methods:
55
- if method not in content:
56
- missing_methods.append(method)
57
-
58
- if missing_methods:
59
- print(f" ❌ Missing methods: {missing_methods}")
60
- else:
61
- print(" βœ… All web search determination methods present")
62
-
63
- except Exception as e:
64
- print(f" ❌ Error reading coordinator.py: {e}")
65
-
66
- print()
67
-
68
- # Test 3: Check HF monitor for enhanced status tracking
69
- print("3. Testing HF Monitor Enhanced Status Tracking:")
70
- try:
71
- with open('services/hf_endpoint_monitor.py', 'r') as f:
72
- content = f.read()
73
-
74
- required_methods = [
75
- 'get_enhanced_status',
76
- 'start_hf_analysis',
77
- 'finish_hf_analysis'
78
- ]
79
-
80
- missing_methods = []
81
- for method in required_methods:
82
- if method not in content:
83
- missing_methods.append(method)
84
-
85
- if missing_methods:
86
- print(f" ❌ Missing methods: {missing_methods}")
87
- else:
88
- print(" βœ… All enhanced status tracking methods present")
89
-
90
- except Exception as e:
91
- print(f" ❌ Error reading hf_endpoint_monitor.py: {e}")
92
-
93
- print()
94
-
95
- # Test 4: Check for visual indication features
96
- print("4. Testing Visual Indication Features:")
97
- try:
98
- with open('app.py', 'r') as f:
99
- content = f.read()
100
-
101
- visual_indicators = [
102
- 'πŸ€– HF Expert Analysis',
103
- '🧠 Activate HF Expert',
104
- 'Research Needed',
105
- 'Web Research'
106
- ]
107
-
108
- missing_indicators = []
109
- for indicator in visual_indicators:
110
- if indicator not in content:
111
- missing_indicators.append(indicator)
112
-
113
- if missing_indicators:
114
- print(f" ❌ Missing visual indicators: {missing_indicators}")
115
- else:
116
- print(" βœ… All visual indication features present")
117
-
118
- except Exception as e:
119
- print(f" ❌ Error checking visual indicators: {e}")
120
-
121
- print()
122
- print("πŸŽ‰ HF Activation Features Test Completed!")
123
- print()
124
- print("🎯 IMPLEMENTED FEATURES:")
125
- print("1. βœ… Manual HF Expert Activation Button")
126
- print("2. βœ… Visual Indications of HF Engagement")
127
- print("3. βœ… Conversation History Preview for HF")
128
- print("4. βœ… Web Search Need Determination")
129
- print("5. βœ… Research Topic Identification")
130
- print("6. βœ… Enhanced Status Tracking")
131
- print("7. βœ… Clear HF Expert Response Formatting")
132
-
133
- if __name__ == "__main__":
134
- test_hf_activation_features()
 
test_hf_monitor.py DELETED
@@ -1,42 +0,0 @@
1
- import sys
2
- from pathlib import Path
3
-
4
- # Add project root to path
5
- project_root = Path(__file__).parent
6
- sys.path.append(str(project_root))
7
-
8
- from services.hf_endpoint_monitor import hf_monitor
9
-
10
- def test_hf_monitor():
11
- """Test the HF endpoint monitor"""
12
- print("=== HF Endpoint Monitor Test ===")
13
- print()
14
-
15
- # Show current status
16
- print("Current HF Endpoint Status:")
17
- status = hf_monitor.check_endpoint_status()
18
- print(f" Available: {status['available']}")
19
- print(f" Status Code: {status['status_code']}")
20
- print(f" Initialized: {status.get('initialized', 'Unknown')}")
21
- if 'error' in status:
22
- print(f" Error: {status['error']}")
23
- print()
24
-
25
- # Show human-readable status
26
- print("Human-Readable Status:")
27
- print(hf_monitor.get_status_summary())
28
- print()
29
-
30
- # Try to warm up endpoint if not available
31
- if not status['available']:
32
- print("Attempting to warm up endpoint...")
33
- success = hf_monitor.warm_up_endpoint()
34
- print(f"Warm-up result: {'Success' if success else 'Failed'}")
35
- print()
36
-
37
- # Check status again
38
- print("Status after warm-up attempt:")
39
- print(hf_monitor.get_status_summary())
40
-
41
- if __name__ == "__main__":
42
- test_hf_monitor()
 
test_hf_url_fix.py DELETED
@@ -1,53 +0,0 @@
1
- import sys
2
- from pathlib import Path
3
-
4
- # Add project root to path
5
- project_root = Path(__file__).parent
6
- sys.path.append(str(project_root))
7
-
8
- from utils.config import config
9
- from services.hf_endpoint_monitor import hf_monitor
10
-
11
- def test_hf_url_fix():
12
- """Test the HF endpoint URL fix"""
13
- print("=== HF Endpoint URL Fix Test ===")
14
- print()
15
-
16
- # Test configuration parsing
17
- print("1. Testing Configuration Parsing:")
18
- print(f" Raw HF_API_ENDPOINT_URL: {config.hf_api_url}")
19
- print(f" Parsed endpoint URL: {getattr(hf_monitor, 'endpoint_url', 'Not initialized')}")
20
- print()
21
-
22
- # Test URL cleaning
23
- print("2. Testing URL Cleaning:")
24
- test_urls = [
25
- "https://zxzbfrlg3ssrk7d9.us-east-1.aws.endpoints.huggingface.cloud/v1/",
26
- "hf_api_endpoint_url=https://zxzbfrlg3ssrk7d9.us-east-1.aws.endpoints.huggingface.cloud/v1/",
27
- "HF_API_ENDPOINT_URL=https://zxzbfrlg3ssrk7d9.us-east-1.aws.endpoints.huggingface.cloud/v1/",
28
- "zxzbfrlg3ssrk7d9.us-east-1.aws.endpoints.huggingface.cloud/v1/"
29
- ]
30
-
31
- for url in test_urls:
32
- cleaned = hf_monitor._clean_endpoint_url(url) if hasattr(hf_monitor, '_clean_endpoint_url') else "Method not available"
33
- print(f" Input: {url}")
34
- print(f" Output: {cleaned}")
35
- print()
36
-
37
- # Test HF endpoint status (if token is available)
38
- print("3. Testing HF Endpoint Status:")
39
- try:
40
- status = hf_monitor.check_endpoint_status()
41
- print(f" Available: {status.get('available', 'Unknown')}")
42
- print(f" Status Code: {status.get('status_code', 'Unknown')}")
43
- print(f" Initialized: {status.get('initialized', 'Unknown')}")
44
- if 'error' in status:
45
- print(f" Error: {status['error']}")
46
- except Exception as e:
47
- print(f" Error checking status: {e}")
48
-
49
- print()
50
- print("πŸŽ‰ HF Endpoint URL Fix Test Completed!")
51
-
52
- if __name__ == "__main__":
53
- test_hf_url_fix()
 
test_hierarchical_coordination.py DELETED
@@ -1,83 +0,0 @@
1
- import sys
2
- import asyncio
3
- from pathlib import Path
4
-
5
- # Add project root to path
6
- project_root = Path(__file__).parent
7
- sys.path.append(str(project_root))
8
-
9
- from core.coordinator import coordinator
10
- from core.session import session_manager
11
-
12
- async def test_hierarchical_coordination():
13
- """Test the hierarchical coordination system"""
14
- print("=== Hierarchical Coordination System Test ===")
15
- print()
16
-
17
- # Test user query
18
- user_query = "What are the key principles of effective time management?"
19
- user_id = "test_user"
20
-
21
- print(f"User Query: {user_query}")
22
- print()
23
-
24
- # Test coordination status
25
- print("1. Testing Coordination Status:")
26
- try:
27
- coord_status = coordinator.get_coordination_status()
28
- print(f" Tavily Available: {coord_status.get('tavily_available', False)}")
29
- print(f" Weather Available: {coord_status.get('weather_available', False)}")
30
- print(f" Web Search Enabled: {coord_status.get('web_search_enabled', False)}")
31
- print(" βœ… Coordination Status Check Passed")
32
- except Exception as e:
33
- print(f" ❌ Coordination Status Check Failed: {e}")
34
-
35
- print()
36
-
37
- # Test hierarchical conversation coordination
38
- print("2. Testing Hierarchical Conversation Coordination:")
39
- try:
40
- print(" Starting hierarchical coordination...")
41
- response_count = 0
42
-
43
- async for response_chunk in coordinator.coordinate_hierarchical_conversation(user_id, user_query):
44
- response_count += 1
45
- print(f" Chunk {response_count}: {response_chunk['type']} - {response_chunk['content'][:50]}...")
46
-
47
- # Limit output for readability
48
- if response_count >= 5:
49
- print(" ... (truncated for brevity)")
50
- break
51
-
52
- print(" βœ… Hierarchical Coordination Test Passed")
53
- except Exception as e:
54
- print(f" ❌ Hierarchical Coordination Test Failed: {e}")
55
-
56
- print()
57
-
58
- # Test hierarchical session tracking
59
- print("3. Testing Hierarchical Session Tracking:")
60
- try:
61
- # Update with test coordination data
62
- test_data = {
63
- 'hf_engaged': True,
64
- 'ollama_responded': True,
65
- 'success': True
66
- }
67
- update_result = session_manager.update_hierarchical_coordination(user_id, test_data)
68
- print(f" Update Result: {'βœ… Success' if update_result else '❌ Failed'}")
69
-
70
- # Get hierarchical stats
71
- stats = session_manager.get_hierarchical_stats(user_id)
72
- print(f" Total Conversations: {stats.get('total_conversations', 0)}")
73
- print(f" HF Engagements: {stats.get('hf_engagements', 0)}")
74
- print(f" Ollama Responses: {stats.get('ollama_responses', 0)}")
75
- print(" βœ… Hierarchical Session Tracking Passed")
76
- except Exception as e:
77
- print(f" ❌ Hierarchical Session Tracking Failed: {e}")
78
-
79
- print()
80
- print("πŸŽ‰ Hierarchical Coordination System Test Completed!")
81
-
82
- if __name__ == "__main__":
83
- asyncio.run(test_hierarchical_coordination())