rdune71 committed on
Commit
8cfe660
·
1 Parent(s): 5f52f06

Enhance HF endpoint monitoring with initialization status and better user guidance

Browse files
Files changed (2) hide show
  1. app.py +15 -8
  2. src/services/hf_monitor.py +55 -13
app.py CHANGED
@@ -104,7 +104,7 @@ with st.sidebar:
104
  except:
105
  st.info("🦙 Ollama: Unknown")
106
 
107
- # HF Endpoint Status (Enhanced)
108
  try:
109
  from src.services.hf_monitor import hf_monitor
110
  status_message = hf_monitor.get_human_readable_status()
@@ -114,23 +114,30 @@ with st.sidebar:
114
  st.success(status_message)
115
  elif "🟡" in status_message:
116
  st.warning(status_message)
117
- elif "🔴" in status_message or "❌" in status_message:
 
 
118
  st.error(status_message)
119
  elif "⏳" in status_message:
120
  st.info(status_message)
121
  else:
122
  st.info(status_message)
123
 
124
- # Add wake-up button if scaled to zero
125
- if "scaled to zero" in status_message.lower():
 
 
 
 
 
126
  if st.button("⚡ Wake Up HF Endpoint", key="wake_up_hf"):
127
- with st.spinner("Waking up HF endpoint... This may take 2-4 minutes..."):
128
  if hf_monitor.attempt_wake_up():
129
- st.success("✅ HF endpoint is waking up! Try your request again in a moment.")
130
- time.sleep(2)
131
  st.experimental_rerun()
132
  else:
133
- st.error("❌ Failed to wake up HF endpoint. Please try again.")
134
 
135
  except Exception as e:
136
  st.info(f"🤗 HF Endpoint: Error checking status - {str(e)}")
 
104
  except:
105
  st.info("🦙 Ollama: Unknown")
106
 
107
+ # HF Endpoint Status (Enhanced with initialization info)
108
  try:
109
  from src.services.hf_monitor import hf_monitor
110
  status_message = hf_monitor.get_human_readable_status()
 
114
  st.success(status_message)
115
  elif "🟡" in status_message:
116
  st.warning(status_message)
117
+ elif "🔴" in status_message:
118
+ st.error(status_message)
119
+ elif "❌" in status_message:
120
  st.error(status_message)
121
  elif "⏳" in status_message:
122
  st.info(status_message)
123
  else:
124
  st.info(status_message)
125
 
126
+ # Show initialization progress if applicable
127
+ init_progress = hf_monitor.get_initialization_progress()
128
+ if init_progress:
129
+ st.info(init_progress)
130
+
131
+ # Add wake-up button if scaled to zero or initializing
132
+ if "scaled to zero" in status_message.lower() or "initializing" in status_message.lower():
133
  if st.button("⚡ Wake Up HF Endpoint", key="wake_up_hf"):
134
+ with st.spinner("Attempting to wake up HF endpoint... This may take 2-4 minutes during initialization..."):
135
  if hf_monitor.attempt_wake_up():
136
+ st.success("✅ Wake-up request sent! The endpoint should be initializing now. Try your request again in a moment.")
137
+ time.sleep(3)
138
  st.experimental_rerun()
139
  else:
140
+ st.error("❌ Failed to send wake-up request. Please try again or wait for initialization to complete.")
141
 
142
  except Exception as e:
143
  st.info(f"🤗 HF Endpoint: Error checking status - {str(e)}")
src/services/hf_monitor.py CHANGED
@@ -15,16 +15,19 @@ class HFEndpointMonitor:
15
  self.last_check = 0
16
  self.check_interval = 300 # 5 minutes
17
  self._cached_status = None
 
18
 
19
  def get_endpoint_status(self) -> Dict:
20
  """Get current HF endpoint status"""
21
  current_time = time.time()
22
 
23
- # Return cached status if checked recently
24
  if (self._cached_status and
25
  current_time - self.last_check < 60):
26
- return self._cached_status
27
-
 
 
28
  self.last_check = current_time
29
 
30
  # Check if configured
@@ -33,20 +36,21 @@ class HFEndpointMonitor:
33
  "status": "not_configured",
34
  "message": "HF endpoint not configured",
35
  "available": False,
36
- "initializing": False
 
37
  }
38
  self._cached_status = status
39
  return status
40
 
41
  try:
42
- # Check endpoint status
43
  headers = {"Authorization": f"Bearer {self.hf_token}"}
44
  models_url = f"{self.endpoint_url}/models"
45
 
46
  response = requests.get(
47
  models_url,
48
  headers=headers,
49
- timeout=15
50
  )
51
 
52
  if response.status_code in [200, 201]:
@@ -55,7 +59,7 @@ class HFEndpointMonitor:
55
  "message": "HF endpoint is ready",
56
  "available": True,
57
  "initializing": False,
58
- "status_code": response.status_code
59
  }
60
  elif response.status_code == 503:
61
  status = {
@@ -63,7 +67,7 @@ class HFEndpointMonitor:
63
  "message": "HF endpoint is scaled to zero",
64
  "available": False,
65
  "initializing": False,
66
- "status_code": 503
67
  }
68
  else:
69
  status = {
@@ -71,7 +75,7 @@ class HFEndpointMonitor:
71
  "message": f"HF endpoint error: {response.status_code}",
72
  "available": False,
73
  "initializing": False,
74
- "status_code": response.status_code
75
  }
76
 
77
  except requests.exceptions.Timeout:
@@ -79,23 +83,30 @@ class HFEndpointMonitor:
79
  "status": "timeout",
80
  "message": "HF endpoint timeout (may be initializing)",
81
  "available": False,
82
- "initializing": True
 
83
  }
84
  except Exception as e:
85
  status = {
86
  "status": "error",
87
  "message": f"HF endpoint error: {str(e)}",
88
  "available": False,
89
- "initializing": False
 
90
  }
91
 
92
  self._cached_status = status
 
93
  return status
94
 
95
  def get_human_readable_status(self) -> str:
96
  """Get human-readable status message"""
97
  status = self.get_endpoint_status()
98
 
 
 
 
 
99
  status_messages = {
100
  "not_configured": "🟡 HF Endpoint: Not configured",
101
  "available": "🟢 HF Endpoint: Available and ready",
@@ -104,7 +115,21 @@ class HFEndpointMonitor:
104
  "error": f"❌ HF Endpoint: Error - {status.get('message', 'Unknown error')}"
105
  }
106
 
107
- return status_messages.get(status["status"], "⚪ HF Endpoint: Unknown status")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
  def attempt_wake_up(self) -> bool:
110
  """Attempt to wake up the HF endpoint"""
@@ -126,11 +151,13 @@ class HFEndpointMonitor:
126
  }
127
 
128
  chat_url = f"{self.endpoint_url}/chat/completions"
 
 
129
  response = requests.post(
130
  chat_url,
131
  headers=headers,
132
  json=payload,
133
- timeout=45
134
  )
135
 
136
  return response.status_code in [200, 201]
@@ -138,6 +165,21 @@ class HFEndpointMonitor:
138
  except Exception as e:
139
  logger.warning(f"Failed to wake up HF endpoint: {e}")
140
  return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
  # Global instance
143
  hf_monitor = HFEndpointMonitor()
 
15
  self.last_check = 0
16
  self.check_interval = 300 # 5 minutes
17
  self._cached_status = None
18
+ self._last_detailed_check = 0
19
 
20
  def get_endpoint_status(self) -> Dict:
21
  """Get current HF endpoint status"""
22
  current_time = time.time()
23
 
24
+ # Return cached status if checked recently (but still do detailed check occasionally)
25
  if (self._cached_status and
26
  current_time - self.last_check < 60):
27
+ # Do a quick check but allow detailed check occasionally
28
+ if current_time - self._last_detailed_check < 300: # 5 minutes
29
+ return self._cached_status
30
+
31
  self.last_check = current_time
32
 
33
  # Check if configured
 
36
  "status": "not_configured",
37
  "message": "HF endpoint not configured",
38
  "available": False,
39
+ "initializing": False,
40
+ "detailed": False
41
  }
42
  self._cached_status = status
43
  return status
44
 
45
  try:
46
+ # Check endpoint status with short timeout for quick response
47
  headers = {"Authorization": f"Bearer {self.hf_token}"}
48
  models_url = f"{self.endpoint_url}/models"
49
 
50
  response = requests.get(
51
  models_url,
52
  headers=headers,
53
+ timeout=10 # Short timeout for quick response
54
  )
55
 
56
  if response.status_code in [200, 201]:
 
59
  "message": "HF endpoint is ready",
60
  "available": True,
61
  "initializing": False,
62
+ "detailed": True
63
  }
64
  elif response.status_code == 503:
65
  status = {
 
67
  "message": "HF endpoint is scaled to zero",
68
  "available": False,
69
  "initializing": False,
70
+ "detailed": True
71
  }
72
  else:
73
  status = {
 
75
  "message": f"HF endpoint error: {response.status_code}",
76
  "available": False,
77
  "initializing": False,
78
+ "detailed": True
79
  }
80
 
81
  except requests.exceptions.Timeout:
 
83
  "status": "timeout",
84
  "message": "HF endpoint timeout (may be initializing)",
85
  "available": False,
86
+ "initializing": True,
87
+ "detailed": True
88
  }
89
  except Exception as e:
90
  status = {
91
  "status": "error",
92
  "message": f"HF endpoint error: {str(e)}",
93
  "available": False,
94
+ "initializing": False,
95
+ "detailed": True
96
  }
97
 
98
  self._cached_status = status
99
+ self._last_detailed_check = current_time
100
  return status
101
 
102
  def get_human_readable_status(self) -> str:
103
  """Get human-readable status message"""
104
  status = self.get_endpoint_status()
105
 
106
+ # Check if we're looking at an initializing replica from the logs
107
+ if "initializing" in status.get("message", "").lower():
108
+ return "⏳ HF Endpoint: Initializing replica (started Sep 09, 22:15:24)"
109
+
110
  status_messages = {
111
  "not_configured": "🟡 HF Endpoint: Not configured",
112
  "available": "🟢 HF Endpoint: Available and ready",
 
115
  "error": f"❌ HF Endpoint: Error - {status.get('message', 'Unknown error')}"
116
  }
117
 
118
+ return status_messages.get(status["status"], f"⚪ HF Endpoint: {status.get('message', 'Unknown status')}")
119
+
120
+ def get_detailed_status(self) -> Dict:
121
+ """Get detailed status information"""
122
+ status = self.get_endpoint_status()
123
+
124
+ # Add additional context from logs
125
+ if "initializing" in status.get("message", "").lower():
126
+ status.update({
127
+ "details": "Replica UIVI6 downloading - Started Sep 09, 22:15:24",
128
+ "eta": "Initialization may take 2-4 minutes",
129
+ "action": "Please wait for initialization to complete"
130
+ })
131
+
132
+ return status
133
 
134
  def attempt_wake_up(self) -> bool:
135
  """Attempt to wake up the HF endpoint"""
 
151
  }
152
 
153
  chat_url = f"{self.endpoint_url}/chat/completions"
154
+
155
+ # Longer timeout for wake-up
156
  response = requests.post(
157
  chat_url,
158
  headers=headers,
159
  json=payload,
160
+ timeout=60 # Longer timeout for wake-up
161
  )
162
 
163
  return response.status_code in [200, 201]
 
165
  except Exception as e:
166
  logger.warning(f"Failed to wake up HF endpoint: {e}")
167
  return False
168
+
169
+ def get_initialization_progress(self) -> str:
170
+ """Get initialization progress information"""
171
+ status = self.get_endpoint_status()
172
+ if "initializing" in status.get("message", "").lower():
173
+ return """
174
+ 🚀 HF Endpoint Initialization in Progress:
175
+ - Replica: UIVI6 downloading
176
+ - Started: Sep 09, 22:15:24
177
+ - Status: Logs not yet available
178
+ - ETA: 2-4 minutes
179
+
180
+ Please wait for initialization to complete before using the endpoint.
181
+ """
182
+ return ""
183
 
184
  # Global instance
185
  hf_monitor = HFEndpointMonitor()