ContentAgent / endpoint_utils.py
yetessam's picture
Update endpoint_utils.py
f69b655 verified
raw
history blame
3.26 kB
# endpoint_utils.py
from __future__ import annotations
from typing import Optional, Tuple, Callable, Dict, Any
from urllib.parse import urlparse
import os, time, requests
def _valid_uri(uri: Optional[str]) -> bool:
    """Return True if *uri* is an absolute http(s) URL with a network location.

    Args:
        uri: Candidate URL; ``None``, empty and non-string values are rejected.

    Returns:
        ``True`` when the scheme is ``http`` or ``https`` and a host part is
        present, ``False`` otherwise. Never raises for malformed input.
    """
    if not uri or not isinstance(uri, str):
        return False
    try:
        parsed = urlparse(uri)
    except ValueError:
        # urlparse raises on malformed authorities such as an unbalanced
        # IPv6 bracket ("http://[::1"); a validator should answer False.
        return False
    return parsed.scheme in {"http", "https"} and bool(parsed.netloc)
def _response_detail(resp: "requests.Response") -> str:
    """Return the server-provided error/message text from *resp*, or ""."""
    try:
        data = resp.json()
    except ValueError:
        # Body is not JSON; fall back to the raw text.
        return (resp.text or "").strip()
    if isinstance(data, dict):
        return str(data.get("error") or data.get("message") or "")
    # Valid JSON but not an object (list, string, number): show the raw body
    # instead of failing on a missing ``.get``.
    return (resp.text or "").strip()


def wake_endpoint(
    uri: Optional[str],
    *,
    token: Optional[str] = None,
    max_wait: int = 600,  # 10 minutes (previously 180 s)
    poll_every: float = 5.0,
    warm_payload: Optional[Dict[str, Any]] = None,
    log: Callable[[str], None] = lambda _: None,
) -> Tuple[bool, Optional[str]]:
    """
    Wake a scale-to-zero HF Inference Endpoint by nudging it, then polling until ready.

    The sequence is: GET ``<uri>/health``; if that is not OK, POST one warm-up
    payload to ``uri`` (failures ignored), then POST ``{"inputs": "ping"}``
    repeatedly until a 2xx/3xx response arrives or ``max_wait`` elapses.

    Args:
        uri: Endpoint URL; must be an absolute http(s) URL.
        token: Bearer token. Falls back to the ``HF_TOKEN`` environment
            variable; no ``Authorization`` header is sent if neither is set.
        max_wait: Maximum number of seconds to keep polling.
        poll_every: Seconds to wait between polling attempts.
        warm_payload: JSON body for the initial nudge; defaults to
            ``{"inputs": "wake"}``.
        log: Callback that receives human-readable progress messages.

    Returns:
        ``(True, None)`` if ready; otherwise ``(False, "<last status/message>")``.
    """
    if not uri or not _valid_uri(uri):
        return False, "invalid or missing URI (expect http(s)://...)"

    headers: Dict[str, str] = {}
    tok = token or os.environ.get("HF_TOKEN")
    if tok:
        headers["Authorization"] = f"Bearer {tok}"

    # 0) Try a quick health check first (cheap)
    last_detail = "no response"
    try:
        hr = requests.get(f"{uri.rstrip('/')}/health", headers=headers, timeout=5)
    except requests.RequestException as e:
        last_detail = type(e).__name__
        log(f"[health] {last_detail}")
    else:
        if hr.ok:
            log("✅ /health reports ready.")
            return True, None
        detail = _response_detail(hr)
        last_detail = f"HTTP {hr.status_code}" + (f" – {detail}" if detail else "")
        log(f"[health] HTTP {hr.status_code} – {detail or 'warming?'}")

    # 1) Initial nudge: best effort, a failure here is expected while the
    #    endpoint is still cold, so it is logged and otherwise ignored.
    payload = warm_payload if warm_payload is not None else {"inputs": "wake"}
    try:
        requests.post(uri, headers=headers, json=payload, timeout=5)
    except requests.RequestException as e:
        log(f"[nudge] {type(e).__name__} (ignored)")

    # 2) Poll until healthy or timeout. A monotonic clock keeps the deadline
    #    immune to wall-clock adjustments.
    deadline = time.monotonic() + max_wait
    while time.monotonic() < deadline:
        try:
            r = requests.post(uri, headers=headers, json={"inputs": "ping"}, timeout=8)
        except requests.RequestException as e:
            last_detail = type(e).__name__
            log(f"[client] {last_detail}; retrying in {poll_every:g}s…")
        else:
            if r.ok:
                log("✅ Endpoint is awake and responsive.")
                return True, None
            # extract any helpful server message
            detail = _response_detail(r)
            last_detail = f"HTTP {r.status_code}" + (f" – {detail}" if detail else "")
            # 429/503/504 are the usual "still starting" answers.
            reason = "warming up" if r.status_code in (429, 503, 504) else "unexpected response"
            log(f"[server] {detail or reason} (HTTP {r.status_code}); retrying in {poll_every:g}s…")

        # Never sleep past the deadline.
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(max(0.0, min(poll_every, remaining)))

    return False, f"Timed out after {max_wait}s — last status: {last_detail}"