yetessam committed on
Commit
f69b655
·
verified ·
1 Parent(s): 0a72d15

Update endpoint_utils.py

Browse files
Files changed (1) hide show
  1. endpoint_utils.py +35 -16
endpoint_utils.py CHANGED
@@ -16,20 +16,14 @@ def wake_endpoint(
16
  uri: Optional[str],
17
  *,
18
  token: Optional[str] = None,
19
- max_wait: int = 180,
20
  poll_every: float = 5.0,
21
  warm_payload: Optional[Dict[str, Any]] = None,
22
  log: Callable[[str], None] = lambda _: None,
23
  ) -> Tuple[bool, Optional[str]]:
24
  """
25
- Nudge a scale-to-zero Hugging Face Inference Endpoint and poll until it responds.
26
-
27
- Returns:
28
- (True, None) on success, or (False, "reason") on timeout / invalid input.
29
-
30
- Notes:
31
- - Expects endpoints that accept POST JSON bodies like {"inputs": "..."}.
32
- - Treats 429/503/504 as "warming" signals while polling.
33
  """
34
  if not _valid_uri(uri):
35
  return False, "invalid or missing URI (expect http(s)://...)"
@@ -39,7 +33,23 @@ def wake_endpoint(
39
  if tok:
40
  headers["Authorization"] = f"Bearer {tok}"
41
 
42
- # 1) Initial nudge (ignore any errors)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  payload = warm_payload if warm_payload is not None else {"inputs": "wake"}
44
  try:
45
  requests.post(uri, headers=headers, json=payload, timeout=5)
@@ -50,20 +60,29 @@ def wake_endpoint(
50
  deadline = time.time() + max_wait
51
  while time.time() < deadline:
52
  try:
53
- r = requests.post(uri, headers=headers, json={"inputs": "ping"}, timeout=5)
54
  if r.ok:
55
  log("✅ Endpoint is awake and responsive.")
56
  return True, None
57
 
58
- # Common warmup statuses: throttle/warming
 
 
 
 
 
 
 
 
59
  if r.status_code in (429, 503, 504):
60
- log(f"Endpoint warming (HTTP {r.status_code}); retrying in {poll_every:.0f}s…")
61
  else:
62
- log(f"Unexpected response (HTTP {r.status_code}); retrying in {poll_every:.0f}s…")
63
 
64
  except requests.RequestException as e:
65
- log(f"{type(e).__name__}; retrying in {poll_every:.0f}s…")
 
66
 
67
  time.sleep(poll_every)
68
 
69
- return False, f"timed out after {max_wait}s waiting for endpoint"
 
16
  uri: Optional[str],
17
  *,
18
  token: Optional[str] = None,
19
+ max_wait: int = 600, # was 180 — bump to 10 minutes
20
  poll_every: float = 5.0,
21
  warm_payload: Optional[Dict[str, Any]] = None,
22
  log: Callable[[str], None] = lambda _: None,
23
  ) -> Tuple[bool, Optional[str]]:
24
  """
25
+ Wake a scale-to-zero HF Inference Endpoint by nudging it, then polling until ready.
26
+ Returns (True, None) if ready; otherwise (False, "<last status/message>").
 
 
 
 
 
 
27
  """
28
  if not _valid_uri(uri):
29
  return False, "invalid or missing URI (expect http(s)://...)"
 
33
  if tok:
34
  headers["Authorization"] = f"Bearer {tok}"
35
 
36
+ # 0) Try a quick health check first (cheap)
37
+ last_detail = "no response"
38
+ try:
39
+ hr = requests.get(f"{uri.rstrip('/')}/health", headers=headers, timeout=5)
40
+ if hr.ok:
41
+ log("✅ /health reports ready.")
42
+ return True, None
43
+ try:
44
+ last_detail = (hr.json().get("error") or hr.json().get("message")) # type: ignore
45
+ except Exception:
46
+ last_detail = (hr.text or "").strip()
47
+ log(f"[health] HTTP {hr.status_code} – {last_detail or 'warming?'}")
48
+ except requests.RequestException as e:
49
+ last_detail = type(e).__name__
50
+ log(f"[health] {last_detail}")
51
+
52
+ # 1) Initial nudge (ignore errors)
53
  payload = warm_payload if warm_payload is not None else {"inputs": "wake"}
54
  try:
55
  requests.post(uri, headers=headers, json=payload, timeout=5)
 
60
  deadline = time.time() + max_wait
61
  while time.time() < deadline:
62
  try:
63
+ r = requests.post(uri, headers=headers, json={"inputs": "ping"}, timeout=8)
64
  if r.ok:
65
  log("✅ Endpoint is awake and responsive.")
66
  return True, None
67
 
68
+ # extract any helpful server message
69
+ detail = ""
70
+ try:
71
+ data = r.json()
72
+ detail = data.get("error") or data.get("message") or ""
73
+ except ValueError:
74
+ detail = (r.text or "").strip()
75
+
76
+ last_detail = f"HTTP {r.status_code}" + (f" – {detail}" if detail else "")
77
  if r.status_code in (429, 503, 504):
78
+ log(f"[server] {detail or 'warming up'} (HTTP {r.status_code}); retrying in {int(poll_every)}s…")
79
  else:
80
+ log(f"[server] {detail or 'unexpected response'} (HTTP {r.status_code}); retrying in {int(poll_every)}s…")
81
 
82
  except requests.RequestException as e:
83
+ last_detail = type(e).__name__
84
+ log(f"[client] {last_detail}; retrying in {int(poll_every)}s…")
85
 
86
  time.sleep(poll_every)
87
 
88
+ return False, f"Timed out after {max_wait}s last status: {last_detail}"