Spaces:
Sleeping
Sleeping
Create endpoint_utils.py
Browse files- endpoint_utils.py +69 -0
endpoint_utils.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# endpoint_utils.py
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
from typing import Optional, Tuple, Callable, Dict, Any
|
| 4 |
+
from urllib.parse import urlparse
|
| 5 |
+
import os, time, requests
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def _valid_uri(uri: Optional[str]) -> bool:
|
| 9 |
+
if not uri:
|
| 10 |
+
return False
|
| 11 |
+
p = urlparse(uri)
|
| 12 |
+
return p.scheme in {"http", "https"} and bool(p.netloc)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def wake_endpoint(
    uri: Optional[str],
    *,
    token: Optional[str] = None,
    max_wait: int = 180,
    poll_every: float = 5.0,
    warm_payload: Optional[Dict[str, Any]] = None,
    log: Callable[[str], None] = lambda _: None,
) -> Tuple[bool, Optional[str]]:
    """
    Nudge a scale-to-zero Hugging Face Inference Endpoint and poll until it responds.

    Args:
        uri: Endpoint URL; must be an http(s) URL with a host.
        token: Bearer token sent in the Authorization header. When omitted,
            the ``HF_TOKEN`` environment variable is used if set; with
            neither, no Authorization header is sent.
        max_wait: Polling budget in seconds, counted from after the initial
            nudge request.
        poll_every: Seconds to pause between polls. The pause is shortened so
            it never runs past the deadline.
        warm_payload: JSON body for the nudge and for every poll. When None,
            the nudge sends {"inputs": "wake"} and polls send
            {"inputs": "ping"}.
        log: Callback that receives human-readable progress messages.

    Returns:
        (True, None) on success, or (False, "reason") on timeout / invalid input.

    Notes:
        - Expects endpoints that accept POST JSON bodies like {"inputs": "..."}.
        - Treats 429/503/504 as "warming" signals while polling.
        - Each HTTP request uses a 5-second timeout.
    """
    if not _valid_uri(uri):
        return False, "invalid or missing URI (expect http(s)://...)"

    headers: Dict[str, str] = {}
    tok = token or os.environ.get("HF_TOKEN")
    if tok:
        headers["Authorization"] = f"Bearer {tok}"

    # A caller-supplied payload is used for both phases: an endpoint that needs
    # a custom schema would otherwise reject the default "ping" body on every
    # poll and never be reported as awake.
    if warm_payload is not None:
        nudge_payload = poll_payload = warm_payload
    else:
        nudge_payload = {"inputs": "wake"}
        poll_payload = {"inputs": "ping"}

    # 1) Initial nudge: best-effort, a failure here never aborts the wake-up.
    try:
        requests.post(uri, headers=headers, json=nudge_payload, timeout=5)
    except requests.RequestException as e:
        log(f"{type(e).__name__} on initial nudge; continuing to poll…")

    # 2) Poll until healthy or timeout. The monotonic clock keeps the deadline
    #    immune to wall-clock adjustments.
    deadline = time.monotonic() + max_wait
    while time.monotonic() < deadline:
        try:
            r = requests.post(uri, headers=headers, json=poll_payload, timeout=5)
        except requests.RequestException as e:
            log(f"{type(e).__name__}; retrying in {poll_every:.0f}s…")
        else:
            if r.ok:
                log("✅ Endpoint is awake and responsive.")
                return True, None

            # Common warmup statuses: throttle/warming
            if r.status_code in (429, 503, 504):
                log(f"Endpoint warming (HTTP {r.status_code}); retrying in {poll_every:.0f}s…")
            else:
                log(f"Unexpected response (HTTP {r.status_code}); retrying in {poll_every:.0f}s…")

        # Do not sleep past the deadline, and never hand time.sleep a negative
        # value (it raises ValueError).
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(max(0.0, min(poll_every, remaining)))

    return False, f"timed out after {max_wait}s waiting for endpoint"
|