Spaces:
Running
Running
File size: 11,765 Bytes
186c8e8 d7291ef 1686de5 186c8e8 d7291ef 186c8e8 d7291ef 186c8e8 1686de5 186c8e8 d7291ef 186c8e8 d7291ef 186c8e8 1686de5 186c8e8 d7291ef 186c8e8 d7291ef 1686de5 186c8e8 d7291ef 186c8e8 d7291ef 1c65d0e d7291ef 186c8e8 d7291ef 186c8e8 872dec2 186c8e8 872dec2 186c8e8 872dec2 186c8e8 872dec2 186c8e8 872dec2 186c8e8 872dec2 186c8e8 d7291ef 186c8e8 833c993 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 |
# services/huggingface_service.py
from .vlm_service import VLMService, ModelType
from typing import Dict, Any
import aiohttp
import base64
import json
import time
import re
import imghdr
class HuggingFaceService(VLMService):
    """
    Hugging Face Inference Providers (OpenAI-compatible) service.

    This class speaks to https://router.huggingface.co/v1/chat/completions
    so you can call many VLMs with the same payload shape.
    """

    # Closed vocabularies accepted in the returned metadata. Any value outside
    # these sets is replaced by "OTHER".
    _ALLOWED_EPSG = frozenset({"4326", "3857", "32617", "32633", "32634", "OTHER"})
    _ALLOWED_SOURCES = frozenset({"PDC", "GDACS", "WFP", "GFH", "GGC", "USGS", "OTHER"})
    _ALLOWED_TYPES = frozenset({
        "BIOLOGICAL_EMERGENCY", "CHEMICAL_EMERGENCY", "CIVIL_UNREST",
        "COLD_WAVE", "COMPLEX_EMERGENCY", "CYCLONE", "DROUGHT", "EARTHQUAKE",
        "EPIDEMIC", "FIRE", "FLOOD", "FLOOD_INSECURITY", "HEAT_WAVE",
        "INSECT_INFESTATION", "LANDSLIDE", "OTHER", "PLUVIAL",
        "POPULATION_MOVEMENT", "RADIOLOGICAL_EMERGENCY", "STORM",
        "TRANSPORTATION_EMERGENCY", "TSUNAMI", "VOLCANIC_ERUPTION",
    })

    # (keyword searched in free text, canonical event type). Order matters:
    # the first keyword found wins. "VOLCANIC" maps to the allowed value
    # "VOLCANIC_ERUPTION"; a bare "VOLCANIC" would be rejected by validation.
    _GLM_TYPE_KEYWORDS = (
        ("EARTHQUAKE", "EARTHQUAKE"),
        ("FLOOD", "FLOOD"),
        ("CYCLONE", "CYCLONE"),
        ("DROUGHT", "DROUGHT"),
        ("FIRE", "FIRE"),
        ("STORM", "STORM"),
        ("TSUNAMI", "TSUNAMI"),
        ("VOLCANIC", "VOLCANIC_ERUPTION"),
    )

    # Two-letter uppercase tokens that are not treated as country codes when
    # scanning free text.
    _COUNTRY_FALSE_POSITIVES = frozenset({
        "SO", "IS", "OR", "IN", "ON", "TO", "OF", "AT", "BY", "NO", "GO", "UP", "US",
    })

    def __init__(self, api_key: str, model_id: str = "Qwen/Qwen2.5-VL-7B-Instruct"):
        """
        Store credentials and the target model.

        Args:
            api_key: Token sent as the ``Authorization: Bearer`` header.
            model_id: Model identifier placed in the request payload's
                ``model`` field. The base class receives the name
                ``HF_<model_id with '/' replaced by '_'>``.
        """
        super().__init__(f"HF_{model_id.replace('/', '_')}", ModelType.CUSTOM)
        self.api_key = api_key
        self.model_id = model_id
        self.providers_url = "https://router.huggingface.co/v1/chat/completions"

    def _guess_mime(self, image_bytes: bytes) -> str:
        """
        Return the MIME type for ``image_bytes`` based on its leading bytes.

        PNG and WebP are recognised by signature; everything else (including
        JPEG and unrecognised data) is reported as ``image/jpeg``. Signatures
        are checked directly rather than through the ``imghdr`` module, which
        is deprecated and removed in Python 3.13.
        """
        if image_bytes.startswith(b"\x89PNG\r\n\x1a\n"):
            return "image/png"
        # WebP is a RIFF container whose form type at bytes 8..11 is "WEBP".
        if image_bytes.startswith(b"RIFF") and image_bytes[8:12] == b"WEBP":
            return "image/webp"
        return "image/jpeg"

    @staticmethod
    def _extract_content(result: Any) -> str:
        """Pull the assistant text out of an OpenAI-style response body."""
        choices = result.get("choices") if isinstance(result, dict) else None
        first = choices[0] if isinstance(choices, list) and choices else {}
        message = first.get("message") if isinstance(first, dict) else None
        if not isinstance(message, dict):
            # Missing or null "message": treat as an empty reply, not a crash.
            message = {}

        content = message.get("content", "")
        # GLM models sometimes put content in reasoning_content field
        if not content and message.get("reasoning_content"):
            content = message.get("reasoning_content", "")

        if isinstance(content, list):
            # Some providers may return a list of output blocks
            # (e.g., {"type":"output_text","text":...})
            parts = []
            for block in content:
                if isinstance(block, dict):
                    parts.append(block.get("text") or block.get("content") or "")
                else:
                    parts.append(str(block))
            content = "\n".join(str(p) for p in parts if p)

        if not content:
            return ""
        return content if isinstance(content, str) else str(content)

    @staticmethod
    def _normalize_choice(value: Any, allowed: frozenset) -> str:
        """Return ``value`` as its canonical upper-case code, or "OTHER"."""
        if value:
            candidate = str(value).strip().upper()
            if candidate in allowed:
                return candidate
        return "OTHER"

    def _finalize_metadata(self, metadata: dict) -> dict:
        """Validate metadata in place, filling defaults for absent fields."""
        metadata["epsg"] = self._normalize_choice(metadata.get("epsg"), self._ALLOWED_EPSG)
        metadata["source"] = self._normalize_choice(metadata.get("source"), self._ALLOWED_SOURCES)
        metadata["type"] = self._normalize_choice(metadata.get("type"), self._ALLOWED_TYPES)
        # Ensure countries is always a list
        countries = metadata.get("countries")
        if not countries or not isinstance(countries, list):
            metadata["countries"] = []
        return metadata

    async def generate_caption(
        self,
        image_bytes: bytes,
        prompt: str,
        metadata_instructions: str = "",
    ) -> Dict[str, Any]:
        """
        Generate caption using HF Inference Providers (OpenAI-style).

        Args:
            image_bytes: Raw image data; sent inline as a base64 data URL.
            prompt: Main instruction text. ``None`` or empty is allowed.
            metadata_instructions: Optional extra instructions appended to
                ``prompt`` after a blank line.

        Returns:
            A dict with keys ``caption``, ``metadata``, ``confidence``
            (always ``None``), ``processing_time`` (seconds),
            ``raw_response``, ``description``, ``analysis`` and
            ``recommended_actions``. ``metadata`` is always a dict carrying
            at least ``epsg``, ``source``, ``type`` and ``countries``.

        Raises:
            Exception: With a message starting ``MODEL_UNAVAILABLE`` when the
                endpoint answers with a non-200 status or the request /
                response decoding fails for any reason.
        """
        start_time = time.time()

        instruction = (prompt or "").strip()
        if metadata_instructions:
            instruction += "\n\n" + metadata_instructions.strip()

        mime = self._guess_mime(image_bytes)
        encoded_image = base64.b64encode(image_bytes).decode("utf-8")
        data_url = f"data:{mime};base64,{encoded_image}"

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        # OpenAI-compatible chat payload with one text + one image block.
        payload = {
            "model": self.model_id,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": instruction},
                        {"type": "image_url", "image_url": {"url": data_url}},
                    ],
                }
            ],
            "max_tokens": 512,
            "temperature": 0.2,
        }

        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    self.providers_url,
                    headers=headers,
                    json=payload,
                    timeout=aiohttp.ClientTimeout(total=180),
                ) as resp:
                    if resp.status != 200:
                        # Any non-200 status - throw generic error for fallback handling
                        raise Exception(f"MODEL_UNAVAILABLE: {self.model_name} is currently unavailable (HTTP {resp.status}). Switching to another model.")
                    result = await resp.json()
        except Exception as e:
            if "MODEL_UNAVAILABLE" in str(e):
                raise  # Re-raise model unavailable exceptions as-is
            # Catch any other errors (network, timeout, parsing, etc.) and treat as model unavailable
            raise Exception(f"MODEL_UNAVAILABLE: {self.model_name} is currently unavailable due to an error. Switching to another model.") from e

        # Extract model output (string or list-of-blocks)
        cleaned = self._extract_content(result).strip()

        # Strip accidental fenced JSON
        if cleaned.startswith("```json"):
            cleaned = re.sub(r"^```json\s*", "", cleaned)
            cleaned = re.sub(r"\s*```$", "", cleaned)

        # Best-effort JSON protocol
        metadata = {}
        description = ""
        analysis = cleaned
        recommended_actions = ""
        try:
            parsed = json.loads(cleaned)
        except json.JSONDecodeError:
            # If not JSON, try to extract metadata from GLM thinking format;
            # otherwise the raw text stays as the analysis.
            if "<think>" in cleaned:
                analysis, metadata = self._extract_glm_metadata(cleaned)
        else:
            # Valid JSON that is not an object (list, string, number, null)
            # has no fields to read; keep the raw text as the analysis.
            if isinstance(parsed, dict):
                description = parsed.get("description", "")
                analysis = parsed.get("analysis", cleaned)
                recommended_actions = parsed.get("recommended_actions", "")
                metadata = parsed.get("metadata", {}) or {}

        # Combine all three parts for backward compatibility
        caption_text = f"Description: {description}\n\nAnalysis: {analysis}\n\nRecommended Actions: {recommended_actions}"

        if not isinstance(metadata, dict):
            metadata = {}
        # Record whether the model supplied any metadata *before* defaults are
        # filled in; afterwards the dict is never empty.
        extracted_any_metadata = bool(metadata)
        # Validate and clean metadata fields with sensible defaults
        metadata = self._finalize_metadata(metadata)

        elapsed = time.time() - start_time
        return {
            "caption": caption_text,
            "metadata": metadata,
            "confidence": None,
            "processing_time": elapsed,
            "raw_response": {
                "model": self.model_id,
                "response": result,
                "parsed_successfully": extracted_any_metadata,
            },
            "description": description,
            "analysis": analysis,
            "recommended_actions": recommended_actions,
        }

    def _extract_glm_metadata(self, content: str) -> tuple[str, dict]:
        """
        Extract metadata from GLM thinking format using simple, robust patterns.

        Focus on extracting what we can and rely on defaults for the rest.

        Args:
            content: Model output that may contain ``<think>`` / ``</think>`` tags.

        Returns:
            ``(caption_text, metadata)``. ``caption_text`` is the text that
            precedes the first line mentioning the metadata discussion (or
            the whole de-tagged text if that would be empty). ``metadata``
            contains only the keys that were found, among ``title``,
            ``source``, ``type``, ``countries`` and ``epsg``.
        """
        # Remove <think> tags
        content = re.sub(r'<think>|</think>', '', content)
        metadata = {}

        # Title: Look for quoted strings after "Maybe" or "Title"
        title_match = re.search(r'(?:Maybe|Title).*?["\']([^"\']{5,50})["\']', content, re.IGNORECASE)
        if title_match:
            metadata["title"] = title_match.group(1).strip()

        # Source: Look for common source names (WFP, PDC, etc.)
        source_match = re.search(r'\b(WFP|PDC|GDACS|GFH|GGC|USGS)\b', content, re.IGNORECASE)
        if source_match:
            metadata["source"] = source_match.group(1).upper()

        # Type: Look for disaster keywords and store the canonical type name
        for keyword, canonical_type in self._GLM_TYPE_KEYWORDS:
            if re.search(rf'\b{keyword}\b', content, re.IGNORECASE):
                metadata["type"] = canonical_type
                break

        # Countries: Look for 2-letter country codes, excluding common
        # false positives
        country_codes = [
            code
            for code in re.findall(r'\b([A-Z]{2})\b', content)
            if code not in self._COUNTRY_FALSE_POSITIVES
        ]
        if country_codes:
            # dict.fromkeys removes duplicates while keeping first-seen order,
            # so the result is deterministic.
            metadata["countries"] = list(dict.fromkeys(country_codes))

        # EPSG: Look for the known EPSG codes
        epsg_match = re.search(r'\b(4326|3857|32617|32633|32634)\b', content)
        if epsg_match:
            metadata["epsg"] = epsg_match.group(1)

        # For caption, just use the first part before metadata discussion
        stop_markers = ('metadata:', 'now for the metadata', 'let me double-check')
        caption_lines = []
        for line in content.split('\n'):
            lowered = line.lower()
            if any(marker in lowered for marker in stop_markers):
                break
            caption_lines.append(line)
        caption_text = '\n'.join(caption_lines).strip()
        if not caption_text:
            caption_text = content
        return caption_text, metadata
# --- Generic Model Wrapper for Dynamic Registration ---
class ProvidersGenericVLMService(HuggingFaceService):
    """
    Generic wrapper so you can register ANY Providers VLM by model_id from config.

    Example:
        ProvidersGenericVLMService(HF_TOKEN, "Qwen/Qwen2.5-VL-32B-Instruct", "QWEN2_5_VL_32B")
    """

    def __init__(self, api_key: str, model_id: str, public_name: str | None = None):
        """
        Initialise the service and override its public name.

        Args:
            api_key: Token forwarded to ``HuggingFaceService``.
            model_id: Providers model identifier forwarded to
                ``HuggingFaceService``.
            public_name: Name to expose as ``model_name``. When omitted or
                empty, ``model_id`` with ``/`` replaced by ``_`` and
                upper-cased is used.
        """
        super().__init__(api_key, model_id)
        # Use a human-friendly stable name that your UI/DB will reference
        self.model_name = public_name or model_id.replace("/", "_").upper()
        self.model_type = ModelType.CUSTOM
|