revert
py_backend/app/routers/caption.py
CHANGED
@@ -44,11 +44,7 @@ if settings.HF_API_KEY:
     try:
         models = crud.get_models(db)
         for model in models:
-
-            if (model.provider == "huggingface" and
-                model.model_id and
-                model.m_code != "STUB_MODEL" and
-                model.m_code not in ["GPT-4O", "GEMINI15"]):
+            if model.provider == "huggingface" and model.model_id and model.m_code != "STUB_MODEL":
                 try:
                     service = ProvidersGenericVLMService(
                         api_key=settings.HF_API_KEY,
@@ -92,22 +88,30 @@ async def create_caption(
     model_name: str | None = Form(None),
     db: Session = Depends(get_db),
 ):
-    print(f"
-    print(f"📝 Caption Router: Requested model: {model_name}")
+    print(f"DEBUG: Received request - image_id: {image_id}, title: {title}, prompt: {prompt}, model_name: {model_name}")
 
-    # Get the image
     img = crud.get_image(db, image_id)
     if not img:
-
-
-
-
+        raise HTTPException(404, "image not found")
+
+
+    print(f"Looking for prompt: '{prompt}' (type: {type(prompt)})")
+
     prompt_obj = crud.get_prompt(db, prompt)
+
+    if not prompt_obj:
+        print(f"Prompt not found by code, trying to find by label...")
+        prompt_obj = crud.get_prompt_by_label(db, prompt)
+
+    print(f"Prompt lookup result: {prompt_obj}")
     if not prompt_obj:
-        print(f"❌ Caption Router: Prompt '{prompt}' not found")
         raise HTTPException(400, f"Prompt '{prompt}' not found")
 
-
+    prompt_text = prompt_obj.label
+    metadata_instructions = prompt_obj.metadata_instructions or ""
+    print(f"Using prompt text: '{prompt_text}'")
+    print(f"Using metadata instructions: '{metadata_instructions[:100]}...'")
+
     try:
         if hasattr(storage, 's3') and settings.STORAGE_PROVIDER != "local":
             response = storage.s3.get_object(
@@ -121,73 +125,75 @@ async def create_caption(
             with open(file_path, 'rb') as f:
                 img_bytes = f.read()
     except Exception as e:
-        print(f"
-
-
-
-
-
-
-
-
-
-
-
-
+        print(f"Error reading image file: {e}")
+        try:
+            url = storage.get_object_url(img.file_key)
+            if url.startswith('/') and settings.STORAGE_PROVIDER == "local":
+                url = f"http://localhost:8000{url}"
+            import requests
+            resp = requests.get(url)
+            resp.raise_for_status()
+            img_bytes = resp.content
+        except Exception as fallback_error:
+            print(f"Fallback also failed: {fallback_error}")
+            raise HTTPException(500, f"Could not read image file: {e}")
+
+    metadata = {}
     try:
         result = await vlm_manager.generate_caption(
-            image_bytes=img_bytes,
-            prompt=
+            image_bytes=img_bytes,
+            prompt=prompt_text,
             metadata_instructions=metadata_instructions,
             model_name=model_name,
             db_session=db,
         )
+
+        # Get the raw response for validation
+        raw = result.get("raw_response", {})
+
+        # Validate and clean the data using schema validation
+        image_type = img.image_type
+        print(f"DEBUG: Validating data for image type: {image_type}")
+        print(f"DEBUG: Raw data structure: {list(raw.keys()) if isinstance(raw, dict) else 'Not a dict'}")
+
+        cleaned_data, is_valid, validation_error = schema_validator.clean_and_validate_data(raw, image_type)
+
+        if is_valid:
+            print(f"✓ Schema validation passed for {image_type}")
+            text = cleaned_data.get("analysis", "")
+            metadata = cleaned_data.get("metadata", {})
+        else:
+            print(f"⚠ Schema validation failed for {image_type}: {validation_error}")
+            # Use fallback but log the validation error
+            text = result.get("caption", "This is a fallback caption due to schema validation error.")
+            metadata = result.get("metadata", {})
+            raw["validation_error"] = validation_error
+            raw["validation_failed"] = True
+
+        # Use the actual model that was used, not the requested model_name
+        used_model = result.get("model", model_name) or "STUB_MODEL"
+
+        # Check if fallback was used
+        fallback_used = result.get("fallback_used", False)
+        original_model = result.get("original_model", None)
+        fallback_reason = result.get("fallback_reason", None)
+
+        if fallback_used:
+            print(f"⚠ Model fallback occurred: {original_model} -> {used_model} (reason: {fallback_reason})")
+            # Add fallback info to raw response for frontend
+            raw["fallback_info"] = {
+                "original_model": original_model,
+                "fallback_model": used_model,
+                "reason": fallback_reason
+            }
+
     except Exception as e:
-        print(f"
-
-
-
-
-
-    raw = result.get("raw_response", {})
-
-    # Validate and clean the data using schema validation
-    image_type = img.image_type
-    print(f"📝 Caption Router: Validating data for image type: {image_type}")
-
-    cleaned_data, is_valid, validation_error = schema_validator.clean_and_validate_data(raw, image_type)
-
-    if is_valid:
-        print(f"✅ Caption Router: Schema validation passed for {image_type}")
-        text = cleaned_data.get("analysis", "")
-        metadata = cleaned_data.get("metadata", {})
-    else:
-        print(f"⚠️ Caption Router: Schema validation failed for {image_type}: {validation_error}")
-        # Use fallback but log the validation error
-        text = result.get("caption", "This is a fallback caption due to schema validation error.")
-        metadata = result.get("metadata", {})
-        raw["validation_error"] = validation_error
-        raw["validation_failed"] = True
-
-    # Use the actual model that was used, not the requested model_name
-    used_model = result.get("model", model_name) or "STUB_MODEL"
-
-    # Check if fallback was used
-    fallback_used = result.get("fallback_used", False)
-    original_model = result.get("original_model", None)
-    fallback_reason = result.get("fallback_reason", None)
-
-    if fallback_used:
-        print(f"⚠️ Caption Router: Model fallback occurred: {original_model} -> {used_model} (reason: {fallback_reason})")
-        # Add fallback info to raw response for frontend
-        raw["fallback_info"] = {
-            "original_model": original_model,
-            "fallback_model": used_model,
-            "reason": fallback_reason
-        }
-
-    print(f"📝 Caption Router: Creating caption in database...")
-
+        print(f"VLM error, using fallback: {e}")
+        text = "This is a fallback caption due to VLM service error."
+        used_model = "STUB_MODEL"
+        raw = {"error": str(e), "fallback": True}
+        metadata = {}
+
     c = crud.create_caption(
         db,
         image_id=image_id,
@@ -201,18 +207,23 @@ async def create_caption(
 
     db.refresh(c)
 
-    print(f"
+    print(f"DEBUG: Caption created, image object: {c}")
+    print(f"DEBUG: file_key: {c.file_key}")
+    print(f"DEBUG: image_id: {c.image_id}")
 
     from .upload import convert_image_to_dict
     try:
         url = storage.get_object_url(c.file_key)
+        print(f"DEBUG: Generated URL: {url}")
         if url.startswith('/') and settings.STORAGE_PROVIDER == "local":
             url = f"http://localhost:8000{url}"
+            print(f"DEBUG: Local URL adjusted to: {url}")
     except Exception as e:
+        print(f"DEBUG: URL generation failed: {e}")
        url = f"/api/images/{c.image_id}/file"
+        print(f"DEBUG: Using fallback URL: {url}")
 
     img_dict = convert_image_to_dict(c, url)
-    print(f"📝 Caption Router: Caption generation completed successfully")
     return schemas.ImageOut(**img_dict)
 
 @router.get(
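
The router change above swaps the emoji-tagged logging for plain DEBUG prints, returns a proper 404 when the image is missing, retries the prompt lookup by label, and adds an HTTP fallback when the direct file read fails. A minimal standalone sketch of that read-then-HTTP-fallback pattern, assuming a storage helper exposing get_object_url(file_key) and a local server on port 8000 (both are stand-ins for this app's helpers, not part of the commit):

from typing import Protocol

import requests


class StorageLike(Protocol):
    def get_object_url(self, file_key: str) -> str: ...


def read_image_bytes(storage: StorageLike, file_path: str, file_key: str,
                     base_url: str = "http://localhost:8000") -> bytes:
    """Try a direct filesystem read first, then fall back to HTTP."""
    try:
        with open(file_path, "rb") as f:
            return f.read()
    except Exception:
        # Mirror the router: resolve the object URL and fetch it over HTTP.
        url = storage.get_object_url(file_key)
        if url.startswith("/"):
            url = f"{base_url}{url}"
        resp = requests.get(url, timeout=30)
        resp.raise_for_status()
        return resp.content

Keeping the original read error in the final HTTPException, as the hunk above does, preserves the first failure even when the HTTP fallback also fails.
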
py_backend/app/services/gemini_service.py
CHANGED
@@ -20,10 +20,6 @@ class GeminiService(VLMService):
 
     async def generate_caption(self, image_bytes: bytes, prompt: str, metadata_instructions: str = "") -> Dict[str, Any]:
         """Generate caption using Google Gemini Vision"""
-        print(f"🔍 Gemini: Starting caption generation for {len(image_bytes)} bytes")
-        print(f"🔍 Gemini: Prompt: {prompt[:100]}...")
-        print(f"🔍 Gemini: Metadata instructions: {metadata_instructions[:100]}...")
-
         instruction = prompt + "\n\n" + metadata_instructions
 
         image_part = {
@@ -32,70 +28,37 @@ class GeminiService(VLMService):
         }
 
         start = time.time()
-
-
-
-
-
-        print(f"🔍 Gemini: API call successful, response received")
-        content = getattr(response, "text", None) or ""
-        print(f"🔍 Gemini: Raw content length: {len(content)}")
-        print(f"🔍 Gemini: Content preview: {content[:200]}...")
+        response = await asyncio.to_thread(self.model.generate_content, [instruction, image_part])
+        elapsed = time.time() - start
+
+        content = getattr(response, "text", None) or ""
 
-
-
-
-
+        cleaned_content = content
+        if cleaned_content.startswith("```json"):
+            cleaned_content = re.sub(r"^```json\s*", "", cleaned_content)
+            cleaned_content = re.sub(r"\s*```$", "", cleaned_content)
 
-
-
-
-
-
-
-
-
-
-
-
-
-        except json.JSONDecodeError as e:
-            print(f"⚠️ Gemini: JSON parse error: {e}")
-            caption_text = content
-            metadata = {}
+        try:
+            parsed = json.loads(cleaned_content)
+            caption_text = parsed.get("analysis", content)
+            metadata = parsed.get("metadata", {})
+            if metadata.get("epsg"):
+                epsg_value = metadata["epsg"]
+                allowed_epsg = ["4326", "3857", "32617", "32633", "32634", "OTHER"]
+                if epsg_value not in allowed_epsg:
+                    metadata["epsg"] = "OTHER"
+        except json.JSONDecodeError:
+            caption_text = content
+            metadata = {}
 
-
-
-        print(f"🔍 Gemini: Final metadata: {metadata}")
-        print(f"🔍 Gemini: Caption generation completed successfully in {elapsed:.2f}s")
+        raw_response: Dict[str, Any] = {"model": self.model_id}
 
-
-
-
-
-
-
-
-
-        except Exception as e:
-            error_msg = str(e)
-            error_type = type(e).__name__
-            print(f"❌ Gemini: Error occurred during caption generation")
-            print(f"❌ Gemini: Error type: {error_type}")
-            print(f"❌ Gemini: Error message: {error_msg}")
-
-            # Check for specific error types
-            if "quota" in error_msg.lower() or "limit" in error_msg.lower():
-                print(f"❌ Gemini: Quota or rate limit exceeded detected")
-                raise Exception(f"MODEL_UNAVAILABLE: GEMINI15 is currently unavailable (quota/rate limit exceeded). Switching to another model.")
-            elif "authentication" in error_msg.lower() or "invalid" in error_msg.lower() or "api_key" in error_msg.lower():
-                print(f"❌ Gemini: Authentication or API key error detected")
-                raise Exception(f"MODEL_UNAVAILABLE: GEMINI15 is currently unavailable (authentication error). Switching to another model.")
-            elif "timeout" in error_msg.lower() or "connection" in error_msg.lower():
-                print(f"❌ Gemini: Network timeout or connection error detected")
-                raise Exception(f"MODEL_UNAVAILABLE: GEMINI15 is currently unavailable (network error). Switching to another model.")
-            else:
-                print(f"❌ Gemini: Generic error, converting to MODEL_UNAVAILABLE")
-                raise Exception(f"MODEL_UNAVAILABLE: GEMINI15 is currently unavailable ({error_type}: {error_msg}). Switching to another model.")
+        return {
+            "caption": caption_text,
+            "metadata": metadata,
+            "confidence": None,
+            "processing_time": elapsed,
+            "raw_response": raw_response,
+        }
 
 
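
The rewritten Gemini path now calls generate_content through asyncio.to_thread, strips a leading ```json fence before parsing, and whitelists the EPSG code. A self-contained sketch of that parsing step so it can be exercised without a Gemini client (parse_model_json is a hypothetical helper name, not part of the commit):

import json
import re


def parse_model_json(content: str) -> tuple[str, dict]:
    """Strip a ```json fence if present, parse, and normalize 'epsg'."""
    cleaned = content
    if cleaned.startswith("```json"):
        cleaned = re.sub(r"^```json\s*", "", cleaned)
        cleaned = re.sub(r"\s*```$", "", cleaned)
    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        # Unparseable output degrades to the raw text with no metadata.
        return content, {}
    caption = parsed.get("analysis", content)
    metadata = parsed.get("metadata", {})
    allowed_epsg = ["4326", "3857", "32617", "32633", "32634", "OTHER"]
    epsg = metadata.get("epsg")
    if epsg and epsg not in allowed_epsg:
        metadata["epsg"] = "OTHER"
    return caption, metadata


# Example: a fenced response parses cleanly; an unknown EPSG is coerced.
print(parse_model_json('```json\n{"analysis": "a road map", "metadata": {"epsg": "9999"}}\n```'))
# -> ('a road map', {'epsg': 'OTHER'})
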
py_backend/app/services/gpt4v_service.py
CHANGED
@@ -15,15 +15,9 @@ class GPT4VService(VLMService):
 
     async def generate_caption(self, image_bytes: bytes, prompt: str, metadata_instructions: str = "") -> Dict[str, Any]:
         """Generate caption using GPT-4 Vision"""
-        print(f"🔍 GPT-4V: Starting caption generation for {len(image_bytes)} bytes")
-        print(f"🔍 GPT-4V: Prompt: {prompt[:100]}...")
-        print(f"🔍 GPT-4V: Metadata instructions: {metadata_instructions[:100]}...")
-
         try:
             image_base64 = base64.b64encode(image_bytes).decode('utf-8')
-            print(f"🔍 GPT-4V: Image encoded to base64, length: {len(image_base64)}")
 
-            print(f"🔍 GPT-4V: Calling OpenAI API with model: gpt-4o")
             response = await asyncio.to_thread(
                 self.client.chat.completions.create,
                 model="gpt-4o",
@@ -44,10 +38,7 @@ class GPT4VService(VLMService):
                 max_tokens=800
             )
 
-            print(f"🔍 GPT-4V: API call successful, response received")
             content = response.choices[0].message.content
-            print(f"🔍 GPT-4V: Raw content length: {len(content)}")
-            print(f"🔍 GPT-4V: Content preview: {content[:200]}...")
 
             cleaned_content = content.strip()
             if cleaned_content.startswith("```json"):
@@ -59,9 +50,7 @@ class GPT4VService(VLMService):
             metadata = {}
             try:
                 metadata = json.loads(cleaned_content)
-
-            except json.JSONDecodeError as e:
-                print(f"⚠️ GPT-4V: JSON parse error: {e}")
+            except json.JSONDecodeError:
                 if "```json" in content:
                     json_start = content.find("```json") + 7
                     json_end = content.find("```", json_start)
@@ -69,21 +58,16 @@ class GPT4VService(VLMService):
                     json_str = content[json_start:json_end].strip()
                     try:
                         metadata = json.loads(json_str)
-
-
-                        print(f"⚠️ GPT-4V: Code block JSON parse also failed: {e2}")
+                    except json.JSONDecodeError as e:
+                        print(f"JSON parse error: {e}")
                 else:
                     import re
                     json_match = re.search(r'\{[^{}]*"metadata"[^{}]*\{[^{}]*\}', content)
                     if json_match:
                         try:
                             metadata = json.loads(json_match.group())
-
-
-                            print(f"⚠️ GPT-4V: Regex JSON extraction failed: {e3}")
-
-            print(f"🔍 GPT-4V: Final metadata: {metadata}")
-            print(f"🔍 GPT-4V: Caption generation completed successfully")
+                        except json.JSONDecodeError:
+                            pass
 
             return {
                 "caption": cleaned_content,
@@ -96,22 +80,4 @@ class GPT4VService(VLMService):
             }
 
         except Exception as e:
-
-            error_type = type(e).__name__
-            print(f"❌ GPT-4V: Error occurred during caption generation")
-            print(f"❌ GPT-4V: Error type: {error_type}")
-            print(f"❌ GPT-4V: Error message: {error_msg}")
-
-            # Check for specific error types
-            if "rate_limit" in error_msg.lower() or "quota" in error_msg.lower():
-                print(f"❌ GPT-4V: Rate limit or quota exceeded detected")
-                raise Exception(f"MODEL_UNAVAILABLE: GPT-4O is currently unavailable (rate limit/quota exceeded). Switching to another model.")
-            elif "authentication" in error_msg.lower() or "invalid" in error_msg.lower() or "api_key" in error_msg.lower():
-                print(f"❌ GPT-4V: Authentication or API key error detected")
-                raise Exception(f"MODEL_UNAVAILABLE: GPT-4O is currently unavailable (authentication error). Switching to another model.")
-            elif "timeout" in error_msg.lower() or "connection" in error_msg.lower():
-                print(f"❌ GPT-4V: Network timeout or connection error detected")
-                raise Exception(f"MODEL_UNAVAILABLE: GPT-4O is currently unavailable (network error). Switching to another model.")
-            else:
-                print(f"❌ GPT-4V: Generic error, converting to MODEL_UNAVAILABLE")
-                raise Exception(f"MODEL_UNAVAILABLE: GPT-4O is currently unavailable ({error_type}: {error_msg}). Switching to another model.")
+            raise Exception(f"GPT-4 Vision API error: {str(e)}")
 
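
The GPT-4V service keeps its three-stage metadata extraction (direct parse, fenced ```json block, regex over the text) while dropping the verbose logging and collapsing the error handler to a single re-raise. A condensed sketch of that extraction ladder (extract_metadata is a hypothetical name for illustration):

import json
import re


def extract_metadata(content: str) -> dict:
    """Best-effort JSON extraction; returns {} when nothing parses."""
    # Stage 1: the whole (stripped) response is valid JSON.
    try:
        return json.loads(content.strip())
    except json.JSONDecodeError:
        pass
    # Stage 2: a fenced ```json block somewhere in the text.
    if "```json" in content:
        start = content.find("```json") + 7
        end = content.find("```", start)
        if end != -1:
            try:
                return json.loads(content[start:end].strip())
            except json.JSONDecodeError:
                pass
    # Stage 3: a regex fragment mentioning "metadata", as in the diff.
    match = re.search(r'\{[^{}]*"metadata"[^{}]*\{[^{}]*\}', content)
    if match:
        try:
            return json.loads(match.group())
        except json.JSONDecodeError:
            pass
    return {}

Note that the stage-3 regex, kept verbatim from the source, matches an unbalanced fragment (two opening braces, one closing), so its json.loads call can be expected to fail and the function then falls through to the empty dict.
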
py_backend/app/services/vlm_service.py
CHANGED
@@ -63,37 +63,64 @@ class VLMServiceManager:
         return list(self.services.keys())
 
     async def generate_caption(self, image_bytes: bytes, prompt: str, metadata_instructions: str = "", model_name: str | None = None, db_session = None) -> dict:
-        """Generate caption using
+        """Generate caption using the specified model or fallback to available service."""
 
-        # Select initial service
         service = None
-        if model_name:
+        if model_name and model_name != "random":
             service = self.services.get(model_name)
+            if not service:
+                print(f"Model '{model_name}' not found, using fallback")
 
-        if not service:
+        if not service and self.services:
+            # If random is selected or no specific model, choose a random available service
             if db_session:
+                # Check database availability for random selection
                 try:
                     from .. import crud
                     available_models = crud.get_models(db_session)
                     available_model_codes = [m.m_code for m in available_models if m.is_available]
 
+                    print(f"DEBUG: Available models in database: {available_model_codes}")
+                    print(f"DEBUG: Registered services: {list(self.services.keys())}")
+
+                    # Filter services to only those marked as available in database
                     available_services = [s for s in self.services.values() if s.model_name in available_model_codes]
+
+                    print(f"DEBUG: Available services after filtering: {[s.model_name for s in available_services]}")
+
                     if available_services:
                         import random
+                        import time
+                        # Use current time as seed for better randomness
+                        random.seed(int(time.time() * 1000000) % 1000000)
+
+                        # Shuffle the list first for better randomization
                         shuffled_services = available_services.copy()
                         random.shuffle(shuffled_services)
+
                         service = shuffled_services[0]
+                        print(f"Randomly selected service: {service.model_name} (from {len(available_services)} available)")
+                        print(f"DEBUG: All available services were: {[s.model_name for s in available_services]}")
+                        print(f"DEBUG: Shuffled order: {[s.model_name for s in shuffled_services]}")
                     else:
+                        # Fallback to any service
                         service = next(iter(self.services.values()))
+                        print(f"Using fallback service: {service.model_name}")
                 except Exception as e:
+                    print(f"Error checking database availability: {e}, using fallback")
                     service = next(iter(self.services.values()))
+                    print(f"Using fallback service: {service.model_name}")
             else:
+                # No database session, use service property
                 available_services = [s for s in self.services.values() if s.is_available]
                 if available_services:
                     import random
                     service = random.choice(available_services)
+                    print(f"Randomly selected service: {service.model_name}")
                 else:
+                    # Fallback to any service
                     service = next(iter(self.services.values()))
+                    print(f"Using fallback service: {service.model_name}")
 
         if not service:
             raise ValueError("No VLM services available")
@@ -114,10 +141,12 @@ class VLMServiceManager:
                 return result
             except Exception as e:
                 error_str = str(e)
+                print(f"Error with service {service.model_name}: {error_str}")
 
                 # Check if it's a model unavailable error (any type of error)
                 if "MODEL_UNAVAILABLE" in error_str:
                     attempted_services.add(service.model_name)
+                    print(f"Model {service.model_name} is unavailable, trying another service...")
 
                     # Try to find another available service
                     if db_session:
@@ -131,24 +160,29 @@ class VLMServiceManager:
                             if (next_service.model_name in available_model_codes and
                                 next_service.model_name not in attempted_services):
                                 service = next_service
+                                print(f"Switching to fallback service: {service.model_name}")
                                 break
                         else:
                             # No more available services, use any untried service
                             for next_service in self.services.values():
                                 if next_service.model_name not in attempted_services:
                                     service = next_service
+                                    print(f"Using untried service as fallback: {service.model_name}")
                                     break
                     except Exception as db_error:
+                        print(f"Error checking database availability: {db_error}")
                         # Fallback to any untried service
                         for next_service in self.services.values():
                             if next_service.model_name not in attempted_services:
                                 service = next_service
+                                print(f"Using untried service as fallback: {service.model_name}")
                                 break
                     else:
                         # No database session, use any untried service
                         for next_service in self.services.values():
                             if next_service.model_name not in attempted_services:
                                 service = next_service
+                                print(f"Using untried service as fallback: {service.model_name}")
                                 break
 
                 if not service:
@@ -157,6 +191,7 @@ class VLMServiceManager:
                     continue # Try again with new service
                 else:
                     # Non-model-unavailable error, don't retry
+                    print(f"Non-model-unavailable error, not retrying: {error_str}")
                     raise
 
         # If we get here, we've tried all services