SCGR committed on
Commit
5778774
·
1 Parent(s): 4c43a48
py_backend/app/routers/caption.py CHANGED
@@ -44,11 +44,7 @@ if settings.HF_API_KEY:
44
  try:
45
  models = crud.get_models(db)
46
  for model in models:
47
-
48
- if (model.provider == "huggingface" and
49
- model.model_id and
50
- model.m_code != "STUB_MODEL" and
51
- model.m_code not in ["GPT-4O", "GEMINI15"]):
52
  try:
53
  service = ProvidersGenericVLMService(
54
  api_key=settings.HF_API_KEY,
@@ -92,22 +88,30 @@ async def create_caption(
92
  model_name: str | None = Form(None),
93
  db: Session = Depends(get_db),
94
  ):
95
- print(f"📝 Caption Router: Starting caption generation for image {image_id}")
96
- print(f"📝 Caption Router: Requested model: {model_name}")
97
 
98
- # Get the image
99
  img = crud.get_image(db, image_id)
100
  if not img:
101
- print(f" Caption Router: Image {image_id} not found")
102
- raise HTTPException(404, f"Image {image_id} not found")
103
-
104
- # Get the prompt object
 
105
  prompt_obj = crud.get_prompt(db, prompt)
 
 
 
 
 
 
106
  if not prompt_obj:
107
- print(f"❌ Caption Router: Prompt '{prompt}' not found")
108
  raise HTTPException(400, f"Prompt '{prompt}' not found")
109
 
110
- # Get image bytes
 
 
 
 
111
  try:
112
  if hasattr(storage, 's3') and settings.STORAGE_PROVIDER != "local":
113
  response = storage.s3.get_object(
@@ -121,73 +125,75 @@ async def create_caption(
121
  with open(file_path, 'rb') as f:
122
  img_bytes = f.read()
123
  except Exception as e:
124
- print(f" Caption Router: Failed to get image bytes: {e}")
125
- raise HTTPException(500, f"Failed to get image: {e}")
126
-
127
- # Prepare metadata instructions
128
- metadata_instructions = ""
129
- if img.image_type == "drone_image":
130
- metadata_instructions = f"Image type: drone image. Center coordinates: {img.center_lon}, {img.center_lat}. Altitude: {img.amsl_m}m AMSL, {img.agl_m}m AGL. Heading: {img.heading_deg}°, Yaw: {img.yaw_deg}°, Pitch: {img.pitch_deg}°, Roll: {img.roll_deg}°. RTK fix: {img.rtk_fix}. Standard deviations: H={img.std_h_m}m, V={img.std_v_m}m."
131
- else:
132
- metadata_instructions = f"Image type: crisis map. Source: {img.source}. Event type: {img.event_type}. EPSG: {img.epsg}. Countries: {img.countries}."
133
-
134
- print(f"📝 Caption Router: Calling VLM manager...")
135
-
136
- # Call VLM manager
 
137
  try:
138
  result = await vlm_manager.generate_caption(
139
- image_bytes=img_bytes,
140
- prompt=prompt_obj.label,
141
  metadata_instructions=metadata_instructions,
142
  model_name=model_name,
143
  db_session=db,
144
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  except Exception as e:
146
- print(f" Caption Router: VLM manager failed: {e}")
147
- raise HTTPException(500, f"Caption generation failed: {e}")
148
-
149
- print(f"📝 Caption Router: VLM manager returned result")
150
-
151
- # Get the raw response for validation
152
- raw = result.get("raw_response", {})
153
-
154
- # Validate and clean the data using schema validation
155
- image_type = img.image_type
156
- print(f"📝 Caption Router: Validating data for image type: {image_type}")
157
-
158
- cleaned_data, is_valid, validation_error = schema_validator.clean_and_validate_data(raw, image_type)
159
-
160
- if is_valid:
161
- print(f"✅ Caption Router: Schema validation passed for {image_type}")
162
- text = cleaned_data.get("analysis", "")
163
- metadata = cleaned_data.get("metadata", {})
164
- else:
165
- print(f"⚠️ Caption Router: Schema validation failed for {image_type}: {validation_error}")
166
- # Use fallback but log the validation error
167
- text = result.get("caption", "This is a fallback caption due to schema validation error.")
168
- metadata = result.get("metadata", {})
169
- raw["validation_error"] = validation_error
170
- raw["validation_failed"] = True
171
-
172
- # Use the actual model that was used, not the requested model_name
173
- used_model = result.get("model", model_name) or "STUB_MODEL"
174
-
175
- # Check if fallback was used
176
- fallback_used = result.get("fallback_used", False)
177
- original_model = result.get("original_model", None)
178
- fallback_reason = result.get("fallback_reason", None)
179
-
180
- if fallback_used:
181
- print(f"⚠️ Caption Router: Model fallback occurred: {original_model} -> {used_model} (reason: {fallback_reason})")
182
- # Add fallback info to raw response for frontend
183
- raw["fallback_info"] = {
184
- "original_model": original_model,
185
- "fallback_model": used_model,
186
- "reason": fallback_reason
187
- }
188
-
189
- print(f"📝 Caption Router: Creating caption in database...")
190
-
191
  c = crud.create_caption(
192
  db,
193
  image_id=image_id,
@@ -201,18 +207,23 @@ async def create_caption(
201
 
202
  db.refresh(c)
203
 
204
- print(f"📝 Caption Router: Caption created successfully")
 
 
205
 
206
  from .upload import convert_image_to_dict
207
  try:
208
  url = storage.get_object_url(c.file_key)
 
209
  if url.startswith('/') and settings.STORAGE_PROVIDER == "local":
210
  url = f"http://localhost:8000{url}"
 
211
  except Exception as e:
 
212
  url = f"/api/images/{c.image_id}/file"
 
213
 
214
  img_dict = convert_image_to_dict(c, url)
215
- print(f"📝 Caption Router: Caption generation completed successfully")
216
  return schemas.ImageOut(**img_dict)
217
 
218
  @router.get(
 
44
  try:
45
  models = crud.get_models(db)
46
  for model in models:
47
+ if model.provider == "huggingface" and model.model_id and model.m_code != "STUB_MODEL":
 
 
 
 
48
  try:
49
  service = ProvidersGenericVLMService(
50
  api_key=settings.HF_API_KEY,
 
88
  model_name: str | None = Form(None),
89
  db: Session = Depends(get_db),
90
  ):
91
+ print(f"DEBUG: Received request - image_id: {image_id}, title: {title}, prompt: {prompt}, model_name: {model_name}")
 
92
 
 
93
  img = crud.get_image(db, image_id)
94
  if not img:
95
+ raise HTTPException(404, "image not found")
96
+
97
+
98
+ print(f"Looking for prompt: '{prompt}' (type: {type(prompt)})")
99
+
100
  prompt_obj = crud.get_prompt(db, prompt)
101
+
102
+ if not prompt_obj:
103
+ print(f"Prompt not found by code, trying to find by label...")
104
+ prompt_obj = crud.get_prompt_by_label(db, prompt)
105
+
106
+ print(f"Prompt lookup result: {prompt_obj}")
107
  if not prompt_obj:
 
108
  raise HTTPException(400, f"Prompt '{prompt}' not found")
109
 
110
+ prompt_text = prompt_obj.label
111
+ metadata_instructions = prompt_obj.metadata_instructions or ""
112
+ print(f"Using prompt text: '{prompt_text}'")
113
+ print(f"Using metadata instructions: '{metadata_instructions[:100]}...'")
114
+
115
  try:
116
  if hasattr(storage, 's3') and settings.STORAGE_PROVIDER != "local":
117
  response = storage.s3.get_object(
 
125
  with open(file_path, 'rb') as f:
126
  img_bytes = f.read()
127
  except Exception as e:
128
+ print(f"Error reading image file: {e}")
129
+ try:
130
+ url = storage.get_object_url(img.file_key)
131
+ if url.startswith('/') and settings.STORAGE_PROVIDER == "local":
132
+ url = f"http://localhost:8000{url}"
133
+ import requests
134
+ resp = requests.get(url)
135
+ resp.raise_for_status()
136
+ img_bytes = resp.content
137
+ except Exception as fallback_error:
138
+ print(f"Fallback also failed: {fallback_error}")
139
+ raise HTTPException(500, f"Could not read image file: {e}")
140
+
141
+ metadata = {}
142
  try:
143
  result = await vlm_manager.generate_caption(
144
+ image_bytes=img_bytes,
145
+ prompt=prompt_text,
146
  metadata_instructions=metadata_instructions,
147
  model_name=model_name,
148
  db_session=db,
149
  )
150
+
151
+ # Get the raw response for validation
152
+ raw = result.get("raw_response", {})
153
+
154
+ # Validate and clean the data using schema validation
155
+ image_type = img.image_type
156
+ print(f"DEBUG: Validating data for image type: {image_type}")
157
+ print(f"DEBUG: Raw data structure: {list(raw.keys()) if isinstance(raw, dict) else 'Not a dict'}")
158
+
159
+ cleaned_data, is_valid, validation_error = schema_validator.clean_and_validate_data(raw, image_type)
160
+
161
+ if is_valid:
162
+ print(f"✓ Schema validation passed for {image_type}")
163
+ text = cleaned_data.get("analysis", "")
164
+ metadata = cleaned_data.get("metadata", {})
165
+ else:
166
+ print(f"⚠ Schema validation failed for {image_type}: {validation_error}")
167
+ # Use fallback but log the validation error
168
+ text = result.get("caption", "This is a fallback caption due to schema validation error.")
169
+ metadata = result.get("metadata", {})
170
+ raw["validation_error"] = validation_error
171
+ raw["validation_failed"] = True
172
+
173
+ # Use the actual model that was used, not the requested model_name
174
+ used_model = result.get("model", model_name) or "STUB_MODEL"
175
+
176
+ # Check if fallback was used
177
+ fallback_used = result.get("fallback_used", False)
178
+ original_model = result.get("original_model", None)
179
+ fallback_reason = result.get("fallback_reason", None)
180
+
181
+ if fallback_used:
182
+ print(f"⚠ Model fallback occurred: {original_model} -> {used_model} (reason: {fallback_reason})")
183
+ # Add fallback info to raw response for frontend
184
+ raw["fallback_info"] = {
185
+ "original_model": original_model,
186
+ "fallback_model": used_model,
187
+ "reason": fallback_reason
188
+ }
189
+
190
  except Exception as e:
191
+ print(f"VLM error, using fallback: {e}")
192
+ text = "This is a fallback caption due to VLM service error."
193
+ used_model = "STUB_MODEL"
194
+ raw = {"error": str(e), "fallback": True}
195
+ metadata = {}
196
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  c = crud.create_caption(
198
  db,
199
  image_id=image_id,
 
207
 
208
  db.refresh(c)
209
 
210
+ print(f"DEBUG: Caption created, image object: {c}")
211
+ print(f"DEBUG: file_key: {c.file_key}")
212
+ print(f"DEBUG: image_id: {c.image_id}")
213
 
214
  from .upload import convert_image_to_dict
215
  try:
216
  url = storage.get_object_url(c.file_key)
217
+ print(f"DEBUG: Generated URL: {url}")
218
  if url.startswith('/') and settings.STORAGE_PROVIDER == "local":
219
  url = f"http://localhost:8000{url}"
220
+ print(f"DEBUG: Local URL adjusted to: {url}")
221
  except Exception as e:
222
+ print(f"DEBUG: URL generation failed: {e}")
223
  url = f"/api/images/{c.image_id}/file"
224
+ print(f"DEBUG: Using fallback URL: {url}")
225
 
226
  img_dict = convert_image_to_dict(c, url)
 
227
  return schemas.ImageOut(**img_dict)
228
 
229
  @router.get(
py_backend/app/services/gemini_service.py CHANGED
@@ -20,10 +20,6 @@ class GeminiService(VLMService):
20
 
21
  async def generate_caption(self, image_bytes: bytes, prompt: str, metadata_instructions: str = "") -> Dict[str, Any]:
22
  """Generate caption using Google Gemini Vision"""
23
- print(f"🔍 Gemini: Starting caption generation for {len(image_bytes)} bytes")
24
- print(f"🔍 Gemini: Prompt: {prompt[:100]}...")
25
- print(f"🔍 Gemini: Metadata instructions: {metadata_instructions[:100]}...")
26
-
27
  instruction = prompt + "\n\n" + metadata_instructions
28
 
29
  image_part = {
@@ -32,70 +28,37 @@ class GeminiService(VLMService):
32
  }
33
 
34
  start = time.time()
35
- try:
36
- print(f"🔍 Gemini: Calling Google Gemini API with model: {self.model_id}")
37
- response = await asyncio.to_thread(self.model.generate_content, [instruction, image_part])
38
- elapsed = time.time() - start
39
-
40
- print(f"🔍 Gemini: API call successful, response received")
41
- content = getattr(response, "text", None) or ""
42
- print(f"🔍 Gemini: Raw content length: {len(content)}")
43
- print(f"🔍 Gemini: Content preview: {content[:200]}...")
44
 
45
- cleaned_content = content
46
- if cleaned_content.startswith("```json"):
47
- cleaned_content = re.sub(r"^```json\s*", "", cleaned_content)
48
- cleaned_content = re.sub(r"\s*```$", "", cleaned_content)
49
 
50
- try:
51
- parsed = json.loads(cleaned_content)
52
- caption_text = parsed.get("analysis", content)
53
- metadata = parsed.get("metadata", {})
54
- print(f"🔍 Gemini: JSON parsed successfully, metadata keys: {list(metadata.keys())}")
55
-
56
- if metadata.get("epsg"):
57
- epsg_value = metadata["epsg"]
58
- allowed_epsg = ["4326", "3857", "32617", "32633", "32634", "OTHER"]
59
- if epsg_value not in allowed_epsg:
60
- metadata["epsg"] = "OTHER"
61
- print(f"🔍 Gemini: EPSG value {epsg_value} not in allowed list, set to OTHER")
62
- except json.JSONDecodeError as e:
63
- print(f"⚠️ Gemini: JSON parse error: {e}")
64
- caption_text = content
65
- metadata = {}
66
 
67
- raw_response: Dict[str, Any] = {"model": self.model_id}
68
-
69
- print(f"🔍 Gemini: Final metadata: {metadata}")
70
- print(f"🔍 Gemini: Caption generation completed successfully in {elapsed:.2f}s")
71
 
72
- return {
73
- "caption": caption_text,
74
- "metadata": metadata,
75
- "confidence": None,
76
- "processing_time": elapsed,
77
- "raw_response": raw_response,
78
- }
79
-
80
- except Exception as e:
81
- error_msg = str(e)
82
- error_type = type(e).__name__
83
- print(f"❌ Gemini: Error occurred during caption generation")
84
- print(f"❌ Gemini: Error type: {error_type}")
85
- print(f"❌ Gemini: Error message: {error_msg}")
86
-
87
- # Check for specific error types
88
- if "quota" in error_msg.lower() or "limit" in error_msg.lower():
89
- print(f"❌ Gemini: Quota or rate limit exceeded detected")
90
- raise Exception(f"MODEL_UNAVAILABLE: GEMINI15 is currently unavailable (quota/rate limit exceeded). Switching to another model.")
91
- elif "authentication" in error_msg.lower() or "invalid" in error_msg.lower() or "api_key" in error_msg.lower():
92
- print(f"❌ Gemini: Authentication or API key error detected")
93
- raise Exception(f"MODEL_UNAVAILABLE: GEMINI15 is currently unavailable (authentication error). Switching to another model.")
94
- elif "timeout" in error_msg.lower() or "connection" in error_msg.lower():
95
- print(f"❌ Gemini: Network timeout or connection error detected")
96
- raise Exception(f"MODEL_UNAVAILABLE: GEMINI15 is currently unavailable (network error). Switching to another model.")
97
- else:
98
- print(f"❌ Gemini: Generic error, converting to MODEL_UNAVAILABLE")
99
- raise Exception(f"MODEL_UNAVAILABLE: GEMINI15 is currently unavailable ({error_type}: {error_msg}). Switching to another model.")
100
 
101
 
 
20
 
21
  async def generate_caption(self, image_bytes: bytes, prompt: str, metadata_instructions: str = "") -> Dict[str, Any]:
22
  """Generate caption using Google Gemini Vision"""
 
 
 
 
23
  instruction = prompt + "\n\n" + metadata_instructions
24
 
25
  image_part = {
 
28
  }
29
 
30
  start = time.time()
31
+ response = await asyncio.to_thread(self.model.generate_content, [instruction, image_part])
32
+ elapsed = time.time() - start
33
+
34
+ content = getattr(response, "text", None) or ""
 
 
 
 
 
35
 
36
+ cleaned_content = content
37
+ if cleaned_content.startswith("```json"):
38
+ cleaned_content = re.sub(r"^```json\s*", "", cleaned_content)
39
+ cleaned_content = re.sub(r"\s*```$", "", cleaned_content)
40
 
41
+ try:
42
+ parsed = json.loads(cleaned_content)
43
+ caption_text = parsed.get("analysis", content)
44
+ metadata = parsed.get("metadata", {})
45
+ if metadata.get("epsg"):
46
+ epsg_value = metadata["epsg"]
47
+ allowed_epsg = ["4326", "3857", "32617", "32633", "32634", "OTHER"]
48
+ if epsg_value not in allowed_epsg:
49
+ metadata["epsg"] = "OTHER"
50
+ except json.JSONDecodeError:
51
+ caption_text = content
52
+ metadata = {}
 
 
 
 
53
 
54
+ raw_response: Dict[str, Any] = {"model": self.model_id}
 
 
 
55
 
56
+ return {
57
+ "caption": caption_text,
58
+ "metadata": metadata,
59
+ "confidence": None,
60
+ "processing_time": elapsed,
61
+ "raw_response": raw_response,
62
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
 
py_backend/app/services/gpt4v_service.py CHANGED
@@ -15,15 +15,9 @@ class GPT4VService(VLMService):
15
 
16
  async def generate_caption(self, image_bytes: bytes, prompt: str, metadata_instructions: str = "") -> Dict[str, Any]:
17
  """Generate caption using GPT-4 Vision"""
18
- print(f"🔍 GPT-4V: Starting caption generation for {len(image_bytes)} bytes")
19
- print(f"🔍 GPT-4V: Prompt: {prompt[:100]}...")
20
- print(f"🔍 GPT-4V: Metadata instructions: {metadata_instructions[:100]}...")
21
-
22
  try:
23
  image_base64 = base64.b64encode(image_bytes).decode('utf-8')
24
- print(f"🔍 GPT-4V: Image encoded to base64, length: {len(image_base64)}")
25
 
26
- print(f"🔍 GPT-4V: Calling OpenAI API with model: gpt-4o")
27
  response = await asyncio.to_thread(
28
  self.client.chat.completions.create,
29
  model="gpt-4o",
@@ -44,10 +38,7 @@ class GPT4VService(VLMService):
44
  max_tokens=800
45
  )
46
 
47
- print(f"🔍 GPT-4V: API call successful, response received")
48
  content = response.choices[0].message.content
49
- print(f"🔍 GPT-4V: Raw content length: {len(content)}")
50
- print(f"🔍 GPT-4V: Content preview: {content[:200]}...")
51
 
52
  cleaned_content = content.strip()
53
  if cleaned_content.startswith("```json"):
@@ -59,9 +50,7 @@ class GPT4VService(VLMService):
59
  metadata = {}
60
  try:
61
  metadata = json.loads(cleaned_content)
62
- print(f"🔍 GPT-4V: JSON parsed successfully, metadata keys: {list(metadata.keys())}")
63
- except json.JSONDecodeError as e:
64
- print(f"⚠️ GPT-4V: JSON parse error: {e}")
65
  if "```json" in content:
66
  json_start = content.find("```json") + 7
67
  json_end = content.find("```", json_start)
@@ -69,21 +58,16 @@ class GPT4VService(VLMService):
69
  json_str = content[json_start:json_end].strip()
70
  try:
71
  metadata = json.loads(json_str)
72
- print(f"🔍 GPT-4V: Extracted JSON from code blocks successfully")
73
- except json.JSONDecodeError as e2:
74
- print(f"⚠️ GPT-4V: Code block JSON parse also failed: {e2}")
75
  else:
76
  import re
77
  json_match = re.search(r'\{[^{}]*"metadata"[^{}]*\{[^{}]*\}', content)
78
  if json_match:
79
  try:
80
  metadata = json.loads(json_match.group())
81
- print(f"🔍 GPT-4V: Extracted JSON using regex successfully")
82
- except json.JSONDecodeError as e3:
83
- print(f"⚠️ GPT-4V: Regex JSON extraction failed: {e3}")
84
-
85
- print(f"🔍 GPT-4V: Final metadata: {metadata}")
86
- print(f"🔍 GPT-4V: Caption generation completed successfully")
87
 
88
  return {
89
  "caption": cleaned_content,
@@ -96,22 +80,4 @@ class GPT4VService(VLMService):
96
  }
97
 
98
  except Exception as e:
99
- error_msg = str(e)
100
- error_type = type(e).__name__
101
- print(f"❌ GPT-4V: Error occurred during caption generation")
102
- print(f"❌ GPT-4V: Error type: {error_type}")
103
- print(f"❌ GPT-4V: Error message: {error_msg}")
104
-
105
- # Check for specific error types
106
- if "rate_limit" in error_msg.lower() or "quota" in error_msg.lower():
107
- print(f"❌ GPT-4V: Rate limit or quota exceeded detected")
108
- raise Exception(f"MODEL_UNAVAILABLE: GPT-4O is currently unavailable (rate limit/quota exceeded). Switching to another model.")
109
- elif "authentication" in error_msg.lower() or "invalid" in error_msg.lower() or "api_key" in error_msg.lower():
110
- print(f"❌ GPT-4V: Authentication or API key error detected")
111
- raise Exception(f"MODEL_UNAVAILABLE: GPT-4O is currently unavailable (authentication error). Switching to another model.")
112
- elif "timeout" in error_msg.lower() or "connection" in error_msg.lower():
113
- print(f"❌ GPT-4V: Network timeout or connection error detected")
114
- raise Exception(f"MODEL_UNAVAILABLE: GPT-4O is currently unavailable (network error). Switching to another model.")
115
- else:
116
- print(f"❌ GPT-4V: Generic error, converting to MODEL_UNAVAILABLE")
117
- raise Exception(f"MODEL_UNAVAILABLE: GPT-4O is currently unavailable ({error_type}: {error_msg}). Switching to another model.")
 
15
 
16
  async def generate_caption(self, image_bytes: bytes, prompt: str, metadata_instructions: str = "") -> Dict[str, Any]:
17
  """Generate caption using GPT-4 Vision"""
 
 
 
 
18
  try:
19
  image_base64 = base64.b64encode(image_bytes).decode('utf-8')
 
20
 
 
21
  response = await asyncio.to_thread(
22
  self.client.chat.completions.create,
23
  model="gpt-4o",
 
38
  max_tokens=800
39
  )
40
 
 
41
  content = response.choices[0].message.content
 
 
42
 
43
  cleaned_content = content.strip()
44
  if cleaned_content.startswith("```json"):
 
50
  metadata = {}
51
  try:
52
  metadata = json.loads(cleaned_content)
53
+ except json.JSONDecodeError:
 
 
54
  if "```json" in content:
55
  json_start = content.find("```json") + 7
56
  json_end = content.find("```", json_start)
 
58
  json_str = content[json_start:json_end].strip()
59
  try:
60
  metadata = json.loads(json_str)
61
+ except json.JSONDecodeError as e:
62
+ print(f"JSON parse error: {e}")
 
63
  else:
64
  import re
65
  json_match = re.search(r'\{[^{}]*"metadata"[^{}]*\{[^{}]*\}', content)
66
  if json_match:
67
  try:
68
  metadata = json.loads(json_match.group())
69
+ except json.JSONDecodeError:
70
+ pass
 
 
 
 
71
 
72
  return {
73
  "caption": cleaned_content,
 
80
  }
81
 
82
  except Exception as e:
83
+ raise Exception(f"GPT-4 Vision API error: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
py_backend/app/services/vlm_service.py CHANGED
@@ -63,37 +63,64 @@ class VLMServiceManager:
63
  return list(self.services.keys())
64
 
65
  async def generate_caption(self, image_bytes: bytes, prompt: str, metadata_instructions: str = "", model_name: str | None = None, db_session = None) -> dict:
66
- """Generate caption using available VLM services with fallback"""
67
 
68
- # Select initial service
69
  service = None
70
- if model_name:
71
  service = self.services.get(model_name)
 
 
72
 
73
- if not service:
 
74
  if db_session:
 
75
  try:
76
  from .. import crud
77
  available_models = crud.get_models(db_session)
78
  available_model_codes = [m.m_code for m in available_models if m.is_available]
79
 
 
 
 
 
80
  available_services = [s for s in self.services.values() if s.model_name in available_model_codes]
 
 
 
81
  if available_services:
82
  import random
 
 
 
 
 
83
  shuffled_services = available_services.copy()
84
  random.shuffle(shuffled_services)
 
85
  service = shuffled_services[0]
 
 
 
86
  else:
 
87
  service = next(iter(self.services.values()))
 
88
  except Exception as e:
 
89
  service = next(iter(self.services.values()))
 
90
  else:
 
91
  available_services = [s for s in self.services.values() if s.is_available]
92
  if available_services:
93
  import random
94
  service = random.choice(available_services)
 
95
  else:
 
96
  service = next(iter(self.services.values()))
 
97
 
98
  if not service:
99
  raise ValueError("No VLM services available")
@@ -114,10 +141,12 @@ class VLMServiceManager:
114
  return result
115
  except Exception as e:
116
  error_str = str(e)
 
117
 
118
  # Check if it's a model unavailable error (any type of error)
119
  if "MODEL_UNAVAILABLE" in error_str:
120
  attempted_services.add(service.model_name)
 
121
 
122
  # Try to find another available service
123
  if db_session:
@@ -131,24 +160,29 @@ class VLMServiceManager:
131
  if (next_service.model_name in available_model_codes and
132
  next_service.model_name not in attempted_services):
133
  service = next_service
 
134
  break
135
  else:
136
  # No more available services, use any untried service
137
  for next_service in self.services.values():
138
  if next_service.model_name not in attempted_services:
139
  service = next_service
 
140
  break
141
  except Exception as db_error:
 
142
  # Fallback to any untried service
143
  for next_service in self.services.values():
144
  if next_service.model_name not in attempted_services:
145
  service = next_service
 
146
  break
147
  else:
148
  # No database session, use any untried service
149
  for next_service in self.services.values():
150
  if next_service.model_name not in attempted_services:
151
  service = next_service
 
152
  break
153
 
154
  if not service:
@@ -157,6 +191,7 @@ class VLMServiceManager:
157
  continue # Try again with new service
158
  else:
159
  # Non-model-unavailable error, don't retry
 
160
  raise
161
 
162
  # If we get here, we've tried all services
 
63
  return list(self.services.keys())
64
 
65
  async def generate_caption(self, image_bytes: bytes, prompt: str, metadata_instructions: str = "", model_name: str | None = None, db_session = None) -> dict:
66
+ """Generate caption using the specified model or fallback to available service."""
67
 
 
68
  service = None
69
+ if model_name and model_name != "random":
70
  service = self.services.get(model_name)
71
+ if not service:
72
+ print(f"Model '{model_name}' not found, using fallback")
73
 
74
+ if not service and self.services:
75
+ # If random is selected or no specific model, choose a random available service
76
  if db_session:
77
+ # Check database availability for random selection
78
  try:
79
  from .. import crud
80
  available_models = crud.get_models(db_session)
81
  available_model_codes = [m.m_code for m in available_models if m.is_available]
82
 
83
+ print(f"DEBUG: Available models in database: {available_model_codes}")
84
+ print(f"DEBUG: Registered services: {list(self.services.keys())}")
85
+
86
+ # Filter services to only those marked as available in database
87
  available_services = [s for s in self.services.values() if s.model_name in available_model_codes]
88
+
89
+ print(f"DEBUG: Available services after filtering: {[s.model_name for s in available_services]}")
90
+
91
  if available_services:
92
  import random
93
+ import time
94
+ # Use current time as seed for better randomness
95
+ random.seed(int(time.time() * 1000000) % 1000000)
96
+
97
+ # Shuffle the list first for better randomization
98
  shuffled_services = available_services.copy()
99
  random.shuffle(shuffled_services)
100
+
101
  service = shuffled_services[0]
102
+ print(f"Randomly selected service: {service.model_name} (from {len(available_services)} available)")
103
+ print(f"DEBUG: All available services were: {[s.model_name for s in available_services]}")
104
+ print(f"DEBUG: Shuffled order: {[s.model_name for s in shuffled_services]}")
105
  else:
106
+ # Fallback to any service
107
  service = next(iter(self.services.values()))
108
+ print(f"Using fallback service: {service.model_name}")
109
  except Exception as e:
110
+ print(f"Error checking database availability: {e}, using fallback")
111
  service = next(iter(self.services.values()))
112
+ print(f"Using fallback service: {service.model_name}")
113
  else:
114
+ # No database session, use service property
115
  available_services = [s for s in self.services.values() if s.is_available]
116
  if available_services:
117
  import random
118
  service = random.choice(available_services)
119
+ print(f"Randomly selected service: {service.model_name}")
120
  else:
121
+ # Fallback to any service
122
  service = next(iter(self.services.values()))
123
+ print(f"Using fallback service: {service.model_name}")
124
 
125
  if not service:
126
  raise ValueError("No VLM services available")
 
141
  return result
142
  except Exception as e:
143
  error_str = str(e)
144
+ print(f"Error with service {service.model_name}: {error_str}")
145
 
146
  # Check if it's a model unavailable error (any type of error)
147
  if "MODEL_UNAVAILABLE" in error_str:
148
  attempted_services.add(service.model_name)
149
+ print(f"Model {service.model_name} is unavailable, trying another service...")
150
 
151
  # Try to find another available service
152
  if db_session:
 
160
  if (next_service.model_name in available_model_codes and
161
  next_service.model_name not in attempted_services):
162
  service = next_service
163
+ print(f"Switching to fallback service: {service.model_name}")
164
  break
165
  else:
166
  # No more available services, use any untried service
167
  for next_service in self.services.values():
168
  if next_service.model_name not in attempted_services:
169
  service = next_service
170
+ print(f"Using untried service as fallback: {service.model_name}")
171
  break
172
  except Exception as db_error:
173
+ print(f"Error checking database availability: {db_error}")
174
  # Fallback to any untried service
175
  for next_service in self.services.values():
176
  if next_service.model_name not in attempted_services:
177
  service = next_service
178
+ print(f"Using untried service as fallback: {service.model_name}")
179
  break
180
  else:
181
  # No database session, use any untried service
182
  for next_service in self.services.values():
183
  if next_service.model_name not in attempted_services:
184
  service = next_service
185
+ print(f"Using untried service as fallback: {service.model_name}")
186
  break
187
 
188
  if not service:
 
191
  continue # Try again with new service
192
  else:
193
  # Non-model-unavailable error, don't retry
194
+ print(f"Non-model-unavailable error, not retrying: {error_str}")
195
  raise
196
 
197
  # If we get here, we've tried all services