Spaces:

AgentsGuards
/

image_utilities_mcp

Sleeping

image_utilities_mcp / src /utils /describe.py

JuanjoSG5

current progress

cc083b4 5 months ago

4.29 kB

	import os
	import base64
	import requests
	from pathlib import Path
	from openai import OpenAI
	from urllib.parse import urlparse
	from dotenv import load_dotenv


	# Supported local-file extensions mapped to the MIME type advertised in the
	# data URL sent to the model.
	_MIME_BY_EXTENSION = {
	    ".png": "image/png",
	    ".jpg": "image/jpeg",
	    ".jpeg": "image/jpeg",
	    ".gif": "image/gif",
	    ".bmp": "image/bmp",
	    ".webp": "image/webp",
	}


	def _image_mime_type(suffix: str, content_type: str = "") -> str:
	    """Pick the MIME type to declare for an image payload.

	    Preference order: an ``image/*`` value from the HTTP Content-Type header,
	    then the file extension, then ``image/jpeg`` (the value this module used
	    unconditionally before) as a last resort.
	    """
	    # Drop parameters such as "; charset=..." before inspecting the type.
	    declared = content_type.split(";", 1)[0].strip().lower()
	    if declared.startswith("image/"):
	        return declared
	    return _MIME_BY_EXTENSION.get(suffix.lower(), "image/jpeg")


	def describe_image(image_path: str) -> str:
	    """
	    Generate a description of the image at the given path or URL.

	    The image is read (or downloaded with a 30 second timeout), base64-encoded
	    into a data URL and sent to the Nebius OpenAI-compatible chat completions
	    endpoint. Failures are never raised: they are reported as a string that
	    starts with "Error".

	    Args:
	        image_path: Path to local image file OR URL to image. A value is
	            treated as a URL when it has both a scheme and a network location.

	    Returns:
	        A string description of the image, or an error message.
	    """
	    load_dotenv()

	    # Check if API key is available
	    api_key = os.getenv("NEBIUS_API_KEY")
	    if not api_key:
	        return "Error: NEBIUS_API_KEY environment variable not set"

	    try:
	        # Determine if it's a URL or local file path
	        parsed = urlparse(image_path)
	        is_url = bool(parsed.scheme and parsed.netloc)

	        if is_url:
	            print(f"📡 Downloading image from URL: {image_path}")
	            download = requests.get(image_path, timeout=30)
	            download.raise_for_status()

	            # Reject responses whose headers do not announce an image.
	            content_type = download.headers.get('content-type', '')
	            if 'image' not in content_type.lower():
	                return f"Error: URL does not appear to contain an image. Content-Type: {content_type}"

	            image_data = download.content
	            mime_type = _image_mime_type(Path(parsed.path).suffix, content_type)
	        else:
	            # Keep the caller's string intact; work on a separate Path object.
	            local_path = Path(image_path)

	            if not local_path.exists():
	                return f"Error: Local file not found: {local_path}"

	            suffix = local_path.suffix.lower()
	            if suffix not in _MIME_BY_EXTENSION:
	                # Sorted so the message is identical from run to run.
	                supported = ", ".join(sorted(_MIME_BY_EXTENSION))
	                return f"Error: Unsupported file type '{local_path.suffix}'. Supported: {supported}"

	            print(f"📁 Reading local image: {local_path}")
	            image_data = local_path.read_bytes()
	            mime_type = _image_mime_type(suffix)

	        # Encode image to base64
	        base64_image = base64.b64encode(image_data).decode('utf-8')

	        # Create OpenAI client pointed at the Nebius endpoint
	        client = OpenAI(
	            base_url="https://api.studio.nebius.com/v1/",
	            api_key=api_key,
	        )

	        # Make API call with proper vision format; the data URL declares the
	        # image's real MIME type rather than always claiming JPEG.
	        completion = client.chat.completions.create(
	            model="mistralai/Mistral-Small-3.1-24B-Instruct-2503",
	            messages=[
	                {
	                    "role": "system",
	                    "content": "You are a helpful assistant that provides detailed descriptions of images. Focus on the main subjects, colors, composition, and any notable details.",
	                },
	                {
	                    "role": "user",
	                    "content": [
	                        {
	                            "type": "text",
	                            "text": "Please provide a detailed description of this image.",
	                        },
	                        {
	                            "type": "image_url",
	                            "image_url": {
	                                "url": f"data:{mime_type};base64,{base64_image}",
	                            },
	                        },
	                    ],
	                },
	            ],
	            max_tokens=500,
	        )

	        description = completion.choices[0].message.content
	        if description is None:
	            # Avoid an opaque AttributeError from calling .strip() on None.
	            return "Error: The model returned no description"
	        return description.strip()

	    except requests.RequestException as e:
	        return f"Error downloading image from URL: {str(e)}"
	    except FileNotFoundError:
	        return f"Error: File not found: {image_path}"
	    except Exception as e:
	        # Boundary handler: map anything else to a readable message based on
	        # keywords found in the exception text.
	        error_msg = str(e)
	        lowered = error_msg.lower()

	        if "vision" in lowered or "image" in lowered:
	            return f"Error: This model may not support vision capabilities. Try a vision-enabled model. Details: {error_msg}"
	        elif "401" in error_msg or "unauthorized" in lowered:
	            return "Error: Invalid API key or insufficient permissions"
	        elif "rate" in lowered or "quota" in lowered:
	            return f"Error: API rate limit or quota exceeded: {error_msg}"
	        else:
	            return f"Error processing image: {error_msg}"