Spaces:
Sleeping
Sleeping
| import os | |
| import base64 | |
| import requests | |
| from pathlib import Path | |
| from openai import OpenAI | |
| from urllib.parse import urlparse | |
| from dotenv import load_dotenv | |
def describe_image(image_path: str) -> str:
    """Generate a description of the image at the given path or URL.

    The image is loaded (downloaded when ``image_path`` is a URL, read from
    disk otherwise), base64-encoded into a ``data:`` URL, and sent to a
    vision-capable chat model through the Nebius OpenAI-compatible endpoint.

    Failures handled by this function are reported as a returned string
    starting with ``"Error"`` rather than as a raised exception.

    Args:
        image_path: Path to local image file OR URL to image.

    Returns:
        A string description of the image, or an error message.
    """
    load_dotenv()

    # Check if API key is available
    api_key = os.getenv("NEBIUS_API_KEY")
    if not api_key:
        return "Error: NEBIUS_API_KEY environment variable not set"

    # Supported local extensions mapped to the MIME type declared in the
    # data URL, so the payload is labelled with its real format.
    mime_by_suffix = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".gif": "image/gif",
        ".bmp": "image/bmp",
        ".webp": "image/webp",
    }

    # Keep the caller's argument untouched; work on a string copy.
    source = str(image_path)

    try:
        # A value with both a scheme and a network location is treated as a
        # URL; everything else is treated as a local file path.
        parsed = urlparse(source)
        if parsed.scheme and parsed.netloc:
            print(f"📡 Downloading image from URL: {source}")
            http_response = requests.get(source, timeout=30)
            http_response.raise_for_status()

            # Drop parameters such as "; charset=..." and normalise case
            # before checking that the server actually returned an image.
            content_type = http_response.headers.get("content-type", "")
            mime_type = content_type.split(";", 1)[0].strip().lower()
            if not mime_type.startswith("image/"):
                return (
                    "Error: URL does not appear to contain an image. "
                    f"Content-Type: {content_type}"
                )
            image_data = http_response.content
        else:
            local_path = Path(source)
            if not local_path.exists():
                return f"Error: Local file not found: {local_path}"

            # Check if it's an image file
            suffix = local_path.suffix.lower()
            if suffix not in mime_by_suffix:
                supported = ", ".join(sorted(mime_by_suffix))
                return (
                    f"Error: Unsupported file type '{local_path.suffix}'. "
                    f"Supported: {supported}"
                )
            mime_type = mime_by_suffix[suffix]

            print(f"📁 Reading local image: {local_path}")
            image_data = local_path.read_bytes()

        # Encode image to base64
        base64_image = base64.b64encode(image_data).decode("utf-8")

        client = OpenAI(
            base_url="https://api.studio.nebius.com/v1/",
            api_key=api_key,
        )

        # Make API call with proper vision format
        completion = client.chat.completions.create(
            model="mistralai/Mistral-Small-3.1-24B-Instruct-2503",
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a helpful assistant that provides detailed "
                        "descriptions of images. Focus on the main subjects, "
                        "colors, composition, and any notable details."
                    ),
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "Please provide a detailed description of this image.",
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{base64_image}",
                            },
                        },
                    ],
                },
            ],
            max_tokens=500,
        )

        # Guard against a reply with no choices or no text content instead
        # of surfacing an opaque IndexError/AttributeError message.
        choices = completion.choices
        content = choices[0].message.content if choices else None
        if content is None:
            return "Error: The model returned an empty response"
        return content.strip()

    except requests.RequestException as e:
        # Must come before OSError: RequestException derives from IOError.
        return f"Error downloading image from URL: {str(e)}"
    except FileNotFoundError:
        return f"Error: File not found: {source}"
    except OSError as e:
        # Handled separately so a path containing "image" is not mistaken
        # for a vision-capability error by the keyword checks below.
        return f"Error reading local file: {e}"
    except Exception as e:
        # Best-effort classification of API errors by message keywords.
        error_msg = str(e)
        lowered = error_msg.lower()
        if "vision" in lowered or "image" in lowered:
            return (
                "Error: This model may not support vision capabilities. "
                f"Try a vision-enabled model. Details: {error_msg}"
            )
        elif "401" in error_msg or "unauthorized" in lowered:
            return "Error: Invalid API key or insufficient permissions"
        elif "rate" in lowered or "quota" in lowered:
            return f"Error: API rate limit or quota exceeded: {error_msg}"
        else:
            return f"Error processing image: {error_msg}"