import os
import sys
import json
import gradio as gr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import torch
import cv2

# Create necessary directories
os.makedirs('/tmp/image_evaluator_uploads', exist_ok=True)
os.makedirs('/tmp/image_evaluator_results', exist_ok=True)


# Base Evaluator class
class BaseEvaluator:
    """
    Base class for all image quality evaluators.
    All evaluator implementations should inherit from this class.
    """

    def __init__(self, config=None):
        """
        Initialize the evaluator with optional configuration.

        Args:
            config (dict, optional): Configuration parameters for the evaluator.
        """
        self.config = config or {}

    def evaluate(self, image_path):
        """
        Evaluate a single image and return scores.

        Args:
            image_path (str): Path to the image file.

        Returns:
            dict: Dictionary containing evaluation scores.
        """
        raise NotImplementedError("Subclasses must implement evaluate()")

    def batch_evaluate(self, image_paths):
        """
        Evaluate multiple images.

        Args:
            image_paths (list): List of paths to image files.

        Returns:
            list: List of dictionaries containing evaluation scores for each image.
        """
        return [self.evaluate(img_path) for img_path in image_paths]

    def get_metadata(self):
        """
        Return metadata about this evaluator.

        Returns:
            dict: Dictionary containing metadata about the evaluator.
        """
        raise NotImplementedError("Subclasses must implement get_metadata()")
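
# Illustrative sketch (not part of the app): a new evaluator only needs to
# subclass BaseEvaluator and implement evaluate() and get_metadata(); the
# 'id' field returned by get_metadata() is the key EvaluatorManager uses when
# registering it. The class and metric names below are hypothetical.
#
#     class GrayscaleEvaluator(BaseEvaluator):
#         def evaluate(self, image_path):
#             img = np.array(Image.open(image_path).convert('L'))
#             return {'overall_grayscale': float(np.mean(img) / 255.0)}
#
#         def get_metadata(self):
#             return {'id': 'grayscale', 'name': 'Grayscale Brightness',
#                     'description': 'Mean brightness of the image.',
#                     'version': '1.0',
#                     'metrics': [{'id': 'overall_grayscale',
#                                  'name': 'Overall Grayscale',
#                                  'description': 'Mean brightness'}]}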
# Technical Evaluator
class TechnicalEvaluator(BaseEvaluator):
    """
    Evaluator for basic technical image quality metrics.
    Measures sharpness, noise, artifacts, and other technical aspects.
    """

    def __init__(self, config=None):
        super().__init__(config)
        self.config.setdefault('laplacian_ksize', 3)
        self.config.setdefault('blur_threshold', 100)
        self.config.setdefault('noise_threshold', 0.05)

    def evaluate(self, image_path):
        """
        Evaluate technical aspects of an image.

        Args:
            image_path (str): Path to the image file.

        Returns:
            dict: Dictionary containing technical evaluation scores.
        """
        try:
            # Load image
            img = cv2.imread(image_path)
            if img is None:
                return {
                    'error': 'Failed to load image',
                    'overall_technical': 0.0
                }

            # Convert to grayscale for some calculations
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

            # Calculate sharpness using Laplacian variance
            laplacian = cv2.Laplacian(gray, cv2.CV_64F, ksize=self.config['laplacian_ksize'])
            sharpness_score = np.var(laplacian) / 10000  # Normalize
            sharpness_score = min(1.0, sharpness_score)  # Cap at 1.0

            # Calculate noise level
            # Using a simple method based on standard deviation in smooth areas
            blur = cv2.GaussianBlur(gray, (11, 11), 0)
            diff = cv2.absdiff(gray, blur)
            noise_level = np.std(diff) / 255.0
            noise_score = 1.0 - min(1.0, noise_level / self.config['noise_threshold'])

            # Check for compression artifacts
            edges = cv2.Canny(gray, 100, 200)
            artifact_score = 1.0 - (np.count_nonzero(edges) / (gray.shape[0] * gray.shape[1]))
            artifact_score = max(0.0, min(1.0, artifact_score * 2))  # Adjust range

            # Calculate color range and saturation
            hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            saturation = hsv[:, :, 1]
            saturation_score = np.mean(saturation) / 255.0

            # Calculate contrast
            min_val, max_val, _, _ = cv2.minMaxLoc(gray)
            contrast_score = (max_val - min_val) / 255.0

            # Calculate overall technical score (weighted average)
            overall_technical = (
                0.3 * sharpness_score +
                0.2 * noise_score +
                0.2 * artifact_score +
                0.15 * saturation_score +
                0.15 * contrast_score
            )

            return {
                'sharpness': float(sharpness_score),
                'noise': float(noise_score),
                'artifacts': float(artifact_score),
                'saturation': float(saturation_score),
                'contrast': float(contrast_score),
                'overall_technical': float(overall_technical)
            }
        except Exception as e:
            return {
                'error': str(e),
                'overall_technical': 0.0
            }

    def get_metadata(self):
        """
        Return metadata about this evaluator.

        Returns:
            dict: Dictionary containing metadata about the evaluator.
        """
        return {
            'id': 'technical',
            'name': 'Technical Metrics',
            'description': 'Evaluates basic technical aspects of image quality including sharpness, noise, artifacts, saturation, and contrast.',
            'version': '1.0',
            'metrics': [
                {'id': 'sharpness', 'name': 'Sharpness', 'description': 'Measures image clarity and detail'},
                {'id': 'noise', 'name': 'Noise', 'description': 'Measures absence of unwanted variations'},
                {'id': 'artifacts', 'name': 'Artifacts', 'description': 'Measures absence of compression artifacts'},
                {'id': 'saturation', 'name': 'Saturation', 'description': 'Measures color intensity'},
                {'id': 'contrast', 'name': 'Contrast', 'description': 'Measures difference between light and dark areas'},
                {'id': 'overall_technical', 'name': 'Overall Technical', 'description': 'Combined technical quality score'}
            ]
        }
# Aesthetic Evaluator
class AestheticEvaluator(BaseEvaluator):
    """
    Evaluator for aesthetic image quality.
    Uses a simplified aesthetic assessment model.
    """

    def __init__(self, config=None):
        super().__init__(config)
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def evaluate(self, image_path):
        """
        Evaluate aesthetic aspects of an image.

        Args:
            image_path (str): Path to the image file.

        Returns:
            dict: Dictionary containing aesthetic evaluation scores.
        """
        try:
            # Load and preprocess image
            img = Image.open(image_path).convert('RGB')

            # Convert to numpy array for calculations
            img_np = np.array(img)

            # Calculate color harmony using standard deviation of colors
            r, g, b = img_np[:, :, 0], img_np[:, :, 1], img_np[:, :, 2]
            color_std = (np.std(r) + np.std(g) + np.std(b)) / 3
            color_harmony = min(1.0, color_std / 80.0)  # Normalize

            # Calculate composition score using rule of thirds
            h, w = img_np.shape[:2]
            third_h, third_w = h // 3, w // 3

            # Create a rule of thirds grid mask
            grid_mask = np.zeros((h, w))
            for i in range(1, 3):
                grid_mask[third_h * i - 5:third_h * i + 5, :] = 1
                grid_mask[:, third_w * i - 5:third_w * i + 5] = 1

            # Convert to grayscale for edge detection (signed dtype so the
            # differences below cannot wrap around as they would with uint8)
            gray = np.mean(img_np, axis=2).astype(np.int16)

            # Simple edge detection
            edges = np.abs(np.diff(gray, axis=0, prepend=0)) + np.abs(np.diff(gray, axis=1, prepend=0))
            edges = edges > 30  # Threshold

            # Calculate how many edges fall on the rule of thirds lines
            thirds_alignment = np.sum(edges * grid_mask) / max(1, np.sum(edges))
            composition_score = min(1.0, thirds_alignment * 3)  # Scale up for better distribution

            # Calculate visual interest using entropy
            hist_r = np.histogram(r, bins=256, range=(0, 256))[0] / (h * w)
            hist_g = np.histogram(g, bins=256, range=(0, 256))[0] / (h * w)
            hist_b = np.histogram(b, bins=256, range=(0, 256))[0] / (h * w)
            entropy_r = -np.sum(hist_r[hist_r > 0] * np.log2(hist_r[hist_r > 0]))
            entropy_g = -np.sum(hist_g[hist_g > 0] * np.log2(hist_g[hist_g > 0]))
            entropy_b = -np.sum(hist_b[hist_b > 0] * np.log2(hist_b[hist_b > 0]))
            entropy = (entropy_r + entropy_g + entropy_b) / 3
            visual_interest = min(1.0, entropy / 7.5)  # Normalize

            # Calculate overall aesthetic score (weighted average)
            overall_aesthetic = (
                0.4 * color_harmony +
                0.3 * composition_score +
                0.3 * visual_interest
            )

            return {
                'color_harmony': float(color_harmony),
                'composition': float(composition_score),
                'visual_interest': float(visual_interest),
                'overall_aesthetic': float(overall_aesthetic)
            }
        except Exception as e:
            return {
                'error': str(e),
                'overall_aesthetic': 0.0
            }

    def get_metadata(self):
        """
        Return metadata about this evaluator.

        Returns:
            dict: Dictionary containing metadata about the evaluator.
        """
        return {
            'id': 'aesthetic',
            'name': 'Aesthetic Assessment',
            'description': 'Evaluates aesthetic qualities of images including color harmony, composition, and visual interest.',
            'version': '1.0',
            'metrics': [
                {'id': 'color_harmony', 'name': 'Color Harmony', 'description': 'Measures how well colors work together'},
                {'id': 'composition', 'name': 'Composition', 'description': 'Measures adherence to compositional principles like rule of thirds'},
                {'id': 'visual_interest', 'name': 'Visual Interest', 'description': 'Measures how visually engaging the image is'},
                {'id': 'overall_aesthetic', 'name': 'Overall Aesthetic', 'description': 'Combined aesthetic quality score'}
            ]
        }
# Anime Style Evaluator
class AnimeStyleEvaluator(BaseEvaluator):
    """
    Specialized evaluator for anime-style images.
    Focuses on line quality, character design, style consistency, and other anime-specific attributes.
    """

    def __init__(self, config=None):
        super().__init__(config)
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def evaluate(self, image_path):
        """
        Evaluate anime-specific aspects of an image.

        Args:
            image_path (str): Path to the image file.

        Returns:
            dict: Dictionary containing anime-style evaluation scores.
        """
        try:
            # Load image
            img = Image.open(image_path).convert('RGB')
            img_np = np.array(img)

            # Line quality assessment (signed dtype avoids uint8 wrap-around
            # in the gradient calculations below)
            gray = np.mean(img_np, axis=2).astype(np.int16)

            # Calculate gradients for edge detection
            gx = np.abs(np.diff(gray, axis=1, prepend=0))
            gy = np.abs(np.diff(gray, axis=0, prepend=0))

            # Combine gradients
            edges = np.maximum(gx, gy)

            # Strong edges are characteristic of anime
            strong_edges = edges > 50
            edge_ratio = np.sum(strong_edges) / (gray.shape[0] * gray.shape[1])

            # Line quality score - anime typically has a higher proportion of strong edges
            line_quality = min(1.0, edge_ratio * 20)  # Scale appropriately

            # Color palette assessment
            pixels = img_np.reshape(-1, 3)
            sample_size = min(10000, pixels.shape[0])
            indices = np.random.choice(pixels.shape[0], sample_size, replace=False)
            sampled_pixels = pixels[indices]

            # Calculate color diversity (simplified)
            color_std = np.std(sampled_pixels, axis=0)
            color_diversity = np.mean(color_std) / 128.0  # Normalize

            # Anime often has a good balance of diversity but not excessive
            color_score = 1.0 - abs(color_diversity - 0.5) * 2  # Penalize too high or too low

            # Placeholder for character quality
            character_quality = 0.85  # Default value for prototype

            # Style consistency assessment
            hsv = np.array(img.convert('HSV'))
            saturation = hsv[:, :, 1]
            value = hsv[:, :, 2]

            # Calculate statistics
            sat_mean = np.mean(saturation) / 255.0
            val_mean = np.mean(value) / 255.0

            # Anime often has higher saturation and controlled brightness;
            # clamp at 0 so strongly off-target images cannot go negative
            sat_score = max(0.0, 1.0 - abs(sat_mean - 0.7) * 2)  # Ideal around 0.7
            val_score = max(0.0, 1.0 - abs(val_mean - 0.6) * 2)  # Ideal around 0.6
            style_consistency = (sat_score + val_score) / 2

            # Overall anime score (weighted average)
            overall_anime = (
                0.3 * line_quality +
                0.2 * color_score +
                0.25 * character_quality +
                0.25 * style_consistency
            )

            return {
                'line_quality': float(line_quality),
                'color_palette': float(color_score),
                'character_quality': float(character_quality),
                'style_consistency': float(style_consistency),
                'overall_anime': float(overall_anime)
            }
        except Exception as e:
            return {
                'error': str(e),
                'overall_anime': 0.0
            }

    def get_metadata(self):
        """
        Return metadata about this evaluator.

        Returns:
            dict: Dictionary containing metadata about the evaluator.
        """
        return {
            'id': 'anime_specialized',
            'name': 'Anime Style Evaluator',
            'description': 'Specialized evaluator for anime-style images, focusing on line quality, color palette, character design, and style consistency.',
            'version': '1.0',
            'metrics': [
                {'id': 'line_quality', 'name': 'Line Quality', 'description': 'Measures clarity and quality of line work'},
                {'id': 'color_palette', 'name': 'Color Palette', 'description': 'Evaluates color choices and harmony for anime style'},
                {'id': 'character_quality', 'name': 'Character Quality', 'description': 'Assesses character design and rendering'},
                {'id': 'style_consistency', 'name': 'Style Consistency', 'description': 'Measures adherence to anime style conventions'},
                {'id': 'overall_anime', 'name': 'Overall Anime Quality', 'description': 'Combined anime-specific quality score'}
            ]
        }
# Evaluator Manager
class EvaluatorManager:
    """
    Manager class for handling multiple evaluators.
    Provides a unified interface for evaluating images with different metrics.
    """

    def __init__(self):
        """Initialize the evaluator manager with available evaluators."""
        self.evaluators = {}
        self._register_default_evaluators()

    def _register_default_evaluators(self):
        """Register the default set of evaluators."""
        self.register_evaluator(TechnicalEvaluator())
        self.register_evaluator(AestheticEvaluator())
        self.register_evaluator(AnimeStyleEvaluator())

    def register_evaluator(self, evaluator):
        """
        Register a new evaluator.

        Args:
            evaluator (BaseEvaluator): The evaluator to register.
        """
        if not isinstance(evaluator, BaseEvaluator):
            raise TypeError("Evaluator must be an instance of BaseEvaluator")
        metadata = evaluator.get_metadata()
        self.evaluators[metadata['id']] = evaluator

    def get_available_evaluators(self):
        """
        Get a list of available evaluators.

        Returns:
            list: List of evaluator metadata.
        """
        return [evaluator.get_metadata() for evaluator in self.evaluators.values()]

    def evaluate_image(self, image_path, evaluator_ids=None):
        """
        Evaluate an image using specified evaluators.

        Args:
            image_path (str): Path to the image file.
            evaluator_ids (list, optional): List of evaluator IDs to use.
                If None, all available evaluators will be used.

        Returns:
            dict: Dictionary containing evaluation results from each evaluator.
        """
        if not os.path.exists(image_path):
            return {'error': f'Image file not found: {image_path}'}

        if evaluator_ids is None:
            evaluator_ids = list(self.evaluators.keys())

        results = {}
        for evaluator_id in evaluator_ids:
            if evaluator_id in self.evaluators:
                results[evaluator_id] = self.evaluators[evaluator_id].evaluate(image_path)
            else:
                results[evaluator_id] = {'error': f'Evaluator not found: {evaluator_id}'}
        return results

    def batch_evaluate_images(self, image_paths, evaluator_ids=None):
        """
        Evaluate multiple images using specified evaluators.

        Args:
            image_paths (list): List of paths to image files.
            evaluator_ids (list, optional): List of evaluator IDs to use.
                If None, all available evaluators will be used.

        Returns:
            list: List of dictionaries containing evaluation results for each image.
        """
        return [self.evaluate_image(path, evaluator_ids) for path in image_paths]

    def compare_models(self, model_results):
        """
        Compare different models based on evaluation results.

        Args:
            model_results (dict): Dictionary mapping model names to their evaluation results.

        Returns:
            dict: Comparison results including rankings and best model.
        """
        if not model_results:
            return {'error': 'No model results provided for comparison'}

        # Calculate average scores for each model across all images and evaluators
        model_scores = {}
        for model_name, image_results in model_results.items():
            model_scores[model_name] = {
                'technical': 0.0,
                'aesthetic': 0.0,
                'anime_specialized': 0.0,
                'overall': 0.0
            }

            image_count = len(image_results)
            if image_count == 0:
                continue

            # Sum up scores across all images
            for image_id, evaluations in image_results.items():
                if 'technical' in evaluations and 'overall_technical' in evaluations['technical']:
                    model_scores[model_name]['technical'] += evaluations['technical']['overall_technical']
                if 'aesthetic' in evaluations and 'overall_aesthetic' in evaluations['aesthetic']:
                    model_scores[model_name]['aesthetic'] += evaluations['aesthetic']['overall_aesthetic']
                if 'anime_specialized' in evaluations and 'overall_anime' in evaluations['anime_specialized']:
                    model_scores[model_name]['anime_specialized'] += evaluations['anime_specialized']['overall_anime']

            # Calculate averages
            model_scores[model_name]['technical'] /= image_count
            model_scores[model_name]['aesthetic'] /= image_count
            model_scores[model_name]['anime_specialized'] /= image_count

            # Calculate overall score (weighted average of all metrics)
            model_scores[model_name]['overall'] = (
                0.3 * model_scores[model_name]['technical'] +
                0.4 * model_scores[model_name]['aesthetic'] +
                0.3 * model_scores[model_name]['anime_specialized']
            )

        # Rank models by overall score
        rankings = sorted(
            [(model, scores['overall']) for model, scores in model_scores.items()],
            key=lambda x: x[1],
            reverse=True
        )

        # Format rankings
        formatted_rankings = [
            {'rank': i + 1, 'model': model, 'score': score}
            for i, (model, score) in enumerate(rankings)
        ]

        # Determine best model
        best_model = rankings[0][0] if rankings else None

        # Format comparison metrics
        comparison_metrics = {
            'technical': {model: scores['technical'] for model, scores in model_scores.items()},
            'aesthetic': {model: scores['aesthetic'] for model, scores in model_scores.items()},
            'anime_specialized': {model: scores['anime_specialized'] for model, scores in model_scores.items()},
            'overall': {model: scores['overall'] for model, scores in model_scores.items()}
        }

        return {
            'best_model': best_model,
            'rankings': formatted_rankings,
            'comparison_metrics': comparison_metrics
        }
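
# Illustrative usage of EvaluatorManager outside the Gradio UI (a sketch, not
# executed by the app; the file paths and model names below are hypothetical):
#
#     manager = EvaluatorManager()
#     single = manager.evaluate_image('/tmp/sample.png', ['technical', 'aesthetic'])
#     per_model = {
#         'model_a': {'img_0': manager.evaluate_image('/tmp/a_0.png')},
#         'model_b': {'img_0': manager.evaluate_image('/tmp/b_0.png')},
#     }
#     ranking = manager.compare_models(per_model)
#     print(ranking['best_model'], ranking['rankings'])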
# Initialize evaluator manager
evaluator_manager = EvaluatorManager()

# Global variables to store uploaded images and results
uploaded_images = {}
evaluation_results = {}


def evaluate_images(images, model_name, selected_evaluators):
    """
    Evaluate uploaded images using selected evaluators.

    Args:
        images (list): List of uploaded image files.
        model_name (str): Name of the model that generated these images.
        selected_evaluators (list): List of evaluator IDs to use.

    Returns:
        str: Status message.
    """
    global uploaded_images, evaluation_results

    if not images:
        return "No images uploaded."

    if not model_name:
        model_name = "unknown_model"

    # Save uploaded images
    if model_name not in uploaded_images:
        uploaded_images[model_name] = []

    image_paths = []
    image_ids = []
    for img in images:
        # Save image to a temporary file; the index keeps growing across
        # repeated uploads for the same model so ids stay unique
        index = len(uploaded_images[model_name])
        img_id = f"{model_name}_{index}"
        img_path = f"/tmp/image_evaluator_uploads/{img_id}.png"
        os.makedirs(os.path.dirname(img_path), exist_ok=True)
        Image.open(img).save(img_path)

        # Add to uploaded images
        uploaded_images[model_name].append({
            'path': img_path,
            'id': img_id
        })
        image_paths.append(img_path)
        image_ids.append(img_id)

    # Evaluate images
    if not selected_evaluators:
        selected_evaluators = ['technical', 'aesthetic', 'anime_specialized']

    results = {}
    for img_id, img_path in zip(image_ids, image_paths):
        results[img_id] = evaluator_manager.evaluate_image(img_path, selected_evaluators)

    # Store results
    if model_name not in evaluation_results:
        evaluation_results[model_name] = {}
    evaluation_results[model_name].update(results)

    return f"Evaluated {len(images)} images for model '{model_name}'."
def compare_models():
    """
    Compare models based on evaluation results.

    Returns:
        tuple: (comparison summary text, path to overall score chart, path to radar chart)
    """
    global evaluation_results

    if not evaluation_results or len(evaluation_results) < 2:
        return "Need at least two models with evaluated images for comparison.", None, None

    # Compare models
    comparison = evaluator_manager.compare_models(evaluation_results)

    # Create comparison table
    models = list(evaluation_results.keys())
    metrics = ['technical', 'aesthetic', 'anime_specialized', 'overall']

    data = []
    for model in models:
        row = {'Model': model}
        for metric in metrics:
            if metric in comparison['comparison_metrics'] and model in comparison['comparison_metrics'][metric]:
                row[metric.capitalize()] = comparison['comparison_metrics'][metric][model]
            else:
                row[metric.capitalize()] = 0.0
        data.append(row)

    df = pd.DataFrame(data)

    # Add ranking information
    for rank_info in comparison['rankings']:
        if rank_info['model'] in df['Model'].values:
            df.loc[df['Model'] == rank_info['model'], 'Rank'] = rank_info['rank']

    # Sort by rank
    df = df.sort_values('Rank')

    # Create overall comparison chart
    plt.figure(figsize=(10, 6))
    overall_scores = [comparison['comparison_metrics']['overall'].get(model, 0) for model in models]
    bars = plt.bar(models, overall_scores, color='skyblue')

    # Add value labels on top of bars
    for bar in bars:
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width() / 2., height + 0.01,
                 f'{height:.2f}', ha='center', va='bottom')

    plt.title('Overall Quality Scores by Model')
    plt.xlabel('Model')
    plt.ylabel('Score')
    plt.ylim(0, 1.1)
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Save the chart
    overall_chart_path = "/tmp/image_evaluator_results/overall_comparison.png"
    os.makedirs(os.path.dirname(overall_chart_path), exist_ok=True)
    plt.savefig(overall_chart_path)
    plt.close()

    # Create radar chart
    categories = [m.capitalize() for m in metrics[:-1]]  # Exclude 'overall'
    N = len(categories)

    # Create angles for each metric
    angles = [n / float(N) * 2 * np.pi for n in range(N)]
    angles += angles[:1]  # Close the loop

    # Create radar chart
    plt.figure(figsize=(10, 10))
    ax = plt.subplot(111, polar=True)

    # Add lines for each model
    colors = plt.cm.tab10(np.linspace(0, 1, len(models)))
    for i, model in enumerate(models):
        values = [comparison['comparison_metrics'][metric].get(model, 0) for metric in metrics[:-1]]
        values += values[:1]  # Close the loop
        ax.plot(angles, values, linewidth=2, linestyle='solid', label=model, color=colors[i])
        ax.fill(angles, values, alpha=0.1, color=colors[i])

    # Set category labels
    plt.xticks(angles[:-1], categories)

    # Set y-axis limits
    ax.set_ylim(0, 1)

    # Add legend
    plt.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))
    plt.title('Detailed Metrics Comparison by Model')

    # Save the chart
    radar_chart_path = "/tmp/image_evaluator_results/radar_comparison.png"
    plt.savefig(radar_chart_path)
    plt.close()

    # Create result message
    result_message = f"Best model: {comparison['best_model']}\n\nModel rankings:\n"
    for rank in comparison['rankings']:
        result_message += f"{rank['rank']}. {rank['model']} (score: {rank['score']:.2f})\n"

    return result_message, overall_chart_path, radar_chart_path
def export_results(format_type):
    """
    Export evaluation results to file.

    Args:
        format_type (str): Export format ('csv', 'json', or 'html').

    Returns:
        str: Path to the exported file, or a status message if export is not possible.
    """
    global evaluation_results

    if not evaluation_results:
        return "No evaluation results to export."

    # Create output directory
    output_dir = "/tmp/image_evaluator_results"
    os.makedirs(output_dir, exist_ok=True)

    # Compare models if multiple models are available
    if len(evaluation_results) >= 2:
        comparison = evaluator_manager.compare_models(evaluation_results)
    else:
        comparison = None

    # Create DataFrame for the results
    models = list(evaluation_results.keys())
    metrics = ['technical', 'aesthetic', 'anime_specialized', 'overall']

    if comparison:
        data = []
        for model in models:
            row = {'Model': model}
            for metric in metrics:
                if metric in comparison['comparison_metrics'] and model in comparison['comparison_metrics'][metric]:
                    row[metric.capitalize()] = comparison['comparison_metrics'][metric][model]
                else:
                    row[metric.capitalize()] = 0.0
            data.append(row)

        df = pd.DataFrame(data)

        # Add ranking information
        for rank_info in comparison['rankings']:
            if rank_info['model'] in df['Model'].values:
                df.loc[df['Model'] == rank_info['model'], 'Rank'] = rank_info['rank']

        # Sort by rank
        df = df.sort_values('Rank')
    else:
        # Single model, create detailed results
        model = models[0]
        data = []
        for img_id, results in evaluation_results[model].items():
            row = {'Image': img_id}
            for evaluator_id, evaluator_results in results.items():
                for metric, value in evaluator_results.items():
                    row[f"{evaluator_id}_{metric}"] = value
            data.append(row)
        df = pd.DataFrame(data)

    # Export based on format
    if format_type == 'csv':
        output_path = os.path.join(output_dir, 'evaluation_results.csv')
        df.to_csv(output_path, index=False)
    elif format_type == 'json':
        output_path = os.path.join(output_dir, 'evaluation_results.json')
        if comparison:
            export_data = {
                'comparison': comparison,
                'results': evaluation_results
            }
        else:
            export_data = evaluation_results
        with open(output_path, 'w') as f:
            json.dump(export_data, f, indent=2)
    elif format_type == 'html':
        output_path = os.path.join(output_dir, 'evaluation_results.html')
        df.to_html(output_path, index=False)
    else:
        return f"Unsupported format: {format_type}"

    return output_path
def reset_data():
    """Reset all uploaded images and evaluation results."""
    global uploaded_images, evaluation_results
    uploaded_images = {}
    evaluation_results = {}
    return "All data has been reset."


def create_interface():
    """Create Gradio interface."""
    # Get available evaluators
    available_evaluators = evaluator_manager.get_available_evaluators()
    evaluator_choices = [e['id'] for e in available_evaluators]

    with gr.Blocks(title="Image Evaluator") as interface:
        gr.Markdown("# Image Evaluator")
        gr.Markdown("Tool for evaluating and comparing images generated by different AI models")

        with gr.Tab("Upload & Evaluate"):
            with gr.Row():
                with gr.Column():
                    images_input = gr.File(file_count="multiple", label="Upload Images")
                    model_name_input = gr.Textbox(label="Model Name", placeholder="Enter model name")
                    evaluator_select = gr.CheckboxGroup(choices=evaluator_choices, label="Select Evaluators", value=evaluator_choices)
                    evaluate_button = gr.Button("Evaluate Images")
                with gr.Column():
                    evaluation_output = gr.Textbox(label="Evaluation Status")

            evaluate_button.click(
                evaluate_images,
                inputs=[images_input, model_name_input, evaluator_select],
                outputs=evaluation_output
            )

        with gr.Tab("Compare Models"):
            with gr.Row():
                compare_button = gr.Button("Compare Models")
            with gr.Row():
                with gr.Column():
                    comparison_output = gr.Textbox(label="Comparison Results")
                with gr.Column():
                    overall_chart = gr.Image(label="Overall Scores")
                    radar_chart = gr.Image(label="Detailed Metrics")

            compare_button.click(
                compare_models,
                inputs=[],
                outputs=[comparison_output, overall_chart, radar_chart]
            )

        with gr.Tab("Export Results"):
            with gr.Row():
                format_select = gr.Radio(choices=["csv", "json", "html"], label="Export Format", value="csv")
                export_button = gr.Button("Export Results")
            with gr.Row():
                export_output = gr.Textbox(label="Export Status")

            export_button.click(
                export_results,
                inputs=[format_select],
                outputs=export_output
            )

        with gr.Tab("Help"):
            gr.Markdown("""
            ## How to Use Image Evaluator

            ### Step 1: Upload Images
            - Go to the "Upload & Evaluate" tab
            - Upload images for a specific model
            - Enter the model name
            - Select which evaluators to use
            - Click "Evaluate Images"
            - Repeat for each model you want to compare

            ### Step 2: Compare Models
            - Go to the "Compare Models" tab
            - Click "Compare Models" to see results
            - The best model will be highlighted
            - View charts for visual comparison

            ### Step 3: Export Results
            - Go to the "Export Results" tab
            - Select export format (CSV, JSON, or HTML)
            - Click "Export Results"
            - Download the exported file

            ### Available Metrics

            #### Technical Metrics
            - Sharpness: Measures image clarity and detail
            - Noise: Measures absence of unwanted variations
            - Artifacts: Measures absence of compression artifacts
            - Saturation: Measures color intensity
            - Contrast: Measures difference between light and dark areas

            #### Aesthetic Metrics
            - Color Harmony: Measures how well colors work together
            - Composition: Measures adherence to compositional principles
            - Visual Interest: Measures how visually engaging the image is

            #### Anime-Specific Metrics
            - Line Quality: Measures clarity and quality of line work
            - Color Palette: Evaluates color choices for anime style
            - Character Quality: Assesses character design and rendering
            - Style Consistency: Measures adherence to anime style conventions
            """)

        with gr.Row():
            reset_button = gr.Button("Reset All Data")
            reset_output = gr.Textbox(label="Reset Status")

        reset_button.click(
            reset_data,
            inputs=[],
            outputs=reset_output
        )

    return interface


# Create and launch the interface
interface = create_interface()

if __name__ == "__main__":
    interface.launch()
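
# To run the app locally (a sketch; assumes the standard dependencies used
# above are installed and that this file is saved as app.py, the usual
# convention for Hugging Face Spaces):
#
#     pip install gradio opencv-python pillow numpy pandas matplotlib torch
#     python app.py
#
# Gradio then serves the interface on a local URL (by default
# http://127.0.0.1:7860).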