Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Update dataset configurations and descriptions for COCO 2014 and SA-1B; refactor file pattern generation in examples.py
ab4ab85
# Location of the benchmark data. Edit these two values to match your dataset
# layout when loading locally or from a different source.
# IMPORTANT: running from Docker (e.g. on Hugging Face) requires additional
# setup beyond changing these values.
ABS_DATASET_DOMAIN = "https://dl.fbaipublicfiles.com"

# Sample domain for local loading (loading examples from local files may need
# further configuration):
#   ABS_DATASET_DOMAIN = "./data"

# Base path shared by every dataset and example config; keeps the trailing "/".
ABS_DATASET_PATH = ABS_DATASET_DOMAIN + "/omnisealbench/"
def _audio_dataset_config():
    """Return a fresh config dict for an audio dataset.

    Every call builds new nested lists/dicts, so the audio datasets never
    share mutable state with each other.
    """
    return {
        "type": "audio",
        "path": ABS_DATASET_PATH,
        "first_cols": ["snr", "sisnr", "stoi", "pesq"],
        "attack_scores": ["bit_acc", "log10_p_value", "TPR", "FPR"],
        # Attack name -> category label.
        "categories": {
            "speed": "Time",
            "updownresample": "Time",
            "echo": "Time",
            "random_noise": "Amplitude",
            "lowpass_filter": "Amplitude",
            "highpass_filter": "Amplitude",
            "bandpass_filter": "Amplitude",
            "smooth": "Amplitude",
            "boost_audio": "Amplitude",
            "duck_audio": "Amplitude",
            "shush": "Amplitude",
            "pink_noise": "Amplitude",
            "aac_compression": "Compression",
            "mp3_compression": "Compression",
        },
        "attacks_with_variations": [
            "random_noise",
            "lowpass_filter",
            "highpass_filter",
            "boost_audio",
            "duck_audio",
            "shush",
        ],
    }


def _image_dataset_config():
    """Return a fresh config dict for an image dataset.

    Every call builds new nested lists/dicts, so the image datasets never
    share mutable state with each other.
    """
    return {
        "type": "image",
        "path": ABS_DATASET_PATH,
        "first_cols": ["psnr", "ssim", "lpips", "decoder_time"],
        "attack_scores": ["bit_acc", "log10_p_value", "TPR", "FPR"],
        # Attack name -> category label.
        "categories": {
            "proportion": "Geometric",
            "collage": "Inpainting",
            "center_crop": "Geometric",
            "rotate": "Geometric",
            "jpeg": "Compression",
            "brightness": "Visual",
            "contrast": "Visual",
            "saturation": "Visual",
            "sharpness": "Visual",
            "resize": "Geometric",
            "overlay_text": "Inpainting",
            "hflip": "Geometric",
            "perspective": "Geometric",
            "median_filter": "Visual",
            "hue": "Visual",
            "gaussian_blur": "Visual",
            "comb": "Mixed",
            "avg": "Averages",
            "none": "Baseline",
        },
        "attacks_with_variations": [
            "center_crop",
            "jpeg",
            "brightness",
            "contrast",
            "saturation",
            "sharpness",
            "resize",
            "perspective",
            "median_filter",
            "hue",
            "gaussian_blur",
        ],
    }


# Per-dataset settings, keyed by "<dataset_name>/<modality>".
# Insertion order is kept as: audio datasets, image datasets, video dataset.
DATASET_CONFIGS = {
    "voxpopuli_1k/audio": _audio_dataset_config(),
    "ravdess_1k/audio": _audio_dataset_config(),
    "val2014_1k_v2/image": _image_dataset_config(),
    "sa_1b_val_1k/image": _image_dataset_config(),
    "sav_val_full_v2/video": {
        "type": "video",
        "path": ABS_DATASET_PATH,
        "first_cols": [
            "psnr",
            "ssim",
            "msssim",
            "lpips",
            "vmaf",
            "decoder_time",
        ],
        "attack_scores": ["bit_acc", "log10_p_value", "TPR", "FPR"],
        # Attack name -> category label.
        "categories": {
            "HorizontalFlip": "Geometric",
            "Rotate": "Geometric",
            "Resize": "Geometric",
            "Crop": "Geometric",
            "Perspective": "Geometric",
            "Brightness": "Visual",
            "Contrast": "Visual",
            "Saturation": "Visual",
            "Grayscale": "Visual",
            "Hue": "Visual",
            "JPEG": "Compression",
            "GaussianBlur": "Visual",
            "MedianFilter": "Visual",
            "H264": "Compression",
            "H264rgb": "Compression",
            "H265": "Compression",
            "VP9": "Compression",
            "H264_Crop_Brightness0": "Mixed",
            "H264_Crop_Brightness1": "Mixed",
            "H264_Crop_Brightness2": "Mixed",
            "H264_Crop_Brightness3": "Mixed",
        },
        "attacks_with_variations": [
            "Rotate",
            "Resize",
            "Crop",
            "Brightness",
            "Contrast",
            "Saturation",
            "H264",
            "H264rgb",
            "H265",
        ],
    },
}
# Example-viewer settings, keyed by modality ("audio", "image", "video").
# Every entry carries the same four keys. "type" repeats the modality key so
# that an entry is self-describing; previously only the audio entry had it.
EXAMPLE_CONFIGS = {
    "audio": {
        "type": "audio",
        "dataset_name": "voxpopuli_1k",
        "path": ABS_DATASET_PATH,
        "db_key": "voxpopuli",
    },
    # Alternative image example source (val2014 dataset), kept for reference:
    # "image": {
    #     "type": "image",
    #     "dataset_name": "val2014_1k_v2",
    #     "path": ABS_DATASET_PATH,
    #     "db_key": "local_val2014",
    # },
    "image": {
        "type": "image",
        "dataset_name": "sa_1b_val_1k",
        "path": ABS_DATASET_PATH,
        "db_key": "local_valid",
    },
    "video": {
        "type": "video",
        "dataset_name": "sav_val_full_v2",
        "path": ABS_DATASET_PATH,
        "db_key": "sa-v_sav_val_videos",
    },
}
def get_datasets():
    """Group the keys of ``DATASET_CONFIGS`` by their ``"type"`` value.

    Returns:
        A dict with exactly the keys ``"audio"``, ``"image"`` and ``"video"``,
        each mapping to a list of dataset keys in ``DATASET_CONFIGS`` order.
        Entries whose ``"type"`` is missing or is not one of those three
        values are left out.
    """
    by_modality = {kind: [] for kind in ("audio", "image", "video")}
    for dataset_key, settings in DATASET_CONFIGS.items():
        bucket = by_modality.get(settings.get("type"))
        if bucket is not None:
            bucket.append(dataset_key)
    return by_modality
def get_example_config(type):
    """Look up the example configuration registered under *type*.

    Args:
        type: Key into ``EXAMPLE_CONFIGS``. The name shadows the builtin but
            is kept because it is part of the public signature.

    Returns:
        The matching entry of ``EXAMPLE_CONFIGS`` (the stored dict itself,
        not a copy).

    Raises:
        ValueError: If *type* is not a key of ``EXAMPLE_CONFIGS``.
    """
    if type not in EXAMPLE_CONFIGS:
        raise ValueError(f"Unknown example type: {type}")
    return EXAMPLE_CONFIGS[type]
def get_dataset_config(dataset_name):
    """Look up the configuration registered for *dataset_name*.

    Args:
        dataset_name: Key into ``DATASET_CONFIGS``.

    Returns:
        The matching entry of ``DATASET_CONFIGS`` (the stored dict itself,
        not a copy).

    Raises:
        ValueError: If *dataset_name* is not a key of ``DATASET_CONFIGS``.
    """
    if dataset_name not in DATASET_CONFIGS:
        raise ValueError(f"Unknown dataset: {dataset_name}")
    return DATASET_CONFIGS[dataset_name]