#!/usr/bin/env python3
"""Calculates the Kernel Inception Distance (KID) to evaluate GANs."""
import os
import sys
import json
import glob
import pathlib
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter

import numpy as np
import torch
from sklearn.metrics.pairwise import polynomial_kernel
from scipy import linalg
from PIL import Image
from torch.nn.functional import adaptive_avg_pool2d

try:
    from tqdm import tqdm
except ImportError:
    # If tqdm is not available, provide a no-op stand-in
    def tqdm(x):
        return x

import cv2

from models.inception import InceptionV3
from models.lenet import LeNet5
def get_activations(files, model, batch_size=50, dims=2048,
                    cuda=False, verbose=False, reso=128):
    """Calculates the activations of the pool_3 layer for all images.

    Params:
    -- files       : List of image file paths
    -- model       : Instance of the Inception model
    -- batch_size  : Batch size of images for the model to process at once.
                     Make sure that the number of samples is a multiple of
                     the batch size, otherwise some samples are ignored. This
                     behavior is retained to match the original FID score
                     implementation.
    -- dims        : Dimensionality of features returned by Inception
    -- cuda        : If set to True, use GPU
    -- verbose     : If set to True and parameter out_step is given, the number
                     of calculated batches is reported.
    Returns:
    -- A numpy array of dimension (num images, dims) that contains the
       activations of the given tensor when feeding Inception with the
       query tensor.
    """
    model.eval()

    is_numpy = isinstance(files[0], np.ndarray)

    if len(files) % batch_size != 0:
        print(('Warning: number of images is not a multiple of the '
               'batch size. Some samples are going to be ignored.'))
    if batch_size > len(files):
        print(('Warning: batch size is bigger than the data size. '
               'Setting batch size to data size'))
        batch_size = len(files)

    n_batches = len(files) // batch_size
    n_used_imgs = n_batches * batch_size
    pred_arr = np.empty((n_used_imgs, dims))

    for i in tqdm(range(n_batches)):
        if verbose:
            print('\rPropagating batch %d/%d' % (i + 1, n_batches),
                  end='', flush=True)
        start = i * batch_size
        end = start + batch_size

        if is_numpy:
            # Arrays are assumed to be in [-1, 1]; map them to [0, 1].
            images = np.copy(files[start:end]) + 1
            images /= 2.
        else:
            images = []
            for f in files[start:end]:
                try:
                    img = cv2.imread(str(f))
                    img = cv2.resize(img, (reso, reso), interpolation=cv2.INTER_CUBIC)
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                except Exception:
                    # Fall back to the first image when a file is missing or unreadable.
                    img = cv2.imread(str(files[0]))
                    img = cv2.resize(img, (reso, reso), interpolation=cv2.INTER_CUBIC)
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    print(str(f))
                images.append(img)

            images = np.stack(images).astype(np.float32) / 255.
            # Reshape to (n_images, 3, height, width)
            images = images.transpose((0, 3, 1, 2))

        batch = torch.from_numpy(images).type(torch.FloatTensor)
        if cuda:
            batch = batch.cuda()

        pred = model(batch)[0]

        # If model output is not scalar, apply global spatial average pooling.
        # This happens if you choose a dimensionality not equal 2048.
        if pred.shape[2] != 1 or pred.shape[3] != 1:
            pred = adaptive_avg_pool2d(pred, output_size=(1, 1))

        pred_arr[start:end] = pred.cpu().data.numpy().reshape(batch_size, -1)

    if verbose:
        print('done', np.min(images))

    return pred_arr
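# A minimal usage sketch for get_activations (illustrative only; the glob
# pattern below is an assumption, not something this script defines):
#
#   block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[2048]
#   inception = InceptionV3([block_idx]).eval()
#   image_files = sorted(glob.glob('renders/**/*.jpg', recursive=True))
#   feats = get_activations(image_files, inception, batch_size=50,
#                           dims=2048, cuda=False, reso=128)
#   # feats has shape (n_images rounded down to a batch multiple, 2048)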
def extract_lenet_features(imgs, net):
    # Extract LeNet-5 features in chunks of 100 images; inputs that appear to
    # be in [-1, 1] are rescaled to [0, 1] first.
    net.eval()
    feats = []
    imgs = imgs.reshape([-1, 100] + list(imgs.shape[1:]))
    if imgs[0].min() < -0.001:
        imgs = (imgs + 1) / 2.0
    print(imgs.shape, imgs.min(), imgs.max())
    imgs = torch.from_numpy(imgs)
    for i, images in enumerate(imgs):
        feats.append(net.extract_features(images).detach().cpu().numpy())
    feats = np.vstack(feats)
    return feats
def _compute_activations(path, model, batch_size, dims, cuda, model_type, reso, dataset):
    # Activations of the reference (ground-truth) renderings. They are cached
    # to disk so repeated evaluations against the same reference set are cheap.
    sample_name = path.split('/')[-1]
    # basepath = '/mnt/petrelfs/caoziang/3D_generation/cmetric/kid' + str(reso)
    # basepath = '/mnt/lustre/yslan/logs/nips23/LSGM/cldm/cmetric/shapenet-outs/kid' + str(reso) + 'test' + dataset
    # basepath = '/mnt/lustre/yslan/logs/nips23/LSGM/cldm/cmetric/shapenet-outs-testTra/kid' + str(reso) + 'test' + dataset
    # basepath = "/mnt/sfs-common/yslan/Repo/3dgen/FID-KID-Outputdir/metrics/kid/gso_gt"
    # basepath = "/mnt/sfs-common/yslan/Repo/3dgen/FID-KID-Outputdir-free3d/metrics/kid/gso_gt"
    basepath = "/mnt/sfs-common/yslan/Repo/3dgen/FID-KID-Outputdir-objv/metrics/kid/gso_gt"
    os.makedirs(os.path.join(basepath), exist_ok=True)

    files = []

    # ! objv dataset
    objv_dataset = '/mnt/sfs-common/yslan/Dataset/Obajverse/chunk-jpeg-normal/bs_16_fixsave3/170K/512/'
    dataset_json = os.path.join(objv_dataset, 'dataset.json')
    with open(dataset_json, 'r') as f:
        dataset_json = json.load(f)

    # all_objs = dataset_json['Animals'][::3][:6250]
    all_objs = dataset_json['Animals'][::3][1100:2200]
    all_objs = all_objs[:600]

    for obj_folder in tqdm(all_objs):
        obj_folder = obj_folder[:-2]  # to load 3 chunks
        for batch in range(1, 4):
            for idx in range(8):
                files.append(os.path.join(path, obj_folder, str(batch), f'{idx}.jpg'))

    # for obj_folder in tqdm(sorted(os.listdir(path))):
    #     for idx in range(0, 25):
    #         # img_name = os.path.join(path, obj_folder, 'rgba', f'{idx:03}.png')
    #         img_name = os.path.join(path, obj_folder, 'render_mvs_25', 'model', f'{idx:03}.png')
    #         files.append(img_name)

    '''
    if not os.path.exists(os.path.join(basepath, path.split('/')[-1] + str(reso) + 'kid.npy')):
        path = pathlib.Path(path)
        if not type(path) == np.ndarray:
            files = []
            # load gso
            for obj_folder in tqdm(sorted(os.listdir(path))):
                for idx in range(0, 25):
                    # for idx in [0]:
                    img_name = os.path.join(path, obj_folder, 'rgba', f'{idx:03}.png')
                    files.append(img_name)
            if len(files) > 50000:
                files = files[:50000]
                break
    '''

    if model_type == 'inception':
        if os.path.exists(os.path.join(basepath, sample_name + str(reso) + 'kid.npy')):
            act = np.load(os.path.join(basepath, sample_name + str(reso) + 'kid.npy'))
            print('load_dataset', dataset)
        else:
            act = get_activations(files, model, batch_size, dims, cuda, reso=reso)
            np.save(os.path.join(basepath, sample_name + str(reso) + 'kid'), act)
    elif model_type == 'lenet':
        act = extract_lenet_features(files, model)
    return act
def _compute_activations_new(path, model, batch_size, dims, cuda, model_type, reso, dataset):
    # Activations of the generated samples. The on-disk layout differs per
    # method (Lara / GA / scale3d / LRM / default), so image paths are
    # assembled accordingly before running Inception. Results are cached.
    sample_name = path.split('/')[-1]
    # basepath = '/mnt/petrelfs/caoziang/3D_generation/cmetric/get3d/kid' + str(reso) + 'test' + dataset
    # basepath = '/mnt/lustre/yslan/logs/nips23/LSGM/cldm/cmetric/shapenet-outs/kid' + str(reso) + 'test' + dataset
    # basepath = '/mnt/sfs-common/yslan/Repo/3dgen/FID-KID-Outputdir/metrics/kid' + str(reso) + 'test' + dataset
    # if '_cond' in path:
    #     basepath = basepath + '_cond'
    # basepath = '/mnt/sfs-common/yslan/Repo/3dgen/FID-KID-Outputdir-objv/metrics/fid/' + str(reso) + dataset
    basepath = '/mnt/sfs-common/yslan/Repo/3dgen/FID-KID-Outputdir-objv/metrics/kid/' + str(reso) + dataset
    # basepath = '/mnt/sfs-common/yslan/Repo/3dgen/FID-KID-Outputdir-free3d/metrics/kid/' + str(reso) + dataset

    objv_dataset = '/mnt/sfs-common/yslan/Dataset/Obajverse/chunk-jpeg-normal/bs_16_fixsave3/170K/512/'
    dataset_json = os.path.join(objv_dataset, 'dataset.json')
    with open(dataset_json, 'r') as f:
        dataset_json = json.load(f)

    all_objs = dataset_json['Animals'][::3][1100:2200][:600]

    if not isinstance(path, np.ndarray):
        files = []
        # for classname in os.listdir(path):
        #     classpath = os.path.join(path, classname)
        #     for instance in os.listdir(classpath):
        #         if os.path.isdir(os.path.join(classpath, instance)):
        #             img = os.path.join(classpath, instance)
        #             if 'diffusion' in img:
        #                 files = files + sorted([os.path.join(img, idd) for idd in os.listdir(img) if idd.endswith('ddpm.png')])
        #             else:
        '''
        for obj_folder in sorted(os.listdir(path)):
            if not os.path.isdir(os.path.join(path, obj_folder)):
                continue
            # for idx in os.listdir(os.path.join(path, obj_folder)):
            # for idx in range(0, 25, 5):
            for idx in [0]:
                for i in range(10):
                    # img = os.path.join(path, obj_folder, str(idx), f'{i}.jpg')
                    if 'GA' in path:
                        img = os.path.join(path, obj_folder, str(idx), f'sample-0-{i}.jpg')
                    else:
                        img = os.path.join(path, obj_folder, str(idx), f'{i}.jpg')
                    files.append(img)
        '''

        # ! objv
        for obj_folder in tqdm(all_objs):
            obj_folder = '/'.join(obj_folder.split('/')[1:])
            for idx in range(24):
                # files.append(os.path.join(path, obj_folder, f'{idx}.jpg'))
                if 'Lara' in path:
                    files.append(os.path.join(path, '/'.join(obj_folder.split('/')[:-1]), '0.jpg', f'{idx}.jpg'))
                elif 'GA' in path:
                    files.append(os.path.join(path, '/'.join(obj_folder.split('/')[:-1]), '0', f'sample-0-{idx}.jpg'))
                elif 'scale3d' in path:
                    files.append(os.path.join(path, '/'.join(obj_folder.split('/')[:-1]), '1', f'{idx}.png'))
                elif 'LRM' in path:
                    files.append(os.path.join(path, '/'.join(obj_folder.split('/')[:-1]), '0', f'{idx}.jpg'))
                else:
                    files.append(os.path.join(path, obj_folder, '0', f'{idx}.jpg'))

        # ! gso
        # for obj_folder in sorted(os.listdir(path)):
        #     if obj_folder == 'runs':
        #         continue
        #     if not os.path.isdir(os.path.join(path, obj_folder)):
        #         continue
        #     for idx in [0]:
        #         for i in range(24):
        #             if 'GA' in path:
        #                 img = os.path.join(path, obj_folder, str(idx), f'sample-0-{i}.jpg')
        #             else:
        #                 img = os.path.join(path, obj_folder, str(idx), f'{i}.jpg')
        #             files.append(img)

        # for name in os.listdir(path):
        #     # if os.path.isdir(os.path.join(path, name)):  # ! no cls
        #     img = os.path.join(path, name)
        #     files.append(img)  # ! directly append
        #     # files = files + sorted([os.path.join(img, idd) for idd in os.listdir(img) if idd.endswith('.png')])

        files = files[:50000]

    os.makedirs(os.path.join(basepath), exist_ok=True)

    if model_type == 'inception':
        if os.path.exists(os.path.join(basepath, sample_name + str(reso) + 'kid.npy')):
            act = np.load(os.path.join(basepath, sample_name + str(reso) + 'kid.npy'))
            print('load_sample')
        else:
            act = get_activations(files, model, batch_size, dims, cuda, reso=reso)
            np.save(os.path.join(basepath, sample_name + str(reso) + 'kid'), act)
    elif model_type == 'lenet':
        act = extract_lenet_features(files, model)
    return act
def calculate_kid_given_paths(paths, batch_size, cuda, dims, model_type='inception', reso=128, dataset='omni'):
    """Calculates the KID between the first (reference) path and each remaining path."""
    pths = []
    for p in paths:
        if not os.path.exists(p):
            raise RuntimeError('Invalid path: %s' % p)
        if os.path.isdir(p):
            pths.append(p)
        # elif p.endswith('.npy'):
        #     np_imgs = np.load(p)
        #     if np_imgs.shape[0] > 50000:
        #         np_imgs = np_imgs[np.random.permutation(np.arange(np_imgs.shape[0]))][:50000]
        #     pths.append(np_imgs)

    if model_type == 'inception':
        block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
        model = InceptionV3([block_idx])
    elif model_type == 'lenet':
        model = LeNet5()
        model.load_state_dict(torch.load('./models/lenet.pth'))
    if cuda:
        model.cuda()

    act_true = _compute_activations(pths[0], model, batch_size, dims, cuda, model_type, reso, dataset)
    pths = pths[1:]
    results = []
    for j, pth in enumerate(pths):
        print(paths[j + 1])
        actj = _compute_activations_new(pth, model, batch_size, dims, cuda, model_type, reso, dataset)
        kid_values = polynomial_mmd_averages(act_true, actj, n_subsets=100)
        results.append((paths[j + 1], kid_values[0].mean(), kid_values[0].std()))
    return results
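# A minimal end-to-end sketch (illustrative; the directory names are
# placeholders, and the hard-coded dataset paths in the activation helpers
# above must exist for this to run):
#
#   results = calculate_kid_given_paths(
#       ['/data/gt_renders', '/data/method_A', '/data/method_B'],
#       batch_size=100, cuda=True, dims=2048,
#       model_type='inception', reso=128, dataset='objv')
#   for path, kid_mean, kid_std in results:
#       print(f'{path}: KID = {kid_mean:.6f} +/- {kid_std:.6f}')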
def _sqn(arr):
    # Squared Frobenius norm: the sum of squared entries.
    flat = np.ravel(arr)
    return flat.dot(flat)


def polynomial_mmd_averages(codes_g, codes_r, n_subsets=50, subset_size=1000,
                            ret_var=True, output=sys.stdout, **kernel_args):
    m = min(codes_g.shape[0], codes_r.shape[0])
    mmds = np.zeros(n_subsets)
    if ret_var:
        vars = np.zeros(n_subsets)
    choice = np.random.choice

    with tqdm(range(n_subsets), desc='MMD', file=output) as bar:
        for i in bar:
            g = codes_g[choice(len(codes_g), subset_size, replace=False)]
            r = codes_r[choice(len(codes_r), subset_size, replace=False)]
            o = polynomial_mmd(g, r, **kernel_args, var_at_m=m, ret_var=ret_var)
            if ret_var:
                mmds[i], vars[i] = o
            else:
                mmds[i] = o
            bar.set_postfix({'mean': mmds[:i + 1].mean()})
    return (mmds, vars) if ret_var else mmds
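# Reporting convention: KID is the mean of the unbiased squared-MMD estimates
# over n_subsets random subsets of subset_size activations each (the protocol
# from "Demystifying MMD GANs"); the per-subset std is reported alongside it,
# as done in calculate_kid_given_paths above.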
def polynomial_mmd(codes_g, codes_r, degree=3, gamma=None, coef0=1,
                   var_at_m=None, ret_var=True):
    # use k(x, y) = (gamma <x, y> + coef0)^degree
    # default gamma is 1 / dim
    X = codes_g
    Y = codes_r

    K_XX = polynomial_kernel(X, degree=degree, gamma=gamma, coef0=coef0)
    K_YY = polynomial_kernel(Y, degree=degree, gamma=gamma, coef0=coef0)
    K_XY = polynomial_kernel(X, Y, degree=degree, gamma=gamma, coef0=coef0)

    return _mmd2_and_variance(K_XX, K_XY, K_YY,
                              var_at_m=var_at_m, ret_var=ret_var)
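# For reference, the unbiased squared-MMD estimator that _mmd2_and_variance
# assembles blockwise can be written directly from the kernel matrices. This
# naive version (an illustrative sketch, not called anywhere in this script)
# is handy as a sanity check on small subsets:
def _mmd2_unbiased_naive(K_XX, K_XY, K_YY):
    m = K_XX.shape[0]
    # Off-diagonal means of the within-set kernels, plus the cross-set mean.
    sum_XX = (K_XX.sum() - np.trace(K_XX)) / (m * (m - 1))
    sum_YY = (K_YY.sum() - np.trace(K_YY)) / (m * (m - 1))
    sum_XY = K_XY.sum() / (m * m)
    return sum_XX + sum_YY - 2 * sum_XY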
def _mmd2_and_variance(K_XX, K_XY, K_YY, unit_diagonal=False,
                       mmd_est='unbiased', block_size=1024,
                       var_at_m=None, ret_var=True):
    # based on
    # https://github.com/dougalsutherland/opt-mmd/blob/master/two_sample/mmd.py
    # but changed to not compute the full kernel matrix at once
    m = K_XX.shape[0]
    assert K_XX.shape == (m, m)
    assert K_XY.shape == (m, m)
    assert K_YY.shape == (m, m)
    if var_at_m is None:
        var_at_m = m

    # Get the various sums of kernels that we'll use
    # Kts drop the diagonal, but we don't need to compute them explicitly
    if unit_diagonal:
        diag_X = diag_Y = 1
        sum_diag_X = sum_diag_Y = m
        sum_diag2_X = sum_diag2_Y = m
    else:
        diag_X = np.diagonal(K_XX)
        diag_Y = np.diagonal(K_YY)

        sum_diag_X = diag_X.sum()
        sum_diag_Y = diag_Y.sum()

        sum_diag2_X = _sqn(diag_X)
        sum_diag2_Y = _sqn(diag_Y)

    Kt_XX_sums = K_XX.sum(axis=1) - diag_X
    Kt_YY_sums = K_YY.sum(axis=1) - diag_Y
    K_XY_sums_0 = K_XY.sum(axis=0)
    K_XY_sums_1 = K_XY.sum(axis=1)

    Kt_XX_sum = Kt_XX_sums.sum()
    Kt_YY_sum = Kt_YY_sums.sum()
    K_XY_sum = K_XY_sums_0.sum()

    if mmd_est == 'biased':
        mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * m)
                + (Kt_YY_sum + sum_diag_Y) / (m * m)
                - 2 * K_XY_sum / (m * m))
    else:
        assert mmd_est in {'unbiased', 'u-statistic'}
        mmd2 = (Kt_XX_sum + Kt_YY_sum) / (m * (m - 1))
        if mmd_est == 'unbiased':
            mmd2 -= 2 * K_XY_sum / (m * m)
        else:
            mmd2 -= 2 * (K_XY_sum - np.trace(K_XY)) / (m * (m - 1))

    if not ret_var:
        return mmd2

    Kt_XX_2_sum = _sqn(K_XX) - sum_diag2_X
    Kt_YY_2_sum = _sqn(K_YY) - sum_diag2_Y
    K_XY_2_sum = _sqn(K_XY)

    dot_XX_XY = Kt_XX_sums.dot(K_XY_sums_1)
    dot_YY_YX = Kt_YY_sums.dot(K_XY_sums_0)

    m1 = m - 1
    m2 = m - 2
    zeta1_est = (
        1 / (m * m1 * m2) * (
            _sqn(Kt_XX_sums) - Kt_XX_2_sum + _sqn(Kt_YY_sums) - Kt_YY_2_sum)
        - 1 / (m * m1)**2 * (Kt_XX_sum**2 + Kt_YY_sum**2)
        + 1 / (m * m * m1) * (
            _sqn(K_XY_sums_1) + _sqn(K_XY_sums_0) - 2 * K_XY_2_sum)
        - 2 / m**4 * K_XY_sum**2
        - 2 / (m * m * m1) * (dot_XX_XY + dot_YY_YX)
        + 2 / (m**3 * m1) * (Kt_XX_sum + Kt_YY_sum) * K_XY_sum
    )
    zeta2_est = (
        1 / (m * m1) * (Kt_XX_2_sum + Kt_YY_2_sum)
        - 1 / (m * m1)**2 * (Kt_XX_sum**2 + Kt_YY_sum**2)
        + 2 / (m * m) * K_XY_2_sum
        - 2 / m**4 * K_XY_sum**2
        - 4 / (m * m * m1) * (dot_XX_XY + dot_YY_YX)
        + 4 / (m**3 * m1) * (Kt_XX_sum + Kt_YY_sum) * K_XY_sum
    )
    var_est = (4 * (var_at_m - 2) / (var_at_m * (var_at_m - 1)) * zeta1_est
               + 2 / (var_at_m * (var_at_m - 1)) * zeta2_est)

    return mmd2, var_est
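# Quick consistency check (a sketch; it assumes both feature sets have the
# same number of rows, which polynomial_mmd already requires, and it uses the
# illustrative _mmd2_unbiased_naive helper defined above):
#
#   rng = np.random.default_rng(0)
#   a = rng.normal(size=(256, 2048))
#   b = rng.normal(size=(256, 2048))
#   mmd2, var = polynomial_mmd(a, b)
#   K = lambda u, v=None: polynomial_kernel(u, v, degree=3, gamma=None, coef0=1)
#   assert np.isclose(mmd2, _mmd2_unbiased_naive(K(a), K(a, b), K(b)))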
if __name__ == '__main__':
    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
    parser.add_argument('--true', type=str, required=True,
                        help='Path to the true images')
    parser.add_argument('--fake', type=str, nargs='+', required=True,
                        help='Path to the generated images')
    parser.add_argument('--batch-size', type=int, default=100,
                        help='Batch size to use')
    parser.add_argument('--reso', type=int, default=128,
                        help='Resolution images are resized to before Inception')
    parser.add_argument('--dims', type=int, default=2048,
                        choices=list(InceptionV3.BLOCK_INDEX_BY_DIM),
                        help=('Dimensionality of Inception features to use. '
                              'By default, uses pool3 features'))
    parser.add_argument('-c', '--gpu', default='0', type=str,
                        help='GPU to use (leave blank for CPU only)')
    parser.add_argument('--model', default='inception', type=str,
                        help='inception or lenet')
    parser.add_argument('--dataset', default='omni', type=str,
                        help='Dataset tag used to name cached activation files')
    args = parser.parse_args()
    print(args)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    paths = [args.true] + args.fake
    results = calculate_kid_given_paths(paths, args.batch_size, True, args.dims,
                                        model_type=args.model, reso=args.reso, dataset=args.dataset)
    for p, m, s in results:
        print('KID (%s): %.6f (%.6f)' % (p, m, s))
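# Example invocation (script and directory names are placeholders):
#   python kid_score.py --true /data/gt_renders --fake /data/method_A /data/method_B \
#       --batch-size 100 --reso 128 --dims 2048 -c 0 --dataset objv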