import argparse
import inspect
from pdb import set_trace as st

from cldm.cldm import ControlledUnetModel, ControlNet

from . import gaussian_diffusion as gd
from .respace import SpacedDiffusion, space_timesteps
from .unet import SuperResModel, UNetModel, EncoderUNetModel, UNetModelWithHint

import torch as th

# from dit.dit_models_xformers import DiT_models, TextCondDiTBlock
from dit.dit_models_xformers import TextCondDiTBlock, ImageCondDiTBlock, FinalLayer
from dit.dit_trilatent import DiT_models as DiT_models_t23d
from dit.dit_i23d import DiT_models as DiT_models_i23d

# if th.cuda.is_available():
#     from xformers.triton import FusedLayerNorm as LayerNorm

NUM_CLASSES = 1000

def diffusion_defaults():
    """
    Defaults for image and classifier training.
    """
    return dict(
        learn_sigma=False,
        diffusion_steps=1000,
        noise_schedule="linear",
        standarization_xt=False,
        timestep_respacing="",
        use_kl=False,
        predict_xstart=False,
        predict_v=False,
        rescale_timesteps=False,
        rescale_learned_sigmas=False,
        mixed_prediction=False,  # ! to assign later
    )

def classifier_defaults():
    """
    Defaults for classifier models.
    """
    return dict(
        image_size=64,
        classifier_use_fp16=False,
        classifier_width=128,
        classifier_depth=2,
        classifier_attention_resolutions="32,16,8",  # 16
        classifier_use_scale_shift_norm=True,  # False
        classifier_resblock_updown=True,  # False
        classifier_pool="attention",
    )

def control_net_defaults():
    res = dict(
        only_mid_control=False,  # TODO
        control_key='img',
        normalize_clip_encoding=False,  # zero-shot text inference
        scale_clip_encoding=1.0,
        cfg_dropout_prob=0.0,  # dropout condition for CFG training
        cond_key='caption',
    )
    return res

def continuous_diffusion_defaults():
    # NVlabs/LSGM/train_vada.py
    res = dict(
        sde_time_eps=1e-2,
        sde_beta_start=0.1,
        sde_beta_end=20.0,
        sde_sde_type='vpsde',
        sde_sigma2_0=0.0,  # ?
        iw_sample_p='drop_sigma2t_iw',
        iw_sample_q='ll_iw',
        iw_subvp_like_vp_sde=False,
        train_vae=True,
        pred_type='eps',  # [x0, eps]
        # joint_train=False,
        p_rendering_loss=False,
        unfix_logit=False,
        loss_type='eps',
        loss_weight='simple',  # snr snr_sqrt sigmoid_snr
        # train_vae_denoise_rendering=False,
        diffusion_ce_anneal=True,
        enable_mixing_normal=True,
    )
    return res

def model_and_diffusion_defaults():
    """
    Defaults for image training.
    """
    res = dict(
        # image_size=64,
        diffusion_input_size=224,
        num_channels=128,
        num_res_blocks=2,
        num_heads=4,
        num_heads_upsample=-1,
        num_head_channels=-1,
        attention_resolutions="16,8",
        channel_mult="",
        dropout=0.0,
        class_cond=False,
        use_checkpoint=False,
        use_scale_shift_norm=True,
        resblock_updown=False,
        use_fp16=False,
        use_new_attention_order=False,
        denoise_in_channels=3,
        denoise_out_channels=3,
        # ! controlnet args
        create_controlnet=False,
        create_dit=False,
        i23d=False,
        create_unet_with_hint=False,
        dit_model_arch='DiT-L/2',
        # ! ldm unet support
        use_spatial_transformer=False,  # custom transformer support
        transformer_depth=1,  # custom transformer support
        context_dim=-1,  # custom transformer support
        pooling_ctx_dim=-1,
        roll_out=False,  # whether concat in batch, not channel
        n_embed=None,  # custom support for prediction of discrete ids into codebook of first stage vq model
        legacy=True,
        mixing_logit_init=-6,
        hint_channels=3,
        # unconditional_guidance_scale=1.0,
        # normalize_clip_encoding=False,  # for zero-shot conditioning
    )
    res.update(diffusion_defaults())
    # res.update(continuous_diffusion_defaults())
    return res

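# A minimal sketch (illustrative, not one of the original entry points) of how
# these defaults are meant to be consumed: seed an argparse parser via
# add_dict_to_argparser (defined at the bottom of this file), then forward the
# parsed values to create_model_and_diffusion through args_to_dict. The helper
# name _example_build_model_from_defaults is ours, not part of the codebase.
def _example_build_model_from_defaults(argv=None):
    defaults = model_and_diffusion_defaults()
    parser = argparse.ArgumentParser()
    add_dict_to_argparser(parser, defaults)
    # pass e.g. argv=[] for a purely default configuration
    args = parser.parse_args(argv)
    # filter the namespace back down to exactly the keys create_model_and_diffusion expects
    model, diffusion = create_model_and_diffusion(
        **args_to_dict(args, defaults.keys()))
    return model, diffusion
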
def classifier_and_diffusion_defaults():
    res = classifier_defaults()
    res.update(diffusion_defaults())
    return res

def create_model_and_diffusion(
    # image_size,
    diffusion_input_size,
    class_cond,
    learn_sigma,
    num_channels,
    num_res_blocks,
    channel_mult,
    num_heads,
    num_head_channels,
    num_heads_upsample,
    attention_resolutions,
    dropout,
    diffusion_steps,
    noise_schedule,
    timestep_respacing,
    use_kl,
    predict_xstart,
    predict_v,
    rescale_timesteps,
    rescale_learned_sigmas,
    use_checkpoint,
    use_scale_shift_norm,
    resblock_updown,
    use_fp16,
    use_new_attention_order,
    denoise_in_channels,
    denoise_out_channels,
    standarization_xt,
    mixed_prediction,
    # controlnet
    create_controlnet,
    # only_mid_control,
    # control_key,
    use_spatial_transformer,
    transformer_depth,
    context_dim,
    pooling_ctx_dim,
    n_embed,
    legacy,
    mixing_logit_init,
    create_dit,
    i23d,
    create_unet_with_hint,
    dit_model_arch,
    roll_out,
    hint_channels,
    # unconditional_guidance_scale,
    # normalize_clip_encoding,
):
    model = create_model(
        diffusion_input_size,
        num_channels,
        num_res_blocks,
        channel_mult=channel_mult,
        learn_sigma=learn_sigma,
        class_cond=class_cond,
        use_checkpoint=use_checkpoint,
        attention_resolutions=attention_resolutions,
        num_heads=num_heads,
        num_head_channels=num_head_channels,
        num_heads_upsample=num_heads_upsample,
        use_scale_shift_norm=use_scale_shift_norm,
        dropout=dropout,
        resblock_updown=resblock_updown,
        use_fp16=use_fp16,
        use_new_attention_order=use_new_attention_order,
        denoise_in_channels=denoise_in_channels,
        denoise_out_channels=denoise_out_channels,
        mixed_prediction=mixed_prediction,
        create_controlnet=create_controlnet,
        # only_mid_control=only_mid_control,
        # control_key=control_key,
        use_spatial_transformer=use_spatial_transformer,
        transformer_depth=transformer_depth,
        context_dim=context_dim,
        pooling_ctx_dim=pooling_ctx_dim,
        n_embed=n_embed,
        legacy=legacy,
        mixing_logit_init=mixing_logit_init,
        create_dit=create_dit,
        i23d=i23d,
        create_unet_with_hint=create_unet_with_hint,
        dit_model_arch=dit_model_arch,
        roll_out=roll_out,
        hint_channels=hint_channels,
        # normalize_clip_encoding=normalize_clip_encoding,
    )
    diffusion = create_gaussian_diffusion(
        diffusion_steps=diffusion_steps,
        learn_sigma=learn_sigma,
        noise_schedule=noise_schedule,
        use_kl=use_kl,
        predict_xstart=predict_xstart,
        predict_v=predict_v,
        rescale_timesteps=rescale_timesteps,
        rescale_learned_sigmas=rescale_learned_sigmas,
        timestep_respacing=timestep_respacing,
        standarization_xt=standarization_xt,
    )
    return model, diffusion

def create_model(
    image_size,
    num_channels,
    num_res_blocks,
    channel_mult="",
    learn_sigma=False,
    class_cond=False,
    use_checkpoint=False,
    attention_resolutions="16",
    num_heads=1,
    num_head_channels=-1,
    num_heads_upsample=-1,
    use_scale_shift_norm=False,
    dropout=0,
    resblock_updown=False,
    use_fp16=False,
    use_new_attention_order=False,
    # denoise_in_channels=3,
    denoise_in_channels=-1,
    denoise_out_channels=3,
    mixed_prediction=False,
    create_controlnet=False,
    create_dit=False,
    # t23d=True,
    i23d=False,
    create_unet_with_hint=False,
    dit_model_arch='DiT-L/2',
    hint_channels=3,
    use_spatial_transformer=False,  # custom transformer support
    transformer_depth=1,  # custom transformer support
    context_dim=None,  # custom transformer support
    pooling_ctx_dim=768,
    n_embed=None,  # custom support for prediction of discrete ids into codebook of first stage vq model
    legacy=True,
    mixing_logit_init=-6,
    roll_out=False,
    # normalize_clip_encoding=False,
):
    if channel_mult == "":
        if image_size == 512:
            channel_mult = (0.5, 1, 1, 2, 2, 4, 4)
        elif image_size == 448:
            channel_mult = (0.5, 1, 1, 2, 2, 4, 4)
        elif image_size == 320:  # ffhq
            channel_mult = (0.5, 1, 1, 2, 2, 4, 4)
        elif image_size == 224 and denoise_in_channels == 144:  # ffhq
            channel_mult = (1, 1, 2, 3, 4, 4)
        elif image_size == 224:
            channel_mult = (1, 1, 2, 2, 4, 4)
        elif image_size == 256:
            channel_mult = (1, 1, 2, 2, 4, 4)
        elif image_size == 128:
            channel_mult = (1, 1, 2, 3, 4)
        elif image_size == 64:
            channel_mult = (1, 2, 3, 4)
        elif image_size == 32:
            # https://github.com/CompVis/latent-diffusion/blob/a506df5756472e2ebaf9078affdde2c4f1502cd4/configs/latent-diffusion/lsun_churches-ldm-kl-8.yaml#L37
            channel_mult = (1, 2, 4, 4)
        elif image_size == 16:  # B,12,16,16. just for baseline check. not good performance.
            channel_mult = (1, 2, 3, 4)
        else:
            raise ValueError(f"unsupported image size: {image_size}")
    else:
        channel_mult = tuple(
            int(ch_mult) for ch_mult in channel_mult.split(","))

    attention_ds = []
    for res in attention_resolutions.split(","):
        attention_ds.append(image_size // int(res))

    if create_controlnet:
        controlledUnetModel = ControlledUnetModel(
            image_size=image_size,
            in_channels=denoise_in_channels,
            model_channels=num_channels,
            # out_channels=(3 if not learn_sigma else 6),
            out_channels=(denoise_out_channels
                          if not learn_sigma else denoise_out_channels * 2),
            num_res_blocks=num_res_blocks,
            attention_resolutions=tuple(attention_ds),
            dropout=dropout,
            channel_mult=channel_mult,
            num_classes=(NUM_CLASSES if class_cond else None),
            use_checkpoint=use_checkpoint,
            use_fp16=use_fp16,
            num_heads=num_heads,
            num_head_channels=num_head_channels,
            num_heads_upsample=num_heads_upsample,
            use_scale_shift_norm=use_scale_shift_norm,
            resblock_updown=resblock_updown,
            use_new_attention_order=use_new_attention_order,
            mixed_prediction=mixed_prediction,
            # ldm support
            use_spatial_transformer=use_spatial_transformer,
            transformer_depth=transformer_depth,
            context_dim=context_dim,
            pooling_ctx_dim=pooling_ctx_dim,
            n_embed=n_embed,
            legacy=legacy,
            mixing_logit_init=mixing_logit_init,
            roll_out=roll_out,
        )

        controlNet = ControlNet(
            image_size=image_size,
            in_channels=denoise_in_channels,
            model_channels=num_channels,
            # ! condition channels
            hint_channels=hint_channels,
            # out_channels=(3 if not learn_sigma else 6),
            # out_channels=(denoise_out_channels
            #               if not learn_sigma else denoise_out_channels * 2),
            num_res_blocks=num_res_blocks,
            attention_resolutions=tuple(attention_ds),
            dropout=dropout,
            channel_mult=channel_mult,
            # num_classes=(NUM_CLASSES if class_cond else None),
            use_checkpoint=use_checkpoint,
            use_fp16=use_fp16,
            num_heads=num_heads,
            num_head_channels=num_head_channels,
            num_heads_upsample=num_heads_upsample,
            use_scale_shift_norm=use_scale_shift_norm,
            resblock_updown=resblock_updown,
            use_new_attention_order=use_new_attention_order,
            roll_out=roll_out,
            # mixed_prediction=mixed_prediction,
        )

        return controlledUnetModel, controlNet

    elif create_dit:
        # if i23d
        if i23d:
            return DiT_models_i23d[dit_model_arch](
                input_size=image_size,
                num_classes=0,
                learn_sigma=learn_sigma,
                in_channels=denoise_in_channels,
                context_dim=context_dim,  # add CLIP text embedding
                roll_out=roll_out,
                # vit_blk=ImageCondDiTBlock,
                pooling_ctx_dim=pooling_ctx_dim,
            )
        else:  # t23d
            return DiT_models_t23d[dit_model_arch](
                input_size=image_size,
                num_classes=0,
                learn_sigma=learn_sigma,
                in_channels=denoise_in_channels,
                context_dim=context_dim,  # add CLIP text embedding
                roll_out=roll_out,
                # vit_blk=TextCondDiTBlock,
            )

    else:
        if create_unet_with_hint:
            unet_cls = UNetModelWithHint
        else:
            unet_cls = UNetModel

        # st()
        return unet_cls(
            image_size=image_size,
            in_channels=denoise_in_channels,
            model_channels=num_channels,
            # out_channels=(3 if not learn_sigma else 6),
            out_channels=(denoise_out_channels
                          if not learn_sigma else denoise_out_channels * 2),
            num_res_blocks=num_res_blocks,
            attention_resolutions=tuple(attention_ds),
            dropout=dropout,
            channel_mult=channel_mult,
            num_classes=(NUM_CLASSES if class_cond else None),
            use_checkpoint=use_checkpoint,
            use_fp16=use_fp16,
            num_heads=num_heads,
            num_head_channels=num_head_channels,
            num_heads_upsample=num_heads_upsample,
            use_scale_shift_norm=use_scale_shift_norm,
            resblock_updown=resblock_updown,
            use_new_attention_order=use_new_attention_order,
            mixed_prediction=mixed_prediction,
            # ldm support
            use_spatial_transformer=use_spatial_transformer,
            transformer_depth=transformer_depth,
            context_dim=context_dim,
            pooling_ctx_dim=pooling_ctx_dim,
            n_embed=n_embed,
            legacy=legacy,
            mixing_logit_init=mixing_logit_init,
            roll_out=roll_out,
            hint_channels=hint_channels,
            # normalize_clip_encoding=normalize_clip_encoding,
        )

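# Note on return shapes (summarizing the branches above): with create_controlnet=True,
# create_model returns a (ControlledUnetModel, ControlNet) pair; with create_dit=True it
# returns a single DiT module picked from DiT_models_i23d / DiT_models_t23d; otherwise it
# returns a UNetModel (or UNetModelWithHint when create_unet_with_hint=True). Callers that
# enable the ControlNet path therefore need to unpack two modules, e.g. (argument values
# below are illustrative only, not a tested configuration):
#
#     unet, control_net = create_model(
#         image_size=224, num_channels=128, num_res_blocks=2,
#         denoise_in_channels=12, denoise_out_channels=12,
#         attention_resolutions="16,8", create_controlnet=True,
#         hint_channels=3)
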
def create_classifier_and_diffusion(
    image_size,
    classifier_use_fp16,
    classifier_width,
    classifier_depth,
    classifier_attention_resolutions,
    classifier_use_scale_shift_norm,
    classifier_resblock_updown,
    classifier_pool,
    learn_sigma,
    diffusion_steps,
    noise_schedule,
    timestep_respacing,
    use_kl,
    predict_xstart,
    rescale_timesteps,
    rescale_learned_sigmas,
):
    classifier = create_classifier(
        image_size,
        classifier_use_fp16,
        classifier_width,
        classifier_depth,
        classifier_attention_resolutions,
        classifier_use_scale_shift_norm,
        classifier_resblock_updown,
        classifier_pool,
    )
    diffusion = create_gaussian_diffusion(
        # create_gaussian_diffusion takes diffusion_steps, not steps
        diffusion_steps=diffusion_steps,
        learn_sigma=learn_sigma,
        noise_schedule=noise_schedule,
        use_kl=use_kl,
        predict_xstart=predict_xstart,
        rescale_timesteps=rescale_timesteps,
        rescale_learned_sigmas=rescale_learned_sigmas,
        timestep_respacing=timestep_respacing,
    )
    return classifier, diffusion

def create_classifier(
    image_size,
    classifier_use_fp16,
    classifier_width,
    classifier_depth,
    classifier_attention_resolutions,
    classifier_use_scale_shift_norm,
    classifier_resblock_updown,
    classifier_pool,
):
    if image_size == 512:
        channel_mult = (0.5, 1, 1, 2, 2, 4, 4)
    elif image_size == 256:
        channel_mult = (1, 1, 2, 2, 4, 4)
    elif image_size == 128:
        channel_mult = (1, 1, 2, 3, 4)
    elif image_size == 64:
        channel_mult = (1, 2, 3, 4)
    else:
        raise ValueError(f"unsupported image size: {image_size}")

    attention_ds = []
    for res in classifier_attention_resolutions.split(","):
        attention_ds.append(image_size // int(res))

    return EncoderUNetModel(
        image_size=image_size,
        in_channels=3,
        model_channels=classifier_width,
        out_channels=1000,
        num_res_blocks=classifier_depth,
        attention_resolutions=tuple(attention_ds),
        channel_mult=channel_mult,
        use_fp16=classifier_use_fp16,
        num_head_channels=64,
        use_scale_shift_norm=classifier_use_scale_shift_norm,
        resblock_updown=classifier_resblock_updown,
        pool=classifier_pool,
    )

def sr_model_and_diffusion_defaults():
    res = model_and_diffusion_defaults()
    res["large_size"] = 256
    res["small_size"] = 64
    # drop any default that sr_create_model_and_diffusion does not accept
    arg_names = inspect.getfullargspec(sr_create_model_and_diffusion)[0]
    for k in res.copy().keys():
        if k not in arg_names:
            del res[k]
    return res

def sr_create_model_and_diffusion(
    large_size,
    small_size,
    class_cond,
    learn_sigma,
    num_channels,
    num_res_blocks,
    num_heads,
    num_head_channels,
    num_heads_upsample,
    attention_resolutions,
    dropout,
    diffusion_steps,
    noise_schedule,
    timestep_respacing,
    use_kl,
    predict_xstart,
    rescale_timesteps,
    rescale_learned_sigmas,
    use_checkpoint,
    use_scale_shift_norm,
    resblock_updown,
    use_fp16,
):
    model = sr_create_model(
        large_size,
        small_size,
        num_channels,
        num_res_blocks,
        learn_sigma=learn_sigma,
        class_cond=class_cond,
        use_checkpoint=use_checkpoint,
        attention_resolutions=attention_resolutions,
        num_heads=num_heads,
        num_head_channels=num_head_channels,
        num_heads_upsample=num_heads_upsample,
        use_scale_shift_norm=use_scale_shift_norm,
        dropout=dropout,
        resblock_updown=resblock_updown,
        use_fp16=use_fp16,
    )
    diffusion = create_gaussian_diffusion(
        # create_gaussian_diffusion takes diffusion_steps, not steps
        diffusion_steps=diffusion_steps,
        learn_sigma=learn_sigma,
        noise_schedule=noise_schedule,
        use_kl=use_kl,
        predict_xstart=predict_xstart,
        rescale_timesteps=rescale_timesteps,
        rescale_learned_sigmas=rescale_learned_sigmas,
        timestep_respacing=timestep_respacing,
    )
    return model, diffusion

def sr_create_model(
    large_size,
    small_size,
    num_channels,
    num_res_blocks,
    learn_sigma,
    class_cond,
    use_checkpoint,
    attention_resolutions,
    num_heads,
    num_head_channels,
    num_heads_upsample,
    use_scale_shift_norm,
    dropout,
    resblock_updown,
    use_fp16,
):
    _ = small_size  # hack to prevent unused variable

    if large_size == 512:
        channel_mult = (1, 1, 2, 2, 4, 4)
    elif large_size == 256:
        channel_mult = (1, 1, 2, 2, 4, 4)
    elif large_size == 64:
        channel_mult = (1, 2, 3, 4)
    else:
        raise ValueError(f"unsupported large size: {large_size}")

    attention_ds = []
    for res in attention_resolutions.split(","):
        attention_ds.append(large_size // int(res))

    return SuperResModel(
        image_size=large_size,
        in_channels=3,
        model_channels=num_channels,
        out_channels=(3 if not learn_sigma else 6),
        num_res_blocks=num_res_blocks,
        attention_resolutions=tuple(attention_ds),
        dropout=dropout,
        channel_mult=channel_mult,
        num_classes=(NUM_CLASSES if class_cond else None),
        use_checkpoint=use_checkpoint,
        num_heads=num_heads,
        num_head_channels=num_head_channels,
        num_heads_upsample=num_heads_upsample,
        use_scale_shift_norm=use_scale_shift_norm,
        resblock_updown=resblock_updown,
        use_fp16=use_fp16,
    )

def create_gaussian_diffusion(
    *,
    diffusion_steps=1000,
    learn_sigma=False,
    sigma_small=False,
    noise_schedule="linear",
    use_kl=False,
    predict_xstart=False,
    predict_v=False,
    rescale_timesteps=False,
    rescale_learned_sigmas=False,
    timestep_respacing="",
    standarization_xt=False,
):
    betas = gd.get_named_beta_schedule(noise_schedule, diffusion_steps)

    if use_kl:
        loss_type = gd.LossType.RESCALED_KL
    elif rescale_learned_sigmas:
        loss_type = gd.LossType.RESCALED_MSE
    else:
        loss_type = gd.LossType.MSE  # * used here.

    if not timestep_respacing:
        timestep_respacing = [diffusion_steps]

    if predict_xstart:
        model_mean_type = gd.ModelMeanType.START_X
    elif predict_v:
        model_mean_type = gd.ModelMeanType.V
    else:
        model_mean_type = gd.ModelMeanType.EPSILON
    # model_mean_type=(
    #     gd.ModelMeanType.EPSILON if not predict_xstart else
    #     gd.ModelMeanType.START_X  # * used gd.ModelMeanType.EPSILON
    # ),

    return SpacedDiffusion(
        use_timesteps=space_timesteps(diffusion_steps, timestep_respacing),
        betas=betas,
        model_mean_type=model_mean_type,
        model_var_type=((
            gd.ModelVarType.FIXED_LARGE  # * used here
            if not sigma_small else gd.ModelVarType.FIXED_SMALL)
            if not learn_sigma else gd.ModelVarType.LEARNED_RANGE),
        loss_type=loss_type,
        rescale_timesteps=rescale_timesteps,
        standarization_xt=standarization_xt,
    )

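# A minimal usage sketch for the factory above (illustrative only; the helper name is
# ours). It assumes the guided-diffusion style space_timesteps syntax, where a
# timestep_respacing string such as "250" uniformly respaces the schedule for faster
# sampling; the exact accepted strings depend on .respace.space_timesteps.
def _example_train_and_sample_schedules():
    # full 1000-step schedule used for training (epsilon prediction, fixed-large variance)
    train_diffusion = create_gaussian_diffusion(
        diffusion_steps=1000, noise_schedule="linear")
    # respaced schedule for sampling with fewer denoising steps
    sample_diffusion = create_gaussian_diffusion(
        diffusion_steps=1000, noise_schedule="linear",
        timestep_respacing="250")
    return train_diffusion, sample_diffusion
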
def add_dict_to_argparser(parser, default_dict):
    for k, v in default_dict.items():
        v_type = type(v)
        if v is None:
            v_type = str
        elif isinstance(v, bool):
            v_type = str2bool
        parser.add_argument(f"--{k}", default=v, type=v_type)

def args_to_dict(args, keys):
    return {k: getattr(args, k) for k in keys}

def str2bool(v):
    """
    https://stackoverflow.com/questions/15008758/parsing-boolean-values-with-argparse
    """
    if isinstance(v, bool):
        return v
    if v.lower() in ("yes", "true", "t", "y", "1"):
        return True
    elif v.lower() in ("no", "false", "f", "n", "0"):
        return False
    else:
        raise argparse.ArgumentTypeError("boolean value expected")

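# Illustrative check of the CLI helpers above (hedged: the __main__ guard and flag
# values here are examples, not part of the original training entry points).
# add_dict_to_argparser maps bool defaults to str2bool, so boolean flags are passed
# as explicit strings such as "--use_fp16 true".
if __name__ == "__main__":
    _demo_parser = argparse.ArgumentParser()
    add_dict_to_argparser(_demo_parser, dict(use_fp16=False, num_channels=128))
    _demo_args = _demo_parser.parse_args(["--use_fp16", "true", "--num_channels", "320"])
    assert _demo_args.use_fp16 is True and _demo_args.num_channels == 320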