| """BD3LM config for Hugging Face. | |
| """ | |
| import transformers | |
class BD3LMConfig(transformers.PretrainedConfig):
    """Hugging Face configuration class for BD3LM.

    Stores the hyperparameters of a BD3LM model as plain attributes so the
    model can be saved/loaded through the standard ``transformers`` config
    machinery. All arguments are forwarded verbatim to attributes of the
    same name; unrecognized keyword arguments are handled by
    ``transformers.PretrainedConfig``.

    Args:
        block_size: Diffusion block size (presumably tokens per block —
            confirm against the model implementation).
        vocab_size: Size of the token vocabulary.
        model_length: Maximum sequence length.
        cross_attn: Whether to use cross-attention.
        adaln: Whether to use adaptive layer norm conditioning.
        attn_backend: Attention implementation to use (e.g. ``'flex'``).
        causal: Whether attention is causal.
        hidden_dim: Transformer hidden dimension.
        cond_dim: Conditioning embedding dimension.
        n_blocks: Number of transformer blocks.
        n_heads: Number of attention heads.
        dropout: Dropout probability.
        time_conditioning: Whether to condition on the diffusion timestep.
        var_min: NOTE(review): flag consumed by the training/sampling code;
            exact semantics not visible here.
        sampling_eps_min: Lower clamp for the sampling noise schedule.
        sampling_eps_max: Upper clamp for the sampling noise schedule.
        **kwargs: Passed through to ``transformers.PretrainedConfig``.
    """

    model_type = "bd3lm"

    def __init__(
        self,
        block_size: int = 1,
        vocab_size: int = 4107,
        model_length: int = 1024,
        cross_attn: bool = True,
        adaln: bool = True,
        attn_backend: str = 'flex',
        causal: bool = False,
        hidden_dim: int = 768,
        cond_dim: int = 129,
        n_blocks: int = 12,
        n_heads: int = 12,
        dropout: float = 0.1,
        time_conditioning: bool = False,
        var_min: bool = True,
        sampling_eps_min: float = 1e-3,
        sampling_eps_max: float = 0.999,
        **kwargs,
    ):
        super().__init__(**kwargs)
        # Insertion order is kept identical to the historical attribute
        # order so serialized config dicts keep the same key order.
        hyperparams = dict(
            block_size=block_size,
            cross_attn=cross_attn,
            adaln=adaln,
            attn_backend=attn_backend,
            causal=causal,
            vocab_size=vocab_size,
            model_length=model_length,
            hidden_dim=hidden_dim,
            cond_dim=cond_dim,
            n_blocks=n_blocks,
            n_heads=n_heads,
            dropout=dropout,
            time_conditioning=time_conditioning,
            var_min=var_min,
            sampling_eps_min=sampling_eps_min,
            sampling_eps_max=sampling_eps_max,
        )
        for attr_name, attr_value in hyperparams.items():
            setattr(self, attr_name, attr_value)