Commit d334358 · 1 Parent(s): 3d19935

Update configuration_aquila.py

Files changed: configuration_aquila.py (+15, -0)

configuration_aquila.py  CHANGED
@@ -83,6 +83,7 @@ class AquilaConfig(PretrainedConfig):
         intermediate_size=11008,
         num_hidden_layers=32,
         num_attention_heads=32,
+        num_key_value_heads=None,
         hidden_act="silu",
         max_position_embeddings=2048,
         initializer_range=0.02,
@@ -91,7 +92,10 @@ class AquilaConfig(PretrainedConfig):
         pad_token_id=0,
         bos_token_id=1,
         eos_token_id=2,
+        pretraining_tp=1,
         tie_word_embeddings=False,
+        rope_theta=10000.0,
+        rope_scaling=None,
         **kwargs,
     ):
         self.vocab_size = vocab_size
@@ -99,11 +103,22 @@ class AquilaConfig(PretrainedConfig):
         self.hidden_size = hidden_size
         self.intermediate_size = intermediate_size
         self.num_hidden_layers = num_hidden_layers
+
+        # for backward compatibility
+        if num_key_value_heads is None:
+            num_key_value_heads = num_attention_heads
+
+        self.num_key_value_heads = num_key_value_heads
+
         self.num_attention_heads = num_attention_heads
         self.hidden_act = hidden_act
         self.initializer_range = initializer_range
         self.rms_norm_eps = rms_norm_eps
+        self.pretraining_tp = pretraining_tp
         self.use_cache = use_cache
+        self.rope_theta = rope_theta
+        self.rope_scaling = rope_scaling
+
         super().__init__(
             pad_token_id=pad_token_id,
             bos_token_id=bos_token_id,
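For reference, a minimal usage sketch of the options added in this commit. The import path, the grouped-query-attention value of 4, and the example rope_scaling dict are assumptions for illustration (the class ships as custom code in configuration_aquila.py, and the rope_scaling format here mirrors the Llama convention); arguments not shown keep the defaults from the diff.

# Hypothetical sketch: instantiating AquilaConfig with the new keyword arguments.
# Assumes configuration_aquila.py is importable from the working directory; in practice
# the config is typically loaded via AutoConfig.from_pretrained(..., trust_remote_code=True).
from configuration_aquila import AquilaConfig

config = AquilaConfig(
    num_attention_heads=32,
    num_key_value_heads=4,   # new: grouped-query attention; omit (None) to fall back to 32 heads (MHA)
    pretraining_tp=1,        # new: tensor-parallel degree used during pretraining
    rope_theta=10000.0,      # new: RoPE base frequency
    rope_scaling=None,       # new: optional RoPE scaling, e.g. {"type": "linear", "factor": 2.0} (assumed format)
)

assert config.num_key_value_heads == 4
# Backward compatibility from the diff: leaving num_key_value_heads unset copies num_attention_heads.
assert AquilaConfig().num_key_value_heads == AquilaConfig().num_attention_heads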
