metascroy committed (verified)
Commit 1028a93 · 1 Parent(s): 32fb0bf

Delete config.json

Files changed (1)
  1. config.json +0 -223
config.json DELETED
@@ -1,223 +0,0 @@
- {
-   "architectures": [
-     "Phi3ForCausalLM"
-   ],
-   "attention_bias": false,
-   "attention_dropout": 0.0,
-   "auto_map": {
-     "AutoConfig": "microsoft/Phi-4-mini-instruct--configuration_phi3.Phi3Config",
-     "AutoModelForCausalLM": "microsoft/Phi-4-mini-instruct--modeling_phi3.Phi3ForCausalLM",
-     "AutoTokenizer": "microsoft/Phi-4-mini-instruct--Xenova/gpt-4o"
-   },
-   "bos_token_id": 199999,
-   "embd_pdrop": 0.0,
-   "eos_token_id": 199999,
-   "full_attn_mod": 1,
-   "hidden_act": "silu",
-   "hidden_size": 3072,
-   "initializer_range": 0.02,
-   "intermediate_size": 8192,
-   "interpolate_factor": 1,
-   "lm_head_bias": false,
-   "max_position_embeddings": 131072,
-   "mlp_bias": false,
-   "model_type": "phi3",
-   "num_attention_heads": 24,
-   "num_hidden_layers": 32,
-   "num_key_value_heads": 8,
-   "original_max_position_embeddings": 4096,
-   "pad_token_id": 199999,
-   "partial_rotary_factor": 0.75,
-   "quantization_config": {
-     "include_input_output_embeddings": false,
-     "modules_to_not_convert": [],
-     "quant_method": "torchao",
-     "quant_type": {
-       "default": {
-         "_data": {
-           "module_fqn_to_config": {
-             "_default": {
-               "_data": {
-                 "act_mapping_type": {
-                   "_data": "ASYMMETRIC",
-                   "_type": "MappingType"
-                 },
-                 "layout": {
-                   "_data": {},
-                   "_type": "QDQLayout",
-                   "_version": 1
-                 },
-                 "weight_dtype": {
-                   "_data": "int4",
-                   "_type": "torch.dtype"
-                 },
-                 "weight_granularity": {
-                   "_data": {
-                     "group_size": 32
-                   },
-                   "_type": "PerGroup",
-                   "_version": 1
-                 },
-                 "weight_mapping_type": {
-                   "_data": "SYMMETRIC",
-                   "_type": "MappingType"
-                 },
-                 "weight_scale_dtype": {
-                   "_data": "bfloat16",
-                   "_type": "torch.dtype"
-                 }
-               },
-               "_type": "Int8DynamicActivationIntxWeightConfig",
-               "_version": 1
-             },
-             "model.embed_tokens": {
-               "_data": {
-                 "granularity": {
-                   "_data": {
-                     "axis": 0
-                   },
-                   "_type": "PerAxis",
-                   "_version": 1
-                 },
-                 "layout": {
-                   "_data": {},
-                   "_type": "QDQLayout",
-                   "_version": 1
-                 },
-                 "mapping_type": {
-                   "_data": "SYMMETRIC",
-                   "_type": "MappingType"
-                 },
-                 "scale_dtype": null,
-                 "weight_dtype": {
-                   "_data": "int8",
-                   "_type": "torch.dtype"
-                 }
-               },
-               "_type": "IntxWeightOnlyConfig",
-               "_version": 1
-             }
-           }
-         },
-         "_type": "ModuleFqnToConfig",
-         "_version": 1
-       }
-     },
-     "quant_type_kwargs": {
-       "include_embedding": true
-     },
-     "untie_embedding_weights": true
-   },
-   "resid_pdrop": 0.0,
-   "rms_norm_eps": 1e-05,
-   "rope_scaling": {
-     "long_factor": [
-       1,
-       1.118320672,
-       1.250641126,
-       1.398617824,
-       1.564103225,
-       1.74916897,
-       1.956131817,
-       2.187582649,
-       2.446418898,
-       2.735880826,
-       3.059592084,
-       3.421605075,
-       3.826451687,
-       4.279200023,
-       4.785517845,
-       5.351743533,
-       5.984965424,
-       6.693110555,
-       7.485043894,
-       8.370679318,
-       9.36110372,
-       10.4687158,
-       11.70738129,
-       13.09260651,
-       14.64173252,
-       16.37415215,
-       18.31155283,
-       20.47818807,
-       22.90118105,
-       25.61086418,
-       28.64115884,
-       32.03,
-       32.1,
-       32.13,
-       32.23,
-       32.6,
-       32.61,
-       32.64,
-       32.66,
-       32.7,
-       32.71,
-       32.93,
-       32.97,
-       33.28,
-       33.49,
-       33.5,
-       44.16,
-       47.77
-     ],
-     "short_factor": [
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0,
-       1.0
-     ],
-     "type": "longrope"
-   },
-   "rope_theta": 10000.0,
-   "sliding_window": 262144,
-   "tie_word_embeddings": false,
-   "torch_dtype": "float32",
-   "transformers_version": "4.52.3",
-   "use_cache": true,
-   "vocab_size": 200064
- }
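
Note: the deleted quantization_config serializes a torchao ModuleFqnToConfig: int8 dynamic activations with int4 per-group (group_size 32) weights as the default for linear layers, plus int8 per-axis weight-only quantization for model.embed_tokens. Below is a minimal sketch of how an equivalent quant_type could be rebuilt with torchao's Python API; the class and argument names mirror the serialized fields above, but exact import paths and constructor signatures vary across torchao versions, so treat this as an illustration rather than the exact code used for this checkpoint.

    # Sketch only: reconstructs the deleted quant_type with torchao config objects.
    # Names follow the serialized JSON; import paths may differ by torchao version.
    import torch
    from torchao.quantization import (
        Int8DynamicActivationIntxWeightConfig,
        IntxWeightOnlyConfig,
        MappingType,
        ModuleFqnToConfig,
        PerAxis,
        PerGroup,
    )

    # Default for linear layers: int8 dynamic activations (asymmetric), int4 weights,
    # symmetric per-group quantization with group_size=32, bfloat16 scales --
    # mirroring the "_default" entry in the JSON above.
    linear_config = Int8DynamicActivationIntxWeightConfig(
        weight_dtype=torch.int4,
        weight_granularity=PerGroup(32),
        weight_mapping_type=MappingType.SYMMETRIC,
        weight_scale_dtype=torch.bfloat16,
        act_mapping_type=MappingType.ASYMMETRIC,
    )

    # Embedding table: int8 weight-only, symmetric, quantized per row (axis 0),
    # mirroring the "model.embed_tokens" entry.
    embedding_config = IntxWeightOnlyConfig(
        weight_dtype=torch.int8,
        granularity=PerAxis(0),
        mapping_type=MappingType.SYMMETRIC,
    )

    # "_default" applies to every module not listed explicitly by FQN.
    quant_type = ModuleFqnToConfig(
        module_fqn_to_config={
            "_default": linear_config,
            "model.embed_tokens": embedding_config,
        }
    )

This sketch leaves the layout arguments ("QDQLayout" in the JSON) at their defaults; whether those defaults match the serialized layout depends on the torchao version. The quant_method "torchao" and quant_type_kwargs fields in the JSON come from the transformers-side quantization config that wraps this object when the model is saved.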