update_readme
mac committed
Commit: 385c000
1 Parent(s): e102133
eagle/model/ea_model.py
CHANGED
@@ -21,7 +21,6 @@ from .cnets1 import Model as Model1
 from .configs import EConfig
 
 """ Modified to support Eagle-3, marked by <mod> xxx </mod> """
-# from .modeling_minicpm_kv import HackConvertMiniCPMForCausalLM as KVMiniCPMForCausalLM # <mod> convert opensource impl to llama </mod>
 from .modeling_minicpm_kv import MiniCPMForCausalLM as KVMiniCPMForCausalLM # <mod> use modified opensource impl </mod>
 
 class EaModel(nn.Module):
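Net effect of this hunk: EaModel now loads MiniCPM through the modified open-source implementation rather than converting the checkpoint into a Llama model at load time. A minimal usage sketch, assuming the standard Hugging Face from_pretrained interface (the checkpoint path is a placeholder, not from the repo):

    from eagle.model.modeling_minicpm_kv import MiniCPMForCausalLM as KVMiniCPMForCausalLM

    # Hypothetical call site; "path/to/minicpm-checkpoint" is a placeholder.
    # No offline weight rescaling is needed here, since the modified
    # implementation applies MiniCPM's scaling at runtime.
    model = KVMiniCPMForCausalLM.from_pretrained("path/to/minicpm-checkpoint")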
eagle/model/modeling_minicpm_kv.py
CHANGED
@@ -2443,45 +2443,3 @@ class MiniCPMForSequenceClassification(MiniCPMPreTrainedModel):
             attentions=transformer_outputs.attentions,
         )
 
-
-
-# hack version
-from .modeling_llama_kv import LlamaForCausalLM as KVLlamaForCausalLM
-
-class HackConvertMiniCPMForCausalLM:
-    def from_pretrained(model_path, **kwargs):
-        model = KVLlamaForCausalLM.from_pretrained(model_path, **kwargs)
-
-        state_dict = model.state_dict()
-        scale_emb = 12
-        dim_model_base = 256
-        scale_depth = 1.4
-        num_layers = 32
-        hidden_size = 4096
-
-        print(state_dict["model.embed_tokens.weight"])
-        embedding = state_dict["model.embed_tokens.weight"]
-        #model.embed_tokens.weight * scale_emb
-        new_emb = embedding.clone() * scale_emb
-        state_dict["model.embed_tokens.weight"] = new_emb
-
-        #lm_head.weight / (hidden_size / dim_model_base)
-        new_emb = state_dict["lm_head.weight"].clone() / (hidden_size / dim_model_base)
-        state_dict["lm_head.weight"] = new_emb
-
-        #model.layers.34.self_attn.o_proj.weight * (scale_depth / sqrt(num_layers))
-        for i in range(num_layers):
-            attn_out_name = f"model.layers.{i}.self_attn.o_proj.weight"
-            new_weight = state_dict[attn_out_name] * (scale_depth / math.sqrt(num_layers))
-            state_dict[attn_out_name] = new_weight
-
-            ffn_down_proj_name = f"model.layers.{i}.mlp.down_proj.weight"
-            new_weight = state_dict[ffn_down_proj_name] * (scale_depth / math.sqrt(num_layers))
-            state_dict[ffn_down_proj_name] = new_weight
-
-        print(f"Converting: reload from converted state_dict.\nCheck sd:\n{model}")
-
-        model.load_state_dict(state_dict)
-        print(f"Convert to llama: DONE.")
-
-        return model
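For reference, the deleted HackConvertMiniCPMForCausalLM folded MiniCPM's scaling constants into the weights so that an unmodified Llama forward pass would reproduce MiniCPM's computation. A self-contained sketch of that rescaling, with the constants copied from the removed code and state-dict keys following Llama's layout (the function name is ours, not from the repo):

    import math

    def fold_minicpm_scales(state_dict, scale_emb=12, dim_model_base=256,
                            scale_depth=1.4, num_layers=32, hidden_size=4096):
        # MiniCPM multiplies token embeddings by scale_emb at runtime; bake it in.
        state_dict["model.embed_tokens.weight"] = (
            state_dict["model.embed_tokens.weight"].clone() * scale_emb
        )
        # MiniCPM divides hidden states by (hidden_size / dim_model_base)
        # before the LM head; fold the division into lm_head.weight instead.
        state_dict["lm_head.weight"] = (
            state_dict["lm_head.weight"].clone() / (hidden_size / dim_model_base)
        )
        # MiniCPM scales each residual branch by scale_depth / sqrt(num_layers);
        # fold it into the output projections of attention and MLP.
        residual_scale = scale_depth / math.sqrt(num_layers)
        for i in range(num_layers):
            for name in (f"model.layers.{i}.self_attn.o_proj.weight",
                         f"model.layers.{i}.mlp.down_proj.weight"):
                state_dict[name] = state_dict[name] * residual_scale
        return state_dict

With the patched MiniCPM implementation used directly, these scalings happen at runtime, which is presumably why the offline conversion helper could be dropped in this commit.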