# MolmoAct-7B-O-0812 / model.yaml
model_name: molmo
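# LLM backbone. These settings match the OLMo-2-1124-7B decoder (4096-dim,
# 32 layers, 32 heads, SwiGLU MLPs, RMSNorm) that init_path below loads.
# Note: the moe_* keys appear unused with block_type "sequential" (a dense,
# non-MoE block) — that reading is an inference, not stated in this file.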
llm:
  d_model: 4096
  n_heads: 32
  n_kv_heads: null
  head_dim: null
  qkv_bias: false
  clip_qkv: null
  n_layers: 32
  mlp_ratio: 4
  mlp_hidden_size: 22016
  activation_type: swiglu
  block_type: sequential
  rope: true
  rope_full_precision: true
  rope_theta: 500000.0
  rope_type: default
  rope_factor: null
  rope_high_freq_factor: null
  rope_low_freq_factor: null
  rope_original_max_position_embeddings: null
  attention_type: sdpa
  float32_attention: true
  attention_dropout: 0.0
  attention_layer_norm: true
  attention_layer_norm_type: olmo
  residual_dropout: 0.1
  response_residual_dropout: 0.0
  layer_norm_type: rms
  layer_norm_with_affine: true
  layer_norm_eps: 1.0e-06
  attention_layer_norm_with_affine: true
  max_sequence_length: 4096
  max_position_embeddings: null
  include_bias: false
  bias_for_layer_norm: false
  norm_after: true
  moe_num_experts: 8
  moe_top_k: 2
  moe_mlp_impl: sparse
  moe_log_expert_assignment: false
  moe_shared_expert: false
  moe_lbl_in_fp32: false
  moe_interleave: false
  moe_loss_weight: 0.1
  moe_zloss_weight: null
  moe_dropless: true
  moe_capacity_factor: 1.25
  embedding_dropout: 0.0
  scale_logits: false
  vocab_size: 100278
  additional_vocab_size: 128
  weight_tying: false
  embedding_size: 100864
  use_position_ids: true
  tokenizer:
    identifier: allenai/OLMo-2-1124-7B
    tokenizer_dir: null
    depth_tokens: true
  init_path: /weka/oe-training-default/mm-olmo/pretrained_llms/olmo2-1124-7b.pt
  init_incremental: null
  new_embedding_init_range: 0.02
  initializer_range: 0.02
  normalize_input_embeds: false
  activation_checkpoint: whole_layer
  compile: blocks
  fix_pad_tokenizer: true
  resize_vocab: true
  init_std: 0.02
  init_fn: normal
  init_cutoff_factor: null
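# Vision backbone. An OpenAI CLIP ViT-L/14 encoder at 336x336 input:
# 336/14 = 24 patches per side, so 24*24 + 1 (CLS) = 577 positions. Features
# are read from layers -2 and -9 and pooled 2x2 via attention_meanq before the
# MLP connector; keeping 23 of ViT-L's 24 layers is consistent with
# skip_unused_layers dropping the final, unread layer (an inference from
# these keys, not stated in this file).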
vision_backbone:
  vit:
    image_model_type: openai
    image_default_input_size:
    - 336
    - 336
    image_patch_size: 14
    image_pos_patch_size: 14
    image_emb_dim: 1024
    image_num_heads: 16
    image_num_key_value_heads: 16
    image_num_layers: 23
    image_head_dim: 64
    image_mlp_dim: 4096
    image_mlp_activations: quick_gelu
    image_dropout_rate: 0.0
    image_num_pos: 577
    image_norm_eps: 1.0e-05
    attention_dropout: 0.0
    residual_dropout: 0.0
    initializer_range: 0.02
    float32_attention: true
    attention_type: sdpa
    activation_checkpointing: true
    init_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
    resize_mode: default
    pad_value: 0.0
    normalize: openai
  image_pooling_2d: attention_meanq
  pooling_attention_mask: false
  image_projector: mlp
  image_padding_embed: pad_and_partial_pad
  vit_layers:
  - -2
  - -9
  skip_unused_layers: true
  image_feature_dropout: 0.0
  connector_activation_checkpointing: true
  compile_vit: blocks
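# Prompt formatting: Molmo-style "uber_model" templates with role-based
# messages and the demo_or_style system prompt.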
data_formatter:
  prompt_templates: uber_model
  message_format: role
  system_prompt: demo_or_style
  always_start_with_space: false
  default_inference_len: 65
  select_answer: best
  debug: false
  image_last: false
  format_message_list: null
  p_one_message: 0.0
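# Multimodal preprocessing: overlap-and-resize cropping with up to 8 crops
# per image (2 images max), 4-patch overlap margins, 2x2 token pooling, and
# column tokens marking crop-row boundaries — the crop/row-token scheme is
# Molmo's; details beyond these keys are not specified here.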
mm_preprocessor:
  crop_mode: overlap-and-resize-c2
  max_crops: 8
  max_images: 2
  max_multi_image_crops: 8
  pooling_w: 2
  pooling_h: 2
  overlap_margins:
  - 4
  - 4
  use_col_tokens: true
  loss_token_weighting: root_subsegments
  legacy_image_mask: false
  max_answer_len: null
  img_aug: false
bi_directional_attn: null
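# LoRA is disabled for this checkpoint; the rank/alpha/dropout values below
# are presumably unused defaults.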
lora_enable: false
lora_rank: 64
lora_alpha: 16
lora_dropout: 0.05
lora_bias: none
n_action_bins: 256
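# Per-dataset action statistics for (de)normalizing the 7-dim action vector,
# discretized into n_action_bins (256) tokens. The dimensions are assumed to
# be end-effector deltas [x, y, z, roll, pitch, yaw] plus a gripper command in
# [0, 1] — a common robot-policy convention that this file does not state.
# One typical scheme (e.g., OpenVLA-style; not confirmed here) clips each
# dimension to [q01, q99], rescales to [-1, 1], and bins uniformly.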
norm_stats:
  molmoact:
    action:
      mean:
      - 0.0005706787342205644
      - 0.0002448957529850304
      - -3.5987635783385485e-05
      - 0.00021597897284664214
      - -0.0004896928439848125
      - -0.000241481073317118
      - 0.5570635199546814
      std:
      - 0.005207270849496126
      - 0.007506529800593853
      - 0.006415561307221651
      - 0.013248044066131115
      - 0.010928540490567684
      - 0.014873150736093521
      - 0.49715080857276917
      min:
      - -0.07434078305959702
      - -0.07339745759963989
      - -0.06539416313171387
      - -0.1688285619020462
      - -0.10289879888296127
      - -0.2667275667190552
      - 0.0
      max:
      - 0.06042003631591797
      - 0.09417290985584259
      - 0.07019275426864624
      - 0.2616892158985138
      - 0.11751057207584381
      - 0.16968433558940887
      - 1.0
      q01:
      - -0.01538565568625927
      - -0.021047022193670273
      - -0.01688069850206375
      - -0.044314172118902206
      - -0.03890235349535942
      - -0.04788423702120781
      - 0.0
      q99:
      - 0.014661382883787155
      - 0.026515591889619827
      - 0.021398313343524933
      - 0.04216696694493294
      - 0.03401297703385353
      - 0.04957397282123566
      - 1.0
    num_entries: 1560068