{ "absolute_positional_embedding_type": null, "architectures": [ "TiteForPreTraining" ], "dropout_prob": 0.1, "hidden_act": "gelu_pytorch_tanh", "hidden_sizes": [ 768, 768, 768, 1024, 1024, 1024, 1280, 1280, 1280, 1536, 1536, 1536 ], "initializer_range": 0.02, "intermediate_sizes": [ 3072, 3072, 3072, 4096, 4096, 4096, 5120, 5120, 5120, 6144, 6144, 6144 ], "kernel_sizes": [ null, null, null, 2, 2, 2, 2, 2, 2, 2, 2, 2 ], "layer_norm_eps": 1e-12, "max_position_embeddings": 512, "model_type": "tite", "norm_location": "post", "norm_type": "layer", "num_attention_heads": [ 12, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 24 ], "num_hidden_layers": 12, "pad_token_id": 0, "pooling_implementation": "triton", "pooling_location": "intra", "positional_embedding_type": null, "relative_positional_embedding_type": "rotary", "rope_implementation": "eager", "rotary_interleaved": true, "strides": [ null, null, null, 2, 2, 2, 2, 2, 2, 2, 2, 2 ], "torch_dtype": "float32", "transformers_version": "4.52.4", "vocab_size": 30522 }