webis
/

tite-2-late-upscale

Model card Files Files and versions

tite-2-late-upscale / config.json

fschlatt's picture

Upload folder using huggingface_hub

ce84b1e verified 4 months ago

history blame contribute delete

1.33 kB

	{
	"absolute_positional_embedding_type": null,
	"architectures": [
	"TiteForPreTraining"
	],
	"dropout_prob": 0.1,
	"hidden_act": "gelu_pytorch_tanh",
	"hidden_sizes": [
	768,
	768,
	768,
	1024,
	1024,
	1024,
	1280,
	1280,
	1280,
	1536,
	1536,
	1536
	],
	"initializer_range": 0.02,
	"intermediate_sizes": [
	3072,
	3072,
	3072,
	4096,
	4096,
	4096,
	5120,
	5120,
	5120,
	6144,
	6144,
	6144
	],
	"kernel_sizes": [
	null,
	null,
	null,
	2,
	2,
	2,
	2,
	2,
	2,
	2,
	2,
	2
	],
	"layer_norm_eps": 1e-12,
	"max_position_embeddings": 512,
	"model_type": "tite",
	"norm_location": "post",
	"norm_type": "layer",
	"num_attention_heads": [
	12,
	12,
	12,
	16,
	16,
	16,
	20,
	20,
	20,
	24,
	24,
	24
	],
	"num_hidden_layers": 12,
	"pad_token_id": 0,
	"pooling_implementation": "triton",
	"pooling_location": "intra",
	"positional_embedding_type": null,
	"relative_positional_embedding_type": "rotary",
	"rope_implementation": "eager",
	"rotary_interleaved": true,
	"strides": [
	null,
	null,
	null,
	2,
	2,
	2,
	2,
	2,
	2,
	2,
	2,
	2
	],
	"torch_dtype": "float32",
	"transformers_version": "4.52.4",
	"vocab_size": 30522
	}