{ "architectures": [ "Transformer" ], "bias": true, "d_hidden": 2048, "d_model": 512, "mask": "causal", "n_ctx": 256, "n_head": 8, "n_layer": 1, "norm": true, "tokenizer": "tiny", "torch_dtype": "float32", "transformers_version": "4.51.1" }