drbh's picture
drbh HF Staff
Upload folder using huggingface_hub
73f8595 verified
raw
history blame
459 Bytes
# /// script
# dependencies = [
# "torch",
# "numpy",
# ]
# ///
"""Shared configuration for both implementations."""
import torch
# --- Model dimensions -------------------------------------------------------
# NOTE(review): the names suggest a mixture-of-experts layer (expert count,
# hidden/intermediate widths, experts selected per token) -- confirm against
# the scripts that import this module.
NUM_EXPERTS: int = 128
HIDDEN_SIZE: int = 1152
INTERMEDIATE_SIZE: int = 3072
TOP_K: int = 4

# --- Input dimensions and placement -----------------------------------------
BATCH_SIZE: int = 1
SEQ_LEN: int = 100

# Kept as a plain string name, not a torch.dtype object; any conversion is
# left to the consumer.
DTYPE: str = "float32"

# Use the GPU when torch reports CUDA as available, otherwise fall back to
# the CPU.
DEVICE: str
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# --- Fixed seeds for reproducibility ----------------------------------------
# One seed per named purpose; how each is applied is decided by the consumer.
WEIGHT_SEED: int = 999
EXPERT_SEED: int = 777
INPUT_SEED: int = 123
GENERAL_SEED: int = 42