# /// script
# dependencies = [
#     "torch",
#     "numpy",
# ]
# ///
"""Shared configuration for both implementations."""
import torch

# Model configuration
NUM_EXPERTS = 128
HIDDEN_SIZE = 1152
INTERMEDIATE_SIZE = 3072
TOP_K = 4

# Input configuration
BATCH_SIZE = 1
SEQ_LEN = 100
DTYPE = "float32"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Seeds for reproducibility
WEIGHT_SEED = 999
EXPERT_SEED = 777
INPUT_SEED = 123
GENERAL_SEED = 42
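

# Helper sketch (an assumption, not part of the original config): one plausible
# way consumers of this module might resolve the DTYPE string to a torch dtype
# and seed torch for reproducible runs using the constants above.
def get_torch_dtype() -> torch.dtype:
    """Map the DTYPE string (e.g. "float32") to the corresponding torch dtype."""
    return getattr(torch, DTYPE)


def seed_all(seed: int = GENERAL_SEED) -> None:
    """Seed torch on CPU (and CUDA, if available) with the given seed."""
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)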