# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "numpy",
#     "torch==2.8.0",
#     "kernels-benchmark-tools",
#     "kernels",
# ]
#
# [tool.uv.sources]
# kernels-benchmark-tools = { path = "../../../../../tools", editable = true }
# ///
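# Run with `uv run <path-to-this-script>`: uv reads the inline script metadata
# above (PEP 723) and installs the pinned dependencies before executing.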
import torch

from kernels import get_kernel
from kernels_benchmark_tools import KernelTypeEnum, run_benchmark

# Load the fused activation kernel from the Hugging Face Hub
activation = get_kernel("kernels-community/activation")


def hf_kernels_swiglu(input_tensor):
    """SwiGLU using the fused silu_and_mul kernel.

    The last dimension of `input_tensor` holds the [gate, up] halves;
    silu_and_mul computes silu(gate) * up and writes it into `out`.
    """
    hidden_dim = input_tensor.shape[-1] // 2
    out_shape = input_tensor.shape[:-1] + (hidden_dim,)
    out = torch.empty(out_shape, dtype=input_tensor.dtype, device=input_tensor.device)
    return activation.silu_and_mul(out, input_tensor)
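

# For comparison, a pure-PyTorch SwiGLU sketch (a hypothetical helper, not part
# of the original benchmark): it assumes the same [gate, up] split convention
# as silu_and_mul and can sanity-check the kernel, e.g. via torch.allclose.
def swiglu_reference(input_tensor):
    gate, up = input_tensor.chunk(2, dim=-1)  # split last dim into two halves
    return torch.nn.functional.silu(gate) * up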


run_benchmark(
    kernel_type=KernelTypeEnum.ACTIVATION,
    impl_name="hf_kernels_swiglu",
    impl_tags={"family": "hf-kernels", "backend": "cuda"},
    impl_func=hf_kernels_swiglu,
)