# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "numpy",
#     "torch==2.8.0",
#     "kernels-benchmark-tools",
#     "kernels",
# ]
#
# [tool.uv.sources]
# kernels-benchmark-tools = { path = "../../../../../tools", editable = true }
# ///
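# The block above is PEP 723 inline script metadata: running this file with
# `uv run <script>.py` (filename illustrative) resolves the dependencies
# automatically, including the local editable kernels-benchmark-tools package
# referenced in [tool.uv.sources].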
import torch
from kernels_benchmark_tools import KernelTypeEnum, run_benchmark
from kernels import get_kernel

# Load the pre-built activation kernel from the kernels-community/activation
# repository on the Hugging Face Hub
activation = get_kernel("kernels-community/activation")


def hf_kernels_swiglu(input_tensor):
    # SwiGLU halves the last dimension: the kernel computes
    # silu(x[..., :d]) * x[..., d:], where d = input_dim // 2.
    hidden_dim = input_tensor.shape[-1] // 2
    out_shape = input_tensor.shape[:-1] + (hidden_dim,)
    # Preallocate the output buffer; silu_and_mul fills it and returns it.
    out = torch.empty(out_shape, dtype=input_tensor.dtype, device=input_tensor.device)
    return activation.silu_and_mul(out, input_tensor)
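

# Optional sanity check, not part of the benchmark: a minimal eager PyTorch
# reference, assuming silu_and_mul follows the usual convention of
# silu(first half) * second half along the last dimension.
def eager_swiglu(input_tensor):
    gate, up = input_tensor.chunk(2, dim=-1)
    return torch.nn.functional.silu(gate) * up


if torch.cuda.is_available():
    x = torch.randn(2, 128, device="cuda", dtype=torch.float16)
    torch.testing.assert_close(hf_kernels_swiglu(x), eager_swiglu(x), rtol=1e-3, atol=1e-3)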


# Run this implementation through the shared activation benchmark harness.
run_benchmark(
    kernel_type=KernelTypeEnum.ACTIVATION,
    impl_name="hf_kernels_swiglu",
    impl_tags={"family": "hf-kernels", "backend": "cuda"},
    impl_func=hf_kernels_swiglu,
)