File size: 1,288 Bytes
aa3ac98
 
 
 
81fff32
d8c3a70
aa3ac98
 
 
 
81fff32
aa3ac98
 
05bebc1
 
d8c3a70
aa3ac98
d8c3a70
 
aa3ac98
d8c3a70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa3ac98
 
05bebc1
 
d8c3a70
 
 
05bebc1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "numpy",
#     "torch==2.8.0",
#     "kernels",
#     "kernels-benchmark-tools",
# ]
#
# [tool.uv.sources]
# kernels-benchmark-tools = { path = "../../../../../tools", editable = true }
# ///
import torch
import sys
from kernels_benchmark_tools import KernelTypeEnum, run_benchmark
from kernels import get_kernel

# Load the layer norm kernel
layer_norm_kernel = get_kernel("kernels-community/layer-norm")


def hf_kernels_layer_norm(x, weight, bias, eps: float = 1e-5):
    B, S, D = x.shape
    # The kernel expects [N, D] input; support beta (bias) if provided.
    out = layer_norm_kernel.dropout_add_ln_fwd(
        input=x.view(-1, D),
        gamma=weight,
        beta=bias,
        rowscale=None,
        colscale=None,
        x0_subset=None,
        z_subset=None,
        dropout_p=0.0,
        epsilon=eps,
        rowscale_const=1.0,
        z_numrows=S,
        gen=None,
        residual_in_fp32=False,
        is_rms_norm=False,
    )[0].view(B, S, D)
    return out


run_benchmark(
    kernel_type=KernelTypeEnum.LAYER_NORM,
    impl_name="hf_kernels_layer_norm",
    impl_tags={"family": "hf-kernels", "repo": "kernels-community/layer-norm", "op": "layer_norm"},
    impl_func=hf_kernels_layer_norm,
)