Reference kernel
▼ code
▼ output
▶ uv-logs
|
Cell: forward_only | 100.45s | FAILED
|
Raw
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "accelerate>=1.10.1",
# "torch>=2.7.0",
# "kernels==0.10.0",
# "transformers@https://github.com/huggingface/transformers.git",
# "ipdb>=0.13.13",
# "matplotlib>=3.7.2",
# "numpy>=1.24.3",
# ]
# ///
import torch
from transformers import GptOssForCausalLM, PreTrainedTokenizerFast, Mxfp4Config
import time
import torch.nn as nn
from kernels import register_kernel_mapping, Mode, LayerRepository
import sys
import torch.profiler
import gc
import logging
# set to debug logging
logging.basicConfig(level=logging.INFO)
def reset_peak_memory_stats():
"""Clear CUDA cache and reset memory allocation counters."""
torch.cuda.empty_cache()
if torch.cuda.is_available():
torch.cuda.reset_peak_memory_stats()
gc.collect()
def get_memory_stats():
"""Get current and peak CUDA memory usage."""
if not torch.cuda.is_available():
return {"allocated_gb": 0, "peak_gb": 0, "reserved_gb": 0}
return {
"allocated_gb": torch.cuda.memory_allocated() / 1e9,
"peak_gb": torch.cuda.max_memory_allocated() / 1e9,
"reserved_gb": torch.cuda.memory_reserved() / 1e9,
}
def override_kernel_layer_name(cls_name: str, value) -> bool:
"""Helper to dynamically override the kernel_layer_name in a model class."""
for mod in sys.modules.values():
if mod is None:
continue
obj = getattr(mod, cls_name, None)
if isinstance(obj, type) and issubclass(obj, nn.Module):
setattr(obj, "kernel_layer_name", value)
print(f"Overrode {cls_name}.kernel_layer_name to {value}")
return True
return False
# Init the model the normal way
model_id = "openai/gpt-oss-20b"
tokenizer = PreTrainedTokenizerFast.from_pretrained(model_id)
quantization_config = Mxfp4Config(dequantize=True)
model = GptOssForCausalLM.from_pretrained(
model_id,
dtype="bfloat16",
device_map="auto",
use_kernels=True,
quantization_config=quantization_config,
).eval()
messages = [
{"role": "system", "content": "What is Tensor Parallelism?"},
]
inputs = tokenizer.apply_chat_template(
messages,
add_generation_prompt=True,
return_tensors="pt",
return_dict=True,
reasoning_effort="low",
).to("cuda")
max_tokens = 512
with torch.inference_mode():
start_time = time.perf_counter()
generated = model.generate(
**inputs,
max_new_tokens=max_tokens,
do_sample=False,
temperature=None,
)
end_time = time.perf_counter()
print(tokenizer.decode(generated[0], skip_special_tokens=False))
print(f"Generation took {end_time - start_time:.2f} seconds")
▶ UV Install Logs
Fetching 3 files: 0%| | 0/3 [00:00<?, ?it/s]
Fetching 3 files: 33%|███▎ | 1/3 [00:13<00:27, 13.93s/it]
Fetching 3 files: 67%|██████▋ | 2/3 [00:17<00:08, 8.08s/it]
Fetching 3 files: 100%|██████████| 3/3 [00:17<00:00, 5.97s/it]
You are using full precision kernels, we will dequantize the model to bf16. To use the quantized model with quantization kernels, please set use_kernels=False
Loading checkpoint shards: 0%| | 0/3 [00:00<?, ?it/s]
Loading checkpoint shards: 33%|███▎ | 1/3 [00:03<00:06, 3.23s/it]
Loading checkpoint shards: 67%|██████▋ | 2/3 [00:06<00:03, 3.14s/it]
Loading checkpoint shards: 100%|██████████| 3/3 [00:08<00:00, 2.49s/it]
Loading checkpoint shards: 100%|██████████| 3/3 [00:08<00:00, 2.68s/it]
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
Fetching 66 files: 0%| | 0/66 [00:00<?, ?it/s]
Fetching 66 files: 2%|▏ | 1/66 [00:00<00:15, 4.28it/s]
Fetching 66 files: 26%|██▌ | 17/66 [00:01<00:03, 12.73it/s]
Fetching 66 files: 100%|██████████| 66/66 [00:01<00:00, 47.76it/s]
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
Fetching 17 files: 0%| | 0/17 [00:00<?, ?it/s]
Fetching 17 files: 65%|██████▍ | 11/17 [00:00<00:00, 104.99it/s]
Fetching 17 files: 100%|██████████| 17/17 [00:00<00:00, 128.06it/s]
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `MegaBlocksMoeMLP` from repo `kernels-community/megablocks` (revision: main) for layer `MegaBlocksMoeMLP`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
INFO:root:Using layer `LigerRMSNorm` from repo `kernels-community/liger_kernels` (revision: main) for layer `LigerRMSNorm`
Traceback (most recent call last):
File "/home/ubuntu/Projects/yamoe-gpt-integration/.uvnote/cells/forward_only.py", line 87, in <module>
generated = model.generate(
^^^^^^^^^^^^^^^
File "/tmp/uvnote-run-b1sqpts5/home/.cache/uv/environments-v2/forward-only-e48b1c4099197fb9/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/tmp/uvnote-run-b1sqpts5/home/.cache/uv/environments-v2/forward-only-e48b1c4099197fb9/lib/python3.12/site-packages/transformers/generation/utils.py", line 2546, in generate
result = decoding_method(
^^^^^^^^^^^^^^^^
File "/tmp/uvnote-run-b1sqpts5/home/.cache/uv/environments-v2/forward-only-e48b1c4099197fb9/lib/python3.12/site-packages/transformers/generation/utils.py", line 2766, in _sample
outputs = self(**model_inputs, return_dict=True)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/uvnote-run-b1sqpts5/home/.cache/uv/environments-v2/forward-only-e48b1c4099197fb9/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/uvnote-run-b1sqpts5/home/.cache/uv/environments-v2/forward-only-e48b1c4099197fb9/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/uvnote-run-b1sqpts5/home/.cache/uv/environments-v2/forward-only-e48b1c4099197fb9/lib/python3.12/site-packages/accelerate/hooks.py", line 175, in new_forward
output = module._old_forward(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/uvnote-run-b1sqpts5/home/.cache/uv/environments-v2/forward-only-e48b1c4099197fb9/lib/python3.12/site-packages/transformers/utils/generic.py", line 783, in wrapper
output = func(self, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/uvnote-run-b1sqpts5/home/.cache/uv/environments-v2/forward-only-e48b1c4099197fb9/lib/python3.12/site-packages/transformers/models/gpt_oss/modeling_gpt_oss.py", line 668, in forward
outputs: MoeModelOutputWithPast = self.model(
^^^^^^^^^^^
File "/tmp/uvnote-run-b1sqpts5/home/.cache/uv/environments-v2/forward-only-e48b1c4099197fb9/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/uvnote-run-b1sqpts5/home/.cache/uv/environments-v2/forward-only-e48b1c4099197fb9/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/uvnote-run-b1sqpts5/home/.cache/uv/environments-v2/forward-only-e48b1c4099197fb9/lib/python3.12/site-packages/transformers/utils/generic.py", line 929, in wrapper
outputs = func(self, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/uvnote-run-b1sqpts5/home/.cache/uv/environments-v2/forward-only-e48b1c4099197fb9/lib/python3.12/site-packages/transformers/models/gpt_oss/modeling_gpt_oss.py", line 507, in forward
hidden_states = decoder_layer(
^^^^^^^^^^^^^^
File "/tmp/uvnote-run-b1sqpts5/home/.cache/uv/environments-v2/forward-only-e48b1c4099197fb9/lib/python3.12/site-packages/transformers/modeling_layers.py", line 94, in __call__
return super().__call__(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/uvnote-run-b1sqpts5/home/.cache/uv/environments-v2/forward-only-e48b1c4099197fb9/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/uvnote-run-b1sqpts5/home/.cache/uv/environments-v2/forward-only-e48b1c4099197fb9/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/uvnote-run-b1sqpts5/home/.cache/uv/environments-v2/forward-only-e48b1c4099197fb9/lib/python3.12/site-packages/accelerate/hooks.py", line 175, in new_forward
output = module._old_forward(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/uvnote-run-b1sqpts5/home/.cache/uv/environments-v2/forward-only-e48b1c4099197fb9/lib/python3.12/site-packages/transformers/utils/deprecation.py", line 172, in wrapped_func
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/tmp/uvnote-run-b1sqpts5/home/.cache/uv/environments-v2/forward-only-e48b1c4099197fb9/lib/python3.12/site-packages/transformers/models/gpt_oss/modeling_gpt_oss.py", line 369, in forward
hidden_states = self.input_layernorm(hidden_states)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/uvnote-run-b1sqpts5/home/.cache/uv/environments-v2/forward-only-e48b1c4099197fb9/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/uvnote-run-b1sqpts5/home/.cache/uv/environments-v2/forward-only-e48b1c4099197fb9/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/uvnote-run-b1sqpts5/home/.cache/huggingface/hub/models--kernels-community--liger_kernels/snapshots/7435d25a8faf175758be14046371a5b0c686f94c/build/torch-universal/liger_kernels/layers.py", line 30, in forward
return LigerRMSNormFunction.apply(
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/uvnote-run-b1sqpts5/home/.cache/uv/environments-v2/forward-only-e48b1c4099197fb9/lib/python3.12/site-packages/torch/autograd/function.py", line 576, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/uvnote-run-b1sqpts5/home/.cache/huggingface/hub/models--kernels-community--liger_kernels/snapshots/7435d25a8faf175758be14046371a5b0c686f94c/build/torch-universal/liger_kernels/utils.py", line 48, in wrapper
return fn(ctx, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/uvnote-run-b1sqpts5/home/.cache/huggingface/hub/models--kernels-community--liger_kernels/snapshots/7435d25a8faf175758be14046371a5b0c686f94c/build/torch-universal/liger_kernels/rms_norm.py", line 338, in forward
Y, X, RSTD, BLOCK_SIZE, num_warps, casting_mode = rms_norm_forward(X, W, eps, offset, casting_mode)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/uvnote-run-b1sqpts5/home/.cache/huggingface/hub/models--kernels-community--liger_kernels/snapshots/7435d25a8faf175758be14046371a5b0c686f94c/build/torch-universal/liger_kernels/rms_norm.py", line 230, in rms_norm_forward
_rms_norm_forward_kernel[(n_rows,)](
File "/tmp/uvnote-run-b1sqpts5/home/.cache/uv/environments-v2/forward-only-e48b1c4099197fb9/lib/python3.12/site-packages/triton/runtime/jit.py", line 390, in <lambda>
return lambda *args, **kwargs: self.run(grid=grid, warmup=False, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/uvnote-run-b1sqpts5/home/.cache/uv/environments-v2/forward-only-e48b1c4099197fb9/lib/python3.12/site-packages/triton/runtime/jit.py", line 617, in run
kernel.run(grid_0, grid_1, grid_2, stream, kernel.function, kernel.packed_metadata, launch_metadata,
File "/tmp/uvnote-run-b1sqpts5/home/.cache/uv/environments-v2/forward-only-e48b1c4099197fb9/lib/python3.12/site-packages/triton/backends/nvidia/driver.py", line 708, in __call__
self.launch(gridX, gridY, gridZ, stream, function, self.launch_cooperative_grid, self.launch_pdl,
ValueError: Pointer argument (at 0) cannot be accessed from Triton (cpu tensor?)