Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- flash_attn/benchmark.html +468 -140
- moe_benchmarks/megablocks/megablocks_only.html +125 -314
- moe_benchmarks/megablocks_yamoe/artifacts/binned_run/binned_results.json +9 -9
- moe_benchmarks/megablocks_yamoe/artifacts/gptoss_run/gptoss_results.json +9 -9
- moe_benchmarks/megablocks_yamoe/artifacts/gptoss_training_run/gptoss_training_results.json +9 -9
- moe_benchmarks/megablocks_yamoe/artifacts/megablocks_run/megablocks_results.json +24 -0
- moe_benchmarks/megablocks_yamoe/artifacts/visualization/moe_performance_comparison.png +3 -0
- moe_benchmarks/megablocks_yamoe/artifacts/yamoe_run/yamoe_results.json +9 -9
- moe_benchmarks/megablocks_yamoe/cells/__pycache__/bench_utils.cpython-311.pyc +0 -0
- moe_benchmarks/megablocks_yamoe/cells/__pycache__/config.cpython-311.pyc +0 -0
- moe_benchmarks/megablocks_yamoe/cells/visualization.py +116 -0
- moe_benchmarks/megablocks_yamoe/megablocks_yamoe.html +2 -3
- moe_benchmarks/megablocks_yamoe/torch_profile.html +519 -292
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
moe_benchmarks/megablocks_yamoe/artifacts/visualization/moe_performance_comparison.png filter=lfs diff=lfs merge=lfs -text
|
flash_attn/benchmark.html
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
-
<title>
|
| 7 |
<script>
|
| 8 |
// Apply theme and widget visibility immediately to prevent flicker
|
| 9 |
(function() {
|
|
@@ -1058,7 +1058,21 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left:
|
|
| 1058 |
.line-numbers .line-number { line-height: var(--code-line-height) !important; }
|
| 1059 |
|
| 1060 |
/* Custom CSS from frontmatter */
|
| 1061 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1062 |
|
| 1063 |
|
| 1064 |
|
|
@@ -3701,43 +3715,363 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left:
|
|
| 3701 |
</div>
|
| 3702 |
|
| 3703 |
<div class="main-content">
|
| 3704 |
-
<
|
| 3705 |
-
<p>title: "Flash Attention Benchmark"
|
| 3706 |
-
author: "uvnote"
|
| 3707 |
-
theme: "dark"
|
| 3708 |
-
syntax_theme: "monokai"
|
| 3709 |
-
show_line_numbers: true
|
| 3710 |
-
collapse_code: false
|
| 3711 |
-
custom_css: |
|
| 3712 |
-
#output-setup {
|
| 3713 |
-
overflow-x: auto;
|
| 3714 |
-
}
|
| 3715 |
-
.cell-output {
|
| 3716 |
-
overflow: scroll;
|
| 3717 |
-
}
|
| 3718 |
-
.cell-stdout {
|
| 3719 |
-
width: max-content;
|
| 3720 |
-
overflow: scroll;
|
| 3721 |
-
}
|
| 3722 |
-
.cell-stderr {
|
| 3723 |
-
width: max-content;
|
| 3724 |
-
overflow: scroll;
|
| 3725 |
-
max-height: 300px;
|
| 3726 |
-
}</p>
|
| 3727 |
-
<hr />
|
| 3728 |
-
<div class="cell cell-failed" id="cell-benchmark">
|
| 3729 |
<div class="cell-header">
|
| 3730 |
<span class="collapse-indicators">
|
| 3731 |
<span onclick="toggleCode('benchmark')" style="cursor: pointer;">▼ code</span>
|
| 3732 |
<span onclick="toggleOutput('benchmark')" style="cursor: pointer;">▼ output</span>
|
| 3733 |
<span id="uv-indicator-benchmark" onclick="toggleUvLogsFromHeader('benchmark')" style="cursor: pointer;">▶ uv-logs</span>
|
| 3734 |
</span> |
|
| 3735 |
-
Cell: benchmark |
|
| 3736 |
| <button class="run-btn" onclick="runCell('benchmark')">▶ run</button>
|
| 3737 |
<button class="copy-btn" onclick="copyCell('benchmark')">Copy</button>
|
| 3738 |
<a href="cells/benchmark.py" target="_blank" class="raw-btn">Raw</a>
|
| 3739 |
</div>
|
| 3740 |
<div id="code-benchmark" class="cell-code" data-lines="341">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3741 |
<div class="code-wrap">
|
| 3742 |
<div class="highlight"><pre><span></span><span class="c1"># /// script</span>
|
| 3743 |
<span class="c1"># dependencies = [</span>
|
|
@@ -4085,6 +4419,7 @@ Cell: benchmark | 50.28s | FAILED
|
|
| 4085 |
<div class="code-line-highlight" id="line-highlight-benchmark"></div>
|
| 4086 |
</div>
|
| 4087 |
</div>
|
|
|
|
| 4088 |
<div id="output-benchmark" class="cell-output">
|
| 4089 |
<div class="cell-stdout">Flash Attention 2 not found.
|
| 4090 |
Flash Attention 3 not found.
|
|
@@ -4094,35 +4429,105 @@ xFormers not found.
|
|
| 4094 |
|
| 4095 |
|
| 4096 |
===== Testing shape: (1, 4224, 24, 128) =====
|
| 4097 |
-
torch_cudnn : absmax=0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4098 |
</div>
|
| 4099 |
<div class="uv-install-logs" id="uv-logs-benchmark">
|
| 4100 |
<div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
|
| 4101 |
<div class="uv-logs-content" style="display: none;">
|
| 4102 |
-
Downloading networkx (1.9MiB)
|
| 4103 |
-
Downloading sympy (6.0MiB)
|
| 4104 |
-
Downloading pillow (6.3MiB)
|
| 4105 |
-
Downloading matplotlib (8.3MiB)
|
| 4106 |
-
Downloading numpy (16.2MiB)
|
| 4107 |
-
Downloading nvidia-cufile-cu12 (1.1MiB)
|
| 4108 |
-
Downloading fonttools (4.7MiB)
|
| 4109 |
-
Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
|
| 4110 |
-
Downloading nvidia-curand-cu12 (60.7MiB)
|
| 4111 |
-
Downloading setuptools (1.1MiB)
|
| 4112 |
Downloading kiwisolver (1.4MiB)
|
| 4113 |
-
Downloading nvidia-
|
| 4114 |
-
Downloading nvidia-cusparselt-cu12 (273.9MiB)
|
| 4115 |
-
Downloading nvidia-cufft-cu12 (184.2MiB)
|
| 4116 |
Downloading nvidia-cublas-cu12 (566.8MiB)
|
| 4117 |
Downloading pandas (11.8MiB)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4118 |
Downloading nvidia-cudnn-cu12 (674.0MiB)
|
|
|
|
|
|
|
|
|
|
| 4119 |
Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
|
| 4120 |
-
Downloading
|
|
|
|
| 4121 |
Downloading nvidia-nvjitlink-cu12 (37.4MiB)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4122 |
Downloading triton (148.3MiB)
|
| 4123 |
-
Downloading torch (846.9MiB)
|
| 4124 |
-
Downloading nvidia-cusparse-cu12 (274.9MiB)
|
| 4125 |
-
Downloading hf-xet (3.0MiB)
|
| 4126 |
Downloading nvidia-cufile-cu12
|
| 4127 |
Downloading kiwisolver
|
| 4128 |
Downloading hf-xet
|
|
@@ -4130,8 +4535,8 @@ Downloading hf-xet (3.0MiB)
|
|
| 4130 |
Downloading networkx
|
| 4131 |
Downloading fonttools
|
| 4132 |
Downloading pillow
|
| 4133 |
-
Downloading nvidia-cuda-cupti-cu12
|
| 4134 |
Downloading matplotlib
|
|
|
|
| 4135 |
Downloading sympy
|
| 4136 |
Downloading numpy
|
| 4137 |
Downloading nvidia-nvjitlink-cu12
|
|
@@ -4147,104 +4552,27 @@ Downloading hf-xet (3.0MiB)
|
|
| 4147 |
Downloading nvidia-cublas-cu12
|
| 4148 |
Downloading nvidia-cudnn-cu12
|
| 4149 |
Downloading torch
|
| 4150 |
-
Installed 49 packages in
|
| 4151 |
</div>
|
| 4152 |
</div>
|
| 4153 |
<div class="cell-stderr">Fetching 20 files: 0%| | 0/20 [00:00<?, ?it/s]
|
| 4154 |
-
Fetching 20 files: 5%|▌ | 1/20 [00:00<00:
|
| 4155 |
-
Fetching 20 files: 10%|█ | 2/20 [00:02<00:
|
| 4156 |
-
Fetching 20 files: 100%|██████████| 20/20 [00:02<00:00,
|
| 4157 |
|
| 4158 |
Fetching 4 files: 0%| | 0/4 [00:00<?, ?it/s]
|
| 4159 |
-
Fetching 4 files: 25%|██▌ | 1/4 [00:00<00:00, 5.
|
| 4160 |
-
Fetching 4 files: 50%|█████ | 2/4 [00:
|
| 4161 |
-
Fetching 4 files: 100%|██████████| 4/4 [00:
|
| 4162 |
-
|
| 4163 |
-
|
| 4164 |
-
|
| 4165 |
-
|
| 4166 |
-
|
| 4167 |
-
|
| 4168 |
-
|
| 4169 |
-
|
| 4170 |
-
|
| 4171 |
-
^^^^^^^^^^^^^^^^^^^^^
|
| 4172 |
-
File "/repo/flash_attn/.uvnote/cells/benchmark.py", line 114, in _attention_torch_compile_default
|
| 4173 |
-
return _compiled_attention_torch_default(query, key, value, backend=backend)
|
| 4174 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 4175 |
-
File "/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_dynamo/eval_frame.py", line 749, in compile_wrapper
|
| 4176 |
-
raise e.remove_dynamo_frames() from None # see TORCHDYNAMO_VERBOSE=1
|
| 4177 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 4178 |
-
File "/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/compile_fx.py", line 923, in _compile_fx_inner
|
| 4179 |
-
raise InductorError(e, currentframe()).with_traceback(
|
| 4180 |
-
File "/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/compile_fx.py", line 907, in _compile_fx_inner
|
| 4181 |
-
mb_compiled_graph = fx_codegen_and_compile(
|
| 4182 |
-
^^^^^^^^^^^^^^^^^^^^^^^
|
| 4183 |
-
File "/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/compile_fx.py", line 1578, in fx_codegen_and_compile
|
| 4184 |
-
return scheme.codegen_and_compile(gm, example_inputs, inputs_to_check, graph_kwargs)
|
| 4185 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 4186 |
-
File "/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/compile_fx.py", line 1456, in codegen_and_compile
|
| 4187 |
-
compiled_module = graph.compile_to_module()
|
| 4188 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 4189 |
-
File "/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/graph.py", line 2293, in compile_to_module
|
| 4190 |
-
return self._compile_to_module()
|
| 4191 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 4192 |
-
File "/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/graph.py", line 2299, in _compile_to_module
|
| 4193 |
-
self.codegen_with_cpp_wrapper() if self.cpp_wrapper else self.codegen()
|
| 4194 |
-
^^^^^^^^^^^^^^
|
| 4195 |
-
File "/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/graph.py", line 2238, in codegen
|
| 4196 |
-
self.scheduler.codegen()
|
| 4197 |
-
File "/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/scheduler.py", line 4598, in codegen
|
| 4198 |
-
else self._codegen(self.nodes)
|
| 4199 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 4200 |
-
File "/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/scheduler.py", line 4750, in _codegen
|
| 4201 |
-
self.get_backend(device).codegen_node(node)
|
| 4202 |
-
File "/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/codegen/cuda_combined_scheduling.py", line 107, in codegen_node
|
| 4203 |
-
return self._triton_scheduling.codegen_node(node)
|
| 4204 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 4205 |
-
File "/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/codegen/simd.py", line 1371, in codegen_node
|
| 4206 |
-
return self.codegen_node_schedule(
|
| 4207 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 4208 |
-
File "/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/codegen/simd.py", line 1424, in codegen_node_schedule
|
| 4209 |
-
src_code = kernel.codegen_kernel()
|
| 4210 |
-
^^^^^^^^^^^^^^^^^^^^^^^
|
| 4211 |
-
File "/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/codegen/triton.py", line 3677, in codegen_kernel
|
| 4212 |
-
**self.inductor_meta_common(),
|
| 4213 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 4214 |
-
File "/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/codegen/triton.py", line 3501, in inductor_meta_common
|
| 4215 |
-
"backend_hash": torch.utils._triton.triton_hash_with_backend(),
|
| 4216 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 4217 |
-
File "/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/utils/_triton.py", line 165, in triton_hash_with_backend
|
| 4218 |
-
backend = triton_backend()
|
| 4219 |
-
^^^^^^^^^^^^^^^^
|
| 4220 |
-
File "/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/utils/_triton.py", line 157, in triton_backend
|
| 4221 |
-
target = driver.active.get_current_target()
|
| 4222 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 4223 |
-
File "/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/triton/runtime/driver.py", line 30, in __getattr__
|
| 4224 |
-
return getattr(self._initialize_obj(), name)
|
| 4225 |
-
^^^^^^^^^^^^^^^^^^^^^^
|
| 4226 |
-
File "/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/triton/runtime/driver.py", line 26, in _initialize_obj
|
| 4227 |
-
self._obj = self._init_fn()
|
| 4228 |
-
^^^^^^^^^^^^^^^
|
| 4229 |
-
File "/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/triton/runtime/driver.py", line 12, in _create_driver
|
| 4230 |
-
return active_drivers[0]()
|
| 4231 |
-
^^^^^^^^^^^^^^^^^^^
|
| 4232 |
-
File "/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/triton/backends/nvidia/driver.py", line 715, in __init__
|
| 4233 |
-
self.utils = CudaUtils() # TODO: make static
|
| 4234 |
-
^^^^^^^^^^^
|
| 4235 |
-
File "/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/triton/backends/nvidia/driver.py", line 62, in __init__
|
| 4236 |
-
mod = compile_module_from_src(
|
| 4237 |
-
^^^^^^^^^^^^^^^^^^^^^^^^
|
| 4238 |
-
File "/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/triton/runtime/build.py", line 88, in compile_module_from_src
|
| 4239 |
-
so = _build(name, src_path, tmpdir, library_dirs or [], include_dirs or [], libraries or [])
|
| 4240 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 4241 |
-
File "/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/triton/runtime/build.py", line 51, in _build
|
| 4242 |
-
subprocess.check_call(cc_cmd, stdout=subprocess.DEVNULL)
|
| 4243 |
-
File "/usr/lib/python3.11/subprocess.py", line 413, in check_call
|
| 4244 |
-
raise CalledProcessError(retcode, cmd)
|
| 4245 |
-
torch._inductor.exc.InductorError: CalledProcessError: Command '['/usr/bin/gcc', '/tmp/tmpyw1le_3d/cuda_utils.c', '-O3', '-shared', '-fPIC', '-Wno-psabi', '-o', '/tmp/tmpyw1le_3d/cuda_utils.cpython-311-x86_64-linux-gnu.so', '-lcuda', '-L/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/triton/backends/nvidia/lib', '-L/usr/lib/x86_64-linux-gnu', '-I/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/triton/backends/nvidia/include', '-I/tmp/tmpyw1le_3d', '-I/usr/include/python3.11']' returned non-zero exit status 1.
|
| 4246 |
-
|
| 4247 |
-
Set TORCHDYNAMO_VERBOSE=1 for the internal stack trace (please do this especially if you're reporting a bug to PyTorch). For even more developer context, set TORCH_LOGS="+dynamo"</div>
|
| 4248 |
</div>
|
| 4249 |
</div>
|
| 4250 |
</div>
|
|
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Flash Attention Benchmark</title>
|
| 7 |
<script>
|
| 8 |
// Apply theme and widget visibility immediately to prevent flicker
|
| 9 |
(function() {
|
|
|
|
| 1058 |
.line-numbers .line-number { line-height: var(--code-line-height) !important; }
|
| 1059 |
|
| 1060 |
/* Custom CSS from frontmatter */
|
| 1061 |
+
#output-setup {
|
| 1062 |
+
overflow-x: auto;
|
| 1063 |
+
}
|
| 1064 |
+
.cell-output {
|
| 1065 |
+
overflow: scroll;
|
| 1066 |
+
}
|
| 1067 |
+
.cell-stdout {
|
| 1068 |
+
width: max-content;
|
| 1069 |
+
overflow: scroll;
|
| 1070 |
+
}
|
| 1071 |
+
.cell-stderr {
|
| 1072 |
+
width: max-content;
|
| 1073 |
+
overflow: scroll;
|
| 1074 |
+
max-height: 300px;
|
| 1075 |
+
}
|
| 1076 |
|
| 1077 |
|
| 1078 |
|
|
|
|
| 3715 |
</div>
|
| 3716 |
|
| 3717 |
<div class="main-content">
|
| 3718 |
+
<div class="cell" id="cell-benchmark">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3719 |
<div class="cell-header">
|
| 3720 |
<span class="collapse-indicators">
|
| 3721 |
<span onclick="toggleCode('benchmark')" style="cursor: pointer;">▼ code</span>
|
| 3722 |
<span onclick="toggleOutput('benchmark')" style="cursor: pointer;">▼ output</span>
|
| 3723 |
<span id="uv-indicator-benchmark" onclick="toggleUvLogsFromHeader('benchmark')" style="cursor: pointer;">▶ uv-logs</span>
|
| 3724 |
</span> |
|
| 3725 |
+
Cell: benchmark | 77.34s
|
| 3726 |
| <button class="run-btn" onclick="runCell('benchmark')">▶ run</button>
|
| 3727 |
<button class="copy-btn" onclick="copyCell('benchmark')">Copy</button>
|
| 3728 |
<a href="cells/benchmark.py" target="_blank" class="raw-btn">Raw</a>
|
| 3729 |
</div>
|
| 3730 |
<div id="code-benchmark" class="cell-code" data-lines="341">
|
| 3731 |
+
<div class="highlight-with-lines">
|
| 3732 |
+
<div class="line-numbers" id="lines-benchmark">
|
| 3733 |
+
<a class="line-number" data-cell="benchmark" data-line="1" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 1, true);">1</a>
|
| 3734 |
+
<a class="line-number" data-cell="benchmark" data-line="2" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 2, true);">2</a>
|
| 3735 |
+
<a class="line-number" data-cell="benchmark" data-line="3" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 3, true);">3</a>
|
| 3736 |
+
<a class="line-number" data-cell="benchmark" data-line="4" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 4, true);">4</a>
|
| 3737 |
+
<a class="line-number" data-cell="benchmark" data-line="5" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 5, true);">5</a>
|
| 3738 |
+
<a class="line-number" data-cell="benchmark" data-line="6" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 6, true);">6</a>
|
| 3739 |
+
<a class="line-number" data-cell="benchmark" data-line="7" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 7, true);">7</a>
|
| 3740 |
+
<a class="line-number" data-cell="benchmark" data-line="8" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 8, true);">8</a>
|
| 3741 |
+
<a class="line-number" data-cell="benchmark" data-line="9" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 9, true);">9</a>
|
| 3742 |
+
<a class="line-number" data-cell="benchmark" data-line="10" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 10, true);">10</a>
|
| 3743 |
+
<a class="line-number" data-cell="benchmark" data-line="11" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 11, true);">11</a>
|
| 3744 |
+
<a class="line-number" data-cell="benchmark" data-line="12" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 12, true);">12</a>
|
| 3745 |
+
<a class="line-number" data-cell="benchmark" data-line="13" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 13, true);">13</a>
|
| 3746 |
+
<a class="line-number" data-cell="benchmark" data-line="14" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 14, true);">14</a>
|
| 3747 |
+
<a class="line-number" data-cell="benchmark" data-line="15" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 15, true);">15</a>
|
| 3748 |
+
<a class="line-number" data-cell="benchmark" data-line="16" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 16, true);">16</a>
|
| 3749 |
+
<a class="line-number" data-cell="benchmark" data-line="17" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 17, true);">17</a>
|
| 3750 |
+
<a class="line-number" data-cell="benchmark" data-line="18" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 18, true);">18</a>
|
| 3751 |
+
<a class="line-number" data-cell="benchmark" data-line="19" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 19, true);">19</a>
|
| 3752 |
+
<a class="line-number" data-cell="benchmark" data-line="20" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 20, true);">20</a>
|
| 3753 |
+
<a class="line-number" data-cell="benchmark" data-line="21" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 21, true);">21</a>
|
| 3754 |
+
<a class="line-number" data-cell="benchmark" data-line="22" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 22, true);">22</a>
|
| 3755 |
+
<a class="line-number" data-cell="benchmark" data-line="23" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 23, true);">23</a>
|
| 3756 |
+
<a class="line-number" data-cell="benchmark" data-line="24" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 24, true);">24</a>
|
| 3757 |
+
<a class="line-number" data-cell="benchmark" data-line="25" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 25, true);">25</a>
|
| 3758 |
+
<a class="line-number" data-cell="benchmark" data-line="26" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 26, true);">26</a>
|
| 3759 |
+
<a class="line-number" data-cell="benchmark" data-line="27" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 27, true);">27</a>
|
| 3760 |
+
<a class="line-number" data-cell="benchmark" data-line="28" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 28, true);">28</a>
|
| 3761 |
+
<a class="line-number" data-cell="benchmark" data-line="29" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 29, true);">29</a>
|
| 3762 |
+
<a class="line-number" data-cell="benchmark" data-line="30" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 30, true);">30</a>
|
| 3763 |
+
<a class="line-number" data-cell="benchmark" data-line="31" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 31, true);">31</a>
|
| 3764 |
+
<a class="line-number" data-cell="benchmark" data-line="32" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 32, true);">32</a>
|
| 3765 |
+
<a class="line-number" data-cell="benchmark" data-line="33" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 33, true);">33</a>
|
| 3766 |
+
<a class="line-number" data-cell="benchmark" data-line="34" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 34, true);">34</a>
|
| 3767 |
+
<a class="line-number" data-cell="benchmark" data-line="35" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 35, true);">35</a>
|
| 3768 |
+
<a class="line-number" data-cell="benchmark" data-line="36" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 36, true);">36</a>
|
| 3769 |
+
<a class="line-number" data-cell="benchmark" data-line="37" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 37, true);">37</a>
|
| 3770 |
+
<a class="line-number" data-cell="benchmark" data-line="38" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 38, true);">38</a>
|
| 3771 |
+
<a class="line-number" data-cell="benchmark" data-line="39" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 39, true);">39</a>
|
| 3772 |
+
<a class="line-number" data-cell="benchmark" data-line="40" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 40, true);">40</a>
|
| 3773 |
+
<a class="line-number" data-cell="benchmark" data-line="41" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 41, true);">41</a>
|
| 3774 |
+
<a class="line-number" data-cell="benchmark" data-line="42" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 42, true);">42</a>
|
| 3775 |
+
<a class="line-number" data-cell="benchmark" data-line="43" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 43, true);">43</a>
|
| 3776 |
+
<a class="line-number" data-cell="benchmark" data-line="44" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 44, true);">44</a>
|
| 3777 |
+
<a class="line-number" data-cell="benchmark" data-line="45" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 45, true);">45</a>
|
| 3778 |
+
<a class="line-number" data-cell="benchmark" data-line="46" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 46, true);">46</a>
|
| 3779 |
+
<a class="line-number" data-cell="benchmark" data-line="47" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 47, true);">47</a>
|
| 3780 |
+
<a class="line-number" data-cell="benchmark" data-line="48" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 48, true);">48</a>
|
| 3781 |
+
<a class="line-number" data-cell="benchmark" data-line="49" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 49, true);">49</a>
|
| 3782 |
+
<a class="line-number" data-cell="benchmark" data-line="50" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 50, true);">50</a>
|
| 3783 |
+
<a class="line-number" data-cell="benchmark" data-line="51" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 51, true);">51</a>
|
| 3784 |
+
<a class="line-number" data-cell="benchmark" data-line="52" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 52, true);">52</a>
|
| 3785 |
+
<a class="line-number" data-cell="benchmark" data-line="53" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 53, true);">53</a>
|
| 3786 |
+
<a class="line-number" data-cell="benchmark" data-line="54" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 54, true);">54</a>
|
| 3787 |
+
<a class="line-number" data-cell="benchmark" data-line="55" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 55, true);">55</a>
|
| 3788 |
+
<a class="line-number" data-cell="benchmark" data-line="56" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 56, true);">56</a>
|
| 3789 |
+
<a class="line-number" data-cell="benchmark" data-line="57" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 57, true);">57</a>
|
| 3790 |
+
<a class="line-number" data-cell="benchmark" data-line="58" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 58, true);">58</a>
|
| 3791 |
+
<a class="line-number" data-cell="benchmark" data-line="59" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 59, true);">59</a>
|
| 3792 |
+
<a class="line-number" data-cell="benchmark" data-line="60" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 60, true);">60</a>
|
| 3793 |
+
<a class="line-number" data-cell="benchmark" data-line="61" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 61, true);">61</a>
|
| 3794 |
+
<a class="line-number" data-cell="benchmark" data-line="62" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 62, true);">62</a>
|
| 3795 |
+
<a class="line-number" data-cell="benchmark" data-line="63" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 63, true);">63</a>
|
| 3796 |
+
<a class="line-number" data-cell="benchmark" data-line="64" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 64, true);">64</a>
|
| 3797 |
+
<a class="line-number" data-cell="benchmark" data-line="65" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 65, true);">65</a>
|
| 3798 |
+
<a class="line-number" data-cell="benchmark" data-line="66" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 66, true);">66</a>
|
| 3799 |
+
<a class="line-number" data-cell="benchmark" data-line="67" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 67, true);">67</a>
|
| 3800 |
+
<a class="line-number" data-cell="benchmark" data-line="68" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 68, true);">68</a>
|
| 3801 |
+
<a class="line-number" data-cell="benchmark" data-line="69" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 69, true);">69</a>
|
| 3802 |
+
<a class="line-number" data-cell="benchmark" data-line="70" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 70, true);">70</a>
|
| 3803 |
+
<a class="line-number" data-cell="benchmark" data-line="71" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 71, true);">71</a>
|
| 3804 |
+
<a class="line-number" data-cell="benchmark" data-line="72" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 72, true);">72</a>
|
| 3805 |
+
<a class="line-number" data-cell="benchmark" data-line="73" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 73, true);">73</a>
|
| 3806 |
+
<a class="line-number" data-cell="benchmark" data-line="74" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 74, true);">74</a>
|
| 3807 |
+
<a class="line-number" data-cell="benchmark" data-line="75" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 75, true);">75</a>
|
| 3808 |
+
<a class="line-number" data-cell="benchmark" data-line="76" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 76, true);">76</a>
|
| 3809 |
+
<a class="line-number" data-cell="benchmark" data-line="77" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 77, true);">77</a>
|
| 3810 |
+
<a class="line-number" data-cell="benchmark" data-line="78" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 78, true);">78</a>
|
| 3811 |
+
<a class="line-number" data-cell="benchmark" data-line="79" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 79, true);">79</a>
|
| 3812 |
+
<a class="line-number" data-cell="benchmark" data-line="80" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 80, true);">80</a>
|
| 3813 |
+
<a class="line-number" data-cell="benchmark" data-line="81" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 81, true);">81</a>
|
| 3814 |
+
<a class="line-number" data-cell="benchmark" data-line="82" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 82, true);">82</a>
|
| 3815 |
+
<a class="line-number" data-cell="benchmark" data-line="83" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 83, true);">83</a>
|
| 3816 |
+
<a class="line-number" data-cell="benchmark" data-line="84" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 84, true);">84</a>
|
| 3817 |
+
<a class="line-number" data-cell="benchmark" data-line="85" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 85, true);">85</a>
|
| 3818 |
+
<a class="line-number" data-cell="benchmark" data-line="86" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 86, true);">86</a>
|
| 3819 |
+
<a class="line-number" data-cell="benchmark" data-line="87" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 87, true);">87</a>
|
| 3820 |
+
<a class="line-number" data-cell="benchmark" data-line="88" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 88, true);">88</a>
|
| 3821 |
+
<a class="line-number" data-cell="benchmark" data-line="89" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 89, true);">89</a>
|
| 3822 |
+
<a class="line-number" data-cell="benchmark" data-line="90" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 90, true);">90</a>
|
| 3823 |
+
<a class="line-number" data-cell="benchmark" data-line="91" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 91, true);">91</a>
|
| 3824 |
+
<a class="line-number" data-cell="benchmark" data-line="92" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 92, true);">92</a>
|
| 3825 |
+
<a class="line-number" data-cell="benchmark" data-line="93" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 93, true);">93</a>
|
| 3826 |
+
<a class="line-number" data-cell="benchmark" data-line="94" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 94, true);">94</a>
|
| 3827 |
+
<a class="line-number" data-cell="benchmark" data-line="95" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 95, true);">95</a>
|
| 3828 |
+
<a class="line-number" data-cell="benchmark" data-line="96" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 96, true);">96</a>
|
| 3829 |
+
<a class="line-number" data-cell="benchmark" data-line="97" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 97, true);">97</a>
|
| 3830 |
+
<a class="line-number" data-cell="benchmark" data-line="98" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 98, true);">98</a>
|
| 3831 |
+
<a class="line-number" data-cell="benchmark" data-line="99" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 99, true);">99</a>
|
| 3832 |
+
<a class="line-number" data-cell="benchmark" data-line="100" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 100, true);">100</a>
|
| 3833 |
+
<a class="line-number" data-cell="benchmark" data-line="101" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 101, true);">101</a>
|
| 3834 |
+
<a class="line-number" data-cell="benchmark" data-line="102" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 102, true);">102</a>
|
| 3835 |
+
<a class="line-number" data-cell="benchmark" data-line="103" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 103, true);">103</a>
|
| 3836 |
+
<a class="line-number" data-cell="benchmark" data-line="104" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 104, true);">104</a>
|
| 3837 |
+
<a class="line-number" data-cell="benchmark" data-line="105" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 105, true);">105</a>
|
| 3838 |
+
<a class="line-number" data-cell="benchmark" data-line="106" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 106, true);">106</a>
|
| 3839 |
+
<a class="line-number" data-cell="benchmark" data-line="107" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 107, true);">107</a>
|
| 3840 |
+
<a class="line-number" data-cell="benchmark" data-line="108" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 108, true);">108</a>
|
| 3841 |
+
<a class="line-number" data-cell="benchmark" data-line="109" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 109, true);">109</a>
|
| 3842 |
+
<a class="line-number" data-cell="benchmark" data-line="110" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 110, true);">110</a>
|
| 3843 |
+
<a class="line-number" data-cell="benchmark" data-line="111" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 111, true);">111</a>
|
| 3844 |
+
<a class="line-number" data-cell="benchmark" data-line="112" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 112, true);">112</a>
|
| 3845 |
+
<a class="line-number" data-cell="benchmark" data-line="113" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 113, true);">113</a>
|
| 3846 |
+
<a class="line-number" data-cell="benchmark" data-line="114" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 114, true);">114</a>
|
| 3847 |
+
<a class="line-number" data-cell="benchmark" data-line="115" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 115, true);">115</a>
|
| 3848 |
+
<a class="line-number" data-cell="benchmark" data-line="116" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 116, true);">116</a>
|
| 3849 |
+
<a class="line-number" data-cell="benchmark" data-line="117" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 117, true);">117</a>
|
| 3850 |
+
<a class="line-number" data-cell="benchmark" data-line="118" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 118, true);">118</a>
|
| 3851 |
+
<a class="line-number" data-cell="benchmark" data-line="119" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 119, true);">119</a>
|
| 3852 |
+
<a class="line-number" data-cell="benchmark" data-line="120" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 120, true);">120</a>
|
| 3853 |
+
<a class="line-number" data-cell="benchmark" data-line="121" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 121, true);">121</a>
|
| 3854 |
+
<a class="line-number" data-cell="benchmark" data-line="122" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 122, true);">122</a>
|
| 3855 |
+
<a class="line-number" data-cell="benchmark" data-line="123" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 123, true);">123</a>
|
| 3856 |
+
<a class="line-number" data-cell="benchmark" data-line="124" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 124, true);">124</a>
|
| 3857 |
+
<a class="line-number" data-cell="benchmark" data-line="125" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 125, true);">125</a>
|
| 3858 |
+
<a class="line-number" data-cell="benchmark" data-line="126" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 126, true);">126</a>
|
| 3859 |
+
<a class="line-number" data-cell="benchmark" data-line="127" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 127, true);">127</a>
|
| 3860 |
+
<a class="line-number" data-cell="benchmark" data-line="128" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 128, true);">128</a>
|
| 3861 |
+
<a class="line-number" data-cell="benchmark" data-line="129" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 129, true);">129</a>
|
| 3862 |
+
<a class="line-number" data-cell="benchmark" data-line="130" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 130, true);">130</a>
|
| 3863 |
+
<a class="line-number" data-cell="benchmark" data-line="131" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 131, true);">131</a>
|
| 3864 |
+
<a class="line-number" data-cell="benchmark" data-line="132" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 132, true);">132</a>
|
| 3865 |
+
<a class="line-number" data-cell="benchmark" data-line="133" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 133, true);">133</a>
|
| 3866 |
+
<a class="line-number" data-cell="benchmark" data-line="134" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 134, true);">134</a>
|
| 3867 |
+
<a class="line-number" data-cell="benchmark" data-line="135" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 135, true);">135</a>
|
| 3868 |
+
<a class="line-number" data-cell="benchmark" data-line="136" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 136, true);">136</a>
|
| 3869 |
+
<a class="line-number" data-cell="benchmark" data-line="137" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 137, true);">137</a>
|
| 3870 |
+
<a class="line-number" data-cell="benchmark" data-line="138" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 138, true);">138</a>
|
| 3871 |
+
<a class="line-number" data-cell="benchmark" data-line="139" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 139, true);">139</a>
|
| 3872 |
+
<a class="line-number" data-cell="benchmark" data-line="140" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 140, true);">140</a>
|
| 3873 |
+
<a class="line-number" data-cell="benchmark" data-line="141" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 141, true);">141</a>
|
| 3874 |
+
<a class="line-number" data-cell="benchmark" data-line="142" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 142, true);">142</a>
|
| 3875 |
+
<a class="line-number" data-cell="benchmark" data-line="143" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 143, true);">143</a>
|
| 3876 |
+
<a class="line-number" data-cell="benchmark" data-line="144" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 144, true);">144</a>
|
| 3877 |
+
<a class="line-number" data-cell="benchmark" data-line="145" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 145, true);">145</a>
|
| 3878 |
+
<a class="line-number" data-cell="benchmark" data-line="146" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 146, true);">146</a>
|
| 3879 |
+
<a class="line-number" data-cell="benchmark" data-line="147" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 147, true);">147</a>
|
| 3880 |
+
<a class="line-number" data-cell="benchmark" data-line="148" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 148, true);">148</a>
|
| 3881 |
+
<a class="line-number" data-cell="benchmark" data-line="149" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 149, true);">149</a>
|
| 3882 |
+
<a class="line-number" data-cell="benchmark" data-line="150" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 150, true);">150</a>
|
| 3883 |
+
<a class="line-number" data-cell="benchmark" data-line="151" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 151, true);">151</a>
|
| 3884 |
+
<a class="line-number" data-cell="benchmark" data-line="152" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 152, true);">152</a>
|
| 3885 |
+
<a class="line-number" data-cell="benchmark" data-line="153" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 153, true);">153</a>
|
| 3886 |
+
<a class="line-number" data-cell="benchmark" data-line="154" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 154, true);">154</a>
|
| 3887 |
+
<a class="line-number" data-cell="benchmark" data-line="155" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 155, true);">155</a>
|
| 3888 |
+
<a class="line-number" data-cell="benchmark" data-line="156" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 156, true);">156</a>
|
| 3889 |
+
<a class="line-number" data-cell="benchmark" data-line="157" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 157, true);">157</a>
|
| 3890 |
+
<a class="line-number" data-cell="benchmark" data-line="158" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 158, true);">158</a>
|
| 3891 |
+
<a class="line-number" data-cell="benchmark" data-line="159" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 159, true);">159</a>
|
| 3892 |
+
<a class="line-number" data-cell="benchmark" data-line="160" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 160, true);">160</a>
|
| 3893 |
+
<a class="line-number" data-cell="benchmark" data-line="161" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 161, true);">161</a>
|
| 3894 |
+
<a class="line-number" data-cell="benchmark" data-line="162" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 162, true);">162</a>
|
| 3895 |
+
<a class="line-number" data-cell="benchmark" data-line="163" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 163, true);">163</a>
|
| 3896 |
+
<a class="line-number" data-cell="benchmark" data-line="164" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 164, true);">164</a>
|
| 3897 |
+
<a class="line-number" data-cell="benchmark" data-line="165" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 165, true);">165</a>
|
| 3898 |
+
<a class="line-number" data-cell="benchmark" data-line="166" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 166, true);">166</a>
|
| 3899 |
+
<a class="line-number" data-cell="benchmark" data-line="167" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 167, true);">167</a>
|
| 3900 |
+
<a class="line-number" data-cell="benchmark" data-line="168" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 168, true);">168</a>
|
| 3901 |
+
<a class="line-number" data-cell="benchmark" data-line="169" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 169, true);">169</a>
|
| 3902 |
+
<a class="line-number" data-cell="benchmark" data-line="170" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 170, true);">170</a>
|
| 3903 |
+
<a class="line-number" data-cell="benchmark" data-line="171" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 171, true);">171</a>
|
| 3904 |
+
<a class="line-number" data-cell="benchmark" data-line="172" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 172, true);">172</a>
|
| 3905 |
+
<a class="line-number" data-cell="benchmark" data-line="173" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 173, true);">173</a>
|
| 3906 |
+
<a class="line-number" data-cell="benchmark" data-line="174" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 174, true);">174</a>
|
| 3907 |
+
<a class="line-number" data-cell="benchmark" data-line="175" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 175, true);">175</a>
|
| 3908 |
+
<a class="line-number" data-cell="benchmark" data-line="176" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 176, true);">176</a>
|
| 3909 |
+
<a class="line-number" data-cell="benchmark" data-line="177" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 177, true);">177</a>
|
| 3910 |
+
<a class="line-number" data-cell="benchmark" data-line="178" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 178, true);">178</a>
|
| 3911 |
+
<a class="line-number" data-cell="benchmark" data-line="179" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 179, true);">179</a>
|
| 3912 |
+
<a class="line-number" data-cell="benchmark" data-line="180" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 180, true);">180</a>
|
| 3913 |
+
<a class="line-number" data-cell="benchmark" data-line="181" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 181, true);">181</a>
|
| 3914 |
+
<a class="line-number" data-cell="benchmark" data-line="182" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 182, true);">182</a>
|
| 3915 |
+
<a class="line-number" data-cell="benchmark" data-line="183" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 183, true);">183</a>
|
| 3916 |
+
<a class="line-number" data-cell="benchmark" data-line="184" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 184, true);">184</a>
|
| 3917 |
+
<a class="line-number" data-cell="benchmark" data-line="185" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 185, true);">185</a>
|
| 3918 |
+
<a class="line-number" data-cell="benchmark" data-line="186" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 186, true);">186</a>
|
| 3919 |
+
<a class="line-number" data-cell="benchmark" data-line="187" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 187, true);">187</a>
|
| 3920 |
+
<a class="line-number" data-cell="benchmark" data-line="188" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 188, true);">188</a>
|
| 3921 |
+
<a class="line-number" data-cell="benchmark" data-line="189" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 189, true);">189</a>
|
| 3922 |
+
<a class="line-number" data-cell="benchmark" data-line="190" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 190, true);">190</a>
|
| 3923 |
+
<a class="line-number" data-cell="benchmark" data-line="191" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 191, true);">191</a>
|
| 3924 |
+
<a class="line-number" data-cell="benchmark" data-line="192" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 192, true);">192</a>
|
| 3925 |
+
<a class="line-number" data-cell="benchmark" data-line="193" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 193, true);">193</a>
|
| 3926 |
+
<a class="line-number" data-cell="benchmark" data-line="194" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 194, true);">194</a>
|
| 3927 |
+
<a class="line-number" data-cell="benchmark" data-line="195" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 195, true);">195</a>
|
| 3928 |
+
<a class="line-number" data-cell="benchmark" data-line="196" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 196, true);">196</a>
|
| 3929 |
+
<a class="line-number" data-cell="benchmark" data-line="197" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 197, true);">197</a>
|
| 3930 |
+
<a class="line-number" data-cell="benchmark" data-line="198" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 198, true);">198</a>
|
| 3931 |
+
<a class="line-number" data-cell="benchmark" data-line="199" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 199, true);">199</a>
|
| 3932 |
+
<a class="line-number" data-cell="benchmark" data-line="200" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 200, true);">200</a>
|
| 3933 |
+
<a class="line-number" data-cell="benchmark" data-line="201" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 201, true);">201</a>
|
| 3934 |
+
<a class="line-number" data-cell="benchmark" data-line="202" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 202, true);">202</a>
|
| 3935 |
+
<a class="line-number" data-cell="benchmark" data-line="203" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 203, true);">203</a>
|
| 3936 |
+
<a class="line-number" data-cell="benchmark" data-line="204" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 204, true);">204</a>
|
| 3937 |
+
<a class="line-number" data-cell="benchmark" data-line="205" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 205, true);">205</a>
|
| 3938 |
+
<a class="line-number" data-cell="benchmark" data-line="206" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 206, true);">206</a>
|
| 3939 |
+
<a class="line-number" data-cell="benchmark" data-line="207" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 207, true);">207</a>
|
| 3940 |
+
<a class="line-number" data-cell="benchmark" data-line="208" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 208, true);">208</a>
|
| 3941 |
+
<a class="line-number" data-cell="benchmark" data-line="209" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 209, true);">209</a>
|
| 3942 |
+
<a class="line-number" data-cell="benchmark" data-line="210" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 210, true);">210</a>
|
| 3943 |
+
<a class="line-number" data-cell="benchmark" data-line="211" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 211, true);">211</a>
|
| 3944 |
+
<a class="line-number" data-cell="benchmark" data-line="212" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 212, true);">212</a>
|
| 3945 |
+
<a class="line-number" data-cell="benchmark" data-line="213" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 213, true);">213</a>
|
| 3946 |
+
<a class="line-number" data-cell="benchmark" data-line="214" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 214, true);">214</a>
|
| 3947 |
+
<a class="line-number" data-cell="benchmark" data-line="215" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 215, true);">215</a>
|
| 3948 |
+
<a class="line-number" data-cell="benchmark" data-line="216" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 216, true);">216</a>
|
| 3949 |
+
<a class="line-number" data-cell="benchmark" data-line="217" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 217, true);">217</a>
|
| 3950 |
+
<a class="line-number" data-cell="benchmark" data-line="218" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 218, true);">218</a>
|
| 3951 |
+
<a class="line-number" data-cell="benchmark" data-line="219" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 219, true);">219</a>
|
| 3952 |
+
<a class="line-number" data-cell="benchmark" data-line="220" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 220, true);">220</a>
|
| 3953 |
+
<a class="line-number" data-cell="benchmark" data-line="221" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 221, true);">221</a>
|
| 3954 |
+
<a class="line-number" data-cell="benchmark" data-line="222" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 222, true);">222</a>
|
| 3955 |
+
<a class="line-number" data-cell="benchmark" data-line="223" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 223, true);">223</a>
|
| 3956 |
+
<a class="line-number" data-cell="benchmark" data-line="224" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 224, true);">224</a>
|
| 3957 |
+
<a class="line-number" data-cell="benchmark" data-line="225" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 225, true);">225</a>
|
| 3958 |
+
<a class="line-number" data-cell="benchmark" data-line="226" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 226, true);">226</a>
|
| 3959 |
+
<a class="line-number" data-cell="benchmark" data-line="227" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 227, true);">227</a>
|
| 3960 |
+
<a class="line-number" data-cell="benchmark" data-line="228" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 228, true);">228</a>
|
| 3961 |
+
<a class="line-number" data-cell="benchmark" data-line="229" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 229, true);">229</a>
|
| 3962 |
+
<a class="line-number" data-cell="benchmark" data-line="230" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 230, true);">230</a>
|
| 3963 |
+
<a class="line-number" data-cell="benchmark" data-line="231" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 231, true);">231</a>
|
| 3964 |
+
<a class="line-number" data-cell="benchmark" data-line="232" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 232, true);">232</a>
|
| 3965 |
+
<a class="line-number" data-cell="benchmark" data-line="233" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 233, true);">233</a>
|
| 3966 |
+
<a class="line-number" data-cell="benchmark" data-line="234" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 234, true);">234</a>
|
| 3967 |
+
<a class="line-number" data-cell="benchmark" data-line="235" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 235, true);">235</a>
|
| 3968 |
+
<a class="line-number" data-cell="benchmark" data-line="236" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 236, true);">236</a>
|
| 3969 |
+
<a class="line-number" data-cell="benchmark" data-line="237" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 237, true);">237</a>
|
| 3970 |
+
<a class="line-number" data-cell="benchmark" data-line="238" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 238, true);">238</a>
|
| 3971 |
+
<a class="line-number" data-cell="benchmark" data-line="239" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 239, true);">239</a>
|
| 3972 |
+
<a class="line-number" data-cell="benchmark" data-line="240" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 240, true);">240</a>
|
| 3973 |
+
<a class="line-number" data-cell="benchmark" data-line="241" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 241, true);">241</a>
|
| 3974 |
+
<a class="line-number" data-cell="benchmark" data-line="242" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 242, true);">242</a>
|
| 3975 |
+
<a class="line-number" data-cell="benchmark" data-line="243" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 243, true);">243</a>
|
| 3976 |
+
<a class="line-number" data-cell="benchmark" data-line="244" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 244, true);">244</a>
|
| 3977 |
+
<a class="line-number" data-cell="benchmark" data-line="245" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 245, true);">245</a>
|
| 3978 |
+
<a class="line-number" data-cell="benchmark" data-line="246" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 246, true);">246</a>
|
| 3979 |
+
<a class="line-number" data-cell="benchmark" data-line="247" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 247, true);">247</a>
|
| 3980 |
+
<a class="line-number" data-cell="benchmark" data-line="248" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 248, true);">248</a>
|
| 3981 |
+
<a class="line-number" data-cell="benchmark" data-line="249" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 249, true);">249</a>
|
| 3982 |
+
<a class="line-number" data-cell="benchmark" data-line="250" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 250, true);">250</a>
|
| 3983 |
+
<a class="line-number" data-cell="benchmark" data-line="251" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 251, true);">251</a>
|
| 3984 |
+
<a class="line-number" data-cell="benchmark" data-line="252" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 252, true);">252</a>
|
| 3985 |
+
<a class="line-number" data-cell="benchmark" data-line="253" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 253, true);">253</a>
|
| 3986 |
+
<a class="line-number" data-cell="benchmark" data-line="254" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 254, true);">254</a>
|
| 3987 |
+
<a class="line-number" data-cell="benchmark" data-line="255" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 255, true);">255</a>
|
| 3988 |
+
<a class="line-number" data-cell="benchmark" data-line="256" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 256, true);">256</a>
|
| 3989 |
+
<a class="line-number" data-cell="benchmark" data-line="257" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 257, true);">257</a>
|
| 3990 |
+
<a class="line-number" data-cell="benchmark" data-line="258" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 258, true);">258</a>
|
| 3991 |
+
<a class="line-number" data-cell="benchmark" data-line="259" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 259, true);">259</a>
|
| 3992 |
+
<a class="line-number" data-cell="benchmark" data-line="260" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 260, true);">260</a>
|
| 3993 |
+
<a class="line-number" data-cell="benchmark" data-line="261" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 261, true);">261</a>
|
| 3994 |
+
<a class="line-number" data-cell="benchmark" data-line="262" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 262, true);">262</a>
|
| 3995 |
+
<a class="line-number" data-cell="benchmark" data-line="263" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 263, true);">263</a>
|
| 3996 |
+
<a class="line-number" data-cell="benchmark" data-line="264" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 264, true);">264</a>
|
| 3997 |
+
<a class="line-number" data-cell="benchmark" data-line="265" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 265, true);">265</a>
|
| 3998 |
+
<a class="line-number" data-cell="benchmark" data-line="266" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 266, true);">266</a>
|
| 3999 |
+
<a class="line-number" data-cell="benchmark" data-line="267" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 267, true);">267</a>
|
| 4000 |
+
<a class="line-number" data-cell="benchmark" data-line="268" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 268, true);">268</a>
|
| 4001 |
+
<a class="line-number" data-cell="benchmark" data-line="269" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 269, true);">269</a>
|
| 4002 |
+
<a class="line-number" data-cell="benchmark" data-line="270" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 270, true);">270</a>
|
| 4003 |
+
<a class="line-number" data-cell="benchmark" data-line="271" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 271, true);">271</a>
|
| 4004 |
+
<a class="line-number" data-cell="benchmark" data-line="272" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 272, true);">272</a>
|
| 4005 |
+
<a class="line-number" data-cell="benchmark" data-line="273" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 273, true);">273</a>
|
| 4006 |
+
<a class="line-number" data-cell="benchmark" data-line="274" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 274, true);">274</a>
|
| 4007 |
+
<a class="line-number" data-cell="benchmark" data-line="275" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 275, true);">275</a>
|
| 4008 |
+
<a class="line-number" data-cell="benchmark" data-line="276" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 276, true);">276</a>
|
| 4009 |
+
<a class="line-number" data-cell="benchmark" data-line="277" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 277, true);">277</a>
|
| 4010 |
+
<a class="line-number" data-cell="benchmark" data-line="278" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 278, true);">278</a>
|
| 4011 |
+
<a class="line-number" data-cell="benchmark" data-line="279" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 279, true);">279</a>
|
| 4012 |
+
<a class="line-number" data-cell="benchmark" data-line="280" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 280, true);">280</a>
|
| 4013 |
+
<a class="line-number" data-cell="benchmark" data-line="281" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 281, true);">281</a>
|
| 4014 |
+
<a class="line-number" data-cell="benchmark" data-line="282" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 282, true);">282</a>
|
| 4015 |
+
<a class="line-number" data-cell="benchmark" data-line="283" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 283, true);">283</a>
|
| 4016 |
+
<a class="line-number" data-cell="benchmark" data-line="284" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 284, true);">284</a>
|
| 4017 |
+
<a class="line-number" data-cell="benchmark" data-line="285" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 285, true);">285</a>
|
| 4018 |
+
<a class="line-number" data-cell="benchmark" data-line="286" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 286, true);">286</a>
|
| 4019 |
+
<a class="line-number" data-cell="benchmark" data-line="287" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 287, true);">287</a>
|
| 4020 |
+
<a class="line-number" data-cell="benchmark" data-line="288" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 288, true);">288</a>
|
| 4021 |
+
<a class="line-number" data-cell="benchmark" data-line="289" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 289, true);">289</a>
|
| 4022 |
+
<a class="line-number" data-cell="benchmark" data-line="290" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 290, true);">290</a>
|
| 4023 |
+
<a class="line-number" data-cell="benchmark" data-line="291" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 291, true);">291</a>
|
| 4024 |
+
<a class="line-number" data-cell="benchmark" data-line="292" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 292, true);">292</a>
|
| 4025 |
+
<a class="line-number" data-cell="benchmark" data-line="293" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 293, true);">293</a>
|
| 4026 |
+
<a class="line-number" data-cell="benchmark" data-line="294" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 294, true);">294</a>
|
| 4027 |
+
<a class="line-number" data-cell="benchmark" data-line="295" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 295, true);">295</a>
|
| 4028 |
+
<a class="line-number" data-cell="benchmark" data-line="296" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 296, true);">296</a>
|
| 4029 |
+
<a class="line-number" data-cell="benchmark" data-line="297" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 297, true);">297</a>
|
| 4030 |
+
<a class="line-number" data-cell="benchmark" data-line="298" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 298, true);">298</a>
|
| 4031 |
+
<a class="line-number" data-cell="benchmark" data-line="299" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 299, true);">299</a>
|
| 4032 |
+
<a class="line-number" data-cell="benchmark" data-line="300" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 300, true);">300</a>
|
| 4033 |
+
<a class="line-number" data-cell="benchmark" data-line="301" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 301, true);">301</a>
|
| 4034 |
+
<a class="line-number" data-cell="benchmark" data-line="302" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 302, true);">302</a>
|
| 4035 |
+
<a class="line-number" data-cell="benchmark" data-line="303" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 303, true);">303</a>
|
| 4036 |
+
<a class="line-number" data-cell="benchmark" data-line="304" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 304, true);">304</a>
|
| 4037 |
+
<a class="line-number" data-cell="benchmark" data-line="305" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 305, true);">305</a>
|
| 4038 |
+
<a class="line-number" data-cell="benchmark" data-line="306" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 306, true);">306</a>
|
| 4039 |
+
<a class="line-number" data-cell="benchmark" data-line="307" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 307, true);">307</a>
|
| 4040 |
+
<a class="line-number" data-cell="benchmark" data-line="308" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 308, true);">308</a>
|
| 4041 |
+
<a class="line-number" data-cell="benchmark" data-line="309" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 309, true);">309</a>
|
| 4042 |
+
<a class="line-number" data-cell="benchmark" data-line="310" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 310, true);">310</a>
|
| 4043 |
+
<a class="line-number" data-cell="benchmark" data-line="311" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 311, true);">311</a>
|
| 4044 |
+
<a class="line-number" data-cell="benchmark" data-line="312" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 312, true);">312</a>
|
| 4045 |
+
<a class="line-number" data-cell="benchmark" data-line="313" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 313, true);">313</a>
|
| 4046 |
+
<a class="line-number" data-cell="benchmark" data-line="314" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 314, true);">314</a>
|
| 4047 |
+
<a class="line-number" data-cell="benchmark" data-line="315" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 315, true);">315</a>
|
| 4048 |
+
<a class="line-number" data-cell="benchmark" data-line="316" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 316, true);">316</a>
|
| 4049 |
+
<a class="line-number" data-cell="benchmark" data-line="317" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 317, true);">317</a>
|
| 4050 |
+
<a class="line-number" data-cell="benchmark" data-line="318" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 318, true);">318</a>
|
| 4051 |
+
<a class="line-number" data-cell="benchmark" data-line="319" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 319, true);">319</a>
|
| 4052 |
+
<a class="line-number" data-cell="benchmark" data-line="320" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 320, true);">320</a>
|
| 4053 |
+
<a class="line-number" data-cell="benchmark" data-line="321" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 321, true);">321</a>
|
| 4054 |
+
<a class="line-number" data-cell="benchmark" data-line="322" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 322, true);">322</a>
|
| 4055 |
+
<a class="line-number" data-cell="benchmark" data-line="323" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 323, true);">323</a>
|
| 4056 |
+
<a class="line-number" data-cell="benchmark" data-line="324" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 324, true);">324</a>
|
| 4057 |
+
<a class="line-number" data-cell="benchmark" data-line="325" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 325, true);">325</a>
|
| 4058 |
+
<a class="line-number" data-cell="benchmark" data-line="326" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 326, true);">326</a>
|
| 4059 |
+
<a class="line-number" data-cell="benchmark" data-line="327" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 327, true);">327</a>
|
| 4060 |
+
<a class="line-number" data-cell="benchmark" data-line="328" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 328, true);">328</a>
|
| 4061 |
+
<a class="line-number" data-cell="benchmark" data-line="329" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 329, true);">329</a>
|
| 4062 |
+
<a class="line-number" data-cell="benchmark" data-line="330" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 330, true);">330</a>
|
| 4063 |
+
<a class="line-number" data-cell="benchmark" data-line="331" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 331, true);">331</a>
|
| 4064 |
+
<a class="line-number" data-cell="benchmark" data-line="332" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 332, true);">332</a>
|
| 4065 |
+
<a class="line-number" data-cell="benchmark" data-line="333" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 333, true);">333</a>
|
| 4066 |
+
<a class="line-number" data-cell="benchmark" data-line="334" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 334, true);">334</a>
|
| 4067 |
+
<a class="line-number" data-cell="benchmark" data-line="335" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 335, true);">335</a>
|
| 4068 |
+
<a class="line-number" data-cell="benchmark" data-line="336" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 336, true);">336</a>
|
| 4069 |
+
<a class="line-number" data-cell="benchmark" data-line="337" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 337, true);">337</a>
|
| 4070 |
+
<a class="line-number" data-cell="benchmark" data-line="338" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 338, true);">338</a>
|
| 4071 |
+
<a class="line-number" data-cell="benchmark" data-line="339" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 339, true);">339</a>
|
| 4072 |
+
<a class="line-number" data-cell="benchmark" data-line="340" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 340, true);">340</a>
|
| 4073 |
+
<a class="line-number" data-cell="benchmark" data-line="341" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 341, true);">341</a>
|
| 4074 |
+
</div>
|
| 4075 |
<div class="code-wrap">
|
| 4076 |
<div class="highlight"><pre><span></span><span class="c1"># /// script</span>
|
| 4077 |
<span class="c1"># dependencies = [</span>
|
|
|
|
| 4419 |
<div class="code-line-highlight" id="line-highlight-benchmark"></div>
|
| 4420 |
</div>
|
| 4421 |
</div>
|
| 4422 |
+
</div>
|
| 4423 |
<div id="output-benchmark" class="cell-output">
|
| 4424 |
<div class="cell-stdout">Flash Attention 2 not found.
|
| 4425 |
Flash Attention 3 not found.
|
|
|
|
| 4429 |
|
| 4430 |
|
| 4431 |
===== Testing shape: (1, 4224, 24, 128) =====
|
| 4432 |
+
torch_cudnn : absmax=0.001009, mae=0.000075, mse=0.000000
|
| 4433 |
+
torch_cudnn_compile_d : absmax=0.001009, mae=0.000075, mse=0.000000
|
| 4434 |
+
torch_cudnn_compile_ma : absmax=0.001009, mae=0.000075, mse=0.000000
|
| 4435 |
+
torch_flash : absmax=0.001009, mae=0.000075, mse=0.000000
|
| 4436 |
+
torch_flash_compile_d : absmax=0.001009, mae=0.000075, mse=0.000000
|
| 4437 |
+
torch_flash_compile_ma : absmax=0.001009, mae=0.000075, mse=0.000000
|
| 4438 |
+
hf_flash_attn : absmax=0.001009, mae=0.000075, mse=0.000000
|
| 4439 |
+
hf_flash_attn3 : absmax=0.001009, mae=0.000075, mse=0.000000
|
| 4440 |
+
|
| 4441 |
+
|
| 4442 |
+
===== Testing shape: (1, 4352, 24, 128) =====
|
| 4443 |
+
torch_cudnn : absmax=0.001015, mae=0.000073, mse=0.000000
|
| 4444 |
+
torch_cudnn_compile_d : absmax=0.001015, mae=0.000073, mse=0.000000
|
| 4445 |
+
torch_cudnn_compile_ma : absmax=0.001015, mae=0.000073, mse=0.000000
|
| 4446 |
+
torch_flash : absmax=0.001015, mae=0.000073, mse=0.000000
|
| 4447 |
+
torch_flash_compile_d : absmax=0.001015, mae=0.000073, mse=0.000000
|
| 4448 |
+
torch_flash_compile_ma : absmax=0.001015, mae=0.000073, mse=0.000000
|
| 4449 |
+
hf_flash_attn : absmax=0.001015, mae=0.000073, mse=0.000000
|
| 4450 |
+
hf_flash_attn3 : absmax=0.001015, mae=0.000073, mse=0.000000
|
| 4451 |
+
|
| 4452 |
+
|
| 4453 |
+
===== Testing shape: (1, 4416, 24, 128) =====
|
| 4454 |
+
torch_cudnn : absmax=0.001374, mae=0.000073, mse=0.000000
|
| 4455 |
+
torch_cudnn_compile_d : absmax=0.001374, mae=0.000073, mse=0.000000
|
| 4456 |
+
torch_cudnn_compile_ma : absmax=0.001374, mae=0.000073, mse=0.000000
|
| 4457 |
+
torch_flash : absmax=0.001374, mae=0.000073, mse=0.000000
|
| 4458 |
+
torch_flash_compile_d : absmax=0.001374, mae=0.000073, mse=0.000000
|
| 4459 |
+
torch_flash_compile_ma : absmax=0.001374, mae=0.000073, mse=0.000000
|
| 4460 |
+
hf_flash_attn : absmax=0.001374, mae=0.000073, mse=0.000000
|
| 4461 |
+
hf_flash_attn3 : absmax=0.001374, mae=0.000073, mse=0.000000
|
| 4462 |
+
|
| 4463 |
+
|
| 4464 |
+
===== Testing shape: (1, 4480, 24, 128) =====
|
| 4465 |
+
torch_cudnn : absmax=0.001190, mae=0.000072, mse=0.000000
|
| 4466 |
+
torch_cudnn_compile_d : absmax=0.001190, mae=0.000072, mse=0.000000
|
| 4467 |
+
torch_cudnn_compile_ma : absmax=0.001190, mae=0.000072, mse=0.000000
|
| 4468 |
+
torch_flash : absmax=0.001190, mae=0.000072, mse=0.000000
|
| 4469 |
+
torch_flash_compile_d : absmax=0.001190, mae=0.000072, mse=0.000000
|
| 4470 |
+
torch_flash_compile_ma : absmax=0.001190, mae=0.000072, mse=0.000000
|
| 4471 |
+
hf_flash_attn : absmax=0.001190, mae=0.000072, mse=0.000000
|
| 4472 |
+
hf_flash_attn3 : absmax=0.001190, mae=0.000072, mse=0.000000
|
| 4473 |
+
|
| 4474 |
+
|
| 4475 |
+
===== Testing shape: (1, 4544, 24, 128) =====
|
| 4476 |
+
torch_cudnn : absmax=0.001189, mae=0.000072, mse=0.000000
|
| 4477 |
+
torch_cudnn_compile_d : absmax=0.001189, mae=0.000072, mse=0.000000
|
| 4478 |
+
torch_cudnn_compile_ma : absmax=0.001189, mae=0.000072, mse=0.000000
|
| 4479 |
+
torch_flash : absmax=0.001189, mae=0.000072, mse=0.000000
|
| 4480 |
+
torch_flash_compile_d : absmax=0.001189, mae=0.000072, mse=0.000000
|
| 4481 |
+
torch_flash_compile_ma : absmax=0.001189, mae=0.000072, mse=0.000000
|
| 4482 |
+
hf_flash_attn : absmax=0.001189, mae=0.000072, mse=0.000000
|
| 4483 |
+
hf_flash_attn3 : absmax=0.001189, mae=0.000072, mse=0.000000
|
| 4484 |
+
|
| 4485 |
+
|
| 4486 |
+
===== Testing shape: (1, 4608, 24, 128) =====
|
| 4487 |
+
torch_cudnn : absmax=0.000851, mae=0.000072, mse=0.000000
|
| 4488 |
+
torch_cudnn_compile_d : absmax=0.000851, mae=0.000072, mse=0.000000
|
| 4489 |
+
torch_cudnn_compile_ma : absmax=0.000851, mae=0.000072, mse=0.000000
|
| 4490 |
+
torch_flash : absmax=0.001095, mae=0.000072, mse=0.000000
|
| 4491 |
+
torch_flash_compile_d : absmax=0.001095, mae=0.000072, mse=0.000000
|
| 4492 |
+
torch_flash_compile_ma : absmax=0.001095, mae=0.000072, mse=0.000000
|
| 4493 |
+
hf_flash_attn : absmax=0.001095, mae=0.000072, mse=0.000000
|
| 4494 |
+
hf_flash_attn3 : absmax=0.000872, mae=0.000072, mse=0.000000
|
| 4495 |
+
Attention Benchmark:
|
| 4496 |
+
seq_len torch_cudnn torch_cudnn_compile_d torch_cudnn_compile_ma torch_flash torch_flash_compile_d torch_flash_compile_ma hf_flash_attn hf_flash_attn3
|
| 4497 |
+
0 4224.0 3.798368 3.791408 4.181328 3.967520 3.956704 4.310320 3.395904 3.330144
|
| 4498 |
+
1 4352.0 4.079536 4.073056 4.418592 4.399408 4.394336 4.733312 3.836944 3.757936
|
| 4499 |
+
2 4416.0 4.143008 4.138512 4.483328 4.452528 4.444224 4.789856 3.894816 3.862496
|
| 4500 |
+
3 4480.0 4.205120 4.199184 4.552352 4.529248 4.523552 4.870752 3.953808 3.870560
|
| 4501 |
+
4 4544.0 4.437120 4.428784 4.782656 4.584704 4.576832 4.933216 4.008992 3.975952
|
| 4502 |
+
5 4608.0 4.500704 4.493792 4.869824 4.658752 4.654752 5.028256 4.066272 3.985760
|
| 4503 |
</div>
|
| 4504 |
<div class="uv-install-logs" id="uv-logs-benchmark">
|
| 4505 |
<div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
|
| 4506 |
<div class="uv-logs-content" style="display: none;">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4507 |
Downloading kiwisolver (1.4MiB)
|
| 4508 |
+
Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
|
|
|
|
|
|
|
| 4509 |
Downloading nvidia-cublas-cu12 (566.8MiB)
|
| 4510 |
Downloading pandas (11.8MiB)
|
| 4511 |
+
Downloading hf-xet (3.0MiB)
|
| 4512 |
+
Downloading nvidia-curand-cu12 (60.7MiB)
|
| 4513 |
+
Downloading fonttools (4.7MiB)
|
| 4514 |
+
Downloading networkx (1.9MiB)
|
| 4515 |
+
Downloading numpy (16.2MiB)
|
| 4516 |
+
Downloading torch (846.9MiB)
|
| 4517 |
+
Downloading nvidia-cusparse-cu12 (274.9MiB)
|
| 4518 |
Downloading nvidia-cudnn-cu12 (674.0MiB)
|
| 4519 |
+
Downloading nvidia-nccl-cu12 (307.4MiB)
|
| 4520 |
+
Downloading setuptools (1.1MiB)
|
| 4521 |
+
Downloading nvidia-cufft-cu12 (184.2MiB)
|
| 4522 |
Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
|
| 4523 |
+
Downloading sympy (6.0MiB)
|
| 4524 |
+
Downloading nvidia-cufile-cu12 (1.1MiB)
|
| 4525 |
Downloading nvidia-nvjitlink-cu12 (37.4MiB)
|
| 4526 |
+
Downloading nvidia-cusparselt-cu12 (273.9MiB)
|
| 4527 |
+
Downloading nvidia-cusolver-cu12 (255.1MiB)
|
| 4528 |
+
Downloading pillow (6.3MiB)
|
| 4529 |
+
Downloading matplotlib (8.3MiB)
|
| 4530 |
Downloading triton (148.3MiB)
|
|
|
|
|
|
|
|
|
|
| 4531 |
Downloading nvidia-cufile-cu12
|
| 4532 |
Downloading kiwisolver
|
| 4533 |
Downloading hf-xet
|
|
|
|
| 4535 |
Downloading networkx
|
| 4536 |
Downloading fonttools
|
| 4537 |
Downloading pillow
|
|
|
|
| 4538 |
Downloading matplotlib
|
| 4539 |
+
Downloading nvidia-cuda-cupti-cu12
|
| 4540 |
Downloading sympy
|
| 4541 |
Downloading numpy
|
| 4542 |
Downloading nvidia-nvjitlink-cu12
|
|
|
|
| 4552 |
Downloading nvidia-cublas-cu12
|
| 4553 |
Downloading nvidia-cudnn-cu12
|
| 4554 |
Downloading torch
|
| 4555 |
+
Installed 49 packages in 563ms
|
| 4556 |
</div>
|
| 4557 |
</div>
|
| 4558 |
<div class="cell-stderr">Fetching 20 files: 0%| | 0/20 [00:00<?, ?it/s]
|
| 4559 |
+
Fetching 20 files: 5%|▌ | 1/20 [00:00<00:04, 4.34it/s]
|
| 4560 |
+
Fetching 20 files: 10%|█ | 2/20 [00:02<00:25, 1.42s/it]
|
| 4561 |
+
Fetching 20 files: 100%|██████████| 20/20 [00:02<00:00, 8.05it/s]
|
| 4562 |
|
| 4563 |
Fetching 4 files: 0%| | 0/4 [00:00<?, ?it/s]
|
| 4564 |
+
Fetching 4 files: 25%|██▌ | 1/4 [00:00<00:00, 5.49it/s]
|
| 4565 |
+
Fetching 4 files: 50%|█████ | 2/4 [00:01<00:01, 1.15it/s]
|
| 4566 |
+
Fetching 4 files: 100%|██████████| 4/4 [00:01<00:00, 2.60it/s]</div>
|
| 4567 |
+
<div class="cell-artifacts">
|
| 4568 |
+
<h4>Artifacts:</h4>
|
| 4569 |
+
<a href="artifacts/benchmark/dump_attention_benchmark/Attention Benchmark.png" class="artifact" target="_blank">dump_attention_benchmark/Attention Benchmark.png</a>
|
| 4570 |
+
<a href="artifacts/benchmark/dump_attention_benchmark/Attention Benchmark.csv" class="artifact" target="_blank">dump_attention_benchmark/Attention Benchmark.csv</a>
|
| 4571 |
+
<a href="artifacts/benchmark/dump_attention_benchmark/results.html" class="artifact" target="_blank">dump_attention_benchmark/results.html</a>
|
| 4572 |
+
<div class="artifact-preview">
|
| 4573 |
+
<img src="artifacts/benchmark/dump_attention_benchmark/Attention Benchmark.png" alt="dump_attention_benchmark/Attention Benchmark.png">
|
| 4574 |
+
</div>
|
| 4575 |
+
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4576 |
</div>
|
| 4577 |
</div>
|
| 4578 |
</div>
|
moe_benchmarks/megablocks/megablocks_only.html
CHANGED
|
@@ -3724,219 +3724,122 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left:
|
|
| 3724 |
<p>Next we can run with Megablocks kernels enabled.</p>
|
| 3725 |
<h3>Forward</h3>
|
| 3726 |
<p>First, we run a forward pass with Megablocks kernels.</p>
|
| 3727 |
-
<
|
| 3728 |
-
<p>Next, we run a forward and backward pass with Megablocks kernels enabled. This should be more memory efficient and allow us to complete the backward pass without running out of memory.</p>
|
| 3729 |
-
<div class="cell cell-failed" id="cell-forward_and_backward">
|
| 3730 |
<div class="cell-header">
|
| 3731 |
<span class="collapse-indicators">
|
| 3732 |
-
<span onclick="toggleCode('
|
| 3733 |
-
<span onclick="toggleOutput('
|
| 3734 |
-
<span id="uv-indicator-
|
| 3735 |
</span> |
|
| 3736 |
-
Cell:
|
| 3737 |
-
| <button class="run-btn" onclick="runCell('
|
| 3738 |
-
<button class="copy-btn" onclick="copyCell('
|
| 3739 |
-
<a href="cells/
|
| 3740 |
</div>
|
| 3741 |
-
<div id="code-
|
| 3742 |
<div class="highlight-with-lines">
|
| 3743 |
-
<div class="line-numbers" id="lines-
|
| 3744 |
-
<a class="line-number" data-cell="
|
| 3745 |
-
<a class="line-number" data-cell="
|
| 3746 |
-
<a class="line-number" data-cell="
|
| 3747 |
-
<a class="line-number" data-cell="
|
| 3748 |
-
<a class="line-number" data-cell="
|
| 3749 |
-
<a class="line-number" data-cell="
|
| 3750 |
-
<a class="line-number" data-cell="
|
| 3751 |
-
<a class="line-number" data-cell="
|
| 3752 |
-
<a class="line-number" data-cell="
|
| 3753 |
-
<a class="line-number" data-cell="
|
| 3754 |
-
<a class="line-number" data-cell="
|
| 3755 |
-
<a class="line-number" data-cell="
|
| 3756 |
-
<a class="line-number" data-cell="
|
| 3757 |
-
<a class="line-number" data-cell="
|
| 3758 |
-
<a class="line-number" data-cell="
|
| 3759 |
-
<a class="line-number" data-cell="
|
| 3760 |
-
<a class="line-number" data-cell="
|
| 3761 |
-
<a class="line-number" data-cell="
|
| 3762 |
-
<a class="line-number" data-cell="
|
| 3763 |
-
<a class="line-number" data-cell="
|
| 3764 |
-
<a class="line-number" data-cell="
|
| 3765 |
-
<a class="line-number" data-cell="
|
| 3766 |
-
<a class="line-number" data-cell="
|
| 3767 |
-
<a class="line-number" data-cell="
|
| 3768 |
-
<a class="line-number" data-cell="
|
| 3769 |
-
<a class="line-number" data-cell="
|
| 3770 |
-
<a class="line-number" data-cell="
|
| 3771 |
-
<a class="line-number" data-cell="
|
| 3772 |
-
<a class="line-number" data-cell="
|
| 3773 |
-
<a class="line-number" data-cell="
|
| 3774 |
-
<a class="line-number" data-cell="
|
| 3775 |
-
<a class="line-number" data-cell="
|
| 3776 |
-
<a class="line-number" data-cell="
|
| 3777 |
-
<a class="line-number" data-cell="
|
| 3778 |
-
<a class="line-number" data-cell="
|
| 3779 |
-
<a class="line-number" data-cell="
|
| 3780 |
-
<a class="line-number" data-cell="
|
| 3781 |
-
<a class="line-number" data-cell="
|
| 3782 |
-
<a class="line-number" data-cell="
|
| 3783 |
-
<a class="line-number" data-cell="
|
| 3784 |
-
<a class="line-number" data-cell="
|
| 3785 |
-
<a class="line-number" data-cell="
|
| 3786 |
-
<a class="line-number" data-cell="
|
| 3787 |
-
<a class="line-number" data-cell="
|
| 3788 |
-
<a class="line-number" data-cell="
|
| 3789 |
-
<a class="line-number" data-cell="
|
| 3790 |
-
<a class="line-number" data-cell="
|
| 3791 |
-
<a class="line-number" data-cell="
|
| 3792 |
-
<a class="line-number" data-cell="
|
| 3793 |
-
<a class="line-number" data-cell="
|
| 3794 |
-
<a class="line-number" data-cell="
|
| 3795 |
-
<a class="line-number" data-cell="
|
| 3796 |
-
<a class="line-number" data-cell="
|
| 3797 |
-
<a class="line-number" data-cell="
|
| 3798 |
-
<a class="line-number" data-cell="
|
| 3799 |
-
<a class="line-number" data-cell="
|
| 3800 |
-
<a class="line-number" data-cell="
|
| 3801 |
-
<a class="line-number" data-cell="
|
| 3802 |
-
<a class="line-number" data-cell="
|
| 3803 |
-
<a class="line-number" data-cell="
|
| 3804 |
-
<a class="line-number" data-cell="
|
| 3805 |
-
<a class="line-number" data-cell="
|
| 3806 |
-
<a class="line-number" data-cell="
|
| 3807 |
-
<a class="line-number" data-cell="
|
| 3808 |
-
<a class="line-number" data-cell="
|
| 3809 |
-
<a class="line-number" data-cell="
|
| 3810 |
-
<a class="line-number" data-cell="
|
| 3811 |
-
<a class="line-number" data-cell="
|
| 3812 |
-
<a class="line-number" data-cell="
|
| 3813 |
-
<a class="line-number" data-cell="
|
| 3814 |
-
<a class="line-number" data-cell="
|
| 3815 |
-
<a class="line-number" data-cell="
|
| 3816 |
-
<a class="line-number" data-cell="
|
| 3817 |
-
<a class="line-number" data-cell="
|
| 3818 |
-
<a class="line-number" data-cell="
|
| 3819 |
-
<a class="line-number" data-cell="
|
| 3820 |
-
<a class="line-number" data-cell="
|
| 3821 |
-
<a class="line-number" data-cell="
|
| 3822 |
-
<a class="line-number" data-cell="
|
| 3823 |
-
<a class="line-number" data-cell="
|
| 3824 |
-
<a class="line-number" data-cell="
|
| 3825 |
-
<a class="line-number" data-cell="
|
| 3826 |
-
<a class="line-number" data-cell="
|
| 3827 |
-
<a class="line-number" data-cell="
|
| 3828 |
-
<a class="line-number" data-cell="
|
| 3829 |
-
<a class="line-number" data-cell="
|
| 3830 |
-
<a class="line-number" data-cell="
|
| 3831 |
-
<a class="line-number" data-cell="
|
| 3832 |
-
<a class="line-number" data-cell="
|
| 3833 |
-
<a class="line-number" data-cell="
|
| 3834 |
-
<a class="line-number" data-cell="
|
| 3835 |
-
<a class="line-number" data-cell="
|
| 3836 |
-
<a class="line-number" data-cell="
|
| 3837 |
-
<a class="line-number" data-cell="
|
| 3838 |
-
<a class="line-number" data-cell="
|
| 3839 |
-
<a class="line-number" data-cell="
|
| 3840 |
-
<a class="line-number" data-cell="
|
| 3841 |
-
<a class="line-number" data-cell="
|
| 3842 |
-
<a class="line-number" data-cell="
|
| 3843 |
-
<a class="line-number" data-cell="
|
| 3844 |
-
<a class="line-number" data-cell="
|
| 3845 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="102" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 102, true);">102</a>
|
| 3846 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="103" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 103, true);">103</a>
|
| 3847 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="104" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 104, true);">104</a>
|
| 3848 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="105" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 105, true);">105</a>
|
| 3849 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="106" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 106, true);">106</a>
|
| 3850 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="107" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 107, true);">107</a>
|
| 3851 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="108" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 108, true);">108</a>
|
| 3852 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="109" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 109, true);">109</a>
|
| 3853 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="110" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 110, true);">110</a>
|
| 3854 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="111" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 111, true);">111</a>
|
| 3855 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="112" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 112, true);">112</a>
|
| 3856 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="113" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 113, true);">113</a>
|
| 3857 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="114" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 114, true);">114</a>
|
| 3858 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="115" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 115, true);">115</a>
|
| 3859 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="116" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 116, true);">116</a>
|
| 3860 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="117" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 117, true);">117</a>
|
| 3861 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="118" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 118, true);">118</a>
|
| 3862 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="119" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 119, true);">119</a>
|
| 3863 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="120" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 120, true);">120</a>
|
| 3864 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="121" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 121, true);">121</a>
|
| 3865 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="122" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 122, true);">122</a>
|
| 3866 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="123" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 123, true);">123</a>
|
| 3867 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="124" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 124, true);">124</a>
|
| 3868 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="125" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 125, true);">125</a>
|
| 3869 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="126" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 126, true);">126</a>
|
| 3870 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="127" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 127, true);">127</a>
|
| 3871 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="128" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 128, true);">128</a>
|
| 3872 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="129" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 129, true);">129</a>
|
| 3873 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="130" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 130, true);">130</a>
|
| 3874 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="131" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 131, true);">131</a>
|
| 3875 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="132" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 132, true);">132</a>
|
| 3876 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="133" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 133, true);">133</a>
|
| 3877 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="134" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 134, true);">134</a>
|
| 3878 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="135" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 135, true);">135</a>
|
| 3879 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="136" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 136, true);">136</a>
|
| 3880 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="137" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 137, true);">137</a>
|
| 3881 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="138" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 138, true);">138</a>
|
| 3882 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="139" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 139, true);">139</a>
|
| 3883 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="140" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 140, true);">140</a>
|
| 3884 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="141" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 141, true);">141</a>
|
| 3885 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="142" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 142, true);">142</a>
|
| 3886 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="143" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 143, true);">143</a>
|
| 3887 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="144" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 144, true);">144</a>
|
| 3888 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="145" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 145, true);">145</a>
|
| 3889 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="146" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 146, true);">146</a>
|
| 3890 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="147" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 147, true);">147</a>
|
| 3891 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="148" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 148, true);">148</a>
|
| 3892 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="149" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 149, true);">149</a>
|
| 3893 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="150" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 150, true);">150</a>
|
| 3894 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="151" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 151, true);">151</a>
|
| 3895 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="152" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 152, true);">152</a>
|
| 3896 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="153" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 153, true);">153</a>
|
| 3897 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="154" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 154, true);">154</a>
|
| 3898 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="155" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 155, true);">155</a>
|
| 3899 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="156" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 156, true);">156</a>
|
| 3900 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="157" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 157, true);">157</a>
|
| 3901 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="158" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 158, true);">158</a>
|
| 3902 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="159" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 159, true);">159</a>
|
| 3903 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="160" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 160, true);">160</a>
|
| 3904 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="161" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 161, true);">161</a>
|
| 3905 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="162" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 162, true);">162</a>
|
| 3906 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="163" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 163, true);">163</a>
|
| 3907 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="164" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 164, true);">164</a>
|
| 3908 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="165" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 165, true);">165</a>
|
| 3909 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="166" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 166, true);">166</a>
|
| 3910 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="167" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 167, true);">167</a>
|
| 3911 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="168" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 168, true);">168</a>
|
| 3912 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="169" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 169, true);">169</a>
|
| 3913 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="170" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 170, true);">170</a>
|
| 3914 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="171" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 171, true);">171</a>
|
| 3915 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="172" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 172, true);">172</a>
|
| 3916 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="173" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 173, true);">173</a>
|
| 3917 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="174" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 174, true);">174</a>
|
| 3918 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="175" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 175, true);">175</a>
|
| 3919 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="176" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 176, true);">176</a>
|
| 3920 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="177" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 177, true);">177</a>
|
| 3921 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="178" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 178, true);">178</a>
|
| 3922 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="179" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 179, true);">179</a>
|
| 3923 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="180" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 180, true);">180</a>
|
| 3924 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="181" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 181, true);">181</a>
|
| 3925 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="182" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 182, true);">182</a>
|
| 3926 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="183" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 183, true);">183</a>
|
| 3927 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="184" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 184, true);">184</a>
|
| 3928 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="185" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 185, true);">185</a>
|
| 3929 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="186" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 186, true);">186</a>
|
| 3930 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="187" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 187, true);">187</a>
|
| 3931 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="188" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 188, true);">188</a>
|
| 3932 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="189" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 189, true);">189</a>
|
| 3933 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="190" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 190, true);">190</a>
|
| 3934 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="191" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 191, true);">191</a>
|
| 3935 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="192" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 192, true);">192</a>
|
| 3936 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="193" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 193, true);">193</a>
|
| 3937 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="194" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 194, true);">194</a>
|
| 3938 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="195" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 195, true);">195</a>
|
| 3939 |
-
<a class="line-number" data-cell="forward_and_backward" data-line="196" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 196, true);">196</a>
|
| 3940 |
</div>
|
| 3941 |
<div class="code-wrap">
|
| 3942 |
<div class="highlight"><pre><span></span><span class="c1"># /// script</span>
|
|
@@ -3963,7 +3866,7 @@ Cell: forward_and_backward | 19.43s | FAILED
|
|
| 3963 |
<span class="kn">import</span><span class="w"> </span><span class="nn">logging</span>
|
| 3964 |
<span class="kn">from</span><span class="w"> </span><span class="nn">transformers.models.gpt_oss.modeling_gpt_oss</span><span class="w"> </span><span class="kn">import</span> <span class="n">GptOssRMSNorm</span>
|
| 3965 |
|
| 3966 |
-
|
| 3967 |
<span class="n">replace_kernel_forward_from_hub</span><span class="p">(</span><span class="n">GptOssRMSNorm</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
|
| 3968 |
|
| 3969 |
<span class="c1"># set to debug logging</span>
|
|
@@ -4004,6 +3907,8 @@ Cell: forward_and_backward | 19.43s | FAILED
|
|
| 4004 |
<span class="n">tokenizer</span> <span class="o">=</span> <span class="n">PreTrainedTokenizerFast</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span><span class="n">model_id</span><span class="p">)</span>
|
| 4005 |
<span class="n">quantization_config</span> <span class="o">=</span> <span class="n">Mxfp4Config</span><span class="p">(</span><span class="n">dequantize</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
| 4006 |
|
|
|
|
|
|
|
| 4007 |
<span class="n">model</span> <span class="o">=</span> <span class="n">GptOssForCausalLM</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span>
|
| 4008 |
<span class="n">model_id</span><span class="p">,</span>
|
| 4009 |
<span class="n">dtype</span><span class="o">=</span><span class="s2">"bfloat16"</span><span class="p">,</span>
|
|
@@ -4024,14 +3929,9 @@ Cell: forward_and_backward | 19.43s | FAILED
|
|
| 4024 |
<span class="n">reasoning_effort</span><span class="o">=</span><span class="s2">"low"</span><span class="p">,</span>
|
| 4025 |
<span class="p">)</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="s2">"cuda"</span><span class="p">)</span>
|
| 4026 |
|
| 4027 |
-
<span class="n">max_tokens</span> <span class="o">=</span> <span class="mi">
|
| 4028 |
|
| 4029 |
-
<span class="
|
| 4030 |
-
<span class="n">reset_peak_memory_stats</span><span class="p">()</span>
|
| 4031 |
-
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Pre-generation memory: </span><span class="si">{</span><span class="n">get_memory_stats</span><span class="p">()</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
| 4032 |
-
|
| 4033 |
-
<span class="c1"># forward and backward pass</span>
|
| 4034 |
-
<span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">autograd</span><span class="o">.</span><span class="n">set_grad_enabled</span><span class="p">(</span><span class="kc">True</span><span class="p">):</span>
|
| 4035 |
<span class="n">start_time</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">perf_counter</span><span class="p">()</span>
|
| 4036 |
<span class="n">generated</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">generate</span><span class="p">(</span>
|
| 4037 |
<span class="o">**</span><span class="n">inputs</span><span class="p">,</span>
|
|
@@ -4040,109 +3940,17 @@ Cell: forward_and_backward | 19.43s | FAILED
|
|
| 4040 |
<span class="n">temperature</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
|
| 4041 |
<span class="p">)</span>
|
| 4042 |
<span class="n">end_time</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">perf_counter</span><span class="p">()</span>
|
| 4043 |
-
|
| 4044 |
-
|
| 4045 |
-
|
| 4046 |
-
|
| 4047 |
-
<span class="c1"># Use gradient checkpointing to reduce memory usage</span>
|
| 4048 |
-
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="s1">'gradient_checkpointing_enable'</span><span class="p">):</span>
|
| 4049 |
-
<span class="n">model</span><span class="o">.</span><span class="n">gradient_checkpointing_enable</span><span class="p">()</span>
|
| 4050 |
-
<span class="nb">print</span><span class="p">(</span><span class="s2">"Enabled gradient checkpointing"</span><span class="p">)</span>
|
| 4051 |
-
|
| 4052 |
-
<span class="c1"># Reduce sequence length if needed for memory</span>
|
| 4053 |
-
<span class="n">max_seq_len</span> <span class="o">=</span> <span class="mi">512</span> <span class="c1"># Limit sequence length for backward pass</span>
|
| 4054 |
-
<span class="k">if</span> <span class="n">generated</span><span class="o">.</span><span class="n">size</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="o">></span> <span class="n">max_seq_len</span><span class="p">:</span>
|
| 4055 |
-
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Truncating sequence from </span><span class="si">{</span><span class="n">generated</span><span class="o">.</span><span class="n">size</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span><span class="si">}</span><span class="s2"> to </span><span class="si">{</span><span class="n">max_seq_len</span><span class="si">}</span><span class="s2"> tokens"</span><span class="p">)</span>
|
| 4056 |
-
<span class="n">full_sequence</span> <span class="o">=</span> <span class="n">generated</span><span class="p">[:,</span> <span class="o">-</span><span class="n">max_seq_len</span><span class="p">:]</span>
|
| 4057 |
-
<span class="k">else</span><span class="p">:</span>
|
| 4058 |
-
<span class="n">full_sequence</span> <span class="o">=</span> <span class="n">generated</span>
|
| 4059 |
-
|
| 4060 |
-
<span class="c1"># Get model outputs for the full sequence</span>
|
| 4061 |
-
<span class="n">model</span><span class="o">.</span><span class="n">train</span><span class="p">()</span> <span class="c1"># Enable dropout and other training behaviors</span>
|
| 4062 |
-
|
| 4063 |
-
<span class="k">try</span><span class="p">:</span>
|
| 4064 |
-
<span class="n">outputs</span> <span class="o">=</span> <span class="n">model</span><span class="p">(</span>
|
| 4065 |
-
<span class="n">input_ids</span><span class="o">=</span><span class="n">full_sequence</span><span class="p">,</span>
|
| 4066 |
-
<span class="n">labels</span><span class="o">=</span><span class="n">full_sequence</span><span class="p">,</span> <span class="c1"># This will compute loss internally</span>
|
| 4067 |
-
<span class="n">return_dict</span><span class="o">=</span><span class="kc">True</span>
|
| 4068 |
-
<span class="p">)</span>
|
| 4069 |
-
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Post-forward memory: </span><span class="si">{</span><span class="n">get_memory_stats</span><span class="p">()</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
| 4070 |
-
|
| 4071 |
-
<span class="c1"># If model doesn't compute loss, compute it manually</span>
|
| 4072 |
-
<span class="k">if</span> <span class="n">outputs</span><span class="o">.</span><span class="n">loss</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
| 4073 |
-
<span class="n">shift_logits</span> <span class="o">=</span> <span class="n">outputs</span><span class="o">.</span><span class="n">logits</span><span class="p">[</span><span class="o">...</span><span class="p">,</span> <span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="p">:]</span><span class="o">.</span><span class="n">contiguous</span><span class="p">()</span>
|
| 4074 |
-
<span class="n">shift_labels</span> <span class="o">=</span> <span class="n">full_sequence</span><span class="p">[</span><span class="o">...</span><span class="p">,</span> <span class="mi">1</span><span class="p">:]</span><span class="o">.</span><span class="n">contiguous</span><span class="p">()</span>
|
| 4075 |
-
|
| 4076 |
-
<span class="c1"># Use CrossEntropyLoss with ignore_index for padding tokens</span>
|
| 4077 |
-
<span class="n">loss_fct</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">CrossEntropyLoss</span><span class="p">(</span><span class="n">ignore_index</span><span class="o">=</span><span class="n">tokenizer</span><span class="o">.</span><span class="n">pad_token_id</span> <span class="k">if</span> <span class="n">tokenizer</span><span class="o">.</span><span class="n">pad_token_id</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="o">-</span><span class="mi">100</span><span class="p">)</span>
|
| 4078 |
-
<span class="n">loss</span> <span class="o">=</span> <span class="n">loss_fct</span><span class="p">(</span>
|
| 4079 |
-
<span class="n">shift_logits</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="n">shift_logits</span><span class="o">.</span><span class="n">size</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)),</span>
|
| 4080 |
-
<span class="n">shift_labels</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span>
|
| 4081 |
-
<span class="p">)</span>
|
| 4082 |
-
<span class="k">else</span><span class="p">:</span>
|
| 4083 |
-
<span class="n">loss</span> <span class="o">=</span> <span class="n">outputs</span><span class="o">.</span><span class="n">loss</span>
|
| 4084 |
-
|
| 4085 |
-
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Loss: </span><span class="si">{</span><span class="n">loss</span><span class="o">.</span><span class="n">item</span><span class="p">()</span><span class="si">:</span><span class="s2">.4f</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
| 4086 |
-
|
| 4087 |
-
<span class="c1"># Clear intermediate tensors to save memory</span>
|
| 4088 |
-
<span class="k">del</span> <span class="n">outputs</span>
|
| 4089 |
-
<span class="n">torch</span><span class="o">.</span><span class="n">cuda</span><span class="o">.</span><span class="n">empty_cache</span><span class="p">()</span>
|
| 4090 |
-
|
| 4091 |
-
<span class="c1"># Perform backward pass with memory management</span>
|
| 4092 |
-
<span class="nb">print</span><span class="p">(</span><span class="s2">"Running backward pass..."</span><span class="p">)</span>
|
| 4093 |
-
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Pre-backward memory: </span><span class="si">{</span><span class="n">get_memory_stats</span><span class="p">()</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
| 4094 |
-
|
| 4095 |
-
<span class="n">loss</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
|
| 4096 |
-
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Post-backward memory: </span><span class="si">{</span><span class="n">get_memory_stats</span><span class="p">()</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
| 4097 |
-
|
| 4098 |
-
<span class="k">except</span> <span class="n">torch</span><span class="o">.</span><span class="n">cuda</span><span class="o">.</span><span class="n">OutOfMemoryError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
| 4099 |
-
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"OOM during forward/backward pass: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
| 4100 |
-
<span class="nb">print</span><span class="p">(</span><span class="s2">"Try reducing max_tokens or max_seq_len"</span><span class="p">)</span>
|
| 4101 |
-
<span class="k">raise</span>
|
| 4102 |
-
|
| 4103 |
-
<span class="c1"># Calculate gradient statistics and print sample gradients</span>
|
| 4104 |
-
<span class="n">total_norm</span> <span class="o">=</span> <span class="mf">0.0</span>
|
| 4105 |
-
<span class="n">param_count</span> <span class="o">=</span> <span class="mi">0</span>
|
| 4106 |
-
<span class="n">grad_samples</span> <span class="o">=</span> <span class="p">{}</span>
|
| 4107 |
-
|
| 4108 |
-
<span class="k">for</span> <span class="n">name</span><span class="p">,</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">model</span><span class="o">.</span><span class="n">named_parameters</span><span class="p">():</span>
|
| 4109 |
-
<span class="k">if</span> <span class="n">p</span><span class="o">.</span><span class="n">grad</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
| 4110 |
-
<span class="n">param_count</span> <span class="o">+=</span> <span class="mi">1</span>
|
| 4111 |
-
<span class="n">grad_norm</span> <span class="o">=</span> <span class="n">p</span><span class="o">.</span><span class="n">grad</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">norm</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">item</span><span class="p">()</span>
|
| 4112 |
-
<span class="n">total_norm</span> <span class="o">+=</span> <span class="n">grad_norm</span> <span class="o">**</span> <span class="mi">2</span>
|
| 4113 |
-
|
| 4114 |
-
<span class="c1"># Collect gradient statistics for key layers</span>
|
| 4115 |
-
<span class="k">if</span> <span class="nb">any</span><span class="p">(</span><span class="n">key</span> <span class="ow">in</span> <span class="n">name</span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'embed'</span><span class="p">,</span> <span class="s1">'lm_head'</span><span class="p">,</span> <span class="s1">'mlp.up'</span><span class="p">,</span> <span class="s1">'mlp.down'</span><span class="p">,</span> <span class="s1">'self_attn.q_proj'</span><span class="p">,</span> <span class="s1">'norm'</span><span class="p">]):</span>
|
| 4116 |
-
<span class="n">grad_samples</span><span class="p">[</span><span class="n">name</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span>
|
| 4117 |
-
<span class="s1">'norm'</span><span class="p">:</span> <span class="n">grad_norm</span><span class="p">,</span>
|
| 4118 |
-
<span class="s1">'mean'</span><span class="p">:</span> <span class="n">p</span><span class="o">.</span><span class="n">grad</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span><span class="o">.</span><span class="n">item</span><span class="p">(),</span>
|
| 4119 |
-
<span class="s1">'std'</span><span class="p">:</span> <span class="n">p</span><span class="o">.</span><span class="n">grad</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">std</span><span class="p">()</span><span class="o">.</span><span class="n">item</span><span class="p">(),</span>
|
| 4120 |
-
<span class="s1">'max'</span><span class="p">:</span> <span class="n">p</span><span class="o">.</span><span class="n">grad</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">max</span><span class="p">()</span><span class="o">.</span><span class="n">item</span><span class="p">(),</span>
|
| 4121 |
-
<span class="s1">'min'</span><span class="p">:</span> <span class="n">p</span><span class="o">.</span><span class="n">grad</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">min</span><span class="p">()</span><span class="o">.</span><span class="n">item</span><span class="p">(),</span>
|
| 4122 |
-
<span class="p">}</span>
|
| 4123 |
-
|
| 4124 |
-
<span class="n">total_norm</span> <span class="o">=</span> <span class="n">total_norm</span> <span class="o">**</span> <span class="mf">0.5</span>
|
| 4125 |
-
|
| 4126 |
-
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="se">\n</span><span class="s2">Gradient norm: </span><span class="si">{</span><span class="n">total_norm</span><span class="si">:</span><span class="s2">.4f</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
| 4127 |
-
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Parameters with gradients: </span><span class="si">{</span><span class="n">param_count</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
| 4128 |
-
|
| 4129 |
-
<span class="c1"># Print sample gradients from important layers</span>
|
| 4130 |
-
<span class="nb">print</span><span class="p">(</span><span class="s2">"</span><span class="se">\n</span><span class="s2">Sample gradient statistics:"</span><span class="p">)</span>
|
| 4131 |
-
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">stats</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="n">grad_samples</span><span class="o">.</span><span class="n">items</span><span class="p">())[:</span><span class="mi">10</span><span class="p">]):</span>
|
| 4132 |
-
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">" </span><span class="si">{</span><span class="n">name</span><span class="p">[:</span><span class="mi">60</span><span class="p">]</span><span class="si">:</span><span class="s2"><60</span><span class="si">}</span><span class="s2"> | norm: </span><span class="si">{</span><span class="n">stats</span><span class="p">[</span><span class="s1">'norm'</span><span class="p">]</span><span class="si">:</span><span class="s2">.4e</span><span class="si">}</span><span class="s2"> | mean: </span><span class="si">{</span><span class="n">stats</span><span class="p">[</span><span class="s1">'mean'</span><span class="p">]</span><span class="si">:</span><span class="s2">.4e</span><span class="si">}</span><span class="s2"> | std: </span><span class="si">{</span><span class="n">stats</span><span class="p">[</span><span class="s1">'std'</span><span class="p">]</span><span class="si">:</span><span class="s2">.4e</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
| 4133 |
-
|
| 4134 |
-
<span class="c1"># Optional: zero gradients for next iteration</span>
|
| 4135 |
-
<span class="n">model</span><span class="o">.</span><span class="n">zero_grad</span><span class="p">()</span>
|
| 4136 |
-
<span class="n">model</span><span class="o">.</span><span class="n">eval</span><span class="p">()</span> <span class="c1"># Switch back to eval mode</span>
|
| 4137 |
</pre></div>
|
| 4138 |
|
| 4139 |
-
<div class="code-line-highlight" id="line-highlight-
|
| 4140 |
</div>
|
| 4141 |
</div>
|
| 4142 |
</div>
|
| 4143 |
-
<div id="output-
|
| 4144 |
-
<div class="cell-stderr">
|
| 4145 |
-
Downloading cpython-3.13.7-linux-x86_64-gnu (download)
|
| 4146 |
Updating https://github.com/huggingface/transformers.git (HEAD)
|
| 4147 |
Updated https://github.com/huggingface/transformers.git (449533af73874470e914a203391635e04ac2ffc8)
|
| 4148 |
× No solution found when resolving script dependencies:
|
|
@@ -4158,6 +3966,9 @@ Cell: forward_and_backward | 19.43s | FAILED
|
|
| 4158 |
</div>
|
| 4159 |
</div>
|
| 4160 |
</div>
|
|
|
|
|
|
|
|
|
|
| 4161 |
</div>
|
| 4162 |
|
| 4163 |
</body>
|
|
|
|
| 3724 |
<p>Next we can run with Megablocks kernels enabled.</p>
|
| 3725 |
<h3>Forward</h3>
|
| 3726 |
<p>First, we run a forward pass with Megablocks kernels.</p>
|
| 3727 |
+
<div class="cell cell-failed" id="cell-forward_only">
|
|
|
|
|
|
|
| 3728 |
<div class="cell-header">
|
| 3729 |
<span class="collapse-indicators">
|
| 3730 |
+
<span onclick="toggleCode('forward_only')" style="cursor: pointer;">▼ code</span>
|
| 3731 |
+
<span onclick="toggleOutput('forward_only')" style="cursor: pointer;">▼ output</span>
|
| 3732 |
+
<span id="uv-indicator-forward_only" style="cursor: default; opacity: 0.3;">▶ uv-logs</span>
|
| 3733 |
</span> |
|
| 3734 |
+
Cell: forward_only | 17.22s | FAILED
|
| 3735 |
+
| <button class="run-btn" onclick="runCell('forward_only')">▶ run</button>
|
| 3736 |
+
<button class="copy-btn" onclick="copyCell('forward_only')">Copy</button>
|
| 3737 |
+
<a href="cells/forward_only.py" target="_blank" class="raw-btn">Raw</a>
|
| 3738 |
</div>
|
| 3739 |
+
<div id="code-forward_only" class="cell-code" data-lines="101">
|
| 3740 |
<div class="highlight-with-lines">
|
| 3741 |
+
<div class="line-numbers" id="lines-forward_only">
|
| 3742 |
+
<a class="line-number" data-cell="forward_only" data-line="1" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 1, true);">1</a>
|
| 3743 |
+
<a class="line-number" data-cell="forward_only" data-line="2" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 2, true);">2</a>
|
| 3744 |
+
<a class="line-number" data-cell="forward_only" data-line="3" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 3, true);">3</a>
|
| 3745 |
+
<a class="line-number" data-cell="forward_only" data-line="4" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 4, true);">4</a>
|
| 3746 |
+
<a class="line-number" data-cell="forward_only" data-line="5" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 5, true);">5</a>
|
| 3747 |
+
<a class="line-number" data-cell="forward_only" data-line="6" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 6, true);">6</a>
|
| 3748 |
+
<a class="line-number" data-cell="forward_only" data-line="7" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 7, true);">7</a>
|
| 3749 |
+
<a class="line-number" data-cell="forward_only" data-line="8" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 8, true);">8</a>
|
| 3750 |
+
<a class="line-number" data-cell="forward_only" data-line="9" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 9, true);">9</a>
|
| 3751 |
+
<a class="line-number" data-cell="forward_only" data-line="10" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 10, true);">10</a>
|
| 3752 |
+
<a class="line-number" data-cell="forward_only" data-line="11" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 11, true);">11</a>
|
| 3753 |
+
<a class="line-number" data-cell="forward_only" data-line="12" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 12, true);">12</a>
|
| 3754 |
+
<a class="line-number" data-cell="forward_only" data-line="13" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 13, true);">13</a>
|
| 3755 |
+
<a class="line-number" data-cell="forward_only" data-line="14" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 14, true);">14</a>
|
| 3756 |
+
<a class="line-number" data-cell="forward_only" data-line="15" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 15, true);">15</a>
|
| 3757 |
+
<a class="line-number" data-cell="forward_only" data-line="16" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 16, true);">16</a>
|
| 3758 |
+
<a class="line-number" data-cell="forward_only" data-line="17" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 17, true);">17</a>
|
| 3759 |
+
<a class="line-number" data-cell="forward_only" data-line="18" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 18, true);">18</a>
|
| 3760 |
+
<a class="line-number" data-cell="forward_only" data-line="19" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 19, true);">19</a>
|
| 3761 |
+
<a class="line-number" data-cell="forward_only" data-line="20" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 20, true);">20</a>
|
| 3762 |
+
<a class="line-number" data-cell="forward_only" data-line="21" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 21, true);">21</a>
|
| 3763 |
+
<a class="line-number" data-cell="forward_only" data-line="22" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 22, true);">22</a>
|
| 3764 |
+
<a class="line-number" data-cell="forward_only" data-line="23" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 23, true);">23</a>
|
| 3765 |
+
<a class="line-number" data-cell="forward_only" data-line="24" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 24, true);">24</a>
|
| 3766 |
+
<a class="line-number" data-cell="forward_only" data-line="25" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 25, true);">25</a>
|
| 3767 |
+
<a class="line-number" data-cell="forward_only" data-line="26" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 26, true);">26</a>
|
| 3768 |
+
<a class="line-number" data-cell="forward_only" data-line="27" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 27, true);">27</a>
|
| 3769 |
+
<a class="line-number" data-cell="forward_only" data-line="28" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 28, true);">28</a>
|
| 3770 |
+
<a class="line-number" data-cell="forward_only" data-line="29" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 29, true);">29</a>
|
| 3771 |
+
<a class="line-number" data-cell="forward_only" data-line="30" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 30, true);">30</a>
|
| 3772 |
+
<a class="line-number" data-cell="forward_only" data-line="31" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 31, true);">31</a>
|
| 3773 |
+
<a class="line-number" data-cell="forward_only" data-line="32" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 32, true);">32</a>
|
| 3774 |
+
<a class="line-number" data-cell="forward_only" data-line="33" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 33, true);">33</a>
|
| 3775 |
+
<a class="line-number" data-cell="forward_only" data-line="34" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 34, true);">34</a>
|
| 3776 |
+
<a class="line-number" data-cell="forward_only" data-line="35" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 35, true);">35</a>
|
| 3777 |
+
<a class="line-number" data-cell="forward_only" data-line="36" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 36, true);">36</a>
|
| 3778 |
+
<a class="line-number" data-cell="forward_only" data-line="37" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 37, true);">37</a>
|
| 3779 |
+
<a class="line-number" data-cell="forward_only" data-line="38" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 38, true);">38</a>
|
| 3780 |
+
<a class="line-number" data-cell="forward_only" data-line="39" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 39, true);">39</a>
|
| 3781 |
+
<a class="line-number" data-cell="forward_only" data-line="40" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 40, true);">40</a>
|
| 3782 |
+
<a class="line-number" data-cell="forward_only" data-line="41" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 41, true);">41</a>
|
| 3783 |
+
<a class="line-number" data-cell="forward_only" data-line="42" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 42, true);">42</a>
|
| 3784 |
+
<a class="line-number" data-cell="forward_only" data-line="43" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 43, true);">43</a>
|
| 3785 |
+
<a class="line-number" data-cell="forward_only" data-line="44" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 44, true);">44</a>
|
| 3786 |
+
<a class="line-number" data-cell="forward_only" data-line="45" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 45, true);">45</a>
|
| 3787 |
+
<a class="line-number" data-cell="forward_only" data-line="46" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 46, true);">46</a>
|
| 3788 |
+
<a class="line-number" data-cell="forward_only" data-line="47" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 47, true);">47</a>
|
| 3789 |
+
<a class="line-number" data-cell="forward_only" data-line="48" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 48, true);">48</a>
|
| 3790 |
+
<a class="line-number" data-cell="forward_only" data-line="49" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 49, true);">49</a>
|
| 3791 |
+
<a class="line-number" data-cell="forward_only" data-line="50" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 50, true);">50</a>
|
| 3792 |
+
<a class="line-number" data-cell="forward_only" data-line="51" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 51, true);">51</a>
|
| 3793 |
+
<a class="line-number" data-cell="forward_only" data-line="52" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 52, true);">52</a>
|
| 3794 |
+
<a class="line-number" data-cell="forward_only" data-line="53" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 53, true);">53</a>
|
| 3795 |
+
<a class="line-number" data-cell="forward_only" data-line="54" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 54, true);">54</a>
|
| 3796 |
+
<a class="line-number" data-cell="forward_only" data-line="55" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 55, true);">55</a>
|
| 3797 |
+
<a class="line-number" data-cell="forward_only" data-line="56" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 56, true);">56</a>
|
| 3798 |
+
<a class="line-number" data-cell="forward_only" data-line="57" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 57, true);">57</a>
|
| 3799 |
+
<a class="line-number" data-cell="forward_only" data-line="58" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 58, true);">58</a>
|
| 3800 |
+
<a class="line-number" data-cell="forward_only" data-line="59" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 59, true);">59</a>
|
| 3801 |
+
<a class="line-number" data-cell="forward_only" data-line="60" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 60, true);">60</a>
|
| 3802 |
+
<a class="line-number" data-cell="forward_only" data-line="61" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 61, true);">61</a>
|
| 3803 |
+
<a class="line-number" data-cell="forward_only" data-line="62" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 62, true);">62</a>
|
| 3804 |
+
<a class="line-number" data-cell="forward_only" data-line="63" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 63, true);">63</a>
|
| 3805 |
+
<a class="line-number" data-cell="forward_only" data-line="64" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 64, true);">64</a>
|
| 3806 |
+
<a class="line-number" data-cell="forward_only" data-line="65" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 65, true);">65</a>
|
| 3807 |
+
<a class="line-number" data-cell="forward_only" data-line="66" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 66, true);">66</a>
|
| 3808 |
+
<a class="line-number" data-cell="forward_only" data-line="67" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 67, true);">67</a>
|
| 3809 |
+
<a class="line-number" data-cell="forward_only" data-line="68" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 68, true);">68</a>
|
| 3810 |
+
<a class="line-number" data-cell="forward_only" data-line="69" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 69, true);">69</a>
|
| 3811 |
+
<a class="line-number" data-cell="forward_only" data-line="70" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 70, true);">70</a>
|
| 3812 |
+
<a class="line-number" data-cell="forward_only" data-line="71" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 71, true);">71</a>
|
| 3813 |
+
<a class="line-number" data-cell="forward_only" data-line="72" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 72, true);">72</a>
|
| 3814 |
+
<a class="line-number" data-cell="forward_only" data-line="73" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 73, true);">73</a>
|
| 3815 |
+
<a class="line-number" data-cell="forward_only" data-line="74" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 74, true);">74</a>
|
| 3816 |
+
<a class="line-number" data-cell="forward_only" data-line="75" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 75, true);">75</a>
|
| 3817 |
+
<a class="line-number" data-cell="forward_only" data-line="76" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 76, true);">76</a>
|
| 3818 |
+
<a class="line-number" data-cell="forward_only" data-line="77" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 77, true);">77</a>
|
| 3819 |
+
<a class="line-number" data-cell="forward_only" data-line="78" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 78, true);">78</a>
|
| 3820 |
+
<a class="line-number" data-cell="forward_only" data-line="79" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 79, true);">79</a>
|
| 3821 |
+
<a class="line-number" data-cell="forward_only" data-line="80" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 80, true);">80</a>
|
| 3822 |
+
<a class="line-number" data-cell="forward_only" data-line="81" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 81, true);">81</a>
|
| 3823 |
+
<a class="line-number" data-cell="forward_only" data-line="82" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 82, true);">82</a>
|
| 3824 |
+
<a class="line-number" data-cell="forward_only" data-line="83" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 83, true);">83</a>
|
| 3825 |
+
<a class="line-number" data-cell="forward_only" data-line="84" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 84, true);">84</a>
|
| 3826 |
+
<a class="line-number" data-cell="forward_only" data-line="85" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 85, true);">85</a>
|
| 3827 |
+
<a class="line-number" data-cell="forward_only" data-line="86" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 86, true);">86</a>
|
| 3828 |
+
<a class="line-number" data-cell="forward_only" data-line="87" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 87, true);">87</a>
|
| 3829 |
+
<a class="line-number" data-cell="forward_only" data-line="88" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 88, true);">88</a>
|
| 3830 |
+
<a class="line-number" data-cell="forward_only" data-line="89" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 89, true);">89</a>
|
| 3831 |
+
<a class="line-number" data-cell="forward_only" data-line="90" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 90, true);">90</a>
|
| 3832 |
+
<a class="line-number" data-cell="forward_only" data-line="91" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 91, true);">91</a>
|
| 3833 |
+
<a class="line-number" data-cell="forward_only" data-line="92" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 92, true);">92</a>
|
| 3834 |
+
<a class="line-number" data-cell="forward_only" data-line="93" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 93, true);">93</a>
|
| 3835 |
+
<a class="line-number" data-cell="forward_only" data-line="94" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 94, true);">94</a>
|
| 3836 |
+
<a class="line-number" data-cell="forward_only" data-line="95" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 95, true);">95</a>
|
| 3837 |
+
<a class="line-number" data-cell="forward_only" data-line="96" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 96, true);">96</a>
|
| 3838 |
+
<a class="line-number" data-cell="forward_only" data-line="97" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 97, true);">97</a>
|
| 3839 |
+
<a class="line-number" data-cell="forward_only" data-line="98" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 98, true);">98</a>
|
| 3840 |
+
<a class="line-number" data-cell="forward_only" data-line="99" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 99, true);">99</a>
|
| 3841 |
+
<a class="line-number" data-cell="forward_only" data-line="100" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 100, true);">100</a>
|
| 3842 |
+
<a class="line-number" data-cell="forward_only" data-line="101" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 101, true);">101</a>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3843 |
</div>
|
| 3844 |
<div class="code-wrap">
|
| 3845 |
<div class="highlight"><pre><span></span><span class="c1"># /// script</span>
|
|
|
|
| 3866 |
<span class="kn">import</span><span class="w"> </span><span class="nn">logging</span>
|
| 3867 |
<span class="kn">from</span><span class="w"> </span><span class="nn">transformers.models.gpt_oss.modeling_gpt_oss</span><span class="w"> </span><span class="kn">import</span> <span class="n">GptOssRMSNorm</span>
|
| 3868 |
|
| 3869 |
+
|
| 3870 |
<span class="n">replace_kernel_forward_from_hub</span><span class="p">(</span><span class="n">GptOssRMSNorm</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
|
| 3871 |
|
| 3872 |
<span class="c1"># set to debug logging</span>
|
|
|
|
| 3907 |
<span class="n">tokenizer</span> <span class="o">=</span> <span class="n">PreTrainedTokenizerFast</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span><span class="n">model_id</span><span class="p">)</span>
|
| 3908 |
<span class="n">quantization_config</span> <span class="o">=</span> <span class="n">Mxfp4Config</span><span class="p">(</span><span class="n">dequantize</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
| 3909 |
|
| 3910 |
+
|
| 3911 |
+
|
| 3912 |
<span class="n">model</span> <span class="o">=</span> <span class="n">GptOssForCausalLM</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span>
|
| 3913 |
<span class="n">model_id</span><span class="p">,</span>
|
| 3914 |
<span class="n">dtype</span><span class="o">=</span><span class="s2">"bfloat16"</span><span class="p">,</span>
|
|
|
|
| 3929 |
<span class="n">reasoning_effort</span><span class="o">=</span><span class="s2">"low"</span><span class="p">,</span>
|
| 3930 |
<span class="p">)</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="s2">"cuda"</span><span class="p">)</span>
|
| 3931 |
|
| 3932 |
+
<span class="n">max_tokens</span> <span class="o">=</span> <span class="mi">256</span>
|
| 3933 |
|
| 3934 |
+
<span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">inference_mode</span><span class="p">():</span>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3935 |
<span class="n">start_time</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">perf_counter</span><span class="p">()</span>
|
| 3936 |
<span class="n">generated</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">generate</span><span class="p">(</span>
|
| 3937 |
<span class="o">**</span><span class="n">inputs</span><span class="p">,</span>
|
|
|
|
| 3940 |
<span class="n">temperature</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
|
| 3941 |
<span class="p">)</span>
|
| 3942 |
<span class="n">end_time</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">perf_counter</span><span class="p">()</span>
|
| 3943 |
+
|
| 3944 |
+
<span class="nb">print</span><span class="p">(</span><span class="n">tokenizer</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="n">generated</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">skip_special_tokens</span><span class="o">=</span><span class="kc">False</span><span class="p">))</span>
|
| 3945 |
+
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Generation took </span><span class="si">{</span><span class="n">end_time</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="n">start_time</span><span class="si">:</span><span class="s2">.2f</span><span class="si">}</span><span class="s2"> seconds"</span><span class="p">)</span>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3946 |
</pre></div>
|
| 3947 |
|
| 3948 |
+
<div class="code-line-highlight" id="line-highlight-forward_only"></div>
|
| 3949 |
</div>
|
| 3950 |
</div>
|
| 3951 |
</div>
|
| 3952 |
+
<div id="output-forward_only" class="cell-output">
|
| 3953 |
+
<div class="cell-stderr">warning: The requested interpreter resolved to Python 3.11.13, which is incompatible with the script's Python requirement: `>=3.12`
|
|
|
|
| 3954 |
Updating https://github.com/huggingface/transformers.git (HEAD)
|
| 3955 |
Updated https://github.com/huggingface/transformers.git (449533af73874470e914a203391635e04ac2ffc8)
|
| 3956 |
× No solution found when resolving script dependencies:
|
|
|
|
| 3966 |
</div>
|
| 3967 |
</div>
|
| 3968 |
</div>
|
| 3969 |
+
|
| 3970 |
+
<h2>Forward and Backward</h2>
|
| 3971 |
+
<p>Next, we run a forward and backward pass with Megablocks kernels enabled. This should be more memory efficient and allow us to complete the backward pass without running out of memory.</p>
|
| 3972 |
</div>
|
| 3973 |
|
| 3974 |
</body>
|
moe_benchmarks/megablocks_yamoe/artifacts/binned_run/binned_results.json
CHANGED
|
@@ -9,16 +9,16 @@
|
|
| 9 |
"vary_inputs": true
|
| 10 |
},
|
| 11 |
"stats": {
|
| 12 |
-
"avg_ms": 36.
|
| 13 |
-
"min_ms":
|
| 14 |
-
"max_ms":
|
| 15 |
-
"std_ms": 1.
|
| 16 |
-
"p50_ms": 36.
|
| 17 |
-
"p95_ms":
|
| 18 |
-
"p99_ms":
|
| 19 |
"num_iters": 50,
|
| 20 |
-
"tokens_per_s":
|
| 21 |
-
"throughput_variance":
|
| 22 |
},
|
| 23 |
"output_sum": 3.97190523147583
|
| 24 |
}
|
|
|
|
| 9 |
"vary_inputs": true
|
| 10 |
},
|
| 11 |
"stats": {
|
| 12 |
+
"avg_ms": 36.539247979999345,
|
| 13 |
+
"min_ms": 32.831213000008574,
|
| 14 |
+
"max_ms": 40.07397899999887,
|
| 15 |
+
"std_ms": 1.6136977896039295,
|
| 16 |
+
"p50_ms": 36.86953950000316,
|
| 17 |
+
"p95_ms": 39.00453930000651,
|
| 18 |
+
"p99_ms": 39.94982966000805,
|
| 19 |
"num_iters": 50,
|
| 20 |
+
"tokens_per_s": 2736.7831996634814,
|
| 21 |
+
"throughput_variance": 122.96225627695094
|
| 22 |
},
|
| 23 |
"output_sum": 3.97190523147583
|
| 24 |
}
|
moe_benchmarks/megablocks_yamoe/artifacts/gptoss_run/gptoss_results.json
CHANGED
|
@@ -9,16 +9,16 @@
|
|
| 9 |
"vary_inputs": true
|
| 10 |
},
|
| 11 |
"stats": {
|
| 12 |
-
"avg_ms": 45.
|
| 13 |
-
"min_ms":
|
| 14 |
-
"max_ms": 49.
|
| 15 |
-
"std_ms":
|
| 16 |
-
"p50_ms":
|
| 17 |
-
"p95_ms":
|
| 18 |
-
"p99_ms": 49.
|
| 19 |
"num_iters": 50,
|
| 20 |
-
"tokens_per_s":
|
| 21 |
-
"throughput_variance":
|
| 22 |
},
|
| 23 |
"output_sum": 11.53223705291748
|
| 24 |
}
|
|
|
|
| 9 |
"vary_inputs": true
|
| 10 |
},
|
| 11 |
"stats": {
|
| 12 |
+
"avg_ms": 45.23668440000279,
|
| 13 |
+
"min_ms": 39.77574300000697,
|
| 14 |
+
"max_ms": 49.24737599998252,
|
| 15 |
+
"std_ms": 2.8509890347901194,
|
| 16 |
+
"p50_ms": 46.46113100000093,
|
| 17 |
+
"p95_ms": 48.43337355002859,
|
| 18 |
+
"p99_ms": 49.07736706999401,
|
| 19 |
"num_iters": 50,
|
| 20 |
+
"tokens_per_s": 2210.595257507286,
|
| 21 |
+
"throughput_variance": 143.00506086156602
|
| 22 |
},
|
| 23 |
"output_sum": 11.53223705291748
|
| 24 |
}
|
moe_benchmarks/megablocks_yamoe/artifacts/gptoss_training_run/gptoss_training_results.json
CHANGED
|
@@ -9,16 +9,16 @@
|
|
| 9 |
"vary_inputs": true
|
| 10 |
},
|
| 11 |
"stats": {
|
| 12 |
-
"avg_ms":
|
| 13 |
-
"min_ms":
|
| 14 |
-
"max_ms":
|
| 15 |
-
"std_ms":
|
| 16 |
-
"p50_ms":
|
| 17 |
-
"p95_ms": 50.
|
| 18 |
-
"p99_ms":
|
| 19 |
"num_iters": 50,
|
| 20 |
-
"tokens_per_s":
|
| 21 |
-
"throughput_variance":
|
| 22 |
},
|
| 23 |
"output_sum": 11.53223705291748
|
| 24 |
}
|
|
|
|
| 9 |
"vary_inputs": true
|
| 10 |
},
|
| 11 |
"stats": {
|
| 12 |
+
"avg_ms": 45.75117022000086,
|
| 13 |
+
"min_ms": 38.86002600000893,
|
| 14 |
+
"max_ms": 50.81734300000562,
|
| 15 |
+
"std_ms": 2.8335743767450845,
|
| 16 |
+
"p50_ms": 45.83255949998488,
|
| 17 |
+
"p95_ms": 50.53969424997433,
|
| 18 |
+
"p99_ms": 50.77732372002288,
|
| 19 |
"num_iters": 50,
|
| 20 |
+
"tokens_per_s": 2185.7364416939745,
|
| 21 |
+
"throughput_variance": 141.0139740289509
|
| 22 |
},
|
| 23 |
"output_sum": 11.53223705291748
|
| 24 |
}
|
moe_benchmarks/megablocks_yamoe/artifacts/megablocks_run/megablocks_results.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"implementation": "megablocks_results",
|
| 3 |
+
"config": {
|
| 4 |
+
"warmup": 10,
|
| 5 |
+
"iters": 50,
|
| 6 |
+
"device": "cuda",
|
| 7 |
+
"dtype": "torch.float32",
|
| 8 |
+
"tokens": 100,
|
| 9 |
+
"vary_inputs": true
|
| 10 |
+
},
|
| 11 |
+
"stats": {
|
| 12 |
+
"avg_ms": 3.8699466199977905,
|
| 13 |
+
"min_ms": 0.810037999940505,
|
| 14 |
+
"max_ms": 8.541234000063014,
|
| 15 |
+
"std_ms": 3.7276327220093757,
|
| 16 |
+
"p50_ms": 0.840403500035336,
|
| 17 |
+
"p95_ms": 8.54010504996836,
|
| 18 |
+
"p99_ms": 8.54096940002023,
|
| 19 |
+
"num_iters": 50,
|
| 20 |
+
"tokens_per_s": 25840.149702131315,
|
| 21 |
+
"throughput_variance": 53236.043048659514
|
| 22 |
+
},
|
| 23 |
+
"output_sum": 6.4738850593566895
|
| 24 |
+
}
|
moe_benchmarks/megablocks_yamoe/artifacts/visualization/moe_performance_comparison.png
ADDED
|
Git LFS Details
|
moe_benchmarks/megablocks_yamoe/artifacts/yamoe_run/yamoe_results.json
CHANGED
|
@@ -9,16 +9,16 @@
|
|
| 9 |
"vary_inputs": true
|
| 10 |
},
|
| 11 |
"stats": {
|
| 12 |
-
"avg_ms": 4.
|
| 13 |
-
"min_ms": 4.
|
| 14 |
-
"max_ms": 4.
|
| 15 |
-
"std_ms": 0.
|
| 16 |
-
"p50_ms": 4.
|
| 17 |
-
"p95_ms": 4.
|
| 18 |
-
"p99_ms": 4.
|
| 19 |
"num_iters": 50,
|
| 20 |
-
"tokens_per_s":
|
| 21 |
-
"throughput_variance":
|
| 22 |
},
|
| 23 |
"output_sum": 3.97190523147583
|
| 24 |
}
|
|
|
|
| 9 |
"vary_inputs": true
|
| 10 |
},
|
| 11 |
"stats": {
|
| 12 |
+
"avg_ms": 4.249726199998349,
|
| 13 |
+
"min_ms": 4.124869000008857,
|
| 14 |
+
"max_ms": 4.300293000028432,
|
| 15 |
+
"std_ms": 0.0233051162040199,
|
| 16 |
+
"p50_ms": 4.253981000005069,
|
| 17 |
+
"p95_ms": 4.2678174999991825,
|
| 18 |
+
"p99_ms": 4.289845220023949,
|
| 19 |
"num_iters": 50,
|
| 20 |
+
"tokens_per_s": 23530.927710128442,
|
| 21 |
+
"throughput_variance": 131.31218448800846
|
| 22 |
},
|
| 23 |
"output_sum": 3.97190523147583
|
| 24 |
}
|
moe_benchmarks/megablocks_yamoe/cells/__pycache__/bench_utils.cpython-311.pyc
CHANGED
|
Binary files a/moe_benchmarks/megablocks_yamoe/cells/__pycache__/bench_utils.cpython-311.pyc and b/moe_benchmarks/megablocks_yamoe/cells/__pycache__/bench_utils.cpython-311.pyc differ
|
|
|
moe_benchmarks/megablocks_yamoe/cells/__pycache__/config.cpython-311.pyc
CHANGED
|
Binary files a/moe_benchmarks/megablocks_yamoe/cells/__pycache__/config.cpython-311.pyc and b/moe_benchmarks/megablocks_yamoe/cells/__pycache__/config.cpython-311.pyc differ
|
|
|
moe_benchmarks/megablocks_yamoe/cells/visualization.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# /// script
|
| 2 |
+
# dependencies = [
|
| 3 |
+
# "matplotlib",
|
| 4 |
+
# ]
|
| 5 |
+
# ///
|
| 6 |
+
|
| 7 |
+
import json
|
| 8 |
+
import matplotlib.pyplot as plt
|
| 9 |
+
import numpy as np
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
import os
|
| 12 |
+
|
| 13 |
+
# List of expected result files
|
| 14 |
+
yamoe_dir = os.environ.get('UVNOTE_INPUT_YAMOE_RUN', '.')
|
| 15 |
+
binned_dir = os.environ.get('UVNOTE_INPUT_BINNED_RUN', '.')
|
| 16 |
+
gptoss_dir = os.environ.get('UVNOTE_INPUT_GPTOSS_RUN', '.')
|
| 17 |
+
gptoss_training_dir = os.environ.get('UVNOTE_INPUT_GPTOSS_TRAINING_RUN', '.')
|
| 18 |
+
megablocks_dir = os.environ.get('UVNOTE_INPUT_MEGABLOCKS_RUN', '.')
|
| 19 |
+
|
| 20 |
+
result_files = [
|
| 21 |
+
Path(yamoe_dir) / "yamoe_results.json",
|
| 22 |
+
Path(binned_dir) / "binned_results.json",
|
| 23 |
+
Path(gptoss_dir) / "gptoss_results.json",
|
| 24 |
+
Path(gptoss_training_dir) / "gptoss_training_results.json",
|
| 25 |
+
Path(megablocks_dir) / "megablocks_results.json"
|
| 26 |
+
]
|
| 27 |
+
|
| 28 |
+
# Load all benchmark results
|
| 29 |
+
results = {}
|
| 30 |
+
for file in result_files:
|
| 31 |
+
if Path(file).exists():
|
| 32 |
+
with open(file, 'r') as f:
|
| 33 |
+
data = json.load(f)
|
| 34 |
+
results[data['implementation']] = data
|
| 35 |
+
print(f"Loaded {file}")
|
| 36 |
+
else:
|
| 37 |
+
print(f"Missing {file}")
|
| 38 |
+
|
| 39 |
+
if not results:
|
| 40 |
+
print("No benchmark results found. Run the benchmark cells first.")
|
| 41 |
+
else:
|
| 42 |
+
# Extract data for plotting
|
| 43 |
+
implementations = list(results.keys())
|
| 44 |
+
avg_latencies = [results[impl]['stats']['avg_ms'] for impl in implementations]
|
| 45 |
+
p95_latencies = [results[impl]['stats']['p95_ms'] for impl in implementations]
|
| 46 |
+
throughputs = [results[impl]['stats'].get('tokens_per_s', 0) for impl in implementations]
|
| 47 |
+
|
| 48 |
+
# Create figure with subplots
|
| 49 |
+
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 6))
|
| 50 |
+
fig.suptitle('MoE Implementation Performance Comparison', fontsize=16, fontweight='bold')
|
| 51 |
+
|
| 52 |
+
# Colors for each implementation
|
| 53 |
+
colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FECA57'][:len(implementations)]
|
| 54 |
+
|
| 55 |
+
# 1. Average Latency Chart
|
| 56 |
+
bars1 = ax1.bar(implementations, avg_latencies, color=colors, alpha=0.8, edgecolor='black', linewidth=1)
|
| 57 |
+
ax1.set_title('Average Latency', fontweight='bold', fontsize=14)
|
| 58 |
+
ax1.set_ylabel('Latency (ms)', fontweight='bold')
|
| 59 |
+
ax1.tick_params(axis='x', rotation=45)
|
| 60 |
+
ax1.grid(axis='y', alpha=0.3)
|
| 61 |
+
|
| 62 |
+
# Add value labels on bars
|
| 63 |
+
for bar, val in zip(bars1, avg_latencies):
|
| 64 |
+
ax1.text(bar.get_x() + bar.get_width()/2, bar.get_height() + max(avg_latencies)*0.01,
|
| 65 |
+
f'{val:.2f}ms', ha='center', va='bottom', fontweight='bold')
|
| 66 |
+
|
| 67 |
+
# 2. P95 Latency Chart
|
| 68 |
+
bars2 = ax2.bar(implementations, p95_latencies, color=colors, alpha=0.8, edgecolor='black', linewidth=1)
|
| 69 |
+
ax2.set_title('95th Percentile Latency', fontweight='bold', fontsize=14)
|
| 70 |
+
ax2.set_ylabel('Latency (ms)', fontweight='bold')
|
| 71 |
+
ax2.tick_params(axis='x', rotation=45)
|
| 72 |
+
ax2.grid(axis='y', alpha=0.3)
|
| 73 |
+
|
| 74 |
+
# Add value labels on bars
|
| 75 |
+
for bar, val in zip(bars2, p95_latencies):
|
| 76 |
+
ax2.text(bar.get_x() + bar.get_width()/2, bar.get_height() + max(p95_latencies)*0.01,
|
| 77 |
+
f'{val:.2f}ms', ha='center', va='bottom', fontweight='bold')
|
| 78 |
+
|
| 79 |
+
# 3. Throughput Chart
|
| 80 |
+
bars3 = ax3.bar(implementations, throughputs, color=colors, alpha=0.8, edgecolor='black', linewidth=1)
|
| 81 |
+
ax3.set_title('Throughput', fontweight='bold', fontsize=14)
|
| 82 |
+
ax3.set_ylabel('Tokens/sec', fontweight='bold')
|
| 83 |
+
ax3.tick_params(axis='x', rotation=45)
|
| 84 |
+
ax3.grid(axis='y', alpha=0.3)
|
| 85 |
+
|
| 86 |
+
# Add value labels on bars
|
| 87 |
+
for bar, val in zip(bars3, throughputs):
|
| 88 |
+
if val > 0: # Only show label if throughput was calculated
|
| 89 |
+
ax3.text(bar.get_x() + bar.get_width()/2, bar.get_height() + max(throughputs)*0.01,
|
| 90 |
+
f'{val:.0f}', ha='center', va='bottom', fontweight='bold')
|
| 91 |
+
|
| 92 |
+
plt.tight_layout()
|
| 93 |
+
plt.savefig("moe_performance_comparison.png", dpi=300)
|
| 94 |
+
|
| 95 |
+
# Print summary table
|
| 96 |
+
print("\nPerformance Summary:")
|
| 97 |
+
print(f"{'Implementation':<30} {'Avg (ms)':<12} {'P95 (ms)':<12} {'Tokens/sec':<12} {'Relative Speed':<15}")
|
| 98 |
+
print("-"*80)
|
| 99 |
+
|
| 100 |
+
# Sort by average latency for relative speed calculation
|
| 101 |
+
sorted_results = sorted(results.items(), key=lambda x: x[1]['stats']['avg_ms'])
|
| 102 |
+
fastest_latency = sorted_results[0][1]['stats']['avg_ms']
|
| 103 |
+
|
| 104 |
+
for impl, data in sorted_results:
|
| 105 |
+
avg_ms = data['stats']['avg_ms']
|
| 106 |
+
p95_ms = data['stats']['p95_ms']
|
| 107 |
+
tokens_s = data['stats'].get('tokens_per_s', 0)
|
| 108 |
+
relative_speed = fastest_latency / avg_ms
|
| 109 |
+
|
| 110 |
+
print(f"{impl:<30} {avg_ms:>8.2f} {p95_ms:>8.2f} {tokens_s:>8.0f} {relative_speed:>6.2f}x")
|
| 111 |
+
|
| 112 |
+
print(f"\nFastest: {sorted_results[0][0]} ({sorted_results[0][1]['stats']['avg_ms']:.2f}ms avg)")
|
| 113 |
+
if len(sorted_results) > 1:
|
| 114 |
+
print(f"Slowest: {sorted_results[-1][0]} ({sorted_results[-1][1]['stats']['avg_ms']:.2f}ms avg)")
|
| 115 |
+
speedup = sorted_results[-1][1]['stats']['avg_ms'] / sorted_results[0][1]['stats']['avg_ms']
|
| 116 |
+
print(f"Max Speedup: {speedup:.1f}x")
|
moe_benchmarks/megablocks_yamoe/megablocks_yamoe.html
CHANGED
|
@@ -3726,7 +3726,7 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left:
|
|
| 3726 |
<span onclick="toggleOutput('setup')" style="cursor: pointer;">▼ output</span>
|
| 3727 |
<span id="uv-indicator-setup" style="cursor: default; opacity: 0.3;">▶ uv-logs</span>
|
| 3728 |
</span> |
|
| 3729 |
-
Cell: setup |
|
| 3730 |
| <button class="run-btn" onclick="runCell('setup')">▶ run</button>
|
| 3731 |
<button class="copy-btn" onclick="copyCell('setup')">Copy</button>
|
| 3732 |
<a href="cells/setup.py" target="_blank" class="raw-btn">Raw</a>
|
|
@@ -3975,8 +3975,7 @@ Cell: setup | 19.20s | FAILED
|
|
| 3975 |
</div>
|
| 3976 |
</div>
|
| 3977 |
<div id="output-setup" class="cell-output">
|
| 3978 |
-
<div class="cell-stderr">
|
| 3979 |
-
Downloading cpython-3.13.7-linux-x86_64-gnu (download)
|
| 3980 |
Updating https://github.com/huggingface/transformers.git (HEAD)
|
| 3981 |
Updated https://github.com/huggingface/transformers.git (449533af73874470e914a203391635e04ac2ffc8)
|
| 3982 |
× No solution found when resolving script dependencies:
|
|
|
|
| 3726 |
<span onclick="toggleOutput('setup')" style="cursor: pointer;">▼ output</span>
|
| 3727 |
<span id="uv-indicator-setup" style="cursor: default; opacity: 0.3;">▶ uv-logs</span>
|
| 3728 |
</span> |
|
| 3729 |
+
Cell: setup | 17.01s | FAILED
|
| 3730 |
| <button class="run-btn" onclick="runCell('setup')">▶ run</button>
|
| 3731 |
<button class="copy-btn" onclick="copyCell('setup')">Copy</button>
|
| 3732 |
<a href="cells/setup.py" target="_blank" class="raw-btn">Raw</a>
|
|
|
|
| 3975 |
</div>
|
| 3976 |
</div>
|
| 3977 |
<div id="output-setup" class="cell-output">
|
| 3978 |
+
<div class="cell-stderr">warning: The requested interpreter resolved to Python 3.11.13, which is incompatible with the script's Python requirement: `>=3.12`
|
|
|
|
| 3979 |
Updating https://github.com/huggingface/transformers.git (HEAD)
|
| 3980 |
Updated https://github.com/huggingface/transformers.git (449533af73874470e914a203391635e04ac2ffc8)
|
| 3981 |
× No solution found when resolving script dependencies:
|
moe_benchmarks/megablocks_yamoe/torch_profile.html
CHANGED
|
@@ -3720,7 +3720,7 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left:
|
|
| 3720 |
<span onclick="toggleOutput('utils')" style="cursor: pointer;">▼ output</span>
|
| 3721 |
<span id="uv-indicator-utils" onclick="toggleUvLogsFromHeader('utils')" style="cursor: pointer;">▶ uv-logs</span>
|
| 3722 |
</span> |
|
| 3723 |
-
Cell: utils | deps: torch, numpy |
|
| 3724 |
| <button class="run-btn" onclick="runCell('utils')">▶ run</button>
|
| 3725 |
<button class="copy-btn" onclick="copyCell('utils')">Copy</button>
|
| 3726 |
<a href="cells/utils.py" target="_blank" class="raw-btn">Raw</a>
|
|
@@ -3794,23 +3794,23 @@ Cell: utils | deps: torch, numpy | 34.59s
|
|
| 3794 |
<div class="uv-install-logs" id="uv-logs-utils">
|
| 3795 |
<div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
|
| 3796 |
<div class="uv-logs-content" style="display: none;">
|
| 3797 |
-
Downloading
|
| 3798 |
-
Downloading networkx (1.9MiB)
|
| 3799 |
-
Downloading nvidia-cublas-cu12 (566.8MiB)
|
| 3800 |
-
Downloading nvidia-cusparse-cu12 (274.9MiB)
|
| 3801 |
Downloading nvidia-cufile-cu12 (1.1MiB)
|
| 3802 |
-
Downloading
|
| 3803 |
-
Downloading nvidia-
|
| 3804 |
-
Downloading nvidia-
|
| 3805 |
Downloading nvidia-cusparselt-cu12 (273.9MiB)
|
| 3806 |
-
Downloading nvidia-curand-cu12 (60.7MiB)
|
| 3807 |
Downloading torch (846.9MiB)
|
| 3808 |
-
Downloading nvidia-
|
| 3809 |
-
Downloading
|
| 3810 |
-
Downloading nvidia-
|
| 3811 |
-
Downloading
|
| 3812 |
-
Downloading
|
| 3813 |
Downloading nvidia-cusolver-cu12 (255.1MiB)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3814 |
Downloading triton (148.3MiB)
|
| 3815 |
Downloading nvidia-cufile-cu12
|
| 3816 |
Downloading setuptools
|
|
@@ -3830,7 +3830,7 @@ Downloading triton (148.3MiB)
|
|
| 3830 |
Downloading nvidia-cublas-cu12
|
| 3831 |
Downloading nvidia-cudnn-cu12
|
| 3832 |
Downloading torch
|
| 3833 |
-
Installed 26 packages in
|
| 3834 |
</div>
|
| 3835 |
</div>
|
| 3836 |
</div>
|
|
@@ -3843,7 +3843,7 @@ Installed 26 packages in 452ms
|
|
| 3843 |
<span onclick="toggleOutput('bench_utils')" style="cursor: pointer;">▼ output</span>
|
| 3844 |
<span id="uv-indicator-bench_utils" onclick="toggleUvLogsFromHeader('bench_utils')" style="cursor: pointer;">▶ uv-logs</span>
|
| 3845 |
</span> |
|
| 3846 |
-
Cell: bench_utils | deps: torch, numpy |
|
| 3847 |
| <button class="run-btn" onclick="runCell('bench_utils')">▶ run</button>
|
| 3848 |
<button class="copy-btn" onclick="copyCell('bench_utils')">Copy</button>
|
| 3849 |
<a href="cells/bench_utils.py" target="_blank" class="raw-btn">Raw</a>
|
|
@@ -4331,23 +4331,23 @@ Cell: bench_utils | deps: torch, numpy | 35.65s
|
|
| 4331 |
<div class="uv-install-logs" id="uv-logs-bench_utils">
|
| 4332 |
<div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
|
| 4333 |
<div class="uv-logs-content" style="display: none;">
|
| 4334 |
-
Downloading nvidia-nvjitlink-cu12 (37.4MiB)
|
| 4335 |
-
Downloading nvidia-cusparselt-cu12 (273.9MiB)
|
| 4336 |
-
Downloading nvidia-cusparse-cu12 (274.9MiB)
|
| 4337 |
-
Downloading networkx (1.9MiB)
|
| 4338 |
-
Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
|
| 4339 |
-
Downloading nvidia-cudnn-cu12 (674.0MiB)
|
| 4340 |
Downloading setuptools (1.1MiB)
|
| 4341 |
Downloading nvidia-cufile-cu12 (1.1MiB)
|
| 4342 |
-
Downloading nvidia-cufft-cu12 (184.2MiB)
|
| 4343 |
-
Downloading nvidia-cusolver-cu12 (255.1MiB)
|
| 4344 |
-
Downloading torch (846.9MiB)
|
| 4345 |
Downloading sympy (6.0MiB)
|
| 4346 |
-
Downloading
|
| 4347 |
Downloading nvidia-curand-cu12 (60.7MiB)
|
| 4348 |
-
Downloading nvidia-nccl-cu12 (307.4MiB)
|
| 4349 |
-
Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
|
| 4350 |
Downloading nvidia-cublas-cu12 (566.8MiB)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4351 |
Downloading triton (148.3MiB)
|
| 4352 |
Downloading nvidia-cufile-cu12
|
| 4353 |
Downloading setuptools
|
|
@@ -4367,7 +4367,7 @@ Downloading triton (148.3MiB)
|
|
| 4367 |
Downloading nvidia-cublas-cu12
|
| 4368 |
Downloading nvidia-cudnn-cu12
|
| 4369 |
Downloading torch
|
| 4370 |
-
Installed 26 packages in
|
| 4371 |
</div>
|
| 4372 |
</div>
|
| 4373 |
</div>
|
|
@@ -4381,7 +4381,7 @@ Installed 26 packages in 452ms
|
|
| 4381 |
<span onclick="toggleOutput('config')" style="cursor: pointer;">▼ output</span>
|
| 4382 |
<span id="uv-indicator-config" onclick="toggleUvLogsFromHeader('config')" style="cursor: pointer;">▶ uv-logs</span>
|
| 4383 |
</span> |
|
| 4384 |
-
Cell: config | deps: torch, numpy | 34.
|
| 4385 |
| <button class="run-btn" onclick="runCell('config')">▶ run</button>
|
| 4386 |
<button class="copy-btn" onclick="copyCell('config')">Copy</button>
|
| 4387 |
<a href="cells/config.py" target="_blank" class="raw-btn">Raw</a>
|
|
@@ -4441,23 +4441,23 @@ Cell: config | deps: torch, numpy | 34.53s
|
|
| 4441 |
<div class="uv-install-logs" id="uv-logs-config">
|
| 4442 |
<div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
|
| 4443 |
<div class="uv-logs-content" style="display: none;">
|
|
|
|
| 4444 |
Downloading nvidia-cufile-cu12 (1.1MiB)
|
|
|
|
|
|
|
| 4445 |
Downloading nvidia-cusparse-cu12 (274.9MiB)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4446 |
Downloading nvidia-nccl-cu12 (307.4MiB)
|
|
|
|
| 4447 |
Downloading nvidia-cusparselt-cu12 (273.9MiB)
|
| 4448 |
-
Downloading nvidia-cuda-
|
| 4449 |
-
Downloading nvidia-cudnn-cu12 (674.0MiB)
|
| 4450 |
-
Downloading numpy (16.2MiB)
|
| 4451 |
Downloading networkx (1.9MiB)
|
| 4452 |
-
Downloading
|
| 4453 |
-
Downloading
|
| 4454 |
-
Downloading nvidia-cufft-cu12 (184.2MiB)
|
| 4455 |
-
Downloading nvidia-nvjitlink-cu12 (37.4MiB)
|
| 4456 |
-
Downloading nvidia-curand-cu12 (60.7MiB)
|
| 4457 |
Downloading nvidia-cublas-cu12 (566.8MiB)
|
| 4458 |
-
Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
|
| 4459 |
-
Downloading nvidia-cusolver-cu12 (255.1MiB)
|
| 4460 |
-
Downloading sympy (6.0MiB)
|
| 4461 |
Downloading triton (148.3MiB)
|
| 4462 |
Downloading nvidia-cufile-cu12
|
| 4463 |
Downloading setuptools
|
|
@@ -4474,10 +4474,10 @@ Downloading triton (148.3MiB)
|
|
| 4474 |
Downloading nvidia-cusparselt-cu12
|
| 4475 |
Downloading nvidia-cusparse-cu12
|
| 4476 |
Downloading nvidia-nccl-cu12
|
| 4477 |
-
Downloading nvidia-cublas-cu12
|
| 4478 |
Downloading nvidia-cudnn-cu12
|
|
|
|
| 4479 |
Downloading torch
|
| 4480 |
-
Installed 26 packages in
|
| 4481 |
</div>
|
| 4482 |
</div>
|
| 4483 |
</div>
|
|
@@ -4490,7 +4490,7 @@ Installed 26 packages in 448ms
|
|
| 4490 |
<span onclick="toggleOutput('save_data')" style="cursor: pointer;">▼ output</span>
|
| 4491 |
<span id="uv-indicator-save_data" onclick="toggleUvLogsFromHeader('save_data')" style="cursor: pointer;">▶ uv-logs</span>
|
| 4492 |
</span> |
|
| 4493 |
-
Cell: save_data | deps: torch, numpy | 39.
|
| 4494 |
| <button class="run-btn" onclick="runCell('save_data')">▶ run</button>
|
| 4495 |
<button class="copy-btn" onclick="copyCell('save_data')">Copy</button>
|
| 4496 |
<a href="cells/save_data.py" target="_blank" class="raw-btn">Raw</a>
|
|
@@ -4585,24 +4585,24 @@ Down sum: 206.729263
|
|
| 4585 |
<div class="uv-install-logs" id="uv-logs-save_data">
|
| 4586 |
<div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
|
| 4587 |
<div class="uv-logs-content" style="display: none;">
|
| 4588 |
-
Downloading networkx (1.9MiB)
|
| 4589 |
Downloading nvidia-cufft-cu12 (184.2MiB)
|
| 4590 |
-
Downloading
|
| 4591 |
Downloading nvidia-nvjitlink-cu12 (37.4MiB)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4592 |
Downloading nvidia-nccl-cu12 (307.4MiB)
|
| 4593 |
-
Downloading
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4594 |
Downloading nvidia-cudnn-cu12 (674.0MiB)
|
| 4595 |
Downloading nvidia-cublas-cu12 (566.8MiB)
|
| 4596 |
-
Downloading
|
| 4597 |
-
Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
|
| 4598 |
Downloading nvidia-cusparse-cu12 (274.9MiB)
|
| 4599 |
-
Downloading nvidia-cusolver-cu12 (255.1MiB)
|
| 4600 |
-
Downloading nvidia-curand-cu12 (60.7MiB)
|
| 4601 |
Downloading nvidia-cusparselt-cu12 (273.9MiB)
|
| 4602 |
-
Downloading triton (148.3MiB)
|
| 4603 |
-
Downloading torch (846.9MiB)
|
| 4604 |
-
Downloading numpy (16.2MiB)
|
| 4605 |
-
Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
|
| 4606 |
Downloading nvidia-cufile-cu12
|
| 4607 |
Downloading setuptools
|
| 4608 |
Downloading networkx
|
|
@@ -4615,23 +4615,23 @@ Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
|
|
| 4615 |
Downloading triton
|
| 4616 |
Downloading nvidia-cufft-cu12
|
| 4617 |
Downloading nvidia-cusolver-cu12
|
| 4618 |
-
Downloading nvidia-cusparselt-cu12
|
| 4619 |
Downloading nvidia-cusparse-cu12
|
|
|
|
| 4620 |
Downloading nvidia-nccl-cu12
|
| 4621 |
Downloading nvidia-cublas-cu12
|
| 4622 |
Downloading nvidia-cudnn-cu12
|
| 4623 |
Downloading torch
|
| 4624 |
-
Installed 26 packages in
|
| 4625 |
</div>
|
| 4626 |
</div>
|
| 4627 |
<div class="cell-artifacts">
|
| 4628 |
<h4>Artifacts:</h4>
|
| 4629 |
<a href="artifacts/save_data/router_bias.pt" class="artifact" target="_blank">router_bias.pt</a>
|
| 4630 |
-
<a href="artifacts/save_data/gate_up_proj_bias.pt" class="artifact" target="_blank">gate_up_proj_bias.pt</a>
|
| 4631 |
-
<a href="artifacts/save_data/down_proj.pt" class="artifact" target="_blank">down_proj.pt</a>
|
| 4632 |
-
<a href="artifacts/save_data/gate_up_proj.pt" class="artifact" target="_blank">gate_up_proj.pt</a>
|
| 4633 |
-
<a href="artifacts/save_data/down_proj_bias.pt" class="artifact" target="_blank">down_proj_bias.pt</a>
|
| 4634 |
<a href="artifacts/save_data/router_weight.pt" class="artifact" target="_blank">router_weight.pt</a>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4635 |
</div>
|
| 4636 |
</div>
|
| 4637 |
</div>
|
|
@@ -4645,7 +4645,7 @@ Installed 26 packages in 450ms
|
|
| 4645 |
<span onclick="toggleOutput('yamoe_run')" style="cursor: pointer;">▼ output</span>
|
| 4646 |
<span id="uv-indicator-yamoe_run" onclick="toggleUvLogsFromHeader('yamoe_run')" style="cursor: pointer;">▶ uv-logs</span>
|
| 4647 |
</span> |
|
| 4648 |
-
Cell: yamoe_run | deps: torch, kernels, numpy |
|
| 4649 |
| <button class="run-btn" onclick="runCell('yamoe_run')">▶ run</button>
|
| 4650 |
<button class="copy-btn" onclick="copyCell('yamoe_run')">Copy</button>
|
| 4651 |
<a href="cells/yamoe_run.py" target="_blank" class="raw-btn">Raw</a>
|
|
@@ -4916,7 +4916,7 @@ Cell: yamoe_run | deps: torch, kernels, numpy | 39.19s
|
|
| 4916 |
</div>
|
| 4917 |
</div>
|
| 4918 |
<div id="output-yamoe_run" class="cell-output">
|
| 4919 |
-
<div class="cell-stdout">Loading weights from: /repo/moe_benchmarks/megablocks_yamoe/.uvnote/cache/
|
| 4920 |
Loaded shared weights from artifacts
|
| 4921 |
Router weight sum: 12.588732
|
| 4922 |
Gate/up sum: 1026.601807
|
|
@@ -4939,9 +4939,9 @@ Input Variation: +0.001 * iteration (deterministic)
|
|
| 4939 |
Warming up (10 iterations)...
|
| 4940 |
Benchmarking (50 iterations)...
|
| 4941 |
Progress: 20% complete (avg: 4.253 ms)
|
| 4942 |
-
Progress: 40% complete (avg: 4.
|
| 4943 |
-
Progress: 60% complete (avg: 4.
|
| 4944 |
-
Progress: 80% complete (avg: 4.
|
| 4945 |
|
| 4946 |
Output tensors:
|
| 4947 |
Primary: shape=(1, 100, 1152), dtype=torch.float32, device=cuda:0, range=[-0.049506, 0.054984], mean=0.000034, std=0.006508, norm=2.208791
|
|
@@ -4951,19 +4951,19 @@ Output tensors:
|
|
| 4951 |
Iterations: 50
|
| 4952 |
|
| 4953 |
Latency Statistics:
|
| 4954 |
-
Average: 4.
|
| 4955 |
-
Min: 4.
|
| 4956 |
-
Max: 4.
|
| 4957 |
-
Std Dev: 0.
|
| 4958 |
|
| 4959 |
Percentiles:
|
| 4960 |
P50 (median): 4.254 ms
|
| 4961 |
-
P95: 4.
|
| 4962 |
-
P99: 4.
|
| 4963 |
|
| 4964 |
Throughput:
|
| 4965 |
-
Tokens/sec:
|
| 4966 |
-
Std Dev:
|
| 4967 |
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 4968 |
|
| 4969 |
Saved benchmark results to yamoe_results.json
|
|
@@ -4973,25 +4973,25 @@ Output sum: 3.971905
|
|
| 4973 |
<div class="uv-install-logs" id="uv-logs-yamoe_run">
|
| 4974 |
<div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
|
| 4975 |
<div class="uv-logs-content" style="display: none;">
|
| 4976 |
-
Downloading nvidia-
|
|
|
|
|
|
|
|
|
|
| 4977 |
Downloading networkx (1.9MiB)
|
| 4978 |
-
Downloading nvidia-
|
|
|
|
| 4979 |
Downloading sympy (6.0MiB)
|
|
|
|
| 4980 |
Downloading nvidia-cublas-cu12 (566.8MiB)
|
| 4981 |
-
Downloading
|
| 4982 |
-
Downloading nvidia-cudnn-cu12 (674.0MiB)
|
| 4983 |
Downloading nvidia-cufft-cu12 (184.2MiB)
|
| 4984 |
-
Downloading
|
| 4985 |
-
Downloading setuptools (1.1MiB)
|
| 4986 |
Downloading nvidia-cusolver-cu12 (255.1MiB)
|
| 4987 |
-
Downloading nvidia-
|
| 4988 |
-
Downloading nvidia-
|
| 4989 |
-
Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
|
| 4990 |
-
Downloading hf-xet (3.0MiB)
|
| 4991 |
Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
|
|
|
|
| 4992 |
Downloading nvidia-nvjitlink-cu12 (37.4MiB)
|
| 4993 |
-
Downloading torch (846.9MiB)
|
| 4994 |
-
Downloading triton (148.3MiB)
|
| 4995 |
Downloading nvidia-cufile-cu12
|
| 4996 |
Downloading hf-xet
|
| 4997 |
Downloading setuptools
|
|
@@ -5011,13 +5011,14 @@ Downloading triton (148.3MiB)
|
|
| 5011 |
Downloading nvidia-cublas-cu12
|
| 5012 |
Downloading nvidia-cudnn-cu12
|
| 5013 |
Downloading torch
|
| 5014 |
-
Installed 37 packages in
|
| 5015 |
</div>
|
| 5016 |
</div>
|
| 5017 |
<div class="cell-stderr">Fetching 6 files: 0%| | 0/6 [00:00<?, ?it/s]
|
| 5018 |
-
Fetching 6 files: 17%|█▋ | 1/6 [00:00<00:01, 3.
|
| 5019 |
-
Fetching 6 files:
|
| 5020 |
-
Fetching 6 files:
|
|
|
|
| 5021 |
<div class="cell-artifacts">
|
| 5022 |
<h4>Artifacts:</h4>
|
| 5023 |
<a href="artifacts/yamoe_run/yamoe_results.json" class="artifact" target="_blank">yamoe_results.json</a>
|
|
@@ -5034,7 +5035,7 @@ Fetching 6 files: 100%|██████████| 6/6 [00:00<00:00, 7.5
|
|
| 5034 |
<span onclick="toggleOutput('binned_run')" style="cursor: pointer;">▼ output</span>
|
| 5035 |
<span id="uv-indicator-binned_run" onclick="toggleUvLogsFromHeader('binned_run')" style="cursor: pointer;">▶ uv-logs</span>
|
| 5036 |
</span> |
|
| 5037 |
-
Cell: binned_run | deps: torch, numpy | 39.
|
| 5038 |
| <button class="run-btn" onclick="runCell('binned_run')">▶ run</button>
|
| 5039 |
<button class="copy-btn" onclick="copyCell('binned_run')">Copy</button>
|
| 5040 |
<a href="cells/binned_run.py" target="_blank" class="raw-btn">Raw</a>
|
|
@@ -5448,10 +5449,10 @@ Input Variation: +0.001 * iteration (deterministic)
|
|
| 5448 |
|
| 5449 |
Warming up (10 iterations)...
|
| 5450 |
Benchmarking (50 iterations)...
|
| 5451 |
-
Progress: 20% complete (avg:
|
| 5452 |
-
Progress: 40% complete (avg: 37.
|
| 5453 |
-
Progress: 60% complete (avg:
|
| 5454 |
-
Progress: 80% complete (avg:
|
| 5455 |
|
| 5456 |
Output tensors:
|
| 5457 |
Primary: shape=(1, 100, 1152), dtype=torch.float32, device=cuda:0, range=[-0.049506, 0.054984], mean=0.000034, std=0.006508, norm=2.208791
|
|
@@ -5461,19 +5462,19 @@ Output tensors:
|
|
| 5461 |
Iterations: 50
|
| 5462 |
|
| 5463 |
Latency Statistics:
|
| 5464 |
-
Average: 36.
|
| 5465 |
-
Min:
|
| 5466 |
-
Max:
|
| 5467 |
-
Std Dev: 1.
|
| 5468 |
|
| 5469 |
Percentiles:
|
| 5470 |
-
P50 (median): 36.
|
| 5471 |
-
P95:
|
| 5472 |
-
P99:
|
| 5473 |
|
| 5474 |
Throughput:
|
| 5475 |
-
Tokens/sec:
|
| 5476 |
-
Std Dev:
|
| 5477 |
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 5478 |
|
| 5479 |
Saved benchmark results to binned_results.json
|
|
@@ -5483,23 +5484,23 @@ Output sum: 3.971905
|
|
| 5483 |
<div class="uv-install-logs" id="uv-logs-binned_run">
|
| 5484 |
<div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
|
| 5485 |
<div class="uv-logs-content" style="display: none;">
|
| 5486 |
-
Downloading
|
|
|
|
|
|
|
|
|
|
| 5487 |
Downloading nvidia-cusolver-cu12 (255.1MiB)
|
| 5488 |
Downloading networkx (1.9MiB)
|
| 5489 |
-
Downloading
|
| 5490 |
-
Downloading nvidia-curand-cu12 (60.7MiB)
|
| 5491 |
Downloading nvidia-nvjitlink-cu12 (37.4MiB)
|
| 5492 |
-
Downloading nvidia-cublas-cu12 (566.8MiB)
|
| 5493 |
-
Downloading nvidia-cufile-cu12 (1.1MiB)
|
| 5494 |
Downloading nvidia-nccl-cu12 (307.4MiB)
|
| 5495 |
-
Downloading nvidia-
|
| 5496 |
-
Downloading torch (846.9MiB)
|
| 5497 |
-
Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
|
| 5498 |
-
Downloading nvidia-cusparselt-cu12 (273.9MiB)
|
| 5499 |
-
Downloading setuptools (1.1MiB)
|
| 5500 |
Downloading numpy (16.2MiB)
|
| 5501 |
-
Downloading nvidia-
|
|
|
|
|
|
|
| 5502 |
Downloading triton (148.3MiB)
|
|
|
|
|
|
|
| 5503 |
Downloading nvidia-cufft-cu12 (184.2MiB)
|
| 5504 |
Downloading nvidia-cufile-cu12
|
| 5505 |
Downloading setuptools
|
|
@@ -5513,13 +5514,13 @@ Downloading nvidia-cufft-cu12 (184.2MiB)
|
|
| 5513 |
Downloading triton
|
| 5514 |
Downloading nvidia-cufft-cu12
|
| 5515 |
Downloading nvidia-cusolver-cu12
|
| 5516 |
-
Downloading nvidia-cusparse-cu12
|
| 5517 |
Downloading nvidia-cusparselt-cu12
|
|
|
|
| 5518 |
Downloading nvidia-nccl-cu12
|
| 5519 |
Downloading nvidia-cublas-cu12
|
| 5520 |
Downloading nvidia-cudnn-cu12
|
| 5521 |
Downloading torch
|
| 5522 |
-
Installed 26 packages in
|
| 5523 |
</div>
|
| 5524 |
</div>
|
| 5525 |
<div class="cell-artifacts">
|
|
@@ -5538,7 +5539,7 @@ Installed 26 packages in 449ms
|
|
| 5538 |
<span onclick="toggleOutput('gptoss_run')" style="cursor: pointer;">▼ output</span>
|
| 5539 |
<span id="uv-indicator-gptoss_run" onclick="toggleUvLogsFromHeader('gptoss_run')" style="cursor: pointer;">▶ uv-logs</span>
|
| 5540 |
</span> |
|
| 5541 |
-
Cell: gptoss_run | deps: torch, numpy | 39.
|
| 5542 |
| <button class="run-btn" onclick="runCell('gptoss_run')">▶ run</button>
|
| 5543 |
<button class="copy-btn" onclick="copyCell('gptoss_run')">Copy</button>
|
| 5544 |
<a href="cells/gptoss_run.py" target="_blank" class="raw-btn">Raw</a>
|
|
@@ -5856,10 +5857,10 @@ Input Variation: +0.001 * iteration (deterministic)
|
|
| 5856 |
|
| 5857 |
Warming up (10 iterations)...
|
| 5858 |
Benchmarking (50 iterations)...
|
| 5859 |
-
Progress: 20% complete (avg: 48.
|
| 5860 |
-
Progress: 40% complete (avg:
|
| 5861 |
-
Progress: 60% complete (avg: 47.
|
| 5862 |
-
Progress: 80% complete (avg: 46.
|
| 5863 |
|
| 5864 |
Output tensors:
|
| 5865 |
Primary: shape=(1, 100, 1152), dtype=torch.float32, device=cuda:0, range=[-0.064982, 0.061193], mean=0.000100, std=0.013510, norm=4.585560
|
|
@@ -5869,19 +5870,19 @@ Output tensors:
|
|
| 5869 |
Iterations: 50
|
| 5870 |
|
| 5871 |
Latency Statistics:
|
| 5872 |
-
Average: 45.
|
| 5873 |
-
Min:
|
| 5874 |
-
Max: 49.
|
| 5875 |
-
Std Dev:
|
| 5876 |
|
| 5877 |
Percentiles:
|
| 5878 |
-
P50 (median):
|
| 5879 |
-
P95:
|
| 5880 |
-
P99: 49.
|
| 5881 |
|
| 5882 |
Throughput:
|
| 5883 |
-
Tokens/sec:
|
| 5884 |
-
Std Dev:
|
| 5885 |
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 5886 |
|
| 5887 |
Saved benchmark results to gptoss_results.json
|
|
@@ -5891,24 +5892,24 @@ Output sum: 11.532237
|
|
| 5891 |
<div class="uv-install-logs" id="uv-logs-gptoss_run">
|
| 5892 |
<div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
|
| 5893 |
<div class="uv-logs-content" style="display: none;">
|
| 5894 |
-
Downloading
|
| 5895 |
-
Downloading setuptools (1.1MiB)
|
| 5896 |
-
Downloading nvidia-nccl-cu12 (307.4MiB)
|
| 5897 |
Downloading nvidia-cusparse-cu12 (274.9MiB)
|
| 5898 |
-
Downloading
|
| 5899 |
-
Downloading nvidia-
|
| 5900 |
-
Downloading nvidia-cufft-cu12 (184.2MiB)
|
| 5901 |
Downloading nvidia-cusparselt-cu12 (273.9MiB)
|
| 5902 |
Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
|
| 5903 |
-
Downloading nvidia-
|
|
|
|
|
|
|
|
|
|
| 5904 |
Downloading nvidia-cufile-cu12 (1.1MiB)
|
|
|
|
| 5905 |
Downloading nvidia-cublas-cu12 (566.8MiB)
|
|
|
|
| 5906 |
Downloading nvidia-cusolver-cu12 (255.1MiB)
|
| 5907 |
Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
|
| 5908 |
-
Downloading nvidia-cudnn-cu12 (674.0MiB)
|
| 5909 |
-
Downloading numpy (16.2MiB)
|
| 5910 |
-
Downloading triton (148.3MiB)
|
| 5911 |
Downloading torch (846.9MiB)
|
|
|
|
| 5912 |
Downloading nvidia-cufile-cu12
|
| 5913 |
Downloading setuptools
|
| 5914 |
Downloading networkx
|
|
@@ -5921,13 +5922,13 @@ Downloading torch (846.9MiB)
|
|
| 5921 |
Downloading triton
|
| 5922 |
Downloading nvidia-cufft-cu12
|
| 5923 |
Downloading nvidia-cusolver-cu12
|
| 5924 |
-
Downloading nvidia-cusparse-cu12
|
| 5925 |
Downloading nvidia-cusparselt-cu12
|
|
|
|
| 5926 |
Downloading nvidia-nccl-cu12
|
| 5927 |
Downloading nvidia-cublas-cu12
|
| 5928 |
Downloading nvidia-cudnn-cu12
|
| 5929 |
Downloading torch
|
| 5930 |
-
Installed 26 packages in
|
| 5931 |
</div>
|
| 5932 |
</div>
|
| 5933 |
<div class="cell-artifacts">
|
|
@@ -5946,7 +5947,7 @@ Installed 26 packages in 453ms
|
|
| 5946 |
<span onclick="toggleOutput('gptoss_training_run')" style="cursor: pointer;">▼ output</span>
|
| 5947 |
<span id="uv-indicator-gptoss_training_run" onclick="toggleUvLogsFromHeader('gptoss_training_run')" style="cursor: pointer;">▶ uv-logs</span>
|
| 5948 |
</span> |
|
| 5949 |
-
Cell: gptoss_training_run | deps: torch, numpy |
|
| 5950 |
| <button class="run-btn" onclick="runCell('gptoss_training_run')">▶ run</button>
|
| 5951 |
<button class="copy-btn" onclick="copyCell('gptoss_training_run')">Copy</button>
|
| 5952 |
<a href="cells/gptoss_training_run.py" target="_blank" class="raw-btn">Raw</a>
|
|
@@ -6247,10 +6248,10 @@ Input Variation: +0.001 * iteration (deterministic)
|
|
| 6247 |
|
| 6248 |
Warming up (10 iterations)...
|
| 6249 |
Benchmarking (50 iterations)...
|
| 6250 |
-
Progress: 20% complete (avg: 49.
|
| 6251 |
-
Progress: 40% complete (avg:
|
| 6252 |
-
Progress: 60% complete (avg:
|
| 6253 |
-
Progress: 80% complete (avg:
|
| 6254 |
|
| 6255 |
Output tensors:
|
| 6256 |
Primary: shape=(1, 100, 1152), dtype=torch.float32, device=cuda:0, range=[-0.064982, 0.061193], mean=0.000100, std=0.013510, norm=4.585560
|
|
@@ -6260,19 +6261,19 @@ Output tensors:
|
|
| 6260 |
Iterations: 50
|
| 6261 |
|
| 6262 |
Latency Statistics:
|
| 6263 |
-
Average:
|
| 6264 |
-
Min:
|
| 6265 |
-
Max:
|
| 6266 |
-
Std Dev:
|
| 6267 |
|
| 6268 |
Percentiles:
|
| 6269 |
-
P50 (median):
|
| 6270 |
-
P95: 50.
|
| 6271 |
-
P99:
|
| 6272 |
|
| 6273 |
Throughput:
|
| 6274 |
-
Tokens/sec:
|
| 6275 |
-
Std Dev:
|
| 6276 |
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 6277 |
|
| 6278 |
Saved benchmark results to gptoss_training_results.json
|
|
@@ -6282,24 +6283,24 @@ Output sum: 11.532237
|
|
| 6282 |
<div class="uv-install-logs" id="uv-logs-gptoss_training_run">
|
| 6283 |
<div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
|
| 6284 |
<div class="uv-logs-content" style="display: none;">
|
| 6285 |
-
Downloading setuptools (1.1MiB)
|
| 6286 |
Downloading sympy (6.0MiB)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6287 |
Downloading numpy (16.2MiB)
|
|
|
|
| 6288 |
Downloading networkx (1.9MiB)
|
| 6289 |
-
Downloading
|
| 6290 |
-
Downloading nvidia-cufft-cu12 (184.2MiB)
|
| 6291 |
Downloading nvidia-cusparselt-cu12 (273.9MiB)
|
| 6292 |
Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
|
| 6293 |
-
Downloading torch (846.9MiB)
|
| 6294 |
Downloading nvidia-cudnn-cu12 (674.0MiB)
|
| 6295 |
-
Downloading nvidia-
|
| 6296 |
-
Downloading nvidia-cusolver-cu12 (255.1MiB)
|
| 6297 |
Downloading nvidia-nccl-cu12 (307.4MiB)
|
| 6298 |
-
Downloading nvidia-
|
| 6299 |
-
Downloading nvidia-curand-cu12 (60.7MiB)
|
| 6300 |
Downloading nvidia-nvjitlink-cu12 (37.4MiB)
|
| 6301 |
-
Downloading nvidia-cublas-cu12 (566.8MiB)
|
| 6302 |
Downloading triton (148.3MiB)
|
|
|
|
| 6303 |
Downloading nvidia-cufile-cu12
|
| 6304 |
Downloading setuptools
|
| 6305 |
Downloading networkx
|
|
@@ -6318,7 +6319,7 @@ Downloading triton (148.3MiB)
|
|
| 6318 |
Downloading nvidia-cublas-cu12
|
| 6319 |
Downloading nvidia-cudnn-cu12
|
| 6320 |
Downloading torch
|
| 6321 |
-
Installed 26 packages in
|
| 6322 |
</div>
|
| 6323 |
</div>
|
| 6324 |
<div class="cell-artifacts">
|
|
@@ -6330,14 +6331,14 @@ Installed 26 packages in 448ms
|
|
| 6330 |
|
| 6331 |
<h2>MegaBlocks Implementation</h2>
|
| 6332 |
<p>This section runs the MegaBlocks MoE implementation with optimized kernels from the Hugging Face hub.</p>
|
| 6333 |
-
<div class="cell
|
| 6334 |
<div class="cell-header">
|
| 6335 |
<span class="collapse-indicators">
|
| 6336 |
<span onclick="toggleCode('megablocks_run')" style="cursor: pointer;">▼ code</span>
|
| 6337 |
<span onclick="toggleOutput('megablocks_run')" style="cursor: pointer;">▼ output</span>
|
| 6338 |
<span id="uv-indicator-megablocks_run" onclick="toggleUvLogsFromHeader('megablocks_run')" style="cursor: pointer;">▶ uv-logs</span>
|
| 6339 |
</span> |
|
| 6340 |
-
Cell: megablocks_run | deps: torch, numpy, kernels |
|
| 6341 |
| <button class="run-btn" onclick="runCell('megablocks_run')">▶ run</button>
|
| 6342 |
<button class="copy-btn" onclick="copyCell('megablocks_run')">Copy</button>
|
| 6343 |
<a href="cells/megablocks_run.py" target="_blank" class="raw-btn">Raw</a>
|
|
@@ -6544,7 +6545,7 @@ Cell: megablocks_run | deps: torch, numpy, kernels | 40.58s | FAILED
|
|
| 6544 |
</div>
|
| 6545 |
</div>
|
| 6546 |
<div id="output-megablocks_run" class="cell-output">
|
| 6547 |
-
<div class="cell-stdout">Loading weights from: /repo/moe_benchmarks/megablocks_yamoe/.uvnote/cache/
|
| 6548 |
Loaded shared weights from artifacts
|
| 6549 |
Router weight sum: 12.588732
|
| 6550 |
Gate/up sum: 1026.601807
|
|
@@ -6565,29 +6566,61 @@ Base Input: shape=(1, 100, 1152), dtype=torch.float32, device=cuda:0, range=[-0.
|
|
| 6565 |
Input Variation: +0.001 * iteration (deterministic)
|
| 6566 |
|
| 6567 |
Warming up (10 iterations)...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6568 |
</div>
|
| 6569 |
<div class="uv-install-logs" id="uv-logs-megablocks_run">
|
| 6570 |
<div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
|
| 6571 |
<div class="uv-logs-content" style="display: none;">
|
| 6572 |
-
Downloading
|
| 6573 |
-
Downloading
|
| 6574 |
-
Downloading nvidia-
|
| 6575 |
-
Downloading numpy (16.2MiB)
|
| 6576 |
Downloading nvidia-cufile-cu12 (1.1MiB)
|
| 6577 |
-
Downloading nvidia-cudnn-cu12 (674.0MiB)
|
| 6578 |
-
Downloading nvidia-cusolver-cu12 (255.1MiB)
|
| 6579 |
-
Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
|
| 6580 |
-
Downloading triton (148.3MiB)
|
| 6581 |
Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
|
| 6582 |
-
Downloading nvidia-
|
| 6583 |
Downloading nvidia-cusparse-cu12 (274.9MiB)
|
| 6584 |
-
Downloading nvidia-
|
| 6585 |
-
Downloading
|
| 6586 |
-
Downloading
|
| 6587 |
-
Downloading nvidia-cufft-cu12 (184.2MiB)
|
| 6588 |
Downloading hf-xet (3.0MiB)
|
| 6589 |
Downloading nvidia-cublas-cu12 (566.8MiB)
|
| 6590 |
-
Downloading
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6591 |
Downloading nvidia-cufile-cu12
|
| 6592 |
Downloading hf-xet
|
| 6593 |
Downloading setuptools
|
|
@@ -6607,122 +6640,316 @@ Downloading networkx (1.9MiB)
|
|
| 6607 |
Downloading nvidia-cublas-cu12
|
| 6608 |
Downloading nvidia-cudnn-cu12
|
| 6609 |
Downloading torch
|
| 6610 |
-
Installed 37 packages in
|
| 6611 |
</div>
|
| 6612 |
</div>
|
| 6613 |
<div class="cell-stderr">Fetching 66 files: 0%| | 0/66 [00:00<?, ?it/s]
|
| 6614 |
-
Fetching 66 files: 2%|▏ | 1/66 [00:00<00:
|
| 6615 |
-
Fetching 66 files:
|
| 6616 |
-
Fetching 66 files:
|
| 6617 |
-
Fetching 66 files:
|
| 6618 |
-
Fetching 66 files:
|
| 6619 |
-
Fetching 66 files:
|
| 6620 |
-
Fetching 66 files:
|
| 6621 |
-
Fetching 66 files:
|
| 6622 |
-
Fetching 66 files: 100%|██████████| 66/66 [00:
|
| 6623 |
-
|
| 6624 |
-
|
| 6625 |
-
|
| 6626 |
-
|
| 6627 |
-
Traceback (most recent call last):
|
| 6628 |
-
File "/repo/moe_benchmarks/megablocks_yamoe/.uvnote/cells/megablocks_run.py", line 102, in <module>
|
| 6629 |
-
output, stats = bench(model, x)
|
| 6630 |
-
^^^^^^^^^^^^^^^
|
| 6631 |
-
File "/repo/moe_benchmarks/megablocks_yamoe/.uvnote/cells/bench_utils.py", line 189, in runner
|
| 6632 |
-
result, times_s = _bench_engine(call, warmup=warmup, iters=iters, device=device, dtype=dtype, input_gen=input_gen)
|
| 6633 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 6634 |
-
File "/repo/moe_benchmarks/megablocks_yamoe/.uvnote/cells/bench_utils.py", line 96, in _bench_engine
|
| 6635 |
-
_ = call(input_gen())
|
| 6636 |
-
^^^^^^^^^^^^^^^^^
|
| 6637 |
-
File "/repo/moe_benchmarks/megablocks_yamoe/.uvnote/cells/bench_utils.py", line 177, in <lambda>
|
| 6638 |
-
call = lambda x: fn(x, *args[1:], **kwargs)
|
| 6639 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 6640 |
-
File "/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
|
| 6641 |
-
return self._call_impl(*args, **kwargs)
|
| 6642 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 6643 |
-
File "/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
|
| 6644 |
-
return forward_call(*args, **kwargs)
|
| 6645 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 6646 |
-
File "/repo/moe_benchmarks/megablocks_yamoe/.uvnote/cells/megablocks_run.py", line 81, in forward
|
| 6647 |
-
output, dummy_routing_weights = self.model(hidden_states)
|
| 6648 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 6649 |
-
File "/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
|
| 6650 |
-
return self._call_impl(*args, **kwargs)
|
| 6651 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 6652 |
-
File "/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
|
| 6653 |
-
return forward_call(*args, **kwargs)
|
| 6654 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 6655 |
-
File "/tmp/uvnote-run-4n1mby1e/home/.cache/huggingface/hub/models--kernels-community--megablocks/snapshots/e0fb1437de3f8d7079c4da13be8cb64dc0cfcdd5/build/torch28-cxx11-cu128-x86_64-linux/megablocks/layers.py", line 896, in forward
|
| 6656 |
-
output, expert_weights_out, *_ = moe_forward(
|
| 6657 |
-
^^^^^^^^^^^^
|
| 6658 |
-
File "/tmp/uvnote-run-4n1mby1e/home/.cache/huggingface/hub/models--kernels-community--megablocks/snapshots/e0fb1437de3f8d7079c4da13be8cb64dc0cfcdd5/build/torch28-cxx11-cu128-x86_64-linux/megablocks/layers.py", line 730, in moe_forward
|
| 6659 |
-
x, tokens_per_expert = forward_fn(**forward_args)
|
| 6660 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 6661 |
-
File "/tmp/uvnote-run-4n1mby1e/home/.cache/huggingface/hub/models--kernels-community--megablocks/snapshots/e0fb1437de3f8d7079c4da13be8cb64dc0cfcdd5/build/torch28-cxx11-cu128-x86_64-linux/megablocks/layers.py", line 457, in forward_once
|
| 6662 |
-
x = permute_and_compute(
|
| 6663 |
-
^^^^^^^^^^^^^^^^^^^^
|
| 6664 |
-
File "/tmp/uvnote-run-4n1mby1e/home/.cache/huggingface/hub/models--kernels-community--megablocks/snapshots/e0fb1437de3f8d7079c4da13be8cb64dc0cfcdd5/build/torch28-cxx11-cu128-x86_64-linux/megablocks/layers.py", line 401, in permute_and_compute
|
| 6665 |
-
x = ops.binned_gather(x, indices, bins, expert_capacity, top_k)
|
| 6666 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 6667 |
-
File "/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/torch/autograd/function.py", line 576, in apply
|
| 6668 |
-
return super().apply(*args, **kwargs) # type: ignore[misc]
|
| 6669 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 6670 |
-
File "/tmp/uvnote-run-4n1mby1e/home/.cache/huggingface/hub/models--kernels-community--megablocks/snapshots/e0fb1437de3f8d7079c4da13be8cb64dc0cfcdd5/build/torch28-cxx11-cu128-x86_64-linux/megablocks/ops/stk_autocast.py", line 30, in decorate_fwd
|
| 6671 |
-
return fwd(*args, **kwargs)
|
| 6672 |
-
^^^^^^^^^^^^^^^^^^^^
|
| 6673 |
-
File "/tmp/uvnote-run-4n1mby1e/home/.cache/huggingface/hub/models--kernels-community--megablocks/snapshots/e0fb1437de3f8d7079c4da13be8cb64dc0cfcdd5/build/torch28-cxx11-cu128-x86_64-linux/megablocks/ops/binned_gather.py", line 26, in forward
|
| 6674 |
-
return kernels.binned_gather(x, indices, None, bins, bin_size, top_k)
|
| 6675 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 6676 |
-
File "/tmp/uvnote-run-4n1mby1e/home/.cache/huggingface/hub/models--kernels-community--megablocks/snapshots/e0fb1437de3f8d7079c4da13be8cb64dc0cfcdd5/build/torch28-cxx11-cu128-x86_64-linux/megablocks/backend/kernels.py", line 419, in binned_gather
|
| 6677 |
-
_binned_copy[(num_experts, expert_capacity)](
|
| 6678 |
-
File "/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/runtime/jit.py", line 390, in <lambda>
|
| 6679 |
-
return lambda *args, **kwargs: self.run(grid=grid, warmup=False, *args, **kwargs)
|
| 6680 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 6681 |
-
File "/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/runtime/autotuner.py", line 239, in run
|
| 6682 |
-
benchmark()
|
| 6683 |
-
File "/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/runtime/autotuner.py", line 228, in benchmark
|
| 6684 |
-
timings = {config: self._bench(*args, config=config, **kwargs) for config in pruned_configs}
|
| 6685 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 6686 |
-
File "/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/runtime/autotuner.py", line 228, in <dictcomp>
|
| 6687 |
-
timings = {config: self._bench(*args, config=config, **kwargs) for config in pruned_configs}
|
| 6688 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 6689 |
-
File "/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/runtime/autotuner.py", line 160, in _bench
|
| 6690 |
-
return self.do_bench(kernel_call, quantiles=(0.5, 0.2, 0.8))
|
| 6691 |
-
^^^^^^^^^^^^^
|
| 6692 |
-
File "/usr/lib/python3.11/functools.py", line 1001, in __get__
|
| 6693 |
-
val = self.func(instance)
|
| 6694 |
-
^^^^^^^^^^^^^^^^^^^
|
| 6695 |
-
File "/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/runtime/autotuner.py", line 121, in do_bench
|
| 6696 |
-
return driver.active.get_benchmarker()
|
| 6697 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 6698 |
-
File "/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/runtime/driver.py", line 30, in __getattr__
|
| 6699 |
-
return getattr(self._initialize_obj(), name)
|
| 6700 |
-
^^^^^^^^^^^^^^^^^^^^^^
|
| 6701 |
-
File "/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/runtime/driver.py", line 26, in _initialize_obj
|
| 6702 |
-
self._obj = self._init_fn()
|
| 6703 |
-
^^^^^^^^^^^^^^^
|
| 6704 |
-
File "/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/runtime/driver.py", line 12, in _create_driver
|
| 6705 |
-
return active_drivers[0]()
|
| 6706 |
-
^^^^^^^^^^^^^^^^^^^
|
| 6707 |
-
File "/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/backends/nvidia/driver.py", line 715, in __init__
|
| 6708 |
-
self.utils = CudaUtils() # TODO: make static
|
| 6709 |
-
^^^^^^^^^^^
|
| 6710 |
-
File "/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/backends/nvidia/driver.py", line 62, in __init__
|
| 6711 |
-
mod = compile_module_from_src(
|
| 6712 |
-
^^^^^^^^^^^^^^^^^^^^^^^^
|
| 6713 |
-
File "/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/runtime/build.py", line 88, in compile_module_from_src
|
| 6714 |
-
so = _build(name, src_path, tmpdir, library_dirs or [], include_dirs or [], libraries or [])
|
| 6715 |
-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 6716 |
-
File "/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/runtime/build.py", line 51, in _build
|
| 6717 |
-
subprocess.check_call(cc_cmd, stdout=subprocess.DEVNULL)
|
| 6718 |
-
File "/usr/lib/python3.11/subprocess.py", line 413, in check_call
|
| 6719 |
-
raise CalledProcessError(retcode, cmd)
|
| 6720 |
-
subprocess.CalledProcessError: Command '['/usr/bin/gcc', '/tmp/tmpsyirxqys/cuda_utils.c', '-O3', '-shared', '-fPIC', '-Wno-psabi', '-o', '/tmp/tmpsyirxqys/cuda_utils.cpython-311-x86_64-linux-gnu.so', '-lcuda', '-L/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/backends/nvidia/lib', '-L/usr/lib/x86_64-linux-gnu', '-I/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/backends/nvidia/include', '-I/tmp/tmpsyirxqys', '-I/usr/include/python3.11']' returned non-zero exit status 1.</div>
|
| 6721 |
</div>
|
| 6722 |
</div>
|
| 6723 |
|
| 6724 |
<h2>Performance Visualization</h2>
|
| 6725 |
<p>This section reads all benchmark results and creates a comprehensive performance comparison chart.</p>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6726 |
</div>
|
| 6727 |
|
| 6728 |
</body>
|
|
|
|
| 3720 |
<span onclick="toggleOutput('utils')" style="cursor: pointer;">▼ output</span>
|
| 3721 |
<span id="uv-indicator-utils" onclick="toggleUvLogsFromHeader('utils')" style="cursor: pointer;">▶ uv-logs</span>
|
| 3722 |
</span> |
|
| 3723 |
+
Cell: utils | deps: torch, numpy | 35.49s
|
| 3724 |
| <button class="run-btn" onclick="runCell('utils')">▶ run</button>
|
| 3725 |
<button class="copy-btn" onclick="copyCell('utils')">Copy</button>
|
| 3726 |
<a href="cells/utils.py" target="_blank" class="raw-btn">Raw</a>
|
|
|
|
| 3794 |
<div class="uv-install-logs" id="uv-logs-utils">
|
| 3795 |
<div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
|
| 3796 |
<div class="uv-logs-content" style="display: none;">
|
| 3797 |
+
Downloading nvidia-curand-cu12 (60.7MiB)
|
|
|
|
|
|
|
|
|
|
| 3798 |
Downloading nvidia-cufile-cu12 (1.1MiB)
|
| 3799 |
+
Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
|
| 3800 |
+
Downloading nvidia-cusparse-cu12 (274.9MiB)
|
| 3801 |
+
Downloading nvidia-cudnn-cu12 (674.0MiB)
|
| 3802 |
Downloading nvidia-cusparselt-cu12 (273.9MiB)
|
|
|
|
| 3803 |
Downloading torch (846.9MiB)
|
| 3804 |
+
Downloading nvidia-nvjitlink-cu12 (37.4MiB)
|
| 3805 |
+
Downloading setuptools (1.1MiB)
|
| 3806 |
+
Downloading nvidia-cublas-cu12 (566.8MiB)
|
| 3807 |
+
Downloading networkx (1.9MiB)
|
| 3808 |
+
Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
|
| 3809 |
Downloading nvidia-cusolver-cu12 (255.1MiB)
|
| 3810 |
+
Downloading numpy (16.2MiB)
|
| 3811 |
+
Downloading nvidia-cufft-cu12 (184.2MiB)
|
| 3812 |
+
Downloading nvidia-nccl-cu12 (307.4MiB)
|
| 3813 |
+
Downloading sympy (6.0MiB)
|
| 3814 |
Downloading triton (148.3MiB)
|
| 3815 |
Downloading nvidia-cufile-cu12
|
| 3816 |
Downloading setuptools
|
|
|
|
| 3830 |
Downloading nvidia-cublas-cu12
|
| 3831 |
Downloading nvidia-cudnn-cu12
|
| 3832 |
Downloading torch
|
| 3833 |
+
Installed 26 packages in 461ms
|
| 3834 |
</div>
|
| 3835 |
</div>
|
| 3836 |
</div>
|
|
|
|
| 3843 |
<span onclick="toggleOutput('bench_utils')" style="cursor: pointer;">▼ output</span>
|
| 3844 |
<span id="uv-indicator-bench_utils" onclick="toggleUvLogsFromHeader('bench_utils')" style="cursor: pointer;">▶ uv-logs</span>
|
| 3845 |
</span> |
|
| 3846 |
+
Cell: bench_utils | deps: torch, numpy | 34.17s
|
| 3847 |
| <button class="run-btn" onclick="runCell('bench_utils')">▶ run</button>
|
| 3848 |
<button class="copy-btn" onclick="copyCell('bench_utils')">Copy</button>
|
| 3849 |
<a href="cells/bench_utils.py" target="_blank" class="raw-btn">Raw</a>
|
|
|
|
| 4331 |
<div class="uv-install-logs" id="uv-logs-bench_utils">
|
| 4332 |
<div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
|
| 4333 |
<div class="uv-logs-content" style="display: none;">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4334 |
Downloading setuptools (1.1MiB)
|
| 4335 |
Downloading nvidia-cufile-cu12 (1.1MiB)
|
|
|
|
|
|
|
|
|
|
| 4336 |
Downloading sympy (6.0MiB)
|
| 4337 |
+
Downloading nvidia-nvjitlink-cu12 (37.4MiB)
|
| 4338 |
Downloading nvidia-curand-cu12 (60.7MiB)
|
|
|
|
|
|
|
| 4339 |
Downloading nvidia-cublas-cu12 (566.8MiB)
|
| 4340 |
+
Downloading nvidia-cusolver-cu12 (255.1MiB)
|
| 4341 |
+
Downloading networkx (1.9MiB)
|
| 4342 |
+
Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
|
| 4343 |
+
Downloading nvidia-cusparse-cu12 (274.9MiB)
|
| 4344 |
+
Downloading torch (846.9MiB)
|
| 4345 |
+
Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
|
| 4346 |
+
Downloading nvidia-nccl-cu12 (307.4MiB)
|
| 4347 |
+
Downloading numpy (16.2MiB)
|
| 4348 |
+
Downloading nvidia-cudnn-cu12 (674.0MiB)
|
| 4349 |
+
Downloading nvidia-cufft-cu12 (184.2MiB)
|
| 4350 |
+
Downloading nvidia-cusparselt-cu12 (273.9MiB)
|
| 4351 |
Downloading triton (148.3MiB)
|
| 4352 |
Downloading nvidia-cufile-cu12
|
| 4353 |
Downloading setuptools
|
|
|
|
| 4367 |
Downloading nvidia-cublas-cu12
|
| 4368 |
Downloading nvidia-cudnn-cu12
|
| 4369 |
Downloading torch
|
| 4370 |
+
Installed 26 packages in 507ms
|
| 4371 |
</div>
|
| 4372 |
</div>
|
| 4373 |
</div>
|
|
|
|
| 4381 |
<span onclick="toggleOutput('config')" style="cursor: pointer;">▼ output</span>
|
| 4382 |
<span id="uv-indicator-config" onclick="toggleUvLogsFromHeader('config')" style="cursor: pointer;">▶ uv-logs</span>
|
| 4383 |
</span> |
|
| 4384 |
+
Cell: config | deps: torch, numpy | 34.91s
|
| 4385 |
| <button class="run-btn" onclick="runCell('config')">▶ run</button>
|
| 4386 |
<button class="copy-btn" onclick="copyCell('config')">Copy</button>
|
| 4387 |
<a href="cells/config.py" target="_blank" class="raw-btn">Raw</a>
|
|
|
|
| 4441 |
<div class="uv-install-logs" id="uv-logs-config">
|
| 4442 |
<div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
|
| 4443 |
<div class="uv-logs-content" style="display: none;">
|
| 4444 |
+
Downloading sympy (6.0MiB)
|
| 4445 |
Downloading nvidia-cufile-cu12 (1.1MiB)
|
| 4446 |
+
Downloading setuptools (1.1MiB)
|
| 4447 |
+
Downloading nvidia-curand-cu12 (60.7MiB)
|
| 4448 |
Downloading nvidia-cusparse-cu12 (274.9MiB)
|
| 4449 |
+
Downloading torch (846.9MiB)
|
| 4450 |
+
Downloading nvidia-cusolver-cu12 (255.1MiB)
|
| 4451 |
+
Downloading numpy (16.2MiB)
|
| 4452 |
+
Downloading nvidia-nvjitlink-cu12 (37.4MiB)
|
| 4453 |
Downloading nvidia-nccl-cu12 (307.4MiB)
|
| 4454 |
+
Downloading nvidia-cufft-cu12 (184.2MiB)
|
| 4455 |
Downloading nvidia-cusparselt-cu12 (273.9MiB)
|
| 4456 |
+
Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
|
|
|
|
|
|
|
| 4457 |
Downloading networkx (1.9MiB)
|
| 4458 |
+
Downloading nvidia-cudnn-cu12 (674.0MiB)
|
| 4459 |
+
Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
|
|
|
|
|
|
|
|
|
|
| 4460 |
Downloading nvidia-cublas-cu12 (566.8MiB)
|
|
|
|
|
|
|
|
|
|
| 4461 |
Downloading triton (148.3MiB)
|
| 4462 |
Downloading nvidia-cufile-cu12
|
| 4463 |
Downloading setuptools
|
|
|
|
| 4474 |
Downloading nvidia-cusparselt-cu12
|
| 4475 |
Downloading nvidia-cusparse-cu12
|
| 4476 |
Downloading nvidia-nccl-cu12
|
|
|
|
| 4477 |
Downloading nvidia-cudnn-cu12
|
| 4478 |
+
Downloading nvidia-cublas-cu12
|
| 4479 |
Downloading torch
|
| 4480 |
+
Installed 26 packages in 572ms
|
| 4481 |
</div>
|
| 4482 |
</div>
|
| 4483 |
</div>
|
|
|
|
| 4490 |
<span onclick="toggleOutput('save_data')" style="cursor: pointer;">▼ output</span>
|
| 4491 |
<span id="uv-indicator-save_data" onclick="toggleUvLogsFromHeader('save_data')" style="cursor: pointer;">▶ uv-logs</span>
|
| 4492 |
</span> |
|
| 4493 |
+
Cell: save_data | deps: torch, numpy | 39.37s
|
| 4494 |
| <button class="run-btn" onclick="runCell('save_data')">▶ run</button>
|
| 4495 |
<button class="copy-btn" onclick="copyCell('save_data')">Copy</button>
|
| 4496 |
<a href="cells/save_data.py" target="_blank" class="raw-btn">Raw</a>
|
|
|
|
| 4585 |
<div class="uv-install-logs" id="uv-logs-save_data">
|
| 4586 |
<div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
|
| 4587 |
<div class="uv-logs-content" style="display: none;">
|
|
|
|
| 4588 |
Downloading nvidia-cufft-cu12 (184.2MiB)
|
| 4589 |
+
Downloading sympy (6.0MiB)
|
| 4590 |
Downloading nvidia-nvjitlink-cu12 (37.4MiB)
|
| 4591 |
+
Downloading triton (148.3MiB)
|
| 4592 |
+
Downloading numpy (16.2MiB)
|
| 4593 |
+
Downloading networkx (1.9MiB)
|
| 4594 |
+
Downloading torch (846.9MiB)
|
| 4595 |
Downloading nvidia-nccl-cu12 (307.4MiB)
|
| 4596 |
+
Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
|
| 4597 |
+
Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
|
| 4598 |
+
Downloading nvidia-curand-cu12 (60.7MiB)
|
| 4599 |
+
Downloading nvidia-cusolver-cu12 (255.1MiB)
|
| 4600 |
+
Downloading setuptools (1.1MiB)
|
| 4601 |
Downloading nvidia-cudnn-cu12 (674.0MiB)
|
| 4602 |
Downloading nvidia-cublas-cu12 (566.8MiB)
|
| 4603 |
+
Downloading nvidia-cufile-cu12 (1.1MiB)
|
|
|
|
| 4604 |
Downloading nvidia-cusparse-cu12 (274.9MiB)
|
|
|
|
|
|
|
| 4605 |
Downloading nvidia-cusparselt-cu12 (273.9MiB)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4606 |
Downloading nvidia-cufile-cu12
|
| 4607 |
Downloading setuptools
|
| 4608 |
Downloading networkx
|
|
|
|
| 4615 |
Downloading triton
|
| 4616 |
Downloading nvidia-cufft-cu12
|
| 4617 |
Downloading nvidia-cusolver-cu12
|
|
|
|
| 4618 |
Downloading nvidia-cusparse-cu12
|
| 4619 |
+
Downloading nvidia-cusparselt-cu12
|
| 4620 |
Downloading nvidia-nccl-cu12
|
| 4621 |
Downloading nvidia-cublas-cu12
|
| 4622 |
Downloading nvidia-cudnn-cu12
|
| 4623 |
Downloading torch
|
| 4624 |
+
Installed 26 packages in 455ms
|
| 4625 |
</div>
|
| 4626 |
</div>
|
| 4627 |
<div class="cell-artifacts">
|
| 4628 |
<h4>Artifacts:</h4>
|
| 4629 |
<a href="artifacts/save_data/router_bias.pt" class="artifact" target="_blank">router_bias.pt</a>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4630 |
<a href="artifacts/save_data/router_weight.pt" class="artifact" target="_blank">router_weight.pt</a>
|
| 4631 |
+
<a href="artifacts/save_data/down_proj_bias.pt" class="artifact" target="_blank">down_proj_bias.pt</a>
|
| 4632 |
+
<a href="artifacts/save_data/gate_up_proj.pt" class="artifact" target="_blank">gate_up_proj.pt</a>
|
| 4633 |
+
<a href="artifacts/save_data/down_proj.pt" class="artifact" target="_blank">down_proj.pt</a>
|
| 4634 |
+
<a href="artifacts/save_data/gate_up_proj_bias.pt" class="artifact" target="_blank">gate_up_proj_bias.pt</a>
|
| 4635 |
</div>
|
| 4636 |
</div>
|
| 4637 |
</div>
|
|
|
|
| 4645 |
<span onclick="toggleOutput('yamoe_run')" style="cursor: pointer;">▼ output</span>
|
| 4646 |
<span id="uv-indicator-yamoe_run" onclick="toggleUvLogsFromHeader('yamoe_run')" style="cursor: pointer;">▶ uv-logs</span>
|
| 4647 |
</span> |
|
| 4648 |
+
Cell: yamoe_run | deps: torch, kernels, numpy | 38.45s
|
| 4649 |
| <button class="run-btn" onclick="runCell('yamoe_run')">▶ run</button>
|
| 4650 |
<button class="copy-btn" onclick="copyCell('yamoe_run')">Copy</button>
|
| 4651 |
<a href="cells/yamoe_run.py" target="_blank" class="raw-btn">Raw</a>
|
|
|
|
| 4916 |
</div>
|
| 4917 |
</div>
|
| 4918 |
<div id="output-yamoe_run" class="cell-output">
|
| 4919 |
+
<div class="cell-stdout">Loading weights from: /repo/moe_benchmarks/megablocks_yamoe/.uvnote/cache/b398a2853af91970392ae37f0d53a0eda463df639220863fbd38f33605bf9cbb
|
| 4920 |
Loaded shared weights from artifacts
|
| 4921 |
Router weight sum: 12.588732
|
| 4922 |
Gate/up sum: 1026.601807
|
|
|
|
| 4939 |
Warming up (10 iterations)...
|
| 4940 |
Benchmarking (50 iterations)...
|
| 4941 |
Progress: 20% complete (avg: 4.253 ms)
|
| 4942 |
+
Progress: 40% complete (avg: 4.248 ms)
|
| 4943 |
+
Progress: 60% complete (avg: 4.248 ms)
|
| 4944 |
+
Progress: 80% complete (avg: 4.250 ms)
|
| 4945 |
|
| 4946 |
Output tensors:
|
| 4947 |
Primary: shape=(1, 100, 1152), dtype=torch.float32, device=cuda:0, range=[-0.049506, 0.054984], mean=0.000034, std=0.006508, norm=2.208791
|
|
|
|
| 4951 |
Iterations: 50
|
| 4952 |
|
| 4953 |
Latency Statistics:
|
| 4954 |
+
Average: 4.250 ms
|
| 4955 |
+
Min: 4.125 ms
|
| 4956 |
+
Max: 4.300 ms
|
| 4957 |
+
Std Dev: 0.023 ms
|
| 4958 |
|
| 4959 |
Percentiles:
|
| 4960 |
P50 (median): 4.254 ms
|
| 4961 |
+
P95: 4.268 ms
|
| 4962 |
+
P99: 4.290 ms
|
| 4963 |
|
| 4964 |
Throughput:
|
| 4965 |
+
Tokens/sec: 23530.9
|
| 4966 |
+
Std Dev: 131.3
|
| 4967 |
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 4968 |
|
| 4969 |
Saved benchmark results to yamoe_results.json
|
|
|
|
| 4973 |
<div class="uv-install-logs" id="uv-logs-yamoe_run">
|
| 4974 |
<div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
|
| 4975 |
<div class="uv-logs-content" style="display: none;">
|
| 4976 |
+
Downloading nvidia-curand-cu12 (60.7MiB)
|
| 4977 |
+
Downloading numpy (16.2MiB)
|
| 4978 |
+
Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
|
| 4979 |
+
Downloading hf-xet (3.0MiB)
|
| 4980 |
Downloading networkx (1.9MiB)
|
| 4981 |
+
Downloading nvidia-cufile-cu12 (1.1MiB)
|
| 4982 |
+
Downloading setuptools (1.1MiB)
|
| 4983 |
Downloading sympy (6.0MiB)
|
| 4984 |
+
Downloading nvidia-cusparse-cu12 (274.9MiB)
|
| 4985 |
Downloading nvidia-cublas-cu12 (566.8MiB)
|
| 4986 |
+
Downloading torch (846.9MiB)
|
|
|
|
| 4987 |
Downloading nvidia-cufft-cu12 (184.2MiB)
|
| 4988 |
+
Downloading triton (148.3MiB)
|
|
|
|
| 4989 |
Downloading nvidia-cusolver-cu12 (255.1MiB)
|
| 4990 |
+
Downloading nvidia-cudnn-cu12 (674.0MiB)
|
| 4991 |
+
Downloading nvidia-nccl-cu12 (307.4MiB)
|
|
|
|
|
|
|
| 4992 |
Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
|
| 4993 |
+
Downloading nvidia-cusparselt-cu12 (273.9MiB)
|
| 4994 |
Downloading nvidia-nvjitlink-cu12 (37.4MiB)
|
|
|
|
|
|
|
| 4995 |
Downloading nvidia-cufile-cu12
|
| 4996 |
Downloading hf-xet
|
| 4997 |
Downloading setuptools
|
|
|
|
| 5011 |
Downloading nvidia-cublas-cu12
|
| 5012 |
Downloading nvidia-cudnn-cu12
|
| 5013 |
Downloading torch
|
| 5014 |
+
Installed 37 packages in 452ms
|
| 5015 |
</div>
|
| 5016 |
</div>
|
| 5017 |
<div class="cell-stderr">Fetching 6 files: 0%| | 0/6 [00:00<?, ?it/s]
|
| 5018 |
+
Fetching 6 files: 17%|█▋ | 1/6 [00:00<00:01, 3.53it/s]
|
| 5019 |
+
Fetching 6 files: 33%|███▎ | 2/6 [00:00<00:01, 3.45it/s]
|
| 5020 |
+
Fetching 6 files: 50%|█████ | 3/6 [00:00<00:01, 2.94it/s]
|
| 5021 |
+
Fetching 6 files: 100%|██████████| 6/6 [00:00<00:00, 6.14it/s]</div>
|
| 5022 |
<div class="cell-artifacts">
|
| 5023 |
<h4>Artifacts:</h4>
|
| 5024 |
<a href="artifacts/yamoe_run/yamoe_results.json" class="artifact" target="_blank">yamoe_results.json</a>
|
|
|
|
| 5035 |
<span onclick="toggleOutput('binned_run')" style="cursor: pointer;">▼ output</span>
|
| 5036 |
<span id="uv-indicator-binned_run" onclick="toggleUvLogsFromHeader('binned_run')" style="cursor: pointer;">▶ uv-logs</span>
|
| 5037 |
</span> |
|
| 5038 |
+
Cell: binned_run | deps: torch, numpy | 39.83s
|
| 5039 |
| <button class="run-btn" onclick="runCell('binned_run')">▶ run</button>
|
| 5040 |
<button class="copy-btn" onclick="copyCell('binned_run')">Copy</button>
|
| 5041 |
<a href="cells/binned_run.py" target="_blank" class="raw-btn">Raw</a>
|
|
|
|
| 5449 |
|
| 5450 |
Warming up (10 iterations)...
|
| 5451 |
Benchmarking (50 iterations)...
|
| 5452 |
+
Progress: 20% complete (avg: 38.543 ms)
|
| 5453 |
+
Progress: 40% complete (avg: 37.857 ms)
|
| 5454 |
+
Progress: 60% complete (avg: 37.457 ms)
|
| 5455 |
+
Progress: 80% complete (avg: 37.143 ms)
|
| 5456 |
|
| 5457 |
Output tensors:
|
| 5458 |
Primary: shape=(1, 100, 1152), dtype=torch.float32, device=cuda:0, range=[-0.049506, 0.054984], mean=0.000034, std=0.006508, norm=2.208791
|
|
|
|
| 5462 |
Iterations: 50
|
| 5463 |
|
| 5464 |
Latency Statistics:
|
| 5465 |
+
Average: 36.539 ms
|
| 5466 |
+
Min: 32.831 ms
|
| 5467 |
+
Max: 40.074 ms
|
| 5468 |
+
Std Dev: 1.614 ms
|
| 5469 |
|
| 5470 |
Percentiles:
|
| 5471 |
+
P50 (median): 36.870 ms
|
| 5472 |
+
P95: 39.005 ms
|
| 5473 |
+
P99: 39.950 ms
|
| 5474 |
|
| 5475 |
Throughput:
|
| 5476 |
+
Tokens/sec: 2736.8
|
| 5477 |
+
Std Dev: 123.0
|
| 5478 |
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 5479 |
|
| 5480 |
Saved benchmark results to binned_results.json
|
|
|
|
| 5484 |
<div class="uv-install-logs" id="uv-logs-binned_run">
|
| 5485 |
<div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
|
| 5486 |
<div class="uv-logs-content" style="display: none;">
|
| 5487 |
+
Downloading nvidia-cudnn-cu12 (674.0MiB)
|
| 5488 |
+
Downloading nvidia-cublas-cu12 (566.8MiB)
|
| 5489 |
+
Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
|
| 5490 |
+
Downloading setuptools (1.1MiB)
|
| 5491 |
Downloading nvidia-cusolver-cu12 (255.1MiB)
|
| 5492 |
Downloading networkx (1.9MiB)
|
| 5493 |
+
Downloading torch (846.9MiB)
|
|
|
|
| 5494 |
Downloading nvidia-nvjitlink-cu12 (37.4MiB)
|
|
|
|
|
|
|
| 5495 |
Downloading nvidia-nccl-cu12 (307.4MiB)
|
| 5496 |
+
Downloading nvidia-cusparse-cu12 (274.9MiB)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5497 |
Downloading numpy (16.2MiB)
|
| 5498 |
+
Downloading nvidia-cufile-cu12 (1.1MiB)
|
| 5499 |
+
Downloading nvidia-cusparselt-cu12 (273.9MiB)
|
| 5500 |
+
Downloading sympy (6.0MiB)
|
| 5501 |
Downloading triton (148.3MiB)
|
| 5502 |
+
Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
|
| 5503 |
+
Downloading nvidia-curand-cu12 (60.7MiB)
|
| 5504 |
Downloading nvidia-cufft-cu12 (184.2MiB)
|
| 5505 |
Downloading nvidia-cufile-cu12
|
| 5506 |
Downloading setuptools
|
|
|
|
| 5514 |
Downloading triton
|
| 5515 |
Downloading nvidia-cufft-cu12
|
| 5516 |
Downloading nvidia-cusolver-cu12
|
|
|
|
| 5517 |
Downloading nvidia-cusparselt-cu12
|
| 5518 |
+
Downloading nvidia-cusparse-cu12
|
| 5519 |
Downloading nvidia-nccl-cu12
|
| 5520 |
Downloading nvidia-cublas-cu12
|
| 5521 |
Downloading nvidia-cudnn-cu12
|
| 5522 |
Downloading torch
|
| 5523 |
+
Installed 26 packages in 442ms
|
| 5524 |
</div>
|
| 5525 |
</div>
|
| 5526 |
<div class="cell-artifacts">
|
|
|
|
| 5539 |
<span onclick="toggleOutput('gptoss_run')" style="cursor: pointer;">▼ output</span>
|
| 5540 |
<span id="uv-indicator-gptoss_run" onclick="toggleUvLogsFromHeader('gptoss_run')" style="cursor: pointer;">▶ uv-logs</span>
|
| 5541 |
</span> |
|
| 5542 |
+
Cell: gptoss_run | deps: torch, numpy | 39.94s
|
| 5543 |
| <button class="run-btn" onclick="runCell('gptoss_run')">▶ run</button>
|
| 5544 |
<button class="copy-btn" onclick="copyCell('gptoss_run')">Copy</button>
|
| 5545 |
<a href="cells/gptoss_run.py" target="_blank" class="raw-btn">Raw</a>
|
|
|
|
| 5857 |
|
| 5858 |
Warming up (10 iterations)...
|
| 5859 |
Benchmarking (50 iterations)...
|
| 5860 |
+
Progress: 20% complete (avg: 48.070 ms)
|
| 5861 |
+
Progress: 40% complete (avg: 47.917 ms)
|
| 5862 |
+
Progress: 60% complete (avg: 47.432 ms)
|
| 5863 |
+
Progress: 80% complete (avg: 46.164 ms)
|
| 5864 |
|
| 5865 |
Output tensors:
|
| 5866 |
Primary: shape=(1, 100, 1152), dtype=torch.float32, device=cuda:0, range=[-0.064982, 0.061193], mean=0.000100, std=0.013510, norm=4.585560
|
|
|
|
| 5870 |
Iterations: 50
|
| 5871 |
|
| 5872 |
Latency Statistics:
|
| 5873 |
+
Average: 45.237 ms
|
| 5874 |
+
Min: 39.776 ms
|
| 5875 |
+
Max: 49.247 ms
|
| 5876 |
+
Std Dev: 2.851 ms
|
| 5877 |
|
| 5878 |
Percentiles:
|
| 5879 |
+
P50 (median): 46.461 ms
|
| 5880 |
+
P95: 48.433 ms
|
| 5881 |
+
P99: 49.077 ms
|
| 5882 |
|
| 5883 |
Throughput:
|
| 5884 |
+
Tokens/sec: 2210.6
|
| 5885 |
+
Std Dev: 143.0
|
| 5886 |
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 5887 |
|
| 5888 |
Saved benchmark results to gptoss_results.json
|
|
|
|
| 5892 |
<div class="uv-install-logs" id="uv-logs-gptoss_run">
|
| 5893 |
<div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
|
| 5894 |
<div class="uv-logs-content" style="display: none;">
|
| 5895 |
+
Downloading numpy (16.2MiB)
|
|
|
|
|
|
|
| 5896 |
Downloading nvidia-cusparse-cu12 (274.9MiB)
|
| 5897 |
+
Downloading nvidia-cudnn-cu12 (674.0MiB)
|
| 5898 |
+
Downloading nvidia-nccl-cu12 (307.4MiB)
|
|
|
|
| 5899 |
Downloading nvidia-cusparselt-cu12 (273.9MiB)
|
| 5900 |
Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
|
| 5901 |
+
Downloading nvidia-curand-cu12 (60.7MiB)
|
| 5902 |
+
Downloading networkx (1.9MiB)
|
| 5903 |
+
Downloading setuptools (1.1MiB)
|
| 5904 |
+
Downloading nvidia-cufft-cu12 (184.2MiB)
|
| 5905 |
Downloading nvidia-cufile-cu12 (1.1MiB)
|
| 5906 |
+
Downloading nvidia-nvjitlink-cu12 (37.4MiB)
|
| 5907 |
Downloading nvidia-cublas-cu12 (566.8MiB)
|
| 5908 |
+
Downloading sympy (6.0MiB)
|
| 5909 |
Downloading nvidia-cusolver-cu12 (255.1MiB)
|
| 5910 |
Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
|
|
|
|
|
|
|
|
|
|
| 5911 |
Downloading torch (846.9MiB)
|
| 5912 |
+
Downloading triton (148.3MiB)
|
| 5913 |
Downloading nvidia-cufile-cu12
|
| 5914 |
Downloading setuptools
|
| 5915 |
Downloading networkx
|
|
|
|
| 5922 |
Downloading triton
|
| 5923 |
Downloading nvidia-cufft-cu12
|
| 5924 |
Downloading nvidia-cusolver-cu12
|
|
|
|
| 5925 |
Downloading nvidia-cusparselt-cu12
|
| 5926 |
+
Downloading nvidia-cusparse-cu12
|
| 5927 |
Downloading nvidia-nccl-cu12
|
| 5928 |
Downloading nvidia-cublas-cu12
|
| 5929 |
Downloading nvidia-cudnn-cu12
|
| 5930 |
Downloading torch
|
| 5931 |
+
Installed 26 packages in 443ms
|
| 5932 |
</div>
|
| 5933 |
</div>
|
| 5934 |
<div class="cell-artifacts">
|
|
|
|
| 5947 |
<span onclick="toggleOutput('gptoss_training_run')" style="cursor: pointer;">▼ output</span>
|
| 5948 |
<span id="uv-indicator-gptoss_training_run" onclick="toggleUvLogsFromHeader('gptoss_training_run')" style="cursor: pointer;">▶ uv-logs</span>
|
| 5949 |
</span> |
|
| 5950 |
+
Cell: gptoss_training_run | deps: torch, numpy | 41.85s
|
| 5951 |
| <button class="run-btn" onclick="runCell('gptoss_training_run')">▶ run</button>
|
| 5952 |
<button class="copy-btn" onclick="copyCell('gptoss_training_run')">Copy</button>
|
| 5953 |
<a href="cells/gptoss_training_run.py" target="_blank" class="raw-btn">Raw</a>
|
|
|
|
| 6248 |
|
| 6249 |
Warming up (10 iterations)...
|
| 6250 |
Benchmarking (50 iterations)...
|
| 6251 |
+
Progress: 20% complete (avg: 49.277 ms)
|
| 6252 |
+
Progress: 40% complete (avg: 48.351 ms)
|
| 6253 |
+
Progress: 60% complete (avg: 47.557 ms)
|
| 6254 |
+
Progress: 80% complete (avg: 46.750 ms)
|
| 6255 |
|
| 6256 |
Output tensors:
|
| 6257 |
Primary: shape=(1, 100, 1152), dtype=torch.float32, device=cuda:0, range=[-0.064982, 0.061193], mean=0.000100, std=0.013510, norm=4.585560
|
|
|
|
| 6261 |
Iterations: 50
|
| 6262 |
|
| 6263 |
Latency Statistics:
|
| 6264 |
+
Average: 45.751 ms
|
| 6265 |
+
Min: 38.860 ms
|
| 6266 |
+
Max: 50.817 ms
|
| 6267 |
+
Std Dev: 2.834 ms
|
| 6268 |
|
| 6269 |
Percentiles:
|
| 6270 |
+
P50 (median): 45.833 ms
|
| 6271 |
+
P95: 50.540 ms
|
| 6272 |
+
P99: 50.777 ms
|
| 6273 |
|
| 6274 |
Throughput:
|
| 6275 |
+
Tokens/sec: 2185.7
|
| 6276 |
+
Std Dev: 141.0
|
| 6277 |
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 6278 |
|
| 6279 |
Saved benchmark results to gptoss_training_results.json
|
|
|
|
| 6283 |
<div class="uv-install-logs" id="uv-logs-gptoss_training_run">
|
| 6284 |
<div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
|
| 6285 |
<div class="uv-logs-content" style="display: none;">
|
|
|
|
| 6286 |
Downloading sympy (6.0MiB)
|
| 6287 |
+
Downloading nvidia-cufile-cu12 (1.1MiB)
|
| 6288 |
+
Downloading nvidia-cufft-cu12 (184.2MiB)
|
| 6289 |
+
Downloading nvidia-cusparse-cu12 (274.9MiB)
|
| 6290 |
+
Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
|
| 6291 |
Downloading numpy (16.2MiB)
|
| 6292 |
+
Downloading nvidia-curand-cu12 (60.7MiB)
|
| 6293 |
Downloading networkx (1.9MiB)
|
| 6294 |
+
Downloading setuptools (1.1MiB)
|
|
|
|
| 6295 |
Downloading nvidia-cusparselt-cu12 (273.9MiB)
|
| 6296 |
Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
|
|
|
|
| 6297 |
Downloading nvidia-cudnn-cu12 (674.0MiB)
|
| 6298 |
+
Downloading nvidia-cublas-cu12 (566.8MiB)
|
|
|
|
| 6299 |
Downloading nvidia-nccl-cu12 (307.4MiB)
|
| 6300 |
+
Downloading nvidia-cusolver-cu12 (255.1MiB)
|
|
|
|
| 6301 |
Downloading nvidia-nvjitlink-cu12 (37.4MiB)
|
|
|
|
| 6302 |
Downloading triton (148.3MiB)
|
| 6303 |
+
Downloading torch (846.9MiB)
|
| 6304 |
Downloading nvidia-cufile-cu12
|
| 6305 |
Downloading setuptools
|
| 6306 |
Downloading networkx
|
|
|
|
| 6319 |
Downloading nvidia-cublas-cu12
|
| 6320 |
Downloading nvidia-cudnn-cu12
|
| 6321 |
Downloading torch
|
| 6322 |
+
Installed 26 packages in 544ms
|
| 6323 |
</div>
|
| 6324 |
</div>
|
| 6325 |
<div class="cell-artifacts">
|
|
|
|
| 6331 |
|
| 6332 |
<h2>MegaBlocks Implementation</h2>
|
| 6333 |
<p>This section runs the MegaBlocks MoE implementation with optimized kernels from the Hugging Face hub.</p>
|
| 6334 |
+
<div class="cell" id="cell-megablocks_run">
|
| 6335 |
<div class="cell-header">
|
| 6336 |
<span class="collapse-indicators">
|
| 6337 |
<span onclick="toggleCode('megablocks_run')" style="cursor: pointer;">▼ code</span>
|
| 6338 |
<span onclick="toggleOutput('megablocks_run')" style="cursor: pointer;">▼ output</span>
|
| 6339 |
<span id="uv-indicator-megablocks_run" onclick="toggleUvLogsFromHeader('megablocks_run')" style="cursor: pointer;">▶ uv-logs</span>
|
| 6340 |
</span> |
|
| 6341 |
+
Cell: megablocks_run | deps: torch, numpy, kernels | 47.50s
|
| 6342 |
| <button class="run-btn" onclick="runCell('megablocks_run')">▶ run</button>
|
| 6343 |
<button class="copy-btn" onclick="copyCell('megablocks_run')">Copy</button>
|
| 6344 |
<a href="cells/megablocks_run.py" target="_blank" class="raw-btn">Raw</a>
|
|
|
|
| 6545 |
</div>
|
| 6546 |
</div>
|
| 6547 |
<div id="output-megablocks_run" class="cell-output">
|
| 6548 |
+
<div class="cell-stdout">Loading weights from: /repo/moe_benchmarks/megablocks_yamoe/.uvnote/cache/b398a2853af91970392ae37f0d53a0eda463df639220863fbd38f33605bf9cbb
|
| 6549 |
Loaded shared weights from artifacts
|
| 6550 |
Router weight sum: 12.588732
|
| 6551 |
Gate/up sum: 1026.601807
|
|
|
|
| 6566 |
Input Variation: +0.001 * iteration (deterministic)
|
| 6567 |
|
| 6568 |
Warming up (10 iterations)...
|
| 6569 |
+
Benchmarking (50 iterations)...
|
| 6570 |
+
Progress: 20% complete (avg: 0.852 ms)
|
| 6571 |
+
Progress: 40% complete (avg: 0.837 ms)
|
| 6572 |
+
Progress: 60% complete (avg: 0.835 ms)
|
| 6573 |
+
Progress: 80% complete (avg: 2.704 ms)
|
| 6574 |
+
|
| 6575 |
+
Output tensors:
|
| 6576 |
+
Primary: shape=(1, 100, 1152), dtype=torch.float32, device=cuda:0, range=[-0.061104, 0.055115], mean=0.000056, std=0.013535, norm=4.593927
|
| 6577 |
+
Auxiliary: shape=(100, 4), dtype=torch.float32, device=cuda:0, range=[0.220999, 0.302948], mean=0.250000, std=0.012156, norm=5.005893
|
| 6578 |
+
|
| 6579 |
+
━━━━━━━━━━━━━━━━━━━━ Benchmark Results ━━━━━━━━━━━━━━━━━━━━
|
| 6580 |
+
Iterations: 50
|
| 6581 |
+
|
| 6582 |
+
Latency Statistics:
|
| 6583 |
+
Average: 3.870 ms
|
| 6584 |
+
Min: 0.810 ms
|
| 6585 |
+
Max: 8.541 ms
|
| 6586 |
+
Std Dev: 3.728 ms
|
| 6587 |
+
|
| 6588 |
+
Percentiles:
|
| 6589 |
+
P50 (median): 0.840 ms
|
| 6590 |
+
P95: 8.540 ms
|
| 6591 |
+
P99: 8.541 ms
|
| 6592 |
+
|
| 6593 |
+
Throughput:
|
| 6594 |
+
Tokens/sec: 25840.1
|
| 6595 |
+
Std Dev: 53236.0
|
| 6596 |
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 6597 |
+
|
| 6598 |
+
Saved benchmark results to megablocks_results.json
|
| 6599 |
+
|
| 6600 |
+
Output sum: 6.473885
|
| 6601 |
</div>
|
| 6602 |
<div class="uv-install-logs" id="uv-logs-megablocks_run">
|
| 6603 |
<div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
|
| 6604 |
<div class="uv-logs-content" style="display: none;">
|
| 6605 |
+
Downloading nvidia-nccl-cu12 (307.4MiB)
|
| 6606 |
+
Downloading nvidia-cufft-cu12 (184.2MiB)
|
| 6607 |
+
Downloading nvidia-curand-cu12 (60.7MiB)
|
|
|
|
| 6608 |
Downloading nvidia-cufile-cu12 (1.1MiB)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6609 |
Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
|
| 6610 |
+
Downloading nvidia-nvjitlink-cu12 (37.4MiB)
|
| 6611 |
Downloading nvidia-cusparse-cu12 (274.9MiB)
|
| 6612 |
+
Downloading nvidia-cudnn-cu12 (674.0MiB)
|
| 6613 |
+
Downloading triton (148.3MiB)
|
| 6614 |
+
Downloading networkx (1.9MiB)
|
|
|
|
| 6615 |
Downloading hf-xet (3.0MiB)
|
| 6616 |
Downloading nvidia-cublas-cu12 (566.8MiB)
|
| 6617 |
+
Downloading setuptools (1.1MiB)
|
| 6618 |
+
Downloading nvidia-cusolver-cu12 (255.1MiB)
|
| 6619 |
+
Downloading sympy (6.0MiB)
|
| 6620 |
+
Downloading torch (846.9MiB)
|
| 6621 |
+
Downloading numpy (16.2MiB)
|
| 6622 |
+
Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
|
| 6623 |
+
Downloading nvidia-cusparselt-cu12 (273.9MiB)
|
| 6624 |
Downloading nvidia-cufile-cu12
|
| 6625 |
Downloading hf-xet
|
| 6626 |
Downloading setuptools
|
|
|
|
| 6640 |
Downloading nvidia-cublas-cu12
|
| 6641 |
Downloading nvidia-cudnn-cu12
|
| 6642 |
Downloading torch
|
| 6643 |
+
Installed 37 packages in 458ms
|
| 6644 |
</div>
|
| 6645 |
</div>
|
| 6646 |
<div class="cell-stderr">Fetching 66 files: 0%| | 0/66 [00:00<?, ?it/s]
|
| 6647 |
+
Fetching 66 files: 2%|▏ | 1/66 [00:00<00:12, 5.34it/s]
|
| 6648 |
+
Fetching 66 files: 3%|▎ | 2/66 [00:00<00:16, 3.98it/s]
|
| 6649 |
+
Fetching 66 files: 26%|██▌ | 17/66 [00:01<00:02, 17.77it/s]
|
| 6650 |
+
Fetching 66 files: 67%|██████▋ | 44/66 [00:01<00:00, 43.54it/s]
|
| 6651 |
+
Fetching 66 files: 76%|███████▌ | 50/66 [00:01<00:00, 43.86it/s]
|
| 6652 |
+
Fetching 66 files: 83%|████████▎ | 55/66 [00:01<00:00, 42.86it/s]
|
| 6653 |
+
Fetching 66 files: 91%|█████████ | 60/66 [00:01<00:00, 33.73it/s]
|
| 6654 |
+
Fetching 66 files: 98%|█████████▊| 65/66 [00:02<00:00, 33.63it/s]
|
| 6655 |
+
Fetching 66 files: 100%|██████████| 66/66 [00:02<00:00, 32.65it/s]</div>
|
| 6656 |
+
<div class="cell-artifacts">
|
| 6657 |
+
<h4>Artifacts:</h4>
|
| 6658 |
+
<a href="artifacts/megablocks_run/megablocks_results.json" class="artifact" target="_blank">megablocks_results.json</a>
|
| 6659 |
+
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6660 |
</div>
|
| 6661 |
</div>
|
| 6662 |
|
| 6663 |
<h2>Performance Visualization</h2>
|
| 6664 |
<p>This section reads all benchmark results and creates a comprehensive performance comparison chart.</p>
|
| 6665 |
+
<div class="cell" id="cell-visualization">
|
| 6666 |
+
<div class="cell-header">
|
| 6667 |
+
<span class="collapse-indicators">
|
| 6668 |
+
<span onclick="toggleCode('visualization')" style="cursor: pointer;">▼ code</span>
|
| 6669 |
+
<span onclick="toggleOutput('visualization')" style="cursor: pointer;">▼ output</span>
|
| 6670 |
+
<span id="uv-indicator-visualization" onclick="toggleUvLogsFromHeader('visualization')" style="cursor: pointer;">▶ uv-logs</span>
|
| 6671 |
+
</span> |
|
| 6672 |
+
Cell: visualization | deps: matplotlib | 3.14s
|
| 6673 |
+
| <button class="run-btn" onclick="runCell('visualization')">▶ run</button>
|
| 6674 |
+
<button class="copy-btn" onclick="copyCell('visualization')">Copy</button>
|
| 6675 |
+
<a href="cells/visualization.py" target="_blank" class="raw-btn">Raw</a>
|
| 6676 |
+
</div>
|
| 6677 |
+
<div id="code-visualization" class="cell-code" data-lines="110">
|
| 6678 |
+
<div class="highlight-with-lines">
|
| 6679 |
+
<div class="line-numbers" id="lines-visualization">
|
| 6680 |
+
<a class="line-number" data-cell="visualization" data-line="1" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 1, true);">1</a>
|
| 6681 |
+
<a class="line-number" data-cell="visualization" data-line="2" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 2, true);">2</a>
|
| 6682 |
+
<a class="line-number" data-cell="visualization" data-line="3" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 3, true);">3</a>
|
| 6683 |
+
<a class="line-number" data-cell="visualization" data-line="4" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 4, true);">4</a>
|
| 6684 |
+
<a class="line-number" data-cell="visualization" data-line="5" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 5, true);">5</a>
|
| 6685 |
+
<a class="line-number" data-cell="visualization" data-line="6" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 6, true);">6</a>
|
| 6686 |
+
<a class="line-number" data-cell="visualization" data-line="7" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 7, true);">7</a>
|
| 6687 |
+
<a class="line-number" data-cell="visualization" data-line="8" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 8, true);">8</a>
|
| 6688 |
+
<a class="line-number" data-cell="visualization" data-line="9" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 9, true);">9</a>
|
| 6689 |
+
<a class="line-number" data-cell="visualization" data-line="10" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 10, true);">10</a>
|
| 6690 |
+
<a class="line-number" data-cell="visualization" data-line="11" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 11, true);">11</a>
|
| 6691 |
+
<a class="line-number" data-cell="visualization" data-line="12" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 12, true);">12</a>
|
| 6692 |
+
<a class="line-number" data-cell="visualization" data-line="13" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 13, true);">13</a>
|
| 6693 |
+
<a class="line-number" data-cell="visualization" data-line="14" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 14, true);">14</a>
|
| 6694 |
+
<a class="line-number" data-cell="visualization" data-line="15" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 15, true);">15</a>
|
| 6695 |
+
<a class="line-number" data-cell="visualization" data-line="16" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 16, true);">16</a>
|
| 6696 |
+
<a class="line-number" data-cell="visualization" data-line="17" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 17, true);">17</a>
|
| 6697 |
+
<a class="line-number" data-cell="visualization" data-line="18" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 18, true);">18</a>
|
| 6698 |
+
<a class="line-number" data-cell="visualization" data-line="19" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 19, true);">19</a>
|
| 6699 |
+
<a class="line-number" data-cell="visualization" data-line="20" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 20, true);">20</a>
|
| 6700 |
+
<a class="line-number" data-cell="visualization" data-line="21" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 21, true);">21</a>
|
| 6701 |
+
<a class="line-number" data-cell="visualization" data-line="22" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 22, true);">22</a>
|
| 6702 |
+
<a class="line-number" data-cell="visualization" data-line="23" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 23, true);">23</a>
|
| 6703 |
+
<a class="line-number" data-cell="visualization" data-line="24" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 24, true);">24</a>
|
| 6704 |
+
<a class="line-number" data-cell="visualization" data-line="25" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 25, true);">25</a>
|
| 6705 |
+
<a class="line-number" data-cell="visualization" data-line="26" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 26, true);">26</a>
|
| 6706 |
+
<a class="line-number" data-cell="visualization" data-line="27" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 27, true);">27</a>
|
| 6707 |
+
<a class="line-number" data-cell="visualization" data-line="28" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 28, true);">28</a>
|
| 6708 |
+
<a class="line-number" data-cell="visualization" data-line="29" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 29, true);">29</a>
|
| 6709 |
+
<a class="line-number" data-cell="visualization" data-line="30" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 30, true);">30</a>
|
| 6710 |
+
<a class="line-number" data-cell="visualization" data-line="31" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 31, true);">31</a>
|
| 6711 |
+
<a class="line-number" data-cell="visualization" data-line="32" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 32, true);">32</a>
|
| 6712 |
+
<a class="line-number" data-cell="visualization" data-line="33" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 33, true);">33</a>
|
| 6713 |
+
<a class="line-number" data-cell="visualization" data-line="34" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 34, true);">34</a>
|
| 6714 |
+
<a class="line-number" data-cell="visualization" data-line="35" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 35, true);">35</a>
|
| 6715 |
+
<a class="line-number" data-cell="visualization" data-line="36" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 36, true);">36</a>
|
| 6716 |
+
<a class="line-number" data-cell="visualization" data-line="37" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 37, true);">37</a>
|
| 6717 |
+
<a class="line-number" data-cell="visualization" data-line="38" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 38, true);">38</a>
|
| 6718 |
+
<a class="line-number" data-cell="visualization" data-line="39" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 39, true);">39</a>
|
| 6719 |
+
<a class="line-number" data-cell="visualization" data-line="40" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 40, true);">40</a>
|
| 6720 |
+
<a class="line-number" data-cell="visualization" data-line="41" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 41, true);">41</a>
|
| 6721 |
+
<a class="line-number" data-cell="visualization" data-line="42" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 42, true);">42</a>
|
| 6722 |
+
<a class="line-number" data-cell="visualization" data-line="43" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 43, true);">43</a>
|
| 6723 |
+
<a class="line-number" data-cell="visualization" data-line="44" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 44, true);">44</a>
|
| 6724 |
+
<a class="line-number" data-cell="visualization" data-line="45" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 45, true);">45</a>
|
| 6725 |
+
<a class="line-number" data-cell="visualization" data-line="46" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 46, true);">46</a>
|
| 6726 |
+
<a class="line-number" data-cell="visualization" data-line="47" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 47, true);">47</a>
|
| 6727 |
+
<a class="line-number" data-cell="visualization" data-line="48" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 48, true);">48</a>
|
| 6728 |
+
<a class="line-number" data-cell="visualization" data-line="49" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 49, true);">49</a>
|
| 6729 |
+
<a class="line-number" data-cell="visualization" data-line="50" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 50, true);">50</a>
|
| 6730 |
+
<a class="line-number" data-cell="visualization" data-line="51" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 51, true);">51</a>
|
| 6731 |
+
<a class="line-number" data-cell="visualization" data-line="52" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 52, true);">52</a>
|
| 6732 |
+
<a class="line-number" data-cell="visualization" data-line="53" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 53, true);">53</a>
|
| 6733 |
+
<a class="line-number" data-cell="visualization" data-line="54" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 54, true);">54</a>
|
| 6734 |
+
<a class="line-number" data-cell="visualization" data-line="55" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 55, true);">55</a>
|
| 6735 |
+
<a class="line-number" data-cell="visualization" data-line="56" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 56, true);">56</a>
|
| 6736 |
+
<a class="line-number" data-cell="visualization" data-line="57" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 57, true);">57</a>
|
| 6737 |
+
<a class="line-number" data-cell="visualization" data-line="58" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 58, true);">58</a>
|
| 6738 |
+
<a class="line-number" data-cell="visualization" data-line="59" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 59, true);">59</a>
|
| 6739 |
+
<a class="line-number" data-cell="visualization" data-line="60" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 60, true);">60</a>
|
| 6740 |
+
<a class="line-number" data-cell="visualization" data-line="61" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 61, true);">61</a>
|
| 6741 |
+
<a class="line-number" data-cell="visualization" data-line="62" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 62, true);">62</a>
|
| 6742 |
+
<a class="line-number" data-cell="visualization" data-line="63" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 63, true);">63</a>
|
| 6743 |
+
<a class="line-number" data-cell="visualization" data-line="64" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 64, true);">64</a>
|
| 6744 |
+
<a class="line-number" data-cell="visualization" data-line="65" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 65, true);">65</a>
|
| 6745 |
+
<a class="line-number" data-cell="visualization" data-line="66" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 66, true);">66</a>
|
| 6746 |
+
<a class="line-number" data-cell="visualization" data-line="67" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 67, true);">67</a>
|
| 6747 |
+
<a class="line-number" data-cell="visualization" data-line="68" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 68, true);">68</a>
|
| 6748 |
+
<a class="line-number" data-cell="visualization" data-line="69" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 69, true);">69</a>
|
| 6749 |
+
<a class="line-number" data-cell="visualization" data-line="70" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 70, true);">70</a>
|
| 6750 |
+
<a class="line-number" data-cell="visualization" data-line="71" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 71, true);">71</a>
|
| 6751 |
+
<a class="line-number" data-cell="visualization" data-line="72" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 72, true);">72</a>
|
| 6752 |
+
<a class="line-number" data-cell="visualization" data-line="73" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 73, true);">73</a>
|
| 6753 |
+
<a class="line-number" data-cell="visualization" data-line="74" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 74, true);">74</a>
|
| 6754 |
+
<a class="line-number" data-cell="visualization" data-line="75" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 75, true);">75</a>
|
| 6755 |
+
<a class="line-number" data-cell="visualization" data-line="76" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 76, true);">76</a>
|
| 6756 |
+
<a class="line-number" data-cell="visualization" data-line="77" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 77, true);">77</a>
|
| 6757 |
+
<a class="line-number" data-cell="visualization" data-line="78" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 78, true);">78</a>
|
| 6758 |
+
<a class="line-number" data-cell="visualization" data-line="79" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 79, true);">79</a>
|
| 6759 |
+
<a class="line-number" data-cell="visualization" data-line="80" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 80, true);">80</a>
|
| 6760 |
+
<a class="line-number" data-cell="visualization" data-line="81" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 81, true);">81</a>
|
| 6761 |
+
<a class="line-number" data-cell="visualization" data-line="82" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 82, true);">82</a>
|
| 6762 |
+
<a class="line-number" data-cell="visualization" data-line="83" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 83, true);">83</a>
|
| 6763 |
+
<a class="line-number" data-cell="visualization" data-line="84" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 84, true);">84</a>
|
| 6764 |
+
<a class="line-number" data-cell="visualization" data-line="85" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 85, true);">85</a>
|
| 6765 |
+
<a class="line-number" data-cell="visualization" data-line="86" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 86, true);">86</a>
|
| 6766 |
+
<a class="line-number" data-cell="visualization" data-line="87" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 87, true);">87</a>
|
| 6767 |
+
<a class="line-number" data-cell="visualization" data-line="88" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 88, true);">88</a>
|
| 6768 |
+
<a class="line-number" data-cell="visualization" data-line="89" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 89, true);">89</a>
|
| 6769 |
+
<a class="line-number" data-cell="visualization" data-line="90" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 90, true);">90</a>
|
| 6770 |
+
<a class="line-number" data-cell="visualization" data-line="91" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 91, true);">91</a>
|
| 6771 |
+
<a class="line-number" data-cell="visualization" data-line="92" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 92, true);">92</a>
|
| 6772 |
+
<a class="line-number" data-cell="visualization" data-line="93" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 93, true);">93</a>
|
| 6773 |
+
<a class="line-number" data-cell="visualization" data-line="94" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 94, true);">94</a>
|
| 6774 |
+
<a class="line-number" data-cell="visualization" data-line="95" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 95, true);">95</a>
|
| 6775 |
+
<a class="line-number" data-cell="visualization" data-line="96" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 96, true);">96</a>
|
| 6776 |
+
<a class="line-number" data-cell="visualization" data-line="97" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 97, true);">97</a>
|
| 6777 |
+
<a class="line-number" data-cell="visualization" data-line="98" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 98, true);">98</a>
|
| 6778 |
+
<a class="line-number" data-cell="visualization" data-line="99" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 99, true);">99</a>
|
| 6779 |
+
<a class="line-number" data-cell="visualization" data-line="100" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 100, true);">100</a>
|
| 6780 |
+
<a class="line-number" data-cell="visualization" data-line="101" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 101, true);">101</a>
|
| 6781 |
+
<a class="line-number" data-cell="visualization" data-line="102" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 102, true);">102</a>
|
| 6782 |
+
<a class="line-number" data-cell="visualization" data-line="103" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 103, true);">103</a>
|
| 6783 |
+
<a class="line-number" data-cell="visualization" data-line="104" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 104, true);">104</a>
|
| 6784 |
+
<a class="line-number" data-cell="visualization" data-line="105" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 105, true);">105</a>
|
| 6785 |
+
<a class="line-number" data-cell="visualization" data-line="106" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 106, true);">106</a>
|
| 6786 |
+
<a class="line-number" data-cell="visualization" data-line="107" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 107, true);">107</a>
|
| 6787 |
+
<a class="line-number" data-cell="visualization" data-line="108" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 108, true);">108</a>
|
| 6788 |
+
<a class="line-number" data-cell="visualization" data-line="109" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 109, true);">109</a>
|
| 6789 |
+
<a class="line-number" data-cell="visualization" data-line="110" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 110, true);">110</a>
|
| 6790 |
+
</div>
|
| 6791 |
+
<div class="code-wrap">
|
| 6792 |
+
<div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">json</span>
|
| 6793 |
+
<span class="kn">import</span><span class="w"> </span><span class="nn">matplotlib.pyplot</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">plt</span>
|
| 6794 |
+
<span class="kn">import</span><span class="w"> </span><span class="nn">numpy</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">np</span>
|
| 6795 |
+
<span class="kn">from</span><span class="w"> </span><span class="nn">pathlib</span><span class="w"> </span><span class="kn">import</span> <span class="n">Path</span>
|
| 6796 |
+
<span class="kn">import</span><span class="w"> </span><span class="nn">os</span>
|
| 6797 |
+
|
| 6798 |
+
<span class="c1"># List of expected result files</span>
|
| 6799 |
+
<span class="n">yamoe_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'UVNOTE_INPUT_YAMOE_RUN'</span><span class="p">,</span> <span class="s1">'.'</span><span class="p">)</span>
|
| 6800 |
+
<span class="n">binned_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'UVNOTE_INPUT_BINNED_RUN'</span><span class="p">,</span> <span class="s1">'.'</span><span class="p">)</span>
|
| 6801 |
+
<span class="n">gptoss_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'UVNOTE_INPUT_GPTOSS_RUN'</span><span class="p">,</span> <span class="s1">'.'</span><span class="p">)</span>
|
| 6802 |
+
<span class="n">gptoss_training_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'UVNOTE_INPUT_GPTOSS_TRAINING_RUN'</span><span class="p">,</span> <span class="s1">'.'</span><span class="p">)</span>
|
| 6803 |
+
<span class="n">megablocks_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'UVNOTE_INPUT_MEGABLOCKS_RUN'</span><span class="p">,</span> <span class="s1">'.'</span><span class="p">)</span>
|
| 6804 |
+
|
| 6805 |
+
<span class="n">result_files</span> <span class="o">=</span> <span class="p">[</span>
|
| 6806 |
+
<span class="n">Path</span><span class="p">(</span><span class="n">yamoe_dir</span><span class="p">)</span> <span class="o">/</span> <span class="s2">"yamoe_results.json"</span><span class="p">,</span>
|
| 6807 |
+
<span class="n">Path</span><span class="p">(</span><span class="n">binned_dir</span><span class="p">)</span> <span class="o">/</span> <span class="s2">"binned_results.json"</span><span class="p">,</span>
|
| 6808 |
+
<span class="n">Path</span><span class="p">(</span><span class="n">gptoss_dir</span><span class="p">)</span> <span class="o">/</span> <span class="s2">"gptoss_results.json"</span><span class="p">,</span>
|
| 6809 |
+
<span class="n">Path</span><span class="p">(</span><span class="n">gptoss_training_dir</span><span class="p">)</span> <span class="o">/</span> <span class="s2">"gptoss_training_results.json"</span><span class="p">,</span>
|
| 6810 |
+
<span class="n">Path</span><span class="p">(</span><span class="n">megablocks_dir</span><span class="p">)</span> <span class="o">/</span> <span class="s2">"megablocks_results.json"</span>
|
| 6811 |
+
<span class="p">]</span>
|
| 6812 |
+
|
| 6813 |
+
<span class="c1"># Load all benchmark results</span>
|
| 6814 |
+
<span class="n">results</span> <span class="o">=</span> <span class="p">{}</span>
|
| 6815 |
+
<span class="k">for</span> <span class="n">file</span> <span class="ow">in</span> <span class="n">result_files</span><span class="p">:</span>
|
| 6816 |
+
<span class="k">if</span> <span class="n">Path</span><span class="p">(</span><span class="n">file</span><span class="p">)</span><span class="o">.</span><span class="n">exists</span><span class="p">():</span>
|
| 6817 |
+
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">file</span><span class="p">,</span> <span class="s1">'r'</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
|
| 6818 |
+
<span class="n">data</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
|
| 6819 |
+
<span class="n">results</span><span class="p">[</span><span class="n">data</span><span class="p">[</span><span class="s1">'implementation'</span><span class="p">]]</span> <span class="o">=</span> <span class="n">data</span>
|
| 6820 |
+
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Loaded </span><span class="si">{</span><span class="n">file</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
| 6821 |
+
<span class="k">else</span><span class="p">:</span>
|
| 6822 |
+
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Missing </span><span class="si">{</span><span class="n">file</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
| 6823 |
+
|
| 6824 |
+
<span class="k">if</span> <span class="ow">not</span> <span class="n">results</span><span class="p">:</span>
|
| 6825 |
+
<span class="nb">print</span><span class="p">(</span><span class="s2">"No benchmark results found. Run the benchmark cells first."</span><span class="p">)</span>
|
| 6826 |
+
<span class="k">else</span><span class="p">:</span>
|
| 6827 |
+
<span class="c1"># Extract data for plotting</span>
|
| 6828 |
+
<span class="n">implementations</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">results</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
|
| 6829 |
+
<span class="n">avg_latencies</span> <span class="o">=</span> <span class="p">[</span><span class="n">results</span><span class="p">[</span><span class="n">impl</span><span class="p">][</span><span class="s1">'stats'</span><span class="p">][</span><span class="s1">'avg_ms'</span><span class="p">]</span> <span class="k">for</span> <span class="n">impl</span> <span class="ow">in</span> <span class="n">implementations</span><span class="p">]</span>
|
| 6830 |
+
<span class="n">p95_latencies</span> <span class="o">=</span> <span class="p">[</span><span class="n">results</span><span class="p">[</span><span class="n">impl</span><span class="p">][</span><span class="s1">'stats'</span><span class="p">][</span><span class="s1">'p95_ms'</span><span class="p">]</span> <span class="k">for</span> <span class="n">impl</span> <span class="ow">in</span> <span class="n">implementations</span><span class="p">]</span>
|
| 6831 |
+
<span class="n">throughputs</span> <span class="o">=</span> <span class="p">[</span><span class="n">results</span><span class="p">[</span><span class="n">impl</span><span class="p">][</span><span class="s1">'stats'</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'tokens_per_s'</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span> <span class="k">for</span> <span class="n">impl</span> <span class="ow">in</span> <span class="n">implementations</span><span class="p">]</span>
|
| 6832 |
+
|
| 6833 |
+
<span class="c1"># Create figure with subplots</span>
|
| 6834 |
+
<span class="n">fig</span><span class="p">,</span> <span class="p">(</span><span class="n">ax1</span><span class="p">,</span> <span class="n">ax2</span><span class="p">,</span> <span class="n">ax3</span><span class="p">)</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="n">figsize</span><span class="o">=</span><span class="p">(</span><span class="mi">18</span><span class="p">,</span> <span class="mi">6</span><span class="p">))</span>
|
| 6835 |
+
<span class="n">fig</span><span class="o">.</span><span class="n">suptitle</span><span class="p">(</span><span class="s1">'MoE Implementation Performance Comparison'</span><span class="p">,</span> <span class="n">fontsize</span><span class="o">=</span><span class="mi">16</span><span class="p">,</span> <span class="n">fontweight</span><span class="o">=</span><span class="s1">'bold'</span><span class="p">)</span>
|
| 6836 |
+
|
| 6837 |
+
<span class="c1"># Colors for each implementation</span>
|
| 6838 |
+
<span class="n">colors</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'#FF6B6B'</span><span class="p">,</span> <span class="s1">'#4ECDC4'</span><span class="p">,</span> <span class="s1">'#45B7D1'</span><span class="p">,</span> <span class="s1">'#96CEB4'</span><span class="p">,</span> <span class="s1">'#FECA57'</span><span class="p">][:</span><span class="nb">len</span><span class="p">(</span><span class="n">implementations</span><span class="p">)]</span>
|
| 6839 |
+
|
| 6840 |
+
<span class="c1"># 1. Average Latency Chart</span>
|
| 6841 |
+
<span class="n">bars1</span> <span class="o">=</span> <span class="n">ax1</span><span class="o">.</span><span class="n">bar</span><span class="p">(</span><span class="n">implementations</span><span class="p">,</span> <span class="n">avg_latencies</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="n">colors</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.8</span><span class="p">,</span> <span class="n">edgecolor</span><span class="o">=</span><span class="s1">'black'</span><span class="p">,</span> <span class="n">linewidth</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
| 6842 |
+
<span class="n">ax1</span><span class="o">.</span><span class="n">set_title</span><span class="p">(</span><span class="s1">'Average Latency'</span><span class="p">,</span> <span class="n">fontweight</span><span class="o">=</span><span class="s1">'bold'</span><span class="p">,</span> <span class="n">fontsize</span><span class="o">=</span><span class="mi">14</span><span class="p">)</span>
|
| 6843 |
+
<span class="n">ax1</span><span class="o">.</span><span class="n">set_ylabel</span><span class="p">(</span><span class="s1">'Latency (ms)'</span><span class="p">,</span> <span class="n">fontweight</span><span class="o">=</span><span class="s1">'bold'</span><span class="p">)</span>
|
| 6844 |
+
<span class="n">ax1</span><span class="o">.</span><span class="n">tick_params</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="s1">'x'</span><span class="p">,</span> <span class="n">rotation</span><span class="o">=</span><span class="mi">45</span><span class="p">)</span>
|
| 6845 |
+
<span class="n">ax1</span><span class="o">.</span><span class="n">grid</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="s1">'y'</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.3</span><span class="p">)</span>
|
| 6846 |
+
|
| 6847 |
+
<span class="c1"># Add value labels on bars</span>
|
| 6848 |
+
<span class="k">for</span> <span class="n">bar</span><span class="p">,</span> <span class="n">val</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">bars1</span><span class="p">,</span> <span class="n">avg_latencies</span><span class="p">):</span>
|
| 6849 |
+
<span class="n">ax1</span><span class="o">.</span><span class="n">text</span><span class="p">(</span><span class="n">bar</span><span class="o">.</span><span class="n">get_x</span><span class="p">()</span> <span class="o">+</span> <span class="n">bar</span><span class="o">.</span><span class="n">get_width</span><span class="p">()</span><span class="o">/</span><span class="mi">2</span><span class="p">,</span> <span class="n">bar</span><span class="o">.</span><span class="n">get_height</span><span class="p">()</span> <span class="o">+</span> <span class="nb">max</span><span class="p">(</span><span class="n">avg_latencies</span><span class="p">)</span><span class="o">*</span><span class="mf">0.01</span><span class="p">,</span>
|
| 6850 |
+
<span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">val</span><span class="si">:</span><span class="s1">.2f</span><span class="si">}</span><span class="s1">ms'</span><span class="p">,</span> <span class="n">ha</span><span class="o">=</span><span class="s1">'center'</span><span class="p">,</span> <span class="n">va</span><span class="o">=</span><span class="s1">'bottom'</span><span class="p">,</span> <span class="n">fontweight</span><span class="o">=</span><span class="s1">'bold'</span><span class="p">)</span>
|
| 6851 |
+
|
| 6852 |
+
<span class="c1"># 2. P95 Latency Chart</span>
|
| 6853 |
+
<span class="n">bars2</span> <span class="o">=</span> <span class="n">ax2</span><span class="o">.</span><span class="n">bar</span><span class="p">(</span><span class="n">implementations</span><span class="p">,</span> <span class="n">p95_latencies</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="n">colors</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.8</span><span class="p">,</span> <span class="n">edgecolor</span><span class="o">=</span><span class="s1">'black'</span><span class="p">,</span> <span class="n">linewidth</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
| 6854 |
+
<span class="n">ax2</span><span class="o">.</span><span class="n">set_title</span><span class="p">(</span><span class="s1">'95th Percentile Latency'</span><span class="p">,</span> <span class="n">fontweight</span><span class="o">=</span><span class="s1">'bold'</span><span class="p">,</span> <span class="n">fontsize</span><span class="o">=</span><span class="mi">14</span><span class="p">)</span>
|
| 6855 |
+
<span class="n">ax2</span><span class="o">.</span><span class="n">set_ylabel</span><span class="p">(</span><span class="s1">'Latency (ms)'</span><span class="p">,</span> <span class="n">fontweight</span><span class="o">=</span><span class="s1">'bold'</span><span class="p">)</span>
|
| 6856 |
+
<span class="n">ax2</span><span class="o">.</span><span class="n">tick_params</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="s1">'x'</span><span class="p">,</span> <span class="n">rotation</span><span class="o">=</span><span class="mi">45</span><span class="p">)</span>
|
| 6857 |
+
<span class="n">ax2</span><span class="o">.</span><span class="n">grid</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="s1">'y'</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.3</span><span class="p">)</span>
|
| 6858 |
+
|
| 6859 |
+
<span class="c1"># Add value labels on bars</span>
|
| 6860 |
+
<span class="k">for</span> <span class="n">bar</span><span class="p">,</span> <span class="n">val</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">bars2</span><span class="p">,</span> <span class="n">p95_latencies</span><span class="p">):</span>
|
| 6861 |
+
<span class="n">ax2</span><span class="o">.</span><span class="n">text</span><span class="p">(</span><span class="n">bar</span><span class="o">.</span><span class="n">get_x</span><span class="p">()</span> <span class="o">+</span> <span class="n">bar</span><span class="o">.</span><span class="n">get_width</span><span class="p">()</span><span class="o">/</span><span class="mi">2</span><span class="p">,</span> <span class="n">bar</span><span class="o">.</span><span class="n">get_height</span><span class="p">()</span> <span class="o">+</span> <span class="nb">max</span><span class="p">(</span><span class="n">p95_latencies</span><span class="p">)</span><span class="o">*</span><span class="mf">0.01</span><span class="p">,</span>
|
| 6862 |
+
<span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">val</span><span class="si">:</span><span class="s1">.2f</span><span class="si">}</span><span class="s1">ms'</span><span class="p">,</span> <span class="n">ha</span><span class="o">=</span><span class="s1">'center'</span><span class="p">,</span> <span class="n">va</span><span class="o">=</span><span class="s1">'bottom'</span><span class="p">,</span> <span class="n">fontweight</span><span class="o">=</span><span class="s1">'bold'</span><span class="p">)</span>
|
| 6863 |
+
|
| 6864 |
+
<span class="c1"># 3. Throughput Chart</span>
|
| 6865 |
+
<span class="n">bars3</span> <span class="o">=</span> <span class="n">ax3</span><span class="o">.</span><span class="n">bar</span><span class="p">(</span><span class="n">implementations</span><span class="p">,</span> <span class="n">throughputs</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="n">colors</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.8</span><span class="p">,</span> <span class="n">edgecolor</span><span class="o">=</span><span class="s1">'black'</span><span class="p">,</span> <span class="n">linewidth</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
| 6866 |
+
<span class="n">ax3</span><span class="o">.</span><span class="n">set_title</span><span class="p">(</span><span class="s1">'Throughput'</span><span class="p">,</span> <span class="n">fontweight</span><span class="o">=</span><span class="s1">'bold'</span><span class="p">,</span> <span class="n">fontsize</span><span class="o">=</span><span class="mi">14</span><span class="p">)</span>
|
| 6867 |
+
<span class="n">ax3</span><span class="o">.</span><span class="n">set_ylabel</span><span class="p">(</span><span class="s1">'Tokens/sec'</span><span class="p">,</span> <span class="n">fontweight</span><span class="o">=</span><span class="s1">'bold'</span><span class="p">)</span>
|
| 6868 |
+
<span class="n">ax3</span><span class="o">.</span><span class="n">tick_params</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="s1">'x'</span><span class="p">,</span> <span class="n">rotation</span><span class="o">=</span><span class="mi">45</span><span class="p">)</span>
|
| 6869 |
+
<span class="n">ax3</span><span class="o">.</span><span class="n">grid</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="s1">'y'</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.3</span><span class="p">)</span>
|
| 6870 |
+
|
| 6871 |
+
<span class="c1"># Add value labels on bars</span>
|
| 6872 |
+
<span class="k">for</span> <span class="n">bar</span><span class="p">,</span> <span class="n">val</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">bars3</span><span class="p">,</span> <span class="n">throughputs</span><span class="p">):</span>
|
| 6873 |
+
<span class="k">if</span> <span class="n">val</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> <span class="c1"># Only show label if throughput was calculated</span>
|
| 6874 |
+
<span class="n">ax3</span><span class="o">.</span><span class="n">text</span><span class="p">(</span><span class="n">bar</span><span class="o">.</span><span class="n">get_x</span><span class="p">()</span> <span class="o">+</span> <span class="n">bar</span><span class="o">.</span><span class="n">get_width</span><span class="p">()</span><span class="o">/</span><span class="mi">2</span><span class="p">,</span> <span class="n">bar</span><span class="o">.</span><span class="n">get_height</span><span class="p">()</span> <span class="o">+</span> <span class="nb">max</span><span class="p">(</span><span class="n">throughputs</span><span class="p">)</span><span class="o">*</span><span class="mf">0.01</span><span class="p">,</span>
|
| 6875 |
+
<span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">val</span><span class="si">:</span><span class="s1">.0f</span><span class="si">}</span><span class="s1">'</span><span class="p">,</span> <span class="n">ha</span><span class="o">=</span><span class="s1">'center'</span><span class="p">,</span> <span class="n">va</span><span class="o">=</span><span class="s1">'bottom'</span><span class="p">,</span> <span class="n">fontweight</span><span class="o">=</span><span class="s1">'bold'</span><span class="p">)</span>
|
| 6876 |
+
|
| 6877 |
+
<span class="n">plt</span><span class="o">.</span><span class="n">tight_layout</span><span class="p">()</span>
|
| 6878 |
+
<span class="n">plt</span><span class="o">.</span><span class="n">savefig</span><span class="p">(</span><span class="s2">"moe_performance_comparison.png"</span><span class="p">,</span> <span class="n">dpi</span><span class="o">=</span><span class="mi">300</span><span class="p">)</span>
|
| 6879 |
+
|
| 6880 |
+
<span class="c1"># Print summary table</span>
|
| 6881 |
+
<span class="nb">print</span><span class="p">(</span><span class="s2">"</span><span class="se">\n</span><span class="s2">Performance Summary:"</span><span class="p">)</span>
|
| 6882 |
+
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="s1">'Implementation'</span><span class="si">:</span><span class="s2"><30</span><span class="si">}</span><span class="s2"> </span><span class="si">{</span><span class="s1">'Avg (ms)'</span><span class="si">:</span><span class="s2"><12</span><span class="si">}</span><span class="s2"> </span><span class="si">{</span><span class="s1">'P95 (ms)'</span><span class="si">:</span><span class="s2"><12</span><span class="si">}</span><span class="s2"> </span><span class="si">{</span><span class="s1">'Tokens/sec'</span><span class="si">:</span><span class="s2"><12</span><span class="si">}</span><span class="s2"> </span><span class="si">{</span><span class="s1">'Relative Speed'</span><span class="si">:</span><span class="s2"><15</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
| 6883 |
+
<span class="nb">print</span><span class="p">(</span><span class="s2">"-"</span><span class="o">*</span><span class="mi">80</span><span class="p">)</span>
|
| 6884 |
+
|
| 6885 |
+
<span class="c1"># Sort by average latency for relative speed calculation</span>
|
| 6886 |
+
<span class="n">sorted_results</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">results</span><span class="o">.</span><span class="n">items</span><span class="p">(),</span> <span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="p">[</span><span class="mi">1</span><span class="p">][</span><span class="s1">'stats'</span><span class="p">][</span><span class="s1">'avg_ms'</span><span class="p">])</span>
|
| 6887 |
+
<span class="n">fastest_latency</span> <span class="o">=</span> <span class="n">sorted_results</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">1</span><span class="p">][</span><span class="s1">'stats'</span><span class="p">][</span><span class="s1">'avg_ms'</span><span class="p">]</span>
|
| 6888 |
+
|
| 6889 |
+
<span class="k">for</span> <span class="n">impl</span><span class="p">,</span> <span class="n">data</span> <span class="ow">in</span> <span class="n">sorted_results</span><span class="p">:</span>
|
| 6890 |
+
<span class="n">avg_ms</span> <span class="o">=</span> <span class="n">data</span><span class="p">[</span><span class="s1">'stats'</span><span class="p">][</span><span class="s1">'avg_ms'</span><span class="p">]</span>
|
| 6891 |
+
<span class="n">p95_ms</span> <span class="o">=</span> <span class="n">data</span><span class="p">[</span><span class="s1">'stats'</span><span class="p">][</span><span class="s1">'p95_ms'</span><span class="p">]</span>
|
| 6892 |
+
<span class="n">tokens_s</span> <span class="o">=</span> <span class="n">data</span><span class="p">[</span><span class="s1">'stats'</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'tokens_per_s'</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span>
|
| 6893 |
+
<span class="n">relative_speed</span> <span class="o">=</span> <span class="n">fastest_latency</span> <span class="o">/</span> <span class="n">avg_ms</span>
|
| 6894 |
+
|
| 6895 |
+
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">impl</span><span class="si">:</span><span class="s2"><30</span><span class="si">}</span><span class="s2"> </span><span class="si">{</span><span class="n">avg_ms</span><span class="si">:</span><span class="s2">>8.2f</span><span class="si">}</span><span class="s2"> </span><span class="si">{</span><span class="n">p95_ms</span><span class="si">:</span><span class="s2">>8.2f</span><span class="si">}</span><span class="s2"> </span><span class="si">{</span><span class="n">tokens_s</span><span class="si">:</span><span class="s2">>8.0f</span><span class="si">}</span><span class="s2"> </span><span class="si">{</span><span class="n">relative_speed</span><span class="si">:</span><span class="s2">>6.2f</span><span class="si">}</span><span class="s2">x"</span><span class="p">)</span>
|
| 6896 |
+
|
| 6897 |
+
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="se">\n</span><span class="s2">Fastest: </span><span class="si">{</span><span class="n">sorted_results</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span><span class="si">}</span><span class="s2"> (</span><span class="si">{</span><span class="n">sorted_results</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">1</span><span class="p">][</span><span class="s1">'stats'</span><span class="p">][</span><span class="s1">'avg_ms'</span><span class="p">]</span><span class="si">:</span><span class="s2">.2f</span><span class="si">}</span><span class="s2">ms avg)"</span><span class="p">)</span>
|
| 6898 |
+
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">sorted_results</span><span class="p">)</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
|
| 6899 |
+
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Slowest: </span><span class="si">{</span><span class="n">sorted_results</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span><span class="si">}</span><span class="s2"> (</span><span class="si">{</span><span class="n">sorted_results</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">][</span><span class="mi">1</span><span class="p">][</span><span class="s1">'stats'</span><span class="p">][</span><span class="s1">'avg_ms'</span><span class="p">]</span><span class="si">:</span><span class="s2">.2f</span><span class="si">}</span><span class="s2">ms avg)"</span><span class="p">)</span>
|
| 6900 |
+
<span class="n">speedup</span> <span class="o">=</span> <span class="n">sorted_results</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">][</span><span class="mi">1</span><span class="p">][</span><span class="s1">'stats'</span><span class="p">][</span><span class="s1">'avg_ms'</span><span class="p">]</span> <span class="o">/</span> <span class="n">sorted_results</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">1</span><span class="p">][</span><span class="s1">'stats'</span><span class="p">][</span><span class="s1">'avg_ms'</span><span class="p">]</span>
|
| 6901 |
+
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Max Speedup: </span><span class="si">{</span><span class="n">speedup</span><span class="si">:</span><span class="s2">.1f</span><span class="si">}</span><span class="s2">x"</span><span class="p">)</span>
|
| 6902 |
+
</pre></div>
|
| 6903 |
+
|
| 6904 |
+
<div class="code-line-highlight" id="line-highlight-visualization"></div>
|
| 6905 |
+
</div>
|
| 6906 |
+
</div>
|
| 6907 |
+
</div>
|
| 6908 |
+
<div id="output-visualization" class="cell-output">
|
| 6909 |
+
<div class="cell-stdout">Loaded /repo/moe_benchmarks/megablocks_yamoe/.uvnote/cache/274d1d4e0722f5affb811112832e03d26daafb5eaa96259e7ec575eb43a40f12/yamoe_results.json
|
| 6910 |
+
Loaded /repo/moe_benchmarks/megablocks_yamoe/.uvnote/cache/0e2a9f24cc405bb3c4ccb37530405ffe7cae24c59066185a87e856b3ac7344b3/binned_results.json
|
| 6911 |
+
Loaded /repo/moe_benchmarks/megablocks_yamoe/.uvnote/cache/b40a0492fc99c75ce021114ee849e7db60a33cfdf61891ace614b748953db1eb/gptoss_results.json
|
| 6912 |
+
Loaded /repo/moe_benchmarks/megablocks_yamoe/.uvnote/cache/ab389cf3b8cc56969604061ec8bc29a5701c53cdc24bd2682cf630b5e1eeb7bb/gptoss_training_results.json
|
| 6913 |
+
Loaded /repo/moe_benchmarks/megablocks_yamoe/.uvnote/cache/0febdf3420999533bc2e14bb2a4bffaba4af699a19ddf644f24806180c8347e1/megablocks_results.json
|
| 6914 |
+
|
| 6915 |
+
Performance Summary:
|
| 6916 |
+
Implementation Avg (ms) P95 (ms) Tokens/sec Relative Speed
|
| 6917 |
+
--------------------------------------------------------------------------------
|
| 6918 |
+
megablocks_results 3.87 8.54 25840 1.00x
|
| 6919 |
+
yamoe_results 4.25 4.27 23531 0.91x
|
| 6920 |
+
binned_results 36.54 39.00 2737 0.11x
|
| 6921 |
+
gptoss_results 45.24 48.43 2211 0.09x
|
| 6922 |
+
gptoss_training_results 45.75 50.54 2186 0.08x
|
| 6923 |
+
|
| 6924 |
+
Fastest: megablocks_results (3.87ms avg)
|
| 6925 |
+
Slowest: gptoss_training_results (45.75ms avg)
|
| 6926 |
+
Max Speedup: 11.8x
|
| 6927 |
+
</div>
|
| 6928 |
+
<div class="uv-install-logs" id="uv-logs-visualization">
|
| 6929 |
+
<div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
|
| 6930 |
+
<div class="uv-logs-content" style="display: none;">
|
| 6931 |
+
Downloading pillow (6.3MiB)
|
| 6932 |
+
Downloading kiwisolver (1.4MiB)
|
| 6933 |
+
Downloading numpy (16.2MiB)
|
| 6934 |
+
Downloading matplotlib (8.3MiB)
|
| 6935 |
+
Downloading fonttools (4.7MiB)
|
| 6936 |
+
Downloading kiwisolver
|
| 6937 |
+
Downloading pillow
|
| 6938 |
+
Downloading fonttools
|
| 6939 |
+
Downloading matplotlib
|
| 6940 |
+
Downloading numpy
|
| 6941 |
+
Installed 11 packages in 48ms
|
| 6942 |
+
</div>
|
| 6943 |
+
</div>
|
| 6944 |
+
<div class="cell-artifacts">
|
| 6945 |
+
<h4>Artifacts:</h4>
|
| 6946 |
+
<a href="artifacts/visualization/moe_performance_comparison.png" class="artifact" target="_blank">moe_performance_comparison.png</a>
|
| 6947 |
+
<div class="artifact-preview">
|
| 6948 |
+
<img src="artifacts/visualization/moe_performance_comparison.png" alt="moe_performance_comparison.png">
|
| 6949 |
+
</div>
|
| 6950 |
+
</div>
|
| 6951 |
+
</div>
|
| 6952 |
+
</div>
|
| 6953 |
</div>
|
| 6954 |
|
| 6955 |
</body>
|