drbh HF Staff commited on
Commit
b975ca1
·
verified ·
1 Parent(s): 39291b0

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ moe_benchmarks/megablocks_yamoe/artifacts/visualization/moe_performance_comparison.png filter=lfs diff=lfs merge=lfs -text
flash_attn/benchmark.html CHANGED
@@ -3,7 +3,7 @@
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>benchmark</title>
7
  <script>
8
  // Apply theme and widget visibility immediately to prevent flicker
9
  (function() {
@@ -1058,7 +1058,21 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left:
1058
  .line-numbers .line-number { line-height: var(--code-line-height) !important; }
1059
 
1060
  /* Custom CSS from frontmatter */
1061
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1062
 
1063
 
1064
 
@@ -3701,43 +3715,363 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left:
3701
  </div>
3702
 
3703
  <div class="main-content">
3704
- <hr />
3705
- <p>title: "Flash Attention Benchmark"
3706
- author: "uvnote"
3707
- theme: "dark"
3708
- syntax_theme: "monokai"
3709
- show_line_numbers: true
3710
- collapse_code: false
3711
- custom_css: |
3712
- #output-setup {
3713
- overflow-x: auto;
3714
- }
3715
- .cell-output {
3716
- overflow: scroll;
3717
- }
3718
- .cell-stdout {
3719
- width: max-content;
3720
- overflow: scroll;
3721
- }
3722
- .cell-stderr {
3723
- width: max-content;
3724
- overflow: scroll;
3725
- max-height: 300px;
3726
- }</p>
3727
- <hr />
3728
- <div class="cell cell-failed" id="cell-benchmark">
3729
  <div class="cell-header">
3730
  <span class="collapse-indicators">
3731
  <span onclick="toggleCode('benchmark')" style="cursor: pointer;">▼ code</span>
3732
  <span onclick="toggleOutput('benchmark')" style="cursor: pointer;">▼ output</span>
3733
  <span id="uv-indicator-benchmark" onclick="toggleUvLogsFromHeader('benchmark')" style="cursor: pointer;">▶ uv-logs</span>
3734
  </span> |
3735
- Cell: benchmark | 50.28s | FAILED
3736
  | <button class="run-btn" onclick="runCell('benchmark')">▶ run</button>
3737
  <button class="copy-btn" onclick="copyCell('benchmark')">Copy</button>
3738
  <a href="cells/benchmark.py" target="_blank" class="raw-btn">Raw</a>
3739
  </div>
3740
  <div id="code-benchmark" class="cell-code" data-lines="341">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3741
  <div class="code-wrap">
3742
  <div class="highlight"><pre><span></span><span class="c1"># /// script</span>
3743
  <span class="c1"># dependencies = [</span>
@@ -4085,6 +4419,7 @@ Cell: benchmark | 50.28s | FAILED
4085
  <div class="code-line-highlight" id="line-highlight-benchmark"></div>
4086
  </div>
4087
  </div>
 
4088
  <div id="output-benchmark" class="cell-output">
4089
  <div class="cell-stdout">Flash Attention 2 not found.
4090
  Flash Attention 3 not found.
@@ -4094,35 +4429,105 @@ xFormers not found.
4094
 
4095
 
4096
  ===== Testing shape: (1, 4224, 24, 128) =====
4097
- torch_cudnn : absmax=0.001547, mae=0.000075, mse=0.000000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4098
  </div>
4099
  <div class="uv-install-logs" id="uv-logs-benchmark">
4100
  <div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
4101
  <div class="uv-logs-content" style="display: none;">
4102
- Downloading networkx (1.9MiB)
4103
- Downloading sympy (6.0MiB)
4104
- Downloading pillow (6.3MiB)
4105
- Downloading matplotlib (8.3MiB)
4106
- Downloading numpy (16.2MiB)
4107
- Downloading nvidia-cufile-cu12 (1.1MiB)
4108
- Downloading fonttools (4.7MiB)
4109
- Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
4110
- Downloading nvidia-curand-cu12 (60.7MiB)
4111
- Downloading setuptools (1.1MiB)
4112
  Downloading kiwisolver (1.4MiB)
4113
- Downloading nvidia-nccl-cu12 (307.4MiB)
4114
- Downloading nvidia-cusparselt-cu12 (273.9MiB)
4115
- Downloading nvidia-cufft-cu12 (184.2MiB)
4116
  Downloading nvidia-cublas-cu12 (566.8MiB)
4117
  Downloading pandas (11.8MiB)
 
 
 
 
 
 
 
4118
  Downloading nvidia-cudnn-cu12 (674.0MiB)
 
 
 
4119
  Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
4120
- Downloading nvidia-cusolver-cu12 (255.1MiB)
 
4121
  Downloading nvidia-nvjitlink-cu12 (37.4MiB)
 
 
 
 
4122
  Downloading triton (148.3MiB)
4123
- Downloading torch (846.9MiB)
4124
- Downloading nvidia-cusparse-cu12 (274.9MiB)
4125
- Downloading hf-xet (3.0MiB)
4126
  Downloading nvidia-cufile-cu12
4127
  Downloading kiwisolver
4128
  Downloading hf-xet
@@ -4130,8 +4535,8 @@ Downloading hf-xet (3.0MiB)
4130
  Downloading networkx
4131
  Downloading fonttools
4132
  Downloading pillow
4133
- Downloading nvidia-cuda-cupti-cu12
4134
  Downloading matplotlib
 
4135
  Downloading sympy
4136
  Downloading numpy
4137
  Downloading nvidia-nvjitlink-cu12
@@ -4147,104 +4552,27 @@ Downloading hf-xet (3.0MiB)
4147
  Downloading nvidia-cublas-cu12
4148
  Downloading nvidia-cudnn-cu12
4149
  Downloading torch
4150
- Installed 49 packages in 539ms
4151
  </div>
4152
  </div>
4153
  <div class="cell-stderr">Fetching 20 files: 0%| | 0/20 [00:00&lt;?, ?it/s]
4154
- Fetching 20 files: 5%|▌ | 1/20 [00:00&lt;00:08, 2.21it/s]
4155
- Fetching 20 files: 10%|█ | 2/20 [00:02&lt;00:21, 1.17s/it]
4156
- Fetching 20 files: 100%|██████████| 20/20 [00:02&lt;00:00, 9.41it/s]
4157
 
4158
  Fetching 4 files: 0%| | 0/4 [00:00&lt;?, ?it/s]
4159
- Fetching 4 files: 25%|██▌ | 1/4 [00:00&lt;00:00, 5.28it/s]
4160
- Fetching 4 files: 50%|█████ | 2/4 [00:02&lt;00:02, 1.15s/it]
4161
- Fetching 4 files: 100%|██████████| 4/4 [00:02&lt;00:00, 1.99it/s]
4162
- /tmp/tmpyw1le_3d/cuda_utils.c:5:10: fatal error: Python.h: No such file or directory
4163
- 5 | #include &lt;Python.h&gt;
4164
- | ^~~~~~~~~~
4165
- compilation terminated.
4166
- Traceback (most recent call last):
4167
- File &quot;/repo/flash_attn/.uvnote/cells/benchmark.py&quot;, line 340, in &lt;module&gt;
4168
- correctness()
4169
- File &quot;/repo/flash_attn/.uvnote/cells/benchmark.py&quot;, line 299, in correctness
4170
- out = fn(query, key, value)
4171
- ^^^^^^^^^^^^^^^^^^^^^
4172
- File &quot;/repo/flash_attn/.uvnote/cells/benchmark.py&quot;, line 114, in _attention_torch_compile_default
4173
- return _compiled_attention_torch_default(query, key, value, backend=backend)
4174
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
4175
- File &quot;/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_dynamo/eval_frame.py&quot;, line 749, in compile_wrapper
4176
- raise e.remove_dynamo_frames() from None # see TORCHDYNAMO_VERBOSE=1
4177
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
4178
- File &quot;/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/compile_fx.py&quot;, line 923, in _compile_fx_inner
4179
- raise InductorError(e, currentframe()).with_traceback(
4180
- File &quot;/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/compile_fx.py&quot;, line 907, in _compile_fx_inner
4181
- mb_compiled_graph = fx_codegen_and_compile(
4182
- ^^^^^^^^^^^^^^^^^^^^^^^
4183
- File &quot;/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/compile_fx.py&quot;, line 1578, in fx_codegen_and_compile
4184
- return scheme.codegen_and_compile(gm, example_inputs, inputs_to_check, graph_kwargs)
4185
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
4186
- File &quot;/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/compile_fx.py&quot;, line 1456, in codegen_and_compile
4187
- compiled_module = graph.compile_to_module()
4188
- ^^^^^^^^^^^^^^^^^^^^^^^^^
4189
- File &quot;/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/graph.py&quot;, line 2293, in compile_to_module
4190
- return self._compile_to_module()
4191
- ^^^^^^^^^^^^^^^^^^^^^^^^^
4192
- File &quot;/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/graph.py&quot;, line 2299, in _compile_to_module
4193
- self.codegen_with_cpp_wrapper() if self.cpp_wrapper else self.codegen()
4194
- ^^^^^^^^^^^^^^
4195
- File &quot;/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/graph.py&quot;, line 2238, in codegen
4196
- self.scheduler.codegen()
4197
- File &quot;/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/scheduler.py&quot;, line 4598, in codegen
4198
- else self._codegen(self.nodes)
4199
- ^^^^^^^^^^^^^^^^^^^^^^^^^
4200
- File &quot;/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/scheduler.py&quot;, line 4750, in _codegen
4201
- self.get_backend(device).codegen_node(node)
4202
- File &quot;/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/codegen/cuda_combined_scheduling.py&quot;, line 107, in codegen_node
4203
- return self._triton_scheduling.codegen_node(node)
4204
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
4205
- File &quot;/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/codegen/simd.py&quot;, line 1371, in codegen_node
4206
- return self.codegen_node_schedule(
4207
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^
4208
- File &quot;/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/codegen/simd.py&quot;, line 1424, in codegen_node_schedule
4209
- src_code = kernel.codegen_kernel()
4210
- ^^^^^^^^^^^^^^^^^^^^^^^
4211
- File &quot;/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/codegen/triton.py&quot;, line 3677, in codegen_kernel
4212
- **self.inductor_meta_common(),
4213
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^
4214
- File &quot;/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/_inductor/codegen/triton.py&quot;, line 3501, in inductor_meta_common
4215
- &quot;backend_hash&quot;: torch.utils._triton.triton_hash_with_backend(),
4216
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
4217
- File &quot;/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/utils/_triton.py&quot;, line 165, in triton_hash_with_backend
4218
- backend = triton_backend()
4219
- ^^^^^^^^^^^^^^^^
4220
- File &quot;/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/torch/utils/_triton.py&quot;, line 157, in triton_backend
4221
- target = driver.active.get_current_target()
4222
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
4223
- File &quot;/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/triton/runtime/driver.py&quot;, line 30, in __getattr__
4224
- return getattr(self._initialize_obj(), name)
4225
- ^^^^^^^^^^^^^^^^^^^^^^
4226
- File &quot;/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/triton/runtime/driver.py&quot;, line 26, in _initialize_obj
4227
- self._obj = self._init_fn()
4228
- ^^^^^^^^^^^^^^^
4229
- File &quot;/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/triton/runtime/driver.py&quot;, line 12, in _create_driver
4230
- return active_drivers[0]()
4231
- ^^^^^^^^^^^^^^^^^^^
4232
- File &quot;/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/triton/backends/nvidia/driver.py&quot;, line 715, in __init__
4233
- self.utils = CudaUtils() # TODO: make static
4234
- ^^^^^^^^^^^
4235
- File &quot;/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/triton/backends/nvidia/driver.py&quot;, line 62, in __init__
4236
- mod = compile_module_from_src(
4237
- ^^^^^^^^^^^^^^^^^^^^^^^^
4238
- File &quot;/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/triton/runtime/build.py&quot;, line 88, in compile_module_from_src
4239
- so = _build(name, src_path, tmpdir, library_dirs or [], include_dirs or [], libraries or [])
4240
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
4241
- File &quot;/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/triton/runtime/build.py&quot;, line 51, in _build
4242
- subprocess.check_call(cc_cmd, stdout=subprocess.DEVNULL)
4243
- File &quot;/usr/lib/python3.11/subprocess.py&quot;, line 413, in check_call
4244
- raise CalledProcessError(retcode, cmd)
4245
- torch._inductor.exc.InductorError: CalledProcessError: Command &#x27;[&#x27;/usr/bin/gcc&#x27;, &#x27;/tmp/tmpyw1le_3d/cuda_utils.c&#x27;, &#x27;-O3&#x27;, &#x27;-shared&#x27;, &#x27;-fPIC&#x27;, &#x27;-Wno-psabi&#x27;, &#x27;-o&#x27;, &#x27;/tmp/tmpyw1le_3d/cuda_utils.cpython-311-x86_64-linux-gnu.so&#x27;, &#x27;-lcuda&#x27;, &#x27;-L/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/triton/backends/nvidia/lib&#x27;, &#x27;-L/usr/lib/x86_64-linux-gnu&#x27;, &#x27;-I/tmp/uvnote-run-08by6gh7/home/.cache/uv/environments-v2/benchmark-bfbc462482636f25/lib/python3.11/site-packages/triton/backends/nvidia/include&#x27;, &#x27;-I/tmp/tmpyw1le_3d&#x27;, &#x27;-I/usr/include/python3.11&#x27;]&#x27; returned non-zero exit status 1.
4246
-
4247
- Set TORCHDYNAMO_VERBOSE=1 for the internal stack trace (please do this especially if you&#x27;re reporting a bug to PyTorch). For even more developer context, set TORCH_LOGS=&quot;+dynamo&quot;</div>
4248
  </div>
4249
  </div>
4250
  </div>
 
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Flash Attention Benchmark</title>
7
  <script>
8
  // Apply theme and widget visibility immediately to prevent flicker
9
  (function() {
 
1058
  .line-numbers .line-number { line-height: var(--code-line-height) !important; }
1059
 
1060
  /* Custom CSS from frontmatter */
1061
+ #output-setup {
1062
+ overflow-x: auto;
1063
+ }
1064
+ .cell-output {
1065
+ overflow: scroll;
1066
+ }
1067
+ .cell-stdout {
1068
+ width: max-content;
1069
+ overflow: scroll;
1070
+ }
1071
+ .cell-stderr {
1072
+ width: max-content;
1073
+ overflow: scroll;
1074
+ max-height: 300px;
1075
+ }
1076
 
1077
 
1078
 
 
3715
  </div>
3716
 
3717
  <div class="main-content">
3718
+ <div class="cell" id="cell-benchmark">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3719
  <div class="cell-header">
3720
  <span class="collapse-indicators">
3721
  <span onclick="toggleCode('benchmark')" style="cursor: pointer;">▼ code</span>
3722
  <span onclick="toggleOutput('benchmark')" style="cursor: pointer;">▼ output</span>
3723
  <span id="uv-indicator-benchmark" onclick="toggleUvLogsFromHeader('benchmark')" style="cursor: pointer;">▶ uv-logs</span>
3724
  </span> |
3725
+ Cell: benchmark | 77.34s
3726
  | <button class="run-btn" onclick="runCell('benchmark')">▶ run</button>
3727
  <button class="copy-btn" onclick="copyCell('benchmark')">Copy</button>
3728
  <a href="cells/benchmark.py" target="_blank" class="raw-btn">Raw</a>
3729
  </div>
3730
  <div id="code-benchmark" class="cell-code" data-lines="341">
3731
+ <div class="highlight-with-lines">
3732
+ <div class="line-numbers" id="lines-benchmark">
3733
+ <a class="line-number" data-cell="benchmark" data-line="1" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 1, true);">1</a>
3734
+ <a class="line-number" data-cell="benchmark" data-line="2" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 2, true);">2</a>
3735
+ <a class="line-number" data-cell="benchmark" data-line="3" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 3, true);">3</a>
3736
+ <a class="line-number" data-cell="benchmark" data-line="4" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 4, true);">4</a>
3737
+ <a class="line-number" data-cell="benchmark" data-line="5" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 5, true);">5</a>
3738
+ <a class="line-number" data-cell="benchmark" data-line="6" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 6, true);">6</a>
3739
+ <a class="line-number" data-cell="benchmark" data-line="7" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 7, true);">7</a>
3740
+ <a class="line-number" data-cell="benchmark" data-line="8" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 8, true);">8</a>
3741
+ <a class="line-number" data-cell="benchmark" data-line="9" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 9, true);">9</a>
3742
+ <a class="line-number" data-cell="benchmark" data-line="10" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 10, true);">10</a>
3743
+ <a class="line-number" data-cell="benchmark" data-line="11" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 11, true);">11</a>
3744
+ <a class="line-number" data-cell="benchmark" data-line="12" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 12, true);">12</a>
3745
+ <a class="line-number" data-cell="benchmark" data-line="13" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 13, true);">13</a>
3746
+ <a class="line-number" data-cell="benchmark" data-line="14" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 14, true);">14</a>
3747
+ <a class="line-number" data-cell="benchmark" data-line="15" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 15, true);">15</a>
3748
+ <a class="line-number" data-cell="benchmark" data-line="16" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 16, true);">16</a>
3749
+ <a class="line-number" data-cell="benchmark" data-line="17" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 17, true);">17</a>
3750
+ <a class="line-number" data-cell="benchmark" data-line="18" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 18, true);">18</a>
3751
+ <a class="line-number" data-cell="benchmark" data-line="19" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 19, true);">19</a>
3752
+ <a class="line-number" data-cell="benchmark" data-line="20" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 20, true);">20</a>
3753
+ <a class="line-number" data-cell="benchmark" data-line="21" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 21, true);">21</a>
3754
+ <a class="line-number" data-cell="benchmark" data-line="22" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 22, true);">22</a>
3755
+ <a class="line-number" data-cell="benchmark" data-line="23" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 23, true);">23</a>
3756
+ <a class="line-number" data-cell="benchmark" data-line="24" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 24, true);">24</a>
3757
+ <a class="line-number" data-cell="benchmark" data-line="25" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 25, true);">25</a>
3758
+ <a class="line-number" data-cell="benchmark" data-line="26" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 26, true);">26</a>
3759
+ <a class="line-number" data-cell="benchmark" data-line="27" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 27, true);">27</a>
3760
+ <a class="line-number" data-cell="benchmark" data-line="28" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 28, true);">28</a>
3761
+ <a class="line-number" data-cell="benchmark" data-line="29" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 29, true);">29</a>
3762
+ <a class="line-number" data-cell="benchmark" data-line="30" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 30, true);">30</a>
3763
+ <a class="line-number" data-cell="benchmark" data-line="31" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 31, true);">31</a>
3764
+ <a class="line-number" data-cell="benchmark" data-line="32" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 32, true);">32</a>
3765
+ <a class="line-number" data-cell="benchmark" data-line="33" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 33, true);">33</a>
3766
+ <a class="line-number" data-cell="benchmark" data-line="34" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 34, true);">34</a>
3767
+ <a class="line-number" data-cell="benchmark" data-line="35" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 35, true);">35</a>
3768
+ <a class="line-number" data-cell="benchmark" data-line="36" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 36, true);">36</a>
3769
+ <a class="line-number" data-cell="benchmark" data-line="37" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 37, true);">37</a>
3770
+ <a class="line-number" data-cell="benchmark" data-line="38" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 38, true);">38</a>
3771
+ <a class="line-number" data-cell="benchmark" data-line="39" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 39, true);">39</a>
3772
+ <a class="line-number" data-cell="benchmark" data-line="40" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 40, true);">40</a>
3773
+ <a class="line-number" data-cell="benchmark" data-line="41" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 41, true);">41</a>
3774
+ <a class="line-number" data-cell="benchmark" data-line="42" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 42, true);">42</a>
3775
+ <a class="line-number" data-cell="benchmark" data-line="43" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 43, true);">43</a>
3776
+ <a class="line-number" data-cell="benchmark" data-line="44" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 44, true);">44</a>
3777
+ <a class="line-number" data-cell="benchmark" data-line="45" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 45, true);">45</a>
3778
+ <a class="line-number" data-cell="benchmark" data-line="46" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 46, true);">46</a>
3779
+ <a class="line-number" data-cell="benchmark" data-line="47" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 47, true);">47</a>
3780
+ <a class="line-number" data-cell="benchmark" data-line="48" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 48, true);">48</a>
3781
+ <a class="line-number" data-cell="benchmark" data-line="49" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 49, true);">49</a>
3782
+ <a class="line-number" data-cell="benchmark" data-line="50" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 50, true);">50</a>
3783
+ <a class="line-number" data-cell="benchmark" data-line="51" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 51, true);">51</a>
3784
+ <a class="line-number" data-cell="benchmark" data-line="52" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 52, true);">52</a>
3785
+ <a class="line-number" data-cell="benchmark" data-line="53" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 53, true);">53</a>
3786
+ <a class="line-number" data-cell="benchmark" data-line="54" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 54, true);">54</a>
3787
+ <a class="line-number" data-cell="benchmark" data-line="55" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 55, true);">55</a>
3788
+ <a class="line-number" data-cell="benchmark" data-line="56" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 56, true);">56</a>
3789
+ <a class="line-number" data-cell="benchmark" data-line="57" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 57, true);">57</a>
3790
+ <a class="line-number" data-cell="benchmark" data-line="58" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 58, true);">58</a>
3791
+ <a class="line-number" data-cell="benchmark" data-line="59" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 59, true);">59</a>
3792
+ <a class="line-number" data-cell="benchmark" data-line="60" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 60, true);">60</a>
3793
+ <a class="line-number" data-cell="benchmark" data-line="61" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 61, true);">61</a>
3794
+ <a class="line-number" data-cell="benchmark" data-line="62" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 62, true);">62</a>
3795
+ <a class="line-number" data-cell="benchmark" data-line="63" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 63, true);">63</a>
3796
+ <a class="line-number" data-cell="benchmark" data-line="64" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 64, true);">64</a>
3797
+ <a class="line-number" data-cell="benchmark" data-line="65" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 65, true);">65</a>
3798
+ <a class="line-number" data-cell="benchmark" data-line="66" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 66, true);">66</a>
3799
+ <a class="line-number" data-cell="benchmark" data-line="67" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 67, true);">67</a>
3800
+ <a class="line-number" data-cell="benchmark" data-line="68" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 68, true);">68</a>
3801
+ <a class="line-number" data-cell="benchmark" data-line="69" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 69, true);">69</a>
3802
+ <a class="line-number" data-cell="benchmark" data-line="70" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 70, true);">70</a>
3803
+ <a class="line-number" data-cell="benchmark" data-line="71" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 71, true);">71</a>
3804
+ <a class="line-number" data-cell="benchmark" data-line="72" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 72, true);">72</a>
3805
+ <a class="line-number" data-cell="benchmark" data-line="73" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 73, true);">73</a>
3806
+ <a class="line-number" data-cell="benchmark" data-line="74" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 74, true);">74</a>
3807
+ <a class="line-number" data-cell="benchmark" data-line="75" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 75, true);">75</a>
3808
+ <a class="line-number" data-cell="benchmark" data-line="76" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 76, true);">76</a>
3809
+ <a class="line-number" data-cell="benchmark" data-line="77" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 77, true);">77</a>
3810
+ <a class="line-number" data-cell="benchmark" data-line="78" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 78, true);">78</a>
3811
+ <a class="line-number" data-cell="benchmark" data-line="79" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 79, true);">79</a>
3812
+ <a class="line-number" data-cell="benchmark" data-line="80" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 80, true);">80</a>
3813
+ <a class="line-number" data-cell="benchmark" data-line="81" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 81, true);">81</a>
3814
+ <a class="line-number" data-cell="benchmark" data-line="82" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 82, true);">82</a>
3815
+ <a class="line-number" data-cell="benchmark" data-line="83" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 83, true);">83</a>
3816
+ <a class="line-number" data-cell="benchmark" data-line="84" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 84, true);">84</a>
3817
+ <a class="line-number" data-cell="benchmark" data-line="85" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 85, true);">85</a>
3818
+ <a class="line-number" data-cell="benchmark" data-line="86" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 86, true);">86</a>
3819
+ <a class="line-number" data-cell="benchmark" data-line="87" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 87, true);">87</a>
3820
+ <a class="line-number" data-cell="benchmark" data-line="88" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 88, true);">88</a>
3821
+ <a class="line-number" data-cell="benchmark" data-line="89" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 89, true);">89</a>
3822
+ <a class="line-number" data-cell="benchmark" data-line="90" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 90, true);">90</a>
3823
+ <a class="line-number" data-cell="benchmark" data-line="91" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 91, true);">91</a>
3824
+ <a class="line-number" data-cell="benchmark" data-line="92" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 92, true);">92</a>
3825
+ <a class="line-number" data-cell="benchmark" data-line="93" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 93, true);">93</a>
3826
+ <a class="line-number" data-cell="benchmark" data-line="94" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 94, true);">94</a>
3827
+ <a class="line-number" data-cell="benchmark" data-line="95" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 95, true);">95</a>
3828
+ <a class="line-number" data-cell="benchmark" data-line="96" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 96, true);">96</a>
3829
+ <a class="line-number" data-cell="benchmark" data-line="97" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 97, true);">97</a>
3830
+ <a class="line-number" data-cell="benchmark" data-line="98" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 98, true);">98</a>
3831
+ <a class="line-number" data-cell="benchmark" data-line="99" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 99, true);">99</a>
3832
+ <a class="line-number" data-cell="benchmark" data-line="100" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 100, true);">100</a>
3833
+ <a class="line-number" data-cell="benchmark" data-line="101" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 101, true);">101</a>
3834
+ <a class="line-number" data-cell="benchmark" data-line="102" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 102, true);">102</a>
3835
+ <a class="line-number" data-cell="benchmark" data-line="103" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 103, true);">103</a>
3836
+ <a class="line-number" data-cell="benchmark" data-line="104" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 104, true);">104</a>
3837
+ <a class="line-number" data-cell="benchmark" data-line="105" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 105, true);">105</a>
3838
+ <a class="line-number" data-cell="benchmark" data-line="106" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 106, true);">106</a>
3839
+ <a class="line-number" data-cell="benchmark" data-line="107" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 107, true);">107</a>
3840
+ <a class="line-number" data-cell="benchmark" data-line="108" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 108, true);">108</a>
3841
+ <a class="line-number" data-cell="benchmark" data-line="109" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 109, true);">109</a>
3842
+ <a class="line-number" data-cell="benchmark" data-line="110" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 110, true);">110</a>
3843
+ <a class="line-number" data-cell="benchmark" data-line="111" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 111, true);">111</a>
3844
+ <a class="line-number" data-cell="benchmark" data-line="112" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 112, true);">112</a>
3845
+ <a class="line-number" data-cell="benchmark" data-line="113" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 113, true);">113</a>
3846
+ <a class="line-number" data-cell="benchmark" data-line="114" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 114, true);">114</a>
3847
+ <a class="line-number" data-cell="benchmark" data-line="115" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 115, true);">115</a>
3848
+ <a class="line-number" data-cell="benchmark" data-line="116" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 116, true);">116</a>
3849
+ <a class="line-number" data-cell="benchmark" data-line="117" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 117, true);">117</a>
3850
+ <a class="line-number" data-cell="benchmark" data-line="118" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 118, true);">118</a>
3851
+ <a class="line-number" data-cell="benchmark" data-line="119" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 119, true);">119</a>
3852
+ <a class="line-number" data-cell="benchmark" data-line="120" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 120, true);">120</a>
3853
+ <a class="line-number" data-cell="benchmark" data-line="121" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 121, true);">121</a>
3854
+ <a class="line-number" data-cell="benchmark" data-line="122" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 122, true);">122</a>
3855
+ <a class="line-number" data-cell="benchmark" data-line="123" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 123, true);">123</a>
3856
+ <a class="line-number" data-cell="benchmark" data-line="124" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 124, true);">124</a>
3857
+ <a class="line-number" data-cell="benchmark" data-line="125" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 125, true);">125</a>
3858
+ <a class="line-number" data-cell="benchmark" data-line="126" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 126, true);">126</a>
3859
+ <a class="line-number" data-cell="benchmark" data-line="127" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 127, true);">127</a>
3860
+ <a class="line-number" data-cell="benchmark" data-line="128" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 128, true);">128</a>
3861
+ <a class="line-number" data-cell="benchmark" data-line="129" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 129, true);">129</a>
3862
+ <a class="line-number" data-cell="benchmark" data-line="130" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 130, true);">130</a>
3863
+ <a class="line-number" data-cell="benchmark" data-line="131" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 131, true);">131</a>
3864
+ <a class="line-number" data-cell="benchmark" data-line="132" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 132, true);">132</a>
3865
+ <a class="line-number" data-cell="benchmark" data-line="133" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 133, true);">133</a>
3866
+ <a class="line-number" data-cell="benchmark" data-line="134" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 134, true);">134</a>
3867
+ <a class="line-number" data-cell="benchmark" data-line="135" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 135, true);">135</a>
3868
+ <a class="line-number" data-cell="benchmark" data-line="136" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 136, true);">136</a>
3869
+ <a class="line-number" data-cell="benchmark" data-line="137" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 137, true);">137</a>
3870
+ <a class="line-number" data-cell="benchmark" data-line="138" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 138, true);">138</a>
3871
+ <a class="line-number" data-cell="benchmark" data-line="139" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 139, true);">139</a>
3872
+ <a class="line-number" data-cell="benchmark" data-line="140" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 140, true);">140</a>
3873
+ <a class="line-number" data-cell="benchmark" data-line="141" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 141, true);">141</a>
3874
+ <a class="line-number" data-cell="benchmark" data-line="142" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 142, true);">142</a>
3875
+ <a class="line-number" data-cell="benchmark" data-line="143" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 143, true);">143</a>
3876
+ <a class="line-number" data-cell="benchmark" data-line="144" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 144, true);">144</a>
3877
+ <a class="line-number" data-cell="benchmark" data-line="145" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 145, true);">145</a>
3878
+ <a class="line-number" data-cell="benchmark" data-line="146" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 146, true);">146</a>
3879
+ <a class="line-number" data-cell="benchmark" data-line="147" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 147, true);">147</a>
3880
+ <a class="line-number" data-cell="benchmark" data-line="148" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 148, true);">148</a>
3881
+ <a class="line-number" data-cell="benchmark" data-line="149" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 149, true);">149</a>
3882
+ <a class="line-number" data-cell="benchmark" data-line="150" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 150, true);">150</a>
3883
+ <a class="line-number" data-cell="benchmark" data-line="151" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 151, true);">151</a>
3884
+ <a class="line-number" data-cell="benchmark" data-line="152" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 152, true);">152</a>
3885
+ <a class="line-number" data-cell="benchmark" data-line="153" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 153, true);">153</a>
3886
+ <a class="line-number" data-cell="benchmark" data-line="154" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 154, true);">154</a>
3887
+ <a class="line-number" data-cell="benchmark" data-line="155" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 155, true);">155</a>
3888
+ <a class="line-number" data-cell="benchmark" data-line="156" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 156, true);">156</a>
3889
+ <a class="line-number" data-cell="benchmark" data-line="157" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 157, true);">157</a>
3890
+ <a class="line-number" data-cell="benchmark" data-line="158" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 158, true);">158</a>
3891
+ <a class="line-number" data-cell="benchmark" data-line="159" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 159, true);">159</a>
3892
+ <a class="line-number" data-cell="benchmark" data-line="160" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 160, true);">160</a>
3893
+ <a class="line-number" data-cell="benchmark" data-line="161" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 161, true);">161</a>
3894
+ <a class="line-number" data-cell="benchmark" data-line="162" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 162, true);">162</a>
3895
+ <a class="line-number" data-cell="benchmark" data-line="163" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 163, true);">163</a>
3896
+ <a class="line-number" data-cell="benchmark" data-line="164" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 164, true);">164</a>
3897
+ <a class="line-number" data-cell="benchmark" data-line="165" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 165, true);">165</a>
3898
+ <a class="line-number" data-cell="benchmark" data-line="166" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 166, true);">166</a>
3899
+ <a class="line-number" data-cell="benchmark" data-line="167" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 167, true);">167</a>
3900
+ <a class="line-number" data-cell="benchmark" data-line="168" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 168, true);">168</a>
3901
+ <a class="line-number" data-cell="benchmark" data-line="169" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 169, true);">169</a>
3902
+ <a class="line-number" data-cell="benchmark" data-line="170" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 170, true);">170</a>
3903
+ <a class="line-number" data-cell="benchmark" data-line="171" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 171, true);">171</a>
3904
+ <a class="line-number" data-cell="benchmark" data-line="172" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 172, true);">172</a>
3905
+ <a class="line-number" data-cell="benchmark" data-line="173" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 173, true);">173</a>
3906
+ <a class="line-number" data-cell="benchmark" data-line="174" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 174, true);">174</a>
3907
+ <a class="line-number" data-cell="benchmark" data-line="175" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 175, true);">175</a>
3908
+ <a class="line-number" data-cell="benchmark" data-line="176" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 176, true);">176</a>
3909
+ <a class="line-number" data-cell="benchmark" data-line="177" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 177, true);">177</a>
3910
+ <a class="line-number" data-cell="benchmark" data-line="178" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 178, true);">178</a>
3911
+ <a class="line-number" data-cell="benchmark" data-line="179" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 179, true);">179</a>
3912
+ <a class="line-number" data-cell="benchmark" data-line="180" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 180, true);">180</a>
3913
+ <a class="line-number" data-cell="benchmark" data-line="181" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 181, true);">181</a>
3914
+ <a class="line-number" data-cell="benchmark" data-line="182" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 182, true);">182</a>
3915
+ <a class="line-number" data-cell="benchmark" data-line="183" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 183, true);">183</a>
3916
+ <a class="line-number" data-cell="benchmark" data-line="184" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 184, true);">184</a>
3917
+ <a class="line-number" data-cell="benchmark" data-line="185" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 185, true);">185</a>
3918
+ <a class="line-number" data-cell="benchmark" data-line="186" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 186, true);">186</a>
3919
+ <a class="line-number" data-cell="benchmark" data-line="187" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 187, true);">187</a>
3920
+ <a class="line-number" data-cell="benchmark" data-line="188" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 188, true);">188</a>
3921
+ <a class="line-number" data-cell="benchmark" data-line="189" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 189, true);">189</a>
3922
+ <a class="line-number" data-cell="benchmark" data-line="190" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 190, true);">190</a>
3923
+ <a class="line-number" data-cell="benchmark" data-line="191" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 191, true);">191</a>
3924
+ <a class="line-number" data-cell="benchmark" data-line="192" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 192, true);">192</a>
3925
+ <a class="line-number" data-cell="benchmark" data-line="193" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 193, true);">193</a>
3926
+ <a class="line-number" data-cell="benchmark" data-line="194" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 194, true);">194</a>
3927
+ <a class="line-number" data-cell="benchmark" data-line="195" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 195, true);">195</a>
3928
+ <a class="line-number" data-cell="benchmark" data-line="196" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 196, true);">196</a>
3929
+ <a class="line-number" data-cell="benchmark" data-line="197" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 197, true);">197</a>
3930
+ <a class="line-number" data-cell="benchmark" data-line="198" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 198, true);">198</a>
3931
+ <a class="line-number" data-cell="benchmark" data-line="199" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 199, true);">199</a>
3932
+ <a class="line-number" data-cell="benchmark" data-line="200" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 200, true);">200</a>
3933
+ <a class="line-number" data-cell="benchmark" data-line="201" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 201, true);">201</a>
3934
+ <a class="line-number" data-cell="benchmark" data-line="202" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 202, true);">202</a>
3935
+ <a class="line-number" data-cell="benchmark" data-line="203" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 203, true);">203</a>
3936
+ <a class="line-number" data-cell="benchmark" data-line="204" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 204, true);">204</a>
3937
+ <a class="line-number" data-cell="benchmark" data-line="205" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 205, true);">205</a>
3938
+ <a class="line-number" data-cell="benchmark" data-line="206" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 206, true);">206</a>
3939
+ <a class="line-number" data-cell="benchmark" data-line="207" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 207, true);">207</a>
3940
+ <a class="line-number" data-cell="benchmark" data-line="208" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 208, true);">208</a>
3941
+ <a class="line-number" data-cell="benchmark" data-line="209" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 209, true);">209</a>
3942
+ <a class="line-number" data-cell="benchmark" data-line="210" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 210, true);">210</a>
3943
+ <a class="line-number" data-cell="benchmark" data-line="211" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 211, true);">211</a>
3944
+ <a class="line-number" data-cell="benchmark" data-line="212" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 212, true);">212</a>
3945
+ <a class="line-number" data-cell="benchmark" data-line="213" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 213, true);">213</a>
3946
+ <a class="line-number" data-cell="benchmark" data-line="214" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 214, true);">214</a>
3947
+ <a class="line-number" data-cell="benchmark" data-line="215" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 215, true);">215</a>
3948
+ <a class="line-number" data-cell="benchmark" data-line="216" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 216, true);">216</a>
3949
+ <a class="line-number" data-cell="benchmark" data-line="217" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 217, true);">217</a>
3950
+ <a class="line-number" data-cell="benchmark" data-line="218" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 218, true);">218</a>
3951
+ <a class="line-number" data-cell="benchmark" data-line="219" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 219, true);">219</a>
3952
+ <a class="line-number" data-cell="benchmark" data-line="220" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 220, true);">220</a>
3953
+ <a class="line-number" data-cell="benchmark" data-line="221" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 221, true);">221</a>
3954
+ <a class="line-number" data-cell="benchmark" data-line="222" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 222, true);">222</a>
3955
+ <a class="line-number" data-cell="benchmark" data-line="223" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 223, true);">223</a>
3956
+ <a class="line-number" data-cell="benchmark" data-line="224" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 224, true);">224</a>
3957
+ <a class="line-number" data-cell="benchmark" data-line="225" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 225, true);">225</a>
3958
+ <a class="line-number" data-cell="benchmark" data-line="226" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 226, true);">226</a>
3959
+ <a class="line-number" data-cell="benchmark" data-line="227" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 227, true);">227</a>
3960
+ <a class="line-number" data-cell="benchmark" data-line="228" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 228, true);">228</a>
3961
+ <a class="line-number" data-cell="benchmark" data-line="229" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 229, true);">229</a>
3962
+ <a class="line-number" data-cell="benchmark" data-line="230" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 230, true);">230</a>
3963
+ <a class="line-number" data-cell="benchmark" data-line="231" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 231, true);">231</a>
3964
+ <a class="line-number" data-cell="benchmark" data-line="232" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 232, true);">232</a>
3965
+ <a class="line-number" data-cell="benchmark" data-line="233" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 233, true);">233</a>
3966
+ <a class="line-number" data-cell="benchmark" data-line="234" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 234, true);">234</a>
3967
+ <a class="line-number" data-cell="benchmark" data-line="235" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 235, true);">235</a>
3968
+ <a class="line-number" data-cell="benchmark" data-line="236" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 236, true);">236</a>
3969
+ <a class="line-number" data-cell="benchmark" data-line="237" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 237, true);">237</a>
3970
+ <a class="line-number" data-cell="benchmark" data-line="238" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 238, true);">238</a>
3971
+ <a class="line-number" data-cell="benchmark" data-line="239" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 239, true);">239</a>
3972
+ <a class="line-number" data-cell="benchmark" data-line="240" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 240, true);">240</a>
3973
+ <a class="line-number" data-cell="benchmark" data-line="241" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 241, true);">241</a>
3974
+ <a class="line-number" data-cell="benchmark" data-line="242" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 242, true);">242</a>
3975
+ <a class="line-number" data-cell="benchmark" data-line="243" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 243, true);">243</a>
3976
+ <a class="line-number" data-cell="benchmark" data-line="244" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 244, true);">244</a>
3977
+ <a class="line-number" data-cell="benchmark" data-line="245" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 245, true);">245</a>
3978
+ <a class="line-number" data-cell="benchmark" data-line="246" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 246, true);">246</a>
3979
+ <a class="line-number" data-cell="benchmark" data-line="247" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 247, true);">247</a>
3980
+ <a class="line-number" data-cell="benchmark" data-line="248" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 248, true);">248</a>
3981
+ <a class="line-number" data-cell="benchmark" data-line="249" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 249, true);">249</a>
3982
+ <a class="line-number" data-cell="benchmark" data-line="250" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 250, true);">250</a>
3983
+ <a class="line-number" data-cell="benchmark" data-line="251" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 251, true);">251</a>
3984
+ <a class="line-number" data-cell="benchmark" data-line="252" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 252, true);">252</a>
3985
+ <a class="line-number" data-cell="benchmark" data-line="253" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 253, true);">253</a>
3986
+ <a class="line-number" data-cell="benchmark" data-line="254" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 254, true);">254</a>
3987
+ <a class="line-number" data-cell="benchmark" data-line="255" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 255, true);">255</a>
3988
+ <a class="line-number" data-cell="benchmark" data-line="256" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 256, true);">256</a>
3989
+ <a class="line-number" data-cell="benchmark" data-line="257" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 257, true);">257</a>
3990
+ <a class="line-number" data-cell="benchmark" data-line="258" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 258, true);">258</a>
3991
+ <a class="line-number" data-cell="benchmark" data-line="259" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 259, true);">259</a>
3992
+ <a class="line-number" data-cell="benchmark" data-line="260" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 260, true);">260</a>
3993
+ <a class="line-number" data-cell="benchmark" data-line="261" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 261, true);">261</a>
3994
+ <a class="line-number" data-cell="benchmark" data-line="262" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 262, true);">262</a>
3995
+ <a class="line-number" data-cell="benchmark" data-line="263" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 263, true);">263</a>
3996
+ <a class="line-number" data-cell="benchmark" data-line="264" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 264, true);">264</a>
3997
+ <a class="line-number" data-cell="benchmark" data-line="265" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 265, true);">265</a>
3998
+ <a class="line-number" data-cell="benchmark" data-line="266" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 266, true);">266</a>
3999
+ <a class="line-number" data-cell="benchmark" data-line="267" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 267, true);">267</a>
4000
+ <a class="line-number" data-cell="benchmark" data-line="268" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 268, true);">268</a>
4001
+ <a class="line-number" data-cell="benchmark" data-line="269" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 269, true);">269</a>
4002
+ <a class="line-number" data-cell="benchmark" data-line="270" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 270, true);">270</a>
4003
+ <a class="line-number" data-cell="benchmark" data-line="271" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 271, true);">271</a>
4004
+ <a class="line-number" data-cell="benchmark" data-line="272" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 272, true);">272</a>
4005
+ <a class="line-number" data-cell="benchmark" data-line="273" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 273, true);">273</a>
4006
+ <a class="line-number" data-cell="benchmark" data-line="274" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 274, true);">274</a>
4007
+ <a class="line-number" data-cell="benchmark" data-line="275" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 275, true);">275</a>
4008
+ <a class="line-number" data-cell="benchmark" data-line="276" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 276, true);">276</a>
4009
+ <a class="line-number" data-cell="benchmark" data-line="277" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 277, true);">277</a>
4010
+ <a class="line-number" data-cell="benchmark" data-line="278" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 278, true);">278</a>
4011
+ <a class="line-number" data-cell="benchmark" data-line="279" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 279, true);">279</a>
4012
+ <a class="line-number" data-cell="benchmark" data-line="280" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 280, true);">280</a>
4013
+ <a class="line-number" data-cell="benchmark" data-line="281" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 281, true);">281</a>
4014
+ <a class="line-number" data-cell="benchmark" data-line="282" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 282, true);">282</a>
4015
+ <a class="line-number" data-cell="benchmark" data-line="283" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 283, true);">283</a>
4016
+ <a class="line-number" data-cell="benchmark" data-line="284" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 284, true);">284</a>
4017
+ <a class="line-number" data-cell="benchmark" data-line="285" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 285, true);">285</a>
4018
+ <a class="line-number" data-cell="benchmark" data-line="286" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 286, true);">286</a>
4019
+ <a class="line-number" data-cell="benchmark" data-line="287" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 287, true);">287</a>
4020
+ <a class="line-number" data-cell="benchmark" data-line="288" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 288, true);">288</a>
4021
+ <a class="line-number" data-cell="benchmark" data-line="289" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 289, true);">289</a>
4022
+ <a class="line-number" data-cell="benchmark" data-line="290" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 290, true);">290</a>
4023
+ <a class="line-number" data-cell="benchmark" data-line="291" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 291, true);">291</a>
4024
+ <a class="line-number" data-cell="benchmark" data-line="292" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 292, true);">292</a>
4025
+ <a class="line-number" data-cell="benchmark" data-line="293" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 293, true);">293</a>
4026
+ <a class="line-number" data-cell="benchmark" data-line="294" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 294, true);">294</a>
4027
+ <a class="line-number" data-cell="benchmark" data-line="295" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 295, true);">295</a>
4028
+ <a class="line-number" data-cell="benchmark" data-line="296" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 296, true);">296</a>
4029
+ <a class="line-number" data-cell="benchmark" data-line="297" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 297, true);">297</a>
4030
+ <a class="line-number" data-cell="benchmark" data-line="298" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 298, true);">298</a>
4031
+ <a class="line-number" data-cell="benchmark" data-line="299" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 299, true);">299</a>
4032
+ <a class="line-number" data-cell="benchmark" data-line="300" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 300, true);">300</a>
4033
+ <a class="line-number" data-cell="benchmark" data-line="301" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 301, true);">301</a>
4034
+ <a class="line-number" data-cell="benchmark" data-line="302" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 302, true);">302</a>
4035
+ <a class="line-number" data-cell="benchmark" data-line="303" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 303, true);">303</a>
4036
+ <a class="line-number" data-cell="benchmark" data-line="304" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 304, true);">304</a>
4037
+ <a class="line-number" data-cell="benchmark" data-line="305" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 305, true);">305</a>
4038
+ <a class="line-number" data-cell="benchmark" data-line="306" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 306, true);">306</a>
4039
+ <a class="line-number" data-cell="benchmark" data-line="307" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 307, true);">307</a>
4040
+ <a class="line-number" data-cell="benchmark" data-line="308" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 308, true);">308</a>
4041
+ <a class="line-number" data-cell="benchmark" data-line="309" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 309, true);">309</a>
4042
+ <a class="line-number" data-cell="benchmark" data-line="310" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 310, true);">310</a>
4043
+ <a class="line-number" data-cell="benchmark" data-line="311" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 311, true);">311</a>
4044
+ <a class="line-number" data-cell="benchmark" data-line="312" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 312, true);">312</a>
4045
+ <a class="line-number" data-cell="benchmark" data-line="313" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 313, true);">313</a>
4046
+ <a class="line-number" data-cell="benchmark" data-line="314" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 314, true);">314</a>
4047
+ <a class="line-number" data-cell="benchmark" data-line="315" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 315, true);">315</a>
4048
+ <a class="line-number" data-cell="benchmark" data-line="316" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 316, true);">316</a>
4049
+ <a class="line-number" data-cell="benchmark" data-line="317" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 317, true);">317</a>
4050
+ <a class="line-number" data-cell="benchmark" data-line="318" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 318, true);">318</a>
4051
+ <a class="line-number" data-cell="benchmark" data-line="319" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 319, true);">319</a>
4052
+ <a class="line-number" data-cell="benchmark" data-line="320" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 320, true);">320</a>
4053
+ <a class="line-number" data-cell="benchmark" data-line="321" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 321, true);">321</a>
4054
+ <a class="line-number" data-cell="benchmark" data-line="322" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 322, true);">322</a>
4055
+ <a class="line-number" data-cell="benchmark" data-line="323" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 323, true);">323</a>
4056
+ <a class="line-number" data-cell="benchmark" data-line="324" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 324, true);">324</a>
4057
+ <a class="line-number" data-cell="benchmark" data-line="325" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 325, true);">325</a>
4058
+ <a class="line-number" data-cell="benchmark" data-line="326" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 326, true);">326</a>
4059
+ <a class="line-number" data-cell="benchmark" data-line="327" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 327, true);">327</a>
4060
+ <a class="line-number" data-cell="benchmark" data-line="328" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 328, true);">328</a>
4061
+ <a class="line-number" data-cell="benchmark" data-line="329" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 329, true);">329</a>
4062
+ <a class="line-number" data-cell="benchmark" data-line="330" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 330, true);">330</a>
4063
+ <a class="line-number" data-cell="benchmark" data-line="331" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 331, true);">331</a>
4064
+ <a class="line-number" data-cell="benchmark" data-line="332" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 332, true);">332</a>
4065
+ <a class="line-number" data-cell="benchmark" data-line="333" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 333, true);">333</a>
4066
+ <a class="line-number" data-cell="benchmark" data-line="334" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 334, true);">334</a>
4067
+ <a class="line-number" data-cell="benchmark" data-line="335" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 335, true);">335</a>
4068
+ <a class="line-number" data-cell="benchmark" data-line="336" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 336, true);">336</a>
4069
+ <a class="line-number" data-cell="benchmark" data-line="337" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 337, true);">337</a>
4070
+ <a class="line-number" data-cell="benchmark" data-line="338" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 338, true);">338</a>
4071
+ <a class="line-number" data-cell="benchmark" data-line="339" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 339, true);">339</a>
4072
+ <a class="line-number" data-cell="benchmark" data-line="340" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 340, true);">340</a>
4073
+ <a class="line-number" data-cell="benchmark" data-line="341" href="#cell-benchmark" onclick="event.preventDefault(); selectCellLine('benchmark', 341, true);">341</a>
4074
+ </div>
4075
  <div class="code-wrap">
4076
  <div class="highlight"><pre><span></span><span class="c1"># /// script</span>
4077
  <span class="c1"># dependencies = [</span>
 
4419
  <div class="code-line-highlight" id="line-highlight-benchmark"></div>
4420
  </div>
4421
  </div>
4422
+ </div>
4423
  <div id="output-benchmark" class="cell-output">
4424
  <div class="cell-stdout">Flash Attention 2 not found.
4425
  Flash Attention 3 not found.
 
4429
 
4430
 
4431
  ===== Testing shape: (1, 4224, 24, 128) =====
4432
+ torch_cudnn : absmax=0.001009, mae=0.000075, mse=0.000000
4433
+ torch_cudnn_compile_d : absmax=0.001009, mae=0.000075, mse=0.000000
4434
+ torch_cudnn_compile_ma : absmax=0.001009, mae=0.000075, mse=0.000000
4435
+ torch_flash : absmax=0.001009, mae=0.000075, mse=0.000000
4436
+ torch_flash_compile_d : absmax=0.001009, mae=0.000075, mse=0.000000
4437
+ torch_flash_compile_ma : absmax=0.001009, mae=0.000075, mse=0.000000
4438
+ hf_flash_attn : absmax=0.001009, mae=0.000075, mse=0.000000
4439
+ hf_flash_attn3 : absmax=0.001009, mae=0.000075, mse=0.000000
4440
+
4441
+
4442
+ ===== Testing shape: (1, 4352, 24, 128) =====
4443
+ torch_cudnn : absmax=0.001015, mae=0.000073, mse=0.000000
4444
+ torch_cudnn_compile_d : absmax=0.001015, mae=0.000073, mse=0.000000
4445
+ torch_cudnn_compile_ma : absmax=0.001015, mae=0.000073, mse=0.000000
4446
+ torch_flash : absmax=0.001015, mae=0.000073, mse=0.000000
4447
+ torch_flash_compile_d : absmax=0.001015, mae=0.000073, mse=0.000000
4448
+ torch_flash_compile_ma : absmax=0.001015, mae=0.000073, mse=0.000000
4449
+ hf_flash_attn : absmax=0.001015, mae=0.000073, mse=0.000000
4450
+ hf_flash_attn3 : absmax=0.001015, mae=0.000073, mse=0.000000
4451
+
4452
+
4453
+ ===== Testing shape: (1, 4416, 24, 128) =====
4454
+ torch_cudnn : absmax=0.001374, mae=0.000073, mse=0.000000
4455
+ torch_cudnn_compile_d : absmax=0.001374, mae=0.000073, mse=0.000000
4456
+ torch_cudnn_compile_ma : absmax=0.001374, mae=0.000073, mse=0.000000
4457
+ torch_flash : absmax=0.001374, mae=0.000073, mse=0.000000
4458
+ torch_flash_compile_d : absmax=0.001374, mae=0.000073, mse=0.000000
4459
+ torch_flash_compile_ma : absmax=0.001374, mae=0.000073, mse=0.000000
4460
+ hf_flash_attn : absmax=0.001374, mae=0.000073, mse=0.000000
4461
+ hf_flash_attn3 : absmax=0.001374, mae=0.000073, mse=0.000000
4462
+
4463
+
4464
+ ===== Testing shape: (1, 4480, 24, 128) =====
4465
+ torch_cudnn : absmax=0.001190, mae=0.000072, mse=0.000000
4466
+ torch_cudnn_compile_d : absmax=0.001190, mae=0.000072, mse=0.000000
4467
+ torch_cudnn_compile_ma : absmax=0.001190, mae=0.000072, mse=0.000000
4468
+ torch_flash : absmax=0.001190, mae=0.000072, mse=0.000000
4469
+ torch_flash_compile_d : absmax=0.001190, mae=0.000072, mse=0.000000
4470
+ torch_flash_compile_ma : absmax=0.001190, mae=0.000072, mse=0.000000
4471
+ hf_flash_attn : absmax=0.001190, mae=0.000072, mse=0.000000
4472
+ hf_flash_attn3 : absmax=0.001190, mae=0.000072, mse=0.000000
4473
+
4474
+
4475
+ ===== Testing shape: (1, 4544, 24, 128) =====
4476
+ torch_cudnn : absmax=0.001189, mae=0.000072, mse=0.000000
4477
+ torch_cudnn_compile_d : absmax=0.001189, mae=0.000072, mse=0.000000
4478
+ torch_cudnn_compile_ma : absmax=0.001189, mae=0.000072, mse=0.000000
4479
+ torch_flash : absmax=0.001189, mae=0.000072, mse=0.000000
4480
+ torch_flash_compile_d : absmax=0.001189, mae=0.000072, mse=0.000000
4481
+ torch_flash_compile_ma : absmax=0.001189, mae=0.000072, mse=0.000000
4482
+ hf_flash_attn : absmax=0.001189, mae=0.000072, mse=0.000000
4483
+ hf_flash_attn3 : absmax=0.001189, mae=0.000072, mse=0.000000
4484
+
4485
+
4486
+ ===== Testing shape: (1, 4608, 24, 128) =====
4487
+ torch_cudnn : absmax=0.000851, mae=0.000072, mse=0.000000
4488
+ torch_cudnn_compile_d : absmax=0.000851, mae=0.000072, mse=0.000000
4489
+ torch_cudnn_compile_ma : absmax=0.000851, mae=0.000072, mse=0.000000
4490
+ torch_flash : absmax=0.001095, mae=0.000072, mse=0.000000
4491
+ torch_flash_compile_d : absmax=0.001095, mae=0.000072, mse=0.000000
4492
+ torch_flash_compile_ma : absmax=0.001095, mae=0.000072, mse=0.000000
4493
+ hf_flash_attn : absmax=0.001095, mae=0.000072, mse=0.000000
4494
+ hf_flash_attn3 : absmax=0.000872, mae=0.000072, mse=0.000000
4495
+ Attention Benchmark:
4496
+ seq_len torch_cudnn torch_cudnn_compile_d torch_cudnn_compile_ma torch_flash torch_flash_compile_d torch_flash_compile_ma hf_flash_attn hf_flash_attn3
4497
+ 0 4224.0 3.798368 3.791408 4.181328 3.967520 3.956704 4.310320 3.395904 3.330144
4498
+ 1 4352.0 4.079536 4.073056 4.418592 4.399408 4.394336 4.733312 3.836944 3.757936
4499
+ 2 4416.0 4.143008 4.138512 4.483328 4.452528 4.444224 4.789856 3.894816 3.862496
4500
+ 3 4480.0 4.205120 4.199184 4.552352 4.529248 4.523552 4.870752 3.953808 3.870560
4501
+ 4 4544.0 4.437120 4.428784 4.782656 4.584704 4.576832 4.933216 4.008992 3.975952
4502
+ 5 4608.0 4.500704 4.493792 4.869824 4.658752 4.654752 5.028256 4.066272 3.985760
4503
  </div>
4504
  <div class="uv-install-logs" id="uv-logs-benchmark">
4505
  <div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
4506
  <div class="uv-logs-content" style="display: none;">
 
 
 
 
 
 
 
 
 
 
4507
  Downloading kiwisolver (1.4MiB)
4508
+ Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
 
 
4509
  Downloading nvidia-cublas-cu12 (566.8MiB)
4510
  Downloading pandas (11.8MiB)
4511
+ Downloading hf-xet (3.0MiB)
4512
+ Downloading nvidia-curand-cu12 (60.7MiB)
4513
+ Downloading fonttools (4.7MiB)
4514
+ Downloading networkx (1.9MiB)
4515
+ Downloading numpy (16.2MiB)
4516
+ Downloading torch (846.9MiB)
4517
+ Downloading nvidia-cusparse-cu12 (274.9MiB)
4518
  Downloading nvidia-cudnn-cu12 (674.0MiB)
4519
+ Downloading nvidia-nccl-cu12 (307.4MiB)
4520
+ Downloading setuptools (1.1MiB)
4521
+ Downloading nvidia-cufft-cu12 (184.2MiB)
4522
  Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
4523
+ Downloading sympy (6.0MiB)
4524
+ Downloading nvidia-cufile-cu12 (1.1MiB)
4525
  Downloading nvidia-nvjitlink-cu12 (37.4MiB)
4526
+ Downloading nvidia-cusparselt-cu12 (273.9MiB)
4527
+ Downloading nvidia-cusolver-cu12 (255.1MiB)
4528
+ Downloading pillow (6.3MiB)
4529
+ Downloading matplotlib (8.3MiB)
4530
  Downloading triton (148.3MiB)
 
 
 
4531
  Downloading nvidia-cufile-cu12
4532
  Downloading kiwisolver
4533
  Downloading hf-xet
 
4535
  Downloading networkx
4536
  Downloading fonttools
4537
  Downloading pillow
 
4538
  Downloading matplotlib
4539
+ Downloading nvidia-cuda-cupti-cu12
4540
  Downloading sympy
4541
  Downloading numpy
4542
  Downloading nvidia-nvjitlink-cu12
 
4552
  Downloading nvidia-cublas-cu12
4553
  Downloading nvidia-cudnn-cu12
4554
  Downloading torch
4555
+ Installed 49 packages in 563ms
4556
  </div>
4557
  </div>
4558
  <div class="cell-stderr">Fetching 20 files: 0%| | 0/20 [00:00&lt;?, ?it/s]
4559
+ Fetching 20 files: 5%|▌ | 1/20 [00:00&lt;00:04, 4.34it/s]
4560
+ Fetching 20 files: 10%|█ | 2/20 [00:02&lt;00:25, 1.42s/it]
4561
+ Fetching 20 files: 100%|██████████| 20/20 [00:02&lt;00:00, 8.05it/s]
4562
 
4563
  Fetching 4 files: 0%| | 0/4 [00:00&lt;?, ?it/s]
4564
+ Fetching 4 files: 25%|██▌ | 1/4 [00:00&lt;00:00, 5.49it/s]
4565
+ Fetching 4 files: 50%|█████ | 2/4 [00:01&lt;00:01, 1.15it/s]
4566
+ Fetching 4 files: 100%|██████████| 4/4 [00:01&lt;00:00, 2.60it/s]</div>
4567
+ <div class="cell-artifacts">
4568
+ <h4>Artifacts:</h4>
4569
+ <a href="artifacts/benchmark/dump_attention_benchmark/Attention Benchmark.png" class="artifact" target="_blank">dump_attention_benchmark/Attention Benchmark.png</a>
4570
+ <a href="artifacts/benchmark/dump_attention_benchmark/Attention Benchmark.csv" class="artifact" target="_blank">dump_attention_benchmark/Attention Benchmark.csv</a>
4571
+ <a href="artifacts/benchmark/dump_attention_benchmark/results.html" class="artifact" target="_blank">dump_attention_benchmark/results.html</a>
4572
+ <div class="artifact-preview">
4573
+ <img src="artifacts/benchmark/dump_attention_benchmark/Attention Benchmark.png" alt="dump_attention_benchmark/Attention Benchmark.png">
4574
+ </div>
4575
+ </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4576
  </div>
4577
  </div>
4578
  </div>
moe_benchmarks/megablocks/megablocks_only.html CHANGED
@@ -3724,219 +3724,122 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left:
3724
  <p>Next we can run with Megablocks kernels enabled.</p>
3725
  <h3>Forward</h3>
3726
  <p>First, we run a forward pass with Megablocks kernels.</p>
3727
- <h2>Forward and Backward</h2>
3728
- <p>Next, we run a forward and backward pass with Megablocks kernels enabled. This should be more memory efficient and allow us to complete the backward pass without running out of memory.</p>
3729
- <div class="cell cell-failed" id="cell-forward_and_backward">
3730
  <div class="cell-header">
3731
  <span class="collapse-indicators">
3732
- <span onclick="toggleCode('forward_and_backward')" style="cursor: pointer;">▼ code</span>
3733
- <span onclick="toggleOutput('forward_and_backward')" style="cursor: pointer;">▼ output</span>
3734
- <span id="uv-indicator-forward_and_backward" style="cursor: default; opacity: 0.3;">▶ uv-logs</span>
3735
  </span> |
3736
- Cell: forward_and_backward | 19.43s | FAILED
3737
- | <button class="run-btn" onclick="runCell('forward_and_backward')">▶ run</button>
3738
- <button class="copy-btn" onclick="copyCell('forward_and_backward')">Copy</button>
3739
- <a href="cells/forward_and_backward.py" target="_blank" class="raw-btn">Raw</a>
3740
  </div>
3741
- <div id="code-forward_and_backward" class="cell-code" data-lines="196">
3742
  <div class="highlight-with-lines">
3743
- <div class="line-numbers" id="lines-forward_and_backward">
3744
- <a class="line-number" data-cell="forward_and_backward" data-line="1" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 1, true);">1</a>
3745
- <a class="line-number" data-cell="forward_and_backward" data-line="2" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 2, true);">2</a>
3746
- <a class="line-number" data-cell="forward_and_backward" data-line="3" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 3, true);">3</a>
3747
- <a class="line-number" data-cell="forward_and_backward" data-line="4" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 4, true);">4</a>
3748
- <a class="line-number" data-cell="forward_and_backward" data-line="5" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 5, true);">5</a>
3749
- <a class="line-number" data-cell="forward_and_backward" data-line="6" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 6, true);">6</a>
3750
- <a class="line-number" data-cell="forward_and_backward" data-line="7" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 7, true);">7</a>
3751
- <a class="line-number" data-cell="forward_and_backward" data-line="8" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 8, true);">8</a>
3752
- <a class="line-number" data-cell="forward_and_backward" data-line="9" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 9, true);">9</a>
3753
- <a class="line-number" data-cell="forward_and_backward" data-line="10" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 10, true);">10</a>
3754
- <a class="line-number" data-cell="forward_and_backward" data-line="11" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 11, true);">11</a>
3755
- <a class="line-number" data-cell="forward_and_backward" data-line="12" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 12, true);">12</a>
3756
- <a class="line-number" data-cell="forward_and_backward" data-line="13" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 13, true);">13</a>
3757
- <a class="line-number" data-cell="forward_and_backward" data-line="14" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 14, true);">14</a>
3758
- <a class="line-number" data-cell="forward_and_backward" data-line="15" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 15, true);">15</a>
3759
- <a class="line-number" data-cell="forward_and_backward" data-line="16" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 16, true);">16</a>
3760
- <a class="line-number" data-cell="forward_and_backward" data-line="17" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 17, true);">17</a>
3761
- <a class="line-number" data-cell="forward_and_backward" data-line="18" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 18, true);">18</a>
3762
- <a class="line-number" data-cell="forward_and_backward" data-line="19" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 19, true);">19</a>
3763
- <a class="line-number" data-cell="forward_and_backward" data-line="20" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 20, true);">20</a>
3764
- <a class="line-number" data-cell="forward_and_backward" data-line="21" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 21, true);">21</a>
3765
- <a class="line-number" data-cell="forward_and_backward" data-line="22" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 22, true);">22</a>
3766
- <a class="line-number" data-cell="forward_and_backward" data-line="23" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 23, true);">23</a>
3767
- <a class="line-number" data-cell="forward_and_backward" data-line="24" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 24, true);">24</a>
3768
- <a class="line-number" data-cell="forward_and_backward" data-line="25" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 25, true);">25</a>
3769
- <a class="line-number" data-cell="forward_and_backward" data-line="26" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 26, true);">26</a>
3770
- <a class="line-number" data-cell="forward_and_backward" data-line="27" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 27, true);">27</a>
3771
- <a class="line-number" data-cell="forward_and_backward" data-line="28" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 28, true);">28</a>
3772
- <a class="line-number" data-cell="forward_and_backward" data-line="29" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 29, true);">29</a>
3773
- <a class="line-number" data-cell="forward_and_backward" data-line="30" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 30, true);">30</a>
3774
- <a class="line-number" data-cell="forward_and_backward" data-line="31" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 31, true);">31</a>
3775
- <a class="line-number" data-cell="forward_and_backward" data-line="32" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 32, true);">32</a>
3776
- <a class="line-number" data-cell="forward_and_backward" data-line="33" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 33, true);">33</a>
3777
- <a class="line-number" data-cell="forward_and_backward" data-line="34" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 34, true);">34</a>
3778
- <a class="line-number" data-cell="forward_and_backward" data-line="35" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 35, true);">35</a>
3779
- <a class="line-number" data-cell="forward_and_backward" data-line="36" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 36, true);">36</a>
3780
- <a class="line-number" data-cell="forward_and_backward" data-line="37" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 37, true);">37</a>
3781
- <a class="line-number" data-cell="forward_and_backward" data-line="38" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 38, true);">38</a>
3782
- <a class="line-number" data-cell="forward_and_backward" data-line="39" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 39, true);">39</a>
3783
- <a class="line-number" data-cell="forward_and_backward" data-line="40" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 40, true);">40</a>
3784
- <a class="line-number" data-cell="forward_and_backward" data-line="41" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 41, true);">41</a>
3785
- <a class="line-number" data-cell="forward_and_backward" data-line="42" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 42, true);">42</a>
3786
- <a class="line-number" data-cell="forward_and_backward" data-line="43" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 43, true);">43</a>
3787
- <a class="line-number" data-cell="forward_and_backward" data-line="44" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 44, true);">44</a>
3788
- <a class="line-number" data-cell="forward_and_backward" data-line="45" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 45, true);">45</a>
3789
- <a class="line-number" data-cell="forward_and_backward" data-line="46" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 46, true);">46</a>
3790
- <a class="line-number" data-cell="forward_and_backward" data-line="47" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 47, true);">47</a>
3791
- <a class="line-number" data-cell="forward_and_backward" data-line="48" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 48, true);">48</a>
3792
- <a class="line-number" data-cell="forward_and_backward" data-line="49" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 49, true);">49</a>
3793
- <a class="line-number" data-cell="forward_and_backward" data-line="50" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 50, true);">50</a>
3794
- <a class="line-number" data-cell="forward_and_backward" data-line="51" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 51, true);">51</a>
3795
- <a class="line-number" data-cell="forward_and_backward" data-line="52" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 52, true);">52</a>
3796
- <a class="line-number" data-cell="forward_and_backward" data-line="53" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 53, true);">53</a>
3797
- <a class="line-number" data-cell="forward_and_backward" data-line="54" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 54, true);">54</a>
3798
- <a class="line-number" data-cell="forward_and_backward" data-line="55" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 55, true);">55</a>
3799
- <a class="line-number" data-cell="forward_and_backward" data-line="56" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 56, true);">56</a>
3800
- <a class="line-number" data-cell="forward_and_backward" data-line="57" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 57, true);">57</a>
3801
- <a class="line-number" data-cell="forward_and_backward" data-line="58" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 58, true);">58</a>
3802
- <a class="line-number" data-cell="forward_and_backward" data-line="59" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 59, true);">59</a>
3803
- <a class="line-number" data-cell="forward_and_backward" data-line="60" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 60, true);">60</a>
3804
- <a class="line-number" data-cell="forward_and_backward" data-line="61" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 61, true);">61</a>
3805
- <a class="line-number" data-cell="forward_and_backward" data-line="62" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 62, true);">62</a>
3806
- <a class="line-number" data-cell="forward_and_backward" data-line="63" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 63, true);">63</a>
3807
- <a class="line-number" data-cell="forward_and_backward" data-line="64" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 64, true);">64</a>
3808
- <a class="line-number" data-cell="forward_and_backward" data-line="65" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 65, true);">65</a>
3809
- <a class="line-number" data-cell="forward_and_backward" data-line="66" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 66, true);">66</a>
3810
- <a class="line-number" data-cell="forward_and_backward" data-line="67" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 67, true);">67</a>
3811
- <a class="line-number" data-cell="forward_and_backward" data-line="68" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 68, true);">68</a>
3812
- <a class="line-number" data-cell="forward_and_backward" data-line="69" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 69, true);">69</a>
3813
- <a class="line-number" data-cell="forward_and_backward" data-line="70" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 70, true);">70</a>
3814
- <a class="line-number" data-cell="forward_and_backward" data-line="71" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 71, true);">71</a>
3815
- <a class="line-number" data-cell="forward_and_backward" data-line="72" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 72, true);">72</a>
3816
- <a class="line-number" data-cell="forward_and_backward" data-line="73" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 73, true);">73</a>
3817
- <a class="line-number" data-cell="forward_and_backward" data-line="74" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 74, true);">74</a>
3818
- <a class="line-number" data-cell="forward_and_backward" data-line="75" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 75, true);">75</a>
3819
- <a class="line-number" data-cell="forward_and_backward" data-line="76" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 76, true);">76</a>
3820
- <a class="line-number" data-cell="forward_and_backward" data-line="77" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 77, true);">77</a>
3821
- <a class="line-number" data-cell="forward_and_backward" data-line="78" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 78, true);">78</a>
3822
- <a class="line-number" data-cell="forward_and_backward" data-line="79" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 79, true);">79</a>
3823
- <a class="line-number" data-cell="forward_and_backward" data-line="80" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 80, true);">80</a>
3824
- <a class="line-number" data-cell="forward_and_backward" data-line="81" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 81, true);">81</a>
3825
- <a class="line-number" data-cell="forward_and_backward" data-line="82" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 82, true);">82</a>
3826
- <a class="line-number" data-cell="forward_and_backward" data-line="83" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 83, true);">83</a>
3827
- <a class="line-number" data-cell="forward_and_backward" data-line="84" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 84, true);">84</a>
3828
- <a class="line-number" data-cell="forward_and_backward" data-line="85" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 85, true);">85</a>
3829
- <a class="line-number" data-cell="forward_and_backward" data-line="86" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 86, true);">86</a>
3830
- <a class="line-number" data-cell="forward_and_backward" data-line="87" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 87, true);">87</a>
3831
- <a class="line-number" data-cell="forward_and_backward" data-line="88" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 88, true);">88</a>
3832
- <a class="line-number" data-cell="forward_and_backward" data-line="89" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 89, true);">89</a>
3833
- <a class="line-number" data-cell="forward_and_backward" data-line="90" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 90, true);">90</a>
3834
- <a class="line-number" data-cell="forward_and_backward" data-line="91" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 91, true);">91</a>
3835
- <a class="line-number" data-cell="forward_and_backward" data-line="92" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 92, true);">92</a>
3836
- <a class="line-number" data-cell="forward_and_backward" data-line="93" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 93, true);">93</a>
3837
- <a class="line-number" data-cell="forward_and_backward" data-line="94" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 94, true);">94</a>
3838
- <a class="line-number" data-cell="forward_and_backward" data-line="95" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 95, true);">95</a>
3839
- <a class="line-number" data-cell="forward_and_backward" data-line="96" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 96, true);">96</a>
3840
- <a class="line-number" data-cell="forward_and_backward" data-line="97" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 97, true);">97</a>
3841
- <a class="line-number" data-cell="forward_and_backward" data-line="98" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 98, true);">98</a>
3842
- <a class="line-number" data-cell="forward_and_backward" data-line="99" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 99, true);">99</a>
3843
- <a class="line-number" data-cell="forward_and_backward" data-line="100" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 100, true);">100</a>
3844
- <a class="line-number" data-cell="forward_and_backward" data-line="101" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 101, true);">101</a>
3845
- <a class="line-number" data-cell="forward_and_backward" data-line="102" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 102, true);">102</a>
3846
- <a class="line-number" data-cell="forward_and_backward" data-line="103" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 103, true);">103</a>
3847
- <a class="line-number" data-cell="forward_and_backward" data-line="104" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 104, true);">104</a>
3848
- <a class="line-number" data-cell="forward_and_backward" data-line="105" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 105, true);">105</a>
3849
- <a class="line-number" data-cell="forward_and_backward" data-line="106" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 106, true);">106</a>
3850
- <a class="line-number" data-cell="forward_and_backward" data-line="107" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 107, true);">107</a>
3851
- <a class="line-number" data-cell="forward_and_backward" data-line="108" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 108, true);">108</a>
3852
- <a class="line-number" data-cell="forward_and_backward" data-line="109" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 109, true);">109</a>
3853
- <a class="line-number" data-cell="forward_and_backward" data-line="110" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 110, true);">110</a>
3854
- <a class="line-number" data-cell="forward_and_backward" data-line="111" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 111, true);">111</a>
3855
- <a class="line-number" data-cell="forward_and_backward" data-line="112" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 112, true);">112</a>
3856
- <a class="line-number" data-cell="forward_and_backward" data-line="113" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 113, true);">113</a>
3857
- <a class="line-number" data-cell="forward_and_backward" data-line="114" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 114, true);">114</a>
3858
- <a class="line-number" data-cell="forward_and_backward" data-line="115" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 115, true);">115</a>
3859
- <a class="line-number" data-cell="forward_and_backward" data-line="116" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 116, true);">116</a>
3860
- <a class="line-number" data-cell="forward_and_backward" data-line="117" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 117, true);">117</a>
3861
- <a class="line-number" data-cell="forward_and_backward" data-line="118" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 118, true);">118</a>
3862
- <a class="line-number" data-cell="forward_and_backward" data-line="119" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 119, true);">119</a>
3863
- <a class="line-number" data-cell="forward_and_backward" data-line="120" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 120, true);">120</a>
3864
- <a class="line-number" data-cell="forward_and_backward" data-line="121" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 121, true);">121</a>
3865
- <a class="line-number" data-cell="forward_and_backward" data-line="122" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 122, true);">122</a>
3866
- <a class="line-number" data-cell="forward_and_backward" data-line="123" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 123, true);">123</a>
3867
- <a class="line-number" data-cell="forward_and_backward" data-line="124" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 124, true);">124</a>
3868
- <a class="line-number" data-cell="forward_and_backward" data-line="125" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 125, true);">125</a>
3869
- <a class="line-number" data-cell="forward_and_backward" data-line="126" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 126, true);">126</a>
3870
- <a class="line-number" data-cell="forward_and_backward" data-line="127" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 127, true);">127</a>
3871
- <a class="line-number" data-cell="forward_and_backward" data-line="128" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 128, true);">128</a>
3872
- <a class="line-number" data-cell="forward_and_backward" data-line="129" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 129, true);">129</a>
3873
- <a class="line-number" data-cell="forward_and_backward" data-line="130" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 130, true);">130</a>
3874
- <a class="line-number" data-cell="forward_and_backward" data-line="131" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 131, true);">131</a>
3875
- <a class="line-number" data-cell="forward_and_backward" data-line="132" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 132, true);">132</a>
3876
- <a class="line-number" data-cell="forward_and_backward" data-line="133" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 133, true);">133</a>
3877
- <a class="line-number" data-cell="forward_and_backward" data-line="134" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 134, true);">134</a>
3878
- <a class="line-number" data-cell="forward_and_backward" data-line="135" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 135, true);">135</a>
3879
- <a class="line-number" data-cell="forward_and_backward" data-line="136" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 136, true);">136</a>
3880
- <a class="line-number" data-cell="forward_and_backward" data-line="137" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 137, true);">137</a>
3881
- <a class="line-number" data-cell="forward_and_backward" data-line="138" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 138, true);">138</a>
3882
- <a class="line-number" data-cell="forward_and_backward" data-line="139" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 139, true);">139</a>
3883
- <a class="line-number" data-cell="forward_and_backward" data-line="140" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 140, true);">140</a>
3884
- <a class="line-number" data-cell="forward_and_backward" data-line="141" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 141, true);">141</a>
3885
- <a class="line-number" data-cell="forward_and_backward" data-line="142" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 142, true);">142</a>
3886
- <a class="line-number" data-cell="forward_and_backward" data-line="143" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 143, true);">143</a>
3887
- <a class="line-number" data-cell="forward_and_backward" data-line="144" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 144, true);">144</a>
3888
- <a class="line-number" data-cell="forward_and_backward" data-line="145" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 145, true);">145</a>
3889
- <a class="line-number" data-cell="forward_and_backward" data-line="146" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 146, true);">146</a>
3890
- <a class="line-number" data-cell="forward_and_backward" data-line="147" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 147, true);">147</a>
3891
- <a class="line-number" data-cell="forward_and_backward" data-line="148" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 148, true);">148</a>
3892
- <a class="line-number" data-cell="forward_and_backward" data-line="149" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 149, true);">149</a>
3893
- <a class="line-number" data-cell="forward_and_backward" data-line="150" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 150, true);">150</a>
3894
- <a class="line-number" data-cell="forward_and_backward" data-line="151" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 151, true);">151</a>
3895
- <a class="line-number" data-cell="forward_and_backward" data-line="152" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 152, true);">152</a>
3896
- <a class="line-number" data-cell="forward_and_backward" data-line="153" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 153, true);">153</a>
3897
- <a class="line-number" data-cell="forward_and_backward" data-line="154" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 154, true);">154</a>
3898
- <a class="line-number" data-cell="forward_and_backward" data-line="155" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 155, true);">155</a>
3899
- <a class="line-number" data-cell="forward_and_backward" data-line="156" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 156, true);">156</a>
3900
- <a class="line-number" data-cell="forward_and_backward" data-line="157" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 157, true);">157</a>
3901
- <a class="line-number" data-cell="forward_and_backward" data-line="158" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 158, true);">158</a>
3902
- <a class="line-number" data-cell="forward_and_backward" data-line="159" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 159, true);">159</a>
3903
- <a class="line-number" data-cell="forward_and_backward" data-line="160" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 160, true);">160</a>
3904
- <a class="line-number" data-cell="forward_and_backward" data-line="161" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 161, true);">161</a>
3905
- <a class="line-number" data-cell="forward_and_backward" data-line="162" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 162, true);">162</a>
3906
- <a class="line-number" data-cell="forward_and_backward" data-line="163" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 163, true);">163</a>
3907
- <a class="line-number" data-cell="forward_and_backward" data-line="164" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 164, true);">164</a>
3908
- <a class="line-number" data-cell="forward_and_backward" data-line="165" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 165, true);">165</a>
3909
- <a class="line-number" data-cell="forward_and_backward" data-line="166" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 166, true);">166</a>
3910
- <a class="line-number" data-cell="forward_and_backward" data-line="167" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 167, true);">167</a>
3911
- <a class="line-number" data-cell="forward_and_backward" data-line="168" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 168, true);">168</a>
3912
- <a class="line-number" data-cell="forward_and_backward" data-line="169" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 169, true);">169</a>
3913
- <a class="line-number" data-cell="forward_and_backward" data-line="170" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 170, true);">170</a>
3914
- <a class="line-number" data-cell="forward_and_backward" data-line="171" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 171, true);">171</a>
3915
- <a class="line-number" data-cell="forward_and_backward" data-line="172" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 172, true);">172</a>
3916
- <a class="line-number" data-cell="forward_and_backward" data-line="173" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 173, true);">173</a>
3917
- <a class="line-number" data-cell="forward_and_backward" data-line="174" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 174, true);">174</a>
3918
- <a class="line-number" data-cell="forward_and_backward" data-line="175" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 175, true);">175</a>
3919
- <a class="line-number" data-cell="forward_and_backward" data-line="176" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 176, true);">176</a>
3920
- <a class="line-number" data-cell="forward_and_backward" data-line="177" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 177, true);">177</a>
3921
- <a class="line-number" data-cell="forward_and_backward" data-line="178" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 178, true);">178</a>
3922
- <a class="line-number" data-cell="forward_and_backward" data-line="179" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 179, true);">179</a>
3923
- <a class="line-number" data-cell="forward_and_backward" data-line="180" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 180, true);">180</a>
3924
- <a class="line-number" data-cell="forward_and_backward" data-line="181" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 181, true);">181</a>
3925
- <a class="line-number" data-cell="forward_and_backward" data-line="182" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 182, true);">182</a>
3926
- <a class="line-number" data-cell="forward_and_backward" data-line="183" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 183, true);">183</a>
3927
- <a class="line-number" data-cell="forward_and_backward" data-line="184" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 184, true);">184</a>
3928
- <a class="line-number" data-cell="forward_and_backward" data-line="185" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 185, true);">185</a>
3929
- <a class="line-number" data-cell="forward_and_backward" data-line="186" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 186, true);">186</a>
3930
- <a class="line-number" data-cell="forward_and_backward" data-line="187" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 187, true);">187</a>
3931
- <a class="line-number" data-cell="forward_and_backward" data-line="188" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 188, true);">188</a>
3932
- <a class="line-number" data-cell="forward_and_backward" data-line="189" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 189, true);">189</a>
3933
- <a class="line-number" data-cell="forward_and_backward" data-line="190" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 190, true);">190</a>
3934
- <a class="line-number" data-cell="forward_and_backward" data-line="191" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 191, true);">191</a>
3935
- <a class="line-number" data-cell="forward_and_backward" data-line="192" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 192, true);">192</a>
3936
- <a class="line-number" data-cell="forward_and_backward" data-line="193" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 193, true);">193</a>
3937
- <a class="line-number" data-cell="forward_and_backward" data-line="194" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 194, true);">194</a>
3938
- <a class="line-number" data-cell="forward_and_backward" data-line="195" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 195, true);">195</a>
3939
- <a class="line-number" data-cell="forward_and_backward" data-line="196" href="#cell-forward_and_backward" onclick="event.preventDefault(); selectCellLine('forward_and_backward', 196, true);">196</a>
3940
  </div>
3941
  <div class="code-wrap">
3942
  <div class="highlight"><pre><span></span><span class="c1"># /// script</span>
@@ -3963,7 +3866,7 @@ Cell: forward_and_backward | 19.43s | FAILED
3963
  <span class="kn">import</span><span class="w"> </span><span class="nn">logging</span>
3964
  <span class="kn">from</span><span class="w"> </span><span class="nn">transformers.models.gpt_oss.modeling_gpt_oss</span><span class="w"> </span><span class="kn">import</span> <span class="n">GptOssRMSNorm</span>
3965
 
3966
- <span class="c1"># remove liger kernel for testing </span>
3967
  <span class="n">replace_kernel_forward_from_hub</span><span class="p">(</span><span class="n">GptOssRMSNorm</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
3968
 
3969
  <span class="c1"># set to debug logging</span>
@@ -4004,6 +3907,8 @@ Cell: forward_and_backward | 19.43s | FAILED
4004
  <span class="n">tokenizer</span> <span class="o">=</span> <span class="n">PreTrainedTokenizerFast</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span><span class="n">model_id</span><span class="p">)</span>
4005
  <span class="n">quantization_config</span> <span class="o">=</span> <span class="n">Mxfp4Config</span><span class="p">(</span><span class="n">dequantize</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
4006
 
 
 
4007
  <span class="n">model</span> <span class="o">=</span> <span class="n">GptOssForCausalLM</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span>
4008
  <span class="n">model_id</span><span class="p">,</span>
4009
  <span class="n">dtype</span><span class="o">=</span><span class="s2">&quot;bfloat16&quot;</span><span class="p">,</span>
@@ -4024,14 +3929,9 @@ Cell: forward_and_backward | 19.43s | FAILED
4024
  <span class="n">reasoning_effort</span><span class="o">=</span><span class="s2">&quot;low&quot;</span><span class="p">,</span>
4025
  <span class="p">)</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="s2">&quot;cuda&quot;</span><span class="p">)</span>
4026
 
4027
- <span class="n">max_tokens</span> <span class="o">=</span> <span class="mi">128</span> <span class="c1"># Reduced to help with memory usage</span>
4028
 
4029
- <span class="c1"># Clear memory before backward pass</span>
4030
- <span class="n">reset_peak_memory_stats</span><span class="p">()</span>
4031
- <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Pre-generation memory: </span><span class="si">{</span><span class="n">get_memory_stats</span><span class="p">()</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
4032
-
4033
- <span class="c1"># forward and backward pass</span>
4034
- <span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">autograd</span><span class="o">.</span><span class="n">set_grad_enabled</span><span class="p">(</span><span class="kc">True</span><span class="p">):</span>
4035
  <span class="n">start_time</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">perf_counter</span><span class="p">()</span>
4036
  <span class="n">generated</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">generate</span><span class="p">(</span>
4037
  <span class="o">**</span><span class="n">inputs</span><span class="p">,</span>
@@ -4040,109 +3940,17 @@ Cell: forward_and_backward | 19.43s | FAILED
4040
  <span class="n">temperature</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
4041
  <span class="p">)</span>
4042
  <span class="n">end_time</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">perf_counter</span><span class="p">()</span>
4043
- <span class="nb">print</span><span class="p">(</span><span class="n">tokenizer</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="n">generated</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">skip_special_tokens</span><span class="o">=</span><span class="kc">False</span><span class="p">))</span>
4044
- <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Generation took </span><span class="si">{</span><span class="n">end_time</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="n">start_time</span><span class="si">:</span><span class="s2">.2f</span><span class="si">}</span><span class="s2"> seconds&quot;</span><span class="p">)</span>
4045
- <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Post-generation memory: </span><span class="si">{</span><span class="n">get_memory_stats</span><span class="p">()</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
4046
-
4047
- <span class="c1"># Use gradient checkpointing to reduce memory usage</span>
4048
- <span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="s1">&#39;gradient_checkpointing_enable&#39;</span><span class="p">):</span>
4049
- <span class="n">model</span><span class="o">.</span><span class="n">gradient_checkpointing_enable</span><span class="p">()</span>
4050
- <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Enabled gradient checkpointing&quot;</span><span class="p">)</span>
4051
-
4052
- <span class="c1"># Reduce sequence length if needed for memory</span>
4053
- <span class="n">max_seq_len</span> <span class="o">=</span> <span class="mi">512</span> <span class="c1"># Limit sequence length for backward pass</span>
4054
- <span class="k">if</span> <span class="n">generated</span><span class="o">.</span><span class="n">size</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="o">&gt;</span> <span class="n">max_seq_len</span><span class="p">:</span>
4055
- <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Truncating sequence from </span><span class="si">{</span><span class="n">generated</span><span class="o">.</span><span class="n">size</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span><span class="si">}</span><span class="s2"> to </span><span class="si">{</span><span class="n">max_seq_len</span><span class="si">}</span><span class="s2"> tokens&quot;</span><span class="p">)</span>
4056
- <span class="n">full_sequence</span> <span class="o">=</span> <span class="n">generated</span><span class="p">[:,</span> <span class="o">-</span><span class="n">max_seq_len</span><span class="p">:]</span>
4057
- <span class="k">else</span><span class="p">:</span>
4058
- <span class="n">full_sequence</span> <span class="o">=</span> <span class="n">generated</span>
4059
-
4060
- <span class="c1"># Get model outputs for the full sequence</span>
4061
- <span class="n">model</span><span class="o">.</span><span class="n">train</span><span class="p">()</span> <span class="c1"># Enable dropout and other training behaviors</span>
4062
-
4063
- <span class="k">try</span><span class="p">:</span>
4064
- <span class="n">outputs</span> <span class="o">=</span> <span class="n">model</span><span class="p">(</span>
4065
- <span class="n">input_ids</span><span class="o">=</span><span class="n">full_sequence</span><span class="p">,</span>
4066
- <span class="n">labels</span><span class="o">=</span><span class="n">full_sequence</span><span class="p">,</span> <span class="c1"># This will compute loss internally</span>
4067
- <span class="n">return_dict</span><span class="o">=</span><span class="kc">True</span>
4068
- <span class="p">)</span>
4069
- <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Post-forward memory: </span><span class="si">{</span><span class="n">get_memory_stats</span><span class="p">()</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
4070
-
4071
- <span class="c1"># If model doesn&#39;t compute loss, compute it manually</span>
4072
- <span class="k">if</span> <span class="n">outputs</span><span class="o">.</span><span class="n">loss</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
4073
- <span class="n">shift_logits</span> <span class="o">=</span> <span class="n">outputs</span><span class="o">.</span><span class="n">logits</span><span class="p">[</span><span class="o">...</span><span class="p">,</span> <span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="p">:]</span><span class="o">.</span><span class="n">contiguous</span><span class="p">()</span>
4074
- <span class="n">shift_labels</span> <span class="o">=</span> <span class="n">full_sequence</span><span class="p">[</span><span class="o">...</span><span class="p">,</span> <span class="mi">1</span><span class="p">:]</span><span class="o">.</span><span class="n">contiguous</span><span class="p">()</span>
4075
-
4076
- <span class="c1"># Use CrossEntropyLoss with ignore_index for padding tokens</span>
4077
- <span class="n">loss_fct</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">CrossEntropyLoss</span><span class="p">(</span><span class="n">ignore_index</span><span class="o">=</span><span class="n">tokenizer</span><span class="o">.</span><span class="n">pad_token_id</span> <span class="k">if</span> <span class="n">tokenizer</span><span class="o">.</span><span class="n">pad_token_id</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="o">-</span><span class="mi">100</span><span class="p">)</span>
4078
- <span class="n">loss</span> <span class="o">=</span> <span class="n">loss_fct</span><span class="p">(</span>
4079
- <span class="n">shift_logits</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="n">shift_logits</span><span class="o">.</span><span class="n">size</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)),</span>
4080
- <span class="n">shift_labels</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span>
4081
- <span class="p">)</span>
4082
- <span class="k">else</span><span class="p">:</span>
4083
- <span class="n">loss</span> <span class="o">=</span> <span class="n">outputs</span><span class="o">.</span><span class="n">loss</span>
4084
-
4085
- <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Loss: </span><span class="si">{</span><span class="n">loss</span><span class="o">.</span><span class="n">item</span><span class="p">()</span><span class="si">:</span><span class="s2">.4f</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
4086
-
4087
- <span class="c1"># Clear intermediate tensors to save memory</span>
4088
- <span class="k">del</span> <span class="n">outputs</span>
4089
- <span class="n">torch</span><span class="o">.</span><span class="n">cuda</span><span class="o">.</span><span class="n">empty_cache</span><span class="p">()</span>
4090
-
4091
- <span class="c1"># Perform backward pass with memory management</span>
4092
- <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Running backward pass...&quot;</span><span class="p">)</span>
4093
- <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Pre-backward memory: </span><span class="si">{</span><span class="n">get_memory_stats</span><span class="p">()</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
4094
-
4095
- <span class="n">loss</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
4096
- <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Post-backward memory: </span><span class="si">{</span><span class="n">get_memory_stats</span><span class="p">()</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
4097
-
4098
- <span class="k">except</span> <span class="n">torch</span><span class="o">.</span><span class="n">cuda</span><span class="o">.</span><span class="n">OutOfMemoryError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
4099
- <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;OOM during forward/backward pass: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
4100
- <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Try reducing max_tokens or max_seq_len&quot;</span><span class="p">)</span>
4101
- <span class="k">raise</span>
4102
-
4103
- <span class="c1"># Calculate gradient statistics and print sample gradients</span>
4104
- <span class="n">total_norm</span> <span class="o">=</span> <span class="mf">0.0</span>
4105
- <span class="n">param_count</span> <span class="o">=</span> <span class="mi">0</span>
4106
- <span class="n">grad_samples</span> <span class="o">=</span> <span class="p">{}</span>
4107
-
4108
- <span class="k">for</span> <span class="n">name</span><span class="p">,</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">model</span><span class="o">.</span><span class="n">named_parameters</span><span class="p">():</span>
4109
- <span class="k">if</span> <span class="n">p</span><span class="o">.</span><span class="n">grad</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
4110
- <span class="n">param_count</span> <span class="o">+=</span> <span class="mi">1</span>
4111
- <span class="n">grad_norm</span> <span class="o">=</span> <span class="n">p</span><span class="o">.</span><span class="n">grad</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">norm</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">item</span><span class="p">()</span>
4112
- <span class="n">total_norm</span> <span class="o">+=</span> <span class="n">grad_norm</span> <span class="o">**</span> <span class="mi">2</span>
4113
-
4114
- <span class="c1"># Collect gradient statistics for key layers</span>
4115
- <span class="k">if</span> <span class="nb">any</span><span class="p">(</span><span class="n">key</span> <span class="ow">in</span> <span class="n">name</span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">&#39;embed&#39;</span><span class="p">,</span> <span class="s1">&#39;lm_head&#39;</span><span class="p">,</span> <span class="s1">&#39;mlp.up&#39;</span><span class="p">,</span> <span class="s1">&#39;mlp.down&#39;</span><span class="p">,</span> <span class="s1">&#39;self_attn.q_proj&#39;</span><span class="p">,</span> <span class="s1">&#39;norm&#39;</span><span class="p">]):</span>
4116
- <span class="n">grad_samples</span><span class="p">[</span><span class="n">name</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span>
4117
- <span class="s1">&#39;norm&#39;</span><span class="p">:</span> <span class="n">grad_norm</span><span class="p">,</span>
4118
- <span class="s1">&#39;mean&#39;</span><span class="p">:</span> <span class="n">p</span><span class="o">.</span><span class="n">grad</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span><span class="o">.</span><span class="n">item</span><span class="p">(),</span>
4119
- <span class="s1">&#39;std&#39;</span><span class="p">:</span> <span class="n">p</span><span class="o">.</span><span class="n">grad</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">std</span><span class="p">()</span><span class="o">.</span><span class="n">item</span><span class="p">(),</span>
4120
- <span class="s1">&#39;max&#39;</span><span class="p">:</span> <span class="n">p</span><span class="o">.</span><span class="n">grad</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">max</span><span class="p">()</span><span class="o">.</span><span class="n">item</span><span class="p">(),</span>
4121
- <span class="s1">&#39;min&#39;</span><span class="p">:</span> <span class="n">p</span><span class="o">.</span><span class="n">grad</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">min</span><span class="p">()</span><span class="o">.</span><span class="n">item</span><span class="p">(),</span>
4122
- <span class="p">}</span>
4123
-
4124
- <span class="n">total_norm</span> <span class="o">=</span> <span class="n">total_norm</span> <span class="o">**</span> <span class="mf">0.5</span>
4125
-
4126
- <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;</span><span class="se">\n</span><span class="s2">Gradient norm: </span><span class="si">{</span><span class="n">total_norm</span><span class="si">:</span><span class="s2">.4f</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
4127
- <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Parameters with gradients: </span><span class="si">{</span><span class="n">param_count</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
4128
-
4129
- <span class="c1"># Print sample gradients from important layers</span>
4130
- <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;</span><span class="se">\n</span><span class="s2">Sample gradient statistics:&quot;</span><span class="p">)</span>
4131
- <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">stats</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="n">grad_samples</span><span class="o">.</span><span class="n">items</span><span class="p">())[:</span><span class="mi">10</span><span class="p">]):</span>
4132
- <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot; </span><span class="si">{</span><span class="n">name</span><span class="p">[:</span><span class="mi">60</span><span class="p">]</span><span class="si">:</span><span class="s2">&lt;60</span><span class="si">}</span><span class="s2"> | norm: </span><span class="si">{</span><span class="n">stats</span><span class="p">[</span><span class="s1">&#39;norm&#39;</span><span class="p">]</span><span class="si">:</span><span class="s2">.4e</span><span class="si">}</span><span class="s2"> | mean: </span><span class="si">{</span><span class="n">stats</span><span class="p">[</span><span class="s1">&#39;mean&#39;</span><span class="p">]</span><span class="si">:</span><span class="s2">.4e</span><span class="si">}</span><span class="s2"> | std: </span><span class="si">{</span><span class="n">stats</span><span class="p">[</span><span class="s1">&#39;std&#39;</span><span class="p">]</span><span class="si">:</span><span class="s2">.4e</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
4133
-
4134
- <span class="c1"># Optional: zero gradients for next iteration</span>
4135
- <span class="n">model</span><span class="o">.</span><span class="n">zero_grad</span><span class="p">()</span>
4136
- <span class="n">model</span><span class="o">.</span><span class="n">eval</span><span class="p">()</span> <span class="c1"># Switch back to eval mode</span>
4137
  </pre></div>
4138
 
4139
- <div class="code-line-highlight" id="line-highlight-forward_and_backward"></div>
4140
  </div>
4141
  </div>
4142
  </div>
4143
- <div id="output-forward_and_backward" class="cell-output">
4144
- <div class="cell-stderr">Downloading cpython-3.13.7-linux-x86_64-gnu (download) (32.0MiB)
4145
- Downloading cpython-3.13.7-linux-x86_64-gnu (download)
4146
  Updating https://github.com/huggingface/transformers.git (HEAD)
4147
  Updated https://github.com/huggingface/transformers.git (449533af73874470e914a203391635e04ac2ffc8)
4148
  × No solution found when resolving script dependencies:
@@ -4158,6 +3966,9 @@ Cell: forward_and_backward | 19.43s | FAILED
4158
  </div>
4159
  </div>
4160
  </div>
 
 
 
4161
  </div>
4162
 
4163
  </body>
 
3724
  <p>Next we can run with Megablocks kernels enabled.</p>
3725
  <h3>Forward</h3>
3726
  <p>First, we run a forward pass with Megablocks kernels.</p>
3727
+ <div class="cell cell-failed" id="cell-forward_only">
 
 
3728
  <div class="cell-header">
3729
  <span class="collapse-indicators">
3730
+ <span onclick="toggleCode('forward_only')" style="cursor: pointer;">▼ code</span>
3731
+ <span onclick="toggleOutput('forward_only')" style="cursor: pointer;">▼ output</span>
3732
+ <span id="uv-indicator-forward_only" style="cursor: default; opacity: 0.3;">▶ uv-logs</span>
3733
  </span> |
3734
+ Cell: forward_only | 17.22s | FAILED
3735
+ | <button class="run-btn" onclick="runCell('forward_only')">▶ run</button>
3736
+ <button class="copy-btn" onclick="copyCell('forward_only')">Copy</button>
3737
+ <a href="cells/forward_only.py" target="_blank" class="raw-btn">Raw</a>
3738
  </div>
3739
+ <div id="code-forward_only" class="cell-code" data-lines="101">
3740
  <div class="highlight-with-lines">
3741
+ <div class="line-numbers" id="lines-forward_only">
3742
+ <a class="line-number" data-cell="forward_only" data-line="1" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 1, true);">1</a>
3743
+ <a class="line-number" data-cell="forward_only" data-line="2" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 2, true);">2</a>
3744
+ <a class="line-number" data-cell="forward_only" data-line="3" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 3, true);">3</a>
3745
+ <a class="line-number" data-cell="forward_only" data-line="4" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 4, true);">4</a>
3746
+ <a class="line-number" data-cell="forward_only" data-line="5" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 5, true);">5</a>
3747
+ <a class="line-number" data-cell="forward_only" data-line="6" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 6, true);">6</a>
3748
+ <a class="line-number" data-cell="forward_only" data-line="7" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 7, true);">7</a>
3749
+ <a class="line-number" data-cell="forward_only" data-line="8" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 8, true);">8</a>
3750
+ <a class="line-number" data-cell="forward_only" data-line="9" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 9, true);">9</a>
3751
+ <a class="line-number" data-cell="forward_only" data-line="10" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 10, true);">10</a>
3752
+ <a class="line-number" data-cell="forward_only" data-line="11" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 11, true);">11</a>
3753
+ <a class="line-number" data-cell="forward_only" data-line="12" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 12, true);">12</a>
3754
+ <a class="line-number" data-cell="forward_only" data-line="13" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 13, true);">13</a>
3755
+ <a class="line-number" data-cell="forward_only" data-line="14" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 14, true);">14</a>
3756
+ <a class="line-number" data-cell="forward_only" data-line="15" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 15, true);">15</a>
3757
+ <a class="line-number" data-cell="forward_only" data-line="16" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 16, true);">16</a>
3758
+ <a class="line-number" data-cell="forward_only" data-line="17" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 17, true);">17</a>
3759
+ <a class="line-number" data-cell="forward_only" data-line="18" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 18, true);">18</a>
3760
+ <a class="line-number" data-cell="forward_only" data-line="19" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 19, true);">19</a>
3761
+ <a class="line-number" data-cell="forward_only" data-line="20" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 20, true);">20</a>
3762
+ <a class="line-number" data-cell="forward_only" data-line="21" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 21, true);">21</a>
3763
+ <a class="line-number" data-cell="forward_only" data-line="22" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 22, true);">22</a>
3764
+ <a class="line-number" data-cell="forward_only" data-line="23" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 23, true);">23</a>
3765
+ <a class="line-number" data-cell="forward_only" data-line="24" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 24, true);">24</a>
3766
+ <a class="line-number" data-cell="forward_only" data-line="25" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 25, true);">25</a>
3767
+ <a class="line-number" data-cell="forward_only" data-line="26" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 26, true);">26</a>
3768
+ <a class="line-number" data-cell="forward_only" data-line="27" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 27, true);">27</a>
3769
+ <a class="line-number" data-cell="forward_only" data-line="28" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 28, true);">28</a>
3770
+ <a class="line-number" data-cell="forward_only" data-line="29" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 29, true);">29</a>
3771
+ <a class="line-number" data-cell="forward_only" data-line="30" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 30, true);">30</a>
3772
+ <a class="line-number" data-cell="forward_only" data-line="31" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 31, true);">31</a>
3773
+ <a class="line-number" data-cell="forward_only" data-line="32" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 32, true);">32</a>
3774
+ <a class="line-number" data-cell="forward_only" data-line="33" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 33, true);">33</a>
3775
+ <a class="line-number" data-cell="forward_only" data-line="34" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 34, true);">34</a>
3776
+ <a class="line-number" data-cell="forward_only" data-line="35" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 35, true);">35</a>
3777
+ <a class="line-number" data-cell="forward_only" data-line="36" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 36, true);">36</a>
3778
+ <a class="line-number" data-cell="forward_only" data-line="37" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 37, true);">37</a>
3779
+ <a class="line-number" data-cell="forward_only" data-line="38" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 38, true);">38</a>
3780
+ <a class="line-number" data-cell="forward_only" data-line="39" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 39, true);">39</a>
3781
+ <a class="line-number" data-cell="forward_only" data-line="40" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 40, true);">40</a>
3782
+ <a class="line-number" data-cell="forward_only" data-line="41" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 41, true);">41</a>
3783
+ <a class="line-number" data-cell="forward_only" data-line="42" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 42, true);">42</a>
3784
+ <a class="line-number" data-cell="forward_only" data-line="43" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 43, true);">43</a>
3785
+ <a class="line-number" data-cell="forward_only" data-line="44" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 44, true);">44</a>
3786
+ <a class="line-number" data-cell="forward_only" data-line="45" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 45, true);">45</a>
3787
+ <a class="line-number" data-cell="forward_only" data-line="46" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 46, true);">46</a>
3788
+ <a class="line-number" data-cell="forward_only" data-line="47" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 47, true);">47</a>
3789
+ <a class="line-number" data-cell="forward_only" data-line="48" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 48, true);">48</a>
3790
+ <a class="line-number" data-cell="forward_only" data-line="49" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 49, true);">49</a>
3791
+ <a class="line-number" data-cell="forward_only" data-line="50" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 50, true);">50</a>
3792
+ <a class="line-number" data-cell="forward_only" data-line="51" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 51, true);">51</a>
3793
+ <a class="line-number" data-cell="forward_only" data-line="52" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 52, true);">52</a>
3794
+ <a class="line-number" data-cell="forward_only" data-line="53" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 53, true);">53</a>
3795
+ <a class="line-number" data-cell="forward_only" data-line="54" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 54, true);">54</a>
3796
+ <a class="line-number" data-cell="forward_only" data-line="55" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 55, true);">55</a>
3797
+ <a class="line-number" data-cell="forward_only" data-line="56" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 56, true);">56</a>
3798
+ <a class="line-number" data-cell="forward_only" data-line="57" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 57, true);">57</a>
3799
+ <a class="line-number" data-cell="forward_only" data-line="58" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 58, true);">58</a>
3800
+ <a class="line-number" data-cell="forward_only" data-line="59" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 59, true);">59</a>
3801
+ <a class="line-number" data-cell="forward_only" data-line="60" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 60, true);">60</a>
3802
+ <a class="line-number" data-cell="forward_only" data-line="61" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 61, true);">61</a>
3803
+ <a class="line-number" data-cell="forward_only" data-line="62" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 62, true);">62</a>
3804
+ <a class="line-number" data-cell="forward_only" data-line="63" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 63, true);">63</a>
3805
+ <a class="line-number" data-cell="forward_only" data-line="64" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 64, true);">64</a>
3806
+ <a class="line-number" data-cell="forward_only" data-line="65" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 65, true);">65</a>
3807
+ <a class="line-number" data-cell="forward_only" data-line="66" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 66, true);">66</a>
3808
+ <a class="line-number" data-cell="forward_only" data-line="67" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 67, true);">67</a>
3809
+ <a class="line-number" data-cell="forward_only" data-line="68" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 68, true);">68</a>
3810
+ <a class="line-number" data-cell="forward_only" data-line="69" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 69, true);">69</a>
3811
+ <a class="line-number" data-cell="forward_only" data-line="70" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 70, true);">70</a>
3812
+ <a class="line-number" data-cell="forward_only" data-line="71" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 71, true);">71</a>
3813
+ <a class="line-number" data-cell="forward_only" data-line="72" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 72, true);">72</a>
3814
+ <a class="line-number" data-cell="forward_only" data-line="73" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 73, true);">73</a>
3815
+ <a class="line-number" data-cell="forward_only" data-line="74" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 74, true);">74</a>
3816
+ <a class="line-number" data-cell="forward_only" data-line="75" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 75, true);">75</a>
3817
+ <a class="line-number" data-cell="forward_only" data-line="76" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 76, true);">76</a>
3818
+ <a class="line-number" data-cell="forward_only" data-line="77" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 77, true);">77</a>
3819
+ <a class="line-number" data-cell="forward_only" data-line="78" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 78, true);">78</a>
3820
+ <a class="line-number" data-cell="forward_only" data-line="79" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 79, true);">79</a>
3821
+ <a class="line-number" data-cell="forward_only" data-line="80" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 80, true);">80</a>
3822
+ <a class="line-number" data-cell="forward_only" data-line="81" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 81, true);">81</a>
3823
+ <a class="line-number" data-cell="forward_only" data-line="82" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 82, true);">82</a>
3824
+ <a class="line-number" data-cell="forward_only" data-line="83" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 83, true);">83</a>
3825
+ <a class="line-number" data-cell="forward_only" data-line="84" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 84, true);">84</a>
3826
+ <a class="line-number" data-cell="forward_only" data-line="85" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 85, true);">85</a>
3827
+ <a class="line-number" data-cell="forward_only" data-line="86" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 86, true);">86</a>
3828
+ <a class="line-number" data-cell="forward_only" data-line="87" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 87, true);">87</a>
3829
+ <a class="line-number" data-cell="forward_only" data-line="88" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 88, true);">88</a>
3830
+ <a class="line-number" data-cell="forward_only" data-line="89" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 89, true);">89</a>
3831
+ <a class="line-number" data-cell="forward_only" data-line="90" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 90, true);">90</a>
3832
+ <a class="line-number" data-cell="forward_only" data-line="91" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 91, true);">91</a>
3833
+ <a class="line-number" data-cell="forward_only" data-line="92" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 92, true);">92</a>
3834
+ <a class="line-number" data-cell="forward_only" data-line="93" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 93, true);">93</a>
3835
+ <a class="line-number" data-cell="forward_only" data-line="94" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 94, true);">94</a>
3836
+ <a class="line-number" data-cell="forward_only" data-line="95" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 95, true);">95</a>
3837
+ <a class="line-number" data-cell="forward_only" data-line="96" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 96, true);">96</a>
3838
+ <a class="line-number" data-cell="forward_only" data-line="97" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 97, true);">97</a>
3839
+ <a class="line-number" data-cell="forward_only" data-line="98" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 98, true);">98</a>
3840
+ <a class="line-number" data-cell="forward_only" data-line="99" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 99, true);">99</a>
3841
+ <a class="line-number" data-cell="forward_only" data-line="100" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 100, true);">100</a>
3842
+ <a class="line-number" data-cell="forward_only" data-line="101" href="#cell-forward_only" onclick="event.preventDefault(); selectCellLine('forward_only', 101, true);">101</a>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3843
  </div>
3844
  <div class="code-wrap">
3845
  <div class="highlight"><pre><span></span><span class="c1"># /// script</span>
 
3866
  <span class="kn">import</span><span class="w"> </span><span class="nn">logging</span>
3867
  <span class="kn">from</span><span class="w"> </span><span class="nn">transformers.models.gpt_oss.modeling_gpt_oss</span><span class="w"> </span><span class="kn">import</span> <span class="n">GptOssRMSNorm</span>
3868
 
3869
+
3870
  <span class="n">replace_kernel_forward_from_hub</span><span class="p">(</span><span class="n">GptOssRMSNorm</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
3871
 
3872
  <span class="c1"># set to debug logging</span>
 
3907
  <span class="n">tokenizer</span> <span class="o">=</span> <span class="n">PreTrainedTokenizerFast</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span><span class="n">model_id</span><span class="p">)</span>
3908
  <span class="n">quantization_config</span> <span class="o">=</span> <span class="n">Mxfp4Config</span><span class="p">(</span><span class="n">dequantize</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
3909
 
3910
+
3911
+
3912
  <span class="n">model</span> <span class="o">=</span> <span class="n">GptOssForCausalLM</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span>
3913
  <span class="n">model_id</span><span class="p">,</span>
3914
  <span class="n">dtype</span><span class="o">=</span><span class="s2">&quot;bfloat16&quot;</span><span class="p">,</span>
 
3929
  <span class="n">reasoning_effort</span><span class="o">=</span><span class="s2">&quot;low&quot;</span><span class="p">,</span>
3930
  <span class="p">)</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="s2">&quot;cuda&quot;</span><span class="p">)</span>
3931
 
3932
+ <span class="n">max_tokens</span> <span class="o">=</span> <span class="mi">256</span>
3933
 
3934
+ <span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">inference_mode</span><span class="p">():</span>
 
 
 
 
 
3935
  <span class="n">start_time</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">perf_counter</span><span class="p">()</span>
3936
  <span class="n">generated</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">generate</span><span class="p">(</span>
3937
  <span class="o">**</span><span class="n">inputs</span><span class="p">,</span>
 
3940
  <span class="n">temperature</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
3941
  <span class="p">)</span>
3942
  <span class="n">end_time</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">perf_counter</span><span class="p">()</span>
3943
+
3944
+ <span class="nb">print</span><span class="p">(</span><span class="n">tokenizer</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="n">generated</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">skip_special_tokens</span><span class="o">=</span><span class="kc">False</span><span class="p">))</span>
3945
+ <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Generation took </span><span class="si">{</span><span class="n">end_time</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="n">start_time</span><span class="si">:</span><span class="s2">.2f</span><span class="si">}</span><span class="s2"> seconds&quot;</span><span class="p">)</span>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3946
  </pre></div>
3947
 
3948
+ <div class="code-line-highlight" id="line-highlight-forward_only"></div>
3949
  </div>
3950
  </div>
3951
  </div>
3952
+ <div id="output-forward_only" class="cell-output">
3953
+ <div class="cell-stderr">warning: The requested interpreter resolved to Python 3.11.13, which is incompatible with the script&#x27;s Python requirement: `&gt;=3.12`
 
3954
  Updating https://github.com/huggingface/transformers.git (HEAD)
3955
  Updated https://github.com/huggingface/transformers.git (449533af73874470e914a203391635e04ac2ffc8)
3956
  × No solution found when resolving script dependencies:
 
3966
  </div>
3967
  </div>
3968
  </div>
3969
+
3970
+ <h2>Forward and Backward</h2>
3971
+ <p>Next, we run a forward and backward pass with Megablocks kernels enabled. This should be more memory efficient and allow us to complete the backward pass without running out of memory.</p>
3972
  </div>
3973
 
3974
  </body>
moe_benchmarks/megablocks_yamoe/artifacts/binned_run/binned_results.json CHANGED
@@ -9,16 +9,16 @@
9
  "vary_inputs": true
10
  },
11
  "stats": {
12
- "avg_ms": 36.06324691992995,
13
- "min_ms": 33.29206800026441,
14
- "max_ms": 38.40615900026023,
15
- "std_ms": 1.258567678508065,
16
- "p50_ms": 36.21510599987232,
17
- "p95_ms": 37.524451049966956,
18
- "p99_ms": 38.03603995002959,
19
  "num_iters": 50,
20
- "tokens_per_s": 2772.906172925215,
21
- "throughput_variance": 98.28636435515342
22
  },
23
  "output_sum": 3.97190523147583
24
  }
 
9
  "vary_inputs": true
10
  },
11
  "stats": {
12
+ "avg_ms": 36.539247979999345,
13
+ "min_ms": 32.831213000008574,
14
+ "max_ms": 40.07397899999887,
15
+ "std_ms": 1.6136977896039295,
16
+ "p50_ms": 36.86953950000316,
17
+ "p95_ms": 39.00453930000651,
18
+ "p99_ms": 39.94982966000805,
19
  "num_iters": 50,
20
+ "tokens_per_s": 2736.7831996634814,
21
+ "throughput_variance": 122.96225627695094
22
  },
23
  "output_sum": 3.97190523147583
24
  }
moe_benchmarks/megablocks_yamoe/artifacts/gptoss_run/gptoss_results.json CHANGED
@@ -9,16 +9,16 @@
9
  "vary_inputs": true
10
  },
11
  "stats": {
12
- "avg_ms": 45.286630379978305,
13
- "min_ms": 38.91367899996112,
14
- "max_ms": 49.84392799997295,
15
- "std_ms": 3.2326168009526866,
16
- "p50_ms": 45.42240999990099,
17
- "p95_ms": 49.729684149951936,
18
- "p99_ms": 49.82545450991893,
19
  "num_iters": 50,
20
- "tokens_per_s": 2208.1572234663554,
21
- "throughput_variance": 161.27578702324564
22
  },
23
  "output_sum": 11.53223705291748
24
  }
 
9
  "vary_inputs": true
10
  },
11
  "stats": {
12
+ "avg_ms": 45.23668440000279,
13
+ "min_ms": 39.77574300000697,
14
+ "max_ms": 49.24737599998252,
15
+ "std_ms": 2.8509890347901194,
16
+ "p50_ms": 46.46113100000093,
17
+ "p95_ms": 48.43337355002859,
18
+ "p99_ms": 49.07736706999401,
19
  "num_iters": 50,
20
+ "tokens_per_s": 2210.595257507286,
21
+ "throughput_variance": 143.00506086156602
22
  },
23
  "output_sum": 11.53223705291748
24
  }
moe_benchmarks/megablocks_yamoe/artifacts/gptoss_training_run/gptoss_training_results.json CHANGED
@@ -9,16 +9,16 @@
9
  "vary_inputs": true
10
  },
11
  "stats": {
12
- "avg_ms": 46.01034353989235,
13
- "min_ms": 39.20698799993261,
14
- "max_ms": 51.09754699969926,
15
- "std_ms": 3.2594474712819497,
16
- "p50_ms": 46.132551999562565,
17
- "p95_ms": 50.721096600273086,
18
- "p99_ms": 51.0080171399477,
19
  "num_iters": 50,
20
- "tokens_per_s": 2173.4243282338675,
21
- "throughput_variance": 158.68467070353637
22
  },
23
  "output_sum": 11.53223705291748
24
  }
 
9
  "vary_inputs": true
10
  },
11
  "stats": {
12
+ "avg_ms": 45.75117022000086,
13
+ "min_ms": 38.86002600000893,
14
+ "max_ms": 50.81734300000562,
15
+ "std_ms": 2.8335743767450845,
16
+ "p50_ms": 45.83255949998488,
17
+ "p95_ms": 50.53969424997433,
18
+ "p99_ms": 50.77732372002288,
19
  "num_iters": 50,
20
+ "tokens_per_s": 2185.7364416939745,
21
+ "throughput_variance": 141.0139740289509
22
  },
23
  "output_sum": 11.53223705291748
24
  }
moe_benchmarks/megablocks_yamoe/artifacts/megablocks_run/megablocks_results.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "implementation": "megablocks_results",
3
+ "config": {
4
+ "warmup": 10,
5
+ "iters": 50,
6
+ "device": "cuda",
7
+ "dtype": "torch.float32",
8
+ "tokens": 100,
9
+ "vary_inputs": true
10
+ },
11
+ "stats": {
12
+ "avg_ms": 3.8699466199977905,
13
+ "min_ms": 0.810037999940505,
14
+ "max_ms": 8.541234000063014,
15
+ "std_ms": 3.7276327220093757,
16
+ "p50_ms": 0.840403500035336,
17
+ "p95_ms": 8.54010504996836,
18
+ "p99_ms": 8.54096940002023,
19
+ "num_iters": 50,
20
+ "tokens_per_s": 25840.149702131315,
21
+ "throughput_variance": 53236.043048659514
22
+ },
23
+ "output_sum": 6.4738850593566895
24
+ }
moe_benchmarks/megablocks_yamoe/artifacts/visualization/moe_performance_comparison.png ADDED

Git LFS Details

  • SHA256: 38613c8ecfe4c6a523b288e13d45feea334d2ef891f6300c0ed73e31f46a5338
  • Pointer size: 131 Bytes
  • Size of remote file: 308 kB
moe_benchmarks/megablocks_yamoe/artifacts/yamoe_run/yamoe_results.json CHANGED
@@ -9,16 +9,16 @@
9
  "vary_inputs": true
10
  },
11
  "stats": {
12
- "avg_ms": 4.2510544400101935,
13
- "min_ms": 4.144352999901457,
14
- "max_ms": 4.320155999266717,
15
- "std_ms": 0.02873328656403644,
16
- "p50_ms": 4.2539659998510615,
17
- "p95_ms": 4.2857709999225335,
18
- "p99_ms": 4.306132199617423,
19
  "num_iters": 50,
20
- "tokens_per_s": 23523.575482547854,
21
- "throughput_variance": 160.28680309512873
22
  },
23
  "output_sum": 3.97190523147583
24
  }
 
9
  "vary_inputs": true
10
  },
11
  "stats": {
12
+ "avg_ms": 4.249726199998349,
13
+ "min_ms": 4.124869000008857,
14
+ "max_ms": 4.300293000028432,
15
+ "std_ms": 0.0233051162040199,
16
+ "p50_ms": 4.253981000005069,
17
+ "p95_ms": 4.2678174999991825,
18
+ "p99_ms": 4.289845220023949,
19
  "num_iters": 50,
20
+ "tokens_per_s": 23530.927710128442,
21
+ "throughput_variance": 131.31218448800846
22
  },
23
  "output_sum": 3.97190523147583
24
  }
moe_benchmarks/megablocks_yamoe/cells/__pycache__/bench_utils.cpython-311.pyc CHANGED
Binary files a/moe_benchmarks/megablocks_yamoe/cells/__pycache__/bench_utils.cpython-311.pyc and b/moe_benchmarks/megablocks_yamoe/cells/__pycache__/bench_utils.cpython-311.pyc differ
 
moe_benchmarks/megablocks_yamoe/cells/__pycache__/config.cpython-311.pyc CHANGED
Binary files a/moe_benchmarks/megablocks_yamoe/cells/__pycache__/config.cpython-311.pyc and b/moe_benchmarks/megablocks_yamoe/cells/__pycache__/config.cpython-311.pyc differ
 
moe_benchmarks/megablocks_yamoe/cells/visualization.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# /// script
# dependencies = [
#     "matplotlib",
#     "numpy",
# ]
# ///
"""Compare MoE benchmark runs: load each run's JSON results, plot them, print a summary.

For every expected results file that exists, the parsed JSON is stored under its
``implementation`` key. When at least one file was loaded the script:

* saves a three-panel bar chart (average latency, p95 latency, throughput) to
  ``moe_performance_comparison.png`` in the current working directory, and
* prints a table sorted by average latency, with each implementation's speed
  relative to the fastest one.

Missing files are reported and skipped. If nothing could be loaded, only a hint
to run the benchmark cells is printed.
"""

import json
import matplotlib.pyplot as plt
import numpy as np  # imported directly, hence declared in the script metadata above
from pathlib import Path
import os

# (environment variable, results file name) for every expected benchmark run.
# Each variable supplies the directory to look in; when it is unset the current
# directory is used instead.
# NOTE(review): presumably uvnote sets these to the output directories of the
# upstream cells - confirm against the uvnote documentation.
RESULT_SOURCES = [
    ("UVNOTE_INPUT_YAMOE_RUN", "yamoe_results.json"),
    ("UVNOTE_INPUT_BINNED_RUN", "binned_results.json"),
    ("UVNOTE_INPUT_GPTOSS_RUN", "gptoss_results.json"),
    ("UVNOTE_INPUT_GPTOSS_TRAINING_RUN", "gptoss_training_results.json"),
    ("UVNOTE_INPUT_MEGABLOCKS_RUN", "megablocks_results.json"),
]

result_files = [Path(os.environ.get(env_var, '.')) / file_name
                for env_var, file_name in RESULT_SOURCES]


def _plot_metric(ax, labels, values, colors, title, ylabel, label_fmt, skip_nonpositive=False):
    """Draw one labelled bar chart on ``ax``.

    Args:
        ax: Matplotlib axes to draw on.
        labels: Bar names, one per implementation.
        values: Bar heights, in the same order as ``labels``.
        colors: Bar colours, in the same order as ``labels``.
        title: Chart title.
        ylabel: Y-axis label.
        label_fmt: ``str.format`` template used for the value printed above each bar.
        skip_nonpositive: When true, bars whose value is not greater than zero
            get no value label.
    """
    bars = ax.bar(labels, values, color=colors, alpha=0.8, edgecolor='black', linewidth=1)
    ax.set_title(title, fontweight='bold', fontsize=14)
    ax.set_ylabel(ylabel, fontweight='bold')
    ax.tick_params(axis='x', rotation=45)
    ax.grid(axis='y', alpha=0.3)

    # Lift each label 1% of the tallest bar above its own bar so the text
    # does not touch the bar edge.
    label_offset = max(values) * 0.01
    for bar, val in zip(bars, values):
        if skip_nonpositive and not val > 0:
            continue
        ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + label_offset,
                label_fmt.format(val), ha='center', va='bottom', fontweight='bold')


# Load all benchmark results, keyed by the implementation name stored in each file.
results = {}
for file in result_files:
    if file.exists():
        with open(file, 'r', encoding='utf-8') as f:
            data = json.load(f)
        results[data['implementation']] = data
        print(f"Loaded {file}")
    else:
        print(f"Missing {file}")

if not results:
    print("No benchmark results found. Run the benchmark cells first.")
else:
    # Extract data for plotting
    implementations = list(results.keys())
    avg_latencies = [results[impl]['stats']['avg_ms'] for impl in implementations]
    p95_latencies = [results[impl]['stats']['p95_ms'] for impl in implementations]
    # Throughput is optional in the results; a missing value is plotted as 0.
    throughputs = [results[impl]['stats'].get('tokens_per_s', 0) for impl in implementations]

    # Create figure with subplots
    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 6))
    fig.suptitle('MoE Implementation Performance Comparison', fontsize=16, fontweight='bold')

    # Colors for each implementation
    colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FECA57'][:len(implementations)]

    # 1. Average latency, 2. P95 latency, 3. Throughput
    _plot_metric(ax1, implementations, avg_latencies, colors,
                 'Average Latency', 'Latency (ms)', '{:.2f}ms')
    _plot_metric(ax2, implementations, p95_latencies, colors,
                 '95th Percentile Latency', 'Latency (ms)', '{:.2f}ms')
    # Only label throughput bars for which a throughput was actually calculated.
    _plot_metric(ax3, implementations, throughputs, colors,
                 'Throughput', 'Tokens/sec', '{:.0f}', skip_nonpositive=True)

    plt.tight_layout()
    plt.savefig("moe_performance_comparison.png", dpi=300)

    # Print summary table; rows use the same column widths as the header so
    # the columns line up, and the rule matches the header's length.
    header = f"{'Implementation':<30} {'Avg (ms)':<12} {'P95 (ms)':<12} {'Tokens/sec':<12} {'Relative Speed':<15}"
    print("\nPerformance Summary:")
    print(header)
    print("-" * len(header))

    # Sort by average latency for relative speed calculation
    sorted_results = sorted(results.items(), key=lambda x: x[1]['stats']['avg_ms'])
    fastest_latency = sorted_results[0][1]['stats']['avg_ms']

    for impl, data in sorted_results:
        avg_ms = data['stats']['avg_ms']
        p95_ms = data['stats']['p95_ms']
        tokens_s = data['stats'].get('tokens_per_s', 0)
        # 1.00x for the fastest implementation, below 1 for slower ones.
        relative_speed = fastest_latency / avg_ms

        print(f"{impl:<30} {avg_ms:<12.2f} {p95_ms:<12.2f} {tokens_s:<12.0f} {relative_speed:.2f}x")

    print(f"\nFastest: {sorted_results[0][0]} ({sorted_results[0][1]['stats']['avg_ms']:.2f}ms avg)")
    if len(sorted_results) > 1:
        print(f"Slowest: {sorted_results[-1][0]} ({sorted_results[-1][1]['stats']['avg_ms']:.2f}ms avg)")
        speedup = sorted_results[-1][1]['stats']['avg_ms'] / sorted_results[0][1]['stats']['avg_ms']
        print(f"Max Speedup: {speedup:.1f}x")
moe_benchmarks/megablocks_yamoe/megablocks_yamoe.html CHANGED
@@ -3726,7 +3726,7 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left:
3726
  <span onclick="toggleOutput('setup')" style="cursor: pointer;">▼ output</span>
3727
  <span id="uv-indicator-setup" style="cursor: default; opacity: 0.3;">▶ uv-logs</span>
3728
  </span> |
3729
- Cell: setup | 19.20s | FAILED
3730
  | <button class="run-btn" onclick="runCell('setup')">▶ run</button>
3731
  <button class="copy-btn" onclick="copyCell('setup')">Copy</button>
3732
  <a href="cells/setup.py" target="_blank" class="raw-btn">Raw</a>
@@ -3975,8 +3975,7 @@ Cell: setup | 19.20s | FAILED
3975
  </div>
3976
  </div>
3977
  <div id="output-setup" class="cell-output">
3978
- <div class="cell-stderr">Downloading cpython-3.13.7-linux-x86_64-gnu (download) (32.0MiB)
3979
- Downloading cpython-3.13.7-linux-x86_64-gnu (download)
3980
  Updating https://github.com/huggingface/transformers.git (HEAD)
3981
  Updated https://github.com/huggingface/transformers.git (449533af73874470e914a203391635e04ac2ffc8)
3982
  × No solution found when resolving script dependencies:
 
3726
  <span onclick="toggleOutput('setup')" style="cursor: pointer;">▼ output</span>
3727
  <span id="uv-indicator-setup" style="cursor: default; opacity: 0.3;">▶ uv-logs</span>
3728
  </span> |
3729
+ Cell: setup | 17.01s | FAILED
3730
  | <button class="run-btn" onclick="runCell('setup')">▶ run</button>
3731
  <button class="copy-btn" onclick="copyCell('setup')">Copy</button>
3732
  <a href="cells/setup.py" target="_blank" class="raw-btn">Raw</a>
 
3975
  </div>
3976
  </div>
3977
  <div id="output-setup" class="cell-output">
3978
+ <div class="cell-stderr">warning: The requested interpreter resolved to Python 3.11.13, which is incompatible with the script&#x27;s Python requirement: `&gt;=3.12`
 
3979
  Updating https://github.com/huggingface/transformers.git (HEAD)
3980
  Updated https://github.com/huggingface/transformers.git (449533af73874470e914a203391635e04ac2ffc8)
3981
  × No solution found when resolving script dependencies:
moe_benchmarks/megablocks_yamoe/torch_profile.html CHANGED
@@ -3720,7 +3720,7 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left:
3720
  <span onclick="toggleOutput('utils')" style="cursor: pointer;">▼ output</span>
3721
  <span id="uv-indicator-utils" onclick="toggleUvLogsFromHeader('utils')" style="cursor: pointer;">▶ uv-logs</span>
3722
  </span> |
3723
- Cell: utils | deps: torch, numpy | 34.59s
3724
  | <button class="run-btn" onclick="runCell('utils')">▶ run</button>
3725
  <button class="copy-btn" onclick="copyCell('utils')">Copy</button>
3726
  <a href="cells/utils.py" target="_blank" class="raw-btn">Raw</a>
@@ -3794,23 +3794,23 @@ Cell: utils | deps: torch, numpy | 34.59s
3794
  <div class="uv-install-logs" id="uv-logs-utils">
3795
  <div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
3796
  <div class="uv-logs-content" style="display: none;">
3797
- Downloading setuptools (1.1MiB)
3798
- Downloading networkx (1.9MiB)
3799
- Downloading nvidia-cublas-cu12 (566.8MiB)
3800
- Downloading nvidia-cusparse-cu12 (274.9MiB)
3801
  Downloading nvidia-cufile-cu12 (1.1MiB)
3802
- Downloading sympy (6.0MiB)
3803
- Downloading nvidia-nvjitlink-cu12 (37.4MiB)
3804
- Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
3805
  Downloading nvidia-cusparselt-cu12 (273.9MiB)
3806
- Downloading nvidia-curand-cu12 (60.7MiB)
3807
  Downloading torch (846.9MiB)
3808
- Downloading nvidia-cudnn-cu12 (674.0MiB)
3809
- Downloading nvidia-nccl-cu12 (307.4MiB)
3810
- Downloading nvidia-cufft-cu12 (184.2MiB)
3811
- Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
3812
- Downloading numpy (16.2MiB)
3813
  Downloading nvidia-cusolver-cu12 (255.1MiB)
 
 
 
 
3814
  Downloading triton (148.3MiB)
3815
  Downloading nvidia-cufile-cu12
3816
  Downloading setuptools
@@ -3830,7 +3830,7 @@ Downloading triton (148.3MiB)
3830
  Downloading nvidia-cublas-cu12
3831
  Downloading nvidia-cudnn-cu12
3832
  Downloading torch
3833
- Installed 26 packages in 452ms
3834
  </div>
3835
  </div>
3836
  </div>
@@ -3843,7 +3843,7 @@ Installed 26 packages in 452ms
3843
  <span onclick="toggleOutput('bench_utils')" style="cursor: pointer;">▼ output</span>
3844
  <span id="uv-indicator-bench_utils" onclick="toggleUvLogsFromHeader('bench_utils')" style="cursor: pointer;">▶ uv-logs</span>
3845
  </span> |
3846
- Cell: bench_utils | deps: torch, numpy | 35.65s
3847
  | <button class="run-btn" onclick="runCell('bench_utils')">▶ run</button>
3848
  <button class="copy-btn" onclick="copyCell('bench_utils')">Copy</button>
3849
  <a href="cells/bench_utils.py" target="_blank" class="raw-btn">Raw</a>
@@ -4331,23 +4331,23 @@ Cell: bench_utils | deps: torch, numpy | 35.65s
4331
  <div class="uv-install-logs" id="uv-logs-bench_utils">
4332
  <div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
4333
  <div class="uv-logs-content" style="display: none;">
4334
- Downloading nvidia-nvjitlink-cu12 (37.4MiB)
4335
- Downloading nvidia-cusparselt-cu12 (273.9MiB)
4336
- Downloading nvidia-cusparse-cu12 (274.9MiB)
4337
- Downloading networkx (1.9MiB)
4338
- Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
4339
- Downloading nvidia-cudnn-cu12 (674.0MiB)
4340
  Downloading setuptools (1.1MiB)
4341
  Downloading nvidia-cufile-cu12 (1.1MiB)
4342
- Downloading nvidia-cufft-cu12 (184.2MiB)
4343
- Downloading nvidia-cusolver-cu12 (255.1MiB)
4344
- Downloading torch (846.9MiB)
4345
  Downloading sympy (6.0MiB)
4346
- Downloading numpy (16.2MiB)
4347
  Downloading nvidia-curand-cu12 (60.7MiB)
4348
- Downloading nvidia-nccl-cu12 (307.4MiB)
4349
- Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
4350
  Downloading nvidia-cublas-cu12 (566.8MiB)
 
 
 
 
 
 
 
 
 
 
 
4351
  Downloading triton (148.3MiB)
4352
  Downloading nvidia-cufile-cu12
4353
  Downloading setuptools
@@ -4367,7 +4367,7 @@ Downloading triton (148.3MiB)
4367
  Downloading nvidia-cublas-cu12
4368
  Downloading nvidia-cudnn-cu12
4369
  Downloading torch
4370
- Installed 26 packages in 452ms
4371
  </div>
4372
  </div>
4373
  </div>
@@ -4381,7 +4381,7 @@ Installed 26 packages in 452ms
4381
  <span onclick="toggleOutput('config')" style="cursor: pointer;">▼ output</span>
4382
  <span id="uv-indicator-config" onclick="toggleUvLogsFromHeader('config')" style="cursor: pointer;">▶ uv-logs</span>
4383
  </span> |
4384
- Cell: config | deps: torch, numpy | 34.53s
4385
  | <button class="run-btn" onclick="runCell('config')">▶ run</button>
4386
  <button class="copy-btn" onclick="copyCell('config')">Copy</button>
4387
  <a href="cells/config.py" target="_blank" class="raw-btn">Raw</a>
@@ -4441,23 +4441,23 @@ Cell: config | deps: torch, numpy | 34.53s
4441
  <div class="uv-install-logs" id="uv-logs-config">
4442
  <div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
4443
  <div class="uv-logs-content" style="display: none;">
 
4444
  Downloading nvidia-cufile-cu12 (1.1MiB)
 
 
4445
  Downloading nvidia-cusparse-cu12 (274.9MiB)
 
 
 
 
4446
  Downloading nvidia-nccl-cu12 (307.4MiB)
 
4447
  Downloading nvidia-cusparselt-cu12 (273.9MiB)
4448
- Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
4449
- Downloading nvidia-cudnn-cu12 (674.0MiB)
4450
- Downloading numpy (16.2MiB)
4451
  Downloading networkx (1.9MiB)
4452
- Downloading setuptools (1.1MiB)
4453
- Downloading torch (846.9MiB)
4454
- Downloading nvidia-cufft-cu12 (184.2MiB)
4455
- Downloading nvidia-nvjitlink-cu12 (37.4MiB)
4456
- Downloading nvidia-curand-cu12 (60.7MiB)
4457
  Downloading nvidia-cublas-cu12 (566.8MiB)
4458
- Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
4459
- Downloading nvidia-cusolver-cu12 (255.1MiB)
4460
- Downloading sympy (6.0MiB)
4461
  Downloading triton (148.3MiB)
4462
  Downloading nvidia-cufile-cu12
4463
  Downloading setuptools
@@ -4474,10 +4474,10 @@ Downloading triton (148.3MiB)
4474
  Downloading nvidia-cusparselt-cu12
4475
  Downloading nvidia-cusparse-cu12
4476
  Downloading nvidia-nccl-cu12
4477
- Downloading nvidia-cublas-cu12
4478
  Downloading nvidia-cudnn-cu12
 
4479
  Downloading torch
4480
- Installed 26 packages in 448ms
4481
  </div>
4482
  </div>
4483
  </div>
@@ -4490,7 +4490,7 @@ Installed 26 packages in 448ms
4490
  <span onclick="toggleOutput('save_data')" style="cursor: pointer;">▼ output</span>
4491
  <span id="uv-indicator-save_data" onclick="toggleUvLogsFromHeader('save_data')" style="cursor: pointer;">▶ uv-logs</span>
4492
  </span> |
4493
- Cell: save_data | deps: torch, numpy | 39.05s
4494
  | <button class="run-btn" onclick="runCell('save_data')">▶ run</button>
4495
  <button class="copy-btn" onclick="copyCell('save_data')">Copy</button>
4496
  <a href="cells/save_data.py" target="_blank" class="raw-btn">Raw</a>
@@ -4585,24 +4585,24 @@ Down sum: 206.729263
4585
  <div class="uv-install-logs" id="uv-logs-save_data">
4586
  <div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
4587
  <div class="uv-logs-content" style="display: none;">
4588
- Downloading networkx (1.9MiB)
4589
  Downloading nvidia-cufft-cu12 (184.2MiB)
4590
- Downloading nvidia-cufile-cu12 (1.1MiB)
4591
  Downloading nvidia-nvjitlink-cu12 (37.4MiB)
 
 
 
 
4592
  Downloading nvidia-nccl-cu12 (307.4MiB)
4593
- Downloading sympy (6.0MiB)
 
 
 
 
4594
  Downloading nvidia-cudnn-cu12 (674.0MiB)
4595
  Downloading nvidia-cublas-cu12 (566.8MiB)
4596
- Downloading setuptools (1.1MiB)
4597
- Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
4598
  Downloading nvidia-cusparse-cu12 (274.9MiB)
4599
- Downloading nvidia-cusolver-cu12 (255.1MiB)
4600
- Downloading nvidia-curand-cu12 (60.7MiB)
4601
  Downloading nvidia-cusparselt-cu12 (273.9MiB)
4602
- Downloading triton (148.3MiB)
4603
- Downloading torch (846.9MiB)
4604
- Downloading numpy (16.2MiB)
4605
- Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
4606
  Downloading nvidia-cufile-cu12
4607
  Downloading setuptools
4608
  Downloading networkx
@@ -4615,23 +4615,23 @@ Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
4615
  Downloading triton
4616
  Downloading nvidia-cufft-cu12
4617
  Downloading nvidia-cusolver-cu12
4618
- Downloading nvidia-cusparselt-cu12
4619
  Downloading nvidia-cusparse-cu12
 
4620
  Downloading nvidia-nccl-cu12
4621
  Downloading nvidia-cublas-cu12
4622
  Downloading nvidia-cudnn-cu12
4623
  Downloading torch
4624
- Installed 26 packages in 450ms
4625
  </div>
4626
  </div>
4627
  <div class="cell-artifacts">
4628
  <h4>Artifacts:</h4>
4629
  <a href="artifacts/save_data/router_bias.pt" class="artifact" target="_blank">router_bias.pt</a>
4630
- <a href="artifacts/save_data/gate_up_proj_bias.pt" class="artifact" target="_blank">gate_up_proj_bias.pt</a>
4631
- <a href="artifacts/save_data/down_proj.pt" class="artifact" target="_blank">down_proj.pt</a>
4632
- <a href="artifacts/save_data/gate_up_proj.pt" class="artifact" target="_blank">gate_up_proj.pt</a>
4633
- <a href="artifacts/save_data/down_proj_bias.pt" class="artifact" target="_blank">down_proj_bias.pt</a>
4634
  <a href="artifacts/save_data/router_weight.pt" class="artifact" target="_blank">router_weight.pt</a>
 
 
 
 
4635
  </div>
4636
  </div>
4637
  </div>
@@ -4645,7 +4645,7 @@ Installed 26 packages in 450ms
4645
  <span onclick="toggleOutput('yamoe_run')" style="cursor: pointer;">▼ output</span>
4646
  <span id="uv-indicator-yamoe_run" onclick="toggleUvLogsFromHeader('yamoe_run')" style="cursor: pointer;">▶ uv-logs</span>
4647
  </span> |
4648
- Cell: yamoe_run | deps: torch, kernels, numpy | 39.19s
4649
  | <button class="run-btn" onclick="runCell('yamoe_run')">▶ run</button>
4650
  <button class="copy-btn" onclick="copyCell('yamoe_run')">Copy</button>
4651
  <a href="cells/yamoe_run.py" target="_blank" class="raw-btn">Raw</a>
@@ -4916,7 +4916,7 @@ Cell: yamoe_run | deps: torch, kernels, numpy | 39.19s
4916
  </div>
4917
  </div>
4918
  <div id="output-yamoe_run" class="cell-output">
4919
- <div class="cell-stdout">Loading weights from: /repo/moe_benchmarks/megablocks_yamoe/.uvnote/cache/f8744f31d9cf720409852d42748815c6d61f005a2a9b297b7b9bf986ed98bb90
4920
  Loaded shared weights from artifacts
4921
  Router weight sum: 12.588732
4922
  Gate/up sum: 1026.601807
@@ -4939,9 +4939,9 @@ Input Variation: +0.001 * iteration (deterministic)
4939
  Warming up (10 iterations)...
4940
  Benchmarking (50 iterations)...
4941
  Progress: 20% complete (avg: 4.253 ms)
4942
- Progress: 40% complete (avg: 4.250 ms)
4943
- Progress: 60% complete (avg: 4.250 ms)
4944
- Progress: 80% complete (avg: 4.251 ms)
4945
 
4946
  Output tensors:
4947
  Primary: shape=(1, 100, 1152), dtype=torch.float32, device=cuda:0, range=[-0.049506, 0.054984], mean=0.000034, std=0.006508, norm=2.208791
@@ -4951,19 +4951,19 @@ Output tensors:
4951
  Iterations: 50
4952
 
4953
  Latency Statistics:
4954
- Average: 4.251 ms
4955
- Min: 4.144 ms
4956
- Max: 4.320 ms
4957
- Std Dev: 0.029 ms
4958
 
4959
  Percentiles:
4960
  P50 (median): 4.254 ms
4961
- P95: 4.286 ms
4962
- P99: 4.306 ms
4963
 
4964
  Throughput:
4965
- Tokens/sec: 23523.6
4966
- Std Dev: 160.3
4967
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
4968
 
4969
  Saved benchmark results to yamoe_results.json
@@ -4973,25 +4973,25 @@ Output sum: 3.971905
4973
  <div class="uv-install-logs" id="uv-logs-yamoe_run">
4974
  <div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
4975
  <div class="uv-logs-content" style="display: none;">
4976
- Downloading nvidia-cufile-cu12 (1.1MiB)
 
 
 
4977
  Downloading networkx (1.9MiB)
4978
- Downloading nvidia-cusparselt-cu12 (273.9MiB)
 
4979
  Downloading sympy (6.0MiB)
 
4980
  Downloading nvidia-cublas-cu12 (566.8MiB)
4981
- Downloading numpy (16.2MiB)
4982
- Downloading nvidia-cudnn-cu12 (674.0MiB)
4983
  Downloading nvidia-cufft-cu12 (184.2MiB)
4984
- Downloading nvidia-nccl-cu12 (307.4MiB)
4985
- Downloading setuptools (1.1MiB)
4986
  Downloading nvidia-cusolver-cu12 (255.1MiB)
4987
- Downloading nvidia-curand-cu12 (60.7MiB)
4988
- Downloading nvidia-cusparse-cu12 (274.9MiB)
4989
- Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
4990
- Downloading hf-xet (3.0MiB)
4991
  Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
 
4992
  Downloading nvidia-nvjitlink-cu12 (37.4MiB)
4993
- Downloading torch (846.9MiB)
4994
- Downloading triton (148.3MiB)
4995
  Downloading nvidia-cufile-cu12
4996
  Downloading hf-xet
4997
  Downloading setuptools
@@ -5011,13 +5011,14 @@ Downloading triton (148.3MiB)
5011
  Downloading nvidia-cublas-cu12
5012
  Downloading nvidia-cudnn-cu12
5013
  Downloading torch
5014
- Installed 37 packages in 454ms
5015
  </div>
5016
  </div>
5017
  <div class="cell-stderr">Fetching 6 files: 0%| | 0/6 [00:00&lt;?, ?it/s]
5018
- Fetching 6 files: 17%|█▋ | 1/6 [00:00&lt;00:01, 3.18it/s]
5019
- Fetching 6 files: 50%|█████ | 3/6 [00:00&lt;00:00, 3.84it/s]
5020
- Fetching 6 files: 100%|██████████| 6/6 [00:00&lt;00:00, 7.53it/s]</div>
 
5021
  <div class="cell-artifacts">
5022
  <h4>Artifacts:</h4>
5023
  <a href="artifacts/yamoe_run/yamoe_results.json" class="artifact" target="_blank">yamoe_results.json</a>
@@ -5034,7 +5035,7 @@ Fetching 6 files: 100%|██████████| 6/6 [00:00&lt;00:00, 7.5
5034
  <span onclick="toggleOutput('binned_run')" style="cursor: pointer;">▼ output</span>
5035
  <span id="uv-indicator-binned_run" onclick="toggleUvLogsFromHeader('binned_run')" style="cursor: pointer;">▶ uv-logs</span>
5036
  </span> |
5037
- Cell: binned_run | deps: torch, numpy | 39.23s
5038
  | <button class="run-btn" onclick="runCell('binned_run')">▶ run</button>
5039
  <button class="copy-btn" onclick="copyCell('binned_run')">Copy</button>
5040
  <a href="cells/binned_run.py" target="_blank" class="raw-btn">Raw</a>
@@ -5448,10 +5449,10 @@ Input Variation: +0.001 * iteration (deterministic)
5448
 
5449
  Warming up (10 iterations)...
5450
  Benchmarking (50 iterations)...
5451
- Progress: 20% complete (avg: 37.503 ms)
5452
- Progress: 40% complete (avg: 37.304 ms)
5453
- Progress: 60% complete (avg: 36.964 ms)
5454
- Progress: 80% complete (avg: 36.508 ms)
5455
 
5456
  Output tensors:
5457
  Primary: shape=(1, 100, 1152), dtype=torch.float32, device=cuda:0, range=[-0.049506, 0.054984], mean=0.000034, std=0.006508, norm=2.208791
@@ -5461,19 +5462,19 @@ Output tensors:
5461
  Iterations: 50
5462
 
5463
  Latency Statistics:
5464
- Average: 36.063 ms
5465
- Min: 33.292 ms
5466
- Max: 38.406 ms
5467
- Std Dev: 1.259 ms
5468
 
5469
  Percentiles:
5470
- P50 (median): 36.215 ms
5471
- P95: 37.524 ms
5472
- P99: 38.036 ms
5473
 
5474
  Throughput:
5475
- Tokens/sec: 2772.9
5476
- Std Dev: 98.3
5477
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
5478
 
5479
  Saved benchmark results to binned_results.json
@@ -5483,23 +5484,23 @@ Output sum: 3.971905
5483
  <div class="uv-install-logs" id="uv-logs-binned_run">
5484
  <div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
5485
  <div class="uv-logs-content" style="display: none;">
5486
- Downloading sympy (6.0MiB)
 
 
 
5487
  Downloading nvidia-cusolver-cu12 (255.1MiB)
5488
  Downloading networkx (1.9MiB)
5489
- Downloading nvidia-cusparse-cu12 (274.9MiB)
5490
- Downloading nvidia-curand-cu12 (60.7MiB)
5491
  Downloading nvidia-nvjitlink-cu12 (37.4MiB)
5492
- Downloading nvidia-cublas-cu12 (566.8MiB)
5493
- Downloading nvidia-cufile-cu12 (1.1MiB)
5494
  Downloading nvidia-nccl-cu12 (307.4MiB)
5495
- Downloading nvidia-cudnn-cu12 (674.0MiB)
5496
- Downloading torch (846.9MiB)
5497
- Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
5498
- Downloading nvidia-cusparselt-cu12 (273.9MiB)
5499
- Downloading setuptools (1.1MiB)
5500
  Downloading numpy (16.2MiB)
5501
- Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
 
 
5502
  Downloading triton (148.3MiB)
 
 
5503
  Downloading nvidia-cufft-cu12 (184.2MiB)
5504
  Downloading nvidia-cufile-cu12
5505
  Downloading setuptools
@@ -5513,13 +5514,13 @@ Downloading nvidia-cufft-cu12 (184.2MiB)
5513
  Downloading triton
5514
  Downloading nvidia-cufft-cu12
5515
  Downloading nvidia-cusolver-cu12
5516
- Downloading nvidia-cusparse-cu12
5517
  Downloading nvidia-cusparselt-cu12
 
5518
  Downloading nvidia-nccl-cu12
5519
  Downloading nvidia-cublas-cu12
5520
  Downloading nvidia-cudnn-cu12
5521
  Downloading torch
5522
- Installed 26 packages in 449ms
5523
  </div>
5524
  </div>
5525
  <div class="cell-artifacts">
@@ -5538,7 +5539,7 @@ Installed 26 packages in 449ms
5538
  <span onclick="toggleOutput('gptoss_run')" style="cursor: pointer;">▼ output</span>
5539
  <span id="uv-indicator-gptoss_run" onclick="toggleUvLogsFromHeader('gptoss_run')" style="cursor: pointer;">▶ uv-logs</span>
5540
  </span> |
5541
- Cell: gptoss_run | deps: torch, numpy | 39.77s
5542
  | <button class="run-btn" onclick="runCell('gptoss_run')">▶ run</button>
5543
  <button class="copy-btn" onclick="copyCell('gptoss_run')">Copy</button>
5544
  <a href="cells/gptoss_run.py" target="_blank" class="raw-btn">Raw</a>
@@ -5856,10 +5857,10 @@ Input Variation: +0.001 * iteration (deterministic)
5856
 
5857
  Warming up (10 iterations)...
5858
  Benchmarking (50 iterations)...
5859
- Progress: 20% complete (avg: 48.905 ms)
5860
- Progress: 40% complete (avg: 48.717 ms)
5861
- Progress: 60% complete (avg: 47.570 ms)
5862
- Progress: 80% complete (avg: 46.370 ms)
5863
 
5864
  Output tensors:
5865
  Primary: shape=(1, 100, 1152), dtype=torch.float32, device=cuda:0, range=[-0.064982, 0.061193], mean=0.000100, std=0.013510, norm=4.585560
@@ -5869,19 +5870,19 @@ Output tensors:
5869
  Iterations: 50
5870
 
5871
  Latency Statistics:
5872
- Average: 45.287 ms
5873
- Min: 38.914 ms
5874
- Max: 49.844 ms
5875
- Std Dev: 3.233 ms
5876
 
5877
  Percentiles:
5878
- P50 (median): 45.422 ms
5879
- P95: 49.730 ms
5880
- P99: 49.825 ms
5881
 
5882
  Throughput:
5883
- Tokens/sec: 2208.2
5884
- Std Dev: 161.3
5885
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
5886
 
5887
  Saved benchmark results to gptoss_results.json
@@ -5891,24 +5892,24 @@ Output sum: 11.532237
5891
  <div class="uv-install-logs" id="uv-logs-gptoss_run">
5892
  <div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
5893
  <div class="uv-logs-content" style="display: none;">
5894
- Downloading networkx (1.9MiB)
5895
- Downloading setuptools (1.1MiB)
5896
- Downloading nvidia-nccl-cu12 (307.4MiB)
5897
  Downloading nvidia-cusparse-cu12 (274.9MiB)
5898
- Downloading sympy (6.0MiB)
5899
- Downloading nvidia-curand-cu12 (60.7MiB)
5900
- Downloading nvidia-cufft-cu12 (184.2MiB)
5901
  Downloading nvidia-cusparselt-cu12 (273.9MiB)
5902
  Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
5903
- Downloading nvidia-nvjitlink-cu12 (37.4MiB)
 
 
 
5904
  Downloading nvidia-cufile-cu12 (1.1MiB)
 
5905
  Downloading nvidia-cublas-cu12 (566.8MiB)
 
5906
  Downloading nvidia-cusolver-cu12 (255.1MiB)
5907
  Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
5908
- Downloading nvidia-cudnn-cu12 (674.0MiB)
5909
- Downloading numpy (16.2MiB)
5910
- Downloading triton (148.3MiB)
5911
  Downloading torch (846.9MiB)
 
5912
  Downloading nvidia-cufile-cu12
5913
  Downloading setuptools
5914
  Downloading networkx
@@ -5921,13 +5922,13 @@ Downloading torch (846.9MiB)
5921
  Downloading triton
5922
  Downloading nvidia-cufft-cu12
5923
  Downloading nvidia-cusolver-cu12
5924
- Downloading nvidia-cusparse-cu12
5925
  Downloading nvidia-cusparselt-cu12
 
5926
  Downloading nvidia-nccl-cu12
5927
  Downloading nvidia-cublas-cu12
5928
  Downloading nvidia-cudnn-cu12
5929
  Downloading torch
5930
- Installed 26 packages in 453ms
5931
  </div>
5932
  </div>
5933
  <div class="cell-artifacts">
@@ -5946,7 +5947,7 @@ Installed 26 packages in 453ms
5946
  <span onclick="toggleOutput('gptoss_training_run')" style="cursor: pointer;">▼ output</span>
5947
  <span id="uv-indicator-gptoss_training_run" onclick="toggleUvLogsFromHeader('gptoss_training_run')" style="cursor: pointer;">▶ uv-logs</span>
5948
  </span> |
5949
- Cell: gptoss_training_run | deps: torch, numpy | 40.24s
5950
  | <button class="run-btn" onclick="runCell('gptoss_training_run')">▶ run</button>
5951
  <button class="copy-btn" onclick="copyCell('gptoss_training_run')">Copy</button>
5952
  <a href="cells/gptoss_training_run.py" target="_blank" class="raw-btn">Raw</a>
@@ -6247,10 +6248,10 @@ Input Variation: +0.001 * iteration (deterministic)
6247
 
6248
  Warming up (10 iterations)...
6249
  Benchmarking (50 iterations)...
6250
- Progress: 20% complete (avg: 49.963 ms)
6251
- Progress: 40% complete (avg: 49.344 ms)
6252
- Progress: 60% complete (avg: 48.274 ms)
6253
- Progress: 80% complete (avg: 47.165 ms)
6254
 
6255
  Output tensors:
6256
  Primary: shape=(1, 100, 1152), dtype=torch.float32, device=cuda:0, range=[-0.064982, 0.061193], mean=0.000100, std=0.013510, norm=4.585560
@@ -6260,19 +6261,19 @@ Output tensors:
6260
  Iterations: 50
6261
 
6262
  Latency Statistics:
6263
- Average: 46.010 ms
6264
- Min: 39.207 ms
6265
- Max: 51.098 ms
6266
- Std Dev: 3.259 ms
6267
 
6268
  Percentiles:
6269
- P50 (median): 46.133 ms
6270
- P95: 50.721 ms
6271
- P99: 51.008 ms
6272
 
6273
  Throughput:
6274
- Tokens/sec: 2173.4
6275
- Std Dev: 158.7
6276
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
6277
 
6278
  Saved benchmark results to gptoss_training_results.json
@@ -6282,24 +6283,24 @@ Output sum: 11.532237
6282
  <div class="uv-install-logs" id="uv-logs-gptoss_training_run">
6283
  <div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
6284
  <div class="uv-logs-content" style="display: none;">
6285
- Downloading setuptools (1.1MiB)
6286
  Downloading sympy (6.0MiB)
 
 
 
 
6287
  Downloading numpy (16.2MiB)
 
6288
  Downloading networkx (1.9MiB)
6289
- Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
6290
- Downloading nvidia-cufft-cu12 (184.2MiB)
6291
  Downloading nvidia-cusparselt-cu12 (273.9MiB)
6292
  Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
6293
- Downloading torch (846.9MiB)
6294
  Downloading nvidia-cudnn-cu12 (674.0MiB)
6295
- Downloading nvidia-cusparse-cu12 (274.9MiB)
6296
- Downloading nvidia-cusolver-cu12 (255.1MiB)
6297
  Downloading nvidia-nccl-cu12 (307.4MiB)
6298
- Downloading nvidia-cufile-cu12 (1.1MiB)
6299
- Downloading nvidia-curand-cu12 (60.7MiB)
6300
  Downloading nvidia-nvjitlink-cu12 (37.4MiB)
6301
- Downloading nvidia-cublas-cu12 (566.8MiB)
6302
  Downloading triton (148.3MiB)
 
6303
  Downloading nvidia-cufile-cu12
6304
  Downloading setuptools
6305
  Downloading networkx
@@ -6318,7 +6319,7 @@ Downloading triton (148.3MiB)
6318
  Downloading nvidia-cublas-cu12
6319
  Downloading nvidia-cudnn-cu12
6320
  Downloading torch
6321
- Installed 26 packages in 448ms
6322
  </div>
6323
  </div>
6324
  <div class="cell-artifacts">
@@ -6330,14 +6331,14 @@ Installed 26 packages in 448ms
6330
 
6331
  <h2>MegaBlocks Implementation</h2>
6332
  <p>This section runs the MegaBlocks MoE implementation with optimized kernels from the Hugging Face hub.</p>
6333
- <div class="cell cell-failed" id="cell-megablocks_run">
6334
  <div class="cell-header">
6335
  <span class="collapse-indicators">
6336
  <span onclick="toggleCode('megablocks_run')" style="cursor: pointer;">▼ code</span>
6337
  <span onclick="toggleOutput('megablocks_run')" style="cursor: pointer;">▼ output</span>
6338
  <span id="uv-indicator-megablocks_run" onclick="toggleUvLogsFromHeader('megablocks_run')" style="cursor: pointer;">▶ uv-logs</span>
6339
  </span> |
6340
- Cell: megablocks_run | deps: torch, numpy, kernels | 40.58s | FAILED
6341
  | <button class="run-btn" onclick="runCell('megablocks_run')">▶ run</button>
6342
  <button class="copy-btn" onclick="copyCell('megablocks_run')">Copy</button>
6343
  <a href="cells/megablocks_run.py" target="_blank" class="raw-btn">Raw</a>
@@ -6544,7 +6545,7 @@ Cell: megablocks_run | deps: torch, numpy, kernels | 40.58s | FAILED
6544
  </div>
6545
  </div>
6546
  <div id="output-megablocks_run" class="cell-output">
6547
- <div class="cell-stdout">Loading weights from: /repo/moe_benchmarks/megablocks_yamoe/.uvnote/cache/f8744f31d9cf720409852d42748815c6d61f005a2a9b297b7b9bf986ed98bb90
6548
  Loaded shared weights from artifacts
6549
  Router weight sum: 12.588732
6550
  Gate/up sum: 1026.601807
@@ -6565,29 +6566,61 @@ Base Input: shape=(1, 100, 1152), dtype=torch.float32, device=cuda:0, range=[-0.
6565
  Input Variation: +0.001 * iteration (deterministic)
6566
 
6567
  Warming up (10 iterations)...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6568
  </div>
6569
  <div class="uv-install-logs" id="uv-logs-megablocks_run">
6570
  <div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
6571
  <div class="uv-logs-content" style="display: none;">
6572
- Downloading sympy (6.0MiB)
6573
- Downloading setuptools (1.1MiB)
6574
- Downloading nvidia-nvjitlink-cu12 (37.4MiB)
6575
- Downloading numpy (16.2MiB)
6576
  Downloading nvidia-cufile-cu12 (1.1MiB)
6577
- Downloading nvidia-cudnn-cu12 (674.0MiB)
6578
- Downloading nvidia-cusolver-cu12 (255.1MiB)
6579
- Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
6580
- Downloading triton (148.3MiB)
6581
  Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
6582
- Downloading nvidia-nccl-cu12 (307.4MiB)
6583
  Downloading nvidia-cusparse-cu12 (274.9MiB)
6584
- Downloading nvidia-cusparselt-cu12 (273.9MiB)
6585
- Downloading nvidia-curand-cu12 (60.7MiB)
6586
- Downloading torch (846.9MiB)
6587
- Downloading nvidia-cufft-cu12 (184.2MiB)
6588
  Downloading hf-xet (3.0MiB)
6589
  Downloading nvidia-cublas-cu12 (566.8MiB)
6590
- Downloading networkx (1.9MiB)
 
 
 
 
 
 
6591
  Downloading nvidia-cufile-cu12
6592
  Downloading hf-xet
6593
  Downloading setuptools
@@ -6607,122 +6640,316 @@ Downloading networkx (1.9MiB)
6607
  Downloading nvidia-cublas-cu12
6608
  Downloading nvidia-cudnn-cu12
6609
  Downloading torch
6610
- Installed 37 packages in 545ms
6611
  </div>
6612
  </div>
6613
  <div class="cell-stderr">Fetching 66 files: 0%| | 0/66 [00:00&lt;?, ?it/s]
6614
- Fetching 66 files: 2%|▏ | 1/66 [00:00&lt;00:24, 2.66it/s]
6615
- Fetching 66 files: 14%|█▎ | 9/66 [00:00&lt;00:02, 20.99it/s]
6616
- Fetching 66 files: 24%|██▍ | 16/66 [00:00&lt;00:01, 31.57it/s]
6617
- Fetching 66 files: 32%|███▏ | 21/66 [00:01&lt;00:02, 17.74it/s]
6618
- Fetching 66 files: 53%|█████▎ | 35/66 [00:01&lt;00:01, 29.20it/s]
6619
- Fetching 66 files: 71%|███████ | 47/66 [00:01&lt;00:00, 40.39it/s]
6620
- Fetching 66 files: 85%|████████▍ | 56/66 [00:01&lt;00:00, 43.01it/s]
6621
- Fetching 66 files: 97%|█████████▋| 64/66 [00:01&lt;00:00, 47.82it/s]
6622
- Fetching 66 files: 100%|██████████| 66/66 [00:01&lt;00:00, 35.14it/s]
6623
- /tmp/tmpsyirxqys/cuda_utils.c:5:10: fatal error: Python.h: No such file or directory
6624
- 5 | #include &lt;Python.h&gt;
6625
- | ^~~~~~~~~~
6626
- compilation terminated.
6627
- Traceback (most recent call last):
6628
- File &quot;/repo/moe_benchmarks/megablocks_yamoe/.uvnote/cells/megablocks_run.py&quot;, line 102, in &lt;module&gt;
6629
- output, stats = bench(model, x)
6630
- ^^^^^^^^^^^^^^^
6631
- File &quot;/repo/moe_benchmarks/megablocks_yamoe/.uvnote/cells/bench_utils.py&quot;, line 189, in runner
6632
- result, times_s = _bench_engine(call, warmup=warmup, iters=iters, device=device, dtype=dtype, input_gen=input_gen)
6633
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
6634
- File &quot;/repo/moe_benchmarks/megablocks_yamoe/.uvnote/cells/bench_utils.py&quot;, line 96, in _bench_engine
6635
- _ = call(input_gen())
6636
- ^^^^^^^^^^^^^^^^^
6637
- File &quot;/repo/moe_benchmarks/megablocks_yamoe/.uvnote/cells/bench_utils.py&quot;, line 177, in &lt;lambda&gt;
6638
- call = lambda x: fn(x, *args[1:], **kwargs)
6639
- ^^^^^^^^^^^^^^^^^^^^^^^^^^
6640
- File &quot;/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/torch/nn/modules/module.py&quot;, line 1773, in _wrapped_call_impl
6641
- return self._call_impl(*args, **kwargs)
6642
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
6643
- File &quot;/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/torch/nn/modules/module.py&quot;, line 1784, in _call_impl
6644
- return forward_call(*args, **kwargs)
6645
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
6646
- File &quot;/repo/moe_benchmarks/megablocks_yamoe/.uvnote/cells/megablocks_run.py&quot;, line 81, in forward
6647
- output, dummy_routing_weights = self.model(hidden_states)
6648
- ^^^^^^^^^^^^^^^^^^^^^^^^^
6649
- File &quot;/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/torch/nn/modules/module.py&quot;, line 1773, in _wrapped_call_impl
6650
- return self._call_impl(*args, **kwargs)
6651
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
6652
- File &quot;/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/torch/nn/modules/module.py&quot;, line 1784, in _call_impl
6653
- return forward_call(*args, **kwargs)
6654
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
6655
- File &quot;/tmp/uvnote-run-4n1mby1e/home/.cache/huggingface/hub/models--kernels-community--megablocks/snapshots/e0fb1437de3f8d7079c4da13be8cb64dc0cfcdd5/build/torch28-cxx11-cu128-x86_64-linux/megablocks/layers.py&quot;, line 896, in forward
6656
- output, expert_weights_out, *_ = moe_forward(
6657
- ^^^^^^^^^^^^
6658
- File &quot;/tmp/uvnote-run-4n1mby1e/home/.cache/huggingface/hub/models--kernels-community--megablocks/snapshots/e0fb1437de3f8d7079c4da13be8cb64dc0cfcdd5/build/torch28-cxx11-cu128-x86_64-linux/megablocks/layers.py&quot;, line 730, in moe_forward
6659
- x, tokens_per_expert = forward_fn(**forward_args)
6660
- ^^^^^^^^^^^^^^^^^^^^^^^^^^
6661
- File &quot;/tmp/uvnote-run-4n1mby1e/home/.cache/huggingface/hub/models--kernels-community--megablocks/snapshots/e0fb1437de3f8d7079c4da13be8cb64dc0cfcdd5/build/torch28-cxx11-cu128-x86_64-linux/megablocks/layers.py&quot;, line 457, in forward_once
6662
- x = permute_and_compute(
6663
- ^^^^^^^^^^^^^^^^^^^^
6664
- File &quot;/tmp/uvnote-run-4n1mby1e/home/.cache/huggingface/hub/models--kernels-community--megablocks/snapshots/e0fb1437de3f8d7079c4da13be8cb64dc0cfcdd5/build/torch28-cxx11-cu128-x86_64-linux/megablocks/layers.py&quot;, line 401, in permute_and_compute
6665
- x = ops.binned_gather(x, indices, bins, expert_capacity, top_k)
6666
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
6667
- File &quot;/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/torch/autograd/function.py&quot;, line 576, in apply
6668
- return super().apply(*args, **kwargs) # type: ignore[misc]
6669
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
6670
- File &quot;/tmp/uvnote-run-4n1mby1e/home/.cache/huggingface/hub/models--kernels-community--megablocks/snapshots/e0fb1437de3f8d7079c4da13be8cb64dc0cfcdd5/build/torch28-cxx11-cu128-x86_64-linux/megablocks/ops/stk_autocast.py&quot;, line 30, in decorate_fwd
6671
- return fwd(*args, **kwargs)
6672
- ^^^^^^^^^^^^^^^^^^^^
6673
- File &quot;/tmp/uvnote-run-4n1mby1e/home/.cache/huggingface/hub/models--kernels-community--megablocks/snapshots/e0fb1437de3f8d7079c4da13be8cb64dc0cfcdd5/build/torch28-cxx11-cu128-x86_64-linux/megablocks/ops/binned_gather.py&quot;, line 26, in forward
6674
- return kernels.binned_gather(x, indices, None, bins, bin_size, top_k)
6675
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
6676
- File &quot;/tmp/uvnote-run-4n1mby1e/home/.cache/huggingface/hub/models--kernels-community--megablocks/snapshots/e0fb1437de3f8d7079c4da13be8cb64dc0cfcdd5/build/torch28-cxx11-cu128-x86_64-linux/megablocks/backend/kernels.py&quot;, line 419, in binned_gather
6677
- _binned_copy[(num_experts, expert_capacity)](
6678
- File &quot;/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/runtime/jit.py&quot;, line 390, in &lt;lambda&gt;
6679
- return lambda *args, **kwargs: self.run(grid=grid, warmup=False, *args, **kwargs)
6680
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
6681
- File &quot;/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/runtime/autotuner.py&quot;, line 239, in run
6682
- benchmark()
6683
- File &quot;/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/runtime/autotuner.py&quot;, line 228, in benchmark
6684
- timings = {config: self._bench(*args, config=config, **kwargs) for config in pruned_configs}
6685
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
6686
- File &quot;/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/runtime/autotuner.py&quot;, line 228, in &lt;dictcomp&gt;
6687
- timings = {config: self._bench(*args, config=config, **kwargs) for config in pruned_configs}
6688
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
6689
- File &quot;/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/runtime/autotuner.py&quot;, line 160, in _bench
6690
- return self.do_bench(kernel_call, quantiles=(0.5, 0.2, 0.8))
6691
- ^^^^^^^^^^^^^
6692
- File &quot;/usr/lib/python3.11/functools.py&quot;, line 1001, in __get__
6693
- val = self.func(instance)
6694
- ^^^^^^^^^^^^^^^^^^^
6695
- File &quot;/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/runtime/autotuner.py&quot;, line 121, in do_bench
6696
- return driver.active.get_benchmarker()
6697
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
6698
- File &quot;/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/runtime/driver.py&quot;, line 30, in __getattr__
6699
- return getattr(self._initialize_obj(), name)
6700
- ^^^^^^^^^^^^^^^^^^^^^^
6701
- File &quot;/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/runtime/driver.py&quot;, line 26, in _initialize_obj
6702
- self._obj = self._init_fn()
6703
- ^^^^^^^^^^^^^^^
6704
- File &quot;/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/runtime/driver.py&quot;, line 12, in _create_driver
6705
- return active_drivers[0]()
6706
- ^^^^^^^^^^^^^^^^^^^
6707
- File &quot;/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/backends/nvidia/driver.py&quot;, line 715, in __init__
6708
- self.utils = CudaUtils() # TODO: make static
6709
- ^^^^^^^^^^^
6710
- File &quot;/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/backends/nvidia/driver.py&quot;, line 62, in __init__
6711
- mod = compile_module_from_src(
6712
- ^^^^^^^^^^^^^^^^^^^^^^^^
6713
- File &quot;/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/runtime/build.py&quot;, line 88, in compile_module_from_src
6714
- so = _build(name, src_path, tmpdir, library_dirs or [], include_dirs or [], libraries or [])
6715
- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
6716
- File &quot;/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/runtime/build.py&quot;, line 51, in _build
6717
- subprocess.check_call(cc_cmd, stdout=subprocess.DEVNULL)
6718
- File &quot;/usr/lib/python3.11/subprocess.py&quot;, line 413, in check_call
6719
- raise CalledProcessError(retcode, cmd)
6720
- subprocess.CalledProcessError: Command &#x27;[&#x27;/usr/bin/gcc&#x27;, &#x27;/tmp/tmpsyirxqys/cuda_utils.c&#x27;, &#x27;-O3&#x27;, &#x27;-shared&#x27;, &#x27;-fPIC&#x27;, &#x27;-Wno-psabi&#x27;, &#x27;-o&#x27;, &#x27;/tmp/tmpsyirxqys/cuda_utils.cpython-311-x86_64-linux-gnu.so&#x27;, &#x27;-lcuda&#x27;, &#x27;-L/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/backends/nvidia/lib&#x27;, &#x27;-L/usr/lib/x86_64-linux-gnu&#x27;, &#x27;-I/tmp/uvnote-run-4n1mby1e/home/.cache/uv/environments-v2/megablocks-run-8802ebf6d3566120/lib/python3.11/site-packages/triton/backends/nvidia/include&#x27;, &#x27;-I/tmp/tmpsyirxqys&#x27;, &#x27;-I/usr/include/python3.11&#x27;]&#x27; returned non-zero exit status 1.</div>
6721
  </div>
6722
  </div>
6723
 
6724
  <h2>Performance Visualization</h2>
6725
  <p>This section reads all benchmark results and creates a comprehensive performance comparison chart.</p>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6726
  </div>
6727
 
6728
  </body>
 
3720
  <span onclick="toggleOutput('utils')" style="cursor: pointer;">▼ output</span>
3721
  <span id="uv-indicator-utils" onclick="toggleUvLogsFromHeader('utils')" style="cursor: pointer;">▶ uv-logs</span>
3722
  </span> |
3723
+ Cell: utils | deps: torch, numpy | 35.49s
3724
  | <button class="run-btn" onclick="runCell('utils')">▶ run</button>
3725
  <button class="copy-btn" onclick="copyCell('utils')">Copy</button>
3726
  <a href="cells/utils.py" target="_blank" class="raw-btn">Raw</a>
 
3794
  <div class="uv-install-logs" id="uv-logs-utils">
3795
  <div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
3796
  <div class="uv-logs-content" style="display: none;">
3797
+ Downloading nvidia-curand-cu12 (60.7MiB)
 
 
 
3798
  Downloading nvidia-cufile-cu12 (1.1MiB)
3799
+ Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
3800
+ Downloading nvidia-cusparse-cu12 (274.9MiB)
3801
+ Downloading nvidia-cudnn-cu12 (674.0MiB)
3802
  Downloading nvidia-cusparselt-cu12 (273.9MiB)
 
3803
  Downloading torch (846.9MiB)
3804
+ Downloading nvidia-nvjitlink-cu12 (37.4MiB)
3805
+ Downloading setuptools (1.1MiB)
3806
+ Downloading nvidia-cublas-cu12 (566.8MiB)
3807
+ Downloading networkx (1.9MiB)
3808
+ Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
3809
  Downloading nvidia-cusolver-cu12 (255.1MiB)
3810
+ Downloading numpy (16.2MiB)
3811
+ Downloading nvidia-cufft-cu12 (184.2MiB)
3812
+ Downloading nvidia-nccl-cu12 (307.4MiB)
3813
+ Downloading sympy (6.0MiB)
3814
  Downloading triton (148.3MiB)
3815
  Downloading nvidia-cufile-cu12
3816
  Downloading setuptools
 
3830
  Downloading nvidia-cublas-cu12
3831
  Downloading nvidia-cudnn-cu12
3832
  Downloading torch
3833
+ Installed 26 packages in 461ms
3834
  </div>
3835
  </div>
3836
  </div>
 
3843
  <span onclick="toggleOutput('bench_utils')" style="cursor: pointer;">▼ output</span>
3844
  <span id="uv-indicator-bench_utils" onclick="toggleUvLogsFromHeader('bench_utils')" style="cursor: pointer;">▶ uv-logs</span>
3845
  </span> |
3846
+ Cell: bench_utils | deps: torch, numpy | 34.17s
3847
  | <button class="run-btn" onclick="runCell('bench_utils')">▶ run</button>
3848
  <button class="copy-btn" onclick="copyCell('bench_utils')">Copy</button>
3849
  <a href="cells/bench_utils.py" target="_blank" class="raw-btn">Raw</a>
 
4331
  <div class="uv-install-logs" id="uv-logs-bench_utils">
4332
  <div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
4333
  <div class="uv-logs-content" style="display: none;">
 
 
 
 
 
 
4334
  Downloading setuptools (1.1MiB)
4335
  Downloading nvidia-cufile-cu12 (1.1MiB)
 
 
 
4336
  Downloading sympy (6.0MiB)
4337
+ Downloading nvidia-nvjitlink-cu12 (37.4MiB)
4338
  Downloading nvidia-curand-cu12 (60.7MiB)
 
 
4339
  Downloading nvidia-cublas-cu12 (566.8MiB)
4340
+ Downloading nvidia-cusolver-cu12 (255.1MiB)
4341
+ Downloading networkx (1.9MiB)
4342
+ Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
4343
+ Downloading nvidia-cusparse-cu12 (274.9MiB)
4344
+ Downloading torch (846.9MiB)
4345
+ Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
4346
+ Downloading nvidia-nccl-cu12 (307.4MiB)
4347
+ Downloading numpy (16.2MiB)
4348
+ Downloading nvidia-cudnn-cu12 (674.0MiB)
4349
+ Downloading nvidia-cufft-cu12 (184.2MiB)
4350
+ Downloading nvidia-cusparselt-cu12 (273.9MiB)
4351
  Downloading triton (148.3MiB)
4352
  Downloading nvidia-cufile-cu12
4353
  Downloading setuptools
 
4367
  Downloading nvidia-cublas-cu12
4368
  Downloading nvidia-cudnn-cu12
4369
  Downloading torch
4370
+ Installed 26 packages in 507ms
4371
  </div>
4372
  </div>
4373
  </div>
 
4381
  <span onclick="toggleOutput('config')" style="cursor: pointer;">▼ output</span>
4382
  <span id="uv-indicator-config" onclick="toggleUvLogsFromHeader('config')" style="cursor: pointer;">▶ uv-logs</span>
4383
  </span> |
4384
+ Cell: config | deps: torch, numpy | 34.91s
4385
  | <button class="run-btn" onclick="runCell('config')">▶ run</button>
4386
  <button class="copy-btn" onclick="copyCell('config')">Copy</button>
4387
  <a href="cells/config.py" target="_blank" class="raw-btn">Raw</a>
 
4441
  <div class="uv-install-logs" id="uv-logs-config">
4442
  <div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
4443
  <div class="uv-logs-content" style="display: none;">
4444
+ Downloading sympy (6.0MiB)
4445
  Downloading nvidia-cufile-cu12 (1.1MiB)
4446
+ Downloading setuptools (1.1MiB)
4447
+ Downloading nvidia-curand-cu12 (60.7MiB)
4448
  Downloading nvidia-cusparse-cu12 (274.9MiB)
4449
+ Downloading torch (846.9MiB)
4450
+ Downloading nvidia-cusolver-cu12 (255.1MiB)
4451
+ Downloading numpy (16.2MiB)
4452
+ Downloading nvidia-nvjitlink-cu12 (37.4MiB)
4453
  Downloading nvidia-nccl-cu12 (307.4MiB)
4454
+ Downloading nvidia-cufft-cu12 (184.2MiB)
4455
  Downloading nvidia-cusparselt-cu12 (273.9MiB)
4456
+ Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
 
 
4457
  Downloading networkx (1.9MiB)
4458
+ Downloading nvidia-cudnn-cu12 (674.0MiB)
4459
+ Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
 
 
 
4460
  Downloading nvidia-cublas-cu12 (566.8MiB)
 
 
 
4461
  Downloading triton (148.3MiB)
4462
  Downloading nvidia-cufile-cu12
4463
  Downloading setuptools
 
4474
  Downloading nvidia-cusparselt-cu12
4475
  Downloading nvidia-cusparse-cu12
4476
  Downloading nvidia-nccl-cu12
 
4477
  Downloading nvidia-cudnn-cu12
4478
+ Downloading nvidia-cublas-cu12
4479
  Downloading torch
4480
+ Installed 26 packages in 572ms
4481
  </div>
4482
  </div>
4483
  </div>
 
4490
  <span onclick="toggleOutput('save_data')" style="cursor: pointer;">▼ output</span>
4491
  <span id="uv-indicator-save_data" onclick="toggleUvLogsFromHeader('save_data')" style="cursor: pointer;">▶ uv-logs</span>
4492
  </span> |
4493
+ Cell: save_data | deps: torch, numpy | 39.37s
4494
  | <button class="run-btn" onclick="runCell('save_data')">▶ run</button>
4495
  <button class="copy-btn" onclick="copyCell('save_data')">Copy</button>
4496
  <a href="cells/save_data.py" target="_blank" class="raw-btn">Raw</a>
 
4585
  <div class="uv-install-logs" id="uv-logs-save_data">
4586
  <div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
4587
  <div class="uv-logs-content" style="display: none;">
 
4588
  Downloading nvidia-cufft-cu12 (184.2MiB)
4589
+ Downloading sympy (6.0MiB)
4590
  Downloading nvidia-nvjitlink-cu12 (37.4MiB)
4591
+ Downloading triton (148.3MiB)
4592
+ Downloading numpy (16.2MiB)
4593
+ Downloading networkx (1.9MiB)
4594
+ Downloading torch (846.9MiB)
4595
  Downloading nvidia-nccl-cu12 (307.4MiB)
4596
+ Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
4597
+ Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
4598
+ Downloading nvidia-curand-cu12 (60.7MiB)
4599
+ Downloading nvidia-cusolver-cu12 (255.1MiB)
4600
+ Downloading setuptools (1.1MiB)
4601
  Downloading nvidia-cudnn-cu12 (674.0MiB)
4602
  Downloading nvidia-cublas-cu12 (566.8MiB)
4603
+ Downloading nvidia-cufile-cu12 (1.1MiB)
 
4604
  Downloading nvidia-cusparse-cu12 (274.9MiB)
 
 
4605
  Downloading nvidia-cusparselt-cu12 (273.9MiB)
 
 
 
 
4606
  Downloading nvidia-cufile-cu12
4607
  Downloading setuptools
4608
  Downloading networkx
 
4615
  Downloading triton
4616
  Downloading nvidia-cufft-cu12
4617
  Downloading nvidia-cusolver-cu12
 
4618
  Downloading nvidia-cusparse-cu12
4619
+ Downloading nvidia-cusparselt-cu12
4620
  Downloading nvidia-nccl-cu12
4621
  Downloading nvidia-cublas-cu12
4622
  Downloading nvidia-cudnn-cu12
4623
  Downloading torch
4624
+ Installed 26 packages in 455ms
4625
  </div>
4626
  </div>
4627
  <div class="cell-artifacts">
4628
  <h4>Artifacts:</h4>
4629
  <a href="artifacts/save_data/router_bias.pt" class="artifact" target="_blank">router_bias.pt</a>
 
 
 
 
4630
  <a href="artifacts/save_data/router_weight.pt" class="artifact" target="_blank">router_weight.pt</a>
4631
+ <a href="artifacts/save_data/down_proj_bias.pt" class="artifact" target="_blank">down_proj_bias.pt</a>
4632
+ <a href="artifacts/save_data/gate_up_proj.pt" class="artifact" target="_blank">gate_up_proj.pt</a>
4633
+ <a href="artifacts/save_data/down_proj.pt" class="artifact" target="_blank">down_proj.pt</a>
4634
+ <a href="artifacts/save_data/gate_up_proj_bias.pt" class="artifact" target="_blank">gate_up_proj_bias.pt</a>
4635
  </div>
4636
  </div>
4637
  </div>
 
4645
  <span onclick="toggleOutput('yamoe_run')" style="cursor: pointer;">▼ output</span>
4646
  <span id="uv-indicator-yamoe_run" onclick="toggleUvLogsFromHeader('yamoe_run')" style="cursor: pointer;">▶ uv-logs</span>
4647
  </span> |
4648
+ Cell: yamoe_run | deps: torch, kernels, numpy | 38.45s
4649
  | <button class="run-btn" onclick="runCell('yamoe_run')">▶ run</button>
4650
  <button class="copy-btn" onclick="copyCell('yamoe_run')">Copy</button>
4651
  <a href="cells/yamoe_run.py" target="_blank" class="raw-btn">Raw</a>
 
4916
  </div>
4917
  </div>
4918
  <div id="output-yamoe_run" class="cell-output">
4919
+ <div class="cell-stdout">Loading weights from: /repo/moe_benchmarks/megablocks_yamoe/.uvnote/cache/b398a2853af91970392ae37f0d53a0eda463df639220863fbd38f33605bf9cbb
4920
  Loaded shared weights from artifacts
4921
  Router weight sum: 12.588732
4922
  Gate/up sum: 1026.601807
 
4939
  Warming up (10 iterations)...
4940
  Benchmarking (50 iterations)...
4941
  Progress: 20% complete (avg: 4.253 ms)
4942
+ Progress: 40% complete (avg: 4.248 ms)
4943
+ Progress: 60% complete (avg: 4.248 ms)
4944
+ Progress: 80% complete (avg: 4.250 ms)
4945
 
4946
  Output tensors:
4947
  Primary: shape=(1, 100, 1152), dtype=torch.float32, device=cuda:0, range=[-0.049506, 0.054984], mean=0.000034, std=0.006508, norm=2.208791
 
4951
  Iterations: 50
4952
 
4953
  Latency Statistics:
4954
+ Average: 4.250 ms
4955
+ Min: 4.125 ms
4956
+ Max: 4.300 ms
4957
+ Std Dev: 0.023 ms
4958
 
4959
  Percentiles:
4960
  P50 (median): 4.254 ms
4961
+ P95: 4.268 ms
4962
+ P99: 4.290 ms
4963
 
4964
  Throughput:
4965
+ Tokens/sec: 23530.9
4966
+ Std Dev: 131.3
4967
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
4968
 
4969
  Saved benchmark results to yamoe_results.json
 
4973
  <div class="uv-install-logs" id="uv-logs-yamoe_run">
4974
  <div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
4975
  <div class="uv-logs-content" style="display: none;">
4976
+ Downloading nvidia-curand-cu12 (60.7MiB)
4977
+ Downloading numpy (16.2MiB)
4978
+ Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
4979
+ Downloading hf-xet (3.0MiB)
4980
  Downloading networkx (1.9MiB)
4981
+ Downloading nvidia-cufile-cu12 (1.1MiB)
4982
+ Downloading setuptools (1.1MiB)
4983
  Downloading sympy (6.0MiB)
4984
+ Downloading nvidia-cusparse-cu12 (274.9MiB)
4985
  Downloading nvidia-cublas-cu12 (566.8MiB)
4986
+ Downloading torch (846.9MiB)
 
4987
  Downloading nvidia-cufft-cu12 (184.2MiB)
4988
+ Downloading triton (148.3MiB)
 
4989
  Downloading nvidia-cusolver-cu12 (255.1MiB)
4990
+ Downloading nvidia-cudnn-cu12 (674.0MiB)
4991
+ Downloading nvidia-nccl-cu12 (307.4MiB)
 
 
4992
  Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
4993
+ Downloading nvidia-cusparselt-cu12 (273.9MiB)
4994
  Downloading nvidia-nvjitlink-cu12 (37.4MiB)
 
 
4995
  Downloading nvidia-cufile-cu12
4996
  Downloading hf-xet
4997
  Downloading setuptools
 
5011
  Downloading nvidia-cublas-cu12
5012
  Downloading nvidia-cudnn-cu12
5013
  Downloading torch
5014
+ Installed 37 packages in 452ms
5015
  </div>
5016
  </div>
5017
  <div class="cell-stderr">Fetching 6 files: 0%| | 0/6 [00:00&lt;?, ?it/s]
5018
+ Fetching 6 files: 17%|█▋ | 1/6 [00:00&lt;00:01, 3.53it/s]
5019
+ Fetching 6 files: 33%|███▎ | 2/6 [00:00&lt;00:01, 3.45it/s]
5020
+ Fetching 6 files: 50%|█████ | 3/6 [00:00&lt;00:01, 2.94it/s]
5021
+ Fetching 6 files: 100%|██████████| 6/6 [00:00&lt;00:00, 6.14it/s]</div>
5022
  <div class="cell-artifacts">
5023
  <h4>Artifacts:</h4>
5024
  <a href="artifacts/yamoe_run/yamoe_results.json" class="artifact" target="_blank">yamoe_results.json</a>
 
5035
  <span onclick="toggleOutput('binned_run')" style="cursor: pointer;">▼ output</span>
5036
  <span id="uv-indicator-binned_run" onclick="toggleUvLogsFromHeader('binned_run')" style="cursor: pointer;">▶ uv-logs</span>
5037
  </span> |
5038
+ Cell: binned_run | deps: torch, numpy | 39.83s
5039
  | <button class="run-btn" onclick="runCell('binned_run')">▶ run</button>
5040
  <button class="copy-btn" onclick="copyCell('binned_run')">Copy</button>
5041
  <a href="cells/binned_run.py" target="_blank" class="raw-btn">Raw</a>
 
5449
 
5450
  Warming up (10 iterations)...
5451
  Benchmarking (50 iterations)...
5452
+ Progress: 20% complete (avg: 38.543 ms)
5453
+ Progress: 40% complete (avg: 37.857 ms)
5454
+ Progress: 60% complete (avg: 37.457 ms)
5455
+ Progress: 80% complete (avg: 37.143 ms)
5456
 
5457
  Output tensors:
5458
  Primary: shape=(1, 100, 1152), dtype=torch.float32, device=cuda:0, range=[-0.049506, 0.054984], mean=0.000034, std=0.006508, norm=2.208791
 
5462
  Iterations: 50
5463
 
5464
  Latency Statistics:
5465
+ Average: 36.539 ms
5466
+ Min: 32.831 ms
5467
+ Max: 40.074 ms
5468
+ Std Dev: 1.614 ms
5469
 
5470
  Percentiles:
5471
+ P50 (median): 36.870 ms
5472
+ P95: 39.005 ms
5473
+ P99: 39.950 ms
5474
 
5475
  Throughput:
5476
+ Tokens/sec: 2736.8
5477
+ Std Dev: 123.0
5478
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
5479
 
5480
  Saved benchmark results to binned_results.json
 
5484
  <div class="uv-install-logs" id="uv-logs-binned_run">
5485
  <div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
5486
  <div class="uv-logs-content" style="display: none;">
5487
+ Downloading nvidia-cudnn-cu12 (674.0MiB)
5488
+ Downloading nvidia-cublas-cu12 (566.8MiB)
5489
+ Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
5490
+ Downloading setuptools (1.1MiB)
5491
  Downloading nvidia-cusolver-cu12 (255.1MiB)
5492
  Downloading networkx (1.9MiB)
5493
+ Downloading torch (846.9MiB)
 
5494
  Downloading nvidia-nvjitlink-cu12 (37.4MiB)
 
 
5495
  Downloading nvidia-nccl-cu12 (307.4MiB)
5496
+ Downloading nvidia-cusparse-cu12 (274.9MiB)
 
 
 
 
5497
  Downloading numpy (16.2MiB)
5498
+ Downloading nvidia-cufile-cu12 (1.1MiB)
5499
+ Downloading nvidia-cusparselt-cu12 (273.9MiB)
5500
+ Downloading sympy (6.0MiB)
5501
  Downloading triton (148.3MiB)
5502
+ Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
5503
+ Downloading nvidia-curand-cu12 (60.7MiB)
5504
  Downloading nvidia-cufft-cu12 (184.2MiB)
5505
  Downloading nvidia-cufile-cu12
5506
  Downloading setuptools
 
5514
  Downloading triton
5515
  Downloading nvidia-cufft-cu12
5516
  Downloading nvidia-cusolver-cu12
 
5517
  Downloading nvidia-cusparselt-cu12
5518
+ Downloading nvidia-cusparse-cu12
5519
  Downloading nvidia-nccl-cu12
5520
  Downloading nvidia-cublas-cu12
5521
  Downloading nvidia-cudnn-cu12
5522
  Downloading torch
5523
+ Installed 26 packages in 442ms
5524
  </div>
5525
  </div>
5526
  <div class="cell-artifacts">
 
5539
  <span onclick="toggleOutput('gptoss_run')" style="cursor: pointer;">▼ output</span>
5540
  <span id="uv-indicator-gptoss_run" onclick="toggleUvLogsFromHeader('gptoss_run')" style="cursor: pointer;">▶ uv-logs</span>
5541
  </span> |
5542
+ Cell: gptoss_run | deps: torch, numpy | 39.94s
5543
  | <button class="run-btn" onclick="runCell('gptoss_run')">▶ run</button>
5544
  <button class="copy-btn" onclick="copyCell('gptoss_run')">Copy</button>
5545
  <a href="cells/gptoss_run.py" target="_blank" class="raw-btn">Raw</a>
 
5857
 
5858
  Warming up (10 iterations)...
5859
  Benchmarking (50 iterations)...
5860
+ Progress: 20% complete (avg: 48.070 ms)
5861
+ Progress: 40% complete (avg: 47.917 ms)
5862
+ Progress: 60% complete (avg: 47.432 ms)
5863
+ Progress: 80% complete (avg: 46.164 ms)
5864
 
5865
  Output tensors:
5866
  Primary: shape=(1, 100, 1152), dtype=torch.float32, device=cuda:0, range=[-0.064982, 0.061193], mean=0.000100, std=0.013510, norm=4.585560
 
5870
  Iterations: 50
5871
 
5872
  Latency Statistics:
5873
+ Average: 45.237 ms
5874
+ Min: 39.776 ms
5875
+ Max: 49.247 ms
5876
+ Std Dev: 2.851 ms
5877
 
5878
  Percentiles:
5879
+ P50 (median): 46.461 ms
5880
+ P95: 48.433 ms
5881
+ P99: 49.077 ms
5882
 
5883
  Throughput:
5884
+ Tokens/sec: 2210.6
5885
+ Std Dev: 143.0
5886
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
5887
 
5888
  Saved benchmark results to gptoss_results.json
 
5892
  <div class="uv-install-logs" id="uv-logs-gptoss_run">
5893
  <div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
5894
  <div class="uv-logs-content" style="display: none;">
5895
+ Downloading numpy (16.2MiB)
 
 
5896
  Downloading nvidia-cusparse-cu12 (274.9MiB)
5897
+ Downloading nvidia-cudnn-cu12 (674.0MiB)
5898
+ Downloading nvidia-nccl-cu12 (307.4MiB)
 
5899
  Downloading nvidia-cusparselt-cu12 (273.9MiB)
5900
  Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
5901
+ Downloading nvidia-curand-cu12 (60.7MiB)
5902
+ Downloading networkx (1.9MiB)
5903
+ Downloading setuptools (1.1MiB)
5904
+ Downloading nvidia-cufft-cu12 (184.2MiB)
5905
  Downloading nvidia-cufile-cu12 (1.1MiB)
5906
+ Downloading nvidia-nvjitlink-cu12 (37.4MiB)
5907
  Downloading nvidia-cublas-cu12 (566.8MiB)
5908
+ Downloading sympy (6.0MiB)
5909
  Downloading nvidia-cusolver-cu12 (255.1MiB)
5910
  Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
 
 
 
5911
  Downloading torch (846.9MiB)
5912
+ Downloading triton (148.3MiB)
5913
  Downloading nvidia-cufile-cu12
5914
  Downloading setuptools
5915
  Downloading networkx
 
5922
  Downloading triton
5923
  Downloading nvidia-cufft-cu12
5924
  Downloading nvidia-cusolver-cu12
 
5925
  Downloading nvidia-cusparselt-cu12
5926
+ Downloading nvidia-cusparse-cu12
5927
  Downloading nvidia-nccl-cu12
5928
  Downloading nvidia-cublas-cu12
5929
  Downloading nvidia-cudnn-cu12
5930
  Downloading torch
5931
+ Installed 26 packages in 443ms
5932
  </div>
5933
  </div>
5934
  <div class="cell-artifacts">
 
5947
  <span onclick="toggleOutput('gptoss_training_run')" style="cursor: pointer;">▼ output</span>
5948
  <span id="uv-indicator-gptoss_training_run" onclick="toggleUvLogsFromHeader('gptoss_training_run')" style="cursor: pointer;">▶ uv-logs</span>
5949
  </span> |
5950
+ Cell: gptoss_training_run | deps: torch, numpy | 41.85s
5951
  | <button class="run-btn" onclick="runCell('gptoss_training_run')">▶ run</button>
5952
  <button class="copy-btn" onclick="copyCell('gptoss_training_run')">Copy</button>
5953
  <a href="cells/gptoss_training_run.py" target="_blank" class="raw-btn">Raw</a>
 
6248
 
6249
  Warming up (10 iterations)...
6250
  Benchmarking (50 iterations)...
6251
+ Progress: 20% complete (avg: 49.277 ms)
6252
+ Progress: 40% complete (avg: 48.351 ms)
6253
+ Progress: 60% complete (avg: 47.557 ms)
6254
+ Progress: 80% complete (avg: 46.750 ms)
6255
 
6256
  Output tensors:
6257
  Primary: shape=(1, 100, 1152), dtype=torch.float32, device=cuda:0, range=[-0.064982, 0.061193], mean=0.000100, std=0.013510, norm=4.585560
 
6261
  Iterations: 50
6262
 
6263
  Latency Statistics:
6264
+ Average: 45.751 ms
6265
+ Min: 38.860 ms
6266
+ Max: 50.817 ms
6267
+ Std Dev: 2.834 ms
6268
 
6269
  Percentiles:
6270
+ P50 (median): 45.833 ms
6271
+ P95: 50.540 ms
6272
+ P99: 50.777 ms
6273
 
6274
  Throughput:
6275
+ Tokens/sec: 2185.7
6276
+ Std Dev: 141.0
6277
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
6278
 
6279
  Saved benchmark results to gptoss_training_results.json
 
6283
  <div class="uv-install-logs" id="uv-logs-gptoss_training_run">
6284
  <div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
6285
  <div class="uv-logs-content" style="display: none;">
 
6286
  Downloading sympy (6.0MiB)
6287
+ Downloading nvidia-cufile-cu12 (1.1MiB)
6288
+ Downloading nvidia-cufft-cu12 (184.2MiB)
6289
+ Downloading nvidia-cusparse-cu12 (274.9MiB)
6290
+ Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
6291
  Downloading numpy (16.2MiB)
6292
+ Downloading nvidia-curand-cu12 (60.7MiB)
6293
  Downloading networkx (1.9MiB)
6294
+ Downloading setuptools (1.1MiB)
 
6295
  Downloading nvidia-cusparselt-cu12 (273.9MiB)
6296
  Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
 
6297
  Downloading nvidia-cudnn-cu12 (674.0MiB)
6298
+ Downloading nvidia-cublas-cu12 (566.8MiB)
 
6299
  Downloading nvidia-nccl-cu12 (307.4MiB)
6300
+ Downloading nvidia-cusolver-cu12 (255.1MiB)
 
6301
  Downloading nvidia-nvjitlink-cu12 (37.4MiB)
 
6302
  Downloading triton (148.3MiB)
6303
+ Downloading torch (846.9MiB)
6304
  Downloading nvidia-cufile-cu12
6305
  Downloading setuptools
6306
  Downloading networkx
 
6319
  Downloading nvidia-cublas-cu12
6320
  Downloading nvidia-cudnn-cu12
6321
  Downloading torch
6322
+ Installed 26 packages in 544ms
6323
  </div>
6324
  </div>
6325
  <div class="cell-artifacts">
 
6331
 
6332
  <h2>MegaBlocks Implementation</h2>
6333
  <p>This section runs the MegaBlocks MoE implementation with optimized kernels from the Hugging Face hub.</p>
6334
+ <div class="cell" id="cell-megablocks_run">
6335
  <div class="cell-header">
6336
  <span class="collapse-indicators">
6337
  <span onclick="toggleCode('megablocks_run')" style="cursor: pointer;">▼ code</span>
6338
  <span onclick="toggleOutput('megablocks_run')" style="cursor: pointer;">▼ output</span>
6339
  <span id="uv-indicator-megablocks_run" onclick="toggleUvLogsFromHeader('megablocks_run')" style="cursor: pointer;">▶ uv-logs</span>
6340
  </span> |
6341
+ Cell: megablocks_run | deps: torch, numpy, kernels | 47.50s
6342
  | <button class="run-btn" onclick="runCell('megablocks_run')">▶ run</button>
6343
  <button class="copy-btn" onclick="copyCell('megablocks_run')">Copy</button>
6344
  <a href="cells/megablocks_run.py" target="_blank" class="raw-btn">Raw</a>
 
6545
  </div>
6546
  </div>
6547
  <div id="output-megablocks_run" class="cell-output">
6548
+ <div class="cell-stdout">Loading weights from: /repo/moe_benchmarks/megablocks_yamoe/.uvnote/cache/b398a2853af91970392ae37f0d53a0eda463df639220863fbd38f33605bf9cbb
6549
  Loaded shared weights from artifacts
6550
  Router weight sum: 12.588732
6551
  Gate/up sum: 1026.601807
 
6566
  Input Variation: +0.001 * iteration (deterministic)
6567
 
6568
  Warming up (10 iterations)...
6569
+ Benchmarking (50 iterations)...
6570
+ Progress: 20% complete (avg: 0.852 ms)
6571
+ Progress: 40% complete (avg: 0.837 ms)
6572
+ Progress: 60% complete (avg: 0.835 ms)
6573
+ Progress: 80% complete (avg: 2.704 ms)
6574
+
6575
+ Output tensors:
6576
+ Primary: shape=(1, 100, 1152), dtype=torch.float32, device=cuda:0, range=[-0.061104, 0.055115], mean=0.000056, std=0.013535, norm=4.593927
6577
+ Auxiliary: shape=(100, 4), dtype=torch.float32, device=cuda:0, range=[0.220999, 0.302948], mean=0.250000, std=0.012156, norm=5.005893
6578
+
6579
+ ━━━━━━━━━━━━━━━━━━━━ Benchmark Results ━━━━━━━━━━━━━━━━━━━━
6580
+ Iterations: 50
6581
+
6582
+ Latency Statistics:
6583
+ Average: 3.870 ms
6584
+ Min: 0.810 ms
6585
+ Max: 8.541 ms
6586
+ Std Dev: 3.728 ms
6587
+
6588
+ Percentiles:
6589
+ P50 (median): 0.840 ms
6590
+ P95: 8.540 ms
6591
+ P99: 8.541 ms
6592
+
6593
+ Throughput:
6594
+ Tokens/sec: 25840.1
6595
+ Std Dev: 53236.0
6596
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
6597
+
6598
+ Saved benchmark results to megablocks_results.json
6599
+
6600
+ Output sum: 6.473885
6601
  </div>
6602
  <div class="uv-install-logs" id="uv-logs-megablocks_run">
6603
  <div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
6604
  <div class="uv-logs-content" style="display: none;">
6605
+ Downloading nvidia-nccl-cu12 (307.4MiB)
6606
+ Downloading nvidia-cufft-cu12 (184.2MiB)
6607
+ Downloading nvidia-curand-cu12 (60.7MiB)
 
6608
  Downloading nvidia-cufile-cu12 (1.1MiB)
 
 
 
 
6609
  Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
6610
+ Downloading nvidia-nvjitlink-cu12 (37.4MiB)
6611
  Downloading nvidia-cusparse-cu12 (274.9MiB)
6612
+ Downloading nvidia-cudnn-cu12 (674.0MiB)
6613
+ Downloading triton (148.3MiB)
6614
+ Downloading networkx (1.9MiB)
 
6615
  Downloading hf-xet (3.0MiB)
6616
  Downloading nvidia-cublas-cu12 (566.8MiB)
6617
+ Downloading setuptools (1.1MiB)
6618
+ Downloading nvidia-cusolver-cu12 (255.1MiB)
6619
+ Downloading sympy (6.0MiB)
6620
+ Downloading torch (846.9MiB)
6621
+ Downloading numpy (16.2MiB)
6622
+ Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
6623
+ Downloading nvidia-cusparselt-cu12 (273.9MiB)
6624
  Downloading nvidia-cufile-cu12
6625
  Downloading hf-xet
6626
  Downloading setuptools
 
6640
  Downloading nvidia-cublas-cu12
6641
  Downloading nvidia-cudnn-cu12
6642
  Downloading torch
6643
+ Installed 37 packages in 458ms
6644
  </div>
6645
  </div>
6646
  <div class="cell-stderr">Fetching 66 files: 0%| | 0/66 [00:00&lt;?, ?it/s]
6647
+ Fetching 66 files: 2%|▏ | 1/66 [00:00&lt;00:12, 5.34it/s]
6648
+ Fetching 66 files: 3%|▎ | 2/66 [00:00&lt;00:16, 3.98it/s]
6649
+ Fetching 66 files: 26%|██▌ | 17/66 [00:01&lt;00:02, 17.77it/s]
6650
+ Fetching 66 files: 67%|██████▋ | 44/66 [00:01&lt;00:00, 43.54it/s]
6651
+ Fetching 66 files: 76%|███████▌ | 50/66 [00:01&lt;00:00, 43.86it/s]
6652
+ Fetching 66 files: 83%|████████▎ | 55/66 [00:01&lt;00:00, 42.86it/s]
6653
+ Fetching 66 files: 91%|█████████ | 60/66 [00:01&lt;00:00, 33.73it/s]
6654
+ Fetching 66 files: 98%|█████████▊| 65/66 [00:02&lt;00:00, 33.63it/s]
6655
+ Fetching 66 files: 100%|██████████| 66/66 [00:02&lt;00:00, 32.65it/s]</div>
6656
+ <div class="cell-artifacts">
6657
+ <h4>Artifacts:</h4>
6658
+ <a href="artifacts/megablocks_run/megablocks_results.json" class="artifact" target="_blank">megablocks_results.json</a>
6659
+ </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6660
  </div>
6661
  </div>
6662
 
6663
  <h2>Performance Visualization</h2>
6664
  <p>This section reads all benchmark results and creates a comprehensive performance comparison chart.</p>
6665
+ <div class="cell" id="cell-visualization">
6666
+ <div class="cell-header">
6667
+ <span class="collapse-indicators">
6668
+ <span onclick="toggleCode('visualization')" style="cursor: pointer;">▼ code</span>
6669
+ <span onclick="toggleOutput('visualization')" style="cursor: pointer;">▼ output</span>
6670
+ <span id="uv-indicator-visualization" onclick="toggleUvLogsFromHeader('visualization')" style="cursor: pointer;">▶ uv-logs</span>
6671
+ </span> |
6672
+ Cell: visualization | deps: matplotlib | 3.14s
6673
+ | <button class="run-btn" onclick="runCell('visualization')">▶ run</button>
6674
+ <button class="copy-btn" onclick="copyCell('visualization')">Copy</button>
6675
+ <a href="cells/visualization.py" target="_blank" class="raw-btn">Raw</a>
6676
+ </div>
6677
+ <div id="code-visualization" class="cell-code" data-lines="110">
6678
+ <div class="highlight-with-lines">
6679
+ <div class="line-numbers" id="lines-visualization">
6680
+ <a class="line-number" data-cell="visualization" data-line="1" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 1, true);">1</a>
6681
+ <a class="line-number" data-cell="visualization" data-line="2" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 2, true);">2</a>
6682
+ <a class="line-number" data-cell="visualization" data-line="3" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 3, true);">3</a>
6683
+ <a class="line-number" data-cell="visualization" data-line="4" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 4, true);">4</a>
6684
+ <a class="line-number" data-cell="visualization" data-line="5" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 5, true);">5</a>
6685
+ <a class="line-number" data-cell="visualization" data-line="6" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 6, true);">6</a>
6686
+ <a class="line-number" data-cell="visualization" data-line="7" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 7, true);">7</a>
6687
+ <a class="line-number" data-cell="visualization" data-line="8" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 8, true);">8</a>
6688
+ <a class="line-number" data-cell="visualization" data-line="9" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 9, true);">9</a>
6689
+ <a class="line-number" data-cell="visualization" data-line="10" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 10, true);">10</a>
6690
+ <a class="line-number" data-cell="visualization" data-line="11" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 11, true);">11</a>
6691
+ <a class="line-number" data-cell="visualization" data-line="12" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 12, true);">12</a>
6692
+ <a class="line-number" data-cell="visualization" data-line="13" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 13, true);">13</a>
6693
+ <a class="line-number" data-cell="visualization" data-line="14" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 14, true);">14</a>
6694
+ <a class="line-number" data-cell="visualization" data-line="15" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 15, true);">15</a>
6695
+ <a class="line-number" data-cell="visualization" data-line="16" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 16, true);">16</a>
6696
+ <a class="line-number" data-cell="visualization" data-line="17" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 17, true);">17</a>
6697
+ <a class="line-number" data-cell="visualization" data-line="18" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 18, true);">18</a>
6698
+ <a class="line-number" data-cell="visualization" data-line="19" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 19, true);">19</a>
6699
+ <a class="line-number" data-cell="visualization" data-line="20" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 20, true);">20</a>
6700
+ <a class="line-number" data-cell="visualization" data-line="21" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 21, true);">21</a>
6701
+ <a class="line-number" data-cell="visualization" data-line="22" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 22, true);">22</a>
6702
+ <a class="line-number" data-cell="visualization" data-line="23" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 23, true);">23</a>
6703
+ <a class="line-number" data-cell="visualization" data-line="24" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 24, true);">24</a>
6704
+ <a class="line-number" data-cell="visualization" data-line="25" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 25, true);">25</a>
6705
+ <a class="line-number" data-cell="visualization" data-line="26" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 26, true);">26</a>
6706
+ <a class="line-number" data-cell="visualization" data-line="27" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 27, true);">27</a>
6707
+ <a class="line-number" data-cell="visualization" data-line="28" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 28, true);">28</a>
6708
+ <a class="line-number" data-cell="visualization" data-line="29" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 29, true);">29</a>
6709
+ <a class="line-number" data-cell="visualization" data-line="30" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 30, true);">30</a>
6710
+ <a class="line-number" data-cell="visualization" data-line="31" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 31, true);">31</a>
6711
+ <a class="line-number" data-cell="visualization" data-line="32" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 32, true);">32</a>
6712
+ <a class="line-number" data-cell="visualization" data-line="33" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 33, true);">33</a>
6713
+ <a class="line-number" data-cell="visualization" data-line="34" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 34, true);">34</a>
6714
+ <a class="line-number" data-cell="visualization" data-line="35" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 35, true);">35</a>
6715
+ <a class="line-number" data-cell="visualization" data-line="36" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 36, true);">36</a>
6716
+ <a class="line-number" data-cell="visualization" data-line="37" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 37, true);">37</a>
6717
+ <a class="line-number" data-cell="visualization" data-line="38" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 38, true);">38</a>
6718
+ <a class="line-number" data-cell="visualization" data-line="39" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 39, true);">39</a>
6719
+ <a class="line-number" data-cell="visualization" data-line="40" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 40, true);">40</a>
6720
+ <a class="line-number" data-cell="visualization" data-line="41" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 41, true);">41</a>
6721
+ <a class="line-number" data-cell="visualization" data-line="42" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 42, true);">42</a>
6722
+ <a class="line-number" data-cell="visualization" data-line="43" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 43, true);">43</a>
6723
+ <a class="line-number" data-cell="visualization" data-line="44" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 44, true);">44</a>
6724
+ <a class="line-number" data-cell="visualization" data-line="45" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 45, true);">45</a>
6725
+ <a class="line-number" data-cell="visualization" data-line="46" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 46, true);">46</a>
6726
+ <a class="line-number" data-cell="visualization" data-line="47" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 47, true);">47</a>
6727
+ <a class="line-number" data-cell="visualization" data-line="48" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 48, true);">48</a>
6728
+ <a class="line-number" data-cell="visualization" data-line="49" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 49, true);">49</a>
6729
+ <a class="line-number" data-cell="visualization" data-line="50" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 50, true);">50</a>
6730
+ <a class="line-number" data-cell="visualization" data-line="51" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 51, true);">51</a>
6731
+ <a class="line-number" data-cell="visualization" data-line="52" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 52, true);">52</a>
6732
+ <a class="line-number" data-cell="visualization" data-line="53" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 53, true);">53</a>
6733
+ <a class="line-number" data-cell="visualization" data-line="54" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 54, true);">54</a>
6734
+ <a class="line-number" data-cell="visualization" data-line="55" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 55, true);">55</a>
6735
+ <a class="line-number" data-cell="visualization" data-line="56" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 56, true);">56</a>
6736
+ <a class="line-number" data-cell="visualization" data-line="57" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 57, true);">57</a>
6737
+ <a class="line-number" data-cell="visualization" data-line="58" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 58, true);">58</a>
6738
+ <a class="line-number" data-cell="visualization" data-line="59" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 59, true);">59</a>
6739
+ <a class="line-number" data-cell="visualization" data-line="60" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 60, true);">60</a>
6740
+ <a class="line-number" data-cell="visualization" data-line="61" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 61, true);">61</a>
6741
+ <a class="line-number" data-cell="visualization" data-line="62" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 62, true);">62</a>
6742
+ <a class="line-number" data-cell="visualization" data-line="63" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 63, true);">63</a>
6743
+ <a class="line-number" data-cell="visualization" data-line="64" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 64, true);">64</a>
6744
+ <a class="line-number" data-cell="visualization" data-line="65" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 65, true);">65</a>
6745
+ <a class="line-number" data-cell="visualization" data-line="66" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 66, true);">66</a>
6746
+ <a class="line-number" data-cell="visualization" data-line="67" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 67, true);">67</a>
6747
+ <a class="line-number" data-cell="visualization" data-line="68" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 68, true);">68</a>
6748
+ <a class="line-number" data-cell="visualization" data-line="69" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 69, true);">69</a>
6749
+ <a class="line-number" data-cell="visualization" data-line="70" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 70, true);">70</a>
6750
+ <a class="line-number" data-cell="visualization" data-line="71" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 71, true);">71</a>
6751
+ <a class="line-number" data-cell="visualization" data-line="72" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 72, true);">72</a>
6752
+ <a class="line-number" data-cell="visualization" data-line="73" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 73, true);">73</a>
6753
+ <a class="line-number" data-cell="visualization" data-line="74" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 74, true);">74</a>
6754
+ <a class="line-number" data-cell="visualization" data-line="75" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 75, true);">75</a>
6755
+ <a class="line-number" data-cell="visualization" data-line="76" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 76, true);">76</a>
6756
+ <a class="line-number" data-cell="visualization" data-line="77" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 77, true);">77</a>
6757
+ <a class="line-number" data-cell="visualization" data-line="78" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 78, true);">78</a>
6758
+ <a class="line-number" data-cell="visualization" data-line="79" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 79, true);">79</a>
6759
+ <a class="line-number" data-cell="visualization" data-line="80" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 80, true);">80</a>
6760
+ <a class="line-number" data-cell="visualization" data-line="81" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 81, true);">81</a>
6761
+ <a class="line-number" data-cell="visualization" data-line="82" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 82, true);">82</a>
6762
+ <a class="line-number" data-cell="visualization" data-line="83" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 83, true);">83</a>
6763
+ <a class="line-number" data-cell="visualization" data-line="84" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 84, true);">84</a>
6764
+ <a class="line-number" data-cell="visualization" data-line="85" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 85, true);">85</a>
6765
+ <a class="line-number" data-cell="visualization" data-line="86" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 86, true);">86</a>
6766
+ <a class="line-number" data-cell="visualization" data-line="87" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 87, true);">87</a>
6767
+ <a class="line-number" data-cell="visualization" data-line="88" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 88, true);">88</a>
6768
+ <a class="line-number" data-cell="visualization" data-line="89" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 89, true);">89</a>
6769
+ <a class="line-number" data-cell="visualization" data-line="90" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 90, true);">90</a>
6770
+ <a class="line-number" data-cell="visualization" data-line="91" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 91, true);">91</a>
6771
+ <a class="line-number" data-cell="visualization" data-line="92" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 92, true);">92</a>
6772
+ <a class="line-number" data-cell="visualization" data-line="93" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 93, true);">93</a>
6773
+ <a class="line-number" data-cell="visualization" data-line="94" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 94, true);">94</a>
6774
+ <a class="line-number" data-cell="visualization" data-line="95" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 95, true);">95</a>
6775
+ <a class="line-number" data-cell="visualization" data-line="96" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 96, true);">96</a>
6776
+ <a class="line-number" data-cell="visualization" data-line="97" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 97, true);">97</a>
6777
+ <a class="line-number" data-cell="visualization" data-line="98" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 98, true);">98</a>
6778
+ <a class="line-number" data-cell="visualization" data-line="99" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 99, true);">99</a>
6779
+ <a class="line-number" data-cell="visualization" data-line="100" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 100, true);">100</a>
6780
+ <a class="line-number" data-cell="visualization" data-line="101" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 101, true);">101</a>
6781
+ <a class="line-number" data-cell="visualization" data-line="102" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 102, true);">102</a>
6782
+ <a class="line-number" data-cell="visualization" data-line="103" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 103, true);">103</a>
6783
+ <a class="line-number" data-cell="visualization" data-line="104" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 104, true);">104</a>
6784
+ <a class="line-number" data-cell="visualization" data-line="105" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 105, true);">105</a>
6785
+ <a class="line-number" data-cell="visualization" data-line="106" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 106, true);">106</a>
6786
+ <a class="line-number" data-cell="visualization" data-line="107" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 107, true);">107</a>
6787
+ <a class="line-number" data-cell="visualization" data-line="108" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 108, true);">108</a>
6788
+ <a class="line-number" data-cell="visualization" data-line="109" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 109, true);">109</a>
6789
+ <a class="line-number" data-cell="visualization" data-line="110" href="#cell-visualization" onclick="event.preventDefault(); selectCellLine('visualization', 110, true);">110</a>
6790
+ </div>
6791
+ <div class="code-wrap">
6792
+ <div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">json</span>
6793
+ <span class="kn">import</span><span class="w"> </span><span class="nn">matplotlib.pyplot</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">plt</span>
6794
+ <span class="kn">import</span><span class="w"> </span><span class="nn">numpy</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">np</span>
6795
+ <span class="kn">from</span><span class="w"> </span><span class="nn">pathlib</span><span class="w"> </span><span class="kn">import</span> <span class="n">Path</span>
6796
+ <span class="kn">import</span><span class="w"> </span><span class="nn">os</span>
6797
+
6798
+ <span class="c1"># List of expected result files</span>
6799
+ <span class="n">yamoe_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;UVNOTE_INPUT_YAMOE_RUN&#39;</span><span class="p">,</span> <span class="s1">&#39;.&#39;</span><span class="p">)</span>
6800
+ <span class="n">binned_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;UVNOTE_INPUT_BINNED_RUN&#39;</span><span class="p">,</span> <span class="s1">&#39;.&#39;</span><span class="p">)</span>
6801
+ <span class="n">gptoss_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;UVNOTE_INPUT_GPTOSS_RUN&#39;</span><span class="p">,</span> <span class="s1">&#39;.&#39;</span><span class="p">)</span>
6802
+ <span class="n">gptoss_training_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;UVNOTE_INPUT_GPTOSS_TRAINING_RUN&#39;</span><span class="p">,</span> <span class="s1">&#39;.&#39;</span><span class="p">)</span>
6803
+ <span class="n">megablocks_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;UVNOTE_INPUT_MEGABLOCKS_RUN&#39;</span><span class="p">,</span> <span class="s1">&#39;.&#39;</span><span class="p">)</span>
6804
+
6805
+ <span class="n">result_files</span> <span class="o">=</span> <span class="p">[</span>
6806
+ <span class="n">Path</span><span class="p">(</span><span class="n">yamoe_dir</span><span class="p">)</span> <span class="o">/</span> <span class="s2">&quot;yamoe_results.json&quot;</span><span class="p">,</span>
6807
+ <span class="n">Path</span><span class="p">(</span><span class="n">binned_dir</span><span class="p">)</span> <span class="o">/</span> <span class="s2">&quot;binned_results.json&quot;</span><span class="p">,</span>
6808
+ <span class="n">Path</span><span class="p">(</span><span class="n">gptoss_dir</span><span class="p">)</span> <span class="o">/</span> <span class="s2">&quot;gptoss_results.json&quot;</span><span class="p">,</span>
6809
+ <span class="n">Path</span><span class="p">(</span><span class="n">gptoss_training_dir</span><span class="p">)</span> <span class="o">/</span> <span class="s2">&quot;gptoss_training_results.json&quot;</span><span class="p">,</span>
6810
+ <span class="n">Path</span><span class="p">(</span><span class="n">megablocks_dir</span><span class="p">)</span> <span class="o">/</span> <span class="s2">&quot;megablocks_results.json&quot;</span>
6811
+ <span class="p">]</span>
6812
+
6813
+ <span class="c1"># Load all benchmark results</span>
6814
+ <span class="n">results</span> <span class="o">=</span> <span class="p">{}</span>
6815
+ <span class="k">for</span> <span class="n">file</span> <span class="ow">in</span> <span class="n">result_files</span><span class="p">:</span>
6816
+ <span class="k">if</span> <span class="n">Path</span><span class="p">(</span><span class="n">file</span><span class="p">)</span><span class="o">.</span><span class="n">exists</span><span class="p">():</span>
6817
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">file</span><span class="p">,</span> <span class="s1">&#39;r&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
6818
+ <span class="n">data</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
6819
+ <span class="n">results</span><span class="p">[</span><span class="n">data</span><span class="p">[</span><span class="s1">&#39;implementation&#39;</span><span class="p">]]</span> <span class="o">=</span> <span class="n">data</span>
6820
+ <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Loaded </span><span class="si">{</span><span class="n">file</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
6821
+ <span class="k">else</span><span class="p">:</span>
6822
+ <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Missing </span><span class="si">{</span><span class="n">file</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
6823
+
6824
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">results</span><span class="p">:</span>
6825
+ <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;No benchmark results found. Run the benchmark cells first.&quot;</span><span class="p">)</span>
6826
+ <span class="k">else</span><span class="p">:</span>
6827
+ <span class="c1"># Extract data for plotting</span>
6828
+ <span class="n">implementations</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">results</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
6829
+ <span class="n">avg_latencies</span> <span class="o">=</span> <span class="p">[</span><span class="n">results</span><span class="p">[</span><span class="n">impl</span><span class="p">][</span><span class="s1">&#39;stats&#39;</span><span class="p">][</span><span class="s1">&#39;avg_ms&#39;</span><span class="p">]</span> <span class="k">for</span> <span class="n">impl</span> <span class="ow">in</span> <span class="n">implementations</span><span class="p">]</span>
6830
+ <span class="n">p95_latencies</span> <span class="o">=</span> <span class="p">[</span><span class="n">results</span><span class="p">[</span><span class="n">impl</span><span class="p">][</span><span class="s1">&#39;stats&#39;</span><span class="p">][</span><span class="s1">&#39;p95_ms&#39;</span><span class="p">]</span> <span class="k">for</span> <span class="n">impl</span> <span class="ow">in</span> <span class="n">implementations</span><span class="p">]</span>
6831
+ <span class="n">throughputs</span> <span class="o">=</span> <span class="p">[</span><span class="n">results</span><span class="p">[</span><span class="n">impl</span><span class="p">][</span><span class="s1">&#39;stats&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;tokens_per_s&#39;</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span> <span class="k">for</span> <span class="n">impl</span> <span class="ow">in</span> <span class="n">implementations</span><span class="p">]</span>
6832
+
6833
+ <span class="c1"># Create figure with subplots</span>
6834
+ <span class="n">fig</span><span class="p">,</span> <span class="p">(</span><span class="n">ax1</span><span class="p">,</span> <span class="n">ax2</span><span class="p">,</span> <span class="n">ax3</span><span class="p">)</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="n">figsize</span><span class="o">=</span><span class="p">(</span><span class="mi">18</span><span class="p">,</span> <span class="mi">6</span><span class="p">))</span>
6835
+ <span class="n">fig</span><span class="o">.</span><span class="n">suptitle</span><span class="p">(</span><span class="s1">&#39;MoE Implementation Performance Comparison&#39;</span><span class="p">,</span> <span class="n">fontsize</span><span class="o">=</span><span class="mi">16</span><span class="p">,</span> <span class="n">fontweight</span><span class="o">=</span><span class="s1">&#39;bold&#39;</span><span class="p">)</span>
6836
+
6837
+ <span class="c1"># Colors for each implementation</span>
6838
+ <span class="n">colors</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;#FF6B6B&#39;</span><span class="p">,</span> <span class="s1">&#39;#4ECDC4&#39;</span><span class="p">,</span> <span class="s1">&#39;#45B7D1&#39;</span><span class="p">,</span> <span class="s1">&#39;#96CEB4&#39;</span><span class="p">,</span> <span class="s1">&#39;#FECA57&#39;</span><span class="p">][:</span><span class="nb">len</span><span class="p">(</span><span class="n">implementations</span><span class="p">)]</span>
6839
+
6840
+ <span class="c1"># 1. Average Latency Chart</span>
6841
+ <span class="n">bars1</span> <span class="o">=</span> <span class="n">ax1</span><span class="o">.</span><span class="n">bar</span><span class="p">(</span><span class="n">implementations</span><span class="p">,</span> <span class="n">avg_latencies</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="n">colors</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.8</span><span class="p">,</span> <span class="n">edgecolor</span><span class="o">=</span><span class="s1">&#39;black&#39;</span><span class="p">,</span> <span class="n">linewidth</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
6842
+ <span class="n">ax1</span><span class="o">.</span><span class="n">set_title</span><span class="p">(</span><span class="s1">&#39;Average Latency&#39;</span><span class="p">,</span> <span class="n">fontweight</span><span class="o">=</span><span class="s1">&#39;bold&#39;</span><span class="p">,</span> <span class="n">fontsize</span><span class="o">=</span><span class="mi">14</span><span class="p">)</span>
6843
+ <span class="n">ax1</span><span class="o">.</span><span class="n">set_ylabel</span><span class="p">(</span><span class="s1">&#39;Latency (ms)&#39;</span><span class="p">,</span> <span class="n">fontweight</span><span class="o">=</span><span class="s1">&#39;bold&#39;</span><span class="p">)</span>
6844
+ <span class="n">ax1</span><span class="o">.</span><span class="n">tick_params</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="s1">&#39;x&#39;</span><span class="p">,</span> <span class="n">rotation</span><span class="o">=</span><span class="mi">45</span><span class="p">)</span>
6845
+ <span class="n">ax1</span><span class="o">.</span><span class="n">grid</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="s1">&#39;y&#39;</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.3</span><span class="p">)</span>
6846
+
6847
+ <span class="c1"># Add value labels on bars</span>
6848
+ <span class="k">for</span> <span class="n">bar</span><span class="p">,</span> <span class="n">val</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">bars1</span><span class="p">,</span> <span class="n">avg_latencies</span><span class="p">):</span>
6849
+ <span class="n">ax1</span><span class="o">.</span><span class="n">text</span><span class="p">(</span><span class="n">bar</span><span class="o">.</span><span class="n">get_x</span><span class="p">()</span> <span class="o">+</span> <span class="n">bar</span><span class="o">.</span><span class="n">get_width</span><span class="p">()</span><span class="o">/</span><span class="mi">2</span><span class="p">,</span> <span class="n">bar</span><span class="o">.</span><span class="n">get_height</span><span class="p">()</span> <span class="o">+</span> <span class="nb">max</span><span class="p">(</span><span class="n">avg_latencies</span><span class="p">)</span><span class="o">*</span><span class="mf">0.01</span><span class="p">,</span>
6850
+ <span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">val</span><span class="si">:</span><span class="s1">.2f</span><span class="si">}</span><span class="s1">ms&#39;</span><span class="p">,</span> <span class="n">ha</span><span class="o">=</span><span class="s1">&#39;center&#39;</span><span class="p">,</span> <span class="n">va</span><span class="o">=</span><span class="s1">&#39;bottom&#39;</span><span class="p">,</span> <span class="n">fontweight</span><span class="o">=</span><span class="s1">&#39;bold&#39;</span><span class="p">)</span>
6851
+
6852
+ <span class="c1"># 2. P95 Latency Chart</span>
6853
+ <span class="n">bars2</span> <span class="o">=</span> <span class="n">ax2</span><span class="o">.</span><span class="n">bar</span><span class="p">(</span><span class="n">implementations</span><span class="p">,</span> <span class="n">p95_latencies</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="n">colors</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.8</span><span class="p">,</span> <span class="n">edgecolor</span><span class="o">=</span><span class="s1">&#39;black&#39;</span><span class="p">,</span> <span class="n">linewidth</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
6854
+ <span class="n">ax2</span><span class="o">.</span><span class="n">set_title</span><span class="p">(</span><span class="s1">&#39;95th Percentile Latency&#39;</span><span class="p">,</span> <span class="n">fontweight</span><span class="o">=</span><span class="s1">&#39;bold&#39;</span><span class="p">,</span> <span class="n">fontsize</span><span class="o">=</span><span class="mi">14</span><span class="p">)</span>
6855
+ <span class="n">ax2</span><span class="o">.</span><span class="n">set_ylabel</span><span class="p">(</span><span class="s1">&#39;Latency (ms)&#39;</span><span class="p">,</span> <span class="n">fontweight</span><span class="o">=</span><span class="s1">&#39;bold&#39;</span><span class="p">)</span>
6856
+ <span class="n">ax2</span><span class="o">.</span><span class="n">tick_params</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="s1">&#39;x&#39;</span><span class="p">,</span> <span class="n">rotation</span><span class="o">=</span><span class="mi">45</span><span class="p">)</span>
6857
+ <span class="n">ax2</span><span class="o">.</span><span class="n">grid</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="s1">&#39;y&#39;</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.3</span><span class="p">)</span>
6858
+
6859
+ <span class="c1"># Add value labels on bars</span>
6860
+ <span class="k">for</span> <span class="n">bar</span><span class="p">,</span> <span class="n">val</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">bars2</span><span class="p">,</span> <span class="n">p95_latencies</span><span class="p">):</span>
6861
+ <span class="n">ax2</span><span class="o">.</span><span class="n">text</span><span class="p">(</span><span class="n">bar</span><span class="o">.</span><span class="n">get_x</span><span class="p">()</span> <span class="o">+</span> <span class="n">bar</span><span class="o">.</span><span class="n">get_width</span><span class="p">()</span><span class="o">/</span><span class="mi">2</span><span class="p">,</span> <span class="n">bar</span><span class="o">.</span><span class="n">get_height</span><span class="p">()</span> <span class="o">+</span> <span class="nb">max</span><span class="p">(</span><span class="n">p95_latencies</span><span class="p">)</span><span class="o">*</span><span class="mf">0.01</span><span class="p">,</span>
6862
+ <span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">val</span><span class="si">:</span><span class="s1">.2f</span><span class="si">}</span><span class="s1">ms&#39;</span><span class="p">,</span> <span class="n">ha</span><span class="o">=</span><span class="s1">&#39;center&#39;</span><span class="p">,</span> <span class="n">va</span><span class="o">=</span><span class="s1">&#39;bottom&#39;</span><span class="p">,</span> <span class="n">fontweight</span><span class="o">=</span><span class="s1">&#39;bold&#39;</span><span class="p">)</span>
6863
+
6864
+ <span class="c1"># 3. Throughput Chart</span>
6865
+ <span class="n">bars3</span> <span class="o">=</span> <span class="n">ax3</span><span class="o">.</span><span class="n">bar</span><span class="p">(</span><span class="n">implementations</span><span class="p">,</span> <span class="n">throughputs</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="n">colors</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.8</span><span class="p">,</span> <span class="n">edgecolor</span><span class="o">=</span><span class="s1">&#39;black&#39;</span><span class="p">,</span> <span class="n">linewidth</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
6866
+ <span class="n">ax3</span><span class="o">.</span><span class="n">set_title</span><span class="p">(</span><span class="s1">&#39;Throughput&#39;</span><span class="p">,</span> <span class="n">fontweight</span><span class="o">=</span><span class="s1">&#39;bold&#39;</span><span class="p">,</span> <span class="n">fontsize</span><span class="o">=</span><span class="mi">14</span><span class="p">)</span>
6867
+ <span class="n">ax3</span><span class="o">.</span><span class="n">set_ylabel</span><span class="p">(</span><span class="s1">&#39;Tokens/sec&#39;</span><span class="p">,</span> <span class="n">fontweight</span><span class="o">=</span><span class="s1">&#39;bold&#39;</span><span class="p">)</span>
6868
+ <span class="n">ax3</span><span class="o">.</span><span class="n">tick_params</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="s1">&#39;x&#39;</span><span class="p">,</span> <span class="n">rotation</span><span class="o">=</span><span class="mi">45</span><span class="p">)</span>
6869
+ <span class="n">ax3</span><span class="o">.</span><span class="n">grid</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="s1">&#39;y&#39;</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.3</span><span class="p">)</span>
6870
+
6871
+ <span class="c1"># Add value labels on bars</span>
6872
+ <span class="k">for</span> <span class="n">bar</span><span class="p">,</span> <span class="n">val</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">bars3</span><span class="p">,</span> <span class="n">throughputs</span><span class="p">):</span>
6873
+ <span class="k">if</span> <span class="n">val</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span> <span class="c1"># Only show label if throughput was calculated</span>
6874
+ <span class="n">ax3</span><span class="o">.</span><span class="n">text</span><span class="p">(</span><span class="n">bar</span><span class="o">.</span><span class="n">get_x</span><span class="p">()</span> <span class="o">+</span> <span class="n">bar</span><span class="o">.</span><span class="n">get_width</span><span class="p">()</span><span class="o">/</span><span class="mi">2</span><span class="p">,</span> <span class="n">bar</span><span class="o">.</span><span class="n">get_height</span><span class="p">()</span> <span class="o">+</span> <span class="nb">max</span><span class="p">(</span><span class="n">throughputs</span><span class="p">)</span><span class="o">*</span><span class="mf">0.01</span><span class="p">,</span>
6875
+ <span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">val</span><span class="si">:</span><span class="s1">.0f</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">,</span> <span class="n">ha</span><span class="o">=</span><span class="s1">&#39;center&#39;</span><span class="p">,</span> <span class="n">va</span><span class="o">=</span><span class="s1">&#39;bottom&#39;</span><span class="p">,</span> <span class="n">fontweight</span><span class="o">=</span><span class="s1">&#39;bold&#39;</span><span class="p">)</span>
6876
+
6877
+ <span class="n">plt</span><span class="o">.</span><span class="n">tight_layout</span><span class="p">()</span>
6878
+ <span class="n">plt</span><span class="o">.</span><span class="n">savefig</span><span class="p">(</span><span class="s2">&quot;moe_performance_comparison.png&quot;</span><span class="p">,</span> <span class="n">dpi</span><span class="o">=</span><span class="mi">300</span><span class="p">)</span>
6879
+
6880
+ <span class="c1"># Print summary table</span>
6881
+ <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;</span><span class="se">\n</span><span class="s2">Performance Summary:&quot;</span><span class="p">)</span>
6882
+ <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="s1">&#39;Implementation&#39;</span><span class="si">:</span><span class="s2">&lt;30</span><span class="si">}</span><span class="s2"> </span><span class="si">{</span><span class="s1">&#39;Avg (ms)&#39;</span><span class="si">:</span><span class="s2">&lt;12</span><span class="si">}</span><span class="s2"> </span><span class="si">{</span><span class="s1">&#39;P95 (ms)&#39;</span><span class="si">:</span><span class="s2">&lt;12</span><span class="si">}</span><span class="s2"> </span><span class="si">{</span><span class="s1">&#39;Tokens/sec&#39;</span><span class="si">:</span><span class="s2">&lt;12</span><span class="si">}</span><span class="s2"> </span><span class="si">{</span><span class="s1">&#39;Relative Speed&#39;</span><span class="si">:</span><span class="s2">&lt;15</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
6883
+ <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;-&quot;</span><span class="o">*</span><span class="mi">80</span><span class="p">)</span>
6884
+
6885
+ <span class="c1"># Sort by average latency for relative speed calculation</span>
6886
+ <span class="n">sorted_results</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">results</span><span class="o">.</span><span class="n">items</span><span class="p">(),</span> <span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="p">[</span><span class="mi">1</span><span class="p">][</span><span class="s1">&#39;stats&#39;</span><span class="p">][</span><span class="s1">&#39;avg_ms&#39;</span><span class="p">])</span>
6887
+ <span class="n">fastest_latency</span> <span class="o">=</span> <span class="n">sorted_results</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">1</span><span class="p">][</span><span class="s1">&#39;stats&#39;</span><span class="p">][</span><span class="s1">&#39;avg_ms&#39;</span><span class="p">]</span>
6888
+
6889
+ <span class="k">for</span> <span class="n">impl</span><span class="p">,</span> <span class="n">data</span> <span class="ow">in</span> <span class="n">sorted_results</span><span class="p">:</span>
6890
+ <span class="n">avg_ms</span> <span class="o">=</span> <span class="n">data</span><span class="p">[</span><span class="s1">&#39;stats&#39;</span><span class="p">][</span><span class="s1">&#39;avg_ms&#39;</span><span class="p">]</span>
6891
+ <span class="n">p95_ms</span> <span class="o">=</span> <span class="n">data</span><span class="p">[</span><span class="s1">&#39;stats&#39;</span><span class="p">][</span><span class="s1">&#39;p95_ms&#39;</span><span class="p">]</span>
6892
+ <span class="n">tokens_s</span> <span class="o">=</span> <span class="n">data</span><span class="p">[</span><span class="s1">&#39;stats&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;tokens_per_s&#39;</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span>
6893
+ <span class="n">relative_speed</span> <span class="o">=</span> <span class="n">fastest_latency</span> <span class="o">/</span> <span class="n">avg_ms</span>
6894
+
6895
+ <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">impl</span><span class="si">:</span><span class="s2">&lt;30</span><span class="si">}</span><span class="s2"> </span><span class="si">{</span><span class="n">avg_ms</span><span class="si">:</span><span class="s2">&gt;8.2f</span><span class="si">}</span><span class="s2"> </span><span class="si">{</span><span class="n">p95_ms</span><span class="si">:</span><span class="s2">&gt;8.2f</span><span class="si">}</span><span class="s2"> </span><span class="si">{</span><span class="n">tokens_s</span><span class="si">:</span><span class="s2">&gt;8.0f</span><span class="si">}</span><span class="s2"> </span><span class="si">{</span><span class="n">relative_speed</span><span class="si">:</span><span class="s2">&gt;6.2f</span><span class="si">}</span><span class="s2">x&quot;</span><span class="p">)</span>
6896
+
6897
+ <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;</span><span class="se">\n</span><span class="s2">Fastest: </span><span class="si">{</span><span class="n">sorted_results</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span><span class="si">}</span><span class="s2"> (</span><span class="si">{</span><span class="n">sorted_results</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">1</span><span class="p">][</span><span class="s1">&#39;stats&#39;</span><span class="p">][</span><span class="s1">&#39;avg_ms&#39;</span><span class="p">]</span><span class="si">:</span><span class="s2">.2f</span><span class="si">}</span><span class="s2">ms avg)&quot;</span><span class="p">)</span>
6898
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">sorted_results</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
6899
+ <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Slowest: </span><span class="si">{</span><span class="n">sorted_results</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span><span class="si">}</span><span class="s2"> (</span><span class="si">{</span><span class="n">sorted_results</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">][</span><span class="mi">1</span><span class="p">][</span><span class="s1">&#39;stats&#39;</span><span class="p">][</span><span class="s1">&#39;avg_ms&#39;</span><span class="p">]</span><span class="si">:</span><span class="s2">.2f</span><span class="si">}</span><span class="s2">ms avg)&quot;</span><span class="p">)</span>
6900
+ <span class="n">speedup</span> <span class="o">=</span> <span class="n">sorted_results</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">][</span><span class="mi">1</span><span class="p">][</span><span class="s1">&#39;stats&#39;</span><span class="p">][</span><span class="s1">&#39;avg_ms&#39;</span><span class="p">]</span> <span class="o">/</span> <span class="n">sorted_results</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">1</span><span class="p">][</span><span class="s1">&#39;stats&#39;</span><span class="p">][</span><span class="s1">&#39;avg_ms&#39;</span><span class="p">]</span>
6901
+ <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Max Speedup: </span><span class="si">{</span><span class="n">speedup</span><span class="si">:</span><span class="s2">.1f</span><span class="si">}</span><span class="s2">x&quot;</span><span class="p">)</span>
6902
+ </pre></div>
6903
+
6904
+ <div class="code-line-highlight" id="line-highlight-visualization"></div>
6905
+ </div>
6906
+ </div>
6907
+ </div>
6908
+ <div id="output-visualization" class="cell-output">
6909
+ <div class="cell-stdout">Loaded /repo/moe_benchmarks/megablocks_yamoe/.uvnote/cache/274d1d4e0722f5affb811112832e03d26daafb5eaa96259e7ec575eb43a40f12/yamoe_results.json
6910
+ Loaded /repo/moe_benchmarks/megablocks_yamoe/.uvnote/cache/0e2a9f24cc405bb3c4ccb37530405ffe7cae24c59066185a87e856b3ac7344b3/binned_results.json
6911
+ Loaded /repo/moe_benchmarks/megablocks_yamoe/.uvnote/cache/b40a0492fc99c75ce021114ee849e7db60a33cfdf61891ace614b748953db1eb/gptoss_results.json
6912
+ Loaded /repo/moe_benchmarks/megablocks_yamoe/.uvnote/cache/ab389cf3b8cc56969604061ec8bc29a5701c53cdc24bd2682cf630b5e1eeb7bb/gptoss_training_results.json
6913
+ Loaded /repo/moe_benchmarks/megablocks_yamoe/.uvnote/cache/0febdf3420999533bc2e14bb2a4bffaba4af699a19ddf644f24806180c8347e1/megablocks_results.json
6914
+
6915
+ Performance Summary:
6916
+ Implementation Avg (ms) P95 (ms) Tokens/sec Relative Speed
6917
+ --------------------------------------------------------------------------------
6918
+ megablocks_results 3.87 8.54 25840 1.00x
6919
+ yamoe_results 4.25 4.27 23531 0.91x
6920
+ binned_results 36.54 39.00 2737 0.11x
6921
+ gptoss_results 45.24 48.43 2211 0.09x
6922
+ gptoss_training_results 45.75 50.54 2186 0.08x
6923
+
6924
+ Fastest: megablocks_results (3.87ms avg)
6925
+ Slowest: gptoss_training_results (45.75ms avg)
6926
+ Max Speedup: 11.8x
6927
+ </div>
6928
+ <div class="uv-install-logs" id="uv-logs-visualization"> <!-- uv install log for the visualization cell; the header calls toggleUvLogs (defined elsewhere in this page) and the content starts hidden via display: none -->
6929
+ <div class="uv-logs-header" role="button" tabindex="0" onclick="toggleUvLogs(this)" onkeydown="if(event.key==='Enter'||event.key===' '){event.preventDefault();toggleUvLogs(this);}">▶ UV Install Logs</div>
6930
+ <div class="uv-logs-content" style="display: none;">
6931
+ Downloading pillow (6.3MiB)
6932
+ Downloading kiwisolver (1.4MiB)
6933
+ Downloading numpy (16.2MiB)
6934
+ Downloading matplotlib (8.3MiB)
6935
+ Downloading fonttools (4.7MiB)
6936
+ Downloading kiwisolver
6937
+ Downloading pillow
6938
+ Downloading fonttools
6939
+ Downloading matplotlib
6940
+ Downloading numpy
6941
+ Installed 11 packages in 48ms
6942
+ </div>
6943
+ </div>
6944
+ <div class="cell-artifacts"> <!-- Files saved by the visualization cell: a link to each artifact plus an inline preview image -->
6945
+ <h4>Artifacts:</h4>
6946
+ <a href="artifacts/visualization/moe_performance_comparison.png" class="artifact" target="_blank" rel="noopener noreferrer">moe_performance_comparison.png</a>
6947
+ <div class="artifact-preview">
6948
+ <img src="artifacts/visualization/moe_performance_comparison.png" alt="Bar charts comparing average latency, 95th percentile latency, and throughput in tokens per second across the benchmarked MoE implementations" loading="lazy" decoding="async">
6949
+ </div>
6950
+ </div>
6951
+ </div>
6952
+ </div>
6953
  </div>
6954
 
6955
  </body>