drbh HF Staff commited on
Commit
2e0bc99
·
verified ·
1 Parent(s): 84c0d09

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. index.html +0 -0
  2. note_test_override.html +170 -167
index.html CHANGED
The diff for this file is too large to render. See raw diff
 
note_test_override.html CHANGED
@@ -3711,137 +3711,137 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left:
3711
  </div>
3712
 
3713
  <div class="main-content">
3714
- <h1>Reference kernel</h1>
3715
- <div class="cell cell-failed" id="cell-setup2">
3716
  <div class="cell-header">
3717
  <span class="collapse-indicators">
3718
- <span onclick="toggleCode('setup2')" style="cursor: pointer;">▼ code</span>
3719
- <span onclick="toggleOutput('setup2')" style="cursor: pointer;">▼ output</span>
3720
- <span id="uv-indicator-setup2" onclick="toggleUvLogsFromHeader('setup2')" style="cursor: pointer;">▶ uv-logs</span>
3721
  </span> |
3722
- Cell: setup2 | 99.84s | FAILED
3723
- | <button class="run-btn" onclick="runCell('setup2')">▶ run</button>
3724
- <button class="copy-btn" onclick="copyCell('setup2')">Copy</button>
3725
- <a href="cells/setup2.py" target="_blank" class="raw-btn">Raw</a>
3726
  </div>
3727
- <div id="code-setup2" class="cell-code" data-lines="115">
3728
  <div class="highlight-with-lines">
3729
- <div class="line-numbers" id="lines-setup2">
3730
- <a class="line-number" data-cell="setup2" data-line="1" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 1, true);">1</a>
3731
- <a class="line-number" data-cell="setup2" data-line="2" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 2, true);">2</a>
3732
- <a class="line-number" data-cell="setup2" data-line="3" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 3, true);">3</a>
3733
- <a class="line-number" data-cell="setup2" data-line="4" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 4, true);">4</a>
3734
- <a class="line-number" data-cell="setup2" data-line="5" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 5, true);">5</a>
3735
- <a class="line-number" data-cell="setup2" data-line="6" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 6, true);">6</a>
3736
- <a class="line-number" data-cell="setup2" data-line="7" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 7, true);">7</a>
3737
- <a class="line-number" data-cell="setup2" data-line="8" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 8, true);">8</a>
3738
- <a class="line-number" data-cell="setup2" data-line="9" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 9, true);">9</a>
3739
- <a class="line-number" data-cell="setup2" data-line="10" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 10, true);">10</a>
3740
- <a class="line-number" data-cell="setup2" data-line="11" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 11, true);">11</a>
3741
- <a class="line-number" data-cell="setup2" data-line="12" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 12, true);">12</a>
3742
- <a class="line-number" data-cell="setup2" data-line="13" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 13, true);">13</a>
3743
- <a class="line-number" data-cell="setup2" data-line="14" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 14, true);">14</a>
3744
- <a class="line-number" data-cell="setup2" data-line="15" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 15, true);">15</a>
3745
- <a class="line-number" data-cell="setup2" data-line="16" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 16, true);">16</a>
3746
- <a class="line-number" data-cell="setup2" data-line="17" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 17, true);">17</a>
3747
- <a class="line-number" data-cell="setup2" data-line="18" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 18, true);">18</a>
3748
- <a class="line-number" data-cell="setup2" data-line="19" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 19, true);">19</a>
3749
- <a class="line-number" data-cell="setup2" data-line="20" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 20, true);">20</a>
3750
- <a class="line-number" data-cell="setup2" data-line="21" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 21, true);">21</a>
3751
- <a class="line-number" data-cell="setup2" data-line="22" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 22, true);">22</a>
3752
- <a class="line-number" data-cell="setup2" data-line="23" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 23, true);">23</a>
3753
- <a class="line-number" data-cell="setup2" data-line="24" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 24, true);">24</a>
3754
- <a class="line-number" data-cell="setup2" data-line="25" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 25, true);">25</a>
3755
- <a class="line-number" data-cell="setup2" data-line="26" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 26, true);">26</a>
3756
- <a class="line-number" data-cell="setup2" data-line="27" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 27, true);">27</a>
3757
- <a class="line-number" data-cell="setup2" data-line="28" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 28, true);">28</a>
3758
- <a class="line-number" data-cell="setup2" data-line="29" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 29, true);">29</a>
3759
- <a class="line-number" data-cell="setup2" data-line="30" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 30, true);">30</a>
3760
- <a class="line-number" data-cell="setup2" data-line="31" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 31, true);">31</a>
3761
- <a class="line-number" data-cell="setup2" data-line="32" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 32, true);">32</a>
3762
- <a class="line-number" data-cell="setup2" data-line="33" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 33, true);">33</a>
3763
- <a class="line-number" data-cell="setup2" data-line="34" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 34, true);">34</a>
3764
- <a class="line-number" data-cell="setup2" data-line="35" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 35, true);">35</a>
3765
- <a class="line-number" data-cell="setup2" data-line="36" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 36, true);">36</a>
3766
- <a class="line-number" data-cell="setup2" data-line="37" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 37, true);">37</a>
3767
- <a class="line-number" data-cell="setup2" data-line="38" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 38, true);">38</a>
3768
- <a class="line-number" data-cell="setup2" data-line="39" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 39, true);">39</a>
3769
- <a class="line-number" data-cell="setup2" data-line="40" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 40, true);">40</a>
3770
- <a class="line-number" data-cell="setup2" data-line="41" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 41, true);">41</a>
3771
- <a class="line-number" data-cell="setup2" data-line="42" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 42, true);">42</a>
3772
- <a class="line-number" data-cell="setup2" data-line="43" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 43, true);">43</a>
3773
- <a class="line-number" data-cell="setup2" data-line="44" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 44, true);">44</a>
3774
- <a class="line-number" data-cell="setup2" data-line="45" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 45, true);">45</a>
3775
- <a class="line-number" data-cell="setup2" data-line="46" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 46, true);">46</a>
3776
- <a class="line-number" data-cell="setup2" data-line="47" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 47, true);">47</a>
3777
- <a class="line-number" data-cell="setup2" data-line="48" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 48, true);">48</a>
3778
- <a class="line-number" data-cell="setup2" data-line="49" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 49, true);">49</a>
3779
- <a class="line-number" data-cell="setup2" data-line="50" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 50, true);">50</a>
3780
- <a class="line-number" data-cell="setup2" data-line="51" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 51, true);">51</a>
3781
- <a class="line-number" data-cell="setup2" data-line="52" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 52, true);">52</a>
3782
- <a class="line-number" data-cell="setup2" data-line="53" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 53, true);">53</a>
3783
- <a class="line-number" data-cell="setup2" data-line="54" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 54, true);">54</a>
3784
- <a class="line-number" data-cell="setup2" data-line="55" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 55, true);">55</a>
3785
- <a class="line-number" data-cell="setup2" data-line="56" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 56, true);">56</a>
3786
- <a class="line-number" data-cell="setup2" data-line="57" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 57, true);">57</a>
3787
- <a class="line-number" data-cell="setup2" data-line="58" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 58, true);">58</a>
3788
- <a class="line-number" data-cell="setup2" data-line="59" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 59, true);">59</a>
3789
- <a class="line-number" data-cell="setup2" data-line="60" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 60, true);">60</a>
3790
- <a class="line-number" data-cell="setup2" data-line="61" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 61, true);">61</a>
3791
- <a class="line-number" data-cell="setup2" data-line="62" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 62, true);">62</a>
3792
- <a class="line-number" data-cell="setup2" data-line="63" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 63, true);">63</a>
3793
- <a class="line-number" data-cell="setup2" data-line="64" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 64, true);">64</a>
3794
- <a class="line-number" data-cell="setup2" data-line="65" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 65, true);">65</a>
3795
- <a class="line-number" data-cell="setup2" data-line="66" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 66, true);">66</a>
3796
- <a class="line-number" data-cell="setup2" data-line="67" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 67, true);">67</a>
3797
- <a class="line-number" data-cell="setup2" data-line="68" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 68, true);">68</a>
3798
- <a class="line-number" data-cell="setup2" data-line="69" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 69, true);">69</a>
3799
- <a class="line-number" data-cell="setup2" data-line="70" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 70, true);">70</a>
3800
- <a class="line-number" data-cell="setup2" data-line="71" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 71, true);">71</a>
3801
- <a class="line-number" data-cell="setup2" data-line="72" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 72, true);">72</a>
3802
- <a class="line-number" data-cell="setup2" data-line="73" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 73, true);">73</a>
3803
- <a class="line-number" data-cell="setup2" data-line="74" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 74, true);">74</a>
3804
- <a class="line-number" data-cell="setup2" data-line="75" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 75, true);">75</a>
3805
- <a class="line-number" data-cell="setup2" data-line="76" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 76, true);">76</a>
3806
- <a class="line-number" data-cell="setup2" data-line="77" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 77, true);">77</a>
3807
- <a class="line-number" data-cell="setup2" data-line="78" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 78, true);">78</a>
3808
- <a class="line-number" data-cell="setup2" data-line="79" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 79, true);">79</a>
3809
- <a class="line-number" data-cell="setup2" data-line="80" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 80, true);">80</a>
3810
- <a class="line-number" data-cell="setup2" data-line="81" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 81, true);">81</a>
3811
- <a class="line-number" data-cell="setup2" data-line="82" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 82, true);">82</a>
3812
- <a class="line-number" data-cell="setup2" data-line="83" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 83, true);">83</a>
3813
- <a class="line-number" data-cell="setup2" data-line="84" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 84, true);">84</a>
3814
- <a class="line-number" data-cell="setup2" data-line="85" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 85, true);">85</a>
3815
- <a class="line-number" data-cell="setup2" data-line="86" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 86, true);">86</a>
3816
- <a class="line-number" data-cell="setup2" data-line="87" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 87, true);">87</a>
3817
- <a class="line-number" data-cell="setup2" data-line="88" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 88, true);">88</a>
3818
- <a class="line-number" data-cell="setup2" data-line="89" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 89, true);">89</a>
3819
- <a class="line-number" data-cell="setup2" data-line="90" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 90, true);">90</a>
3820
- <a class="line-number" data-cell="setup2" data-line="91" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 91, true);">91</a>
3821
- <a class="line-number" data-cell="setup2" data-line="92" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 92, true);">92</a>
3822
- <a class="line-number" data-cell="setup2" data-line="93" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 93, true);">93</a>
3823
- <a class="line-number" data-cell="setup2" data-line="94" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 94, true);">94</a>
3824
- <a class="line-number" data-cell="setup2" data-line="95" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 95, true);">95</a>
3825
- <a class="line-number" data-cell="setup2" data-line="96" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 96, true);">96</a>
3826
- <a class="line-number" data-cell="setup2" data-line="97" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 97, true);">97</a>
3827
- <a class="line-number" data-cell="setup2" data-line="98" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 98, true);">98</a>
3828
- <a class="line-number" data-cell="setup2" data-line="99" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 99, true);">99</a>
3829
- <a class="line-number" data-cell="setup2" data-line="100" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 100, true);">100</a>
3830
- <a class="line-number" data-cell="setup2" data-line="101" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 101, true);">101</a>
3831
- <a class="line-number" data-cell="setup2" data-line="102" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 102, true);">102</a>
3832
- <a class="line-number" data-cell="setup2" data-line="103" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 103, true);">103</a>
3833
- <a class="line-number" data-cell="setup2" data-line="104" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 104, true);">104</a>
3834
- <a class="line-number" data-cell="setup2" data-line="105" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 105, true);">105</a>
3835
- <a class="line-number" data-cell="setup2" data-line="106" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 106, true);">106</a>
3836
- <a class="line-number" data-cell="setup2" data-line="107" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 107, true);">107</a>
3837
- <a class="line-number" data-cell="setup2" data-line="108" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 108, true);">108</a>
3838
- <a class="line-number" data-cell="setup2" data-line="109" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 109, true);">109</a>
3839
- <a class="line-number" data-cell="setup2" data-line="110" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 110, true);">110</a>
3840
- <a class="line-number" data-cell="setup2" data-line="111" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 111, true);">111</a>
3841
- <a class="line-number" data-cell="setup2" data-line="112" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 112, true);">112</a>
3842
- <a class="line-number" data-cell="setup2" data-line="113" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 113, true);">113</a>
3843
- <a class="line-number" data-cell="setup2" data-line="114" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 114, true);">114</a>
3844
- <a class="line-number" data-cell="setup2" data-line="115" href="#cell-setup2" onclick="event.preventDefault(); selectCellLine('setup2', 115, true);">115</a>
 
3845
  </div>
3846
  <div class="code-wrap">
3847
  <div class="highlight"><pre><span></span><span class="c1"># /// script</span>
@@ -3910,6 +3910,7 @@ Cell: setup2 | 99.84s | FAILED
3910
 
3911
  <span class="kn">from</span><span class="w"> </span><span class="nn">transformers.models.gpt_oss.modeling_gpt_oss</span><span class="w"> </span><span class="kn">import</span> <span class="n">GptOssMLP</span><span class="p">,</span> <span class="n">GptOssRMSNorm</span>
3912
 
 
3913
  <span class="n">replace_kernel_forward_from_hub</span><span class="p">(</span><span class="n">GptOssRMSNorm</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> <span class="c1"># direct, type-safe</span>
3914
  <span class="n">custom_mapping</span> <span class="o">=</span> <span class="p">{</span>
3915
  <span class="s2">&quot;Yamoe&quot;</span><span class="p">:</span> <span class="p">{</span>
@@ -3961,44 +3962,44 @@ Cell: setup2 | 99.84s | FAILED
3961
  <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Generation took </span><span class="si">{</span><span class="n">end_time</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="n">start_time</span><span class="si">:</span><span class="s2">.2f</span><span class="si">}</span><span class="s2"> seconds&quot;</span><span class="p">)</span>
3962
  </pre></div>
3963
 
3964
- <div class="code-line-highlight" id="line-highlight-setup2"></div>
3965
  </div>
3966
  </div>
3967
  </div>
3968
- <div id="output-setup2" class="cell-output">
3969
- <div class="uv-install-logs" id="uv-logs-setup2">
3970
  <div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
3971
  <div class="uv-logs-content" style="display: none;">
3972
  Downloading cpython-3.13.7-linux-x86_64-gnu (download) (32.0MiB)
3973
  Downloading cpython-3.13.7-linux-x86_64-gnu (download)
3974
  Updating https://github.com/huggingface/transformers.git (HEAD)
3975
  Updated https://github.com/huggingface/transformers.git (99b0995138c17ef953959c70f35cb2bdc41111a2)
3976
- Building transformers @ git+https://github.com/huggingface/transformers.git@99b0995138c17ef953959c70f35cb2bdc41111a2
3977
- Downloading fonttools (4.7MiB)
3978
- Downloading pillow (6.3MiB)
3979
- Downloading sympy (6.0MiB)
3980
  Downloading nvidia-cublas-cu12 (566.8MiB)
3981
- Downloading jedi (1.5MiB)
3982
- Downloading hf-xet (3.0MiB)
3983
- Downloading pygments (1.2MiB)
3984
- Downloading nvidia-curand-cu12 (60.7MiB)
3985
- Downloading networkx (1.9MiB)
3986
- Downloading nvidia-cudnn-cu12 (674.0MiB)
3987
- Downloading kiwisolver (1.4MiB)
3988
  Downloading nvidia-cufile-cu12 (1.1MiB)
3989
- Downloading tokenizers (3.1MiB)
3990
- Downloading numpy (15.9MiB)
3991
- Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
3992
- Downloading nvidia-cusolver-cu12 (255.1MiB)
3993
  Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
3994
- Downloading nvidia-nvjitlink-cu12 (37.4MiB)
3995
- Downloading matplotlib (8.3MiB)
3996
  Downloading nvidia-cusparse-cu12 (274.9MiB)
3997
- Downloading nvidia-cusparselt-cu12 (273.9MiB)
 
 
3998
  Downloading nvidia-cufft-cu12 (184.2MiB)
 
 
 
 
 
3999
  Downloading nvidia-nccl-cu12 (307.4MiB)
 
 
 
 
 
4000
  Downloading triton (148.4MiB)
4001
- Downloading torch (846.8MiB)
 
 
4002
  Downloading nvidia-cufile-cu12
4003
  Downloading kiwisolver
4004
  Downloading pygments
@@ -4011,8 +4012,8 @@ Downloading torch (846.8MiB)
4011
  Downloading nvidia-cuda-cupti-cu12
4012
  Downloading numpy
4013
  Downloading sympy
4014
- Built transformers @ git+https://github.com/huggingface/transformers.git@99b0995138c17ef953959c70f35cb2bdc41111a2
4015
  Downloading nvidia-nvjitlink-cu12
 
4016
  Downloading jedi
4017
  Downloading nvidia-curand-cu12
4018
  Downloading nvidia-cuda-nvrtc-cu12
@@ -4025,22 +4026,22 @@ Downloading torch (846.8MiB)
4025
  Downloading nvidia-cublas-cu12
4026
  Downloading nvidia-cudnn-cu12
4027
  Downloading torch
4028
- Installed 69 packages in 472ms
4029
  </div>
4030
  </div>
4031
  <div class="cell-stderr">Fetching 3 files: 0%| | 0/3 [00:00&lt;?, ?it/s]
4032
- Fetching 3 files: 33%|███▎ | 1/3 [00:14&lt;00:29, 14.90s/it]
4033
- Fetching 3 files: 67%|██████▋ | 2/3 [00:17&lt;00:07, 7.76s/it]
4034
- Fetching 3 files: 100%|██████████| 3/3 [00:17&lt;00:00, 5.89s/it]
4035
  You are using full precision kernels, we will dequantize the model to bf16. To use the quantized model with quantization kernels, please set use_kernels=False
4036
  INFO:accelerate.utils.modeling:We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).
4037
 
4038
  Loading checkpoint shards: 0%| | 0/3 [00:00&lt;?, ?it/s]
4039
- Loading checkpoint shards: 33%|███▎ | 1/3 [00:07&lt;00:14, 7.47s/it]
4040
- Loading checkpoint shards: 67%|██████▋ | 2/3 [00:14&lt;00:07, 7.28s/it]
4041
- Loading checkpoint shards: 67%|██████▋ | 2/3 [00:14&lt;00:07, 7.46s/it]
4042
  Traceback (most recent call last):
4043
- File &quot;/tmp/uvnote_gcl2d6qz/.uvnote/cells/setup2.py&quot;, line 82, in &lt;module&gt;
4044
  model = GptOssForCausalLM.from_pretrained(
4045
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^
4046
  model_id,
@@ -4050,9 +4051,9 @@ Traceback (most recent call last):
4050
  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
4051
  ).eval()
4052
  ^
4053
- File &quot;/tmp/uvnote-run-snv7hjt1/home/.cache/uv/environments-v2/setup2-2b58a6e1ef638261/lib/python3.13/site-packages/transformers/modeling_utils.py&quot;, line 285, in _wrapper
4054
  return func(*args, **kwargs)
4055
- File &quot;/tmp/uvnote-run-snv7hjt1/home/.cache/uv/environments-v2/setup2-2b58a6e1ef638261/lib/python3.13/site-packages/transformers/modeling_utils.py&quot;, line 5035, in from_pretrained
4056
  ) = cls._load_pretrained_model(
4057
  ~~~~~~~~~~~~~~~~~~~~~~~~~~^
4058
  model,
@@ -4062,10 +4063,10 @@ Traceback (most recent call last):
4062
  ^^^^^^^^^^^^^^^^^^^^^^^^^^
4063
  )
4064
  ^
4065
- File &quot;/tmp/uvnote-run-snv7hjt1/home/.cache/uv/environments-v2/setup2-2b58a6e1ef638261/lib/python3.13/site-packages/transformers/modeling_utils.py&quot;, line 5488, in _load_pretrained_model
4066
  _error_msgs, disk_offload_index, cpu_offload_index = load_shard_file(args)
4067
  ~~~~~~~~~~~~~~~^^^^^^
4068
- File &quot;/tmp/uvnote-run-snv7hjt1/home/.cache/uv/environments-v2/setup2-2b58a6e1ef638261/lib/python3.13/site-packages/transformers/modeling_utils.py&quot;, line 932, in load_shard_file
4069
  disk_offload_index, cpu_offload_index = _load_state_dict_into_meta_model(
4070
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^
4071
  model_to_load,
@@ -4075,25 +4076,27 @@ Traceback (most recent call last):
4075
  ^^^^^^^^^^^^^^^^^^^^^^^^
4076
  )
4077
  ^
4078
- File &quot;/tmp/uvnote-run-snv7hjt1/home/.cache/uv/environments-v2/setup2-2b58a6e1ef638261/lib/python3.13/site-packages/torch/utils/_contextlib.py&quot;, line 120, in decorate_context
4079
  return func(*args, **kwargs)
4080
- File &quot;/tmp/uvnote-run-snv7hjt1/home/.cache/uv/environments-v2/setup2-2b58a6e1ef638261/lib/python3.13/site-packages/transformers/modeling_utils.py&quot;, line 840, in _load_state_dict_into_meta_model
4081
  hf_quantizer.create_quantized_param(
4082
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^
4083
  model, param, param_name, param_device, state_dict, unexpected_keys
4084
  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
4085
  )
4086
  ^
4087
- File &quot;/tmp/uvnote-run-snv7hjt1/home/.cache/uv/environments-v2/setup2-2b58a6e1ef638261/lib/python3.13/site-packages/transformers/quantizers/quantizer_mxfp4.py&quot;, line 249, in create_quantized_param
4088
  dequantize(module, param_name, param_value, target_device, dq_param_name, **shard_kwargs)
4089
  ~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
4090
- File &quot;/tmp/uvnote-run-snv7hjt1/home/.cache/uv/environments-v2/setup2-2b58a6e1ef638261/lib/python3.13/site-packages/transformers/integrations/mxfp4.py&quot;, line 329, in dequantize
4091
  dequantized = convert_moe_packed_tensors(getattr(module, blocks_attr), getattr(module, scales_attr))
4092
- File &quot;/tmp/uvnote-run-snv7hjt1/home/.cache/uv/environments-v2/setup2-2b58a6e1ef638261/lib/python3.13/site-packages/transformers/integrations/mxfp4.py&quot;, line 117, in convert_moe_packed_tensors
4093
  idx_hi = (blk &gt;&gt; 4).to(torch.long)
4094
- torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.98 GiB. GPU 0 has a total capacity of 22.30 GiB of which 1.69 GiB is free. Process 38244 has 20.61 GiB memory in use. Of the allocated memory 17.37 GiB is allocated by PyTorch, and 2.96 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)</div>
4095
  </div>
4096
  </div>
 
 
4097
  </div>
4098
 
4099
  </body>
 
3711
  </div>
3712
 
3713
  <div class="main-content">
3714
+ <div class="cell cell-failed" id="cell-setup">
 
3715
  <div class="cell-header">
3716
  <span class="collapse-indicators">
3717
+ <span onclick="toggleCode('setup')" style="cursor: pointer;">▼ code</span>
3718
+ <span onclick="toggleOutput('setup')" style="cursor: pointer;">▼ output</span>
3719
+ <span id="uv-indicator-setup" onclick="toggleUvLogsFromHeader('setup')" style="cursor: pointer;">▶ uv-logs</span>
3720
  </span> |
3721
+ Cell: setup | 99.80s | FAILED
3722
+ | <button class="run-btn" onclick="runCell('setup')">▶ run</button>
3723
+ <button class="copy-btn" onclick="copyCell('setup')">Copy</button>
3724
+ <a href="cells/setup.py" target="_blank" class="raw-btn">Raw</a>
3725
  </div>
3726
+ <div id="code-setup" class="cell-code" data-lines="116">
3727
  <div class="highlight-with-lines">
3728
+ <div class="line-numbers" id="lines-setup">
3729
+ <a class="line-number" data-cell="setup" data-line="1" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 1, true);">1</a>
3730
+ <a class="line-number" data-cell="setup" data-line="2" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 2, true);">2</a>
3731
+ <a class="line-number" data-cell="setup" data-line="3" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 3, true);">3</a>
3732
+ <a class="line-number" data-cell="setup" data-line="4" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 4, true);">4</a>
3733
+ <a class="line-number" data-cell="setup" data-line="5" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 5, true);">5</a>
3734
+ <a class="line-number" data-cell="setup" data-line="6" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 6, true);">6</a>
3735
+ <a class="line-number" data-cell="setup" data-line="7" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 7, true);">7</a>
3736
+ <a class="line-number" data-cell="setup" data-line="8" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 8, true);">8</a>
3737
+ <a class="line-number" data-cell="setup" data-line="9" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 9, true);">9</a>
3738
+ <a class="line-number" data-cell="setup" data-line="10" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 10, true);">10</a>
3739
+ <a class="line-number" data-cell="setup" data-line="11" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 11, true);">11</a>
3740
+ <a class="line-number" data-cell="setup" data-line="12" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 12, true);">12</a>
3741
+ <a class="line-number" data-cell="setup" data-line="13" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 13, true);">13</a>
3742
+ <a class="line-number" data-cell="setup" data-line="14" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 14, true);">14</a>
3743
+ <a class="line-number" data-cell="setup" data-line="15" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 15, true);">15</a>
3744
+ <a class="line-number" data-cell="setup" data-line="16" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 16, true);">16</a>
3745
+ <a class="line-number" data-cell="setup" data-line="17" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 17, true);">17</a>
3746
+ <a class="line-number" data-cell="setup" data-line="18" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 18, true);">18</a>
3747
+ <a class="line-number" data-cell="setup" data-line="19" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 19, true);">19</a>
3748
+ <a class="line-number" data-cell="setup" data-line="20" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 20, true);">20</a>
3749
+ <a class="line-number" data-cell="setup" data-line="21" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 21, true);">21</a>
3750
+ <a class="line-number" data-cell="setup" data-line="22" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 22, true);">22</a>
3751
+ <a class="line-number" data-cell="setup" data-line="23" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 23, true);">23</a>
3752
+ <a class="line-number" data-cell="setup" data-line="24" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 24, true);">24</a>
3753
+ <a class="line-number" data-cell="setup" data-line="25" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 25, true);">25</a>
3754
+ <a class="line-number" data-cell="setup" data-line="26" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 26, true);">26</a>
3755
+ <a class="line-number" data-cell="setup" data-line="27" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 27, true);">27</a>
3756
+ <a class="line-number" data-cell="setup" data-line="28" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 28, true);">28</a>
3757
+ <a class="line-number" data-cell="setup" data-line="29" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 29, true);">29</a>
3758
+ <a class="line-number" data-cell="setup" data-line="30" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 30, true);">30</a>
3759
+ <a class="line-number" data-cell="setup" data-line="31" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 31, true);">31</a>
3760
+ <a class="line-number" data-cell="setup" data-line="32" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 32, true);">32</a>
3761
+ <a class="line-number" data-cell="setup" data-line="33" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 33, true);">33</a>
3762
+ <a class="line-number" data-cell="setup" data-line="34" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 34, true);">34</a>
3763
+ <a class="line-number" data-cell="setup" data-line="35" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 35, true);">35</a>
3764
+ <a class="line-number" data-cell="setup" data-line="36" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 36, true);">36</a>
3765
+ <a class="line-number" data-cell="setup" data-line="37" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 37, true);">37</a>
3766
+ <a class="line-number" data-cell="setup" data-line="38" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 38, true);">38</a>
3767
+ <a class="line-number" data-cell="setup" data-line="39" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 39, true);">39</a>
3768
+ <a class="line-number" data-cell="setup" data-line="40" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 40, true);">40</a>
3769
+ <a class="line-number" data-cell="setup" data-line="41" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 41, true);">41</a>
3770
+ <a class="line-number" data-cell="setup" data-line="42" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 42, true);">42</a>
3771
+ <a class="line-number" data-cell="setup" data-line="43" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 43, true);">43</a>
3772
+ <a class="line-number" data-cell="setup" data-line="44" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 44, true);">44</a>
3773
+ <a class="line-number" data-cell="setup" data-line="45" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 45, true);">45</a>
3774
+ <a class="line-number" data-cell="setup" data-line="46" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 46, true);">46</a>
3775
+ <a class="line-number" data-cell="setup" data-line="47" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 47, true);">47</a>
3776
+ <a class="line-number" data-cell="setup" data-line="48" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 48, true);">48</a>
3777
+ <a class="line-number" data-cell="setup" data-line="49" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 49, true);">49</a>
3778
+ <a class="line-number" data-cell="setup" data-line="50" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 50, true);">50</a>
3779
+ <a class="line-number" data-cell="setup" data-line="51" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 51, true);">51</a>
3780
+ <a class="line-number" data-cell="setup" data-line="52" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 52, true);">52</a>
3781
+ <a class="line-number" data-cell="setup" data-line="53" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 53, true);">53</a>
3782
+ <a class="line-number" data-cell="setup" data-line="54" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 54, true);">54</a>
3783
+ <a class="line-number" data-cell="setup" data-line="55" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 55, true);">55</a>
3784
+ <a class="line-number" data-cell="setup" data-line="56" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 56, true);">56</a>
3785
+ <a class="line-number" data-cell="setup" data-line="57" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 57, true);">57</a>
3786
+ <a class="line-number" data-cell="setup" data-line="58" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 58, true);">58</a>
3787
+ <a class="line-number" data-cell="setup" data-line="59" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 59, true);">59</a>
3788
+ <a class="line-number" data-cell="setup" data-line="60" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 60, true);">60</a>
3789
+ <a class="line-number" data-cell="setup" data-line="61" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 61, true);">61</a>
3790
+ <a class="line-number" data-cell="setup" data-line="62" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 62, true);">62</a>
3791
+ <a class="line-number" data-cell="setup" data-line="63" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 63, true);">63</a>
3792
+ <a class="line-number" data-cell="setup" data-line="64" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 64, true);">64</a>
3793
+ <a class="line-number" data-cell="setup" data-line="65" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 65, true);">65</a>
3794
+ <a class="line-number" data-cell="setup" data-line="66" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 66, true);">66</a>
3795
+ <a class="line-number" data-cell="setup" data-line="67" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 67, true);">67</a>
3796
+ <a class="line-number" data-cell="setup" data-line="68" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 68, true);">68</a>
3797
+ <a class="line-number" data-cell="setup" data-line="69" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 69, true);">69</a>
3798
+ <a class="line-number" data-cell="setup" data-line="70" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 70, true);">70</a>
3799
+ <a class="line-number" data-cell="setup" data-line="71" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 71, true);">71</a>
3800
+ <a class="line-number" data-cell="setup" data-line="72" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 72, true);">72</a>
3801
+ <a class="line-number" data-cell="setup" data-line="73" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 73, true);">73</a>
3802
+ <a class="line-number" data-cell="setup" data-line="74" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 74, true);">74</a>
3803
+ <a class="line-number" data-cell="setup" data-line="75" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 75, true);">75</a>
3804
+ <a class="line-number" data-cell="setup" data-line="76" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 76, true);">76</a>
3805
+ <a class="line-number" data-cell="setup" data-line="77" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 77, true);">77</a>
3806
+ <a class="line-number" data-cell="setup" data-line="78" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 78, true);">78</a>
3807
+ <a class="line-number" data-cell="setup" data-line="79" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 79, true);">79</a>
3808
+ <a class="line-number" data-cell="setup" data-line="80" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 80, true);">80</a>
3809
+ <a class="line-number" data-cell="setup" data-line="81" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 81, true);">81</a>
3810
+ <a class="line-number" data-cell="setup" data-line="82" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 82, true);">82</a>
3811
+ <a class="line-number" data-cell="setup" data-line="83" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 83, true);">83</a>
3812
+ <a class="line-number" data-cell="setup" data-line="84" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 84, true);">84</a>
3813
+ <a class="line-number" data-cell="setup" data-line="85" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 85, true);">85</a>
3814
+ <a class="line-number" data-cell="setup" data-line="86" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 86, true);">86</a>
3815
+ <a class="line-number" data-cell="setup" data-line="87" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 87, true);">87</a>
3816
+ <a class="line-number" data-cell="setup" data-line="88" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 88, true);">88</a>
3817
+ <a class="line-number" data-cell="setup" data-line="89" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 89, true);">89</a>
3818
+ <a class="line-number" data-cell="setup" data-line="90" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 90, true);">90</a>
3819
+ <a class="line-number" data-cell="setup" data-line="91" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 91, true);">91</a>
3820
+ <a class="line-number" data-cell="setup" data-line="92" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 92, true);">92</a>
3821
+ <a class="line-number" data-cell="setup" data-line="93" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 93, true);">93</a>
3822
+ <a class="line-number" data-cell="setup" data-line="94" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 94, true);">94</a>
3823
+ <a class="line-number" data-cell="setup" data-line="95" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 95, true);">95</a>
3824
+ <a class="line-number" data-cell="setup" data-line="96" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 96, true);">96</a>
3825
+ <a class="line-number" data-cell="setup" data-line="97" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 97, true);">97</a>
3826
+ <a class="line-number" data-cell="setup" data-line="98" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 98, true);">98</a>
3827
+ <a class="line-number" data-cell="setup" data-line="99" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 99, true);">99</a>
3828
+ <a class="line-number" data-cell="setup" data-line="100" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 100, true);">100</a>
3829
+ <a class="line-number" data-cell="setup" data-line="101" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 101, true);">101</a>
3830
+ <a class="line-number" data-cell="setup" data-line="102" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 102, true);">102</a>
3831
+ <a class="line-number" data-cell="setup" data-line="103" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 103, true);">103</a>
3832
+ <a class="line-number" data-cell="setup" data-line="104" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 104, true);">104</a>
3833
+ <a class="line-number" data-cell="setup" data-line="105" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 105, true);">105</a>
3834
+ <a class="line-number" data-cell="setup" data-line="106" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 106, true);">106</a>
3835
+ <a class="line-number" data-cell="setup" data-line="107" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 107, true);">107</a>
3836
+ <a class="line-number" data-cell="setup" data-line="108" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 108, true);">108</a>
3837
+ <a class="line-number" data-cell="setup" data-line="109" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 109, true);">109</a>
3838
+ <a class="line-number" data-cell="setup" data-line="110" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 110, true);">110</a>
3839
+ <a class="line-number" data-cell="setup" data-line="111" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 111, true);">111</a>
3840
+ <a class="line-number" data-cell="setup" data-line="112" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 112, true);">112</a>
3841
+ <a class="line-number" data-cell="setup" data-line="113" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 113, true);">113</a>
3842
+ <a class="line-number" data-cell="setup" data-line="114" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 114, true);">114</a>
3843
+ <a class="line-number" data-cell="setup" data-line="115" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 115, true);">115</a>
3844
+ <a class="line-number" data-cell="setup" data-line="116" href="#cell-setup" onclick="event.preventDefault(); selectCellLine('setup', 116, true);">116</a>
3845
  </div>
3846
  <div class="code-wrap">
3847
  <div class="highlight"><pre><span></span><span class="c1"># /// script</span>
 
3910
 
3911
  <span class="kn">from</span><span class="w"> </span><span class="nn">transformers.models.gpt_oss.modeling_gpt_oss</span><span class="w"> </span><span class="kn">import</span> <span class="n">GptOssMLP</span><span class="p">,</span> <span class="n">GptOssRMSNorm</span>
3912
 
3913
+ <span class="n">replace_kernel_forward_from_hub</span><span class="p">(</span><span class="n">GptOssMLP</span><span class="p">,</span> <span class="s2">&quot;Yamoe&quot;</span><span class="p">)</span> <span class="c1"># direct, type-safe</span>
3914
  <span class="n">replace_kernel_forward_from_hub</span><span class="p">(</span><span class="n">GptOssRMSNorm</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> <span class="c1"># direct, type-safe</span>
3915
  <span class="n">custom_mapping</span> <span class="o">=</span> <span class="p">{</span>
3916
  <span class="s2">&quot;Yamoe&quot;</span><span class="p">:</span> <span class="p">{</span>
 
3962
  <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Generation took </span><span class="si">{</span><span class="n">end_time</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="n">start_time</span><span class="si">:</span><span class="s2">.2f</span><span class="si">}</span><span class="s2"> seconds&quot;</span><span class="p">)</span>
3963
  </pre></div>
3964
 
3965
+ <div class="code-line-highlight" id="line-highlight-setup"></div>
3966
  </div>
3967
  </div>
3968
  </div>
3969
+ <div id="output-setup" class="cell-output">
3970
+ <div class="uv-install-logs" id="uv-logs-setup">
3971
  <div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
3972
  <div class="uv-logs-content" style="display: none;">
3973
  Downloading cpython-3.13.7-linux-x86_64-gnu (download) (32.0MiB)
3974
  Downloading cpython-3.13.7-linux-x86_64-gnu (download)
3975
  Updating https://github.com/huggingface/transformers.git (HEAD)
3976
  Updated https://github.com/huggingface/transformers.git (99b0995138c17ef953959c70f35cb2bdc41111a2)
 
 
 
 
3977
  Downloading nvidia-cublas-cu12 (566.8MiB)
3978
+ Building transformers @ git+https://github.com/huggingface/transformers.git@99b0995138c17ef953959c70f35cb2bdc41111a2
 
 
 
 
 
 
3979
  Downloading nvidia-cufile-cu12 (1.1MiB)
3980
+ Downloading jedi (1.5MiB)
3981
+ Downloading nvidia-cusparselt-cu12 (273.9MiB)
 
 
3982
  Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
 
 
3983
  Downloading nvidia-cusparse-cu12 (274.9MiB)
3984
+ Downloading sympy (6.0MiB)
3985
+ Downloading nvidia-cusolver-cu12 (255.1MiB)
3986
+ Downloading hf-xet (3.0MiB)
3987
  Downloading nvidia-cufft-cu12 (184.2MiB)
3988
+ Downloading nvidia-cudnn-cu12 (674.0MiB)
3989
+ Downloading networkx (1.9MiB)
3990
+ Downloading numpy (15.9MiB)
3991
+ Downloading torch (846.8MiB)
3992
+ Downloading pillow (6.3MiB)
3993
  Downloading nvidia-nccl-cu12 (307.4MiB)
3994
+ Downloading nvidia-nvjitlink-cu12 (37.4MiB)
3995
+ Downloading pygments (1.2MiB)
3996
+ Downloading nvidia-curand-cu12 (60.7MiB)
3997
+ Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
3998
+ Downloading tokenizers (3.1MiB)
3999
  Downloading triton (148.4MiB)
4000
+ Downloading matplotlib (8.3MiB)
4001
+ Downloading fonttools (4.7MiB)
4002
+ Downloading kiwisolver (1.4MiB)
4003
  Downloading nvidia-cufile-cu12
4004
  Downloading kiwisolver
4005
  Downloading pygments
 
4012
  Downloading nvidia-cuda-cupti-cu12
4013
  Downloading numpy
4014
  Downloading sympy
 
4015
  Downloading nvidia-nvjitlink-cu12
4016
+ Built transformers @ git+https://github.com/huggingface/transformers.git@99b0995138c17ef953959c70f35cb2bdc41111a2
4017
  Downloading jedi
4018
  Downloading nvidia-curand-cu12
4019
  Downloading nvidia-cuda-nvrtc-cu12
 
4026
  Downloading nvidia-cublas-cu12
4027
  Downloading nvidia-cudnn-cu12
4028
  Downloading torch
4029
+ Installed 69 packages in 465ms
4030
  </div>
4031
  </div>
4032
  <div class="cell-stderr">Fetching 3 files: 0%| | 0/3 [00:00&lt;?, ?it/s]
4033
+ Fetching 3 files: 33%|███▎ | 1/3 [00:15&lt;00:31, 15.83s/it]
4034
+ Fetching 3 files: 67%|██████▋ | 2/3 [00:18&lt;00:08, 8.05s/it]
4035
+ Fetching 3 files: 100%|██████████| 3/3 [00:18&lt;00:00, 6.14s/it]
4036
  You are using full precision kernels, we will dequantize the model to bf16. To use the quantized model with quantization kernels, please set use_kernels=False
4037
  INFO:accelerate.utils.modeling:We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).
4038
 
4039
  Loading checkpoint shards: 0%| | 0/3 [00:00&lt;?, ?it/s]
4040
+ Loading checkpoint shards: 33%|███▎ | 1/3 [00:07&lt;00:15, 7.50s/it]
4041
+ Loading checkpoint shards: 67%|██████▋ | 2/3 [00:14&lt;00:07, 7.33s/it]
4042
+ Loading checkpoint shards: 67%|██████▋ | 2/3 [00:15&lt;00:07, 7.51s/it]
4043
  Traceback (most recent call last):
4044
+ File &quot;/tmp/uvnote_5cbrsnjg/.uvnote/cells/setup.py&quot;, line 83, in &lt;module&gt;
4045
  model = GptOssForCausalLM.from_pretrained(
4046
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^
4047
  model_id,
 
4051
  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
4052
  ).eval()
4053
  ^
4054
+ File &quot;/tmp/uvnote-run-vr4catz8/home/.cache/uv/environments-v2/setup-4117b8f0d0f9a3df/lib/python3.13/site-packages/transformers/modeling_utils.py&quot;, line 285, in _wrapper
4055
  return func(*args, **kwargs)
4056
+ File &quot;/tmp/uvnote-run-vr4catz8/home/.cache/uv/environments-v2/setup-4117b8f0d0f9a3df/lib/python3.13/site-packages/transformers/modeling_utils.py&quot;, line 5035, in from_pretrained
4057
  ) = cls._load_pretrained_model(
4058
  ~~~~~~~~~~~~~~~~~~~~~~~~~~^
4059
  model,
 
4063
  ^^^^^^^^^^^^^^^^^^^^^^^^^^
4064
  )
4065
  ^
4066
+ File &quot;/tmp/uvnote-run-vr4catz8/home/.cache/uv/environments-v2/setup-4117b8f0d0f9a3df/lib/python3.13/site-packages/transformers/modeling_utils.py&quot;, line 5488, in _load_pretrained_model
4067
  _error_msgs, disk_offload_index, cpu_offload_index = load_shard_file(args)
4068
  ~~~~~~~~~~~~~~~^^^^^^
4069
+ File &quot;/tmp/uvnote-run-vr4catz8/home/.cache/uv/environments-v2/setup-4117b8f0d0f9a3df/lib/python3.13/site-packages/transformers/modeling_utils.py&quot;, line 932, in load_shard_file
4070
  disk_offload_index, cpu_offload_index = _load_state_dict_into_meta_model(
4071
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^
4072
  model_to_load,
 
4076
  ^^^^^^^^^^^^^^^^^^^^^^^^
4077
  )
4078
  ^
4079
+ File &quot;/tmp/uvnote-run-vr4catz8/home/.cache/uv/environments-v2/setup-4117b8f0d0f9a3df/lib/python3.13/site-packages/torch/utils/_contextlib.py&quot;, line 120, in decorate_context
4080
  return func(*args, **kwargs)
4081
+ File &quot;/tmp/uvnote-run-vr4catz8/home/.cache/uv/environments-v2/setup-4117b8f0d0f9a3df/lib/python3.13/site-packages/transformers/modeling_utils.py&quot;, line 840, in _load_state_dict_into_meta_model
4082
  hf_quantizer.create_quantized_param(
4083
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^
4084
  model, param, param_name, param_device, state_dict, unexpected_keys
4085
  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
4086
  )
4087
  ^
4088
+ File &quot;/tmp/uvnote-run-vr4catz8/home/.cache/uv/environments-v2/setup-4117b8f0d0f9a3df/lib/python3.13/site-packages/transformers/quantizers/quantizer_mxfp4.py&quot;, line 249, in create_quantized_param
4089
  dequantize(module, param_name, param_value, target_device, dq_param_name, **shard_kwargs)
4090
  ~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
4091
+ File &quot;/tmp/uvnote-run-vr4catz8/home/.cache/uv/environments-v2/setup-4117b8f0d0f9a3df/lib/python3.13/site-packages/transformers/integrations/mxfp4.py&quot;, line 329, in dequantize
4092
  dequantized = convert_moe_packed_tensors(getattr(module, blocks_attr), getattr(module, scales_attr))
4093
+ File &quot;/tmp/uvnote-run-vr4catz8/home/.cache/uv/environments-v2/setup-4117b8f0d0f9a3df/lib/python3.13/site-packages/transformers/integrations/mxfp4.py&quot;, line 117, in convert_moe_packed_tensors
4094
  idx_hi = (blk &gt;&gt; 4).to(torch.long)
4095
+ torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.98 GiB. GPU 0 has a total capacity of 22.30 GiB of which 1.69 GiB is free. Process 43404 has 20.61 GiB memory in use. Of the allocated memory 17.37 GiB is allocated by PyTorch, and 2.96 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)</div>
4096
  </div>
4097
  </div>
4098
+
4099
+ <h1>Reference kernel</h1>
4100
  </div>
4101
 
4102
  </body>