some broken
- app/dist/_astro/{index.CnFuS3U1.css → index.Cb8952bT.css} +0 -0
- app/dist/_astro/{index.CnFuS3U1.css.gz → index.Cb8952bT.css.gz} +2 -2
- app/dist/index.html +11 -11
- app/dist/index.html.gz +2 -2
- app/src/content/embeds/old_banner.html +143 -0
- app/src/content/embeds/transformers/better-bloat.html +0 -0
- app/src/styles/_reset.css +2 -2
- app/src/styles/_variables.css +0 -1
app/dist/_astro/{index.CnFuS3U1.css → index.Cb8952bT.css}
RENAMED
The diff for this file is too large to render.
app/dist/_astro/{index.CnFuS3U1.css.gz → index.Cb8952bT.css.gz}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:58d83d5019ff4253dac40a30b53e9e519bff80f16a23424df7ff3cf3152d0d5d
+size 18340
app/dist/index.html
CHANGED
@@ -12,8 +12,8 @@
 document.documentElement.setAttribute("data-theme", theme);
 } catch {}
 })();
-</script><script type="module" src="/scripts/color-palettes.js"></script><!-- TO MANAGE PROPERLY --><script src="https://cdn.plot.ly/plotly-3.0.0.min.js" charset="utf-8"></script><link rel="stylesheet" href="/_astro/index.
-<script type="module" src="/_astro/page.CH0W_C1Z.js"></script></head> <body> <button id="theme-toggle" aria-label="Toggle color theme" data-astro-cid-x3pjskd3> <svg class="icon light" width="20" height="20" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" data-astro-cid-x3pjskd3> <circle cx="12" cy="12" r="5" data-astro-cid-x3pjskd3></circle> <line x1="12" y1="1" x2="12" y2="4" data-astro-cid-x3pjskd3></line> <line x1="12" y1="20" x2="12" y2="23" data-astro-cid-x3pjskd3></line> <line x1="1" y1="12" x2="4" y2="12" data-astro-cid-x3pjskd3></line> <line x1="20" y1="12" x2="23" y2="12" data-astro-cid-x3pjskd3></line> <line x1="4.22" y1="4.22" x2="6.34" y2="6.34" data-astro-cid-x3pjskd3></line> <line x1="17.66" y1="17.66" x2="19.78" y2="19.78" data-astro-cid-x3pjskd3></line> <line x1="4.22" y1="19.78" x2="6.34" y2="17.66" data-astro-cid-x3pjskd3></line> <line x1="17.66" y1="6.34" x2="19.78" y2="4.22" data-astro-cid-x3pjskd3></line> </svg> <svg class="icon dark" width="20" height="20" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" data-astro-cid-x3pjskd3> <path d="M21 12.79A9 9 0 1 1 11.21 3 7 7 0 0 0 21 12.79z" data-astro-cid-x3pjskd3></path> </svg> </button> <section class="hero" data-astro-cid-bbe6dxrz> <h1 class="hero-title" data-astro-cid-bbe6dxrz>Maintain the unmaintainable:<br/>1M python loc, 400+ models</h1> <div class="hero-banner" data-astro-cid-bbe6dxrz> <figure class="html-embed"><div class="html-embed__card is-frameless"><div id="frag-
+</script><script type="module" src="/scripts/color-palettes.js"></script><!-- TO MANAGE PROPERLY --><script src="https://cdn.plot.ly/plotly-3.0.0.min.js" charset="utf-8"></script><link rel="stylesheet" href="/_astro/index.Cb8952bT.css"><script type="module" src="/_astro/hoisted.DK-CdsVg.js"></script>
+<script type="module" src="/_astro/page.CH0W_C1Z.js"></script></head> <body> <button id="theme-toggle" aria-label="Toggle color theme" data-astro-cid-x3pjskd3> <svg class="icon light" width="20" height="20" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" data-astro-cid-x3pjskd3> <circle cx="12" cy="12" r="5" data-astro-cid-x3pjskd3></circle> <line x1="12" y1="1" x2="12" y2="4" data-astro-cid-x3pjskd3></line> <line x1="12" y1="20" x2="12" y2="23" data-astro-cid-x3pjskd3></line> <line x1="1" y1="12" x2="4" y2="12" data-astro-cid-x3pjskd3></line> <line x1="20" y1="12" x2="23" y2="12" data-astro-cid-x3pjskd3></line> <line x1="4.22" y1="4.22" x2="6.34" y2="6.34" data-astro-cid-x3pjskd3></line> <line x1="17.66" y1="17.66" x2="19.78" y2="19.78" data-astro-cid-x3pjskd3></line> <line x1="4.22" y1="19.78" x2="6.34" y2="17.66" data-astro-cid-x3pjskd3></line> <line x1="17.66" y1="6.34" x2="19.78" y2="4.22" data-astro-cid-x3pjskd3></line> </svg> <svg class="icon dark" width="20" height="20" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" data-astro-cid-x3pjskd3> <path d="M21 12.79A9 9 0 1 1 11.21 3 7 7 0 0 0 21 12.79z" data-astro-cid-x3pjskd3></path> </svg> </button> <section class="hero" data-astro-cid-bbe6dxrz> <h1 class="hero-title" data-astro-cid-bbe6dxrz>Maintain the unmaintainable:<br/>1M python loc, 400+ models</h1> <div class="hero-banner" data-astro-cid-bbe6dxrz> <figure class="html-embed"><div class="html-embed__card is-frameless"><div id="frag-su0f3gugr3"><style>
 @import url('https://fonts.googleapis.com/css2?family=Inter:wght@500;600&display=swap');
 
 .banner-container {
@@ -438,7 +438,7 @@ We continue to support all new models and expect to do so for the foreseeable fu
 <p>It works as follows. In order to contribute a model, say for instance define a <code>modular_</code> file that can inherit from <em>any function across all other modeling, configuration and processor files</em>.
 This modular file can use inheritance across models: and then, it will be unravelled into a fully functional modeling file.</p>
 <summary id="generated-modeling">Auto-generated modeling code</summary>
-<figure class="html-embed"><div class="html-embed__card"><div id="frag-
+<figure class="html-embed"><div class="html-embed__card"><div id="frag-fs7nee511q"><div class="code-compare" style="display: grid; grid-template-columns: 1fr 1fr; gap: 1rem; margin: 1.5rem 0;">
 <div class="code-column" style="border: 1px solid #e2e8f0; border-radius: 8px; overflow: hidden;">
 <div class="code-header" style="background: #f8f9fa; padding: 0.75rem 1rem; font-weight: 600; color: #495057; border-bottom: 1px solid #e2e8f0;">
 modular_glm.py
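The modular mechanism described in the hunk above is inheritance plus code generation: a small modular_*.py shard borrows blocks from an existing model, and the converter unravels it into a standalone modeling_*.py. A minimal sketch, assuming Llama as the donor model (class names are illustrative, not the actual GLM shard):

# modular_mymodel.py -- illustrative shard; the converter expands it into a
# full modeling_mymodel.py with the inherited code inlined.
from transformers.models.llama.modeling_llama import (
    LlamaAttention,
    LlamaDecoderLayer,
    LlamaForCausalLM,
)

class MyModelAttention(LlamaAttention):
    pass  # inherited unchanged; the generated file carries the full implementation

class MyModelDecoderLayer(LlamaDecoderLayer):
    pass

class MyModelForCausalLM(LlamaForCausalLM):
    pass  # only genuine architectural differences would be overridden here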
@@ -599,7 +599,7 @@ However, if a model has a modular_<em>.py and a corresponding automatically gene
 <p>That gives an "effective LOC" curve: the 𝗺𝗮𝗶𝗻𝘁𝗲𝗻𝗮𝗻𝗰𝗲 𝘀𝘂𝗿𝗳𝗮𝗰𝗲.</p>
 <p>Measured on git history, raw <code>modeling_*.py</code> grew at ~362 LOC/day before modular; counting only modular shards yields ~25 LOC/day after — about <strong>15× lower</strong>. The curve represents the <strong>maintenance surface</strong> today: what maintainers actually read and review.</p>
 <p>Less code to hand-maintain means fewer places to break. LOC is not complexity, but they correlate in review effort and change risk.</p>
-<figure class="html-embed"><div class="html-embed__card"><div id="frag-
+<figure class="html-embed"><div class="html-embed__card"><div id="frag-09l75wbrf8o5"><iframe
 src="https://molbap-loc-1.hf.space"
 style="width:100%; height:900px; border:0"
 allow="clipboard-read; clipboard-write; fullscreen"
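The ~362 vs ~25 LOC/day figures quoted in this hunk come from git history; the measurement can be approximated by summing added lines per day for a given filename pattern. A rough sketch (the pathspecs and filtering are assumptions, not the exact script behind the plot):

# Count added lines per commit date for files matching a pathspec.
import subprocess
from collections import defaultdict

def added_loc_per_day(pathspec: str) -> dict[str, int]:
    out = subprocess.run(
        ["git", "log", "--date=short", "--pretty=%ad", "--numstat", "--", pathspec],
        capture_output=True, text=True, check=True,
    ).stdout
    per_day: dict[str, int] = defaultdict(int)
    day = None
    for line in out.splitlines():
        parts = line.split("\t")
        if len(parts) == 3:        # numstat line: "<added>\t<deleted>\t<path>"
            if day and parts[0].isdigit():
                per_day[day] += int(parts[0])
        elif line.strip():         # %ad date line of the next commit
            day = line.strip()
    return dict(per_day)

# Compare growth before/after the modular refactor, e.g.:
# added_loc_per_day("src/transformers/models/*/modeling_*.py")
# added_loc_per_day("src/transformers/models/*/modular_*.py")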
@@ -636,7 +636,7 @@ We choose to place the level of abstraction higher than the device placement: a
 <p>Hence, we want to touch <a href="#minimal-user-api">minimally</a> to the modeling code, and only modify it when <em>architectural changes</em> are involved. For instance, for tensor parallelism, we instead now specify a simple <code>tp_plan</code>.</p>
 <p>The alternative would be to modify parent classes specific to their</p>
 <p>It is written once in the config and passed to <code>.from_pretrained()</code>. The plan maps module name patterns to partitioning strategies. Strategies are resolved by the internal <code>ParallelInterface</code>, which wires to sharding implementations <code>ColwiseParallel</code>, <code>RowwiseParallel</code>, packed variants, and so on.</p>
-<figure class="html-embed"><div class="html-embed__card"><div id="frag-
+<figure class="html-embed"><div class="html-embed__card"><div id="frag-lywjo7xifz"><pre><code class="language-python"># In the model's config (example: ERNIE 4.5-style decoder blocks)
 base_model_tp_plan = {
 "layers.*.self_attn.q_proj": "colwise",
 "layers.*.self_attn.k_proj": "colwise",
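The base_model_tp_plan mapping embedded above is consumed at load time, so the modeling code itself stays untouched. A minimal usage sketch, assuming a recent transformers release with tensor-parallel loading and an example checkpoint:

# Sketch: load a model with the tensor-parallel plan resolved from its config.
# Launch with e.g. `torchrun --nproc-per-node 4 tp_load.py`.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "Qwen/Qwen2.5-7B-Instruct"  # any checkpoint whose config ships a tp_plan
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    tp_plan="auto",  # resolves base_model_tp_plan through ParallelInterface
)
tok = AutoTokenizer.from_pretrained(model_id)
inputs = tok("Tensor parallelism shards the q/k/v projections", return_tensors="pt").to(model.device)
print(tok.decode(model.generate(**inputs, max_new_tokens=16)[0], skip_special_tokens=True))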
@@ -706,7 +706,7 @@ So I wanted to take a look at the current <strong>state of modularity</strong> a
 </ol>
 <p>So what do we see? Llama is a basis for many models, and it shows.
 Radically different architectures such as mamba have spawned their own dependency subgraph.</p>
-<figure class="html-embed"><div class="html-embed__card"><div id="frag-
+<figure class="html-embed"><div class="html-embed__card"><div id="frag-ja1w4rtppo9"><iframe
 src="https://molbap-dependencies-1.hf.space"
 style="width:100%; height:680px; border:0"
 allow="clipboard-read; clipboard-write; fullscreen"
@@ -721,7 +721,7 @@ As you can see, there is a small DETR island, a little llava pocket, and so on,
 <p>So I looked into Jaccard similarity, which we use to measure set differences. I know that code is more than a set of characters stringed together. We also tried code-embedding models that ranked candidates better in practice, but for this post we stick to the deterministic Jaccard index.</p>
 <p>It is interesting, for that, to look at <em>when</em> we deployed this modular logic and what was its rippling effect on the library. You can check the <a href="https://huggingface.co/spaces/Molbap/transformers-modular-refactor">larger space</a> to play around, but the gist is: adding modular allowed to connect more and more models to solid reference points. We have a lot of gaps to fill in still.</p>
 <p>Zoom out below - it's full of models. You can click on a node to see its connections better, or use the text box to search for a model.</p>
-<figure class="html-embed"><div class="html-embed__card"><div id="frag-
+<figure class="html-embed"><div class="html-embed__card"><div id="frag-897vw2ctxj"> <iframe
 src="https://molbap-timeline-1.hf.space"
 style="width:100%; height:680px; border:0"
 allow="clipboard-read; clipboard-write; fullscreen"
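Jaccard similarity, as used in the hunk above, is intersection over union of two sets; applied to modeling files it can be approximated by comparing their identifier sets. A small sketch (the regex tokenisation is an assumption, not the exact pipeline behind the linked space):

# Jaccard index between two source files, treated as sets of identifiers.
import re
from pathlib import Path

def token_set(path: str) -> set[str]:
    # crude tokenisation: identifiers only, comments and literals are not stripped
    return set(re.findall(r"[A-Za-z_][A-Za-z0-9_]*", Path(path).read_text()))

def jaccard(a: str, b: str) -> float:
    sa, sb = token_set(a), token_set(b)
    return len(sa & sb) / len(sa | sb) if (sa | sb) else 0.0

# e.g. jaccard("models/llama/modeling_llama.py", "models/glm/modeling_glm.py")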
@@ -739,7 +739,7 @@ As you can see, there is a small DETR island, a little llava pocket, and so on,
 <p>What is the current state of these "abstractions" across the codebase?
 You will see all the imports around a modeling file, here <a href="https://huggingface.co/google/gemma-3n-E4B-it">Gemma3n</a>.</p>
 <p>Zoom and drag to explore.</p>
-<figure class="html-embed"><div class="html-embed__card"><div id="frag-
+<figure class="html-embed"><div class="html-embed__card"><div id="frag-zkoqofuoqk"><html>
 <head>
 <meta charset="utf-8">
 
@@ -1234,7 +1234,7 @@ That means every decision we make to abstract something else has to be extremely
 <div class="crumbs"><p>The shape of a contribution: add a model (or variant) with a small modular shard; the community and serving stacks pick it up immediately. Popularity trends (encoders/embeddings) guide where we invest. <strong>Next:</strong> power tools enabled by a consistent API.</p></div>
 <h3 id="-models-popularity"><a href="#-models-popularity"><a id="encoders-ftw"></a> Models popularity</a></h3>
 <p>Talking about dependencies, we can take a look at the number of downloads for transformer models popularity. One thing we see is the prominence of encoders: This is because the usage of encoders lies in embeddings, just check out <a href="https://huggingface.co/blog/embeddinggemma">EmbeddingGemma</a> for a modern recap. Hence, it is vital to keep the encoders part viable, usable, fine-tune-able.</p>
-<div><figure class="html-embed"><div class="html-embed__card"><div id="frag-
+<div><figure class="html-embed"><div class="html-embed__card"><div id="frag-in3xmqq4je"><html>
 <head><meta charset="utf-8" /></head>
 <body>
 <div> <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>
@@ -5130,7 +5130,7 @@ return Plotly;
 <h3 id="attention-visualisation"><a href="#attention-visualisation">Attention visualisation</a></h3>
 <p>All models have the same API internally for attention computation, thanks to <a href="#external-attention-classes">the externalisation of attention classes</a>. it allows us to build cool tools to visualize the inner workings of the attention mechanism.</p>
 <p>One particular piece of machinery is the <code>attention mask</code>. Here you see the famous bidirectional attention pattern for the whole prefix (text + image) in PaliGemma and all Gemma2+ models, contrasting with the usual "causal-only" models.</p>
-<figure class="html-embed"><div class="html-embed__card"><div id="frag-
+<figure class="html-embed"><div class="html-embed__card"><div id="frag-yso5z65rmt"><!-- Minimal HTML fragment: terminal-style ASCII attention masks -->
 <div style="max-width: 940px; margin: 16px 0; border:1px solid #2a2f3a; border-radius:8px; background:#0b0f19; font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, 'Liberation Mono', 'Courier New', monospace; color:#e5e7eb;">
 <div style="display:flex; align-items:center; gap:8px; padding:8px 10px; border-bottom:1px solid #1f2430; background:#111827; border-top-left-radius:8px; border-top-right-radius:8px;">
 <span style="width:10px; height:10px; background:#ef4444; border-radius:50%; display:inline-block;"></span>
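The fragment added in this hunk renders ASCII masks; the underlying pattern is a prefix-LM mask in which every position can attend to the whole prefix (image + text prompt) while later positions stay causal. A small sketch in plain PyTorch, independent of transformers internals:

# Build a boolean attention mask: True means "query row may attend to key column".
import torch

def prefix_lm_mask(seq_len: int, prefix_len: int) -> torch.Tensor:
    causal = torch.tril(torch.ones(seq_len, seq_len, dtype=torch.bool))
    causal[:, :prefix_len] = True  # the prefix is visible, bidirectionally, to everyone
    return causal

print(prefix_lm_mask(6, 3).int())
# The first 3 key columns are fully visible; the rest keeps the usual
# lower-triangular (causal) pattern.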
@@ -5183,7 +5183,7 @@ return Plotly;
 <div class="crumbs"><p>Forward interception and nested JSON logging align ports to reference implementations, reinforcing "Source of Truth." <strong>Next:</strong> CUDA warmup reduces load-time stalls without touching modeling semantics.</p></div>
 <h3 id="cooking-faster-cuda-warmups"><a href="#cooking-faster-cuda-warmups">Cooking faster CUDA warmups</a></h3>
 <p>Having a clean <em>external</em> API allows us to work on the <a href="#code-is-product">true inner workings of transformers</a>. One of the few recent additions was the <em>CUDA warmup</em> via <code>caching_allocator_warmup</code> which improved massively the loading footprint by pre-allocating GPU memory to avoid malloc bottlenecks during model loading, achieving a 7x factor for an 8B model, 6x for a 32B, you can check out <a href="https://github.com/huggingface/transformers/pull/36380">the source</a>!</p>
-<figure class="html-embed"><div class="html-embed__card"><div id="frag-
+<figure class="html-embed"><div class="html-embed__card"><div id="frag-vko4f1u5op"><style>
 /* 1) Scope tokens to the widget */
 .warmup-demo{
 --page-bg:#ffffff;
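The warmup referenced above leans on PyTorch's caching allocator: reserve roughly the bytes the checkpoint will need in a single allocation, free it, and the later per-parameter allocations reuse the cached segment instead of hitting cudaMalloc one tensor at a time. A toy sketch of the idea, not the actual caching_allocator_warmup implementation (its sizing logic is in the linked PR):

# Pre-reserve GPU memory so weight loading hits torch's cache, not the driver.
import torch

def warmup_cuda_allocator(total_param_bytes: int, device: str = "cuda:0") -> None:
    scratch = torch.empty(total_param_bytes, dtype=torch.uint8, device=device)
    del scratch  # returned to torch's reserved pool, ready for reuse during loading

# e.g. an 8B-parameter bf16 checkpoint is roughly 8e9 * 2 bytes:
# warmup_cuda_allocator(int(8e9) * 2)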
app/dist/index.html.gz
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:af7c7f5f1e9896dde3c7f76843a8691cacca9699dcf0a856abcee7505744e43f
+size 1654674
app/src/content/embeds/old_banner.html
ADDED
@@ -0,0 +1,143 @@
+<div class="transformers-banner" style="width:100%;margin:10px 0;aspect-ratio:3/1;min-height:260px;"></div>
+<script>
+(() => {
+  const ensureD3 = (cb) => {
+    if (window.d3 && typeof window.d3.select === 'function') return cb();
+    let s = document.getElementById('d3-cdn-script');
+    if (!s) {
+      s = document.createElement('script');
+      s.id = 'd3-cdn-script';
+      s.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
+      document.head.appendChild(s);
+    }
+    const onReady = () => { if (window.d3 && typeof window.d3.select === 'function') cb(); };
+    s.addEventListener('load', onReady, { once: true });
+    if (window.d3) onReady();
+  };
+
+  const bootstrap = () => {
+    const mount = document.currentScript ? document.currentScript.previousElementSibling : null;
+    const container = (mount && mount.querySelector && mount.querySelector('.transformers-banner')) || document.querySelector('.transformers-banner');
+    if (!container) return;
+    if (container.dataset) {
+      if (container.dataset.mounted === 'true') return;
+      container.dataset.mounted = 'true';
+    }
+
+    // Simplified transformers network - showing key models
+    const nodes = [
+      { id: "llama", is_base: true, size: 3.0, x: 0.5, y: 0.5 },
+      { id: "mistral", is_base: false, size: 1.3, x: 0.3, y: 0.4 },
+      { id: "gemma", is_base: false, size: 1.3, x: 0.7, y: 0.4 },
+      { id: "qwen2", is_base: false, size: 1.2, x: 0.4, y: 0.6 },
+      { id: "phi3", is_base: false, size: 1.2, x: 0.6, y: 0.6 },
+      { id: "deepseek_v3", is_base: false, size: 1.15, x: 0.35, y: 0.3 },
+      { id: "cohere", is_base: false, size: 1.2, x: 0.65, y: 0.3 },
+      { id: "mixtral", is_base: false, size: 1.2, x: 0.25, y: 0.5 },
+      { id: "glm4", is_base: false, size: 1.15, x: 0.75, y: 0.5 },
+      { id: "llava", is_base: true, size: 1.3, x: 0.5, y: 0.7 }
+    ];
+
+    const links = [
+      { source: "llama", target: "mistral" },
+      { source: "llama", target: "gemma" },
+      { source: "llama", target: "qwen2" },
+      { source: "llama", target: "phi3" },
+      { source: "llama", target: "deepseek_v3" },
+      { source: "llama", target: "cohere" },
+      { source: "mistral", target: "mixtral" },
+      { source: "llama", target: "llava" }
+    ];
+
+    const svg = d3.select(container).append('svg')
+      .attr('width', '100%')
+      .attr('height', '100%')
+      .style('display', 'block');
+
+    const width = container.clientWidth;
+    const height = container.clientHeight;
+
+    const g = svg.append('g');
+
+    // Links
+    const link = g.append('g')
+      .selectAll('line')
+      .data(links)
+      .join('line')
+      .attr('stroke', '#999')
+      .attr('stroke-opacity', 0.4)
+      .attr('stroke-width', 1.5);
+
+    // Nodes
+    const node = g.append('g')
+      .selectAll('g')
+      .data(nodes)
+      .join('g')
+      .attr('class', d => d.is_base ? 'node base' : 'node derived');
+
+    // Base models: styled circles with emoji
+    node.filter(d => d.is_base)
+      .append('circle')
+      .attr('r', d => 30 * d.size)
+      .attr('fill', '#FFD21E')
+      .attr('stroke', '#FF9D00')
+      .attr('stroke-width', 2);
+
+    node.filter(d => d.is_base)
+      .append('text')
+      .attr('text-anchor', 'middle')
+      .attr('dy', '0.35em')
+      .style('font-size', '20px')
+      .text('🤗');
+
+    // Derived models: simple circles
+    node.filter(d => !d.is_base)
+      .append('circle')
+      .attr('r', d => 15 * d.size)
+      .attr('fill', '#667eea');
+
+    // Labels
+    node.append('text')
+      .attr('text-anchor', 'middle')
+      .attr('dy', d => d.is_base ? 45 : 25)
+      .style('font-size', '11px')
+      .style('font-weight', '600')
+      .style('fill', 'var(--text-color, #333)')
+      .text(d => d.id);
+
+    // Position nodes and links
+    const updatePositions = () => {
+      link
+        .attr('x1', d => {
+          const source = nodes.find(n => n.id === d.source);
+          return source ? source.x * width : 0;
+        })
+        .attr('y1', d => {
+          const source = nodes.find(n => n.id === d.source);
+          return source ? source.y * height : 0;
+        })
+        .attr('x2', d => {
+          const target = nodes.find(n => n.id === d.target);
+          return target ? target.x * width : 0;
+        })
+        .attr('y2', d => {
+          const target = nodes.find(n => n.id === d.target);
+          return target ? target.y * height : 0;
+        });
+
+      node.attr('transform', d => `translate(${d.x * width}, ${d.y * height})`);
+    };
+
+    updatePositions();
+
+    // Responsive resize
+    let resizeTimer;
+    window.addEventListener('resize', () => {
+      clearTimeout(resizeTimer);
+      resizeTimer = setTimeout(updatePositions, 100);
+    });
+  };
+
+  ensureD3(bootstrap);
+})();
+</script>
app/src/content/embeds/transformers/better-bloat.html
ADDED
The diff for this file is too large to render.
app/src/styles/_reset.css
CHANGED
@@ -1,6 +1,6 @@
-html { box-sizing: border-box; }
+html { box-sizing: border-box; background: var(--page-bg); color: var(--text-color); }
 *, *::before, *::after { box-sizing: inherit; }
-body { margin: 0; font-family: var(--default-font-family); color: var(--text-color); }
+body { margin: 0; font-family: var(--default-font-family); background: var(--page-bg); color: var(--text-color); }
 audio { display: block; width: 100%; }
 
 img,
app/src/styles/_variables.css
CHANGED
@@ -114,5 +114,4 @@
   --on-primary: #0f1115;
 
   color-scheme: dark;
-  background: var(--page-bg);
 }