more LLM
Browse files- prototype_web_chat.html +93 -31
prototype_web_chat.html
CHANGED
|
@@ -13,7 +13,7 @@
|
|
| 13 |
header .pill { font-size:12px; color:var(--bg); background:var(--accent); padding:.2rem .55rem; border-radius:999px; font-weight:700; letter-spacing:.02em; }
|
| 14 |
main { display:grid; grid-template-rows:auto 1fr auto; height:calc(100dvh - 58px); }
|
| 15 |
.bar { display:flex; flex-wrap:wrap; gap:8px; padding:10px 12px; background:#0f1216; border-bottom:1px solid #21262c; align-items:center; }
|
| 16 |
-
select, input[type="number"] { background:var(--card); color:var(--text); border:1px solid #29313a; border-radius:10px; padding:8px 10px; }
|
| 17 |
button { background:#1c2128; color:var(--text); border:1px solid #2a323c; border-radius:12px; padding:10px 12px; font-weight:600; cursor:pointer; }
|
| 18 |
button.primary { background:var(--accent); color:#08261b; border:none; }
|
| 19 |
button.ghost { background:transparent; border-color:#2a323c; }
|
|
@@ -35,39 +35,65 @@
|
|
| 35 |
.row { display:flex; gap:8px; align-items:center; flex-wrap:wrap; }
|
| 36 |
.spacer { flex:1; }
|
| 37 |
a { color:#93c5fd; }
|
|
|
|
|
|
|
| 38 |
</style>
|
| 39 |
</head>
|
| 40 |
<body>
|
| 41 |
<header>
|
| 42 |
<h1>Browser LLM</h1>
|
| 43 |
-
<span class="pill">WASM • CPU
|
| 44 |
<span id="isoNote" class="tiny"></span>
|
| 45 |
</header>
|
| 46 |
|
| 47 |
<main>
|
| 48 |
<div class="bar">
|
| 49 |
<label>Model:</label>
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
</option>
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
</option>
|
| 58 |
-
<option value='{"id":"QuantFactory/
|
| 59 |
-
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
</select>
|
| 62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
<div class="row">
|
| 64 |
<label>Max new tokens</label>
|
| 65 |
<input id="nPredict" type="number" min="1" max="512" step="1" value="128" />
|
| 66 |
</div>
|
| 67 |
<div class="row">
|
| 68 |
<label>Temp</label><input id="temp" type="number" min="0" max="2" step="0.1" value="0.7" style="width:80px" />
|
| 69 |
-
<label>Top
|
| 70 |
-
<label>Top
|
| 71 |
</div>
|
| 72 |
|
| 73 |
<div class="spacer"></div>
|
|
@@ -113,6 +139,9 @@
|
|
| 113 |
const $send = document.getElementById('sendBtn');
|
| 114 |
const $stop = document.getElementById('stopBtn');
|
| 115 |
const $iso = document.getElementById('isoNote');
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
// ——— State ———
|
| 118 |
const decoder = new TextDecoder();
|
|
@@ -122,13 +151,13 @@
|
|
| 122 |
let eotToken = -1;
|
| 123 |
let sysPrompt = "You are a helpful, concise assistant. Keep answers short and clear.";
|
| 124 |
|
| 125 |
-
// Keep RAM low for mobile: small context +
|
| 126 |
const LOAD_CONFIG = {
|
| 127 |
n_ctx: 768,
|
| 128 |
n_batch: 48,
|
| 129 |
-
cache_type_k: "q4_0",
|
| 130 |
-
cache_type_v: "f16", //
|
| 131 |
-
flash_attn: false,
|
| 132 |
progressCallback: ({ loaded, total }) => {
|
| 133 |
const pct = (total && total > 0) ? Math.round(loaded / total * 100) : 0;
|
| 134 |
$prog.style.width = pct + '%';
|
|
@@ -156,9 +185,9 @@
|
|
| 156 |
|
| 157 |
function noteIsolation() {
|
| 158 |
if (!crossOriginIsolated) {
|
| 159 |
-
$iso.innerHTML = 'Single
|
| 160 |
} else {
|
| 161 |
-
$iso.textContent = 'Cross
|
| 162 |
}
|
| 163 |
}
|
| 164 |
noteIsolation();
|
|
@@ -172,18 +201,34 @@
|
|
| 172 |
messages.length = 0; messages.push(...kept);
|
| 173 |
}
|
| 174 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
async function ensureLoaded() {
|
| 176 |
if (loaded) return;
|
| 177 |
$prog.style.width = '0%';
|
| 178 |
-
const choice =
|
| 179 |
-
ui.setStats(
|
| 180 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
loaded = true;
|
| 182 |
eotToken = wllama.getEOT();
|
| 183 |
const meta = await wllama.getModelMetadata();
|
| 184 |
const ctx = wllama.getLoadedContextInfo();
|
| 185 |
const thr = wllama.getNumThreads?.() ?? 1;
|
| 186 |
-
ui.setStats(`Loaded ${choice.file} • ${meta.n_params?.toLocaleString?.() || '
|
| 187 |
$load.disabled = true; $unload.disabled = false;
|
| 188 |
}
|
| 189 |
|
|
@@ -200,6 +245,11 @@
|
|
| 200 |
document.getElementById('unloadBtn').addEventListener('click', unloadModel);
|
| 201 |
document.getElementById('stopBtn').addEventListener('click', () => aborter?.abort());
|
| 202 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
$form.addEventListener('submit', async (ev) => {
|
| 204 |
ev.preventDefault();
|
| 205 |
const text = ($input.value || '').trim();
|
|
@@ -258,7 +308,7 @@
|
|
| 258 |
}
|
| 259 |
});
|
| 260 |
|
| 261 |
-
// Enter
|
| 262 |
$input.addEventListener('keydown', (e) => {
|
| 263 |
if (e.key === 'Enter' && !e.shiftKey) {
|
| 264 |
e.preventDefault();
|
|
@@ -268,11 +318,23 @@
|
|
| 268 |
</script>
|
| 269 |
|
| 270 |
<!--
|
| 271 |
-
|
| 272 |
-
•
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
-->
|
| 277 |
</body>
|
| 278 |
</html>
|
|
|
|
| 13 |
header .pill { font-size:12px; color:var(--bg); background:var(--accent); padding:.2rem .55rem; border-radius:999px; font-weight:700; letter-spacing:.02em; }
|
| 14 |
main { display:grid; grid-template-rows:auto 1fr auto; height:calc(100dvh - 58px); }
|
| 15 |
.bar { display:flex; flex-wrap:wrap; gap:8px; padding:10px 12px; background:#0f1216; border-bottom:1px solid #21262c; align-items:center; }
|
| 16 |
+
select, input[type="number"], input[type="text"] { background:var(--card); color:var(--text); border:1px solid #29313a; border-radius:10px; padding:8px 10px; }
|
| 17 |
button { background:#1c2128; color:var(--text); border:1px solid #2a323c; border-radius:12px; padding:10px 12px; font-weight:600; cursor:pointer; }
|
| 18 |
button.primary { background:var(--accent); color:#08261b; border:none; }
|
| 19 |
button.ghost { background:transparent; border-color:#2a323c; }
|
|
|
|
| 35 |
.row { display:flex; gap:8px; align-items:center; flex-wrap:wrap; }
|
| 36 |
.spacer { flex:1; }
|
| 37 |
a { color:#93c5fd; }
|
| 38 |
+
details { margin-left:8px; }
|
| 39 |
+
.note { font-size:12px; color:var(--muted); max-width:720px; }
|
| 40 |
</style>
|
| 41 |
</head>
|
| 42 |
<body>
|
| 43 |
<header>
|
| 44 |
<h1>Browser LLM</h1>
|
| 45 |
+
<span class="pill">WASM • CPU‑only</span>
|
| 46 |
<span id="isoNote" class="tiny"></span>
|
| 47 |
</header>
|
| 48 |
|
| 49 |
<main>
|
| 50 |
<div class="bar">
|
| 51 |
<label>Model:</label>
|
| 52 |
+
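<select id="model"> <!-- NOTE(review): opening tag restored in place of a stray "$1"; confirm the id matches the element the script looks up for $model -->
<!-- NEW: Gemma-3-270M from ggml-org (public GGUF) -->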
|
| 53 |
+
<option value='{"id":"ggml-org/gemma-3-270m-GGUF","file":"gemma-3-270m-Q8_0.gguf","label":"Gemma‑3‑270M Q8_0 (≈292 MB)"}'>Gemma‑3‑270M Q8_0 (≈292 MB)</option>
|
| 54 |
+
<!-- Smallest RAM / fastest (good for phones) -->
|
| 55 |
+
<option value='{"id":"mradermacher/OpenELM-270M-Instruct-GGUF","file":"OpenELM-270M-Instruct.Q3_K_S.gguf","label":"OpenELM‑270M‑Instruct Q3_K_S (≈134 MB)"}'>OpenELM‑270M‑Instruct Q3_K_S (≈134 MB)</option>
|
| 56 |
+
<option value='{"id":"mradermacher/OpenELM-270M-Instruct-GGUF","file":"OpenELM-270M-Instruct.Q4_K_M.gguf","label":"OpenELM‑270M‑Instruct Q4_K_M (≈175 MB)"}'>OpenELM‑270M‑Instruct Q4_K_M (≈175 MB)</option>
|
| 57 |
+
|
| 58 |
+
<!-- Good quality while still small -->
|
| 59 |
+
<option value='{"id":"mav23/SmolLM-135M-Instruct-GGUF","file":"smollm-135m-instruct.Q3_K_S.gguf","label":"SmolLM‑135M‑Instruct Q3_K_S (≈88 MB)"}'>SmolLM‑135M‑Instruct Q3_K_S (≈88 MB)</option>
|
| 60 |
+
<option value='{"id":"QuantFactory/SmolLM-360M-Instruct-GGUF","file":"SmolLM-360M-Instruct.Q3_K_S.gguf","label":"SmolLM‑360M‑Instruct Q3_K_S (≈219 MB)"}'>SmolLM‑360M‑Instruct Q3_K_S (≈219 MB)</option>
|
| 61 |
+
|
| 62 |
+
<!-- Stronger tiny model (bigger, still phone‑possible on high‑end) -->
|
| 63 |
+
<option value='{"id":"Qwen/Qwen2.5-0.5B-Instruct-GGUF","file":"qwen2.5-0.5b-instruct-q3_k_m.gguf","label":"Qwen2.5‑0.5B‑Instruct Q3_K_M (≈432 MB)"}'>Qwen2.5‑0.5B‑Instruct Q3_K_M (≈432 MB)</option>
|
| 64 |
+
<option value='{"id":"Qwen/Qwen2.5-0.5B-Instruct-GGUF","file":"qwen2.5-0.5b-instruct-q4_k_m.gguf","label":"Qwen2.5‑0.5B‑Instruct Q4_K_M (≈491 MB)"}'>Qwen2.5‑0.5B‑Instruct Q4_K_M (≈491 MB)</option>
|
| 65 |
+
|
| 66 |
+
<!-- Optional: bigger but better; may be too heavy for some phones -->
|
| 67 |
+
<option value='{"id":"TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF","file":"tinyllama-1.1b-chat-v1.0.Q3_K_S.gguf","label":"TinyLlama‑1.1B‑Chat Q3_K_S (≈500 MB)"}'>TinyLlama‑1.1B‑Chat Q3_K_S (≈500 MB)</option>
|
| 68 |
+
|
| 69 |
+
<!-- Your original SmolLM2 360M options (kept) -->
|
| 70 |
+
<option value='{"id":"QuantFactory/SmolLM-360M-GGUF","file":"SmolLM-360M.Q4_0.gguf","label":"SmolLM2‑360M Q4_0 (≈229 MB)"}'>SmolLM2‑360M Q4_0 (≈229 MB)</option>
|
| 71 |
+
<option value='{"id":"QuantFactory/SmolLM-360M-GGUF","file":"SmolLM-360M.Q3_K_S.gguf","label":"SmolLM2‑360M Q3_K_S (≈219 MB, faster)"}'>SmolLM2‑360M Q3_K_S (≈219 MB, faster)</option>
|
| 72 |
+
<option value='{"id":"QuantFactory/SmolLM-360M-GGUF","file":"SmolLM-360M.Q2_K.gguf","label":"SmolLM2‑360M Q2_K (≈200 MB, min RAM / quality drop)"}'>SmolLM2‑360M Q2_K (≈200 MB, min RAM / quality drop)</option>
|
| 73 |
+
|
| 74 |
+
<!-- Custom: load any other public HF GGUF by entering its repo id and file name below -->
|
| 75 |
+
<option value='{"custom":true,"label":"Custom HF GGUF (e.g., Gemma‑3‑270M)"}'>Custom HF GGUF (e.g., Gemma‑3‑270M)</option>
|
| 76 |
</select>
|
| 77 |
|
| 78 |
+
<details id="customBox">
|
| 79 |
+
<summary class="tiny">Custom GGUF (paste HF repo + file)</summary>
|
| 80 |
+
<div class="row">
|
| 81 |
+
<label class="tiny">HF repo id</label>
|
| 82 |
+
<input id="customRepo" type="text" placeholder="e.g. google/gemma-3-270m-GGUF (when available)" style="width:280px" />
|
| 83 |
+
<label class="tiny">file</label>
|
| 84 |
+
<input id="customFile" type="text" placeholder="e.g. gemma-3-270m.Q4_0.gguf" style="width:240px" />
|
| 85 |
+
</div>
|
| 86 |
+
<div class="note">Note: official <a href="https://huggingface.co/google/gemma-3-270m" target="_blank" rel="noreferrer">Gemma‑3‑270M</a> is the base HF repo. A ready‑to‑use public GGUF is now available at <a href="https://huggingface.co/ggml-org/gemma-3-270m-GGUF" target="_blank" rel="noreferrer">ggml‑org/gemma‑3‑270m‑GGUF</a> (currently providing <code>gemma-3-270m-Q8_0.gguf</code> ≈292 MB). For maximum speed on low‑RAM phones, the OpenELM‑270M‑Instruct Q3_K_S option above is even lighter, but Gemma‑3‑270M offers strong quality for its size.</div>
|
| 87 |
+
</details>
|
| 88 |
+
|
| 89 |
<div class="row">
|
| 90 |
<label>Max new tokens</label>
|
| 91 |
<input id="nPredict" type="number" min="1" max="512" step="1" value="128" />
|
| 92 |
</div>
|
| 93 |
<div class="row">
|
| 94 |
<label>Temp</label><input id="temp" type="number" min="0" max="2" step="0.1" value="0.7" style="width:80px" />
|
| 95 |
+
<label>Top‑p</label><input id="topp" type="number" min="0" max="1" step="0.05" value="0.9" style="width:80px" />
|
| 96 |
+
<label>Top‑k</label><input id="topk" type="number" min="1" max="100" step="1" value="40" style="width:80px" />
|
| 97 |
</div>
|
| 98 |
|
| 99 |
<div class="spacer"></div>
|
|
|
|
| 139 |
const $send = document.getElementById('sendBtn');
|
| 140 |
const $stop = document.getElementById('stopBtn');
|
| 141 |
const $iso = document.getElementById('isoNote');
|
| 142 |
+
const $customBox = document.getElementById('customBox');
|
| 143 |
+
const $customRepo = document.getElementById('customRepo');
|
| 144 |
+
const $customFile = document.getElementById('customFile');
|
| 145 |
|
| 146 |
// ——— State ———
|
| 147 |
const decoder = new TextDecoder();
|
|
|
|
| 151 |
let eotToken = -1;
|
| 152 |
let sysPrompt = "You are a helpful, concise assistant. Keep answers short and clear.";
|
| 153 |
|
| 154 |
+
// Keep RAM low for mobile: small context + FP16 V‑cache (WASM safe)
|
| 155 |
const LOAD_CONFIG = {
|
| 156 |
n_ctx: 768,
|
| 157 |
n_batch: 48,
|
| 158 |
+
cache_type_k: "q4_0", // int4 K cache: reduces RAM without flash_attn
|
| 159 |
+
cache_type_v: "f16", // IMPORTANT: V cache quant requires flash_attn; not available in WASM
|
| 160 |
+
flash_attn: false,
|
| 161 |
progressCallback: ({ loaded, total }) => {
|
| 162 |
const pct = (total && total > 0) ? Math.round(loaded / total * 100) : 0;
|
| 163 |
$prog.style.width = pct + '%';
|
|
|
|
| 185 |
|
| 186 |
/**
 * Write a short threading-status message into the `$iso` header note.
 *
 * Reads the global `crossOriginIsolated` flag: when it is set the note says
 * multithreading is on, otherwise it says the page is in single-thread mode
 * and hints at the COOP/COEP headers.
 */
function noteIsolation() {
  // Both messages are plain text, so both go through textContent; the
  // single-thread branch previously used innerHTML for no benefit.
  $iso.textContent = crossOriginIsolated
    ? 'Cross‑origin isolated: multithread on'
    : 'Single‑thread mode (serve with COOP/COEP for multithread)';
}
|
| 193 |
noteIsolation();
|
|
|
|
| 201 |
messages.length = 0; messages.push(...kept);
|
| 202 |
}
|
| 203 |
|
| 204 |
+
/**
 * Resolve the entry chosen in the `$model` dropdown to a model descriptor.
 *
 * Every option value is a JSON string. Preset options are returned exactly as
 * parsed. The option flagged with `custom` instead takes the repo id and file
 * name from the `$customRepo` / `$customFile` text inputs (trimmed).
 *
 * @returns {{id: string, file: string, label: string}} Descriptor for the
 *   custom option; for presets, whatever object the option value encodes.
 * @throws {Error} If the selected value is not valid JSON (original parse
 *   error attached as `cause`), or if the custom option is selected while
 *   either text input is blank.
 */
function getSelectedModel() {
  let parsed;
  try {
    parsed = JSON.parse($model.value);
  } catch (err) {
    // An empty or malformed option value would otherwise surface as a bare
    // SyntaxError with no hint about where it came from.
    throw new Error('Could not read the model selection; pick a model from the list.', { cause: err });
  }

  if (!parsed.custom) return parsed;

  const id = ($customRepo.value || '').trim();
  const file = ($customFile.value || '').trim();
  if (!id || !file) {
    throw new Error('Enter HF repo id and GGUF file for custom model.');
  }
  return { id, file, label: `Custom: ${id}/${file}` };
}
|
| 214 |
+
|
| 215 |
/**
 * Load the currently selected model through wllama unless one is already loaded.
 *
 * Resets the progress bar, resolves the selection via `getSelectedModel()`,
 * downloads it with `wllama.loadModelFromHF` using `LOAD_CONFIG`, then records
 * the EOT token, reports model stats through `ui.setStats`, and flips the
 * load/unload button states.
 *
 * @returns {Promise<void>}
 * @throws {Error} If the selection is invalid (propagated from
 *   `getSelectedModel`) or the download/load fails; in the latter case the
 *   underlying error is attached as `cause`.
 */
async function ensureLoaded() {
  if (loaded) return;

  $prog.style.width = '0%';
  const choice = getSelectedModel();
  ui.setStats(`Fetching ${choice.file}…`);

  try {
    await wllama.loadModelFromHF(choice.id, choice.file, LOAD_CONFIG);
  } catch (e) {
    // Common causes: gated repo, missing file, or CORS.
    // Keep the original error reachable via `cause` so its stack is not lost.
    throw new Error(
      `Load failed for ${choice.id}/${choice.file}. If the repo is gated or lacks CORS, try a public mirror / different quant. Details: ${e?.message || e}`,
      { cause: e },
    );
  }

  loaded = true;
  eotToken = wllama.getEOT();
  const meta = await wllama.getModelMetadata();
  const ctx = wllama.getLoadedContextInfo();
  // getNumThreads is called optionally; report 1 thread when it is absent.
  const thr = wllama.getNumThreads?.() ?? 1;
  ui.setStats(`Loaded ${choice.file} • ${meta.n_params?.toLocaleString?.() || ''} params • ctx ${ctx.n_ctx} • threads ${thr}`);
  $load.disabled = true;
  $unload.disabled = false;
}
|
| 234 |
|
|
|
|
| 245 |
document.getElementById('unloadBtn').addEventListener('click', unloadModel);
|
| 246 |
document.getElementById('stopBtn').addEventListener('click', () => aborter?.abort());
|
| 247 |
|
| 248 |
+
// Expand the custom-GGUF <details> panel when the option flagged `custom`
// is selected, and collapse it for every other option.
$model.addEventListener('change', () => {
  const { custom } = JSON.parse($model.value);
  $customBox.open = custom === true;
});
|
| 252 |
+
|
| 253 |
$form.addEventListener('submit', async (ev) => {
|
| 254 |
ev.preventDefault();
|
| 255 |
const text = ($input.value || '').trim();
|
|
|
|
| 308 |
}
|
| 309 |
});
|
| 310 |
|
| 311 |
+
// Enter‑to‑send on mobile; Shift+Enter for newline
|
| 312 |
$input.addEventListener('keydown', (e) => {
|
| 313 |
if (e.key === 'Enter' && !e.shiftKey) {
|
| 314 |
e.preventDefault();
|
|
|
|
| 318 |
</script>
|
| 319 |
|
| 320 |
<!--
|
| 321 |
+
What changed:
|
| 322 |
+
• Added multiple small, publicly downloadable GGUFs with CORS‑friendly repos:
|
| 323 |
+
- OpenELM‑270M‑Instruct (Q3_K_S / Q4_K_M) — ~134‑175 MB
|
| 324 |
+
- SmolLM‑135M‑Instruct (Q3_K_S) — ~88 MB
|
| 325 |
+
- SmolLM‑360M‑Instruct (Q3_K_S) — ~219 MB
|
| 326 |
+
- Qwen2.5‑0.5B‑Instruct (Q3_K_M / Q4_K_M) — ~432/491 MB
|
| 327 |
+
- TinyLlama‑1.1B‑Chat (Q3_K_S) — ~500 MB (optional, heavier but strong)
|
| 328 |
+
- Kept your SmolLM2‑360M options
|
| 329 |
+
• Added Gemma‑3‑270M Q8_0 (~292 MB) as a preset from the public ggml‑org/gemma‑3‑270m‑GGUF repo, plus a "Custom HF GGUF" path so you can paste any other public GGUF repo id + file. The official google/gemma‑3‑270m repo is gated and not GGUF; direct download from the gated repo is unlikely to work in‑browser.
|
| 330 |
+
• Kept V‑cache at f16 and disabled flash_attn to avoid the llama.cpp error "V cache quantization requires flash_attn" in WASM.
|
| 331 |
+
• Modest n_ctx (768) and n_batch (48) to keep RAM low on phones.
|
| 332 |
+
• Pinned @wllama/wllama to 2.3.1 and referenced explicit .wasm URLs (no +esm).
|
| 333 |
+
|
| 334 |
+
Tips:
|
| 335 |
+
• For fastest sampling on low‑RAM phones: prefer Q3_K_S quants (OpenELM‑270M‑Instruct Q3_K_S or SmolLM‑135M‑Instruct Q3_K_S).
|
| 336 |
+
• For a nice quality bump still under ~500 MB: Qwen2.5‑0.5B‑Instruct Q3_K_M.
|
| 337 |
+
• Serve with COOP/COEP headers if you want multi‑threading.
|
| 338 |
-->
|
| 339 |
</body>
|
| 340 |
</html>
|