Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Added first token latency and replaced latency with total generation time.
Browse files
src/lib/components/InferencePlayground/InferencePlayground.svelte
CHANGED
|
@@ -42,7 +42,8 @@
|
|
| 42 |
let viewSettings = false;
|
| 43 |
let showTokenModal = false;
|
| 44 |
let loading = false;
|
| 45 |
-
let latency = 0;
|
|
|
|
| 46 |
let generatedTokensCount = 0;
|
| 47 |
let abortController: AbortController | undefined = undefined;
|
| 48 |
let waitForNonStreaming = true;
|
|
@@ -91,12 +92,16 @@
|
|
| 91 |
|
| 92 |
(document.activeElement as HTMLElement).blur();
|
| 93 |
loading = true;
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
try {
|
| 96 |
const startTime = performance.now();
|
| 97 |
const hf = createHfInference(hfToken);
|
| 98 |
|
| 99 |
if (conversation.streaming) {
|
|
|
|
| 100 |
const streamingMessage = { role: "assistant", content: "" };
|
| 101 |
conversation.messages = [...conversation.messages, streamingMessage];
|
| 102 |
abortController = new AbortController();
|
|
@@ -109,6 +114,11 @@
|
|
| 109 |
streamingMessage.content = content;
|
| 110 |
conversation.messages = [...conversation.messages];
|
| 111 |
generatedTokensCount += 1;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
}
|
| 113 |
},
|
| 114 |
abortController
|
|
@@ -123,11 +133,11 @@
|
|
| 123 |
if (waitForNonStreaming) {
|
| 124 |
conversation.messages = [...conversation.messages, newMessage];
|
| 125 |
generatedTokensCount += newTokensCount;
|
|
|
|
| 126 |
}
|
| 127 |
}
|
| 128 |
|
| 129 |
-
|
| 130 |
-
latency = Math.round(endTime - startTime);
|
| 131 |
} catch (error) {
|
| 132 |
if (conversation.messages.at(-1)?.role === "assistant" && !conversation.messages.at(-1)?.content?.trim()) {
|
| 133 |
conversation.messages.pop();
|
|
@@ -261,7 +271,9 @@
|
|
| 261 |
<IconDelete />
|
| 262 |
</button>
|
| 263 |
<div class="flex-1 items-center justify-center text-center text-sm text-gray-500">
|
| 264 |
-
<span class="max-xl:hidden">
|
|
|
|
|
|
|
| 265 |
</div>
|
| 266 |
<button
|
| 267 |
type="button"
|
|
@@ -269,8 +281,8 @@
|
|
| 269 |
class="flex h-[39px] items-center gap-2 rounded-lg border border-gray-200 bg-white px-3 py-2.5 text-sm font-medium text-gray-900 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:outline-none focus:ring-4 focus:ring-gray-100 dark:border-gray-600 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-white dark:focus:ring-gray-700"
|
| 270 |
>
|
| 271 |
<IconCode />
|
| 272 |
-
{!viewCode ? "View Code" : "Hide Code"}
|
| 273 |
-
>
|
| 274 |
<button
|
| 275 |
on:click={() => {
|
| 276 |
viewCode = false;
|
|
@@ -357,4 +369,4 @@
|
|
| 357 |
>
|
| 358 |
<IconInfo classNames="text-xs" />
|
| 359 |
Give feedback
|
| 360 |
-
</a>
|
|
|
|
| 42 |
let viewSettings = false;
|
| 43 |
let showTokenModal = false;
|
| 44 |
let loading = false;
|
| 45 |
+
let latency = 0; // Renamed to total generation time
|
| 46 |
+
let firstTokenLatency = 0; // New variable for first token latency
|
| 47 |
let generatedTokensCount = 0;
|
| 48 |
let abortController: AbortController | undefined = undefined;
|
| 49 |
let waitForNonStreaming = true;
|
|
|
|
| 92 |
|
| 93 |
(document.activeElement as HTMLElement).blur();
|
| 94 |
loading = true;
|
| 95 |
+
firstTokenLatency = 0; // Reset before each submission
|
| 96 |
+
generatedTokensCount = 0; // Reset before each submission
|
| 97 |
+
|
| 98 |
|
| 99 |
try {
|
| 100 |
const startTime = performance.now();
|
| 101 |
const hf = createHfInference(hfToken);
|
| 102 |
|
| 103 |
if (conversation.streaming) {
|
| 104 |
+
let firstTokenReceived = false; // Flag to track first token
|
| 105 |
const streamingMessage = { role: "assistant", content: "" };
|
| 106 |
conversation.messages = [...conversation.messages, streamingMessage];
|
| 107 |
abortController = new AbortController();
|
|
|
|
| 114 |
streamingMessage.content = content;
|
| 115 |
conversation.messages = [...conversation.messages];
|
| 116 |
generatedTokensCount += 1;
|
| 117 |
+
|
| 118 |
+
if (!firstTokenReceived) { // Check if it's the first token
|
| 119 |
+
firstTokenLatency = Math.round(performance.now() - startTime);
|
| 120 |
+
firstTokenReceived = true;
|
| 121 |
+
}
|
| 122 |
}
|
| 123 |
},
|
| 124 |
abortController
|
|
|
|
| 133 |
if (waitForNonStreaming) {
|
| 134 |
conversation.messages = [...conversation.messages, newMessage];
|
| 135 |
generatedTokensCount += newTokensCount;
|
| 136 |
+
firstTokenLatency = latency; // In non-streaming, first token latency equals total latency.
|
| 137 |
}
|
| 138 |
}
|
| 139 |
|
| 140 |
+
latency = Math.round(performance.now() - startTime); // Total generation time
|
|
|
|
| 141 |
} catch (error) {
|
| 142 |
if (conversation.messages.at(-1)?.role === "assistant" && !conversation.messages.at(-1)?.content?.trim()) {
|
| 143 |
conversation.messages.pop();
|
|
|
|
| 271 |
<IconDelete />
|
| 272 |
</button>
|
| 273 |
<div class="flex-1 items-center justify-center text-center text-sm text-gray-500">
|
| 274 |
+
<span class="max-xl:hidden">
|
| 275 |
+
{generatedTokensCount} tokens · First Token: {firstTokenLatency}ms · Total Generation: {latency}ms
|
| 276 |
+
</span>
|
| 277 |
</div>
|
| 278 |
<button
|
| 279 |
type="button"
|
|
|
|
| 281 |
class="flex h-[39px] items-center gap-2 rounded-lg border border-gray-200 bg-white px-3 py-2.5 text-sm font-medium text-gray-900 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:outline-none focus:ring-4 focus:ring-gray-100 dark:border-gray-600 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-white dark:focus:ring-gray-700"
|
| 282 |
>
|
| 283 |
<IconCode />
|
| 284 |
+
{!viewCode ? "View Code" : "Hide Code"}
|
| 285 |
+
</button>
|
| 286 |
<button
|
| 287 |
on:click={() => {
|
| 288 |
viewCode = false;
|
|
|
|
| 369 |
>
|
| 370 |
<IconInfo classNames="text-xs" />
|
| 371 |
Give feedback
|
| 372 |
+
</a>
|