try to improve cache allocator
Browse files
app.py
CHANGED
|
@@ -248,18 +248,35 @@ def profile_warmup(model_id: str):
|
|
| 248 |
|
| 249 |
def build_alloc_plot():
|
| 250 |
with gr.Group():
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
plot = gr.LinePlot(
|
| 259 |
x="t", y="MiB", color="mode", overlay_point=True,
|
| 260 |
-
title="
|
| 261 |
-
tooltip=["t", "MiB", "mode"],
|
|
|
|
|
|
|
|
|
|
| 262 |
)
|
|
|
|
|
|
|
| 263 |
go.click(profile_warmup, inputs=[model], outputs=plot)
|
| 264 |
|
| 265 |
# ---------------------------
|
|
@@ -395,6 +412,51 @@ hr { border: 0; border-top: 1px solid var(--border-color); margin: 2rem 0; }
|
|
| 395 |
/* Keep widgets full width */
|
| 396 |
.gr-form, .gr-panel, .gr-block { max-width: none; }
|
| 397 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 398 |
"""
|
| 399 |
|
| 400 |
with gr.Blocks(css=CSS, fill_height=True, title="Interactive Blog β Transformers Feature Showcase") as demo:
|
|
|
|
| 248 |
|
| 249 |
def build_alloc_plot():
    """Build the cache pre-allocator memory-profiling demo panel.

    Creates, inside a single ``gr.Group``: a heading and a short description,
    a row with the model dropdown and the profile button, the memory
    timeline line plot, and a note about the GPU requirement. Clicking the
    button calls ``profile_warmup`` with the selected model id and routes
    its return value to the plot.
    """
    # Preset model ids offered in the dropdown; custom ids are also allowed.
    model_choices = [
        "openai-community/gpt2",
        "google/gemma-2-2b",
        "microsoft/DialoGPT-small",
        "distilbert-base-uncased",
        "facebook/opt-125m",
    ]

    # NOTE(review): the nesting below was reconstructed from a flattened
    # listing -- confirm the button belongs in the same row as the dropdown.
    with gr.Group():
        # NOTE(review): the heading originally started with an emoji that was
        # lost to mis-encoding; restore it here if the intended glyph is known.
        gr.Markdown("### Cache Pre-allocator Performance Demo")
        gr.Markdown(
            "Compare model loading with and without transformers' caching "
            "allocator warmup. This demonstrates the memory efficiency "
            "improvements."
        )

        with gr.Row():
            model = gr.Dropdown(
                label="Model to Profile",
                choices=model_choices,
                value="openai-community/gpt2",
                allow_custom_value=True,
                info="Select a model or enter a custom HuggingFace model ID",
            )
            # The label emoji was mis-encoded; its byte sequence decodes to
            # the fire emoji.
            go = gr.Button("🔥 Profile Memory", variant="primary")

        # Column names ("t", "MiB", "mode") must match the data that
        # profile_warmup hands to the plot.
        plot = gr.LinePlot(
            x="t",
            y="MiB",
            color="mode",
            overlay_point=True,
            title="Memory Allocation Timeline: Warmup ON vs OFF",
            tooltip=["t", "MiB", "mode"],
            width=900,
            height=450,
            x_title="Time (seconds)",
            y_title="Memory (MiB)",
        )

        gr.Markdown(
            "**Note**: This demo requires GPU access. The warmup feature "
            "reduces peak memory usage during model loading."
        )
        go.click(profile_warmup, inputs=[model], outputs=plot)
|
| 281 |
|
| 282 |
# ---------------------------
|
|
|
|
| 412 |
/* Keep widgets full width */
|
| 413 |
.gr-form, .gr-panel, .gr-block { max-width: none; }
|
| 414 |
|
| 415 |
+
/* Terminal styling - match light mode */
|
| 416 |
+
.gr-textbox textarea {
|
| 417 |
+
background: #f8fafc !important;
|
| 418 |
+
color: #1f2937 !important;
|
| 419 |
+
border: 1px solid var(--border-color) !important;
|
| 420 |
+
border-radius: 8px !important;
|
| 421 |
+
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace !important;
|
| 422 |
+
font-size: 0.9rem !important;
|
| 423 |
+
line-height: 1.5 !important;
|
| 424 |
+
}
|
| 425 |
+
|
| 426 |
+
.gr-textbox textarea:focus {
|
| 427 |
+
border-color: var(--link-text-color) !important;
|
| 428 |
+
box-shadow: 0 0 0 2px rgba(37, 99, 235, 0.1) !important;
|
| 429 |
+
}
|
| 430 |
+
|
| 431 |
+
/* Terminal output specifically */
|
| 432 |
+
.gr-textbox textarea[readonly] {
|
| 433 |
+
background: #111827 !important;
|
| 434 |
+
color: #f9fafb !important;
|
| 435 |
+
border: 1px solid #374151 !important;
|
| 436 |
+
font-weight: 500 !important;
|
| 437 |
+
}
|
| 438 |
+
|
| 439 |
+
/* Terminal input */
|
| 440 |
+
/* Editable (non read-only) textareas only. The negation must sit on the
   textarea itself: when :not() is tested against the .gr-textbox container
   the rule also matches read-only textareas and, being more specific,
   overrides the dark "Terminal output" colours. */
.gr-textbox textarea:not([readonly]) {
    background: #ffffff !important;
    color: #1f2937 !important;
    border: 1px solid var(--border-color) !important;
}
|
| 445 |
+
|
| 446 |
+
/* Button styling */
|
| 447 |
+
.gr-button {
|
| 448 |
+
background: var(--link-text-color) !important;
|
| 449 |
+
color: white !important;
|
| 450 |
+
border: none !important;
|
| 451 |
+
border-radius: 6px !important;
|
| 452 |
+
font-weight: 600 !important;
|
| 453 |
+
padding: 0.5rem 1rem !important;
|
| 454 |
+
}
|
| 455 |
+
|
| 456 |
+
.gr-button:hover {
|
| 457 |
+
background: #1d4ed8 !important;
|
| 458 |
+
}
|
| 459 |
+
|
| 460 |
"""
|
| 461 |
|
| 462 |
with gr.Blocks(css=CSS, fill_height=True, title="Interactive Blog β Transformers Feature Showcase") as demo:
|