Molbap HF Staff committed on
Commit
0548742
·
1 Parent(s): dd22158

try to improve cache allocator

Browse files
Files changed (1) hide show
  1. app.py +71 -9
app.py CHANGED
@@ -248,18 +248,35 @@ def profile_warmup(model_id: str):
248
 
249
  def build_alloc_plot():
250
  with gr.Group():
251
- model = gr.Dropdown(
252
- label="Model",
253
- choices=["openai-community/gpt2", "google/gemma-2-2b"],
254
- value="openai-community/gpt2",
255
- allow_custom_value=True,
256
- )
257
- go = gr.Button("Run")
 
 
 
 
 
 
 
 
 
 
 
 
258
  plot = gr.LinePlot(
259
  x="t", y="MiB", color="mode", overlay_point=True,
260
- title="from_pretrained() load: time vs CUDA memory_allocated()",
261
- tooltip=["t", "MiB", "mode"], width=900, height=420
 
 
 
262
  )
 
 
263
  go.click(profile_warmup, inputs=[model], outputs=plot)
264
 
265
  # ---------------------------
@@ -395,6 +412,51 @@ hr { border: 0; border-top: 1px solid var(--border-color); margin: 2rem 0; }
395
  /* Keep widgets full width */
396
  .gr-form, .gr-panel, .gr-block { max-width: none; }
397
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
398
  """
399
 
400
  with gr.Blocks(css=CSS, fill_height=True, title="Interactive Blog — Transformers Feature Showcase") as demo:
 
248
 
249
  def build_alloc_plot():
250
  with gr.Group():
251
+ gr.Markdown("### 🚀 Cache Pre-allocator Performance Demo")
252
+ gr.Markdown("Compare model loading with and without transformers' caching allocator warmup. This demonstrates the memory efficiency improvements.")
253
+
254
+ with gr.Row():
255
+ model = gr.Dropdown(
256
+ label="Model to Profile",
257
+ choices=[
258
+ "openai-community/gpt2",
259
+ "google/gemma-2-2b",
260
+ "microsoft/DialoGPT-small",
261
+ "distilbert-base-uncased",
262
+ "facebook/opt-125m"
263
+ ],
264
+ value="openai-community/gpt2",
265
+ allow_custom_value=True,
266
+ info="Select a model or enter a custom HuggingFace model ID"
267
+ )
268
+ go = gr.Button("🔥 Profile Memory", variant="primary")
269
+
270
  plot = gr.LinePlot(
271
  x="t", y="MiB", color="mode", overlay_point=True,
272
+ title="Memory Allocation Timeline: Warmup ON vs OFF",
273
+ tooltip=["t", "MiB", "mode"],
274
+ width=900, height=450,
275
+ x_title="Time (seconds)",
276
+ y_title="Memory (MiB)"
277
  )
278
+
279
+ gr.Markdown("**Note**: This demo requires GPU access. The warmup feature reduces peak memory usage during model loading.")
280
  go.click(profile_warmup, inputs=[model], outputs=plot)
281
 
282
  # ---------------------------
 
412
  /* Keep widgets full width */
413
  .gr-form, .gr-panel, .gr-block { max-width: none; }
414
 
415
+ /* Terminal styling - match light mode */
416
+ .gr-textbox textarea {
417
+ background: #f8fafc !important;
418
+ color: #1f2937 !important;
419
+ border: 1px solid var(--border-color) !important;
420
+ border-radius: 8px !important;
421
+ font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace !important;
422
+ font-size: 0.9rem !important;
423
+ line-height: 1.5 !important;
424
+ }
425
+
426
+ .gr-textbox textarea:focus {
427
+ border-color: var(--link-text-color) !important;
428
+ box-shadow: 0 0 0 2px rgba(37, 99, 235, 0.1) !important;
429
+ }
430
+
431
+ /* Terminal output specifically */
432
+ .gr-textbox textarea[readonly] {
433
+ background: #111827 !important;
434
+ color: #f9fafb !important;
435
+ border: 1px solid #374151 !important;
436
+ font-weight: 500 !important;
437
+ }
438
+
439
+ /* Terminal input */
440
+ .gr-textbox:not(textarea[readonly]) textarea {
441
+ background: #ffffff !important;
442
+ color: #1f2937 !important;
443
+ border: 1px solid var(--border-color) !important;
444
+ }
445
+
446
+ /* Button styling */
447
+ .gr-button {
448
+ background: var(--link-text-color) !important;
449
+ color: white !important;
450
+ border: none !important;
451
+ border-radius: 6px !important;
452
+ font-weight: 600 !important;
453
+ padding: 0.5rem 1rem !important;
454
+ }
455
+
456
+ .gr-button:hover {
457
+ background: #1d4ed8 !important;
458
+ }
459
+
460
  """
461
 
462
  with gr.Blocks(css=CSS, fill_height=True, title="Interactive Blog — Transformers Feature Showcase") as demo: