Molbap (HF Staff) committed
Commit 4e2e430 · 2 parents: b20dcba, f0b010e

Merge branch 'main' of https://huggingface.co/spaces/Molbap/Transformers-playthrough

Files changed (1):
  app.py (+68, -183)
app.py CHANGED
@@ -1,184 +1,69 @@
-import os, sys, time, threading, subprocess, json, textwrap, tempfile
-import gradio as gr
-import pandas as pd
-
-import spaces
-import torch
-
-# --- Minimal safe terminal ---
-def run_shell(cmd: str) -> str:
-    banned = ["|", ">", "<", "&&", "||", "`"]
-    if any(b in cmd for b in banned):
-        return "$ " + cmd + "\nBlocked characters. Use a single command."
-    try:
-        p = subprocess.run(cmd, shell=True, check=False, capture_output=True, text=True, timeout=30)
-        return f"$ {cmd}\n{p.stdout}{p.stderr}"
-    except Exception as e:
-        return f"$ {cmd}\n{e!r}"
-
-# --- Attention mask visualizer (Transformers) ---
-def _import_attention_visualizer():
-    # Available in recent transformers (utils.attention_visualizer)
-    from transformers.utils.attention_visualizer import AttentionMaskVisualizer  # noqa: F401
-    return AttentionMaskVisualizer
-
-@spaces.GPU(duration=120)
-def render_attention_mask(model_id: str, prompt: str) -> str:
-    AttentionMaskVisualizer = _import_attention_visualizer()
-    vis = AttentionMaskVisualizer(model_id)
-    out = vis(prompt)  # returns embeddable HTML or an object with _repr_html_
-    return str(out)
-
-# --- Transformers caching allocator warmup: time vs memory_allocated() ---
-from transformers import AutoModelForCausalLM, modeling_utils as MU  # noqa: E402
-
-def _measure_load_timeline(model_id: str, disable_warmup: bool):
-    orig = MU.caching_allocator_warmup
-    if disable_warmup:
-        MU.caching_allocator_warmup = lambda *a, **k: None
-    try:
-        device = "cuda" if torch.cuda.is_available() else "cpu"
-        tl = []
-
-        def sample(start_t, stop_evt):
-            while not stop_evt.is_set():
-                if device == "cuda":
-                    torch.cuda.synchronize()
-                    alloc = torch.cuda.memory_allocated()
-                else:
-                    alloc = 0
-                tl.append({"t": time.perf_counter() - start_t, "MiB": alloc / (1024**2)})
-                time.sleep(0.05)
-
-        if device == "cuda":
-            torch.cuda.empty_cache()
-            torch.cuda.reset_peak_memory_stats()
-
-        start = time.perf_counter()
-        stop_evt = threading.Event()
-        th = threading.Thread(target=sample, args=(start, stop_evt), daemon=True)
-        th.start()
-
-        kwargs = {}
-        if device == "cuda":
-            kwargs.update(dict(torch_dtype=torch.float16, device_map="cuda:0", low_cpu_mem_usage=True))
-        model = AutoModelForCausalLM.from_pretrained(model_id, **kwargs)
-
-        stop_evt.set()
-        th.join()
-
-        if device == "cuda":
-            torch.cuda.synchronize()
-            tl.append({"t": time.perf_counter() - start, "MiB": torch.cuda.memory_allocated() / (1024**2)})
-
-        del model
-        if device == "cuda":
-            torch.cuda.empty_cache()
-            torch.cuda.ipc_collect()
-
-        return tl
-    finally:
-        MU.caching_allocator_warmup = orig
-
-@spaces.GPU(duration=240)
-def profile_warmup(model_id: str):
-    on = _measure_load_timeline(model_id, disable_warmup=False)
-    off = _measure_load_timeline(model_id, disable_warmup=True)
-    rows = [{"t": r["t"], "MiB": r["MiB"], "mode": "warmup ON"} for r in on] + \
-           [{"t": r["t"], "MiB": r["MiB"], "mode": "warmup OFF"} for r in off]
-    return pd.DataFrame(rows)
-
-# --- (Optional) FastRTC demo: simple loopback for structure; expand later ---
-# Requires camera permissions in the browser.
-try:
-    from fastrtc import WebRTC, ReplyOnPause  # type: ignore
-    def _echo_video(frame):
-        yield frame
-    HAS_FASTRTC = True
-except Exception:
-    HAS_FASTRTC = False
-
-# --- CSS for anchored, scrollable “playbook” layout ---
-CSS = """
-:root { --toc-w: 280px; }
-#layout { display: grid; grid-template-columns: var(--toc-w) 1fr; gap: 1.25rem; }
-#toc { position: sticky; top: 0.75rem; height: calc(100vh - 1.5rem); overflow: auto; padding-right: .5rem; }
-#toc a { text-decoration: none; display: block; padding: .25rem 0; }
-.section { scroll-margin-top: 72px; }
-.gradio-container { max-width: 1200px !important; margin: 0 auto; }
-hr { border: none; border-top: 1px solid var(--neutral-300); margin: 1.25rem 0; }
-"""
-
-with gr.Blocks(css=CSS, fill_height=True, title="Transformers Feature Showcase (ZeroGPU)") as demo:
-    gr.HTML("<h1>Transformers Feature Showcase</h1><p>Interactive, scrollable demo.</p>")
-    with gr.Row(elem_id="layout"):
-        # TOC
-        with gr.Column(scale=0):
-            gr.HTML(
-                """
-                <nav id="toc">
-                    <h3>Sections</h3>
-                    <a href="#terminal">Terminal</a>
-                    <a href="#attention">Attention mask visualizer</a>
-                    <a href="#allocator">Allocator warmup timeline</a>
-                    <a href="#rtc">FastRTC (preview)</a>
-                </nav>
-                """
-            )
-        # Content
-        with gr.Column():
-            # Terminal
-            gr.HTML('<h2 id="terminal" class="section">Terminal</h2>')
-            with gr.Group():
-                cmd = gr.Textbox(label="Command", value="python -c 'import torch; print(torch.__version__)'")
-                run_btn = gr.Button("Run")
-                out = gr.Textbox(label="Output", lines=12)
-                run_btn.click(run_shell, inputs=cmd, outputs=out)
-            gr.HTML("<hr/>")
-
-            # Attention visualizer
-            gr.HTML('<h2 id="attention" class="section">Attention mask visualizer</h2>')
-            with gr.Group():
-                with gr.Row():
-                    model_vis = gr.Dropdown(
-                        label="Model",
-                        choices=["openai-community/gpt2", "google/gemma-2-2b"],
-                        value="openai-community/gpt2",
-                        allow_custom_value=True,
-                    )
-                    prompt_vis = gr.Textbox(label="Prompt", value="You are an assistant. Make sure you print me.")
-                go_vis = gr.Button("Render")
-                html_vis = gr.HTML()
-                go_vis.click(render_attention_mask, inputs=[model_vis, prompt_vis], outputs=html_vis)
-            gr.HTML("<hr/>")
-
-            # Allocator warmup
-            gr.HTML('<h2 id="allocator" class="section">Transformers allocator warmup: time vs allocated MiB</h2>')
-            with gr.Group():
-                model_mem = gr.Dropdown(
-                    label="Model",
-                    choices=["openai-community/gpt2", "google/gemma-2-2b"],
-                    value="openai-community/gpt2",
-                    allow_custom_value=True,
-                )
-                go_mem = gr.Button("Run")
-                plot = gr.LinePlot(
-                    x="t", y="MiB", color="mode", overlay_point=True,
-                    title="from_pretrained() load: time vs CUDA memory_allocated()",
-                    tooltip=["t", "MiB", "mode"], width=900, height=420
-                )
-                go_mem.click(profile_warmup, inputs=[model_mem], outputs=plot)
-            gr.HTML("<hr/>")
-
-            # FastRTC preview
-            gr.HTML('<h2 id="rtc" class="section">FastRTC (preview)</h2>')
-            if HAS_FASTRTC:
-                with gr.Group():
-                    gr.Markdown("Camera loopback using FastRTC WebRTC. Extend with streaming handlers later.")
-                    rtc = WebRTC(mode="send-receive", modality="video")
-                    rtc.stream(ReplyOnPause(_echo_video), inputs=[rtc], outputs=[rtc], time_limit=60)
             else:
-                gr.Markdown("Install `fastrtc` to enable this section.")
-
-if __name__ == "__main__":
-    demo.launch()
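
The profiler removed above hinges on one trick: recent transformers pre-reserves device memory during from_pretrained() through an internal caching_allocator_warmup hook in modeling_utils, and swapping that hook for a no-op lets you A/B the load. A minimal standalone sketch of that pattern, stripped of the Gradio plumbing; the timed_load helper and the model id in the comment are illustrative, not part of this commit:

import time
import torch
from transformers import AutoModelForCausalLM, modeling_utils as MU

def timed_load(model_id: str, disable_warmup: bool) -> float:
    # Keep a reference to the real hook so it can always be restored.
    orig = MU.caching_allocator_warmup
    if disable_warmup:
        MU.caching_allocator_warmup = lambda *a, **k: None  # no-op the warmup
    try:
        start = time.perf_counter()
        model = AutoModelForCausalLM.from_pretrained(model_id)
        if torch.cuda.is_available():
            torch.cuda.synchronize()  # finish pending CUDA work before stopping the clock
        elapsed = time.perf_counter() - start
        del model
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        return elapsed
    finally:
        MU.caching_allocator_warmup = orig  # restore the hook unconditionally

# Illustrative A/B comparison:
# timed_load("openai-community/gpt2", disable_warmup=False)
# timed_load("openai-community/gpt2", disable_warmup=True)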
+import re
+from pathlib import Path
+from markdown_it import MarkdownIt
+from mdit_py_plugins.footnote import footnote
+from mdit_py_plugins.tasklists import tasklists
+from mdit_py_plugins.container import container
+
+_md = MarkdownIt("gfm-like").use(footnote).use(tasklists).use(container, "details")
+
+def md_to_html(text: str) -> str:
+    # Convert common Obsidian patterns to standard Markdown
+    text = re.sub(r'!\[\[([^\]|]+)\]\]', r'![](static/\1)', text)     # image embeds ![[file.png]]
+    text = re.sub(r'\[\[([^\]|]+)\|([^\]]+)\]\]', r'[\2](\1)', text)  # [[file|label]]
+    text = re.sub(r'\[\[([^\]]+)\]\]', r'[\1](\1)', text)             # [[file]]
+    return _md.render(text)
+
+def render_article(md_path: str, inserts: dict[str, callable]):
+    raw = Path(md_path).read_text(encoding="utf-8")
+    parts = re.split(r"\{\{([A-Z_]+)\}\}", raw)  # split on {{TOKEN}}
+    with gr.Column():
+        for i, part in enumerate(parts):
+            if i % 2 == 0:
+                gr.HTML(md_to_html(part))
             else:
+                build = inserts.get(part)
+                (build or (lambda: gr.HTML(f"<p><em>Unknown insert: {part}</em></p>")))()
+
+# --- Builders that drop your existing widgets in-place ---
+def build_terminal():
+    with gr.Group():
+        cmd = gr.Textbox(label="Command", value="python -c 'import torch; print(torch.__version__)'")
+        run = gr.Button("Run")
+        out = gr.Textbox(label="Output", lines=12)
+        run.click(run_shell, inputs=cmd, outputs=out)
+
+def build_attn_vis():
+    with gr.Group():
+        with gr.Row():
+            model = gr.Dropdown(
+                label="Model",
+                choices=["openai-community/gpt2", "google/gemma-2-2b"],
+                value="openai-community/gpt2",
+                allow_custom_value=True,
+            )
+            prompt = gr.Textbox(label="Prompt", value="You are an assistant. Make sure you print me.")
+        go = gr.Button("Render")
+        html = gr.HTML()
+        go.click(render_attention_mask, inputs=[model, prompt], outputs=html)
+
+def build_alloc_plot():
+    with gr.Group():
+        model = gr.Dropdown(
+            label="Model",
+            choices=["openai-community/gpt2", "google/gemma-2-2b"],
+            value="openai-community/gpt2",
+            allow_custom_value=True,
+        )
+        go = gr.Button("Run")
+        plot = gr.LinePlot(
+            x="t", y="MiB", color="mode", overlay_point=True,
+            title="from_pretrained(): time vs CUDA memory_allocated()", width=900, height=420
+        )
+        go.click(profile_warmup, inputs=[model], outputs=plot)
+
+INSERTS = {
+    "TERMINAL": build_terminal,
+    "ATTN_VIS": build_attn_vis,
+    "ALLOC_PLOT": build_alloc_plot,
+}
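
In the new layout, render_article splits a Markdown article on {{TOKEN}} placeholders: even-indexed parts render as HTML, odd-indexed parts are token names looked up in INSERTS, so a live widget lands in the middle of the prose. A minimal sketch of how it would be driven; the article.md contents and the Blocks wrapper are illustrative, not part of this commit:

# article.md (illustrative) -- ordinary Markdown with widget placeholders:
#
#   # Transformers playthrough
#   Run a command on the Space:
#   {{TERMINAL}}
#   Then inspect a model's attention mask:
#   {{ATTN_VIS}}
#
import gradio as gr

with gr.Blocks() as demo:
    # Even-indexed split parts become HTML; {{TERMINAL}} and {{ATTN_VIS}}
    # are replaced by the matching builders from INSERTS.
    render_article("article.md", INSERTS)

if __name__ == "__main__":
    demo.launch()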