Molbap (HF Staff) committed
Commit e3461d1 · verified · 1 Parent(s): 3af07f0

Update app.py

Files changed (1): app.py +233 -18

app.py CHANGED
@@ -1,11 +1,28 @@
+import os
+import sys
 import re
+import json
+import time
+import threading
+import subprocess
 from pathlib import Path
-from markdown_it import MarkdownIt
-from importlib import import_module
 
-def _make_md():
+import gradio as gr
+import pandas as pd
+import torch
+import spaces
+
+# ---------------------------
+# Markdown rendering (Option A)
+# ---------------------------
+
+def _make_md_markdownit():
+    # Prefer markdown-it-py + mdit-py-plugins if available
+    from importlib import import_module
+    from markdown_it import MarkdownIt
     md = MarkdownIt("gfm-like")
 
+    # Version-agnostic plugin shims
     foot_mod = import_module("mdit_py_plugins.footnote")
     foot = getattr(foot_mod, "footnote", None) or getattr(foot_mod, "footnote_plugin")
     md.use(foot)
@@ -17,42 +34,112 @@ def _make_md():
     cont_mod = import_module("mdit_py_plugins.container")
     container = getattr(cont_mod, "container", None) or getattr(cont_mod, "container_plugin")
     try:
-        md.use(container, "details")  # newer signature
+        md.use(container, "details")
     except TypeError:
-        md.use(lambda m: container(m, name="details"))  # older signature
-
+        md.use(lambda m: container(m, name="details"))
     return md
 
-_md = _make_md()
+def _make_md_pythonmarkdown():
+    # Fallback: Python-Markdown + PyMdown
+    import markdown as md
+    exts = [
+        "extra",  # tables + fenced code
+        "footnotes",
+        "admonition",
+        "toc",
+        "pymdownx.details",
+        "pymdownx.superfences",
+        "pymdownx.tasklist",
+    ]
+    ext_cfg = {"pymdownx.tasklist": {"custom_checkbox": True}, "toc": {"permalink": True}}
+    return ("python-markdown", exts, ext_cfg, md)
 
+try:
+    _md_engine = ("markdown-it", _make_md_markdownit())
+except Exception:
+    _md_engine = _make_md_pythonmarkdown()
 
-
-def md_to_html(text: str) -> str:
-    # Convert common Obsidian patterns to standard Markdown
-    text = re.sub(r'!\[\[([^\]|]+)\]\]', r'![](static/\1)', text)  # image embeds ![[file.png]]
+def _obsidian_rewrites(text: str) -> str:
+    # Obsidian image/file embeds and wiki links
+    text = re.sub(r'!\[\[([^\]|]+)\]\]', r'![](static/\1)', text)  # ![[file.png]]
     text = re.sub(r'\[\[([^\]|]+)\|([^\]]+)\]\]', r'[\2](\1)', text)  # [[file|label]]
     text = re.sub(r'\[\[([^\]]+)\]\]', r'[\1](\1)', text)  # [[file]]
-    return _md.render(text)
+    return text
+
+def md_to_html(text: str) -> str:
+    text = _obsidian_rewrites(text)
+    if _md_engine[0] == "markdown-it":
+        md = _md_engine[1]
+        return md.render(text)
+    else:
+        tag, exts, cfg, md = _md_engine
+        return md.markdown(text, extensions=exts, extension_configs=cfg, output_format="html5")
 
 def render_article(md_path: str, inserts: dict[str, callable]):
-    raw = Path(md_path).read_text(encoding="utf-8")
-    parts = re.split(r"\{\{([A-Z_]+)\}\}", raw)  # split on {{TOKEN}}
+    raw = ""
+    path = Path(md_path)
+    if path.exists():
+        raw = path.read_text(encoding="utf-8")
+    else:
+        raw = f"**Missing article**: `{md_path}` not found.\n\nCreate it in your Space repo."
+
+    # Split on {{TOKEN}} markers (e.g., {{ALLOC_PLOT}})
+    parts = re.split(r"\{\{([A-Z_]+)\}\}", raw)
     with gr.Column():
         for i, part in enumerate(parts):
            if i % 2 == 0:
                gr.HTML(md_to_html(part))
            else:
                build = inserts.get(part)
-               (build or (lambda: gr.HTML(f"<p><em>Unknown insert: {part}</em></p>")))()
+               if build is None:
+                   gr.HTML(f"<p><em>Unknown insert: {part}</em></p>")
+               else:
+                   build()
+
+# ---------------------------
+# Terminal (safe, simplified)
+# ---------------------------
+
+def run_shell(cmd: str) -> str:
+    banned = ["|", ">", "<", "&&", "||", "`"]
+    if any(b in cmd for b in banned):
+        return "$ " + cmd + "\nBlocked characters. Use a single command."
+    try:
+        p = subprocess.run(cmd, shell=True, check=False, capture_output=True, text=True, timeout=30)
+        return f"$ {cmd}\n{p.stdout}{p.stderr}"
+    except Exception as e:
+        return f"$ {cmd}\n{e!r}"
 
-# --- Builders that drop your existing widgets in-place ---
 def build_terminal():
     with gr.Group():
         cmd = gr.Textbox(label="Command", value="python -c 'import torch; print(torch.__version__)'")
         run = gr.Button("Run")
-        out = gr.Textbox(label="Output", lines=12)
+        out = gr.Textbox(label="Output", lines=12, interactive=False)
         run.click(run_shell, inputs=cmd, outputs=out)
 
+# ---------------------------------------
+# Attention Mask Visualizer (Transformers)
+# ---------------------------------------
+
+def _import_attention_visualizer():
+    try:
+        from transformers.utils.attention_visualizer import AttentionMaskVisualizer  # type: ignore
+    except Exception as e:
+        raise RuntimeError(
+            "AttentionMaskVisualizer is unavailable in this Transformers version."
+        ) from e
+    return AttentionMaskVisualizer
+
+@spaces.GPU(duration=120)
+def render_attention_mask(model_id: str, prompt: str) -> str:
+    try:
+        AttentionMaskVisualizer = _import_attention_visualizer()
+        vis = AttentionMaskVisualizer(model_id)
+        out = vis(prompt)  # returns embeddable HTML or object with _repr_html_
+        return str(out)
+    except Exception as e:
+        return f"<p>Attention visualizer error: {e}</p>"
+
 def build_attn_vis():
     with gr.Group():
         with gr.Row():
@@ -67,6 +154,69 @@ def build_attn_vis():
         html = gr.HTML()
         go.click(render_attention_mask, inputs=[model, prompt], outputs=html)
 
+# -------------------------------------------------------
+# Transformers caching allocator warmup (time vs MiB plot)
+# -------------------------------------------------------
+
+from transformers import AutoModelForCausalLM, modeling_utils as MU  # noqa: E402
+
+def _measure_load_timeline(model_id: str, disable_warmup: bool):
+    orig = getattr(MU, "caching_allocator_warmup", None)
+    if disable_warmup and orig is not None:
+        MU.caching_allocator_warmup = lambda *a, **k: None  # type: ignore[attr-defined]
+    try:
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        tl = []
+
+        def sample(start_t, stop_evt):
+            while not stop_evt.is_set():
+                if device == "cuda":
+                    torch.cuda.synchronize()
+                    alloc = torch.cuda.memory_allocated()
+                else:
+                    alloc = 0
+                tl.append({"t": time.perf_counter() - start_t, "MiB": alloc / (1024**2)})
+                time.sleep(0.05)
+
+        if device == "cuda":
+            torch.cuda.empty_cache()
+            torch.cuda.reset_peak_memory_stats()
+
+        start = time.perf_counter()
+        stop_evt = threading.Event()
+        th = threading.Thread(target=sample, args=(start, stop_evt), daemon=True)
+        th.start()
+
+        kwargs = {}
+        if device == "cuda":
+            kwargs.update(dict(torch_dtype=torch.float16, device_map="cuda:0", low_cpu_mem_usage=True))
+        model = AutoModelForCausalLM.from_pretrained(model_id, **kwargs)
+
+        stop_evt.set()
+        th.join()
+
+        if device == "cuda":
+            torch.cuda.synchronize()
+            tl.append({"t": time.perf_counter() - start, "MiB": torch.cuda.memory_allocated() / (1024**2)})
+
+        del model
+        if device == "cuda":
+            torch.cuda.empty_cache()
+            torch.cuda.ipc_collect()
+
+        return tl
+    finally:
+        if orig is not None:
+            MU.caching_allocator_warmup = orig  # restore
+
+@spaces.GPU(duration=240)
+def profile_warmup(model_id: str):
+    on = _measure_load_timeline(model_id, disable_warmup=False)
+    off = _measure_load_timeline(model_id, disable_warmup=True)
+    rows = [{"t": r["t"], "MiB": r["MiB"], "mode": "warmup ON"} for r in on] + \
+           [{"t": r["t"], "MiB": r["MiB"], "mode": "warmup OFF"} for r in off]
+    return pd.DataFrame(rows)
+
 def build_alloc_plot():
     with gr.Group():
         model = gr.Dropdown(
@@ -78,12 +228,77 @@ def build_alloc_plot():
         go = gr.Button("Run")
         plot = gr.LinePlot(
             x="t", y="MiB", color="mode", overlay_point=True,
-            title="from_pretrained(): time vs CUDA memory_allocated()", width=900, height=420
+            title="from_pretrained() load: time vs CUDA memory_allocated()",
+            tooltip=["t", "MiB", "mode"], width=900, height=420
         )
         go.click(profile_warmup, inputs=[model], outputs=plot)
 
+# ---------------------------
+# Optional FastRTC preview
+# ---------------------------
+
+try:
+    from fastrtc import WebRTC, ReplyOnPause  # type: ignore
+    def _echo_video(frame):
+        yield frame
+    HAS_FASTRTC = True
+except Exception:
+    HAS_FASTRTC = False
+
+def build_fastrtc():
+    if not HAS_FASTRTC:
+        gr.Markdown("Install `fastrtc` to enable this section.")
+        return
+    with gr.Group():
+        gr.Markdown("Camera loopback using FastRTC WebRTC. Extend with streaming handlers later.")
+        rtc = WebRTC(mode="send-receive", modality="video")
+        rtc.stream(ReplyOnPause(_echo_video), inputs=[rtc], outputs=[rtc], time_limit=60)
+
+# ---------------------------
+# Inserts registry
+# ---------------------------
+
 INSERTS = {
     "TERMINAL": build_terminal,
     "ATTN_VIS": build_attn_vis,
     "ALLOC_PLOT": build_alloc_plot,
 }
+
+# ---------------------------
+# Layout / CSS / App
+# ---------------------------
+
+CSS = """
+:root { --toc-w: 280px; }
+#layout { display: grid; grid-template-columns: var(--toc-w) 1fr; gap: 1.25rem; }
+#toc { position: sticky; top: 0.75rem; height: calc(100vh - 1.5rem); overflow: auto; padding-right: .5rem; }
+#toc a { text-decoration: none; display: block; padding: .25rem 0; }
+.section { scroll-margin-top: 72px; }
+.gradio-container { max-width: 1200px !important; margin: 0 auto; }
+hr { border: none; border-top: 1px solid var(--neutral-300); margin: 1.25rem 0; }
+"""
+
+with gr.Blocks(css=CSS, fill_height=True, title="Interactive Blog — Transformers Feature Showcase") as demo:
+    gr.HTML("<h1>Transformers Feature Showcase</h1><p>Interactive, scrollable demo.</p>")
+    with gr.Row(elem_id="layout"):
+        with gr.Column(scale=0):
+            gr.HTML(
+                """
+                <nav id="toc">
+                  <h3>Sections</h3>
+                  <a href="#article">Article</a>
+                  <a href="#rtc">FastRTC (preview)</a>
+                </nav>
+                """
+            )
+        with gr.Column():
+            gr.HTML('<h2 id="article" class="section">Article</h2>')
+            # Author in Obsidian. Put {{ALLOC_PLOT}}, {{ATTN_VIS}}, {{TERMINAL}} where you want widgets.
+            render_article("content/article.md", INSERTS)
+            gr.HTML("<hr/>")
+
+            gr.HTML('<h2 id="rtc" class="section">FastRTC (preview)</h2>')
+            build_fastrtc()
+
+if __name__ == "__main__":
+    demo.launch()
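
For reference, the {{TOKEN}} mechanism that render_article relies on comes from re.split with a capturing group: the result alternates between Markdown segments (even indices) and insert keys (odd indices), which is why the loop dispatches on i % 2. A minimal sketch, using a hypothetical inline article string in place of content/article.md:

import re

# Hypothetical article text; the Space reads content/article.md instead.
raw = "Intro prose.\n\n{{TERMINAL}}\n\nMore prose.\n\n{{ALLOC_PLOT}}\n"

parts = re.split(r"\{\{([A-Z_]+)\}\}", raw)
for i, part in enumerate(parts):
    # Even indices: Markdown to render; odd indices: insert keys like "TERMINAL".
    kind = "markdown" if i % 2 == 0 else "insert"
    print(i, kind, repr(part))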