Spaces:

transformers-community
/

Transformers-tenets

Running

App Files Files Community

Molbap HF Staff committed on Aug 19

Commit

4e2e430

2 Parent(s): b20dcba f0b010e

Merge branch 'main' of https://huggingface.co/spaces/Molbap/Transformers-playthrough

Browse files

Files changed (1) hide show

app.py +68 -183

app.py CHANGED Viewed

@@ -1,184 +1,69 @@
-import os, sys, time, threading, subprocess, json, textwrap, tempfile
-import gradio as gr
-import pandas as pd
-import spaces
-import torch
-# --- Minimal safe terminal ---
-def run_shell(cmd: str) -> str:
-    banned = ["|", ">", "<", "&&", "||", "`"]
-    if any(b in cmd for b in banned):
-        return "$ " + cmd + "\nBlocked characters. Use a single command."
-    try:
-        p = subprocess.run(cmd, shell=True, check=False, capture_output=True, text=True, timeout=30)
-        return f"$ {cmd}\n{p.stdout}{p.stderr}"
-    except Exception as e:
-        return f"$ {cmd}\n{e!r}"
-# --- Attention mask visualizer (Transformers) ---
-def _import_attention_visualizer():
-    # Available in recent transformers (utils.attention_visualizer)
-    from transformers.utils.attention_visualizer import AttentionMaskVisualizer  # noqa: F401
-    return AttentionMaskVisualizer
-@spaces.GPU(duration=120)
-def render_attention_mask(model_id: str, prompt: str) -> str:
-    AttentionMaskVisualizer = _import_attention_visualizer()
-    vis = AttentionMaskVisualizer(model_id)
-    out = vis(prompt)          # returns embeddable HTML or an object with _repr_html_
-    return str(out)
-# --- Transformers caching allocator warmup: time vs memory_allocated() ---
-from transformers import AutoModelForCausalLM, modeling_utils as MU  # noqa: E402
-def _measure_load_timeline(model_id: str, disable_warmup: bool):
-    orig = MU.caching_allocator_warmup
-    if disable_warmup:
-        MU.caching_allocator_warmup = lambda *a, **k: None
-    try:
-        device = "cuda" if torch.cuda.is_available() else "cpu"
-        tl = []
-        def sample(start_t, stop_evt):
-            while not stop_evt.is_set():
-                if device == "cuda":
-                    torch.cuda.synchronize()
-                    alloc = torch.cuda.memory_allocated()
-                else:
-                    alloc = 0
-                tl.append({"t": time.perf_counter() - start_t, "MiB": alloc / (1024**2)})
-                time.sleep(0.05)
-        if device == "cuda":
-            torch.cuda.empty_cache()
-            torch.cuda.reset_peak_memory_stats()
-        start = time.perf_counter()
-        stop_evt = threading.Event()
-        th = threading.Thread(target=sample, args=(start, stop_evt), daemon=True)
-        th.start()
-        kwargs = {}
-        if device == "cuda":
-            kwargs.update(dict(torch_dtype=torch.float16, device_map="cuda:0", low_cpu_mem_usage=True))
-        model = AutoModelForCausalLM.from_pretrained(model_id, **kwargs)
-        stop_evt.set()
-        th.join()
-        if device == "cuda":
-            torch.cuda.synchronize()
-            tl.append({"t": time.perf_counter() - start, "MiB": torch.cuda.memory_allocated() / (1024**2)})
-        del model
-        if device == "cuda":
-            torch.cuda.empty_cache()
-            torch.cuda.ipc_collect()
-        return tl
-    finally:
-        MU.caching_allocator_warmup = orig
-@spaces.GPU(duration=240)
-def profile_warmup(model_id: str):
-    on  = _measure_load_timeline(model_id, disable_warmup=False)
-    off = _measure_load_timeline(model_id, disable_warmup=True)
-    rows = [{"t": r["t"], "MiB": r["MiB"], "mode": "warmup ON"} for r in on] + \
-           [{"t": r["t"], "MiB": r["MiB"], "mode": "warmup OFF"} for r in off]
-    return pd.DataFrame(rows)
-# --- (Optional) FastRTC demo: simple loopback for structure; expand later ---
-# Requires camera permissions in the browser.
-try:
-    from fastrtc import WebRTC, ReplyOnPause  # type: ignore
-    def _echo_video(frame):
-        yield frame
-    HAS_FASTRTC = True
-except Exception:
-    HAS_FASTRTC = False
-# --- CSS for anchored, scrollable “playbook” layout ---
-CSS = """
-:root { --toc-w: 280px; }
-#layout { display: grid; grid-template-columns: var(--toc-w) 1fr; gap: 1.25rem; }
-#toc { position: sticky; top: 0.75rem; height: calc(100vh - 1.5rem); overflow: auto; padding-right: .5rem; }
-#toc a { text-decoration: none; display: block; padding: .25rem 0; }
-.section { scroll-margin-top: 72px; }
-.gradio-container { max-width: 1200px !important; margin: 0 auto; }
-hr { border: none; border-top: 1px solid var(--neutral-300); margin: 1.25rem 0; }
-"""
-with gr.Blocks(css=CSS, fill_height=True, title="Transformers Feature Showcase (ZeroGPU)") as demo:
-    gr.HTML("<h1>Transformers Feature Showcase</h1><p>Interactive, scrollable demo.</p>")
-    with gr.Row(elem_id="layout"):
-        # TOC
-        with gr.Column(scale=0):
-            gr.HTML(
-                """
-                <nav id="toc">
-                  <h3>Sections</h3>
-                  <a href="#terminal">Terminal</a>
-                  <a href="#attention">Attention mask visualizer</a>
-                  <a href="#allocator">Allocator warmup timeline</a>
-                  <a href="#rtc">FastRTC (preview)</a>
-                </nav>
-                """
-            )
-        # Content
-        with gr.Column():
-            # Terminal
-            gr.HTML('<h2 id="terminal" class="section">Terminal</h2>')
-            with gr.Group():
-                cmd = gr.Textbox(label="Command", value="python -c 'import torch; print(torch.__version__)'")
-                run_btn = gr.Button("Run")
-                out = gr.Textbox(label="Output", lines=12)
-                run_btn.click(run_shell, inputs=cmd, outputs=out)
-            gr.HTML("<hr/>")
-            # Attention visualizer
-            gr.HTML('<h2 id="attention" class="section">Attention mask visualizer</h2>')
-            with gr.Group():
-                with gr.Row():
-                    model_vis = gr.Dropdown(
-                        label="Model",
-                        choices=["openai-community/gpt2", "google/gemma-2-2b"],
-                        value="openai-community/gpt2",
-                        allow_custom_value=True,
-                    )
-                    prompt_vis = gr.Textbox(label="Prompt", value="You are an assistant. Make sure you print me.")
-                    go_vis = gr.Button("Render")
-                html_vis = gr.HTML()
-                go_vis.click(render_attention_mask, inputs=[model_vis, prompt_vis], outputs=html_vis)
-            gr.HTML("<hr/>")
-            # Allocator warmup
-            gr.HTML('<h2 id="allocator" class="section">Transformers allocator warmup: time vs allocated MiB</h2>')
-            with gr.Group():
-                model_mem = gr.Dropdown(
-                    label="Model",
-                    choices=["openai-community/gpt2", "google/gemma-2-2b"],
-                    value="openai-community/gpt2",
-                    allow_custom_value=True,
-                )
-                go_mem = gr.Button("Run")
-                plot = gr.LinePlot(
-                    x="t", y="MiB", color="mode", overlay_point=True,
-                    title="from_pretrained() load: time vs CUDA memory_allocated()",
-                    tooltip=["t", "MiB", "mode"], width=900, height=420
-                )
-                go_mem.click(profile_warmup, inputs=[model_mem], outputs=plot)
-            gr.HTML("<hr/>")
-            # FastRTC preview
-            gr.HTML('<h2 id="rtc" class="section">FastRTC (preview)</h2>')
-            if HAS_FASTRTC:
-                with gr.Group():
-                    gr.Markdown("Camera loopback using FastRTC WebRTC. Extend with streaming handlers later.")
-                    rtc = WebRTC(mode="send-receive", modality="video")
-                    rtc.stream(ReplyOnPause(_echo_video), inputs=[rtc], outputs=[rtc], time_limit=60)
             else:
-                gr.Markdown("Install `fastrtc` to enable this section.")
-if __name__ == "__main__":
-    demo.launch()

+import re
+from pathlib import Path
+from markdown_it import MarkdownIt
+from mdit_py_plugins.footnote import footnote
+from mdit_py_plugins.tasklists import tasklists
+from mdit_py_plugins.container import container
+_md = MarkdownIt("gfm-like").use(footnote).use(tasklists).use(container, "details")
+def md_to_html(text: str) -> str:
+    # Convert common Obsidian patterns to standard Markdown
+    text = re.sub(r'!\[\[([^\]|]+)\]\]', r'![](static/\1)', text)     # image embeds ![[file.png]]
+    text = re.sub(r'\[\[([^\]|]+)\|([^\]]+)\]\]', r'[\2](\1)', text)   # [[file|label]]
+    text = re.sub(r'\[\[([^\]]+)\]\]', r'[\1](\1)', text)              # [[file]]
+    return _md.render(text)
+def render_article(md_path: str, inserts: dict[str, callable]):
+    raw = Path(md_path).read_text(encoding="utf-8")
+    parts = re.split(r"\{\{([A-Z_]+)\}\}", raw)  # split on {{TOKEN}}
+    with gr.Column():
+        for i, part in enumerate(parts):
+            if i % 2 == 0:
+                gr.HTML(md_to_html(part))
             else:
+                build = inserts.get(part)
+                (build or (lambda: gr.HTML(f"<p><em>Unknown insert: {part}</em></p>")))()
+# --- Builders that drop your existing widgets in-place ---
+def build_terminal():
+    with gr.Group():
+        cmd = gr.Textbox(label="Command", value="python -c 'import torch; print(torch.__version__)'")
+        run = gr.Button("Run")
+        out = gr.Textbox(label="Output", lines=12)
+        run.click(run_shell, inputs=cmd, outputs=out)
+def build_attn_vis():
+    with gr.Group():
+        with gr.Row():
+            model = gr.Dropdown(
+                label="Model",
+                choices=["openai-community/gpt2", "google/gemma-2-2b"],
+                value="openai-community/gpt2",
+                allow_custom_value=True,
+            )
+            prompt = gr.Textbox(label="Prompt", value="You are an assistant. Make sure you print me.")
+            go = gr.Button("Render")
+        html = gr.HTML()
+        go.click(render_attention_mask, inputs=[model, prompt], outputs=html)
+def build_alloc_plot():
+    with gr.Group():
+        model = gr.Dropdown(
+            label="Model",
+            choices=["openai-community/gpt2", "google/gemma-2-2b"],
+            value="openai-community/gpt2",
+            allow_custom_value=True,
+        )
+        go = gr.Button("Run")
+        plot = gr.LinePlot(
+            x="t", y="MiB", color="mode", overlay_point=True,
+            title="from_pretrained(): time vs CUDA memory_allocated()", width=900, height=420
+        )
+        go.click(profile_warmup, inputs=[model], outputs=plot)
+INSERTS = {
+    "TERMINAL": build_terminal,
+    "ATTN_VIS": build_attn_vis,
+    "ALLOC_PLOT": build_alloc_plot,
+}