# Standard library imports
import re
import subprocess
import threading
import time
from importlib import import_module
from pathlib import Path

# Third-party imports
import gradio as gr
import numpy as np
import pandas as pd
import spaces
import torch
from transformers import AutoModelForCausalLM
from transformers import modeling_utils as transformers_modeling

# Optional imports for markdown processing
try:
    from markdown_it import MarkdownIt
    HAS_MARKDOWN_IT = True
except ImportError:
    HAS_MARKDOWN_IT = False

try:
    import markdown
    HAS_PYTHON_MARKDOWN = True
except ImportError:
    HAS_PYTHON_MARKDOWN = False

try:
    from fastrtc import WebRTC, ReplyOnPause
    HAS_FASTRTC = True
except ImportError:
    HAS_FASTRTC = False


# ---------------------------
# Markdown rendering (Option A)
# ---------------------------
def _create_markdownit_renderer():
    """Create a markdown-it renderer with plugins if available."""
    if not HAS_MARKDOWN_IT:
        return None
    try:
        markdown_parser = MarkdownIt("gfm-like")

        # Version-agnostic plugin loading: newer mdit_py_plugins releases expose
        # `*_plugin` names, older ones the bare name.
        footnote_module = import_module("mdit_py_plugins.footnote")
        footnote_plugin = getattr(footnote_module, "footnote", None) or footnote_module.footnote_plugin
        markdown_parser.use(footnote_plugin)

        tasklist_module = import_module("mdit_py_plugins.tasklists")
        tasklist_plugin = getattr(tasklist_module, "tasklists", None) or tasklist_module.tasklists_plugin
        markdown_parser.use(tasklist_plugin)

        container_module = import_module("mdit_py_plugins.container")
        container_plugin = getattr(container_module, "container", None) or container_module.container_plugin
        try:
            markdown_parser.use(container_plugin, "details")
        except TypeError:
            markdown_parser.use(lambda m: container_plugin(m, name="details"))

        return markdown_parser
    except Exception:
        return None


def _create_python_markdown_config():
    """Create a Python-Markdown configuration as a fallback."""
    if not HAS_PYTHON_MARKDOWN:
        return None
    extensions = [
        "extra",  # tables + fenced code
        "footnotes",
        "admonition",
        "toc",
        "pymdownx.details",
        "pymdownx.superfences",
        "pymdownx.tasklist",
    ]
    extension_config = {
        "pymdownx.tasklist": {"custom_checkbox": True},
        "toc": {"permalink": True},
    }
    return ("python-markdown", extensions, extension_config, markdown)


# Initialize the markdown engine: prefer markdown-it, fall back to Python-Markdown.
markdown_renderer = _create_markdownit_renderer()
if markdown_renderer:
    markdown_engine = ("markdown-it", markdown_renderer)
else:
    markdown_engine = _create_python_markdown_config()
if not markdown_engine:
    raise ImportError("No markdown processor available")


def _obsidian_rewrites(text: str) -> str:
    # 1) Obsidian image embeds: ![[img.png]] -> ![](file=content/img.png)
    text = re.sub(r'!\[\[([^\]|]+)\]\]', r'![](file=content/\1)', text)

    # 2) Standard Markdown images with relative paths:
    #    ![alt](path.png) -> ![alt](file=path.png); skip http(s) and file= URLs.
    text = re.sub(
        r'!\[([^\]]*)\]\(((?!https?://|file=)[^)]+)\)',
        r'![\1](file=\2)',
        text,
    )

    # 3) Obsidian wiki links (non-image): [[file|label]] / [[file]]
    text = re.sub(r'\[\[([^\]|]+)\|([^\]]+)\]\]', r'[\2](\1)', text)
    text = re.sub(r'\[\[([^\]]+)\]\]', r'[\1](\1)', text)

    # 4) Encode spaces in file= URLs so the browser doesn't choke.
    #    The character class must allow spaces (only ), > and newline terminate
    #    the URL); with \s excluded the match stops at the first space and
    #    nothing ever gets encoded.
    def _enc(m):
        return "file=" + m.group(1).replace(" ", "%20")

    text = re.sub(r'file=([^)>\n]+)', _enc, text)
    return text
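
# A tiny self-check for the rewrites above (illustrative filenames; nothing in
# the app calls this, run it by hand if you touch the regexes):
def _check_obsidian_rewrites() -> None:
    assert _obsidian_rewrites("![[diagram one.png]]") == "![](file=content/diagram%20one.png)"
    assert _obsidian_rewrites("[[Getting Started|docs]]") == "[docs](Getting Started)"
    assert _obsidian_rewrites("![alt](figures/plot.png)") == "![alt](file=figures/plot.png)"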
def markdown_to_html(text: str) -> str:
    """Convert markdown text to HTML using the configured renderer."""
    text = _obsidian_rewrites(text)
    if markdown_engine[0] == "markdown-it":
        renderer = markdown_engine[1]
        return renderer.render(text)
    else:
        engine_type, extensions, extension_config, markdown_module = markdown_engine
        return markdown_module.markdown(
            text,
            extensions=extensions,
            extension_configs=extension_config,
            output_format="html5",
        )


def render_article(article_path: str, component_inserts: dict[str, callable]):
    raw = (
        Path(article_path).read_text(encoding="utf-8")
        if Path(article_path).exists()
        else f"**Missing article**: `{article_path}`."
    )
    parts = re.split(r"\{\{([A-Z_]+)\}\}", raw)
    with gr.Column():
        for i, part in enumerate(parts):
            if i % 2 == 0:
                # Minimal wrapper markup assumed; it matches the .article /
                # .section styles defined in CSS below.
                gr.HTML(f'<div class="article section">{markdown_to_html(part)}</div>')
            else:
                (component_inserts.get(part) or (lambda: gr.HTML(
                    f"<div class='article'><em>Unknown component: {part}</em></div>"
                )))()


# ---------------------------
# Terminal (safe, simplified)
# ---------------------------
def run_shell(cmd: str) -> str:
    # ";" added to the blocklist: the message promises a single command,
    # so chaining should be blocked too.
    banned = ["|", ">", "<", "&&", "||", "`", ";"]
    if any(b in cmd for b in banned):
        return "$ " + cmd + "\nBlocked characters. Use a single command."
    try:
        p = subprocess.run(cmd, shell=True, check=False, capture_output=True, text=True, timeout=30)
        return f"$ {cmd}\n{p.stdout}{p.stderr}"
    except Exception as e:
        return f"$ {cmd}\n{e!r}"


def build_terminal():
    with gr.Group():
        cmd = gr.Textbox(label="Command", value="python -c 'import torch; print(torch.__version__)'")
        run = gr.Button("Run")
        out = gr.Textbox(label="Output", lines=12, interactive=False)
        run.click(run_shell, inputs=cmd, outputs=out)
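
# A stricter alternative to the blocklist above (a sketch, not wired into the
# UI): parse with shlex and skip the shell entirely, so pipes and redirection
# are never interpreted in the first place.
def run_shell_strict(cmd: str) -> str:
    import shlex

    try:
        argv = shlex.split(cmd)
    except ValueError as e:
        return f"$ {cmd}\nParse error: {e}"
    if not argv:
        return "$\nEmpty command."
    try:
        p = subprocess.run(argv, shell=False, check=False, capture_output=True, text=True, timeout=30)
        return f"$ {cmd}\n{p.stdout}{p.stderr}"
    except FileNotFoundError:
        return f"$ {cmd}\nCommand not found: {argv[0]}"
    except Exception as e:
        return f"$ {cmd}\n{e!r}"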
# ---------------------------------------
# Attention Mask Visualizer (Transformers)
# ---------------------------------------
def _import_attention_visualizer():
    try:
        from transformers.utils.attention_visualizer import AttentionMaskVisualizer  # type: ignore
    except Exception as e:
        raise RuntimeError(
            "AttentionMaskVisualizer is unavailable in this Transformers version."
        ) from e
    return AttentionMaskVisualizer


@spaces.GPU(duration=120)
def render_attention_mask(model_id: str, prompt: str) -> str:
    try:
        AttentionMaskVisualizer = _import_attention_visualizer()
        vis = AttentionMaskVisualizer(model_id)
        out = vis(prompt)  # returns embeddable HTML or an object with _repr_html_
        return str(out)
    except Exception as e:
        # Minimal error markup assumed; the original tags were lost.
        return f"<div class='article'><em>Attention visualizer error: {e}</em></div>"


def build_attn_vis():
    with gr.Group():
        with gr.Row():
            model = gr.Dropdown(
                label="Model",
                choices=["openai-community/gpt2", "google/gemma-2-2b"],
                value="openai-community/gpt2",
                allow_custom_value=True,
            )
            prompt = gr.Textbox(label="Prompt", value="You are an assistant. Make sure you print me.")
        go = gr.Button("Render")
        html = gr.HTML()
        go.click(render_attention_mask, inputs=[model, prompt], outputs=html)


# -------------------------------------------------------
# Transformers caching allocator warmup (time vs MiB plot)
# -------------------------------------------------------
def _measure_load_timeline(model_id: str, disable_warmup: bool):
    """Measure memory usage during model loading with/without cache warmup."""
    # Monkeypatch the warmup hook to a no-op; the finally block below always
    # restores the original.
    original_warmup_func = getattr(transformers_modeling, "caching_allocator_warmup", None)
    if disable_warmup and original_warmup_func is not None:
        transformers_modeling.caching_allocator_warmup = lambda *args, **kwargs: None
    try:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        timeline_data = []

        def sample_memory(start_time, stop_event):
            while not stop_event.is_set():
                if device == "cuda":
                    torch.cuda.synchronize()
                    # Use max memory to better capture peaks between samples
                    allocated_memory = torch.cuda.max_memory_allocated()
                    torch.cuda.reset_peak_memory_stats()
                else:
                    allocated_memory = 0
                timeline_data.append({
                    "t": time.perf_counter() - start_time,
                    "MiB": allocated_memory / (1024 ** 2),
                })
                time.sleep(0.02)  # sample frequently (~50 Hz)

        if device == "cuda":
            torch.cuda.empty_cache()
            torch.cuda.reset_peak_memory_stats()
            initial_memory = torch.cuda.memory_allocated()
        else:
            initial_memory = 0

        start_time = time.perf_counter()
        stop_event = threading.Event()
        memory_thread = threading.Thread(target=sample_memory, args=(start_time, stop_event), daemon=True)
        memory_thread.start()

        # Load the model with settings appropriate for the device
        model_kwargs = {"low_cpu_mem_usage": True}
        if device == "cuda":
            model_kwargs.update({
                "torch_dtype": torch.float16,
                "device_map": "cuda:0",
            })
        model = AutoModelForCausalLM.from_pretrained(model_id, **model_kwargs)

        stop_event.set()
        memory_thread.join()

        # Final memory measurement
        if device == "cuda":
            torch.cuda.synchronize()
            final_memory = torch.cuda.memory_allocated()
            timeline_data.append({
                "t": time.perf_counter() - start_time,
                "MiB": final_memory / (1024 ** 2),
            })

        # Clean up
        del model
        if device == "cuda":
            torch.cuda.empty_cache()
            torch.cuda.ipc_collect()
        return timeline_data
    finally:
        if original_warmup_func is not None:
            transformers_modeling.caching_allocator_warmup = original_warmup_func


@spaces.GPU(duration=240)
def profile_warmup_comparison(model_id: str):
    """Profile memory usage with and without cache warmup."""
    if not torch.cuda.is_available():
        # Create dummy data for the CPU demo
        time_points = np.linspace(0, 5, 50)
        base_memory = np.cumsum(np.random.exponential(50, 50))
        warmup_enabled_data = [
            {"t": t, "MiB": mem, "mode": "🚀 Warmup ON (Optimized)"}
            for t, mem in zip(time_points, base_memory * 0.8)
        ]
        warmup_disabled_data = [
            {"t": t, "MiB": mem, "mode": "📈 Warmup OFF (Standard)"}
            for t, mem in zip(time_points, base_memory)
        ]
        return pd.DataFrame(warmup_enabled_data + warmup_disabled_data)

    try:
        warmup_enabled_timeline = _measure_load_timeline(model_id, disable_warmup=False)
        warmup_disabled_timeline = _measure_load_timeline(model_id, disable_warmup=True)

        # Flatten both timelines into one labeled DataFrame
        all_data = []
        all_data.extend([
            {"t": entry["t"], "MiB": entry["MiB"], "mode": "🚀 Warmup ON (Optimized)"}
            for entry in warmup_enabled_timeline
        ])
        all_data.extend([
            {"t": entry["t"], "MiB": entry["MiB"], "mode": "📈 Warmup OFF (Standard)"}
            for entry in warmup_disabled_timeline
        ])
        result_dataframe = pd.DataFrame(all_data)

        # Calculate and log memory savings
        if warmup_enabled_timeline and warmup_disabled_timeline:
            peak_with_warmup = max(entry["MiB"] for entry in warmup_enabled_timeline)
            peak_without_warmup = max(entry["MiB"] for entry in warmup_disabled_timeline)
            if peak_without_warmup > 0:
                savings_percent = (peak_without_warmup - peak_with_warmup) / peak_without_warmup * 100
                print(
                    f"Memory savings: {savings_percent:.1f}% "
                    f"(Peak: {peak_with_warmup:.0f} MiB vs {peak_without_warmup:.0f} MiB)"
                )
        return result_dataframe
    except Exception as error:
        print(f"Error profiling {model_id}: {error}")
        return pd.DataFrame(columns=["t", "MiB", "mode"])


def build_alloc_plot():
    with gr.Group():
        gr.Markdown("### 🚀 Cache Pre-allocator Performance Demo")
        gr.Markdown(
            "Compare model loading with and without transformers' caching allocator warmup. "
            "This demonstrates the memory efficiency improvements."
        )
        with gr.Row():
            model = gr.Dropdown(
                label="Model to Profile",
                choices=[
                    "openai-community/gpt2",
                    "google/gemma-2-2b",
                    "microsoft/DialoGPT-small",
                    "facebook/opt-125m",
                ],
                value="openai-community/gpt2",
                allow_custom_value=True,
                info="Select a model or enter a custom HuggingFace model ID",
            )
            go = gr.Button("🔥 Profile Memory", variant="primary")
        plot = gr.LinePlot(
            x="t",
            y="MiB",
            color="mode",
            overlay_point=True,
            title="Memory Allocation Timeline: Warmup ON vs OFF",
            tooltip=["t", "MiB", "mode"],
            width=900,
            height=450,
            x_title="Time (seconds)",
            y_title="Memory (MiB)",
        )
        gr.Markdown(
            "**Note**: This demo requires GPU access. "
            "The warmup feature reduces peak memory usage during model loading."
        )
        go.click(profile_warmup_comparison, inputs=[model], outputs=plot)
# ---------------------------
# Optional FastRTC preview
# ---------------------------
def build_fastrtc():
    if not HAS_FASTRTC:
        gr.Markdown("Install `fastrtc` to enable this section.")
        return

    def echo_video_frame(frame):
        yield frame

    with gr.Group():
        gr.Markdown("Camera loopback using FastRTC WebRTC. Extend with streaming handlers later.")
        webrtc_component = WebRTC(mode="send-receive", modality="video")
        webrtc_component.stream(
            ReplyOnPause(echo_video_frame),
            inputs=[webrtc_component],
            outputs=[webrtc_component],
            time_limit=60,
        )


# ---------------------------
# Image display functions
# ---------------------------
def build_image(filename):
    def _build():
        # Try both content/ and static/ directories
        for directory in ['content', 'static']:
            filepath = Path(directory) / filename
            if filepath.exists():
                gr.Image(
                    value=str(filepath),
                    show_label=False,
                    interactive=False,
                    show_download_button=False,
                )
                return
        gr.Markdown(f"*Image not found: {filename}*")
    return _build


def build_d3_graph():
    with gr.Group():
        gr.Markdown("### 🔗 Interactive Model Dependency Graph")
        fp = Path("static/d3_dependency_graph.html")
        if fp.exists():
            # The original embed markup was lost; a plain iframe pointing at the
            # Gradio-served static file is assumed here.
            gr.HTML(
                '<iframe src="/file=static/d3_dependency_graph.html" '
                'style="width:100%;height:600px;border:none;"></iframe>'
            )
        else:
            gr.Markdown("⚠️ **D3 dependency graph not found.** Put it at `static/d3_dependency_graph.html`.")


# ---------------------------
# Inserts registry
# ---------------------------
INSERTS = {
    "TERMINAL": build_terminal,
    "ATTN_VIS": build_attn_vis,
    "ALLOC_PLOT": build_alloc_plot,
    "D3_GRAPH": build_d3_graph,
    # Image inserts
    "GRAPH_MODULAR_RELATED_MODELS": build_image("graph_modular_related_models.png"),
    "JACCARD_SIMILARITY_PLOT": build_image("Jaccard_similarity_plot.png"),
    "BLOATEDNESS_VISUALIZER": build_image("Bloatedness_visualizer.png"),
    "MODULAR_CANDIDATES": build_image("modular_candidates.png"),
    "POPULAR_MODELS_BARPLOT": build_image("popular_models_barplot.png"),
    "MODEL_DEBUGGER": build_image("model_debugger.png"),
}
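
# Example `content/article.md` fragment (illustrative only), showing how the
# {{PLACEHOLDER}} tokens consumed by render_article map to the registry above:
#
#   ## Allocator warmup
#   Prose rendered through markdown_to_html...
#
#   {{ALLOC_PLOT}}
#
#   ## Try it yourself
#
#   {{TERMINAL}}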
# ---------------------------
# Layout / CSS / App
# ---------------------------
# highlight.js assets injected into the page. The original tags were lost;
# these CDN URLs and the highlightAll() call are assumptions.
HLJS = """
<link rel="stylesheet"
      href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/github.min.css">
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
<script>window.addEventListener('load', () => hljs.highlightAll());</script>
"""

CSS = """
/* Force light palette + high contrast */
:root, .gradio-container {
  color-scheme: light !important;
  --body-background-fill: #ffffff !important;
  --body-text-color: #0b0f19 !important;          /* main text */
  --body-text-color-subdued: #0b0f19 !important;  /* kill the grey tint */
  --heading-text-color: #0b0f19 !important;
  --link-text-color: #1d4ed8 !important;
  --border-color: #e5e7eb !important;
}

/* Font (slightly heavier by default to avoid “spindly” Inter on Linux) */
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
@font-face {
  font-family: 'Inter var';
  font-style: normal;
  font-weight: 100 900;
  font-display: swap;
  src: url('https://rsms.me/inter/font-files/Inter.var.woff2?v=3.19') format('woff2');
}
html, body, .gradio-container { background: #fff !important; }
.gradio-container { font-family: 'Inter','Inter var',system-ui,-apple-system,Segoe UI,Roboto,sans-serif !important; }

/* Layout */
#layout { display: grid; grid-template-columns: 280px 1fr; gap: 2rem; }
#toc { position: sticky; top: 1rem; height: calc(100vh - 2rem); overflow: auto; padding-right: 1rem; }
#toc a { display: block; padding: .5rem 0; color: #334155; font-size: .9rem; text-decoration: none; font-weight: 500; }
#toc a:hover { color: var(--link-text-color); }

/* HARD override: make sure no parent opacity dulls the article */
.gradio-container .gr-html, .gradio-container .gr-html * { opacity: 1 !important; }

/* Scope body text color and typography to prose only */
.article { color: var(--body-text-color); max-width: 72ch; margin: 0 auto; }
.article p, .article li { font-size: 1.04rem; line-height: 1.85rem; font-weight: 500; }
.article h1, .article h2, .article h3, .article h4 { color: var(--heading-text-color) !important; }
.article h1 { font-weight: 700; font-size: 2.25rem; line-height: 2.6rem; margin: 2rem 0 1.25rem; }
.article h2 { font-weight: 650; font-size: 1.85rem; line-height: 2.25rem; margin: 2rem 0 1rem; }
.article h3 { font-weight: 600; font-size: 1.5rem; line-height: 2rem; margin: 1.5rem 0 .75rem; }
.article a { color: var(--link-text-color) !important; text-decoration: underline; }
.article a:hover { text-decoration: none; }

/* Code blocks (keep container styling, let the hljs theme handle token colors) */
.article pre {
  background: #f8fafc !important;
  border: 1px solid #e2e8f0 !important;
  border-radius: 8px !important;
  padding: 1.25rem !important;
  margin: 1.5rem 0 !important;
  overflow-x: auto !important;
  font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace !important;
  font-size: .92rem !important;
  line-height: 1.6 !important;
}
.article pre code { background: transparent !important; padding: 0 !important; }
/* Let the theme show through */
.hljs { background: transparent !important; }

/* Tenets highlight: any list item that contains an anchor id gets a card look */
.article ol > li.tenet { border-left: 4px solid #1d4ed8; background: #f8fafc; padding: .75rem 1rem; margin: .5rem 0; border-radius: 8px; }
.article ol > li.tenet::marker { color: #1d4ed8; font-weight: 700; }
.article ol > li.tenet code { background: #e0e7ff !important; }

/* Blockquotes, images, rules */
.article blockquote { border-left: 4px solid var(--link-text-color); padding-left: 1rem; margin: 1.25rem 0; color: #334155 !important; font-style: italic; }
.article img { display: block; max-width: 100%; height: auto; margin: 1.25rem auto; border-radius: 8px; box-shadow: 0 6px 20px rgba(0,0,0,.08); }
hr { border: 0; border-top: 1px solid var(--border-color); margin: 2rem 0; }
.section { scroll-margin-top: 80px; }

/* Keep widgets full width */
.gr-form, .gr-panel, .gr-block { max-width: none; }

/* Terminal styling - match light mode */
.gr-textbox textarea {
  background: #f8fafc !important;
  color: #1f2937 !important;
  border: 1px solid var(--border-color) !important;
  border-radius: 8px !important;
  font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace !important;
  font-size: 0.9rem !important;
  line-height: 1.5 !important;
}
.gr-textbox textarea:focus { border-color: var(--link-text-color) !important; box-shadow: 0 0 0 2px rgba(37, 99, 235, 0.1) !important; }
/* Terminal output specifically */
.gr-textbox textarea[readonly] { background: #111827 !important; color: #f9fafb !important; border: 1px solid #374151 !important; font-weight: 500 !important; }
/* Terminal input */
.gr-textbox:not(textarea[readonly]) textarea { background: #ffffff !important; color: #1f2937 !important; border: 1px solid var(--border-color) !important; }

/* Button styling */
.gr-button { background: var(--link-text-color) !important; color: white !important; border: none !important; border-radius: 6px !important; font-weight: 600 !important; padding: 0.5rem 1rem !important; }
.gr-button:hover { background: #1d4ed8 !important; }

/* Dropdown styling - fix contrast and visibility */
.gr-dropdown { background: #ffffff !important; border: 1px solid var(--border-color) !important; border-radius: 8px !important; }
.gr-dropdown .gr-box { background: #ffffff !important; border: 1px solid var(--border-color) !important; }
.gr-dropdown input { background: #ffffff !important; color: #1f2937 !important; border: none !important; font-weight: 500 !important; }
.gr-dropdown .options { background: #ffffff !important; border: 1px solid var(--border-color) !important; border-radius: 8px !important; box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1) !important; }
.gr-dropdown .option { background: #ffffff !important; color: #1f2937 !important; padding: 0.75rem !important; font-weight: 500 !important; }
.gr-dropdown .option:hover { background: #f8fafc !important; color: #1f2937 !important; }
.gr-dropdown .option.selected { background: var(--link-text-color) !important; color: white !important; }
/* Fix label styling */
.gr-dropdown label { color: #374151 !important; font-weight: 600 !important; margin-bottom: 0.5rem !important; }

/* Fix contrast for all interactive components */
.gr-form, .gr-panel, .gr-block { background: #ffffff !important; border: 1px solid var(--border-color) !important; border-radius: 8px !important; }
/* Fix text inputs */
.gr-textbox input { background: #ffffff !important; color: #1f2937 !important; border: 1px solid var(--border-color) !important; font-weight: 500 !important; }
/* Fix all labels - but not in article */
.gr-form:not(.article) label, .gr-panel:not(.article) label, .gr-block:not(.article) label { color: #374151 !important; font-weight: 600 !important; }
/* Fix info text - but not in article */
.gr-form:not(.article) .gr-info, .gr-panel:not(.article) .gr-info { color: #6b7280 !important; font-weight: 500 !important; }
/* Fix plot styling */
.gr-plot { border: 1px solid var(--border-color) !important; border-radius: 8px !important; background: #ffffff !important; }

/* Fix markdown in components - but protect article content */
.gr-markdown:not(.article):not(.article *) { color: #1f2937 !important; }
.gr-markdown:not(.article):not(.article *) h1,
.gr-markdown:not(.article):not(.article *) h2,
.gr-markdown:not(.article):not(.article *) h3,
.gr-markdown:not(.article):not(.article *) h4 { color: #111827 !important; font-weight: 600 !important; }
"""
with gr.Blocks(css=CSS, fill_height=True, title="Interactive Blog — Transformers Feature Showcase") as demo:
    gr.HTML(HLJS)
    # Page header and the section headings below use minimal markup scoped to
    # the .article / .section styles; the original tags were lost.
    gr.HTML(
        """
        <div class="article">
          <h1>Transformers Feature Showcase</h1>
          <p>Interactive, scrollable demo.</p>
        </div>
        """
    )
    with gr.Row(elem_id="layout"):
        with gr.Column(scale=0):
            # Sticky table of contents, styled via #toc above. The original
            # anchor list was lost, so the container is left empty.
            gr.HTML('<nav id="toc"></nav>')
        with gr.Column():
            gr.HTML('<h2 class="section" id="article">Article</h2>')
            # Author in Obsidian. Put {{ALLOC_PLOT}}, {{ATTN_VIS}}, {{TERMINAL}}
            # where you want widgets.
            render_article("content/article.md", INSERTS)
            gr.HTML("<hr>")
            gr.HTML('<h2 class="section" id="fastrtc">FastRTC (preview)</h2>')
            build_fastrtc()


if __name__ == "__main__":
    demo.launch()
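
# Assumed runtime dependencies (illustrative, inferred from the imports above;
# not from a pinned requirements file):
#   gradio, spaces, torch, transformers, numpy, pandas,
#   markdown-it-py + mdit-py-plugins (or markdown + pymdown-extensions as fallback),
#   fastrtc (optional, for the WebRTC preview)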