# Standard library imports
import re
import shlex
import subprocess
import threading
import time
from pathlib import Path
# Third-party imports
import gradio as gr
import numpy as np
import pandas as pd
import torch
import spaces
from transformers import AutoModelForCausalLM
from transformers import modeling_utils as transformers_modeling
# Optional imports for markdown processing
try:
from importlib import import_module
from markdown_it import MarkdownIt
HAS_MARKDOWN_IT = True
except ImportError:
HAS_MARKDOWN_IT = False
try:
import markdown
HAS_PYTHON_MARKDOWN = True
except ImportError:
HAS_PYTHON_MARKDOWN = False
try:
from fastrtc import WebRTC, ReplyOnPause
HAS_FASTRTC = True
except ImportError:
HAS_FASTRTC = False
# ---------------------------
# Markdown rendering (Option A)
# ---------------------------
def _create_markdownit_renderer():
"""Create markdown-it renderer with plugins if available."""
if not HAS_MARKDOWN_IT:
return None
try:
markdown_parser = MarkdownIt("gfm-like")
# Version-agnostic plugin loading
footnote_module = import_module("mdit_py_plugins.footnote")
footnote_plugin = getattr(footnote_module, "footnote", None) or getattr(footnote_module, "footnote_plugin")
markdown_parser.use(footnote_plugin)
tasklist_module = import_module("mdit_py_plugins.tasklists")
tasklist_plugin = getattr(tasklist_module, "tasklists", None) or getattr(tasklist_module, "tasklists_plugin")
markdown_parser.use(tasklist_plugin)
container_module = import_module("mdit_py_plugins.container")
container_plugin = getattr(container_module, "container", None) or getattr(container_module, "container_plugin")
try:
markdown_parser.use(container_plugin, "details")
except TypeError:
markdown_parser.use(lambda m: container_plugin(m, name="details"))
return markdown_parser
except Exception:
return None
def _create_python_markdown_config():
"""Create Python-Markdown configuration as fallback."""
if not HAS_PYTHON_MARKDOWN:
return None
extensions = [
"extra", # tables + fenced code
"footnotes",
"admonition",
"toc",
"pymdownx.details",
"pymdownx.superfences",
"pymdownx.tasklist",
]
extension_config = {
"pymdownx.tasklist": {"custom_checkbox": True},
"toc": {"permalink": True}
}
return ("python-markdown", extensions, extension_config, markdown)
# Initialize markdown engine
markdown_renderer = _create_markdownit_renderer()
if markdown_renderer:
markdown_engine = ("markdown-it", markdown_renderer)
else:
markdown_engine = _create_python_markdown_config()
if not markdown_engine:
raise ImportError("No markdown processor available")
def _obsidian_rewrites(text: str) -> str:
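    """Rewrite Obsidian-flavored syntax (image embeds, wiki links) as standard Markdown."""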
# 1) Obsidian image embeds: ![[img.png]] -> ![](file=content/img.png)
text = re.sub(r'!\[\[([^\]|]+)\]\]', r'![](file=content/\1)', text)
# 2) Standard Markdown images with relative paths: ![alt](path.png) -> ![alt](file=path.png)
# Skip if already http(s) or file=
text = re.sub(
r'!\[([^\]]*)\]\(((?!https?://|file=)[^)]+)\)',
r'![\1](file=\2)',
text,
)
# 3) Obsidian wiki links (non-image): [[file|label]] / [[file]]
text = re.sub(r'\[\[([^\]|]+)\|([^\]]+)\]\]', r'[\2](\1)', text)
text = re.sub(r'\[\[([^\]]+)\]\]', r'[\1](\1)', text)
# 4) Encode spaces in file= URLs so the browser doesn’t choke
def _enc(m):
return "file=" + m.group(1).replace(" ", "%20")
text = re.sub(r'file=([^)>\s]+)', _enc, text)
return text
def markdown_to_html(text: str) -> str:
"""Convert markdown text to HTML using the configured renderer."""
text = _obsidian_rewrites(text)
if markdown_engine[0] == "markdown-it":
renderer = markdown_engine[1]
return renderer.render(text)
else:
engine_type, extensions, extension_config, markdown_module = markdown_engine
return markdown_module.markdown(
text,
extensions=extensions,
extension_configs=extension_config,
output_format="html5"
)
def render_article(article_path: str, component_inserts: dict[str, callable]):
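    """Render a Markdown article, swapping {{TOKEN}} placeholders for Gradio components.

    re.split with a capture group alternates prose (even indices) and placeholder
    names (odd indices); names are looked up in component_inserts.
    """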
    path = Path(article_path)
    raw = path.read_text(encoding="utf-8") if path.exists() else f"**Missing article**: `{article_path}`."
parts = re.split(r"\{\{([A-Z_]+)\}\}", raw)
with gr.Column():
for i, part in enumerate(parts):
if i % 2 == 0:
gr.HTML(f'<div class="article">{markdown_to_html(part)}</div>')
            else:
                builder = component_inserts.get(part)
                if builder:
                    builder()
                else:
                    gr.HTML(f"<p><em>Unknown component: {part}</em></p>")
# ---------------------------
# Terminal (safe, simplified)
# ---------------------------
def run_shell(cmd: str) -> str:
    """Run a single command with a 30-second timeout, without invoking a shell."""
    banned = ["|", ">", "<", "&&", "||", "`"]
    if any(b in cmd for b in banned):
        return "$ " + cmd + "\nBlocked characters. Use a single command."
    try:
        # shlex.split + shell=False keeps quoting intact while making shell
        # metacharacters (";", "$()", newlines) inert rather than merely blocklisted.
        p = subprocess.run(shlex.split(cmd), shell=False, check=False, capture_output=True, text=True, timeout=30)
        return f"$ {cmd}\n{p.stdout}{p.stderr}"
    except Exception as e:
        return f"$ {cmd}\n{e!r}"
def build_terminal():
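    """One-shot terminal widget: command textbox in, captured stdout/stderr out."""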
with gr.Group():
cmd = gr.Textbox(label="Command", value="python -c 'import torch; print(torch.__version__)'")
run = gr.Button("Run")
out = gr.Textbox(label="Output", lines=12, interactive=False)
run.click(run_shell, inputs=cmd, outputs=out)
# ---------------------------------------
# Attention Mask Visualizer (Transformers)
# ---------------------------------------
def _import_attention_visualizer():
try:
from transformers.utils.attention_visualizer import AttentionMaskVisualizer # type: ignore
except Exception as e:
raise RuntimeError(
"AttentionMaskVisualizer is unavailable in this Transformers version."
) from e
return AttentionMaskVisualizer
@spaces.GPU(duration=120)
def render_attention_mask(model_id: str, prompt: str) -> str:
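    """Build the attention-mask visualization for `prompt`, returning HTML (or an error message)."""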
try:
AttentionMaskVisualizer = _import_attention_visualizer()
vis = AttentionMaskVisualizer(model_id)
out = vis(prompt) # returns embeddable HTML or object with _repr_html_
return str(out)
except Exception as e:
return f"<p>Attention visualizer error: {e}</p>"
def build_attn_vis():
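    """UI for the attention-mask visualizer: model dropdown, prompt box, rendered HTML."""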
with gr.Group():
with gr.Row():
model = gr.Dropdown(
label="Model",
choices=["openai-community/gpt2", "google/gemma-2-2b"],
value="openai-community/gpt2",
allow_custom_value=True,
)
prompt = gr.Textbox(label="Prompt", value="You are an assistant. Make sure you print me.")
go = gr.Button("Render")
html = gr.HTML()
go.click(render_attention_mask, inputs=[model, prompt], outputs=html)
# -------------------------------------------------------
# Transformers caching allocator warmup (time vs MiB plot)
# -------------------------------------------------------
def _measure_load_timeline(model_id: str, disable_warmup: bool):
"""Measure memory usage during model loading with/without cache warmup."""
original_warmup_func = getattr(transformers_modeling, "caching_allocator_warmup", None)
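    # Swap the warmup hook for a no-op so the unoptimized load path can be measured.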
if disable_warmup and original_warmup_func is not None:
transformers_modeling.caching_allocator_warmup = lambda *args, **kwargs: None
try:
device = "cuda" if torch.cuda.is_available() else "cpu"
timeline_data = []
def sample_memory(start_time, stop_event):
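            """Sample CUDA peak memory from a background thread every ~20 ms."""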
while not stop_event.is_set():
if device == "cuda":
torch.cuda.synchronize()
# Use max memory to capture peaks better
allocated_memory = torch.cuda.max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
else:
allocated_memory = 0
timeline_data.append({
"t": time.perf_counter() - start_time,
"MiB": allocated_memory / (1024**2)
})
time.sleep(0.02) # Sample more frequently
        # Reset CUDA memory stats so the timeline starts from a clean baseline.
        if device == "cuda":
            torch.cuda.empty_cache()
            torch.cuda.reset_peak_memory_stats()
start_time = time.perf_counter()
stop_event = threading.Event()
memory_thread = threading.Thread(target=sample_memory, args=(start_time, stop_event), daemon=True)
memory_thread.start()
# Load model with appropriate settings
model_kwargs = {"low_cpu_mem_usage": True}
if device == "cuda":
model_kwargs.update({
"torch_dtype": torch.float16,
"device_map": "cuda:0"
})
model = AutoModelForCausalLM.from_pretrained(model_id, **model_kwargs)
stop_event.set()
memory_thread.join()
# Final memory measurement
if device == "cuda":
torch.cuda.synchronize()
final_memory = torch.cuda.memory_allocated()
timeline_data.append({
"t": time.perf_counter() - start_time,
"MiB": final_memory / (1024**2)
})
# Clean up
del model
if device == "cuda":
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
return timeline_data
finally:
if original_warmup_func is not None:
transformers_modeling.caching_allocator_warmup = original_warmup_func
@spaces.GPU(duration=240)
def profile_warmup_comparison(model_id: str):
"""Profile memory usage with and without cache warmup."""
if not torch.cuda.is_available():
# Create dummy data for CPU demo
time_points = np.linspace(0, 5, 50)
base_memory = np.cumsum(np.random.exponential(50, 50))
warmup_enabled_data = [
{"t": t, "MiB": mem, "mode": "🚀 Warmup ON (Optimized)"}
for t, mem in zip(time_points, base_memory * 0.8)
]
warmup_disabled_data = [
{"t": t, "MiB": mem, "mode": "📈 Warmup OFF (Standard)"}
for t, mem in zip(time_points, base_memory)
]
return pd.DataFrame(warmup_enabled_data + warmup_disabled_data)
try:
warmup_enabled_timeline = _measure_load_timeline(model_id, disable_warmup=False)
warmup_disabled_timeline = _measure_load_timeline(model_id, disable_warmup=True)
# Create DataFrame with better labeling
all_data = []
all_data.extend([
{"t": entry["t"], "MiB": entry["MiB"], "mode": "🚀 Warmup ON (Optimized)"}
for entry in warmup_enabled_timeline
])
all_data.extend([
{"t": entry["t"], "MiB": entry["MiB"], "mode": "📈 Warmup OFF (Standard)"}
for entry in warmup_disabled_timeline
])
result_dataframe = pd.DataFrame(all_data)
# Calculate and log memory savings
if warmup_enabled_timeline and warmup_disabled_timeline:
peak_with_warmup = max(entry["MiB"] for entry in warmup_enabled_timeline)
peak_without_warmup = max(entry["MiB"] for entry in warmup_disabled_timeline)
if peak_without_warmup > 0:
savings_percent = ((peak_without_warmup - peak_with_warmup) / peak_without_warmup * 100)
print(f"Memory savings: {savings_percent:.1f}% (Peak: {peak_with_warmup:.0f} MiB vs {peak_without_warmup:.0f} MiB)")
return result_dataframe
except Exception as error:
print(f"Error profiling {model_id}: {error}")
return pd.DataFrame(columns=["t", "MiB", "mode"])
def build_alloc_plot():
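    """UI for the warmup comparison: model picker, trigger button, and line plot."""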
with gr.Group():
gr.Markdown("### 🚀 Cache Pre-allocator Performance Demo")
gr.Markdown("Compare model loading with and without transformers' caching allocator warmup. This demonstrates the memory efficiency improvements.")
with gr.Row():
model = gr.Dropdown(
label="Model to Profile",
choices=[
"openai-community/gpt2",
"google/gemma-2-2b",
"microsoft/DialoGPT-small",
"facebook/opt-125m"
],
value="openai-community/gpt2",
allow_custom_value=True,
info="Select a model or enter a custom HuggingFace model ID"
)
go = gr.Button("🔥 Profile Memory", variant="primary")
plot = gr.LinePlot(
x="t", y="MiB", color="mode", overlay_point=True,
title="Memory Allocation Timeline: Warmup ON vs OFF",
tooltip=["t", "MiB", "mode"],
width=900, height=450,
x_title="Time (seconds)",
y_title="Memory (MiB)"
)
gr.Markdown("**Note**: This demo requires GPU access. The warmup feature reduces peak memory usage during model loading.")
go.click(profile_warmup_comparison, inputs=[model], outputs=plot)
# ---------------------------
# Optional FastRTC preview
# ---------------------------
def build_fastrtc():
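    """Camera-echo demo over WebRTC; shows an install hint when fastrtc is missing."""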
if not HAS_FASTRTC:
gr.Markdown("Install `fastrtc` to enable this section.")
return
    def echo_video_frame(frame):
        # Video handlers take a frame and return a frame; ReplyOnPause is an
        # audio-specific handler, so the callback is passed to .stream() directly.
        return frame
    with gr.Group():
        gr.Markdown("Camera loopback using FastRTC WebRTC. Extend with streaming handlers later.")
        webrtc_component = WebRTC(mode="send-receive", modality="video")
        webrtc_component.stream(echo_video_frame, inputs=[webrtc_component], outputs=[webrtc_component], time_limit=60)
# ---------------------------
# Image display functions
# ---------------------------
def build_image(filename):
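    """Return a zero-arg builder that renders `filename` from content/ or static/."""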
def _build():
# Try both content/ and static/ directories
for directory in ['content', 'static']:
filepath = Path(directory) / filename
if filepath.exists():
gr.Image(value=str(filepath), show_label=False, interactive=False, show_download_button=False)
return
gr.Markdown(f"*Image not found: {filename}*")
return _build
def build_d3_graph():
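    """Embed the pre-built D3 dependency graph in a sandboxed iframe."""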
with gr.Group():
gr.Markdown("### 🔗 Interactive Model Dependency Graph")
fp = Path("static/d3_dependency_graph.html")
if fp.exists():
gr.HTML(
"""
<iframe src="file=static/d3_dependency_graph.html"
sandbox="allow-scripts allow-same-origin"
style="width:100%;height:640px;border:1px solid #e2e8f0;border-radius:8px"
loading="lazy"></iframe>
"""
)
else:
gr.Markdown("⚠️ **D3 dependency graph not found.** Put it at `static/d3_dependency_graph.html`.")
# ---------------------------
# Inserts registry
# ---------------------------
INSERTS = {
"TERMINAL": build_terminal,
"ATTN_VIS": build_attn_vis,
"ALLOC_PLOT": build_alloc_plot,
"D3_GRAPH": build_d3_graph,
# Image inserts
"GRAPH_MODULAR_RELATED_MODELS": build_image("graph_modular_related_models.png"),
"JACCARD_SIMILARITY_PLOT": build_image("Jaccard_similarity_plot.png"),
"BLOATEDNESS_VISUALIZER": build_image("Bloatedness_visualizer.png"),
"MODULAR_CANDIDATES": build_image("modular_candidates.png"),
"POPULAR_MODELS_BARPLOT": build_image("popular_models_barplot.png"),
"MODEL_DEBUGGER": build_image("model_debugger.png"),
}
# ---------------------------
# Layout / CSS / App
# ---------------------------
HLJS = """
<link rel="stylesheet"
href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/github.min.css">
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/languages/python.min.js"></script>
<script>
(function(){
function highlightAll() {
document.querySelectorAll('pre code').forEach((el) => { hljs.highlightElement(el); });
document.querySelectorAll('.article ol > li').forEach((li) => {
if (li.querySelector(':scope > a[id]')) li.classList.add('tenet');
});
}
highlightAll();
new MutationObserver(highlightAll).observe(document.body, {subtree: true, childList: true});
})();
</script>
"""
CSS = """
/* @import must precede all other rules, so the Inter font import comes first. */
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
/* Force light palette + high contrast */
:root,
.gradio-container {
color-scheme: light !important;
--body-background-fill: #ffffff !important;
--body-text-color: #0b0f19 !important; /* main text */
--body-text-color-subdued: #0b0f19 !important; /* kill the grey tint */
--heading-text-color: #0b0f19 !important;
--link-text-color: #1d4ed8 !important;
--border-color: #e5e7eb !important;
}
/* Font (slightly heavier by default to avoid “spindly” Inter on Linux); imported above. */
@font-face {
font-family: 'Inter var';
font-style: normal;
font-weight: 100 900;
font-display: swap;
src: url('https://rsms.me/inter/font-files/Inter.var.woff2?v=3.19') format('woff2');
}
html, body, .gradio-container { background: #fff !important; }
.gradio-container { font-family: 'Inter','Inter var',system-ui,-apple-system,Segoe UI,Roboto,sans-serif !important; }
/* Layout */
#layout { display: grid; grid-template-columns: 280px 1fr; gap: 2rem; }
#toc { position: sticky; top: 1rem; height: calc(100vh - 2rem); overflow: auto; padding-right: 1rem; }
#toc a { display: block; padding: .5rem 0; color: #334155; font-size: .9rem; text-decoration: none; font-weight: 500; }
#toc a:hover { color: var(--link-text-color); }
/* HARD override: make sure no parent opacity dulls the article */
.gradio-container .gr-html,
.gradio-container .gr-html * {
opacity: 1 !important;
}
/* scope body text color to prose only */
.article { color: var(--body-text-color); }
/* Scope article typography */
.article { max-width: 72ch; margin: 0 auto; }
.article p, .article li { font-size: 1.04rem; line-height: 1.85rem; font-weight: 500; }
.article h1, .article h2, .article h3, .article h4 { color: var(--heading-text-color) !important; }
.article h1 { font-weight: 700; font-size: 2.25rem; line-height: 2.6rem; margin: 2rem 0 1.25rem; }
.article h2 { font-weight: 650; font-size: 1.85rem; line-height: 2.25rem; margin: 2rem 0 1rem; }
.article h3 { font-weight: 600; font-size: 1.5rem; line-height: 2rem; margin: 1.5rem 0 .75rem; }
.article a { color: var(--link-text-color) !important; text-decoration: underline; }
.article a:hover { text-decoration: none; }
/* Code blocks (keep container styling, let hljs theme handle token colors) */
.article pre {
background: #f8fafc !important;
border: 1px solid #e2e8f0 !important;
border-radius: 8px !important;
padding: 1.25rem !important;
margin: 1.5rem 0 !important;
overflow-x: auto !important;
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace !important;
font-size: .92rem !important;
line-height: 1.6 !important;
}
.article pre code { background: transparent !important; padding: 0 !important; }
/* Let the theme show through */
.hljs { background: transparent !important; }
/* Tenets highlight: any list item that contains an anchor id gets a card look */
.article ol > li.tenet {
border-left: 4px solid #1d4ed8;
background: #f8fafc;
padding: .75rem 1rem;
margin: .5rem 0;
border-radius: 8px;
}
.article ol > li.tenet::marker { color: #1d4ed8; font-weight: 700; }
.article ol > li.tenet code { background: #e0e7ff !important; }
/* Blockquotes, images, rules */
.article blockquote { border-left: 4px solid var(--link-text-color); padding-left: 1rem; margin: 1.25rem 0; color: #334155 !important; font-style: italic; }
.article img { display: block; max-width: 100%; height: auto; margin: 1.25rem auto; border-radius: 8px; box-shadow: 0 6px 20px rgba(0,0,0,.08); }
hr { border: 0; border-top: 1px solid var(--border-color); margin: 2rem 0; }
.section { scroll-margin-top: 80px; }
/* Keep widgets full width */
.gr-form, .gr-panel, .gr-block { max-width: none; }
/* Terminal styling - match light mode */
.gr-textbox textarea {
background: #f8fafc !important;
color: #1f2937 !important;
border: 1px solid var(--border-color) !important;
border-radius: 8px !important;
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace !important;
font-size: 0.9rem !important;
line-height: 1.5 !important;
}
.gr-textbox textarea:focus {
border-color: var(--link-text-color) !important;
box-shadow: 0 0 0 2px rgba(37, 99, 235, 0.1) !important;
}
/* Terminal output specifically */
.gr-textbox textarea[readonly] {
background: #111827 !important;
color: #f9fafb !important;
border: 1px solid #374151 !important;
font-weight: 500 !important;
}
/* Terminal input */
.gr-textbox textarea:not([readonly]) {
background: #ffffff !important;
color: #1f2937 !important;
border: 1px solid var(--border-color) !important;
}
/* Button styling */
.gr-button {
background: var(--link-text-color) !important;
color: white !important;
border: none !important;
border-radius: 6px !important;
font-weight: 600 !important;
padding: 0.5rem 1rem !important;
}
.gr-button:hover {
background: #1d4ed8 !important;
}
/* Dropdown styling - fix contrast and visibility */
.gr-dropdown {
background: #ffffff !important;
border: 1px solid var(--border-color) !important;
border-radius: 8px !important;
}
.gr-dropdown .gr-box {
background: #ffffff !important;
border: 1px solid var(--border-color) !important;
}
.gr-dropdown input {
background: #ffffff !important;
color: #1f2937 !important;
border: none !important;
font-weight: 500 !important;
}
.gr-dropdown .options {
background: #ffffff !important;
border: 1px solid var(--border-color) !important;
border-radius: 8px !important;
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1) !important;
}
.gr-dropdown .option {
background: #ffffff !important;
color: #1f2937 !important;
padding: 0.75rem !important;
font-weight: 500 !important;
}
.gr-dropdown .option:hover {
background: #f8fafc !important;
color: #1f2937 !important;
}
.gr-dropdown .option.selected {
background: var(--link-text-color) !important;
color: white !important;
}
/* Fix label styling */
.gr-dropdown label {
color: #374151 !important;
font-weight: 600 !important;
margin-bottom: 0.5rem !important;
}
/* Fix contrast for all interactive components */
.gr-form, .gr-panel, .gr-block {
background: #ffffff !important;
border: 1px solid var(--border-color) !important;
border-radius: 8px !important;
}
/* Fix text inputs */
.gr-textbox input {
background: #ffffff !important;
color: #1f2937 !important;
border: 1px solid var(--border-color) !important;
font-weight: 500 !important;
}
/* Fix all labels - but not in article */
.gr-form:not(.article) label,
.gr-panel:not(.article) label,
.gr-block:not(.article) label {
color: #374151 !important;
font-weight: 600 !important;
}
/* Fix info text - but not in article */
.gr-form:not(.article) .gr-info,
.gr-panel:not(.article) .gr-info {
color: #6b7280 !important;
font-weight: 500 !important;
}
/* Fix plot styling */
.gr-plot {
border: 1px solid var(--border-color) !important;
border-radius: 8px !important;
background: #ffffff !important;
}
/* Fix markdown in components - but protect article content */
.gr-markdown:not(.article):not(.article *) {
color: #1f2937 !important;
}
.gr-markdown:not(.article):not(.article *) h1,
.gr-markdown:not(.article):not(.article *) h2,
.gr-markdown:not(.article):not(.article *) h3,
.gr-markdown:not(.article):not(.article *) h4 {
color: #111827 !important;
font-weight: 600 !important;
}
"""
with gr.Blocks(css=CSS, fill_height=True, title="Interactive Blog — Transformers Feature Showcase") as demo:
gr.HTML(HLJS)
gr.HTML("<h1>Transformers Feature Showcase</h1><p>Interactive, scrollable demo.</p>")
with gr.Row(elem_id="layout"):
with gr.Column(scale=0):
gr.HTML(
"""
<nav id="toc">
<h3>Sections</h3>
<a href="#article">Article</a>
<a href="#rtc">FastRTC (preview)</a>
</nav>
"""
)
with gr.Column():
gr.HTML('<h2 id="article" class="section">Article</h2>')
# Author in Obsidian. Put {{ALLOC_PLOT}}, {{ATTN_VIS}}, {{TERMINAL}} where you want widgets.
render_article("content/article.md", INSERTS)
gr.HTML("<hr/>")
gr.HTML('<h2 id="rtc" class="section">FastRTC (preview)</h2>')
build_fastrtc()
if __name__ == "__main__":
    # `file=` URLs (article images, the D3 iframe) resolve only if Gradio is
    # allowed to serve these directories.
    demo.launch(allowed_paths=["content", "static"])