import re
import sys

import gradio as gr
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

from config.constants import COLUMN_MAPPINGS, COLUMN_ORDER, TYPE_EMOJI, DISCARDED_MODELS


def model_hyperlink(link, model_name, release, thinking=False):
    # Render the model name as an HTML link, with optional "reasoning" and "new"
    # badge spans appended depending on the release and thinking mode.
    ret = f'<a target="_blank" href="{link}">{model_name}</a>'
    new_badge = ' <span>new</span>'
    reasoning_badge = ' <span>reasoning</span>'
    if release == "V3":  # show the "new" badge only for the latest release
        return ret + reasoning_badge + new_badge if thinking == "Reasoning" else ret + new_badge
    else:
        return ret + reasoning_badge if thinking == "Reasoning" else ret


def extract_name_from_link(html: str) -> str:
    """Extracts the model name from the HTML generated by model_hyperlink()."""
    if not isinstance(html, str):
        return html
    match = re.search(r'<a[^>]*>(.*?)</a>', html)
    if match:
        return match.group(1).strip()
    # Fallback: strip any remaining tags and return the plain text.
    return re.sub(r'<[^>]+>', '', html).strip()


def handle_special_cases(benchmark, metric):
    # Keep the benchmark and metric selections consistent:
    # Exact Matching (EM) implies RTL-Repo, and vice versa.
    if metric == "Exact Matching (EM)":
        benchmark = "RTL-Repo"
    elif benchmark == "RTL-Repo":
        metric = "Exact Matching (EM)"
    return benchmark, metric


def filter_RTLRepo(subset: pd.DataFrame, name: str = "") -> pd.DataFrame:
    if subset.empty:
        return pd.DataFrame(columns=["Type", "Model", "Params", "Exact Matching (EM)"])
    # Drop entries with negative scores.
    subset = subset.drop(subset[subset.Score < 0.0].index)
    # Check again if empty after filtering.
    if subset.empty:
        return pd.DataFrame(columns=["Type", "Model", "Params", "Exact Matching (EM)"])
    details = subset[["Model", "Model URL", "Model Type", "Params", "Release", "Thinking"]].drop_duplicates(
        "Model"
    )
    filtered_df = subset[["Model", "Score"]].rename(columns={"Score": "Exact Matching (EM)"})
    filtered_df = pd.merge(filtered_df, details, on="Model", how="left")
    filtered_df["Model"] = filtered_df.apply(
        lambda row: model_hyperlink(row["Model URL"], row["Model"], row["Release"]),
        axis=1,
    )
    filtered_df["Type"] = filtered_df["Model Type"].map(lambda x: TYPE_EMOJI.get(x, ""))
    filtered_df = filtered_df[["Type", "Model", "Params", "Exact Matching (EM)"]]
    filtered_df = filtered_df.sort_values(by="Exact Matching (EM)", ascending=False).reset_index(drop=True)
    if name == "Other Models":
        filtered_df["Date Discarded"] = filtered_df["Model"].apply(
            lambda x: DISCARDED_MODELS.get(extract_name_from_link(x), "N/A")
        )
        # Reorder to put Date Discarded between Params and Exact Matching (EM).
        cols = ["Type", "Model", "Params", "Date Discarded", "Exact Matching (EM)"]
        filtered_df = filtered_df[[c for c in cols if c in filtered_df.columns]]
    return filtered_df
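# Hedged usage sketch (not wired into the app): round-trips a model name through
# model_hyperlink() and extract_name_from_link(). The URL and model name are
# made-up placeholders, and the exact badge markup generated above is a minimal
# assumption, so treat this as an illustration rather than a spec of the HTML.
def _demo_hyperlink_roundtrip() -> bool:
    html = model_hyperlink("https://example.com/models/demo", "DemoModel-7B", "V3", "Reasoning")
    # The anchor text should survive the badge spans appended after the </a> tag.
    return extract_name_from_link(html) == "DemoModel-7B"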
model_hyperlink(row["Model URL"], row["Model"], row["Release"], row["Thinking"]), axis=1, ) pivot_df["Type"] = pivot_df["Model Type"].map(lambda x: TYPE_EMOJI.get(x, "")) if all(col in pivot_df.columns for col in ["Power", "Performance", "Area"]): pivot_df["Post-Synthesis (PSQ)"] = pivot_df[["Power", "Performance", "Area"]].mean(axis=1).round(2) pivot_df.rename(columns=COLUMN_MAPPINGS, inplace=True) pivot_df = pivot_df[[col for col in COLUMN_ORDER if col in pivot_df.columns]] if "Functionality" in pivot_df.columns: pivot_df = pivot_df.sort_values(by="Functionality", ascending=False).reset_index(drop=True) if name == "Other Models": pivot_df["Date Discarded"] = pivot_df["Model"].apply(lambda x: DISCARDED_MODELS.get(extract_name_from_link(x), "N/A")) # reorder to put Date Discarded between Params Syntax cols = ["Type", "Model", "Parameters (B)", "Date Discarded", "Syntax", "Functionality", "Synthesis", "Post-Synthesis"] pivot_df = pivot_df[[c for c in cols if c in pivot_df.columns]] return pivot_df def custom_agg_s2r(vals): if len(vals) == 2: s2r_val = vals.iloc[0] rtllm_val = vals.iloc[1] w1 = 155 w2 = 47 result = (w1 * s2r_val + w2 * rtllm_val) / (w1 + w2) else: result = vals.iloc[0] return round(result, 2) def custom_agg_cc(vals): if len(vals) == 2: veval_val = vals.iloc[0] vgen_val = vals.iloc[1] w1 = 155 w2 = 17 result = (w1 * veval_val + w2 * vgen_val) / (w1 + w2) else: result = vals.iloc[0] return round(result, 2) def filter_bench_all(subset: pd.DataFrame, df_agg=None, agg_column=None, name=str) -> pd.DataFrame: if subset.empty: return pd.DataFrame(columns=COLUMN_ORDER) details = subset[["Model", "Model URL", "Model Type", "Params", "Release", "Thinking"]].drop_duplicates( "Model" ) if "RTLLM" in subset["Benchmark"].unique(): pivot_df = ( subset.pivot_table(index="Model", columns="Metric", values="Score", aggfunc=custom_agg_s2r) .reset_index() .round(2) ) else: pivot_df = ( subset.pivot_table(index="Model", columns="Metric", values="Score", aggfunc=custom_agg_cc) .reset_index() .round(2) ) pivot_df = pd.merge(pivot_df, details, on="Model", how="left") pivot_df["Model"] = pivot_df.apply( lambda row: model_hyperlink(row["Model URL"], row["Model"], row["Release"], row["Thinking"]), axis=1, ) pivot_df["Type"] = pivot_df["Model Type"].map(lambda x: TYPE_EMOJI.get(x, "")) if all(col in pivot_df.columns for col in ["Power", "Performance", "Area"]): pivot_df["Post-Synthesis (PSQ)"] = pivot_df[["Power", "Performance", "Area"]].mean(axis=1).round(2) pivot_df.rename(columns=COLUMN_MAPPINGS, inplace=True) pivot_df = pivot_df[[col for col in COLUMN_ORDER if col in pivot_df.columns]] if "Functionality" in pivot_df.columns: pivot_df = pivot_df.sort_values(by="Functionality", ascending=False).reset_index(drop=True) if name == "Other Models": pivot_df["Date Discarded"] = pivot_df["Model"].apply(lambda x: DISCARDED_MODELS.get(extract_name_from_link(x), "N/A")) # reorder to put Date Discarded between Params Syntax cols = ["Type", "Model", "Parameters (B)", "Date Discarded", "Syntax", "Functionality", "Synthesis", "Post-Synthesis"] pivot_df = pivot_df[[c for c in cols if c in pivot_df.columns]] return pivot_df