Spaces:
Runtime error
Runtime error
| import pickle | |
| import pandas as pd | |
| import gradio as gr | |
| from huggingface_hub import HfFileSystem, hf_hub_download | |
if gr.NO_RELOAD:
    # NOTE(review): the pasted source lost its indentation. The whole data
    # pipeline below is assumed to live inside this guard (Gradio documents
    # ``gr.NO_RELOAD`` as the way to keep expensive start-up code from being
    # re-executed in reload mode) -- confirm against the original file.

    ###################
    ### Load Data
    ###################

    # Category keys looked up in the unpickled ELO results -> display names
    # that become the keys of ``arena_dfs`` and ``merged_dfs``.
    key_to_category_name = {
        "full": "Overall",
        "coding": "Coding",
        "long_user": "Longer Query",
        "english": "English",
        "chinese": "Chinese",
        "french": "French",
        "no_tie": "Exclude Ties",
        "no_short": "Exclude Short Query (< 5 tokens)",
        "no_refusal": "Exclude Refusal",
    }

    # Longer description for each display name. Not referenced by the visible
    # part of this file; kept because other code may read it.
    cat_name_to_explanation = {
        "Overall": "Overall Questions",
        "Coding": "Coding: whether conversation contains code snippets",
        "Longer Query": "Longer Query (>= 500 tokens)",
        "English": "English Prompts",
        "Chinese": "Chinese Prompts",
        "French": "French Prompts",
        "Exclude Ties": "Exclude Ties and Bothbad",
        "Exclude Short Query (< 5 tokens)": "Exclude Short User Query (< 5 tokens)",
        "Exclude Refusal": 'Exclude model responses with refusal (e.g., "I cannot answer")',
    }

    fs = HfFileSystem()

    def extract_date(filename):
        """Return the text after the last underscore of the file's base name.

        The base name is the last ``/``-separated component of *filename*,
        cut at its first ``.``. For example ``"a/b/elo_results_20240101.pkl"``
        yields ``"20240101"``. The result is a string, so files are ordered
        lexicographically by this suffix.
        """
        return filename.split("/")[-1].split(".")[0].split("_")[-1]

    # gather ELO data
    ELO_DATA_FILES = "spaces/lmsys/chatbot-arena-leaderboard/*.pkl"
    elo_files = fs.glob(ELO_DATA_FILES)
    if not elo_files:
        # Fail with an actionable message instead of a bare IndexError.
        raise FileNotFoundError(f"No files match {ELO_DATA_FILES!r}")
    # max() picks the same file as sorting in descending order and taking the
    # first element, without sorting the whole listing.
    latest_elo_file = max(elo_files, key=extract_date)
    latest_elo_file_local = hf_hub_download(
        repo_id="lmsys/chatbot-arena-leaderboard",
        filename=latest_elo_file.split("/")[-1],
        repo_type="space",
    )

    # SECURITY: unpickling executes arbitrary code embedded in the file. This
    # is only acceptable as long as the source repository is trusted.
    with open(latest_elo_file_local, "rb") as fin:
        elo_results = pickle.load(fin)

    # Keep only the categories that are actually present in the results.
    arena_dfs = {
        category_name: elo_results[key]["leaderboard_table_df"]
        for key, category_name in key_to_category_name.items()
        if key in elo_results
    }

    # gather open llm leaderboard data
    LEADERBOARD_DATA_FILES = "spaces/lmsys/chatbot-arena-leaderboard/*.csv"
    leaderboard_files = fs.glob(LEADERBOARD_DATA_FILES)
    if not leaderboard_files:
        raise FileNotFoundError(f"No files match {LEADERBOARD_DATA_FILES!r}")
    latest_leaderboard_file = max(leaderboard_files, key=extract_date)
    latest_leaderboard_file_local = hf_hub_download(
        repo_id="lmsys/chatbot-arena-leaderboard",
        filename=latest_leaderboard_file.split("/")[-1],
        repo_type="space",
    )
    leaderboard_df = pd.read_csv(latest_leaderboard_file_local)

    ###################
    ### Prepare Data
    ###################

    # merge leaderboard data with ELO data: the ELO table's index is matched
    # against the leaderboard's "key" column, then rows are ordered by rating
    # (highest first).
    merged_dfs = {
        category_name: (
            pd.merge(table, leaderboard_df, left_index=True, right_on="key")
            .sort_values("rating", ascending=False)
            .reset_index(drop=True)
        )
        for category_name, table in arena_dfs.items()
    }

    # add release dates into the merged data
    release_date_mapping = pd.read_json("release_date_mapping.json", orient="records")
    # Select the two needed columns once instead of on every iteration.
    release_dates = release_date_mapping[["key", "Release Date"]]
    # pd.merge defaults to an inner join, so rows whose "key" has no entry in
    # the release-date mapping are dropped here.
    merged_dfs = {
        category_name: pd.merge(table, release_dates, on="key")
        for category_name, table in merged_dfs.items()
    }

    if "Overall" not in merged_dfs:
        # "Overall" only exists when the results contain the "full" category.
        raise KeyError(
            'ELO results contain no "full" category; cannot build the "Overall" table'
        )
    df = merged_dfs["Overall"]

    # Rating range plus a 10% margin, used to pad the plot's y-axis.
    y_min = df["rating"].min()
    y_max = df["rating"].max()
    y_buffer = (y_max - y_min) * 0.1
with gr.Blocks() as demo:
    gr.Markdown("# Chatbot Arena Leaderboard")
    with gr.Row():
        # Columns shown when hovering over a point.
        hover_fields = ["Model", "rating", "num_battles", "Organization", "License"]
        # Extend the rating axis by ``y_buffer`` below the minimum and above
        # the maximum rating.
        rating_axis_range = [y_min - y_buffer, y_max + y_buffer]
        # Scatter plot of rating (y) against release date (x).
        gr.ScatterPlot(
            df,
            x="Release Date",
            y="rating",
            title="hello",
            tooltip=hover_fields,
            y_lim=rating_axis_range,
            x_label_angle=-45,
            width=1000,
            height=700,
        )
# Start the Gradio server only when this file is run as a script, so that
# importing the module does not launch the app.
if __name__ == "__main__":
    demo.launch()