data_only_open_llm_leaderboard

Runtime error

App Files Files Community

felix committed on Nov 12, 2023

Commit

5f65cec

1 Parent(s): ae85651

update with app.py

Browse files

Files changed (1) hide show

app.py +373 -294

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import json
 import os
 import gradio as gr
 import pandas as pd
@@ -85,13 +86,13 @@ def change_tab(query_param: str):
 # Searching and filtering
 def update_table(
-    hidden_df: pd.DataFrame,
-    columns: list,
-    type_query: list,
-    precision_query: str,
-    size_query: list,
-    show_deleted: bool,
-    query: str,
 ):
     filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted)
     filtered_df = filter_queries(query, filtered_df)
@@ -111,7 +112,7 @@ def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
     # We use COLS to maintain sorting
     filtered_df = df[
         always_here_cols + [c for c in COLS if c in df.columns and c in columns] + [AutoEvalColumn.dummy.name]
-    ]
     return filtered_df
@@ -136,7 +137,7 @@ def filter_queries(query: str, filtered_df: pd.DataFrame):
 def filter_models(
-    df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool
 ) -> pd.DataFrame:
     # Show all models
     if show_deleted:
@@ -156,293 +157,371 @@ def filter_models(
     return filtered_df
-demo = gr.Blocks(css=custom_css)
-with demo:
-    gr.HTML(TITLE)
-    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
-    with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-            with gr.Row():
-                with gr.Column():
-                    with gr.Row():
-                        search_bar = gr.Textbox(
-                            placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
-                            show_label=False,
-                            elem_id="search-bar",
-                        )
-                    with gr.Row():
-                        shown_columns = gr.CheckboxGroup(
-                            choices=[c.name for c in fields(AutoEvalColumn) if not c.hidden and not c.never_hidden and not c.dummy],
-                            value=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden and not c.never_hidden],
-                            label="Select columns to show",
-                            elem_id="column-select",
-                            interactive=True,
-                        )
-                    with gr.Row():
-                        deleted_models_visibility = gr.Checkbox(
-                            value=False, label="Show gated/private/deleted models", interactive=True
-                        )
-                with gr.Column(min_width=320):
-                    with gr.Box(elem_id="box-filter"):
-                        filter_columns_type = gr.CheckboxGroup(
-                            label="Model types",
-                            choices=[t.to_str() for t in ModelType],
-                            value=[t.to_str() for t in ModelType],
-                            interactive=True,
-                            elem_id="filter-columns-type",
-                        )
-                        filter_columns_precision = gr.CheckboxGroup(
-                            label="Precision",
-                            choices=["torch.float16", "torch.bfloat16", "torch.float32", "8bit", "4bit", "GPTQ"],
-                            value=["torch.float16", "torch.bfloat16", "torch.float32", "8bit", "4bit", "GPTQ"],
-                            interactive=True,
-                            elem_id="filter-columns-precision",
-                        )
-                        filter_columns_size = gr.CheckboxGroup(
-                            label="Model sizes (in billions of parameters)",
-                            choices=list(NUMERIC_INTERVALS.keys()),
-                            value=list(NUMERIC_INTERVALS.keys()),
-                            interactive=True,
-                            elem_id="filter-columns-size",
-                        )
-            leaderboard_table = gr.components.Dataframe(
-                value=leaderboard_df[
-                    [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
-                    + shown_columns.value
-                    + [AutoEvalColumn.dummy.name]
-                ],
-                headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
-                datatype=TYPES,
-                max_rows=None,
-                elem_id="leaderboard-table",
-                interactive=False,
-                visible=True,
-            )
-            # Dummy leaderboard for handling the case when the user uses backspace key
-            hidden_leaderboard_table_for_search = gr.components.Dataframe(
-                value=original_df[COLS],
-                headers=COLS,
-                datatype=TYPES,
-                max_rows=None,
-                visible=False,
-            )
-            search_bar.submit(
-                update_table,
-                [
-                    hidden_leaderboard_table_for_search,
-                    shown_columns,
-                    filter_columns_type,
-                    filter_columns_precision,
-                    filter_columns_size,
-                    deleted_models_visibility,
-                    search_bar,
-                ],
-                leaderboard_table,
-            )
-            shown_columns.change(
-                update_table,
-                [
-                    hidden_leaderboard_table_for_search,
-                    shown_columns,
-                    filter_columns_type,
-                    filter_columns_precision,
-                    filter_columns_size,
-                    deleted_models_visibility,
-                    search_bar,
-                ],
-                leaderboard_table,
-                queue=True,
-            )
-            filter_columns_type.change(
-                update_table,
-                [
-                    hidden_leaderboard_table_for_search,
-                    shown_columns,
-                    filter_columns_type,
-                    filter_columns_precision,
-                    filter_columns_size,
-                    deleted_models_visibility,
-                    search_bar,
-                ],
-                leaderboard_table,
-                queue=True,
-            )
-            filter_columns_precision.change(
-                update_table,
-                [
-                    hidden_leaderboard_table_for_search,
-                    shown_columns,
-                    filter_columns_type,
-                    filter_columns_precision,
-                    filter_columns_size,
-                    deleted_models_visibility,
-                    search_bar,
-                ],
-                leaderboard_table,
-                queue=True,
-            )
-            filter_columns_size.change(
-                update_table,
-                [
-                    hidden_leaderboard_table_for_search,
-                    shown_columns,
-                    filter_columns_type,
-                    filter_columns_precision,
-                    filter_columns_size,
-                    deleted_models_visibility,
-                    search_bar,
-                ],
-                leaderboard_table,
-                queue=True,
-            )
-            deleted_models_visibility.change(
-                update_table,
-                [
-                    hidden_leaderboard_table_for_search,
-                    shown_columns,
-                    filter_columns_type,
-                    filter_columns_precision,
-                    filter_columns_size,
-                    deleted_models_visibility,
-                    search_bar,
-                ],
-                leaderboard_table,
-                queue=True,
-            )
-        # with gr.TabItem("📈
-        #  evolution through time", elem_id="llm-benchmark-tab-table", id=4):
-        #     with gr.Row():
-        #         with gr.Column():
-        #             chart = create_metric_plot_obj(
-        #                 plot_df,
-        #                 ["Average ⬆️"],
-        #                 HUMAN_BASELINES,
-        #                 title="Average of Top Scores and Human Baseline Over Time",
-        #             )
-        #             gr.Plot(value=chart, interactive=False, width=500, height=500)
-        #         with gr.Column():
-        #             chart = create_metric_plot_obj(
-        #                 plot_df,
-        #                 ["ARC", "HellaSwag", "MMLU", "TruthfulQA", "Winogrande", "GSM8K", "DROP"],
-        #                 HUMAN_BASELINES,
-        #                 title="Top Scores and Human Baseline Over Time",
-        #             )
-        #             gr.Plot(value=chart, interactive=False, width=500, height=500)
-        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
-            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
-            with gr.Column():
-                with gr.Row():
-                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-                with gr.Column():
-                    with gr.Accordion(
-                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            finished_eval_table = gr.components.Dataframe(
-                                value=finished_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                max_rows=5,
-                            )
-                    with gr.Accordion(
-                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            running_eval_table = gr.components.Dataframe(
-                                value=running_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                max_rows=5,
-                            )
-                    with gr.Accordion(
-                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            pending_eval_table = gr.components.Dataframe(
-                                value=pending_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                max_rows=5,
-                            )
-            with gr.Row():
-                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
-            with gr.Row():
-                with gr.Column():
-                    model_name_textbox = gr.Textbox(label="Model name")
-                    revision_name_textbox = gr.Textbox(label="revision", placeholder="main")
-                    private = gr.Checkbox(False, label="Private", visible=not IS_PUBLIC)
-                    model_type = gr.Dropdown(
-                        choices=[t.to_str(" : ") for t in ModelType],
-                        label="Model type",
-                        multiselect=False,
-                        value=None,
-                        interactive=True,
-                    )
-                with gr.Column():
-                    precision = gr.Dropdown(
-                        choices=["float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)", "GPTQ"],
-                        label="Precision",
-                        multiselect=False,
-                        value="float16",
-                        interactive=True,
-                    )
-                    weight_type = gr.Dropdown(
-                        choices=["Original", "Delta", "Adapter"],
-                        label="Weights type",
-                        multiselect=False,
-                        value="Original",
-                        interactive=True,
-                    )
-                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-            submit_button = gr.Button("Submit Eval")
-            submission_result = gr.Markdown()
-            submit_button.click(
-                add_new_eval,
-                [
-                    model_name_textbox,
-                    base_model_name_textbox,
-                    revision_name_textbox,
-                    precision,
-                    private,
-                    weight_type,
-                    model_type,
-                ],
-                submission_result,
-            )
-    with gr.Row():
-        with gr.Accordion("📙 Citation", open=False):
-            citation_button = gr.Textbox(
-                value=CITATION_BUTTON_TEXT,
-                label=CITATION_BUTTON_LABEL,
-                lines=20,
-                elem_id="citation-button",
-                show_copy_button=True,
-            )
-    dummy = gr.Textbox(visible=False)
-    demo.load(
-        change_tab,
-        dummy,
-        tabs,
-        _js=get_window_url_params,
-    )
-scheduler = BackgroundScheduler()
-scheduler.add_job(restart_space, "interval", seconds=1800)
-scheduler.start()
-demo.queue(concurrency_count=40).launch()

 import json
 import os
+from datetime import datetime, timezone
 import gradio as gr
 import pandas as pd
 # Searching and filtering
 def update_table(
+        hidden_df: pd.DataFrame,
+        columns: list,
+        type_query: list,
+        precision_query: str,
+        size_query: list,
+        show_deleted: bool,
+        query: str,
 ):
     filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted)
     filtered_df = filter_queries(query, filtered_df)
     # We use COLS to maintain sorting
     filtered_df = df[
         always_here_cols + [c for c in COLS if c in df.columns and c in columns] + [AutoEvalColumn.dummy.name]
+        ]
     return filtered_df
 def filter_models(
+        df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool
 ) -> pd.DataFrame:
     # Show all models
     if show_deleted:
     return filtered_df
+# demo = gr.Blocks(css=custom_css)
+# with demo:
+#     gr.HTML(TITLE)
+#     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+#
+#     with gr.Tabs(elem_classes="tab-buttons") as tabs:
+#         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
+#             with gr.Row():
+#                 with gr.Column():
+#                     with gr.Row():
+#                         search_bar = gr.Textbox(
+#                             placeholder=" 🔍 Search for your model and press ENTER...",
+#                             show_label=False,
+#                             elem_id="search-bar",
+#                         )
+#                     with gr.Row():
+#                         shown_columns = gr.CheckboxGroup(
+#                             choices=[
+#                                 c
+#                                 for c in COLS
+#                                 if c
+#                                 not in [
+#                                     AutoEvalColumn.dummy.name,
+#                                     AutoEvalColumn.model.name,
+#                                     AutoEvalColumn.model_type_symbol.name,
+#                                     AutoEvalColumn.still_on_hub.name,
+#                                 ]
+#                             ],
+#                             value=[
+#                                 c
+#                                 for c in COLS_LITE
+#                                 if c
+#                                 not in [
+#                                     AutoEvalColumn.dummy.name,
+#                                     AutoEvalColumn.model.name,
+#                                     AutoEvalColumn.model_type_symbol.name,
+#                                     AutoEvalColumn.still_on_hub.name,
+#                                 ]
+#                             ],
+#                             label="Select columns to show",
+#                             elem_id="column-select",
+#                             interactive=True,
+#                         )
+#                     with gr.Row():
+#                         deleted_models_visibility = gr.Checkbox(
+#                             value=True, label="Show gated/private/deleted models", interactive=True
+#                         )
+#                 with gr.Column(min_width=320):
+#                     with gr.Box(elem_id="box-filter"):
+#                         filter_columns_type = gr.CheckboxGroup(
+#                             label="Model types",
+#                             choices=[
+#                                 ModelType.PT.to_str(),
+#                                 ModelType.FT.to_str(),
+#                                 ModelType.IFT.to_str(),
+#                                 ModelType.RL.to_str(),
+#                             ],
+#                             value=[
+#                                 ModelType.PT.to_str(),
+#                                 ModelType.FT.to_str(),
+#                                 ModelType.IFT.to_str(),
+#                                 ModelType.RL.to_str(),
+#                             ],
+#                             interactive=True,
+#                             elem_id="filter-columns-type",
+#                         )
+#                         filter_columns_precision = gr.CheckboxGroup(
+#                             label="Precision",
+#                             choices=["torch.float16", "torch.bfloat16", "torch.float32", "8bit", "4bit", "GPTQ"],
+#                             value=["torch.float16", "torch.bfloat16", "torch.float32", "8bit", "4bit", "GPTQ"],
+#                             interactive=True,
+#                             elem_id="filter-columns-precision",
+#                         )
+#                         filter_columns_size = gr.CheckboxGroup(
+#                             label="Model sizes",
+#                             choices=list(NUMERIC_INTERVALS.keys()),
+#                             value=list(NUMERIC_INTERVALS.keys()),
+#                             interactive=True,
+#                             elem_id="filter-columns-size",
+#                         )
+#
+#             leaderboard_table = gr.components.Dataframe(
+#                 value=leaderboard_df[
+#                     [AutoEvalColumn.model_type_symbol.name, AutoEvalColumn.model.name]
+#                     + shown_columns.value
+#                     + [AutoEvalColumn.dummy.name]
+#                 ],
+#                 headers=[
+#                     AutoEvalColumn.model_type_symbol.name,
+#                     AutoEvalColumn.model.name,
+#                 ]
+#                 + shown_columns.value
+#                 + [AutoEvalColumn.dummy.name],
+#                 datatype=TYPES,
+#                 max_rows=None,
+#                 elem_id="leaderboard-table",
+#                 interactive=False,
+#                 visible=True,
+#             )
+#
+#             # Dummy leaderboard for handling the case when the user uses backspace key
+#             hidden_leaderboard_table_for_search = gr.components.Dataframe(
+#                 value=original_df,
+#                 headers=COLS,
+#                 datatype=TYPES,
+#                 max_rows=None,
+#                 visible=False,
+#             )
+#             search_bar.submit(
+#                 update_table,
+#                 [
+#                     hidden_leaderboard_table_for_search,
+#                     leaderboard_table,
+#                     shown_columns,
+#                     filter_columns_type,
+#                     filter_columns_precision,
+#                     filter_columns_size,
+#                     deleted_models_visibility,
+#                     search_bar,
+#                 ],
+#                 leaderboard_table,
+#             )
+#             shown_columns.change(
+#                 update_table,
+#                 [
+#                     hidden_leaderboard_table_for_search,
+#                     leaderboard_table,
+#                     shown_columns,
+#                     filter_columns_type,
+#                     filter_columns_precision,
+#                     filter_columns_size,
+#                     deleted_models_visibility,
+#                     search_bar,
+#                 ],
+#                 leaderboard_table,
+#                 queue=True,
+#             )
+#             filter_columns_type.change(
+#                 update_table,
+#                 [
+#                     hidden_leaderboard_table_for_search,
+#                     leaderboard_table,
+#                     shown_columns,
+#                     filter_columns_type,
+#                     filter_columns_precision,
+#                     filter_columns_size,
+#                     deleted_models_visibility,
+#                     search_bar,
+#                 ],
+#                 leaderboard_table,
+#                 queue=True,
+#             )
+#             filter_columns_precision.change(
+#                 update_table,
+#                 [
+#                     hidden_leaderboard_table_for_search,
+#                     leaderboard_table,
+#                     shown_columns,
+#                     filter_columns_type,
+#                     filter_columns_precision,
+#                     filter_columns_size,
+#                     deleted_models_visibility,
+#                     search_bar,
+#                 ],
+#                 leaderboard_table,
+#                 queue=True,
+#             )
+#             filter_columns_size.change(
+#                 update_table,
+#                 [
+#                     hidden_leaderboard_table_for_search,
+#                     leaderboard_table,
+#                     shown_columns,
+#                     filter_columns_type,
+#                     filter_columns_precision,
+#                     filter_columns_size,
+#                     deleted_models_visibility,
+#                     search_bar,
+#                 ],
+#                 leaderboard_table,
+#                 queue=True,
+#             )
+#             deleted_models_visibility.change(
+#                 update_table,
+#                 [
+#                     hidden_leaderboard_table_for_search,
+#                     leaderboard_table,
+#                     shown_columns,
+#                     filter_columns_type,
+#                     filter_columns_precision,
+#                     filter_columns_size,
+#                     deleted_models_visibility,
+#                     search_bar,
+#                 ],
+#                 leaderboard_table,
+#                 queue=True,
+#             )
+#         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
+#             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+#
+#         with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
+#             with gr.Column():
+#                 with gr.Row():
+#                     gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
+#
+#                 with gr.Column():
+#                     with gr.Accordion(
+#                         f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
+#                         open=False,
+#                     ):
+#                         with gr.Row():
+#                             finished_eval_table = gr.components.Dataframe(
+#                                 value=finished_eval_queue_df,
+#                                 headers=EVAL_COLS,
+#                                 datatype=EVAL_TYPES,
+#                                 max_rows=5,
+#                             )
+#                     with gr.Accordion(
+#                         f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
+#                         open=False,
+#                     ):
+#                         with gr.Row():
+#                             running_eval_table = gr.components.Dataframe(
+#                                 value=running_eval_queue_df,
+#                                 headers=EVAL_COLS,
+#                                 datatype=EVAL_TYPES,
+#                                 max_rows=5,
+#                             )
+#
+#                     with gr.Accordion(
+#                         f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
+#                         open=False,
+#                     ):
+#                         with gr.Row():
+#                             pending_eval_table = gr.components.Dataframe(
+#                                 value=pending_eval_queue_df,
+#                                 headers=EVAL_COLS,
+#                                 datatype=EVAL_TYPES,
+#                                 max_rows=5,
+#                             )
+#             with gr.Row():
+#                 gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
+#
+#             with gr.Row():
+#                 with gr.Column():
+#                     model_name_textbox = gr.Textbox(label="Model name")
+#                     revision_name_textbox = gr.Textbox(label="revision", placeholder="main")
+#                     private = gr.Checkbox(False, label="Private", visible=not IS_PUBLIC)
+#                     model_type = gr.Dropdown(
+#                         choices=[
+#                             ModelType.PT.to_str(" : "),
+#                             ModelType.FT.to_str(" : "),
+#                             ModelType.IFT.to_str(" : "),
+#                             ModelType.RL.to_str(" : "),
+#                         ],
+#                         label="Model type",
+#                         multiselect=False,
+#                         value=None,
+#                         interactive=True,
+#                     )
+#
+#                 with gr.Column():
+#                     precision = gr.Dropdown(
+#                         choices=[
+#                             "float16",
+#                             "bfloat16",
+#                             "8bit (LLM.int8)",
+#                             "4bit (QLoRA / FP4)",
+#                             "GPTQ"
+#                         ],
+#                         label="Precision",
+#                         multiselect=False,
+#                         value="float16",
+#                         interactive=True,
+#                     )
+#                     weight_type = gr.Dropdown(
+#                         choices=["Original", "Delta", "Adapter"],
+#                         label="Weights type",
+#                         multiselect=False,
+#                         value="Original",
+#                         interactive=True,
+#                     )
+#                     base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
+#
+#             submit_button = gr.Button("Submit Eval")
+#             submission_result = gr.Markdown()
+#             submit_button.click(
+#                 add_new_eval,
+#                 [
+#                     model_name_textbox,
+#                     base_model_name_textbox,
+#                     revision_name_textbox,
+#                     precision,
+#                     private,
+#                     weight_type,
+#                     model_type,
+#                 ],
+#                 submission_result,
+#             )
+#
+#     with gr.Row():
+#         with gr.Accordion("📙 Citation", open=False):
+#             citation_button = gr.Textbox(
+#                 value=CITATION_BUTTON_TEXT,
+#                 label=CITATION_BUTTON_LABEL,
+#                 elem_id="citation-button",
+#             ).style(show_copy_button=True)
+#
+#     dummy = gr.Textbox(visible=False)
+#     demo.load(
+#         change_tab,
+#         dummy,
+#         tabs,
+#         _js=get_window_url_params,
+#     )
+dummy1 = gr.Textbox(visible=False)  # hidden textbox; passed as the second Interface input further down
+hidden_leaderboard_table_for_search = gr.components.Dataframe(  # hidden Dataframe; used as the Interface output further down
+    headers=COLS,  # COLS is not defined in the visible part of the file
+    datatype=TYPES,  # TYPES is not defined in the visible part of the file
+    max_rows=None,
+    visible=False,
+)
+def display(x, y):  # callback passed to gr.Interface as fn; neither x nor y is read
+    return original_df  # same object on every call; original_df is not defined in the visible code - TODO confirm it is a DataFrame
+INTRODUCTION_TEXT = """
+This is a copied space from Open Source LLM leaderboard. Instead of displaying
+the results as table the space simply provides a gradio API interface to access
+the full leaderboard data easily.
+Example python on how to access the data:
+```python
+from gradio_client import Client
+import json
+client = Client("https://felixz-open-llm-leaderboard.hf.space/")
+json_data = client.predict("","", api_name='/predict')
+with open(json_data, 'r') as file:
+    file_data = file.read()
+# Load the JSON data
+data = json.loads(file_data)
+# Get the headers and the data
+headers = data['headers']
+data = data['data']
+```
+"""  # rendered through the gr.Markdown component in the Interface inputs list below
+interface = gr.Interface(  # replaces the commented-out Blocks `demo` UI with a single-function Interface
+    fn=display,
+    inputs=[ gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text"),  # NOTE(review): Markdown is listed as an input, presumably just to show the intro text - confirm Interface accepts it
+             dummy1  # hidden textbox; display() ignores its value
+             ],
+    outputs=[hidden_leaderboard_table_for_search]  # NOTE(review): this Dataframe was created with visible=False
+)
+#scheduler = BackgroundScheduler()
+#scheduler.add_job(restart_space, "interval", seconds=12000)
+#scheduler.start()
+interface.launch()  # runs at module level; the periodic restart scheduler above is commented out
+#demo.queue(concurrency_count=40).launch()