Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	
		Sean Cho
		
	committed on
		
		
					Commit 
							
							·
						
						bcb8d03
	
1
								Parent(s):
							
							2a9714f
								
update to latest
Browse files
 - README.md +2 -1
 - app.py +85 -68
 - requirements.txt +4 -3
 - src/display_models/get_model_metadata.py +50 -15
 - src/display_models/model_metadata_flags.py +0 -7
 - src/display_models/read_results.py +2 -2
 - src/load_from_hub.py +1 -4
 
    	
        README.md
    CHANGED
    
    | 
         @@ -4,10 +4,11 @@ emoji: 📉 
     | 
|
| 4 | 
         
             
            colorFrom: green
         
     | 
| 5 | 
         
             
            colorTo: indigo
         
     | 
| 6 | 
         
             
            sdk: gradio
         
     | 
| 7 | 
         
            -
            sdk_version: 3. 
     | 
| 8 | 
         
             
            app_file: app.py
         
     | 
| 9 | 
         
             
            pinned: true
         
     | 
| 10 | 
         
             
            license: apache-2.0
         
     | 
| 
         | 
|
| 11 | 
         
             
            ---
         
     | 
| 12 | 
         | 
| 13 | 
         
             
            Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
         
     | 
| 
         | 
|
| 4 | 
         
             
            colorFrom: green
         
     | 
| 5 | 
         
             
            colorTo: indigo
         
     | 
| 6 | 
         
             
            sdk: gradio
         
     | 
| 7 | 
         
            +
            sdk_version: 3.43.2
         
     | 
| 8 | 
         
             
            app_file: app.py
         
     | 
| 9 | 
         
             
            pinned: true
         
     | 
| 10 | 
         
             
            license: apache-2.0
         
     | 
| 11 | 
         
            +
            duplicated_from: HuggingFaceH4/open_llm_leaderboard
         
     | 
| 12 | 
         
             
            ---
         
     | 
| 13 | 
         | 
| 14 | 
         
             
            Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
         
     | 
    	
        app.py
    CHANGED
    
    | 
         @@ -222,21 +222,6 @@ def add_new_eval( 
     | 
|
| 222 | 
         | 
| 223 | 
         | 
| 224 | 
         
             
            # Basics
         
     | 
| 225 | 
         
            -
            def refresh() -> list[pd.DataFrame]:
         
     | 
| 226 | 
         
            -
                leaderboard_df = get_leaderboard_df(eval_results, eval_results_private, COLS, BENCHMARK_COLS)
         
     | 
| 227 | 
         
            -
                (
         
     | 
| 228 | 
         
            -
                    finished_eval_queue_df,
         
     | 
| 229 | 
         
            -
                    running_eval_queue_df,
         
     | 
| 230 | 
         
            -
                    pending_eval_queue_df,
         
     | 
| 231 | 
         
            -
                ) = get_evaluation_queue_df(eval_queue, eval_queue_private, EVAL_REQUESTS_PATH, EVAL_COLS)
         
     | 
| 232 | 
         
            -
                return (
         
     | 
| 233 | 
         
            -
                    leaderboard_df,
         
     | 
| 234 | 
         
            -
                    finished_eval_queue_df,
         
     | 
| 235 | 
         
            -
                    running_eval_queue_df,
         
     | 
| 236 | 
         
            -
                    pending_eval_queue_df,
         
     | 
| 237 | 
         
            -
                )
         
     | 
| 238 | 
         
            -
             
     | 
| 239 | 
         
            -
             
     | 
| 240 | 
         
             
            def change_tab(query_param: str):
         
     | 
| 241 | 
         
             
                query_param = query_param.replace("'", '"')
         
     | 
| 242 | 
         
             
                query_param = json.loads(query_param)
         
     | 
| 
         @@ -248,17 +233,16 @@ def change_tab(query_param: str): 
     | 
|
| 248 | 
         | 
| 249 | 
         | 
| 250 | 
         
             
            # Searching and filtering
         
     | 
| 251 | 
         
            -
            def  
     | 
| 252 | 
         
            -
                 
     | 
| 253 | 
         
            -
                if  
     | 
| 254 | 
         
            -
                    filtered_df =  
     | 
| 255 | 
         
            -
             
     | 
| 256 | 
         
            -
             
     | 
| 257 | 
         
            -
             
     | 
| 258 | 
         
            -
                else:
         
     | 
| 259 | 
         
            -
                    filtered_df = df[(df[AutoEvalColumn.dummy.name].str.contains(query, case=False))]
         
     | 
| 260 | 
         
            -
                return filtered_df[current_columns]
         
     | 
| 261 | 
         | 
| 
         | 
|
| 
         | 
|
| 262 | 
         | 
| 263 | 
         
             
            def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
         
     | 
| 264 | 
         
             
                always_here_cols = [
         
     | 
| 
         @@ -272,31 +256,32 @@ def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame: 
     | 
|
| 272 | 
         
             
                return filtered_df
         
     | 
| 273 | 
         | 
| 274 | 
         
             
            NUMERIC_INTERVALS = {
         
     | 
| 275 | 
         
            -
                " 
     | 
| 276 | 
         
            -
                " 
     | 
| 277 | 
         
            -
                "~ 
     | 
| 278 | 
         
            -
                "~ 
     | 
| 279 | 
         
            -
                 
     | 
| 280 | 
         
            -
                # " 
     | 
| 
         | 
|
| 281 | 
         
             
            }
         
     | 
| 282 | 
         | 
| 283 | 
         
             
            def filter_models(
         
     | 
| 284 | 
         
            -
                df: pd.DataFrame,  
     | 
| 285 | 
         
             
            ) -> pd.DataFrame:
         
     | 
| 286 | 
         
            -
                current_columns = current_columns_df.columns
         
     | 
| 287 | 
         
            -
             
     | 
| 288 | 
         
             
                # Show all models
         
     | 
| 289 | 
         
             
                if show_deleted:
         
     | 
| 290 | 
         
            -
                    filtered_df = df 
     | 
| 291 | 
         
             
                else:  # Show only still on the hub models
         
     | 
| 292 | 
         
            -
                    filtered_df = df[df[AutoEvalColumn.still_on_hub.name] == True] 
     | 
| 293 | 
         | 
| 294 | 
         
             
                type_emoji = [t[0] for t in type_query]
         
     | 
| 295 | 
         
             
                filtered_df = filtered_df[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
         
     | 
| 
         | 
|
| 296 | 
         | 
| 297 | 
         
            -
                numeric_interval = [NUMERIC_INTERVALS[s] for s in size_query]
         
     | 
| 298 | 
         
             
                params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")
         
     | 
| 299 | 
         
            -
                 
     | 
| 
         | 
|
| 300 | 
         | 
| 301 | 
         
             
                return filtered_df
         
     | 
| 302 | 
         | 
| 
         @@ -310,6 +295,12 @@ with demo: 
     | 
|
| 310 | 
         
             
                    with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
         
     | 
| 311 | 
         
             
                        with gr.Row():
         
     | 
| 312 | 
         
             
                            with gr.Column():
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 313 | 
         
             
                                with gr.Row():
         
     | 
| 314 | 
         
             
                                    shown_columns = gr.CheckboxGroup(
         
     | 
| 315 | 
         
             
                                        choices=[
         
     | 
| 
         @@ -343,11 +334,6 @@ with demo: 
     | 
|
| 343 | 
         
             
                                        value=True, label="👀 Show gated/private/deleted models", interactive=True
         
     | 
| 344 | 
         
             
                                    )
         
     | 
| 345 | 
         
             
                            with gr.Column(min_width=320):
         
     | 
| 346 | 
         
            -
                                search_bar = gr.Textbox(
         
     | 
| 347 | 
         
            -
                                    placeholder="🔍 Search for your model and press ENTER...",
         
     | 
| 348 | 
         
            -
                                    show_label=False,
         
     | 
| 349 | 
         
            -
                                    elem_id="search-bar",
         
     | 
| 350 | 
         
            -
                                )
         
     | 
| 351 | 
         
             
                                with gr.Box(elem_id="box-filter"):
         
     | 
| 352 | 
         
             
                                    filter_columns_type = gr.CheckboxGroup(
         
     | 
| 353 | 
         
             
                                        label="Model types",
         
     | 
| 
         @@ -366,6 +352,13 @@ with demo: 
     | 
|
| 366 | 
         
             
                                        interactive=True,
         
     | 
| 367 | 
         
             
                                        elem_id="filter-columns-type",
         
     | 
| 368 | 
         
             
                                    )
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 369 | 
         
             
                                    filter_columns_size = gr.CheckboxGroup(
         
     | 
| 370 | 
         
             
                                        label="Model sizes",
         
     | 
| 371 | 
         
             
                                        choices=list(NUMERIC_INTERVALS.keys()),
         
     | 
| 
         @@ -402,55 +395,93 @@ with demo: 
     | 
|
| 402 | 
         
             
                            visible=False,
         
     | 
| 403 | 
         
             
                        )
         
     | 
| 404 | 
         
             
                        search_bar.submit(
         
     | 
| 405 | 
         
            -
                             
     | 
| 406 | 
         
             
                            [
         
     | 
| 407 | 
         
             
                                hidden_leaderboard_table_for_search,
         
     | 
| 408 | 
         
             
                                leaderboard_table,
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 409 | 
         
             
                                search_bar,
         
     | 
| 410 | 
         
             
                            ],
         
     | 
| 411 | 
         
             
                            leaderboard_table,
         
     | 
| 412 | 
         
             
                        )
         
     | 
| 413 | 
         
             
                        shown_columns.change(
         
     | 
| 414 | 
         
            -
                             
     | 
| 415 | 
         
            -
                            [ 
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 416 | 
         
             
                            leaderboard_table,
         
     | 
| 417 | 
         
            -
                            queue= 
     | 
| 418 | 
         
             
                        )
         
     | 
| 419 | 
         
             
                        filter_columns_type.change(
         
     | 
| 420 | 
         
            -
                             
     | 
| 421 | 
         
             
                            [
         
     | 
| 422 | 
         
             
                                hidden_leaderboard_table_for_search,
         
     | 
| 423 | 
         
             
                                leaderboard_table,
         
     | 
| 
         | 
|
| 424 | 
         
             
                                filter_columns_type,
         
     | 
| 
         | 
|
| 425 | 
         
             
                                filter_columns_size,
         
     | 
| 426 | 
         
             
                                deleted_models_visibility,
         
     | 
| 
         | 
|
| 427 | 
         
             
                            ],
         
     | 
| 428 | 
         
             
                            leaderboard_table,
         
     | 
| 429 | 
         
            -
                            queue= 
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 430 | 
         
             
                        )
         
     | 
| 431 | 
         
             
                        filter_columns_size.change(
         
     | 
| 432 | 
         
            -
                             
     | 
| 433 | 
         
             
                            [
         
     | 
| 434 | 
         
             
                                hidden_leaderboard_table_for_search,
         
     | 
| 435 | 
         
             
                                leaderboard_table,
         
     | 
| 
         | 
|
| 436 | 
         
             
                                filter_columns_type,
         
     | 
| 
         | 
|
| 437 | 
         
             
                                filter_columns_size,
         
     | 
| 438 | 
         
             
                                deleted_models_visibility,
         
     | 
| 
         | 
|
| 439 | 
         
             
                            ],
         
     | 
| 440 | 
         
             
                            leaderboard_table,
         
     | 
| 441 | 
         
            -
                            queue= 
     | 
| 442 | 
         
             
                        )
         
     | 
| 443 | 
         
             
                        deleted_models_visibility.change(
         
     | 
| 444 | 
         
            -
                             
     | 
| 445 | 
         
             
                            [
         
     | 
| 446 | 
         
             
                                hidden_leaderboard_table_for_search,
         
     | 
| 447 | 
         
             
                                leaderboard_table,
         
     | 
| 
         | 
|
| 448 | 
         
             
                                filter_columns_type,
         
     | 
| 
         | 
|
| 449 | 
         
             
                                filter_columns_size,
         
     | 
| 450 | 
         
             
                                deleted_models_visibility,
         
     | 
| 
         | 
|
| 451 | 
         
             
                            ],
         
     | 
| 452 | 
         
             
                            leaderboard_table,
         
     | 
| 453 | 
         
            -
                            queue= 
     | 
| 454 | 
         
             
                        )
         
     | 
| 455 | 
         
             
                    with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
         
     | 
| 456 | 
         
             
                        gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
         
     | 
| 
         @@ -556,20 +587,6 @@ with demo: 
     | 
|
| 556 | 
         
             
                            submission_result,
         
     | 
| 557 | 
         
             
                        )
         
     | 
| 558 | 
         | 
| 559 | 
         
            -
                    with gr.Row():
         
     | 
| 560 | 
         
            -
                        refresh_button = gr.Button("Refresh")
         
     | 
| 561 | 
         
            -
                        refresh_button.click(
         
     | 
| 562 | 
         
            -
                            refresh,
         
     | 
| 563 | 
         
            -
                            inputs=[],
         
     | 
| 564 | 
         
            -
                            outputs=[
         
     | 
| 565 | 
         
            -
                                leaderboard_table,
         
     | 
| 566 | 
         
            -
                                finished_eval_table,
         
     | 
| 567 | 
         
            -
                                running_eval_table,
         
     | 
| 568 | 
         
            -
                                pending_eval_table,
         
     | 
| 569 | 
         
            -
                            ],
         
     | 
| 570 | 
         
            -
                            api_name='refresh'
         
     | 
| 571 | 
         
            -
                        )
         
     | 
| 572 | 
         
            -
             
     | 
| 573 | 
         
             
                with gr.Row():
         
     | 
| 574 | 
         
             
                    with gr.Accordion("📙 Citation", open=False):
         
     | 
| 575 | 
         
             
                        citation_button = gr.Textbox(
         
     | 
| 
         @@ -589,6 +606,6 @@ with demo: 
     | 
|
| 589 | 
         
             
                )
         
     | 
| 590 | 
         | 
| 591 | 
         
             
            scheduler = BackgroundScheduler()
         
     | 
| 592 | 
         
            -
            scheduler.add_job(restart_space, "interval", seconds= 
     | 
| 593 | 
         
             
            scheduler.start()
         
     | 
| 594 | 
         
             
            demo.queue(concurrency_count=40).launch()
         
     | 
| 
         | 
|
| 222 | 
         | 
| 223 | 
         | 
| 224 | 
         
             
            # Basics
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 225 | 
         
             
            def change_tab(query_param: str):
         
     | 
| 226 | 
         
             
                query_param = query_param.replace("'", '"')
         
     | 
| 227 | 
         
             
                query_param = json.loads(query_param)
         
     | 
| 
         | 
|
| 233 | 
         | 
| 234 | 
         | 
| 235 | 
         
             
            # Searching and filtering
         
     | 
| 236 | 
         
            +
            def update_table(hidden_df: pd.DataFrame, current_columns_df: pd.DataFrame, columns: list, type_query: list, precision_query: str, size_query: list, show_deleted: bool, query: str):
         
     | 
| 237 | 
         
            +
                filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted)
         
     | 
| 238 | 
         
            +
                if query != "":
         
     | 
| 239 | 
         
            +
                    filtered_df = search_table(filtered_df, query)
         
     | 
| 240 | 
         
            +
                df = select_columns(filtered_df, columns)
         
     | 
| 241 | 
         
            +
             
     | 
| 242 | 
         
            +
                return df
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 243 | 
         | 
| 244 | 
         
            +
            def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
         
     | 
| 245 | 
         
            +
                return df[(df[AutoEvalColumn.dummy.name].str.contains(query, case=False))]
         
     | 
| 246 | 
         | 
| 247 | 
         
             
            def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
         
     | 
| 248 | 
         
             
                always_here_cols = [
         
     | 
| 
         | 
|
| 256 | 
         
             
                return filtered_df
         
     | 
| 257 | 
         | 
| 258 | 
         
             
            NUMERIC_INTERVALS = {
         
     | 
| 259 | 
         
            +
                "Unknown": pd.Interval(-1, 0, closed="right"), 
         
     | 
| 260 | 
         
            +
                "< 1.5B": pd.Interval(0, 1.5, closed="right"),
         
     | 
| 261 | 
         
            +
                "~3B": pd.Interval(1.5, 5, closed="right"),
         
     | 
| 262 | 
         
            +
                "~7B": pd.Interval(6, 11, closed="right"),
         
     | 
| 263 | 
         
            +
                "~13B": pd.Interval(12, 15, closed="right"),
         
     | 
| 264 | 
         
            +
                # "~35B": pd.Interval(16, 55, closed="right"),
         
     | 
| 265 | 
         
            +
                # "60B+": pd.Interval(55, 10000, closed="right"),
         
     | 
| 266 | 
         
             
            }
         
     | 
| 267 | 
         | 
| 268 | 
         
             
            def filter_models(
         
     | 
| 269 | 
         
            +
                df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool
         
     | 
| 270 | 
         
             
            ) -> pd.DataFrame:
         
     | 
| 
         | 
|
| 
         | 
|
| 271 | 
         
             
                # Show all models
         
     | 
| 272 | 
         
             
                if show_deleted:
         
     | 
| 273 | 
         
            +
                    filtered_df = df
         
     | 
| 274 | 
         
             
                else:  # Show only still on the hub models
         
     | 
| 275 | 
         
            +
                    filtered_df = df[df[AutoEvalColumn.still_on_hub.name] == True]
         
     | 
| 276 | 
         | 
| 277 | 
         
             
                type_emoji = [t[0] for t in type_query]
         
     | 
| 278 | 
         
             
                filtered_df = filtered_df[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
         
     | 
| 279 | 
         
            +
                filtered_df = filtered_df[df[AutoEvalColumn.precision.name].isin(precision_query)]
         
     | 
| 280 | 
         | 
| 281 | 
         
            +
                numeric_interval = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[s] for s in size_query]))
         
     | 
| 282 | 
         
             
                params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")
         
     | 
| 283 | 
         
            +
                mask = params_column.apply(lambda x: any(numeric_interval.contains(x)))
         
     | 
| 284 | 
         
            +
                filtered_df = filtered_df.loc[mask]
         
     | 
| 285 | 
         | 
| 286 | 
         
             
                return filtered_df
         
     | 
| 287 | 
         | 
| 
         | 
|
| 295 | 
         
             
                    with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
         
     | 
| 296 | 
         
             
                        with gr.Row():
         
     | 
| 297 | 
         
             
                            with gr.Column():
         
     | 
| 298 | 
         
            +
                                with gr.Row():
         
     | 
| 299 | 
         
            +
                                    search_bar = gr.Textbox(
         
     | 
| 300 | 
         
            +
                                        placeholder=" 🔍 Search for your model and press ENTER...",
         
     | 
| 301 | 
         
            +
                                        show_label=False,
         
     | 
| 302 | 
         
            +
                                        elem_id="search-bar",
         
     | 
| 303 | 
         
            +
                                    )
         
     | 
| 304 | 
         
             
                                with gr.Row():
         
     | 
| 305 | 
         
             
                                    shown_columns = gr.CheckboxGroup(
         
     | 
| 306 | 
         
             
                                        choices=[
         
     | 
| 
         | 
|
| 334 | 
         
             
                                        value=True, label="👀 Show gated/private/deleted models", interactive=True
         
     | 
| 335 | 
         
             
                                    )
         
     | 
| 336 | 
         
             
                            with gr.Column(min_width=320):
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 337 | 
         
             
                                with gr.Box(elem_id="box-filter"):
         
     | 
| 338 | 
         
             
                                    filter_columns_type = gr.CheckboxGroup(
         
     | 
| 339 | 
         
             
                                        label="Model types",
         
     | 
| 
         | 
|
| 352 | 
         
             
                                        interactive=True,
         
     | 
| 353 | 
         
             
                                        elem_id="filter-columns-type",
         
     | 
| 354 | 
         
             
                                    )
         
     | 
| 355 | 
         
            +
                                    filter_columns_precision = gr.CheckboxGroup(
         
     | 
| 356 | 
         
            +
                                        label="Precision",
         
     | 
| 357 | 
         
            +
                                        choices=["torch.float16"], #, "torch.bfloat16", "torch.float32", "8bit", "4bit", "GPTQ"],
         
     | 
| 358 | 
         
            +
                                        value=["torch.float16"], #, "torch.bfloat16", "torch.float32", "8bit", "4bit", "GPTQ"],
         
     | 
| 359 | 
         
            +
                                        interactive=False,
         
     | 
| 360 | 
         
            +
                                        elem_id="filter-columns-precision",
         
     | 
| 361 | 
         
            +
                                    )
         
     | 
| 362 | 
         
             
                                    filter_columns_size = gr.CheckboxGroup(
         
     | 
| 363 | 
         
             
                                        label="Model sizes",
         
     | 
| 364 | 
         
             
                                        choices=list(NUMERIC_INTERVALS.keys()),
         
     | 
| 
         | 
|
| 395 | 
         
             
                            visible=False,
         
     | 
| 396 | 
         
             
                        )
         
     | 
| 397 | 
         
             
                        search_bar.submit(
         
     | 
| 398 | 
         
            +
                            update_table,
         
     | 
| 399 | 
         
             
                            [
         
     | 
| 400 | 
         
             
                                hidden_leaderboard_table_for_search,
         
     | 
| 401 | 
         
             
                                leaderboard_table,
         
     | 
| 402 | 
         
            +
                                shown_columns,
         
     | 
| 403 | 
         
            +
                                filter_columns_type,
         
     | 
| 404 | 
         
            +
                                filter_columns_precision,
         
     | 
| 405 | 
         
            +
                                filter_columns_size,
         
     | 
| 406 | 
         
            +
                                deleted_models_visibility,
         
     | 
| 407 | 
         
             
                                search_bar,
         
     | 
| 408 | 
         
             
                            ],
         
     | 
| 409 | 
         
             
                            leaderboard_table,
         
     | 
| 410 | 
         
             
                        )
         
     | 
| 411 | 
         
             
                        shown_columns.change(
         
     | 
| 412 | 
         
            +
                            update_table,
         
     | 
| 413 | 
         
            +
                            [
         
     | 
| 414 | 
         
            +
                                hidden_leaderboard_table_for_search,
         
     | 
| 415 | 
         
            +
                                leaderboard_table,
         
     | 
| 416 | 
         
            +
                                shown_columns,
         
     | 
| 417 | 
         
            +
                                filter_columns_type,
         
     | 
| 418 | 
         
            +
                                filter_columns_precision,
         
     | 
| 419 | 
         
            +
                                filter_columns_size,
         
     | 
| 420 | 
         
            +
                                deleted_models_visibility,
         
     | 
| 421 | 
         
            +
                                search_bar,
         
     | 
| 422 | 
         
            +
                            ],
         
     | 
| 423 | 
         
             
                            leaderboard_table,
         
     | 
| 424 | 
         
            +
                            queue=True,
         
     | 
| 425 | 
         
             
                        )
         
     | 
| 426 | 
         
             
                        filter_columns_type.change(
         
     | 
| 427 | 
         
            +
                            update_table,
         
     | 
| 428 | 
         
             
                            [
         
     | 
| 429 | 
         
             
                                hidden_leaderboard_table_for_search,
         
     | 
| 430 | 
         
             
                                leaderboard_table,
         
     | 
| 431 | 
         
            +
                                shown_columns,
         
     | 
| 432 | 
         
             
                                filter_columns_type,
         
     | 
| 433 | 
         
            +
                                filter_columns_precision,
         
     | 
| 434 | 
         
             
                                filter_columns_size,
         
     | 
| 435 | 
         
             
                                deleted_models_visibility,
         
     | 
| 436 | 
         
            +
                                search_bar,
         
     | 
| 437 | 
         
             
                            ],
         
     | 
| 438 | 
         
             
                            leaderboard_table,
         
     | 
| 439 | 
         
            +
                            queue=True,
         
     | 
| 440 | 
         
            +
                        )
         
     | 
| 441 | 
         
            +
                        filter_columns_precision.change(
         
     | 
| 442 | 
         
            +
                            update_table,
         
     | 
| 443 | 
         
            +
                            [
         
     | 
| 444 | 
         
            +
                                hidden_leaderboard_table_for_search,
         
     | 
| 445 | 
         
            +
                                leaderboard_table,
         
     | 
| 446 | 
         
            +
                                shown_columns,
         
     | 
| 447 | 
         
            +
                                filter_columns_type,
         
     | 
| 448 | 
         
            +
                                filter_columns_precision,
         
     | 
| 449 | 
         
            +
                                filter_columns_size,
         
     | 
| 450 | 
         
            +
                                deleted_models_visibility,
         
     | 
| 451 | 
         
            +
                                search_bar,
         
     | 
| 452 | 
         
            +
                            ],
         
     | 
| 453 | 
         
            +
                            leaderboard_table,
         
     | 
| 454 | 
         
            +
                            queue=True,
         
     | 
| 455 | 
         
             
                        )
         
     | 
| 456 | 
         
             
                        filter_columns_size.change(
         
     | 
| 457 | 
         
            +
                            update_table,
         
     | 
| 458 | 
         
             
                            [
         
     | 
| 459 | 
         
             
                                hidden_leaderboard_table_for_search,
         
     | 
| 460 | 
         
             
                                leaderboard_table,
         
     | 
| 461 | 
         
            +
                                shown_columns,
         
     | 
| 462 | 
         
             
                                filter_columns_type,
         
     | 
| 463 | 
         
            +
                                filter_columns_precision,
         
     | 
| 464 | 
         
             
                                filter_columns_size,
         
     | 
| 465 | 
         
             
                                deleted_models_visibility,
         
     | 
| 466 | 
         
            +
                                search_bar,
         
     | 
| 467 | 
         
             
                            ],
         
     | 
| 468 | 
         
             
                            leaderboard_table,
         
     | 
| 469 | 
         
            +
                            queue=True,
         
     | 
| 470 | 
         
             
                        )
         
     | 
| 471 | 
         
             
                        deleted_models_visibility.change(
         
     | 
| 472 | 
         
            +
                            update_table,
         
     | 
| 473 | 
         
             
                            [
         
     | 
| 474 | 
         
             
                                hidden_leaderboard_table_for_search,
         
     | 
| 475 | 
         
             
                                leaderboard_table,
         
     | 
| 476 | 
         
            +
                                shown_columns,
         
     | 
| 477 | 
         
             
                                filter_columns_type,
         
     | 
| 478 | 
         
            +
                                filter_columns_precision,
         
     | 
| 479 | 
         
             
                                filter_columns_size,
         
     | 
| 480 | 
         
             
                                deleted_models_visibility,
         
     | 
| 481 | 
         
            +
                                search_bar,
         
     | 
| 482 | 
         
             
                            ],
         
     | 
| 483 | 
         
             
                            leaderboard_table,
         
     | 
| 484 | 
         
            +
                            queue=True,
         
     | 
| 485 | 
         
             
                        )
         
     | 
| 486 | 
         
             
                    with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
         
     | 
| 487 | 
         
             
                        gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
         
     | 
| 
         | 
|
| 587 | 
         
             
                            submission_result,
         
     | 
| 588 | 
         
             
                        )
         
     | 
| 589 | 
         | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 590 | 
         
             
                with gr.Row():
         
     | 
| 591 | 
         
             
                    with gr.Accordion("📙 Citation", open=False):
         
     | 
| 592 | 
         
             
                        citation_button = gr.Textbox(
         
     | 
| 
         | 
|
| 606 | 
         
             
                )
         
     | 
| 607 | 
         | 
| 608 | 
         
             
            scheduler = BackgroundScheduler()
         
     | 
| 609 | 
         
            +
            scheduler.add_job(restart_space, "interval", seconds=1800)
         
     | 
| 610 | 
         
             
            scheduler.start()
         
     | 
| 611 | 
         
             
            demo.queue(concurrency_count=40).launch()
         
     | 
    	
        requirements.txt
    CHANGED
    
    | 
         @@ -1,3 +1,4 @@ 
     | 
|
| 
         | 
|
| 1 | 
         
             
            aiofiles==23.1.0
         
     | 
| 2 | 
         
             
            aiohttp==3.8.4
         
     | 
| 3 | 
         
             
            aiosignal==1.3.1
         
     | 
| 
         @@ -19,8 +20,8 @@ filelock==3.11.0 
     | 
|
| 19 | 
         
             
            fonttools==4.39.3
         
     | 
| 20 | 
         
             
            frozenlist==1.3.3
         
     | 
| 21 | 
         
             
            fsspec==2023.4.0
         
     | 
| 22 | 
         
            -
            gradio==3. 
     | 
| 23 | 
         
            -
             
     | 
| 24 | 
         
             
            h11==0.14.0
         
     | 
| 25 | 
         
             
            httpcore==0.17.0
         
     | 
| 26 | 
         
             
            httpx==0.24.0
         
     | 
| 
         @@ -59,7 +60,7 @@ sniffio==1.3.0 
     | 
|
| 59 | 
         
             
            starlette==0.26.1
         
     | 
| 60 | 
         
             
            toolz==0.12.0
         
     | 
| 61 | 
         
             
            tqdm==4.65.0
         
     | 
| 62 | 
         
            -
            transformers==4. 
     | 
| 63 | 
         
             
            typing_extensions==4.5.0
         
     | 
| 64 | 
         
             
            tzdata==2023.3
         
     | 
| 65 | 
         
             
            tzlocal==4.3
         
     | 
| 
         | 
|
| 1 | 
         
            +
            accelerate==0.23.0
         
     | 
| 2 | 
         
             
            aiofiles==23.1.0
         
     | 
| 3 | 
         
             
            aiohttp==3.8.4
         
     | 
| 4 | 
         
             
            aiosignal==1.3.1
         
     | 
| 
         | 
|
| 20 | 
         
             
            fonttools==4.39.3
         
     | 
| 21 | 
         
             
            frozenlist==1.3.3
         
     | 
| 22 | 
         
             
            fsspec==2023.4.0
         
     | 
| 23 | 
         
            +
            gradio==3.43.2
         
     | 
| 24 | 
         
            +
            gradio-client==0.5.0
         
     | 
| 25 | 
         
             
            h11==0.14.0
         
     | 
| 26 | 
         
             
            httpcore==0.17.0
         
     | 
| 27 | 
         
             
            httpx==0.24.0
         
     | 
| 
         | 
|
| 60 | 
         
             
            starlette==0.26.1
         
     | 
| 61 | 
         
             
            toolz==0.12.0
         
     | 
| 62 | 
         
             
            tqdm==4.65.0
         
     | 
| 63 | 
         
            +
            transformers==4.33.1
         
     | 
| 64 | 
         
             
            typing_extensions==4.5.0
         
     | 
| 65 | 
         
             
            tzdata==2023.3
         
     | 
| 66 | 
         
             
            tzlocal==4.3
         
     | 
    	
        src/display_models/get_model_metadata.py
    CHANGED
    
    | 
         @@ -2,11 +2,14 @@ import glob 
     | 
|
| 2 | 
         
             
            import json
         
     | 
| 3 | 
         
             
            import os
         
     | 
| 4 | 
         
             
            import re
         
     | 
| 
         | 
|
| 5 | 
         
             
            from typing import List
         
     | 
| 6 | 
         | 
| 7 | 
         
             
            import huggingface_hub
         
     | 
| 8 | 
         
             
            from huggingface_hub import HfApi
         
     | 
| 9 | 
         
             
            from tqdm import tqdm
         
     | 
| 
         | 
|
| 
         | 
|
| 10 | 
         | 
| 11 | 
         
             
            from src.display_models.model_metadata_flags import DO_NOT_SUBMIT_MODELS, FLAGGED_MODELS
         
     | 
| 12 | 
         
             
            from src.display_models.model_metadata_type import MODEL_TYPE_METADATA, ModelType, model_type_from_str
         
     | 
| 
         @@ -16,27 +19,53 @@ api = HfApi(token=os.environ.get("H4_TOKEN", None)) 
     | 
|
| 16 | 
         | 
| 17 | 
         | 
| 18 | 
         
             
            def get_model_infos_from_hub(leaderboard_data: List[dict]):
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 19 | 
         
             
                for model_data in tqdm(leaderboard_data):
         
     | 
| 20 | 
         
             
                    model_name = model_data["model_name_for_query"]
         
     | 
| 21 | 
         
            -
             
     | 
| 22 | 
         
            -
             
     | 
| 23 | 
         
            -
             
     | 
| 24 | 
         
            -
             
     | 
| 25 | 
         
            -
                         
     | 
| 26 | 
         
            -
             
     | 
| 27 | 
         
            -
             
     | 
| 28 | 
         
            -
                         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 29 | 
         | 
| 30 | 
         
             
                    model_data[AutoEvalColumn.license.name] = get_model_license(model_info)
         
     | 
| 31 | 
         
             
                    model_data[AutoEvalColumn.likes.name] = get_model_likes(model_info)
         
     | 
| 32 | 
         
            -
                     
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 33 | 
         | 
| 34 | 
         | 
| 35 | 
         
             
            def get_model_license(model_info):
         
     | 
| 36 | 
         
             
                try:
         
     | 
| 37 | 
         
             
                    return model_info.cardData["license"]
         
     | 
| 38 | 
         
             
                except Exception:
         
     | 
| 39 | 
         
            -
                    return  
     | 
| 40 | 
         | 
| 41 | 
         | 
| 42 | 
         
             
            def get_model_likes(model_info):
         
     | 
| 
         @@ -52,11 +81,17 @@ def get_model_size(model_name, model_info): 
     | 
|
| 52 | 
         
             
                    return round(model_info.safetensors["total"] / 1e9, 3)
         
     | 
| 53 | 
         
             
                except AttributeError:
         
     | 
| 54 | 
         
             
                    try:
         
     | 
| 55 | 
         
            -
                         
     | 
| 56 | 
         
            -
                         
     | 
| 57 | 
         
            -
             
     | 
| 58 | 
         
            -
             
     | 
| 59 | 
         
            -
             
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 60 | 
         | 
| 61 | 
         | 
| 62 | 
         
             
            def get_model_type(leaderboard_data: List[dict]):
         
     | 
| 
         | 
|
| 2 | 
         
             
            import json
         
     | 
| 3 | 
         
             
            import os
         
     | 
| 4 | 
         
             
            import re
         
     | 
| 5 | 
         
            +
            import pickle
         
     | 
| 6 | 
         
             
            from typing import List
         
     | 
| 7 | 
         | 
| 8 | 
         
             
            import huggingface_hub
         
     | 
| 9 | 
         
             
            from huggingface_hub import HfApi
         
     | 
| 10 | 
         
             
            from tqdm import tqdm
         
     | 
| 11 | 
         
            +
            from transformers import AutoModel, AutoConfig
         
     | 
| 12 | 
         
            +
            from accelerate import init_empty_weights
         
     | 
| 13 | 
         | 
| 14 | 
         
             
            from src.display_models.model_metadata_flags import DO_NOT_SUBMIT_MODELS, FLAGGED_MODELS
         
     | 
| 15 | 
         
             
            from src.display_models.model_metadata_type import MODEL_TYPE_METADATA, ModelType, model_type_from_str
         
     | 
| 
         | 
|
| 19 | 
         | 
| 20 | 
         | 
| 21 | 
         
             
            def get_model_infos_from_hub(leaderboard_data: List[dict]):
                """Fill in license, likes and parameter count for every leaderboard entry.

                Each dict in ``leaderboard_data`` is updated in place under the keys
                ``AutoEvalColumn.license.name``, ``AutoEvalColumn.likes.name`` and
                ``AutoEvalColumn.params.name``. Hub lookups and computed model sizes are
                memoised in two pickle files in the current working directory
                (``model_info_cache.pkl`` and ``model_size_cache.pkl``), which are read
                at the start of the call and rewritten at the end.

                Args:
                    leaderboard_data: One dict per model; each must contain the key
                        ``"model_name_for_query"`` holding the Hub repo id.

                Returns:
                    None. The dicts are mutated in place.
                """
                model_info_cache = _load_pickle_cache("model_info_cache.pkl")
                model_size_cache = _load_pickle_cache("model_size_cache.pkl")

                for model_data in tqdm(leaderboard_data):
                    model_name = model_data["model_name_for_query"]

                    if model_name in model_info_cache:
                        model_info = model_info_cache[model_name]
                    else:
                        try:
                            model_info = api.model_info(model_name)
                        except huggingface_hub.utils._errors.RepositoryNotFoundError:
                            print("Repo not found!", model_name)
                            model_data[AutoEvalColumn.license.name] = None
                            model_data[AutoEvalColumn.likes.name] = None
                            # No hub metadata available: get_model_size is called with
                            # None so it has to rely on its own fallbacks.
                            if model_name not in model_size_cache:
                                model_size_cache[model_name] = get_model_size(model_name, None)
                            model_data[AutoEvalColumn.params.name] = model_size_cache[model_name]
                            # Without this, execution falls through with `model_info`
                            # unbound (first iteration) or left over from the previous
                            # model, overwriting the placeholders set just above.
                            continue
                        model_info_cache[model_name] = model_info

                    model_data[AutoEvalColumn.license.name] = get_model_license(model_info)
                    model_data[AutoEvalColumn.likes.name] = get_model_likes(model_info)
                    if model_name not in model_size_cache:
                        model_size_cache[model_name] = get_model_size(model_name, model_info)
                    model_data[AutoEvalColumn.params.name] = model_size_cache[model_name]

                # save cache to disk in pickle format
                with open("model_info_cache.pkl", "wb") as f:
                    pickle.dump(model_info_cache, f)
                with open("model_size_cache.pkl", "wb") as f:
                    pickle.dump(model_size_cache, f)


            def _load_pickle_cache(path):
                """Return the object pickled at ``path``, or an empty dict if it is missing, empty or corrupt."""
                # NOTE(review): pickle.load runs arbitrary code from the file. This is
                # only acceptable while these cache files are written exclusively by
                # this application - confirm nothing else can place them on disk.
                try:
                    with open(path, "rb") as f:
                        return pickle.load(f)
                except (EOFError, FileNotFoundError, pickle.UnpicklingError):
                    return {}
         
     | 
| 62 | 
         | 
| 63 | 
         | 
| 64 | 
         
             
            def get_model_license(model_info):
                """Return the ``"license"`` entry of ``model_info.cardData``, or ``"?"`` if it cannot be read.

                Any exception raised while reading the attribute or the key (for
                example ``model_info`` being ``None``, or the key being absent) yields
                the ``"?"`` placeholder instead of propagating.
                """
                unknown_license = "?"
                try:
                    card_data = model_info.cardData
                    license_name = card_data["license"]
                except Exception:
                    return unknown_license
                return license_name
         
     | 
| 69 | 
         | 
| 70 | 
         | 
| 71 | 
         
             
            def get_model_likes(model_info):
         
     | 
| 
         | 
|
| 81 | 
         
             
                    return round(model_info.safetensors["total"] / 1e9, 3)
         
     | 
| 82 | 
         
             
                except AttributeError:
         
     | 
| 83 | 
         
             
                    try:
         
     | 
| 84 | 
         
            +
                        config = AutoConfig.from_pretrained(model_name, trust_remote_code=False)
         
     | 
| 85 | 
         
            +
                        with init_empty_weights():
         
     | 
| 86 | 
         
            +
                            model = AutoModel.from_config(config, trust_remote_code=False)
         
     | 
| 87 | 
         
            +
                        return round(sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e9, 3)
         
     | 
| 88 | 
         
            +
                    except (EnvironmentError, ValueError): # model config not found, likely private  
         
     | 
| 89 | 
         
            +
                        try:
         
     | 
| 90 | 
         
            +
                            size_match = re.search(size_pattern, model_name.lower())
         
     | 
| 91 | 
         
            +
                            size = size_match.group(0)
         
     | 
| 92 | 
         
            +
                            return round(float(size[:-1]) if size[-1] == "b" else float(size[:-1]) / 1e3, 3)
         
     | 
| 93 | 
         
            +
                        except AttributeError:
         
     | 
| 94 | 
         
            +
                            return 0
         
     | 
| 95 | 
         | 
| 96 | 
         | 
| 97 | 
         
             
            def get_model_type(leaderboard_data: List[dict]):
         
     | 
    	
        src/display_models/model_metadata_flags.py
    CHANGED
    
    | 
         @@ -1,15 +1,8 @@ 
     | 
|
| 1 | 
         
             
            # Models which have been flagged by users as being problematic for a reason or another
         
     | 
| 2 | 
         
             
            # (Model name to forum discussion link)
         
     | 
| 3 | 
         
             
            FLAGGED_MODELS = {
         
     | 
| 4 | 
         
            -
                "Voicelab/trurl-2-13b": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/202",
         
     | 
| 5 | 
         
            -
                "deepnight-research/llama-2-70B-inst": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/207",
         
     | 
| 6 | 
         
            -
                "Aspik101/trurl-2-13b-pl-instruct_unload": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/213",
         
     | 
| 7 | 
         
            -
                "Fredithefish/ReasonixPajama-3B-HF": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/236",
         
     | 
| 8 | 
         
            -
                "TigerResearch/tigerbot-7b-sft-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/237",
         
     | 
| 9 | 
         
            -
                "gaodrew/gaodrew-gorgonzola-13b": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/215",
         
     | 
| 10 | 
         
             
            }
         
     | 
| 11 | 
         | 
| 12 | 
         
             
            # Models which have been requested by orgs to not be submitted on the leaderboard
         
     | 
| 13 | 
         
             
            DO_NOT_SUBMIT_MODELS = [
         
     | 
| 14 | 
         
            -
                "Voicelab/trurl-2-13b",  # trained on MMLU
         
     | 
| 15 | 
         
             
            ]
         
     | 
| 
         | 
|
| 1 | 
         
             
            # Models which have been flagged by users as being problematic for a reason or another
         
     | 
| 2 | 
         
             
            # (Model name to forum discussion link)
         
     | 
| 3 | 
         
             
            FLAGGED_MODELS = {
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 4 | 
         
             
            }
         
     | 
| 5 | 
         | 
| 6 | 
         
             
            # Models which have been requested by orgs to not be submitted on the leaderboard
         
     | 
| 7 | 
         
             
            DO_NOT_SUBMIT_MODELS = [
         
     | 
| 
         | 
|
| 8 | 
         
             
            ]
         
     | 
    	
        src/display_models/read_results.py
    CHANGED
    
    | 
         @@ -87,11 +87,11 @@ def parse_eval_result(json_filepath: str) -> Tuple[str, list[dict]]: 
     | 
|
| 87 | 
         
             
                if len(model_split) == 1:
         
     | 
| 88 | 
         
             
                    org = None
         
     | 
| 89 | 
         
             
                    model = model_split[0]
         
     | 
| 90 | 
         
            -
                    result_key = f"{model}_{ 
     | 
| 91 | 
         
             
                else:
         
     | 
| 92 | 
         
             
                    org = model_split[0]
         
     | 
| 93 | 
         
             
                    model = model_split[1]
         
     | 
| 94 | 
         
            -
                    result_key = f"{org}_{model}_{ 
     | 
| 95 | 
         | 
| 96 | 
         
             
                eval_results = []
         
     | 
| 97 | 
         
             
                for benchmark, metric in zip(BENCHMARKS, METRICS):
         
     | 
| 
         | 
|
| 87 | 
         
             
                if len(model_split) == 1:
         
     | 
| 88 | 
         
             
                    org = None
         
     | 
| 89 | 
         
             
                    model = model_split[0]
         
     | 
| 90 | 
         
            +
                    result_key = f"{model}_{precision}"
         
     | 
| 91 | 
         
             
                else:
         
     | 
| 92 | 
         
             
                    org = model_split[0]
         
     | 
| 93 | 
         
             
                    model = model_split[1]
         
     | 
| 94 | 
         
            +
                    result_key = f"{org}_{model}_{precision}"
         
     | 
| 95 | 
         | 
| 96 | 
         
             
                eval_results = []
         
     | 
| 97 | 
         
             
                for benchmark, metric in zip(BENCHMARKS, METRICS):
         
     | 
    	
        src/load_from_hub.py
    CHANGED
    
    | 
         @@ -80,11 +80,8 @@ def get_leaderboard_df( 
     | 
|
| 80 | 
         
             
                df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
         
     | 
| 81 | 
         
             
                df = df[cols].round(decimals=2)
         
     | 
| 82 | 
         | 
| 83 | 
         
            -
             
     | 
| 84 | 
         
             
                # filter out if any of the benchmarks have not been produced
         
     | 
| 85 | 
         
             
                df = df[has_no_nan_values(df, benchmark_cols)]
         
     | 
| 86 | 
         
            -
             
     | 
| 87 | 
         
            -
                print(df)
         
     | 
| 88 | 
         
             
                return df
         
     | 
| 89 | 
         | 
| 90 | 
         | 
| 
         @@ -125,7 +122,7 @@ def get_evaluation_queue_df( 
     | 
|
| 125 | 
         | 
| 126 | 
         
             
                pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
         
     | 
| 127 | 
         
             
                running_list = [e for e in all_evals if e["status"] == "RUNNING"]
         
     | 
| 128 | 
         
            -
                finished_list = [e for e in all_evals if e["status"].startswith("FINISHED")]
         
     | 
| 129 | 
         
             
                df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
         
     | 
| 130 | 
         
             
                df_running = pd.DataFrame.from_records(running_list, columns=cols)
         
     | 
| 131 | 
         
             
                df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
         
     | 
| 
         | 
|
| 80 | 
         
             
                df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
         
     | 
| 81 | 
         
             
                df = df[cols].round(decimals=2)
         
     | 
| 82 | 
         | 
| 
         | 
|
| 83 | 
         
             
                # filter out if any of the benchmarks have not been produced
         
     | 
| 84 | 
         
             
                df = df[has_no_nan_values(df, benchmark_cols)]
         
     | 
| 
         | 
|
| 
         | 
|
| 85 | 
         
             
                return df
         
     | 
| 86 | 
         | 
| 87 | 
         | 
| 
         | 
|
| 122 | 
         | 
| 123 | 
         
             
                pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
         
     | 
| 124 | 
         
             
                running_list = [e for e in all_evals if e["status"] == "RUNNING"]
         
     | 
| 125 | 
         
            +
                finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]
         
     | 
| 126 | 
         
             
                df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
         
     | 
| 127 | 
         
             
                df_running = pd.DataFrame.from_records(running_list, columns=cols)
         
     | 
| 128 | 
         
             
                df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
         
     |