Spaces:
Runtime error
Runtime error
| scenario,scenario_source,ref_scenario,ref_source,corr_type,model_select_strategy,model_subset_size_requested,exp_n,correlation,p_value | |
| hellaswag,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.0,1.0 | |
| hellaswag,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.39999999999999997,0.48333333333333334 | |
| hellaswag,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,2,-0.19999999999999998,0.8166666666666667 | |
| hellaswag,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.6,0.23333333333333334 | |
| hellaswag,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.39999999999999997,0.48333333333333334 | |
| hellaswag,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.19999999999999998,0.8166666666666667 | |
| hellaswag,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.39999999999999997,0.48333333333333334 | |
| hellaswag,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.6,0.23333333333333334 | |
| hellaswag,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.39999999999999997,0.48333333333333334 | |
| hellaswag,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.6,0.23333333333333334 | |
| humaneval,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,0,-0.39999999999999997,0.48333333333333334 | |
| humaneval,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,1,-0.5270462766947298,0.206507295485425 | |
| humaneval,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.5270462766947298,0.206507295485425 | |
| humaneval,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.5270462766947298,0.206507295485425 | |
| humaneval,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,4,-0.22360679774997896,0.6015081344405899 | |
| humaneval,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.10540925533894598,0.8005421074231263 | |
| humaneval,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,6,-0.10540925533894598,0.8005421074231263 | |
| humaneval,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.39999999999999997,0.48333333333333334 | |
| humaneval,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,8,-0.10540925533894598,0.8005421074231263 | |
| humaneval,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,9,-0.22360679774997896,0.6015081344405899 | |
| mbpp,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.10540925533894598,0.8005421074231263 | |
| mbpp,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,1,-0.31622776601683794,0.44848886103153174 | |
| mbpp,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,2,-0.31622776601683794,0.44848886103153174 | |
| mbpp,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.10540925533894598,0.8005421074231263 | |
| mbpp,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,4,-0.39999999999999997,0.48333333333333334 | |
| mbpp,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.10540925533894598,0.8005421074231263 | |
| mbpp,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.39999999999999997,0.48333333333333334 | |
| mbpp,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.39999999999999997,0.48333333333333334 | |
| mbpp,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.10540925533894598,0.8005421074231263 | |
| mbpp,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.39999999999999997,0.48333333333333334 | |
| winogrande,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,0,-0.19999999999999998,0.8166666666666667 | |
| winogrande,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.7999999999999999,0.08333333333333333 | |
| winogrande,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.0,1.0 | |
| winogrande,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,3,-0.19999999999999998,0.8166666666666667 | |
| winogrande,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.31622776601683794,0.44848886103153174 | |
| winogrande,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.7999999999999999,0.08333333333333333 | |
| winogrande,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,6,-0.9999999999999999,0.016666666666666666 | |
| winogrande,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.7999999999999999,0.08333333333333333 | |
| winogrande,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.0,1.0 | |
| winogrande,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,9,-0.19999999999999998,0.8166666666666667 | |
| grounding,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.9486832980505137,0.02297740150320607 | |
| grounding,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.19999999999999998,0.8166666666666667 | |
| grounding,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.10540925533894598,0.8005421074231263 | |
| grounding,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.31622776601683794,0.44848886103153174 | |
| grounding,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.9486832980505137,0.02297740150320607 | |
| grounding,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.9486832980505137,0.02297740150320607 | |
| grounding,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.31622776601683794,0.44848886103153174 | |
| grounding,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.19999999999999998,0.8166666666666667 | |
| grounding,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.39999999999999997,0.48333333333333334 | |
| grounding,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.19999999999999998,0.8166666666666667 | |
| instruction_following,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.0,1.0 | |
| instruction_following,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.39999999999999997,0.48333333333333334 | |
| instruction_following,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.7378647873726218,0.07697417298126676 | |
| instruction_following,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,3,-0.39999999999999997,0.48333333333333334 | |
| instruction_following,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.8944271909999159,0.0367138563627041 | |
| instruction_following,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.9486832980505137,0.02297740150320607 | |
| instruction_following,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.9486832980505137,0.02297740150320607 | |
| instruction_following,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.0,1.0 | |
| instruction_following,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.8944271909999159,0.0367138563627041 | |
| instruction_following,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.7378647873726218,0.07697417298126676 | |
| planning,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.7378647873726218,0.07697417298126676 | |
| planning,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.7999999999999999,0.08333333333333333 | |
| planning,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.39999999999999997,0.48333333333333334 | |
| planning,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.39999999999999997,0.48333333333333334 | |
| planning,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.19999999999999998,0.8166666666666667 | |
| planning,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.39999999999999997,0.48333333333333334 | |
| planning,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.39999999999999997,0.48333333333333334 | |
| planning,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.7378647873726218,0.07697417298126676 | |
| planning,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,8,-0.10540925533894598,0.8005421074231263 | |
| planning,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.7378647873726218,0.07697417298126676 | |
| reasoning,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.7378647873726218,0.07697417298126676 | |
| reasoning,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.6,0.23333333333333334 | |
| reasoning,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.7378647873726218,0.07697417298126676 | |
| reasoning,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.6,0.23333333333333334 | |
| reasoning,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.5270462766947298,0.206507295485425 | |
| reasoning,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.7999999999999999,0.08333333333333333 | |
| reasoning,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.5270462766947298,0.206507295485425 | |
| reasoning,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.6,0.23333333333333334 | |
| reasoning,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.0,1.0 | |
| reasoning,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.39999999999999997,0.48333333333333334 | |
| refinement,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.6,0.23333333333333334 | |
| refinement,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.39999999999999997,0.48333333333333334 | |
| refinement,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.19999999999999998,0.8166666666666667 | |
| refinement,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.39999999999999997,0.48333333333333334 | |
| refinement,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.39999999999999997,0.48333333333333334 | |
| refinement,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.39999999999999997,0.48333333333333334 | |
| refinement,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.39999999999999997,0.48333333333333334 | |
| refinement,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.39999999999999997,0.48333333333333334 | |
| refinement,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.39999999999999997,0.48333333333333334 | |
| refinement,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.6,0.23333333333333334 | |
| safety,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.0,1.0 | |
| safety,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.0,1.0 | |
| safety,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,2,-0.19999999999999998,0.8166666666666667 | |
| safety,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.0,1.0 | |
| safety,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,4,-0.19999999999999998,0.8166666666666667 | |
| safety,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,5,-0.6,0.23333333333333334 | |
| safety,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.7999999999999999,0.08333333333333333 | |
| safety,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,7,-0.6,0.23333333333333334 | |
| safety,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.6,0.23333333333333334 | |
| safety,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.6,0.23333333333333334 | |
| theory_of_mind,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.19999999999999998,0.8166666666666667 | |
| theory_of_mind,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.39999999999999997,0.48333333333333334 | |
| theory_of_mind,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.6,0.23333333333333334 | |
| theory_of_mind,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.0,1.0 | |
| theory_of_mind,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.19999999999999998,0.8166666666666667 | |
| theory_of_mind,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.19999999999999998,0.8166666666666667 | |
| theory_of_mind,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.39999999999999997,0.48333333333333334 | |
| theory_of_mind,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.0,1.0 | |
| theory_of_mind,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.19999999999999998,0.8166666666666667 | |
| theory_of_mind,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.39999999999999997,0.48333333333333334 | |
| tool_usage,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.19999999999999998,0.8166666666666667 | |
| tool_usage,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.39999999999999997,0.48333333333333334 | |
| tool_usage,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.9999999999999999,0.016666666666666666 | |
| tool_usage,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.9999999999999999,0.016666666666666666 | |
| tool_usage,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.39999999999999997,0.48333333333333334 | |
| tool_usage,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.9999999999999999,0.016666666666666666 | |
| tool_usage,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.19999999999999998,0.8166666666666667 | |
| tool_usage,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.6,0.23333333333333334 | |
| tool_usage,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.7999999999999999,0.08333333333333333 | |
| tool_usage,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.39999999999999997,0.48333333333333334 | |
| livebench_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.6,0.23333333333333334 | |
| livebench_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.7999999999999999,0.08333333333333333 | |
| livebench_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.7999999999999999,0.08333333333333333 | |
| livebench_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.6,0.23333333333333334 | |
| livebench_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.6,0.23333333333333334 | |
| livebench_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.39999999999999997,0.48333333333333334 | |
| livebench_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.6,0.23333333333333334 | |
| livebench_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.7999999999999999,0.08333333333333333 | |
| livebench_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.39999999999999997,0.48333333333333334 | |
| livebench_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.39999999999999997,0.48333333333333334 | |
| reasoning_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.7378647873726218,0.07697417298126676 | |
| reasoning_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.5270462766947298,0.206507295485425 | |
| reasoning_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.7378647873726218,0.07697417298126676 | |
| reasoning_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.7378647873726218,0.07697417298126676 | |
| reasoning_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.5270462766947298,0.206507295485425 | |
| reasoning_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.7999999999999999,0.08333333333333333 | |
| reasoning_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.7378647873726218,0.07697417298126676 | |
| reasoning_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.7999999999999999,0.08333333333333333 | |
| reasoning_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.5270462766947298,0.206507295485425 | |
| reasoning_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.7378647873726218,0.07697417298126676 | |
| coding_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.31622776601683794,0.44848886103153174 | |
| coding_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.31622776601683794,0.44848886103153174 | |
| coding_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.6,0.23333333333333334 | |
| coding_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.31622776601683794,0.44848886103153174 | |
| coding_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.31622776601683794,0.44848886103153174 | |
| coding_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.5270462766947298,0.206507295485425 | |
| coding_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.5270462766947298,0.206507295485425 | |
| coding_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.5270462766947298,0.206507295485425 | |
| coding_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.5270462766947298,0.206507295485425 | |
| coding_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.5270462766947298,0.206507295485425 | |
| mathematics_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.6,0.23333333333333334 | |
| mathematics_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.6,0.23333333333333334 | |
| mathematics_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.6,0.23333333333333334 | |
| mathematics_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.6,0.23333333333333334 | |
| mathematics_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.6,0.23333333333333334 | |
| mathematics_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.6,0.23333333333333334 | |
| mathematics_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.6,0.23333333333333334 | |
| mathematics_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.6,0.23333333333333334 | |
| mathematics_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.6,0.23333333333333334 | |
| mathematics_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.6,0.23333333333333334 | |
| data_analysis_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.6,0.23333333333333334 | |
| data_analysis_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.6,0.23333333333333334 | |
| data_analysis_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.6,0.23333333333333334 | |
| data_analysis_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.6,0.23333333333333334 | |
| data_analysis_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.7999999999999999,0.08333333333333333 | |
| data_analysis_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.7999999999999999,0.08333333333333333 | |
| data_analysis_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.7999999999999999,0.08333333333333333 | |
| data_analysis_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.6,0.23333333333333334 | |
| data_analysis_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.7999999999999999,0.08333333333333333 | |
| data_analysis_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.7999999999999999,0.08333333333333333 | |
| language_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.6,0.23333333333333334 | |
| language_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.7999999999999999,0.08333333333333333 | |
| language_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.7999999999999999,0.08333333333333333 | |
| language_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.7999999999999999,0.08333333333333333 | |
| language_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.7999999999999999,0.08333333333333333 | |
| language_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.7999999999999999,0.08333333333333333 | |
| language_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.6,0.23333333333333334 | |
| language_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.6,0.23333333333333334 | |
| language_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.6,0.23333333333333334 | |
| language_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.6,0.23333333333333334 | |
| if_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.0,1.0 | |
| if_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,1,-0.6,0.23333333333333334 | |
| if_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.19999999999999998,0.8166666666666667 | |
| if_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.19999999999999998,0.8166666666666667 | |
| if_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.19999999999999998,0.8166666666666667 | |
| if_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.19999999999999998,0.8166666666666667 | |
| if_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.19999999999999998,0.8166666666666667 | |
| if_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.19999999999999998,0.8166666666666667 | |
| if_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.0,1.0 | |
| if_average,livebench_240701,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.19999999999999998,0.8166666666666667 | |
| arena_hard,arena_hard_2404,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.39999999999999997,0.48333333333333334 | |
| arena_hard,arena_hard_2404,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.9999999999999999,0.016666666666666666 | |
| arena_hard,arena_hard_2404,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.19999999999999998,0.8166666666666667 | |
| arena_hard,arena_hard_2404,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.7999999999999999,0.08333333333333333 | |
| arena_hard,arena_hard_2404,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.9999999999999999,0.016666666666666666 | |
| arena_hard,arena_hard_2404,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.7999999999999999,0.08333333333333333 | |
| arena_hard,arena_hard_2404,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.39999999999999997,0.48333333333333334 | |
| arena_hard,arena_hard_2404,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.9999999999999999,0.016666666666666666 | |
| arena_hard,arena_hard_2404,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.7999999999999999,0.08333333333333333 | |
| arena_hard,arena_hard_2404,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.7999999999999999,0.08333333333333333 | |
| mixeval,mixeval_240601,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.7999999999999999,0.08333333333333333 | |
| mixeval,mixeval_240601,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.7999999999999999,0.08333333333333333 | |
| mixeval,mixeval_240601,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.7999999999999999,0.08333333333333333 | |
| mixeval,mixeval_240601,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.7999999999999999,0.08333333333333333 | |
| mixeval,mixeval_240601,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.7999999999999999,0.08333333333333333 | |
| mixeval,mixeval_240601,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.7999999999999999,0.08333333333333333 | |
| mixeval,mixeval_240601,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.7999999999999999,0.08333333333333333 | |
| mixeval,mixeval_240601,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.7999999999999999,0.08333333333333333 | |
| mixeval,mixeval_240601,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.7999999999999999,0.08333333333333333 | |
| mixeval,mixeval_240601,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.7999999999999999,0.08333333333333333 | |
| agieval,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.31622776601683794,0.44848886103153174 | |
| agieval,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.31622776601683794,0.44848886103153174 | |
| agieval,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.39999999999999997,0.48333333333333334 | |
| agieval,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.7999999999999999,0.08333333333333333 | |
| agieval,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.7999999999999999,0.08333333333333333 | |
| agieval,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.9999999999999999,0.016666666666666666 | |
| agieval,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.19999999999999998,0.8166666666666667 | |
| agieval,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.19999999999999998,0.8166666666666667 | |
| agieval,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.39999999999999997,0.48333333333333334 | |
| agieval,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.7999999999999999,0.08333333333333333 | |
| arc_c,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.6,0.23333333333333334 | |
| arc_c,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.39999999999999997,0.48333333333333334 | |
| arc_c,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,2,-0.19999999999999998,0.8166666666666667 | |
| arc_c,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,3,-0.39999999999999997,0.48333333333333334 | |
| arc_c,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.0,1.0 | |
| arc_c,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.6,0.23333333333333334 | |
| arc_c,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.31622776601683794,0.44848886103153174 | |
| arc_c,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.6,0.23333333333333334 | |
| arc_c,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.39999999999999997,0.48333333333333334 | |
| arc_c,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.7378647873726218,0.07697417298126676 | |
| alpacav1,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,0,-0.31622776601683794,0.44848886103153174 | |
| alpacav1,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,1,-0.7999999999999999,0.08333333333333333 | |
| alpacav1,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.7999999999999999,0.08333333333333333 | |
| alpacav1,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.7999999999999999,0.08333333333333333 | |
| alpacav1,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.39999999999999997,0.48333333333333334 | |
| alpacav1,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.39999999999999997,0.48333333333333334 | |
| alpacav1,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.6,0.23333333333333334 | |
| alpacav1,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.7378647873726218,0.07697417298126676 | |
| alpacav1,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.6,0.23333333333333334 | |
| alpacav1,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.31622776601683794,0.44848886103153174 | |
| alpacav2,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.39999999999999997,0.48333333333333334 | |
| alpacav2,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.19999999999999998,0.8166666666666667 | |
| alpacav2,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.39999999999999997,0.48333333333333334 | |
| alpacav2,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.7378647873726218,0.07697417298126676 | |
| alpacav2,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.39999999999999997,0.48333333333333334 | |
| alpacav2,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,5,-0.6,0.23333333333333334 | |
| alpacav2,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,6,-0.39999999999999997,0.48333333333333334 | |
| alpacav2,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.6,0.23333333333333334 | |
| alpacav2,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.0,1.0 | |
| alpacav2,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.6,0.23333333333333334 | |
| alpacaeval2_lc,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.0,1.0 | |
| alpacaeval2_lc,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,1,-0.6,0.23333333333333334 | |
| alpacaeval2_lc,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.7999999999999999,0.08333333333333333 | |
| alpacaeval2_lc,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.0,1.0 | |
| alpacaeval2_lc,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.6,0.23333333333333334 | |
| alpacaeval2_lc,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.39999999999999997,0.48333333333333334 | |
| alpacaeval2_lc,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.39999999999999997,0.48333333333333334 | |
| alpacaeval2_lc,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.6,0.23333333333333334 | |
| alpacaeval2_lc,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,8,-0.39999999999999997,0.48333333333333334 | |
| alpacaeval2_lc,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.0,1.0 | |
| arena_elo,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.31622776601683794,0.44848886103153174 | |
| arena_elo,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.6,0.23333333333333334 | |
| arena_elo,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.31622776601683794,0.44848886103153174 | |
| arena_elo,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,3,-0.9999999999999999,0.016666666666666666 | |
| arena_elo,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.39999999999999997,0.48333333333333334 | |
| arena_elo,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.31622776601683794,0.44848886103153174 | |
| arena_elo,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,6,-0.9999999999999999,0.016666666666666666 | |
| arena_elo,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.6,0.23333333333333334 | |
| arena_elo,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,8,-0.10540925533894598,0.8005421074231263 | |
| arena_elo,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.6,0.23333333333333334 | |
| bbh,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.9486832980505137,0.02297740150320607 | |
| bbh,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.31622776601683794,0.44848886103153174 | |
| bbh,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,2,-0.39999999999999997,0.48333333333333334 | |
| bbh,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.9999999999999999,0.016666666666666666 | |
| bbh,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.0,1.0 | |
| bbh,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,5,-0.39999999999999997,0.48333333333333334 | |
| bbh,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.7999999999999999,0.08333333333333333 | |
| bbh,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.7999999999999999,0.08333333333333333 | |
| bbh,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.5270462766947298,0.206507295485425 | |
| bbh,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.5270462766947298,0.206507295485425 | |
| eq_benchv2,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,0,-0.19999999999999998,0.8166666666666667 | |
| eq_benchv2,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.19999999999999998,0.8166666666666667 | |
| eq_benchv2,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.0,1.0 | |
| eq_benchv2,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.0,1.0 | |
| eq_benchv2,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,4,-0.39999999999999997,0.48333333333333334 | |
| eq_benchv2,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.39999999999999997,0.48333333333333334 | |
| eq_benchv2,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.39999999999999997,0.48333333333333334 | |
| eq_benchv2,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.7999999999999999,0.08333333333333333 | |
| eq_benchv2,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.19999999999999998,0.8166666666666667 | |
| eq_benchv2,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.7999999999999999,0.08333333333333333 | |
| gpt4all,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.7378647873726218,0.07697417298126676 | |
| gpt4all,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.39999999999999997,0.48333333333333334 | |
| gpt4all,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.31622776601683794,0.44848886103153174 | |
| gpt4all,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.39999999999999997,0.48333333333333334 | |
| gpt4all,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.31622776601683794,0.44848886103153174 | |
| gpt4all,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.39999999999999997,0.48333333333333334 | |
| gpt4all,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,6,-0.39999999999999997,0.48333333333333334 | |
| gpt4all,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,7,-0.39999999999999997,0.48333333333333334 | |
| gpt4all,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.39999999999999997,0.48333333333333334 | |
| gpt4all,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.5270462766947298,0.206507295485425 | |
| hugging_6,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,0,-0.6,0.23333333333333334 | |
| hugging_6,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,1,-0.6,0.23333333333333334 | |
| hugging_6,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.19999999999999998,0.8166666666666667 | |
| hugging_6,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.39999999999999997,0.48333333333333334 | |
| hugging_6,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.19999999999999998,0.8166666666666667 | |
| hugging_6,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.6,0.23333333333333334 | |
| hugging_6,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,6,-0.39999999999999997,0.48333333333333334 | |
| hugging_6,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,7,-0.6,0.23333333333333334 | |
| hugging_6,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,8,-0.39999999999999997,0.48333333333333334 | |
| hugging_6,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.6,0.23333333333333334 | |
| llmonitor,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.11952286093343936,0.7815112949987133 | |
| llmonitor,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.0,1.0 | |
| llmonitor,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.0,1.0 | |
| llmonitor,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.6,0.23333333333333334 | |
| llmonitor,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.39999999999999997,0.48333333333333334 | |
| llmonitor,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.6,0.23333333333333334 | |
| llmonitor,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.35856858280031806,0.40538055645894233 | |
| llmonitor,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.31622776601683794,0.44848886103153174 | |
| llmonitor,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.0,1.0 | |
| llmonitor,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.5270462766947298,0.206507295485425 | |
| magi,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.39999999999999997,0.48333333333333334 | |
| magi,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.19999999999999998,0.8166666666666667 | |
| magi,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,2,-0.10540925533894598,0.8005421074231263 | |
| magi,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.39999999999999997,0.48333333333333334 | |
| magi,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.6,0.23333333333333334 | |
| magi,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.7999999999999999,0.08333333333333333 | |
| magi,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.6,0.23333333333333334 | |
| magi,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.7999999999999999,0.08333333333333333 | |
| magi,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.19999999999999998,0.8166666666666667 | |
| magi,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,9,-0.19999999999999998,0.8166666666666667 | |
| mmlu,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.6,0.23333333333333334 | |
| mmlu,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.39999999999999997,0.48333333333333334 | |
| mmlu,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.0,1.0 | |
| mmlu,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.39999999999999997,0.48333333333333334 | |
| mmlu,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.6,0.23333333333333334 | |
| mmlu,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.6,0.23333333333333334 | |
| mmlu,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.6,0.23333333333333334 | |
| mmlu,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.6,0.23333333333333334 | |
| mmlu,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.6,0.23333333333333334 | |
| mmlu,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.5270462766947298,0.206507295485425 | |
| mt_bench,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.6,0.23333333333333334 | |
| mt_bench,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.0,1.0 | |
| mt_bench,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.10540925533894598,0.8005421074231263 | |
| mt_bench,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.39999999999999997,0.48333333333333334 | |
| mt_bench,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.39999999999999997,0.48333333333333334 | |
| mt_bench,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.19999999999999998,0.8166666666666667 | |
| mt_bench,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.10540925533894598,0.8005421074231263 | |
| mt_bench,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,7,-0.39999999999999997,0.48333333333333334 | |
| mt_bench,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.19999999999999998,0.8166666666666667 | |
| mt_bench,BLZ_240312,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.39999999999999997,0.48333333333333334 | |
| biggen_mwr,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,0,0.39999999999999997,0.48333333333333334 | |
| biggen_mwr,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,1,0.39999999999999997,0.48333333333333334 | |
| biggen_mwr,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,2,0.19999999999999998,0.8166666666666667 | |
| biggen_mwr,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,3,0.6,0.23333333333333334 | |
| biggen_mwr,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,4,0.6,0.23333333333333334 | |
| biggen_mwr,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,5,0.39999999999999997,0.48333333333333334 | |
| biggen_mwr,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,6,0.39999999999999997,0.48333333333333334 | |
| biggen_mwr,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,7,0.39999999999999997,0.48333333333333334 | |
| biggen_mwr,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,8,0.6,0.23333333333333334 | |
| biggen_mwr,biggen_240612,aggregate,holistic,kendall,somewhere_aggregate,5,9,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,hellaswag,BLZ_240312,kendall,somewhere_aggregate,5,0,0.0,1.0 | |
| aggregate,holistic,hellaswag,BLZ_240312,kendall,somewhere_aggregate,5,1,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,hellaswag,BLZ_240312,kendall,somewhere_aggregate,5,2,-0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,hellaswag,BLZ_240312,kendall,somewhere_aggregate,5,3,0.6,0.23333333333333334 | |
| aggregate,holistic,hellaswag,BLZ_240312,kendall,somewhere_aggregate,5,4,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,hellaswag,BLZ_240312,kendall,somewhere_aggregate,5,5,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,hellaswag,BLZ_240312,kendall,somewhere_aggregate,5,6,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,hellaswag,BLZ_240312,kendall,somewhere_aggregate,5,7,0.6,0.23333333333333334 | |
| aggregate,holistic,hellaswag,BLZ_240312,kendall,somewhere_aggregate,5,8,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,hellaswag,BLZ_240312,kendall,somewhere_aggregate,5,9,0.6,0.23333333333333334 | |
| aggregate,holistic,humaneval,BLZ_240312,kendall,somewhere_aggregate,5,0,-0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,humaneval,BLZ_240312,kendall,somewhere_aggregate,5,1,-0.5270462766947298,0.206507295485425 | |
| aggregate,holistic,humaneval,BLZ_240312,kendall,somewhere_aggregate,5,2,0.5270462766947298,0.206507295485425 | |
| aggregate,holistic,humaneval,BLZ_240312,kendall,somewhere_aggregate,5,3,0.5270462766947298,0.206507295485425 | |
| aggregate,holistic,humaneval,BLZ_240312,kendall,somewhere_aggregate,5,4,-0.22360679774997896,0.6015081344405899 | |
| aggregate,holistic,humaneval,BLZ_240312,kendall,somewhere_aggregate,5,5,0.10540925533894598,0.8005421074231263 | |
| aggregate,holistic,humaneval,BLZ_240312,kendall,somewhere_aggregate,5,6,-0.10540925533894598,0.8005421074231263 | |
| aggregate,holistic,humaneval,BLZ_240312,kendall,somewhere_aggregate,5,7,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,humaneval,BLZ_240312,kendall,somewhere_aggregate,5,8,-0.10540925533894598,0.8005421074231263 | |
| aggregate,holistic,humaneval,BLZ_240312,kendall,somewhere_aggregate,5,9,-0.22360679774997896,0.6015081344405899 | |
| aggregate,holistic,mbpp,BLZ_240312,kendall,somewhere_aggregate,5,0,0.10540925533894598,0.8005421074231263 | |
| aggregate,holistic,mbpp,BLZ_240312,kendall,somewhere_aggregate,5,1,-0.31622776601683794,0.44848886103153174 | |
| aggregate,holistic,mbpp,BLZ_240312,kendall,somewhere_aggregate,5,2,-0.31622776601683794,0.44848886103153174 | |
| aggregate,holistic,mbpp,BLZ_240312,kendall,somewhere_aggregate,5,3,0.10540925533894598,0.8005421074231263 | |
| aggregate,holistic,mbpp,BLZ_240312,kendall,somewhere_aggregate,5,4,-0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,mbpp,BLZ_240312,kendall,somewhere_aggregate,5,5,0.10540925533894598,0.8005421074231263 | |
| aggregate,holistic,mbpp,BLZ_240312,kendall,somewhere_aggregate,5,6,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,mbpp,BLZ_240312,kendall,somewhere_aggregate,5,7,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,mbpp,BLZ_240312,kendall,somewhere_aggregate,5,8,0.10540925533894598,0.8005421074231263 | |
| aggregate,holistic,mbpp,BLZ_240312,kendall,somewhere_aggregate,5,9,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,winogrande,BLZ_240312,kendall,somewhere_aggregate,5,0,-0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,winogrande,BLZ_240312,kendall,somewhere_aggregate,5,1,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,winogrande,BLZ_240312,kendall,somewhere_aggregate,5,2,0.0,1.0 | |
| aggregate,holistic,winogrande,BLZ_240312,kendall,somewhere_aggregate,5,3,-0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,winogrande,BLZ_240312,kendall,somewhere_aggregate,5,4,0.31622776601683794,0.44848886103153174 | |
| aggregate,holistic,winogrande,BLZ_240312,kendall,somewhere_aggregate,5,5,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,winogrande,BLZ_240312,kendall,somewhere_aggregate,5,6,-0.9999999999999999,0.016666666666666666 | |
| aggregate,holistic,winogrande,BLZ_240312,kendall,somewhere_aggregate,5,7,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,winogrande,BLZ_240312,kendall,somewhere_aggregate,5,8,0.0,1.0 | |
| aggregate,holistic,winogrande,BLZ_240312,kendall,somewhere_aggregate,5,9,-0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,grounding,biggen_240612,kendall,somewhere_aggregate,5,0,0.9486832980505137,0.02297740150320607 | |
| aggregate,holistic,grounding,biggen_240612,kendall,somewhere_aggregate,5,1,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,grounding,biggen_240612,kendall,somewhere_aggregate,5,2,0.10540925533894598,0.8005421074231263 | |
| aggregate,holistic,grounding,biggen_240612,kendall,somewhere_aggregate,5,3,0.31622776601683794,0.44848886103153174 | |
| aggregate,holistic,grounding,biggen_240612,kendall,somewhere_aggregate,5,4,0.9486832980505137,0.02297740150320607 | |
| aggregate,holistic,grounding,biggen_240612,kendall,somewhere_aggregate,5,5,0.9486832980505137,0.02297740150320607 | |
| aggregate,holistic,grounding,biggen_240612,kendall,somewhere_aggregate,5,6,0.31622776601683794,0.44848886103153174 | |
| aggregate,holistic,grounding,biggen_240612,kendall,somewhere_aggregate,5,7,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,grounding,biggen_240612,kendall,somewhere_aggregate,5,8,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,grounding,biggen_240612,kendall,somewhere_aggregate,5,9,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,instruction_following,biggen_240612,kendall,somewhere_aggregate,5,0,0.0,1.0 | |
| aggregate,holistic,instruction_following,biggen_240612,kendall,somewhere_aggregate,5,1,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,instruction_following,biggen_240612,kendall,somewhere_aggregate,5,2,0.7378647873726218,0.07697417298126676 | |
| aggregate,holistic,instruction_following,biggen_240612,kendall,somewhere_aggregate,5,3,-0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,instruction_following,biggen_240612,kendall,somewhere_aggregate,5,4,0.8944271909999159,0.0367138563627041 | |
| aggregate,holistic,instruction_following,biggen_240612,kendall,somewhere_aggregate,5,5,0.9486832980505137,0.02297740150320607 | |
| aggregate,holistic,instruction_following,biggen_240612,kendall,somewhere_aggregate,5,6,0.9486832980505137,0.02297740150320607 | |
| aggregate,holistic,instruction_following,biggen_240612,kendall,somewhere_aggregate,5,7,0.0,1.0 | |
| aggregate,holistic,instruction_following,biggen_240612,kendall,somewhere_aggregate,5,8,0.8944271909999159,0.0367138563627041 | |
| aggregate,holistic,instruction_following,biggen_240612,kendall,somewhere_aggregate,5,9,0.7378647873726218,0.07697417298126676 | |
| aggregate,holistic,planning,biggen_240612,kendall,somewhere_aggregate,5,0,0.7378647873726218,0.07697417298126676 | |
| aggregate,holistic,planning,biggen_240612,kendall,somewhere_aggregate,5,1,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,planning,biggen_240612,kendall,somewhere_aggregate,5,2,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,planning,biggen_240612,kendall,somewhere_aggregate,5,3,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,planning,biggen_240612,kendall,somewhere_aggregate,5,4,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,planning,biggen_240612,kendall,somewhere_aggregate,5,5,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,planning,biggen_240612,kendall,somewhere_aggregate,5,6,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,planning,biggen_240612,kendall,somewhere_aggregate,5,7,0.7378647873726218,0.07697417298126676 | |
| aggregate,holistic,planning,biggen_240612,kendall,somewhere_aggregate,5,8,-0.10540925533894598,0.8005421074231263 | |
| aggregate,holistic,planning,biggen_240612,kendall,somewhere_aggregate,5,9,0.7378647873726218,0.07697417298126676 | |
| aggregate,holistic,reasoning,biggen_240612,kendall,somewhere_aggregate,5,0,0.7378647873726218,0.07697417298126676 | |
| aggregate,holistic,reasoning,biggen_240612,kendall,somewhere_aggregate,5,1,0.6,0.23333333333333334 | |
| aggregate,holistic,reasoning,biggen_240612,kendall,somewhere_aggregate,5,2,0.7378647873726218,0.07697417298126676 | |
| aggregate,holistic,reasoning,biggen_240612,kendall,somewhere_aggregate,5,3,0.6,0.23333333333333334 | |
| aggregate,holistic,reasoning,biggen_240612,kendall,somewhere_aggregate,5,4,0.5270462766947298,0.206507295485425 | |
| aggregate,holistic,reasoning,biggen_240612,kendall,somewhere_aggregate,5,5,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,reasoning,biggen_240612,kendall,somewhere_aggregate,5,6,0.5270462766947298,0.206507295485425 | |
| aggregate,holistic,reasoning,biggen_240612,kendall,somewhere_aggregate,5,7,0.6,0.23333333333333334 | |
| aggregate,holistic,reasoning,biggen_240612,kendall,somewhere_aggregate,5,8,0.0,1.0 | |
| aggregate,holistic,reasoning,biggen_240612,kendall,somewhere_aggregate,5,9,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,refinement,biggen_240612,kendall,somewhere_aggregate,5,0,0.6,0.23333333333333334 | |
| aggregate,holistic,refinement,biggen_240612,kendall,somewhere_aggregate,5,1,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,refinement,biggen_240612,kendall,somewhere_aggregate,5,2,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,refinement,biggen_240612,kendall,somewhere_aggregate,5,3,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,refinement,biggen_240612,kendall,somewhere_aggregate,5,4,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,refinement,biggen_240612,kendall,somewhere_aggregate,5,5,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,refinement,biggen_240612,kendall,somewhere_aggregate,5,6,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,refinement,biggen_240612,kendall,somewhere_aggregate,5,7,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,refinement,biggen_240612,kendall,somewhere_aggregate,5,8,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,refinement,biggen_240612,kendall,somewhere_aggregate,5,9,0.6,0.23333333333333334 | |
| aggregate,holistic,safety,biggen_240612,kendall,somewhere_aggregate,5,0,0.0,1.0 | |
| aggregate,holistic,safety,biggen_240612,kendall,somewhere_aggregate,5,1,0.0,1.0 | |
| aggregate,holistic,safety,biggen_240612,kendall,somewhere_aggregate,5,2,-0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,safety,biggen_240612,kendall,somewhere_aggregate,5,3,0.0,1.0 | |
| aggregate,holistic,safety,biggen_240612,kendall,somewhere_aggregate,5,4,-0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,safety,biggen_240612,kendall,somewhere_aggregate,5,5,-0.6,0.23333333333333334 | |
| aggregate,holistic,safety,biggen_240612,kendall,somewhere_aggregate,5,6,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,safety,biggen_240612,kendall,somewhere_aggregate,5,7,-0.6,0.23333333333333334 | |
| aggregate,holistic,safety,biggen_240612,kendall,somewhere_aggregate,5,8,0.6,0.23333333333333334 | |
| aggregate,holistic,safety,biggen_240612,kendall,somewhere_aggregate,5,9,0.6,0.23333333333333334 | |
| aggregate,holistic,theory_of_mind,biggen_240612,kendall,somewhere_aggregate,5,0,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,theory_of_mind,biggen_240612,kendall,somewhere_aggregate,5,1,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,theory_of_mind,biggen_240612,kendall,somewhere_aggregate,5,2,0.6,0.23333333333333334 | |
| aggregate,holistic,theory_of_mind,biggen_240612,kendall,somewhere_aggregate,5,3,0.0,1.0 | |
| aggregate,holistic,theory_of_mind,biggen_240612,kendall,somewhere_aggregate,5,4,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,theory_of_mind,biggen_240612,kendall,somewhere_aggregate,5,5,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,theory_of_mind,biggen_240612,kendall,somewhere_aggregate,5,6,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,theory_of_mind,biggen_240612,kendall,somewhere_aggregate,5,7,0.0,1.0 | |
| aggregate,holistic,theory_of_mind,biggen_240612,kendall,somewhere_aggregate,5,8,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,theory_of_mind,biggen_240612,kendall,somewhere_aggregate,5,9,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,tool_usage,biggen_240612,kendall,somewhere_aggregate,5,0,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,tool_usage,biggen_240612,kendall,somewhere_aggregate,5,1,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,tool_usage,biggen_240612,kendall,somewhere_aggregate,5,2,0.9999999999999999,0.016666666666666666 | |
| aggregate,holistic,tool_usage,biggen_240612,kendall,somewhere_aggregate,5,3,0.9999999999999999,0.016666666666666666 | |
| aggregate,holistic,tool_usage,biggen_240612,kendall,somewhere_aggregate,5,4,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,tool_usage,biggen_240612,kendall,somewhere_aggregate,5,5,0.9999999999999999,0.016666666666666666 | |
| aggregate,holistic,tool_usage,biggen_240612,kendall,somewhere_aggregate,5,6,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,tool_usage,biggen_240612,kendall,somewhere_aggregate,5,7,0.6,0.23333333333333334 | |
| aggregate,holistic,tool_usage,biggen_240612,kendall,somewhere_aggregate,5,8,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,tool_usage,biggen_240612,kendall,somewhere_aggregate,5,9,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,livebench_average,livebench_240701,kendall,somewhere_aggregate,5,0,0.6,0.23333333333333334 | |
| aggregate,holistic,livebench_average,livebench_240701,kendall,somewhere_aggregate,5,1,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,livebench_average,livebench_240701,kendall,somewhere_aggregate,5,2,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,livebench_average,livebench_240701,kendall,somewhere_aggregate,5,3,0.6,0.23333333333333334 | |
| aggregate,holistic,livebench_average,livebench_240701,kendall,somewhere_aggregate,5,4,0.6,0.23333333333333334 | |
| aggregate,holistic,livebench_average,livebench_240701,kendall,somewhere_aggregate,5,5,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,livebench_average,livebench_240701,kendall,somewhere_aggregate,5,6,0.6,0.23333333333333334 | |
| aggregate,holistic,livebench_average,livebench_240701,kendall,somewhere_aggregate,5,7,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,livebench_average,livebench_240701,kendall,somewhere_aggregate,5,8,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,livebench_average,livebench_240701,kendall,somewhere_aggregate,5,9,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,reasoning_average,livebench_240701,kendall,somewhere_aggregate,5,0,0.7378647873726218,0.07697417298126676 | |
| aggregate,holistic,reasoning_average,livebench_240701,kendall,somewhere_aggregate,5,1,0.5270462766947298,0.206507295485425 | |
| aggregate,holistic,reasoning_average,livebench_240701,kendall,somewhere_aggregate,5,2,0.7378647873726218,0.07697417298126676 | |
| aggregate,holistic,reasoning_average,livebench_240701,kendall,somewhere_aggregate,5,3,0.7378647873726218,0.07697417298126676 | |
| aggregate,holistic,reasoning_average,livebench_240701,kendall,somewhere_aggregate,5,4,0.5270462766947298,0.206507295485425 | |
| aggregate,holistic,reasoning_average,livebench_240701,kendall,somewhere_aggregate,5,5,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,reasoning_average,livebench_240701,kendall,somewhere_aggregate,5,6,0.7378647873726218,0.07697417298126676 | |
| aggregate,holistic,reasoning_average,livebench_240701,kendall,somewhere_aggregate,5,7,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,reasoning_average,livebench_240701,kendall,somewhere_aggregate,5,8,0.5270462766947298,0.206507295485425 | |
| aggregate,holistic,reasoning_average,livebench_240701,kendall,somewhere_aggregate,5,9,0.7378647873726218,0.07697417298126676 | |
| aggregate,holistic,coding_average,livebench_240701,kendall,somewhere_aggregate,5,0,0.31622776601683794,0.44848886103153174 | |
| aggregate,holistic,coding_average,livebench_240701,kendall,somewhere_aggregate,5,1,0.31622776601683794,0.44848886103153174 | |
| aggregate,holistic,coding_average,livebench_240701,kendall,somewhere_aggregate,5,2,0.6,0.23333333333333334 | |
| aggregate,holistic,coding_average,livebench_240701,kendall,somewhere_aggregate,5,3,0.31622776601683794,0.44848886103153174 | |
| aggregate,holistic,coding_average,livebench_240701,kendall,somewhere_aggregate,5,4,0.31622776601683794,0.44848886103153174 | |
| aggregate,holistic,coding_average,livebench_240701,kendall,somewhere_aggregate,5,5,0.5270462766947298,0.206507295485425 | |
| aggregate,holistic,coding_average,livebench_240701,kendall,somewhere_aggregate,5,6,0.5270462766947298,0.206507295485425 | |
| aggregate,holistic,coding_average,livebench_240701,kendall,somewhere_aggregate,5,7,0.5270462766947298,0.206507295485425 | |
| aggregate,holistic,coding_average,livebench_240701,kendall,somewhere_aggregate,5,8,0.5270462766947298,0.206507295485425 | |
| aggregate,holistic,coding_average,livebench_240701,kendall,somewhere_aggregate,5,9,0.5270462766947298,0.206507295485425 | |
| aggregate,holistic,mathematics_average,livebench_240701,kendall,somewhere_aggregate,5,0,0.6,0.23333333333333334 | |
| aggregate,holistic,mathematics_average,livebench_240701,kendall,somewhere_aggregate,5,1,0.6,0.23333333333333334 | |
| aggregate,holistic,mathematics_average,livebench_240701,kendall,somewhere_aggregate,5,2,0.6,0.23333333333333334 | |
| aggregate,holistic,mathematics_average,livebench_240701,kendall,somewhere_aggregate,5,3,0.6,0.23333333333333334 | |
| aggregate,holistic,mathematics_average,livebench_240701,kendall,somewhere_aggregate,5,4,0.6,0.23333333333333334 | |
| aggregate,holistic,mathematics_average,livebench_240701,kendall,somewhere_aggregate,5,5,0.6,0.23333333333333334 | |
| aggregate,holistic,mathematics_average,livebench_240701,kendall,somewhere_aggregate,5,6,0.6,0.23333333333333334 | |
| aggregate,holistic,mathematics_average,livebench_240701,kendall,somewhere_aggregate,5,7,0.6,0.23333333333333334 | |
| aggregate,holistic,mathematics_average,livebench_240701,kendall,somewhere_aggregate,5,8,0.6,0.23333333333333334 | |
| aggregate,holistic,mathematics_average,livebench_240701,kendall,somewhere_aggregate,5,9,0.6,0.23333333333333334 | |
| aggregate,holistic,data_analysis_average,livebench_240701,kendall,somewhere_aggregate,5,0,0.6,0.23333333333333334 | |
| aggregate,holistic,data_analysis_average,livebench_240701,kendall,somewhere_aggregate,5,1,0.6,0.23333333333333334 | |
| aggregate,holistic,data_analysis_average,livebench_240701,kendall,somewhere_aggregate,5,2,0.6,0.23333333333333334 | |
| aggregate,holistic,data_analysis_average,livebench_240701,kendall,somewhere_aggregate,5,3,0.6,0.23333333333333334 | |
| aggregate,holistic,data_analysis_average,livebench_240701,kendall,somewhere_aggregate,5,4,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,data_analysis_average,livebench_240701,kendall,somewhere_aggregate,5,5,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,data_analysis_average,livebench_240701,kendall,somewhere_aggregate,5,6,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,data_analysis_average,livebench_240701,kendall,somewhere_aggregate,5,7,0.6,0.23333333333333334 | |
| aggregate,holistic,data_analysis_average,livebench_240701,kendall,somewhere_aggregate,5,8,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,data_analysis_average,livebench_240701,kendall,somewhere_aggregate,5,9,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,language_average,livebench_240701,kendall,somewhere_aggregate,5,0,0.6,0.23333333333333334 | |
| aggregate,holistic,language_average,livebench_240701,kendall,somewhere_aggregate,5,1,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,language_average,livebench_240701,kendall,somewhere_aggregate,5,2,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,language_average,livebench_240701,kendall,somewhere_aggregate,5,3,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,language_average,livebench_240701,kendall,somewhere_aggregate,5,4,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,language_average,livebench_240701,kendall,somewhere_aggregate,5,5,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,language_average,livebench_240701,kendall,somewhere_aggregate,5,6,0.6,0.23333333333333334 | |
| aggregate,holistic,language_average,livebench_240701,kendall,somewhere_aggregate,5,7,0.6,0.23333333333333334 | |
| aggregate,holistic,language_average,livebench_240701,kendall,somewhere_aggregate,5,8,0.6,0.23333333333333334 | |
| aggregate,holistic,language_average,livebench_240701,kendall,somewhere_aggregate,5,9,0.6,0.23333333333333334 | |
| aggregate,holistic,if_average,livebench_240701,kendall,somewhere_aggregate,5,0,0.0,1.0 | |
| aggregate,holistic,if_average,livebench_240701,kendall,somewhere_aggregate,5,1,-0.6,0.23333333333333334 | |
| aggregate,holistic,if_average,livebench_240701,kendall,somewhere_aggregate,5,2,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,if_average,livebench_240701,kendall,somewhere_aggregate,5,3,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,if_average,livebench_240701,kendall,somewhere_aggregate,5,4,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,if_average,livebench_240701,kendall,somewhere_aggregate,5,5,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,if_average,livebench_240701,kendall,somewhere_aggregate,5,6,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,if_average,livebench_240701,kendall,somewhere_aggregate,5,7,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,if_average,livebench_240701,kendall,somewhere_aggregate,5,8,0.0,1.0 | |
| aggregate,holistic,if_average,livebench_240701,kendall,somewhere_aggregate,5,9,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,arena_hard,arena_hard_2404,kendall,somewhere_aggregate,5,0,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,arena_hard,arena_hard_2404,kendall,somewhere_aggregate,5,1,0.9999999999999999,0.016666666666666666 | |
| aggregate,holistic,arena_hard,arena_hard_2404,kendall,somewhere_aggregate,5,2,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,arena_hard,arena_hard_2404,kendall,somewhere_aggregate,5,3,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,arena_hard,arena_hard_2404,kendall,somewhere_aggregate,5,4,0.9999999999999999,0.016666666666666666 | |
| aggregate,holistic,arena_hard,arena_hard_2404,kendall,somewhere_aggregate,5,5,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,arena_hard,arena_hard_2404,kendall,somewhere_aggregate,5,6,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,arena_hard,arena_hard_2404,kendall,somewhere_aggregate,5,7,0.9999999999999999,0.016666666666666666 | |
| aggregate,holistic,arena_hard,arena_hard_2404,kendall,somewhere_aggregate,5,8,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,arena_hard,arena_hard_2404,kendall,somewhere_aggregate,5,9,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,mixeval,mixeval_240601,kendall,somewhere_aggregate,5,0,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,mixeval,mixeval_240601,kendall,somewhere_aggregate,5,1,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,mixeval,mixeval_240601,kendall,somewhere_aggregate,5,2,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,mixeval,mixeval_240601,kendall,somewhere_aggregate,5,3,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,mixeval,mixeval_240601,kendall,somewhere_aggregate,5,4,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,mixeval,mixeval_240601,kendall,somewhere_aggregate,5,5,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,mixeval,mixeval_240601,kendall,somewhere_aggregate,5,6,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,mixeval,mixeval_240601,kendall,somewhere_aggregate,5,7,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,mixeval,mixeval_240601,kendall,somewhere_aggregate,5,8,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,mixeval,mixeval_240601,kendall,somewhere_aggregate,5,9,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,agieval,BLZ_240312,kendall,somewhere_aggregate,5,0,0.31622776601683794,0.44848886103153174 | |
| aggregate,holistic,agieval,BLZ_240312,kendall,somewhere_aggregate,5,1,0.31622776601683794,0.44848886103153174 | |
| aggregate,holistic,agieval,BLZ_240312,kendall,somewhere_aggregate,5,2,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,agieval,BLZ_240312,kendall,somewhere_aggregate,5,3,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,agieval,BLZ_240312,kendall,somewhere_aggregate,5,4,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,agieval,BLZ_240312,kendall,somewhere_aggregate,5,5,0.9999999999999999,0.016666666666666666 | |
| aggregate,holistic,agieval,BLZ_240312,kendall,somewhere_aggregate,5,6,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,agieval,BLZ_240312,kendall,somewhere_aggregate,5,7,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,agieval,BLZ_240312,kendall,somewhere_aggregate,5,8,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,agieval,BLZ_240312,kendall,somewhere_aggregate,5,9,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,arc_c,BLZ_240312,kendall,somewhere_aggregate,5,0,0.6,0.23333333333333334 | |
| aggregate,holistic,arc_c,BLZ_240312,kendall,somewhere_aggregate,5,1,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,arc_c,BLZ_240312,kendall,somewhere_aggregate,5,2,-0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,arc_c,BLZ_240312,kendall,somewhere_aggregate,5,3,-0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,arc_c,BLZ_240312,kendall,somewhere_aggregate,5,4,0.0,1.0 | |
| aggregate,holistic,arc_c,BLZ_240312,kendall,somewhere_aggregate,5,5,0.6,0.23333333333333334 | |
| aggregate,holistic,arc_c,BLZ_240312,kendall,somewhere_aggregate,5,6,0.31622776601683794,0.44848886103153174 | |
| aggregate,holistic,arc_c,BLZ_240312,kendall,somewhere_aggregate,5,7,0.6,0.23333333333333334 | |
| aggregate,holistic,arc_c,BLZ_240312,kendall,somewhere_aggregate,5,8,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,arc_c,BLZ_240312,kendall,somewhere_aggregate,5,9,0.7378647873726218,0.07697417298126676 | |
| aggregate,holistic,alpacav1,BLZ_240312,kendall,somewhere_aggregate,5,0,-0.31622776601683794,0.44848886103153174 | |
| aggregate,holistic,alpacav1,BLZ_240312,kendall,somewhere_aggregate,5,1,-0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,alpacav1,BLZ_240312,kendall,somewhere_aggregate,5,2,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,alpacav1,BLZ_240312,kendall,somewhere_aggregate,5,3,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,alpacav1,BLZ_240312,kendall,somewhere_aggregate,5,4,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,alpacav1,BLZ_240312,kendall,somewhere_aggregate,5,5,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,alpacav1,BLZ_240312,kendall,somewhere_aggregate,5,6,0.6,0.23333333333333334 | |
| aggregate,holistic,alpacav1,BLZ_240312,kendall,somewhere_aggregate,5,7,0.7378647873726218,0.07697417298126676 | |
| aggregate,holistic,alpacav1,BLZ_240312,kendall,somewhere_aggregate,5,8,0.6,0.23333333333333334 | |
| aggregate,holistic,alpacav1,BLZ_240312,kendall,somewhere_aggregate,5,9,0.31622776601683794,0.44848886103153174 | |
| aggregate,holistic,alpacav2,BLZ_240312,kendall,somewhere_aggregate,5,0,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,alpacav2,BLZ_240312,kendall,somewhere_aggregate,5,1,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,alpacav2,BLZ_240312,kendall,somewhere_aggregate,5,2,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,alpacav2,BLZ_240312,kendall,somewhere_aggregate,5,3,0.7378647873726218,0.07697417298126676 | |
| aggregate,holistic,alpacav2,BLZ_240312,kendall,somewhere_aggregate,5,4,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,alpacav2,BLZ_240312,kendall,somewhere_aggregate,5,5,-0.6,0.23333333333333334 | |
| aggregate,holistic,alpacav2,BLZ_240312,kendall,somewhere_aggregate,5,6,-0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,alpacav2,BLZ_240312,kendall,somewhere_aggregate,5,7,0.6,0.23333333333333334 | |
| aggregate,holistic,alpacav2,BLZ_240312,kendall,somewhere_aggregate,5,8,0.0,1.0 | |
| aggregate,holistic,alpacav2,BLZ_240312,kendall,somewhere_aggregate,5,9,0.6,0.23333333333333334 | |
| aggregate,holistic,alpacaeval2_lc,BLZ_240312,kendall,somewhere_aggregate,5,0,0.0,1.0 | |
| aggregate,holistic,alpacaeval2_lc,BLZ_240312,kendall,somewhere_aggregate,5,1,-0.6,0.23333333333333334 | |
| aggregate,holistic,alpacaeval2_lc,BLZ_240312,kendall,somewhere_aggregate,5,2,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,alpacaeval2_lc,BLZ_240312,kendall,somewhere_aggregate,5,3,0.0,1.0 | |
| aggregate,holistic,alpacaeval2_lc,BLZ_240312,kendall,somewhere_aggregate,5,4,0.6,0.23333333333333334 | |
| aggregate,holistic,alpacaeval2_lc,BLZ_240312,kendall,somewhere_aggregate,5,5,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,alpacaeval2_lc,BLZ_240312,kendall,somewhere_aggregate,5,6,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,alpacaeval2_lc,BLZ_240312,kendall,somewhere_aggregate,5,7,0.6,0.23333333333333334 | |
| aggregate,holistic,alpacaeval2_lc,BLZ_240312,kendall,somewhere_aggregate,5,8,-0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,alpacaeval2_lc,BLZ_240312,kendall,somewhere_aggregate,5,9,0.0,1.0 | |
| aggregate,holistic,arena_elo,BLZ_240312,kendall,somewhere_aggregate,5,0,0.31622776601683794,0.44848886103153174 | |
| aggregate,holistic,arena_elo,BLZ_240312,kendall,somewhere_aggregate,5,1,0.6,0.23333333333333334 | |
| aggregate,holistic,arena_elo,BLZ_240312,kendall,somewhere_aggregate,5,2,0.31622776601683794,0.44848886103153174 | |
| aggregate,holistic,arena_elo,BLZ_240312,kendall,somewhere_aggregate,5,3,-0.9999999999999999,0.016666666666666666 | |
| aggregate,holistic,arena_elo,BLZ_240312,kendall,somewhere_aggregate,5,4,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,arena_elo,BLZ_240312,kendall,somewhere_aggregate,5,5,0.31622776601683794,0.44848886103153174 | |
| aggregate,holistic,arena_elo,BLZ_240312,kendall,somewhere_aggregate,5,6,-0.9999999999999999,0.016666666666666666 | |
| aggregate,holistic,arena_elo,BLZ_240312,kendall,somewhere_aggregate,5,7,0.6,0.23333333333333334 | |
| aggregate,holistic,arena_elo,BLZ_240312,kendall,somewhere_aggregate,5,8,-0.10540925533894598,0.8005421074231263 | |
| aggregate,holistic,arena_elo,BLZ_240312,kendall,somewhere_aggregate,5,9,0.6,0.23333333333333334 | |
| aggregate,holistic,bbh,BLZ_240312,kendall,somewhere_aggregate,5,0,0.9486832980505137,0.02297740150320607 | |
| aggregate,holistic,bbh,BLZ_240312,kendall,somewhere_aggregate,5,1,0.31622776601683794,0.44848886103153174 | |
| aggregate,holistic,bbh,BLZ_240312,kendall,somewhere_aggregate,5,2,-0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,bbh,BLZ_240312,kendall,somewhere_aggregate,5,3,0.9999999999999999,0.016666666666666666 | |
| aggregate,holistic,bbh,BLZ_240312,kendall,somewhere_aggregate,5,4,0.0,1.0 | |
| aggregate,holistic,bbh,BLZ_240312,kendall,somewhere_aggregate,5,5,-0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,bbh,BLZ_240312,kendall,somewhere_aggregate,5,6,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,bbh,BLZ_240312,kendall,somewhere_aggregate,5,7,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,bbh,BLZ_240312,kendall,somewhere_aggregate,5,8,0.5270462766947298,0.206507295485425 | |
| aggregate,holistic,bbh,BLZ_240312,kendall,somewhere_aggregate,5,9,0.5270462766947298,0.206507295485425 | |
| aggregate,holistic,eq_benchv2,BLZ_240312,kendall,somewhere_aggregate,5,0,-0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,eq_benchv2,BLZ_240312,kendall,somewhere_aggregate,5,1,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,eq_benchv2,BLZ_240312,kendall,somewhere_aggregate,5,2,0.0,1.0 | |
| aggregate,holistic,eq_benchv2,BLZ_240312,kendall,somewhere_aggregate,5,3,0.0,1.0 | |
| aggregate,holistic,eq_benchv2,BLZ_240312,kendall,somewhere_aggregate,5,4,-0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,eq_benchv2,BLZ_240312,kendall,somewhere_aggregate,5,5,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,eq_benchv2,BLZ_240312,kendall,somewhere_aggregate,5,6,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,eq_benchv2,BLZ_240312,kendall,somewhere_aggregate,5,7,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,eq_benchv2,BLZ_240312,kendall,somewhere_aggregate,5,8,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,eq_benchv2,BLZ_240312,kendall,somewhere_aggregate,5,9,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,gpt4all,BLZ_240312,kendall,somewhere_aggregate,5,0,0.7378647873726218,0.07697417298126676 | |
| aggregate,holistic,gpt4all,BLZ_240312,kendall,somewhere_aggregate,5,1,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,gpt4all,BLZ_240312,kendall,somewhere_aggregate,5,2,0.31622776601683794,0.44848886103153174 | |
| aggregate,holistic,gpt4all,BLZ_240312,kendall,somewhere_aggregate,5,3,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,gpt4all,BLZ_240312,kendall,somewhere_aggregate,5,4,0.31622776601683794,0.44848886103153174 | |
| aggregate,holistic,gpt4all,BLZ_240312,kendall,somewhere_aggregate,5,5,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,gpt4all,BLZ_240312,kendall,somewhere_aggregate,5,6,-0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,gpt4all,BLZ_240312,kendall,somewhere_aggregate,5,7,-0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,gpt4all,BLZ_240312,kendall,somewhere_aggregate,5,8,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,gpt4all,BLZ_240312,kendall,somewhere_aggregate,5,9,0.5270462766947298,0.206507295485425 | |
| aggregate,holistic,hugging_6,BLZ_240312,kendall,somewhere_aggregate,5,0,-0.6,0.23333333333333334 | |
| aggregate,holistic,hugging_6,BLZ_240312,kendall,somewhere_aggregate,5,1,-0.6,0.23333333333333334 | |
| aggregate,holistic,hugging_6,BLZ_240312,kendall,somewhere_aggregate,5,2,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,hugging_6,BLZ_240312,kendall,somewhere_aggregate,5,3,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,hugging_6,BLZ_240312,kendall,somewhere_aggregate,5,4,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,hugging_6,BLZ_240312,kendall,somewhere_aggregate,5,5,0.6,0.23333333333333334 | |
| aggregate,holistic,hugging_6,BLZ_240312,kendall,somewhere_aggregate,5,6,-0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,hugging_6,BLZ_240312,kendall,somewhere_aggregate,5,7,-0.6,0.23333333333333334 | |
| aggregate,holistic,hugging_6,BLZ_240312,kendall,somewhere_aggregate,5,8,-0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,hugging_6,BLZ_240312,kendall,somewhere_aggregate,5,9,0.6,0.23333333333333334 | |
| aggregate,holistic,llmonitor,BLZ_240312,kendall,somewhere_aggregate,5,0,0.11952286093343936,0.7815112949987133 | |
| aggregate,holistic,llmonitor,BLZ_240312,kendall,somewhere_aggregate,5,1,0.0,1.0 | |
| aggregate,holistic,llmonitor,BLZ_240312,kendall,somewhere_aggregate,5,2,0.0,1.0 | |
| aggregate,holistic,llmonitor,BLZ_240312,kendall,somewhere_aggregate,5,3,0.6,0.23333333333333334 | |
| aggregate,holistic,llmonitor,BLZ_240312,kendall,somewhere_aggregate,5,4,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,llmonitor,BLZ_240312,kendall,somewhere_aggregate,5,5,0.6,0.23333333333333334 | |
| aggregate,holistic,llmonitor,BLZ_240312,kendall,somewhere_aggregate,5,6,0.35856858280031806,0.40538055645894233 | |
| aggregate,holistic,llmonitor,BLZ_240312,kendall,somewhere_aggregate,5,7,0.31622776601683794,0.44848886103153174 | |
| aggregate,holistic,llmonitor,BLZ_240312,kendall,somewhere_aggregate,5,8,0.0,1.0 | |
| aggregate,holistic,llmonitor,BLZ_240312,kendall,somewhere_aggregate,5,9,0.5270462766947298,0.206507295485425 | |
| aggregate,holistic,magi,BLZ_240312,kendall,somewhere_aggregate,5,0,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,magi,BLZ_240312,kendall,somewhere_aggregate,5,1,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,magi,BLZ_240312,kendall,somewhere_aggregate,5,2,-0.10540925533894598,0.8005421074231263 | |
| aggregate,holistic,magi,BLZ_240312,kendall,somewhere_aggregate,5,3,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,magi,BLZ_240312,kendall,somewhere_aggregate,5,4,0.6,0.23333333333333334 | |
| aggregate,holistic,magi,BLZ_240312,kendall,somewhere_aggregate,5,5,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,magi,BLZ_240312,kendall,somewhere_aggregate,5,6,0.6,0.23333333333333334 | |
| aggregate,holistic,magi,BLZ_240312,kendall,somewhere_aggregate,5,7,0.7999999999999999,0.08333333333333333 | |
| aggregate,holistic,magi,BLZ_240312,kendall,somewhere_aggregate,5,8,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,magi,BLZ_240312,kendall,somewhere_aggregate,5,9,-0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,mmlu,BLZ_240312,kendall,somewhere_aggregate,5,0,0.6,0.23333333333333334 | |
| aggregate,holistic,mmlu,BLZ_240312,kendall,somewhere_aggregate,5,1,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,mmlu,BLZ_240312,kendall,somewhere_aggregate,5,2,0.0,1.0 | |
| aggregate,holistic,mmlu,BLZ_240312,kendall,somewhere_aggregate,5,3,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,mmlu,BLZ_240312,kendall,somewhere_aggregate,5,4,0.6,0.23333333333333334 | |
| aggregate,holistic,mmlu,BLZ_240312,kendall,somewhere_aggregate,5,5,0.6,0.23333333333333334 | |
| aggregate,holistic,mmlu,BLZ_240312,kendall,somewhere_aggregate,5,6,0.6,0.23333333333333334 | |
| aggregate,holistic,mmlu,BLZ_240312,kendall,somewhere_aggregate,5,7,0.6,0.23333333333333334 | |
| aggregate,holistic,mmlu,BLZ_240312,kendall,somewhere_aggregate,5,8,0.6,0.23333333333333334 | |
| aggregate,holistic,mmlu,BLZ_240312,kendall,somewhere_aggregate,5,9,0.5270462766947298,0.206507295485425 | |
| aggregate,holistic,mt_bench,BLZ_240312,kendall,somewhere_aggregate,5,0,0.6,0.23333333333333334 | |
| aggregate,holistic,mt_bench,BLZ_240312,kendall,somewhere_aggregate,5,1,0.0,1.0 | |
| aggregate,holistic,mt_bench,BLZ_240312,kendall,somewhere_aggregate,5,2,0.10540925533894598,0.8005421074231263 | |
| aggregate,holistic,mt_bench,BLZ_240312,kendall,somewhere_aggregate,5,3,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,mt_bench,BLZ_240312,kendall,somewhere_aggregate,5,4,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,mt_bench,BLZ_240312,kendall,somewhere_aggregate,5,5,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,mt_bench,BLZ_240312,kendall,somewhere_aggregate,5,6,0.10540925533894598,0.8005421074231263 | |
| aggregate,holistic,mt_bench,BLZ_240312,kendall,somewhere_aggregate,5,7,-0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,mt_bench,BLZ_240312,kendall,somewhere_aggregate,5,8,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,mt_bench,BLZ_240312,kendall,somewhere_aggregate,5,9,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,biggen_mwr,biggen_240612,kendall,somewhere_aggregate,5,0,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,biggen_mwr,biggen_240612,kendall,somewhere_aggregate,5,1,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,biggen_mwr,biggen_240612,kendall,somewhere_aggregate,5,2,0.19999999999999998,0.8166666666666667 | |
| aggregate,holistic,biggen_mwr,biggen_240612,kendall,somewhere_aggregate,5,3,0.6,0.23333333333333334 | |
| aggregate,holistic,biggen_mwr,biggen_240612,kendall,somewhere_aggregate,5,4,0.6,0.23333333333333334 | |
| aggregate,holistic,biggen_mwr,biggen_240612,kendall,somewhere_aggregate,5,5,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,biggen_mwr,biggen_240612,kendall,somewhere_aggregate,5,6,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,biggen_mwr,biggen_240612,kendall,somewhere_aggregate,5,7,0.39999999999999997,0.48333333333333334 | |
| aggregate,holistic,biggen_mwr,biggen_240612,kendall,somewhere_aggregate,5,8,0.6,0.23333333333333334 | |
| aggregate,holistic,biggen_mwr,biggen_240612,kendall,somewhere_aggregate,5,9,0.7999999999999999,0.08333333333333333 | |