commit
- app.py +10 -10
- data_handler.py +0 -10
app.py
CHANGED
````diff
@@ -17,8 +17,11 @@ def refresh_data():
 
     global_output_armenian = unified_exam_result_table(global_unified_exam_df)
     global_output_mmlu = mmlu_result_table(global_mmlu_df)
-
-
+
+    unified_chart = unified_exam_chart(global_output_armenian, 'Average')
+    mmlu_chart_output = mmlu_chart(global_output_mmlu, 'Average')
+
+    return global_output_armenian, global_output_mmlu, unified_chart, mmlu_chart_output, 'Average', 'Average'
 
 def main():
     global global_mmlu_df, global_unified_exam_df, global_output_armenian, global_output_mmlu
@@ -60,10 +63,10 @@ def main():
         **Creator Company:** Metric AI Research Lab, Yerevan, Armenia."""
         )
         gr.Image("logo.png", width=200, show_label=False, show_download_button=False, show_fullscreen_button=False, show_share_button=False)
-        gr.Markdown("""
+        gr.Markdown("""
         - [Website](https://metric.am/)
         - [Hugging Face](https://huggingface.co/Metric-AI)
-
+
         MMLU-Pro-Hy is a massive multi-task test in MCQA format, inspired by the original MMLU benchmark, adapted for the Armenian language. The Armenian Unified Exams benchmark allows for comparison with human-level knowledge.
         """
         )
@@ -72,7 +75,7 @@ def main():
         """
         To submit a model for evaluation, please follow these steps:
         1. **Evaluate your model**:
-
+            - Follow the evaluation script provided here: [https://github.com/Anania-AI/Arm-LLM-Benchmark](https://github.com/Anania-AI/Arm-LLM-Benchmark)
         2. **Format your submission file**:
             - After evaluation, you will get a `result.json` file. Ensure the file follows this format:
         ```json
@@ -111,12 +114,9 @@ def main():
         refresh_button = gr.Button("Refresh Data")
         refresh_button.click(
             fn=refresh_data,
-            outputs=[table_output_armenian,
-                     table_output_mmlu,
-                     plot_output_armenian,
-                     plot_output_mmlu
-                     ],
+            outputs=[table_output_armenian, table_output_mmlu, plot_output_armenian, plot_output_mmlu, plot_column_dropdown_unified_exam, plot_column_dropdown_mmlu],
         )
+
     app.launch(share=True, debug=True)
 
 if __name__ == "__main__":
````
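The functional change in app.py: `refresh_data` now returns six values, and the `outputs=` list passed to `refresh_button.click` grows to six components to match, since Gradio maps a handler's return tuple onto its output components positionally. The two trailing `'Average'` strings reset the plot-column dropdowns. A minimal runnable sketch of this wiring, with stand-in data in place of the real benchmark tables and charts:

```python
import gradio as gr
import pandas as pd
import plotly.express as px

def refresh_data():
    # Stand-in data; the real app rebuilds these tables from the latest
    # benchmark results before re-rendering the charts.
    df = pd.DataFrame({"Model": ["model-a", "model-b"], "Average": [0.52, 0.71]})
    fig = px.bar(df, x="Average", y="Model", orientation="h")
    # Six return values -> six entries in outputs=[...] below, same order.
    # The trailing "Average" strings reset both plot-column dropdowns.
    return df, df, fig, fig, "Average", "Average"

with gr.Blocks() as app:
    table_output_armenian = gr.DataFrame()
    table_output_mmlu = gr.DataFrame()
    plot_output_armenian = gr.Plot()
    plot_output_mmlu = gr.Plot()
    plot_column_dropdown_unified_exam = gr.Dropdown(choices=["Average"], value="Average")
    plot_column_dropdown_mmlu = gr.Dropdown(choices=["Average"], value="Average")
    refresh_button = gr.Button("Refresh Data")
    refresh_button.click(
        fn=refresh_data,
        outputs=[table_output_armenian, table_output_mmlu,
                 plot_output_armenian, plot_output_mmlu,
                 plot_column_dropdown_unified_exam, plot_column_dropdown_mmlu],
    )

app.launch(share=True, debug=True)
```

Returning the dropdowns' default value alongside the tables and charts keeps the plots and their column selectors consistent after a refresh.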
data_handler.py
CHANGED
```diff
@@ -59,10 +59,6 @@ def unified_exam_chart(unified_exam_df, plot_column):
         title=title,
         orientation='h'
     )
-    # max_chart_height = 600
-
-    # chart_height = df.shape[0] * 50
-    # chart_height = min(chart_height, max_chart_height)
 
     fig.update_layout(
         xaxis=dict(range=[0, x_range_max]),
@@ -70,7 +66,6 @@ def unified_exam_chart(unified_exam_df, plot_column):
         xaxis_title=dict(font=dict(size=12)),
         yaxis_title=dict(font=dict(size=12)),
         yaxis=dict(autorange="reversed"),
-        # height=chart_height,
         width=1400
     )
     return fig
@@ -93,10 +88,6 @@ def mmlu_chart(mmlu_df, plot_column):
         orientation='h',
         range_color=[0,1]
     )
-    # max_chart_height = 600
-
-    # chart_height = df.shape[0] * 50
-    # chart_height = min(chart_height, max_chart_height)
 
     fig.update_layout(
         xaxis=dict(range=[0, x_range_max]),
@@ -104,7 +95,6 @@ def mmlu_chart(mmlu_df, plot_column):
         xaxis_title=dict(font=dict(size=12)),
         yaxis_title=dict(font=dict(size=12)),
         yaxis=dict(autorange="reversed"),
-        # height=chart_height,
         width=1400
    )
     return fig
```
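The lines removed from `data_handler.py` were commented-out leftovers for a dynamic chart height (50 px per row, capped at 600 px) that was never enabled; both chart functions keep a fixed `width=1400` and let Plotly choose the height. For reference, if that logic were ever revived it would amount to something like the following sketch (a hypothetical helper, not part of this commit):

```python
def dynamic_chart_height(df, row_height=50, max_height=600):
    # One horizontal bar per dataframe row, 50 px each, capped at 600 px,
    # mirroring the commented-out calculation this commit deletes.
    return min(df.shape[0] * row_height, max_height)

# Usage inside unified_exam_chart / mmlu_chart would then be:
# fig.update_layout(height=dynamic_chart_height(df), width=1400)
```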