commit
- app.py +10 -10
- data_handler.py +0 -10
app.py
CHANGED
````diff
@@ -17,8 +17,11 @@ def refresh_data():
 
     global_output_armenian = unified_exam_result_table(global_unified_exam_df)
     global_output_mmlu = mmlu_result_table(global_mmlu_df)
-
-
+
+    unified_chart = unified_exam_chart(global_output_armenian, 'Average')
+    mmlu_chart_output = mmlu_chart(global_output_mmlu, 'Average')
+
+    return global_output_armenian, global_output_mmlu, unified_chart, mmlu_chart_output, 'Average', 'Average'
 
 def main():
     global global_mmlu_df, global_unified_exam_df, global_output_armenian, global_output_mmlu
@@ -60,10 +63,10 @@ def main():
         **Creator Company:** Metric AI Research Lab, Yerevan, Armenia."""
         )
         gr.Image("logo.png", width=200, show_label=False, show_download_button=False, show_fullscreen_button=False, show_share_button=False)
-        gr.Markdown("""
+        gr.Markdown("""
         - [Website](https://metric.am/)
         - [Hugging Face](https://huggingface.co/Metric-AI)
-
+
         MMLU-Pro-Hy is a massive multi-task test in MCQA format, inspired by the original MMLU benchmark, adapted for the Armenian language. The Armenian Unified Exams benchmark allows for comparison with human-level knowledge.
         """
         )
@@ -72,7 +75,7 @@ def main():
         """
         To submit a model for evaluation, please follow these steps:
         1. **Evaluate your model**:
-
+            - Follow the evaluation script provided here: [https://github.com/Anania-AI/Arm-LLM-Benchmark](https://github.com/Anania-AI/Arm-LLM-Benchmark)
         2. **Format your submission file**:
             - After evaluation, you will get a `result.json` file. Ensure the file follows this format:
         ```json
@@ -111,12 +114,9 @@ def main():
         refresh_button = gr.Button("Refresh Data")
         refresh_button.click(
             fn=refresh_data,
-            outputs=[table_output_armenian,
-                     table_output_mmlu,
-                     plot_output_armenian,
-                     plot_output_mmlu
-                     ],
+            outputs=[table_output_armenian, table_output_mmlu, plot_output_armenian, plot_output_mmlu, plot_column_dropdown_unified_exam, plot_column_dropdown_mmlu],
         )
+
     app.launch(share=True, debug=True)
 
 if __name__ == "__main__":
````
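The functional change in app.py: `refresh_data` now returns six values, and the `outputs=` list passed to `refresh_button.click` grows to six components to match, since Gradio maps a handler's return tuple onto its output components positionally. The two trailing `'Average'` strings reset the plot-column dropdowns. A minimal runnable sketch of this wiring, with stand-in data in place of the real benchmark tables and charts:

```python
import gradio as gr
import pandas as pd
import plotly.express as px

def refresh_data():
    # Stand-in data; the real app rebuilds these tables from the latest
    # benchmark results before re-rendering the charts.
    df = pd.DataFrame({"Model": ["model-a", "model-b"], "Average": [0.52, 0.71]})
    fig = px.bar(df, x="Average", y="Model", orientation="h")
    # Six return values -> six entries in outputs=[...] below, same order.
    # The trailing "Average" strings reset both plot-column dropdowns.
    return df, df, fig, fig, "Average", "Average"

with gr.Blocks() as app:
    table_output_armenian = gr.DataFrame()
    table_output_mmlu = gr.DataFrame()
    plot_output_armenian = gr.Plot()
    plot_output_mmlu = gr.Plot()
    plot_column_dropdown_unified_exam = gr.Dropdown(choices=["Average"], value="Average")
    plot_column_dropdown_mmlu = gr.Dropdown(choices=["Average"], value="Average")
    refresh_button = gr.Button("Refresh Data")
    refresh_button.click(
        fn=refresh_data,
        outputs=[table_output_armenian, table_output_mmlu,
                 plot_output_armenian, plot_output_mmlu,
                 plot_column_dropdown_unified_exam, plot_column_dropdown_mmlu],
    )

app.launch(share=True, debug=True)
```

Returning the dropdowns' default value alongside the tables and charts keeps the plots and their column selectors consistent after a refresh.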
data_handler.py
CHANGED
```diff
@@ -59,10 +59,6 @@ def unified_exam_chart(unified_exam_df, plot_column):
         title=title,
         orientation='h'
     )
-    # max_chart_height = 600
-
-    # chart_height = df.shape[0] * 50
-    # chart_height = min(chart_height, max_chart_height)
 
     fig.update_layout(
         xaxis=dict(range=[0, x_range_max]),
@@ -70,7 +66,6 @@ def unified_exam_chart(unified_exam_df, plot_column):
         xaxis_title=dict(font=dict(size=12)),
         yaxis_title=dict(font=dict(size=12)),
         yaxis=dict(autorange="reversed"),
-        # height=chart_height,
         width=1400
     )
     return fig
@@ -93,10 +88,6 @@ def mmlu_chart(mmlu_df, plot_column):
         orientation='h',
         range_color=[0,1]
     )
-    # max_chart_height = 600
-
-    # chart_height = df.shape[0] * 50
-    # chart_height = min(chart_height, max_chart_height)
 
     fig.update_layout(
         xaxis=dict(range=[0, x_range_max]),
@@ -104,7 +95,6 @@ def mmlu_chart(mmlu_df, plot_column):
         xaxis_title=dict(font=dict(size=12)),
         yaxis_title=dict(font=dict(size=12)),
         yaxis=dict(autorange="reversed"),
-        # height=chart_height,
         width=1400
    )
     return fig
```
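The lines removed from `data_handler.py` were commented-out leftovers for a dynamic chart height (50 px per row, capped at 600 px) that was never enabled; both chart functions keep a fixed `width=1400` and let Plotly choose the height. For reference, if that logic were ever revived it would amount to something like the following sketch (a hypothetical helper, not part of this commit):

```python
def dynamic_chart_height(df, row_height=50, max_height=600):
    # One horizontal bar per dataframe row, 50 px each, capped at 600 px,
    # mirroring the commented-out calculation this commit deletes.
    return min(df.shape[0] * row_height, max_height)

# Usage inside unified_exam_chart / mmlu_chart would then be:
# fig.update_layout(height=dynamic_chart_height(df), width=1400)
```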