Corey Morris
committed on
Commit
·
a79afe8
1
Parent(s):
c823b6d
Added bar chart for abstract algebra data.
Browse files
app.py
CHANGED
|
@@ -2,6 +2,49 @@ import streamlit as st
|
|
| 2 |
import pandas as pd
|
| 3 |
import plotly.express as px
|
| 4 |
from result_data_processor import ResultDataProcessor
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
data_provider = ResultDataProcessor()
|
| 7 |
|
|
@@ -113,6 +156,7 @@ def create_plot(df, arc_column, moral_column, models=None):
|
|
| 113 |
|
| 114 |
# Custom scatter plots
|
| 115 |
st.header('Custom scatter plots')
|
|
|
|
| 116 |
selected_x_column = st.selectbox('Select x-axis', filtered_data.columns.tolist(), index=0)
|
| 117 |
selected_y_column = st.selectbox('Select y-axis', filtered_data.columns.tolist(), index=3)
|
| 118 |
|
|
@@ -123,9 +167,9 @@ else:
|
|
| 123 |
st.write("Please select different columns for the x and y axes.")
|
| 124 |
|
| 125 |
# end of custom scatter plots
|
|
|
|
|
|
|
| 126 |
|
| 127 |
-
st.header('Moral Scenarios Performance')
|
| 128 |
-
st.write("The dashed red line represents the random chance performance of 0.25")
|
| 129 |
|
| 130 |
fig = create_plot(filtered_data, 'MMLU_average', 'MMLU_moral_scenarios')
|
| 131 |
st.plotly_chart(fig)
|
|
@@ -137,13 +181,16 @@ fig = px.histogram(filtered_data, x="MMLU_moral_scenarios", marginal="rug", hove
|
|
| 137 |
st.plotly_chart(fig)
|
| 138 |
|
| 139 |
st.header('Abstract Algebra Performance')
|
| 140 |
-
|
| 141 |
-
st.plotly_chart(fig)
|
| 142 |
|
| 143 |
-
fig = create_plot(filtered_data, 'MMLU_average', 'MMLU_abstract_algebra')
|
| 144 |
-
st.plotly_chart(fig)
|
| 145 |
|
| 146 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
st.markdown("***Thank you to hugging face for running the evaluations and supplying the data as well as the original authors of the evaluations.***")
|
| 148 |
|
| 149 |
st.markdown("""
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
import plotly.express as px
|
| 4 |
from result_data_processor import ResultDataProcessor
|
| 5 |
+
import matplotlib.pyplot as plt
|
| 6 |
+
import numpy as np
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def plot_top_n(df, target_column, n=10):
    """Draw a grouped bar chart of the top ``n`` rows ranked by ``target_column``.

    For each selected row three bars are drawn side by side: the value of
    ``target_column`` and of ``'MMLU_average'`` on the left (score) axis, and
    the value of ``'Parameters'`` on a secondary right-hand axis. The chart is
    sent to the Streamlit page with ``st.pyplot``; nothing is returned.

    Args:
        df: DataFrame containing ``target_column``, ``'MMLU_average'`` and
            ``'Parameters'`` columns. The index values are used as x-tick
            labels (presumably model names -- confirm against the caller).
        target_column: Name of the column used both for ranking and for the
            first bar series.
        n: Maximum number of rows to show (default 10).
    """
    top_n = df.nlargest(n, target_column)

    # Initialize the bar plot
    fig, ax1 = plt.subplots(figsize=(10, 5))
    try:
        # Set width for each bar and their positions
        width = 0.28
        ind = np.arange(len(top_n))

        # Plot target_column and MMLU_average on the primary y-axis,
        # shifted so the three bars of one row sit next to each other
        ax1.bar(ind - width, top_n[target_column], width=width, color='blue', label=target_column)
        ax1.bar(ind, top_n['MMLU_average'], width=width, color='orange', label='MMLU_average')

        # Set the primary y-axis labels and title
        ax1.set_title(f'Top {n} performing models on {target_column}')
        ax1.set_xlabel('Model')
        ax1.set_ylabel('Score')

        # Parameters get their own y-axis: they are plotted against a
        # separate scale from the two score series
        ax2 = ax1.twinx()
        ax2.bar(ind + width, top_n['Parameters'], width=width, color='red', label='Parameters')

        # Set the secondary y-axis labels
        ax2.set_ylabel('Parameters', color='red')
        ax2.tick_params(axis='y', labelcolor='red')

        # Set the x-ticks and their labels
        ax1.set_xticks(ind)
        ax1.set_xticklabels(top_n.index, rotation=45, ha="right")

        # Lay out the axes first, then anchor one figure-level legend
        # (covering both axes) just outside the right edge
        fig.tight_layout()
        fig.legend(loc='center left', bbox_to_anchor=(1, 0.5))

        # Show the plot
        st.pyplot(fig)
    finally:
        # Figures created through pyplot stay registered until closed;
        # without this every Streamlit rerun would leak one figure.
        plt.close(fig)
|
| 48 |
|
| 49 |
data_provider = ResultDataProcessor()
|
| 50 |
|
|
|
|
| 156 |
|
| 157 |
# Custom scatter plots
|
| 158 |
st.header('Custom scatter plots')
|
| 159 |
+
st.write("The dashed red line represents the random chance performance of 0.25")
|
| 160 |
selected_x_column = st.selectbox('Select x-axis', filtered_data.columns.tolist(), index=0)
|
| 161 |
selected_y_column = st.selectbox('Select y-axis', filtered_data.columns.tolist(), index=3)
|
| 162 |
|
|
|
|
| 167 |
st.write("Please select different columns for the x and y axes.")
|
| 168 |
|
| 169 |
# end of custom scatter plots
|
| 170 |
+
st.markdown("## Notable findings and plots")
|
| 171 |
+
st.markdown("### Moral Scenarios Performance")
|
| 172 |
|
|
|
|
|
|
|
| 173 |
|
| 174 |
fig = create_plot(filtered_data, 'MMLU_average', 'MMLU_moral_scenarios')
|
| 175 |
st.plotly_chart(fig)
|
|
|
|
| 181 |
st.plotly_chart(fig)
|
| 182 |
|
| 183 |
st.header('Abstract Algebra Performance')
|
| 184 |
+
st.write("Small models showed surprisingly strong performance on the abstract algebra task. A 6 Billion parameter model is tied for the best performance on this task and there are a number of other small models in the top 10.")
|
|
|
|
| 185 |
|
|
|
|
|
|
|
| 186 |
|
| 187 |
|
| 188 |
+
# Usage example:
|
| 189 |
+
plot_top_n(filtered_data, 'MMLU_abstract_algebra', 10)
|
| 190 |
+
|
| 191 |
+
fig = create_plot(filtered_data, 'Parameters', 'MMLU_abstract_algebra')
|
| 192 |
+
st.plotly_chart(fig)
|
| 193 |
+
|
| 194 |
st.markdown("***Thank you to hugging face for running the evaluations and supplying the data as well as the original authors of the evaluations.***")
|
| 195 |
|
| 196 |
st.markdown("""
|