felix committed · 5d6c941 · Parent(s): 0f541ca
add arena
app.py CHANGED

@@ -111,12 +111,14 @@ if compare_mode:

     hf_llm_diagrams = extract_images('hf_llm_diagram', imgs)
     bigcode_diagrams = extract_images('bigcode', imgs)
-    mt_bench_diagrams = extract_images('
+    mt_bench_diagrams = extract_images('lmsys_leaderboard_mt_bench', imgs)
+    arena_diagrams = extract_images('lmsys_leaderboard_arena', imgs)
     opencompass_diagrams = extract_images('opencompass_leaderboard', imgs)

     compare_hf_llm_diagrams = extract_images('hf_llm_diagram', compare_imgs)
     compare_bigcode_diagrams = extract_images('bigcode', compare_imgs)
-    compare_mt_bench_diagrams = extract_images('
+    compare_mt_bench_diagrams = extract_images('lmsys_leaderboard_mt_bench', compare_imgs)
+    compare_arena_diagrams = extract_images('lmsys_leaderboard_arena', compare_imgs)
     compare_opencompass_diagrams = extract_images('opencompass_leaderboard', compare_imgs)

     # Display each category side by side

@@ -136,6 +138,9 @@ if compare_mode:

     # Displaying MT-Bench Models Leaderboard
     display_side_by_side(mt_bench_diagrams, compare_mt_bench_diagrams, "MT-Bench Models Leaderboard")

+    # Displaying Arena Models Leaderboard
+    display_side_by_side(arena_diagrams, compare_arena_diagrams, "LMSYS Arena Elo Models Leaderboard")
+
     # Displaying OpenCompass Models Leaderboard
     display_side_by_side(opencompass_diagrams, compare_opencompass_diagrams, "OpenCompass Models Leaderboard")

@@ -168,11 +173,12 @@ else:

     # Extracting images that start with "hf_llm_diagram"
     hf_llm_diagrams = [img for img in imgs if 'hf_llm_diagram' in os.path.basename(img)]
     bigcode_diagrams = [img for img in imgs if 'bigcode' in os.path.basename(img)]
-    mt_bench_diagrams = [img for img in imgs if '
+    mt_bench_diagrams = [img for img in imgs if 'lmsys_leaderboard_mt_bench' in os.path.basename(img)]
+    arena_diagrams = [img for img in imgs if 'lmsys_leaderboard_arena' in os.path.basename(img)]
     opencompass_diagrams = [img for img in imgs if 'opencompass_leaderboard' in os.path.basename(img)]

     # Getting the remaining images
-    remaining_imgs = list(set(imgs) - set(hf_llm_diagrams) - set(bigcode_diagrams) - set(mt_bench_diagrams) - set(opencompass_diagrams))
+    remaining_imgs = list(set(imgs) - set(hf_llm_diagrams) - set(bigcode_diagrams) - set(mt_bench_diagrams) - set(arena_diagrams) - set(opencompass_diagrams))

     st.subheader("HuggingFace Open LLM leaderboard by Model Size", divider=True)
     cols = st.columns(2)

@@ -213,6 +219,12 @@ else:

     print_model_list(mt_bench_diagrams[0],st,True)

+    st.subheader("LMSYS Arena Elo Models Leaderboard", divider=True)
+    cols = st.columns(2)
+    cols[0].image(arena_diagrams[0], use_column_width="auto")
+
+    print_model_list(arena_diagrams[0],st,True)
+
     st.subheader("OpenCompass Models Leaderboard", divider=True)
     cols = st.columns(2)
     cols[0].image(opencompass_diagrams[0], use_column_width="auto")

@@ -238,7 +250,7 @@ st.write(

    <p>Leaderboards tracked:</p>
    <ul>
    <li><a href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard">Hugging Face Open LLM</a></li>
-    <li><a href="https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard">MT-Bench</a> GPT4 judged evaluation of models
+    <li><a href="https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard">MT-Bench and Arena Elo</a> MT-Bench is GPT4 judged evaluation of models, Arena Elo is users ranking outputs between models.</li>
    <li><a href="https://tatsu-lab.github.io/alpaca_eval/">AlpacaEval</a> GPT4 judged evaluation of models</li>
    <li><a href="https://www.mosaicml.com/llm-evaluation">MosaicML</a> Balanced set of static benchmarks</li>
    <li><a href="https://opencompass.org.cn/leaderboard-llm">OpenCompass</a> Balanced set of static benchmarks</li>
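Note that the patch calls an extract_images helper it never shows. Judging from the else branch, which filters filenames inline with the same patterns, it is presumably a thin wrapper around a basename match. A minimal sketch under that assumption (the body below is hypothetical; only the call signature appears in the diff):

import os

def extract_images(pattern, imgs):
    # Hypothetical body: mirrors the inline list comprehensions in the
    # non-compare branch, keeping images whose filename contains the pattern.
    return [img for img in imgs if pattern in os.path.basename(img)]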
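Likewise, display_side_by_side is only ever called, never defined, in this patch. A plausible sketch of what the compare-mode calls expect from it, assuming it puts the current and comparison snapshots of one leaderboard in two Streamlit columns (an assumption, not the Space's actual implementation):

import streamlit as st

def display_side_by_side(diagrams, compare_diagrams, title):
    # Assumed behavior: one subheader per leaderboard, current snapshot on
    # the left, comparison snapshot on the right, matching the call sites.
    st.subheader(title, divider=True)
    cols = st.columns(2)
    if diagrams:
        cols[0].image(diagrams[0], use_column_width="auto")
    if compare_diagrams:
        cols[1].image(compare_diagrams[0], use_column_width="auto")

With that shape, the new arena call added in the second hunk slots in exactly like the existing MT-Bench and OpenCompass calls.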