Spaces:
Running
Running
Use real results dataset (#1)
Browse files
- Add real data (de2276bb88e03c3468fb4b1502a54969f7085351)
- app.py +3 -3
- data_utils.py +13 -13
app.py
CHANGED
|
@@ -6,11 +6,11 @@ from data_utils import *
|
|
| 6 |
|
| 7 |
from datasets import load_dataset
|
| 8 |
|
| 9 |
-
# Dummy
|
| 10 |
ds = load_dataset("visionLMsftw/vibe-testing-samples", split="train")
|
| 11 |
-
models = get_model_names()
|
| 12 |
evaluation_data = get_evaluation_data(ds)
|
| 13 |
-
|
|
|
|
|
|
|
| 14 |
|
| 15 |
|
| 16 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
|
|
| 6 |
|
| 7 |
from datasets import load_dataset
|
| 8 |
|
|
|
|
| 9 |
ds = load_dataset("visionLMsftw/vibe-testing-samples", split="train")
|
|
|
|
| 10 |
evaluation_data = get_evaluation_data(ds)
|
| 11 |
+
ds_results = load_dataset("visionLMsftw/vibe-testing-results", split="train")
|
| 12 |
+
models = get_model_names(ds_results)
|
| 13 |
+
responses = get_responses(ds_results)
|
| 14 |
|
| 15 |
|
| 16 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
data_utils.py
CHANGED
|
@@ -29,19 +29,19 @@ def get_evaluation_data(ds):
|
|
| 29 |
})
|
| 30 |
return evaluation_data
|
| 31 |
|
| 32 |
-
def get_model_names():
|
| 33 |
-
models = [
|
| 34 |
return models
|
| 35 |
|
| 36 |
-
def get_responses():
|
| 37 |
-
responses = {
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
}
|
| 46 |
-
|
| 47 |
return responses
|
|
|
|
| 29 |
})
|
| 30 |
return evaluation_data
|
| 31 |
|
| 32 |
+
def get_model_names(ds_results):
    """Return the distinct model ids found in ``ds_results``.

    Args:
        ds_results: Results table; ``ds_results["model_id"]`` must yield one
            hashable model id per row.

    Returns:
        list: Each distinct model id once, in order of first appearance.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order. Iterating a
    # bare set() of strings gives an order that changes between interpreter
    # runs (hash randomization), so the returned list was not reproducible.
    models = list(dict.fromkeys(ds_results["model_id"]))
    return models
|
| 35 |
|
| 36 |
+
def get_responses(ds_results):
    """Group model responses by model id.

    Args:
        ds_results: Iterable of row mappings, each providing the keys
            ``"model_id"`` and ``"model_response"``.

    Returns:
        dict: ``{model_id: {i: response}}`` where ``i`` counts that model's
        rows from 0 in the order they are encountered in ``ds_results``.
    """
    responses = {}

    # Single pass over the rows: each response is appended to its model's
    # bucket as it is seen, instead of rescanning the whole dataset once per
    # distinct model.
    for row in ds_results:
        per_model = responses.setdefault(row["model_id"], {})
        # The next free index equals the number of responses stored so far,
        # which reproduces enumerate() over that model's rows.
        # NOTE(review): indices are positions within each model's rows, not
        # sample ids -- presumably rows are ordered consistently per model;
        # verify against the caller.
        per_model[len(per_model)] = row["model_response"]

    return responses
|