Files changed:
- app.py (+9 -2)
- data_utils.py (+2 -1)
app.py
CHANGED
@@ -91,7 +91,7 @@ def _tab_leaderboard():
 
 
 def sample_explore_item(model_name, size_H, size_W):
-    print(model_name, size_H, size_W)
+    # print(model_name, size_H, size_W)
     explore_item = get_random_item(model_name, size_H, size_W)
     if explore_item is None:
         return "No item found", "No item found", "No item found", "No item found"
@@ -134,7 +134,14 @@ def _tab_explore():
 
 
 def _tab_submit():
-
+    markdown_text = """
+    Please create an issue on our [Github](https://github.com/yuchenlin/ZeroEval/) repository to talk about your model. Then, we can test it for you and report the results here on the Leaderboard.
+    If you would like to do local testing, please read our code [here](https://github.com/yuchenlin/ZeroEval/blob/main/src/evaluation/zebra_grid_eval.py)
+    and apply for the access for the [private dataset](https://huggingface.co/datasets/allenai/ZebraLogicBench-private) that contains the truth solutions.
+    """
+
+    gr.Markdown("### 🚀 Submit Your Results\n\n" + markdown_text, elem_classes="markdown-text")
+
 
 
 def build_demo():
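The app.py change fills in the previously empty Submit tab by rendering instructions with `gr.Markdown`. Below is a minimal sketch of how `_tab_submit()` could be wired into `build_demo()`; the diff only shows the `build_demo()` signature, so the `gr.Blocks`/`gr.Tab` layout, the tab title, and the shortened markdown body are assumptions for illustration.

```python
import gradio as gr

def _tab_submit():
    # Markdown body shortened here; the full text is in the diff above.
    markdown_text = """
    Please create an issue on our [Github](https://github.com/yuchenlin/ZeroEval/) repository
    so we can test your model and report the results on the leaderboard.
    """
    gr.Markdown("### 🚀 Submit Your Results\n\n" + markdown_text, elem_classes="markdown-text")

def build_demo():
    # Assumed layout: components created inside a Tab context attach to that tab.
    with gr.Blocks() as demo:
        with gr.Tab("Submit"):
            _tab_submit()
    return demo

if __name__ == "__main__":
    build_demo().launch()
```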
data_utils.py
CHANGED
@@ -96,7 +96,7 @@ def get_random_item(model_name="random", size_H="random", size_W="random"):
             continue
         prediction_reasoning = prediction_json.get("reasoning", "")
         prediction_table = prediction_json["solution"]
-        if prediction_table is not None:
+        if prediction_table is not None and "House 1" in prediction_table:
             selected_item = item
             break
 
@@ -112,6 +112,7 @@ def get_random_item(model_name="random", size_H="random", size_W="random"):
     explore_item["puzzle"] = selected_item["puzzle"]
     explore_item["solution"] = prediction_table
     explore_item["reasoning"] = prediction_reasoning
+
     headers = ["Houses"] + list(prediction_table["House 1"].keys())
     rows = []
     for row_id in range(len(prediction_table)):
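The new `"House 1"` membership check guards the table construction later in the same function, which indexes `prediction_table["House 1"]` and would raise a `KeyError` for a predicted solution that lacks that key. The snippet below is a rough, self-contained illustration with toy data; the row-building loop body is an assumption, since the diff only shows the loop header.

```python
# Toy prediction table in the shape the ZebraLogic solutions use.
prediction_table = {
    "House 1": {"Name": "Arnold", "Color": "red"},
    "House 2": {"Name": "Eric", "Color": "blue"},
}

if prediction_table is not None and "House 1" in prediction_table:
    headers = ["Houses"] + list(prediction_table["House 1"].keys())
    rows = []
    for row_id in range(len(prediction_table)):
        house = f"House {row_id + 1}"  # assumed naming scheme for rows
        rows.append([house] + [prediction_table[house].get(col, "") for col in headers[1:]])
    print(headers)  # ['Houses', 'Name', 'Color']
    print(rows)
else:
    # Without the guard, a malformed solution (e.g. {} or wrong keys) would crash here.
    print("Skipping item: solution table is missing or malformed.")
```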