Files changed:
- app.py (+9 -2)
- data_utils.py (+2 -1)
app.py
CHANGED
@@ -91,7 +91,7 @@ def _tab_leaderboard():
 
 
 def sample_explore_item(model_name, size_H, size_W):
-    print(model_name, size_H, size_W)
+    # print(model_name, size_H, size_W)
     explore_item = get_random_item(model_name, size_H, size_W)
     if explore_item is None:
         return "No item found", "No item found", "No item found", "No item found"
@@ -134,7 +134,14 @@ def _tab_explore():
 
 
 def _tab_submit():
-
+    markdown_text = """
+    Please create an issue on our [Github](https://github.com/yuchenlin/ZeroEval/) repository to talk about your model. Then, we can test it for you and report the results here on the Leaderboard.
+    If you would like to do local testing, please read our code [here](https://github.com/yuchenlin/ZeroEval/blob/main/src/evaluation/zebra_grid_eval.py)
+    and apply for the access for the [private dataset](https://huggingface.co/datasets/allenai/ZebraLogicBench-private) that contains the truth solutions.
+    """
+
+    gr.Markdown("### 🚀 Submit Your Results\n\n" + markdown_text, elem_classes="markdown-text")
+
 
 
 def build_demo():
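The app.py change fills in the previously empty Submit tab by rendering instructions with `gr.Markdown`. Below is a minimal sketch of how `_tab_submit()` could be wired into `build_demo()`; the diff only shows the `build_demo()` signature, so the `gr.Blocks`/`gr.Tab` layout, the tab title, and the shortened markdown body are assumptions for illustration.

```python
import gradio as gr

def _tab_submit():
    # Markdown body shortened here; the full text is in the diff above.
    markdown_text = """
    Please create an issue on our [Github](https://github.com/yuchenlin/ZeroEval/) repository
    so we can test your model and report the results on the leaderboard.
    """
    gr.Markdown("### 🚀 Submit Your Results\n\n" + markdown_text, elem_classes="markdown-text")

def build_demo():
    # Assumed layout: components created inside a Tab context attach to that tab.
    with gr.Blocks() as demo:
        with gr.Tab("Submit"):
            _tab_submit()
    return demo

if __name__ == "__main__":
    build_demo().launch()
```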
data_utils.py
CHANGED
@@ -96,7 +96,7 @@ def get_random_item(model_name="random", size_H="random", size_W="random"):
             continue
         prediction_reasoning = prediction_json.get("reasoning", "")
         prediction_table = prediction_json["solution"]
-        if prediction_table is not None:
+        if prediction_table is not None and "House 1" in prediction_table:
             selected_item = item
             break
 
@@ -112,6 +112,7 @@ def get_random_item(model_name="random", size_H="random", size_W="random"):
     explore_item["puzzle"] = selected_item["puzzle"]
     explore_item["solution"] = prediction_table
     explore_item["reasoning"] = prediction_reasoning
+
     headers = ["Houses"] + list(prediction_table["House 1"].keys())
     rows = []
     for row_id in range(len(prediction_table)):
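The new `"House 1"` membership check guards the table construction later in the same function, which indexes `prediction_table["House 1"]` and would raise a `KeyError` for a predicted solution that lacks that key. The snippet below is a rough, self-contained illustration with toy data; the row-building loop body is an assumption, since the diff only shows the loop header.

```python
# Toy prediction table in the shape the ZebraLogic solutions use.
prediction_table = {
    "House 1": {"Name": "Arnold", "Color": "red"},
    "House 2": {"Name": "Eric", "Color": "blue"},
}

if prediction_table is not None and "House 1" in prediction_table:
    headers = ["Houses"] + list(prediction_table["House 1"].keys())
    rows = []
    for row_id in range(len(prediction_table)):
        house = f"House {row_id + 1}"  # assumed naming scheme for rows
        rows.append([house] + [prediction_table[house].get(col, "") for col in headers[1:]])
    print(headers)  # ['Houses', 'Name', 'Color']
    print(rows)
else:
    # Without the guard, a malformed solution (e.g. {} or wrong keys) would crash here.
    print("Skipping item: solution table is missing or malformed.")
```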