bigcodebench-leaderboard

Running

App Files Files Community

Terry Zhuo committed on Jul 16, 2024

Commit

dba3ac5

1 Parent(s): b736753

update

Browse files

Files changed (2) hide show

app.py +65 -1
src/envs.py +3 -0

app.py CHANGED Viewed

@@ -34,6 +34,8 @@ from src.envs import (
     API,
     EVAL_REQUESTS_PATH,
     RESULT_REPO,
     HARD_RESULT_REPO,
     ELO_REPO,
     HARD_ELO_REPO,
@@ -71,6 +73,42 @@ INSTRUCT_SOLVE_DF = None
 HARD_COMPLETE_SOLVE_DF = None
 HARD_INSTRUCT_SOLVE_DF = None
 def restart_space():
     API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
@@ -441,9 +479,35 @@ with main_block as demo:
                     demo.load(plot_solve_rate, [instruct_solve_gr,
                                                 gr.Textbox("Instruct", visible=False),
                                                 ], instruct_map)
         with gr.TabItem("📝 About", id=3):
             gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")
         with gr.TabItem("🚀 Request", id=4):
             gr.Markdown(SUBMISSION_TEXT_3)

     API,
     EVAL_REQUESTS_PATH,
     RESULT_REPO,
+    DATA_VERSION,
+    DATA_REPO,
     HARD_RESULT_REPO,
     ELO_REPO,
     HARD_ELO_REPO,
 HARD_COMPLETE_SOLVE_DF = None
 HARD_INSTRUCT_SOLVE_DF = None
+DATA = datasets.load_dataset(DATA_REPO, "default", cache_dir=HF_HOME, split=DATA_VERSION,  # Module-level load of the DATA_VERSION split of DATA_REPO; read by update_display and the viewer slider.
+                             verification_mode="no_checks")  # NOTE(review): presumably skips checksum/size verification - confirm against the datasets docs.
+def filter_data(data, keyword):  # Narrow `data` to the items whose 'complete_prompt' contains `keyword`.
+    if not keyword:  # Empty or None keyword: no filtering requested.
+        return data  # The original object is handed back as-is (not copied, not converted to a list).
+    filtered_data = [item for item in data if keyword.lower() in item['complete_prompt'].lower()]  # Case-insensitive substring match; the result is always a list.
+    return filtered_data
+def update_display(search_keyword, index, show_solution, show_test):  # Build the seven values shown by the Data Viewer tab.
+    filtered_data = filter_data(DATA, search_keyword)  # The search always runs against the module-level DATA.
+    if not filtered_data:  # Nothing matched (or DATA itself is empty).
+        return ["No data available. Check the search criteria."] + [""] * 4 + [0, gr.update(maximum=0, value=0)]  # Message, four blanks, count 0, slider reset to 0.
+    max_index = len(filtered_data) - 1  # Last valid position in the filtered results.
+    index = min(max(0, index), max_index)  # Clamp the requested index into [0, max_index].
+    task_id = filtered_data[index]['task_id']
+    snippet1 = filtered_data[index]['complete_prompt']
+    snippet2 = filtered_data[index]['instruct_prompt']
+    snippet3 = filtered_data[index]['canonical_solution'] if show_solution else ""  # Blank unless show_solution is truthy.
+    snippet4 = filtered_data[index]['test'] if show_test else ""  # Blank unless show_test is truthy.
+    return [  # Same order as the `outputs=` lists wired to this callback.
+        task_id,
+        snippet1,
+        snippet2,
+        snippet3,
+        snippet4,
+        len(filtered_data),  # Number of matching items.
+        gr.update(maximum=max_index, value=index)  # Re-range the slider to the filtered results and set it to the clamped index.
+    ]
 def restart_space():
     API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
                     demo.load(plot_solve_rate, [instruct_solve_gr,
                                                 gr.Textbox("Instruct", visible=False),
                                                 ], instruct_map)
         with gr.TabItem("📝 About", id=3):
             gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")
+        with gr.TabItem("🔎 Data Viewer", id="viewer"):
+            search_input = gr.Textbox(label="Search by keyword")
+            count_output = gr.Number(label="Number of filtered items")
+            index_slider = gr.Slider(minimum=0, maximum=len(DATA)-1, step=1, label="Select Index")
+            show_solution = gr.Checkbox(label="Show Solution")
+            show_test = gr.Checkbox(label="Show Test Cases")
+            update_button = gr.Button("Update Display")
+            task_id_output = gr.Textbox(label="Task ID")
+            code_completion = gr.Code(language="python", label="Code Completion")
+            nl_instruction = gr.Code(language="python", label="Natural Language Instruction")
+            solution = gr.Code(language="python", label="Solution")
+            test_cases = gr.Code(language="python", label="Test Cases")
+            update_button.click(
+                update_display,
+                inputs=[search_input, index_slider, show_solution, show_test],
+                outputs=[task_id_output, code_completion, nl_instruction, solution, test_cases, count_output, index_slider]
+            )
+            # Initial load
+            demo.load(
+                update_display,
+                inputs=[search_input, index_slider, show_solution, show_test],
+                outputs=[task_id_output, code_completion, nl_instruction, solution, test_cases, count_output, index_slider]
+            )
         with gr.TabItem("🚀 Request", id=4):
             gr.Markdown(SUBMISSION_TEXT_3)

src/envs.py CHANGED Viewed

@@ -4,8 +4,11 @@ from huggingface_hub import HfApi
 # clone / pull the lmeh eval data
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 REPO_ID = "bigcode/bigcodebench-leaderboard"
 QUEUE_REPO = "bigcode/bigcodebench-requests"
 RESULT_REPO = "bigcode/bigcodebench-results"
 HARD_RESULT_REPO = "bigcode/bigcodebench-hard-results"

 # clone / pull the lmeh eval data
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
+DATA_VERSION = "v0.1.0_hf"
 REPO_ID = "bigcode/bigcodebench-leaderboard"
 QUEUE_REPO = "bigcode/bigcodebench-requests"
+DATA_REPO = "bigcode/bigcodebench"
 RESULT_REPO = "bigcode/bigcodebench-results"
 HARD_RESULT_REPO = "bigcode/bigcodebench-hard-results"