Terry Zhuo
committed on
Commit
·
dba3ac5
1
Parent(s):
b736753
update
Browse files- app.py +65 -1
- src/envs.py +3 -0
app.py
CHANGED
|
@@ -34,6 +34,8 @@ from src.envs import (
|
|
| 34 |
API,
|
| 35 |
EVAL_REQUESTS_PATH,
|
| 36 |
RESULT_REPO,
|
|
|
|
|
|
|
| 37 |
HARD_RESULT_REPO,
|
| 38 |
ELO_REPO,
|
| 39 |
HARD_ELO_REPO,
|
|
@@ -71,6 +73,42 @@ INSTRUCT_SOLVE_DF = None
|
|
| 71 |
HARD_COMPLETE_SOLVE_DF = None
|
| 72 |
HARD_INSTRUCT_SOLVE_DF = None
|
| 73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
def restart_space():
|
| 75 |
API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
|
| 76 |
|
|
@@ -441,9 +479,35 @@ with main_block as demo:
|
|
| 441 |
demo.load(plot_solve_rate, [instruct_solve_gr,
|
| 442 |
gr.Textbox("Instruct", visible=False),
|
| 443 |
], instruct_map)
|
| 444 |
-
|
| 445 |
with gr.TabItem("📝 About", id=3):
|
| 446 |
gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 447 |
with gr.TabItem("🚀 Request", id=4):
|
| 448 |
gr.Markdown(SUBMISSION_TEXT_3)
|
| 449 |
|
|
|
|
| 34 |
API,
|
| 35 |
EVAL_REQUESTS_PATH,
|
| 36 |
RESULT_REPO,
|
| 37 |
+
DATA_VERSION,
|
| 38 |
+
DATA_REPO,
|
| 39 |
HARD_RESULT_REPO,
|
| 40 |
ELO_REPO,
|
| 41 |
HARD_ELO_REPO,
|
|
|
|
| 73 |
HARD_COMPLETE_SOLVE_DF = None
|
| 74 |
HARD_INSTRUCT_SOLVE_DF = None
|
| 75 |
|
| 76 |
+
DATA = datasets.load_dataset(DATA_REPO, "default", cache_dir=HF_HOME, split=DATA_VERSION,
|
| 77 |
+
verification_mode="no_checks")
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def filter_data(data, keyword):
    """Return the items of *data* whose completion prompt mentions *keyword*.

    The match is a case-insensitive substring test against each item's
    ``'complete_prompt'`` field.

    Args:
        data: Iterable of mapping-like items, each exposing a
            ``'complete_prompt'`` string.
        keyword: Search text. A falsy value (``""`` or ``None``) disables
            filtering.

    Returns:
        *data* itself, unchanged, when *keyword* is falsy; otherwise a new
        list holding the matching items in their original order.
    """
    if not keyword:
        return data
    # Lower-case the keyword once instead of once per item.
    needle = keyword.lower()
    return [item for item in data if needle in item['complete_prompt'].lower()]
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def update_display(search_keyword, index, show_solution, show_test):
    """Build the data-viewer outputs for the current search and slider state.

    Args:
        search_keyword: Text used to filter ``DATA`` via ``filter_data``.
        index: Requested position within the filtered items; it is clamped
            to the valid range.
        show_solution: When truthy, include the item's ``'canonical_solution'``.
        show_test: When truthy, include the item's ``'test'``.

    Returns:
        A seven-element list: task id, complete prompt, instruct prompt,
        solution (or ``""``), test (or ``""``), number of filtered items, and
        a ``gr.update`` that resets the slider's maximum and value. When
        nothing matches, the first element is an explanatory message, the
        next four are empty strings, the count is 0, and the slider is reset
        to 0.
    """
    filtered_data = filter_data(DATA, search_keyword)

    if not filtered_data:
        return ["No data available. Check the search criteria."] + [""] * 4 + [0, gr.update(maximum=0, value=0)]

    max_index = len(filtered_data) - 1
    # The index is used as a subscript below, so coerce it to an int first;
    # a missing value (None) is treated as position 0.
    index = min(max(0, int(index or 0)), max_index)

    # Fetch the selected row once rather than re-indexing for every field.
    row = filtered_data[index]
    solution_text = row['canonical_solution'] if show_solution else ""
    test_text = row['test'] if show_test else ""

    return [
        row['task_id'],
        row['complete_prompt'],
        row['instruct_prompt'],
        solution_text,
        test_text,
        len(filtered_data),
        gr.update(maximum=max_index, value=index),
    ]
|
| 111 |
+
|
| 112 |
def restart_space():
|
| 113 |
API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
|
| 114 |
|
|
|
|
| 479 |
demo.load(plot_solve_rate, [instruct_solve_gr,
|
| 480 |
gr.Textbox("Instruct", visible=False),
|
| 481 |
], instruct_map)
|
|
|
|
| 482 |
with gr.TabItem("📝 About", id=3):
|
| 483 |
gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")
|
| 484 |
+
with gr.TabItem("🔎 Data Viewer", id="viewer"):
|
| 485 |
+
search_input = gr.Textbox(label="Search by keyword")
|
| 486 |
+
count_output = gr.Number(label="Number of filtered items")
|
| 487 |
+
index_slider = gr.Slider(minimum=0, maximum=len(DATA)-1, step=1, label="Select Index")
|
| 488 |
+
show_solution = gr.Checkbox(label="Show Solution")
|
| 489 |
+
show_test = gr.Checkbox(label="Show Test Cases")
|
| 490 |
+
update_button = gr.Button("Update Display")
|
| 491 |
+
|
| 492 |
+
task_id_output = gr.Textbox(label="Task ID")
|
| 493 |
+
code_completion = gr.Code(language="python", label="Code Completion")
|
| 494 |
+
nl_instruction = gr.Code(language="python", label="Natural Language Instruction")
|
| 495 |
+
solution = gr.Code(language="python", label="Solution")
|
| 496 |
+
test_cases = gr.Code(language="python", label="Test Cases")
|
| 497 |
+
|
| 498 |
+
update_button.click(
|
| 499 |
+
update_display,
|
| 500 |
+
inputs=[search_input, index_slider, show_solution, show_test],
|
| 501 |
+
outputs=[task_id_output, code_completion, nl_instruction, solution, test_cases, count_output, index_slider]
|
| 502 |
+
)
|
| 503 |
+
|
| 504 |
+
# Initial load
|
| 505 |
+
demo.load(
|
| 506 |
+
update_display,
|
| 507 |
+
inputs=[search_input, index_slider, show_solution, show_test],
|
| 508 |
+
outputs=[task_id_output, code_completion, nl_instruction, solution, test_cases, count_output, index_slider]
|
| 509 |
+
)
|
| 510 |
+
|
| 511 |
with gr.TabItem("🚀 Request", id=4):
|
| 512 |
gr.Markdown(SUBMISSION_TEXT_3)
|
| 513 |
|
src/envs.py
CHANGED
|
@@ -4,8 +4,11 @@ from huggingface_hub import HfApi
|
|
| 4 |
# clone / pull the lmeh eval data
|
| 5 |
HF_TOKEN = os.environ.get("HF_TOKEN", None)
|
| 6 |
|
|
|
|
|
|
|
| 7 |
REPO_ID = "bigcode/bigcodebench-leaderboard"
|
| 8 |
QUEUE_REPO = "bigcode/bigcodebench-requests"
|
|
|
|
| 9 |
RESULT_REPO = "bigcode/bigcodebench-results"
|
| 10 |
HARD_RESULT_REPO = "bigcode/bigcodebench-hard-results"
|
| 11 |
|
|
|
|
| 4 |
# clone / pull the lmeh eval data
|
| 5 |
HF_TOKEN = os.environ.get("HF_TOKEN", None)
|
| 6 |
|
| 7 |
+
DATA_VERSION = "v0.1.0_hf"
|
| 8 |
+
|
| 9 |
REPO_ID = "bigcode/bigcodebench-leaderboard"
|
| 10 |
QUEUE_REPO = "bigcode/bigcodebench-requests"
|
| 11 |
+
DATA_REPO = "bigcode/bigcodebench"
|
| 12 |
RESULT_REPO = "bigcode/bigcodebench-results"
|
| 13 |
HARD_RESULT_REPO = "bigcode/bigcodebench-hard-results"
|
| 14 |
|