Spaces:
Sleeping
Sleeping
jjyang77
committed on
Commit
·
ba3922a
1
Parent(s):
ccc64a6
refactor sample_data format check
Browse files
- Dockerfile +3 -4
- api/app.py +17 -4
- api/bigcodebench_data.py +0 -39
- api/code_execution.py +1 -1
Dockerfile
CHANGED
|
@@ -7,6 +7,7 @@ RUN apt-get update && apt-get install -y git g++ python3-tk zip unzip procps r-b
|
|
| 7 |
|
| 8 |
# upgrade to latest pip
|
| 9 |
RUN pip install --upgrade pip
|
|
|
|
| 10 |
|
| 11 |
# Acquire benchmark code to local
|
| 12 |
# ADD "https://api.github.com/repos/bigcode-project/bigcodebench/commits?per_page=1" latest_commit
|
|
@@ -14,13 +15,11 @@ RUN pip install --upgrade pip
|
|
| 14 |
# RUN cd /bigcodebench
|
| 15 |
# RUN python3 -c "from bigcodebench.data import get_bigcodebench; get_bigcodebench()"
|
| 16 |
|
| 17 |
-
RUN pip install fastapi gunicorn uvicorn[standard] httpx pydantic==2.*
|
| 18 |
-
|
| 19 |
-
RUN pip install -I --timeout 2000 -r https://github.com/bigcode-project/bigcodebench-annotation/releases/download/v0.1.0/requirements.txt
|
| 20 |
-
|
| 21 |
# Add a new user "bigcodebenchuser"
|
| 22 |
RUN adduser --disabled-password --gecos "" bigcodebenchuser
|
| 23 |
|
|
|
|
|
|
|
| 24 |
COPY . .
|
| 25 |
|
| 26 |
WORKDIR /
|
|
|
|
| 7 |
|
| 8 |
# upgrade to latest pip
|
| 9 |
RUN pip install --upgrade pip
|
| 10 |
+
RUN pip install fastapi gunicorn uvicorn[standard] httpx pydantic==2.* plotly
|
| 11 |
|
| 12 |
# Acquire benchmark code to local
|
| 13 |
# ADD "https://api.github.com/repos/bigcode-project/bigcodebench/commits?per_page=1" latest_commit
|
|
|
|
| 15 |
# RUN cd /bigcodebench
|
| 16 |
# RUN python3 -c "from bigcodebench.data import get_bigcodebench; get_bigcodebench()"
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
# Add a new user "bigcodebenchuser"
|
| 19 |
RUN adduser --disabled-password --gecos "" bigcodebenchuser
|
| 20 |
|
| 21 |
+
RUN pip install -I --timeout 2000 -r https://github.com/bigcode-project/bigcodebench-annotation/releases/download/v0.1.0/requirements.txt
|
| 22 |
+
|
| 23 |
COPY . .
|
| 24 |
|
| 25 |
WORKDIR /
|
api/app.py
CHANGED
|
@@ -7,10 +7,9 @@ from concurrent.futures import ProcessPoolExecutor, as_completed
|
|
| 7 |
from typing import Dict, List, Tuple
|
| 8 |
import gc
|
| 9 |
|
| 10 |
-
from fastapi import FastAPI
|
| 11 |
from fastapi.responses import RedirectResponse
|
| 12 |
|
| 13 |
-
from api.bigcodebench_data import load_solutions
|
| 14 |
from api.code_execution import untrusted_check
|
| 15 |
|
| 16 |
Result = Tuple[str, List[bool]]
|
|
@@ -67,7 +66,20 @@ def create_app() -> FastAPI:
|
|
| 67 |
eval_results = defaultdict(list) # task_id ->
|
| 68 |
remainings = set()
|
| 69 |
|
| 70 |
-
for sample in
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
task_id = sample["task_id"]
|
| 72 |
|
| 73 |
solution = sample["solution"]
|
|
@@ -155,5 +167,6 @@ def check_correctness(
|
|
| 155 |
)
|
| 156 |
return ret
|
| 157 |
|
|
|
|
| 158 |
def get_groundtruth():
|
| 159 |
-
raise
|
|
|
|
| 7 |
from typing import Dict, List, Tuple
|
| 8 |
import gc
|
| 9 |
|
| 10 |
+
from fastapi import FastAPI, HTTPException
|
| 11 |
from fastapi.responses import RedirectResponse
|
| 12 |
|
|
|
|
| 13 |
from api.code_execution import untrusted_check
|
| 14 |
|
| 15 |
Result = Tuple[str, List[bool]]
|
|
|
|
| 66 |
eval_results = defaultdict(list) # task_id ->
|
| 67 |
remainings = set()
|
| 68 |
|
| 69 |
+
for i, sample in enumerate(samples):
|
| 70 |
+
# TODO: investigate why HTTPException detail is not passed to client.
|
| 71 |
+
|
| 72 |
+
for key in ["task_id", "res_id", "test", "solution", "entry_point"]:
|
| 73 |
+
if key not in sample:
|
| 74 |
+
raise HTTPException(status_code=400, detail=f"'{key}' not in sample {i}!")
|
| 75 |
+
|
| 76 |
+
if not isinstance(sample["solution"], str):
|
| 77 |
+
raise HTTPException(status_code=400, detail="Solution must be a string!")
|
| 78 |
+
|
| 79 |
+
sample["_identifier"] = (
|
| 80 |
+
sample["task_id"] + f" (line {i+1} )"
|
| 81 |
+
)
|
| 82 |
+
|
| 83 |
task_id = sample["task_id"]
|
| 84 |
|
| 85 |
solution = sample["solution"]
|
|
|
|
| 167 |
)
|
| 168 |
return ret
|
| 169 |
|
| 170 |
+
|
| 171 |
def get_groundtruth():
|
| 172 |
+
raise HTTPException(status_code=405, detail="Groundtruth execution is not implemented yet!")
|
api/bigcodebench_data.py
DELETED
|
@@ -1,39 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import json
|
| 3 |
-
import gzip
|
| 4 |
-
from typing import Dict, Iterable
|
| 5 |
-
|
| 6 |
-
def stream_jsonl(filename: str) -> Iterable[Dict]:
|
| 7 |
-
"""
|
| 8 |
-
Parses each jsonl line and yields it as a dictionary
|
| 9 |
-
"""
|
| 10 |
-
if filename.endswith(".gz"):
|
| 11 |
-
with open(filename, "rb") as gzfp:
|
| 12 |
-
with gzip.open(gzfp, "rt") as fp:
|
| 13 |
-
for line in fp:
|
| 14 |
-
if any(not x.isspace() for x in line):
|
| 15 |
-
yield json.loads(line)
|
| 16 |
-
else:
|
| 17 |
-
with open(filename, "r") as fp:
|
| 18 |
-
for line in fp:
|
| 19 |
-
if any(not x.isspace() for x in line):
|
| 20 |
-
yield json.loads(line)
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
def load_solutions(samples) -> Iterable[Dict]:
|
| 24 |
-
"""
|
| 25 |
-
"""
|
| 26 |
-
for i, sample in enumerate(samples):
|
| 27 |
-
assert "task_id" in sample, "No task_id found in sample!"
|
| 28 |
-
assert "res_id" in sample, "No res_id found in sample!"
|
| 29 |
-
assert "test" in sample, "No test found in sample!"
|
| 30 |
-
assert "solution" in sample, "No solution found in sample!"
|
| 31 |
-
assert "entry_point" in sample, "No entry_point found in sample!"
|
| 32 |
-
assert isinstance(
|
| 33 |
-
sample["solution"], str
|
| 34 |
-
), "Solution must be a string! If you have multiple solutions, please repeat the task_id."
|
| 35 |
-
|
| 36 |
-
sample["_identifier"] = (
|
| 37 |
-
sample["task_id"] + f" (line {i+1} )"
|
| 38 |
-
)
|
| 39 |
-
yield sample
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
api/code_execution.py
CHANGED
|
@@ -39,7 +39,7 @@ from typing import List, Tuple, Union
|
|
| 39 |
|
| 40 |
import numpy as np
|
| 41 |
|
| 42 |
-
TIMEOUT_LIMIT=
|
| 43 |
|
| 44 |
|
| 45 |
@contextlib.contextmanager
|
|
|
|
| 39 |
|
| 40 |
import numpy as np
|
| 41 |
|
| 42 |
+
TIMEOUT_LIMIT=240.0 # BCB default is 240.0
|
| 43 |
|
| 44 |
|
| 45 |
@contextlib.contextmanager
|