Spaces:
Runtime error
Runtime error
regisss
committed on
Commit
·
ca2da1c
1
Parent(s):
3f77013
Add try/catch in bash entrypoint to manage failed experiments
Browse files
- entrypoint.sh +15 -2
- failed_run.py +46 -0
entrypoint.sh
CHANGED
|
@@ -8,12 +8,25 @@ echo "Attempting to run."
|
|
| 8 |
python /parse_requests.py | while read line; do
|
| 9 |
IFS="," read backend_model experiment_name <<< $(echo ${line})
|
| 10 |
echo "Benchmarking Model: ${backend_model}, Task: ${experiment_name}"
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
done
|
|
|
|
|
|
|
| 13 |
echo "Finished; uploading dataset results"
|
| 14 |
python /create_results.py ./runs
|
|
|
|
| 15 |
# Pausing space
|
| 16 |
echo "Pausing space."
|
| 17 |
python /pause_space.py
|
| 18 |
echo "Done."
|
| 19 |
-
#fi
|
|
|
|
| 8 |
#######################################
# Run a single benchmark and, if it fails, report the failure and discard
# its run directory.
# Arguments:
#   $1 - backend model identifier (also used as the processor)
#   $2 - experiment name, used as the optimum-benchmark config name
# Outputs:
#   Progress messages on stdout; the benchmark's stderr is captured in
#   <run_dir>/error.log.
#######################################
run_benchmark() {
  local backend_model=$1
  local experiment_name=$2
  local run_dir

  echo "Benchmarking Model: ${backend_model}, Task: ${experiment_name}"

  # The timestamp is resolved here with date(1): inside double quotes Bash
  # would try to expand a Hydra-style "${now:...}" itself and abort, and the
  # shell needs the very same concrete path that is handed to Hydra below.
  run_dir="./runs/${experiment_name}/${backend_model}/$(date +%Y-%m-%d-%H-%M-%S)"
  export run_dir

  # The stderr redirection is opened by the shell before the benchmark
  # starts, so the directory has to exist beforehand.
  mkdir -p -- "${run_dir}" || {
    echo "Could not create run directory ${run_dir}" >&2
    return 1
  }

  # Rely on the exit status rather than on error.log being non-empty:
  # warnings and progress bars also go to stderr on successful runs.
  if ! optimum-benchmark \
    --config-name "${experiment_name}" \
    --config-dir /optimum-benchmark/examples/energy_star/ \
    backend.model="${backend_model}" \
    backend.processor="${backend_model}" \
    hydra.run.dir="${run_dir}" \
    2> "${run_dir}/error.log"; then
    echo "An error was raised while benchmarking the model..."
    python /failed_run.py --run_dir "${run_dir}" --model_name "${backend_model}"

    # Delete the current run directory so that it is not pushed by
    # create_results.py later. ":?" guards against an empty path.
    rm -rf -- "${run_dir:?}"
  fi
}

# Each line printed by parse_requests.py is "<backend_model>,<experiment_name>".
python /parse_requests.py | while read -r line; do
  IFS="," read -r backend_model experiment_name <<< "${line}"
  run_benchmark "${backend_model}" "${experiment_name}"
done
|
| 24 |
+
|
| 25 |
+
# Every requested benchmark has been attempted: hand ./runs to the results script.
printf '%s\n' "Finished; uploading dataset results"
python /create_results.py ./runs

# Pause the Space now that the results script has run.
printf '%s\n' "Pausing space."
python /pause_space.py
printf '%s\n' "Done."
|
|
|
failed_run.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
from datasets import load_dataset, Dataset
|
| 5 |
+
from huggingface_hub import HfApi
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
# NOTE(review): the Hub token is read from the "DEBUG" environment variable;
# confirm that this is the intended variable name.
TOKEN = os.environ.get("DEBUG")

# Dataset repository that holds the benchmark requests.
REQUESTS_REPO = "EnergyStarAI/requests_debug"


def mark_run_failed(dataset, model_name, error_message):
    """Flag the requests for ``model_name`` as failed and attach the error text.

    Args:
        dataset: pandas DataFrame of requests with a "model" column. It is
            modified in place.
        model_name: Model identifier to look up in the "model" column.
        error_message: Text stored in the "error_message" column of the
            matching rows.

    Returns:
        The same DataFrame, for convenience.
    """
    # Series.isin() only accepts list-likes and raises TypeError on a bare
    # string, so select the rows with a plain equality comparison.
    failed_rows = dataset["model"] == model_name

    # Set benchmark to failed
    dataset.loc[failed_rows, "status"] = "FAILED"

    # Add a new column for the error message if necessary
    if "error_message" not in dataset.columns:
        dataset["error_message"] = ""
    dataset.loc[failed_rows, "error_message"] = error_message
    return dataset


def main():
    """Parse the CLI arguments and push the FAILED status to the requests dataset."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--run_dir",
        type=str,
        required=True,
        help="Path to the run directory.",
    )
    parser.add_argument(
        "--model_name",
        type=str,
        required=True,
        help="Model to benchmark.",
    )
    args = parser.parse_args()

    # Read error message
    with open(f"{args.run_dir}/error.log", "r") as file:
        error_message = file.read()

    # Updating request
    dataset = load_dataset(REQUESTS_REPO, split="test", token=TOKEN).to_pandas()
    mark_run_failed(dataset, args.model_name, error_message)

    updated_dataset = Dataset.from_pandas(dataset)
    updated_dataset.push_to_hub(REQUESTS_REPO, split="test", token=TOKEN)

    print("Status set to FAILED")


if __name__ == "__main__":
    main()
|