Spaces:
Runtime error
Runtime error
meg-huggingface
committed on
Commit
·
216eab8
1
Parent(s):
51ccf18
Some clean-up
Browse files
- entrypoint.sh +24 -23
- failed_run.py +4 -26
entrypoint.sh
CHANGED
|
@@ -4,45 +4,46 @@
|
|
| 4 |
set -e
|
| 5 |
|
| 6 |
export SPACE="EnergyStarAI/launch-computation-example"
|
| 7 |
-
failed=0
|
| 8 |
|
| 9 |
echo "Not checking h100 -- already know it's not there."
|
| 10 |
#python /check_h100.py
|
| 11 |
echo "Attempting to run."
|
| 12 |
#if [[ $? = 0 ]]; then
|
| 13 |
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
| 16 |
echo "Benchmarking Model: ${backend_model}, Task: ${experiment_name}"
|
|
|
|
|
|
|
| 17 |
now=$(date +%Y-%m-%d-%H-%M-%S)
|
| 18 |
-
|
| 19 |
-
mkdir -p $run_dir
|
| 20 |
|
| 21 |
# Let the benchmarking begin!
|
| 22 |
-
optimum-benchmark --config-name ${experiment_name} --config-dir /optimum-benchmark/examples/energy_star/ backend.model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
done || {
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
| 26 |
}
|
| 27 |
|
| 28 |
echo "Uploading all output from the /runs folder."
|
| 29 |
python /upload_run_folder.py --run_dir "/runs"
|
| 30 |
|
| 31 |
-
if [ -s $run_dir/error.log ]; then
|
| 32 |
-
# error.log is not-empty, an error was raised
|
| 33 |
-
echo "An error was raised while benchmarking the model..."
|
| 34 |
-
python /failed_run.py --run_dir $run_dir --model_name $backend_model
|
| 35 |
-
# TODO: Is this necessary?
|
| 36 |
-
# Delete the current run directory so that it is not pushed by create_results.py later
|
| 37 |
-
rm -rf $run_dir
|
| 38 |
-
elif [ "$failed" -eq 1 ]; then
|
| 39 |
-
echo "Failed, but was not able to retrieve error log."
|
| 40 |
-
else
|
| 41 |
-
# The error log file is empty, and we didn't catch an error.
|
| 42 |
-
echo "Finished; uploading dataset results"
|
| 43 |
-
python /create_results.py ./runs
|
| 44 |
-
fi
|
| 45 |
-
|
| 46 |
# Pausing space
|
| 47 |
echo "Pausing space."
|
| 48 |
python /pause_space.py
|
|
|
|
| 4 |
set -e
|
| 5 |
|
| 6 |
export SPACE="EnergyStarAI/launch-computation-example"
|
|
|
|
| 7 |
|
| 8 |
echo "Not checking h100 -- already know it's not there."
|
| 9 |
#python /check_h100.py
|
| 10 |
echo "Attempting to run."
|
| 11 |
#if [[ $? = 0 ]]; then
|
| 12 |
|
| 13 |
+
# For each line in the requests dataset....
|
| 14 |
+
python /parse_requests.py | while read -r line; do
|
| 15 |
+
# Read the name of the model and the experiment.
|
| 16 |
+
IFS="," read backend_model experiment_name <<< "${line}"
|
| 17 |
echo "Benchmarking Model: ${backend_model}, Task: ${experiment_name}"
|
| 18 |
+
|
| 19 |
+
# Initialize the directory for output.
|
| 20 |
now=$(date +%Y-%m-%d-%H-%M-%S)
|
| 21 |
+
run_dir="./runs/${experiment_name}/${backend_model}/${now}"
|
| 22 |
+
mkdir -p "$run_dir"
|
| 23 |
|
| 24 |
# Let the benchmarking begin!
|
| 25 |
+
optimum-benchmark --config-name "${experiment_name}" --config-dir /optimum-benchmark/examples/energy_star/ backend.model="${backend_model}" backend.processor="${backend_model}" hydra.run.dir="${run_dir}" 2> "${run_dir}/error.log"
|
| 26 |
+
|
| 27 |
+
# Either mark that the benchmark FAILED, or upload the results.
|
| 28 |
+
if [ -s "${run_dir}/error.log" ]; then
|
| 29 |
+
# error.log is not-empty, an error was raised
|
| 30 |
+
echo "An error was raised while benchmarking the model..."
|
| 31 |
+
python /failed_run.py --run_dir "${run_dir}" --model_name "${backend_model}"
|
| 32 |
+
else
|
| 33 |
+
# The error log file is empty, and we didn't catch an error.
|
| 34 |
+
echo "Finished; uploading dataset results"
|
| 35 |
+
python /create_results.py ./runs
|
| 36 |
+
fi
|
| 37 |
done || {
|
| 38 |
+
# Catch any errors that get thrown; update the requests dataset to FAILED
|
| 39 |
+
# based on the last-read run_dir and backend_model.
|
| 40 |
+
echo "Error."
|
| 41 |
+
python /failed_run.py --run_dir "${run_dir}" --model_name "${backend_model}"
|
| 42 |
}
|
| 43 |
|
| 44 |
echo "Uploading all output from the /runs folder."
|
| 45 |
python /upload_run_folder.py --run_dir "/runs"
|
| 46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
# Pausing space
|
| 48 |
echo "Pausing space."
|
| 49 |
python /pause_space.py
|
failed_run.py
CHANGED
|
@@ -23,13 +23,7 @@ parser.add_argument(
|
|
| 23 |
required=True,
|
| 24 |
help="Model to benchmark.",
|
| 25 |
)
|
| 26 |
-
|
| 27 |
-
"--logs_name",
|
| 28 |
-
default=None,
|
| 29 |
-
type=str,
|
| 30 |
-
required=False,
|
| 31 |
-
help="Location of space runtime error log -- note this is distinct from an optimum-benchmark log.",
|
| 32 |
-
)
|
| 33 |
args = parser.parse_args()
|
| 34 |
|
| 35 |
# Updating request
|
|
@@ -49,22 +43,6 @@ try:
|
|
| 49 |
except FileNotFoundError as e:
|
| 50 |
print(f"Could not find {args.run_dir}/error.log")
|
| 51 |
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
print("Status set to FAILED")
|
| 55 |
-
|
| 56 |
-
if args.logs_name:
|
| 57 |
-
print("Attempting to save space runtime error log at EnergyStarAI/error_logs")
|
| 58 |
-
try:
|
| 59 |
-
api.upload_file(
|
| 60 |
-
path_or_fileobj=args.error_log,
|
| 61 |
-
path_in_repo=args.error_log,
|
| 62 |
-
repo_id="EnergyStarAI/error_logs",
|
| 63 |
-
repo_type="dataset",
|
| 64 |
-
)
|
| 65 |
-
dataset.loc[dataset["model"].isin(args.model_name), ['status']] = "FAILED"
|
| 66 |
-
updated_dataset = Dataset.from_pandas(dataset)
|
| 67 |
-
updated_dataset.push_to_hub("EnergyStarAI/requests_debug", split="test", token=TOKEN)
|
| 68 |
-
except Exception as e:
|
| 69 |
-
print("That didn't work. Error:")
|
| 70 |
-
print(e)
|
|
|
|
| 23 |
required=True,
|
| 24 |
help="Model to benchmark.",
|
| 25 |
)
|
| 26 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
args = parser.parse_args()
|
| 28 |
|
| 29 |
# Updating request
|
|
|
|
| 43 |
except FileNotFoundError as e:
|
| 44 |
print(f"Could not find {args.run_dir}/error.log")
|
| 45 |
|
| 46 |
+
updated_dataset = Dataset.from_pandas(dataset)
|
| 47 |
+
updated_dataset.push_to_hub("EnergyStarAI/requests_debug", split="test", token=TOKEN)
|
| 48 |
+
print("Status set to FAILED")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|