Spaces:

AIEnergyScore
/

launch-computation-example

Runtime error

App Files Community

meg-huggingface committed on Oct 7, 2024

Commit

216eab8

1 Parent(s): 51ccf18

Some clean-up

Browse files

Files changed (2) hide show

entrypoint.sh +24 -23
failed_run.py +4 -26

entrypoint.sh CHANGED Viewed

@@ -4,45 +4,46 @@
 set -e
 export SPACE="EnergyStarAI/launch-computation-example"
-failed=0
 echo "Not checking h100 -- already know it's not there."
 #python /check_h100.py
 echo "Attempting to run."
 #if [[ $? = 0 ]]; then
-python /parse_requests.py | while read line; do
-    IFS="," read backend_model experiment_name <<< $(echo ${line})
     echo "Benchmarking Model: ${backend_model}, Task: ${experiment_name}"
     now=$(date +%Y-%m-%d-%H-%M-%S)
-    export run_dir="./runs/${experiment_name}/${backend_model}/${now}"
-    mkdir -p $run_dir
     # Let the benchmarking begin!
-    optimum-benchmark --config-name ${experiment_name}  --config-dir /optimum-benchmark/examples/energy_star/ backend.model=${backend_model} backend.processor=${backend_model} hydra.run.dir=${run_dir} 2> $run_dir/error.log
 done || {
-    echo "Error."
-    failed=1
 }
 echo "Uploading all output from the /runs folder."
 python /upload_run_folder.py --run_dir "/runs"
-if [ -s $run_dir/error.log ]; then
-    # error.log is not-empty, an error was raised
-    echo "An error was raised while benchmarking the model..."
-    python /failed_run.py --run_dir $run_dir --model_name $backend_model
-    # TODO: Is this necessary?
-    # Delete the current run directory so that it is not pushed by create_results.py later
-    rm -rf $run_dir
-elif [ "$failed" -eq 1 ]; then
-  echo "Failed, but was not able to retrieve error log."
-else
-  # The error log file is empty, and we didn't catch an error.
-  echo "Finished; uploading dataset results"
-  python /create_results.py ./runs
-fi
 # Pausing space
 echo "Pausing space."
 python /pause_space.py

 set -e
 export SPACE="EnergyStarAI/launch-computation-example"
 echo "Not checking h100 -- already know it's not there."
 #python /check_h100.py
 echo "Attempting to run."
 #if [[ $? = 0 ]]; then
+# For each line in the requests dataset....
+python /parse_requests.py | while read -r line; do
+    # Read the name of the model and the experiment.
+    IFS="," read backend_model experiment_name <<< "${line}"
     echo "Benchmarking Model: ${backend_model}, Task: ${experiment_name}"
+    # Initialize the directory for output.
     now=$(date +%Y-%m-%d-%H-%M-%S)
+    run_dir="./runs/${experiment_name}/${backend_model}/${now}"
+    mkdir -p "$run_dir"
     # Let the benchmarking begin!
+    optimum-benchmark --config-name "${experiment_name}"  --config-dir /optimum-benchmark/examples/energy_star/ backend.model="${backend_model}" backend.processor="${backend_model}" hydra.run.dir="${run_dir}" 2> "${run_dir}/error.log"
+    # Either mark that the benchmark FAILED, or upload the results.
+    if [ -s "${run_dir}/error.log" ]; then
+      # error.log is not-empty, an error was raised
+      echo "An error was raised while benchmarking the model..."
+      python /failed_run.py --run_dir "${run_dir}" --model_name "${backend_model}"
+    else
+      # The error log file is empty, and we didn't catch an error.
+      echo "Finished; uploading dataset results"
+      python /create_results.py ./runs
+    fi
 done || {
+  # Catch any errors that get thrown; update the requests dataset to FAILED
+  # based on the last-read run_dir and backend_model.
+  echo "Error."
+  python /failed_run.py --run_dir "${run_dir}" --model_name "${backend_model}"
 }
 echo "Uploading all output from the /runs folder."
 python /upload_run_folder.py --run_dir "/runs"
 # Pausing space
 echo "Pausing space."
 python /pause_space.py

failed_run.py CHANGED Viewed

@@ -23,13 +23,7 @@ parser.add_argument(
     required=True,
     help="Model to benchmark.",
 )
-parser.add_argument(
-    "--logs_name",
-    default=None,
-    type=str,
-    required=False,
-    help="Location of space runtime error log -- note this is distinct from an optimum-benchmark log.",
-)
 args = parser.parse_args()
 # Updating request
@@ -49,22 +43,6 @@ try:
 except FileNotFoundError as e:
     print(f"Could not find {args.run_dir}/error.log")
-print("Status set to FAILED")
-if args.logs_name:
-    print("Attempting to save space runtime error log at EnergyStarAI/error_logs")
-    try:
-        api.upload_file(
-            path_or_fileobj=args.error_log,
-            path_in_repo=args.error_log,
-            repo_id="EnergyStarAI/error_logs",
-            repo_type="dataset",
-        )
-    dataset.loc[dataset["model"].isin(args.model_name), ['status']] = "FAILED"
-    updated_dataset = Dataset.from_pandas(dataset)
-    updated_dataset.push_to_hub("EnergyStarAI/requests_debug", split="test", token=TOKEN)
-    except Exception as e:
-        print("That didn't work. Error:")
-        print(e)

     required=True,
     help="Model to benchmark.",
 )
 args = parser.parse_args()
 # Updating request
 except FileNotFoundError as e:
     print(f"Could not find {args.run_dir}/error.log")
+updated_dataset = Dataset.from_pandas(dataset)
+updated_dataset.push_to_hub("EnergyStarAI/requests_debug", split="test", token=TOKEN)
+print("Status set to FAILED")