Spaces:

AIEnergyScore
/

launch-computation-example

Runtime error

meg-huggingface committed on Oct 24, 2024

Commit

79fff16

1 Parent(s): c5729e2

Simplifying logic of handling failures & successes...it seemed like it was getting a bit overly complicated.

Files changed (2) hide show

create_results.py CHANGED Viewed

@@ -9,8 +9,10 @@ TOKEN = os.environ.get("DEBUG")
 api = HfApi(token=TOKEN)
 out_dir = sys.argv[1]
-# Uploading results
 api.upload_folder(
     folder_path=out_dir,
     repo_id="AIEnergyScore/results_debug",
@@ -22,19 +24,15 @@ requests = load_dataset("AIEnergyScore/requests_debug", split="test",
                         token=TOKEN)
 requests_dset = requests.to_pandas()
-models_ran = []
-for f in os.scandir(out_dir):
-    if f.is_dir():
-        for s in os.scandir(f):
-            if s.is_dir() and s.name not in ['hooks', 'info', 'objects', 'refs',
-                                             'logs']:
-                for m in os.scandir(s):
-                    models_ran.append(s.name + '/' + m.name)
-print("Models ran are: " + str(models_ran))
-requests_dset.loc[
-    requests_dset["model"].isin(models_ran), ['status']] = "COMPLETED"
 updated_dset = Dataset.from_pandas(requests_dset)
 updated_dset.push_to_hub("AIEnergyScore/requests_debug", split="test",
                          token=TOKEN)

 api = HfApi(token=TOKEN)
 out_dir = sys.argv[1]
+all_attempts_read = open("attempts.txt", "r+").readlines()
+failed_attempts_read = open("failed_attempts.txt", "r+").readlines()
+# Uploading output to the results dataset.
 api.upload_folder(
     folder_path=out_dir,
     repo_id="AIEnergyScore/results_debug",
                         token=TOKEN)
 requests_dset = requests.to_pandas()
+for line in all_attempts_read:
+    experiment_name, model = line.strip().split(',')
+    if line not in failed_attempts_read:
+        requests_dset.loc[
+            requests_dset["model"] == model, ['status']] = "COMPLETED"
+    else:
+        requests_dset.loc[
+            requests_dset["model"] == model, ['status']] = "FAILED"
 updated_dset = Dataset.from_pandas(requests_dset)
 updated_dset.push_to_hub("AIEnergyScore/requests_debug", split="test",
                          token=TOKEN)

entrypoint.sh CHANGED Viewed

@@ -7,6 +7,8 @@ echo "Not checking h100 -- already know it's not there."
 echo "Attempting to run."
 #if [[ $? = 0 ]]; then
 # For each line in the requests dataset....
 python /parse_requests.py | while read -r line; do
     # Read the name of the model and the experiment.
@@ -17,16 +19,18 @@ python /parse_requests.py | while read -r line; do
     now=$(date +%Y-%m-%d-%H-%M-%S)
     run_dir="./runs/${experiment_name}/${backend_model}/${now}"
     mkdir -p "$run_dir"
     # Let the benchmarking begin!
-    optimum-benchmark --config-name "${experiment_name}"  --config-dir /optimum-benchmark/examples/energy_star/ backend.model="${backend_model}" backend.processor="${backend_model}" hydra.run.dir="${run_dir}" 2> "${run_dir}/error.log" || (python /failed_run.py --run_dir "${run_dir}" --model_name "${backend_model}" && rm -rf $run_dir)
 done
-echo "Finished; uploading dataset results"
 python /create_results.py ./runs
-echo "Uploading all output from the /runs folder."
-python /upload_run_folder.py --run_dir "/runs"
 # Pausing space
 echo "Pausing space."

 echo "Attempting to run."
 #if [[ $? = 0 ]]; then
+touch attempts.txt
+touch failed_attempts.txt
 # For each line in the requests dataset....
 python /parse_requests.py | while read -r line; do
     # Read the name of the model and the experiment.
     now=$(date +%Y-%m-%d-%H-%M-%S)
     run_dir="./runs/${experiment_name}/${backend_model}/${now}"
     mkdir -p "$run_dir"
+    echo "${experiment_name},${backend_model}" >> attempts.txt
     # Let the benchmarking begin!
+    optimum-benchmark --config-name "${experiment_name}"  --config-dir /optimum-benchmark/examples/energy_star/ backend.model="${backend_model}" backend.processor="${backend_model}" hydra.run.dir="${run_dir}" 2> "${run_dir}/error.log" ||
+    echo "${experiment_name},${backend_model}" >> failed_attempts.txt #(python /failed_run.py --run_dir "${run_dir}" --model_name "${backend_model}" && rm -rf $run_dir)
 done
+echo "Finished; updating requests dataset and results dataset."
 python /create_results.py ./runs
+#echo "Uploading all output from the /runs folder."
+#python /upload_run_folder.py --run_dir "/runs"
 # Pausing space
 echo "Pausing space."