Spaces:
Runtime error
Runtime error
meg-huggingface
committed on
Commit
·
258cdcb
1
Parent(s):
9cfc9cd
Rolling back to 8 hours ago
Browse files
- entrypoint.sh +3 -3
- failed_run.py +22 -31
entrypoint.sh
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
#!/bin/bash
|
| 2 |
|
| 3 |
-
export SPACE="
|
| 4 |
|
| 5 |
echo "Not checking h100 -- already know it's not there."
|
| 6 |
#python /check_h100.py
|
|
@@ -19,7 +19,7 @@ python /parse_requests.py | while read -r line; do
|
|
| 19 |
mkdir -p "$run_dir"
|
| 20 |
|
| 21 |
# Let the benchmarking begin!
|
| 22 |
-
optimum-benchmark --config-name "${experiment_name}" --config-dir /optimum-benchmark/examples/energy_star/ backend.model="${backend_model}" backend.processor="${backend_model}" hydra.run.dir="${run_dir}" 2> "${run_dir}/error.log" || (python /failed_run.py --run_dir "${run_dir}" --model_name "${backend_model}" && rm -rf $run_dir)
|
| 23 |
done
|
| 24 |
|
| 25 |
echo "Finished; uploading dataset results"
|
|
@@ -31,4 +31,4 @@ python /upload_run_folder.py --run_dir "/runs"
|
|
| 31 |
# Pausing space
|
| 32 |
echo "Pausing space."
|
| 33 |
python /pause_space.py
|
| 34 |
-
echo "Done."
|
|
|
|
| 1 |
#!/bin/bash
|
| 2 |
|
| 3 |
+
export SPACE="EnergyStarAI/launch-computation-example"
|
| 4 |
|
| 5 |
echo "Not checking h100 -- already know it's not there."
|
| 6 |
#python /check_h100.py
|
|
|
|
| 19 |
mkdir -p "$run_dir"
|
| 20 |
|
| 21 |
# Let the benchmarking begin!
|
| 22 |
+
optimum-benchmark --config-name "${experiment_name}" --config-dir /optimum-benchmark/examples/energy_star/ backend.model="${backend_model}" backend.processor="${backend_model}" hydra.run.dir="${run_dir}" 2> "${run_dir}/error.log" || (python /failed_run.py --run_dir "${run_dir}" --model_name "${backend_model}" && rm -rf $run_dir)
|
| 23 |
done
|
| 24 |
|
| 25 |
echo "Finished; uploading dataset results"
|
|
|
|
| 31 |
# Pausing space
|
| 32 |
echo "Pausing space."
|
| 33 |
python /pause_space.py
|
| 34 |
+
echo "Done."
|
failed_run.py
CHANGED
|
@@ -4,7 +4,6 @@ import os
|
|
| 4 |
from datasets import load_dataset, Dataset
|
| 5 |
from huggingface_hub import HfApi
|
| 6 |
|
| 7 |
-
|
| 8 |
TOKEN = os.environ.get("DEBUG")
|
| 9 |
api = HfApi(token=TOKEN)
|
| 10 |
|
|
@@ -23,39 +22,31 @@ parser.add_argument(
|
|
| 23 |
required=True,
|
| 24 |
help="Model to benchmark.",
|
| 25 |
)
|
| 26 |
-
parser.add_argument(
|
| 27 |
-
"--reason",
|
| 28 |
-
default=None,
|
| 29 |
-
type=str,
|
| 30 |
-
required=False,
|
| 31 |
-
help="Reason for failure -- to update in the requests file",
|
| 32 |
-
)
|
| 33 |
|
| 34 |
args = parser.parse_args()
|
| 35 |
|
| 36 |
# Updating request
|
| 37 |
-
dataset = load_dataset("
|
| 38 |
-
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
-
# If we have a custom reason for failure, add that instead of generic FAILED.
|
| 42 |
-
if args.reason:
|
| 43 |
-
dataset.loc[dataset["model"].isin([args.model_name]), ['status']] = args.reason
|
| 44 |
-
else:
|
| 45 |
-
# TODO: This doesn't have to be try-except, we could actually check if the file is there...
|
| 46 |
-
try:
|
| 47 |
-
# Read error message
|
| 48 |
-
with open(f"{args.run_dir}/error.log", 'r') as file:
|
| 49 |
-
for f in file.readlines():
|
| 50 |
-
if 'Traceback (most recent call last):' in f:
|
| 51 |
-
error_message = f
|
| 52 |
-
dataset.loc[dataset["model"].isin([args.model_name]), ['status']] = "FAILED"
|
| 53 |
-
print("Status set to FAILED")
|
| 54 |
-
else:
|
| 55 |
-
dataset.loc[dataset["model"].isin([args.model_name]), ['status']] = "COMPLETED"
|
| 56 |
-
# Add a new column for the error message if necessary
|
| 57 |
-
except FileNotFoundError as e:
|
| 58 |
-
print(f"Could not find {args.run_dir}/error.log")
|
| 59 |
-
|
| 60 |
updated_dataset = Dataset.from_pandas(dataset)
|
| 61 |
-
updated_dataset.push_to_hub("
|
|
|
|
|
|
| 4 |
from datasets import load_dataset, Dataset
|
| 5 |
from huggingface_hub import HfApi
|
| 6 |
|
|
|
|
| 7 |
TOKEN = os.environ.get("DEBUG")
|
| 8 |
api = HfApi(token=TOKEN)
|
| 9 |
|
|
|
|
| 22 |
required=True,
|
| 23 |
help="Model to benchmark.",
|
| 24 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
args = parser.parse_args()
|
| 27 |
|
| 28 |
# Updating request
|
| 29 |
+
dataset = load_dataset("EnergyStarAI/requests_debug", split="test",
|
| 30 |
+
token=TOKEN).to_pandas()
|
| 31 |
+
|
| 32 |
+
# Set benchmark to failed
|
| 33 |
+
# TODO: This doesn't have to be try-except, we could actually check if the file is there.
|
| 34 |
+
try:
|
| 35 |
+
# Read error message
|
| 36 |
+
with open(f"{args.run_dir}/error.log", 'r') as file:
|
| 37 |
+
for f in file.readlines():
|
| 38 |
+
if 'Traceback (most recent call last):' in f:
|
| 39 |
+
error_message = f
|
| 40 |
+
dataset.loc[dataset["model"].isin([args.model_name]), [
|
| 41 |
+
'status']] = "FAILED"
|
| 42 |
+
print("Status set to FAILED")
|
| 43 |
+
else:
|
| 44 |
+
dataset.loc[dataset["model"].isin([args.model_name]), [
|
| 45 |
+
'status']] = "COMPLETED"
|
| 46 |
+
# Add a new column for the error message if necessary
|
| 47 |
+
except FileNotFoundError as e:
|
| 48 |
+
print(f"Could not find {args.run_dir}/error.log")
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
updated_dataset = Dataset.from_pandas(dataset)
|
| 51 |
+
updated_dataset.push_to_hub("EnergyStarAI/requests_debug", split="test",
|
| 52 |
+
token=TOKEN)
|