Added a redundancy for a failing AMD CI
Browse files
This PR adds a redundancy when loading AMD CI data: if the job links list is empty, we log an error and move on to the next file. This can happen if the artifact workflow fails.
data.py
CHANGED
|
@@ -98,11 +98,26 @@ def read_one_dataframe(json_path: str, device_label: str) -> tuple[pd.DataFrame,
|
|
| 98 |
df[f"failed_single_no_{device_label}"] = df["failures"].apply(lambda x: len(x["single"]) if "single" in x else 0)
|
| 99 |
return df, df_upload_date
|
| 100 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
def get_distant_data() -> tuple[pd.DataFrame, str]:
|
| 102 |
# Retrieve AMD dataframe
|
| 103 |
amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
|
| 104 |
files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
|
| 105 |
-
|
|
|
|
| 106 |
# Retrieve NVIDIA dataframe, which pattern should be:
|
| 107 |
# hf://datasets/hf-internal-testing/transformers_daily_ci/raw/main/YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
|
| 108 |
nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
|
|
|
|
| 98 |
df[f"failed_single_no_{device_label}"] = df["failures"].apply(lambda x: len(x["single"]) if "single" in x else 0)
|
| 99 |
return df, df_upload_date
|
| 100 |
|
| 101 |
+
def get_first_working_df(file_list: list[str]) -> str:
    """Return the first entry of ``file_list`` that has at least one working job link.

    For every candidate path, a ``job_links.json`` located in the same
    directory is read as a pandas Series. The candidate is accepted as soon as
    any value of that Series is truthy.

    Args:
        file_list: Candidate file paths, tried in the order given. Each path is
            prefixed with ``hf://`` when its job links are read.

    Returns:
        The first candidate path whose job links contain a truthy value.

    Raises:
        RuntimeError: If no candidate in ``file_list`` qualifies (including
            when the list is empty).
    """
    for candidate in file_list:
        # job_links.json is expected next to the candidate, in the same directory.
        links_path = candidate.rsplit("/", 1)[0] + "/job_links.json"
        try:
            found_link = any(pd.read_json(f"hf://{links_path}", typ="series").values)
        except Exception as err:
            # Best effort: an unreadable job links file counts as "no working link".
            logger.error(f"Could not read job links from {links_path}: {err}")
            found_link = False
        if not found_link:
            logger.warning(f"Skipping {candidate} as it has no working job links.")
            continue
        return candidate
    raise RuntimeError("Could not find any working dataframe in the provided list.")
|
| 114 |
+
|
| 115 |
def get_distant_data() -> tuple[pd.DataFrame, str]:
|
| 116 |
# Retrieve AMD dataframe
|
| 117 |
amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
|
| 118 |
files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
|
| 119 |
+
file_amd = get_first_working_df(files_amd)
|
| 120 |
+
df_amd, date_df_amd = read_one_dataframe(f"hf://{file_amd}", "amd")
|
| 121 |
# Retrieve NVIDIA dataframe, which pattern should be:
|
| 122 |
# hf://datasets/hf-internal-testing/transformers_daily_ci/raw/main/YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
|
| 123 |
nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
|