ror HF Staff committed on
Commit
2772ee9
·
verified ·
1 Parent(s): ffb305f

Added a redundancy for a failing AMD CI

Browse files

This PR adds a redundancy when loading AMD CI data: if the job links list for a run is empty or cannot be read, we log an error and move on to the next file. This can happen if the artifact workflow fails.

Files changed (1) hide show
  1. data.py +16 -1
data.py CHANGED
@@ -98,11 +98,26 @@ def read_one_dataframe(json_path: str, device_label: str) -> tuple[pd.DataFrame,
98
  df[f"failed_single_no_{device_label}"] = df["failures"].apply(lambda x: len(x["single"]) if "single" in x else 0)
99
  return df, df_upload_date
100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  def get_distant_data() -> tuple[pd.DataFrame, str]:
102
  # Retrieve AMD dataframe
103
  amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
104
  files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
105
- df_amd, date_df_amd = read_one_dataframe(f"hf://{files_amd[0]}", "amd")
 
106
  # Retrieve NVIDIA dataframe, which pattern should be:
107
  # hf://datasets/hf-internal-testing/transformers_daily_ci/raw/main/YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
108
  nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
 
98
  df[f"failed_single_no_{device_label}"] = df["failures"].apply(lambda x: len(x["single"]) if "single" in x else 0)
99
  return df, df_upload_date
100
 
101
+ def get_first_working_df(file_list: list[str]) -> str:
102
+ for file in file_list:
103
+ job_links = file.rsplit('/', 1)[0] + "/job_links.json"
104
+ try:
105
+ links = pd.read_json(f"hf://{job_links}", typ="series")
106
+ has_one_working_link = any(links.values)
107
+ except Exception as e:
108
+ logger.error(f"Could not read job links from {job_links}: {e}")
109
+ has_one_working_link = False
110
+ if has_one_working_link:
111
+ return file
112
+ logger.warning(f"Skipping {file} as it has no working job links.")
113
+ raise RuntimeError("Could not find any working dataframe in the provided list.")
114
+
115
  def get_distant_data() -> tuple[pd.DataFrame, str]:
116
  # Retrieve AMD dataframe
117
  amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
118
  files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
119
+ file_amd = get_first_working_df(files_amd)
120
+ df_amd, date_df_amd = read_one_dataframe(f"hf://{file_amd}", "amd")
121
  # Retrieve NVIDIA dataframe, which pattern should be:
122
  # hf://datasets/hf-internal-testing/transformers_daily_ci/raw/main/YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
123
  nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"