Spaces:

transformers-community
/

transformers-ci-dashboard

Running

App Files Files Community

ror HF Staff committed on Sep 15

Commit

2772ee9

verified ·

1 Parent(s): ffb305f

Added a redundancy for a failing AMD CI

Browse files

This PR adds a fallback when loading AMD CI data: if the job links list for a run is empty, we log an error and move on to the next file. This can happen if the artifact workflow fails.

Files changed (1) hide show

data.py +16 -1

data.py CHANGED Viewed

@@ -98,11 +98,26 @@ def read_one_dataframe(json_path: str, device_label: str) -> tuple[pd.DataFrame,
     df[f"failed_single_no_{device_label}"] = df["failures"].apply(lambda x: len(x["single"]) if "single" in x else 0)
     return df, df_upload_date
 def get_distant_data() -> tuple[pd.DataFrame, str]:
     # Retrieve AMD dataframe
     amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
     files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
-    df_amd, date_df_amd = read_one_dataframe(f"hf://{files_amd[0]}", "amd")
     # Retrieve NVIDIA dataframe, which pattern should be:
     # hf://datasets/hf-internal-testing`/transformers_daily_ci/raw/main/YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
     nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"

     df[f"failed_single_no_{device_label}"] = df["failures"].apply(lambda x: len(x["single"]) if "single" in x else 0)
     return df, df_upload_date
+def get_first_working_df(file_list: list[str]) -> str:
+    for file in file_list:
+        job_links = file.rsplit('/', 1)[0] + "/job_links.json"
+        try:
+            links = pd.read_json(f"hf://{job_links}", typ="series")
+            has_one_working_link = any(links.values)
+        except Exception as e:
+            logger.error(f"Could not read job links from {job_links}: {e}")
+            has_one_working_link = False
+        if has_one_working_link:
+            return file
+        logger.warning(f"Skipping {file} as it has no working job links.")
+    raise RuntimeError("Could not find any working dataframe in the provided list.")
 def get_distant_data() -> tuple[pd.DataFrame, str]:
     # Retrieve AMD dataframe
     amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
     files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
+    file_amd = get_first_working_df(files_amd)
+    df_amd, date_df_amd = read_one_dataframe(f"hf://{file_amd}", "amd")
     # Retrieve NVIDIA dataframe, which pattern should be:
     # hf://datasets/hf-internal-testing`/transformers_daily_ci/raw/main/YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
     nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"