Spaces:

facebook
/

fairchem_leaderboard

Running on CPU Upgrade

App Files Files Community

mshuaibi committed on Aug 28

Commit

7c3b81b

1 Parent(s): e170ae5

better error handling

Browse files

Files changed (1) hide show

evaluator.py +43 -11

evaluator.py CHANGED Viewed

@@ -16,6 +16,11 @@ from fairchem.data.omol.modules.evaluator import (
     unoptimized_spin_gap,
 )
 OMOL_EVAL_FUNCTIONS = {
     "Ligand pocket": ligand_pocket,
     "Ligand strain": ligand_strain,
@@ -66,8 +71,13 @@ def reorder(ref: np.ndarray, to_reorder: np.ndarray) -> np.ndarray:
 def get_order(path_submission: Path, path_annotations: Path):
-    with np.load(path_submission) as data:
-        submission_ids = data["ids"]
     with np.load(path_annotations, allow_pickle=True) as data:
         annotations_ids = data["ids"]
@@ -86,6 +96,10 @@ def get_order(path_submission: Path, path_annotations: Path):
         )
         raise Exception(f"IDs don't match.\n{details}")
     return reorder(annotations_ids, submission_ids)
@@ -96,10 +110,17 @@ def s2ef_metrics(
 ) -> Dict[str, float]:
     order = get_order(submission_filename, annotations_path)
-    with np.load(submission_filename) as data:
-        forces = data["forces"]
-        energy = data["energy"][order]
-        forces = np.array(np.split(forces, np.cumsum(data["natoms"])[:-1]), dtype=object)[order]
     if len(set(np.where(np.isinf(energy))[0])) != 0:
         inf_energy_ids = list(set(np.where(np.isinf(energy))[0]))
@@ -129,10 +150,12 @@ def s2ef_metrics(
         forces_mae = 0
         natoms = 0
-        for sub_forces, sub_target_forces in zip(forces[subset_mask], target_forces[subset_mask]):
             forces_mae += np.sum(np.abs(sub_target_forces - sub_forces))
             natoms += sub_forces.shape[0]
-        forces_mae /= (3*natoms)
         metrics[f"{subset}_forces_mae"] = forces_mae
@@ -144,8 +167,12 @@ def omol_evaluations(
     submission_filename: Path,
     eval_type: str,
 ) -> Dict[str, float]:
-    with open(submission_filename) as f:
-        submission_data = json.load(f)
     with open(annotations_path) as f:
         annotations_data = json.load(f)
@@ -159,6 +186,11 @@ def omol_evaluations(
             f"Missing entries in submission: {missing}\n"
             f"Unexpected entries in submission: {unexpected}"
         )
     eval_fn = OMOL_EVAL_FUNCTIONS.get(eval_type)
     metrics = eval_fn(annotations_data, submission_data)
     return metrics
@@ -190,4 +222,4 @@ def evaluate(
     else:
         raise ValueError(f"Unknown eval_type: {eval_type}")
-    return metrics

     unoptimized_spin_gap,
 )
+class SubmissionLoadError(Exception):
+    """Raised if unable to load the submission file."""
 OMOL_EVAL_FUNCTIONS = {
     "Ligand pocket": ligand_pocket,
     "Ligand strain": ligand_strain,
 def get_order(path_submission: Path, path_annotations: Path):
+    try:
+        with np.load(path_submission) as data:
+            submission_ids = data["ids"]
+    except Exception as e:
+        raise SubmissionLoadError(
+            f"Error loading submission file. 'ids' must not be object types."
+        ) from e
     with np.load(path_annotations, allow_pickle=True) as data:
         annotations_ids = data["ids"]
         )
         raise Exception(f"IDs don't match.\n{details}")
+    assert len(submission_ids) == len(
+        submission_set
+    ), "Duplicate IDs found in submission."
     return reorder(annotations_ids, submission_ids)
 ) -> Dict[str, float]:
     order = get_order(submission_filename, annotations_path)
+    try:
+        with np.load(submission_filename) as data:
+            forces = data["forces"]
+            energy = data["energy"][order]
+            forces = np.array(
+                np.split(forces, np.cumsum(data["natoms"])[:-1]), dtype=object
+            )[order]
+    except Exception as e:
+        raise SubmissionLoadError(
+            f"Error loading submission data. Make sure you concatenated your forces and there are no object types."
+        ) from e
     if len(set(np.where(np.isinf(energy))[0])) != 0:
         inf_energy_ids = list(set(np.where(np.isinf(energy))[0]))
         forces_mae = 0
         natoms = 0
+        for sub_forces, sub_target_forces in zip(
+            forces[subset_mask], target_forces[subset_mask]
+        ):
             forces_mae += np.sum(np.abs(sub_target_forces - sub_forces))
             natoms += sub_forces.shape[0]
+        forces_mae /= 3 * natoms
         metrics[f"{subset}_forces_mae"] = forces_mae
     submission_filename: Path,
     eval_type: str,
 ) -> Dict[str, float]:
+    try:
+        with open(submission_filename) as f:
+            submission_data = json.load(f)
+    except Exception as e:
+        raise SubmissionLoadError(f"Error loading submission file") from e
     with open(annotations_path) as f:
         annotations_data = json.load(f)
             f"Missing entries in submission: {missing}\n"
             f"Unexpected entries in submission: {unexpected}"
         )
+    assert len(submission_entries) == len(
+        submission_data
+    ), "Duplicate entries found in submission."
     eval_fn = OMOL_EVAL_FUNCTIONS.get(eval_type)
     metrics = eval_fn(annotations_data, submission_data)
     return metrics
     else:
         raise ValueError(f"Unknown eval_type: {eval_type}")
+    return metrics