mshuaibi committed
Commit ed02609 · 1 Parent(s): b8a7697

speed up s2ef

Files changed (3)
  1. app.py +1 -1
  2. content.py +1 -2
  3. evaluator.py +77 -95
app.py CHANGED
@@ -324,7 +324,7 @@ def add_new_eval(
         return

     # Evaluate the submission
-    yield "⚙️ Evaluating your submission..."
+    yield "⚙️ Evaluating your submission...(do not close/refresh this page!)"
    metrics = evaluate(
        leaderboard_data.target_paths[eval_type],
        path_to_file,
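For context: `add_new_eval` is a generator handler, so each `yield` streams a status string to the page while the evaluation runs. A minimal sketch of the pattern, assuming a Gradio app (the handler name and wiring below are illustrative, not the Space's actual code):

import time
import gradio as gr

def add_new_eval_demo(file):
    # Yielding from a Gradio event handler streams interim status
    # text to the output component while the work proceeds.
    yield "⚙️ Evaluating your submission...(do not close/refresh this page!)"
    time.sleep(5)  # stands in for the real evaluate(...) call
    yield "✅ Success!"

demo = gr.Interface(fn=add_new_eval_demo, inputs=gr.File(), outputs=gr.Textbox())
demo.launch()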
content.py CHANGED
@@ -39,8 +39,7 @@ Users are limited to 5 successful submissions per month for each evaluation type
 - Ensure your prediction file format matches the expected format for the selected evaluation
 - Your email will be stored privately and only used for communication regarding your submission
 - Results will appear on the leaderboard after successful validation
-- Remain on the page until you see the "Success" message.
-- S2EF evaluations can take 10-20 minutes, the other evaluations happen in a few minutes. Please be patient.
+- Remain on the page until you see the "Success" message. Evaluations can take several minutes, please be patient.
 - If you wish to have your model removed from the leaderboard please reach out to mshuaibi@meta.com with the model name and submission date.

 This leaderboard is actively being developed and we are always open to feedback. If you run into any issues or have a question please
evaluator.py CHANGED
@@ -35,79 +35,6 @@ OMOL_DATA_ID_MAPPING = {
 }


-def npz_2_s2ef_input(npz_input_file: Path, subset: str) -> Dict[str, torch.tensor]:
-    with np.load(npz_input_file, allow_pickle=True) as data:
-        forces = data["forces"]
-        energy = data["energy"]
-        data_ids = np.array(data["data_ids"])
-
-    out_energy = []
-    out_forces = []
-    out_atoms = []
-
-    order = range(len(forces))
-    for x in order:
-        data_id = data_ids[x]
-        if subset == "all" or data_id in OMOL_DATA_ID_MAPPING.get(subset, []):
-            out_energy.append(energy[x])
-            force_array = forces[x]
-            out_forces.append(torch.tensor(force_array, dtype=torch.float32))
-            out_atoms.append(len(force_array))
-
-    energy = torch.tensor(out_energy)
-    out_forces = torch.cat(out_forces, dim=0)
-    out_dict = {
-        "energy": energy.float(),
-        "forces": out_forces,
-        "natoms": torch.tensor(out_atoms),
-    }
-
-    return out_dict
-
-
-def npz_2_s2ef_submission(
-    npz_input_file: Path, order: List[int], subset: str = "All"
-) -> Dict[str, torch.tensor]:
-    with np.load(npz_input_file) as data:
-        forces = data["forces"]
-        energy = data["energy"]
-        natoms = data["natoms"]
-        data_ids = data["data_ids"]
-    forces = np.split(forces, np.cumsum(natoms)[:-1])
-
-    # check for infs
-    if len(set(np.where(np.isinf(energy))[0])) != 0:
-        inf_energy_ids = list(set(np.where(np.isinf(energy))[0]))
-        raise Exception(
-            f"Inf values found in `energy` for IDs: ({inf_energy_ids[:3]}, ...)"
-        )
-
-    out_energy = []
-    out_forces = []
-    out_atoms = []
-
-    if order is None:
-        order = range(len(forces))
-
-    for x in order:
-        data_id = data_ids[x]
-        if subset == "all" or data_id in OMOL_DATA_ID_MAPPING.get(subset, []):
-            out_energy.append(energy[x])
-            force_array = forces[x]
-            out_forces.append(torch.tensor(force_array, dtype=torch.float32))
-            out_atoms.append(force_array.shape[0])
-
-    energy = torch.tensor(out_energy)
-    out_forces = torch.cat(out_forces, dim=0)
-    out_dict = {
-        "energy": energy.float().squeeze(),
-        "forces": out_forces,
-        "natoms": torch.tensor(out_atoms),
-    }
-
-    return out_dict
-
-
 def reorder(ref: np.ndarray, to_reorder: np.ndarray) -> np.ndarray:
     """
     Get the ordering so that `to_reorder[ordering]` == ref.
@@ -146,9 +73,13 @@ def get_order(path_submission: Path, path_annotations: Path):
     with np.load(path_annotations, allow_pickle=True) as data:
         annotations_ids = data["ids"]

-    if set(submission_ids) != set(annotations_ids):
-        missing_ids = set(annotations_ids) - set(submission_ids)
-        unexpected_ids = set(submission_ids) - set(annotations_ids)
+    # Use sets for faster comparison
+    submission_set = set(submission_ids)
+    annotations_set = set(annotations_ids)
+
+    if submission_set != annotations_set:
+        missing_ids = annotations_set - submission_set
+        unexpected_ids = submission_set - annotations_set

         details = (
             f"{len(missing_ids)} missing IDs: ({list(missing_ids)[:3]}, ...)\n"
@@ -159,39 +90,79 @@ def s2ef_metrics(
     return reorder(annotations_ids, submission_ids)


-def extract_and_align(
-    path_submission: Path,
-    path_annotations: Path,
-    subset: str,
-) -> Tuple[Dict[str, torch.tensor], Dict[str, torch.tensor]]:
-
-    order = get_order(path_submission, path_annotations)
-
-    submission_data = npz_2_s2ef_submission(path_submission, order, subset)
-    annotations_data = npz_2_s2ef_input(path_annotations, subset)
-
-    return submission_data, annotations_data
-
-
 def s2ef_metrics(
     annotations_path: Path,
     submission_filename: Path,
     subsets: list = ["all"],
 ) -> Dict[str, float]:
-    evaluator = Evaluator(task="s2ef")
+    eval_metrics = {
+        "energy": ["mae"],
+        "forces": ["mae"],
+    }
+    evaluator = Evaluator(eval_metrics=eval_metrics)
+
+    # Get order once for all subsets
+    order = get_order(submission_filename, annotations_path)
+
+    with np.load(submission_filename) as data:
+        forces = data["forces"]
+        energy = data["energy"][order]
+        natoms = data["natoms"]
+    forces = np.array(np.split(forces, np.cumsum(natoms)[:-1]), dtype=object)[order]
+    natoms = natoms[order]
+
+    if len(set(np.where(np.isinf(energy))[0])) != 0:
+        inf_energy_ids = list(set(np.where(np.isinf(energy))[0]))
+        raise Exception(
+            f"Inf values found in `energy` for IDs: ({inf_energy_ids[:3]}, ...)"
+        )
+
+    with np.load(annotations_path, allow_pickle=True) as data:
+        target_forces = data["forces"]
+        target_energy = data["energy"]
+        target_data_ids = data["data_ids"]

     metrics = {}
     for subset in subsets:
-        submission_data, annotations_data = extract_and_align(
-            submission_filename,
-            annotations_path,
-            subset,
+        if subset == "all":
+            subset_mask = np.ones(len(target_data_ids), dtype=bool)
+        else:
+            allowed_ids = set(OMOL_DATA_ID_MAPPING.get(subset, []))
+            subset_mask = np.array(
+                [data_id in allowed_ids for data_id in target_data_ids]
+            )
+
+        sub_energy = torch.from_numpy(energy[subset_mask])
+        sub_forces = torch.from_numpy(np.concatenate(forces[subset_mask]))
+        sub_natoms = torch.from_numpy(natoms[subset_mask])
+
+        submission_data = {
+            "energy": sub_energy,
+            "forces": sub_forces,
+            "natoms": sub_natoms,
+        }
+
+        target_energy_tensor = torch.from_numpy(target_energy[subset_mask])
+        target_force_tensors = torch.from_numpy(
+            np.concatenate(target_forces[subset_mask])
+        )
+        target_natoms_tensor = torch.tensor(
+            [force_array.shape[0] for force_array in target_forces[subset_mask]],
+            dtype=torch.long,
         )
+
+        annotations_data = {
+            "energy": target_energy_tensor,
+            "forces": target_force_tensors,
+            "natoms": target_natoms_tensor,
+        }
+
         subset_metrics = evaluator.eval(
             submission_data, annotations_data, prev_metrics={}
         )
         for key in ["energy_mae", "forces_mae"]:
             metrics[f"{subset}_{key}"] = subset_metrics[key]["metric"]
+
     return metrics
 
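The speedup replaces the per-sample Python loops of the deleted helpers with a single pass: load each .npz once, split the flat force array into per-structure blocks, and apply the ID ordering and subset selection with NumPy indexing. A minimal sketch of that pattern on toy data (shapes and IDs are invented):

import numpy as np

# Flat storage as in the submission .npz: forces stacked over all atoms,
# natoms giving each structure's atom count.
natoms = np.array([2, 3, 1])
forces = np.arange(18, dtype=np.float32).reshape(6, 3)  # 6 atoms total
energy = np.array([-1.0, -2.0, -3.0])
data_ids = np.array(["solvated", "metal", "solvated"])  # per-structure labels

# One split into per-structure blocks; cumsum gives the cut points.
blocks = np.split(forces, np.cumsum(natoms)[:-1])

# Ragged blocks go into an object array so a permutation (e.g. from
# get_order-style ID matching) applies with one fancy-indexing call.
order = np.array([2, 0, 1])
blocks = np.array(blocks, dtype=object)[order]
energy, natoms, data_ids = energy[order], natoms[order], data_ids[order]

# Each subset is then just a boolean mask over structures; no re-reading.
allowed = {"solvated"}  # stands in for OMOL_DATA_ID_MAPPING[subset]
mask = np.array([d in allowed for d in data_ids])
sub_energy = energy[mask]                  # shape (2,)
sub_forces = np.concatenate(blocks[mask])  # shape (3, 3): 1-atom + 2-atom blocks
sub_natoms = natoms[mask]                  # array([1, 2])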
@@ -204,6 +175,17 @@ def omol_evaluations(
         submission_data = json.load(f)
     with open(annotations_path) as f:
         annotations_data = json.load(f)
+
+    submission_entries = set(submission_data.keys())
+    annotation_entries = set(annotations_data.keys())
+    if submission_entries != annotation_entries:
+        missing = annotation_entries - submission_entries
+        unexpected = submission_entries - annotation_entries
+        raise ValueError(
+            f"Submission and annotations entries do not match.\n"
+            f"Missing entries in submission: {missing}\n"
+            f"Unexpected entries in submission: {unexpected}"
+        )
     eval_fn = OMOL_EVAL_FUNCTIONS.get(eval_type)
     metrics = eval_fn(annotations_data, submission_data)
     return metrics
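The added guard gives the JSON-based evaluations the same fail-fast behavior: mismatched keys raise immediately with both sides of the difference. A toy run (entries invented):

submission_data = {"sys_a": 1.0, "sys_b": 2.0}
annotations_data = {"sys_a": 0.9, "sys_c": 3.1}

submission_entries = set(submission_data.keys())
annotation_entries = set(annotations_data.keys())
if submission_entries != annotation_entries:
    missing = annotation_entries - submission_entries      # {'sys_c'}
    unexpected = submission_entries - annotation_entries   # {'sys_b'}
    raise ValueError(
        f"Submission and annotations entries do not match.\n"
        f"Missing entries in submission: {missing}\n"
        f"Unexpected entries in submission: {unexpected}"
    )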
 