mshuaibi committed
Commit 925b37d · 1 Parent(s): 48274b5

initial leaderboard build

Files changed (6)
  1. README.md +11 -6
  2. app.py +561 -0
  3. content.py +70 -0
  4. evaluator.py +238 -0
  5. requirements.txt +8 -0
  6. submit_leaderboard.py +103 -0
README.md CHANGED
@@ -1,12 +1,17 @@
 ---
-title: Fairchem Leaderboard
-emoji: 🐠
-colorFrom: pink
-colorTo: blue
+title: FAIR Chemistry Leaderboard
+emoji: 🥇
+colorFrom: blue
+colorTo: red
 sdk: gradio
-sdk_version: 5.43.1
 app_file: app.py
-pinned: false
+pinned: true
+hf_oauth: true
+failure_strategy: rollback
+tags:
+- leaderboard
+- chemistry
+- molecules
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,561 @@
import datetime
import json
import os
import tempfile
from email.utils import parseaddr
from typing import Dict, List, Tuple, Optional

import gradio as gr
import numpy as np
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from datasets import VerificationMode, load_dataset, Dataset
from huggingface_hub import HfApi, snapshot_download

from content import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    INTRODUCTION_TEXT,
    SUBMISSION_TEXT,
    PRE_COLUMN_NAMES,
    POST_COLUMN_NAMES,
    TITLE,
    TYPES,
    model_hyperlink,
)
from evaluator import evaluate

# Configuration constants
TOKEN = os.environ.get("TOKEN", None)
OWNER = "facebook"

# Dataset repositories
INTERNAL_DATA_DATASET = f"{OWNER}/fairchem_internal"
SUBMISSION_DATASET = f"{OWNER}/fairchem_leaderboard_submissions"
RESULTS_DATASET = f"{OWNER}/fairchem_leaderboard_results"
CONTACT_DATASET = f"{OWNER}/fairchem_leaderboard_contact_info_internal"
LEADERBOARD_PATH = f"{OWNER}/fairchem_leaderboard"

# Initialize HuggingFace API
api = HfApi()

# S2EF subsplits for validation and test data
S2EF_SUBSPLITS = [
    "all",
    "biomolecules",
    "electrolytes",
    "metal_complexes",
    "neutral_organics",
]

# Evaluation types that are not S2EF
OTHER_EVAL_TYPES = [
    "Ligand pocket",
    "Ligand strain",
    "Conformers",
    "Protonation",
    "IE_EA",
    "Distance scaling",
    "Spin gap",
]

# All evaluation types for the dropdown
ALL_EVAL_TYPES = ["Validation", "Test"] + OTHER_EVAL_TYPES


class LeaderboardData:
    """
    Manages leaderboard data loading and processing.
    """

    def __init__(self):
        self._setup_data_paths()
        self._load_contact_info()

    def _setup_data_paths(self):
        """
        Setup target and result file paths.
        """
        target_data_dir = snapshot_download(
            repo_id=INTERNAL_DATA_DATASET,
            repo_type="dataset",
            token=TOKEN,
        )

        self.target_paths = {
            "Validation": f"{target_data_dir}/omol_val_labels.npz",
            "Test": f"{target_data_dir}/omol_test_labels.npz",
            "Distance Scaling": f"{target_data_dir}/distance_scaling_labels.json",
            "Ligand pocket": f"{target_data_dir}/ligand_pocket_labels.json",
            "Ligand strain": f"{target_data_dir}/ligand_strain_labels.json",
            "Conformers": f"{target_data_dir}/geom_conformers_labels.json",
            "Protonation": f"{target_data_dir}/protonation_energies_labels.json",
            "IE_EA": f"{target_data_dir}/unoptimized_ie_ea_labels.json",
            "Distance scaling": f"{target_data_dir}/distance_scaling_labels.json",
            "Spin gap": f"{target_data_dir}/unoptimized_spin_gap_labels.json",
        }

        self.result_paths = {
            "Validation": "validation_s2ef.parquet",
            "Test": "test_s2ef.parquet",
            "Ligand pocket": "ligand_pocket.parquet",
            "Ligand strain": "ligand_strain.parquet",
            "Conformers": "geom_conformers.parquet",
            "Protonation": "protonation.parquet",
            "IE_EA": "ie_ea.parquet",
            "Distance scaling": "distance_scaling.parquet",
            "Spin gap": "spin_gap.parquet",
        }

    def _load_contact_info(self):
        """
        Load contact information dataset.
        """
        self.contact_infos = load_dataset(
            CONTACT_DATASET,
            token=TOKEN,
            download_mode="force_redownload",
            verification_mode=VerificationMode.NO_CHECKS,
        )

    def load_eval_data(self) -> Tuple[Dict, Dict[str, pd.DataFrame]]:
        """
        Load all evaluation data and return results and dataframes.
        """
        # Load S2EF results
        s2ef_results = load_dataset(
            RESULTS_DATASET,
            token=TOKEN,
            download_mode="force_redownload",
            verification_mode=VerificationMode.NO_CHECKS,
            data_files={
                "Validation": os.path.join("data", self.result_paths["Validation"]),
                "Test": os.path.join("data", self.result_paths["Test"]),
            },
        )
        eval_results = dict(s2ef_results)

        # Load other evaluation types
        for eval_type in OTHER_EVAL_TYPES:
            eval_type_data = load_dataset(
                RESULTS_DATASET,
                token=TOKEN,
                download_mode="force_redownload",
                verification_mode=VerificationMode.NO_CHECKS,
                data_files={"data": os.path.join("data", self.result_paths[eval_type])},
            )
            eval_results[eval_type] = eval_type_data["data"]

        # Generate result dataframes
        results_dfs = {}

        # S2EF dataframes
        for split in ["Validation", "Test"]:
            for subsplit in S2EF_SUBSPLITS:
                df_key = f"{split}_{subsplit}"
                results_dfs[df_key] = self._get_s2ef_df_from_results(
                    eval_results, split, subsplit
                )

        # Other evaluation dataframes
        for split in OTHER_EVAL_TYPES:
            results_dfs[split] = self._get_eval_df_from_results(eval_results, split)

        return eval_results, results_dfs

    def _get_s2ef_df_from_results(
        self, eval_results: Dict, split: str, subsplit: str
    ) -> pd.DataFrame:
        """
        Generate S2EF dataframe from evaluation results.
        """
        local_df = eval_results[split]
        local_df = local_df.map(
            lambda row: {"Model": model_hyperlink(row["url"], row["Model"])}
        )
        filtered_columns = (
            PRE_COLUMN_NAMES
            + [f"{subsplit}_energy_mae", f"{subsplit}_forces_mae"]
            + POST_COLUMN_NAMES
        )
        df = pd.DataFrame(local_df)
        avail_columns = list(df.columns)
        missing_columns = list(set(filtered_columns) - set(avail_columns))
        df[missing_columns] = "-"

        df = df[filtered_columns].round(4)
        # Unit conversion
        for col in df.columns:
            if "mae" in col.lower():
                df[col] = (df[col] * 1000).round(2)
        df = df.sort_values(by=[f"{subsplit}_energy_mae"], ascending=True)
        df[f"{subsplit}_energy_mae"] = df[f"{subsplit}_energy_mae"]
        df[f"{subsplit}_forces_mae"] = df[f"{subsplit}_forces_mae"]
        df = df.rename(
            columns={
                f"{subsplit}_energy_mae": "Energy MAE [meV]",
                f"{subsplit}_forces_mae": "Forces MAE [meV/Å]",
            }
        )
        return df

    def _get_eval_df_from_results(self, eval_results: Dict, split: str) -> pd.DataFrame:
        """
        Generate evaluation dataframe from results.
        """
        local_df = eval_results[split]
        local_df = local_df.map(
            lambda row: {"Model": model_hyperlink(row["url"], row["Model"])}
        )
        eval_columns = LEADERBOARD_COLUMNS[split]
        filtered_columns = PRE_COLUMN_NAMES + eval_columns + POST_COLUMN_NAMES
        df = pd.DataFrame(local_df)
        avail_columns = list(df.columns)
        missing_columns = list(set(filtered_columns) - set(avail_columns))
        df[missing_columns] = "-"

        df = df[filtered_columns].round(4)
        # Unit conversion
        for col in df.columns:
            if "mae" in col.lower():
                df[col] = (df[col] * 1000).round(2)
        df = df.sort_values(by=[eval_columns[0]], ascending=True)
        df = df.rename(columns=COLUMN_MAPPING)
        return df


leaderboard_data = LeaderboardData()

# Column configurations for different evaluation types
LEADERBOARD_COLUMNS = {
    "Ligand pocket": ["interaction_energy_mae", "interaction_forces_mae"],
    "Ligand strain": ["strain_energy_mae", "global_min_rmsd"],
    "Conformers": ["deltaE_mae", "ensemble_rmsd"],
    "Protonation": ["deltaE_mae", "rmsd"],
    "IE_EA": ["deltaE_mae", "deltaF_mae"],
    "Distance scaling": ["lr_ddE_mae", "lr_ddF_mae", "sr_ddE_mae", "sr_ddF_mae"],
    "Spin gap": ["deltaE_mae", "deltaF_mae"],
}

COLUMN_MAPPING = {
    "interaction_energy_mae": "Ixn Energy MAE [meV]",
    "interaction_forces_mae": "Ixn Forces MAE [meV/Å]",
    "strain_energy_mae": "Strain Energy MAE [meV]",
    "deltaE_mae": "\u0394Energy MAE [meV]",
    "deltaF_mae": "\u0394Forces MAE [meV/Å]",
    "ensemble_rmsd": "RMSD [Å]",
    "global_min_rmsd": "RMSD [Å]",
    "rmsd": "RMSD [Å]",
    "lr_ddE_mae": "\u0394Energy (LR) MAE [meV]",
    "lr_ddF_mae": "\u0394Forces (LR) MAE [meV/Å]",
    "sr_ddE_mae": "\u0394Energy (SR) MAE [meV]",
    "sr_ddF_mae": "\u0394Forces (SR) MAE [meV/Å]",
}


def add_new_eval(
    path_to_file: str,
    eval_type: str,
    organization: str,
    model: str,
    url: str,
    mail: str,
    training_set: str,
    additional_info: str,
    profile: gr.OAuthProfile,
) -> str:
    """Add a new evaluation to the leaderboard."""
    print(f"Adding new eval of type: {eval_type}")
    try:
        # Validate email address
        _, parsed_mail = parseaddr(mail)
        if "@" not in parsed_mail:
            yield "⚠️ Please provide a valid email address."
            return

        # Check monthly submission limit (5 submissions per month)
        contact_key = eval_type.replace(" ", "_")
        user_submission_dates = sorted(
            row["date"]
            for row in leaderboard_data.contact_infos.get(contact_key, [])
            if row["username"] == profile.username
        )

        current_month = datetime.datetime.now().strftime("%Y-%m")
        current_month_submissions = [
            date for date in user_submission_dates if date.startswith(current_month)
        ]

        if len(current_month_submissions) >= 5:
            yield "⚠️ You have reached the monthly submission limit of 5 submissions. Please try again next month."
            return

        # Validate file submission
        if path_to_file is None:
            yield "⚠️ Please upload a file."
            return

        if not (path_to_file.endswith(".npz") or path_to_file.endswith(".json")):
            yield "⚠️ Please submit a valid npz or json file"
            return

        # Evaluate the submission
        yield "⚙️ Evaluating your submission..."
        metrics = evaluate(
            leaderboard_data.target_paths[eval_type],
            path_to_file,
            eval_type,
        )

        submission_time = datetime.datetime.today().strftime("%Y-%m-%d-%H:%M")

        # Upload submission file
        yield "☁️ Uploading submission file..."
        api.upload_file(
            repo_id=SUBMISSION_DATASET,
            path_or_fileobj=path_to_file,
            path_in_repo=f"{organization}/{model}/submissions/{training_set}/{eval_type}_{submission_time}_{os.path.basename(path_to_file)}",
            repo_type="dataset",
            token=TOKEN,
        )

        # Update leaderboard data
        yield "📋 Updating leaderboard data..."
        eval_results, _ = leaderboard_data.load_eval_data()
        eval_entry = {
            "Model": model,
            "Organization": organization,
            "Submission date": submission_time,
            "Training Set": training_set,
            "Notes": additional_info,
            "url": url,
        }
        eval_entry.update(metrics)

        if eval_type not in eval_results:
            eval_results[eval_type] = Dataset.from_dict(
                {k: [v] for k, v in eval_entry.items()}
            )
        else:
            eval_results[eval_type] = eval_results[eval_type].add_item(eval_entry)

        data_file_name = leaderboard_data.result_paths[eval_type]

        # Upload results
        yield "💾 Saving results to database..."
        with tempfile.NamedTemporaryFile(suffix=".parquet") as tmp_file:
            eval_results[eval_type].to_parquet(tmp_file.name)
            api.upload_file(
                repo_id=RESULTS_DATASET,
                path_or_fileobj=tmp_file.name,
                path_in_repo=f"data/{data_file_name}",
                repo_type="dataset",
                token=TOKEN,
            )

        # Save contact information
        contact_info = {
            "model": model,
            "organization": organization,
            "username": profile.username,
            "email": mail,
            "date": submission_time,
        }

        if contact_key not in leaderboard_data.contact_infos:
            leaderboard_data.contact_infos[contact_key] = Dataset.from_dict(
                {k: [v] for k, v in contact_info.items()}
            )
        else:
            leaderboard_data.contact_infos[contact_key] = (
                leaderboard_data.contact_infos[contact_key].add_item(contact_info)
            )

        leaderboard_data.contact_infos.push_to_hub(CONTACT_DATASET, token=TOKEN)

        success_str = f"✅ Model {model} is successfully evaluated and stored in our database.\nPlease wait an hour and refresh the leaderboard to see your results displayed."
        yield success_str

    except Exception as e:
        print(f"Error during submission: {e}")
        yield (
            f"An error occurred, please open a discussion and indicate at what time you encountered the error.\n{e}"
        )


def create_dataframe_tab(
    tab_name: str, df: pd.DataFrame, datatype: List[str] = None
) -> gr.Tab:
    """
    Create a tab with a dataframe.
    """
    if datatype is None:
        datatype = TYPES

    with gr.Tab(tab_name) as tab:
        gr.Dataframe(
            value=df,
            datatype=datatype,
            interactive=False,
            column_widths=["20%"],
        )
    return tab


def create_s2ef_tabs(split: str, results_dfs: Dict[str, pd.DataFrame]) -> None:
    """
    Create S2EF tabs for a given split (Validation/Test).
    """
    subsplit_names = {
        "all": "All",
        "biomolecules": "Biomolecules",
        "electrolytes": "Electrolytes",
        "metal_complexes": "Metal Complexes",
        "neutral_organics": "Neutral Organics",
    }

    for subsplit, display_name in subsplit_names.items():
        df_key = f"{split}_{subsplit}"
        create_dataframe_tab(display_name, results_dfs[df_key])


def create_evaluation_tabs(results_dfs: Dict[str, pd.DataFrame]) -> None:
    """
    Create evaluation tabs for non-S2EF evaluations.
    """
    eval_datatype = ["markdown", "markdown", "number", "str"]

    for eval_type in OTHER_EVAL_TYPES:
        display_name = "IE/EA" if eval_type == "IE_EA" else eval_type
        create_dataframe_tab(display_name, results_dfs[eval_type], eval_datatype)


def create_submission_interface() -> Tuple[gr.components.Component, ...]:
    """
    Create the submission interface components.
    """
    with gr.Accordion("Submit predictions"):
        with gr.Row():
            gr.Markdown(SUBMISSION_TEXT, elem_classes="markdown-text")
        with gr.Row():
            with gr.Column():
                model_name_textbox = gr.Textbox(label="Model name")
                model_url = gr.Textbox(label="Model/Paper URL")
                dataset = gr.Dropdown(
                    choices=["OMol-All", "OMol-4M", "UMA-459M", "Other"],
                    label="Training set",
                    interactive=True,
                )
                additional_info = gr.Textbox(
                    label="Additional info (cutoff radius, # of params, etc.)"
                )
                organization = gr.Textbox(label="Organization")
                mail = gr.Textbox(
                    label="Contact email (will be stored privately, & used if there is an issue with your submission)"
                )
            with gr.Column():
                file_output = gr.File()
                with gr.Row():
                    eval_type = gr.Dropdown(
                        choices=ALL_EVAL_TYPES,
                        label="Eval Type",
                        interactive=True,
                    )
                with gr.Column():
                    gr.LoginButton()
                with gr.Column():
                    submit_button = gr.Button("Submit Eval")
                    submission_result = gr.Textbox(label="Status")

    return (
        submit_button,
        file_output,
        eval_type,
        organization,
        model_name_textbox,
        model_url,
        mail,
        dataset,
        additional_info,
        submission_result,
    )


def create_interface() -> gr.Blocks:
    """
    Create the complete Gradio interface.
    """
    # Load data
    _, results_dfs = leaderboard_data.load_eval_data()

    demo = gr.Blocks()
    with demo:
        gr.HTML(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

        # Citation section
        with gr.Row():
            with gr.Accordion("📙 Citation", open=False):
                gr.Markdown(CITATION_BUTTON_LABEL)
                gr.Markdown(CITATION_BUTTON_TEXT)

        # S2EF Results tabs
        with gr.Tab("Test"):
            create_s2ef_tabs("Test", results_dfs)

        with gr.Tab("Validation"):
            create_s2ef_tabs("Validation", results_dfs)

        # Evaluation results
        gr.Markdown("## Evaluations", elem_classes="markdown-text")
        with gr.Row():
            create_evaluation_tabs(results_dfs)

        (
            submit_button,
            file_output,
            eval_type,
            organization,
            model_name_textbox,
            model_url,
            mail,
            dataset,
            additional_info,
            submission_result,
        ) = create_submission_interface()

        submit_button.click(
            add_new_eval,
            [
                file_output,
                eval_type,
                organization,
                model_name_textbox,
                model_url,
                mail,
                dataset,
                additional_info,
            ],
            submission_result,
        )

    return demo


def restart_space():
    api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)


def main():
    demo = create_interface()

    scheduler = BackgroundScheduler()
    scheduler.add_job(restart_space, "interval", seconds=3600)
    scheduler.start()

    # Launch the demo
    demo.launch(debug=True, share=True)


if __name__ == "__main__":
    main()
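For reference, the record that `add_new_eval` appends to a results parquet is simply the submission metadata plus the metric dictionary returned by `evaluate`. The sketch below is illustrative only (all values made up); the metric keys follow the `f"{subset}_{key}"` naming used in `s2ef_metrics`, and the raw MAEs are in eV and eV/Å before the leaderboard's ×1000 conversion to meV and meV/Å.

```python
# Illustrative only: a row of the "Test" S2EF results dataset, i.e. eval_entry
# updated with the metrics returned by evaluate(...). Values are made up.
eval_entry = {
    "Model": "my-mlip",
    "Organization": "Example Lab",
    "Submission date": "2025-01-01-12:00",
    "Training Set": "OMol-4M",
    "Notes": "",
    "url": "https://example.org/my-mlip",
    # metrics from evaluate(..., eval_type="Test"), in eV / eV/Å:
    "all_energy_mae": 0.00123,
    "all_forces_mae": 0.00456,
    "biomolecules_energy_mae": 0.00110,
    # ... one energy/forces pair per subsplit
}
```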
content.py ADDED
@@ -0,0 +1,70 @@
# HTML title for the application
TITLE = """<h1 align="center" id="space-title">FAIR Chemistry Leaderboard</h1>"""

# Main introduction text
INTRODUCTION_TEXT = """
## Welcome!

This space will host the FAIR Chemistry team's series of leaderboards across different chemical domains, e.g. molecules, catalysts, and materials.
Leaderboards previously hosted on EvalAI ([OC20](https://eval.ai/web/challenges/challenge-page/712/overview)) will also be migrated here in the future.


### 🧬 OMol25
This leaderboard showcases the performance of machine learning interatomic potentials (MLIPs) on the Open Molecules 2025 (OMol25) dataset.
OMol25 is a large, high-quality dataset that uniquely blends elemental, chemical, and structural diversity.

For more details about the dataset and evaluation metrics, please refer to our [paper](https://arxiv.org/pdf/2505.08762).

#### Evaluation Categories:
- **S2EF (Structure to Energy and Forces)**: Test and Validation splits across different molecular categories
- **Specialized Evaluations**: Practically relevant chemistry tasks that evaluate models beyond S2EF metrics (e.g. ligand strain, spin gap)

For details on how to generate prediction files for submission, please refer to the documentation provided [here](https://fair-chem.github.io/molecules/leaderboard.html).
"""

# Submission instructions
SUBMISSION_TEXT = """
## How to Submit

To submit your model predictions:

1. **Prepare your predictions** in the required format (NPZ for S2EF tasks, JSON for other evaluations)
2. **Fill in the model information** including name, organization, and contact details
3. **Select the evaluation type** that matches your prediction file
4. **Upload your file** and click Submit

**Important Notes:**
- Ensure your prediction file format matches the expected format for the selected evaluation
- Your email will be stored privately and only used for communication regarding your submission
- Results will appear on the leaderboard after successful validation
- Remain on the page until you see the "Success" message
- S2EF evaluations can take 10-20 minutes; the other evaluations finish within a few minutes. Please be patient.

This leaderboard is actively being developed, and we are always open to feedback. If you run into any issues or have a question, please
reach out to us on our GitHub [page](https://github.com/facebookresearch/fairchem) or the [leaderboard discussion forum](https://huggingface.co/spaces/facebook/fairchem_leaderboard/discussions).
"""

# Citation information
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""
```latex
@article{levine2025open,
  title={The Open Molecules 2025 (OMol25) dataset, evaluations, and models},
  author={Levine, Daniel S and Shuaibi, Muhammed and Spotte-Smith, Evan Walter Clark and Taylor, Michael G and Hasyim, Muhammad R and Michel, Kyle and Batatia, Ilyes and Cs{\'a}nyi, G{\'a}bor and Dzamba, Misko and Eastman, Peter and others},
  journal={arXiv preprint arXiv:2505.08762},
  year={2025}
}
```
"""

# Table configuration
PRE_COLUMN_NAMES = ["Model", "Organization", "Training Set"]
POST_COLUMN_NAMES = ["Submission date"]
TYPES = ["markdown", "markdown", "str", "number", "number", "str"]


def model_hyperlink(link: str, model_name: str) -> str:
    """Create a hyperlink for model names in the leaderboard."""
    if not link or link.strip() == "":
        return model_name
    return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
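Since SUBMISSION_TEXT only names the file types (NPZ for S2EF, JSON for the specialized evaluations), a minimal sketch of an S2EF prediction file may be useful. The array keys are the ones `evaluator.py` reads back (`ids`, `energy`, `forces`, `natoms`, `data_ids`); the values, shapes, and filename below are placeholders, and the linked fair-chem documentation remains the authoritative format reference.

```python
# Minimal sketch (not the official format spec) of an S2EF NPZ submission,
# assuming the keys read by evaluator.py: ids, energy, forces, natoms, data_ids.
import numpy as np

ids = np.array(["sys_0", "sys_1"])               # one ID per evaluated structure
energy = np.array([-1021.4, -876.2])             # predicted total energies (eV)
natoms = np.array([12, 8])                       # atoms per structure
forces = np.random.randn(natoms.sum(), 3)        # per-atom forces, concatenated (eV/Å)
data_ids = np.array(["elytes", "biomolecules"])  # subsplit tag per structure

np.savez_compressed(
    "val_predictions.npz",
    ids=ids,
    energy=energy,
    forces=forces,
    natoms=natoms,
    data_ids=data_ids,
)
```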
evaluator.py ADDED
@@ -0,0 +1,238 @@
import logging
from pathlib import Path
from typing import Dict, List, Tuple

import numpy as np
import torch
import json
from fairchem.core.modules.evaluator import Evaluator

from fairchem.data.omol.modules.evaluator import (
    ligand_pocket,
    ligand_strain,
    geom_conformers,
    protonation_energies,
    unoptimized_ie_ea,
    distance_scaling,
    unoptimized_spin_gap,
)

OMOL_EVAL_FUNCTIONS = {
    "Ligand pocket": ligand_pocket,
    "Ligand strain": ligand_strain,
    "Conformers": geom_conformers,
    "Protonation": protonation_energies,
    "IE_EA": unoptimized_ie_ea,
    "Distance scaling": distance_scaling,
    "Spin gap": unoptimized_spin_gap,
}

OMOL_DATA_ID_MAPPING = {
    "metal_complexes": ["metal_complexes"],
    "electrolytes": ["elytes"],
    "biomolecules": ["biomolecules"],
    "neutral_organics": ["ani2x", "orbnet_denali", "geom_orca6", "trans1x", "rgd"],
}


def npz_2_s2ef_input(npz_input_file: Path, subset: str) -> Dict[str, torch.tensor]:
    with np.load(npz_input_file, allow_pickle=True) as data:
        forces = data["forces"]
        energy = data["energy"]
        data_ids = np.array(data["data_ids"])

    out_energy = []
    out_forces = []
    out_atoms = []

    order = range(len(forces))
    for x in order:
        data_id = data_ids[x]
        if subset == "all" or data_id in OMOL_DATA_ID_MAPPING.get(subset, []):
            out_energy.append(energy[x])
            force_array = forces[x]
            out_forces.append(torch.tensor(force_array, dtype=torch.float32))
            out_atoms.append(len(force_array))

    energy = torch.tensor(out_energy)
    out_forces = torch.cat(out_forces, dim=0)
    out_dict = {
        "energy": energy.float(),
        "forces": out_forces,
        "natoms": torch.tensor(out_atoms),
    }

    return out_dict


def npz_2_s2ef_submission(
    npz_input_file: Path, order: List[int], subset: str = "All"
) -> Dict[str, torch.tensor]:
    with np.load(npz_input_file) as data:
        forces = data["forces"]
        energy = data["energy"]
        natoms = data["natoms"]
        data_ids = data["data_ids"]
        forces = np.split(forces, np.cumsum(natoms)[:-1])

    # check for infs
    if len(set(np.where(np.isinf(energy))[0])) != 0:
        inf_energy_ids = list(set(np.where(np.isinf(energy))[0]))
        raise Exception(
            f"Inf values found in `energy` for IDs: ({inf_energy_ids[:3]}, ...)"
        )

    out_energy = []
    out_forces = []
    out_atoms = []

    if order is None:
        order = range(len(forces))

    for x in order:
        data_id = data_ids[x]
        if subset == "all" or data_id in OMOL_DATA_ID_MAPPING.get(subset, []):
            out_energy.append(energy[x])
            force_array = forces[x]
            out_forces.append(torch.tensor(force_array, dtype=torch.float32))
            out_atoms.append(force_array.shape[0])

    energy = torch.tensor(out_energy)
    out_forces = torch.cat(out_forces, dim=0)
    out_dict = {
        "energy": energy.float().squeeze(),
        "forces": out_forces,
        "natoms": torch.tensor(out_atoms),
    }

    return out_dict


def reorder(ref: np.ndarray, to_reorder: np.ndarray) -> np.ndarray:
    """
    Get the ordering so that `to_reorder[ordering] == ref`.

    e.g.:
        ref = [c, a, b]
        to_reorder = [b, a, c]
        order = reorder(ref, to_reorder)  # [2, 1, 0]
        assert ref == to_reorder[order]

    Parameters
    ----------
    ref : np.ndarray
        Reference array. Must not contain duplicates.
    to_reorder : np.ndarray
        Array to re-order. Must not contain duplicates.
        Items must be the same as in `ref`.

    Returns
    -------
    np.ndarray
        The ordering to apply to `to_reorder`.
    """
    assert len(ref) == len(set(ref))
    assert len(to_reorder) == len(set(to_reorder))
    assert set(ref) == set(to_reorder)
    item_to_idx = {item: idx for idx, item in enumerate(to_reorder)}
    return np.array([item_to_idx[item] for item in ref])


def get_order(path_submission: Path, path_annotations: Path):
    with np.load(path_submission) as data:
        submission_ids = data["ids"]

    with np.load(path_annotations, allow_pickle=True) as data:
        annotations_ids = data["ids"]

    if set(submission_ids) != set(annotations_ids):
        missing_ids = set(annotations_ids) - set(submission_ids)
        unexpected_ids = set(submission_ids) - set(annotations_ids)

        details = (
            f"{len(missing_ids)} missing IDs: ({list(missing_ids)[:3]}, ...)\n"
            f"{len(unexpected_ids)} unexpected IDs: ({list(unexpected_ids)[:3]}, ...)"
        )
        raise Exception(f"IDs don't match.\n{details}")

    return reorder(annotations_ids, submission_ids)


def extract_and_align(
    path_submission: Path,
    path_annotations: Path,
    subset: str,
) -> Tuple[Dict[str, torch.tensor], Dict[str, torch.tensor]]:
    order = get_order(path_submission, path_annotations)

    submission_data = npz_2_s2ef_submission(path_submission, order, subset)
    annotations_data = npz_2_s2ef_input(path_annotations, subset)

    return submission_data, annotations_data


def s2ef_metrics(
    annotations_path: Path,
    submission_filename: Path,
    subsets: list = ["all"],
) -> Dict[str, float]:
    evaluator = Evaluator(task="s2ef")

    metrics = {}
    for subset in subsets:
        submission_data, annotations_data = extract_and_align(
            submission_filename,
            annotations_path,
            subset,
        )
        subset_metrics = evaluator.eval(
            submission_data, annotations_data, prev_metrics={}
        )
        for key in ["energy_mae", "forces_mae"]:
            metrics[f"{subset}_{key}"] = subset_metrics[key]["metric"]
    return metrics


def omol_evaluations(
    annotations_path: Path,
    submission_filename: Path,
    eval_type: str,
) -> Dict[str, float]:
    with open(submission_filename) as f:
        submission_data = json.load(f)
    with open(annotations_path) as f:
        annotations_data = json.load(f)
    eval_fn = OMOL_EVAL_FUNCTIONS.get(eval_type)
    metrics = eval_fn(annotations_data, submission_data)
    return metrics


def evaluate(
    annotations_path: Path,
    submission_filename: Path,
    eval_type: str,
):
    if eval_type in ["Validation", "Test"]:
        metrics = s2ef_metrics(
            annotations_path,
            submission_filename,
            subsets=[
                "all",
                "metal_complexes",
                "electrolytes",
                "biomolecules",
                "neutral_organics",
            ],
        )
    elif eval_type in OMOL_EVAL_FUNCTIONS:
        metrics = omol_evaluations(
            annotations_path,
            submission_filename,
            eval_type,
        )
    else:
        raise ValueError(f"Unknown eval_type: {eval_type}")

    return metrics
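`evaluate` is the same entry point `add_new_eval` calls in the Space, so a submission can in principle be sanity-checked locally before uploading. A hedged sketch: the label file lives in the gated `facebook/fairchem_internal` dataset, so this only works with access to it (or labels of your own), and the paths below are placeholders.

```python
# Hypothetical local sanity check using the Space's own evaluation entry point.
# Paths are placeholders; the reference labels are private to the Space.
from evaluator import evaluate

metrics = evaluate(
    annotations_path="ligand_strain_labels.json",      # reference labels
    submission_filename="ligand_strain_results.json",  # your predictions
    eval_type="Ligand strain",
)
print(metrics)  # e.g. {"strain_energy_mae": ..., "global_min_rmsd": ...}
```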
requirements.txt ADDED
@@ -0,0 +1,8 @@
datasets
gradio
huggingface-hub
numpy
pandas
APScheduler
fairchem-core
git+https://github.com/facebookresearch/fairchem.git#subdirectory=packages/fairchem-data-omol
submit_leaderboard.py ADDED
@@ -0,0 +1,103 @@
from app import add_new_eval, LeaderboardData
from pathlib import Path
import gradio as gr
import os


# Create a mock profile for testing
class MockProfile:
    def __init__(self, username):
        self.username = username


mock_profile = MockProfile("mshuaibi_test")

evals = {
    # "IE_EA": "unoptimized_ie_ea_results.json",
    # "Ligand pocket": "pdb_pocket_results.json",
    "Ligand strain": "ligand_strain_results.json",
    # "Conformers": "geom_conformers_results.json",
    # "Protonation": "protonation_energies_results.json",
    # "Distance scaling": "distance_scaling_results.json",
    # "Spin gap": "unoptimized_spin_gap_results.json",
    # "Validation": "val_predictions.npz",
    # "Test": "test_predictions.npz"
}

models = {
    # "esen-s-c-4M": {
    #     "name": "eSEN-sm-cons.",
    #     "dataset_size": "OMol-4M",
    #     "results_dir": "/large_experiments/opencatalyst/foundation_models/data/omol/leaderboard/predictions/043025_esen_sm_conserving_4M",
    #     "paper_link": "https://arxiv.org/pdf/2505.08762",
    # },
    # "esen-s-c-All": {
    #     "name": "eSEN-sm-cons.",
    #     "dataset_size": "OMol-All",
    #     "results_dir": "/large_experiments/opencatalyst/foundation_models/data/omol/leaderboard/predictions/043025_esen_sm_conserving_all",
    #     "paper_link": "https://arxiv.org/pdf/2505.08762",
    # },
    # "esen-m-d-4M": {
    #     "name": "eSEN-md-d.",
    #     "dataset_size": "OMol-4M",
    #     "results_dir": "/large_experiments/opencatalyst/foundation_models/data/omol/leaderboard/predictions/043025_esen_md_direct_4M_finetune",
    #     "paper_link": "https://arxiv.org/pdf/2505.08762",
    # },
    # "esen-m-d-All": {
    #     "name": "eSEN-md-d.",
    #     "dataset_size": "OMol-All",
    #     "results_dir": "/large_experiments/opencatalyst/foundation_models/data/omol/leaderboard/predictions/043025_esen_md_direct_all_finetune",
    #     "paper_link": "https://arxiv.org/pdf/2505.08762",
    # },
    # "goc-4M": {
    #     "name": "GemNet-OC",
    #     "dataset_size": "OMol-4M",
    #     "results_dir": "/large_experiments/opencatalyst/foundation_models/data/omol/leaderboard/predictions/043025_gemnet_oc_4M",
    #     "paper_link": "https://arxiv.org/pdf/2505.08762",
    # },
    # "goc-All": {
    #     "name": "GemNet-OC",
    #     "dataset_size": "OMol-All",
    #     "results_dir": "/large_experiments/opencatalyst/foundation_models/data/omol/leaderboard/predictions/050325_gemnet_oc_all",
    #     "paper_link": "https://arxiv.org/pdf/2505.08762",
    # },
    # "uma-s-1p1": {
    #     "name": "UMA-S-1p1",
    #     "dataset_size": "UMA-459M",
    #     "results_dir": "/large_experiments/opencatalyst/foundation_models/data/omol/leaderboard/predictions/uma_sm_1p1",
    #     "paper_link": "https://arxiv.org/pdf/2506.23971",
    # },
    # "uma-m-1p1": {
    #     "name": "UMA-M-1p1",
    #     "dataset_size": "UMA-459M",
    #     "results_dir": "/large_experiments/opencatalyst/foundation_models/data/omol/leaderboard/predictions/uma_md_1p1",
    #     "paper_link": "https://arxiv.org/pdf/2506.23971",
    # },
    "mace": {
        "name": "mace-omol-L-0",
        "dataset_size": "OMol-All",
        "results_dir": "/large_experiments/opencatalyst/foundation_models/data/omol/leaderboard/predictions/mace",
        "paper_link": "https://github.com/ACEsuit/mace/releases/tag/v0.3.14",
        "org": "MACE-Cambridge",
    },
}

for model, model_info in models.items():
    model_name = model_info["name"]
    dataset_size = model_info["dataset_size"]
    results_dir = model_info["results_dir"]
    paper_link = model_info["paper_link"]
    org = model_info.get("org", "Meta")

    for _eval, eval_path in evals.items():
        generator = add_new_eval(
            path_to_file=os.path.join(results_dir, eval_path),
            eval_type=_eval,
            organization=org,
            model=model_name,
            url=paper_link,
            mail="mshuaibi@meta.com",
            training_set=dataset_size,
            additional_info="",
            profile=mock_profile,
        )
        for i in generator:
            print(i)