abdev-leaderboard

Running

App Files Files Community

loodvanniekerkginkgo committed on Aug 21

Commit

58db0a0

1 Parent(s): d4cc92c

Removing old stellarator code, removed extra print statement

Browse files

Files changed (4) hide show

about.py +5 -4
eval.py +0 -0
evaluation.py +0 -28
utils.py +2 -73

about.py CHANGED Viewed

@@ -12,9 +12,9 @@ Here we show 5 of these properties and invite the community to submit and develo
 **How to submit?**
 1. Download the [GDPa1 dataset](https://huggingface.co/datasets/ginkgo-datapoints/GDPa1)
-2. Make predictions for all the antibody sequences in the list for your property of interest.
-3. Submit a CSV file containing the `"antibody_name"` column and a column per property you are predicting (e.g. `"antibody_name,Titer"` if you are predicting Titer).
-There is an example submission filename on the "✉️ Submit" tab.
 For the cross-validation metrics (if training only on the GDPa1 dataset), use the `"hierarchical_cluster_IgG_isotype_stratified_fold"` column to split the dataset into folds and make predictions for each of the folds.
 Submit a CSV file in the same format but also containing the `"hierarchical_cluster_IgG_isotype_stratified_fold"` column.
@@ -29,9 +29,10 @@ For the heldout private set, we will calculate these results privately at the en
 **How to contribute?**
 We'd like to add some more existing models to the leaderboard. Some examples of models we'd like to add:
-- ESM embeddings
 - Absolute folding stability models
 - AbLEF
 If you would like to collaborate with others, start a discussion on the "Community" tab at the top of this page.
 ### FAQs

 **How to submit?**
 1. Download the [GDPa1 dataset](https://huggingface.co/datasets/ginkgo-datapoints/GDPa1)
+2. Make predictions for all the antibody sequences for your property of interest.
+3. Submit a CSV file containing the `"antibody_name"` column and a column matching the property name you are predicting (e.g. `"antibody_name,Titer"` if you are predicting Titer).
+There is an example submission file on the "✉️ Submit" tab.
 For the cross-validation metrics (if training only on the GDPa1 dataset), use the `"hierarchical_cluster_IgG_isotype_stratified_fold"` column to split the dataset into folds and make predictions for each of the folds.
 Submit a CSV file in the same format but also containing the `"hierarchical_cluster_IgG_isotype_stratified_fold"` column.
 **How to contribute?**
 We'd like to add some more existing models to the leaderboard. Some examples of models we'd like to add:
+- ESM embeddings + ridge regression
 - Absolute folding stability models
 - AbLEF
 If you would like to collaborate with others, start a discussion on the "Community" tab at the top of this page.
 ### FAQs

eval.py DELETED Viewed

File without changes

evaluation.py DELETED Viewed

@@ -1,28 +0,0 @@
-def evaluate_problem(
-    problem_type: str,
-    input_file: str,
-    # ) -> problems.EvaluationSingleObjective | problems.EvaluationMultiObjective:
-):
-    pass
-    # with Path(input_file).open("r") as f:
-    #     raw = f.read()
-    #     data_dict = json.loads(raw)
-    #     data = data_dict['boundary_json']
-    # print("Starting evaluation.")
-    # match problem_type:
-    #     case "geometrical":
-    #         boundary = load_boundary(data)
-    #         result = problems.GeometricalProblem().evaluate(boundary)
-    #     case "simple_to_build":
-    #         boundary = load_boundary(data)
-    #         result = problems.SimpleToBuildQIStellarator().evaluate(boundary)
-    #     case "mhd_stable":
-    #         boundaries = load_boundaries(data)
-    #         result = problems.MHDStableQIStellarator().evaluate(boundaries)
-    #     case _:
-    #         raise ValueError(f"Unknown problem type: {problem_type}")
-    # print("Finished evaluation.")
-    # return result

utils.py CHANGED Viewed

@@ -1,24 +1,11 @@
-import pathlib
-import tempfile
-import json
-import gradio as gr
 import pandas as pd
 from datasets import load_dataset
-from huggingface_hub import hf_hub_download
-from constants import API, SUBMISSIONS_REPO, RESULTS_REPO, ASSAY_RENAME, LEADERBOARD_RESULTS_COLUMNS
 pd.set_option('display.max_columns', None)
-# def make_user_clickable(name):
-#     link =f'https://huggingface.co/{name}'
-#     return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{name}</a>'
-# def make_boundary_clickable(filename):
-#     link =f'https://huggingface.co/datasets/proxima-fusion/constellaration-bench-results/blob/main/{filename}'
-#     return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">link</a>'
 def show_output_box(message):
     return gr.update(value=message, visible=True)
@@ -33,62 +20,4 @@ def fetch_hf_results():
     # Show latest submission only
     df = df.sort_values("submission_time", ascending=False).drop_duplicates(subset=["model", "assay"], keep="first")
     df["property"] = df["assay"].map(ASSAY_RENAME)
-    print(df.head())
     return df
-def read_result_from_hub(filename):
-    local_path = hf_hub_download(
-        repo_id=RESULTS_REPO,
-        repo_type="dataset",
-        filename=filename,
-    )
-    return local_path
-def read_submission_from_hub(filename):
-    local_path = hf_hub_download(
-        repo_id=SUBMISSIONS_REPO,
-        repo_type="dataset",
-        filename=filename,
-    )
-    return local_path
-def write_results(record, result):
-    record.update(result)
-    record["result_filename"] = (
-        record["submission_filename"].rstrip(".json") + "_results.json"
-    )
-    print(record["result_filename"])
-    record["evaluated"] = True
-    record["objectives"] = json.dumps(record.get("objectives", []))
-    record["feasibilities"] = json.dumps(record.get("feasibility", []))
-    if "objective" not in record.keys():
-        record["objective"] = 0.0
-        record["minimize_objective"] = True
-        record["feasibility"] = sum(record["feasibility"]) / len(record["feasibility"])
-    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as tmp:
-        json.dump(record, tmp, indent=2)
-        tmp.flush()
-        tmp_name = tmp.name
-    API.upload_file(
-        path_or_fileobj=tmp_name,
-        path_in_repo=record["result_filename"],
-        repo_id=RESULTS_REPO,
-        repo_type="dataset",
-        commit_message=f"Add result data for {record['result_filename']}",
-    )
-    pathlib.Path(tmp_name).unlink()
-    return
-def get_user(profile: gr.OAuthProfile | None) -> str:
-    if profile is None:
-        return "Please login to submit a boundary for evaluation."
-    return profile.username

 import pandas as pd
 from datasets import load_dataset
+import gradio as gr
+from constants import RESULTS_REPO, ASSAY_RENAME, LEADERBOARD_RESULTS_COLUMNS
 pd.set_option('display.max_columns', None)
 def show_output_box(message):
     return gr.update(value=message, visible=True)
     # Show latest submission only
     df = df.sort_values("submission_time", ascending=False).drop_duplicates(subset=["model", "assay"], keep="first")
     df["property"] = df["assay"].map(ASSAY_RENAME)
     return df