File size: 11,846 Bytes
2982a51
281711d
2dafeb1
1bcb06b
211c032
 
688f116
211c032
281711d
61fa714
84fdef4
2dafeb1
672339b
10e69e7
22f82e7
61fa714
15ae508
61fa714
 
 
688f116
84fdef4
3edbc93
688f116
8f9985e
2dafeb1
ba1131a
069fb2c
 
 
de75bee
1bcb06b
8f9985e
10e69e7
177a597
 
 
 
 
2dafeb1
 
 
 
61fa714
 
177a597
1bcb06b
8f9985e
fe04bb9
10e69e7
1bcb06b
5b5ee28
10e69e7
de9585b
211c032
de9585b
 
069fb2c
2dafeb1
069fb2c
2dafeb1
 
61fa714
de9585b
8f9985e
1bcb06b
de9585b
 
2982a51
de9585b
211c032
 
 
688f116
211c032
 
 
 
 
 
 
688f116
211c032
 
 
 
 
 
 
 
 
 
 
 
688f116
211c032
 
 
 
 
 
 
5554fb7
5135eea
6c94821
2dafeb1
22f82e7
2dafeb1
22f82e7
 
 
 
 
8f9985e
22f82e7
2dafeb1
 
22f82e7
4965e60
22f82e7
 
 
3d4c9af
 
 
 
0fdb208
27f3da5
2dafeb1
3d4c9af
2dafeb1
22f82e7
 
21f87d6
2dafeb1
27f3da5
2dafeb1
 
0fdb208
27f3da5
 
2dafeb1
3edbc93
27f3da5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8f9985e
2dafeb1
 
 
8f9985e
2dafeb1
 
 
688f116
672339b
27f3da5
2dafeb1
8f9985e
de9585b
211c032
 
2dafeb1
61fa714
22f82e7
3edbc93
 
 
 
809a553
10e69e7
22f82e7
 
ef27773
22f82e7
 
 
 
a7cc355
 
 
 
8f9985e
3edbc93
a7cc355
10e69e7
a7cc355
 
 
 
471531b
 
 
 
 
688f116
3edbc93
672339b
 
 
 
 
 
 
84fdef4
 
672339b
3edbc93
672339b
 
 
 
 
84fdef4
672339b
 
84fdef4
3edbc93
672339b
8f9985e
3edbc93
 
a7cc355
672339b
 
34f59c0
a7cc355
 
1de74c6
471531b
a7cc355
3edbc93
 
 
 
 
 
61fa714
22f82e7
 
 
 
 
 
2dafeb1
ba1131a
 
15ae508
ba1131a
688f116
 
ba1131a
 
8f9985e
ba1131a
281711d
 
688f116
27f3da5
688f116
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
import pandas as pd
import gradio as gr
from gradio.themes.utils import sizes
from gradio_leaderboard import Leaderboard
from dotenv import load_dotenv
import contextlib

load_dotenv()  # Load environment variables from .env file

from about import ABOUT_INTRO, ABOUT_TEXT, FAQS, SUBMIT_INTRUCTIONS
from constants import (
    ASSAY_RENAME,  # noqa: F401
    SEQUENCES_FILE_DICT,
    LEADERBOARD_DISPLAY_COLUMNS,
    ABOUT_TAB_NAME,
    FAQ_TAB_NAME,
    TERMS_URL,
    LEADERBOARD_COLUMNS_RENAME,
    LEADERBOARD_COLUMNS_RENAME_LIST,
    SUBMIT_TAB_NAME,
    SLACK_URL,
)
from submit import make_submission
from utils import fetch_hf_results, show_output_box


def format_leaderboard_table(df_results: pd.DataFrame, assay: str | None = None):
    """
    Turn the raw results dataframe into the table shown on the leaderboard.

    Rows whose assay is not a key of ASSAY_RENAME are dropped, and when `assay`
    is given only the rows for that assay are kept. The table is restricted to
    LEADERBOARD_DISPLAY_COLUMNS, ordered by descending Spearman correlation, and
    its columns are renamed through LEADERBOARD_COLUMNS_RENAME.

    The input dataframe originates from utils.fetch_hf_results().
    """
    keep_rows = df_results["assay"].isin(ASSAY_RENAME.keys())
    if assay is not None:
        keep_rows = keep_rows & (df_results["assay"] == assay)

    table = df_results[keep_rows][LEADERBOARD_DISPLAY_COLUMNS].sort_values(
        by="spearman", ascending=False
    )

    # The scores are turned into strings only after sorting, so that the
    # explanatory text below can live in the same column without a dtype clash.
    # (Alternative considered by the authors: a note under the leaderboard saying
    # that heldout results are only evaluated at competition close.)
    table["spearman"] = table["spearman"].astype(str)
    is_heldout = table["dataset"] == "Heldout Test Set"
    has_no_score = table["spearman"] == "nan"
    table.loc[is_heldout & has_no_score, "spearman"] = (
        "N/A, evaluated at competition close"
    )

    # Human-readable column headers
    return table.rename(columns=LEADERBOARD_COLUMNS_RENAME)


def get_leaderboard_object(assay: str | None = None):
    """
    Build the Leaderboard component from the results stored in
    "debug-current-results.csv".

    When `assay` is None the table covers every assay and a "property" filter is
    offered next to the "dataset" filter; otherwise the table is limited to that
    assay and only the "dataset" filter is offered.
    """
    filter_columns = ["dataset"] if assay is not None else ["dataset", "property"]

    results = pd.read_csv("debug-current-results.csv")
    table = format_leaderboard_table(df_results=results, assay=assay)
    selectable_columns = LEADERBOARD_COLUMNS_RENAME_LIST(
        ["model", "property", "spearman", "dataset", "user"]
    )

    # Bug: search_columns can't be left empty, otherwise the component reports
    # "Column None not found in headers".
    # Note(Lood): Would be nice to make it clear that the search box searches on model name.
    return Leaderboard(
        value=table,
        datatype=["str", "str", "str", "number", "str"],
        select_columns=selectable_columns,
        search_columns=["Model Name"],
        filter_columns=LEADERBOARD_COLUMNS_RENAME_LIST(filter_columns),
        every=15,
        render=True,
    )


# Initialize global dataframe: run one fetch_hf_results() call at import time,
# then load "debug-current-results.csv" into the module-level `current_dataframe`.
# NOTE(review): presumably fetch_hf_results() is what writes that CSV — confirm in utils.
fetch_hf_results()
current_dataframe = pd.read_csv("debug-current-results.csv")


def refresh_overall_leaderboard():
    """Re-read "debug-current-results.csv" and return the formatted all-assay table."""
    return format_leaderboard_table(
        df_results=pd.read_csv("debug-current-results.csv")
    )


def fetch_latest_data(stop_event, interval_seconds: float = 3):
    """
    Call fetch_hf_results() repeatedly until `stop_event` is set.

    Intended to be the target of a background thread. A failing fetch is
    reported on stdout and the loop carries on with the next round.

    Args:
        stop_event: A threading.Event (or compatible object providing
            `is_set()` and `wait(timeout)`); setting it ends the loop.
        interval_seconds: Pause between two fetches, in seconds.
    """
    while not stop_event.is_set():
        try:
            fetch_hf_results()
        except Exception as e:
            # Best effort: one failed fetch must not kill the refresh thread
            print(f"Error fetching latest data: {e}")
        # wait() pauses like sleep(), but returns as soon as the event is set,
        # so shutdown does not have to sit out the rest of the interval.
        stop_event.wait(interval_seconds)
    print("Exiting data fetch thread")


@contextlib.asynccontextmanager
async def periodic_data_fetch(app):
    """
    Async context manager that runs fetch_latest_data in a background thread.

    On entry a daemon thread running fetch_latest_data is started. On exit the
    thread's stop event is set and the thread is joined for up to 3 seconds.

    Args:
        app: The application object handed in by the caller; not used here.
    """
    import threading

    event = threading.Event()
    t = threading.Thread(target=fetch_latest_data, args=(event,), daemon=True)
    t.start()
    try:
        yield
    finally:
        # Runs even when an exception or cancellation is thrown in at the yield,
        # so the worker is always told to stop.
        event.set()
        t.join(3)

# Top-level UI definition: header, About / Leaderboard / Submit / FAQ tabs and footer.
# Make font size bigger using gradio theme
with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
    timer = gr.Timer(3)  # Run every 3 seconds when page is focused

    ## Header

    with gr.Row():
        with gr.Column(scale=6):  # bigger text area
            # Tab names are interpolated from the constants so the text always
            # matches the labels of the tabs defined below.
            gr.Markdown(
                f"""
                ## Welcome to the Ginkgo Antibody Developability Benchmark!

                Participants can submit their model to the leaderboards by simply uploading a CSV file (see the "{SUBMIT_TAB_NAME}" tab).

                You can **predict any or all of the 5 properties**, and you can filter the main leaderboard by property.
                See more details in the "{ABOUT_TAB_NAME}" tab.
                Submissions close on 1 November 2025.
                """
            )
        with gr.Column(scale=2):  # smaller side column for logo
            gr.Image(
                value="./assets/competition_logo.jpg",
                show_label=False,
                show_download_button=False,
                show_share_button=False,
                show_fullscreen_button=False,
                width="25vw",  # Take up the width of the column (2/8 = 1/4)
            )

    with gr.Tabs(elem_classes="tab-buttons"):
        # NOTE(review): this elem_id is also used by the leaderboard tab below.
        with gr.TabItem(ABOUT_TAB_NAME, elem_id="abdev-benchmark-tab-table"):
            gr.Markdown(ABOUT_INTRO)
            gr.Image(
                value="./assets/prediction_explainer_cv.png",
                show_label=False,
                show_download_button=False,
                show_share_button=False,
                show_fullscreen_button=False,
                width="30vw",
            )
            gr.Markdown(ABOUT_TEXT)

            # Sequence download buttons
            gr.Markdown(
            """### 📥 Download Sequences
            The GDPa1 dataset (with assay data and sequences) is available on Hugging Face [here](https://huggingface.co/datasets/ginkgo-datapoints/GDPa1),
            but we provide this and the private test set for convenience.""")
            with gr.Row():
                with gr.Column():
                    download_button_cv_about = gr.DownloadButton(
                        label="📥 Download GDPa1 sequences",
                        value=SEQUENCES_FILE_DICT["GDPa1_cross_validation"],
                        variant="secondary",
                    )
                with gr.Column():
                    download_button_test_about = gr.DownloadButton(
                        label="📥 Download Private Test Set sequences",
                        value=SEQUENCES_FILE_DICT["Heldout Test Set"],
                        variant="secondary",
                    )

        # NOTE(review): same elem_id as the About tab above.
        with gr.TabItem(
            "🏆 Leaderboard", elem_id="abdev-benchmark-tab-table"
        ) as leaderboard_tab:
            gr.Markdown(
                """
                # Overall Leaderboard (filter below by property)
                Each property has its own prize, and participants can submit models for any combination of properties.

                **Note**: It is *easy to overfit* the public GDPa1 dataset, which results in artificially high Spearman correlations.
                We would suggest training using cross-validation to give a better indication of the model's performance on the eventual private test set.
                """
            )
            lb = get_leaderboard_object()
            # Refresh the table on every timer tick and once on page load
            timer.tick(fn=refresh_overall_leaderboard, outputs=lb)
            demo.load(fn=refresh_overall_leaderboard, outputs=lb)

        with gr.TabItem(SUBMIT_TAB_NAME, elem_id="boundary-benchmark-tab-table"):
            gr.Markdown(SUBMIT_INTRUCTIONS)

            with gr.Row():
                # Left column: who is submitting and what
                with gr.Column():
                    username_input = gr.Textbox(
                        label="Username",
                        placeholder="Enter your Hugging Face username",
                        info="This will be used to identify valid submissions, and to update your results if you submit again.",
                    )

                    anonymous_checkbox = gr.Checkbox(
                        label="Anonymous",
                        value=False,
                        info="If checked, your username will be replaced with an anonymous hash on the leaderboard.",
                    )
                    model_name_input = gr.Textbox(
                        label="Model Name",
                        placeholder="Enter your model name (e.g., 'MyProteinLM-v1')",
                        info="This will be displayed on the leaderboard.",
                    )
                    model_description_input = gr.Textbox(
                        label="Model Description (optional)",
                        placeholder="Brief description of your model and approach",
                        info="Describe your model, training data, or methodology.",
                        lines=3,
                    )
                    registration_code = gr.Textbox(
                        label="Registration Code",
                        placeholder="Enter your registration code",
                        info="If you did not receive a registration code, please sign up on the <a href='https://datapoints.ginkgo.bio/ai-competitions/2025-abdev-competition'>Competition Registration page</a> or email <a href='mailto:antibodycompetition@ginkgobioworks.com'>antibodycompetition@ginkgobioworks.com</a>.",
                    )

                # Right column: the two prediction files
                with gr.Column():
                    gr.Markdown("### Upload Both Submission Files")

                    # GDPa1 Cross-validation file
                    gr.Markdown("**GDPa1 Cross-Validation Predictions:**")
                    download_button_cv = gr.DownloadButton(
                        label="📥 Download GDPa1 sequences",
                        value=SEQUENCES_FILE_DICT["GDPa1_cross_validation"],
                        variant="secondary",
                    )
                    submission_file_cv = gr.File(label="GDPa1 Cross-Validation CSV")

                    # Test set file
                    gr.Markdown("**Private Test Set Predictions:**")
                    download_button_test = gr.DownloadButton(
                        label="📥 Download Private Test Set sequences",
                        value=SEQUENCES_FILE_DICT["Heldout Test Set"],
                        variant="secondary",
                    )
                    submission_file_test = gr.File(label="Private Test Set CSV")

            submit_btn = gr.Button("Evaluate")
            # Hidden until a submission has produced a status message
            message = gr.Textbox(label="Status", lines=3, visible=False)

            # make_submission writes its result into `message`; show_output_box
            # then runs on that message and updates the same textbox.
            submit_btn.click(
                make_submission,
                inputs=[
                    submission_file_cv,
                    submission_file_test,
                    username_input,
                    model_name_input,
                    model_description_input,
                    anonymous_checkbox,
                    registration_code,
                ],
                outputs=[message],
            ).then(
                fn=show_output_box,
                inputs=[message],
                outputs=[message],
            )
        with gr.Tab(FAQ_TAB_NAME):
            gr.Markdown("# Frequently Asked Questions")
            for i, (question, answer) in enumerate(FAQS.items()):
                # Would love to make questions bold but accordion doesn't support it
                question = f"{i+1}. {question}"
                with gr.Accordion(question, open=False):
                    gr.Markdown(f"*{answer}*")  # Italics for answers

    # Footnote
    gr.Markdown(
        f"""
        <div style="text-align: center; font-size: 14px; color: gray; margin-top: 2em;">
        📬 For questions or feedback, contact <a href="mailto:antibodycompetition@ginkgobioworks.com">antibodycompetition@ginkgobioworks.com</a> or discuss on the <a href="{SLACK_URL}">Slack community</a> co-hosted by Bits in Bio.<br>
        Visit the <a href="https://datapoints.ginkgo.bio/ai-competitions/2025-abdev-competition">Competition Registration page</a> to sign up for updates and to register, and see Terms <a href="{TERMS_URL}">here</a>.
        </div>
        """,
        elem_id="contact-footer",
    )

if __name__ == "__main__":
    # Launch with SSR switched off; app_kwargs carries periodic_data_fetch as
    # the "lifespan" entry.
    launch_options = {
        "ssr_mode": False,
        "app_kwargs": {"lifespan": periodic_data_fetch},
    }
    demo.launch(**launch_options)