File size: 3,249 Bytes
7a1c35b a3fddce 7a1c35b 62b6599 7a1c35b d6a0c44 61fa714 7a1c35b 6a48cdf 15ae508 688f116 767c884 471531b 7a1c35b 0f3e1b5 b2a1e67 672339b b2a1e67 d6a0c44 7a1c35b 8f9985e 7a1c35b 8f9985e 5d5df93 10e69e7 2dafeb1 672339b 2dafeb1 672339b 2dafeb1 672339b 2dafeb1 672339b 2dafeb1 61fa714 e76e95f 2dafeb1 61fa714 2dafeb1 a3fddce 4808b6b a3fddce |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
"""
Constants for the Antibody Developability Benchmark
"""
import os
from huggingface_hub import HfApi
import pandas as pd
# Assay identifiers; the same strings are the keys of every ASSAY_* mapping below.
ASSAY_LIST: list[str] = [
    "AC-SINS_pH7.4",
    "PR_CHO",
    "HIC",
    "Tm2",
    "Titer",
]

# Assay identifier -> human-readable property name.
ASSAY_RENAME: dict[str, str] = {
    "AC-SINS_pH7.4": "Self-association",
    "PR_CHO": "Polyreactivity",
    "HIC": "Hydrophobicity",
    "Tm2": "Thermostability",
    "Titer": "Titer",
}

# Assay identifier -> one-line description of the property and its method.
ASSAY_DESCRIPTION: dict[str, str] = {
    "AC-SINS_pH7.4": "Self association by AC-SINS at pH 7.4",
    "PR_CHO": "Polyreactivity by bead-based method against CHO SMP",
    "HIC": "Hydrophobicity by HIC",
    "Tm2": "Thermostability by nanoDSF",
    "Titer": "Titer by Valita",
}

# Assay identifier -> emoji associated with that assay.
ASSAY_EMOJIS: dict[str, str] = {
    "AC-SINS_pH7.4": "🧲",
    "PR_CHO": "🎯",
    "HIC": "💧",
    "Tm2": "🌡️",
    "Titer": "🧪",
}

# Assay identifier -> True when a larger measured value is the better outcome.
# NOTE: key order intentionally differs from ASSAY_LIST and is kept as-is.
ASSAY_HIGHER_IS_BETTER: dict[str, bool] = {
    "HIC": False,
    "Tm2": True,
    "Titer": True,
    "PR_CHO": False,
    "AC-SINS_pH7.4": False,
}
# Tab labels, each prefixed with an emoji
ABOUT_TAB_NAME: str = "📖 About / Rules"
FAQ_TAB_NAME: str = "❓ FAQs"
SUBMIT_TAB_NAME: str = "✉️ Submit"

# Taken from the environment; None when REGISTRATION_CODE is not set.
REGISTRATION_CODE = os.getenv("REGISTRATION_CODE")

# External links. TERMS_URL is split across adjacent literals only to keep the
# line short; the pieces are joined at compile time into a single string.
TERMS_URL: str = (
    "https://euphsfcyogalqiqsawbo.supabase.co/storage/v1/object/public/gdpweb/pdfs/"
    "2025%20Ginkgo%20Antibody%20Developability%20Prediction%20Competition%20"
    "2025-08-28-v2.pdf"
)
SLACK_URL: str = "https://join.slack.com/t/bitsinbio/shared_invite/zt-3dqigle2b-e0dEkfPPzzWL055j_8N_eQ"
TUTORIAL_URL: str = "https://huggingface.co/blog/ginkgo-datapoints/making-antibody-embeddings-and-predictions"
GITHUB_URL: str = "https://github.com/ginkgobioworks/abdev-benchmark"
# Columns required in an input CSV file
REQUIRED_COLUMNS: list[str] = ["antibody_name"]

# Name of the cross-validation column
CV_COLUMN: str = "hierarchical_cluster_IgG_isotype_stratified_fold"

# Dataset name -> sequence CSV path.
# "GDPa1" and "GDPa1_cross_validation" point at the same file.
SEQUENCES_FILE_DICT: dict[str, str] = {
    "GDPa1": "data/GDPa1_v1.2_sequences.csv",
    "GDPa1_cross_validation": "data/GDPa1_v1.2_sequences.csv",
    "Heldout Test Set": "data/heldout-set-sequences.csv",
}

# Location of the GDPa1 dataset CSV (hf:// URL)
GDPa1_path: str = "hf://datasets/ginkgo-datapoints/GDPa1/GDPa1_v1.2_20250814.csv"
# Hugging Face API client and related environment settings
TOKEN = os.getenv("HF_TOKEN")  # None when HF_TOKEN is not set
CACHE_PATH: str = os.getenv("HF_HOME", default=".")  # "." when HF_HOME is not set
API = HfApi(token=TOKEN)  # client is constructed once, at import time
# Hugging Face repo ids, both under the same organization
ORGANIZATION: str = "ginkgo-datapoints"
SUBMISSIONS_REPO: str = ORGANIZATION + "/abdev-bench-submissions"
RESULTS_REPO: str = ORGANIZATION + "/abdev-bench-results"
# Leaderboard dataframes

# Columns expected from the results dataset
LEADERBOARD_RESULTS_COLUMNS: list[str] = [
    "user",
    "assay",
    "spearman",
    "dataset",
    "model",
    "submission_time",
]

# Same columns after "assay" has been changed to "property" (pretty formatting)
LEADERBOARD_DISPLAY_COLUMNS: list[str] = [
    "property" if column == "assay" else column
    for column in LEADERBOARD_RESULTS_COLUMNS
]
# Internal column name -> display header
LEADERBOARD_COLUMNS_RENAME: dict[str, str] = {
    "spearman": "Spearman Correlation",
    "dataset": "Dataset",
    "user": "User",
    "submission_time": "Submission Time",
    "model": "Model Name",
    "property": "Property",
}

# NOTE(review): presumably the accounts whose submissions count as baselines;
# confirm against the code that consumes this list.
BASELINE_USERNAMES = ["loodvanniekerkginkgo", "jmollerginkgobioworks", "sritter-ginkgobioworks"]


def LEADERBOARD_COLUMNS_RENAME_LIST(columns: list[str]) -> list[str]:
    """Translate column names to their display headers.

    Args:
        columns: Column names to translate. The input list is not modified.

    Returns:
        A new list with the same length and order as ``columns``. Each name
        that is a key of ``LEADERBOARD_COLUMNS_RENAME`` is replaced by its
        mapped header; any other name is passed through unchanged.
    """
    return [LEADERBOARD_COLUMNS_RENAME.get(name, name) for name in columns]
# First deadline: 2025-10-14 23:59:59, wall-clock time in the "US/Eastern" zone.
# NOTE(review): this has been labelled "EST", but US/Eastern is expected to be on
# daylight time (EDT, UTC-4) on this date, not standard time (UTC-5) — confirm
# which offset the deadline is meant to use.
FIRST_DEADLINE = pd.to_datetime("2025-10-14 23:59:59").tz_localize("US/Eastern")
|