makiling's picture
Upload folder using huggingface_hub
5f58699 verified
raw
history blame
909 Bytes
"""Loader for the Jain et al. 2017 dataset."""
from __future__ import annotations
import pandas as pd
from .utils import standardize_frame
_COLUMN_ALIASES = {
"id": ("id", "antibody_id"),
"heavy_seq": ("heavy", "heavy_sequence", "H_chain"),
"light_seq": ("light", "light_sequence", "L_chain"),
"label": ("class", "polyreactive"),
}
_LABEL_MAP = {
"polyreactive": 1,
"non-polyreactive": 0,
"reactive": 1,
"non-reactive": 0,
1: 1,
0: 0,
1.0: 1,
0.0: 0,
"1": 1,
"0": 0,
}
def load_dataframe(path_or_url: str, heavy_only: bool = True) -> pd.DataFrame:
    """Read the Jain CSV and pass it through ``standardize_frame``.

    Args:
        path_or_url: Location of the CSV; given unchanged to
            ``pandas.read_csv``.
        heavy_only: Forwarded unchanged to ``standardize_frame``.

    Returns:
        The result of ``standardize_frame`` for the parsed CSV.
    """
    # The call always uses source="jain2017" and is_test=True, together
    # with this module's alias and label tables.
    return standardize_frame(
        pd.read_csv(path_or_url),
        source="jain2017",
        heavy_only=heavy_only,
        column_aliases=_COLUMN_ALIASES,
        label_map=_LABEL_MAP,
        is_test=True,
    )