In [None]:
!pip install -q tabicl
!pip install -q openml
!pip install -q kaggle
!pip install -q skrub -U

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/103.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m103.9/103.9 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m160.4/160.4 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.8/95.8 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m57.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for liac-arff (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m471.4/471.4 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
from google.colab import drive
# Mount Google Drive at /content/MyDrive; the DATA_DIR paths used by the
# dataset sections further down are located under this mount point.
drive.mount('/content/MyDrive')

Mounted at /content/MyDrive


In [None]:
from typing import Optional
import os, json
import numpy as np
import pandas as pd
import torch
from skrub import TableVectorizer
from tabicl import TabICLClassifier
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OrdinalEncoder
from sklearn.metrics import accuracy_score, roc_auc_score

### Custom Softmax

In [None]:
# raw_logits = None
# @staticmethod
# def hook_softmax(x, axis: int = -1, temperature: float = 0.9):
#         """Compute softmax values with temperature scaling using NumPy.

#         Parameters
#         ----------
#         x : ndarray
#             Input array of logits.

#         axis : int, default=-1
#             Axis along which to compute softmax.

#         temperature : float, default=0.9
#             Temperature scaling parameter.

#         Returns
#         -------
#         ndarray
#             Softmax probabilities along the specified axis, with the same shape as the input.
#         """
#         global raw_logits
#         raw_logits = np.copy(x)  # save raw logits
#         x = x / temperature
#         # Subtract max for numerical stability
#         x_max = np.max(x, axis=axis, keepdims=True)
#         e_x = np.exp(x - x_max)
#         # Compute softmax
#         return e_x / np.sum(e_x, axis=axis, keepdims=True)

# # Replace original softmax with hooked one
# TabICLClassifier.softmax = hook_softmax

## Conformal Prediction

In [None]:
import numpy as np
from numpy._typing import NDArray

def confidence_score(probs: NDArray):
    """Least-confidence uncertainty score: the negated top class probability.

    Parameters
    ----------
    probs : ndarray of shape (n_samples, n_classes)
        Class probabilities for each sample.

    Returns
    -------
    ndarray of shape (n_samples,)
        ``-max_k probs[i, k]`` for every row, so a larger value means a less
        confident prediction (same orientation as ``margin_score``).
    """
    # The negation must be applied AFTER taking the maximum: np.max(-probs)
    # would return minus the *smallest* probability instead.
    return -np.max(probs, axis=1)

def margin_score(probs: NDArray):
    """Return, per row, the second-largest probability minus the largest one.

    The result is always <= 0; values close to 0 mean the two most likely
    classes are nearly tied.
    """
    ascending = np.sort(probs, axis=1)
    runner_up = ascending[:, -2]
    best = ascending[:, -1]
    return runner_up - best

def entropy_score(probs: NDArray, eps = 1e-9):
    """Row-wise Shannon entropy (natural log) of the probability vectors.

    ``eps`` is added inside the logarithm so that zero probabilities do not
    produce ``-inf``.
    """
    log_terms = np.log(probs + eps)
    weighted = probs * log_terms
    return -np.sum(weighted, axis=1)

def nnl_score(probs: NDArray, true_labels: NDArray, eps = 1e-9):
    """Negative log of the probability assigned to each sample's true label.

    ``true_labels`` is used directly as a column index into ``probs``;
    ``eps`` guards the logarithm against a zero probability.
    """
    row_idx = np.arange(probs.shape[0])
    true_class_probs = probs[row_idx, true_labels]
    return -np.log(true_class_probs + eps)

def ri_score(probs: NDArray, eps = 1e-9):
    """Negated sum over classes of ``log(probs + eps)`` for every row."""
    log_terms = np.log(probs + eps)
    return -np.sum(log_terms, axis=1)


def lac_conformal_score(probs: NDArray, true_labels: NDArray):
    """
    LAC conformal score: one minus the probability given to the true label.

    Parameters:
    - probs: 2D numpy array of shape (n_samples, num_classes), softmax probs for each sample
    - true_labels: 1D numpy array of shape (n_samples,), true class labels for each sample
      (used directly as column indices into `probs`)

    Returns:
    - 1D numpy array of shape (n_samples,), LAC conformal score for each sample
    """
    sample_idx = np.arange(probs.shape[0])
    true_class_probs = probs[sample_idx, true_labels]
    return 1 - true_class_probs

def aps_conformal_score(probs: NDArray, true_labels: NDArray):
    """
    APS conformal score: total probability mass of every class that is at
    least as likely as the true class (the true class itself included).

    Parameters:
    - probs: 2D numpy array of shape (n_samples, num_classes), softmax probs for each sample
    - true_labels: 1D numpy array of shape (n_samples,), true class labels for each sample
      (used directly as column indices into `probs`)

    Returns:
    - 1D numpy array of shape (n_samples,), APS conformal score for each sample
    """
    sample_idx = np.arange(probs.shape[0])
    # Column vector so the comparison broadcasts across the class axis.
    thresholds = probs[sample_idx, true_labels][:, np.newaxis]
    at_least_as_likely = probs >= thresholds
    return np.sum(probs * at_least_as_likely, axis=1)

def compute_quantile(probs: NDArray, calibration_labels, n: int, type = "lac", alpha = 0.1):
    """Compute the conformal threshold (qhat) from calibration scores.

    Parameters
    ----------
    probs : ndarray of shape (n_samples, n_classes)
        Probabilities predicted on the calibration samples.
    calibration_labels : ndarray of shape (n_samples,)
        True labels of the calibration samples (used as column indices).
    n : int
        Sample count used in the finite-sample correction
        ``ceil((n + 1) * (1 - alpha)) / n``.
    type : {"lac", "aps"}, default="lac"
        Which conformal score to compute.
    alpha : float, default=0.1
        Target miscoverage level.

    Returns
    -------
    float
        The corrected ``(1 - alpha)`` quantile of the scores, or ``+inf`` when
        ``n`` is too small for the requested ``alpha``.

    Raises
    ------
    AttributeError
        If ``type`` is neither ``"lac"`` nor ``"aps"``.
    """
    if type == "lac":
        scores = lac_conformal_score(probs, calibration_labels)
    elif type == "aps":
        scores = aps_conformal_score(probs, calibration_labels)
    else:
        raise AttributeError(f"type {type} is not supported. Use 'lac' or 'aps'")

    q_level = np.ceil((n + 1) * (1 - alpha)) / n
    if q_level > 1:
        # Too few calibration samples for this alpha: np.quantile would raise
        # on a level above 1. The conformal threshold is +inf in that regime,
        # which makes every class admissible.
        return np.inf
    return np.quantile(scores, q_level, method="higher")

def lac_prediction_set(calibration_probs: NDArray, probs: NDArray, calibration_labels: NDArray, alpha = 0.1):
    """Build LAC conformal prediction sets.

    Parameters
    ----------
    calibration_probs : ndarray of shape (n_cal, n_classes)
        Probabilities predicted on the calibration samples.
    probs : ndarray of shape (n_test, n_classes)
        Probabilities predicted on the samples to build sets for.
    calibration_labels : ndarray of shape (n_cal,)
        True calibration labels, used directly as column indices.
    alpha : float, default=0.1
        Target miscoverage level.

    Returns
    -------
    ndarray of bool, shape (n_test, n_classes)
        ``True`` where the class belongs to the sample's prediction set.
    """
    n = calibration_labels.shape[0]
    cal_scores = 1 - calibration_probs[np.arange(calibration_probs.shape[0]), calibration_labels]

    # Finite-sample corrected quantile level.
    q_level = np.ceil((n + 1) * (1 - alpha)) / n
    if q_level > 1:
        # Too few calibration samples for this alpha: np.quantile would raise
        # on a level above 1. Use an infinite threshold so that every class is
        # included.
        qhat = np.inf
    else:
        qhat = np.quantile(cal_scores, q_level, method='higher')

    # A class is kept when its score (1 - prob) does not exceed qhat.
    prediction_sets = probs >= (1 - qhat)
    return prediction_sets

def aps_prediction_set(calibration_probs: NDArray, probs: NDArray, calibration_labels: NDArray, alpha = 0.1):
    """Build APS (adaptive prediction sets) conformal prediction sets.

    Parameters
    ----------
    calibration_probs : ndarray of shape (n_cal, n_classes)
        Probabilities predicted on the calibration samples.
    probs : ndarray of shape (n_test, n_classes)
        Probabilities predicted on the samples to build sets for.
    calibration_labels : ndarray of shape (n_cal,)
        True calibration labels, used directly as column indices.
    alpha : float, default=0.1
        Target miscoverage level.

    Returns
    -------
    ndarray of bool, shape (n_test, n_classes)
        ``True`` where the class belongs to the sample's prediction set.
        A row may be all ``False`` when even the top class exceeds the
        threshold.
    """
    n = calibration_labels.shape[0]

    # Calibration score: cumulative probability, in descending-probability
    # order, up to and including the true class.
    cal_order = calibration_probs.argsort(1)[:, ::-1]
    cal_sum = np.take_along_axis(calibration_probs, cal_order, axis=1).cumsum(axis=1)
    # argsort of the permutation is its inverse: map the cumulative sums back
    # to the original class positions before picking the true label.
    cal_scores = np.take_along_axis(cal_sum, cal_order.argsort(axis=1), axis=1)[np.arange(n), calibration_labels]

    # Finite-sample corrected quantile level.
    q_level = np.ceil((n + 1) * (1 - alpha)) / n
    if q_level > 1:
        # Too few calibration samples for this alpha: np.quantile would raise
        # on a level above 1. Use an infinite threshold so that every class is
        # included.
        qhat = np.inf
    else:
        qhat = np.quantile(cal_scores, q_level, method='higher')

    # Keep every class whose cumulative mass stays within the threshold, then
    # undo the sort so columns line up with the original class order.
    test_order = probs.argsort(1)[:, ::-1]
    test_sum = np.take_along_axis(probs, test_order, axis=1).cumsum(axis=1)
    prediction_sets = np.take_along_axis(test_sum <= qhat, test_order.argsort(axis=1), axis=1)
    return prediction_sets

def raps_prediction_set(calibration_probs: NDArray, test_probs: NDArray, calibration_labels: NDArray, alpha = 0.1, lam_reg=0.01, k_reg = 5, disallow_zero_sets = False, rand = True, random_state = None):
    """Build RAPS (regularized adaptive prediction sets) conformal sets.

    Parameters
    ----------
    calibration_probs : ndarray of shape (n_cal, n_classes)
        Probabilities predicted on the calibration samples.
    test_probs : ndarray of shape (n_test, n_classes)
        Probabilities predicted on the samples to build sets for.
    calibration_labels : ndarray of shape (n_cal,)
        True calibration labels, used directly as column indices.
    alpha : float, default=0.1
        Target miscoverage level.
    lam_reg : float, default=0.01
        Penalty added to every class ranked after the first ``k_reg``.
    k_reg : int, default=5
        Number of top-ranked classes left unpenalized (capped at
        ``n_classes - 1``).
    disallow_zero_sets : bool, default=False
        If True, the top-ranked class is always included.
    rand : bool, default=True
        If True, randomize the test-time inclusion rule. Calibration scores
        are always randomized.
    random_state : int or None, default=None
        Seed for the randomization. ``None`` draws from NumPy's global RNG
        (the behavior before this parameter existed).

    Returns
    -------
    ndarray of bool, shape (n_test, n_classes)
        ``True`` where the class belongs to the sample's prediction set.
    """
    # np.random (module) and RandomState both expose .rand, so the unseeded
    # path keeps consuming the global stream.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    n_classes = calibration_probs.shape[1]
    k_reg = min(k_reg, n_classes - 1)
    # Penalty per rank position: 0 for the first k_reg ranks, lam_reg after.
    reg_vec = np.array(k_reg * [0,] + (n_classes - k_reg) * [lam_reg,])[None, :]

    n = calibration_labels.shape[0]
    cal_order = calibration_probs.argsort(axis=1)[:, ::-1]
    cal_sort = np.take_along_axis(calibration_probs, cal_order, axis=1)
    cal_sort_reg = cal_sort + reg_vec
    # Rank position of the true label within each sorted row.
    cal_true_rank = np.where(cal_order == calibration_labels[:, None])[1]
    rows = np.arange(n)
    cal_scores = cal_sort_reg.cumsum(axis=1)[rows, cal_true_rank] - rng.rand(n) * cal_sort_reg[rows, cal_true_rank]

    # Finite-sample corrected quantile level.
    q_level = np.ceil((n + 1) * (1 - alpha)) / n
    if q_level > 1:
        # Too few calibration samples for this alpha: np.quantile would raise
        # on a level above 1. Use an infinite threshold so that every class is
        # included.
        qhat = np.inf
    else:
        qhat = np.quantile(cal_scores, q_level, method='higher')

    n_test = test_probs.shape[0]
    test_order = test_probs.argsort(1)[:, ::-1]
    test_sort = np.take_along_axis(test_probs, test_order, axis=1)
    test_sort_reg = test_sort + reg_vec
    test_srt_reg_cumsum = test_sort_reg.cumsum(axis=1)
    if rand:
        indicators = (test_srt_reg_cumsum - rng.rand(n_test, 1) * test_sort_reg) <= qhat
    else:
        indicators = (test_srt_reg_cumsum - test_sort_reg) <= qhat

    if disallow_zero_sets:
        # Column 0 is the top-ranked class in sorted order.
        indicators[:, 0] = True
    # Undo the sort so columns line up with the original class order.
    prediction_sets = np.take_along_axis(indicators, test_order.argsort(axis=1), axis=1)
    return prediction_sets

def accuracy(y_true, y_pred):
    """Fraction of positions where ``y_pred`` equals ``y_true``."""
    hits = y_true == y_pred
    return np.mean(hits)

def set_size(pred_set):
    """Average number of classes contained in each prediction set (row)."""
    sizes = []
    for row in pred_set:
        sizes.append(np.sum(row))
    return np.mean(sizes)

def coverage_rate(y_true, pred_set):
    """Fraction of samples whose true label is inside their prediction set.

    ``y_true`` is used directly as a column index into ``pred_set``.
    """
    row_idx = np.arange(pred_set.shape[0])
    covered = pred_set[row_idx, y_true]
    return covered.mean()

## Eye Movement

In [None]:
import numpy as np
import torch
import openml
from tabicl import TabICLClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score

In [None]:
# Download OpenML dataset 1044 (used for this "Eye Movement" section); the
# trailing comment lists alternative dataset ids.
dataset = openml.datasets.get_dataset(1044) # or 31 or 40688

# Features and target as pandas objects; the last two return values are unused.
X, y, _, _ = dataset.get_data(
    dataset_format="dataframe", target=dataset.default_target_attribute)

# Hold out 20% of the rows for testing, with a fixed seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Pick the compute device: CUDA first, then Apple MPS (only if this torch
# build exposes torch.backends.mps), otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
elif getattr(torch.backends, "mps", None) and torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print("Using device:", device)

Using device: cuda


In [None]:
# Fit TabICL on the training split and get class probabilities for the test
# split; `proba` is reused by the metrics cell that follows.
clf = TabICLClassifier(device=device)
clf.fit(X_train, y_train)  # this is cheap
proba = clf.predict_proba(X_test)  # in-context learning happens here

In [None]:
# Multiclass ROC AUC (one-vs-one) computed from the predicted probabilities.
print("ROC AUC:", roc_auc_score(y_test.to_numpy(dtype=int), proba, multi_class='ovo'))
# Use a dedicated name for the predicted class indices: assigning them to `y`
# would overwrite the dataset target loaded above and break a re-run of the
# train/test split cell.
pred_idx = np.argmax(proba, axis=1)
y_pred = clf.y_encoder_.inverse_transform(pred_idx)
print("Accuracy:", accuracy_score(y_test, y_pred))

ROC AUC: 0.8958693234799394
Accuracy: 0.7340036563071298


## Rain in Australia

In [None]:
from typing import Optional
import os, json
import numpy as np
import pandas as pd
import torch
from skrub import TableVectorizer
from tabicl import TabICLClassifier
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OrdinalEncoder
from sklearn.metrics import accuracy_score, roc_auc_score

# Folder (under the mounted Drive) holding the pre-split .npy arrays.
DATA_DIR = '/content/MyDrive/MyDrive/Datasets/Rain_in_Australia'

# Pick the compute device: CUDA first, then Apple MPS (only if this torch
# build exposes torch.backends.mps), otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
elif getattr(torch.backends, "mps", None) and torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print("Using device:", device)

def load(name) -> Optional[np.ndarray]:
    """Load ``name`` from ``DATA_DIR`` with ``np.load``; return None if the file is absent."""
    path = os.path.join(DATA_DIR, name)
    if not os.path.exists(path):
        return None
    # NOTE: allow_pickle=True can execute arbitrary code while unpickling;
    # only point DATA_DIR at files you trust.
    return np.load(path, allow_pickle=True)

# ---- load arrays ----
# Each split is stored as three files (C_*, N_*, y_*); `load` yields None for
# any file that does not exist.
C_train, N_train, y_train = load('C_train.npy'), load('N_train.npy'), load('y_train.npy')
C_val,   N_val,   y_val   = load('C_val.npy'),   load('N_val.npy'),   load('y_val.npy')
C_test,  N_test,  y_test  = load('C_test.npy'),  load('N_test.npy'),  load('y_test.npy')

# NOTE(review): this line fails with AttributeError if C_train.npy is missing
# (C_train would be None).
print("C_train:", C_train.shape, C_train.dtype)
# ---- build X by concatenating [C | N] ----
def concat_features(C_part, N_part):
    """Join the C and N feature blocks column-wise, skipping any that is None.

    Returns the single available block unchanged when only one is given, and
    raises ValueError when both are None.
    """
    available = []
    for block in (C_part, N_part):
        if block is not None:
            available.append(block)
    if len(available) == 0:
        raise ValueError("No features found (need at least C_* or N_*).")
    if len(available) == 1:
        return available[0]
    return np.concatenate(available, axis=1)

# Assemble the feature matrix of every split; the validation split is
# optional and stays None when neither of its feature files was found.
X_train = concat_features(C_train, N_train)
X_val   = concat_features(C_val,   N_val) if (C_val is not None or N_val is not None) else None
X_test  = concat_features(C_test,  N_test)

# Quick sanity check of shapes, dtypes and the label values present in the test set.
print("X_train:", X_train.shape, X_train.dtype)
print("X_test :", X_test.shape,  X_test.dtype)
print("y_train:", y_train.shape,  y_train.dtype)
print("y_test:", y_test.shape,  y_test.dtype)
print("y_test  unique:", np.unique(y_test))

# Wrap the arrays in DataFrames before they are passed to the pipeline below.
X_train = pd.DataFrame(X_train)
X_test = pd.DataFrame(X_test)

# TableVectorizer preprocesses the mixed-type columns before TabICL sees them.
pipe = make_pipeline(
    TableVectorizer(),  # Automatically handles various data types
    TabICLClassifier(device=device)
)
# pipe = TabICLClassifier(device=device)

pipe.fit(X_train, y_train)  # this is cheap
proba = pipe.predict_proba(X_test)  # in-context learning happens here
print("ROC AUC:", roc_auc_score(y_test, proba, multi_class='ovo'))
# Derive the hard predictions from the probabilities already computed instead
# of calling pipe.predict, which would repeat the expensive inference pass.
# Columns of `proba` follow the order of pipe.classes_.
y_pred = pipe.classes_[np.argmax(proba, axis=1)]
print("Accuracy:", accuracy_score(y_test, y_pred))

Using device: cuda
C_train: (93094, 6) object
X_train: (93094, 18) object
X_test : (29092, 18) object
y_train: (93094,) int64
y_test: (29092,) int64
y_test  unique: [0 1 2]


AttributeError: 'numpy.ndarray' object has no attribute 'to_numpy'

In [None]:
# Multiclass ROC AUC (one-vs-one) computed from the predicted probabilities.
print("ROC AUC:", roc_auc_score(y_test, proba, multi_class='ovo'))
# `pipe` is a scikit-learn Pipeline here, which has no `y_encoder_` attribute;
# `classes_` is available on both a Pipeline and a bare classifier and maps
# probability columns back to labels.
pred_idx = np.argmax(proba, axis=1)
y_pred = pipe.classes_[pred_idx]
print("Accuracy:", accuracy_score(y_test, y_pred))

ROC AUC: 0.8840392885541616
Accuracy: 0.8509212154544205


In [None]:
# Multiclass ROC AUC (one-vs-rest) computed from the predicted probabilities.
print("ROC AUC:", roc_auc_score(y_test, proba, multi_class='ovr'))
# `pipe` is a scikit-learn Pipeline here, which has no `y_encoder_` attribute;
# `classes_` is available on both a Pipeline and a bare classifier and maps
# probability columns back to labels.
pred_idx = np.argmax(proba, axis=1)
y_pred = pipe.classes_[pred_idx]
print("Accuracy:", accuracy_score(y_test, y_pred))

## Banknote Authentication

In [None]:
# Folder (under the mounted Drive) holding the pre-split .npy arrays.
DATA_DIR = '/content/MyDrive/MyDrive/Datasets/banknote_authentication'

# Pick the compute device: CUDA first, then Apple MPS (only if this torch
# build exposes torch.backends.mps), otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
elif getattr(torch.backends, "mps", None) and torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print("Using device:", device)

def load(name) -> Optional[np.ndarray]:
    """Load ``name`` from ``DATA_DIR`` with ``np.load``; return None if the file is absent."""
    path = os.path.join(DATA_DIR, name)
    if not os.path.exists(path):
        return None
    # NOTE: allow_pickle=True can execute arbitrary code while unpickling;
    # only point DATA_DIR at files you trust.
    return np.load(path, allow_pickle=True)

# ---- load arrays ----
# Only N_* feature files are loaded for this dataset; `load` yields None for
# any file that does not exist.
N_train, y_train = load('N_train.npy'), load('y_train.npy')
N_val,   y_val   = load('N_val.npy'),   load('y_val.npy')
N_test,  y_test  = load('N_test.npy'),  load('y_test.npy')

# The N_* arrays are used as the feature matrices as-is.
X_train = N_train
X_val   = N_val
X_test  = N_test

# Quick sanity check of shapes, dtypes and the label values present in the test set.
print("X_train:", X_train.shape, X_train.dtype)
print("X_test :", X_test.shape,  X_test.dtype)
print("y_train:", y_train.shape,  y_train.dtype)
print("y_test:", y_test.shape,  y_test.dtype)
print("y_test  unique:", np.unique(y_test))

# Give all three splits the same container type so the classifier receives the
# calibration data in the same form as the training and test data.
X_train = pd.DataFrame(X_train)
X_val = pd.DataFrame(X_val)
X_test = pd.DataFrame(X_test)

# pipe = make_pipeline(
#     TableVectorizer(),  # Automatically handles various data types
#     TabICLClassifier(device=device)
# )
pipe = TabICLClassifier(device=device)

pipe.fit(X_train, y_train)  # this is cheap
# Probabilities on the validation split serve as the conformal calibration set.
cal_proba = pipe.predict_proba(X_val)  # in-context learning happens here
proba = pipe.predict_proba(X_test)  # in-context learning happens here

Using device: cuda
X_train: (877, 4) float64
X_test : (275, 4) float64
y_train: (877,) int64
y_test: (275,) int64
y_test  unique: [0 1]


In [None]:
# RAPS randomizes its scores through NumPy's global RNG; seed it so the set
# sizes and coverage rates reported below are reproducible across runs.
np.random.seed(42)

# Calibrate on the validation split and build prediction sets for the test split.
lac_pred_set = lac_prediction_set(cal_proba, proba, y_val)
aps_pred_set = aps_prediction_set(cal_proba, proba, y_val)
raps_pred_set = raps_prediction_set(cal_proba, proba, y_val)

In [None]:
# Binary ROC AUC from the probability of the second class column.
print("ROC AUC:", roc_auc_score(y_test, proba[:,1]))
# Use a dedicated name for the predicted class indices rather than `y`, which
# is the dataset target variable in an earlier section of this notebook.
pred_idx = np.argmax(proba, axis=1)
y_pred = pipe.y_encoder_.inverse_transform(pred_idx)
print("Accuracy:", accuracy_score(y_test, y_pred))
# SS = mean prediction-set size, CR = empirical coverage rate.
# NOTE(review): coverage_rate indexes set columns with the raw y_test values,
# so it assumes labels equal the encoded class indices (0..K-1) - confirm.
print("SS (LAC):", set_size(lac_pred_set))
print("SS (APS):", set_size(aps_pred_set))
print("SS (RAPS):", set_size(raps_pred_set))
print("CR (LAC):", coverage_rate(y_test, lac_pred_set))
print("CR (APS):", coverage_rate(y_test, aps_pred_set))
print("CR (RAPS):", coverage_rate(y_test, raps_pred_set))

ROC AUC: 0.5084913746919533
Accuracy: 0.5563636363636364
SS (LAC): 1.8254545454545454
SS (APS): 2.0
SS (RAPS): 1.8581818181818182
CR (LAC): 0.9236363636363636
CR (APS): 1.0
CR (RAPS): 0.9418181818181818
