sqb-predict-api / model_inference.py
Ahmad Hathim bin Ahmad Azman
Fixed model loading
3674571
import os
import nltk
# βœ… Force NLTK to save/download in /tmp (writeable in Hugging Face Spaces)
nltk_data_dir = "/tmp/nltk_data"
os.makedirs(nltk_data_dir, exist_ok=True)
os.environ["NLTK_DATA"] = nltk_data_dir
nltk.data.path = [nltk_data_dir]
import torch
import numpy as np
import textstat
from utils.preprocess import compute_text_features
from model_architecture import EnsembleBertBiLSTMRegressor
from huggingface_hub import hf_hub_download
HF_REPO = "hathimazman/sqb-predict"
MODEL_CACHE = "/tmp/models" # βœ… Writable on Hugging Face Spaces
os.makedirs(MODEL_CACHE, exist_ok=True)
def download_from_hf(filename: str):
local_path = os.path.join(MODEL_CACHE, filename)
if not os.path.exists(local_path):
print(f"⬇ Downloading {filename} from {HF_REPO}")
hf_hub_download(repo_id=HF_REPO, filename=filename, local_dir=MODEL_CACHE)
return local_path
def load_model():
model_path = download_from_hf("best_checkpoint_regression.pt")
checkpoint = torch.load(model_path, map_location="cpu")
model = EnsembleBertBiLSTMRegressor(
model_name_mcq="microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract",
model_name_clinical="emilyalsentzer/Bio_ClinicalBERT",
hidden_dim=768,
extra_dim=67,
)
model.load_state_dict(checkpoint["model_state"])
model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
return model, device
def predict_from_input(data, model, device, tok_mcq, tok_clin, encoder, scaler):
"""
Predict difficulty and discrimination index for a single MCQ item.
Combines text, engineered numeric features, and one-hot categorical features.
"""
# Combine question text
text = " ".join([
data["StemText"],
data["LeadIn"],
data["OptionA"],
data["OptionB"],
data["OptionC"],
data["OptionD"]
])
# Compute text-derived numeric features (8 total)
features = compute_text_features(
data["StemText"],
data["LeadIn"],
[data["OptionA"], data["OptionB"], data["OptionC"], data["OptionD"]]
)
# Encode categorical features safely
known = encoder.categories_
fields = ["DepartmentName", "CourseName", "BloomLevel"]
cat_data = [[
data[f] if data[f] in known[i] else "Other"
for i, f in enumerate(fields)
]]
cat_enc = encoder.transform(cat_data)
num_feats_scaled = scaler.transform(features) # scaler expects only 8 numeric features
feats = np.hstack([num_feats_scaled, cat_enc]) # combine scaled numeric + one-hot encoded
extra_feats = torch.tensor(feats, dtype=torch.float32).to(device)
# Tokenize text using both models
enc_mcq = tok_mcq(text, truncation=True, padding="max_length", max_length=512, return_tensors="pt")
enc_clin = tok_clin(text, truncation=True, padding="max_length", max_length=512, return_tensors="pt")
ids_mcq = enc_mcq["input_ids"].to(device)
mask_mcq = enc_mcq["attention_mask"].to(device)
ids_clin = enc_clin["input_ids"].to(device)
mask_clin = enc_clin["attention_mask"].to(device)
# Forward pass through the model
with torch.no_grad():
preds = model(ids_mcq, mask_mcq, ids_clin, mask_clin, extra_feats)
# Extract predicted values
diff, disc = preds.squeeze().tolist()
return {
"difficulty": round(float(diff), 3),
"discrimination": round(float(disc), 3)
}