Ahmad Hathim bin Ahmad Azman committed
Commit b8fb185 · 1 Parent(s): 2ff2bcb

change setting

Files changed (2):
  1. app.py +38 -10
  2. space.yaml → runtime.yaml +1 -1
app.py CHANGED
@@ -3,29 +3,51 @@ from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 import torch
 import joblib
-import numpy as np
 from transformers import AutoTokenizer
 from model_inference import load_model, predict_from_input
+import os
 
 app = FastAPI(title="Question Difficulty/Discrimination Predictor")
 
-# CORS for Next.js frontend
+# CORS for frontend usage (Next.js, Streamlit, etc.)
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["http://localhost:3000"],
+    allow_origins=["*"],
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
 )
 
-# Load model, encoder, and scaler once at startup
-model, device = load_model("assets/best_checkpoint_regression.pt")
-encoder = joblib.load("assets/onehot_encoder.pkl")
-scaler = joblib.load("assets/scaler.pkl")
+# Global variables, populated once at startup
+model = None
+device = None
+encoder = None
+scaler = None
+tok_mcq = None
+tok_clin = None
 
-# Tokenizers
-tok_mcq = AutoTokenizer.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract")
-tok_clin = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
+@app.on_event("startup")
+def load_all_resources():
+    """
+    ✅ Load model + tokenizers + encoders only once at startup.
+    Avoids slow import times & prevents "Space in Error".
+    """
+    global model, device, encoder, scaler, tok_mcq, tok_clin
+
+    print("🚀 Loading model and dependencies...")
+
+    # Load model from local or Hugging Face
+    model, device = load_model("assets/best_checkpoint_regression.pt")
+
+    # Load pretrained scaler + encoder
+    encoder = joblib.load("assets/onehot_encoder.pkl")
+    scaler = joblib.load("assets/scaler.pkl")
+
+    # Load tokenizers eagerly at startup
+    tok_mcq = AutoTokenizer.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract")
+    tok_clin = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
+
+    print("✅ All resources successfully loaded.")
 
 # Input schema
 class QuestionInput(BaseModel):
@@ -45,6 +67,12 @@ def health():
 
 @app.post("/predict")
 def predict(input_data: QuestionInput):
+    """
+    ✅ Main prediction endpoint.
+    """
+    if model is None:
+        return {"error": "Model not loaded. Try again in a few seconds."}
+
     pred = predict_from_input(
         input_data.dict(), model, device,
         tok_mcq, tok_clin, encoder, scaler
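
Note on the widened CORS setting (a reviewer-style observation, not part of the commit): the CORS spec forbids a literal Access-Control-Allow-Origin: * on credentialed requests, so if a frontend ever calls this API with cookies (credentials: "include"), explicitly listed origins are the safer configuration. A minimal sketch, where both origin URLs are placeholders rather than values from this repo:

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

app = FastAPI(title="Question Difficulty/Discrimination Predictor")

app.add_middleware(
    CORSMiddleware,
    # Placeholder origins: local Next.js dev server + a hypothetical production URL
    allow_origins=[
        "http://localhost:3000",
        "https://frontend.example.com",
    ],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)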
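On the startup hook itself: @app.on_event("startup") works but is deprecated in recent FastAPI releases in favor of a lifespan context manager, which also avoids the module-level globals. A minimal sketch assuming the same load_model helper and asset path as in app.py:

from contextlib import asynccontextmanager

from fastapi import FastAPI

from model_inference import load_model  # same helper used by app.py

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Runs once before the first request is served
    app.state.model, app.state.device = load_model("assets/best_checkpoint_regression.pt")
    yield
    # Optional teardown after shutdown goes here

app = FastAPI(title="Question Difficulty/Discrimination Predictor", lifespan=lifespan)

Handlers can then read the resources via request.app.state instead of declaring global.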
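Finally, the model is None guard returns its error body with HTTP 200, so clients must inspect the payload to detect the warm-up case. A variant (a sketch, not the committed code) that signals readiness through the status code instead:

from fastapi import HTTPException

@app.post("/predict")
def predict(input_data: QuestionInput):
    if model is None:
        # 503 Service Unavailable: still loading; clients can simply retry
        raise HTTPException(status_code=503, detail="Model not loaded. Try again in a few seconds.")
    # ... rest of the handler unchanged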
space.yaml → runtime.yaml RENAMED
@@ -4,7 +4,7 @@ runtime:
 
 app:
   file: app.py
-  entrypoint: "uvicorn app:app --host 0.0.0.0 --port 7860"
+  entrypoint: "uvicorn app:app --host 0.0.0.0 --port $PORT"
 
 hardware:
   accelerator: "cpu"
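
The entrypoint change parameterizes the port: instead of hard-coding 7860, the server binds to whatever $PORT holds at launch, which suggests the hosting runtime injects the port via that environment variable (7860 is the conventional Spaces default). A minimal sketch of the same resolution done from Python, assuming a hypothetical __main__ block were added to app.py:

import os

import uvicorn

if __name__ == "__main__":
    # Mirror the runtime.yaml entrypoint: bind to $PORT, falling back to 7860
    port = int(os.environ.get("PORT", "7860"))
    uvicorn.run("app:app", host="0.0.0.0", port=port)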