Enhance model loading for prediction by integrating pre-trained BERT and refining checkpoint handling
utils/prediction.py  +19 -5
CHANGED
@@ -1,6 +1,6 @@
 from utils.model import BiLSTMAttentionBERT, BiLSTMConfig
 import torch
-from transformers import AutoTokenizer
+from transformers import AutoTokenizer, AutoModel
 from sklearn.preprocessing import LabelEncoder
 import numpy as np
 import streamlit as st
@@ -12,6 +12,11 @@ from huggingface_hub import hf_hub_download
 def load_model_for_prediction():
     try:
         st.write("Starting model loading...")
+
+        # Initialize BERT first
+        bert = AutoModel.from_pretrained('dmis-lab/biobert-base-cased-v1.2')
+
+        # Initialize config and model
         config = BiLSTMConfig(
             hidden_dim=128,
             num_classes=22,
@@ -19,20 +24,29 @@
             dropout=0.5
         )
 
-        # Initialize model
         model = BiLSTMAttentionBERT(config)
+        model.bert = bert  # Set pre-trained BERT
 
-        # Load checkpoint
+        # Load custom layers from checkpoint
         model_path = hf_hub_download(
             repo_id="joko333/BiLSTM_v01",
             filename="model_epoch8_acc72.53.pt"
         )
         checkpoint = torch.load(model_path, map_location='cpu')
 
-        #
+        # Debug checkpoint structure
+        st.write("Checkpoint keys:", checkpoint.keys())
+
         if 'model_state_dict' in checkpoint:
+            # Extract only custom layer weights
+            custom_state_dict = {}
             state_dict = checkpoint['model_state_dict']
-
+            for key, value in state_dict.items():
+                if not key.startswith('bert.'):
+                    custom_state_dict[key] = value
+
+            # Load custom layers
+            model.load_state_dict(custom_state_dict, strict=False)
             st.write("Model loaded successfully")
         else:
             st.error("Invalid checkpoint format")
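One caveat with the new `strict=False` load (a follow-up sketch, not part of this commit): any key that fails to match is silently skipped, so a renamed layer would keep its random initialization without warning. `load_state_dict` returns the missing and unexpected keys, which makes the partial load easy to audit:

    # Sketch: audit the partial load performed with strict=False.
    result = model.load_state_dict(custom_state_dict, strict=False)

    # Model keys absent from the filtered checkpoint. The bert.* entries
    # are expected to be missing here, since they come from the
    # pre-trained BioBERT weights rather than the checkpoint.
    missing_custom = [k for k in result.missing_keys
                      if not k.startswith('bert.')]

    # Checkpoint keys unknown to the model usually indicate a renamed
    # layer or a stale checkpoint file.
    if missing_custom or result.unexpected_keys:
        st.warning(f"Partial load: missing={missing_custom}, "
                   f"unexpected={result.unexpected_keys}")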
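Since the checkpoint is downloaded from the Hub, the `torch.load` call could also be hardened. On PyTorch 1.13 and newer, `weights_only=True` restricts unpickling to tensors and primitive containers, so a tampered file cannot execute arbitrary code; note it will reject checkpoints that embed Python objects such as a pickled LabelEncoder:

    # Sketch (assumes PyTorch >= 1.13): load only tensor data from the
    # downloaded file instead of trusting arbitrary pickled objects.
    checkpoint = torch.load(model_path, map_location='cpu',
                            weights_only=True)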
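For context, an end-to-end smoke test of the loader might look like the sketch below. The diff does not show what `load_model_for_prediction` returns or what `BiLSTMAttentionBERT.forward` accepts, so the return value, the forward signature, and the example sentence are all assumptions; the tokenizer is simply matched to the BioBERT checkpoint attached to the model.

    import torch
    from transformers import AutoTokenizer

    # Assumption: the function returns the assembled model (its return
    # statement falls outside the hunks shown in this diff).
    model = load_model_for_prediction()
    model.eval()

    # Pair the model with the tokenizer of the same BioBERT checkpoint.
    tokenizer = AutoTokenizer.from_pretrained(
        'dmis-lab/biobert-base-cased-v1.2')
    inputs = tokenizer("Aspirin inhibits platelet aggregation.",
                       return_tensors='pt', truncation=True, padding=True)

    with torch.no_grad():
        # Assumption: forward takes input_ids/attention_mask and returns
        # logits over the 22 classes configured above.
        logits = model(inputs['input_ids'], inputs['attention_mask'])
        predicted_class = int(torch.argmax(logits, dim=-1))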