The model stacks a BioBERT encoder, a multi-layer BiLSTM, and a single-head self-attention layer, followed by a pooled classification head:

```python
import torch
from torch import nn
from transformers import AutoModel


class BiLSTMAttentionBERT(nn.Module):
    def __init__(self,
                 hidden_dim=256,
                 num_classes=22,   # based on the label distribution
                 num_layers=2,     # stacked LSTM layers
                 dropout=0.1):
        super().__init__()

        # Load BioBERT instead of plain BERT
        self.bert_model = AutoModel.from_pretrained('dmis-lab/biobert-base-cased-v1.2')
        bert_dim = self.bert_model.config.hidden_size  # still 768 for BioBERT base

        # Dropout for BERT outputs
        self.dropout_bert = nn.Dropout(dropout)

        # Multi-layer BiLSTM
        self.lstm = nn.LSTM(
            input_size=bert_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            bidirectional=True,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0
        )

        # Single-head self-attention over the BiLSTM states
        self.attention = nn.MultiheadAttention(
            embed_dim=hidden_dim * 2,  # *2 for bidirectional
            num_heads=1,
            dropout=dropout,
            batch_first=True
        )

        # Regularization layers
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout + 0.1)
        self.layer_norm = nn.LayerNorm(hidden_dim * 2)
        self.batch_norm = nn.BatchNorm1d(hidden_dim * 2)

        # Classification head
        self.classifier = nn.Sequential(
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.BatchNorm1d(hidden_dim),
            nn.Linear(hidden_dim, num_classes)
        )
    def forward(self, input_ids, attention_mask):
        # BERT encoding
        bert_output = self.bert_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=True
        )
        sequence_output = self.dropout_bert(bert_output.last_hidden_state)

        # BiLSTM processing
        lstm_out, _ = self.lstm(sequence_output)
        lstm_out = self.layer_norm(lstm_out)

        # Self-attention; padded positions are excluded from the keys
        attn_out, _ = self.attention(
            query=lstm_out,
            key=lstm_out,
            value=lstm_out,
            key_padding_mask=(attention_mask == 0),
            need_weights=False
        )

        # Mean-pool over real tokens only, then normalize
        mask = attention_mask.unsqueeze(-1).float()
        pooled = (attn_out * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)
        pooled = self.batch_norm(pooled)
        pooled = self.dropout2(pooled)

        # Classification
        return self.classifier(pooled)
```
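A minimal usage sketch, assuming the matching BioBERT tokenizer; the sample sentences are hypothetical and the model here is untrained:

```python
from transformers import AutoTokenizer
import torch

tokenizer = AutoTokenizer.from_pretrained('dmis-lab/biobert-base-cased-v1.2')
model = BiLSTMAttentionBERT(hidden_dim=256, num_classes=22)
model.eval()  # put BatchNorm/Dropout in inference mode

# Hypothetical inputs; any short clinical-style sentences work
texts = [
    "Patient reports persistent headache and nausea.",
    "No acute distress; vitals within normal limits.",
]
batch = tokenizer(texts, padding=True, truncation=True,
                  max_length=128, return_tensors='pt')

with torch.no_grad():
    logits = model(batch['input_ids'], batch['attention_mask'])  # shape (2, 22)
    preds = logits.argmax(dim=-1)
```

For training, the logits pair with `nn.CrossEntropyLoss` against integer class labels in `[0, 22)`; note that the two `BatchNorm1d` layers require batches larger than one in training mode.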