import torch.nn as nn

from transformers import AutoModelForSequenceClassification

from loraLayer import LoRALayer  # local module providing the low-rank adapter layer

# LoRA hyperparameters: rank of the low-rank update and its scaling factor.
RANK = 4
ALPHA = 4

# Pre-trained checkpoint to adapt.
model_ckpt = "distilbert-base-uncased"
class LoRALinear(nn.Module):
    """Wraps a frozen linear layer and adds a trainable LoRA adapter in parallel."""

    def __init__(self, original_layer, rank, alpha):
        super().__init__()
        self.in_features = original_layer.in_features
        self.out_features = original_layer.out_features
        self.original_layer = original_layer
        self.lora = LoRALayer(self.in_features, self.out_features, rank, alpha)

    def forward(self, x):
        # Frozen pre-trained projection plus the low-rank LoRA update.
        original_output = self.original_layer(x)
        lora_output = self.lora(x)
        return original_output + lora_output
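# NOTE (assumption): LoRALayer is imported from the local loraLayer module, which is
# not shown here. For reference, a minimal sketch along the usual LoRA lines -- two
# low-rank matrices A (in_features x rank) and B (rank x out_features), with B
# zero-initialized so the adapter starts as a no-op, scaled by alpha / rank -- might
# look like the following; the real loraLayer module may differ:
#
#     import torch
#
#     class LoRALayer(nn.Module):
#         def __init__(self, in_dim, out_dim, rank, alpha):
#             super().__init__()
#             self.A = nn.Parameter(torch.randn(in_dim, rank) / rank ** 0.5)
#             self.B = nn.Parameter(torch.zeros(rank, out_dim))
#             self.scaling = alpha / rank
#
#         def forward(self, x):
#             # x @ A @ B is the low-rank update; zero-initialized B means no change at start.
#             return self.scaling * (x @ self.A @ self.B)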
# Load the pre-trained model with a sequence-classification head.
model = AutoModelForSequenceClassification.from_pretrained(model_ckpt)

# Freeze every pre-trained weight; only the LoRA adapters injected below stay trainable.
# Note this also freezes the newly initialized classification head.
for param in model.parameters():
    param.requires_grad = False

# Swap the query and value projections of every transformer block for LoRA-wrapped versions.
print("--- Injecting LoRA adapters into q_lin and v_lin layers of DistilBERT ---")
for layer in model.distilbert.transformer.layer:
    layer.attention.q_lin = LoRALinear(layer.attention.q_lin, RANK, ALPHA)
    layer.attention.v_lin = LoRALinear(layer.attention.v_lin, RANK, ALPHA)
print("INFO: LoRA adapters injected")
# List the parameters that remain trainable; these should all be LoRA weights.
print("\nTrainable parameters:")
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name)
# Compare the total parameter count with the trainable (LoRA-only) count.
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"\nTotal parameters: {total_params}")
print(f"Trainable LoRA parameters: {trainable_params}")
print(f"Percentage of trainable parameters: {100 * trainable_params / total_params:.4f}%")
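# A possible next step (a sketch, not part of the original script): hand only the
# trainable LoRA parameters to an optimizer so the frozen backbone is never updated.
from torch.optim import AdamW

optimizer = AdamW(
    (p for p in model.parameters() if p.requires_grad),  # LoRA adapter weights only
    lr=1e-4,  # assumed placeholder learning rate, not taken from the original script
)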