Spaces:

Ashish-R
/

LLMFromScratch

Sleeping

LLMFromScratch / multiHead.py

Ashish Reddy

added all blocks of codes

dde4f12 7 months ago

1.13 kB

	import torch, torch.nn as nn

	batch_size = 64
	max_len = 256
	d_model = 384
	n_head = 6
	d_q = int(d_model / n_head)
	dropout = 0.2

	from head import Head

	class MultiHead(nn.Module):
	def __init__(self, n_head, d_q):
	super().__init__()
	self.heads = nn.ModuleList([Head(d_q) for _ in range(n_head)]) # Create a list of 6 heads with different randomized weights each
	self.proj = nn.Linear(d_model, d_model) # You concat your 6 heads to shape (B, S, 384) * (384, 384) --> (B, S, 384) (Ready to be added! Residual connection)
	self.dropout = nn.Dropout(dropout)

	def forward(self, x):
	concatenated_outputs = torch.cat([_(x) for _ in self.heads], dim=-1) # Concat each output (B, S, 64) horizontally to get (B, S, 384) as there are 6 heads
	output = self.proj(concatenated_outputs)
	output = self.dropout(output)
	return output

	if __name__ == "__main__":
	x = torch.randn(batch_size, max_len, d_model)
	multi_head = MultiHead(n_head, d_q)
	output = multi_head(x)

	print("Input shape:", x.shape)
	print("Output shape from multi-head:", output.shape)