# babbleGPT / first.py
# Loads your checkpoint and prints the most likely first tokens (top 20 overall, then the top 20 word-like pieces)
import torch, tiktoken
from model import GPT, GPTConfig
# Load checkpoint
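# (assumes a nanoGPT-style checkpoint dict with "model_args" config kwargs and a "model" state_dict)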
ckpt = torch.load("out-shiffman/ckpt.pt", map_location="cpu")
cfg = GPTConfig(**ckpt["model_args"])
# Build model and load weights
model = GPT(cfg)
model.load_state_dict(ckpt["model"])
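# (If the checkpoint came from a torch.compile'd run, nanoGPT prefixes state-dict keys with
#  "_orig_mod."; strip that prefix before load_state_dict if it complains about unexpected keys.)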
model.eval()
# GPT-2 tokenizer and BOS/EOT id
enc = tiktoken.get_encoding("gpt2")
eot_id = enc.eot_token if getattr(enc, "eot_token", None) is not None else enc.encode("<|endoftext|>", allowed_special={"<|endoftext|>"})[0]
# Single-token context = “silence”
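# (<|endoftext|> separates documents in GPT-2-style training data, so conditioning on it
#  approximates "start of a new document")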
idx = torch.tensor([[eot_id]], dtype=torch.long)
# Forward pass to get the next-token distribution (no gradients needed for inference)
with torch.no_grad():
    out = model(idx)
logits = out[0] if isinstance(out, tuple) else out
probs = torch.softmax(logits[0, -1], dim=0)
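# Assuming model.py is the nanoGPT GPT implementation: forward() returns (logits, loss),
# and with no targets it only computes logits for the last position, so logits[0, -1]
# is the next-token distribution in either case.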
# Show top-20 candidates
top_p, top_i = torch.topk(probs, k=20)
for p, i in zip(top_p.tolist(), top_i.tolist()):
print(f"{p:.4f} {i:>6} {repr(enc.decode([i]))}")
print("-------")
# Keep showing “top tokens”, but filter to word-like pieces and strip leading spaces
top_p, top_i = torch.topk(probs, k=200)
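# Over-fetch candidates so at least 20 survive the word-like filter below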
# Helper to decide if a decoded token looks like a word
def is_wordlike(s: str) -> bool:
    s = s.lstrip()
    return any(ch.isalpha() for ch in s)
# Show the top 20 word-like first tokens
shown = 0
for p, i in zip(top_p.tolist(), top_i.tolist()):
    tok = enc.decode([i]).lstrip()
    if is_wordlike(tok):
        print(f"{p:.4f} {i:>6} {repr(tok)}")
        shown += 1
        if shown == 20:
            break
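# Run from the training repo root so model.py and out-shiffman/ckpt.pt resolve, e.g.:
#   python first.py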