# (Page status banner captured along with the source: "Spaces: Sleeping")
# Loads your checkpoint and prints the top 20 most likely first tokens
import tiktoken
import torch

from model import GPT, GPTConfig

# Load checkpoint (mapped to CPU so no GPU is needed just to inspect it).
# NOTE(review): torch.load unpickles the file -- only open checkpoints you trust.
ckpt = torch.load("out-shiffman/ckpt.pt", map_location="cpu")
cfg = GPTConfig(**ckpt["model_args"])

# Build model and load weights
model = GPT(cfg)
model.load_state_dict(ckpt["model"])
model.eval()

# GPT-2 tokenizer and BOS/EOT id
enc = tiktoken.get_encoding("gpt2")
eot_id = getattr(enc, "eot_token", None)
if eot_id is None:
    # Fallback: tiktoken refuses to encode special-token text unless it is
    # explicitly allowed, so pass allowed_special to avoid a ValueError.
    eot_id = enc.encode("<|endoftext|>", allowed_special={"<|endoftext|>"})[0]

# Single-token context = "silence"
idx = torch.tensor([[eot_id]], dtype=torch.long)

# Forward to get next-token distribution; gradients are not needed for
# inspection, so skip building the autograd graph.
with torch.no_grad():
    out = model(idx)
# The model may return either the logits alone or a tuple whose first item
# is the logits; handle both.
logits = out[0] if isinstance(out, tuple) else out
probs = torch.softmax(logits[0, -1], dim=0)

# Show top-20 candidates (k is clamped so a small vocabulary cannot make topk fail)
top_p, top_i = torch.topk(probs, k=min(20, probs.numel()))
for p, i in zip(top_p.tolist(), top_i.tolist()):
    print(f"{p:.4f} {i:>6} {repr(enc.decode([i]))}")
print("-------")

# Keep showing "top tokens", but filter to word-like pieces and strip leading spaces.
# A wider candidate pool (200) is taken because many of them get filtered out below.
top_p, top_i = torch.topk(probs, k=min(200, probs.numel()))
# Helper to decide if a decoded token looks like a word
def is_wordlike(s: str) -> bool:
    """Return True if *s* contains at least one alphabetic character.

    Leading whitespace does not affect the result (whitespace is never
    alphabetic), so tokens such as " the" count as word-like while pure
    punctuation, digits, whitespace, or the empty string do not.
    """
    return any(ch.isalpha() for ch in s)
# Show the top 20 word-like first tokens
shown = 0
for p, i in zip(top_p.tolist(), top_i.tolist()):
    # Strip the leading space that GPT-2 style tokens often carry so the
    # printed piece reads as a bare word.
    tok = enc.decode([i]).lstrip()
    if not is_wordlike(tok):
        continue
    print(f"{p:.4f} {i:>6} {repr(tok)}")
    shown += 1
    # Stop after 20 printed rows; fewer are shown if the candidate pool
    # runs out of word-like tokens first.
    if shown == 20:
        break