Spaces:
Running
Update app.py
Browse files

app.py
CHANGED
@@ -1,4 +1,3 @@
-import spaces
 import gradio as gr
 import torch
 from transformers import GPT2LMHeadModel, GPT2Tokenizer
@@ -7,11 +6,10 @@ from transformers import GPT2LMHeadModel, GPT2Tokenizer
 model = GPT2LMHeadModel.from_pretrained("gpt2")
 tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
 
-@spaces.GPU
-def get_next_token_probs(text, top_k=5):
+def get_next_token_probs(text):
     # Handle empty input
     if not text.strip():
-        return [""] * top_k
+        return ["No input text"] * 5
 
     # Tokenize input
     input_ids = tokenizer.encode(text, return_tensors="pt")
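For context, import spaces exists to support the ZeroGPU pattern on Hugging Face Spaces: a decorator that requests a GPU only while the decorated function runs, which is what the removed decorator line above the function definition points to. Dropping both leaves the model running on the Space's default device. A minimal sketch of the usual pattern, assuming standard ZeroGPU usage rather than anything specific to this file:

    import spaces  # Hugging Face ZeroGPU helper; effectively a no-op outside Spaces

    @spaces.GPU  # a GPU is allocated only while this call executes
    def predict(prompt):
        # model inference would run here on the allocated GPU
        return prompt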
@@ -25,8 +23,8 @@ def get_next_token_probs(text, top_k=5):
     next_token_logits = logits[0, -1, :]
     next_token_probs = torch.softmax(next_token_logits, dim=0)
 
-    # Get top-k tokens and their probabilities
-    topk_probs, topk_indices = torch.topk(next_token_probs, top_k)
+    # Get top-5 tokens and their probabilities
+    topk_probs, topk_indices = torch.topk(next_token_probs, 5)
     topk_tokens = [tokenizer.decode([idx]) for idx in topk_indices]
 
     # Format the results as strings
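The softmax/topk lines above are the heart of the app: the logits at the final position are normalized into a distribution over GPT-2's vocabulary, and the five most probable next tokens are decoded. A self-contained sketch of that computation (the prompt string is just an example):

    import torch
    from transformers import GPT2LMHeadModel, GPT2Tokenizer

    model = GPT2LMHeadModel.from_pretrained("gpt2")
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    model.eval()

    text = "The weather tomorrow will be"
    input_ids = tokenizer.encode(text, return_tensors="pt")

    with torch.no_grad():
        logits = model(input_ids).logits        # shape: (1, seq_len, vocab_size)

    # Distribution over the vocabulary for the next token only
    next_token_probs = torch.softmax(logits[0, -1, :], dim=0)
    topk_probs, topk_indices = torch.topk(next_token_probs, 5)

    for idx, prob in zip(topk_indices, topk_probs):
        print(f"{tokenizer.decode([idx])!r}  {prob.item()*100:.1f}%")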
@@ -34,35 +32,15 @@ def get_next_token_probs(text, top_k=5):
     for i, (token, prob) in enumerate(zip(topk_tokens, topk_probs)):
         # Format probability as percentage with 1 decimal place
         prob_percent = f"{prob.item()*100:.1f}%"
-        # Clean up token display (replace space with visible space symbol)
-        display_token = token.replace(" ", "␣")
+        # Clean up token display (replace space with visible space symbol)
+        display_token = token.replace(" ", "␣")
         # Format the output string
         formatted_results.append(f"{i+1}. \"{display_token}\" ({prob_percent})")
 
     return formatted_results
 
-# Create custom CSS
-custom_css = """
-.token-box {
-    margin-top: 10px;
-    padding: 15px;
-    border-radius: 8px;
-    background-color: #f7f7f7;
-    font-family: monospace;
-    font-size: 16px;
-}
-.token-item {
-    margin: 8px 0;
-    padding: 8px;
-    background-color: white;
-    border-left: 4px solid #2c8ecb;
-    border-radius: 4px;
-}
-footer {display: none}
-"""
-
-# Create minimal interface
-with gr.Blocks(css=custom_css) as demo:
+# Create minimal interface with simpler components
+with gr.Blocks(css="footer {display: none}") as demo:
     gr.Markdown("### GPT-2 Next Token Predictor")
 
     # Input textbox
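One detail worth noting in the formatting loop: GPT-2's BPE vocabulary encodes a leading space as part of the token, so most continuations decode to strings like " sunny", and replacing the space with ␣ keeps that boundary visible in the UI. A tiny illustration (the token and percentage are made up):

    token = " sunny"                          # hypothetical decoded token
    display_token = token.replace(" ", "␣")   # make the leading space visible
    print(f'1. "{display_token}" (12.3%)')    # prints: 1. "␣sunny" (12.3%)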
@@ -72,25 +50,28 @@ with gr.Blocks(css=custom_css) as demo:
         value="The weather tomorrow will be"
     )
 
-    # …
-    …
-    …
-    …
+    # Simple header for results
+    gr.Markdown("##### Most likely next tokens:")
+
+    # Individual output textboxes for each token
+    token1 = gr.Markdown()
+    token2 = gr.Markdown()
+    token3 = gr.Markdown()
+    token4 = gr.Markdown()
+    token5 = gr.Markdown()
 
-    …
-    def update_tokens(text):
-        return get_next_token_probs(text)
+    token_outputs = [token1, token2, token3, token4, token5]
 
     # Set up the live update
     input_text.change(
-        fn=update_tokens,
+        fn=get_next_token_probs,
         inputs=input_text,
         outputs=token_outputs
     )
 
     # Initialize with default text
     demo.load(
-        fn=update_tokens,
+        fn=get_next_token_probs,
         inputs=input_text,
         outputs=token_outputs
     )
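The wiring in this last hunk drives the whole app: a function that returns a list of five strings fans out across the five Markdown components in order, input_text.change re-runs it on every edit, and demo.load runs it once at startup so the default prompt is populated immediately. A self-contained sketch of the same pattern, using a stand-in function (fake_probs is hypothetical) so it runs without the model:

    import gradio as gr

    def fake_probs(text):
        # Stand-in for get_next_token_probs: one string per output component
        return [f'{i + 1}. "token" (0.0%)' for i in range(5)]

    with gr.Blocks() as demo:
        box = gr.Textbox(label="Prompt", value="The weather tomorrow will be")
        outs = [gr.Markdown() for _ in range(5)]              # five output slots
        box.change(fn=fake_probs, inputs=box, outputs=outs)   # live update on edit
        demo.load(fn=fake_probs, inputs=box, outputs=outs)    # fill on startup

    demo.launch()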