Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -76,6 +76,15 @@ def stream_wp_token_ids():
|
|
| 76 |
for id in ids:
|
| 77 |
yield f":{next(color)}-background[{id}] "
|
| 78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
### Tokenizer Descriptions ###
|
| 81 |
|
|
@@ -131,3 +140,5 @@ elif tokenizer == "Byte Pair Encoding (Open AI GPT-4o)":
|
|
| 131 |
if token_id == True:
|
| 132 |
color = itertools.cycle(colors)
|
| 133 |
st.write(stream_wp_token_ids)
|
|
|
|
|
|
|
|
|
| 76 |
for id in ids:
|
| 77 |
yield f":{next(color)}-background[{id}] "
|
| 78 |
|
| 79 |
+
def num_tokens(txt):
    """Return a one-line summary of the token count and tokens-per-word rate.

    The text is split into words with ``white_space_tokenizer`` and the
    number of token ids is divided by that word count.

    Args:
        txt: The input text whose words are counted.

    Returns:
        A string of the form ``'Token count <n>, f-rate <n / words>'``, or an
        empty string when the token ids are unavailable.

    Note:
        ``ids`` is not a parameter; it is read from the enclosing module
        scope (presumably the ids produced by the currently selected
        tokenizer -- confirm against the caller).
    """
    words = white_space_tokenizer(txt)
    # Count the words just produced (the original read ``n_words`` before it
    # was assigned); fall back to 1 so empty input cannot divide by zero.
    n_words = len(words) or 1
    try:
        return f'Token count {len(ids)}, f-rate {len(ids)/n_words}'
    except Exception:
        # Best effort: if ``ids`` is missing or has no length, show nothing
        # rather than breaking the page.
        return ''
|
| 86 |
+
|
| 87 |
+
|
| 88 |
|
| 89 |
### Tokenizer Descriptions ###
|
| 90 |
|
|
|
|
| 140 |
if token_id == True:
|
| 141 |
color = itertools.cycle(colors)
|
| 142 |
st.write(stream_wp_token_ids)
|
| 143 |
+
|
| 144 |
+
st.write(num_tokens())
|