tamang0000
committed on
Commit
·
e805282
1
Parent(s):
4d1fdaf
changed words
Browse files
app.py
CHANGED
|
@@ -106,9 +106,10 @@ def generate_split_token_table(text):
|
|
| 106 |
with gr.Blocks() as sutra_token_count:
|
| 107 |
gr.Markdown(
|
| 108 |
"""
|
| 109 |
-
#
|
| 110 |
## Tokenize paragraphs in multiple languages and compare token counts.
|
| 111 |
-
Space inspired from [SUTRA](https://huggingface.co/spaces/TWO/sutra-tokenizer-comparison)
|
|
|
|
| 112 |
""")
|
| 113 |
textbox = gr.Textbox(label="Input Text")
|
| 114 |
submit_button = gr.Button("Submit")
|
|
@@ -136,9 +137,10 @@ def generate_tokens_table(text):
|
|
| 136 |
with gr.Blocks() as sutra_tokenize:
|
| 137 |
gr.Markdown(
|
| 138 |
"""
|
| 139 |
-
#
|
| 140 |
## Tokenize a sentence with various tokenizers and inspect how it's broken down.
|
| 141 |
Space inspired from [SUTRA](https://huggingface.co/spaces/TWO/sutra-tokenizer-comparison)
|
|
|
|
| 142 |
""")
|
| 143 |
textbox = gr.Textbox(label="Input Text")
|
| 144 |
submit_button = gr.Button("Submit")
|
|
|
|
| 106 |
with gr.Blocks() as sutra_token_count:
|
| 107 |
gr.Markdown(
|
| 108 |
"""
|
| 109 |
+
# Assamese Tokenizer Specs & Stats.
|
| 110 |
## Tokenize paragraphs in multiple languages and compare token counts.
|
| 111 |
+
Space inspired from [SUTRA](https://huggingface.co/spaces/TWO/sutra-tokenizer-comparison)
|
| 112 |
+
Number of Tokens (fewer is better)
|
| 113 |
""")
|
| 114 |
textbox = gr.Textbox(label="Input Text")
|
| 115 |
submit_button = gr.Button("Submit")
|
|
|
|
| 137 |
with gr.Blocks() as sutra_tokenize:
|
| 138 |
gr.Markdown(
|
| 139 |
"""
|
| 140 |
+
# Assamese Tokenizer Sentence Inspector.
|
| 141 |
## Tokenize a sentence with various tokenizers and inspect how it's broken down.
|
| 142 |
Space inspired from [SUTRA](https://huggingface.co/spaces/TWO/sutra-tokenizer-comparison)
|
| 143 |
+
Number of Tokens (fewer is better)
|
| 144 |
""")
|
| 145 |
textbox = gr.Textbox(label="Input Text")
|
| 146 |
submit_button = gr.Button("Submit")
|