Spaces:
Running
Running
Commit · 0afb4f9
1 Parent(s): 3fff62f
trust_remote_code=True
Browse files
- app.py +1 -1
- test_transformers.py +12 -0
app.py
CHANGED
@@ -19,7 +19,7 @@ openai_tokenizer_list = [

 # load tokenizers
 hf_tokenizers = [
-AutoTokenizer.from_pretrained(model_name_or_id, use_fast=use_fast)
+AutoTokenizer.from_pretrained(model_name_or_id, use_fast=use_fast, trust_remote_code=True)
 for model_name_or_id, use_fast in hf_tokenizer_list
 ]

test_transformers.py
ADDED
@@ -0,0 +1,12 @@
+from transformers import AutoTokenizer
+
+
+tokenizer = AutoTokenizer.from_pretrained('tugstugi/bert-large-mongolian-cased', use_fast=False)
+
+
+test_input = "Мөнгөө тушаачихсаныхаа дараа мэдэгдээрэй"
+
+print("input:", test_input)
+print("tokenizer.encode()", tokenizer.encode(test_input))
+print("tokenizer decode", [(tokenizer.decode(token_id), token_id) for token_id in tokenizer.encode(test_input)])
+print("tokenizer()", tokenizer(test_input))