| { | |
| "version": "1.0", | |
| "truncation": null, | |
| "padding": null, | |
| "added_tokens": [ | |
| { | |
| "id": 0, | |
| "content": "<unk>", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 1, | |
| "content": "<s>", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 2, | |
| "content": "</s>", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 3, | |
| "content": "UNKNOWN_0", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 4, | |
| "content": "UNKNOWN_1", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 5, | |
| "content": "UNKNOWN_2", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 6, | |
| "content": "UNKNOWN_3", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 7, | |
| "content": "UNKNOWN_4", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 8, | |
| "content": "UNKNOWN_5", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 9, | |
| "content": "UNKNOWN_6", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 10, | |
| "content": "UNKNOWN_7", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 11, | |
| "content": "UNKNOWN_8", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 12, | |
| "content": "UNKNOWN_9", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 13, | |
| "content": "UNKNOWN_10", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 14, | |
| "content": "UNKNOWN_11", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 15, | |
| "content": "UNKNOWN_12", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 16, | |
| "content": "UNKNOWN_13", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 17, | |
| "content": "UNKNOWN_14", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 18, | |
| "content": "UNKNOWN_15", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 19, | |
| "content": "UNKNOWN_16", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 20, | |
| "content": "UNKNOWN_17", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 21, | |
| "content": "UNKNOWN_18", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 22, | |
| "content": "UNKNOWN_19", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 23, | |
| "content": "UNKNOWN_20", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 24, | |
| "content": "UNKNOWN_21", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 25, | |
| "content": "UNKNOWN_22", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 26, | |
| "content": "UNKNOWN_23", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 27, | |
| "content": "UNKNOWN_24", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 28, | |
| "content": "UNKNOWN_25", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 29, | |
| "content": "UNKNOWN_26", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 30, | |
| "content": "UNKNOWN_27", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 31, | |
| "content": "UNKNOWN_28", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 32, | |
| "content": "UNKNOWN_29", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 33, | |
| "content": "UNKNOWN_30", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 34, | |
| "content": "UNKNOWN_31", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 35, | |
| "content": "UNKNOWN_32", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 36, | |
| "content": "UNKNOWN_33", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 37, | |
| "content": "UNKNOWN_34", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 38, | |
| "content": "UNKNOWN_35", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 39, | |
| "content": "UNKNOWN_36", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 40, | |
| "content": "UNKNOWN_37", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 41, | |
| "content": "UNKNOWN_38", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 42, | |
| "content": "UNKNOWN_39", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 43, | |
| "content": "UNKNOWN_40", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 44, | |
| "content": "UNKNOWN_41", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 45, | |
| "content": "UNKNOWN_42", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 46, | |
| "content": "UNKNOWN_43", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 47, | |
| "content": "UNKNOWN_44", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 48, | |
| "content": "UNKNOWN_45", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 49, | |
| "content": "UNKNOWN_46", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 50, | |
| "content": "UNKNOWN_47", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 51, | |
| "content": "UNKNOWN_48", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 52, | |
| "content": "UNKNOWN_49", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| } | |
| ], | |
| "normalizer": { | |
| "type": "NFKC" | |
| }, | |
| "pre_tokenizer": { | |
| "type": "Metaspace", | |
| "replacement": "▁", | |
| "prepend_scheme": "always", | |
| "split": true | |
| }, | |
| "post_processor": null, | |
| "decoder": { | |
| "type": "Metaspace", | |
| "replacement": "▁", | |
| "prepend_scheme": "always", | |
| "split": true | |
| }, | |
| "model": { | |
| "type": "BPE", | |
| "dropout": null, | |
| "unk_token": "<unk>", | |
| "continuing_subword_prefix": null, | |
| "end_of_word_suffix": null, | |
| "fuse_unk": false, | |
| "byte_fallback": false, | |
| "ignore_merges": false, | |
| "vocab": { | |
| "<unk>": 0, | |
| "<s>": 1, | |
| "</s>": 2, | |
| "UNKNOWN_0": 3, | |
| "UNKNOWN_1": 4, | |
| "UNKNOWN_2": 5, | |
| "UNKNOWN_3": 6, | |
| "UNKNOWN_4": 7, | |
| "UNKNOWN_5": 8, | |
| "UNKNOWN_6": 9, | |
| "UNKNOWN_7": 10, | |
| "UNKNOWN_8": 11, | |
| "UNKNOWN_9": 12, | |
| "UNKNOWN_10": 13, | |
| "UNKNOWN_11": 14, | |
| "UNKNOWN_12": 15, | |
| "UNKNOWN_13": 16, | |
| "UNKNOWN_14": 17, | |
| "UNKNOWN_15": 18, | |
| "UNKNOWN_16": 19, | |
| "UNKNOWN_17": 20, | |
| "UNKNOWN_18": 21, | |
| "UNKNOWN_19": 22, | |
| "UNKNOWN_20": 23, | |
| "UNKNOWN_21": 24, | |
| "UNKNOWN_22": 25, | |
| "UNKNOWN_23": 26, | |
| "UNKNOWN_24": 27, | |
| "UNKNOWN_25": 28, | |
| "UNKNOWN_26": 29, | |
| "UNKNOWN_27": 30, | |
| "UNKNOWN_28": 31, | |
| "UNKNOWN_29": 32, | |
| "UNKNOWN_30": 33, | |
| "UNKNOWN_31": 34, | |
| "UNKNOWN_32": 35, | |
| "UNKNOWN_33": 36, | |
| "UNKNOWN_34": 37, | |
| "UNKNOWN_35": 38, | |
| "UNKNOWN_36": 39, | |
| "UNKNOWN_37": 40, | |
| "UNKNOWN_38": 41, | |
| "UNKNOWN_39": 42, | |
| "UNKNOWN_40": 43, | |
| "UNKNOWN_41": 44, | |
| "UNKNOWN_42": 45, | |
| "UNKNOWN_43": 46, | |
| "UNKNOWN_44": 47, | |
| "UNKNOWN_45": 48, | |
| "UNKNOWN_46": 49, | |
| "UNKNOWN_47": 50, | |
| "UNKNOWN_48": 51, | |
| "UNKNOWN_49": 52, | |
| "!": 53, | |
| "\"": 54, | |
| "#": 55, | |
| "$": 56, | |
| "%": 57, | |
| "&": 58, | |
| "'": 59, | |
| "(": 60, | |
| ")": 61, | |
| "*": 62, | |
| "+": 63, | |
| "-": 64, | |
| ".": 65, | |
| "/": 66, | |
| "0": 67, | |
| "1": 68, | |
| "2": 69, | |
| "3": 70, | |
| "4": 71, | |
| "5": 72, | |
| "6": 73, | |
| "7": 74, | |
| "8": 75, | |
| "9": 76, | |
| ":": 77, | |
| ";": 78, | |
| "<": 79, | |
| "=": 80, | |
| ">": 81, | |
| "?": 82, | |
| "@": 83, | |
| "A": 84, | |
| "B": 85, | |
| "C": 86, | |
| "D": 87, | |
| "E": 88, | |
| "F": 89, | |
| "G": 90, | |
| "H": 91, | |
| "I": 92, | |
| "J": 93, | |
| "K": 94, | |
| "L": 95, | |
| "M": 96, | |
| "N": 97, | |
| "O": 98, | |
| "P": 99, | |
| "Q": 100, | |
| "R": 101, | |
| "S": 102, | |
| "T": 103, | |
| "U": 104, | |
| "V": 105, | |
| "W": 106, | |
| "X": 107, | |
| "Y": 108, | |
| "Z": 109, | |
| "[": 110, | |
| "\\": 111, | |
| "]": 112, | |
| "^": 113, | |
| "_": 114, | |
| "`": 115, | |
| "a": 116, | |
| "b": 117, | |
| "c": 118, | |
| "d": 119, | |
| "e": 120, | |
| "f": 121, | |
| "g": 122, | |
| "h": 123, | |
| "i": 124, | |
| "j": 125, | |
| "k": 126, | |
| "l": 127, | |
| "m": 128, | |
| "n": 129, | |
| "o": 130, | |
| "p": 131, | |
| "q": 132, | |
| "r": 133, | |
| "s": 134, | |
| "t": 135, | |
| "u": 136, | |
| "v": 137, | |
| "w": 138, | |
| "x": 139, | |
| "y": 140, | |
| "z": 141, | |
| "{": 142, | |
| "|": 143, | |
| "}": 144, | |
| "~": 145, | |
| "¡": 146, | |
| "¢": 147, | |
| "£": 148, | |
| "¤": 149, | |
| "¥": 150, | |
| "¦": 151, | |
| "§": 152, | |
| "©": 153, | |
| "¬": 154, | |
| "®": 155, | |
| "°": 156, | |
| "±": 157, | |
| "¶": 158, | |
| "·": 159, | |
| "»": 160, | |
| "¿": 161, | |
| "À": 162, | |
| "Á": 163, | |
| "Â": 164, | |
| "Ã": 165, | |
| "Ä": 166, | |
| "Å": 167, | |
| "Æ": 168, | |
| "Ç": 169, | |
| "È": 170, | |
| "É": 171, | |
| "Ê": 172, | |
| "Ë": 173, | |
| "Ì": 174, | |
| "Í": 175, | |
| "Î": 176, | |
| "Ï": 177, | |
| "Ð": 178, | |
| "Ñ": 179, | |
| "Ò": 180, | |
| "Ó": 181, | |
| "Ô": 182, | |
| "Õ": 183, | |
| "Ö": 184, | |
| "×": 185, | |
| "Ø": 186, | |
| "Ù": 187, | |
| "Ú": 188, | |
| "Û": 189, | |
| "Ü": 190, | |
| "Ý": 191, | |
| "Þ": 192, | |
| "ß": 193, | |
| "à": 194, | |
| "á": 195, | |
| "â": 196, | |
| "ã": 197, | |
| "ä": 198, | |
| "å": 199, | |
| "æ": 200, | |
| "ç": 201, | |
| "è": 202, | |
| "é": 203, | |
| "ê": 204, | |
| "ë": 205, | |
| "ì": 206, | |
| "í": 207, | |
| "î": 208, | |
| "ï": 209, | |
| "ð": 210, | |
| "ñ": 211, | |
| "ò": 212, | |
| "ó": 213, | |
| "ô": 214, | |
| "õ": 215, | |
| "ö": 216, | |
| "÷": 217, | |
| "ø": 218, | |
| "ù": 219, | |
| "ú": 220, | |
| "û": 221, | |
| "ü": 222, | |
| "ý": 223, | |
| "þ": 224, | |
| "ÿ": 225, | |
| "Œ": 226, | |
| "œ": 227, | |
| "Š": 228, | |
| "š": 229, | |
| "Ÿ": 230, | |
| "Ž": 231, | |
| "ž": 232, | |
| "ƒ": 233, | |
| "ˆ": 234, | |
| "́": 235, | |
| "̃": 236, | |
| "̄": 237, | |
| "̈": 238, | |
| "̧": 239, | |
| "μ": 240, | |
| "–": 241, | |
| "—": 242, | |
| "‚": 243, | |
| "“": 244, | |
| "”": 245, | |
| "„": 246, | |
| "†": 247, | |
| "‡": 248, | |
| "•": 249, | |
| "‰": 250, | |
| "‹": 251, | |
| "›": 252, | |
| "⁄": 253, | |
| "€": 254, | |
| "▁": 255 | |
| }, | |
| "merges": [] | |
| } | |
| } |