| { | |
| "add_prefix_space": false, | |
| "added_tokens_decoder": { | |
| "0": { | |
| "content": "<s>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "1": { | |
| "content": "<pad>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "2": { | |
| "content": "</s>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "3": { | |
| "content": "<unk>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "50264": { | |
| "content": "<mask>", | |
| "lstrip": true, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "50265": { | |
| "content": "[empty-title]", | |
| "lstrip": false, | |
| "normalized": true, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": false | |
| }, | |
| "50266": { | |
| "content": "<end-of-node>", | |
| "lstrip": false, | |
| "normalized": true, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": false | |
| } | |
| }, | |
| "auto_map": { | |
| "AutoProcessor": "processor.MarkupLMPhishProcessor" | |
| }, | |
| "bos_token": "<s>", | |
| "clean_up_tokenization_spaces": true, | |
| "cls_token": "<s>", | |
| "eos_token": "</s>", | |
| "errors": "replace", | |
| "mask_token": "<mask>", | |
| "max_depth": 50, | |
| "max_length": 512, | |
| "max_width": 1000, | |
| "model_max_length": 512, | |
| "only_label_first_subword": true, | |
| "pad_token": "<pad>", | |
| "pad_token_label": -100, | |
| "pad_width": 1001, | |
| "padding": "max_length", | |
| "processor_class": "MarkupLMPhishProcessor", | |
| "sep_token": "</s>", | |
| "tags_dict": { | |
| "a": 0, | |
| "abbr": 1, | |
| "acronym": 2, | |
| "address": 3, | |
| "altGlyph": 4, | |
| "altGlyphDef": 5, | |
| "altGlyphItem": 6, | |
| "animate": 7, | |
| "animateColor": 8, | |
| "animateMotion": 9, | |
| "animateTransform": 10, | |
| "applet": 11, | |
| "area": 12, | |
| "article": 13, | |
| "aside": 14, | |
| "audio": 15, | |
| "b": 16, | |
| "base": 17, | |
| "basefont": 18, | |
| "bdi": 19, | |
| "bdo": 20, | |
| "bgsound": 21, | |
| "big": 22, | |
| "blink": 23, | |
| "blockquote": 24, | |
| "body": 25, | |
| "br": 26, | |
| "button": 27, | |
| "canvas": 28, | |
| "caption": 29, | |
| "center": 30, | |
| "circle": 31, | |
| "cite": 32, | |
| "clipPath": 33, | |
| "code": 34, | |
| "col": 35, | |
| "colgroup": 36, | |
| "color-profile": 37, | |
| "content": 38, | |
| "cursor": 39, | |
| "data": 40, | |
| "datalist": 41, | |
| "dd": 42, | |
| "defs": 43, | |
| "del": 44, | |
| "desc": 45, | |
| "details": 46, | |
| "dfn": 47, | |
| "dialog": 48, | |
| "dir": 49, | |
| "div": 50, | |
| "dl": 51, | |
| "dt": 52, | |
| "ellipse": 53, | |
| "em": 54, | |
| "embed": 55, | |
| "feBlend": 56, | |
| "feColorMatrix": 57, | |
| "feComponentTransfer": 58, | |
| "feComposite": 59, | |
| "feConvolveMatrix": 60, | |
| "feDiffuseLighting": 61, | |
| "feDisplacementMap": 62, | |
| "feDistantLight": 63, | |
| "feFlood": 64, | |
| "feFuncA": 65, | |
| "feFuncB": 66, | |
| "feFuncG": 67, | |
| "feFuncR": 68, | |
| "feGaussianBlur": 69, | |
| "feImage": 70, | |
| "feMerge": 71, | |
| "feMergeNode": 72, | |
| "feMorphology": 73, | |
| "feOffset": 74, | |
| "fePointLight": 75, | |
| "feSpecularLighting": 76, | |
| "feSpotLight": 77, | |
| "feTile": 78, | |
| "feTurbulence": 79, | |
| "fieldset": 80, | |
| "figcaption": 81, | |
| "figure": 82, | |
| "filter": 83, | |
| "font": 89, | |
| "font-face": 88, | |
| "font-face-format": 84, | |
| "font-face-name": 85, | |
| "font-face-src": 86, | |
| "font-face-uri": 87, | |
| "footer": 90, | |
| "foreignObject": 91, | |
| "form": 92, | |
| "frame": 93, | |
| "frameset": 94, | |
| "g": 95, | |
| "glyph": 96, | |
| "glyphRef": 97, | |
| "h1": 98, | |
| "h2": 99, | |
| "h3": 100, | |
| "h4": 101, | |
| "h5": 102, | |
| "h6": 103, | |
| "head": 104, | |
| "header": 105, | |
| "hgroup": 106, | |
| "hkern": 107, | |
| "hr": 108, | |
| "html": 109, | |
| "i": 110, | |
| "iframe": 111, | |
| "image": 112, | |
| "img": 113, | |
| "input": 114, | |
| "ins": 115, | |
| "kbd": 116, | |
| "keygen": 117, | |
| "label": 118, | |
| "legend": 119, | |
| "li": 120, | |
| "line": 121, | |
| "linearGradient": 122, | |
| "link": 123, | |
| "main": 124, | |
| "map": 125, | |
| "mark": 126, | |
| "marker": 127, | |
| "marquee": 128, | |
| "mask": 129, | |
| "math": 130, | |
| "menu": 131, | |
| "menuitem": 132, | |
| "meta": 133, | |
| "metadata": 134, | |
| "meter": 135, | |
| "missing-glyph": 136, | |
| "mpath": 137, | |
| "nav": 138, | |
| "nobr": 139, | |
| "noembed": 140, | |
| "noframes": 141, | |
| "noscript": 142, | |
| "object": 143, | |
| "ol": 144, | |
| "optgroup": 145, | |
| "option": 146, | |
| "output": 147, | |
| "p": 148, | |
| "param": 149, | |
| "path": 150, | |
| "pattern": 151, | |
| "picture": 152, | |
| "plaintext": 153, | |
| "polygon": 154, | |
| "polyline": 155, | |
| "portal": 156, | |
| "pre": 157, | |
| "progress": 158, | |
| "q": 159, | |
| "radialGradient": 160, | |
| "rb": 161, | |
| "rect": 162, | |
| "rp": 163, | |
| "rt": 164, | |
| "rtc": 165, | |
| "ruby": 166, | |
| "s": 167, | |
| "samp": 168, | |
| "script": 169, | |
| "section": 170, | |
| "select": 171, | |
| "set": 172, | |
| "shadow": 173, | |
| "slot": 174, | |
| "small": 175, | |
| "source": 176, | |
| "spacer": 177, | |
| "span": 178, | |
| "stop": 179, | |
| "strike": 180, | |
| "strong": 181, | |
| "style": 182, | |
| "sub": 183, | |
| "summary": 184, | |
| "sup": 185, | |
| "svg": 186, | |
| "switch": 187, | |
| "symbol": 188, | |
| "table": 189, | |
| "tbody": 190, | |
| "td": 191, | |
| "template": 192, | |
| "text": 193, | |
| "textPath": 194, | |
| "textarea": 195, | |
| "tfoot": 196, | |
| "th": 197, | |
| "thead": 198, | |
| "time": 199, | |
| "title": 200, | |
| "tr": 201, | |
| "track": 202, | |
| "tref": 203, | |
| "tspan": 204, | |
| "tt": 205, | |
| "u": 206, | |
| "ul": 207, | |
| "use": 208, | |
| "var": 209, | |
| "video": 210, | |
| "view": 211, | |
| "vkern": 212, | |
| "wbr": 213, | |
| "xmp": 214 | |
| }, | |
| "tokenizer_class": "MarkupLMTokenizer", | |
| "trim_offsets": false, | |
| "truncation": true, | |
| "unk_token": "<unk>" | |
| } | |