deepseek-ai/deepseek-coder-1.3b-base

Text Generation

text-generation-inference

Model card | Files and versions

Upload tokenizer.json

#2

by jonatanklosko - opened Nov 16, 2023

base: refs/heads/main

←

from: refs/pr/2

Discussion Files changed

Files changed (1) hide show

tokenizer.json +52 -4

tokenizer.json CHANGED Viewed

@@ -254,10 +254,58 @@
     ]
   },
   "post_processor": {
-    "type": "ByteLevel",
-    "add_prefix_space": true,
-    "trim_offsets": false,
-    "use_regex": true
   },
   "decoder": {
     "type": "ByteLevel",

     ]
   },
   "post_processor": {
+    "type": "TemplateProcessing",
+    "single": [
+      {
+        "SpecialToken": {
+          "id": "<｜begin▁of▁sentence｜>",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      }
+    ],
+    "pair": [
+      {
+        "SpecialToken": {
+          "id": "<｜begin▁of▁sentence｜>",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "<｜begin▁of▁sentence｜>",
+          "type_id": 1
+        }
+      },
+      {
+        "Sequence": {
+          "id": "B",
+          "type_id": 1
+        }
+      }
+    ],
+    "special_tokens": {
+      "<｜begin▁of▁sentence｜>": {
+        "id": "<｜begin▁of▁sentence｜>",
+        "ids": [
+          32013
+        ],
+        "tokens": [
+          "<｜begin▁of▁sentence｜>"
+        ]
+      }
+    }
   },
   "decoder": {
     "type": "ByteLevel",