fix(tokenizer): Sets fim and think tokens to non-special, and removes unk_token.
Browse files
- tokenizer.json +5 -14
    	
        tokenizer.json
    CHANGED
    
    | @@ -3,15 +3,6 @@ | |
| 3 | 
             
              "truncation": null,
         | 
| 4 | 
             
              "padding": null,
         | 
| 5 | 
             
              "added_tokens": [
         | 
| 6 | 
            -
                {
         | 
| 7 | 
            -
                  "id": 5809,
         | 
| 8 | 
            -
                  "content": "�",
         | 
| 9 | 
            -
                  "single_word": false,
         | 
| 10 | 
            -
                  "lstrip": false,
         | 
| 11 | 
            -
                  "rstrip": false,
         | 
| 12 | 
            -
                  "normalized": false,
         | 
| 13 | 
            -
                  "special": true
         | 
| 14 | 
            -
                },
         | 
| 15 | 
             
                {
         | 
| 16 | 
             
                  "id": 100256,
         | 
| 17 | 
             
                  "content": "<|dummy_0|>",
         | 
| @@ -37,7 +28,7 @@ | |
| 37 | 
             
                  "lstrip": true,
         | 
| 38 | 
             
                  "rstrip": true,
         | 
| 39 | 
             
                  "normalized": false,
         | 
| 40 | 
            -
                  "special": true
         | 
| 41 | 
             
                },
         | 
| 42 | 
             
                {
         | 
| 43 | 
             
                  "id": 100259,
         | 
| @@ -46,7 +37,7 @@ | |
| 46 | 
             
                  "lstrip": true,
         | 
| 47 | 
             
                  "rstrip": true,
         | 
| 48 | 
             
                  "normalized": false,
         | 
| 49 | 
            -
                  "special": true
         | 
| 50 | 
             
                },
         | 
| 51 | 
             
                {
         | 
| 52 | 
             
                  "id": 100260,
         | 
| @@ -55,7 +46,7 @@ | |
| 55 | 
             
                  "lstrip": true,
         | 
| 56 | 
             
                  "rstrip": true,
         | 
| 57 | 
             
                  "normalized": false,
         | 
| 58 | 
            -
                  "special": true
         | 
| 59 | 
             
                },
         | 
| 60 | 
             
                {
         | 
| 61 | 
             
                  "id": 100261,
         | 
| @@ -865,7 +856,7 @@ | |
| 865 | 
             
                  "lstrip": true,
         | 
| 866 | 
             
                  "rstrip": true,
         | 
| 867 | 
             
                  "normalized": false,
         | 
| 868 | 
            -
                  "special": true
         | 
| 869 | 
             
                },
         | 
| 870 | 
             
                {
         | 
| 871 | 
             
                  "id": 100351,
         | 
| @@ -874,7 +865,7 @@ | |
| 874 | 
             
                  "lstrip": true,
         | 
| 875 | 
             
                  "rstrip": true,
         | 
| 876 | 
             
                  "normalized": false,
         | 
| 877 | 
            -
                  "special": true
         | 
| 878 | 
             
                }
         | 
| 879 | 
             
              ],
         | 
| 880 | 
             
              "normalizer": null,
         | 
|  | |
| 3 | 
             
              "truncation": null,
         | 
| 4 | 
             
              "padding": null,
         | 
| 5 | 
             
              "added_tokens": [
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 6 | 
             
                {
         | 
| 7 | 
             
                  "id": 100256,
         | 
| 8 | 
             
                  "content": "<|dummy_0|>",
         | 
|  | |
| 28 | 
             
                  "lstrip": true,
         | 
| 29 | 
             
                  "rstrip": true,
         | 
| 30 | 
             
                  "normalized": false,
         | 
| 31 | 
            +
                  "special": false
         | 
| 32 | 
             
                },
         | 
| 33 | 
             
                {
         | 
| 34 | 
             
                  "id": 100259,
         | 
|  | |
| 37 | 
             
                  "lstrip": true,
         | 
| 38 | 
             
                  "rstrip": true,
         | 
| 39 | 
             
                  "normalized": false,
         | 
| 40 | 
            +
                  "special": false
         | 
| 41 | 
             
                },
         | 
| 42 | 
             
                {
         | 
| 43 | 
             
                  "id": 100260,
         | 
|  | |
| 46 | 
             
                  "lstrip": true,
         | 
| 47 | 
             
                  "rstrip": true,
         | 
| 48 | 
             
                  "normalized": false,
         | 
| 49 | 
            +
                  "special": false
         | 
| 50 | 
             
                },
         | 
| 51 | 
             
                {
         | 
| 52 | 
             
                  "id": 100261,
         | 
|  | |
| 856 | 
             
                  "lstrip": true,
         | 
| 857 | 
             
                  "rstrip": true,
         | 
| 858 | 
             
                  "normalized": false,
         | 
| 859 | 
            +
                  "special": false
         | 
| 860 | 
             
                },
         | 
| 861 | 
             
                {
         | 
| 862 | 
             
                  "id": 100351,
         | 
|  | |
| 865 | 
             
                  "lstrip": true,
         | 
| 866 | 
             
                  "rstrip": true,
         | 
| 867 | 
             
                  "normalized": false,
         | 
| 868 | 
            +
                  "special": false
         | 
| 869 | 
             
                }
         | 
| 870 | 
             
              ],
         | 
| 871 | 
             
              "normalizer": null,
         | 

