Updated with new FIM tokens
- _hf_gguf.py +13 -0
- app.py +10 -6
_hf_gguf.py
CHANGED
@@ -128,6 +128,12 @@ standard_metadata = {
     "tokenizer.ggml.middle_token_id": (GGUFValueType.UINT32, 0),
     "tokenizer.ggml.eot_token_id": (GGUFValueType.UINT32, 0),
     "tokenizer.ggml.eom_token_id": (GGUFValueType.UINT32, 0),
+    "tokenizer.ggml.fim_pre_token_id": (GGUFValueType.UINT32, 0),
+    "tokenizer.ggml.fim_suf_token_id": (GGUFValueType.UINT32, 0),
+    "tokenizer.ggml.fim_mid_token_id": (GGUFValueType.UINT32, 0),
+    "tokenizer.ggml.fim_pad_token_id": (GGUFValueType.UINT32, 0),
+    "tokenizer.ggml.fim_rep_token_id": (GGUFValueType.UINT32, 0),
+    "tokenizer.ggml.fim_sep_token_id": (GGUFValueType.UINT32, 0),
     "quantize.imatrix.file": (GGUFValueType.STRING, ""),
     "quantize.imatrix.dataset": (GGUFValueType.STRING, ""),
     "quantize.imatrix.entries_count": (GGUFValueType.INT32, 0),
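Each entry in `standard_metadata` pairs a key with its expected GGUF value type and a default, which is what lets the editor pre-fill and type-check a key the user selects. A minimal standalone sketch of that lookup (the enum subset and `prefill` helper are illustrative, not the Space's actual code):

from enum import IntEnum

class GGUFValueType(IntEnum):
    # Subset of the GGUF value types, mirroring the gguf spec numbering.
    UINT32 = 4
    STRING = 8

# Excerpt of the table above: key -> (expected type, default value).
standard_metadata = {
    "tokenizer.ggml.fim_pre_token_id": (GGUFValueType.UINT32, 0),
    "tokenizer.ggml.fim_suf_token_id": (GGUFValueType.UINT32, 0),
    "quantize.imatrix.file": (GGUFValueType.STRING, ""),
}

def prefill(key: str) -> tuple[GGUFValueType, object]:
    """Return the type and default to pre-fill for a known key."""
    vtype, default = standard_metadata[key]
    return vtype, default

print(prefill("tokenizer.ggml.fim_pre_token_id"))  # (<GGUFValueType.UINT32: 4>, 0)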
@@ -135,6 +141,13 @@ standard_metadata = {
 }
 
 
+deprecated_metadata = {
+    "tokenizer.ggml.prefix_token_id",
+    "tokenizer.ggml.suffix_token_id",
+    "tokenizer.ggml.middle_token_id",
+}
+
+
 gguf_scalar_size: dict[GGUFValueType, int] = {
     GGUFValueType.UINT8: 1,
     GGUFValueType.INT8: 1,
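The three deprecated keys are the pre-rename spellings of the first three FIM keys added above (llama.cpp renamed prefix/suffix/middle to fim_pre/fim_suf/fim_mid). A hedged sketch of how an editor could migrate old metadata to the new names; the mapping and helper are assumptions based on that rename, not code from this Space:

# Assumed old-name -> new-name mapping, following the llama.cpp rename.
FIM_RENAMES = {
    "tokenizer.ggml.prefix_token_id": "tokenizer.ggml.fim_pre_token_id",
    "tokenizer.ggml.suffix_token_id": "tokenizer.ggml.fim_suf_token_id",
    "tokenizer.ggml.middle_token_id": "tokenizer.ggml.fim_mid_token_id",
}

def migrate_fim_keys(metadata: dict[str, int]) -> dict[str, int]:
    """Copy each deprecated key's value to its new name,
    never overwriting a new-style key that is already set."""
    out = dict(metadata)
    for old, new in FIM_RENAMES.items():
        if old in out and new not in out:
            out[new] = out[old]
    return out

print(migrate_fim_keys({"tokenizer.ggml.prefix_token_id": 32016}))
# {'tokenizer.ggml.prefix_token_id': 32016, 'tokenizer.ggml.fim_pre_token_id': 32016}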
app.py
CHANGED
@@ -9,7 +9,7 @@ from typing import Annotated, Any, NamedTuple
 from urllib.parse import urlencode
 
 from _hf_explorer import FileExplorer
-from _hf_gguf import standard_metadata, TokenType, LlamaFileType, GGUFValueType, HuggingGGUFstream
+from _hf_gguf import standard_metadata, deprecated_metadata, TokenType, LlamaFileType, GGUFValueType, HuggingGGUFstream
 
 
 hfapi = HfApi()
@@ -361,7 +361,7 @@ Removing this metadata key from a model will cause `llama.cpp` to output a warning
     example_description: dict(
         value = """## Add missing/change incorrect tokens
 
-Sometimes converted models will be missing declarations of important tokens like EOT, Fill-in-Middle (
+Sometimes converted models will be missing declarations of important tokens like EOT, Fill-in-Middle (fim_pre, fim_suf, fim_mid, fim_pad, fim_rep, fim_sep) for various reasons.
 Other times they may have the incorrect tokens set as BOS, EOS, etc. Either way, missing or incorrectly declared tokens means inference will not work as expected.
 
 Token declaration is made with the metadata key(s) named "tokenizer.ggml.`token name`\_token\_id" which contains the ID (index number) of the token in the token list (`tokenizer.ggml.tokens`).
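As the description in this hunk says, each *_token_id key stores an index into the `tokenizer.ggml.tokens` array. A tiny illustration of that relationship (the token list here is made up for the example):

# Made-up token list; in a real GGUF this is the tokenizer.ggml.tokens array.
tokens = ["<unk>", "<s>", "</s>", "<|fim_prefix|>", "<|fim_suffix|>", "<|fim_middle|>"]

def token_id(token: str) -> int:
    """Return the index a *_token_id key should hold for this token."""
    return tokens.index(token)

print(token_id("<|fim_prefix|>"))  # 3 -> candidate value for tokenizer.ggml.fim_pre_token_id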
@@ -369,14 +369,14 @@ Token declaration is made with the metadata key(s) named "tokenizer.ggml.`token
 A recurring issue is misconfigured EOS/EOT/EOM tokens, the need to set each of these and what they should be will vary between models, but the effect when these are incorrect is usually the same;
 infinte generation responses, ie. inference does not know when to stop. Typically this would be because f.ex. EOS has been set to <|endoftext|> instead of <|im\_end|> (again, model specific, just an example).
 
-Another issue, mainly for code models, is that Fill-in-Middle tokens have not been declared (note; not all models have or use such tokens), causing sub-par results for filling in blanks in code/text.
-There are 3 main metadata keys that need to be present for this; tokenizer.ggml.`
-They are usually named fim\_`something` or just `PRE`, `SUF` and `MID`, take extra care with DeepSeek-based models where
+Another issue, mainly for code models, is that Fill-in-Middle tokens have not been declared and not auto-detected (note; not all models have or use such tokens), causing sub-par results for filling in blanks in code/text.
+There are 3 main metadata keys that need to be present for this; tokenizer.ggml.`fim_pre`\_token\_id, `fim_suf` and `fim_mid`, and 3 auxiliary ones; `fim_pad`, `fim_rep` and `fim_sep`, sometimes also EOT/EOM if it differs from EOS in this mode.
+They are usually named fim\_`something` or just `PRE`, `SUF` and `MID`, take extra care with DeepSeek-based models where fim_pre is `begin`, fim_suf is `hole` and fim_mid is `end`.
 """,
     visible = True,
 ),
 example_keys: dict(
-    value = "tokenizer.ggml.
+    value = "tokenizer.ggml.fim_pre_token_id",
     info = "Select or enter any metadata key ending with _token_id",
     visible = True,
 ),
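For context on why these ids matter: a FIM-capable inference stack stitches the prefix and suffix around these tokens and asks the model to generate the middle. A hedged sketch of the common PSM layout (the ids are placeholders, not from any particular model):

# Placeholder ids; real values come from the tokenizer.ggml.fim_*_token_id keys.
FIM_PRE, FIM_SUF, FIM_MID = 1, 2, 3

def fim_prompt(prefix_ids: list[int], suffix_ids: list[int]) -> list[int]:
    """Common PSM ordering: <fim_pre> prefix <fim_suf> suffix <fim_mid>,
    after which the model generates the missing middle span."""
    return [FIM_PRE, *prefix_ids, FIM_SUF, *suffix_ids, FIM_MID]

print(fim_prompt([101, 102], [201]))  # [1, 101, 102, 2, 201, 3]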
@@ -857,6 +857,7 @@ Any framework based on `llama-cpp-python` will let you select which chat template
         meta_keys,
     ],
     outputs = [
+        meta_keys,
         meta_types,
         btn_delete,
     ],
@@ -875,6 +876,9 @@ Any framework based on `llama-cpp-python` will let you select which chat template
         typ = GGUFValueType.UINT32.name
 
     return {
+        meta_keys: gr.Dropdown(
+            info = "DEPRECATED" if key in deprecated_metadata else "Search by metadata key name",
+        ),
         meta_types: gr.Dropdown(
             value = typ,
             interactive = False if typ is not None else True,