Updated with new FIM tokens
- _hf_gguf.py +13 -0
- app.py +10 -6
_hf_gguf.py
CHANGED
@@ -128,6 +128,12 @@ standard_metadata = {
     "tokenizer.ggml.middle_token_id": (GGUFValueType.UINT32, 0),
     "tokenizer.ggml.eot_token_id": (GGUFValueType.UINT32, 0),
     "tokenizer.ggml.eom_token_id": (GGUFValueType.UINT32, 0),
+    "tokenizer.ggml.fim_pre_token_id": (GGUFValueType.UINT32, 0),
+    "tokenizer.ggml.fim_suf_token_id": (GGUFValueType.UINT32, 0),
+    "tokenizer.ggml.fim_mid_token_id": (GGUFValueType.UINT32, 0),
+    "tokenizer.ggml.fim_pad_token_id": (GGUFValueType.UINT32, 0),
+    "tokenizer.ggml.fim_rep_token_id": (GGUFValueType.UINT32, 0),
+    "tokenizer.ggml.fim_sep_token_id": (GGUFValueType.UINT32, 0),
     "quantize.imatrix.file": (GGUFValueType.STRING, ""),
     "quantize.imatrix.dataset": (GGUFValueType.STRING, ""),
     "quantize.imatrix.entries_count": (GGUFValueType.INT32, 0),
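Each entry in `standard_metadata` pairs a key with its expected GGUF value type and a default, which is what lets the editor pre-fill and type-check a key the user selects. A minimal standalone sketch of that lookup (the enum subset and `prefill` helper are illustrative, not the Space's actual code):

from enum import IntEnum

class GGUFValueType(IntEnum):
    # Subset of the GGUF value types, mirroring the gguf spec numbering.
    UINT32 = 4
    STRING = 8

# Excerpt of the table above: key -> (expected type, default value).
standard_metadata = {
    "tokenizer.ggml.fim_pre_token_id": (GGUFValueType.UINT32, 0),
    "tokenizer.ggml.fim_suf_token_id": (GGUFValueType.UINT32, 0),
    "quantize.imatrix.file": (GGUFValueType.STRING, ""),
}

def prefill(key: str) -> tuple[GGUFValueType, object]:
    """Return the type and default to pre-fill for a known key."""
    vtype, default = standard_metadata[key]
    return vtype, default

print(prefill("tokenizer.ggml.fim_pre_token_id"))  # (<GGUFValueType.UINT32: 4>, 0)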
@@ -135,6 +141,13 @@ standard_metadata = {
 }
 
 
+deprecated_metadata = {
+    "tokenizer.ggml.prefix_token_id",
+    "tokenizer.ggml.suffix_token_id",
+    "tokenizer.ggml.middle_token_id",
+}
+
+
 gguf_scalar_size: dict[GGUFValueType, int] = {
     GGUFValueType.UINT8: 1,
     GGUFValueType.INT8: 1,
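The three deprecated keys are the pre-rename spellings of the first three FIM keys added above (llama.cpp renamed prefix/suffix/middle to fim_pre/fim_suf/fim_mid). A hedged sketch of how an editor could migrate old metadata to the new names; the mapping and helper are assumptions based on that rename, not code from this Space:

# Assumed old-name -> new-name mapping, following the llama.cpp rename.
FIM_RENAMES = {
    "tokenizer.ggml.prefix_token_id": "tokenizer.ggml.fim_pre_token_id",
    "tokenizer.ggml.suffix_token_id": "tokenizer.ggml.fim_suf_token_id",
    "tokenizer.ggml.middle_token_id": "tokenizer.ggml.fim_mid_token_id",
}

def migrate_fim_keys(metadata: dict[str, int]) -> dict[str, int]:
    """Copy each deprecated key's value to its new name,
    never overwriting a new-style key that is already set."""
    out = dict(metadata)
    for old, new in FIM_RENAMES.items():
        if old in out and new not in out:
            out[new] = out[old]
    return out

print(migrate_fim_keys({"tokenizer.ggml.prefix_token_id": 32016}))
# {'tokenizer.ggml.prefix_token_id': 32016, 'tokenizer.ggml.fim_pre_token_id': 32016}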
app.py
CHANGED
@@ -9,7 +9,7 @@ from typing import Annotated, Any, NamedTuple
 from urllib.parse import urlencode
 
 from _hf_explorer import FileExplorer
-from _hf_gguf import standard_metadata, TokenType, LlamaFileType, GGUFValueType, HuggingGGUFstream
+from _hf_gguf import standard_metadata, deprecated_metadata, TokenType, LlamaFileType, GGUFValueType, HuggingGGUFstream
 
 
 hfapi = HfApi()
@@ -361,7 +361,7 @@ Removing this metadata key from a model will cause `llama.cpp` to output a warning
     example_description: dict(
         value = """## Add missing/change incorrect tokens
 
-Sometimes converted models will be missing declarations of important tokens like EOT, Fill-in-Middle (
+Sometimes converted models will be missing declarations of important tokens like EOT, Fill-in-Middle (fim_pre, fim_suf, fim_mid, fim_pad, fim_rep, fim_sep) for various reasons.
 Other times they may have the incorrect tokens set as BOS, EOS, etc. Either way, missing or incorrectly declared tokens means inference will not work as expected.
 
 Token declaration is made with the metadata key(s) named "tokenizer.ggml.`token name`\_token\_id" which contains the ID (index number) of the token in the token list (`tokenizer.ggml.tokens`).
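As the description in this hunk says, each *_token_id key stores an index into the `tokenizer.ggml.tokens` array. A tiny illustration of that relationship (the token list here is made up for the example):

# Made-up token list; in a real GGUF this is the tokenizer.ggml.tokens array.
tokens = ["<unk>", "<s>", "</s>", "<|fim_prefix|>", "<|fim_suffix|>", "<|fim_middle|>"]

def token_id(token: str) -> int:
    """Return the index a *_token_id key should hold for this token."""
    return tokens.index(token)

print(token_id("<|fim_prefix|>"))  # 3 -> candidate value for tokenizer.ggml.fim_pre_token_id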
@@ -369,14 +369,14 @@ Token declaration is made with the metadata key(s) named "tokenizer.ggml.`token
 A recurring issue is misconfigured EOS/EOT/EOM tokens, the need to set each of these and what they should be will vary between models, but the effect when these are incorrect is usually the same;
 infinte generation responses, ie. inference does not know when to stop. Typically this would be because f.ex. EOS has been set to <|endoftext|> instead of <|im\_end|> (again, model specific, just an example).
 
-Another issue, mainly for code models, is that Fill-in-Middle tokens have not been declared (note; not all models have or use such tokens), causing sub-par results for filling in blanks in code/text.
-There are 3 main metadata keys that need to be present for this; tokenizer.ggml.`
-They are usually named fim\_`something` or just `PRE`, `SUF` and `MID`, take extra care with DeepSeek-based models where
+Another issue, mainly for code models, is that Fill-in-Middle tokens have not been declared and not auto-detected (note; not all models have or use such tokens), causing sub-par results for filling in blanks in code/text.
+There are 3 main metadata keys that need to be present for this; tokenizer.ggml.`fim_pre`\_token\_id, `fim_suf` and `fim_mid`, and 3 auxiliary ones; `fim_pad`, `fim_rep` and `fim_sep`, sometimes also EOT/EOM if it differs from EOS in this mode.
+They are usually named fim\_`something` or just `PRE`, `SUF` and `MID`, take extra care with DeepSeek-based models where fim_pre is `begin`, fim_suf is `hole` and fim_mid is `end`.
 """,
     visible = True,
 ),
 example_keys: dict(
-    value = "tokenizer.ggml.
+    value = "tokenizer.ggml.fim_pre_token_id",
     info = "Select or enter any metadata key ending with _token_id",
     visible = True,
 ),
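For context on why these ids matter: a FIM-capable inference stack stitches the prefix and suffix around these tokens and asks the model to generate the middle. A hedged sketch of the common PSM layout (the ids are placeholders, not from any particular model):

# Placeholder ids; real values come from the tokenizer.ggml.fim_*_token_id keys.
FIM_PRE, FIM_SUF, FIM_MID = 1, 2, 3

def fim_prompt(prefix_ids: list[int], suffix_ids: list[int]) -> list[int]:
    """Common PSM ordering: <fim_pre> prefix <fim_suf> suffix <fim_mid>,
    after which the model generates the missing middle span."""
    return [FIM_PRE, *prefix_ids, FIM_SUF, *suffix_ids, FIM_MID]

print(fim_prompt([101, 102], [201]))  # [1, 101, 102, 2, 201, 3]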
@@ -857,6 +857,7 @@ Any framework based on `llama-cpp-python` will let you select which chat template
         meta_keys,
     ],
     outputs = [
+        meta_keys,
         meta_types,
         btn_delete,
     ],
@@ -875,6 +876,9 @@ Any framework based on `llama-cpp-python` will let you select which chat template
         typ = GGUFValueType.UINT32.name
 
     return {
+        meta_keys: gr.Dropdown(
+            info = "DEPRECATED" if key in deprecated_metadata else "Search by metadata key name",
+        ),
         meta_types: gr.Dropdown(
             value = typ,
             interactive = False if typ is not None else True,