# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# This file defines example configuration arguments for quantizing
# a transformer model with product quantization

# Number of Centroids for Product Quantization, by default 256 (byte-aligned)
n_centroids:
    Linear:
        key: in_features
        value: {"*": 256}
    Embedding:
        key: embedding_dim
        value: {"*": 256}
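
# Note: with 256 centroids, every block assignment fits in a single uint8 index,
# which is what "byte-aligned" refers to. "key" selects the module attribute the
# patterns under "value" are matched against (in_features for Linear,
# embedding_dim for Embedding); "*" acts as a wildcard, so layers of any size
# use 256 centroids here.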

# Block Sizes for Product Quantization
# We suggest: 8 for FFN, 4 for ATTN, 4 for embedding projections, 8 for embeddings
block_sizes:
    Linear:
        key: fuzzy_name
        value: {fc: 8, attn: 4, emb: 4}
    Embedding:
        key: fuzzy_name
        value: {emb: 8}
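
# Note: the block size is the number of consecutive weights replaced by one
# centroid index, so it controls the compression/accuracy trade-off. As a rough,
# illustrative example (layer shape assumed, not taken from any particular
# model): an fc layer with a 1024 x 4096 weight matrix and block size 8 gives
# 4,194,304 / 8 = 524,288 blocks, i.e. ~0.5 MB of uint8 indices instead of
# ~16 MB of fp32 weights, roughly 32x smaller before counting the small
# 256 x 8 codebook. "fuzzy_name" keys on the module name, so Linear layers whose
# name contains "fc", "attn" or "emb" get block sizes 8, 4 and 4 respectively.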

# Layers to Quantize Sequentially
# We suggest: first FFN, then EMB, then ATTN
layers_to_quantize:
    - decoder\\.layers\\.\d+\\.fc[12]
    - decoder\\.embed_tokens\\.embeddings\\.[012]\\.[01]
    - decoder\\.layers\\.\d+\\.self_attn\\.(k_proj|v_proj|q_proj|out_proj)
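
# Note: each entry is a regular expression matched against module names
# (\d+ covers the layer index, fc[12] covers fc1 and fc2, and so on). The groups
# are quantized one at a time, in the order listed, following the iterative PQ
# recipe: quantize one group, then finetune so that the not-yet-quantized layers
# can compensate for the quantization error, then move on to the next group.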