# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# This file defines example configuration arguments for quantizing
# a transformer model with product quantization

# Number of Centroids for Product Quantization, by default 256 (byte-aligned)
n_centroids:
    Linear:
        key: in_features
        value: {"*": 256}
    Embedding:
        key: embedding_dim
        value: {"*": 256}
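
# Note: with 256 centroids, every block assignment fits in a single uint8 index,
# which is what "byte-aligned" refers to. "key" selects the module attribute the
# patterns under "value" are matched against (in_features for Linear,
# embedding_dim for Embedding); "*" acts as a wildcard, so layers of any size
# use 256 centroids here.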

# Block Sizes for Product Quantization
# We suggest: 8 for FFN, 4 for ATTN, 4 for embedding projections, 8 for embeddings
block_sizes:
    Linear:
        key: fuzzy_name
        value: {fc: 8, attn: 4, emb: 4}
    Embedding:
        key: fuzzy_name
        value: {emb: 8}
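
# Note: the block size is the number of consecutive weights replaced by one
# centroid index, so it controls the compression/accuracy trade-off. As a rough,
# illustrative example (layer shape assumed, not taken from any particular
# model): an fc layer with a 1024 x 4096 weight matrix and block size 8 gives
# 4,194,304 / 8 = 524,288 blocks, i.e. ~0.5 MB of uint8 indices instead of
# ~16 MB of fp32 weights, roughly 32x smaller before counting the small
# 256 x 8 codebook. "fuzzy_name" keys on the module name, so Linear layers whose
# name contains "fc", "attn" or "emb" get block sizes 8, 4 and 4 respectively.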

# Layers to Quantize Sequentially
# We suggest: first FFN, then EMB, then ATTN
layers_to_quantize:
    - decoder\\.layers\\.\d+\\.fc[12]
    - decoder\\.embed_tokens\\.embeddings\\.[012]\\.[01]
    - decoder\\.layers\\.\d+\\.self_attn\\.(k_proj|v_proj|q_proj|out_proj)
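
# Note: each entry is a regular expression matched against module names
# (\d+ covers the layer index, fc[12] covers fc1 and fc2, and so on). The groups
# are quantized one at a time, in the order listed, following the iterative PQ
# recipe: quantize one group, then finetune so that the not-yet-quantized layers
# can compensate for the quantization error, then move on to the next group.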