Update README.md
Browse files
README.md
CHANGED
|
@@ -152,7 +152,10 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
| 152 |
)
|
| 153 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
| 154 |
|
| 155 |
-
|
|
|
|
|
|
|
|
|
|
| 156 |
quant_config = AWQConfig(base_config, step="prepare")
|
| 157 |
quantize_(
|
| 158 |
model,
|
|
|
|
| 152 |
)
|
| 153 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
| 154 |
|
| 155 |
+
# Note: the default base_config below is only compatible with H100 GPUs
|
| 156 |
+
base_config = Int4WeightOnlyConfig(group_size=128)
|
| 157 |
+
# For A100 GPUs, use the following base_config instead:
|
| 158 |
+
# base_config = Int4WeightOnlyConfig(group_size=128, int4_packing_format="tile_packed_to_4d", int4_choose_qparams_algorithm="hqq")
|
| 159 |
quant_config = AWQConfig(base_config, step="prepare")
|
| 160 |
quantize_(
|
| 161 |
model,
|