Skip to content

Commit 3fddad8

Browse files
Update NNCF defaults for several models (#377)
#### Updated NNCF defaults for the following models: zephyr-7b-beta, llama-7b, stable-zephyr-3b-dpo, baichuan2-7b-chat, and mistral-7b-v0.1. Based on the experiments from 135227.
1 parent fbf0cbb commit 3fddad8

File tree

1 file changed

+7
-2
lines changed

1 file changed

+7
-2
lines changed

llm_bench/python/utils/nncf_utils.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,16 @@ def get_compressed_path(output_dir: str, base_precision, option: str):
3838
"opt-6.7b": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 64, "ratio": 0.8},
3939
"bloomz-7b1": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 32, "ratio": 0.6},
4040
"red-pajama-incite-7b-instruct": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 128},
41-
"zephyr-7b-beta": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "ratio": 0.6},
41+
"zephyr-7b-beta": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128, "ratio": 0.8,
42+
"dataset": {"name": "wikitext,wikitext-2-v1,train[:1000],text", "awq": True}},
4243
"llama-2-7b": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128, "ratio": 0.6},
4344
"llama-2-7b-chat": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128, "ratio": 0.8},
4445
"llama-2-13b-chat": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "ratio": 0.8},
4546
"stablelm-3b-4e1t": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "ratio": 0.8,
4647
"dataset": {"name": "wikitext,wikitext-2-v1,train[:1000],text", "awq": True}},
4748
"stablelm-epoch-3b-preview": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "ratio": 0.8,
4849
"dataset": {"name": "wikitext,wikitext-2-v1,train[:1000],text", "awq": True}},
49-
"stable-zephyr-3b-dpo": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 64, "ratio": 0.8,
50+
"stable-zephyr-3b-dpo": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 128, "ratio": 1.0,
5051
"dataset": {"name": "wikitext,wikitext-2-v1,train[:1000],text", "awq": True}},
5152
"stable-code-3b": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "ratio": 0.8},
5253
"rocket-3b": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128, "ratio": 0.8},
@@ -59,4 +60,8 @@ def get_compressed_path(output_dir: str, base_precision, option: str):
5960
"bloomz-560m": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "ratio": 0.8,
6061
"dataset": {"name": "wikitext,wikitext-2-v1,train[:1000],text", "awq": True}},
6162
"mixtral-8x7b-v0.1": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128, "ratio": 0.8},
63+
"baichuan2-7b-chat": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128, "ratio": 0.8,
64+
"dataset": {"name": "wikitext,wikitext-2-v1,train[:1000],text", "awq": True}},
65+
"mistral-7b-v0.1": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128, "ratio": 0.9},
66+
"llama-7b": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128, "ratio": 0.7},
6267
}

0 commit comments

Comments (0)