# Default 4-bit weight-compression presets, keyed by Hugging Face model ID
# (org/name form, so lookups match hub identifiers exactly).  Each entry is a
# kwargs dict for NNCF weight compression:
#   - "mode":       4-bit scheme, symmetric (INT4_SYM) or asymmetric (INT4_ASYM)
#   - "group_size": quantization group size for the compression scales
#   - "ratio":      presumably the fraction of weights kept at 4 bits, with the
#                   remainder at higher precision — TODO confirm against the
#                   nncf.compress_weights API; omitted -> NNCF default
#   - "all_layers": compress all layers when True — NOTE(review): exact layer
#                   coverage semantics come from NNCF; verify there
# NOTE(review): a few legacy short-name keys ("stablelm-epoch-3b-preview",
# "stable-zephyr-3b-dpo") are kept verbatim as in the original source.
DEFAULT_4BIT_CONFIGS = {
    "databricks/dolly-v2-3b": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 32, "ratio": 0.5},
    "EleutherAI/gpt-j-6b": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 64},
    "facebook/opt-6.7b": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 64, "ratio": 0.8},
    "bigscience/bloomz-7b1": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 32, "ratio": 0.6},
    "togethercomputer/RedPajama-INCITE-7B-Instruct": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 128},
    "HuggingFaceH4/zephyr-7b-beta": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "ratio": 0.6},
    "meta-llama/Llama-2-7b": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128, "ratio": 0.6},
    "meta-llama/Llama-2-7b-chat": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128, "ratio": 0.8},
    "meta-llama/Llama-2-13b-chat": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "ratio": 0.8},
    "stabilityai/stablelm-3b-4e1t": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "ratio": 0.8},
    "stablelm-epoch-3b-preview": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "ratio": 0.8},
    "stable-zephyr-3b-dpo": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 64, "ratio": 0.8},
    "pansophic/rocket-3B": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128, "ratio": 0.8},
    "THUDM/chatglm2-6b": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128, "ratio": 0.72},
    "Qwen/Qwen-7B-Chat": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128, "ratio": 0.6},
    "openlm-research/open_llama_3b": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "all_layers": True},
    "tiiuae/falcon-7b": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "all_layers": True},
    "psmathur/orca_mini_3b": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "all_layers": True},
}