@@ -138,16 +138,16 @@ class OVQuantizerTest(unittest.TestCase):
138
138
OVModelForCausalLM ,
139
139
"llama" ,
140
140
dict (
141
- weight_quantization_config = dict (bits = 4 , dtype = "nf4" , group_size = 16 , weight_only = True ),
141
+ weight_quantization_config = dict (bits = 4 , dtype = "nf4" , group_size = 16 , weight_only = True , ratio = 0.5 ),
142
142
full_quantization_config = dict (dtype = "f8e4m3" , weight_only = False ),
143
143
dataset = "wikitext2" ,
144
144
num_samples = 1 ,
145
145
),
146
146
[
147
- 13 ,
147
+ 14 ,
148
148
],
149
149
[
150
- {"int8 " : 4 , "nf4" : 14 },
150
+ {"f8e4m3 " : 11 , "nf4" : 5 },
151
151
],
152
152
),
153
153
(
@@ -158,6 +158,7 @@ class OVQuantizerTest(unittest.TestCase):
158
158
bits = 4 ,
159
159
dtype = "nf4" ,
160
160
group_size = 16 ,
161
+ ratio = 0.5 ,
161
162
ignored_scope = {"patterns" : ["^__module.model.layers.0.self_attn" ]},
162
163
),
163
164
full_quantization_config = OVQuantizationConfig (
@@ -171,23 +172,64 @@ class OVQuantizerTest(unittest.TestCase):
171
172
7 ,
172
173
],
173
174
[
174
- {"int8" : 4 , " f8e4m3" : 4 , "nf4" : 6 },
175
+ {"f8e4m3" : 8 , "nf4" : 2 },
175
176
],
176
177
),
177
178
(
178
179
OVModelForCausalLM ,
179
180
"llama" ,
180
181
OVMixedQuantizationConfig (
181
- weight_quantization_config = OVWeightQuantizationConfig (bits = 4 , group_size = 16 ),
182
+ weight_quantization_config = OVWeightQuantizationConfig (
183
+ bits = 4 ,
184
+ dtype = "nf4" ,
185
+ group_size = 16 ,
186
+ ratio = 0.5 ,
187
+ ignored_scope = {"patterns" : ["^__module.model.layers.0.self_attn" ]},
188
+ ),
189
+ full_quantization_config = OVQuantizationConfig (
190
+ dtype = "f8e5m2" , ignored_scope = {"patterns" : ["^__module.model.layers.0.mlp" ]}
191
+ ),
192
+ ignored_scope = {"patterns" : ["^__module.model.layers.1.self_attn" ]},
193
+ dataset = "wikitext2" ,
194
+ num_samples = 1 ,
195
+ ),
196
+ [
197
+ 7 ,
198
+ ],
199
+ [
200
+ {"f8e5m2" : 8 , "nf4" : 2 },
201
+ ],
202
+ ),
203
+ (
204
+ OVModelForCausalLM ,
205
+ "llama" ,
206
+ OVMixedQuantizationConfig (
207
+ weight_quantization_config = OVWeightQuantizationConfig (bits = 4 , group_size = 16 , ratio = 0.5 ),
182
208
full_quantization_config = OVQuantizationConfig (dtype = "f8e4m3" ),
183
209
dataset = "wikitext2" ,
184
210
num_samples = 1 ,
185
211
),
212
+ [
213
+ 14 ,
214
+ ],
215
+ [
216
+ {"f8e4m3" : 11 , "int4" : 10 },
217
+ ],
218
+ ),
219
+ (
220
+ OVModelForCausalLM ,
221
+ "llama" ,
222
+ OVMixedQuantizationConfig (
223
+ weight_quantization_config = OVWeightQuantizationConfig (bits = 4 , group_size = 16 ),
224
+ full_quantization_config = OVQuantizationConfig (dtype = "f8e5m2" ),
225
+ dataset = "wikitext2" ,
226
+ num_samples = 1 ,
227
+ ),
186
228
[
187
229
13 ,
188
230
],
189
231
[
190
- {"int8 " : 4 , "int4" : 28 },
232
+ {"f8e5m2 " : 2 , "int4" : 28 },
191
233
],
192
234
),
193
235
]
0 commit comments