@@ -202,14 +202,43 @@ def test_ipex_static_quantization_with_smoothquant(self, task, model_name, expec

     def test_weight_only_quantization(self):
         model_name = "hf-internal-testing/tiny-random-GPTNeoForCausalLM"
-        quantization_config = WeightOnlyQuantConfig(weight_dtype="int8")
         model = AutoModelForCausalLM.from_pretrained(model_name)
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         tokenizer.add_special_tokens({"pad_token": "[PAD]"})
         quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
         calibration_dataset = _generate_dataset(quantizer, tokenizer, num_samples=2)

         with tempfile.TemporaryDirectory() as tmp_dir:
+            quantization_config = WeightOnlyQuantConfig(weight_dtype="int8")
+            q_model = quantizer.quantize(
+                quantization_config=quantization_config,
+                save_directory=tmp_dir,
+            )
+            inp = torch.tensor([calibration_dataset[0]["input_ids"]])
+            out = model(inp)[0]
+            q_out = q_model(inp)[0]
+            self.assertTrue(torch.all(torch.isclose(out, q_out, atol=5e-1)))
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            quantization_config = WeightOnlyQuantConfig(
+                algorithm="GPTQ",
+                weight_dtype="int4_clip",
+            )
+            q_model = quantizer.quantize(
+                quantization_config=quantization_config,
+                calibration_dataset=calibration_dataset,
+                save_directory=tmp_dir,
+            )
+            inp = torch.tensor([calibration_dataset[0]["input_ids"]])
+            out = model(inp)[0]
+            q_out = q_model(inp)[0]
+            self.assertTrue(torch.all(torch.isclose(out, q_out, atol=5e-1)))
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            quantization_config = WeightOnlyQuantConfig(
+                algorithm="AWQ",
+                weight_dtype="int4_clip",
+            )
             q_model = quantizer.quantize(
                 quantization_config=quantization_config,
                 calibration_dataset=calibration_dataset,
@@ -220,6 +249,16 @@ def test_weight_only_quantization(self):
             q_out = q_model(inp)[0]
             self.assertTrue(torch.all(torch.isclose(out, q_out, atol=5e-1)))

+        with tempfile.TemporaryDirectory() as tmp_dir:
+            q_model = quantizer.quantize(
+                weight_only=True,  # by default, the RTN quantization method and NF4 weight dtype are used
+                save_directory=tmp_dir,
+            )
+            inp = torch.tensor([calibration_dataset[0]["input_ids"]])
+            out = model(inp)[0]
+            q_out = q_model(inp)[0]
+            self.assertTrue(torch.all(torch.isclose(out, q_out, atol=5e-1)))
+
     def test_dynamic_accuracy_strategy_quantization(self):
         model_name = "distilbert-base-cased-distilled-squad"
         model = AutoModelForQuestionAnswering.from_pretrained(model_name)