@@ -205,10 +205,10 @@ def test_weight_only_quantization(self):
205
205
model = AutoModelForCausalLM .from_pretrained (model_name )
206
206
tokenizer = AutoTokenizer .from_pretrained (model_name )
207
207
tokenizer .add_special_tokens ({"pad_token" : "[PAD]" })
208
- quantizer = INCQuantizer .from_pretrained (copy .deepcopy (model ), task = "text-generation" )
209
208
calibration_dataset = _generate_dataset (quantizer , tokenizer , num_samples = 2 )
210
209
211
210
with tempfile .TemporaryDirectory () as tmp_dir :
211
+ quantizer = INCQuantizer .from_pretrained (copy .deepcopy (model ), task = "text-generation" )
212
212
quantization_config = WeightOnlyQuantConfig (weight_dtype = "int8" )
213
213
q_model = quantizer .quantize (
214
214
quantization_config = quantization_config ,
@@ -220,6 +220,7 @@ def test_weight_only_quantization(self):
220
220
self .assertTrue (torch .all (torch .isclose (out , q_out , atol = 5e-1 )))
221
221
222
222
with tempfile .TemporaryDirectory () as tmp_dir :
223
+ quantizer = INCQuantizer .from_pretrained (copy .deepcopy (model ), task = "text-generation" )
223
224
quantization_config = WeightOnlyQuantConfig (
224
225
algorithm = "GPTQ" ,
225
226
weight_dtype = "int4_clip" ,
@@ -235,6 +236,7 @@ def test_weight_only_quantization(self):
235
236
self .assertTrue (torch .all (torch .isclose (out , q_out , atol = 5e-1 )))
236
237
237
238
with tempfile .TemporaryDirectory () as tmp_dir :
239
+ quantizer = INCQuantizer .from_pretrained (copy .deepcopy (model ), task = "text-generation" )
238
240
quantization_config = WeightOnlyQuantConfig (
239
241
algorithm = "AWQ" ,
240
242
weight_dtype = "int4_clip" ,
@@ -250,6 +252,7 @@ def test_weight_only_quantization(self):
250
252
self .assertTrue (torch .all (torch .isclose (out , q_out , atol = 5e-1 )))
251
253
252
254
with tempfile .TemporaryDirectory () as tmp_dir :
255
+ quantizer = INCQuantizer .from_pretrained (copy .deepcopy (model ), task = "text-generation" )
253
256
q_model = quantizer .quantize (
254
257
weight_only = True , # use RTN quantization method and NF4 weight data type is default.
255
258
save_directory = tmp_dir ,
0 commit comments