@@ -88,6 +88,13 @@ class OptimizationTest(INCTestMixin):
         "hf-internal-testing/tiny-random-GPTNeoForCausalLM",
     )
 
+    WEIGHT_ONLY_CONFIG = (
+        (False, "RTN", "int4_clip"),
+        (False, "GPTQ", "int4_clip"),
+        (False, "RTN", "int8"),
+        (True, "", ""),
+    )
+
     @parameterized.expand(SUPPORTED_ARCHITECTURES_DYNAMIC)
     def test_dynamic_quantization(self, task, model_name, expected_quantized_matmuls):
         quantization_config = PostTrainingQuantConfig(approach="dynamic")
@@ -202,59 +209,41 @@ def test_ipex_static_quantization_with_smoothquant(self, task, model_name, expected_quantized_matmuls):
             load_ipex_model=True,
         )
 
+    @parameterized.expand(WEIGHT_ONLY_CONFIG)
     @unittest.skipIf(
         not is_intel_extension_for_transformers_available(), reason="Intel-extension-for-transformers not available!"
     )
-    def test_weight_only_quantization(self):
+    def test_weight_only_quantization(self, no_config, algo, weight_dtype):
         model_name = "hf-internal-testing/tiny-random-GPTNeoForCausalLM"
         model = AutoModelForCausalLM.from_pretrained(model_name)
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         tokenizer.add_special_tokens({"pad_token": "[PAD]"})
+        quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
+        calibration_dataset = _generate_dataset(quantizer, tokenizer, num_samples=2)
 
         with tempfile.TemporaryDirectory() as tmp_dir:
-            quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
-            calibration_dataset = _generate_dataset(quantizer, tokenizer, num_samples=2)
-            quantization_config = WeightOnlyQuantConfig(weight_dtype="int8")
-            q_model = quantizer.quantize(
-                quantization_config=quantization_config,
-                save_directory=tmp_dir,
-            )
-            q_model = ITREXAutoModelForCausalLM.from_pretrained(tmp_dir)
-            inp = torch.tensor([calibration_dataset[0]["input_ids"]])
-            out = model(inp)[0]
-            q_out = q_model(inp)[0]
-            self.assertTrue(torch.all(torch.isclose(out, q_out, atol=5e-1)))
-
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
-            calibration_dataset = _generate_dataset(quantizer, tokenizer, num_samples=2)
-            quantization_config = WeightOnlyQuantConfig(
-                algorithm="GPTQ",
-                algorithm_args={
-                    "percdamp": 0.01,
-                    "act_order": False,
-                    "scheme": "sym",
-                },
-                weight_dtype="int4_clip",
-            )
-            q_model = quantizer.quantize(
-                quantization_config=quantization_config,
-                calibration_dataset=calibration_dataset,
-                save_directory=tmp_dir,
-            )
-            q_model = ITREXAutoModelForCausalLM.from_pretrained(tmp_dir)
-            inp = torch.tensor([calibration_dataset[0]["input_ids"]])
-            out = model(inp)[0]
-            q_out = q_model(inp)[0]
-            self.assertTrue(torch.all(torch.isclose(out, q_out, atol=5e-1)))
-
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
-            calibration_dataset = _generate_dataset(quantizer, tokenizer, num_samples=2)
-            q_model = quantizer.quantize(
-                weight_only=True,  # use RTN quantization method and NF4 weight data type is default.
-                save_directory=tmp_dir,
-            )
+            if not no_config:
+                if algo == "GPTQ":
+                    algorithm_args = {
+                        "percdamp": 0.01,
+                        "act_order": False,
+                        "scheme": "sym",
+                    }
+                quantization_config = WeightOnlyQuantConfig(
+                    algorithm=algo,
+                    algorithm_args=algorithm_args if algo == "GPTQ" else None,
+                    weight_dtype=weight_dtype,
+                )
+                q_model = quantizer.quantize(
+                    quantization_config=quantization_config,
+                    calibration_dataset=calibration_dataset if algo == "GPTQ" else None,
+                    save_directory=tmp_dir,
+                )
+            else:
+                q_model = quantizer.quantize(
+                    weight_only=True,  # RTN is the default algorithm and NF4 the default weight dtype.
+                    save_directory=tmp_dir,
+                )
             q_model = ITREXAutoModelForCausalLM.from_pretrained(tmp_dir)
             inp = torch.tensor([calibration_dataset[0]["input_ids"]])
             out = model(inp)[0]
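Each WEIGHT_ONLY_CONFIG tuple binds to the new test signature as (no_config, algo, weight_dtype), so the refactor collapses three copied quantize/reload/compare blocks into one parameterized body. For reference, a minimal linear sketch of the GPTQ case, assuming INCQuantizer, WeightOnlyQuantConfig, ITREXAutoModelForCausalLM and _generate_dataset are imported as elsewhere in this test module:

    import copy
    import tempfile

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_name = "hf-internal-testing/tiny-random-GPTNeoForCausalLM"
    model = AutoModelForCausalLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.add_special_tokens({"pad_token": "[PAD]"})

    quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
    calibration_dataset = _generate_dataset(quantizer, tokenizer, num_samples=2)
    quantization_config = WeightOnlyQuantConfig(
        algorithm="GPTQ",
        algorithm_args={"percdamp": 0.01, "act_order": False, "scheme": "sym"},
        weight_dtype="int4_clip",
    )

    with tempfile.TemporaryDirectory() as tmp_dir:
        # GPTQ calibrates on real activations, so it needs a calibration dataset;
        # the RTN branches of the test pass calibration_dataset=None instead.
        quantizer.quantize(
            quantization_config=quantization_config,
            calibration_dataset=calibration_dataset,
            save_directory=tmp_dir,
        )
        q_model = ITREXAutoModelForCausalLM.from_pretrained(tmp_dir)
        # int4 weight-only quantization should leave outputs close to fp32
        inp = torch.tensor([calibration_dataset[0]["input_ids"]])
        assert torch.all(torch.isclose(model(inp)[0], q_model(inp)[0], atol=5e-1))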