Skip to content

Commit e804df3

Browse files
committed
Fixed example and UT for weight-only quantization
Signed-off-by: Cheng, Penghui <penghui.cheng@intel.com>
1 parent 5d90b52 commit e804df3

File tree

2 files changed

+5
-2
lines changed

2 files changed

+5
-2
lines changed

optimum/intel/neural_compressor/quantization.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ def quantize(
230230
batch_size=batch_size,
231231
remove_unused_columns=remove_unused_columns,
232232
data_collator=data_collator,
233-
use_label=False,
233+
use_label=False if "GPTQ" in algo else True,
234234
)
235235
quantization_config.calib_dataloader = calibration_dataloader
236236

tests/neural_compressor/test_optimization.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -205,10 +205,10 @@ def test_weight_only_quantization(self):
205205
model = AutoModelForCausalLM.from_pretrained(model_name)
206206
tokenizer = AutoTokenizer.from_pretrained(model_name)
207207
tokenizer.add_special_tokens({"pad_token": "[PAD]"})
208-
quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
209208
calibration_dataset = _generate_dataset(quantizer, tokenizer, num_samples=2)
210209

211210
with tempfile.TemporaryDirectory() as tmp_dir:
211+
quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
212212
quantization_config = WeightOnlyQuantConfig(weight_dtype="int8")
213213
q_model = quantizer.quantize(
214214
quantization_config=quantization_config,
@@ -220,6 +220,7 @@ def test_weight_only_quantization(self):
220220
self.assertTrue(torch.all(torch.isclose(out, q_out, atol=5e-1)))
221221

222222
with tempfile.TemporaryDirectory() as tmp_dir:
223+
quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
223224
quantization_config = WeightOnlyQuantConfig(
224225
algorithm="GPTQ",
225226
weight_dtype="int4_clip",
@@ -235,6 +236,7 @@ def test_weight_only_quantization(self):
235236
self.assertTrue(torch.all(torch.isclose(out, q_out, atol=5e-1)))
236237

237238
with tempfile.TemporaryDirectory() as tmp_dir:
239+
quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
238240
quantization_config = WeightOnlyQuantConfig(
239241
algorithm="AWQ",
240242
weight_dtype="int4_clip",
@@ -250,6 +252,7 @@ def test_weight_only_quantization(self):
250252
self.assertTrue(torch.all(torch.isclose(out, q_out, atol=5e-1)))
251253

252254
with tempfile.TemporaryDirectory() as tmp_dir:
255+
quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
253256
q_model = quantizer.quantize(
254257
weight_only=True, # use RTN quantization method and NF4 weight data type is default.
255258
save_directory=tmp_dir,

0 commit comments

Comments (0)