Skip to content

Commit e804df3

Browse files
committed
Fixed example and UT for weight-only quantization
Signed-off-by: Cheng, Penghui <penghui.cheng@intel.com>
1 parent 5d90b52 commit e804df3

File tree

2 files changed

+5
-2
lines changed

2 files changed

+5
-2
lines changed

optimum/intel/neural_compressor/quantization.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ def quantize(
230230
batch_size=batch_size,
231231
remove_unused_columns=remove_unused_columns,
232232
data_collator=data_collator,
233-
use_label=False,
233+
use_label=False if "GPTQ" in algo else True,
234234
)
235235
quantization_config.calib_dataloader = calibration_dataloader
236236

tests/neural_compressor/test_optimization.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -205,10 +205,10 @@ def test_weight_only_quantization(self):
205205
model = AutoModelForCausalLM.from_pretrained(model_name)
206206
tokenizer = AutoTokenizer.from_pretrained(model_name)
207207
tokenizer.add_special_tokens({"pad_token": "[PAD]"})
208-
quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
209208
calibration_dataset = _generate_dataset(quantizer, tokenizer, num_samples=2)
210209

211210
with tempfile.TemporaryDirectory() as tmp_dir:
211+
quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
212212
quantization_config = WeightOnlyQuantConfig(weight_dtype="int8")
213213
q_model = quantizer.quantize(
214214
quantization_config=quantization_config,
@@ -220,6 +220,7 @@ def test_weight_only_quantization(self):
220220
self.assertTrue(torch.all(torch.isclose(out, q_out, atol=5e-1)))
221221

222222
with tempfile.TemporaryDirectory() as tmp_dir:
223+
quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
223224
quantization_config = WeightOnlyQuantConfig(
224225
algorithm="GPTQ",
225226
weight_dtype="int4_clip",
@@ -235,6 +236,7 @@ def test_weight_only_quantization(self):
235236
self.assertTrue(torch.all(torch.isclose(out, q_out, atol=5e-1)))
236237

237238
with tempfile.TemporaryDirectory() as tmp_dir:
239+
quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
238240
quantization_config = WeightOnlyQuantConfig(
239241
algorithm="AWQ",
240242
weight_dtype="int4_clip",
@@ -250,6 +252,7 @@ def test_weight_only_quantization(self):
250252
self.assertTrue(torch.all(torch.isclose(out, q_out, atol=5e-1)))
251253

252254
with tempfile.TemporaryDirectory() as tmp_dir:
255+
quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
253256
q_model = quantizer.quantize(
254257
weight_only=True, # use RTN quantization method and NF4 weight data type is default.
255258
save_directory=tmp_dir,

0 commit comments

Comments (0)