Commit 5f14658 (parent: a842ded)

fix ut and example

Signed-off-by: changwa1 <chang1.wang@intel.com>

File tree: 3 files changed, +11 -5 lines

examples/neural_compressor/language-modeling/run_clm.py (+2 -3)
@@ -217,9 +217,7 @@ class OptimizationArguments:
     )
     use_layer_wise: bool = field(
         default=False,
-        metadata={
-            "help": "Use layer wise to do quantization to save memory."
-        },
+        metadata={"help": "Use layer wise to do quantization to save memory."},
     )
     quantization_methodology: str = field(
         default="rtn",
@@ -673,6 +671,7 @@ def compute_metrics(eval_preds):
                 damp_percent=optim_args.damp_percent,
                 nsamples=optim_args.num_calibration_samples,
                 blocksize=optim_args.gptq_block_size,
+                tokenizer=tokenizer,
                 **algorithm_args,
             )
         else:
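For context, the tokenizer=tokenizer line added above forwards the tokenizer into the GPTQ quantization config so that calibration can tokenize its own samples. A minimal sketch of the resulting call, assuming the config class built here is neural_compressor.transformers.GPTQConfig and substituting hypothetical literal values for the optim_args fields:

# Sketch only; the literal values below stand in for optim_args fields.
from transformers import AutoTokenizer
from neural_compressor.transformers import GPTQConfig

tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")  # stand-in model
quantization_config = GPTQConfig(
    bits=4,
    damp_percent=0.01,    # optim_args.damp_percent
    nsamples=128,         # optim_args.num_calibration_samples
    blocksize=128,        # optim_args.gptq_block_size
    tokenizer=tokenizer,  # newly forwarded so calibration data can be tokenized
)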

optimum/intel/neural_compressor/quantization.py (+6 -0)
@@ -398,6 +398,12 @@ def _weight_only_quantization(
     if (not torch.cuda.is_available() or device_map == "cpu") and model.config.model_type == "chatglm":
         model = model.float()
 
+    from neural_compressor.torch import load_empty_model
+
+    model = load_empty_model(
+        model_id,
+        trust_remote_code=trust_remote_code,
+    )
     model = convert_to_quantized_model(model, quantization_config, device=device_map)
     quantization_config.remove_redundant_parameters()
     model.config.quantization_config = quantization_config
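The key change here is loading an empty model skeleton before conversion: load_empty_model builds the architecture without materializing the weight tensors, which is what lets layer-wise weight-only quantization process the checkpoint one layer at a time instead of holding a full-precision copy in memory. A standalone sketch of the call, with a hypothetical checkpoint name in place of the model_id argument:

# Sketch of the pattern used above; the checkpoint name is a stand-in
# for the model_id argument of _weight_only_quantization.
from neural_compressor.torch import load_empty_model

empty_model = load_empty_model(
    "facebook/opt-125m",
    trust_remote_code=False,
)
# empty_model carries the full architecture but no materialized weights;
# convert_to_quantized_model can then load and quantize layer by layer.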

tests/neural_compressor/test_optimization.py (+3 -2)
@@ -489,10 +489,10 @@ def test_weight_only_quantization(self, methodology, bits):
                 batch_size=5,
                 seq_len=32,
                 block_size=16,
-                user_layer_wise=True,
+                use_layer_wise=True,
             )
         else:
-            quantization_config = RtnConfig(bits=bits, group_size=8)
+            quantization_config = RtnConfig(bits=bits, group_size=8, use_layer_wise=True)
 
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         tokenizer.add_special_tokens({"pad_token": "[PAD]"})
@@ -504,6 +504,7 @@ def test_weight_only_quantization(self, methodology, bits):
         with torch.no_grad():
             quantizer_outputs = quantized_model(**tokens)
         quantized_model.save_pretrained(tmp_dir)
+
         loaded_model = INCModelForCausalLM.from_pretrained(tmp_dir)
         with torch.no_grad():
             loaded_outputs = loaded_model(**tokens)
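Taken together, the corrected test does a quantize, save, and reload round trip with use_layer_wise enabled (the old user_layer_wise spelling was a typo). A hedged end-to-end sketch of the same flow outside the test harness, assuming RtnConfig comes from neural_compressor.transformers and using a hypothetical small checkpoint in place of the test fixture:

# Round-trip sketch of what the test exercises; model_name is a stand-in.
import tempfile

import torch
from transformers import AutoTokenizer
from neural_compressor.transformers import RtnConfig
from optimum.intel import INCModelForCausalLM

model_name = "facebook/opt-125m"  # hypothetical; the test uses its own model
quantization_config = RtnConfig(bits=4, group_size=8, use_layer_wise=True)

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.add_special_tokens({"pad_token": "[PAD]"})
tokens = tokenizer("This is a sample input", return_tensors="pt")

# Quantize at load time, then verify the model survives save/reload.
quantized_model = INCModelForCausalLM.from_pretrained(
    model_name, quantization_config=quantization_config
)
with tempfile.TemporaryDirectory() as tmp_dir:
    quantized_model.save_pretrained(tmp_dir)
    loaded_model = INCModelForCausalLM.from_pretrained(tmp_dir)
    with torch.no_grad():
        loaded_outputs = loaded_model(**tokens)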
