Skip to content

Commit ea3f211

Browse files
Refactor through OVQuantizer call
1 parent fa4065f commit ea3f211

File tree

2 files changed

+38
-29
lines changed

2 files changed

+38
-29
lines changed

optimum/intel/openvino/modeling_decoder.py

+4-6
Original file line numberDiff line numberDiff line change
@@ -625,21 +625,19 @@ def _from_pretrained(
625625
"Quantization of the weights requires nncf, please install it with `pip install nncf`"
626626
)
627627

628-
from optimum.intel.openvino.quantization import _weight_only_quantization
628+
from optimum.intel.openvino.quantization import OVQuantizer
629629

630630
default_config = _check_default_4bit_configs(config)
631+
631632
if default_config:
632633
logger.info(
633634
f"For the given model, we recommend the following `quantization_config` : {default_config}"
634635
)
635636

637+
quantizer = OVQuantizer(causal_model)
636638
quantization_config_copy = copy.deepcopy(quantization_config)
637639
quantization_config_copy.tokenizer = quantization_config.tokenizer or model_id
638-
_weight_only_quantization(
639-
model,
640-
quantization_config_copy,
641-
transform_fn=lambda x: causal_model.prepare_inputs(**x),
642-
)
640+
quantizer.quantize(ov_config=OVConfig(quantization_config=quantization_config_copy))
643641

644642
return causal_model
645643

optimum/intel/openvino/quantization.py

+34-23
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ def from_pretrained(cls, model: PreTrainedModel, **kwargs):
198198
def quantize(
199199
self,
200200
calibration_dataset: Optional[Union[datasets.Dataset, nncf.Dataset, Iterable]] = None,
201-
save_directory: Union[str, Path] = None,
201+
save_directory: Optional[Union[str, Path]] = None,
202202
ov_config: OVConfig = None,
203203
file_name: Optional[str] = None,
204204
batch_size: int = 1,
@@ -214,7 +214,7 @@ def quantize(
214214
calibration_dataset (`datasets.Dataset` or `nncf.Dataset` or `Iterable`, *optional*):
215215
A collection of data samples to use for quantization calibration. Is optional for weight-only
216216
quantization and is required for full quantization.
217-
save_directory (`Union[str, Path]`):
217+
save_directory (`Union[str, Path]`, *optional*):
218218
The directory where the quantized model should be saved.
219219
ov_config (`OVConfig`, *optional*):
220220
The configuration containing the parameters related to quantization. If not provided, 8-bit symmetric
@@ -262,10 +262,6 @@ def quantize(
262262
"as an instance of `OVWeightQuantizationConfig` for weight-only compression or as an instance of `OVQuantizationConfig` for full model quantization."
263263
)
264264

265-
if save_directory is None:
266-
# TODO : can be set to self.model.config.name_or_path for OVModels when not provided
267-
raise ValueError("`save_directory` needs to be specified")
268-
269265
if ov_config is None:
270266
ov_config = OVConfig()
271267
if not isinstance(ov_config, OVConfig):
@@ -318,21 +314,41 @@ def quantize(
318314
def _quantize_ovbasemodel(
319315
self,
320316
ov_config: OVConfig,
321-
save_directory: Union[str, Path],
317+
save_directory: Union[str, Path] = None,
322318
calibration_dataset: Optional[Union[datasets.Dataset, nncf.Dataset, Iterable]] = None,
323319
batch_size: int = 1,
324320
data_collator: Optional[DataCollator] = None,
325321
remove_unused_columns: bool = True,
326322
**kwargs,
327323
):
328-
save_directory = Path(save_directory)
329-
save_directory.mkdir(parents=True, exist_ok=True)
324+
if save_directory is not None:
325+
save_directory = Path(save_directory)
326+
save_directory.mkdir(parents=True, exist_ok=True)
330327

331328
quantization_config = ov_config.quantization_config
332329
if isinstance(quantization_config, OVWeightQuantizationConfig):
330+
if calibration_dataset is None and isinstance(quantization_config.dataset, str):
331+
from optimum.intel import OVModelForCausalLM
332+
333+
if isinstance(self.model, OVModelForCausalLM):
334+
from optimum.gptq.data import get_dataset, prepare_dataset
335+
336+
tokenizer = AutoTokenizer.from_pretrained(quantization_config.tokenizer)
337+
nsamples = quantization_config.num_samples if quantization_config.num_samples else 128
338+
calibration_dataset = get_dataset(
339+
quantization_config.dataset, tokenizer, seqlen=32, nsamples=nsamples
340+
)
341+
calibration_dataset = prepare_dataset(calibration_dataset)
342+
calibration_dataset = nncf.Dataset(calibration_dataset, lambda x: self.model.prepare_inputs(**x))
343+
else:
344+
raise ValueError(
345+
f"Can't create weight compression calibration dataset from string for {type(self.model)}"
346+
)
347+
333348
_weight_only_quantization(self.model.model, quantization_config, calibration_dataset)
334-
self.model.save_pretrained(save_directory)
335-
ov_config.save_pretrained(save_directory)
349+
if save_directory is not None:
350+
self.model.save_pretrained(save_directory)
351+
ov_config.save_pretrained(save_directory)
336352
return
337353
if not isinstance(quantization_config, OVQuantizationConfig):
338354
raise ValueError(f"Unsupported type of quantization config: {type(quantization_config)}")
@@ -384,8 +400,9 @@ def _quantize_ovbasemodel(
384400
**kwargs,
385401
)
386402
self.model.model = quantized_model
387-
self.model.save_pretrained(save_directory)
388-
ov_config.save_pretrained(save_directory)
403+
if save_directory is not None:
404+
self.model.save_pretrained(save_directory)
405+
ov_config.save_pretrained(save_directory)
389406

390407
def _quantize_torchmodel(
391408
self,
@@ -398,6 +415,10 @@ def _quantize_torchmodel(
398415
remove_unused_columns: bool = True,
399416
**kwargs,
400417
):
418+
if save_directory is None:
419+
# TODO : can be set to self.model.config.name_or_path for OVModels when not provided
420+
raise ValueError("`save_directory` needs to be specified")
421+
401422
self._set_task()
402423
save_directory = Path(save_directory)
403424
save_directory.mkdir(parents=True, exist_ok=True)
@@ -622,7 +643,6 @@ def _weight_only_quantization(
622643
model: openvino.runtime.Model,
623644
quantization_config: Union[OVWeightQuantizationConfig, Dict],
624645
calibration_dataset: Optional[Union[nncf.Dataset, Iterable]] = None,
625-
transform_fn: Optional[Callable] = None,
626646
) -> openvino.runtime.Model:
627647
config = quantization_config
628648
if isinstance(config, dict):
@@ -645,15 +665,6 @@ def _weight_only_quantization(
645665
dataset = calibration_dataset
646666
else:
647667
dataset = nncf.Dataset(calibration_dataset)
648-
elif config.dataset is not None and isinstance(config.dataset, str):
649-
tokenizer = AutoTokenizer.from_pretrained(config.tokenizer)
650-
651-
from optimum.gptq.data import get_dataset, prepare_dataset
652-
653-
nsamples = config.num_samples if config.num_samples else 128
654-
dataset = get_dataset(config.dataset, tokenizer, seqlen=32, nsamples=nsamples)
655-
dataset = prepare_dataset(dataset)
656-
dataset = nncf.Dataset(dataset, transform_fn)
657668

658669
sensitivity_metric = None
659670
if isinstance(config.sensitivity_metric, str):

0 commit comments

Comments (0)