Skip to content

Commit c235ae1

Browse files
Refactor OV weight compression call inside from_pretrained (#683)
* Move calibration dataset construction to WC function
* Tweak tokenizer
* Removed unused import
* ruff
* ruff 2
* Refactor through OVQuantizer call
1 parent 4869104 commit c235ae1

File tree

2 files changed

+41
-37
lines changed

2 files changed

+41
-37
lines changed

optimum/intel/openvino/modeling_decoder.py

+7-16
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14-
14+
import copy
1515
import logging
1616
import os
1717
import warnings
@@ -25,7 +25,7 @@
2525
from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
2626
from openvino.preprocess import PrePostProcessor
2727
from openvino.runtime import Core, Tensor, Type
28-
from transformers import AutoModelForCausalLM, AutoTokenizer, PretrainedConfig
28+
from transformers import AutoModelForCausalLM, PretrainedConfig
2929
from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward
3030
from transformers.generation import GenerationMixin
3131
from transformers.modeling_outputs import CausalLMOutputWithPast
@@ -646,9 +646,8 @@ def _from_pretrained(
646646
raise ImportError(
647647
"Quantization of the weights requires nncf, please install it with `pip install nncf`"
648648
)
649-
import nncf
650649

651-
from .quantization import _weight_only_quantization
650+
from optimum.intel.openvino.quantization import OVQuantizer
652651

653652
default_config = _check_default_4bit_configs(config)
654653

@@ -657,18 +656,10 @@ def _from_pretrained(
657656
f"For the given model, we recommend the following `quantization_config` : {default_config}"
658657
)
659658

660-
calibration_dataset = None
661-
if isinstance(quantization_config.dataset, str):
662-
tokenizer = quantization_config.tokenizer or AutoTokenizer.from_pretrained(model_id)
663-
664-
from optimum.gptq.data import get_dataset, prepare_dataset
665-
666-
nsamples = quantization_config.num_samples or 128
667-
dataset = get_dataset(quantization_config.dataset, tokenizer, seqlen=32, nsamples=nsamples)
668-
dataset = prepare_dataset(dataset)
669-
calibration_dataset = nncf.Dataset(dataset, lambda x: causal_model.prepare_inputs(**x))
670-
671-
_weight_only_quantization(model, quantization_config, calibration_dataset)
659+
quantizer = OVQuantizer(causal_model)
660+
quantization_config_copy = copy.deepcopy(quantization_config)
661+
quantization_config_copy.tokenizer = quantization_config.tokenizer or model_id
662+
quantizer.quantize(ov_config=OVConfig(quantization_config=quantization_config_copy))
672663

673664
return causal_model
674665

optimum/intel/openvino/quantization.py

+34-21
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ def from_pretrained(cls, model: PreTrainedModel, **kwargs):
201201
def quantize(
202202
self,
203203
calibration_dataset: Optional[Union[datasets.Dataset, nncf.Dataset, Iterable]] = None,
204-
save_directory: Union[str, Path] = None,
204+
save_directory: Optional[Union[str, Path]] = None,
205205
ov_config: OVConfig = None,
206206
file_name: Optional[str] = None,
207207
batch_size: int = 1,
@@ -217,7 +217,7 @@ def quantize(
217217
calibration_dataset (`datasets.Dataset` or `nncf.Dataset` or `Iterable`, *optional*):
218218
A collection of data samples to use for quantization calibration. Is optional for weight-only
219219
quantization and is required for full quantization.
220-
save_directory (`Union[str, Path]`):
220+
save_directory (`Union[str, Path]`, *optional*):
221221
The directory where the quantized model should be saved.
222222
ov_config (`OVConfig`, *optional*):
223223
The configuration containing the parameters related to quantization. If not provided, 8-bit symmetric
@@ -265,10 +265,6 @@ def quantize(
265265
"as an instance of `OVWeightQuantizationConfig` for weight-only compression or as an instance of `OVQuantizationConfig` for full model quantization."
266266
)
267267

268-
if save_directory is None:
269-
# TODO : can be set to self.model.config.name_or_path for OVModels when not provided
270-
raise ValueError("`save_directory` needs to be specified")
271-
272268
if ov_config is None:
273269
ov_config = OVConfig()
274270
if not isinstance(ov_config, OVConfig):
@@ -321,21 +317,41 @@ def quantize(
321317
def _quantize_ovbasemodel(
322318
self,
323319
ov_config: OVConfig,
324-
save_directory: Union[str, Path],
320+
save_directory: Union[str, Path] = None,
325321
calibration_dataset: Optional[Union[datasets.Dataset, nncf.Dataset, Iterable]] = None,
326322
batch_size: int = 1,
327323
data_collator: Optional[DataCollator] = None,
328324
remove_unused_columns: bool = True,
329325
**kwargs,
330326
):
331-
save_directory = Path(save_directory)
332-
save_directory.mkdir(parents=True, exist_ok=True)
327+
if save_directory is not None:
328+
save_directory = Path(save_directory)
329+
save_directory.mkdir(parents=True, exist_ok=True)
333330

334331
quantization_config = ov_config.quantization_config
335332
if isinstance(quantization_config, OVWeightQuantizationConfig):
333+
if calibration_dataset is None and isinstance(quantization_config.dataset, str):
334+
from optimum.intel import OVModelForCausalLM
335+
336+
if isinstance(self.model, OVModelForCausalLM):
337+
from optimum.gptq.data import get_dataset, prepare_dataset
338+
339+
tokenizer = AutoTokenizer.from_pretrained(quantization_config.tokenizer)
340+
nsamples = quantization_config.num_samples if quantization_config.num_samples else 128
341+
calibration_dataset = get_dataset(
342+
quantization_config.dataset, tokenizer, seqlen=32, nsamples=nsamples
343+
)
344+
calibration_dataset = prepare_dataset(calibration_dataset)
345+
calibration_dataset = nncf.Dataset(calibration_dataset, lambda x: self.model.prepare_inputs(**x))
346+
else:
347+
raise ValueError(
348+
f"Can't create weight compression calibration dataset from string for {type(self.model)}"
349+
)
350+
336351
_weight_only_quantization(self.model.model, quantization_config, calibration_dataset)
337-
self.model.save_pretrained(save_directory)
338-
ov_config.save_pretrained(save_directory)
352+
if save_directory is not None:
353+
self.model.save_pretrained(save_directory)
354+
ov_config.save_pretrained(save_directory)
339355
return
340356
if not isinstance(quantization_config, OVQuantizationConfig):
341357
raise ValueError(f"Unsupported type of quantization config: {type(quantization_config)}")
@@ -387,8 +403,9 @@ def _quantize_ovbasemodel(
387403
**kwargs,
388404
)
389405
self.model.model = quantized_model
390-
self.model.save_pretrained(save_directory)
391-
ov_config.save_pretrained(save_directory)
406+
if save_directory is not None:
407+
self.model.save_pretrained(save_directory)
408+
ov_config.save_pretrained(save_directory)
392409

393410
def _quantize_torchmodel(
394411
self,
@@ -401,6 +418,10 @@ def _quantize_torchmodel(
401418
remove_unused_columns: bool = True,
402419
**kwargs,
403420
):
421+
if save_directory is None:
422+
# TODO : can be set to self.model.config.name_or_path for OVModels when not provided
423+
raise ValueError("`save_directory` needs to be specified")
424+
404425
self._set_task()
405426
save_directory = Path(save_directory)
406427
save_directory.mkdir(parents=True, exist_ok=True)
@@ -660,14 +681,6 @@ def _weight_only_quantization(
660681
dataset = calibration_dataset
661682
else:
662683
dataset = nncf.Dataset(calibration_dataset)
663-
elif config.dataset is not None and isinstance(config.dataset, str):
664-
tokenizer = AutoTokenizer.from_pretrained(config.tokenizer)
665-
666-
from optimum.gptq.data import get_dataset, prepare_dataset
667-
668-
nsamples = config.num_samples if config.num_samples else 128
669-
dataset = get_dataset(config.dataset, tokenizer, seqlen=32, nsamples=nsamples)
670-
dataset = prepare_dataset(dataset)
671684

672685
sensitivity_metric = None
673686
if isinstance(config.sensitivity_metric, str):

0 commit comments

Comments (0)