diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
index e69792cec3..c818694442 100644
--- a/optimum/intel/openvino/modeling_decoder.py
+++ b/optimum/intel/openvino/modeling_decoder.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import copy
 import logging
 import os
 from pathlib import Path
@@ -24,7 +24,7 @@
 from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
 from openvino.preprocess import PrePostProcessor
 from openvino.runtime import Core, Tensor, Type
-from transformers import AutoModelForCausalLM, AutoTokenizer, PretrainedConfig
+from transformers import AutoModelForCausalLM, PretrainedConfig
 from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward
 from transformers.generation import GenerationMixin
 from transformers.modeling_outputs import CausalLMOutputWithPast
@@ -625,9 +625,8 @@ def _from_pretrained(
                 raise ImportError(
                     "Quantization of the weights requires nncf, please install it with `pip install nncf`"
                 )
-            import nncf
-            from .quantization import _weight_only_quantization
+            from optimum.intel.openvino.quantization import OVQuantizer

             default_config = _check_default_4bit_configs(config)

@@ -636,18 +635,10 @@ def _from_pretrained(
                     f"For the given model, we recommend the following `quantization_config` : {default_config}"
                 )

-            calibration_dataset = None
-            if isinstance(quantization_config.dataset, str):
-                tokenizer = quantization_config.tokenizer or AutoTokenizer.from_pretrained(model_id)
-
-                from optimum.gptq.data import get_dataset, prepare_dataset
-
-                nsamples = quantization_config.num_samples or 128
-                dataset = get_dataset(quantization_config.dataset, tokenizer, seqlen=32, nsamples=nsamples)
-                dataset = prepare_dataset(dataset)
-                calibration_dataset = nncf.Dataset(dataset, lambda x: causal_model.prepare_inputs(**x))
-
-            _weight_only_quantization(model, quantization_config, calibration_dataset)
+            quantizer = OVQuantizer(causal_model)
+            quantization_config_copy = copy.deepcopy(quantization_config)
+            quantization_config_copy.tokenizer = quantization_config.tokenizer or model_id
+            quantizer.quantize(ov_config=OVConfig(quantization_config=quantization_config_copy))

         return causal_model

diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py
index ddd0cdec75..5a267488c3 100644
--- a/optimum/intel/openvino/quantization.py
+++ b/optimum/intel/openvino/quantization.py
@@ -199,7 +199,7 @@ def from_pretrained(cls, model: PreTrainedModel, **kwargs):
     def quantize(
         self,
         calibration_dataset: Optional[Union[datasets.Dataset, nncf.Dataset, Iterable]] = None,
-        save_directory: Union[str, Path] = None,
+        save_directory: Optional[Union[str, Path]] = None,
         ov_config: OVConfig = None,
         file_name: Optional[str] = None,
         batch_size: int = 1,
@@ -215,7 +215,7 @@ def quantize(
             calibration_dataset (`datasets.Dataset` or `nncf.Dataset` or `Iterable`, *optional*):
                 A collection of data samples to use for quantization calibration. Is optional for weight-only
                 quantization and is required for full quantization.
-            save_directory (`Union[str, Path]`):
+            save_directory (`Union[str, Path]`, *optional*):
                 The directory where the quantized model should be saved.
             ov_config (`OVConfig`, *optional*):
                 The configuration containing the parameters related to quantization. If not provided, 8-bit symmetric
@@ -263,10 +263,6 @@ def quantize(
                 "as an instance of `OVWeightQuantizationConfig` for weight-only compression or as an instance of `OVQuantizationConfig` for full model quantization."
             )

-        if save_directory is None:
-            # TODO : can be set to self.model.config.name_or_path for OVModels when not provided
-            raise ValueError("`save_directory` needs to be specified")
-
         if ov_config is None:
             ov_config = OVConfig()
         if not isinstance(ov_config, OVConfig):
@@ -319,21 +315,41 @@ def quantize(
     def _quantize_ovbasemodel(
         self,
         ov_config: OVConfig,
-        save_directory: Union[str, Path],
+        save_directory: Union[str, Path] = None,
         calibration_dataset: Optional[Union[datasets.Dataset, nncf.Dataset, Iterable]] = None,
         batch_size: int = 1,
         data_collator: Optional[DataCollator] = None,
         remove_unused_columns: bool = True,
         **kwargs,
     ):
-        save_directory = Path(save_directory)
-        save_directory.mkdir(parents=True, exist_ok=True)
+        if save_directory is not None:
+            save_directory = Path(save_directory)
+            save_directory.mkdir(parents=True, exist_ok=True)

         quantization_config = ov_config.quantization_config
         if isinstance(quantization_config, OVWeightQuantizationConfig):
+            if calibration_dataset is None and isinstance(quantization_config.dataset, str):
+                from optimum.intel import OVModelForCausalLM
+
+                if isinstance(self.model, OVModelForCausalLM):
+                    from optimum.gptq.data import get_dataset, prepare_dataset
+
+                    tokenizer = AutoTokenizer.from_pretrained(quantization_config.tokenizer)
+                    nsamples = quantization_config.num_samples if quantization_config.num_samples else 128
+                    calibration_dataset = get_dataset(
+                        quantization_config.dataset, tokenizer, seqlen=32, nsamples=nsamples
+                    )
+                    calibration_dataset = prepare_dataset(calibration_dataset)
+                    calibration_dataset = nncf.Dataset(calibration_dataset, lambda x: self.model.prepare_inputs(**x))
+                else:
+                    raise ValueError(
+                        f"Can't create weight compression calibration dataset from string for {type(self.model)}"
+                    )
+
             _weight_only_quantization(self.model.model, quantization_config, calibration_dataset)
-            self.model.save_pretrained(save_directory)
-            ov_config.save_pretrained(save_directory)
+            if save_directory is not None:
+                self.model.save_pretrained(save_directory)
+                ov_config.save_pretrained(save_directory)
             return

         if not isinstance(quantization_config, OVQuantizationConfig):
             raise ValueError(f"Unsupported type of quantization config: {type(quantization_config)}")
@@ -385,8 +401,9 @@ def _quantize_ovbasemodel(
             **kwargs,
         )
         self.model.model = quantized_model
-        self.model.save_pretrained(save_directory)
-        ov_config.save_pretrained(save_directory)
+        if save_directory is not None:
+            self.model.save_pretrained(save_directory)
+            ov_config.save_pretrained(save_directory)

     def _quantize_torchmodel(
         self,
@@ -399,6 +416,10 @@ def _quantize_torchmodel(
         remove_unused_columns: bool = True,
         **kwargs,
     ):
+        if save_directory is None:
+            # TODO : can be set to self.model.config.name_or_path for OVModels when not provided
+            raise ValueError("`save_directory` needs to be specified")
+
         self._set_task()
         save_directory = Path(save_directory)
         save_directory.mkdir(parents=True, exist_ok=True)
@@ -645,14 +666,6 @@ def _weight_only_quantization(
             dataset = calibration_dataset
         else:
             dataset = nncf.Dataset(calibration_dataset)
-    elif config.dataset is not None and isinstance(config.dataset, str):
-        tokenizer = AutoTokenizer.from_pretrained(config.tokenizer)
-
-        from optimum.gptq.data import get_dataset, prepare_dataset
-
-        nsamples = config.num_samples if config.num_samples else 128
-        dataset = get_dataset(config.dataset, tokenizer, seqlen=32, nsamples=nsamples)
-        dataset = prepare_dataset(dataset)

     sensitivity_metric = None
     if isinstance(config.sensitivity_metric, str):
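
For reference, a minimal usage sketch of the API after this patch. The model id (`gpt2`) and the `wikitext2` dataset name are illustrative assumptions, not part of the diff; the point is that weight-only compression can now run fully in memory, and a string `dataset` is resolved inside `_quantize_ovbasemodel` rather than in `_weight_only_quantization`:

```python
# Sketch only: model id, dataset name and 4-bit settings are illustrative assumptions.
from optimum.intel import OVConfig, OVModelForCausalLM, OVQuantizer, OVWeightQuantizationConfig

# Export a causal LM to OpenVINO IR without quantizing it at load time.
model = OVModelForCausalLM.from_pretrained("gpt2", export=True)

# A string `dataset` is now turned into an nncf.Dataset inside
# _quantize_ovbasemodel (via optimum.gptq.data), so no explicit
# calibration_dataset argument is needed. Note that `tokenizer` must be set
# here, since the fallback to model_id only happens in _from_pretrained.
q_config = OVWeightQuantizationConfig(bits=4, dataset="wikitext2", tokenizer="gpt2")

# `save_directory` is now optional for OpenVINO models: the weights are
# compressed in place and only written out when a directory is passed.
quantizer = OVQuantizer(model)
quantizer.quantize(ov_config=OVConfig(quantization_config=q_config))
```

Passing `save_directory` still persists both the compressed model and the `OVConfig`; `_quantize_torchmodel` keeps the old behavior and raises if the directory is omitted.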