From fa8c3fd8e556c00d905700212d90920eb4c65048 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Wed, 28 Feb 2024 15:25:11 +0100
Subject: [PATCH 01/17] fix doc

---
 docs/source/optimization_ov.mdx | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/docs/source/optimization_ov.mdx b/docs/source/optimization_ov.mdx
index 77dab40159..5686af4bf3 100644
--- a/docs/source/optimization_ov.mdx
+++ b/docs/source/optimization_ov.mdx
@@ -38,8 +38,6 @@ save_dir = "ptq_model"
 def preprocess_function(examples, tokenizer):
     return tokenizer(examples["sentence"], padding="max_length", max_length=128, truncation=True)
 
-# Load the default quantization configuration detailing the quantization we wish to apply
-quantization_config = OVConfig()
 # Instantiate our OVQuantizer using the desired configuration
 quantizer = OVQuantizer.from_pretrained(model)
 # Create the calibration dataset used to perform static quantization
@@ -52,7 +50,6 @@ calibration_dataset = quantizer.get_calibration_dataset(
 )
 # Apply static quantization and export the resulting quantized model to OpenVINO IR format
 quantizer.quantize(
-    quantization_config=quantization_config,
     calibration_dataset=calibration_dataset,
     save_directory=save_dir,
 )

From 3145898b5fa4628327b813d6fca5a52aa17add5b Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Wed, 28 Feb 2024 15:25:40 +0100
Subject: [PATCH 02/17] remove default compression value

---
 optimum/intel/openvino/configuration.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/optimum/intel/openvino/configuration.py b/optimum/intel/openvino/configuration.py
index 6611e5d0dd..7bf8955b92 100644
--- a/optimum/intel/openvino/configuration.py
+++ b/optimum/intel/openvino/configuration.py
@@ -113,7 +113,7 @@ def __init__(
         **kwargs,
     ):
         super().__init__()
-        self.compression = compression or DEFAULT_QUANTIZATION_CONFIG
+        self.compression = compression
         self.input_info = input_info
         self.save_onnx_model = save_onnx_model
         self._enable_standard_onnx_export_option()

From dfc4893376a5c0de6b5dfec8edd6b54c57e5085e Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Wed, 28 Feb 2024 15:29:02 +0100
Subject: [PATCH 03/17] set default compression config when not provided

---
 optimum/intel/openvino/quantization.py | 11 +++--
 optimum/intel/openvino/trainer.py      | 66 ++++++++++++++------------
 2 files changed, 42 insertions(+), 35 deletions(-)

diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py
index 5ec4eac556..7da0a19b5d 100644
--- a/optimum/intel/openvino/quantization.py
+++ b/optimum/intel/openvino/quantization.py
@@ -44,7 +44,7 @@
 from ..utils.constant import _TASK_ALIASES
 from ..utils.import_utils import DATASETS_IMPORT_ERROR, is_datasets_available
 from ..utils.modeling_utils import get_model_device
-from .configuration import OVConfig, OVWeightQuantizationConfig
+from .configuration import OVConfig, OVWeightQuantizationConfig, DEFAULT_QUANTIZATION_CONFIG
 from .modeling_base import OVBaseModel
 from .utils import (
     MAX_ONNX_OPSET,
@@ -231,8 +231,11 @@ def quantize(
             )
         ov_config = ov_config or quantization_config
 
-        if ov_config is not None and not isinstance(ov_config, OVConfig):
-            raise TypeError(f"`ov_config` should be an `OVConfig`, but got: {type(ov_config)} instead.")
+        if ov_config is not None:
+            if not isinstance(ov_config, OVConfig):
+                raise TypeError(f"`ov_config` should be an `OVConfig`, but got: {type(ov_config)} instead.")
+            elif not ov_config.compression:
+                ov_config.compression = DEFAULT_QUANTIZATION_CONFIG
 
         if isinstance(self.model, OVBaseModel):
             self._quantize_ovbasemodel(
@@ -351,7 +354,7 @@ def _quantize_torchmodel(
             logger.info(
                 "No configuration describing the quantization process was provided, a default OVConfig will be generated."
             )
-            ov_config = OVConfig()
+            ov_config = OVConfig(compression=DEFAULT_QUANTIZATION_CONFIG)
         onnx_file_name = (
             ONNX_WEIGHTS_NAME
             if file_name is None and ov_config.save_onnx_model
diff --git a/optimum/intel/openvino/trainer.py b/optimum/intel/openvino/trainer.py
index 5c7d392292..80abcf95c8 100644
--- a/optimum/intel/openvino/trainer.py
+++ b/optimum/intel/openvino/trainer.py
@@ -89,7 +89,7 @@
 
 from ..utils.constant import _TASK_ALIASES
 from ..utils.import_utils import is_transformers_version
-from .configuration import OVConfig
+from .configuration import OVConfig, DEFAULT_QUANTIZATION_CONFIG
 from .quantization import OVDataLoader
 from .training_args import OVTrainingArguments
 from .utils import (
@@ -225,37 +225,41 @@ def __init__(
             self.teacher.eval()
 
         self.compression_controller = None
-        if self.ov_config is not None and self.args.do_train:
-            self._set_task()
-            train_dataloader = self.get_train_dataloader()
-            model_inputs = next(iter(train_dataloader))
-            for label_name in self.label_names:
-                model_inputs.pop(label_name)
-            force_batch_one = self._is_pruning_enabled()
-            self.ov_config.add_input_info(model_inputs, force_batch_one)
-            nncf_config = NNCFConfig.from_dict(self.ov_config.__dict__)
-            nncf_config.register_extra_structs(
-                [
-                    QuantizationRangeInitArgs(OVDataLoader(train_dataloader)),
-                    BNAdaptationInitArgs(OVDataLoader(train_dataloader)),
-                ]
-            )
+        if self.ov_config is not None:
+            if not self.ov_config.compression:
+                self.ov_config.compression = DEFAULT_QUANTIZATION_CONFIG
+
+            if self.args.do_train:
+                self._set_task()
+                train_dataloader = self.get_train_dataloader()
+                model_inputs = next(iter(train_dataloader))
+                for label_name in self.label_names:
+                    model_inputs.pop(label_name)
+                force_batch_one = self._is_pruning_enabled()
+                self.ov_config.add_input_info(model_inputs, force_batch_one)
+                nncf_config = NNCFConfig.from_dict(self.ov_config.__dict__)
+                nncf_config.register_extra_structs(
+                    [
+                        QuantizationRangeInitArgs(OVDataLoader(train_dataloader)),
+                        BNAdaptationInitArgs(OVDataLoader(train_dataloader)),
+                    ]
+                )
 
-            # Configure NNCF logging
-            # Disable nncf logging to stdout except error
-            # but to file nncf_output.log
-            nncf_config["log_dir"] = args.output_dir
-            nncf_log_file_handler = logging.logging.FileHandler(os.path.join(args.output_dir, NNCF_LOG_FILE_NAME))
-            nncf_log_file_handler.setFormatter(logging.logging.Formatter("%(levelname)s:%(name)s:%(message)s"))
-            nncf_logger.addHandler(nncf_log_file_handler)
-            set_log_level(logging.ERROR)
-            nncf_logger.setLevel(logging.INFO)
-            nncf_log_file_handler.setLevel(logging.INFO)
-
-            self.compression_controller, self.model = create_compressed_model(self.model, nncf_config)
-            self.model_wrapped = self.model
-            # TODO : To deprecate once support transformers > 4.30.0
-            self.deepspeed = None
+                # Configure NNCF logging
+                # Disable nncf logging to stdout except error
+                # but to file nncf_output.log
+                nncf_config["log_dir"] = args.output_dir
+                nncf_log_file_handler = logging.logging.FileHandler(os.path.join(args.output_dir, NNCF_LOG_FILE_NAME))
+                nncf_log_file_handler.setFormatter(logging.logging.Formatter("%(levelname)s:%(name)s:%(message)s"))
+                nncf_logger.addHandler(nncf_log_file_handler)
+                set_log_level(logging.ERROR)
+                nncf_logger.setLevel(logging.INFO)
+                nncf_log_file_handler.setLevel(logging.INFO)
+
+                self.compression_controller, self.model = create_compressed_model(self.model, nncf_config)
+                self.model_wrapped = self.model
+                # TODO : To deprecate once support transformers > 4.30.0
+                self.deepspeed = None
 
     def _set_signature_columns_if_needed(self):
        if self._signature_columns is None:
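Note: the following sketch is illustrative only and is not part of the patch series. It shows the fallback PATCH 03 introduces: when no compression configuration is supplied, the quantizer now fills in DEFAULT_QUANTIZATION_CONFIG itself instead of relying on OVConfig's constructor default. `model` and `calibration_dataset` are assumed to be the objects from the documentation example touched by PATCH 01.

from optimum.intel import OVConfig, OVQuantizer

quantizer = OVQuantizer.from_pretrained(model)

# No config given: a default OVConfig(compression=DEFAULT_QUANTIZATION_CONFIG)
# is generated internally by quantize().
quantizer.quantize(calibration_dataset=calibration_dataset, save_directory="ptq_model")

# An OVConfig without a `compression` dict: quantize() fills in the default.
quantizer.quantize(
    ov_config=OVConfig(),
    calibration_dataset=calibration_dataset,
    save_directory="ptq_model",
)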
From db22a525e4e49f4e4eacdc716ae9572712cacfe8 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Wed, 28 Feb 2024 15:59:10 +0100
Subject: [PATCH 04/17] save openvino config to include quantization configuration

---
 optimum/intel/openvino/modeling_base.py      | 28 ++++++++++++++++---
 .../intel/openvino/modeling_base_seq2seq.py  | 14 ++++++++--
 optimum/intel/openvino/modeling_decoder.py   | 17 ++++++-----
 optimum/intel/openvino/modeling_diffusion.py | 15 ++++++----
 4 files changed, 53 insertions(+), 21 deletions(-)

diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py
index 51633b0210..2260277680 100644
--- a/optimum/intel/openvino/modeling_base.py
+++ b/optimum/intel/openvino/modeling_base.py
@@ -57,6 +57,7 @@ def __init__(
         dynamic_shapes: bool = True,
         ov_config: Optional[Dict[str, str]] = None,
         model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
+        quantization_config: Optional[Union[OVWeightQuantizationConfig, Dict]] = None,
         **kwargs,
     ):
         self.config = config
@@ -91,6 +92,10 @@ def __init__(
 
         self.generation_config = GenerationConfig.from_model_config(config) if self.can_generate() else None
 
+        self._openvino_config = None
+        if quantization_config:
+            self._openvino_config = OVConfig(quantization_config=quantization_config)
+
     @staticmethod
     def load_model(file_name: Union[str, Path], quantization_config: Union[OVWeightQuantizationConfig, Dict] = None):
         """
@@ -143,6 +148,14 @@ def _save_pretrained(self, save_directory: Union[str, Path]):
         dst_path = os.path.join(save_directory, OV_XML_FILE_NAME)
         openvino.save_model(self.model, dst_path, compress_to_fp16=False)
 
+        self._save_openvino_config(save_directory)
+
+
+    def _save_openvino_config(self, save_directory: Union[str, Path]):
+        if self._openvino_config is not None:
+            self._openvino_config.save_pretrained(save_directory)
+
+
     @classmethod
     def _from_pretrained(
@@ -203,12 +216,19 @@ def _from_pretrained(
             local_files_only=local_files_only,
         )
 
-        # Give default quantization config if not provided and load_in_8bit=True
-        if load_in_8bit:
-            quantization_config = quantization_config or {"bits": 8}
+        quantization_config = self._prepare_quantization_config(quantization_config, load_in_8bit)
 
         model = cls.load_model(model_cache_path, quantization_config=quantization_config)
-        return cls(model, config=config, model_save_dir=model_cache_path.parent, **kwargs)
+        return cls(model, config=config, model_save_dir=model_cache_path.parent, quantization_config=quantization_config, **kwargs)
+
+    def _prepare_quantization_config(quantization_config : Optional[Union[OVWeightQuantizationConfig, Dict]] = None, load_in_8bit:bool= False):
+        # Give default quantization config if not provided and load_in_8bit=True
+        if not quantization_config and load_in_8bit:
+            quantization_config = OVWeightQuantizationConfig(bits=8)
+        elif isinstance(quantization_config, dict):
+            quantization_config = OVWeightQuantizationConfig.from_dict(quantization_config)
+
+        return quantization_config
 
     @staticmethod
     def _cached_file(
diff --git a/optimum/intel/openvino/modeling_base_seq2seq.py b/optimum/intel/openvino/modeling_base_seq2seq.py
index df9449b0b5..bc51d1b64d 100644
--- a/optimum/intel/openvino/modeling_base_seq2seq.py
+++ b/optimum/intel/openvino/modeling_base_seq2seq.py
@@ -58,6 +58,7 @@ def __init__(
         dynamic_shapes: bool = True,
         ov_config: Optional[Dict[str, str]] = None,
         model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
+        quantization_config: Union[OVWeightQuantizationConfig, Dict] = None,
         **kwargs,
     ):
         self.config = config
@@ -76,6 +77,12 @@ def __init__(
         self.decoder_model = decoder
         self.decoder_with_past_model = decoder_with_past
         self.generation_config = GenerationConfig.from_model_config(config) if self.can_generate() else None
+        if quantization_config:
+            self.ov_config["quantization_config"] = quantization_config
+
+        self._openvino_config = None
+        if quantization_config:
+            self._openvino_config = OVConfig(quantization_config=quantization_config)
 
     def _save_pretrained(self, save_directory: Union[str, Path]):
         """
@@ -96,6 +103,8 @@ def _save_pretrained(self, save_directory: Union[str, Path]):
             dst_path = os.path.join(save_directory, dst_file_name)
             openvino.save_model(src_file, dst_path, compress_to_fp16=False)
 
+        self._save_openvino_config(save_directory)
+
     @classmethod
     def _from_pretrained(
         cls,
@@ -155,9 +164,7 @@ def _from_pretrained(
         decoder_with_past_file_name = decoder_with_past_file_name or default_decoder_with_past_file_name
         decoder_with_past = None
 
-        # Give default quantization config if not provided and load_in_8bit=True
-        if load_in_8bit:
-            quantization_config = quantization_config or {"bits": 8}
+        quantization_config = self._prepare_quantization_config(quantization_config, load_in_8bit)
 
         # Load model from a local directory
         if os.path.isdir(model_id):
@@ -205,6 +212,7 @@ def _from_pretrained(
             decoder_with_past=decoder_with_past,
             config=config,
             model_save_dir=model_save_dir,
+            quantization_config=quantization_config,
             **kwargs,
         )
diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
index c0274d3f5b..7581c353f4 100644
--- a/optimum/intel/openvino/modeling_decoder.py
+++ b/optimum/intel/openvino/modeling_decoder.py
@@ -100,6 +100,7 @@ def __init__(
         dynamic_shapes: bool = True,
         ov_config: Optional[Dict[str, str]] = None,
         model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
+        quantization_config: Optional[Union[OVWeightQuantizationConfig, Dict]] = None,
         **kwargs,
     ):
         if not dynamic_shapes:
@@ -117,6 +118,7 @@ def __init__(
             dynamic_shapes=False,
             ov_config=ov_config,
             model_save_dir=model_save_dir,
+            quantization_config=quantization_config,
             **kwargs,
         )
 
@@ -224,6 +226,8 @@ def _save_pretrained(self, save_directory: Union[str, Path]):
         dst_path = os.path.join(save_directory, OV_XML_FILE_NAME)
         openvino.save_model(model_to_save, dst_path, compress_to_fp16=False)
 
+        self._save_openvino_config(save_directory)
+
     @classmethod
     def _from_transformers(
         cls,
@@ -576,15 +580,10 @@ def _from_pretrained(
             local_files_only=local_files_only,
         )
 
-        # Give default quantization config if not provided and load_in_8bit=True
-        if load_in_8bit:
-            quantization_config = quantization_config or {"bits": 8}
-
-        if isinstance(quantization_config, dict):
-            if quantization_config == {"bits": 4} and config.name_or_path in _DEFAULT_4BIT_CONFIGS:
-                quantization_config = _DEFAULT_4BIT_CONFIGS[config.name_or_path]
+        if isinstance(quantization_config, dict) and quantization_config == {"bits": 4}:
+            quantization_config = _DEFAULT_4BIT_CONFIGS.get(config.name_or_path, quantization_config)
 
-            quantization_config = OVWeightQuantizationConfig.from_dict(quantization_config)
+        quantization_config = self._prepare_quantization_config(quantization_config, load_in_8bit)
 
         load_in_4bit = quantization_config.bits == 4 if quantization_config else False
         model = cls.load_model(model_cache_path, quantization_config=None if load_in_4bit else quantization_config)
@@ -603,7 +602,7 @@ def _from_pretrained(
 
         enable_compilation = kwargs.pop("compile", True) and not load_in_4bit
         causal_model = init_cls(
-            model=model, config=config, model_save_dir=model_cache_path.parent, compile=enable_compilation, **kwargs
+            model=model, config=config, model_save_dir=model_cache_path.parent, compile=enable_compilation, quantization_config=quantization_config, **kwargs
         )
 
         if load_in_4bit:
diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py
index 5633f852a8..7d7785dd73 100644
--- a/optimum/intel/openvino/modeling_diffusion.py
+++ b/optimum/intel/openvino/modeling_diffusion.py
@@ -87,6 +87,7 @@ def __init__(
         compile: bool = True,
         ov_config: Optional[Dict[str, str]] = None,
         model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
+        quantization_config: Optional[Union[OVWeightQuantizationConfig, Dict]] = None,
         **kwargs,
     ):
         self._internal_dict = config
@@ -140,6 +141,11 @@ def __init__(
 
         self._internal_dict.pop("vae", None)
 
+        self._openvino_config = None
+        if quantization_config:
+            self._openvino_config = OVConfig(quantization_config=quantization_config)
+
+
     def _save_pretrained(self, save_directory: Union[str, Path]):
         """
         Saves the model to the OpenVINO IR format so that it can be re-loaded using the
@@ -177,6 +183,8 @@ def _save_pretrained(self, save_directory: Union[str, Path]):
             if self.tokenizer_2 is not None:
                 self.tokenizer_2.save_pretrained(save_directory / "tokenizer_2")
 
+        self._save_openvino_config(save_directory)
+
     @classmethod
     def _from_pretrained(
         cls,
@@ -257,10 +265,7 @@ def _from_pretrained(
             else:
                 kwargs[name] = load_method(new_model_save_dir)
 
-        # Give default quantization config if not provided and load_in_8bit=True
-        if load_in_8bit:
-            quantization_config = quantization_config or {"bits": 8}
-
+        quantization_config = self._prepare_quantization_config(quantization_config, load_in_8bit)
         unet = cls.load_model(
             new_model_save_dir / DIFFUSION_MODEL_UNET_SUBFOLDER / unet_file_name, quantization_config
         )
@@ -278,7 +283,7 @@ def _from_pretrained(
         if model_save_dir is None:
             model_save_dir = new_model_save_dir
 
-        return cls(unet=unet, config=config, model_save_dir=model_save_dir, **components, **kwargs)
+        return cls(unet=unet, config=config, model_save_dir=model_save_dir, quantization_config=quantization_config, **components, **kwargs)
 
     @classmethod
     def _from_transformers(

From f35aa15a0e85641067b0387499fdf0566f31e1ce Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Wed, 28 Feb 2024 16:05:15 +0100
Subject: [PATCH 05/17] fix style

---
 optimum/intel/openvino/modeling_base.py         | 17 ++++++++++++-----
 optimum/intel/openvino/modeling_base_seq2seq.py |  2 +-
 optimum/intel/openvino/modeling_decoder.py      | 11 ++++++++---
 optimum/intel/openvino/modeling_diffusion.py    | 12 +++++++++---
 optimum/intel/openvino/quantization.py          |  2 +-
 optimum/intel/openvino/trainer.py               |  2 +-
 6 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py
index 2260277680..fc4b3f32ea 100644
--- a/optimum/intel/openvino/modeling_base.py
+++ b/optimum/intel/openvino/modeling_base.py
@@ -150,12 +150,10 @@ def _save_pretrained(self, save_directory: Union[str, Path]):
 
         self._save_openvino_config(save_directory)
 
-
     def _save_openvino_config(self, save_directory: Union[str, Path]):
         if self._openvino_config is not None:
             self._openvino_config.save_pretrained(save_directory)
-
 
     @classmethod
     def _from_pretrained(
         cls,
@@ -214,12 +214,21 @@ def _from_pretrained(
             local_files_only=local_files_only,
         )
 
-        quantization_config = self._prepare_quantization_config(quantization_config, load_in_8bit)
+        quantization_config = cls._prepare_quantization_config(quantization_config, load_in_8bit)
         model = cls.load_model(model_cache_path, quantization_config=quantization_config)
 
-        return cls(model, config=config, model_save_dir=model_cache_path.parent, quantization_config=quantization_config, **kwargs)
+        return cls(
+            model,
+            config=config,
+            model_save_dir=model_cache_path.parent,
+            quantization_config=quantization_config,
+            **kwargs,
+        )
 
-    def _prepare_quantization_config(quantization_config : Optional[Union[OVWeightQuantizationConfig, Dict]] = None, load_in_8bit:bool= False):
+    @staticmethod
+    def _prepare_quantization_config(
+        quantization_config: Optional[Union[OVWeightQuantizationConfig, Dict]] = None, load_in_8bit: bool = False
+    ):
         # Give default quantization config if not provided and load_in_8bit=True
         if not quantization_config and load_in_8bit:
             quantization_config = OVWeightQuantizationConfig(bits=8)
diff --git a/optimum/intel/openvino/modeling_base_seq2seq.py b/optimum/intel/openvino/modeling_base_seq2seq.py
index bc51d1b64d..a275e150cd 100644
--- a/optimum/intel/openvino/modeling_base_seq2seq.py
+++ b/optimum/intel/openvino/modeling_base_seq2seq.py
@@ -164,7 +164,7 @@ def _from_pretrained(
         decoder_with_past_file_name = decoder_with_past_file_name or default_decoder_with_past_file_name
         decoder_with_past = None
 
-        quantization_config = self._prepare_quantization_config(quantization_config, load_in_8bit)
+        quantization_config = cls._prepare_quantization_config(quantization_config, load_in_8bit)
 
         # Load model from a local directory
         if os.path.isdir(model_id):
diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
index 7581c353f4..e659a98078 100644
--- a/optimum/intel/openvino/modeling_decoder.py
+++ b/optimum/intel/openvino/modeling_decoder.py
@@ -581,9 +581,9 @@ def _from_pretrained(
         )
 
         if isinstance(quantization_config, dict) and quantization_config == {"bits": 4}:
-            quantization_config = _DEFAULT_4BIT_CONFIGS.get(config.name_or_path, quantization_config)
+            quantization_config = _DEFAULT_4BIT_CONFIGS.get(config.name_or_path, quantization_config)
 
-        quantization_config = self._prepare_quantization_config(quantization_config, load_in_8bit)
+        quantization_config = cls._prepare_quantization_config(quantization_config, load_in_8bit)
 
         load_in_4bit = quantization_config.bits == 4 if quantization_config else False
         model = cls.load_model(model_cache_path, quantization_config=None if load_in_4bit else quantization_config)
@@ -602,7 +602,12 @@ def _from_pretrained(
         enable_compilation = kwargs.pop("compile", True) and not load_in_4bit
         causal_model = init_cls(
-            model=model, config=config, model_save_dir=model_cache_path.parent, compile=enable_compilation, quantization_config=quantization_config, **kwargs
+            model=model,
+            config=config,
+            model_save_dir=model_cache_path.parent,
+            compile=enable_compilation,
+            quantization_config=quantization_config,
+            **kwargs,
         )
 
         if load_in_4bit:
diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py
index 7d7785dd73..5f17cc086c 100644
--- a/optimum/intel/openvino/modeling_diffusion.py
+++ b/optimum/intel/openvino/modeling_diffusion.py
@@ -145,7 +145,6 @@ def __init__(
         if quantization_config:
             self._openvino_config = OVConfig(quantization_config=quantization_config)
 
-
     def _save_pretrained(self, save_directory: Union[str, Path]):
         """
         Saves the model to the OpenVINO IR format so that it can be re-loaded using the
@@ -264,7 +264,7 @@ def _from_pretrained(
             else:
                 kwargs[name] = load_method(new_model_save_dir)
 
-        quantization_config = self._prepare_quantization_config(quantization_config, load_in_8bit)
+        quantization_config = cls._prepare_quantization_config(quantization_config, load_in_8bit)
         unet = cls.load_model(
             new_model_save_dir / DIFFUSION_MODEL_UNET_SUBFOLDER / unet_file_name, quantization_config
         )
@@ -282,7 +282,14 @@ def _from_pretrained(
         if model_save_dir is None:
             model_save_dir = new_model_save_dir
 
-        return cls(unet=unet, config=config, model_save_dir=model_save_dir, quantization_config=quantization_config, **components, **kwargs)
+        return cls(
+            unet=unet,
+            config=config,
+            model_save_dir=model_save_dir,
+            quantization_config=quantization_config,
+            **components,
+            **kwargs,
+        )
 
     @classmethod
     def _from_transformers(
diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py
index 7da0a19b5d..dde96e091f 100644
--- a/optimum/intel/openvino/quantization.py
+++ b/optimum/intel/openvino/quantization.py
@@ -44,7 +44,7 @@
 from ..utils.constant import _TASK_ALIASES
 from ..utils.import_utils import DATASETS_IMPORT_ERROR, is_datasets_available
 from ..utils.modeling_utils import get_model_device
-from .configuration import OVConfig, OVWeightQuantizationConfig, DEFAULT_QUANTIZATION_CONFIG
+from .configuration import DEFAULT_QUANTIZATION_CONFIG, OVConfig, OVWeightQuantizationConfig
 from .modeling_base import OVBaseModel
 from .utils import (
     MAX_ONNX_OPSET,
diff --git a/optimum/intel/openvino/trainer.py b/optimum/intel/openvino/trainer.py
index 80abcf95c8..ab1f8a84c7 100644
--- a/optimum/intel/openvino/trainer.py
+++ b/optimum/intel/openvino/trainer.py
@@ -89,7 +89,7 @@
 
 from ..utils.constant import _TASK_ALIASES
 from ..utils.import_utils import is_transformers_version
-from .configuration import OVConfig, DEFAULT_QUANTIZATION_CONFIG
+from .configuration import DEFAULT_QUANTIZATION_CONFIG, OVConfig
 from .quantization import OVDataLoader
 from .training_args import OVTrainingArguments
 from .utils import (
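Note: the following sketch is illustrative only and is not part of the patch series. With PATCH 04/05 applied, the weight-quantization settings used at load time are wrapped in an OVConfig, written out by `_save_openvino_config()` during `save_pretrained()`, and can be read back afterwards. The checkpoint name and output directory are placeholders.

from optimum.intel import OVConfig, OVModelForCausalLM

model = OVModelForCausalLM.from_pretrained(
    "hf-internal-testing/tiny-random-OPTForCausalLM",  # placeholder checkpoint
    export=True,
    load_in_8bit=True,  # normalized to OVWeightQuantizationConfig(bits=8)
)
model.save_pretrained("ov_int8_model")  # persists the OVConfig next to the IR files

loaded_config = OVConfig.from_pretrained("ov_int8_model")
print(loaded_config.quantization_config)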
From 5634c29f7b28689e2bec966a28f2c4d2e2584a18 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Wed, 28 Feb 2024 16:59:42 +0100
Subject: [PATCH 06/17] add test

---
 tests/openvino/test_quantization.py | 33 ++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
index 07a9f14774..aee3b31a9d 100644
--- a/tests/openvino/test_quantization.py
+++ b/tests/openvino/test_quantization.py
@@ -155,7 +155,7 @@ class OVWeightCompressionTest(unittest.TestCase):
     )
 
     SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 64, 365),)
-    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTOCOMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 6, 379),)
+    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTOCOMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 0, 388),)
     SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTO_COMPRESSED_MATMULS = (
         (OVModelForCausalLM, "hf-internal-testing/tiny-random-OPTForCausalLM", 16, 136),
     )
@@ -232,6 +232,8 @@ class OVWeightCompressionTest(unittest.TestCase):
 
     IS_SUPPORT_STATEFUL = is_openvino_version(">=", "2023.3")
 
+    DEFAULT_INT4_CONFIG = {"bits": 4, "sym": True, "group_size": 64, "all_layers": True}
+
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS)
     def test_automodel_weight_compression(self, model_cls, model_name, expected_pt_int8, expected_ov_int8):
         task = model_cls.export_feature
@@ -254,8 +256,9 @@ def test_automodel_weight_compression(self, model_cls, model_name, expected_pt_i
         self.assertTrue("logits" in outputs)
 
         # Verify that that the configuration is correctly saved and loaded
-        loaded_config = OVConfig.from_pretrained(tmp_dir)
-        self.assertIsNotNone(loaded_config)
+        openvino_config = OVConfig.from_pretrained(tmp_dir)
+        self.assertEqual(openvino_config.quantization_config["bits"], 8)
+        self.assertEqual(openvino_config.dtype, "int8")
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS)
     def test_ovmodel_8bit_weight_compression(self, model_cls, model_name, expected_pt_int8, expected_ov_int8):
@@ -345,13 +348,14 @@ def test_ovmodel_load_with_compressed_weights(self, model_cls, model_type):
             _, num_int8, _ = get_num_quantized_nodes(model)
             self.assertEqual(expected_ov_int8[i], num_int8)
 
+
+
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTOCOMPRESSED_MATMULS)
+    @unittest.mock.patch.dict("optimum.intel.openvino.configuration._DEFAULT_4BIT_CONFIGS", {"facebook/opt-125m": DEFAULT_INT4_CONFIG})
     def test_ovmodel_4bit_auto_compression(self, model_cls, model_type, expected_ov_int8, expected_ov_int4):
         with tempfile.TemporaryDirectory() as tmp_dir:
             model_id = MODEL_NAMES[model_type]
-            model = model_cls.from_pretrained(
-                model_id, export=True, quantization_config=OVWeightQuantizationConfig(bits=4)
-            )
+            model = model_cls.from_pretrained(model_id, export=True, quantization_config={"bits":4})
             tokenizer = AutoTokenizer.from_pretrained(model_id)
             if tokenizer.pad_token is None:
                 tokenizer.pad_token = tokenizer.eos_token
@@ -360,6 +364,14 @@ def test_ovmodel_4bit_auto_compression(self, model_cls, model_type, expected_ov_
             self.assertEqual(expected_ov_int4, num_int4)
             self.assertEqual(expected_ov_int8, num_int8)
             model.save_pretrained(tmp_dir)
+
+            openvino_config = OVConfig.from_pretrained(tmp_dir)
+            self.assertEqual(openvino_config.quantization_config["bits"], 4)
+            self.assertEqual(openvino_config.dtype, "int4")
+            if model_id == "facebook/opt-125m":
+                for key, value in self.DEFAULT_INT4_CONFIG.items():
+                    self.assertEqual(value, openvino_config.quantization_config[key])
+
 
     @parameterized.expand(LOAD_IN_4_BITS_SCOPE)
     def test_ovmodel_4bit_auto_compression_with_config(
@@ -375,8 +387,13 @@ def test_ovmodel_4bit_auto_compression_with_config(
             self.assertEqual(expected_ov_int4, num_int4)
             model.save_pretrained(tmp_dir)
 
-            ov_config = OVConfig(quantization_config=quantization_config)
-            ov_config.save_pretrained(tmp_dir)
+            expected_config = OVConfig(quantization_config=quantization_config)
+
+            openvino_config = OVConfig.from_pretrained(tmp_dir)
+            self.assertEqual(openvino_config.quantization_config["bits"], 4)
+            self.assertEqual(openvino_config.dtype, "int4")
+            self.assertEqual(openvino_config.quantization_config, expected_config.quantization_config)
+
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTO_COMPRESSED_MATMULS)
     def test_ovmodel_4bit_auto_compression_with_custom_dataset(

From 30a5101dce183b4492ad2c72daf6e8f54db27492 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Wed, 28 Feb 2024 17:10:50 +0100
Subject: [PATCH 07/17] style

---
 tests/openvino/test_quantization.py | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
index aee3b31a9d..ee3d314ca5 100644
--- a/tests/openvino/test_quantization.py
+++ b/tests/openvino/test_quantization.py
@@ -348,14 +348,14 @@ def test_ovmodel_load_with_compressed_weights(self, model_cls, model_type):
             _, num_int8, _ = get_num_quantized_nodes(model)
             self.assertEqual(expected_ov_int8[i], num_int8)
 
-
-
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTOCOMPRESSED_MATMULS)
-    @unittest.mock.patch.dict("optimum.intel.openvino.configuration._DEFAULT_4BIT_CONFIGS", {"facebook/opt-125m": DEFAULT_INT4_CONFIG})
+    @unittest.mock.patch.dict(
+        "optimum.intel.openvino.configuration._DEFAULT_4BIT_CONFIGS", {"facebook/opt-125m": DEFAULT_INT4_CONFIG}
+    )
     def test_ovmodel_4bit_auto_compression(self, model_cls, model_type, expected_ov_int8, expected_ov_int4):
         with tempfile.TemporaryDirectory() as tmp_dir:
             model_id = MODEL_NAMES[model_type]
-            model = model_cls.from_pretrained(model_id, export=True, quantization_config={"bits":4})
+            model = model_cls.from_pretrained(model_id, export=True, quantization_config={"bits": 4})
             tokenizer = AutoTokenizer.from_pretrained(model_id)
             if tokenizer.pad_token is None:
                 tokenizer.pad_token = tokenizer.eos_token
@@ -364,7 +364,7 @@ def test_ovmodel_4bit_auto_compression(self, model_cls, model_type, expected_ov_
             self.assertEqual(expected_ov_int4, num_int4)
             self.assertEqual(expected_ov_int8, num_int8)
             model.save_pretrained(tmp_dir)
-
+
             openvino_config = OVConfig.from_pretrained(tmp_dir)
             self.assertEqual(openvino_config.quantization_config["bits"], 4)
             self.assertEqual(openvino_config.dtype, "int4")
@@ -372,7 +372,6 @@ def test_ovmodel_4bit_auto_compression(self, model_cls, model_type, expected_ov_
                 for key, value in self.DEFAULT_INT4_CONFIG.items():
                     self.assertEqual(value, openvino_config.quantization_config[key])
 
-
     @parameterized.expand(LOAD_IN_4_BITS_SCOPE)
     def test_ovmodel_4bit_auto_compression_with_config(
         self, model_cls, model_id, quantization_config, expected_ov_int4
@@ -387,13 +386,9 @@ def test_ovmodel_4bit_auto_compression_with_config(
             self.assertEqual(expected_ov_int4, num_int4)
             model.save_pretrained(tmp_dir)
 
-            expected_config = OVConfig(quantization_config=quantization_config)
-
             openvino_config = OVConfig.from_pretrained(tmp_dir)
             self.assertEqual(openvino_config.quantization_config["bits"], 4)
             self.assertEqual(openvino_config.dtype, "int4")
-            self.assertEqual(openvino_config.quantization_config, expected_config.quantization_config)
-
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTO_COMPRESSED_MATMULS)
     def test_ovmodel_4bit_auto_compression_with_custom_dataset(

From f1b4c55e547ec765df8e50f82288febf22fde889 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Wed, 28 Feb 2024 17:12:18 +0100
Subject: [PATCH 08/17] update setup

---
 setup.py | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/setup.py b/setup.py
index 1701af990c..e37c997630 100644
--- a/setup.py
+++ b/setup.py
@@ -39,18 +39,11 @@
 QUALITY_REQUIRE = ["black~=23.1", "ruff>=0.0.241"]
 
 EXTRAS_REQUIRE = {
-    "neural-compressor": [
-        "neural-compressor>=2.2.0",
-        "onnx",
-        "onnxruntime<1.15.0",
-    ],
-    "openvino": [
-        "openvino>=2023.3",
-        "onnx",
-        "onnxruntime",
-    ],
+    "neural-compressor": ["neural-compressor>=2.2.0", "onnx", "onnxruntime<1.15.0"],
+    "openvino": ["openvino>=2023.3", "onnx", "onnxruntime"],
     "openvino-tokenizers": ["openvino-tokenizers[transformers]"],
-    "nncf": ["nncf>=2.8.1"],
+    # "nncf": ["nncf>=2.8.1"],
+    "nncf": ["nncf @ git+https://github.com/openvinotoolkit/nncf.git"],
     "ipex": ["intel-extension-for-pytorch", "onnx"],
     "diffusers": ["diffusers"],
     "quality": QUALITY_REQUIRE,

From 42613145d024f31055c0c3c5922076d0a853f2ab Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Wed, 28 Feb 2024 17:38:49 +0100
Subject: [PATCH 09/17] fix

---
 optimum/intel/openvino/modeling_base.py    | 5 +++++
 optimum/intel/openvino/modeling_decoder.py | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py
index fc4b3f32ea..8bbfe25bcb 100644
--- a/optimum/intel/openvino/modeling_base.py
+++ b/optimum/intel/openvino/modeling_base.py
@@ -152,8 +152,13 @@ def _save_pretrained(self, save_directory: Union[str, Path]):
 
     def _save_openvino_config(self, save_directory: Union[str, Path]):
         if self._openvino_config is not None:
+
+            if not isinstance(self._openvino_config.quantization_config.dataset, (str, type(None))):
+                self._openvino_config.quantization_config.dataset = None
+
             self._openvino_config.save_pretrained(save_directory)
+
 
     @classmethod
     def _from_pretrained(
diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
index e659a98078..b77862eb42 100644
--- a/optimum/intel/openvino/modeling_decoder.py
+++ b/optimum/intel/openvino/modeling_decoder.py
@@ -14,6 +14,7 @@
 
 import logging
 import os
+import copy
 from pathlib import Path
 from tempfile import TemporaryDirectory
 from typing import Dict, Optional, Tuple, Union
@@ -636,6 +637,7 @@ def _from_pretrained(
             # seqlen = get_seqlen(causal_model)
             dataset = get_dataset(quantization_config.dataset, tokenizer, seqlen=32)
             dataset = prepare_dataset(dataset)
+            quantization_config = copy.deepcopy(quantization_config)
             quantization_config.dataset = nncf.Dataset(dataset, lambda x: causal_model.prepare_inputs(**x))
             _weight_only_quantization(model, quantization_config)
 

From bae24bf2bf167eb4cc700db8dc19b4ca4bb94ce4 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Wed, 28 Feb 2024 17:45:24 +0100
Subject: [PATCH 10/17] fix

---
 tests/openvino/test_quantization.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
index ee3d314ca5..fe9c2b17c0 100644
--- a/tests/openvino/test_quantization.py
+++ b/tests/openvino/test_quantization.py
@@ -53,7 +53,7 @@
 )
 
 
-from optimum.intel.openvino.configuration import INT8_WEIGHT_COMPRESSION_CONFIG
+from optimum.intel.openvino.configuration import INT8_WEIGHT_COMPRESSION_CONFIG, DEFAULT_QUANTIZATION_CONFIG
 from optimum.intel.utils.import_utils import is_openvino_version
 from utils_tests import MODEL_NAMES, get_num_quantized_nodes, _ARCHITECTURES_TO_EXPECTED_INT8
 
@@ -106,9 +106,9 @@ def preprocess_function(examples, tokenizer):
         self.assertTrue("logits" in outputs)
 
         # Verify that that the configuration is correctly saved and loaded
-        expected_config = OVConfig()
         loaded_config = OVConfig.from_pretrained(tmp_dir)
-        self.assertEqual(expected_config.to_dict()["compression"], loaded_config.to_dict()["compression"])
+        self.assertEqual(DEFAULT_QUANTIZATION_CONFIG, loaded_config.to_dict()["compression"])
+
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
     def test_ovmodel_static_quantization(self, model_cls, model_name, expected_fake_quantize, expected_int8):
From fbbe8048d2f238658bd691ca16e9e971f9e3f653 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Wed, 28 Feb 2024 17:45:46 +0100
Subject: [PATCH 11/17] style

---
 optimum/intel/openvino/modeling_base.py    | 4 +---
 optimum/intel/openvino/modeling_decoder.py | 2 +-
 tests/openvino/test_quantization.py        | 1 -
 3 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py
index 8bbfe25bcb..55d363daa0 100644
--- a/optimum/intel/openvino/modeling_base.py
+++ b/optimum/intel/openvino/modeling_base.py
@@ -152,12 +152,10 @@ def _save_pretrained(self, save_directory: Union[str, Path]):
 
     def _save_openvino_config(self, save_directory: Union[str, Path]):
         if self._openvino_config is not None:
-
             if not isinstance(self._openvino_config.quantization_config.dataset, (str, type(None))):
                 self._openvino_config.quantization_config.dataset = None
-
-            self._openvino_config.save_pretrained(save_directory)
+            self._openvino_config.save_pretrained(save_directory)
 
     @classmethod
     def _from_pretrained(
diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
index b77862eb42..4f82652445 100644
--- a/optimum/intel/openvino/modeling_decoder.py
+++ b/optimum/intel/openvino/modeling_decoder.py
@@ -12,9 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import copy
 import logging
 import os
-import copy
 from pathlib import Path
 from tempfile import TemporaryDirectory
 from typing import Dict, Optional, Tuple, Union
diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
index fe9c2b17c0..809faf63cb 100644
--- a/tests/openvino/test_quantization.py
+++ b/tests/openvino/test_quantization.py
@@ -109,7 +109,6 @@ def preprocess_function(examples, tokenizer):
         loaded_config = OVConfig.from_pretrained(tmp_dir)
         self.assertEqual(DEFAULT_QUANTIZATION_CONFIG, loaded_config.to_dict()["compression"])
 
-
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
     def test_ovmodel_static_quantization(self, model_cls, model_name, expected_fake_quantize, expected_int8):
         task = model_cls.export_feature

From 237cc73747afb43970bfcbf77e4d66079ce9ef2a Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Thu, 29 Feb 2024 12:25:48 +0100
Subject: [PATCH 12/17] remove quantization_config key from ov_config

---
 optimum/intel/openvino/modeling_base_seq2seq.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/optimum/intel/openvino/modeling_base_seq2seq.py b/optimum/intel/openvino/modeling_base_seq2seq.py
index a275e150cd..bf0dd673aa 100644
--- a/optimum/intel/openvino/modeling_base_seq2seq.py
+++ b/optimum/intel/openvino/modeling_base_seq2seq.py
@@ -77,9 +77,6 @@ def __init__(
         self.decoder_model = decoder
         self.decoder_with_past_model = decoder_with_past
         self.generation_config = GenerationConfig.from_model_config(config) if self.can_generate() else None
-        if quantization_config:
-            self.ov_config["quantization_config"] = quantization_config
-
         self._openvino_config = None
         if quantization_config:
             self._openvino_config = OVConfig(quantization_config=quantization_config)

From aa667ef4fa88f17af84b0580e73ce09f4be99472 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Thu, 29 Feb 2024 12:31:20 +0100
Subject: [PATCH 13/17] add test

---
 tests/openvino/test_quantization.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
index 809faf63cb..ac68753f21 100644
--- a/tests/openvino/test_quantization.py
+++ b/tests/openvino/test_quantization.py
@@ -256,8 +256,7 @@ def test_automodel_weight_compression(self, model_cls, model_name, expected_pt_i
         self.assertTrue("logits" in outputs)
 
         # Verify that that the configuration is correctly saved and loaded
         openvino_config = OVConfig.from_pretrained(tmp_dir)
-        self.assertEqual(openvino_config.quantization_config["bits"], 8)
-        self.assertEqual(openvino_config.dtype, "int8")
+        self.assertIsNotNone(loaded_config)
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS)
     def test_ovmodel_8bit_weight_compression(self, model_cls, model_name, expected_pt_int8, expected_ov_int8):
@@ -333,6 +332,8 @@ def test_ovmodel_8bit_weight_compression_stateful(self, model_cls, model_id, exp
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION)
     def test_ovmodel_load_with_compressed_weights(self, model_cls, model_type):
         model = model_cls.from_pretrained(MODEL_NAMES[model_type], export=True, load_in_8bit=True, stateful=False)
+        self.assertEqual(model._openvino_config.quantization_config["bits"], 8)
+        self.assertEqual(model._openvino_config.dtype, "int8")
 
         if model.export_feature.startswith("text2text-generation"):
             models = [model.encoder, model.decoder, model.decoder_with_past]

From a297f091a4bdd5f4a4ab8052dcc0cb0ac5687b9b Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Thu, 29 Feb 2024 12:34:13 +0100
Subject: [PATCH 14/17] fix

---
 tests/openvino/test_quantization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
index ac68753f21..d3a842a2c9 100644
--- a/tests/openvino/test_quantization.py
+++ b/tests/openvino/test_quantization.py
@@ -332,7 +332,7 @@ def test_ovmodel_8bit_weight_compression_stateful(self, model_cls, model_id, exp
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION)
     def test_ovmodel_load_with_compressed_weights(self, model_cls, model_type):
         model = model_cls.from_pretrained(MODEL_NAMES[model_type], export=True, load_in_8bit=True, stateful=False)
-        self.assertEqual(model._openvino_config.quantization_config["bits"], 8)
+        self.assertEqual(model._openvino_config.quantization_config.bits, 8)
         self.assertEqual(model._openvino_config.dtype, "int8")
 
         if model.export_feature.startswith("text2text-generation"):

From 704f91a06cdca916278dad27fc929853667cb5ac Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Thu, 29 Feb 2024 15:35:42 +0100
Subject: [PATCH 15/17] fix

---
 optimum/intel/openvino/quantization.py | 2 +-
 optimum/intel/openvino/trainer.py      | 2 +-
 tests/openvino/test_quantization.py    | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py
index dde96e091f..8b692dd010 100644
--- a/optimum/intel/openvino/quantization.py
+++ b/optimum/intel/openvino/quantization.py
@@ -234,7 +234,7 @@ def quantize(
         if ov_config is not None:
             if not isinstance(ov_config, OVConfig):
                 raise TypeError(f"`ov_config` should be an `OVConfig`, but got: {type(ov_config)} instead.")
-            elif not ov_config.compression:
+            elif ov_config.compression is None:
                 ov_config.compression = DEFAULT_QUANTIZATION_CONFIG
 
         if isinstance(self.model, OVBaseModel):
diff --git a/optimum/intel/openvino/trainer.py b/optimum/intel/openvino/trainer.py
index ab1f8a84c7..b7d110c96a 100644
--- a/optimum/intel/openvino/trainer.py
+++ b/optimum/intel/openvino/trainer.py
@@ -226,7 +226,7 @@ def __init__(
         self.compression_controller = None
 
         if self.ov_config is not None:
-            if not self.ov_config.compression:
+            if self.ov_config.compression is None:
                 self.ov_config.compression = DEFAULT_QUANTIZATION_CONFIG
 
             if self.args.do_train:
diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
index d3a842a2c9..62942d8891 100644
--- a/tests/openvino/test_quantization.py
+++ b/tests/openvino/test_quantization.py
@@ -255,7 +255,7 @@ def test_automodel_weight_compression(self, model_cls, model_name, expected_pt_i
         self.assertTrue("logits" in outputs)
 
         # Verify that that the configuration is correctly saved and loaded
-        openvino_config = OVConfig.from_pretrained(tmp_dir)
+        loaded_config = OVConfig.from_pretrained(tmp_dir)
         self.assertIsNotNone(loaded_config)
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS)
     def test_ovmodel_8bit_weight_compression(self, model_cls, model_name, expected_pt_int8, expected_ov_int8):

From cb847bb07defff3dd0878c5ff2dcf4e81d96f2c4 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Thu, 29 Feb 2024 15:37:17 +0100
Subject: [PATCH 16/17] update setup

---
 setup.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index e37c997630..91fc19f744 100644
--- a/setup.py
+++ b/setup.py
@@ -42,8 +42,7 @@
     "neural-compressor": ["neural-compressor>=2.2.0", "onnx", "onnxruntime<1.15.0"],
     "openvino": ["openvino>=2023.3", "onnx", "onnxruntime"],
     "openvino-tokenizers": ["openvino-tokenizers[transformers]"],
-    # "nncf": ["nncf>=2.8.1"],
-    "nncf": ["nncf @ git+https://github.com/openvinotoolkit/nncf.git"],
+    "nncf": ["nncf>=2.8.1"],
     "ipex": ["intel-extension-for-pytorch", "onnx"],
     "diffusers": ["diffusers"],
     "quality": QUALITY_REQUIRE,

From 3a71f42e58007a9b2023a273a3e3f832be1cacd1 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Fri, 1 Mar 2024 10:46:49 +0100
Subject: [PATCH 17/17] modify method name

---
 optimum/intel/openvino/modeling_base.py         | 4 ++--
 optimum/intel/openvino/modeling_base_seq2seq.py | 2 +-
 optimum/intel/openvino/modeling_decoder.py      | 2 +-
 optimum/intel/openvino/modeling_diffusion.py    | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py
index 55d363daa0..af00f7a06e 100644
--- a/optimum/intel/openvino/modeling_base.py
+++ b/optimum/intel/openvino/modeling_base.py
@@ -217,7 +217,7 @@ def _from_pretrained(
             local_files_only=local_files_only,
         )
 
-        quantization_config = cls._prepare_quantization_config(quantization_config, load_in_8bit)
+        quantization_config = cls._prepare_weight_quantization_config(quantization_config, load_in_8bit)
         model = cls.load_model(model_cache_path, quantization_config=quantization_config)
 
         return cls(
@@ -229,7 +229,7 @@ def _from_pretrained(
         )
 
     @staticmethod
-    def _prepare_quantization_config(
+    def _prepare_weight_quantization_config(
        quantization_config: Optional[Union[OVWeightQuantizationConfig, Dict]] = None, load_in_8bit: bool = False
     ):
         # Give default quantization config if not provided and load_in_8bit=True
diff --git a/optimum/intel/openvino/modeling_base_seq2seq.py b/optimum/intel/openvino/modeling_base_seq2seq.py
index bf0dd673aa..3cb43e61b8 100644
--- a/optimum/intel/openvino/modeling_base_seq2seq.py
+++ b/optimum/intel/openvino/modeling_base_seq2seq.py
@@ -161,7 +161,7 @@ def _from_pretrained(
         decoder_with_past_file_name = decoder_with_past_file_name or default_decoder_with_past_file_name
         decoder_with_past = None
 
-        quantization_config = cls._prepare_quantization_config(quantization_config, load_in_8bit)
+        quantization_config = cls._prepare_weight_quantization_config(quantization_config, load_in_8bit)
 
         # Load model from a local directory
         if os.path.isdir(model_id):
diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
index 4f82652445..92a2ce436d 100644
--- a/optimum/intel/openvino/modeling_decoder.py
+++ b/optimum/intel/openvino/modeling_decoder.py
@@ -584,7 +584,7 @@ def _from_pretrained(
         if isinstance(quantization_config, dict) and quantization_config == {"bits": 4}:
             quantization_config = _DEFAULT_4BIT_CONFIGS.get(config.name_or_path, quantization_config)
 
-        quantization_config = cls._prepare_quantization_config(quantization_config, load_in_8bit)
+        quantization_config = cls._prepare_weight_quantization_config(quantization_config, load_in_8bit)
 
         load_in_4bit = quantization_config.bits == 4 if quantization_config else False
         model = cls.load_model(model_cache_path, quantization_config=None if load_in_4bit else quantization_config)
diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py
index 5f17cc086c..1570a22457 100644
--- a/optimum/intel/openvino/modeling_diffusion.py
+++ b/optimum/intel/openvino/modeling_diffusion.py
@@ -264,7 +264,7 @@ def _from_pretrained(
             else:
                 kwargs[name] = load_method(new_model_save_dir)
 
-        quantization_config = cls._prepare_quantization_config(quantization_config, load_in_8bit)
+        quantization_config = cls._prepare_weight_quantization_config(quantization_config, load_in_8bit)
         unet = cls.load_model(
             new_model_save_dir / DIFFUSION_MODEL_UNET_SUBFOLDER / unet_file_name, quantization_config
         )
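Note: the following sketch is illustrative only and is not part of the patch series. It restates, under its final name from PATCH 17, what `_prepare_weight_quantization_config()` does with the user-facing inputs; the standalone function form and the import path are assumptions for illustration.

from optimum.intel import OVWeightQuantizationConfig

def prepare_weight_quantization_config(quantization_config=None, load_in_8bit=False):
    # load_in_8bit=True with no explicit config defaults to 8-bit weight compression.
    if not quantization_config and load_in_8bit:
        quantization_config = OVWeightQuantizationConfig(bits=8)
    # Plain dicts such as {"bits": 4} are promoted to a config object.
    elif isinstance(quantization_config, dict):
        quantization_config = OVWeightQuantizationConfig.from_dict(quantization_config)
    return quantization_config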