Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add dynamic quantization config #661

Merged
merged 27 commits into from
Apr 22, 2024
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
236 changes: 122 additions & 114 deletions optimum/intel/openvino/configuration.py

Large diffs are not rendered by default.

21 changes: 12 additions & 9 deletions optimum/intel/openvino/modeling_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

from ...exporters.openvino import export, main_export
from ..utils.import_utils import is_nncf_available
from .configuration import OVConfig, OVWeightQuantizationConfig
from .configuration import OVConfig, OVDynamicQuantizationConfig, OVWeightQuantizationConfig
from .utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME, _print_compiled_model_properties


Expand Down Expand Up @@ -64,10 +64,7 @@ def __init__(
self.model_save_dir = model_save_dir
self._device = device.upper()
self.is_dynamic = dynamic_shapes
self.ov_config = ov_config if ov_config is not None else {}
if self.ov_config.get("PERFORMANCE_HINT") is None:
self.ov_config["PERFORMANCE_HINT"] = "LATENCY"

self.ov_config = {} if ov_config is None else {**ov_config}
self.preprocessors = kwargs.get("preprocessors", [])
enable_compilation = kwargs.get("compile", True)

Expand Down Expand Up @@ -98,12 +95,12 @@ def __init__(
self._openvino_config = None
if quantization_config:
self._openvino_config = OVConfig(quantization_config=quantization_config)
self._set_ov_config_parameters()

@staticmethod
def load_model(
file_name: Union[str, Path],
quantization_config: Union[OVWeightQuantizationConfig, Dict] = None,
calibration_dataset: Optional = None,
):
"""
Loads the model.
Expand All @@ -113,8 +110,6 @@ def load_model(
The path of the model ONNX or XML file.
quantization_config (`OVWeightQuantizationConfig` or `Dict`, *optional*):
Quantization config to apply after model is loaded.
calibration_dataset (`nncf.Dataset`, *optional*):
Optional nncf.Dataset to feed to model weight compression when quantization config is provided.
"""

def fix_op_names_duplicates(model: openvino.runtime.Model):
Expand Down Expand Up @@ -143,7 +138,7 @@ def fix_op_names_duplicates(model: openvino.runtime.Model):

from optimum.intel.openvino.quantization import _weight_only_quantization

model = _weight_only_quantization(model, quantization_config, calibration_dataset=calibration_dataset)
model = _weight_only_quantization(model, quantization_config)

return model

Expand Down Expand Up @@ -251,6 +246,14 @@ def _prepare_weight_quantization_config(

return quantization_config

def _set_ov_config_parameters(self):
    """Derive OpenVINO runtime options from the model's quantization configuration.

    Ensures a latency-oriented performance hint is present unless the caller
    already provided one, and propagates the dynamic-quantization group size
    to the runtime when an `OVDynamicQuantizationConfig` is in use.
    """
    # Only fill in the hint when it is absent (or explicitly None); a
    # caller-supplied value is left untouched.
    current_hint = self.ov_config.get("PERFORMANCE_HINT")
    if current_hint is None:
        self.ov_config["PERFORMANCE_HINT"] = "LATENCY"

    quantization_config = None
    if self._openvino_config:
        quantization_config = self._openvino_config.quantization_config
    if isinstance(quantization_config, OVDynamicQuantizationConfig):
        # The runtime expects the group size as a string-valued property.
        group_size = quantization_config.activations_group_size
        self.ov_config["DYNAMIC_QUANTIZATION_GROUP_SIZE"] = str(group_size)
Copy link
Collaborator

@AlexKoff88 AlexKoff88 Apr 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@echarlaix, shall we turn on 8-bit KV-cache quantization as well? It is essentially per-token INT8 quantization, and it should be safe in terms of accuracy degradation.


@staticmethod
def _cached_file(
model_path: Union[Path, str],
Expand Down
7 changes: 2 additions & 5 deletions optimum/intel/openvino/modeling_base_seq2seq.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,7 @@ def __init__(
self.model_save_dir = model_save_dir
self._device = device.upper()
self.is_dynamic = dynamic_shapes
self.ov_config = ov_config if ov_config is not None else {}

if self.ov_config.get("PERFORMANCE_HINT") is None:
self.ov_config["PERFORMANCE_HINT"] = "LATENCY"

self.ov_config = {} if ov_config is None else {**ov_config}
self.preprocessors = kwargs.get("preprocessors", [])

if self.is_dynamic:
Expand All @@ -84,6 +80,7 @@ def __init__(
self._openvino_config = None
if quantization_config:
self._openvino_config = OVConfig(quantization_config=quantization_config)
self._set_ov_config_parameters()

def _save_pretrained(self, save_directory: Union[str, Path]):
"""
Expand Down
13 changes: 4 additions & 9 deletions optimum/intel/openvino/modeling_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import copy
import logging
import os
from pathlib import Path
Expand Down Expand Up @@ -596,11 +595,10 @@ def _from_pretrained(
quantization_config = cls._prepare_weight_quantization_config(quantization_config, load_in_8bit)

load_in_4bit = quantization_config.bits == 4 if quantization_config else False
calibration_dataset = kwargs.get("calibration_dataset", None)

Comment on lines -599 to +598
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed calibration_dataset argument @nikita-savelyevv

model = cls.load_model(
model_cache_path,
quantization_config=None if load_in_4bit else quantization_config,
calibration_dataset=calibration_dataset,
)

model_type = config.model_type.replace("_", "-")
Expand Down Expand Up @@ -637,18 +635,15 @@ def _from_pretrained(
f"For the given model, we recommend the following `quantization_config` : {default_config}"
)

if calibration_dataset is None and isinstance(quantization_config.dataset, str):
calibration_dataset = None
if isinstance(quantization_config.dataset, str):
tokenizer = quantization_config.tokenizer or AutoTokenizer.from_pretrained(model_id)

from optimum.gptq.data import get_dataset, prepare_dataset

# from optimum.gptq.utils import get_seqlen

# seqlen = get_seqlen(causal_model)
nsamples = quantization_config.num_samples if quantization_config.num_samples else 128
nsamples = quantization_config.num_samples or 128
dataset = get_dataset(quantization_config.dataset, tokenizer, seqlen=32, nsamples=nsamples)
dataset = prepare_dataset(dataset)
quantization_config = copy.deepcopy(quantization_config)
calibration_dataset = nncf.Dataset(dataset, lambda x: causal_model.prepare_inputs(**x))

_weight_only_quantization(model, quantization_config, calibration_dataset)
Expand Down
5 changes: 2 additions & 3 deletions optimum/intel/openvino/modeling_diffusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,7 @@ def __init__(
self._internal_dict = config
self._device = device.upper()
self.is_dynamic = dynamic_shapes
self.ov_config = ov_config if ov_config is not None else {}
if self.ov_config.get("PERFORMANCE_HINT") is None:
self.ov_config["PERFORMANCE_HINT"] = "LATENCY"
self.ov_config = {} if ov_config is None else {**ov_config}

# This attribute is needed to keep one reference on the temporary directory, since garbage collecting
# would end-up removing the directory containing the underlying OpenVINO model
Expand Down Expand Up @@ -162,6 +160,7 @@ def __init__(
self._openvino_config = None
if quantization_config:
self._openvino_config = OVConfig(quantization_config=quantization_config)
self._set_ov_config_parameters()

def _save_pretrained(self, save_directory: Union[str, Path]):
"""
Expand Down
24 changes: 8 additions & 16 deletions optimum/intel/openvino/quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,22 +180,15 @@ def __init__(self, model: transformers.PreTrainedModel, task: Optional[str] = No
"""
super().__init__()
self.model = model
feature = kwargs.pop("feature", None)
if feature is not None:
logger.warning("`feature` is deprecated and will be removed in a future version. Use `task` instead.")
if task is not None and task != feature:
logger.warning(
f"Both `feature` and `task` were specified. {task} will be used to define the model topology for the model ONNX export."
)
self.task = task or feature
self.task = task
self.seed = seed
# TODO : deprecate input_names
self.input_names = None
signature = inspect.signature(self.model.forward)
self._signature_columns = list(signature.parameters.keys())
self._export_input_names = [
column for column in self._signature_columns if column not in {"label", "labels", "label_ids"}
]

@property
def input_names(self):
    """Deprecated attribute kept for backward compatibility; always returns None.

    Emits a deprecation warning on access. Scheduled for removal in v1.18.0.
    """
    # Fixed missing space after "The" in the original warning message.
    logger.warning("The `input_names` attribute is deprecated and will be removed in v1.18.0")
    return None

@classmethod
def from_pretrained(cls, model: PreTrainedModel, **kwargs):
Expand Down Expand Up @@ -265,9 +258,8 @@ def quantize(
# TODO: deprecate weights_only argument
if weights_only is not None:
logger.warning(
"`weights_only` argument is deprecated. In the future please provide `ov_config.quantization_config` "
"as an instance of OVWeightQuantizationConfig for weight-only compression or as an instance of "
"OVQuantizationConfig for full model quantization."
"`weights_only` argument is deprecated and will be removed in v1.18.0. In the future please provide `ov_config.quantization_config` "
"as an instance of `OVWeightQuantizationConfig` for weight-only compression or as an instance of `OVQuantizationConfig` for full model quantization."
)

if save_directory is None:
Expand Down
9 changes: 1 addition & 8 deletions optimum/intel/openvino/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,6 @@ def __init__(
preprocess_logits_for_metrics: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] = None,
ov_config: Optional[OVConfig] = None,
task: Optional[str] = None,
feature: Optional[str] = None,
):
self.neftune_noise_alpha = None

Expand All @@ -233,13 +232,7 @@ def __init__(
)

self.ov_config = ov_config
if feature is not None:
logger.warning("`feature` is deprecated and will be removed in a future version. Use `task` instead.")
if task is not None and task != feature:
logger.warning(
f"Both `feature` and `task` were specified. {task} will be used to define the model topology for the model ONNX export."
)
self.task = task or feature
self.task = task
self.teacher = None
if teacher_model is not None:
self.teacher = teacher_model.to(args.device)
Expand Down
Loading
Loading