
Commit ca33bed

echarlaix authored and PenghuiCheng committed
Save an openvino config summarizing all information related to quantization when saving model (huggingface#578)
* fix doc
* remove default compression value
* set default compression config when not provided
* save openvino config to include quantization configuration
* fix style
* add test
* update setup
* style
* remove quantization_config key from ov_config
* add test
* update setup
* modify method name
1 parent 6621611 · commit ca33bed

10 files changed: +139 -80 lines

docs/source/optimization_ov.mdx (-3)

```diff
@@ -38,8 +38,6 @@ save_dir = "ptq_model"
 def preprocess_function(examples, tokenizer):
     return tokenizer(examples["sentence"], padding="max_length", max_length=128, truncation=True)
 
-# Load the default quantization configuration detailing the quantization we wish to apply
-quantization_config = OVConfig()
 # Instantiate our OVQuantizer using the desired configuration
 quantizer = OVQuantizer.from_pretrained(model)
 # Create the calibration dataset used to perform static quantization
@@ -52,7 +50,6 @@ calibration_dataset = quantizer.get_calibration_dataset(
 )
 # Apply static quantization and export the resulting quantized model to OpenVINO IR format
 quantizer.quantize(
-    quantization_config=quantization_config,
     calibration_dataset=calibration_dataset,
     save_directory=save_dir,
 )
```
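For context, here is a minimal sketch of the documentation flow after this change, assuming the SST-2 example setup the guide uses elsewhere (the checkpoint name and dataset arguments are illustrative): `quantize()` is now called without an explicit `quantization_config`, and a default configuration is generated internally.

```python
from functools import partial

from transformers import AutoModelForSequenceClassification, AutoTokenizer
from optimum.intel import OVQuantizer

model_id = "distilbert-base-uncased-finetuned-sst-2-english"  # illustrative checkpoint
model = AutoModelForSequenceClassification.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
save_dir = "ptq_model"

def preprocess_function(examples, tokenizer):
    return tokenizer(examples["sentence"], padding="max_length", max_length=128, truncation=True)

# Instantiate our OVQuantizer; no OVConfig is constructed up front anymore
quantizer = OVQuantizer.from_pretrained(model)
# Create the calibration dataset used to perform static quantization
calibration_dataset = quantizer.get_calibration_dataset(
    "glue",
    dataset_config_name="sst2",
    preprocess_function=partial(preprocess_function, tokenizer=tokenizer),
    num_samples=300,
    dataset_split="train",
)
# Apply static quantization; a default quantization config is generated internally
quantizer.quantize(calibration_dataset=calibration_dataset, save_directory=save_dir)
```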

optimum/intel/openvino/configuration.py (+1 -1)

```diff
@@ -114,7 +114,7 @@ def __init__(
         **kwargs,
     ):
         super().__init__()
-        self.compression = compression or DEFAULT_QUANTIZATION_CONFIG
+        self.compression = compression
         self.input_info = input_info
         self.save_onnx_model = save_onnx_model
         self._enable_standard_onnx_export_option()
```
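The practical effect, sketched under the assumption that `OVConfig` is constructed with no arguments: `compression` now stays `None` instead of being filled with `DEFAULT_QUANTIZATION_CONFIG`, and the consumers patched below (`OVQuantizer`, `OVTrainer`) apply the default themselves only when they actually need it.

```python
from optimum.intel import OVConfig

ov_config = OVConfig()
# Before this commit, compression defaulted to DEFAULT_QUANTIZATION_CONFIG here;
# now it is left unset until a quantization entry point decides to fill it in.
assert ov_config.compression is None
```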

optimum/intel/openvino/modeling_base.py (+34 -4)

```diff
@@ -57,6 +57,7 @@ def __init__(
         dynamic_shapes: bool = True,
         ov_config: Optional[Dict[str, str]] = None,
         model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
+        quantization_config: Optional[Union[OVWeightQuantizationConfig, Dict]] = None,
         **kwargs,
     ):
         self.config = config
@@ -91,6 +92,10 @@ def __init__(
 
         self.generation_config = GenerationConfig.from_model_config(config) if self.can_generate() else None
 
+        self._openvino_config = None
+        if quantization_config:
+            self._openvino_config = OVConfig(quantization_config=quantization_config)
+
     @staticmethod
     def load_model(file_name: Union[str, Path], quantization_config: Union[OVWeightQuantizationConfig, Dict] = None):
         """
@@ -143,6 +148,15 @@ def _save_pretrained(self, save_directory: Union[str, Path]):
         dst_path = os.path.join(save_directory, OV_XML_FILE_NAME)
         openvino.save_model(self.model, dst_path, compress_to_fp16=False)
 
+        self._save_openvino_config(save_directory)
+
+    def _save_openvino_config(self, save_directory: Union[str, Path]):
+        if self._openvino_config is not None:
+            if not isinstance(self._openvino_config.quantization_config.dataset, (str, type(None))):
+                self._openvino_config.quantization_config.dataset = None
+
+            self._openvino_config.save_pretrained(save_directory)
+
     @classmethod
     def _from_pretrained(
         cls,
@@ -203,12 +217,28 @@
             local_files_only=local_files_only,
         )
 
-        # Give default quantization config if not provided and load_in_8bit=True
-        if load_in_8bit:
-            quantization_config = quantization_config or {"bits": 8}
+        quantization_config = cls._prepare_weight_quantization_config(quantization_config, load_in_8bit)
 
         model = cls.load_model(model_cache_path, quantization_config=quantization_config)
-        return cls(model, config=config, model_save_dir=model_cache_path.parent, **kwargs)
+        return cls(
+            model,
+            config=config,
+            model_save_dir=model_cache_path.parent,
+            quantization_config=quantization_config,
+            **kwargs,
+        )
+
+    @staticmethod
+    def _prepare_weight_quantization_config(
+        quantization_config: Optional[Union[OVWeightQuantizationConfig, Dict]] = None, load_in_8bit: bool = False
+    ):
+        # Give default quantization config if not provided and load_in_8bit=True
+        if not quantization_config and load_in_8bit:
+            quantization_config = OVWeightQuantizationConfig(bits=8)
+        elif isinstance(quantization_config, dict):
+            quantization_config = OVWeightQuantizationConfig.from_dict(quantization_config)
+
+        return quantization_config
 
     @staticmethod
     def _cached_file(
```
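A minimal sketch of the round trip this enables, assuming an exportable classification checkpoint: `load_in_8bit=True` is resolved by `_prepare_weight_quantization_config` into an `OVWeightQuantizationConfig(bits=8)`, stored on the model as an `OVConfig`, and written next to the IR by `_save_openvino_config` at save time.

```python
from optimum.intel import OVModelForSequenceClassification

model = OVModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased-finetuned-sst-2-english",  # illustrative checkpoint
    export=True,
    load_in_8bit=True,
)
# The resolved OVWeightQuantizationConfig travels with the model instance,
# and save_pretrained() now also serializes an OpenVINO config file
# summarizing the quantization settings into the save directory.
model.save_pretrained("ov_int8_model")
```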

optimum/intel/openvino/modeling_base_seq2seq.py (+8 -3)

```diff
@@ -58,6 +58,7 @@ def __init__(
         dynamic_shapes: bool = True,
         ov_config: Optional[Dict[str, str]] = None,
         model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
+        quantization_config: Union[OVWeightQuantizationConfig, Dict] = None,
         **kwargs,
     ):
         self.config = config
@@ -76,6 +77,9 @@ def __init__(
         self.decoder_model = decoder
         self.decoder_with_past_model = decoder_with_past
         self.generation_config = GenerationConfig.from_model_config(config) if self.can_generate() else None
+        self._openvino_config = None
+        if quantization_config:
+            self._openvino_config = OVConfig(quantization_config=quantization_config)
 
     def _save_pretrained(self, save_directory: Union[str, Path]):
         """
@@ -96,6 +100,8 @@ def _save_pretrained(self, save_directory: Union[str, Path]):
             dst_path = os.path.join(save_directory, dst_file_name)
             openvino.save_model(src_file, dst_path, compress_to_fp16=False)
 
+        self._save_openvino_config(save_directory)
+
     @classmethod
     def _from_pretrained(
         cls,
@@ -155,9 +161,7 @@ def _from_pretrained(
         decoder_with_past_file_name = decoder_with_past_file_name or default_decoder_with_past_file_name
         decoder_with_past = None
 
-        # Give default quantization config if not provided and load_in_8bit=True
-        if load_in_8bit:
-            quantization_config = quantization_config or {"bits": 8}
+        quantization_config = cls._prepare_weight_quantization_config(quantization_config, load_in_8bit)
 
         # Load model from a local directory
         if os.path.isdir(model_id):
@@ -205,6 +209,7 @@
             decoder_with_past=decoder_with_past,
             config=config,
             model_save_dir=model_save_dir,
+            quantization_config=quantization_config,
             **kwargs,
         )
```
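The same wiring for encoder-decoder models, sketched with an illustrative T5 checkpoint: the resolved weight-quantization config is forwarded through `_from_pretrained` into `__init__` and saved along with the encoder and decoder IR files.

```python
from optimum.intel import OVModelForSeq2SeqLM

model = OVModelForSeq2SeqLM.from_pretrained("t5-small", export=True, load_in_8bit=True)
model.save_pretrained("ov_t5_int8")  # writes the quantization summary alongside the IRs
```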

optimum/intel/openvino/modeling_decoder.py (+15 -9)

```diff
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import copy
 import logging
 import os
 from pathlib import Path
@@ -100,6 +101,7 @@ def __init__(
         dynamic_shapes: bool = True,
         ov_config: Optional[Dict[str, str]] = None,
         model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
+        quantization_config: Optional[Union[OVWeightQuantizationConfig, Dict]] = None,
         **kwargs,
     ):
         if not dynamic_shapes:
@@ -117,6 +119,7 @@ def __init__(
             dynamic_shapes=False,
             ov_config=ov_config,
             model_save_dir=model_save_dir,
+            quantization_config=quantization_config,
             **kwargs,
         )
 
@@ -224,6 +227,8 @@ def _save_pretrained(self, save_directory: Union[str, Path]):
         dst_path = os.path.join(save_directory, OV_XML_FILE_NAME)
         openvino.save_model(model_to_save, dst_path, compress_to_fp16=False)
 
+        self._save_openvino_config(save_directory)
+
     @classmethod
     def _from_transformers(
         cls,
@@ -576,15 +581,10 @@ def _from_pretrained(
             local_files_only=local_files_only,
         )
 
-        # Give default quantization config if not provided and load_in_8bit=True
-        if load_in_8bit:
-            quantization_config = quantization_config or {"bits": 8}
-
-        if isinstance(quantization_config, dict):
-            if quantization_config == {"bits": 4} and config.name_or_path in _DEFAULT_4BIT_CONFIGS:
-                quantization_config = _DEFAULT_4BIT_CONFIGS[config.name_or_path]
+        if isinstance(quantization_config, dict) and quantization_config == {"bits": 4}:
+            quantization_config = _DEFAULT_4BIT_CONFIGS.get(config.name_or_path, quantization_config)
 
-            quantization_config = OVWeightQuantizationConfig.from_dict(quantization_config)
+        quantization_config = cls._prepare_weight_quantization_config(quantization_config, load_in_8bit)
 
         load_in_4bit = quantization_config.bits == 4 if quantization_config else False
         model = cls.load_model(model_cache_path, quantization_config=None if load_in_4bit else quantization_config)
@@ -603,7 +603,12 @@
 
         enable_compilation = kwargs.pop("compile", True) and not load_in_4bit
         causal_model = init_cls(
-            model=model, config=config, model_save_dir=model_cache_path.parent, compile=enable_compilation, **kwargs
+            model=model,
+            config=config,
+            model_save_dir=model_cache_path.parent,
+            compile=enable_compilation,
+            quantization_config=quantization_config,
+            **kwargs,
         )
 
@@ -632,6 +637,7 @@
             # seqlen = get_seqlen(causal_model)
             dataset = get_dataset(quantization_config.dataset, tokenizer, seqlen=32)
             dataset = prepare_dataset(dataset)
+            quantization_config = copy.deepcopy(quantization_config)
             quantization_config.dataset = nncf.Dataset(dataset, lambda x: causal_model.prepare_inputs(**x))
 
             _weight_only_quantization(model, quantization_config)
```
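Two behaviors in the causal-LM path are worth spelling out. A bare `{"bits": 4}` dict is swapped for a curated per-model default when the model appears in `_DEFAULT_4BIT_CONFIGS`, and the config is now deep-copied before its `dataset` attribute is replaced by an `nncf.Dataset`, so the config kept on the model (and later saved) holds a serializable value rather than the in-memory calibration data. A sketch, with an illustrative model id:

```python
from optimum.intel import OVModelForCausalLM

model = OVModelForCausalLM.from_pretrained(
    "gpt2",  # illustrative; any causal LM exportable to OpenVINO
    export=True,
    quantization_config={"bits": 4},  # a bare dict triggers the per-model default lookup
)
# If 4-bit quantization used a calibration dataset, the deepcopy above keeps the
# OVConfig stored on `model` free of any reference to the nncf.Dataset object.
model.save_pretrained("ov_int4_model")
```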

optimum/intel/openvino/modeling_diffusion.py (+16 -5)

```diff
@@ -87,6 +87,7 @@ def __init__(
         compile: bool = True,
         ov_config: Optional[Dict[str, str]] = None,
         model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
+        quantization_config: Optional[Union[OVWeightQuantizationConfig, Dict]] = None,
         **kwargs,
     ):
         self._internal_dict = config
@@ -140,6 +141,10 @@
 
         self._internal_dict.pop("vae", None)
 
+        self._openvino_config = None
+        if quantization_config:
+            self._openvino_config = OVConfig(quantization_config=quantization_config)
+
     def _save_pretrained(self, save_directory: Union[str, Path]):
         """
         Saves the model to the OpenVINO IR format so that it can be re-loaded using the
@@ -177,6 +182,8 @@ def _save_pretrained(self, save_directory: Union[str, Path]):
         if self.tokenizer_2 is not None:
             self.tokenizer_2.save_pretrained(save_directory / "tokenizer_2")
 
+        self._save_openvino_config(save_directory)
+
     @classmethod
     def _from_pretrained(
         cls,
@@ -257,10 +264,7 @@ def _from_pretrained(
             else:
                 kwargs[name] = load_method(new_model_save_dir)
 
-        # Give default quantization config if not provided and load_in_8bit=True
-        if load_in_8bit:
-            quantization_config = quantization_config or {"bits": 8}
-
+        quantization_config = cls._prepare_weight_quantization_config(quantization_config, load_in_8bit)
         unet = cls.load_model(
             new_model_save_dir / DIFFUSION_MODEL_UNET_SUBFOLDER / unet_file_name, quantization_config
         )
@@ -278,7 +282,14 @@
         if model_save_dir is None:
             model_save_dir = new_model_save_dir
 
-        return cls(unet=unet, config=config, model_save_dir=model_save_dir, **components, **kwargs)
+        return cls(
+            unet=unet,
+            config=config,
+            model_save_dir=model_save_dir,
+            quantization_config=quantization_config,
+            **components,
+            **kwargs,
+        )
 
     @classmethod
     def _from_transformers(
```
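Diffusion pipelines follow the same pattern, sketched with an illustrative small checkpoint: the config resolved for 8-bit weight loading of the UNet is passed to the pipeline constructor and serialized with the rest of the pipeline components on save.

```python
from optimum.intel import OVStableDiffusionPipeline

pipe = OVStableDiffusionPipeline.from_pretrained(
    "hf-internal-testing/tiny-stable-diffusion-torch",  # illustrative checkpoint
    export=True,
    load_in_8bit=True,
)
pipe.save_pretrained("ov_sd_int8")  # components plus the quantization summary
```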

optimum/intel/openvino/quantization.py (+7 -4)

```diff
@@ -45,7 +45,7 @@
 from ..utils.constant import _TASK_ALIASES
 from ..utils.import_utils import DATASETS_IMPORT_ERROR, is_datasets_available
 from ..utils.modeling_utils import get_model_device
-from .configuration import OVConfig, OVWeightQuantizationConfig
+from .configuration import DEFAULT_QUANTIZATION_CONFIG, OVConfig, OVWeightQuantizationConfig
 from .modeling_base import OVBaseModel
 from .utils import (
     MAX_ONNX_OPSET,
@@ -235,8 +235,11 @@ def quantize(
         )
         ov_config = ov_config or quantization_config
 
-        if ov_config is not None and not isinstance(ov_config, OVConfig):
-            raise TypeError(f"`ov_config` should be an `OVConfig`, but got: {type(ov_config)} instead.")
+        if ov_config is not None:
+            if not isinstance(ov_config, OVConfig):
+                raise TypeError(f"`ov_config` should be an `OVConfig`, but got: {type(ov_config)} instead.")
+            elif ov_config.compression is None:
+                ov_config.compression = DEFAULT_QUANTIZATION_CONFIG
 
         if isinstance(self.model, OVBaseModel):
             self._quantize_ovbasemodel(
@@ -355,7 +358,7 @@ def _quantize_torchmodel(
             logger.info(
                 "No configuration describing the quantization process was provided, a default OVConfig will be generated."
             )
-            ov_config = OVConfig()
+            ov_config = OVConfig(compression=DEFAULT_QUANTIZATION_CONFIG)
         onnx_file_name = (
             ONNX_WEIGHTS_NAME
             if file_name is None and ov_config.save_onnx_model
```
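A sketch of the new fallback in `OVQuantizer.quantize`, assuming an `OVConfig` built with the new constructor default: a `None` `compression` section is filled in with `DEFAULT_QUANTIZATION_CONFIG` inside `quantize()` rather than at construction time.

```python
from optimum.intel import OVConfig
from optimum.intel.openvino.configuration import DEFAULT_QUANTIZATION_CONFIG

ov_config = OVConfig()  # compression is None after this commit

# Equivalent of the check quantize() now performs before running:
if ov_config.compression is None:
    ov_config.compression = DEFAULT_QUANTIZATION_CONFIG
```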

optimum/intel/openvino/trainer.py (+35 -31)

```diff
@@ -89,7 +89,7 @@
 
 from ..utils.constant import _TASK_ALIASES
 from ..utils.import_utils import is_transformers_version
-from .configuration import OVConfig
+from .configuration import DEFAULT_QUANTIZATION_CONFIG, OVConfig
 from .quantization import OVDataLoader
 from .training_args import OVTrainingArguments
 from .utils import (
@@ -225,37 +225,41 @@ def __init__(
         self.teacher.eval()
         self.compression_controller = None
 
-        if self.ov_config is not None and self.args.do_train:
-            self._set_task()
-            train_dataloader = self.get_train_dataloader()
-            model_inputs = next(iter(train_dataloader))
-            for label_name in self.label_names:
-                model_inputs.pop(label_name)
-            force_batch_one = self._is_pruning_enabled()
-            self.ov_config.add_input_info(model_inputs, force_batch_one)
-            nncf_config = NNCFConfig.from_dict(self.ov_config.__dict__)
-            nncf_config.register_extra_structs(
-                [
-                    QuantizationRangeInitArgs(OVDataLoader(train_dataloader)),
-                    BNAdaptationInitArgs(OVDataLoader(train_dataloader)),
-                ]
-            )
+        if self.ov_config is not None:
+            if self.ov_config.compression is None:
+                self.ov_config.compression = DEFAULT_QUANTIZATION_CONFIG
+
+            if self.args.do_train:
+                self._set_task()
+                train_dataloader = self.get_train_dataloader()
+                model_inputs = next(iter(train_dataloader))
+                for label_name in self.label_names:
+                    model_inputs.pop(label_name)
+                force_batch_one = self._is_pruning_enabled()
+                self.ov_config.add_input_info(model_inputs, force_batch_one)
+                nncf_config = NNCFConfig.from_dict(self.ov_config.__dict__)
+                nncf_config.register_extra_structs(
+                    [
+                        QuantizationRangeInitArgs(OVDataLoader(train_dataloader)),
+                        BNAdaptationInitArgs(OVDataLoader(train_dataloader)),
+                    ]
+                )
 
-            # Configure NNCF logging
-            # Disable nncf logging to stdout except error
-            # but to file nncf_output.log
-            nncf_config["log_dir"] = args.output_dir
-            nncf_log_file_handler = logging.logging.FileHandler(os.path.join(args.output_dir, NNCF_LOG_FILE_NAME))
-            nncf_log_file_handler.setFormatter(logging.logging.Formatter("%(levelname)s:%(name)s:%(message)s"))
-            nncf_logger.addHandler(nncf_log_file_handler)
-            set_log_level(logging.ERROR)
-            nncf_logger.setLevel(logging.INFO)
-            nncf_log_file_handler.setLevel(logging.INFO)
-
-            self.compression_controller, self.model = create_compressed_model(self.model, nncf_config)
-            self.model_wrapped = self.model
-            # TODO : To deprecate once support transformers > 4.30.0
-            self.deepspeed = None
+                # Configure NNCF logging
+                # Disable nncf logging to stdout except error
+                # but to file nncf_output.log
+                nncf_config["log_dir"] = args.output_dir
+                nncf_log_file_handler = logging.logging.FileHandler(os.path.join(args.output_dir, NNCF_LOG_FILE_NAME))
+                nncf_log_file_handler.setFormatter(logging.logging.Formatter("%(levelname)s:%(name)s:%(message)s"))
+                nncf_logger.addHandler(nncf_log_file_handler)
+                set_log_level(logging.ERROR)
+                nncf_logger.setLevel(logging.INFO)
+                nncf_log_file_handler.setLevel(logging.INFO)
+
+                self.compression_controller, self.model = create_compressed_model(self.model, nncf_config)
+                self.model_wrapped = self.model
+                # TODO : To deprecate once support transformers > 4.30.0
+                self.deepspeed = None
 
     def _set_signature_columns_if_needed(self):
         if self._signature_columns is None:
```
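And the matching change in `OVTrainer.__init__`, sketched in isolation: the default compression config is now applied whenever an `OVConfig` without one is supplied, independently of `do_train`, while the NNCF model wrapping itself remains training-only. The trainer arguments below are a hypothetical setup, not taken from this commit.

```python
from optimum.intel import OVConfig, OVTrainer

ov_config = OVConfig()  # no compression section supplied

# Hypothetical training setup; model, training_args, and datasets are assumed
# to be defined elsewhere:
# trainer = OVTrainer(
#     model=model,
#     ov_config=ov_config,
#     task="text-classification",
#     args=training_args,
#     train_dataset=train_dataset,
# )
# OVTrainer.__init__ now fills ov_config.compression with the default
# quantization config before building the NNCFConfig, and calls
# create_compressed_model only when training_args.do_train is True.
```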
