Skip to content

Commit 349350c

Browse files
Quantize SD submodels in OVQuantizer
1 parent 583e435 commit 349350c

File tree

3 files changed

+28
-24
lines changed

notebooks/openvino/stable_diffusion_hybrid_quantization.ipynb

+3-2
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
"from pathlib import Path\n",
5454
"from openvino.runtime import Core\n",
5555
"from optimum.intel import OVConfig, OVQuantizer, OVStableDiffusionPipeline, OVWeightQuantizationConfig\n",
56+
"from optimum.intel.openvino.configuration import OVQuantizationMethod\n",
5657
"\n",
5758
"transformers.logging.set_verbosity_error()\n",
5859
"datasets.logging.set_verbosity_error()"
@@ -198,8 +199,8 @@
198199
},
199200
"outputs": [],
200201
"source": [
201-
"quantization_config = OVWeightQuantizationConfig(bits=8, num_samples=NUM_SAMPLES)\n",
202202
"int8_pipe = OVStableDiffusionPipeline.from_pretrained(model_id=MODEL_ID, export=True)\n",
203+
"quantization_config = OVWeightQuantizationConfig(bits=8, num_samples=NUM_SAMPLES, quant_method=OVQuantizationMethod.HYBRID)\n",
203204
"quantizer = OVQuantizer(int8_pipe)\n",
204205
"quantizer.quantize(\n",
205206
" ov_config=OVConfig(quantization_config=quantization_config),\n",
@@ -618,7 +619,7 @@
618619
"name": "python",
619620
"nbconvert_exporter": "python",
620621
"pygments_lexer": "ipython3",
621-
"version": "3.8.10"
622+
"version": "3.11.7"
622623
}
623624
},
624625
"nbformat": 4,

optimum/intel/openvino/modeling_diffusion.py

+14-19
Original file line numberDiff line numberDiff line change
@@ -292,33 +292,27 @@ def _from_pretrained(
292292
else:
293293
kwargs[name] = load_method(new_model_save_dir)
294294

295-
quantization_config = cls._prepare_weight_quantization_config(quantization_config, load_in_8bit)
296-
297295
unet_path = new_model_save_dir / DIFFUSION_MODEL_UNET_SUBFOLDER / unet_file_name
298-
if quantization_config is not None and quantization_config.dataset is not None:
299-
# load the UNet model uncompressed to apply hybrid quantization further
300-
unet = cls.load_model(unet_path)
301-
# Apply weights compression to other `components` without dataset
302-
quantization_config_without_dataset = deepcopy(quantization_config)
303-
quantization_config_without_dataset.dataset = None
304-
else:
305-
quantization_config_without_dataset = quantization_config
306-
unet = cls.load_model(unet_path, quantization_config_without_dataset)
307-
308296
components = {
309297
"vae_encoder": new_model_save_dir / DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER / vae_encoder_file_name,
310298
"vae_decoder": new_model_save_dir / DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER / vae_decoder_file_name,
311299
"text_encoder": new_model_save_dir / DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER / text_encoder_file_name,
312300
"text_encoder_2": new_model_save_dir / DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER / text_encoder_2_file_name,
313301
}
314302

315-
for key, value in components.items():
316-
components[key] = cls.load_model(value, quantization_config_without_dataset) if value.is_file() else None
317-
318303
if model_save_dir is None:
319304
model_save_dir = new_model_save_dir
320305

321-
if quantization_config is not None and quantization_config.dataset is not None:
306+
quantization_config = cls._prepare_weight_quantization_config(quantization_config, load_in_8bit)
307+
if quantization_config is None or quantization_config.dataset is None:
308+
unet = cls.load_model(unet_path, quantization_config)
309+
for key, value in components.items():
310+
components[key] = cls.load_model(value, quantization_config) if value.is_file() else None
311+
else:
312+
# Load uncompressed models to apply hybrid quantization further
313+
unet = cls.load_model(unet_path)
314+
for key, value in components.items():
315+
components[key] = cls.load_model(value) if value.is_file() else None
322316
sd_model = cls(unet=unet, config=config, model_save_dir=model_save_dir, **components, **kwargs)
323317

324318
supported_pipelines = (
@@ -331,10 +325,10 @@ def _from_pretrained(
331325

332326
from optimum.intel import OVQuantizer
333327

328+
hybrid_quantization_config = deepcopy(quantization_config)
329+
hybrid_quantization_config.quant_method = OVQuantizationMethod.HYBRID
334330
quantizer = OVQuantizer(sd_model)
335-
quantization_config_copy = deepcopy(quantization_config)
336-
quantization_config_copy.quant_method = OVQuantizationMethod.HYBRID
337-
quantizer.quantize(ov_config=OVConfig(quantization_config=quantization_config_copy))
331+
quantizer.quantize(ov_config=OVConfig(quantization_config=hybrid_quantization_config))
338332

339333
return sd_model
340334

@@ -347,6 +341,7 @@ def _from_pretrained(
347341
**kwargs,
348342
)
349343

344+
350345
@classmethod
351346
def _from_transformers(
352347
cls,

optimum/intel/openvino/quantization.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,6 @@
6363

6464
if is_datasets_available():
6565
from datasets import Dataset
66-
else:
67-
Dataset = None
6866

6967
register_module(ignored_algorithms=[])(Conv1D)
7068

@@ -388,11 +386,21 @@ def _quantize_ovbasemodel(
388386
if calibration_dataset is None:
389387
raise ValueError("Calibration dataset is required to run hybrid quantization.")
390388
if isinstance(self.model, OVStableDiffusionPipelineBase):
389+
# Apply weight-only quantization to all SD submodels except UNet
390+
quantization_config_copy = copy.deepcopy(quantization_config)
391+
quantization_config_copy.dataset = None
392+
quantization_config_copy.quant_method = OVQuantizationMethod.DEFAULT
393+
for sd_submodel_name in ["vae_encoder", "vae_decoder", "text_encoder", "text_encoder_2"]:
394+
sd_submodel = getattr(self.model, sd_submodel_name)
395+
if sd_submodel is not None:
396+
_weight_only_quantization(sd_submodel.model, quantization_config_copy)
397+
398+
# Apply hybrid quantization to UNet
391399
self.model.unet.model = _hybrid_quantization(
392400
self.model.unet.model, quantization_config, calibration_dataset
393401
)
394402
else:
395-
# This may be for example OVModelForImageClassification, OVModelForAudioClassification, etc.
403+
# The model may be for example OVModelForImageClassification, OVModelForAudioClassification, etc.
396404
self.model.model = _hybrid_quantization(self.model.model, quantization_config, calibration_dataset)
397405
else:
398406
_weight_only_quantization(self.model.model, quantization_config, calibration_dataset)

0 commit comments

Comments (0)