Commit 91964c4: Add ov_submodels property to OVBaseModel
Parent: c8c6beb

8 files changed: +80 -133 lines

optimum/intel/openvino/modeling_base.py (+12 -1)

@@ -17,7 +17,7 @@
 import warnings
 from pathlib import Path
 from tempfile import gettempdir
-from typing import Dict, Optional, Union
+from typing import Dict, List, Optional, Union

 import openvino
 import torch
@@ -204,6 +204,17 @@ def dtype(self) -> Optional[torch.dtype]:

         return None

+    @property
+    def ov_submodels(self) -> Dict[str, openvino.runtime.Model]:
+        return {submodel_name: getattr(self, submodel_name) for submodel_name in self._ov_submodel_names}
+
+    @property
+    def _ov_submodel_names(self) -> List[str]:
+        """
+        List of OpenVINO submodel names, used as keys of the dictionary returned by the `ov_submodels` property.
+        """
+        return ["model"]
+
     @staticmethod
     def load_model(
         file_name: Union[str, Path],
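
With this property, every OVBaseModel subclass exposes its OpenVINO IR graphs through one mapping and only needs to override `_ov_submodel_names` to participate. A minimal sketch of that protocol (toy class names, no optimum-intel imports; plain strings stand in for `openvino.runtime.Model` objects so the snippet is self-contained):

from typing import Dict, List


class ToyBaseModel:
    # Mirrors the base-class protocol added in this commit.
    def __init__(self, model: object):
        self.model = model  # stand-in for an openvino.runtime.Model

    @property
    def ov_submodels(self) -> Dict[str, object]:
        return {name: getattr(self, name) for name in self._ov_submodel_names}

    @property
    def _ov_submodel_names(self) -> List[str]:
        return ["model"]


class ToyEncoderDecoder(ToyBaseModel):
    # Hypothetical subclass exposing two IRs under the same protocol.
    def __init__(self, encoder: object, decoder: object):
        self.encoder = encoder
        self.decoder = decoder

    @property
    def _ov_submodel_names(self) -> List[str]:
        return ["encoder", "decoder"]


assert list(ToyEncoderDecoder("enc", "dec").ov_submodels) == ["encoder", "decoder"]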

optimum/intel/openvino/modeling_base_seq2seq.py (+11 -8)

@@ -15,7 +15,7 @@
 import logging
 import os
 from pathlib import Path
-from typing import Dict, Optional, Union
+from typing import Dict, List, Optional, Union

 import openvino
 from huggingface_hub import hf_hub_download
@@ -106,6 +106,13 @@ def __init__(
         self._openvino_config = OVConfig(quantization_config=quantization_config)
         self._set_ov_config_parameters()

+    @property
+    def _ov_submodel_names(self) -> List[str]:
+        submodel_names = ["encoder", "decoder"]
+        if self.decoder_with_past_model is not None:
+            submodel_names.append("decoder_with_past")
+        return submodel_names
+
     def _save_pretrained(self, save_directory: Union[str, Path]):
         """
         Saves the model to the OpenVINO IR format so that it can be re-loaded using the
@@ -482,13 +489,9 @@ def half(self):
             raise ValueError(
                 "`half()` is not supported with `compile_only` mode, please initialize model without this option"
             )
-        apply_moc_transformations(self.encoder_model, cf=False)
-        apply_moc_transformations(self.decoder_model, cf=False)
-        compress_model_transformation(self.encoder_model)
-        compress_model_transformation(self.decoder_model)
-        if self.decoder_with_past_model is not None:
-            apply_moc_transformations(self.decoder_with_past_model, cf=False)
-            compress_model_transformation(self.decoder_with_past_model)
+        for submodel in self.ov_submodels.values():
+            apply_moc_transformations(submodel, cf=False)
+            compress_model_transformation(submodel)
         return self

     def forward(self, *args, **kwargs):

optimum/intel/openvino/modeling_diffusion.py (+20 -11)

@@ -264,6 +264,24 @@ def __init__(
         if compile and not self._compile_only:
             self.compile()

+    @property
+    def ov_submodels(self) -> Dict[str, openvino.runtime.Model]:
+        return {name: getattr(getattr(self, name), "model") for name in self._ov_submodel_names}
+
+    @property
+    def _ov_submodel_names(self) -> List[str]:
+        submodel_name_candidates = [
+            "unet",
+            "transformer",
+            "vae_decoder",
+            "vae_encoder",
+            "text_encoder",
+            "text_encoder_2",
+            "text_encoder_3",
+        ]
+        submodel_names = [name for name in submodel_name_candidates if getattr(self, name) is not None]
+        return submodel_names
+
     def _save_pretrained(self, save_directory: Union[str, Path]):
         """
         Saves the model to the OpenVINO IR format so that it can be re-loaded using the
@@ -879,17 +897,8 @@ def half(self):
                 "`half()` is not supported with `compile_only` mode, please initialize model without this option"
             )

-        for component in {
-            self.unet,
-            self.transformer,
-            self.vae_encoder,
-            self.vae_decoder,
-            self.text_encoder,
-            self.text_encoder_2,
-            self.text_encoder_3,
-        }:
-            if component is not None:
-                compress_model_transformation(component.model)
+        for submodel in self.ov_submodels.values():
+            compress_model_transformation(submodel)

         self.clear_requests()
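
The diffusion pipeline overrides `ov_submodels` because its parts (`self.unet`, `self.vae_decoder`, and so on) are wrapper objects whose raw IR lives in a `.model` attribute; the double `getattr` unwraps them so callers always receive plain `openvino.runtime.Model` values. A hedged usage sketch (the repo id is hypothetical; assumes an OpenVINO-exported Stable Diffusion checkpoint and the diffusers extra installed):

from optimum.intel import OVStableDiffusionPipeline

# Hypothetical repo id; any OpenVINO-exported Stable Diffusion checkpoint would do.
pipe = OVStableDiffusionPipeline.from_pretrained("my-org/stable-diffusion-openvino")
for name, ov_model in pipe.ov_submodels.items():
    # Values are raw openvino.runtime.Model objects, not the wrapper parts.
    print(name, len(ov_model.get_ops()))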

optimum/intel/openvino/modeling_visual_language.py (+5 -9)

@@ -385,17 +385,17 @@ def _save_pretrained(self, save_directory: Union[str, Path]):
             save_directory (`str` or `Path`):
                 The directory where to save the model files.
         """
-        src_models = self.submodels
+        src_models = self.ov_submodels
         dst_file_names = {
             "lm_model": OV_LANGUAGE_MODEL_NAME,
             "text_embeddings_model": OV_TEXT_EMBEDDINGS_MODEL_NAME,
             "vision_embeddings_model": OV_VISION_EMBEDDINGS_MODEL_NAME,
         }
-        for name in self._submodel_names:
+        for name in self._ov_submodel_names:
             if name not in dst_file_names:
                 dst_file_names[name] = f"openvino_{name}.xml"

-        for name in self._submodel_names:
+        for name in self._ov_submodel_names:
             model = src_models[name]
             dst_file_name = dst_file_names[name]
             dst_path = os.path.join(save_directory, dst_file_name)
@@ -653,17 +653,13 @@ def components(self):
         return {component_name: getattr(self, component_name) for component_name in self._component_names}

     @property
-    def _submodel_names(self):
+    def _ov_submodel_names(self):
         model_names = ["lm_model", "text_embeddings_model", "vision_embeddings_model"]
         for part in self.additional_parts:
             if getattr(self, part, None) is not None:
                 model_names.append(part + "_model")
         return model_names

-    @property
-    def submodels(self):
-        return {submodel_name: getattr(self, submodel_name) for submodel_name in self._submodel_names}
-
     def reshape(self, batch_size: int, sequence_length: int):
         logger.warning("Static shapes are not supported for causal language model.")
         return self
@@ -672,7 +668,7 @@ def half(self):
         """
         Converts all the model weights to FP16 for more efficient inference on GPU.
         """
-        for _, submodel in self.submodels.items():
+        for submodel in self.ov_submodels.values():
             apply_moc_transformations(submodel, cf=False)
             compress_model_transformation(submodel)
         return self
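
Note: this hunk drops the public `submodels` property instead of keeping it as an alias, so any external code reading `model.submodels` on a visual-language model would have to switch to the identically shaped `ov_submodels` mapping.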

optimum/intel/openvino/quantization.py (+14 -32)

@@ -393,30 +393,23 @@ def _quantize_ovbasemodel(
         if calibration_dataset is None:
             raise ValueError("Calibration dataset is required to run hybrid quantization.")
         if is_diffusers_available() and isinstance(self.model, OVDiffusionPipeline):
-            # Apply weight-only quantization to all SD submodels except UNet
+            # Apply weight-only quantization to all SD submodels except UNet/Transformer
             quantization_config_copy = quantization_config.clone()
             quantization_config_copy.dataset = None
             quantization_config_copy.quant_method = OVQuantizationMethod.DEFAULT
-            sub_model_names = [
-                "vae_encoder",
-                "vae_decoder",
-                "text_encoder",
-                "text_encoder_2",
-                "text_encoder_3",
-            ]
-            sub_models = filter(lambda x: x, (getattr(self.model, name) for name in sub_model_names))
+            sub_models = [v for (k, v) in self.model.ov_submodels.items() if k not in ("unet", "transformer")]
             for sub_model in sub_models:
-                _weight_only_quantization(sub_model.model, quantization_config_copy, **kwargs)
+                _weight_only_quantization(sub_model, quantization_config_copy, **kwargs)

-            if self.model.unet is not None:
-                # Apply hybrid quantization to UNet
-                self.model.unet.model = _hybrid_quantization(
-                    self.model.unet.model, quantization_config, calibration_dataset, **kwargs
-                )
+            unet_is_present = self.model.unet is not None
+            vision_model = (self.model.unet if unet_is_present else self.model.transformer).model
+            quantized_vision_model = _hybrid_quantization(
+                vision_model, quantization_config, calibration_dataset, **kwargs
+            )
+            if unet_is_present:
+                self.model.unet.model = quantized_vision_model
             else:
-                self.model.transformer.model = _hybrid_quantization(
-                    self.model.transformer.model, quantization_config, calibration_dataset, **kwargs
-                )
+                self.model.transformer.model = quantized_vision_model

             self.model.clear_requests()
         else:
@@ -427,24 +420,13 @@ def _quantize_ovbasemodel(
             self.model.request = None
         else:
             if is_diffusers_available() and isinstance(self.model, OVDiffusionPipeline):
-                sub_model_names = [
-                    "vae_encoder",
-                    "vae_decoder",
-                    "text_encoder",
-                    "text_encoder_2",
-                    "unet",
-                    "transformer",
-                    "text_encoder_3",
-                ]
-                sub_models = filter(lambda x: x, (getattr(self.model, name) for name in sub_model_names))
-                for sub_model in sub_models:
-                    _weight_only_quantization(sub_model.model, quantization_config, **kwargs)
+                for submodel in self.model.ov_submodels.values():
+                    _weight_only_quantization(submodel, quantization_config, **kwargs)
                 self.model.clear_requests()
             elif isinstance(self.model, OVModelForVisualCausalLM):
                 language_model = self.model.language_model
                 _weight_only_quantization(language_model.model, quantization_config, calibration_dataset, **kwargs)
-                sub_model_names = ["vision_embeddings", "text_embeddings"] + self.model.additional_parts
-                sub_models = [getattr(self.model, f"{name}_model") for name in sub_model_names]
+                sub_models = [v for (k, v) in self.model.ov_submodels.items() if k != "lm_model"]
                 for sub_model in sub_models:
                     _weight_only_quantization(sub_model, OVWeightQuantizationConfig(bits=8, sym=True), **kwargs)
                 self.model.clear_requests()
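
Both quantization branches now rely on the same idiom: select submodels by key from `ov_submodels` instead of probing attributes and filtering out `None`. A standalone sketch of that selection pattern (dictionary contents are made up for illustration; strings stand in for `openvino.runtime.Model` objects):

# Stand-in for what ov_submodels might return for an SDXL-like pipeline.
ov_submodels = {
    "unet": "unet_ir",
    "vae_decoder": "vae_decoder_ir",
    "vae_encoder": "vae_encoder_ir",
    "text_encoder_2": "text_encoder_2_ir",
}

# Weight-only targets: every submodel except the one that gets hybrid quantization.
weight_only = [v for k, v in ov_submodels.items() if k not in ("unet", "transformer")]
assert weight_only == ["vae_decoder_ir", "vae_encoder_ir", "text_encoder_2_ir"]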

tests/openvino/test_exporters_cli.py (+2 -26)

@@ -606,29 +606,11 @@ def test_exporters_cli_int8(self, task: str, model_type: str):
             else _HEAD_TO_AUTOMODELS[model_type.replace("-refiner", "")]
         ).from_pretrained(tmpdir, **model_kwargs)

-        if task.startswith("text2text-generation"):
-            models = [model.encoder, model.decoder]
-            if task.endswith("with-past") and not model.decoder.stateful:
-                models.append(model.decoder_with_past)
-        elif (
-            model_type.startswith("stable-diffusion")
-            or model_type.startswith("flux")
-            or model_type.startswith("sana")
-        ):
-            models = [model.unet or model.transformer, model.vae_encoder, model.vae_decoder]
-            models.append(
-                model.text_encoder if model_type in ["stable-diffusion", "sana"] else model.text_encoder_2
-            )
-        elif task.startswith("image-text-to-text"):
-            models = list(model.submodels.values())
-        else:
-            models = [model]
-
         expected_int8 = _ARCHITECTURES_TO_EXPECTED_INT8[model_type]
         expected_int8 = [{"int8": it} for it in expected_int8]
         if task.startswith("text2text-generation") and (not task.endswith("with-past") or model.decoder.stateful):
             expected_int8 = expected_int8[:2]
-        check_compression_state_per_model(self, models, expected_int8)
+        check_compression_state_per_model(self, model.ov_submodels.values(), expected_int8)

     @parameterized.expand(SUPPORTED_SD_HYBRID_ARCHITECTURES)
     def test_exporters_cli_hybrid_quantization(
@@ -667,13 +649,7 @@ def test_exporters_cli_4bit(
             else _HEAD_TO_AUTOMODELS[model_type.replace("-refiner", "")]
         ).from_pretrained(tmpdir, **model_kwargs)

-        submodels = []
-        if task == "text-generation-with-past":
-            submodels = [model]
-        elif task == "image-text-to-text":
-            submodels = list(model.submodels.values())
-
-        check_compression_state_per_model(self, submodels, expected_num_weight_nodes_per_model)
+        check_compression_state_per_model(self, model.ov_submodels.values(), expected_num_weight_nodes_per_model)

         self.assertTrue("--awq" not in option or b"Applying AWQ" in result.stdout)
         self.assertTrue("--scale-estimation" not in option or b"Applying Scale Estimation" in result.stdout)

tests/openvino/test_quantization.py (+10 -40)

@@ -818,30 +818,19 @@ def test_ovmodel_load_with_compressed_weights(self, model_cls, model_type, trust_remote_code):
         self.assertEqual(model._openvino_config.quantization_config.bits, 8)
         self.assertEqual(model._openvino_config.dtype, "int8")

-        if model.export_feature.startswith("text2text-generation"):
-            models = [model.encoder, model.decoder]
-            if model.decoder_with_past is not None:
-                models.append(model.decoder_with_past)
-        elif model.export_feature == "text-to-image":
-            models = [model.unet, model.vae_encoder, model.vae_decoder]
-            models.append(model.text_encoder if model_type in ["stable-diffusion", "sana"] else model.text_encoder_2)
-        elif model_type == "open-clip":
-            models = [model.text_model, model.visual_model]
-        elif model.export_feature == "image-text-to-text":
-            models = list(model.submodels.values())
-        else:
-            models = [model]
-
         if model_type == "open-clip":
             pytest.skip(reason="ticket 161043")
         elif model_type == "t5":
             pytest.skip(reason="ticket 160958")
         else:
             check_optimization_not_applicable_to_optimized_model(model, quantization_config={"bits": 8})

+        submodels = (
+            [model.text_model, model.visual_model] if model_type == "open-clip" else model.ov_submodels.values()
+        )
         expected_ov_int8 = _ARCHITECTURES_TO_EXPECTED_INT8[model_type]
         expected_ov_int8 = [{"int8": it} for it in expected_ov_int8]
-        check_compression_state_per_model(self, models, expected_ov_int8)
+        check_compression_state_per_model(self, submodels, expected_ov_int8)

     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_HYBRID_QUANTIZATION)
     def test_ovmodel_hybrid_quantization(self, model_cls, model_type, expected_fake_nodes, expected_int8_nodes):
@@ -938,11 +927,7 @@ def test_ovmodel_4bit_auto_compression_with_config(
             # TODO: Check that AWQ was actually applied
             pass

-        submodels = []
-        if isinstance(model, OVModelForCausalLM):
-            submodels = [model.model]
-        elif isinstance(model, OVModelForVisualCausalLM):
-            submodels = list(model.submodels.values())
+        submodels = list(model.ov_submodels.values())
         check_compression_state_per_model(self, submodels, expected_num_weight_nodes_per_model)

         model.save_pretrained(tmp_dir)
@@ -976,21 +961,11 @@ def test_ovmodel_load_with_uncompressed_weights(self, model_cls, model_type, trust_remote_code):
         model = model_cls.from_pretrained(
             MODEL_NAMES[model_type], export=True, load_in_8bit=False, trust_remote_code=trust_remote_code
         )
-        if model.export_feature.startswith("text2text-generation"):
-            models = [model.encoder, model.decoder]
-            if model.decoder_with_past is not None:
-                models.append(model.decoder_with_past)
-        elif model.export_feature == "text-to-image":
-            models = [model.unet, model.vae_encoder, model.vae_decoder]
-            models.append(model.text_encoder if model_type in ["stable-diffusion", "sana"] else model.text_encoder_2)
-        elif model_type == "open-clip":
-            models = [model.text_model, model.visual_model]
-        elif model.export_feature == "image-text-to-text":
-            models = list(model.submodels.values())
-        else:
-            models = [model]

-        for i, submodel in enumerate(models):
+        submodels = (
+            [model.text_model, model.visual_model] if model_type == "open-clip" else model.ov_submodels.values()
+        )
+        for i, submodel in enumerate(submodels):
             ov_model = submodel if isinstance(submodel, ov.Model) else submodel.model
             _, num_weight_nodes = get_num_quantized_nodes(ov_model)
             self.assertEqual(0, num_weight_nodes["int8"])
@@ -1106,12 +1081,7 @@ def test_ovmodel_4bit_dynamic_with_config(
         self.assertEqual(model.ov_config["DYNAMIC_QUANTIZATION_GROUP_SIZE"], str(group_size))
         self.assertEqual(model.ov_config["KV_CACHE_PRECISION"], "u8")

-        submodels = []
-        if isinstance(model, OVModelForCausalLM):
-            submodels = [model.model]
-        elif isinstance(model, OVModelForVisualCausalLM):
-            submodels = list(model.submodels.values())
-        check_compression_state_per_model(self, submodels, expected_num_weight_nodes_per_model)
+        check_compression_state_per_model(self, model.ov_submodels.values(), expected_num_weight_nodes_per_model)

         model.save_pretrained(tmp_dir)
         openvino_config = OVConfig.from_pretrained(tmp_dir)

tests/openvino/utils_tests.py (+6 -6)

@@ -190,13 +190,13 @@
     "wav2vec2": (34,),
     "distilbert": (66,),
     "t5": (64, 104, 84),
-    "stable-diffusion": (242, 34, 42, 64),
-    "stable-diffusion-xl": (366, 34, 42, 66),
-    "stable-diffusion-xl-refiner": (366, 34, 42, 66),
+    "stable-diffusion": (242, 42, 34, 64),
+    "stable-diffusion-xl": (366, 42, 34, 64, 66),
+    "stable-diffusion-xl-refiner": (366, 42, 34, 66),
     "open-clip": (20, 28),
-    "stable-diffusion-3": (66, 42, 58, 30),
-    "flux": (56, 24, 28, 64),
-    "flux-fill": (56, 24, 28, 64),
+    "stable-diffusion-3": (66, 58, 42, 30, 30, 32),
+    "flux": (56, 28, 24, 64, 64),
+    "flux-fill": (56, 28, 24, 64, 64),
     "llava": (30, 1, 9),
     "llava_next": (30, 1, 9),
     "minicpmv": (30, 1, 26, 6),
