Commit 3ffacd3

Initial commit
1 parent 32d193d commit 3ffacd3

File tree

optimum/exporters/openvino/__main__.py
optimum/exporters/openvino/convert.py

2 files changed: +51 -39 lines changed

optimum/exporters/openvino/__main__.py (+48 -4)
@@ -14,7 +14,9 @@
 
 import gc
 import logging
+import operator
 import warnings
+from functools import reduce
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Union
 
@@ -23,19 +25,19 @@
 from transformers import AutoConfig, AutoTokenizer, PreTrainedTokenizerBase
 from transformers.utils import is_torch_available
 
+from openvino.runtime import Core, Type, save_model
 from optimum.exporters import TasksManager
 from optimum.exporters.onnx.base import OnnxConfig
 from optimum.exporters.onnx.constants import SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED
 from optimum.exporters.openvino.convert import export_from_model
 from optimum.intel.utils.import_utils import (
     is_openvino_tokenizers_available,
     is_openvino_version,
-    is_transformers_version,
+    is_transformers_version, is_nncf_available,
 )
 from optimum.utils.save_utils import maybe_load_preprocessors
 
-from .utils import clear_class_registry
-
+from .utils import clear_class_registry, _MAX_UNCOMPRESSED_SIZE
 
 if TYPE_CHECKING:
     from optimum.intel.openvino.configuration import OVConfig
@@ -402,7 +404,7 @@ class StoreAttr(object):
         model_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code
     )
 
-    export_from_model(
+    submodel_paths = export_from_model(
         model=model,
         output=output,
         task=task,
@@ -425,6 +427,48 @@ class StoreAttr(object):
     del model
     gc.collect()
 
+    core = Core()
+    compressed_submodel_paths = []
+    for submodel_path in submodel_paths:
+        submodel_path = Path(output) / submodel_path
+        submodel = core.read_model(submodel_path)
+
+        quantization_config = ov_config.quantization_config if ov_config is not None else None
+        if ov_config is None:
+            num_parameters = 0
+            for op in submodel.get_ops():
+                if op.get_type_name() == "Constant" and op.get_element_type() in [Type.f16, Type.f32, Type.bf16]:
+                    num_parameters += reduce(operator.mul, op.shape, 1)
+            if num_parameters >= _MAX_UNCOMPRESSED_SIZE:
+                if is_nncf_available():
+                    quantization_config = {"bits": 8, "sym": False}
+                    logger.info("The model weights will be quantized to int8_asym.")
+                else:
+                    continue
+        if not quantization_config:
+            continue
+
+        if not is_nncf_available():
+            raise ImportError(
+                "Quantization of the weights requires nncf, please install it with `pip install nncf`"
+            )
+
+        from optimum.intel.openvino.quantization import _weight_only_quantization
+
+        _weight_only_quantization(submodel, quantization_config)
+
+        compressed_submodel_path = Path(str(submodel_path).replace(".xml", "_compressed.xml"))
+        save_model(submodel, compressed_submodel_path, compress_to_fp16=ov_config and ov_config.dtype == "fp16")
+        compressed_submodel_paths.append((submodel_path, compressed_submodel_path))
+
+        del submodel
+
+    for submodel_path, compressed_submodel_path in compressed_submodel_paths:
+        submodel_path.unlink()
+        submodel_path.with_suffix(".bin").unlink()
+        compressed_submodel_path.rename(submodel_path)
+        compressed_submodel_path.with_suffix(".bin").rename(submodel_path.with_suffix(".bin"))
+
     # Unpatch modules after GPTQ export
     if do_gptq_patching:
         torch.cuda.is_available = orig_cuda_check
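A note on the sizing heuristic above: it sums the element counts of the floating-point Constant ops in the converted IR, replacing the PyTorch-side parameter count this commit removes from convert.py below. A minimal standalone sketch of that counting step, with an illustrative model path and 1e9 as a stand-in for _MAX_UNCOMPRESSED_SIZE (the exporter imports the real constant from .utils):

import operator
from functools import reduce

from openvino.runtime import Core, Type

MAX_UNCOMPRESSED_SIZE = 1e9  # stand-in; not necessarily the value in .utils


def count_fp_constants(xml_path: str) -> int:
    # Sum the element counts of floating-point Constant ops,
    # exactly as the new loop in __main__.py does per submodel.
    submodel = Core().read_model(xml_path)
    num_parameters = 0
    for op in submodel.get_ops():
        if op.get_type_name() == "Constant" and op.get_element_type() in [Type.f16, Type.f32, Type.bf16]:
            num_parameters += reduce(operator.mul, op.shape, 1)
    return num_parameters


# Usage: models at or above the threshold get default int8_asym weight compression.
# needs_default_int8 = count_fp_constants("openvino_model.xml") >= MAX_UNCOMPRESSED_SIZE

Because the check now runs on the converted graph, the same threshold applies to each exported submodel individually rather than only to the top-level PyTorch module.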

optimum/exporters/openvino/convert.py (+3 -35)
@@ -49,7 +49,6 @@
 from .model_patcher import patch_model_with_bettertransformer
 from .stateful import ensure_export_task_support_stateful, ensure_stateful_is_available, patch_stateful
 from .utils import (
-    _MAX_UNCOMPRESSED_SIZE,
     OV_XML_FILE_NAME,
     clear_class_registry,
     flattenize_inputs,
@@ -76,21 +75,7 @@
 
 
 def _save_model(model, path: str, ov_config: Optional["OVConfig"] = None, library_name: Optional[str] = None):
-    compress_to_fp16 = False
-
-    if ov_config is not None:
-        if ov_config.quantization_config:
-            if not is_nncf_available():
-                raise ImportError(
-                    "Quantization of the weights to int8 requires nncf, please install it with `pip install nncf`"
-                )
-
-            from optimum.intel.openvino.quantization import _weight_only_quantization
-
-            _weight_only_quantization(model, ov_config.quantization_config)
-
-        compress_to_fp16 = ov_config.dtype == "fp16"
-
+    compress_to_fp16 = ov_config is not None and ov_config.dtype == "fp16"
     model = _add_version_info_to_model(model, library_name)
     save_model(model, path, compress_to_fp16)
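With quantization hoisted into __main__.py, the only policy left in _save_model is FP16 weight compression plus version stamping. For reference, a minimal standalone use of openvino.runtime.save_model with that flag (paths are illustrative):

from openvino.runtime import Core, save_model

core = Core()
model = core.read_model("openvino_model.xml")  # illustrative path

# compress_to_fp16=True stores FP32 weights as FP16 in the saved IR,
# matching what _save_model now does when ov_config.dtype == "fp16".
save_model(model, "openvino_model_fp16.xml", True)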

@@ -643,25 +628,6 @@ def export_from_model(
         )
         logging.disable(logging.NOTSET)
 
-    if ov_config is None:
-        if library_name == "diffusers":
-            num_parameters = model.unet.num_parameters()
-        else:
-            num_parameters = sum(param.numel() for param in list(model.parameters()) if param.requires_grad)
-
-        if num_parameters >= _MAX_UNCOMPRESSED_SIZE:
-            if is_nncf_available():
-                from ...intel.openvino.configuration import OVConfig
-
-                ov_config = OVConfig(quantization_config={"bits": 8, "sym": False})
-
-                logger.info("The model weights will be quantized to int8_asym.")
-            else:
-                logger.warning(
-                    "The model will be converted with no weights quantization. Quantization of the weights to int8 requires nncf."
-                    "please install it with `pip install nncf`"
-                )
-
     if library_name != "diffusers":
         # Saving the model config and preprocessor as this is needed sometimes.
         model.config.save_pretrained(output)
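For contrast with the new Constant-op counting, the deleted block sized the model on the PyTorch side before conversion (diffusers pipelines by their unet, everything else by trainable parameters). A standalone equivalent of that old heuristic, assuming torch is installed:

import torch.nn as nn


def count_trainable_parameters(model: nn.Module) -> int:
    # The removed heuristic: size by trainable PyTorch parameters
    # rather than by floating-point Constants in the converted IR.
    return sum(param.numel() for param in model.parameters() if param.requires_grad)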
@@ -720,6 +686,8 @@ def export_from_model(
         patch_16bit_model=patch_16bit_model,
     )
 
+    return files_subpaths
+
 
 def export_tokenizer(
     tokenizer,
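export_from_model now returns files_subpaths so the caller can post-process the exported IRs; __main__.py resolves each subpath against the output directory before reading it back. A hedged sketch of that consumption pattern (function name is illustrative):

from pathlib import Path

from openvino.runtime import Core


def read_exported_submodels(output, submodel_paths):
    # submodel_paths is the list returned by export_from_model (relative .xml subpaths).
    core = Core()
    return [core.read_model(Path(output) / subpath) for subpath in submodel_paths]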
