Skip to content

Commit 1c14957

Browse files
alexsu52 and echarlaix authored
Skip automodel compression weights tests for nncf==2.8.0 (#535)
* skip compression weights tests for nncf==2.8.0 and reworked logic of optimization stateful PyTorch models * black happy * ruff happy * updated nncf version * replied to comments * replied comments * typo * cherry pick fixes for tests from PR 538 * replied to comments --------- Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com>
1 parent a7b766e commit 1c14957

File tree

4 files changed

+88
-51
lines changed

4 files changed

+88
-51
lines changed

optimum/exporters/openvino/convert.py

+1-20
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,7 @@
3131
from optimum.exporters.onnx.model_patcher import DecoderModelPatcher
3232
from optimum.utils import is_diffusers_available
3333

34-
from ...intel.utils.import_utils import (
35-
_torch_version,
36-
_transformers_version,
37-
is_nncf_available,
38-
is_optimum_version,
39-
is_torch_version,
40-
is_transformers_version,
41-
)
34+
from ...intel.utils.import_utils import is_nncf_available, is_optimum_version
4235
from .model_patcher import patch_model_with_bettertransformer
4336
from .stateful import ensure_stateful_is_available, patch_stateful
4437
from .utils import (
@@ -331,18 +324,6 @@ def export_pytorch(
331324
output = Path(output)
332325

333326
if stateful:
334-
if is_transformers_version("<", "4.36") or is_torch_version("<", "2.1.1"):
335-
COLOR_RED = "\033[1;31m"
336-
COLOR_RESET = "\033[0m"
337-
logger.warning(
338-
COLOR_RED
339-
+ "[WARNING] For good performance with stateful models, transformers>=4.36.2 and PyTorch>=2.1.1 are required. "
340-
f"This Python environment has Transformers {_transformers_version} and PyTorch {_torch_version}. "
341-
"Consider upgrading PyTorch and Transformers, for example by running "
342-
"`pip install --upgrade --upgrade-strategy eager optimum[openvino,nncf]`, and export the model again"
343-
+ COLOR_RESET
344-
)
345-
346327
# Trigger bettertransformer together with stateful model because OpenVINO HW-dependent transformations expect
347328
# both of them are applied to demonstrate the best performance.
348329
# TODO: Consider applying bettertransformer regardless of stateful flag -- requires additional validation.

optimum/exporters/openvino/model_patcher.py

+20-5
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,31 @@
1414

1515
import logging as log
1616

17-
from optimum.intel.utils.import_utils import is_torch_version
17+
from optimum.intel.utils.import_utils import (
18+
_torch_version,
19+
_transformers_version,
20+
is_torch_version,
21+
is_transformers_version,
22+
)
1823

1924

2025
def patch_model_with_bettertransformer(model):
21-
if is_torch_version("<", "2.0"):
26+
# check that the model has not yet been patched
27+
if hasattr(model, "use_bettertransformer") and model.use_bettertransformer is True:
28+
return model
29+
30+
if is_transformers_version("<", "4.36") or is_torch_version("<", "2.1.1"):
31+
COLOR_RED = "\033[1;31m"
32+
COLOR_RESET = "\033[0m"
2233
log.warn(
23-
"integration Scaled Dot Product Attention optimization supported only with torch > 2.0."
24-
"Usage model with stateful=True may be non-effective if model does not contain torch.functional.scaled_dot_product_attention"
25-
"It is recommended to upgrade PyTorch version for using stateful model or use stateful=False"
34+
COLOR_RED
35+
+ "[WARNING] For good performance with stateful models, transformers>=4.36.2 and PyTorch>=2.1.1 are required. "
36+
f"This Python environment has Transformers {_transformers_version} and PyTorch {_torch_version}. "
37+
"Consider upgrading PyTorch and Transformers, for example by running "
38+
"`pip install --upgrade --upgrade-strategy eager optimum[openvino,nncf]`, and export the model again"
39+
+ COLOR_RESET
2640
)
41+
2742
# model already has required SDPA implementation
2843
if getattr(model, "_supports_sdpa", False) and getattr(model.config, "_attn_implementation", "eager") == "sdpa":
2944
return model

optimum/intel/openvino/quantization.py

+46-26
Original file line numberDiff line numberDiff line change
@@ -24,22 +24,26 @@
2424
import transformers
2525
from accelerate.data_loader import DataLoaderStateMixin
2626
from datasets import Dataset, load_dataset
27-
from nncf import NNCFConfig, compress_weights
27+
from nncf import NNCFConfig
2828
from nncf.torch import create_compressed_model, register_default_init_args, register_module
2929
from nncf.torch.dynamic_graph.io_handling import wrap_nncf_model_inputs_with_objwalk
3030
from nncf.torch.initialization import PTInitializingDataLoader
3131
from openvino._offline_transformations import compress_quantize_weights_transformation
3232
from openvino.runtime import Core, Tensor
33+
from torch.utils._pytree import tree_map
3334
from torch.utils.data import DataLoader, RandomSampler
3435
from transformers import DataCollator, PreTrainedModel, default_data_collator
3536
from transformers.pytorch_utils import Conv1D
3637

38+
from optimum.exporters.onnx.convert import check_dummy_inputs_are_allowed
3739
from optimum.exporters.tasks import TasksManager
3840
from optimum.quantization_base import OptimumQuantizer
3941

4042
from ...exporters.openvino import export, export_pytorch_via_onnx
41-
from ...exporters.openvino.stateful import ensure_export_task_support_stateful
43+
from ...exporters.openvino.model_patcher import patch_model_with_bettertransformer
44+
from ...exporters.openvino.stateful import ensure_export_task_support_stateful, ensure_stateful_is_available
4245
from ..utils.constant import _TASK_ALIASES
46+
from ..utils.modeling_utils import get_model_device
4347
from .configuration import OVConfig
4448
from .modeling_base import OVBaseModel
4549
from .modeling_decoder import OVBaseDecoderModel
@@ -361,9 +365,7 @@ def _quantize_ovcausallm(
361365
self.model.model,
362366
quantization_dataset,
363367
model_type=nncf.ModelType.TRANSFORMER if not kwargs.get("model_type") else kwargs.get("model_type"),
364-
fast_bias_correction=True
365-
if not kwargs.get("fast_bias_correction")
366-
else kwargs.get("fast_bias_correction"),
368+
fast_bias_correction=kwargs.get("fast_bias_correction", True),
367369
**kwargs,
368370
)
369371
self.model.model = quantized_model
@@ -405,13 +407,42 @@ def _quantize_torchmodel(
405407
if file_name is None and ov_config.save_onnx_model
406408
else Path(ov_file_name).with_suffix(".onnx")
407409
)
410+
411+
task = self.task
412+
model = self.model
413+
self.model.config.save_pretrained(save_directory)
414+
if task.startswith("text-generation"):
415+
onnx_config = onnx_config_class(
416+
model.config, use_past=model.config.use_cache, use_past_in_inputs=model.config.use_cache
417+
)
418+
if model.config.use_cache:
419+
task = "text-generation-with-past"
420+
else:
421+
onnx_config = onnx_config_class(model.config)
422+
423+
stateful = ensure_stateful_is_available() and ensure_export_task_support_stateful(task)
424+
408425
if weights_only:
409-
if getattr(self.model.config, "tie_word_embeddings", True):
410-
# to fix problem with shared embedding weights in nncf compress_weights()
411-
self.model.tie_weights()
412-
compressed_model = compress_weights(self.model)
413-
self.model = compressed_model
426+
if stateful:
427+
# patch model before weight compression
428+
model = patch_model_with_bettertransformer(model)
429+
430+
dummy_inputs = onnx_config.generate_dummy_inputs(framework="pt")
431+
device = get_model_device(model)
432+
dummy_inputs = tree_map(
433+
lambda value: value.to(device) if isinstance(value, torch.Tensor) else value, dummy_inputs
434+
)
435+
check_dummy_inputs_are_allowed(model, dummy_inputs)
436+
437+
nncf.compress_weights(model, dataset=nncf.Dataset([dummy_inputs]))
414438
else:
439+
if stateful:
440+
logger.warn(
441+
"Quantization algorithm does not support optimized stateful models. "
442+
"The original model without optimization will be quantized and export."
443+
)
444+
stateful = False
445+
415446
calibration_dataloader = self._get_calibration_dataloader(
416447
calibration_dataset=calibration_dataset,
417448
batch_size=batch_size,
@@ -423,22 +454,10 @@ def _quantize_torchmodel(
423454
ov_config.add_input_info(model_inputs)
424455
nncf_config = NNCFConfig.from_dict(ov_config.__dict__)
425456
nncf_config = register_default_init_args(nncf_config, calibration_dataloader)
426-
controller, compressed_model = create_compressed_model(
427-
self.model, nncf_config, wrap_inputs_fn=wrap_nncf_model_inputs_with_objwalk
428-
)
429-
compressed_model = controller.strip(do_copy=False)
430-
431-
task = self.task
432-
model = self.model
433-
self.model.config.save_pretrained(save_directory)
434-
if task.startswith("text-generation"):
435-
onnx_config = onnx_config_class(
436-
model.config, use_past=model.config.use_cache, use_past_in_inputs=model.config.use_cache
457+
controller, model = create_compressed_model(
458+
model, nncf_config, wrap_inputs_fn=wrap_nncf_model_inputs_with_objwalk
437459
)
438-
if model.config.use_cache:
439-
task = "text-generation-with-past"
440-
else:
441-
onnx_config = onnx_config_class(model.config)
460+
model = controller.strip(do_copy=False)
442461

443462
model_path = save_directory / (onnx_file_name if ov_config.save_onnx_model else ov_file_name)
444463
onnx_path = save_directory / onnx_file_name
@@ -447,7 +466,8 @@ def _quantize_torchmodel(
447466
opset = max(opset, MIN_ONNX_QDQ_OPSET)
448467
kwargs = {}
449468
if not ov_config.save_onnx_model:
450-
kwargs = {"stateful": ensure_export_task_support_stateful(task)}
469+
kwargs = {"stateful": stateful}
470+
451471
_, _, is_onnx = export_fn(model=model, config=onnx_config, output=model_path, opset=opset, **kwargs)
452472
if is_onnx:
453473
# Load and save the compressed model

optimum/intel/utils/modeling_utils.py

+21
Original file line numberDiff line numberDiff line change
@@ -148,3 +148,24 @@ def patch_decoder_attention_mask(model: "PreTrainedModel"):
148148
elif model.config.model_type in {"blenderbot-small", "blenderbot", "opt", "pegasus", "bart"}:
149149
model.model.decoder._prepare_decoder_attention_mask = _prepare_decoder_attention_mask
150150
return model
151+
152+
153+
def get_model_device(model: torch.nn.Module) -> torch.device:
    """
    Determines the device on which a PyTorch model is currently residing.

    The device is taken from the first parameter yielded by
    ``model.parameters()``; the remaining parameters are not inspected.

    Args:
        model: The PyTorch model to query.

    Returns:
        torch.device: The device of the model's first parameter, or
        ``torch.device("cpu")`` when the model has no parameters at all.
        No exception is raised for a parameter-less model.
    """
    # Use a default instead of catching StopIteration: an empty parameter
    # iterator simply means there is nothing to query.
    first_parameter = next(model.parameters(), None)
    if first_parameter is None:
        # The model had no parameters at all, doesn't matter which device to choose
        return torch.device("cpu")
    return first_parameter.device

0 commit comments

Comments
 (0)