|
44 | 44 | from ...exporters.openvino.stateful import ensure_export_task_support_stateful, ensure_stateful_is_available
|
45 | 45 | from ..utils.constant import _TASK_ALIASES
|
46 | 46 | from ..utils.modeling_utils import get_model_device
|
47 |
| -from .configuration import OVConfig, OVWeightQuantizationConfig, _check_default_4bit_configs |
| 47 | +from .configuration import OVConfig, OVWeightQuantizationConfig |
48 | 48 | from .modeling_base import OVBaseModel
|
49 | 49 | from .utils import (
|
50 | 50 | MAX_ONNX_OPSET,
|
@@ -332,10 +332,8 @@ def _quantize_ovcausallm(
|
332 | 332 | quantization_config = None if ov_config is None else ov_config.quantization_config
|
333 | 333 | if quantization_config is None:
|
334 | 334 | # Use default 8-bit compression
|
335 |
| - quantization_config = OVWeightQuantizationConfig(mode=nncf.CompressWeightsMode.INT8_SYM) |
336 |
| - self.model.model = nncf.compress_weights(self.model.model) |
337 |
| - else: |
338 |
| - _int4_weight_only_quantization(self.model, quantization_config) |
| 335 | + quantization_config = OVWeightQuantizationConfig(bits=8, sym=True) |
| 336 | + _weight_only_quantization(self.model, quantization_config) |
339 | 337 |
|
340 | 338 | self.model.save_pretrained(save_directory)
|
341 | 339 | return
|
@@ -582,21 +580,6 @@ def _remove_unused_columns(self, dataset: Dataset):
|
582 | 580 | return dataset.remove_columns(ignored_columns)
|
583 | 581 |
|
584 | 582 |
|
585 |
| -def _int4_weight_only_quantization( |
586 |
| - model: OVBaseModel, quantization_config: Optional[Union[OVWeightQuantizationConfig, Dict]] = None |
587 |
| -): |
588 |
| - if model.export_feature != "text-generation": |
589 |
| - raise ValueError("Only `OVModelForCausalLM` are supported for now") |
590 |
| - |
591 |
| - quantization_config = quantization_config or _check_default_4bit_configs(model.config) |
592 |
| - |
593 |
| - # Data-free weight-only quantization to asymmetric INT4 |
594 |
| - if quantization_config is None: |
595 |
| - quantization_config = OVWeightQuantizationConfig(bits=4, sym=False) |
596 |
| - |
597 |
| - _weight_only_quantization(model, quantization_config) |
598 |
| - |
599 |
| - |
600 | 583 | def _weight_only_quantization(model: OVBaseModel, quantization_config: Union[OVWeightQuantizationConfig, Dict]):
|
601 | 584 | ov_model = model.model
|
602 | 585 |
|
|
0 commit comments