Skip to content

Commit e661d44

Browse files
committed
remove _int4_weight_only_quantization
1 parent 0abed19 commit e661d44

File tree

2 files changed

+11
-24
lines changed

2 files changed

+11
-24
lines changed

optimum/intel/openvino/modeling_decoder.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
from ...exporters.openvino.stateful import model_has_state
3535
from ..utils.import_utils import is_nncf_available
3636
from ..utils.modeling_utils import MULTI_QUERY_ATTN_MODELS
37-
from .configuration import OVWeightQuantizationConfig
37+
from .configuration import OVWeightQuantizationConfig, _check_default_4bit_configs
3838
from .modeling import _TOKENIZER_FOR_DOC, INPUTS_DOCSTRING, MODEL_START_DOCSTRING, OVModel
3939
from .utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME, STR_TO_OV_TYPE
4040

@@ -578,7 +578,6 @@ def _from_pretrained(
578578
quantization_config = OVWeightQuantizationConfig.from_dict(quantization_config)
579579

580580
load_in_4bit = quantization_config.bits == 4 if quantization_config else False
581-
582581
model = cls.load_model(model_cache_path, load_in_8bit=False if load_in_4bit else load_in_8bit)
583582

584583
model_type = config.model_type.replace("_", "-")
@@ -600,9 +599,14 @@ def _from_pretrained(
600599
raise ImportError(
601600
"Quantization of the weights requires nncf, please install it with `pip install nncf`"
602601
)
603-
from .quantization import _int4_weight_only_quantization
602+
from .quantization import _weight_only_quantization
603+
604+
default_config = _check_default_4bit_configs(config)
605+
606+
if default_config:
607+
logger.info(f"For the given model, we recommend the following `quantization_config` : {default_config}")
604608

605-
_int4_weight_only_quantization(causal_model, quantization_config)
609+
_weight_only_quantization(causal_model, quantization_config)
606610
return causal_model
607611

608612

optimum/intel/openvino/quantization.py

+3-20
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
from ...exporters.openvino.stateful import ensure_export_task_support_stateful, ensure_stateful_is_available
4545
from ..utils.constant import _TASK_ALIASES
4646
from ..utils.modeling_utils import get_model_device
47-
from .configuration import OVConfig, OVWeightQuantizationConfig, _check_default_4bit_configs
47+
from .configuration import OVConfig, OVWeightQuantizationConfig
4848
from .modeling_base import OVBaseModel
4949
from .utils import (
5050
MAX_ONNX_OPSET,
@@ -332,10 +332,8 @@ def _quantize_ovcausallm(
332332
quantization_config = None if ov_config is None else ov_config.quantization_config
333333
if quantization_config is None:
334334
# Use default 8-bit compression
335-
quantization_config = OVWeightQuantizationConfig(mode=nncf.CompressWeightsMode.INT8_SYM)
336-
self.model.model = nncf.compress_weights(self.model.model)
337-
else:
338-
_int4_weight_only_quantization(self.model, quantization_config)
335+
quantization_config = OVWeightQuantizationConfig(bits=8, sym=True)
336+
_weight_only_quantization(self.model, quantization_config)
339337

340338
self.model.save_pretrained(save_directory)
341339
return
@@ -582,21 +580,6 @@ def _remove_unused_columns(self, dataset: Dataset):
582580
return dataset.remove_columns(ignored_columns)
583581

584582

585-
def _int4_weight_only_quantization(
586-
model: OVBaseModel, quantization_config: Optional[Union[OVWeightQuantizationConfig, Dict]] = None
587-
):
588-
if model.export_feature != "text-generation":
589-
raise ValueError("Only `OVModelForCausalLM` are supported for now")
590-
591-
quantization_config = quantization_config or _check_default_4bit_configs(model.config)
592-
593-
# Data-free weight-only quantization to asymmetric INT4
594-
if quantization_config is None:
595-
quantization_config = OVWeightQuantizationConfig(bits=4, sym=False)
596-
597-
_weight_only_quantization(model, quantization_config)
598-
599-
600583
def _weight_only_quantization(model: OVBaseModel, quantization_config: Union[OVWeightQuantizationConfig, Dict]):
601584
ov_model = model.model
602585

0 commit comments

Comments
 (0)