Commit 3befef7

Check if nncf is installed before running quantization from optimum-cli (huggingface#1154)
1 parent 6407c9c commit 3befef7
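
The guard relies on is_nncf_available, newly imported from optimum/intel/utils/import_utils.py; the helper itself is not shown in this diff. A minimal sketch of such an availability check, assuming the usual importlib-based pattern rather than the actual implementation in import_utils.py:

import importlib.util


def is_nncf_available() -> bool:
    # True when the nncf package is importable in the current environment.
    # find_spec only looks the package up, so nncf (and its dependencies)
    # is not actually imported just to probe for it.
    return importlib.util.find_spec("nncf") is not None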

1 file changed: +41 -38 lines changed

optimum/commands/export/openvino.py (+41 -38)
@@ -21,7 +21,7 @@
 from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
 
 from ...exporters import TasksManager
-from ...intel.utils.import_utils import DIFFUSERS_IMPORT_ERROR, is_diffusers_available
+from ...intel.utils.import_utils import DIFFUSERS_IMPORT_ERROR, is_diffusers_available, is_nncf_available
 from ...intel.utils.modeling_utils import _infer_library_from_model_name_or_path
 from ...utils.save_utils import maybe_load_preprocessors
 from ..base import BaseOptimumCLICommand, CommandInfo
@@ -343,49 +343,52 @@ def run(self):
                 )
         elif self.args.weight_format in {"fp16", "fp32"}:
             ov_config = OVConfig(dtype=self.args.weight_format)
-        elif self.args.weight_format is not None:
-            # For int4 quantization if no parameter is provided, then use the default config if exists
-            if no_compression_parameter_provided(self.args) and self.args.weight_format == "int4":
-                quantization_config = get_default_int4_config(self.args.model)
+        else:
+            if not is_nncf_available():
+                raise ImportError("Applying quantization requires nncf, please install it with `pip install nncf`")
+
+            if self.args.weight_format is not None:
+                # For int4 quantization if no parameter is provided, then use the default config if exists
+                if no_compression_parameter_provided(self.args) and self.args.weight_format == "int4":
+                    quantization_config = get_default_int4_config(self.args.model)
+                else:
+                    is_int8 = self.args.weight_format == "int8"
+                    quantization_config = {
+                        "bits": 8 if is_int8 else 4,
+                        "ratio": 1.0 if is_int8 else (self.args.ratio or _DEFAULT_4BIT_CONFIG["ratio"]),
+                        "sym": self.args.sym or False,
+                        "group_size": -1 if is_int8 else self.args.group_size,
+                        "all_layers": None if is_int8 else self.args.all_layers,
+                        "dataset": self.args.dataset,
+                        "num_samples": self.args.num_samples,
+                        "quant_method": "awq" if self.args.awq else "default",
+                        "sensitivity_metric": self.args.sensitivity_metric,
+                        "scale_estimation": self.args.scale_estimation,
+                        "gptq": self.args.gptq,
+                        "lora_correction": self.args.lora_correction,
+                        "weight_format": self.args.weight_format,
+                        "backup_precision": self.args.backup_precision,
+                    }
+
+                if quantization_config.get("dataset", None) is not None:
+                    quantization_config["trust_remote_code"] = self.args.trust_remote_code
+                ov_config = OVConfig(quantization_config=quantization_config)
             else:
-                is_int8 = self.args.weight_format == "int8"
+                if self.args.dataset is None:
+                    raise ValueError(
+                        "Dataset is required for full quantization. Please provide it with --dataset argument."
+                    )
                 quantization_config = {
-                    "bits": 8 if is_int8 else 4,
-                    "ratio": 1.0 if is_int8 else (self.args.ratio or _DEFAULT_4BIT_CONFIG["ratio"]),
+                    "weight_format": self.args.quant_mode,
+                    "activation_format": self.args.quant_mode,
+                    "bits": 8,
                     "sym": self.args.sym or False,
-                    "group_size": -1 if is_int8 else self.args.group_size,
-                    "all_layers": None if is_int8 else self.args.all_layers,
                     "dataset": self.args.dataset,
                     "num_samples": self.args.num_samples,
-                    "quant_method": "awq" if self.args.awq else "default",
-                    "sensitivity_metric": self.args.sensitivity_metric,
-                    "scale_estimation": self.args.scale_estimation,
-                    "gptq": self.args.gptq,
-                    "lora_correction": self.args.lora_correction,
-                    "weight_format": self.args.weight_format,
-                    "backup_precision": self.args.backup_precision,
+                    "smooth_quant_alpha": self.args.smooth_quant_alpha,
+                    "trust_remote_code": self.args.trust_remote_code,
                 }
-
-            if quantization_config.get("dataset", None) is not None:
-                quantization_config["trust_remote_code"] = self.args.trust_remote_code
-            ov_config = OVConfig(quantization_config=quantization_config)
-        else:
-            if self.args.dataset is None:
-                raise ValueError(
-                    "Dataset is required for full quantization. Please provide it with --dataset argument."
-                )
-
-            quantization_config = {
-                "weight_format": self.args.quant_mode,
-                "activation_format": self.args.quant_mode,
-                "bits": 8,
-                "sym": self.args.sym or False,
-                "dataset": self.args.dataset,
-                "num_samples": self.args.num_samples,
-                "smooth_quant_alpha": self.args.smooth_quant_alpha,
-                "trust_remote_code": self.args.trust_remote_code,
-            }
-            ov_config = OVConfig(quantization_config=quantization_config)
+                ov_config = OVConfig(quantization_config=quantization_config)
 
         quantization_config = ov_config.quantization_config if ov_config else None
         quantize_with_dataset = quantization_config and getattr(quantization_config, "dataset", None) is not None
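
Taken together, the restructured branch distinguishes three cases: fp16/fp32 exports skip quantization entirely, a set --weight-format (int8 or int4) builds a weight-only compression config, and the remaining --quant-mode path performs full quantization and refuses to run without --dataset; nncf is now required up front for the latter two. Below is a condensed sketch of that flow, with the config dicts trimmed to a few representative keys (the complete key sets are in the hunk above) and with build_export_config as an illustrative helper name, not something defined in the codebase:

from optimum.intel import OVConfig
from optimum.intel.utils.import_utils import is_nncf_available


def build_export_config(args):
    # Sketch only: the if-branch ahead of this hunk (no quantization requested) is omitted.
    if args.weight_format in {"fp16", "fp32"}:
        # Plain precision conversion, no nncf involved.
        return OVConfig(dtype=args.weight_format)

    # Every remaining path quantizes, so fail fast when nncf is missing.
    if not is_nncf_available():
        raise ImportError("Applying quantization requires nncf, please install it with `pip install nncf`")

    if args.weight_format is not None:
        # Weight-only compression (int8/int4), configured from the CLI arguments.
        quantization_config = {"bits": 8 if args.weight_format == "int8" else 4, "weight_format": args.weight_format}
    else:
        # Full quantization via --quant-mode additionally needs a calibration dataset.
        if args.dataset is None:
            raise ValueError("Dataset is required for full quantization. Please provide it with --dataset argument.")
        quantization_config = {"bits": 8, "weight_format": args.quant_mode, "activation_format": args.quant_mode}
    return OVConfig(quantization_config=quantization_config)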
