|
21 | 21 | from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
|
22 | 22 |
|
23 | 23 | from ...exporters import TasksManager
|
24 |
| -from ...intel.utils.import_utils import DIFFUSERS_IMPORT_ERROR, is_diffusers_available |
| 24 | +from ...intel.utils.import_utils import DIFFUSERS_IMPORT_ERROR, is_diffusers_available, is_nncf_available |
25 | 25 | from ...intel.utils.modeling_utils import _infer_library_from_model_name_or_path
|
26 | 26 | from ...utils.save_utils import maybe_load_preprocessors
|
27 | 27 | from ..base import BaseOptimumCLICommand, CommandInfo
|
@@ -343,49 +343,52 @@ def run(self):
|
343 | 343 | )
|
344 | 344 | elif self.args.weight_format in {"fp16", "fp32"}:
|
345 | 345 | ov_config = OVConfig(dtype=self.args.weight_format)
|
346 |
| - elif self.args.weight_format is not None: |
347 |
| - # For int4 quantization if no parameter is provided, then use the default config if exists |
348 |
| - if no_compression_parameter_provided(self.args) and self.args.weight_format == "int4": |
349 |
| - quantization_config = get_default_int4_config(self.args.model) |
| 346 | + else: |
| 347 | + if not is_nncf_available(): |
| 348 | + raise ImportError("Applying quantization requires nncf, please install it with `pip install nncf`") |
| 349 | + |
| 350 | + if self.args.weight_format is not None: |
| 351 | + # For int4 quantization if no parameter is provided, then use the default config if exists |
| 352 | + if no_compression_parameter_provided(self.args) and self.args.weight_format == "int4": |
| 353 | + quantization_config = get_default_int4_config(self.args.model) |
| 354 | + else: |
| 355 | + is_int8 = self.args.weight_format == "int8" |
| 356 | + quantization_config = { |
| 357 | + "bits": 8 if is_int8 else 4, |
| 358 | + "ratio": 1.0 if is_int8 else (self.args.ratio or _DEFAULT_4BIT_CONFIG["ratio"]), |
| 359 | + "sym": self.args.sym or False, |
| 360 | + "group_size": -1 if is_int8 else self.args.group_size, |
| 361 | + "all_layers": None if is_int8 else self.args.all_layers, |
| 362 | + "dataset": self.args.dataset, |
| 363 | + "num_samples": self.args.num_samples, |
| 364 | + "quant_method": "awq" if self.args.awq else "default", |
| 365 | + "sensitivity_metric": self.args.sensitivity_metric, |
| 366 | + "scale_estimation": self.args.scale_estimation, |
| 367 | + "gptq": self.args.gptq, |
| 368 | + "lora_correction": self.args.lora_correction, |
| 369 | + "weight_format": self.args.weight_format, |
| 370 | + "backup_precision": self.args.backup_precision, |
| 371 | + } |
| 372 | + |
| 373 | + if quantization_config.get("dataset", None) is not None: |
| 374 | + quantization_config["trust_remote_code"] = self.args.trust_remote_code |
| 375 | + ov_config = OVConfig(quantization_config=quantization_config) |
350 | 376 | else:
|
351 |
| - is_int8 = self.args.weight_format == "int8" |
| 377 | + if self.args.dataset is None: |
| 378 | + raise ValueError( |
| 379 | + "Dataset is required for full quantization. Please provide it with --dataset argument." |
| 380 | + ) |
352 | 381 | quantization_config = {
|
353 |
| - "bits": 8 if is_int8 else 4, |
354 |
| - "ratio": 1.0 if is_int8 else (self.args.ratio or _DEFAULT_4BIT_CONFIG["ratio"]), |
| 382 | + "weight_format": self.args.quant_mode, |
| 383 | + "activation_format": self.args.quant_mode, |
| 384 | + "bits": 8, |
355 | 385 | "sym": self.args.sym or False,
|
356 |
| - "group_size": -1 if is_int8 else self.args.group_size, |
357 |
| - "all_layers": None if is_int8 else self.args.all_layers, |
358 | 386 | "dataset": self.args.dataset,
|
359 | 387 | "num_samples": self.args.num_samples,
|
360 |
| - "quant_method": "awq" if self.args.awq else "default", |
361 |
| - "sensitivity_metric": self.args.sensitivity_metric, |
362 |
| - "scale_estimation": self.args.scale_estimation, |
363 |
| - "gptq": self.args.gptq, |
364 |
| - "lora_correction": self.args.lora_correction, |
365 |
| - "weight_format": self.args.weight_format, |
366 |
| - "backup_precision": self.args.backup_precision, |
| 388 | + "smooth_quant_alpha": self.args.smooth_quant_alpha, |
| 389 | + "trust_remote_code": self.args.trust_remote_code, |
367 | 390 | }
|
368 |
| - |
369 |
| - if quantization_config.get("dataset", None) is not None: |
370 |
| - quantization_config["trust_remote_code"] = self.args.trust_remote_code |
371 |
| - ov_config = OVConfig(quantization_config=quantization_config) |
372 |
| - else: |
373 |
| - if self.args.dataset is None: |
374 |
| - raise ValueError( |
375 |
| - "Dataset is required for full quantization. Please provide it with --dataset argument." |
376 |
| - ) |
377 |
| - |
378 |
| - quantization_config = { |
379 |
| - "weight_format": self.args.quant_mode, |
380 |
| - "activation_format": self.args.quant_mode, |
381 |
| - "bits": 8, |
382 |
| - "sym": self.args.sym or False, |
383 |
| - "dataset": self.args.dataset, |
384 |
| - "num_samples": self.args.num_samples, |
385 |
| - "smooth_quant_alpha": self.args.smooth_quant_alpha, |
386 |
| - "trust_remote_code": self.args.trust_remote_code, |
387 |
| - } |
388 |
| - ov_config = OVConfig(quantization_config=quantization_config) |
| 391 | + ov_config = OVConfig(quantization_config=quantization_config) |
389 | 392 |
|
390 | 393 | quantization_config = ov_config.quantization_config if ov_config else None
|
391 | 394 | quantize_with_dataset = quantization_config and getattr(quantization_config, "dataset", None) is not None
|
|
0 commit comments