|
59 | 59 | _ipex_version,
|
60 | 60 | _neural_compressor_version,
|
61 | 61 | is_intel_extension_for_transformers_available,
|
62 |
| - is_intel_extension_for_transformers_version, |
63 | 62 | is_ipex_version,
|
64 | 63 | is_neural_compressor_version,
|
65 | 64 | )
|
|
80 | 79 |
|
81 | 80 | if is_intel_extension_for_transformers_available():
|
82 | 81 | from intel_extension_for_transformers.llm.quantization.utils import convert_to_quantized_model
|
83 |
| - if is_intel_extension_for_transformers_version("<=", "1.2.2"): |
84 |
| - from intel_extension_for_transformers.transformers.utils.quantization_config import WeightOnlyQuantConfig |
85 |
| - else: |
86 |
| - from intel_extension_for_transformers.transformers.utils.config import WeightOnlyQuantConfig |
| 82 | + from intel_extension_for_transformers.transformers.utils.config import WeightOnlyQuantConfig |
87 | 83 |
|
88 | 84 | logger = logging.getLogger(__name__)
|
89 | 85 |
|
@@ -186,7 +182,7 @@ def quantize(
|
186 | 182 | save_directory.mkdir(parents=True, exist_ok=True)
|
187 | 183 | save_onnx_model = kwargs.pop("save_onnx_model", False)
|
188 | 184 |
|
189 |
| - if save_onnx_model and isinstance(self._original_model, ORTModel): |
| 185 | + if save_onnx_model and (isinstance(self._original_model, ORTModel) or weight_only): |
190 | 186 | save_onnx_model = False
|
191 | 187 | logger.warning("Model provided is an ONNX model, `save_onnx_model` is set to False")
|
192 | 188 |
|
@@ -278,6 +274,9 @@ def quantize(
|
278 | 274 |
|
279 | 275 | if isinstance(quantization_config, WeightOnlyQuantConfig):
|
280 | 276 | self._quantized_model = convert_to_quantized_model(self._original_model, quantization_config)
|
| 277 | + # Save the quantized model |
| 278 | + output_path = save_directory.joinpath(file_name or default_name) |
| 279 | + self._quantized_model.save_pretrained(output_path) |
281 | 280 | else:
|
282 | 281 | if isinstance(self._original_model.config, PretrainedConfig):
|
283 | 282 | self._original_model.config.backend = quantization_config.backend
|
|
0 commit comments