|
33 | 33 | from torch.utils.data import DataLoader, RandomSampler
|
34 | 34 | from transformers import DataCollator, PreTrainedModel, default_data_collator
|
35 | 35 | from transformers.pytorch_utils import Conv1D
|
36 |
| -from transformers.utils.quantization_config import QuantizationConfigMixin |
37 | 36 |
|
38 | 37 | from optimum.exporters.tasks import TasksManager
|
39 | 38 | from optimum.quantization_base import OptimumQuantizer
|
@@ -159,7 +158,6 @@ def quantize(
|
159 | 158 | self,
|
160 | 159 | calibration_dataset: Dataset = None,
|
161 | 160 | save_directory: Union[str, Path] = None,
|
162 |
| - quantization_config: QuantizationConfigMixin = None, |
163 | 161 | ov_config: OVConfig = None,
|
164 | 162 | file_name: Optional[str] = None,
|
165 | 163 | batch_size: int = 1,
|
@@ -234,7 +232,7 @@ def quantize(
|
234 | 232 | data_collator,
|
235 | 233 | remove_unused_columns,
|
236 | 234 | weights_only,
|
237 |
| - quantization_config, |
| 235 | + ov_config, |
238 | 236 | **kwargs,
|
239 | 237 | )
|
240 | 238 | elif isinstance(self.model, OVBaseModel):
|
@@ -313,13 +311,14 @@ def _quantize_ovcausallm(
|
313 | 311 | data_collator: Optional[DataCollator] = None,
|
314 | 312 | remove_unused_columns: bool = True,
|
315 | 313 | weights_only: bool = False,
|
316 |
| - quantization_config: QuantizationConfigMixin = None, |
| 314 | + ov_config: OVConfig = None, |
317 | 315 | **kwargs,
|
318 | 316 | ):
|
319 | 317 | save_directory = Path(save_directory)
|
320 | 318 | save_directory.mkdir(parents=True, exist_ok=True)
|
321 | 319 |
|
322 | 320 | if weights_only:
|
| 321 | + quantization_config = None if ov_config is None else ov_config.quantization_config |
323 | 322 | if quantization_config is None:
|
324 | 323 | # Use default 8-bit compression
|
325 | 324 | self.model.model = nncf.compress_weights(self.model.model)
|
|
0 commit comments