@@ -198,7 +198,7 @@ def from_pretrained(cls, model: PreTrainedModel, **kwargs):
     def quantize(
         self,
         calibration_dataset: Optional[Union[datasets.Dataset, nncf.Dataset, Iterable]] = None,
-        save_directory: Union[str, Path] = None,
+        save_directory: Optional[Union[str, Path]] = None,
         ov_config: OVConfig = None,
         file_name: Optional[str] = None,
         batch_size: int = 1,
@@ -214,7 +214,7 @@ def quantize(
             calibration_dataset (`datasets.Dataset` or `nncf.Dataset` or `Iterable`, *optional*):
                 A collection of data samples to use for quantization calibration. Is optional for weight-only
                 quantization and is required for full quantization.
-            save_directory (`Union[str, Path]`):
+            save_directory (`Union[str, Path]`, *optional*):
                 The directory where the quantized model should be saved.
             ov_config (`OVConfig`, *optional*):
                 The configuration containing the parameters related to quantization. If not provided, 8-bit symmetric
@@ -262,10 +262,6 @@ def quantize(
                 "as an instance of `OVWeightQuantizationConfig` for weight-only compression or as an instance of `OVQuantizationConfig` for full model quantization."
             )

-        if save_directory is None:
-            # TODO: can be set to self.model.config.name_or_path for OVModels when not provided
-            raise ValueError("`save_directory` needs to be specified")
-
         if ov_config is None:
             ov_config = OVConfig()
         if not isinstance(ov_config, OVConfig):
@@ -318,21 +314,41 @@ def quantize(
     def _quantize_ovbasemodel(
         self,
         ov_config: OVConfig,
-        save_directory: Union[str, Path],
+        save_directory: Union[str, Path] = None,
         calibration_dataset: Optional[Union[datasets.Dataset, nncf.Dataset, Iterable]] = None,
         batch_size: int = 1,
         data_collator: Optional[DataCollator] = None,
         remove_unused_columns: bool = True,
         **kwargs,
     ):
-        save_directory = Path(save_directory)
-        save_directory.mkdir(parents=True, exist_ok=True)
+        if save_directory is not None:
+            save_directory = Path(save_directory)
+            save_directory.mkdir(parents=True, exist_ok=True)

         quantization_config = ov_config.quantization_config
         if isinstance(quantization_config, OVWeightQuantizationConfig):
+            if calibration_dataset is None and isinstance(quantization_config.dataset, str):
+                from optimum.intel import OVModelForCausalLM
+
+                if isinstance(self.model, OVModelForCausalLM):
+                    from optimum.gptq.data import get_dataset, prepare_dataset
+
+                    tokenizer = AutoTokenizer.from_pretrained(quantization_config.tokenizer)
+                    nsamples = quantization_config.num_samples if quantization_config.num_samples else 128
+                    calibration_dataset = get_dataset(
+                        quantization_config.dataset, tokenizer, seqlen=32, nsamples=nsamples
+                    )
+                    calibration_dataset = prepare_dataset(calibration_dataset)
+                    calibration_dataset = nncf.Dataset(calibration_dataset, lambda x: self.model.prepare_inputs(**x))
+                else:
+                    raise ValueError(
+                        f"Can't create weight compression calibration dataset from string for {type(self.model)}"
+                    )
+
             _weight_only_quantization(self.model.model, quantization_config, calibration_dataset)
-            self.model.save_pretrained(save_directory)
-            ov_config.save_pretrained(save_directory)
+            if save_directory is not None:
+                self.model.save_pretrained(save_directory)
+                ov_config.save_pretrained(save_directory)
             return
         if not isinstance(quantization_config, OVQuantizationConfig):
             raise ValueError(f"Unsupported type of quantization config: {type(quantization_config)}")
@@ -384,8 +400,9 @@ def _quantize_ovbasemodel(
             **kwargs,
         )
         self.model.model = quantized_model
-        self.model.save_pretrained(save_directory)
-        ov_config.save_pretrained(save_directory)
+        if save_directory is not None:
+            self.model.save_pretrained(save_directory)
+            ov_config.save_pretrained(save_directory)

     def _quantize_torchmodel(
         self,
@@ -398,6 +415,10 @@ def _quantize_torchmodel(
         remove_unused_columns: bool = True,
         **kwargs,
     ):
+        if save_directory is None:
+            # TODO: can be set to self.model.config.name_or_path for OVModels when not provided
+            raise ValueError("`save_directory` needs to be specified")
+
         self._set_task()
         save_directory = Path(save_directory)
         save_directory.mkdir(parents=True, exist_ok=True)
@@ -622,7 +643,6 @@ def _weight_only_quantization(
     model: openvino.runtime.Model,
     quantization_config: Union[OVWeightQuantizationConfig, Dict],
     calibration_dataset: Optional[Union[nncf.Dataset, Iterable]] = None,
-    transform_fn: Optional[Callable] = None,
 ) -> openvino.runtime.Model:
     config = quantization_config
     if isinstance(config, dict):
@@ -645,15 +665,6 @@ def _weight_only_quantization(
             dataset = calibration_dataset
         else:
            dataset = nncf.Dataset(calibration_dataset)
-    elif config.dataset is not None and isinstance(config.dataset, str):
-        tokenizer = AutoTokenizer.from_pretrained(config.tokenizer)
-
-        from optimum.gptq.data import get_dataset, prepare_dataset
-
-        nsamples = config.num_samples if config.num_samples else 128
-        dataset = get_dataset(config.dataset, tokenizer, seqlen=32, nsamples=nsamples)
-        dataset = prepare_dataset(dataset)
-        dataset = nncf.Dataset(dataset, transform_fn)

     sensitivity_metric = None
     if isinstance(config.sensitivity_metric, str):
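Taken together, the diff makes `save_directory` optional for OpenVINO models: weight-only compression now runs fully in memory unless a directory is passed, and a string `dataset` on `OVWeightQuantizationConfig` is turned into a calibration set inside `_quantize_ovbasemodel` for `OVModelForCausalLM`. Below is a minimal sketch of the resulting call pattern; the model id, dataset name, and 4-bit settings are illustrative placeholders rather than values taken from this diff.

    from optimum.intel import OVConfig, OVModelForCausalLM, OVQuantizer, OVWeightQuantizationConfig

    # Export a causal LM to OpenVINO (hypothetical model id, used only for illustration).
    model = OVModelForCausalLM.from_pretrained("gpt2", export=True)
    quantizer = OVQuantizer.from_pretrained(model)

    # Data-aware 4-bit weight compression: with a string dataset and a tokenizer id,
    # the calibration samples are built internally because the model is an OVModelForCausalLM.
    ov_config = OVConfig(
        quantization_config=OVWeightQuantizationConfig(bits=4, dataset="wikitext2", tokenizer="gpt2")
    )

    # save_directory is omitted, so the compressed model stays in memory only;
    # passing save_directory would still write the model and its OVConfig to disk as before.
    quantizer.quantize(ov_config=ov_config)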