24
24
import openvino
25
25
import torch
26
26
import transformers
27
- from nncf import CompressWeightsMode , IgnoredScope , NNCFConfig , SensitivityMetric
27
+ from nncf import CompressWeightsMode , IgnoredScope , SensitivityMetric
28
28
from nncf .quantization .advanced_parameters import AdvancedSmoothQuantParameters
29
- from nncf .torch import create_compressed_model , register_default_init_args , register_module
30
- from nncf .torch .dynamic_graph .io_handling import wrap_nncf_model_inputs_with_objwalk
29
+ from nncf .torch import register_module
31
30
from nncf .torch .initialization import PTInitializingDataLoader
32
31
from openvino ._offline_transformations import compress_quantize_weights_transformation
33
32
from openvino .runtime import Core , Tensor
47
46
from ..utils .constant import _TASK_ALIASES
48
47
from ..utils .import_utils import DATASETS_IMPORT_ERROR , is_datasets_available
49
48
from ..utils .modeling_utils import get_model_device
50
- from .configuration import DEFAULT_QUANTIZATION_CONFIG , OVConfig , OVWeightQuantizationConfig
49
+ from .configuration import OVConfig , OVWeightQuantizationConfig
51
50
from .modeling_base import OVBaseModel
52
51
from .utils import (
53
52
MAX_ONNX_OPSET ,
@@ -240,8 +239,6 @@ def quantize(
240
239
if ov_config is not None :
241
240
if not isinstance (ov_config , OVConfig ):
242
241
raise TypeError (f"`ov_config` should be an `OVConfig`, but got: { type (ov_config )} instead." )
243
- elif ov_config .compression is None :
244
- ov_config .compression = DEFAULT_QUANTIZATION_CONFIG
245
242
246
243
if isinstance (self .model , OVBaseModel ):
247
244
self ._quantize_ovbasemodel (
@@ -263,7 +260,6 @@ def quantize(
263
260
self ._quantize_torchmodel (
264
261
calibration_dataset ,
265
262
save_directory ,
266
- ov_config ,
267
263
file_name ,
268
264
batch_size ,
269
265
data_collator ,
@@ -319,7 +315,7 @@ def _quantize_ovbasemodel(
319
315
calibration_dataloader = data_cache
320
316
321
317
# Actual model quantization
322
- quantization_dataset = nncf .Dataset (calibration_dataloader , lambda x : x )
318
+ quantization_dataset = nncf .Dataset (calibration_dataloader )
323
319
quantized_model = nncf .quantize (
324
320
self .model .model ,
325
321
quantization_dataset ,
@@ -334,12 +330,13 @@ def _quantize_torchmodel(
334
330
self ,
335
331
calibration_dataset : "Dataset" ,
336
332
save_directory : Union [str , Path ],
337
- ov_config : OVConfig = None ,
338
333
file_name : Optional [str ] = None ,
339
334
batch_size : int = 1 ,
340
335
data_collator : Optional [DataCollator ] = None ,
341
336
remove_unused_columns : bool = True ,
342
337
weights_only : bool = False ,
338
+ save_onnx_model : bool = False ,
339
+ ** kwargs ,
343
340
):
344
341
self ._set_task ()
345
342
save_directory = Path (save_directory )
@@ -356,15 +353,8 @@ def _quantize_torchmodel(
356
353
model_type = model_type ,
357
354
)
358
355
359
- if ov_config is None :
360
- logger .info (
361
- "No configuration describing the quantization process was provided, a default OVConfig will be generated."
362
- )
363
- ov_config = OVConfig (compression = DEFAULT_QUANTIZATION_CONFIG )
364
356
onnx_file_name = (
365
- ONNX_WEIGHTS_NAME
366
- if file_name is None and ov_config .save_onnx_model
367
- else Path (ov_file_name ).with_suffix (".onnx" )
357
+ ONNX_WEIGHTS_NAME if file_name is None and save_onnx_model else Path (ov_file_name ).with_suffix (".onnx" )
368
358
)
369
359
370
360
task = self .task
@@ -398,7 +388,7 @@ def _quantize_torchmodel(
398
388
if stateful :
399
389
logger .warn (
400
390
"Quantization algorithm does not support optimized stateful models. "
401
- "The original model without optimization will be quantized and export ."
391
+ "The original model without optimization will be quantized and exported ."
402
392
)
403
393
stateful = False
404
394
@@ -409,40 +399,38 @@ def _quantize_torchmodel(
409
399
data_collator = data_collator ,
410
400
)
411
401
412
- model_inputs = next (iter (calibration_dataloader ))
413
- ov_config .add_input_info (model_inputs )
414
- nncf_config = NNCFConfig .from_dict (ov_config .__dict__ )
415
- nncf_config = register_default_init_args (nncf_config , calibration_dataloader )
416
- controller , model = create_compressed_model (
417
- model , nncf_config , wrap_inputs_fn = wrap_nncf_model_inputs_with_objwalk
402
+ quantization_dataset = nncf .Dataset (calibration_dataloader )
403
+ model = nncf .quantize (
404
+ model ,
405
+ quantization_dataset ,
406
+ model_type = nncf .ModelType .TRANSFORMER if not kwargs .get ("model_type" ) else kwargs .get ("model_type" ),
407
+ fast_bias_correction = kwargs .get ("fast_bias_correction" , True ),
408
+ ** kwargs ,
418
409
)
419
- model = controller .strip (do_copy = False )
420
410
421
- model_path = save_directory / (onnx_file_name if ov_config . save_onnx_model else ov_file_name )
411
+ model_path = save_directory / (onnx_file_name if save_onnx_model else ov_file_name )
422
412
onnx_path = save_directory / onnx_file_name
423
- export_fn = export if not ov_config . save_onnx_model else export_pytorch_via_onnx
413
+ export_fn = export if not save_onnx_model else export_pytorch_via_onnx
424
414
opset = min (onnx_config .DEFAULT_ONNX_OPSET , MAX_ONNX_OPSET )
425
415
opset = max (opset , MIN_ONNX_QDQ_OPSET )
426
- kwargs = {}
427
- if not ov_config . save_onnx_model :
428
- kwargs = {"stateful" : stateful }
416
+ export_kwargs = {}
417
+ if not save_onnx_model :
418
+ export_kwargs = {"stateful" : stateful }
429
419
430
- _ , _ , is_onnx = export_fn (model = model , config = onnx_config , output = model_path , opset = opset , ** kwargs )
420
+ _ , _ , is_onnx = export_fn (model = model , config = onnx_config , output = model_path , opset = opset , ** export_kwargs )
431
421
if is_onnx :
432
422
# Load and save the compressed model
433
423
model = core .read_model (onnx_path )
434
424
# The model requires a second save in order to apply the weights compression transformations
435
425
self ._save_pretrained (model , output_path )
436
426
# if onnx conversion happens as fallback for pytorch conversion, remove onnx model
437
- if not ov_config . save_onnx_model :
427
+ if not save_onnx_model :
438
428
os .remove (onnx_path )
439
429
try :
440
430
os .remove (f"{ onnx_path } _data" )
441
431
except FileNotFoundError :
442
432
pass
443
433
444
- ov_config .save_pretrained (save_directory )
445
-
446
434
@staticmethod
447
435
def _save_pretrained (model : openvino .runtime .Model , output_path : str ):
448
436
compress_quantize_weights_transformation (model )
0 commit comments