16
16
import logging
17
17
from dataclasses import dataclass
18
18
from enum import Enum
19
- from typing import Any , Dict , List , Optional , Union
19
+ from typing import TYPE_CHECKING , Any , Dict , List , Optional , Union
20
20
21
- import nncf
22
21
import torch
23
- from nncf .quantization .advanced_parameters import OverflowFix
24
22
from transformers import PretrainedConfig
25
23
from transformers .utils .quantization_config import QuantizationConfigMixin , QuantizationMethod
26
24
27
25
from optimum .configuration_utils import BaseConfig
28
26
29
27
28
+ if TYPE_CHECKING :
29
+ import nncf
30
+
30
31
logger = logging .getLogger (__name__ )
31
32
32
33
_DEFAULT_4BIT_CONFIGS = {
@@ -75,7 +76,7 @@ def __init__(
75
76
weight_only (`bool`, *optional*):
76
77
Used to explicitly specify type of quantization (weight-only or full) to apply.
77
78
"""
78
- if isinstance (ignored_scope , nncf . IgnoredScope ):
79
+ if not isinstance (ignored_scope , dict ):
79
80
ignored_scope = ignored_scope .__dict__
80
81
self .ignored_scope = ignored_scope
81
82
self .num_samples = num_samples
@@ -91,7 +92,9 @@ def post_init(self):
91
92
if not (self .num_samples is None or isinstance (self .num_samples , int ) and self .num_samples > 0 ):
92
93
raise ValueError (f"`num_samples` is expected to be a positive integer, but found: { self .num_samples } " )
93
94
94
def get_ignored_scope_instance(self) -> "nncf.IgnoredScope":
    """Create an `nncf.IgnoredScope` object from the stored `ignored_scope` entries.

    Returns:
        `nncf.IgnoredScope`: An instance built without arguments when `self.ignored_scope` is `None`,
        otherwise an instance built from a deep copy of `self.ignored_scope` passed as keyword arguments.
    """
    # `nncf` is imported here, at call time, rather than at module import time.
    import nncf

    # Work on a deep copy so the stored dictionary is never shared with the created instance.
    scope_kwargs = {} if self.ignored_scope is None else copy.deepcopy(self.ignored_scope)
    return nncf.IgnoredScope(**scope_kwargs)
@@ -309,12 +312,12 @@ def post_init(self):
309
312
class OVQuantizationConfig (OVQuantizationConfigBase ):
310
313
def __init__ (
311
314
self ,
315
+ sym : bool = False ,
312
316
ignored_scope : Optional [dict ] = None ,
313
317
num_samples : Optional [int ] = 300 ,
314
- preset : nncf .QuantizationPreset = None ,
315
- model_type : nncf .ModelType = nncf .ModelType .TRANSFORMER ,
318
+ model_type : "nncf.ModelType" = None ,
316
319
fast_bias_correction : bool = True ,
317
- overflow_fix : OverflowFix = OverflowFix . DISABLE ,
320
+ overflow_fix : str = "disable" ,
318
321
weight_only : Optional [bool ] = False ,
319
322
** kwargs ,
320
323
):
@@ -323,23 +326,18 @@ def __init__(
323
326
compression, during quantization both weights and activations are converted to lower precision.
324
327
For weight-only model quantization please see OVWeightQuantizationConfig.
325
328
Args:
329
+ sym (`bool`, defaults to `False`):
330
+ Whether to use symmetric quantization on the activations. Symmetric quantization will be applied on the weights in any case.
326
331
ignored_scope (`dict`, *optional*):
327
332
An ignored scope that defines the list of model nodes to be ignored during quantization. Dictionary
328
333
entries provided via this argument are used to create an instance of `nncf.IgnoredScope` class.
329
334
num_samples (`int`, *optional*):
330
335
The maximum number of samples composing the calibration dataset.
331
- preset (`nncf.QuantizationPreset`, *optional*):
332
- A preset controls the quantization mode (symmetric and asymmetric).
333
- It can take the following values:
334
- - `performance`: Symmetric quantization of weights and activations.
335
- - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
336
- Default value is None. In this case, `mixed` preset is used for `transformer`
337
- model type otherwise `performance`.
338
336
model_type (`nncf.ModelType`, defaults to nncf.ModelType.TRANSFORMER):
339
337
Model type is needed to specify additional patterns in the model. Supported only `transformer` now.
340
338
fast_bias_correction (`bool`, defaults to True):
341
339
Whether to apply fast or full bias correction algorithm.
342
- overflow_fix (`nncf.OverflowFix `, default to OverflowFix.DISABLE ):
340
+ overflow_fix (`str`, defaults to "disable"):
343
341
Parameter for controlling overflow fix setting.
344
342
weight_only (`bool`, *optional*):
345
343
Used to explicitly specify type of quantization (weight-only or full) to apply. Useful when building
@@ -352,33 +350,24 @@ def __init__(
352
350
)
353
351
super ().__init__ (ignored_scope , num_samples , False )
354
352
# TODO: remove checks below once NNCF is updated to 2.10
355
- if isinstance (overflow_fix , str ):
356
- overflow_fix = OverflowFix (overflow_fix )
357
- if isinstance (preset , str ):
358
- preset = nncf .QuantizationPreset (preset )
359
-
360
- self .preset = preset
353
+ self .sym = sym
361
354
self .model_type = model_type
362
355
self .fast_bias_correction = fast_bias_correction
363
356
self .overflow_fix = overflow_fix
364
357
self .post_init ()
365
358
366
359
def to_dict(self) -> Dict[str, Any]:
    """Serialize the config to a dictionary, storing `overflow_fix` as a plain value.

    When `overflow_fix` holds an `Enum` member, serialization runs on a deep copy of the config whose
    `overflow_fix` has been replaced by the member's `.value`; the config itself is not modified.
    In every other case the parent class' `to_dict` is used directly.

    Returns:
        `Dict[str, Any]`: The dictionary representation of this config.
    """
    # TODO: remove code below once NNCF is updated to 2.10
    if isinstance(self.overflow_fix, Enum):
        # Always unwrap through `.value`. A member of a str-mixin enum is both `str` and `Enum`;
        # keeping the member itself would make the call below take this branch again and never return.
        self_copy = copy.deepcopy(self)
        self_copy.overflow_fix = self.overflow_fix.value
        return self_copy.to_dict()
    return super().to_dict()
384
373
0 commit comments