18
18
from enum import Enum
19
19
from typing import Any , Dict , List , Optional , Union
20
20
21
- import nncf
22
21
import torch
23
- from nncf .quantization .advanced_parameters import OverflowFix
24
22
from transformers import PretrainedConfig
25
23
from transformers .utils .quantization_config import QuantizationConfigMixin , QuantizationMethod
26
24
27
25
from optimum .configuration_utils import BaseConfig
28
26
27
+ from ..utils .import_utils import is_nncf_available
28
+
29
+
30
+ if is_nncf_available ():
31
+ import nncf
29
32
30
33
logger = logging .getLogger (__name__ )
31
34
52
55
}
53
56
54
57
58
class OVQuantizationMethod(str, Enum):
    """
    String-valued enumeration of the quantization methods used by the `OV*` quantization configs.

    Members are also `str` instances, so a member compares equal to its raw string value.
    """

    # The only method declared here; the quantization config classes in this module use it as their
    # default `quant_method`.
    DEFAULT = "default"
60
+
61
+
55
62
@dataclass
56
63
class OVQuantizationConfigBase (QuantizationConfigMixin ):
57
64
"""
58
65
Base configuration class for quantization parameters
59
66
"""
60
67
68
+ quant_method = OVQuantizationMethod .DEFAULT
69
+
61
70
def __init__ (
62
71
self ,
63
72
ignored_scope : Optional [dict ] = None ,
@@ -91,7 +100,7 @@ def post_init(self):
91
100
if not (self .num_samples is None or isinstance (self .num_samples , int ) and self .num_samples > 0 ):
92
101
raise ValueError (f"`num_samples` is expected to be a positive integer, but found: { self .num_samples } " )
93
102
94
def get_ignored_scope_instance(self) -> "nncf.IgnoredScope":
    """
    Build an `nncf.IgnoredScope` from the `ignored_scope` dictionary stored on this config.

    The return annotation is a string so that it is not evaluated when the method is defined.

    Returns:
        An empty `nncf.IgnoredScope()` when `self.ignored_scope` is `None`; otherwise an instance created
        by passing a deep copy of the dictionary as keyword arguments.
    """
    if self.ignored_scope is None:
        return nncf.IgnoredScope()
    # Deep-copy so the values handed to nncf do not share nested containers with the stored dictionary.
    return nncf.IgnoredScope(**copy.deepcopy(self.ignored_scope))
@@ -178,10 +187,6 @@ def to_diff_dict(self) -> Dict[str, Any]:
178
187
return self ._to_dict_safe (to_diff_dict = True )
179
188
180
189
181
- class OVQuantizationMethod (str , Enum ):
182
- DEFAULT = "default"
183
-
184
-
185
190
@dataclass
186
191
class OVWeightQuantizationConfig (OVQuantizationConfigBase ):
187
192
"""
@@ -240,7 +245,7 @@ def __init__(
240
245
sensitivity_metric : Optional [str ] = None ,
241
246
ignored_scope : Optional [dict ] = None ,
242
247
num_samples : Optional [int ] = None ,
243
- quant_method : Optional [ Union [QuantizationMethod , OVQuantizationMethod ] ] = OVQuantizationMethod .DEFAULT ,
248
+ quant_method : Union [QuantizationMethod , OVQuantizationMethod ] = OVQuantizationMethod .DEFAULT ,
244
249
weight_only : Optional [bool ] = True ,
245
250
** kwargs ,
246
251
):
@@ -309,12 +314,12 @@ def post_init(self):
309
314
class OVQuantizationConfig (OVQuantizationConfigBase ):
310
315
def __init__ (
311
316
self ,
317
+ sym : bool = False ,
312
318
ignored_scope : Optional [dict ] = None ,
313
319
num_samples : Optional [int ] = 300 ,
314
- preset : nncf .QuantizationPreset = None ,
315
- model_type : nncf .ModelType = nncf .ModelType .TRANSFORMER ,
320
+ model_type : str = "transformer" ,
316
321
fast_bias_correction : bool = True ,
317
- overflow_fix : OverflowFix = OverflowFix . DISABLE ,
322
+ overflow_fix : str = "disable" ,
318
323
weight_only : Optional [bool ] = False ,
319
324
** kwargs ,
320
325
):
@@ -323,23 +328,18 @@ def __init__(
323
328
compression, during quantization both weights and activations are converted to lower precision.
324
329
For weight-only model quantization please see OVWeightQuantizationConfig.
325
330
Args:
331
+ sym (`bool`, defaults to `False`):
332
+ Whether to use symmetric quantization on the activations. Symmetric quantization will be applied on the weights in any case.
326
333
ignored_scope (`dict`, *optional*):
327
334
An ignored scope that defines the list of model nodes to be ignored during quantization. Dictionary
328
335
entries provided via this argument are used to create an instance of `nncf.IgnoredScope` class.
329
336
num_samples (`int`, *optional*):
330
337
The maximum number of samples composing the calibration dataset.
331
- preset (`nncf.QuantizationPreset`, *optional*):
332
- A preset controls the quantization mode (symmetric and asymmetric).
333
- It can take the following values:
334
- - `performance`: Symmetric quantization of weights and activations.
335
- - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
336
- Default value is None. In this case, `mixed` preset is used for `transformer`
337
- model type otherwise `performance`.
338
- model_type (`nncf.ModelType`, defaults to nncf.ModelType.TRANSFORMER):
338
+ model_type (`str`, defaults to "transformer"):
339
339
Model type is needed to specify additional patterns in the model. Only `transformer` is currently supported.
340
340
fast_bias_correction (`bool`, defaults to True):
341
341
Whether to apply fast or full bias correction algorithm.
342
- overflow_fix (`nncf.OverflowFix `, default to OverflowFix.DISABLE ):
342
+ overflow_fix (`str`, defaults to "disable"):
343
343
Parameter for controlling overflow fix setting.
344
344
weight_only (`bool`, *optional*):
345
345
Used to explicitly specify the type of quantization (weight-only or full) to apply. Useful when building
@@ -351,37 +351,12 @@ def __init__(
351
351
"Please check your configuration."
352
352
)
353
353
super ().__init__ (ignored_scope , num_samples , False )
354
- # TODO: remove checks below once NNCF is updated to 2.10
355
- if isinstance (overflow_fix , str ):
356
- overflow_fix = OverflowFix (overflow_fix )
357
- if isinstance (preset , str ):
358
- preset = nncf .QuantizationPreset (preset )
359
-
360
- self .preset = preset
354
+ self .sym = sym
361
355
self .model_type = model_type
362
356
self .fast_bias_correction = fast_bias_correction
363
357
self .overflow_fix = overflow_fix
364
358
self .post_init ()
365
359
366
- def to_dict (self ) -> Dict [str , Any ]:
367
- # TODO: remove code below once NNCF is updated to 2.10
368
- if isinstance (self .overflow_fix , Enum ) or isinstance (self .preset , Enum ):
369
- overflow_fix_value = (
370
- None
371
- if self .overflow_fix is None
372
- else self .overflow_fix
373
- if isinstance (self .overflow_fix , str )
374
- else self .overflow_fix .value
375
- )
376
- preset_value = (
377
- None if self .preset is None else self .preset if isinstance (self .preset , str ) else self .preset .value
378
- )
379
- self_copy = copy .deepcopy (self )
380
- self_copy .overflow_fix = overflow_fix_value
381
- self_copy .preset = preset_value
382
- return self_copy .to_dict ()
383
- return super ().to_dict ()
384
-
385
360
386
361
def _check_default_4bit_configs(config: PretrainedConfig):
    """
    Look up a predefined 4-bit configuration entry for the given model config.

    Args:
        config: Model configuration whose `name_or_path` is used as the lookup key.

    Returns:
        The `_DEFAULT_4BIT_CONFIGS` entry stored under `config.name_or_path`, or `None` when no entry exists.
    """
    return _DEFAULT_4BIT_CONFIGS.get(config.name_or_path, None)
0 commit comments