from nncf.common.utils.helpers import create_table
from nncf.experimental.common.tensor_statistics.statistics import WCTensorStatistic
from nncf.parameters import BackupMode
+from nncf.parameters import CompressionFormat
from nncf.parameters import CompressWeightsMode
from nncf.parameters import SensitivityMetric
from nncf.quantization.advanced_parameters import AdvancedCompressionParameters
@@ -122,6 +123,7 @@ def check_user_compression_configuration(
    ignored_scope: Optional[IgnoredScope],
    sensitivity_metric: Optional[SensitivityMetric],
    backup_mode: Optional[BackupMode],
+    compression_format: Optional[CompressionFormat],
    advanced_parameters: Optional[AdvancedCompressionParameters],
) -> None:
    """
@@ -172,6 +174,10 @@ def check_user_compression_configuration(
            requires a dataset, but it's not provided."
        raise nncf.ValidationError(msg)

+    if lora_correction and compression_format in [CompressionFormat.FQ, CompressionFormat.FQ_LORA]:
+        msg = "LoRA Correction algorithm is not compatible with FQ and FQ_LORA compression formats."
+        raise nncf.ValidationError(msg)
+

class WeightCompression(Algorithm):
    """
@@ -195,6 +201,7 @@ def __init__(
        gptq: bool,
        lora_correction: bool,
        backup_mode: BackupMode = BackupMode.INT8_ASYM,
+        compression_format: CompressionFormat = CompressionFormat.DQ,
        advanced_parameters: Optional[AdvancedCompressionParameters] = None,
    ):
        """
@@ -233,6 +240,7 @@ def __init__(
                In this mode, weights are retained in their original precision without any quantization.
            INT8_SYM stands for 8-bit integer symmetric quantization without zero point.
            INT8_ASYM stands for 8-bit integer asymmetric quantization with a typical non-fixed zero point.
+        :param compression_format: Describes the format in which the model is saved after weight compression.
        :param advanced_parameters: advanced parameters for algorithms in compression pipeline.
        """
        super().__init__()
@@ -251,6 +259,7 @@ def __init__(
        self._gptq = gptq
        self._lora_correction = lora_correction
        self._backup_mode = backup_mode
+        self._compression_format = compression_format
        self._advanced_parameters = (
            advanced_parameters if advanced_parameters is not None else AdvancedCompressionParameters()
        )
@@ -646,6 +655,7 @@ def apply(
            scales,
            zero_points,
            lora_correction_algo,
+            self._compression_format,
        )

        self._backend_entity.dump_parameters(
@@ -662,6 +672,7 @@ def apply(
                "gptq": self._gptq,
                "lora_correction": self._lora_correction,
                "backup_mode": self._backup_mode.value,
+                "compression_format": self._compression_format.value,
                "advanced_parameters": convert_to_dict_recursively(self._advanced_parameters),
            },
            algo_name="weight_compression",
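
Taken together, these hunks thread a new compression_format option (defaulting to CompressionFormat.DQ) from the user-facing configuration check down to the backend model transformation and the dumped parameters. A minimal usage sketch follows, assuming the public nncf.compress_weights API exposes the new argument and that a compatible model object has been prepared elsewhere:

import nncf
from nncf import CompressWeightsMode
from nncf.parameters import CompressionFormat

# model: a framework model (e.g. torch.nn.Module or ov.Model) prepared beforehand (assumed).
# Request 4-bit weight compression saved in the FQ_LORA format.
# lora_correction must stay disabled in this case: the new validation check raises
# nncf.ValidationError when it is combined with the FQ or FQ_LORA format.
compressed_model = nncf.compress_weights(
    model,
    mode=CompressWeightsMode.INT4_SYM,
    compression_format=CompressionFormat.FQ_LORA,  # default is CompressionFormat.DQ
)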