Skip to content

Commit cc04be9

Browse files
committed
Introduced strip format for more explicit behavior on strip
1 parent 47dbe38 commit cc04be9

File tree

21 files changed

+170
-100
lines changed

21 files changed

+170
-100
lines changed

nncf/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
from nncf.parameters import ModelType as ModelType
4141
from nncf.parameters import QuantizationMode as QuantizationMode
4242
from nncf.parameters import SensitivityMetric as SensitivityMetric
43+
from nncf.parameters import StripFormat as StripFormat
4344
from nncf.parameters import TargetDevice as TargetDevice
4445
from nncf.quantization import QuantizationPreset as QuantizationPreset
4546
from nncf.quantization import compress_weights as compress_weights

nncf/api/compression.py

+10-5
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from nncf.common.statistics import NNCFStatistics
2020
from nncf.common.utils.api_marker import api
2121
from nncf.common.utils.backend import copy_model
22+
from nncf.parameters import StripFormat
2223

2324
TModel = TypeVar("TModel")
2425

@@ -236,14 +237,17 @@ def statistics(self, quickly_collected_only: bool = False) -> NNCFStatistics:
236237
need to keep track of statistics on each training batch/step/iteration.
237238
"""
238239

239-
def strip_model(self, model: TModel, do_copy: bool = False) -> TModel:
240+
def strip_model(
241+
self, model: TModel, do_copy: bool = False, strip_format: StripFormat = StripFormat.NATIVE
242+
) -> TModel:
240243
"""
241244
Strips auxiliary layers that were used for the model compression, as it's
242245
only needed for training. The method is used before exporting the model
243246
in the target format.
244247
245248
:param model: The compressed model.
246249
:param do_copy: Modify copy of the model, defaults to False.
250+
:param strip_format: Describes the format in which the model is saved after strip.
247251
:return: The stripped model.
248252
"""
249253
if do_copy:
@@ -256,16 +260,17 @@ def prepare_for_export(self) -> None:
256260
"""
257261
self._model = self.strip_model(self._model)
258262

259-
def strip(self, do_copy: bool = True) -> TModel: # type: ignore[type-var]
263+
def strip(self, do_copy: bool = True, strip_format: StripFormat = StripFormat.NATIVE) -> TModel: # type: ignore[type-var]
260264
"""
261-
Returns the model object with as much custom NNCF additions as possible removed
262-
while still preserving the functioning of the model object as a compressed model.
265+
Removes auxiliary layers and operations added during the compression process, resulting in a clean
266+
model ready for deployment. The functionality of the model object is still preserved as a compressed model.
263267
264268
:param do_copy: If True (default), will return a copy of the currently associated model object. If False,
265269
will return the currently associated model object "stripped" in-place.
270+
:param strip_format: Describes the format in which the model is saved after strip.
266271
:return: The stripped model.
267272
"""
268-
return self.strip_model(self.model, do_copy) # type: ignore
273+
return self.strip_model(self.model, do_copy, strip_format) # type: ignore
269274

270275
@abstractmethod
271276
def export_model(

nncf/common/composite_compression.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from nncf.common.utils.backend import BackendType
2424
from nncf.common.utils.backend import copy_model
2525
from nncf.common.utils.backend import get_backend
26+
from nncf.parameters import StripFormat
2627

2728

2829
class CompositeCompressionLoss(CompressionLoss):
@@ -276,12 +277,12 @@ def prepare_for_export(self) -> None:
276277
stripped_model = ctrl.strip_model(stripped_model)
277278
self._model = stripped_model
278279

279-
def strip(self, do_copy: bool = True) -> TModel: # type: ignore
280+
def strip(self, do_copy: bool = True, strip_format: StripFormat = StripFormat.NATIVE) -> TModel: # type: ignore
280281
model = self.model
281282
if do_copy:
282283
model = copy_model(model)
283284
for ctrl in self.child_ctrls:
284-
model = ctrl.strip_model(model, do_copy=False)
285+
model = ctrl.strip_model(model, do_copy=False, strip_format=strip_format)
285286
return model # type: ignore
286287

287288
@property

nncf/common/strip.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from nncf.common.utils.api_marker import api
1717
from nncf.common.utils.backend import BackendType
1818
from nncf.common.utils.backend import get_backend
19+
from nncf.parameters import StripFormat
1920
from nncf.telemetry.decorator import tracked_function
2021
from nncf.telemetry.events import MODEL_BASED_CATEGORY
2122
from nncf.telemetry.extractors import FunctionCallTelemetryExtractor
@@ -25,25 +26,26 @@
2526

2627
@api(canonical_alias="nncf.strip")
2728
@tracked_function(category=MODEL_BASED_CATEGORY, extractors=[FunctionCallTelemetryExtractor("nncf.strip")])
28-
def strip(model: TModel, do_copy: bool = True) -> TModel:
29+
def strip(model: TModel, do_copy: bool = True, strip_format: StripFormat = StripFormat.NATIVE) -> TModel:
2930
"""
30-
Returns the model object with as much custom NNCF additions as possible removed
31-
while still preserving the functioning of the model object as a compressed model.
31+
Removes auxiliary layers and operations added during the compression process, resulting in a clean
32+
model ready for deployment. The functionality of the model object is still preserved as a compressed model.
3233
3334
:param model: The compressed model.
3435
:param do_copy: If True (default), will return a copy of the currently associated model object. If False,
3536
will return the currently associated model object "stripped" in-place.
37+
:param strip_format: Describes the format in which the model is saved after strip.
3638
:return: The stripped model.
3739
"""
3840
model_backend = get_backend(model)
3941
if model_backend == BackendType.TORCH:
4042
from nncf.torch.strip import strip as strip_pt
4143

42-
return strip_pt(model, do_copy) # type: ignore
44+
return strip_pt(model, do_copy, strip_format) # type: ignore
4345
elif model_backend == BackendType.TENSORFLOW:
4446
from nncf.tensorflow.strip import strip as strip_tf
4547

46-
return strip_tf(model, do_copy) # type: ignore
48+
return strip_tf(model, do_copy, strip_format) # type: ignore
4749

48-
msg = f"Method `strip` does not support for {model_backend.value} backend."
50+
msg = f"Method `strip` does not support {model_backend.value} backend."
4951
raise nncf.UnsupportedBackendError(msg)

nncf/experimental/tensorflow/quantization/algorithm.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
from nncf.experimental.tensorflow.quantization.init_range import RangeInitializerV2
3636
from nncf.experimental.tensorflow.quantization.init_range import TFRangeInitParamsV2
3737
from nncf.experimental.tensorflow.quantization.quantizers import create_quantizer
38+
from nncf.parameters import StripFormat
3839
from nncf.tensorflow.algorithm_selector import TF_COMPRESSION_ALGORITHMS
3940
from nncf.tensorflow.graph.metatypes.tf_ops import TFOpWithWeightsMetatype
4041
from nncf.tensorflow.graph.transformations.commands import TFInsertionCommand
@@ -353,7 +354,9 @@ def apply_to(self, model: NNCFNetwork) -> NNCFNetwork:
353354

354355

355356
class QuantizationControllerV2(QuantizationController):
356-
def strip_model(self, model: NNCFNetwork, do_copy: bool = False) -> NNCFNetwork:
357+
def strip_model(
358+
self, model: NNCFNetwork, do_copy: bool = False, strip_format: StripFormat = StripFormat.NATIVE
359+
) -> NNCFNetwork:
357360
if do_copy:
358361
model = copy_model(model)
359362
return model

nncf/parameters.py

+17
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,23 @@ class CompressionFormat(StrEnum):
119119
FQ_LORA = "fake_quantize_with_lora"
120120

121121

122+
@api(canonical_alias="nncf.StripFormat")
123+
class StripFormat(StrEnum):
124+
"""
125+
Describes the format in which the model is saved after strip: an operation that removes auxiliary layers and
126+
operations added during the compression process, resulting in a clean model ready for deployment.
127+
The functionality of the model object is still preserved as a compressed model.
128+
129+
:param NATIVE: Returns the model with as many custom NNCF additions as possible removed.
130+
:param DQ: Replaces FakeQuantize operations with dequantization subgraph and compressed weights in low-bit
131+
precision using fake quantize parameters. This is the default format for deployment of models with compressed
132+
weights.
133+
"""
134+
135+
NATIVE = "native"
136+
DQ = "dequantize"
137+
138+
122139
@api(canonical_alias="nncf.BackupMode")
123140
class BackupMode(StrEnum):
124141
"""

nncf/quantization/algorithms/weight_compression/torch_backend.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,7 @@ def get_fq_insertion_command(
283283
orig_weight_shape: Tuple[int, ...],
284284
compression_format: CompressionFormat,
285285
lora_adapter_rank: int,
286+
is_all_8bit: bool,
286287
) -> PTTransformationCommand:
287288
"""
288289
Creates a fake quantization insertion command for the given compressed weight.
@@ -291,9 +292,11 @@ def get_fq_insertion_command(
291292
:param wc_params: Parameters for weight compression.
292293
:param orig_weight_shape: The original shape of the weight tensor.
293294
:param compression_format: The format of compression.
295+
:param is_all_8bit: Flag indicating if all weights should be compressed to 8-bit.
294296
:return: A PTTransformationCommand for inserting fake quantization to the model.
295297
"""
296298
compression_config = wc_params.compression_config
299+
# Default mapping for 4-bit weight compression with the FQ_LORA format; LoRA adapters are added for 8-bit weights only when all weights are 8-bit (handled below).
297300
mode_vs_schema_map = {
298301
CompressWeightsMode.INT4_ASYM: QuantizationScheme.ASYMMETRIC_LORA,
299302
CompressWeightsMode.INT4_SYM: QuantizationScheme.SYMMETRIC_LORA,
@@ -303,6 +306,9 @@ def get_fq_insertion_command(
303306
if compression_format == CompressionFormat.FQ:
304307
mode_vs_schema_map[CompressWeightsMode.INT4_ASYM] = QuantizationScheme.ASYMMETRIC
305308
mode_vs_schema_map[CompressWeightsMode.INT4_SYM] = QuantizationScheme.SYMMETRIC
309+
if is_all_8bit and compression_format == CompressionFormat.FQ_LORA:
310+
mode_vs_schema_map[CompressWeightsMode.INT8_ASYM] = QuantizationScheme.ASYMMETRIC_LORA
311+
mode_vs_schema_map[CompressWeightsMode.INT8_SYM] = QuantizationScheme.SYMMETRIC_LORA
306312

307313
schema = mode_vs_schema_map[compression_config.mode]
308314

@@ -469,6 +475,7 @@ def transform_model(
469475
model_transformer = PTModelTransformer(model)
470476

471477
transformation_layout = TransformationLayout()
478+
is_all_8bit = all(wc_params.compression_config.num_bits == 8 for wc_params in weight_compression_parameters)
472479
for wc_params in weight_compression_parameters:
473480
compression_config = wc_params.compression_config
474481
if compression_config.mode in [
@@ -499,7 +506,7 @@ def transform_model(
499506
else:
500507
rank = advanced_parameters.lora_adapter_rank
501508
command = self.get_fq_insertion_command(
502-
compressed_weight, wc_params, weight.shape, compression_format, rank
509+
compressed_weight, wc_params, weight.shape, compression_format, rank, is_all_8bit
503510
)
504511
transformation_layout.register(command)
505512

nncf/tensorflow/algorithm_selector.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from nncf.common.statistics import NNCFStatistics
2323
from nncf.common.utils.backend import copy_model
2424
from nncf.common.utils.registry import Registry
25+
from nncf.parameters import StripFormat
2526
from nncf.tensorflow.api.compression import TFCompressionAlgorithmBuilder
2627
from nncf.tensorflow.loss import TFZeroCompressionLoss
2728

@@ -60,7 +61,7 @@ def scheduler(self) -> StubCompressionScheduler:
6061
def statistics(self, quickly_collected_only: bool = False) -> NNCFStatistics:
6162
return NNCFStatistics()
6263

63-
def strip(self, do_copy: bool = True) -> tf.keras.Model:
64+
def strip(self, do_copy: bool = True, strip_format: StripFormat = StripFormat.NATIVE) -> tf.keras.Model:
6465
model = self.model
6566
if do_copy:
6667
model = copy_model(self.model)

nncf/tensorflow/pruning/base_algorithm.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
from nncf.config.schemata.defaults import PRUNE_DOWNSAMPLE_CONVS
3636
from nncf.config.schemata.defaults import PRUNE_FIRST_CONV
3737
from nncf.config.schemata.defaults import PRUNING_INIT
38+
from nncf.parameters import StripFormat
3839
from nncf.tensorflow.api.compression import TFCompressionAlgorithmBuilder
3940
from nncf.tensorflow.graph.converter import TFModelConverterFactory
4041
from nncf.tensorflow.graph.metatypes.keras_layers import TFBatchNormalizationLayerMetatype
@@ -359,6 +360,8 @@ def _calculate_pruned_layers_summary(self) -> List[PrunedLayerSummary]:
359360

360361
return pruned_layers_summary
361362

362-
def strip_model(self, model: tf.keras.Model, do_copy: bool = False) -> tf.keras.Model:
363+
def strip_model(
364+
self, model: tf.keras.Model, do_copy: bool = False, strip_format: StripFormat = StripFormat.NATIVE
365+
) -> tf.keras.Model:
363366
# Transform model for pruning creates copy of the model.
364367
return strip_model_from_masks(model, self._op_names)

nncf/tensorflow/quantization/algorithm.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
from nncf.config.schemata.defaults import QUANTIZE_INPUTS
5757
from nncf.config.schemata.defaults import QUANTIZE_OUTPUTS
5858
from nncf.config.schemata.defaults import TARGET_DEVICE
59+
from nncf.parameters import StripFormat
5960
from nncf.tensorflow.algorithm_selector import TF_COMPRESSION_ALGORITHMS
6061
from nncf.tensorflow.api.compression import TFCompressionAlgorithmBuilder
6162
from nncf.tensorflow.graph.converter import TFModelConverter
@@ -753,7 +754,9 @@ def loss(self) -> CompressionLoss:
753754
"""
754755
return self._loss
755756

756-
def strip_model(self, model: tf.keras.Model, do_copy: bool = False) -> tf.keras.Model:
757+
def strip_model(
758+
self, model: tf.keras.Model, do_copy: bool = False, strip_format: StripFormat = StripFormat.NATIVE
759+
) -> tf.keras.Model:
757760
if do_copy:
758761
model = copy_model(model)
759762
apply_overflow_fix(model, self._op_names)

nncf/tensorflow/sparsity/base_algorithm.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
from nncf.common.compression import BaseCompressionAlgorithmController
1414
from nncf.common.sparsity.controller import SparsityController
15+
from nncf.parameters import StripFormat
1516
from nncf.tensorflow.graph.metatypes import keras_layers as layer_metatypes
1617
from nncf.tensorflow.sparsity.utils import strip_model_from_masks
1718

@@ -47,6 +48,8 @@ def __init__(self, target_model, op_names):
4748
super().__init__(target_model)
4849
self._op_names = op_names
4950

50-
def strip_model(self, model: tf.keras.Model, do_copy: bool = False) -> tf.keras.Model:
51+
def strip_model(
52+
self, model: tf.keras.Model, do_copy: bool = False, strip_format: StripFormat = StripFormat.NATIVE
53+
) -> tf.keras.Model:
5154
# Transform model for sparsity creates copy of the model.
5255
return strip_model_from_masks(model, self._op_names)

nncf/tensorflow/strip.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313

1414
import tensorflow as tf
1515

16+
import nncf
1617
from nncf.common.utils.backend import copy_model
18+
from nncf.parameters import StripFormat
1719
from nncf.tensorflow.graph.model_transformer import TFModelTransformer
1820
from nncf.tensorflow.graph.transformations.commands import TFOperationWithWeights
1921
from nncf.tensorflow.graph.transformations.commands import TFRemovalCommand
@@ -28,15 +30,21 @@
2830
from nncf.tensorflow.sparsity.utils import apply_mask
2931

3032

31-
def strip(model: tf.keras.Model, do_copy: bool = True) -> tf.keras.Model:
33+
def strip(
34+
model: tf.keras.Model, do_copy: bool = True, strip_format: StripFormat = StripFormat.NATIVE
35+
) -> tf.keras.Model:
3236
"""
3337
Implementation of the nncf.strip() function for the TF backend
3438
3539
:param model: The compressed model.
3640
:param do_copy: If True (default), will return a copy of the currently associated model object. If False,
3741
will return the currently associated model object "stripped" in-place.
42+
:param strip_format: Describes the format in which the model is saved after strip.
3843
:return: The stripped model.
3944
"""
45+
if strip_format != StripFormat.NATIVE:
46+
msg = f"Tensorflow does not support for {strip_format} strip format."
47+
raise nncf.UnsupportedBackendError(msg)
4048
if not isinstance(model, tf.keras.Model):
4149
return model
4250

nncf/torch/algo_selector.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from nncf.common.statistics import NNCFStatistics
2020
from nncf.common.utils.backend import copy_model
2121
from nncf.common.utils.registry import Registry
22+
from nncf.parameters import StripFormat
2223
from nncf.torch.compression_method_api import PTCompressionAlgorithmBuilder
2324
from nncf.torch.compression_method_api import PTCompressionAlgorithmController
2425
from nncf.torch.compression_method_api import PTCompressionLoss
@@ -81,7 +82,7 @@ def scheduler(self) -> CompressionScheduler:
8182
def statistics(self, quickly_collected_only: bool = False) -> NNCFStatistics:
8283
return NNCFStatistics()
8384

84-
def strip(self, do_copy: bool = True) -> NNCFNetwork:
85+
def strip(self, do_copy: bool = True, strip_format: StripFormat = StripFormat.NATIVE) -> NNCFNetwork:
8586
model = self.model
8687
if do_copy:
8788
model = copy_model(self.model)

nncf/torch/nncf_network.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
from nncf.common.insertion_point_graph import PostHookInsertionPoint
4040
from nncf.common.insertion_point_graph import PreHookInsertionPoint
4141
from nncf.common.utils.debug import is_debug
42+
from nncf.parameters import StripFormat
4243
from nncf.telemetry import tracked_function
4344
from nncf.telemetry.events import NNCF_PT_CATEGORY
4445
from nncf.telemetry.extractors import FunctionCallTelemetryExtractor
@@ -966,21 +967,23 @@ def get_op_address_to_op_name_map(self) -> Dict[OperationAddress, NNCFNodeName]:
966967
def set_compression_controller(self, ctrl: CompressionAlgorithmController):
967968
self.compression_controller = ctrl
968969

969-
def strip(self, do_copy: bool = True) -> "NNCFNetwork":
970+
def strip(self, do_copy: bool = True, strip_format: StripFormat = StripFormat.NATIVE) -> "NNCFNetwork":
970971
"""
971-
Returns the model object with as much custom NNCF additions as possible removed
972-
while still preserving the functioning of the model object as a compressed model.
972+
Removes auxiliary layers and operations added during the compression process, resulting in a clean
973+
model ready for deployment. The functionality of the model object is still preserved as a compressed model.
974+
973975
:param do_copy: If True (default), will return a copy of the currently associated model object. If False,
974976
will return the currently associated model object "stripped" in-place.
977+
:param strip_format: Describes the format in which the model is saved after strip.
975978
:return: The stripped model.
976979
"""
977980
if self.compression_controller is None:
978981
# PTQ algorithm does not set compressed controller
979982
from nncf.torch.quantization.strip import strip_quantized_model
980983

981984
model = deepcopy(self._model_ref) if do_copy else self._model_ref
982-
return strip_quantized_model(model)
983-
return self.compression_controller.strip(do_copy)
985+
return strip_quantized_model(model, strip_format=strip_format)
986+
return self.compression_controller.strip(do_copy, strip_format=strip_format)
984987

985988
def get_reused_parameters(self):
986989
"""

nncf/torch/pruning/filter_pruning/algo.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
from nncf.common.utils.debug import is_debug
4646
from nncf.common.utils.os import safe_open
4747
from nncf.config.extractors import extract_bn_adaptation_init_params
48+
from nncf.parameters import StripFormat
4849
from nncf.torch.algo_selector import PT_COMPRESSION_ALGORITHMS
4950
from nncf.torch.compression_method_api import PTCompressionAlgorithmController
5051
from nncf.torch.graph.operator_metatypes import PTModuleConv1dMetatype
@@ -693,7 +694,9 @@ def _run_batchnorm_adaptation(self):
693694
)
694695
self._bn_adaptation.run(self.model)
695696

696-
def strip_model(self, model: NNCFNetwork, do_copy: bool = False) -> NNCFNetwork:
697+
def strip_model(
698+
self, model: NNCFNetwork, do_copy: bool = False, strip_format: StripFormat = StripFormat.NATIVE
699+
) -> NNCFNetwork:
697700
if do_copy:
698701
model = copy_model(model)
699702

0 commit comments

Comments
 (0)