Commit 5951153

[PT2] SmoothQuant (#3276)

### Changes

Implemented SmoothQuant support for the experimental tracing backend.
Added PT2ConstUpdateCommand to update constant data in the model.

### Related tickets

152996

1 parent 033be9b commit 5951153
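The core SmoothQuant identity these commands serve is that a per-channel activation scale can be folded into the weights without changing the layer output; the rescaled weight is what PT2ConstUpdateCommand writes back into the model. A minimal illustrative sketch (plain PyTorch, not NNCF code; the scale below is a simplified activation-only variant of what the algorithm actually computes from activation and weight statistics):

    import torch

    x = torch.randn(8, 4)           # activations, 4 input channels
    w = torch.randn(3, 4)           # linear weight [out_features, in_features]
    s = x.abs().amax(dim=0) ** 0.5  # simplified per-channel smoothing scale

    y_ref = x @ w.t()
    y_sq = (x / s) @ (w * s).t()    # smoothed activations, rescaled weights
    assert torch.allclose(y_ref, y_sq, atol=1e-5)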

File tree

11 files changed: +202 -18 lines

nncf/experimental/torch2/commands.py (+17)

@@ -11,8 +11,10 @@
 
 from typing import List, Optional
 
+import torch
 from torch import nn
 
+from nncf.common.graph.graph import NNCFNode
 from nncf.common.graph.transformations.commands import Command
 from nncf.common.graph.transformations.commands import TransformationType
 from nncf.experimental.torch2.function_hook.hook_storage import RemovableHookHandle
@@ -41,3 +43,18 @@ def __init__(
         self.target_points = target_points
         self.hook_module = hook_module
         self.handle_storage = handle_storage
+
+
+class PT2ConstUpdateCommand(Command):
+    """
+    Updates a constant (weight) value in the model with the given tensor.
+    """
+
+    def __init__(self, node: NNCFNode, value: torch.Tensor):
+        """
+        :param node: The node of the constant data in the model.
+        :param value: The new value of the constant.
+        """
+        super().__init__(TransformationType.CHANGE)
+        self.node = node
+        self.value = value
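A hedged usage sketch of the new command: the constant's NNCFNode and the replacement tensor are passed directly, and the command is queued in a TransformationLayout for the torch2 model transformer. The function name below is illustrative and not part of this commit:

    import torch

    from nncf.common.graph.graph import NNCFNode
    from nncf.common.graph.transformations.layout import TransformationLayout
    from nncf.experimental.torch2.commands import PT2ConstUpdateCommand

    def make_weight_update_layout(weight_node: NNCFNode, new_weight: torch.Tensor) -> TransformationLayout:
        # Queue a single constant-update command; the torch2 model transformer
        # applies it via set_const_data() when transform() is called.
        layout = TransformationLayout()
        layout.register(PT2ConstUpdateCommand(weight_node, new_weight))
        return layout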

nncf/experimental/torch2/model_transformer.py (+21)

@@ -18,13 +18,15 @@
 from nncf.common.graph.transformations.commands import Command
 from nncf.common.graph.transformations.commands import TargetType
 from nncf.common.graph.transformations.layout import TransformationLayout
+from nncf.experimental.torch2.commands import PT2ConstUpdateCommand
 from nncf.experimental.torch2.commands import PT2InsertionCommand
 from nncf.experimental.torch2.function_hook.hook_storage import RemovableHookHandle
 from nncf.experimental.torch2.function_hook.nncf_graph.nncf_graph_builder import GraphModelWrapper
 from nncf.experimental.torch2.function_hook.wrapper import register_post_function_hook
 from nncf.experimental.torch2.function_hook.wrapper import register_pre_function_hook
 from nncf.torch.graph.transformations.commands import PTBiasCorrectionCommand
 from nncf.torch.graph.transformations.commands import PTTargetPoint
+from nncf.torch.model_graph_manager import set_const_data
 from nncf.torch.model_graph_manager import update_fused_bias
 
 TRANSFORMATION_PAIRS = Tuple[Tuple[Type[Any], Callable[[GraphModelWrapper, List[Any]], GraphModelWrapper]], ...]
@@ -41,6 +43,7 @@ def __init__(self, model: GraphModelWrapper):
         self._command_transformation_ordered_pairs: TRANSFORMATION_PAIRS = (
             (PT2InsertionCommand, self._apply_insertion_transformations),
             (PTBiasCorrectionCommand, self._apply_bias_correction_transformations),
+            (PT2ConstUpdateCommand, self._apply_const_update_transformations),
         )
 
     def transform(self, transformation_layout: TransformationLayout) -> GraphModelWrapper:
@@ -114,6 +117,24 @@ def _apply_bias_correction_transformations(
         )
         return wrapped_model
 
+    @staticmethod
+    def _apply_const_update_transformations(
+        wrapped_model: GraphModelWrapper, transformations: List[PT2ConstUpdateCommand]
+    ) -> GraphModelWrapper:
+        """
+        Applies constant data update transformations to the model.
+
+        :param wrapped_model: Model to apply transformations to.
+        :param transformations: List of the constant data update transformations.
+        :return: Model with updated constant data.
+        """
+        for transformation in transformations:
+            node = transformation.node
+            value = transformation.value
+            set_const_data(value, node, wrapped_model.model)
+
+        return wrapped_model
+
 
 def insert_hook(model: nn.Module, hook: nn.Module, target_point: PTTargetPoint) -> RemovableHookHandle:
     """

nncf/quantization/algorithms/min_max/torch_backend.py (+7 -1)

@@ -26,6 +26,7 @@
 from nncf.experimental.common.check_feature import is_experimental_torch_tracing_enabled
 from nncf.experimental.common.tensor_statistics.collectors import REDUCERS_MAP
 from nncf.experimental.common.tensor_statistics.collectors import TensorReducerBase
+from nncf.experimental.torch2.commands import PT2InsertionCommand
 from nncf.parameters import ModelType
 from nncf.parameters import TargetDevice
 from nncf.quantization.algorithms.min_max.backend import MinMaxAlgoBackend
@@ -271,6 +272,9 @@ def create_quantizer_insertion_command(
         quantizer = PTMinMaxAlgoBackend._create_quantizer(
             quantizer_config, scale_shape, parameters, target_point.target_type
         )
+        if is_experimental_torch_tracing_enabled():
+            return PT2InsertionCommand(target_points=[target_point], hook_module=quantizer)
+
         return create_quantizer_insertion_command(target_point, quantizer)
 
     @staticmethod
@@ -287,6 +291,8 @@ def create_unified_scales_quantizers_insertion_commands(
         quantizer = PTMinMaxAlgoBackend._create_quantizer(
             quantizer_config, scale_shape, parameters, target_points[0].target_type
         )
+        if is_experimental_torch_tracing_enabled():
+            return [PT2InsertionCommand(target_points=target_points, hook_module=quantizer)]
         return [create_shared_quantizer_insertion_command(target_points, quantizer)]
 
     @staticmethod
@@ -312,7 +318,7 @@ def get_ignored_metatypes(model_type: ModelType, device: TargetDevice) -> List[O
             # Batchnorm
             om.PTBatchNormMetatype,
             om.PTModuleBatchNormMetatype,
-            # Сomparison operations
+            # Comparison operations
             om.PTGreaterEqualMetatype,
             om.PTGreaterMetatype,
             om.PTLessEqualMetatype,
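Under the experimental flag, the quantizer module itself becomes the hook attached by PT2InsertionCommand. A hedged construction example (the helper name and node name are illustrative, and PTTargetPoint's signature is assumed from its current NNCF usage, not from this commit):

    from nncf.common.graph.transformations.commands import TargetType
    from nncf.experimental.torch2.commands import PT2InsertionCommand
    from nncf.torch.graph.transformations.commands import PTTargetPoint

    def quantizer_insertion(quantizer, node_name: str) -> PT2InsertionCommand:
        # Attach the quantizer module as a post-hook on the given node's output.
        tp = PTTargetPoint(TargetType.OPERATOR_POST_HOOK, node_name)
        return PT2InsertionCommand(target_points=[tp], hook_module=quantizer)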

nncf/quantization/algorithms/smooth_quant/algorithm.py (+3 -1)

@@ -160,7 +160,9 @@ def apply(
                 weight_value = self._backend_entity.get_weight_value(node_to_smooth, model, graph)
                 weights_scale = self._calculate_weight_scale(best_scale, node_to_smooth, weight_value)
                 scaled_weight = weight_value * weights_scale
-                weight_update_command = self._backend_entity.weight_update_command(node_to_smooth, scaled_weight.data)
+                weight_update_command = self._backend_entity.weight_update_command(
+                    node_to_smooth, graph, scaled_weight.data
+                )
                 transformation_layout.register(weight_update_command)
 
             activations_by_output_id = {e.output_port_id: e for e in graph.get_output_edges(source_node)}

nncf/quantization/algorithms/smooth_quant/backend.py (+2 -2)

@@ -127,14 +127,14 @@ def get_weight_value(node_with_weight: NNCFNode, model: TModel, port_id: int, nn
     @staticmethod
     @abstractmethod
     def weight_update_command(
-        node_with_weight: NNCFNode, weight_value: TTensor, weight_port_id: int
+        node_with_weight: NNCFNode, nncf_graph: NNCFGraph, weight_value: TTensor
     ) -> TransformationCommand:
         """
         Returns command to update weights.
 
         :param node_with_weight: NNCFNode instance.
+        :param nncf_graph: NNCFGraph instance.
         :param weight_value: New weight value.
-        :param weight_port_id: Weight port id.
         :return: TransformationCommand instance.
         """
 

nncf/quantization/algorithms/smooth_quant/openvino_backend.py (+3 -1)

@@ -100,7 +100,9 @@ def get_weight_tensor_port_id(node: NNCFNode) -> int:
         return const_ids[0]
 
     @staticmethod
-    def weight_update_command(node_with_weight: NNCFNode, weight_value: np.ndarray) -> OVWeightUpdateCommand:
+    def weight_update_command(
+        node_with_weight: NNCFNode, nncf_graph: NNCFGraph, weight_value: np.ndarray
+    ) -> OVWeightUpdateCommand:
         weight_port_id = OVSmoothQuantAlgoBackend.get_weight_tensor_port_id(node_with_weight)
         return OVCommandCreator.create_command_to_update_weight(node_with_weight, weight_value, weight_port_id)

nncf/quantization/algorithms/smooth_quant/torch_backend.py (+20 -2)

@@ -11,7 +11,6 @@
 
 from typing import Any, Callable, Dict, List, Tuple
 
-import numpy as np
 import torch
 
 import nncf.torch.graph.operator_metatypes as om
@@ -21,9 +20,13 @@
 from nncf.common.graph.transformations.commands import TargetType
 from nncf.common.quantization.quantizer_propagation.structs import QuantizationTrait
 from nncf.common.tensor_statistics.statistic_point import StatisticPoint
+from nncf.experimental.common.check_feature import is_experimental_torch_tracing_enabled
 from nncf.experimental.common.tensor_statistics.collectors import AbsMaxReducer
 from nncf.experimental.common.tensor_statistics.collectors import MaxAggregator
 from nncf.experimental.common.tensor_statistics.collectors import TensorCollector
+from nncf.experimental.torch2.commands import PT2ConstUpdateCommand
+from nncf.experimental.torch2.commands import PT2InsertionCommand
+from nncf.experimental.torch2.function_hook.nncf_graph.nncf_graph_builder import GraphModelWrapper
 from nncf.quantization.algorithms.smooth_quant.backend import SmoothQuantAlgoBackend
 from nncf.tensor import Tensor
 from nncf.torch.graph.transformations.command_creation import create_command_to_update_weight
@@ -119,6 +122,9 @@ def get_abs_max_channel_collector(
 
     @staticmethod
     def get_weight_value(node_with_weight: NNCFNode, model: NNCFNetwork, nncf_graph: NNCFGraph) -> Tensor:
+        if isinstance(model, GraphModelWrapper):
+            model = model.model
+
         weight_node = get_const_node(node_with_weight, node_with_weight.metatype.weight_port_ids[0], nncf_graph)
         if weight_node is None:
             msg = f"{node_with_weight} node has no weight node."
@@ -127,7 +133,12 @@ def get_weight_value(node_with_weight: NNCFNode, model: NNCFNetwork, nncf_graph:
         return Tensor(weight_data)
 
     @staticmethod
-    def weight_update_command(node_with_weight: NNCFNode, weight_value: np.ndarray) -> PTWeightUpdateCommand:
+    def weight_update_command(
+        node_with_weight: NNCFNode, nncf_graph: NNCFGraph, weight_value: torch.Tensor
+    ) -> PTWeightUpdateCommand:
+        if is_experimental_torch_tracing_enabled():
+            weight_node = get_const_node(node_with_weight, node_with_weight.metatype.weight_port_ids[0], nncf_graph)
+            return PT2ConstUpdateCommand(weight_node, weight_value)
         return create_command_to_update_weight(node_with_weight, weight_value)
 
     @staticmethod
@@ -145,6 +156,9 @@ def scale_insertion_command(
 
         sq_multiply = SQMultiply(scale_value.shape)
         sq_multiply.scale = scale_value
+
+        if is_experimental_torch_tracing_enabled():
+            return PT2InsertionCommand(target_points=target_points, hook_module=sq_multiply)
         return PTSharedFnInsertionCommand(target_points, sq_multiply, scale_node_name)
 
     @staticmethod
@@ -161,6 +175,10 @@ def get_weight_channel_axis(node: NNCFNode) -> int:
 
     @staticmethod
    def is_node_with_shared_weight(node: NNCFNode, nncf_graph: NNCFGraph) -> bool:
+        if is_experimental_torch_tracing_enabled():
+            weight_node = get_const_node(node, node.metatype.weight_port_ids[0], nncf_graph)
+            output_edges = nncf_graph.get_next_nodes(weight_node)
+            return len(output_edges) > 1
         return node.is_shared()
 
     @staticmethod

nncf/quantization/algorithms/smooth_quant/torch_fx_backend.py (+3 -1)

@@ -104,7 +104,9 @@ def get_weight_value(node_with_weight: NNCFNode, model: torch.fx.GraphModule, nn
         return Tensor(weight_data.data)
 
     @staticmethod
-    def weight_update_command(node_with_weight: NNCFNode, weight_value: torch.Tensor) -> FXApplyTransformationCommand:
+    def weight_update_command(
+        node_with_weight: NNCFNode, nncf_graph: NNCFGraph, weight_value: torch.Tensor
+    ) -> FXApplyTransformationCommand:
         # TODO(dlyakhov): Use input port id depending on the node metatype/attributes.
         return FXApplyTransformationCommand(
             constant_update_transformation_builder(node_with_weight, weight_value.data, input_port_id=1)

nncf/torch/graph/transformations/command_creation.py (-7)

@@ -18,8 +18,6 @@
 from nncf.common.graph.transformations.commands import TargetType
 from nncf.common.graph.transformations.commands import TransformationPriority
 from nncf.common.quantization.structs import NonWeightQuantizerId
-from nncf.experimental.common.check_feature import is_experimental_torch_tracing_enabled
-from nncf.experimental.torch2.commands import PT2InsertionCommand
 from nncf.torch.graph.transformations.commands import ExtraCompressionModuleType
 from nncf.torch.graph.transformations.commands import PTBiasCorrectionCommand
 from nncf.torch.graph.transformations.commands import PTInsertionCommand
@@ -56,9 +54,6 @@ def create_command_to_update_weight(node: NNCFNode, weight_value: Tensor) -> PTW
 def create_quantizer_insertion_command(
     target_point: PTTargetPoint, quantizer: BaseQuantizer
 ) -> Union[PTInsertionCommand, PTSharedFnInsertionCommand]:
-    if is_experimental_torch_tracing_enabled():
-        return PT2InsertionCommand(target_points=[target_point], hook_module=quantizer)
-
     quantizer_id = NonWeightQuantizerId(target_point.target_node_name, target_point.input_port_id)
     storage_key = str(quantizer_id)
     return PTSharedFnInsertionCommand(
@@ -73,8 +68,6 @@ def create_quantizer_insertion_command(
 def create_shared_quantizer_insertion_command(
     target_points: List[PTTargetPoint], quantizer: BaseQuantizer
 ) -> PTSharedFnInsertionCommand:
-    if is_experimental_torch_tracing_enabled():
-        return PT2InsertionCommand(target_points=target_points, hook_module=quantizer)
     quantizers_ids = []
     for target_point in target_points:
         quantizers_ids.append(NonWeightQuantizerId(target_point.target_node_name, target_point.input_port_id))

nncf/torch/layer_utils.py (+3 -3)

@@ -10,7 +10,6 @@
 # limitations under the License.
 
 from abc import ABC
-from abc import abstractclassmethod
 from abc import abstractmethod
 from typing import Any, Dict
 
@@ -29,7 +28,7 @@ class StatefullModuleInterface(ABC):
     Interface that should be implemented for every registered compression module to make it possible
     to save an compression modules state and create an compression module from the saved state.
     Config of the module should be json serializable, no python objects except
-    standart (str, list and etc.) should be present in a compression module config.
+    standard (str, list and etc.) should be present in a compression module config.
     Values for attributes with type torch.nn.Parameter
     is recovered from the model `state_dict`, so there is no need to keep them in the module config.
     Modules should avoid implementation of `__call__` method and use `forward` method instead,
@@ -44,7 +43,8 @@ def get_config(self) -> Dict[str, Any]:
         Returns the compression module config.
         """
 
-    @abstractclassmethod
+    @classmethod
+    @abstractmethod
     def from_config(cls, state: Dict[str, Any]) -> object:
         """
         Creates a compression module instance from the given config.
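The layer_utils.py change replaces the long-deprecated abc.abstractclassmethod with the stacked decorators Python recommends. The pattern in isolation (illustrative class name, not NNCF code):

    from abc import ABC, abstractmethod
    from typing import Any, Dict

    class ConfigurableModule(ABC):
        @classmethod
        @abstractmethod
        def from_config(cls, state: Dict[str, Any]) -> "ConfigurableModule":
            """Create an instance from a serialized config."""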
