Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Conformance: nncf.quantize_pt2e and OpenVINOQuantize support #39

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion nncf/common/hardware/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,13 @@ def get_qconf_from_hw_config_subdict(quantization_subdict: Dict[str, Any]) -> Qu
), "Invalid value of quantizer parameter `level_high`.\
The parameter must be consistent with other parameters!"

narrow_range = quantization_subdict["narrow_range"]
return QuantizerConfig(
num_bits=bits, mode=mode, per_channel=is_per_channel, signedness_to_force=signedness_to_force
num_bits=bits,
mode=mode,
per_channel=is_per_channel,
signedness_to_force=signedness_to_force,
narrow_range=narrow_range,
)

@staticmethod
Expand Down
17 changes: 11 additions & 6 deletions nncf/common/hardware/configs/cpu.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,17 @@
"mode": [
"symmetric"
],
"granularity": "pertensor"
"granularity": "pertensor",
"narrow_range": false
},
"q8_a": {
"bits": 8,
"mode": [
"symmetric",
"asymmetric"
],
"granularity": "pertensor"
"granularity": "pertensor",
"narrow_range": false
},
"q8_a_ch": {
"bits": 8,
Expand All @@ -26,19 +28,22 @@
"granularity": [
"perchannel",
"pertensor"
]
],
"narrow_range": false
},
"q8_w_sym": {
"bits": 8,
"mode": "symmetric",
"level_low": -128,
"level_high": 127,
"granularity": ["perchannel", "pertensor"]
"granularity": ["perchannel", "pertensor"],
"narrow_range": true
},
"q8_w_asym": {
"bits": 8,
"mode": "asymmetric",
"granularity": ["perchannel", "pertensor"]
"granularity": ["perchannel", "pertensor"],
"narrow_range": false
}
}
},
Expand Down Expand Up @@ -288,7 +293,7 @@
{
"type": "Embedding",
"quantization": {
"weights": ["q8_w_sym", "q8_w_asym"]
"weights": ["q8_w_sym", "q8_w_asym", "q8_a", "q8_a_sym", "q8_a_ch"]
}
},
{"type": "EmbeddingBag"}
Expand Down
12 changes: 8 additions & 4 deletions nncf/common/hardware/configs/gpu.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
"symmetric",
"asymmetric"
],
"granularity": "pertensor"
"granularity": "pertensor",
"narrow_range": false
},
"q8_a_ch": {
"bits": 8,
Expand All @@ -19,7 +20,8 @@
"granularity": [
"perchannel",
"pertensor"
]
],
"narrow_range": false
},
"q8_w_sym": {
"bits": 8,
Expand All @@ -29,15 +31,17 @@
"granularity": [
"perchannel",
"pertensor"
]
],
"narrow_range": true
},
"q8_w_asym": {
"bits": 8,
"mode": "asymmetric",
"granularity": [
"perchannel",
"pertensor"
]
],
"narrow_range": false
}
}
},
Expand Down
32 changes: 21 additions & 11 deletions nncf/common/hardware/configs/npu.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,17 @@
"mode": [
"symmetric"
],
"granularity": "pertensor"
"granularity": "pertensor",
"narrow_range": false
},
"q8_a": {
"bits": 8,
"mode": [
"symmetric",
"asymmetric"
],
"granularity": "pertensor"
"granularity": "pertensor",
"narrow_range": false
},
"q8_a_ch": {
"bits": 8,
Expand All @@ -26,52 +28,60 @@
"granularity": [
"perchannel",
"pertensor"
]
],
"narrow_range": false
},
"q8_w_sym": {
"bits": 8,
"mode": "symmetric",
"level_low": -128,
"level_high": 127,
"granularity": ["perchannel", "pertensor"]
"granularity": ["perchannel", "pertensor"],
"narrow_range": true
},
"q8_w_asym": {
"bits": 8,
"mode": "asymmetric",
"granularity": ["perchannel", "pertensor"]
"granularity": ["perchannel", "pertensor"],
"narrow_range": false
},
// 4-bit configs
"q4_tn": {
"bits": 4,
"mode": "symmetric",
"granularity": "pertensor"
"granularity": "pertensor",
"narrow_range": false
},
"q4_ch": {
"bits": 4,
"mode": "symmetric",
"granularity": "perchannel"
"granularity": "perchannel",
"narrow_range": false
},
"q4_w": {
"bits": 4,
"mode": "symmetric",
"granularity": [
"perchannel",
"pertensor"
]
],
"narrow_range": false
},
// 2-bit configs
"q2_ch": {
"bits": 2,
"mode": "symmetric",
"granularity": "perchannel"
"granularity": "perchannel",
"narrow_range": false
},
"q2_w": {
"bits": 2,
"mode": "symmetric",
"granularity": [
"perchannel",
"pertensor"
]
],
"narrow_range": false
}
}
},
Expand Down Expand Up @@ -382,7 +392,7 @@
"type": "Embedding",
"quantization": {
"weights": [
"q8_w_sym", "q8_w_asym"
"q8_w_sym", "q8_w_asym", "q8_a", "q8_a_sym", "q8_a_ch"
]
}
},
Expand Down
9 changes: 7 additions & 2 deletions nncf/common/hardware/configs/template.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,19 @@
"granularity": [
"pertensor",
"perchannel"
]
],
/*
* Narrow range: whether NNCF should use 2**num_bits - 1 quantization levels
* (narrow range enabled) instead of the full 2**num_bits levels
*/
"narrow_range": false
},
"q8_sym_tnr_-128_127": { // Alias name for set of hyperparameters
"bits": 8, // Number of quantization bits
"mode": "symmetric", // Quantization mode
"granularity": "pertensor", // Granularity: one scale for output tensor
"level_low": -128, // Low quantization level
"level_high": 127 // High quantization level
"level_high": 127, // High quantization level
"narrow_range": false
}
}
},
Expand Down
2 changes: 2 additions & 0 deletions nncf/common/quantization/config_assignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ def get_scoped_quantizer_config(
qconfig.per_channel = config_overrides["per_channel"]
if config_overrides.get("signed") is not None:
qconfig.signedness_to_force = config_overrides["signed"]
if config_overrides.get("narrow_range") is not None:
qconfig.narrow_range = config_overrides["narrow_range"]
return qconfig


Expand Down
4 changes: 4 additions & 0 deletions nncf/common/quantization/quantizer_propagation/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -1165,6 +1165,10 @@ def is_downstream_quantizer_redundant(
(ds_config.per_channel == us_config.per_channel)
or (ds_config.per_channel is True and us_config.per_channel is False)
)

# Strictly prohibit merging of configs with different narrow_range parameters
is_redundant = is_redundant and (ds_config.narrow_range == us_config.narrow_range)

return is_redundant

def merge_traverse_fn(
Expand Down
10 changes: 7 additions & 3 deletions nncf/common/quantization/quantizer_propagation/solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ def is_final_qconfig_compatible_to_initial(initial_qconfig: QuantizerConfig) ->
final_qconfig.per_channel == initial_qconfig.per_channel
and final_qconfig.mode == initial_qconfig.mode
and final_qconfig.num_bits == initial_qconfig.num_bits
and final_qconfig.narrow_range == initial_qconfig.narrow_range
and (
final_qconfig.signedness_to_force == initial_qconfig.signedness_to_force
or initial_qconfig.signedness_to_force is None
Expand Down Expand Up @@ -301,7 +302,9 @@ class QuantizerPropagationSolver:
"""

DEFAULT_QUANTIZATION_TYPES = [
QuantizerConfig(num_bits=8, mode=QuantizationMode.SYMMETRIC, signedness_to_force=None, per_channel=False)
QuantizerConfig(
num_bits=8, mode=QuantizationMode.SYMMETRIC, signedness_to_force=None, per_channel=False, narrow_range=False
)
]

DEFAULT_PROPAGATION_STRATEGY = QuantizerPropagationRule.MERGE_ALL_IN_ONE
Expand Down Expand Up @@ -1373,7 +1376,7 @@ def get_merged_qconfigs_for_downward_branching_case(
Returns a tuple, of which the first node is the qconfig list for the quantizer to be placed
above the branching node (i.e. that will affect all of the downward branches), and a list
of nodes which are either None (which means that the corresponding branch quantizer has been successfully
merged, or qconfigs list to be set for the corresponding branch quantizer if it cannot be merged (e.g. if
merged), or qconfigs list to be set for the corresponding branch quantizer if it cannot be merged (e.g. if
requantization to a lower bitwidth has to be done for this branch)

:param potential_qconfigs_for_each_branch: For each branch defines the list of available configurations
Expand Down Expand Up @@ -1494,7 +1497,8 @@ def __disambiguate_config_list(
"""
The input list should be sorted in descending order of priority. In case some qconfigs in the list have the
same priority, this function will resolve the ambiguity in ordering these qconfigs in the final returned
list.
list. Quantization configs cannot have differing narrow_range parameters at this point, so
narrow_range does not participate in the __lt__ method of the QConfigComparator.
"""

class QConfigComparator:
Expand Down
9 changes: 7 additions & 2 deletions nncf/common/quantization/quantizer_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
QuantizationPointId = int

DEFAULT_QUANTIZER_CONFIG = QuantizerConfig(
num_bits=8, mode=QuantizationMode.SYMMETRIC, signedness_to_force=None, per_channel=False
num_bits=8,
mode=QuantizationMode.SYMMETRIC,
signedness_to_force=None,
per_channel=False,
narrow_range=False,
)


Expand Down Expand Up @@ -242,12 +246,13 @@ def __init__(self):
self._next_unified_scale_gid = 0
self._next_shared_inputs_gid = 0

def add_independent_quantization_point(self, qp: QuantizationPointBase):
def add_independent_quantization_point(self, qp: QuantizationPointBase) -> int:
if self.quantization_points.keys():
new_id = max(self.quantization_points.keys()) + 1
else:
new_id = 0
self.quantization_points[new_id] = qp
return new_id

def register_unified_scale_group(self, qp_group: List[QuantizationPointId]) -> int:
for qp_id in qp_group:
Expand Down
28 changes: 8 additions & 20 deletions nncf/common/quantization/structs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from nncf.common.graph import NNCFNodeName
from nncf.common.utils.api_marker import api
from nncf.config.schemata.defaults import QUANTIZATION_BITS
from nncf.config.schemata.defaults import QUANTIZATION_NARROW_RANGE
from nncf.config.schemata.defaults import QUANTIZATION_PER_CHANNEL
from nncf.parameters import StrEnum
from nncf.parameters import TargetDevice
Expand Down Expand Up @@ -48,6 +49,7 @@ def __init__(
mode: QuantizationScheme = QuantizationScheme.SYMMETRIC,
signedness_to_force: Optional[bool] = None,
per_channel: bool = QUANTIZATION_PER_CHANNEL,
narrow_range: bool = QUANTIZATION_NARROW_RANGE,
):
"""
:param num_bits: Bitwidth of the quantization.
Expand All @@ -61,18 +63,20 @@ def __init__(
self.mode = mode
self.signedness_to_force = signedness_to_force
self.per_channel = per_channel
self.narrow_range = narrow_range

def __eq__(self, other: object) -> bool:
if not isinstance(other, QuantizerConfig):
return False
return self.__dict__ == other.__dict__

def __str__(self) -> str:
return "B:{bits} M:{mode} SGN:{signedness} PC:{per_channel}".format(
return "B:{bits} M:{mode} SGN:{signedness} PC:{per_channel} NR:{narrow_range}".format(
bits=self.num_bits,
mode="S" if self.mode == QuantizationScheme.SYMMETRIC else "A",
signedness="ANY" if self.signedness_to_force is None else ("S" if self.signedness_to_force else "U"),
per_channel="Y" if self.per_channel else "N",
narrow_range="Y" if self.narrow_range else "N",
)

def __hash__(self) -> int:
Expand All @@ -93,26 +97,9 @@ def is_valid_requantization_for(self, other: "QuantizerConfig") -> bool:
self.mode is QuantizationScheme.ASYMMETRIC and other.mode is QuantizationScheme.SYMMETRIC,
self.signedness_to_force is None and other.signedness_to_force is not None,
self.signedness_to_force is True and other.signedness_to_force is False,
self.narrow_range != other.narrow_range,
]
if any(fail_conditions):
return False
return True

def compatible_with_a_unified_scale_linked_qconfig(self, linked_qconfig: "QuantizerConfig") -> bool:
"""
For two configs to be compatible in a unified scale scenario, all of their fundamental parameters
must be aligned.

:param linked_qconfig: A QuantizerConfig that is compared against the current config.
:return: A boolean value specifying whether `linked_qconfig` is compatible with the current config in terms
of scale unification.
"""
return (
self.num_bits == linked_qconfig.num_bits
and self.mode == linked_qconfig.mode
and self.signedness_to_force == linked_qconfig.signedness_to_force
and self.per_channel == linked_qconfig.per_channel
)
return not any(fail_conditions)

def is_a_bitwidth_variant(self, other_qconfig: "QuantizerConfig") -> bool:
"""
Expand All @@ -138,6 +125,7 @@ def get_state(self) -> Dict[str, Any]:
"mode": self.mode,
"signedness_to_force": self.signedness_to_force,
"per_channel": self.per_channel,
"narrow_range": self.narrow_range,
}

@classmethod
Expand Down
1 change: 1 addition & 0 deletions nncf/config/schemata/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
QUANTIZATION_OVERFLOW_FIX = "enable"
QUANTIZATION_BITS = 8
QUANTIZATION_PER_CHANNEL = False
QUANTIZATION_NARROW_RANGE = False
QUANTIZATION_LOGARITHM_SCALE = False

ACTIVATIONS_QUANT_START_EPOCH = 1
Expand Down
Loading
Loading