Skip to content

Commit eeff312

Browse files
[HWConfig] narrow_range parameter is introduced in hardware config (#3196)
### Changes * The narrow range parameter is moved to the hardware config * Embedding / embedding bag nodes propagate the narrow_range attribute of the activation quantizer up to the weight quantizer ### Reason for changes * To extend the set of possible quantization configurations (like weights + symmetric + narrow_range=False) required for some runtimes (e.g. XNNPACK) ### Related tickets ### Tests * tests/cross_fw/test_templates/test_calculate_quantizer_parameters.py is updated to check all possible combinations of existing qconfigs with narrow_range True/False * tests/common/quantization/test_quantizer_propagation_solver.py is updated to check the requantization rule with narrow_range and the rule of configuration merging with different narrow_range parameters * tests/common/quantization/test_quantizer_propagation_graph.py is updated to check that subsequent quantizers with different narrow_range values do not merge * reference quantizer scales are updated to reflect the fix of embedding weights quantization post_training_quantization/586/ - Passed job/weekly/job/ubuntu20_eval/245/ (+ job/ubuntu20_eval/246/) - Passed eval_tf/461/ - Passed torch_weekly/100/ - Passed job/nightly/job/torch_nightly/444/ - Passed TriggerBetta/969/ - Passed
1 parent 89aba29 commit eeff312

File tree

79 files changed

+5334
-4976
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

79 files changed

+5334
-4976
lines changed

nncf/common/hardware/config.py

+7-22
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import nncf
2121
from nncf.common.graph.operator_metatypes import OperatorMetatype
2222
from nncf.common.logging import nncf_logger
23-
from nncf.common.quantization import quantizers as quant
2423
from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode
2524
from nncf.common.quantization.structs import QuantizerConfig
2625
from nncf.common.utils.helpers import product_dict
@@ -158,28 +157,14 @@ def get_qconf_from_hw_config_subdict(quantization_subdict: Dict[str, Any]) -> Qu
158157
bits = quantization_subdict["bits"]
159158
mode = HWConfig.get_quantization_mode_from_config_value(quantization_subdict["mode"])
160159
is_per_channel = HWConfig.get_is_per_channel_from_config_value(quantization_subdict["granularity"])
161-
signedness_to_force = None
162-
if "level_low" in quantization_subdict and "level_high" in quantization_subdict:
163-
signedness_to_force = False
164-
if mode == QuantizationMode.SYMMETRIC:
165-
if quantization_subdict["level_low"] < 0 < quantization_subdict["level_high"]:
166-
signedness_to_force = True
167-
true_level_low, true_level_high = quant.calculate_symmetric_level_ranges(bits, signed=True)
168-
else:
169-
signedness_to_force = True
170-
true_level_low, true_level_high = quant.calculate_asymmetric_level_ranges(bits)
171-
172-
assert (
173-
quantization_subdict["level_low"] == true_level_low
174-
), "Invalid value of quantizer parameter `level_low`.\
175-
The parameter must be consistent with other parameters!"
176-
assert (
177-
quantization_subdict["level_high"] == true_level_high
178-
), "Invalid value of quantizer parameter `level_high`.\
179-
The parameter must be consistent with other parameters!"
180-
160+
signedness_to_force = quantization_subdict.get("signedness_to_force")
161+
narrow_range = quantization_subdict["narrow_range"]
181162
return QuantizerConfig(
182-
num_bits=bits, mode=mode, per_channel=is_per_channel, signedness_to_force=signedness_to_force
163+
num_bits=bits,
164+
mode=mode,
165+
per_channel=is_per_channel,
166+
signedness_to_force=signedness_to_force,
167+
narrow_range=narrow_range,
183168
)
184169

185170
@staticmethod

nncf/common/hardware/configs/cpu.json

+19-8
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,17 @@
77
"mode": [
88
"symmetric"
99
],
10-
"granularity": "pertensor"
10+
"granularity": "pertensor",
11+
"narrow_range": false
1112
},
1213
"q8_a": {
1314
"bits": 8,
1415
"mode": [
1516
"symmetric",
1617
"asymmetric"
1718
],
18-
"granularity": "pertensor"
19+
"granularity": "pertensor",
20+
"narrow_range": false
1921
},
2022
"q8_a_ch": {
2123
"bits": 8,
@@ -26,19 +28,28 @@
2628
"granularity": [
2729
"perchannel",
2830
"pertensor"
29-
]
31+
],
32+
"narrow_range": false
3033
},
3134
"q8_w_sym": {
3235
"bits": 8,
3336
"mode": "symmetric",
34-
"level_low": -128,
35-
"level_high": 127,
36-
"granularity": ["perchannel", "pertensor"]
37+
"signedness_to_force": true,
38+
"granularity": ["perchannel", "pertensor"],
39+
"narrow_range": true
40+
},
41+
"q8_w_sym_any_nr": {
42+
"bits": 8,
43+
"mode": "symmetric",
44+
"signedness_to_force": true,
45+
"granularity": ["perchannel", "pertensor"],
46+
"narrow_range": [true, false]
3747
},
3848
"q8_w_asym": {
3949
"bits": 8,
4050
"mode": "asymmetric",
41-
"granularity": ["perchannel", "pertensor"]
51+
"granularity": ["perchannel", "pertensor"],
52+
"narrow_range": false
4253
}
4354
}
4455
},
@@ -288,7 +299,7 @@
288299
{
289300
"type": "Embedding",
290301
"quantization": {
291-
"weights": ["q8_w_sym", "q8_w_asym"]
302+
"weights": ["q8_w_sym_any_nr", "q8_w_asym"]
292303
}
293304
},
294305
{"type": "EmbeddingBag"}

nncf/common/hardware/configs/gpu.json

+9-6
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
"symmetric",
99
"asymmetric"
1010
],
11-
"granularity": "pertensor"
11+
"granularity": "pertensor",
12+
"narrow_range": false
1213
},
1314
"q8_a_ch": {
1415
"bits": 8,
@@ -19,25 +20,27 @@
1920
"granularity": [
2021
"perchannel",
2122
"pertensor"
22-
]
23+
],
24+
"narrow_range": false
2325
},
2426
"q8_w_sym": {
2527
"bits": 8,
2628
"mode": "symmetric",
27-
"level_low": -128,
28-
"level_high": 127,
29+
"signedness_to_force": true,
2930
"granularity": [
3031
"perchannel",
3132
"pertensor"
32-
]
33+
],
34+
"narrow_range": true
3335
},
3436
"q8_w_asym": {
3537
"bits": 8,
3638
"mode": "asymmetric",
3739
"granularity": [
3840
"perchannel",
3941
"pertensor"
40-
]
42+
],
43+
"narrow_range": false
4144
}
4245
}
4346
},

nncf/common/hardware/configs/npu.json

+30-14
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,17 @@
77
"mode": [
88
"symmetric"
99
],
10-
"granularity": "pertensor"
10+
"granularity": "pertensor",
11+
"narrow_range": false
1112
},
1213
"q8_a": {
1314
"bits": 8,
1415
"mode": [
1516
"symmetric",
1617
"asymmetric"
1718
],
18-
"granularity": "pertensor"
19+
"granularity": "pertensor",
20+
"narrow_range": false
1921
},
2022
"q8_a_ch": {
2123
"bits": 8,
@@ -26,52 +28,66 @@
2628
"granularity": [
2729
"perchannel",
2830
"pertensor"
29-
]
31+
],
32+
"narrow_range": false
3033
},
3134
"q8_w_sym": {
3235
"bits": 8,
3336
"mode": "symmetric",
34-
"level_low": -128,
35-
"level_high": 127,
36-
"granularity": ["perchannel", "pertensor"]
37+
"signedness_to_force": true,
38+
"granularity": ["perchannel", "pertensor"],
39+
"narrow_range": true
40+
},
41+
"q8_w_sym_any_nr": {
42+
"bits": 8,
43+
"mode": "symmetric",
44+
"signedness_to_force": true,
45+
"granularity": ["perchannel", "pertensor"],
46+
"narrow_range": [true, false]
3747
},
3848
"q8_w_asym": {
3949
"bits": 8,
4050
"mode": "asymmetric",
41-
"granularity": ["perchannel", "pertensor"]
51+
"granularity": ["perchannel", "pertensor"],
52+
"narrow_range": false
4253
},
4354
// 4-bit configs
4455
"q4_tn": {
4556
"bits": 4,
4657
"mode": "symmetric",
47-
"granularity": "pertensor"
58+
"granularity": "pertensor",
59+
"narrow_range": false
4860
},
4961
"q4_ch": {
5062
"bits": 4,
5163
"mode": "symmetric",
52-
"granularity": "perchannel"
64+
"granularity": "perchannel",
65+
"narrow_range": false
5366
},
5467
"q4_w": {
5568
"bits": 4,
5669
"mode": "symmetric",
5770
"granularity": [
5871
"perchannel",
5972
"pertensor"
60-
]
73+
],
74+
"narrow_range": false
6175
},
6276
// 2-bit configs
6377
"q2_ch": {
6478
"bits": 2,
6579
"mode": "symmetric",
66-
"granularity": "perchannel"
80+
"granularity": "perchannel",
81+
"narrow_range": false
6782
},
6883
"q2_w": {
6984
"bits": 2,
7085
"mode": "symmetric",
7186
"granularity": [
7287
"perchannel",
7388
"pertensor"
74-
]
89+
],
90+
"narrow_range": false
7591
}
7692
}
7793
},
@@ -382,15 +398,15 @@
382398
"type": "Embedding",
383399
"quantization": {
384400
"weights": [
385-
"q8_w_sym", "q8_w_asym"
401+
"q8_w_sym_any_nr", "q8_w_asym"
386402
]
387403
}
388404
},
389405
{
390406
"type": "EmbeddingBag",
391407
"quantization": {
392408
"weights": [
393-
"q8_w_sym", "q8_w_asym"
409+
"q8_w_sym_any_nr", "q8_w_asym"
394410
]
395411
}
396412
},

nncf/common/hardware/configs/template.json

+10-4
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,20 @@
1616
"granularity": [
1717
"pertensor",
1818
"perchannel"
19-
]
19+
],
20+
/*
21+
* Narrow range: whether NNCF should use 2**num_bits - 1 quantization levels (narrow range) instead of 2**num_bits
22+
*/
23+
"narrow_range": false
2024
},
21-
"q8_sym_tnr_-128_127": { // Alias name for set of hyperparameters
25+
"q8_sym_tnr_signed": { // Alias name for set of hyperparameters
2226
"bits": 8, // Number of quantization bits
2327
"mode": "symmetric", // Quantization mode
2428
"granularity": "pertensor", // Granularity: one scale for output tensor
25-
"level_low": -128, // Low quantization level
26-
"level_high": 127 // High quantization level
29+
"signedness_to_force": true, // signedness_to_force: True if the quantizer must be signed, False if must be unsigned,
30+
// None if the signed/unsigned attribute should be determined based on the incoming activation
31+
// statistics during range initialization.
32+
"narrow_range": true
2733
}
2834
}
2935
},

nncf/common/quantization/config_assignment.py

+2
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ def get_scoped_quantizer_config(
4747
qconfig.per_channel = config_overrides["per_channel"]
4848
if config_overrides.get("signed") is not None:
4949
qconfig.signedness_to_force = config_overrides["signed"]
50+
if config_overrides.get("narrow_range") is not None:
51+
qconfig.narrow_range = config_overrides["narrow_range"]
5052
return qconfig
5153

5254

nncf/common/quantization/quantizer_propagation/graph.py

+4
Original file line numberDiff line numberDiff line change
@@ -1170,6 +1170,10 @@ def is_downstream_quantizer_redundant(
11701170
(ds_config.per_channel == us_config.per_channel)
11711171
or (ds_config.per_channel is True and us_config.per_channel is False)
11721172
)
1173+
1174+
# Strictly prohibit merging of configs with different narrow_range params
1175+
is_redundant = is_redundant and (ds_config.narrow_range == us_config.narrow_range)
1176+
11731177
return is_redundant
11741178

11751179
def merge_traverse_fn(

nncf/common/quantization/quantizer_propagation/solver.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
from nncf.common.utils.debug import DEBUG_LOG_DIR
5252
from nncf.common.utils.debug import is_debug
5353
from nncf.common.utils.dot_file_rw import write_dot_graph
54+
from nncf.config.schemata.defaults import QUANTIZATION_NARROW_RANGE
5455

5556

5657
class TransitionStatus(Enum):
@@ -160,6 +161,7 @@ def is_final_qconfig_compatible_to_initial(initial_qconfig: QuantizerConfig) ->
160161
final_qconfig.per_channel == initial_qconfig.per_channel
161162
and final_qconfig.mode == initial_qconfig.mode
162163
and final_qconfig.num_bits == initial_qconfig.num_bits
164+
and final_qconfig.narrow_range == initial_qconfig.narrow_range
163165
and (
164166
final_qconfig.signedness_to_force == initial_qconfig.signedness_to_force
165167
or initial_qconfig.signedness_to_force is None
@@ -301,7 +303,13 @@ class QuantizerPropagationSolver:
301303
"""
302304

303305
DEFAULT_QUANTIZATION_TYPES = [
304-
QuantizerConfig(num_bits=8, mode=QuantizationMode.SYMMETRIC, signedness_to_force=None, per_channel=False)
306+
QuantizerConfig(
307+
num_bits=8,
308+
mode=QuantizationMode.SYMMETRIC,
309+
signedness_to_force=None,
310+
per_channel=False,
311+
narrow_range=QUANTIZATION_NARROW_RANGE,
312+
)
305313
]
306314

307315
DEFAULT_PROPAGATION_STRATEGY = QuantizerPropagationRule.MERGE_ALL_IN_ONE
@@ -1373,7 +1381,7 @@ def get_merged_qconfigs_for_downward_branching_case(
13731381
Returns a tuple, of which the first node is the qconfig list for the quantizer to be placed
13741382
above the branching node (i.e. that will affect all of the downward branches), and a list
13751383
of nodes which are either None (which means that the corresponding branch quantizer has been successfully
1376-
merged, or qconfigs list to be set for the corresponding branch quantizer if it cannot be merged (e.g. if
1384+
merged), or qconfigs list to be set for the corresponding branch quantizer if it cannot be merged (e.g. if
13771385
requantization to a lower bitwidth has to be done for this branch)
13781386
13791387
:param potential_qconfigs_for_each_branch: For each branch defines the list of available configurations
@@ -1495,7 +1503,8 @@ def __disambiguate_config_list(
14951503
"""
14961504
The input list should be sorted in descending order of priority. In case some qconfigs in the list have the
14971505
same priority, this function will resolve the ambiguity in ordering these qconfigs in the final returned
1498-
list.
1506+
list. Quantization configs in the list cannot have different narrow range parameters, so narrow range does
1507+
not participate in the __lt__ method of the QConfigComparator.
14991508
"""
15001509

15011510
class QConfigComparator:

nncf/common/quantization/quantizer_setup.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,16 @@
2424
from nncf.common.quantization.structs import UnifiedScaleType
2525
from nncf.common.quantization.structs import WeightQuantizerId
2626
from nncf.common.stateful_classes_registry import CommonStatefulClassesRegistry
27+
from nncf.config.schemata.defaults import QUANTIZATION_NARROW_RANGE
2728

2829
QuantizationPointId = int
2930

3031
DEFAULT_QUANTIZER_CONFIG = QuantizerConfig(
31-
num_bits=8, mode=QuantizationMode.SYMMETRIC, signedness_to_force=None, per_channel=False
32+
num_bits=8,
33+
mode=QuantizationMode.SYMMETRIC,
34+
signedness_to_force=None,
35+
per_channel=False,
36+
narrow_range=QUANTIZATION_NARROW_RANGE,
3237
)
3338

3439

0 commit comments

Comments
 (0)