
Commit dd94ac5
Function naming refactor
Parent: fb70c82

File tree: 10 files changed, +89 −85 lines


nncf/openvino/optimized_functions/__init__.py (+4 −2)

@@ -10,8 +10,10 @@
 # limitations under the License.

 from nncf.openvino.optimized_functions.functions import astype as astype
-from nncf.openvino.optimized_functions.functions import do_int_quantization as do_int_quantization
+from nncf.openvino.optimized_functions.functions import do_integer_quantization as do_integer_quantization
 from nncf.openvino.optimized_functions.functions import get_integer_quantization_error as get_integer_quantization_error
-from nncf.openvino.optimized_functions.functions import quantize_dequantize_weight as quantize_dequantize_weight
+from nncf.openvino.optimized_functions.functions import (
+    integer_quantize_dequantize_weight as integer_quantize_dequantize_weight,
+)
 from nncf.openvino.optimized_functions.models import OVModelParameters as OVModelParameters
 from nncf.openvino.optimized_functions.models import clear_ov_model_cache as clear_ov_model_cache
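For downstream code, the rename is a one-for-one import swap. A minimal migration sketch, assuming an NNCF build that includes this commit:

```python
# Before this commit (old names, removed above):
# from nncf.openvino.optimized_functions import do_int_quantization
# from nncf.openvino.optimized_functions import quantize_dequantize_weight

# After this commit (renamed re-exports from __init__.py):
from nncf.openvino.optimized_functions import do_integer_quantization
from nncf.openvino.optimized_functions import integer_quantize_dequantize_weight
```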

nncf/openvino/optimized_functions/functions.py (+8 −8)

@@ -15,9 +15,9 @@
 from nncf.openvino.optimized_functions.models import OV_MODEL_CACHE
 from nncf.openvino.optimized_functions.models import OVModelParameters
 from nncf.openvino.optimized_functions.models import get_astype_model
-from nncf.openvino.optimized_functions.models import get_compress_decompress_weight_model
-from nncf.openvino.optimized_functions.models import get_compress_weight_model
-from nncf.openvino.optimized_functions.models import get_quantization_error_model
+from nncf.openvino.optimized_functions.models import get_integer_quantization_error_model
+from nncf.openvino.optimized_functions.models import get_integer_quantization_model
+from nncf.openvino.optimized_functions.models import get_integer_quantize_dequantize_weight_model
 from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig
 from nncf.quantization.algorithms.weight_compression.weight_lowering import reshape_weight_for_grouped_quantization
 from nncf.tensor import Tensor
@@ -27,7 +27,7 @@
 ReductionAxes = Union[int, Tuple[int, ...]]


-def do_int_quantization(
+def do_integer_quantization(
     weight: Tensor,
     config: WeightCompressionConfig,
     reduction_axes: Optional[ReductionAxes] = None,
@@ -63,7 +63,7 @@ def do_int_quantization(
         {"compressed_weight": compressed_weight_dtype, "zero_point": compressed_weight_dtype}
     )

-    model = get_compress_weight_model(
+    model = get_integer_quantization_model(
         ov_model_params,
         config,
         weight_shape,
@@ -97,7 +97,7 @@ def do_int_quantization(
     return compressed_weight, scale, zero_point


-def quantize_dequantize_weight(
+def integer_quantize_dequantize_weight(
     weight: Tensor,
     config: WeightCompressionConfig,
     reduction_axes: Optional[ReductionAxes] = None,
@@ -135,7 +135,7 @@ def quantize_dequantize_weight(
     if precomputed_zero_point is not None:
         ov_model_params.input_dtypes["zero_point"] = precomputed_zero_point.dtype

-    model = get_compress_decompress_weight_model(
+    model = get_integer_quantize_dequantize_weight_model(
         ov_model_params, config, weight_shape, scale_shape, zero_point_shape, reduction_axes, return_compressed_weight
     )

@@ -188,7 +188,7 @@ def get_integer_quantization_error(

     ov_model_params = OVModelParameters()
     ov_model_params.input_dtypes["weight"] = weight.dtype
-    model = get_quantization_error_model(
+    model = get_integer_quantization_error_model(
         ov_model_params, config, original_weight_shape, weight.shape, original_reduction_axes, reduction_axes
     )
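The renamed entry points keep their visible signatures; only the names change. A usage sketch inferred from the hunks above, with return contracts taken from the signatures and call sites in this diff (treat keyword details as assumptions, not documented API):

```python
from nncf.openvino.optimized_functions import do_integer_quantization
from nncf.openvino.optimized_functions import integer_quantize_dequantize_weight
from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig
from nncf.tensor import Tensor
from nncf.tensor import functions as fns

def quantization_residual(weight: Tensor, config: WeightCompressionConfig, reduction_axes) -> float:
    # Quantize only: returns the packed integer weight plus its quantization parameters.
    compressed_weight, scale, zero_point = do_integer_quantization(weight, config, reduction_axes)

    # Quantize-dequantize in one pass: returns the fake-quantized (decompressed) weight.
    decompressed_weight = integer_quantize_dequantize_weight(weight, config, reduction_axes)

    # Frobenius norm of the residual, mirroring mixed_precision.py below.
    return fns.linalg.norm(decompressed_weight - weight, ord="fro").item()
```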

nncf/openvino/optimized_functions/models.py (+15 −15)

@@ -168,7 +168,7 @@ def _infer_ov_model(
     return outputs


-def _prepare_compression_model_inputs(
+def _prepare_quantization_model_inputs(
     ov_model_params,
     weight_shape: Tuple,
     scale_shape: Optional[Tuple],
@@ -196,7 +196,7 @@ def _prepare_compression_model_inputs(
     return weight_shape, scale_shape, zero_point_shape


-def get_compress_weight_model(
+def get_integer_quantization_model(
     ov_model_params: OVModelParameters,
     config: WeightCompressionConfig,
     weight_shape: Tuple,
@@ -219,11 +219,11 @@ def get_compress_weight_model(
     :return: A model callable that compresses weights using the given configuration. Or a model as nodes, if
         `return_nodes` is True.
     """
-    weight_shape, scale_shape, zero_point_shape = _prepare_compression_model_inputs(
+    weight_shape, scale_shape, zero_point_shape = _prepare_quantization_model_inputs(
         ov_model_params, weight_shape, scale_shape, zero_point_shape, reduction_axes
     )

-    return _build_compress_model(
+    return _build_integer_quantization_model(
         config,
         ov_model_params,
         weight_shape,
@@ -233,7 +233,7 @@ def get_compress_weight_model(
     )


-def get_compress_decompress_weight_model(
+def get_integer_quantize_dequantize_weight_model(
     ov_model_params: OVModelParameters,
     config: WeightCompressionConfig,
     weight_shape: Tuple,
@@ -259,11 +259,11 @@ def get_compress_decompress_weight_model(
     :return: A model callable that returns a decompressed weight, and optionally compressed weight, scale,
         (and zero point) if `return_compressed_weight` is True.
     """
-    weight_shape, scale_shape, zero_point_shape = _prepare_compression_model_inputs(
+    weight_shape, scale_shape, zero_point_shape = _prepare_quantization_model_inputs(
         ov_model_params, weight_shape, scale_shape, zero_point_shape, reduction_axes
     )

-    return _build_compress_decompress_model(
+    return _build_integer_quantize_dequantize_weight_model(
         config,
         ov_model_params,
         weight_shape,
@@ -274,7 +274,7 @@ def get_compress_decompress_weight_model(
     )


-def get_quantization_error_model(
+def get_integer_quantization_error_model(
     ov_model_params: OVModelParameters,
     config: WeightCompressionConfig,
     original_weight_shape: Tuple,
@@ -296,15 +296,15 @@ def get_quantization_error_model(
     :param reduction_axes: Axes to reduce the weight tensor.
     :return: A model callable that returns the quantization error.
     """
-    weight_shape, _, _ = _prepare_compression_model_inputs(ov_model_params, weight_shape, None, None, reduction_axes)
+    weight_shape, _, _ = _prepare_quantization_model_inputs(ov_model_params, weight_shape, None, None, reduction_axes)

-    return _build_quantization_error_model(
+    return _build_integer_quantization_error_model(
         config, ov_model_params, original_weight_shape, weight_shape, original_reduction_axes, reduction_axes
     )


 @cache_results(OV_MODEL_CACHE)
-def _build_compress_model(
+def _build_integer_quantization_model(
     config: WeightCompressionConfig,
     ov_model_params: OVModelParameters,
     weight_shape: Tuple,
@@ -454,7 +454,7 @@ def _build_compress_model(


 @cache_results(OV_MODEL_CACHE)
-def _build_compress_decompress_model(
+def _build_integer_quantize_dequantize_weight_model(
     config: WeightCompressionConfig,
     ov_model_params: OVModelParameters,
     weight_shape: Tuple,
@@ -477,7 +477,7 @@ def _build_compress_decompress_model(
         raise ValueError(msg)

     # Get compression model as input/result nodes and potentially modified ov model parameters
-    ov_parameters, ov_results, ov_model_params = _build_compress_model(
+    ov_parameters, ov_results, ov_model_params = _build_integer_quantization_model(
         config, ov_model_params, weight_shape, scale_shape, zero_point_shape, reduction_axes, return_nodes=True
     )

@@ -514,15 +514,15 @@ def _build_compress_decompress_model(


 @cache_results(OV_MODEL_CACHE)
-def _build_quantization_error_model(
+def _build_integer_quantization_error_model(
     config: WeightCompressionConfig,
     ov_model_params: OVModelParameters,
     original_weight_shape: Tuple,
     weight_shape: Tuple,
     original_reduction_axes: ReductionAxes,
     reduction_axes: ReductionAxes,
 ) -> ModelCallable:
-    ov_parameters, ov_results, ov_model_params = _build_compress_decompress_model(
+    ov_parameters, ov_results, ov_model_params = _build_integer_quantize_dequantize_weight_model(
         config,
         ov_model_params,
         weight_shape,
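All three `_build_*` helpers remain behind `@cache_results(OV_MODEL_CACHE)`, so compiled models are still memoized per argument set; the renames only change the function names in the cache keys. An illustrative sketch of that decorator pattern (hypothetical; NNCF's real `cache_results` may differ in key construction and locking):

```python
from functools import wraps

def cache_results(cache: dict):
    """Memoize a builder's result in `cache`, keyed by function name and arguments."""
    def decorator(fn):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            # Assumes hashable arguments; a real implementation would normalize them.
            key = (fn.__name__, args, tuple(sorted(kwargs.items())))
            if key not in cache:
                cache[key] = fn(*args, **kwargs)
            return cache[key]
        return wrapper
    return decorator
```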

nncf/quantization/algorithms/weight_compression/awq.py (+6 −6)

@@ -30,10 +30,10 @@
 from nncf.quantization.algorithms.weight_compression.activation_stats import process_stats
 from nncf.quantization.algorithms.weight_compression.backend import WeightCompressionAlgoBackend
 from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters
+from nncf.quantization.algorithms.weight_compression.weight_lowering import calculate_nf4_quantized_weight
 from nncf.quantization.algorithms.weight_compression.weight_lowering import calculate_nf4_scale
-from nncf.quantization.algorithms.weight_compression.weight_lowering import do_nf4_dequantization
-from nncf.quantization.algorithms.weight_compression.weight_lowering import do_nf4_quantization
-from nncf.quantization.algorithms.weight_compression.weight_lowering import quantize_dequantize_weight
+from nncf.quantization.algorithms.weight_compression.weight_lowering import do_float_dequantization
+from nncf.quantization.algorithms.weight_compression.weight_lowering import integer_quantize_dequantize_weight
 from nncf.quantization.passes import transform_to_inference_graph
 from nncf.tensor import TensorDataType
 from nncf.tensor import functions as fns
@@ -256,10 +256,10 @@ def apply(
             weights_to_fake_quantize = gweight * cur_scale
             if config.mode == CompressWeightsMode.NF4:
                 g_c_scale = calculate_nf4_scale(weights_to_fake_quantize, reduction_axis)
-                g_compressed_weighs = do_nf4_quantization(weights_to_fake_quantize, g_c_scale)
-                g_decompressed_weighs = do_nf4_dequantization(g_compressed_weighs, g_c_scale)
+                g_compressed_weighs = calculate_nf4_quantized_weight(weights_to_fake_quantize, g_c_scale)
+                g_decompressed_weighs = do_float_dequantization(g_compressed_weighs, g_c_scale)
             else:
-                g_decompressed_weighs = quantize_dequantize_weight(
+                g_decompressed_weighs = integer_quantize_dequantize_weight(
                     weights_to_fake_quantize, awq_config, reduction_axis
                 )
             sacts = gacts / fns.unsqueeze(cur_scale, 1)
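The NF4 branch above is a plain quantize/dequantize round trip under the renamed helpers. A condensed sketch of that round trip, wrapped as a function so it is self-contained (the helper names come from this diff; argument conventions are assumptions based on the call sites above):

```python
from nncf.quantization.algorithms.weight_compression.weight_lowering import (
    calculate_nf4_quantized_weight,
    calculate_nf4_scale,
    do_float_dequantization,
)
from nncf.tensor import Tensor

def nf4_fake_quantize(weights: Tensor, reduction_axis: int) -> Tensor:
    # Per-group NF4 scale, map weights onto NF4 code values, then back to float
    # to simulate the quantization error, as the AWQ loop above does.
    scale = calculate_nf4_scale(weights, reduction_axis)
    codes = calculate_nf4_quantized_weight(weights, scale)
    return do_float_dequantization(codes, scale)
```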

nncf/quantization/algorithms/weight_compression/gptq.py (+6 −6)

@@ -27,10 +27,10 @@
 from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters
 from nncf.quantization.algorithms.weight_compression.scale_estimation import ScaleEstimation
 from nncf.quantization.algorithms.weight_compression.weight_lowering import calculate_integer_quantization_params
+from nncf.quantization.algorithms.weight_compression.weight_lowering import calculate_nf4_quantized_weight
 from nncf.quantization.algorithms.weight_compression.weight_lowering import calculate_nf4_scale
-from nncf.quantization.algorithms.weight_compression.weight_lowering import do_nf4_dequantization
-from nncf.quantization.algorithms.weight_compression.weight_lowering import do_nf4_quantization
-from nncf.quantization.algorithms.weight_compression.weight_lowering import quantize_dequantize_weight
+from nncf.quantization.algorithms.weight_compression.weight_lowering import do_float_dequantization
+from nncf.quantization.algorithms.weight_compression.weight_lowering import integer_quantize_dequantize_weight
 from nncf.tensor import Tensor
 from nncf.tensor import functions as fns
 from nncf.tensor.definitions import TensorDataType
@@ -284,12 +284,12 @@ def _quantize_weights(
                 zero_points.append(zero_point)

             if block_compression_config.mode == CompressWeightsMode.NF4:
-                compressed_weights = do_nf4_quantization(
+                compressed_weights = calculate_nf4_quantized_weight(
                     fns.unsqueeze(weight_col, 1), scales[-1], is_normalized_weight=False
                 )
-                quantized_col = do_nf4_dequantization(compressed_weights, scales[-1], reduction_axis=-1)
+                quantized_col = do_float_dequantization(compressed_weights, scales[-1], reduction_axis=-1)
             else:
-                quantized_col, compressed_weights, _, _ = quantize_dequantize_weight(
+                quantized_col, compressed_weights, _, _ = integer_quantize_dequantize_weight(
                     fns.unsqueeze(weight_col, 1),
                     block_compression_config,
                     reduction_axes=None,
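Note the four-way unpacking in the integer branch: per the `get_integer_quantize_dequantize_weight_model` docstring above, the compressed weight, scale, and zero point come back alongside the decompressed weight when `return_compressed_weight` is True. A sketch of that calling convention (the keyword name appears in this diff; the exact return order beyond the first value is inferred from the unpacking above, not a documented contract):

```python
from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig
from nncf.quantization.algorithms.weight_compression.weight_lowering import (
    integer_quantize_dequantize_weight,
)
from nncf.tensor import Tensor

def quantize_column(weight_col: Tensor, config: WeightCompressionConfig):
    # Returns the fake-quantized column plus the packed representation and its parameters.
    quantized_col, compressed, scale, zero_point = integer_quantize_dequantize_weight(
        weight_col,
        config,
        reduction_axes=None,
        return_compressed_weight=True,
    )
    return quantized_col, compressed, scale, zero_point
```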

nncf/quantization/algorithms/weight_compression/lora_correction.py (+8 −6)

@@ -25,9 +25,9 @@
 from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig
 from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters
 from nncf.quantization.algorithms.weight_compression.weight_lowering import CompressedWeight
-from nncf.quantization.algorithms.weight_compression.weight_lowering import do_int_dequantization
-from nncf.quantization.algorithms.weight_compression.weight_lowering import do_nf4_dequantization
-from nncf.quantization.algorithms.weight_compression.weight_lowering import do_nf4_quantization
+from nncf.quantization.algorithms.weight_compression.weight_lowering import calculate_nf4_quantized_weight
+from nncf.quantization.algorithms.weight_compression.weight_lowering import do_float_dequantization
+from nncf.quantization.algorithms.weight_compression.weight_lowering import do_integer_dequantization
 from nncf.tensor import Tensor
 from nncf.tensor import functions as fns
 from nncf.tensor.definitions import TensorDataType
@@ -170,15 +170,17 @@ def calculate_low_rank_matrices(
         assert len(reduction_axes) == 1, "Assumed a single reduction axis"
         reduction_axis = reduction_axes[0] if compression_config.group_size != -1 else -1
         if mode in (CompressWeightsMode.INT4_SYM, CompressWeightsMode.INT4_ASYM):
-            fq_weights = do_int_dequantization(
+            fq_weights = do_integer_dequantization(
                 compressed_weight.tensor,
                 compressed_weight.scale,
                 compressed_weight.zero_point,
                 reduction_axis,
             )
         elif mode == CompressWeightsMode.NF4:
-            indexes = do_nf4_quantization(compressed_weight.tensor, compressed_weight.scale, is_normalized_weight=True)
-            fq_weights = do_nf4_dequantization(indexes, compressed_weight.scale, reduction_axis)
+            indexes = calculate_nf4_quantized_weight(
+                compressed_weight.tensor, compressed_weight.scale, is_normalized_weight=True
+            )
+            fq_weights = do_float_dequantization(indexes, compressed_weight.scale, reduction_axis)
         else:
             msg = (
                 f"{mode.value} mode is invalid for Lora Correction algorithm. Supported modes: INT4_SYM, INT4_ASYM, NF4"

nncf/quantization/algorithms/weight_compression/mixed_precision.py (+2 −2)

@@ -29,7 +29,7 @@
 from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig
 from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters
 from nncf.quantization.algorithms.weight_compression.weight_lowering import get_integer_quantization_error
-from nncf.quantization.algorithms.weight_compression.weight_lowering import quantize_dequantize_weight
+from nncf.quantization.algorithms.weight_compression.weight_lowering import integer_quantize_dequantize_weight
 from nncf.tensor import Tensor
 from nncf.tensor import functions as fns
 from nncf.tensor.definitions import TensorDataType
@@ -353,7 +353,7 @@ def _calc_weight_sensitivity(
         if weight.dtype != TensorDataType.float32:
             weight = weight.astype(TensorDataType.float32)

-        decompressed_weight = quantize_dequantize_weight(weight, backup_config, reduction_axes)
+        decompressed_weight = integer_quantize_dequantize_weight(weight, backup_config, reduction_axes)
         decompressed_weight = decompressed_weight.reshape(orig_shape)
         return fns.linalg.norm(decompressed_weight - weight, ord="fro").item()
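As a closing reference, the renames visible in the hunks above, collected into one map (this page shows seven of the ten changed files, so the map covers only what is visible here):

```python
# Old name -> new name, as introduced by commit dd94ac5.
RENAMES = {
    "do_int_quantization": "do_integer_quantization",
    "quantize_dequantize_weight": "integer_quantize_dequantize_weight",
    "do_int_dequantization": "do_integer_dequantization",
    "do_nf4_quantization": "calculate_nf4_quantized_weight",
    "do_nf4_dequantization": "do_float_dequantization",
    "get_compress_weight_model": "get_integer_quantization_model",
    "get_compress_decompress_weight_model": "get_integer_quantize_dequantize_weight_model",
    "get_quantization_error_model": "get_integer_quantization_error_model",
    "_prepare_compression_model_inputs": "_prepare_quantization_model_inputs",
    "_build_compress_model": "_build_integer_quantization_model",
    "_build_compress_decompress_model": "_build_integer_quantize_dequantize_weight_model",
    "_build_quantization_error_model": "_build_integer_quantization_error_model",
}
```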
