Skip to content

Commit 9df265a

Browse files
authored
Fix: GPTQ fails with per-channel int4 compression. (#3285)
### Changes

Use `block_compression_config` as input for the scale estimation algorithm.

### Reason for changes

GPTQ fails with per-channel int4 compression.

### Related tickets

ref: 159891

### Tests

test_call_gptq_with_dataset_scale_estimation_neg_group_size
1 parent 045c5c1 commit 9df265a

File tree

2 files changed

+12
-5
lines changed

2 files changed

+12
-5
lines changed

nncf/quantization/algorithms/weight_compression/gptq.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -273,18 +273,17 @@ def _quantize_weights(
273273
wc_statistics,
274274
weight_tensor[:, (i1 + i) : (i1 + i + group_size)],
275275
reduction_axes,
276-
wc_params.compression_config,
276+
block_compression_config,
277277
)
278-
scales.append(scale.squeeze(axis=1))
279-
zero_points.append(zero_point if zero_point is None else zero_point.squeeze(axis=1))
280278
else:
281279
scale, zero_point = calculate_integer_quantization_params(
282280
weight_tensor[:, (i1 + i) : (i1 + i + group_size)],
283281
reduction_axes,
284282
block_compression_config,
285283
)
286-
scales.append(scale)
287-
zero_points.append(zero_point)
284+
scales.append(scale)
285+
zero_points.append(zero_point)
286+
288287
if block_compression_config.mode == CompressWeightsMode.NF4:
289288
compressed_weights = do_nf4_quantization(
290289
fns.unsqueeze(weight_col, 1), scales[-1], is_normalized_weight=False

tests/openvino/native/quantization/test_weights_compression.py

+8
Original file line numberDiff line numberDiff line change
@@ -952,6 +952,14 @@ def test_call_gptq(mode):
952952
compress_weights(model, mode=mode, ratio=1.0, group_size=2, dataset=dataset, gptq=True)
953953

954954

955+
@pytest.mark.parametrize("mode", INT4_NF4_MODES)
956+
def test_call_gptq_with_dataset_scale_estimation_neg_group_size(mode):
957+
model = AWQMatmulModel().ov_model
958+
dataset = Dataset([np.ones([1, 8, 8])])
959+
960+
compress_weights(model, mode=mode, ratio=1.0, group_size=-1, dataset=dataset, gptq=True, scale_estimation=True)
961+
962+
955963
# TODO(andreyanufr) Waiting for the e2m1 in OV release
956964
@pytest.mark.xfail
957965
@pytest.mark.parametrize(

0 commit comments

Comments (0)