Skip to content

Commit 9df265a

Browse files
authored
Fix: GPTQ fails with per-channel int4 compression. (#3285)
### Changes

Use `block_compression_config` as input for the scale estimation algorithm.

### Reason for changes

GPTQ fails with per-channel int4 compression.

### Related tickets

ref: 159891

### Tests

test_call_gptq_with_dataset_scale_estimation_neg_group_size
1 parent 045c5c1 commit 9df265a

File tree

2 files changed

+12
-5
lines changed

2 files changed

+12
-5
lines changed

nncf/quantization/algorithms/weight_compression/gptq.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -273,18 +273,17 @@ def _quantize_weights(
273273
wc_statistics,
274274
weight_tensor[:, (i1 + i) : (i1 + i + group_size)],
275275
reduction_axes,
276-
wc_params.compression_config,
276+
block_compression_config,
277277
)
278-
scales.append(scale.squeeze(axis=1))
279-
zero_points.append(zero_point if zero_point is None else zero_point.squeeze(axis=1))
280278
else:
281279
scale, zero_point = calculate_integer_quantization_params(
282280
weight_tensor[:, (i1 + i) : (i1 + i + group_size)],
283281
reduction_axes,
284282
block_compression_config,
285283
)
286-
scales.append(scale)
287-
zero_points.append(zero_point)
284+
scales.append(scale)
285+
zero_points.append(zero_point)
286+
288287
if block_compression_config.mode == CompressWeightsMode.NF4:
289288
compressed_weights = do_nf4_quantization(
290289
fns.unsqueeze(weight_col, 1), scales[-1], is_normalized_weight=False

tests/openvino/native/quantization/test_weights_compression.py

+8
Original file line numberDiff line numberDiff line change
@@ -952,6 +952,14 @@ def test_call_gptq(mode):
952952
compress_weights(model, mode=mode, ratio=1.0, group_size=2, dataset=dataset, gptq=True)
953953

954954

955+
@pytest.mark.parametrize("mode", INT4_NF4_MODES)
956+
def test_call_gptq_with_dataset_scale_estimation_neg_group_size(mode):
957+
model = AWQMatmulModel().ov_model
958+
dataset = Dataset([np.ones([1, 8, 8])])
959+
960+
compress_weights(model, mode=mode, ratio=1.0, group_size=-1, dataset=dataset, gptq=True, scale_estimation=True)
961+
962+
955963
# TODO(andreyanufr) Waiting for the e2m1 in OV release
956964
@pytest.mark.xfail
957965
@pytest.mark.parametrize(

0 commit comments

Comments (0)