Commit 4b0bb3b

fix saving issue for group_size=-1 (#2138)
Signed-off-by: xin3he <xin3.he@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Parent: 9e06f44

1 file changed: neural_compressor/transformers/quantization/utils.py (+7 −3 lines)
```diff
@@ -660,9 +660,13 @@ def convert_to_GPTQ_checkpoints(model, quantization_config):
             new_module.n_pack = 32 // bits
             scales = module._op_context.get_scales().t().contiguous()
             bias = module._op_context.get_bias()
-            qzeros = new_module.pack_tensor_with_numpy(
-                module._op_context.get_zero_points().t().to(torch.uint8) - 1
-            ).contiguous()
+            qzeros = module._op_context.get_zero_points().t().to(torch.uint8)
+            # For group_size = -1, the dimensions of scale and qzeros will be 1
+            if len(scales.shape) == 1:
+                scales = scales.unsqueeze(0)
+            if len(qzeros.shape) == 1:
+                qzeros = qzeros.unsqueeze(0)
+            qzeros = new_module.pack_tensor_with_numpy(qzeros - 1).contiguous()
             g_idx = module._op_context.get_g_idx()
 
             new_module.qweight = qweight
```
