File tree 1 file changed +7
-3
lines changed
neural_compressor/transformers/quantization
1 file changed +7
-3
lines changed Original file line number Diff line number Diff line change @@ -660,9 +660,13 @@ def convert_to_GPTQ_checkpoints(model, quantization_config):
660
660
new_module .n_pack = 32 // bits
661
661
scales = module ._op_context .get_scales ().t ().contiguous ()
662
662
bias = module ._op_context .get_bias ()
663
- qzeros = new_module .pack_tensor_with_numpy (
664
- module ._op_context .get_zero_points ().t ().to (torch .uint8 ) - 1
665
- ).contiguous ()
663
+ qzeros = module ._op_context .get_zero_points ().t ().to (torch .uint8 )
664
+ # For group_size = -1, scales and qzeros are 1-D tensors, so add a leading dimension to make them 2-D
665
+ if len (scales .shape ) == 1 :
666
+ scales = scales .unsqueeze (0 )
667
+ if len (qzeros .shape ) == 1 :
668
+ qzeros = qzeros .unsqueeze (0 )
669
+ qzeros = new_module .pack_tensor_with_numpy (qzeros - 1 ).contiguous ()
666
670
g_idx = module ._op_context .get_g_idx ()
667
671
668
672
new_module .qweight = qweight
You can’t perform that action at this time.
0 commit comments