
Commit 25d1af8

Adapt autoround v0.4 (#2073)

Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>

1 parent 5c72158

File tree: 2 files changed (+5 −5 lines)

neural_compressor/adaptor/pytorch.py (+2 −2)
@@ -4926,7 +4926,7 @@ def autoround_quantize(self, model, tune_cfg, dataloader):
         act_group_size = self.recipes["autoround_args"].get("act_group_size", None)
         act_sym = self.recipes["autoround_args"].get("act_sym", None)
         act_dynamic = self.recipes["autoround_args"].get("act_dynamic", True)
-        quant_block_list = self.recipes["autoround_args"].get("quant_block_list", None)
+        to_quant_block_names = self.recipes["autoround_args"].get("to_quant_block_names", None)
         use_layer_wise = self.recipes["autoround_args"].get("use_layer_wise", False)

         if dataloader is not None:
@@ -4959,7 +4959,7 @@ def autoround_quantize(self, model, tune_cfg, dataloader):
             dynamic_max_gap=dynamic_max_gap,
             data_type=data_type,
             scale_dtype=scale_dtype,
-            quant_block_list=quant_block_list,
+            to_quant_block_names=to_quant_block_names,
             act_bits=act_bits,
             act_group_size=act_group_size,
             act_sym=act_sym,
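
The hunks above only change which recipe key the adaptor reads. As a minimal sketch (not part of this commit), the lookup now expects a recipes dict shaped roughly like the following; the block names are hypothetical placeholders:

# Hypothetical recipes dict matching the lookups shown in pytorch.py above.
recipes = {
    "autoround_args": {
        "act_group_size": None,
        "act_sym": None,
        "act_dynamic": True,
        # key renamed in this commit; previously "quant_block_list"
        "to_quant_block_names": [["model.layers.0", "model.layers.1"]],
        "use_layer_wise": False,
    }
}

# Same lookup as the changed line above.
to_quant_block_names = recipes["autoround_args"].get("to_quant_block_names", None)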

neural_compressor/adaptor/torch_utils/weight_only.py (+3 −3)
@@ -706,7 +706,7 @@ def autoround_quantize(
     dynamic_max_gap: int = -1,
     data_type: str = "int",  ##only support int for now
     scale_dtype: str = "fp16",
-    quant_block_list: list = None,
+    to_quant_block_names: list = None,
     act_bits: int = 32,
     act_group_size: int = None,
     act_sym: bool = None,
@@ -761,7 +761,7 @@ def autoround_quantize(
         data_type (str): The data type to be used (default is "int").
         scale_dtype (str): The data type of quantization scale to be used (default is "float32"), different kernels
                            have different choices.
-        quant_block_list (list): A list whose elements are list of block's layer names to be quantized.
+        to_quant_block_names (list): A list whose elements are list of block's layer names to be quantized.
         act_bits (int): Number of bits for activation quantization. Default is 32.
         act_group_size (int): Group size for activation quantization. Default is None.
         act_sym (bool): Whether to use symmetric activation quantization. Default is None.
@@ -800,7 +800,7 @@ def autoround_quantize(
         dynamic_max_gap=dynamic_max_gap,
         data_type=data_type,  ## only support data_type
         scale_dtype=scale_dtype,
-        quant_block_list=quant_block_list,
+        to_quant_block_names=to_quant_block_names,
         act_bits=act_bits,
         act_group_size=act_group_size,
         act_sym=act_sym,
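
For reference, the docstring above describes to_quant_block_names as a list whose elements are lists of block layer names. A minimal sketch of that shape, with hypothetical layer names not taken from this commit:

# Nested-list shape implied by the docstring: one inner list per block group.
# The layer names below are illustrative placeholders.
to_quant_block_names = [
    ["model.decoder.layers.0", "model.decoder.layers.1"],
    ["model.decoder.layers.2"],
]

# The value is forwarded via the keyword shown in the final hunk:
#   autoround_quantize(..., to_quant_block_names=to_quant_block_names, ...)
# (other required arguments such as the model and dataloader are omitted here).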
