@@ -240,8 +240,10 @@ def __init__(
         self.nsamples = nsamples

     def prepare_layer_wise(self, model_path):
-        from neural_compressor.torch.algorithms.layer_wise import LWQ_WORKSPACE, get_path, register_weight_hooks
         import os
+
+        from neural_compressor.torch.algorithms.layer_wise import LWQ_WORKSPACE, get_path, register_weight_hooks
+
         os.makedirs(LWQ_WORKSPACE, exist_ok=True)
         if model_path == "":
             model_path = self.model.path
@@ -250,7 +252,7 @@ def prepare_layer_wise(self, model_path):
         register_weight_hooks(
             self.model, self.model_path, device=self.device, clean_weight=True, saved_path=LWQ_WORKSPACE
         )
-
+
     def get_full_layer_name(self, sub_layer_name, block_idx):
         transformer_name = self.gptq_related_blocks["transformers_name"]
         return ".".join([transformer_name, str(block_idx), sub_layer_name])
@@ -443,6 +445,7 @@ def execute_quantization(self, means=None, stds=None):
                 weight_config_this_layer = self.get_layer_config(full_layer_name)
                 if self.use_layer_wise:  # pragma: no cover
                     from neural_compressor.torch.algorithms.layer_wise import load_value
+
                     W = load_value(self.model, full_layer_name + ".weight", self.model_path)
                 else:
                     W = sub_layers[layer_name].weight.data.clone()
@@ -489,10 +492,10 @@ def tmp(_, inp, out):
                         else:
                             value = load_value(self.model, param_name, self.model_path)
                             set_module_tensor_to_device(self.model, param_name, self.device, value)
-
+
                 else:
                     W = sub_layers[layer_name].weight.data.clone()
-
+
                 accelerator.mark_step()
                 if "hpu" in self.device:
                     W = W.to("cpu")
@@ -504,7 +507,7 @@ def tmp(_, inp, out):
                     act_order=weight_config_this_layer["act_order"],
                     static_groups=weight_config_this_layer["static_groups"],
                 )
-
+
                 # Step 2.5: export to compressed model
                 gptq_config[self.get_full_layer_name(layer_name, block_idx)] = {"scale": scale}
                 if not weight_config_this_layer["sym"]:
@@ -513,7 +516,7 @@ def tmp(_, inp, out):
                     gptq_config[self.get_full_layer_name(layer_name, block_idx)]["perm"] = gptq_for_this_block[
                         layer_name
                     ].perm
-
+
                 weight_config_this_layer = self.get_layer_config(self.get_full_layer_name(layer_name, block_idx))
                 gptq_scale = gptq_config[self.get_full_layer_name(layer_name, block_idx)]["scale"]
                 if not weight_config_this_layer["sym"]:
@@ -564,7 +567,7 @@ def tmp(_, inp, out):
                     device=self.device,
                 )
                 new_module.pack(int_weight, gptq_scale, gptq_zp, sub_layers[layer_name].bias, gptq_perm)
-
+
                 if self.use_layer_wise:  # pragma: no cover
                     from neural_compressor.torch.algorithms.layer_wise import (
                         LWQ_WORKSPACE,
@@ -595,8 +598,7 @@ def tmp(_, inp, out):
                 self.gptq_related_blocks["transformers"][block_idx] = transformer_block
             else:
                 self.gptq_related_blocks["transformers"][block_idx] = transformer_block.cpu()
-
-
+
             del gptq_for_this_block
             torch.cuda.empty_cache()
             # iteratively replace the input with output, thus layerwise quantization can continue.