Clean up GPTQ: remove commented-out code and update layer-wise test

Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
intel · Kaihui-intel · Jul 16, 2024 · Jun 25, 2024 · Jun 25, 2024 · Jun 25, 2024
commit b4e93f3625d240ff93e7e143c5739d2ed9c88d08
diff --git a/neural_compressor/torch/algorithms/weight_only/gptq.py b/neural_compressor/torch/algorithms/weight_only/gptq.py
@@ -573,18 +573,6 @@ def tmp(_, inp, out):
                         set_module_tensor_to_device,
                     )
 
-                    # sub_layer = sub_layers[layer_name]
-                    # full_layer_name = self.get_full_layer_name(layer_name, block_idx)
-                    # for n, p in sub_layer.named_parameters():
-                    #     param_name = full_layer_name + "." + n
-                    #     # breakpoint()
-                    #     if n == "weight":
-                    #         set_module_tensor_to_device(self.model, param_name, self.device, Q)
-                    #     else:
-                    #         value = load_value(self.model, param_name, model_path)
-                    #         set_module_tensor_to_device(self.model, param_name, self.device, value)
-                    # sub_layer.weight.data = Q
-                    # torch.save(sub_layer.state_dict(), LWQ_WORKSPACE + f"/{full_layer_name}.pt")
                     torch.save(new_module.state_dict(), LWQ_WORKSPACE + f"/{full_layer_name}.pt")
                     clean_module_weight(new_module)
                     del Q

diff --git a/test/3x/torch/quantization/weight_only/test_gptq.py b/test/3x/torch/quantization/weight_only/test_gptq.py
@@ -28,10 +28,8 @@ def run_fn(model):
     # GPTQ uses ValueError to reduce computation when collecting input data of the first block
     # It's special for UTs, no need to add this wrapper in examples.
     with pytest.raises(ValueError):
-    #     model(torch.tensor([[10, 20, 30]], dtype=torch.long).to(device))
-    #     model(torch.tensor([[40, 50, 60]], dtype=torch.long).to(device))
-        model(torch.tensor([[10, 20, 30]], dtype=torch.long))
-        model(torch.tensor([[40, 50, 60]], dtype=torch.long))
+        model(torch.tensor([[10, 20, 30]], dtype=torch.long).to(device))
+        model(torch.tensor([[40, 50, 60]], dtype=torch.long).to(device))
 
 
 class TestGPTQQuant:
@@ -182,14 +180,15 @@ def test_layer_wise(self):
         q_label = model(self.example_inputs)[0]
 
         from neural_compressor.torch.algorithms.layer_wise import load_empty_model
-        model = load_empty_model("hf-internal-testing/tiny-random-GPTJForCausalLM",  torchscript=True)
-
+        model = load_empty_model("hf-internal-testing/tiny-random-GPTJForCausalLM")
 
         quant_config = GPTQConfig(
             use_layer_wise=True,
             model_path="hf-internal-testing/tiny-random-GPTJForCausalLM"
         )
-        model = quantize(model, quant_config, run_fn=run_fn)
+        model = prepare(model, quant_config)
+        run_fn(model)
+        model = convert(model)
         out = model(self.example_inputs)[0]
         assert torch.equal(out, q_label), "use_layer_wise=True output should be same. Please double check."