Skip to content

Commit 7ee7215

Browse files
WOQ models export workaround for autoround different device (#1710)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 2ee53a9 commit 7ee7215

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

neural_compressor/model/torch_model.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -499,7 +499,7 @@ def export_compressed_model(
499499

500500
autoround_config = self.autoround_config if hasattr(self, "autoround_config") else {}
501501

502-
if gptq_config:
502+
if gptq_config or (autoround_config and device == "xpu"):
503503
for k, v in weight_config.items():
504504
logger.debug(f"Compressing {k} on device {device}")
505505
if v["dtype"] == "fp32":
@@ -558,7 +558,7 @@ def export_compressed_model(
558558
)
559559
new_module.pack(int_weight, gptq_scale, gptq_zp, m.bias, gptq_perm)
560560
set_module(self.model, k, new_module)
561-
elif autoround_config:
561+
elif autoround_config and (device == "cpu" or device == "auto"):
562562
from auto_round.export.export_to_itrex.export import pack_model # pylint: disable=E0401
563563

564564
self.model = pack_model(

0 commit comments

Comments (0)