
Commit 2179d33
Disable the exllama on all non-cuda devices. (#2003)
* Disable the exllama on all non-cuda devices.
  1. Disable the exllama on all non-cuda devices.
  2. Don't raise the error when running on non-cuda device.
  Signed-off-by: yuanwu <yuan.wu@intel.com>

* Refine the code
  Signed-off-by: yuanwu <yuan.wu@intel.com>

* Fix errors of make style
  Signed-off-by: yuanwu <yuan.wu@intel.com>

* Add hpu device
  Signed-off-by: yuanwu <yuan.wu@intel.com>

* Update optimum/gptq/constants.py
  Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>

* Update optimum/gptq/quantizer.py
  Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>

* Update optimum/gptq/quantizer.py
  Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>

* Update optimum/gptq/quantizer.py
  Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>

* Fix error of make style
  Signed-off-by: yuanwu <yuan.wu@intel.com>

---------

Signed-off-by: yuanwu <yuan.wu@intel.com>
Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>
1 parent ca36fc4 commit 2179d33
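The substantive change is the device test: instead of comparing the device for equality with torch.device("cpu"), the quantizer now checks device.type != "cuda", so any non-CUDA placement (cpu, hpu, or an offload map containing cpu/disk entries) takes the non-exllama path. A minimal sketch of why the old predicate missed HPU; the devices set here is an illustrative stand-in for the module placements the quantizer collects:

import torch

device = torch.device("hpu")   # Gaudi/HPU placement; constructing the device
                               # object does not require the Habana runtime
devices = {"hpu"}              # stand-in for the collected module placements

# Old predicate: only an exact CPU device (or cpu/disk offload) was caught,
# so an HPU-resident model sailed past and later hit the CUDA-only kernels.
old_hit = device == torch.device("cpu") or any(d in devices for d in ["cpu", "disk"])

# New predicate: any device whose type is not "cuda" disables exllama,
# and "hpu" is now matched in the offload map as well.
new_hit = device.type != "cuda" or any(d in devices for d in ["cpu", "disk", "hpu"])

print(old_hit, new_hit)  # False True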

File tree: 1 file changed (+8 −7 lines)

optimum/gptq/quantizer.py (+8 −7)
@@ -546,7 +546,7 @@ def tmp(_, input, output):
 
         if self.bits == 4:
             # device not on gpu
-            if device == torch.device("cpu") or (has_device_map and any(d in devices for d in ["cpu", "disk"])):
+            if device.type != "cuda" or (has_device_map and any(d in devices for d in ["cpu", "disk", "hpu"])):
                 if not self.disable_exllama:
                     logger.warning(
                         "Found modules on cpu/disk. Using Exllama/Exllamav2 backend requires all the modules to be on GPU. Setting `disable_exllama=True`"

@@ -589,13 +589,14 @@ def post_init_model(self, model):
                 The input model
         """
         if self.bits == 4 and not self.disable_exllama:
-            if get_device(model) == torch.device("cpu") or (
-                hasattr(model, "hf_device_map") and any(d in model.hf_device_map for d in ["cpu", "disk"])
+            if get_device(model).type != "cuda" or (
+                hasattr(model, "hf_device_map") and any(d in model.hf_device_map for d in ["cpu", "disk", "hpu"])
             ):
-                raise ValueError(
-                    "Found modules on cpu/disk. Using Exllama or Exllamav2 backend requires all the modules to be on GPU."
-                    "You can deactivate exllama backend by setting `disable_exllama=True` in the quantization config object"
-                )
+                if not self.disable_exllama:
+                    logger.warning(
+                        "Found modules on cpu/disk. Using Exllama/Exllamav2 backend requires all the modules to be on GPU. Setting `disable_exllama=True`"
+                    )
+                self.disable_exllama = True
 
         class StoreAttr(object):
             pass
