@@ -332,20 +332,23 @@ def quantize_model(self, model: nn.Module, tokenizer: Optional[Any] = None):
         use_cache = model.config.use_cache
         model.config.use_cache = False
 
+        # If the model has a device_map, we don't move to model. We have already dispatched the hook that will do the work
         if hasattr(model, "hf_device_map"):
             devices = list(model.hf_device_map.values())
+            has_device_map = True
             if "disk" in devices:
                 raise ValueError("disk offload is not supported with GPTQ quantization")
-            if "cpu" in devices and len(model.hf_device_map) > 1:
-                logger.info("Cpu offload is not recommended. There might be some issues with the memory")
-                hook = None
-                for name, device in model.hf_device_map.items():
-                    if device == "cpu":
-                        module = recurse_getattr(model, name)
-                        remove_hook_from_module(module, recurse=True)
-                        module, hook = cpu_offload_with_hook(module, prev_module_hook=hook)
-            # If the model has a device_map, we don't move to model. We have already dispatched the hook that will do the work
-            has_device_map = True
+            if "cpu" in devices or torch.device("cpu") in devices:
+                if len(model.hf_device_map) > 1:
+                    logger.info("Cpu offload is not recommended. There might be some issues with the memory")
+                    hook = None
+                    for name, device in model.hf_device_map.items():
+                        if device == "cpu":
+                            module = recurse_getattr(model, name)
+                            remove_hook_from_module(module, recurse=True)
+                            module, hook = cpu_offload_with_hook(module, prev_module_hook=hook)
+                else:
+                    has_device_map = False
 
         if hasattr(model, "dtype"):
             self.use_cuda_fp16 = model.dtype == torch.float16
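
For context (not part of the commit): the rewritten branch reuses accelerate's cpu-offload hooks for the sub-modules that the device_map places on "cpu". Below is a minimal sketch of that hook pattern on a toy model, assuming only the public accelerate helpers already used in the diff (remove_hook_from_module, cpu_offload_with_hook); the toy model and loop are illustrative, not the quantizer's actual traversal of hf_device_map.

from torch import nn

from accelerate import cpu_offload_with_hook
from accelerate.hooks import remove_hook_from_module

# Toy stand-in for a model whose device_map maps some sub-modules to "cpu".
model = nn.Sequential(nn.Linear(8, 8), nn.ReLU(), nn.Linear(8, 8))

hook = None
for name, module in model.named_children():
    # Drop any hooks left by a previous dispatch, then attach a cpu-offload hook.
    # Chaining prev_module_hook lets each module send its predecessor back to cpu
    # before its own forward runs, so modules are brought onto the execution
    # device one at a time.
    remove_hook_from_module(module, recurse=True)
    module, hook = cpu_offload_with_hook(module, prev_module_hook=hook)

In the commit itself this loop only runs when the device_map has more than one entry; a single-entry map such as {"": "cpu"} now falls into the new else branch and sets has_device_map = False, presumably so the rest of quantize_model treats the model as if no device_map had been set.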