Skip to content

Commit 15c39f1

Browse files
committed
fix
Signed-off-by: n1ck-guo <heng.guo@intel.com>
1 parent 63bea6b commit 15c39f1

File tree

2 files changed

+7
-5
lines changed

2 files changed

+7
-5
lines changed

neural_compressor/adaptor/torch_utils/gptq.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -718,10 +718,10 @@ def tmp(_, inp, out):
718718
for n, p in sub_layer.named_parameters():
719719
param_name = full_layer_name + "." + n
720720
if n == "weight":
721-
set_module_tensor_to_device(self.model, param_name, self.device, Q)
721+
set_module_tensor_to_device(self.model, param_name, self.device, Q, dtype=Q.dtype)
722722
else:
723723
value = load_value(self.model, param_name, model_path)
724-
set_module_tensor_to_device(self.model, param_name, self.device, value)
724+
set_module_tensor_to_device(self.model, param_name, self.device, value, dtype=value.dtype)
725725
# sub_layer.weight.data = Q
726726
torch.save(sub_layer.state_dict(), LWQ_WORKSPACE + f"/{full_layer_name}.pt")
727727
clean_module_weight(sub_layer)
@@ -745,7 +745,8 @@ def tmp(_, inp, out):
745745
for j in range(len(self.dataloader)):
746746
cache_keyword_batch = self.gather_single_batch_from_dict(self.cache_key_arguments, j)
747747
cache_positional_batch = self.gather_single_batch_from_list(self.cache_positional_arguments, j)
748-
transformer_block = transformer_block.to(cache_positional_batch[0].dtype)
748+
# breakpoint()
749+
# transformer_block = transformer_block.to(getattr(torch, self.model.config.torch_dtype))
749750
out = transformer_block(*cache_positional_batch, **cache_keyword_batch)
750751
out = self.track_hidden_states(out)
751752
outs.append(out)
@@ -968,6 +969,7 @@ def fasterquant(self, W, blocksize=128, percdamp=0.01, groupsize=-1, act_order=F
968969
if not static_groups:
969970
if (i1 + i) % groupsize == 0:
970971
self.quantizer.find_params(W[:, (i1 + i) : (i1 + i + groupsize)], weight=True)
972+
scale.append(self.quantizer.scale)
971973
zero.append(self.quantizer.zero)
972974
else:
973975
idx = i1 + i

neural_compressor/adaptor/torch_utils/layer_wise_quant/utils.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ def load_module(model, module_name, path, device="cpu"):
221221
for n, p in module.named_parameters():
222222
param_name = module_name + "." + n
223223
value = load_value(model, param_name, path)
224-
set_module_tensor_to_device(model, param_name, device, value)
224+
set_module_tensor_to_device(model, param_name, device, value, dtype=value.dtype)
225225

226226

227227
def register_weight_hooks(model, path, device="cpu", clean_weight=True, saved_path=None):
@@ -239,7 +239,7 @@ def hook(module, input):
239239
value = state_dict[n]
240240
else:
241241
value = load_value(model, param_name, path)
242-
set_module_tensor_to_device(model, param_name, device, value)
242+
set_module_tensor_to_device(model, param_name, device, value, dtype=value.dtype)
243243

244244
return hook
245245

0 commit comments

Comments (0)