
Commit a3a061e

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent b4e93f3 commit a3a061e

5 files changed: 27 additions, 23 deletions


neural_compressor/torch/algorithms/layer_wise/utils.py

1 addition, 0 deletions

@@ -214,6 +214,7 @@ def _get_path(pretrained_model_name_or_path):
         path = dowload_hf_model(pretrained_model_name_or_path)
     return path
 
+
 get_path = _get_path
 
 
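For orientation (not part of the commit): get_path, aliased from _get_path above, resolves pretrained_model_name_or_path to a local directory, downloading the checkpoint from the Hugging Face hub via dowload_hf_model when it is not already local. A minimal usage sketch, reusing the model id from the tests below:

from neural_compressor.torch.algorithms.layer_wise.utils import get_path

# Downloads the checkpoint if needed and returns the local directory
# that the layer-wise loaders read weights from.
model_path = get_path("hf-internal-testing/tiny-random-GPTJForCausalLM")
print(model_path)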

neural_compressor/torch/algorithms/weight_only/gptq.py

11 additions, 9 deletions

@@ -240,8 +240,10 @@ def __init__(
         self.nsamples = nsamples
 
     def prepare_layer_wise(self, model_path):
-        from neural_compressor.torch.algorithms.layer_wise import LWQ_WORKSPACE, get_path, register_weight_hooks
         import os
+
+        from neural_compressor.torch.algorithms.layer_wise import LWQ_WORKSPACE, get_path, register_weight_hooks
+
         os.makedirs(LWQ_WORKSPACE, exist_ok=True)
         if model_path == "":
             model_path = self.model.path
@@ -250,7 +252,7 @@ def prepare_layer_wise(self, model_path):
         register_weight_hooks(
             self.model, self.model_path, device=self.device, clean_weight=True, saved_path=LWQ_WORKSPACE
         )
-
+
     def get_full_layer_name(self, sub_layer_name, block_idx):
         transformer_name = self.gptq_related_blocks["transformers_name"]
         return ".".join([transformer_name, str(block_idx), sub_layer_name])
@@ -443,6 +445,7 @@ def execute_quantization(self, means=None, stds=None):
                 weight_config_this_layer = self.get_layer_config(full_layer_name)
                 if self.use_layer_wise:  # pragma: no cover
                     from neural_compressor.torch.algorithms.layer_wise import load_value
+
                     W = load_value(self.model, full_layer_name + ".weight", self.model_path)
                 else:
                     W = sub_layers[layer_name].weight.data.clone()
@@ -489,10 +492,10 @@ def tmp(_, inp, out):
                         else:
                             value = load_value(self.model, param_name, self.model_path)
                             set_module_tensor_to_device(self.model, param_name, self.device, value)
-
+
                 else:
                     W = sub_layers[layer_name].weight.data.clone()
-
+
                 accelerator.mark_step()
                 if "hpu" in self.device:
                     W = W.to("cpu")
@@ -504,7 +507,7 @@ def tmp(_, inp, out):
                     act_order=weight_config_this_layer["act_order"],
                     static_groups=weight_config_this_layer["static_groups"],
                 )
-
+
                 # Step 2.5: export to compressed model
                 gptq_config[self.get_full_layer_name(layer_name, block_idx)] = {"scale": scale}
                 if not weight_config_this_layer["sym"]:
@@ -513,7 +516,7 @@ def tmp(_, inp, out):
                     gptq_config[self.get_full_layer_name(layer_name, block_idx)]["perm"] = gptq_for_this_block[
                         layer_name
                     ].perm
-
+
                 weight_config_this_layer = self.get_layer_config(self.get_full_layer_name(layer_name, block_idx))
                 gptq_scale = gptq_config[self.get_full_layer_name(layer_name, block_idx)]["scale"]
                 if not weight_config_this_layer["sym"]:
@@ -564,7 +567,7 @@ def tmp(_, inp, out):
                     device=self.device,
                 )
                 new_module.pack(int_weight, gptq_scale, gptq_zp, sub_layers[layer_name].bias, gptq_perm)
-
+
                 if self.use_layer_wise:  # pragma: no cover
                     from neural_compressor.torch.algorithms.layer_wise import (
                         LWQ_WORKSPACE,
@@ -595,8 +598,7 @@ def tmp(_, inp, out):
                 self.gptq_related_blocks["transformers"][block_idx] = transformer_block
             else:
                 self.gptq_related_blocks["transformers"][block_idx] = transformer_block.cpu()
-
-
+
             del gptq_for_this_block
             torch.cuda.empty_cache()
             # iteratively replace the input with output, thus layerwise quantization can continue.
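For context (not part of the commit): the layer-wise path that prepare_layer_wise wires up is driven end to end much like the test further down. A minimal sketch, assuming the 3x entry points GPTQConfig/prepare/convert from neural_compressor.torch.quantization and an illustrative one-batch calibration in place of the test's run_fn; the model id matches the tests:

import torch

from neural_compressor.torch.algorithms.layer_wise import load_empty_model
from neural_compressor.torch.quantization import GPTQConfig, convert, prepare

# Start from an empty (meta) model so each layer's weights are pulled from disk
# on demand instead of keeping the whole checkpoint in memory.
model = load_empty_model("hf-internal-testing/tiny-random-GPTJForCausalLM")

quant_config = GPTQConfig(
    use_layer_wise=True,
    model_path="hf-internal-testing/tiny-random-GPTJForCausalLM",
)
model = prepare(model, quant_config)

# Calibration: run a few batches so GPTQ can collect per-layer input statistics.
example_inputs = torch.randint(0, 100, (1, 8))  # illustrative token ids
model(example_inputs)

model = convert(model)  # quantizes block by block, staging weights under LWQ_WORKSPACE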

neural_compressor/torch/algorithms/weight_only/rtn.py

8 additions, 6 deletions

@@ -19,10 +19,10 @@
 # limitations under the License.
 
 
+import gc
 from collections import OrderedDict
 
 import torch
-import gc
 
 from neural_compressor.torch.algorithms import Quantizer
 from neural_compressor.torch.utils import get_accelerator, is_transformers_imported, logger, set_module
@@ -157,18 +157,20 @@ def convert(
                 continue
             logger.debug(f"RTN quantized module:{name, m}")
             logger.debug(log_msg)
-
+
             if use_layer_wise:
+                import os
+
                 from neural_compressor.common.utils import DEFAULT_WORKSPACE
                 from neural_compressor.torch.algorithms.layer_wise.utils import get_path, load_module, load_value
-                import os
+
                 lwq_workspace = os.path.join(DEFAULT_WORKSPACE, "lwq_tmpdir")
                 os.makedirs(lwq_workspace, exist_ok=True)
                 model_path = get_path(model_path)
-
+
                 # load weight
                 load_module(model, name, model_path, device=device)
-
+
             # for only group_dim is 0 or only `transformers.Conv1D`, we need transpose weight.
             if is_transformers_imported():
                 transpose = (group_dim == 0) ^ (isinstance(m, transformers.Conv1D))
@@ -230,7 +232,7 @@ def convert(
                 return new_module
             else:
                 set_module(model, name, new_module)
-
+
         if use_layer_wise:
             # register hooks
            from neural_compressor.torch.algorithms.layer_wise.utils import register_weight_hooks
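For context (not part of the commit): the RTN layer-wise path needs no calibration data; convert() loads each module's weights from model_path via load_module(), quantizes the module, and moves on. A minimal sketch under the same assumptions as above (entry points from neural_compressor.torch.quantization; passing model_path to RTNConfig mirrors the GPTQ test and is an assumption here):

from neural_compressor.torch.algorithms.layer_wise import load_empty_model
from neural_compressor.torch.quantization import RTNConfig, convert, prepare

# Empty (meta) model: weights are loaded per layer during convert(), not up front.
model = load_empty_model("hf-internal-testing/tiny-random-GPTJForCausalLM")

quant_config = RTNConfig(
    use_layer_wise=True,
    model_path="hf-internal-testing/tiny-random-GPTJForCausalLM",
)
model = prepare(model, quant_config)
model = convert(model)  # RTN is data-free, so no calibration run is required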

test/3x/torch/quantization/weight_only/test_gptq.py

5 additions, 7 deletions

@@ -178,20 +178,18 @@ def test_layer_wise(self):
         run_fn(model)
         model = convert(model)
         q_label = model(self.example_inputs)[0]
-
+
         from neural_compressor.torch.algorithms.layer_wise import load_empty_model
+
         model = load_empty_model("hf-internal-testing/tiny-random-GPTJForCausalLM")
-
-        quant_config = GPTQConfig(
-            use_layer_wise=True,
-            model_path="hf-internal-testing/tiny-random-GPTJForCausalLM"
-        )
+
+        quant_config = GPTQConfig(use_layer_wise=True, model_path="hf-internal-testing/tiny-random-GPTJForCausalLM")
         model = prepare(model, quant_config)
         run_fn(model)
         model = convert(model)
         out = model(self.example_inputs)[0]
         assert torch.equal(out, q_label), "use_layer_wise=True output should be same. Please double check."
-
+
     @pytest.mark.parametrize("dtype", ["nf4", "int4"])
     @pytest.mark.parametrize("double_quant_bits", [6])
     @pytest.mark.parametrize("double_quant_group_size", [8, 256])

test/3x/torch/quantization/weight_only/test_rtn.py

2 additions, 1 deletion

@@ -141,6 +141,7 @@ def test_mse_search(self):
     def test_layer_wise(self):
         # model = copy.deepcopy(self.tiny_gptj)
         from neural_compressor.torch.algorithms.layer_wise import load_empty_model
+
         model = load_empty_model("hf-internal-testing/tiny-random-GPTJForCausalLM")
         quant_config = RTNConfig(
             use_layer_wise=True,
@@ -149,7 +150,7 @@ def test_layer_wise(self):
         model = prepare(model, quant_config)
         model = convert(model)
         out = model(self.example_inputs)[0]
-        assert torch.equal(out, self.q_label), "use_layer_wise=True output should be same. Please double check."
+        assert torch.equal(out, self.q_label), "use_layer_wise=True output should be same. Please double check."
 
     @pytest.mark.parametrize(
         "dtype",

0 commit comments
