Commit 7b8e6fa

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 6d4a097 commit 7b8e6fa

10 files changed (+63 −19 lines)


neural_compressor/evaluation/lm_eval/utils.py

+1
@@ -22,6 +22,7 @@

 from neural_compressor.common import logger

+
 class LMEvalParser:
     def __init__(
         self,

neural_compressor/torch/algorithms/fp8_quant/_core/patching_common.py

+14
@@ -1,3 +1,17 @@
+# Copyright (c) 2025 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import importlib.util

 from ..model_configs import ModuleInfo, ModuleType

neural_compressor/torch/algorithms/fp8_quant/_core/quantized_hpu_ops.py

+14
@@ -1,4 +1,18 @@

+# Copyright (c) 2025 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from .._quant_common.quant_config import ScaleFormat
 from ..utils.logger import logger

neural_compressor/torch/algorithms/weight_only/save_load.py

+17 −12

@@ -26,14 +26,14 @@
 from neural_compressor.torch.utils import (
     HPU_SAFE_WEIGHTS_NAME,
     HPU_WEIGHT_NAME,
+    LM_HEAD_NAMES,
     QCONFIG_NAME,
     WEIGHT_NAME,
     SaveLoadFormat,
+    get_accelerator,
+    get_enum_from_format,
     logger,
     set_module,
-    get_enum_from_format,
-    LM_HEAD_NAMES,
-    get_accelerator,
 )

 from .modules import HPUWeightOnlyLinear, INCWeightOnlyLinear, MulLinear

@@ -964,8 +964,9 @@ def change_config_to_hf_format(config_mappings):
         "true_sequential": True,
         "model_name_or_path": None,
         "model_file_base_name": "model",
-        "quant_method": "gptq"  # INC is using AutoGPTQ format for RTN, GPTQ, AWQ, and TEQ
+        "quant_method": "gptq",  # INC is using AutoGPTQ format for RTN, GPTQ, AWQ, and TEQ
     }
+
     def _is_lm_head(name):
         for lm_head_name in LM_HEAD_NAMES:
             if re.match(lm_head_name, name):

@@ -992,17 +993,21 @@ def _is_lm_head(name):
     else:
         assert bits == config.bits, "bits should be the same for all modules, got {bits} and {config.bits}."
         assert sym == config.use_sym, "sym should be the same for all modules, got {sym} and {config.use_sym}."
-        assert group_size == config.group_size, \
-            "group_size should be the same for all modules, got {group_size} and {config.group_size}."
+        assert (
+            group_size == config.group_size
+        ), "group_size should be the same for all modules, got {group_size} and {config.group_size}."
         if hasattr(config, "percdamp"):
-            assert damp_percent == config.percdamp, \
-                "percdamp should be the same for all modules, got {damp_percent} and {config.percdamp}."
+            assert (
+                damp_percent == config.percdamp
+            ), "percdamp should be the same for all modules, got {damp_percent} and {config.percdamp}."
         if hasattr(config, "act_order"):
-            assert desc_act == config.act_order, \
-                "act_order should be the same for all modules, got {desc_act} and {config.act_order}."
+            assert (
+                desc_act == config.act_order
+            ), "act_order should be the same for all modules, got {desc_act} and {config.act_order}."
         if hasattr(config, "true_sequential"):
-            assert true_sequential == config.true_sequential, \
-                "true_sequential should be the same for all modules, got {true_sequential} and {config.true_sequential}."
+            assert (
+                true_sequential == config.true_sequential
+            ), "true_sequential should be the same for all modules, got {true_sequential} and {config.true_sequential}."
     default_quantization_config["bits"] = bits
     default_quantization_config["group_size"] = group_size
     default_quantization_config["damp_percent"] = damp_percent
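The assert rewrites in the last hunk are consistent with the black formatter's standard treatment of backslash-continued asserts. A minimal self-contained sketch of the same before/after pattern (the class and values are illustrative, not from the repository; the sketch also uses an f-string so the braces are actually interpolated, whereas the repository lines shown above keep plain strings):

class _Config:
    # stand-in for the per-module config object compared above
    group_size = 128

config = _Config()
group_size = 128

# Before: condition and message joined by a backslash continuation.
# assert group_size == config.group_size, \
#     "group_size should be the same for all modules."

# After: the condition is wrapped in parentheses so the message follows the
# closing parenthesis without a line continuation.
assert (
    group_size == config.group_size
), f"group_size should be the same for all modules, got {group_size} and {config.group_size}."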

neural_compressor/torch/quantization/config.py

+1
@@ -1786,6 +1786,7 @@ def get_default_hqq_config() -> HQQConfig:
 @register_config(framework_name=FRAMEWORK_NAME, algo_name=FP8_QUANT)
 class FP8Config(TorchBaseConfig):
     """Config class for FP8 quantization."""
+
     name = FP8_QUANT

     def __init__(

neural_compressor/torch/quantization/save_load_entry.py

+1
@@ -50,6 +50,7 @@ def save(model, checkpoint_dir="saved_results", format="default"):
     # fp8_quant
     if isinstance(config_object, FP8Config):
         from neural_compressor.torch.algorithms import fp8_quant
+
         if format == SaveLoadFormat.DEFAULT:
             format = SaveLoadFormat.HUGGINGFACE
         fp8_quant.save(model, checkpoint_dir, format)

neural_compressor/torch/utils/environ.py

+1 −1

@@ -15,8 +15,8 @@
 """Intel Neural Compressor PyTorch environment check."""

 import importlib
-import sys
 import os
+import sys

 import torch
 from packaging.version import Version

neural_compressor/torch/utils/llm_utility.py

+9 −1

@@ -16,10 +16,13 @@

 def initialize_model_and_tokenizer(model_name_or_path, use_load=False, device="cpu"):
     import transformers
-    from neural_compressor.torch.utils import local_rank, world_size, logger
+
+    from neural_compressor.torch.utils import local_rank, logger, world_size
+
     tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path)
     if use_load:
         from neural_compressor.torch.quantization import load
+
         model = load(model_name_or_path, format="huggingface", device=device)
         model, tokenizer = update_tokenizer(model, tokenizer)
         return model, tokenizer

@@ -37,6 +40,7 @@ def initialize_model_and_tokenizer(model_name_or_path, use_load=False, device="cpu"):
         "keep_module_on_host": True,
     }
     import deepspeed
+
     ds_model = deepspeed.init_inference(model, **ds_inference_kwargs)
     model = ds_model.module
     model.eval()

@@ -95,10 +99,14 @@ def __getitem__(self, idx):
     dataloader = DataLoader(tokenized_dataset, batch_size=bs, shuffle=True)
     return dataloader

+
 def llm_benchmark(model, batch_size, input_length, warmup_iters=3, total_iters=20):
     import time
+
     import torch
+
     from neural_compressor.torch.utils import get_accelerator, logger
+
     cur_accelerator = get_accelerator()
     # this is a simple example to show the performance benefit of quantization
     example_inputs = torch.ones((batch_size, input_length), dtype=torch.long)
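For context, a hypothetical usage sketch of the two helpers touched above, inferred only from the signatures and return statements visible in these hunks; the model id and argument values are illustrative, and the import path simply mirrors the file's location in the tree:

from neural_compressor.torch.utils.llm_utility import (
    initialize_model_and_tokenizer,
    llm_benchmark,
)

# Load a model and its tokenizer; use_load=True would instead reload an INC
# "huggingface"-format checkpoint via neural_compressor.torch.quantization.load.
model, tokenizer = initialize_model_and_tokenizer(
    "facebook/opt-125m",  # illustrative model id or local path
    use_load=False,
    device="cpu",
)

# Time fixed-shape forward passes: 3 warm-up iterations, then 20 measured ones.
llm_benchmark(model, batch_size=1, input_length=128, warmup_iters=3, total_iters=20)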

test/3x/torch/algorithms/fp8_quant/tester.py

+1 −1 (whitespace-only change)

@@ -49,7 +49,7 @@
 QUANT_MODES_QUANT_ONLY = [QuantMode.QUANTIZE]

 DTYPE_TO_HPDTYPE_STR = {
-    torch.bfloat16: "BF16",
+    torch.bfloat16: "BF16",
     torch.float16: "FP16",
     torch.float32: "FP32",
 }

test/3x/torch/algorithms/fp8_quant/unit_tests/test_save_load.py

+4 −4 (whitespace-only changes)

@@ -32,18 +32,18 @@ def compare_parameters_buffers(model1, model2):
     unique_keys_in_dict2 = keys2 - keys1
     unique_keys = unique_keys_in_dict1.union(unique_keys_in_dict2)
     assert len(dict1) == len(dict2), f"The number of parameters and buffers are different, {unique_keys}.\n" + \
-        f"unique_keys_in_model1: {unique_keys_in_dict1}\nunique_keys_in_model2: {unique_keys_in_dict2}\n"
+        f"unique_keys_in_model1: {unique_keys_in_dict1}\nunique_keys_in_model2: {unique_keys_in_dict2}\n"
     for k, v in dict1.items():
         assert k in dict2, "k not in dict2"
         assert v.dtype == dict2[k].dtype, f"dtype of {k} is differnt.\n{v.dtype}\n{dict2[k].dtype}"
         assert torch.allclose(v, dict2[k]), f"{k} is differnt in model1 and model2.\n" + f"{v}\n" + f"{dict2[k]}\n"


 @pytest.mark.parametrize("scale_method", [
-    "unit_scale", "hw_aligned_single_scale", "maxabs_hw", "maxabs_pow2",
-    "maxabs_arbitrary", "maxabs_hw_opt_weight", "maxabs_pow2_opt_weight",
+    "unit_scale", "hw_aligned_single_scale", "maxabs_hw", "maxabs_pow2",
+    "maxabs_arbitrary", "maxabs_hw_opt_weight", "maxabs_pow2_opt_weight",
     # per-channel
-    "act_maxabs_hw_weights_pcs_maxabs_pow2", "act_maxabs_hw_weights_pcs_opt_pow2",
+    "act_maxabs_hw_weights_pcs_maxabs_pow2", "act_maxabs_hw_weights_pcs_opt_pow2",
     "act_maxabs_pow2_weights_pcs_maxabs_pow2", "act_maxabs_pow2_weights_pcs_opt_pow2",
 ])
 @pytest.mark.parametrize("scale_format", ["const", "scalar"])
