
Commit c543783

Author: sdp
Commit message: fix master conflict
Signed-off-by: sdp <sdp@9049fa09fd7b.jf.intel.com>
1 parent: 483c219

4 files changed: +15, -15 lines changed


neural_compressor/torch/algorithms/weight_only/gptq.py (+2, -1)
@@ -1045,7 +1045,8 @@ def convert(self, model, *args, **kwargs):
         self.gptq_quantizer.remove_prepare_for_calibration()

         q_model, gptq_config = self.gptq_quantizer.execute_quantization()
-        q_model = q_model.to(self.model_device)
+        if not self.gptq_quantizer.use_layer_wise:
+            q_model = q_model.to(self.model_device)
         q_model.gptq_config = gptq_config
         logger.info("GPTQ quantizing done.")
         return q_model
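
With this guard, a layer-wise GPTQ run no longer pulls the whole quantized model back onto self.model_device, which could defeat the per-layer memory savings layer-wise quantization is meant to provide. A minimal usage sketch (not part of this commit) of how the flag reaches the quantizer through the INC 3.x PyTorch API; the model id mirrors the tiny test model used elsewhere in this PR, and the import paths and the calibration step are assumptions:

    # Sketch only: layer-wise GPTQ; import paths and the calibration loop are assumed.
    from neural_compressor.torch.quantization import GPTQConfig, prepare, convert
    from neural_compressor.torch.utils import load_empty_model

    model_name = "hf-internal-testing/tiny-random-GPTJForCausalLM"
    model = load_empty_model(model_name)  # skeleton model; weights are materialized per layer
    quant_config = GPTQConfig(use_layer_wise=True, model_path=model_name)
    model = prepare(model, quant_config)
    # ... feed calibration batches through `model` here; GPTQ needs activation statistics ...
    model = convert(model)  # with use_layer_wise=True the final q_model.to(self.model_device) move is skipped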

neural_compressor/torch/algorithms/weight_only/rtn.py (+7, -7)
@@ -146,7 +146,8 @@ def convert(
             if dtype == "fp32":
                 continue
             # Move modules to the accelerator device layer-by-layer
-            m.to(device)
+            if not use_layer_wise:
+                m.to(device)
             ### FP8 cast part
             if dtype in ["fp8_e5m2", "fp8_e5m2fnuz", "fp8_e4m3fn", "fp8_e4m3fnuz"]:
                 logger.debug("Cast module {} to FP8 using qdq mode, no scaling".format(name))
@@ -200,7 +201,6 @@ def convert(
             weight = m.weight.detach()
             if use_mse_search:
                 quantile = search_clip(m, bits, group_size, scheme, dtype, use_full_range)
-            start_quant = time.time()
             int_weight, scale, zp = quant_tensor(
                 weight,
                 dtype=dtype,
@@ -212,8 +212,6 @@ def convert(
                 full_range=use_full_range,
                 **double_quant_config,
             )
-            quant_int_time = time.time() - start_quant
-            total_quant_int_time += quant_int_time
             int_weight = int_weight.t_().contiguous() if transpose else int_weight
             scale = scale.t_().contiguous() if transpose else scale
             zp = zp.t_().contiguous() if transpose and zp is not None else zp
@@ -248,7 +246,9 @@ def convert(
             else:
                 set_module(model, name, new_module)
             # Move modules back to the model device layer-by-layer
-            m.to(model_device)
-            new_module.to(model_device)
-        model.to(model_device)
+            if not use_layer_wise:
+                m.to(model_device)
+                new_module.to(model_device)
+        if not use_layer_wise:
+            model.to(model_device)
         return model
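
In layer-wise mode the modules stay wherever the layer-wise loader placed them (weights are loaded per layer rather than held on a single device), so both the per-module move to the accelerator and the moves back to model_device are skipped; the leftover timing instrumentation (start_quant / quant_int_time) is also dropped. A usage sketch mirroring the updated layer-wise test at the bottom of this commit; the import paths are assumptions:

    # Sketch only: RTN with layer-wise quantization enabled.
    from neural_compressor.torch.quantization import RTNConfig, prepare, convert
    from neural_compressor.torch.utils import load_empty_model

    model = load_empty_model("hf-internal-testing/tiny-random-GPTJForCausalLM")
    quant_config = RTNConfig(use_layer_wise=True)  # model_path may also be passed explicitly
    model = prepare(model, quant_config)
    model = convert(model)  # per-module .to(device) / .to(model_device) moves are skipped here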

neural_compressor/torch/quantization/config.py (+5, -5)
@@ -200,7 +200,7 @@ def to_config_mapping(
         self, config_list: List[BaseConfig] = None, model_info: List[Tuple[str, str]] = None
     ) -> OrderedDictType[Union[str, str], OrderedDictType[str, BaseConfig]]:
         if not self.quant_lm_head:
-            self.set_local(LM_HEAD_NAMES, RTNConfig(dtype="fp32"))
+            self.set_local(LM_HEAD_NAMES, RTNConfig(dtype="fp32", use_layer_wise=self.use_layer_wise, model_path=self.model_path))
         config_mapping = super().to_config_mapping(config_list, model_info)
         return config_mapping

@@ -363,7 +363,7 @@ def to_config_mapping(
         self, config_list: List[BaseConfig] = None, model_info: List[Tuple[str, str]] = None
     ) -> OrderedDictType[Union[str, str], OrderedDictType[str, BaseConfig]]:
         if not self.quant_lm_head:
-            self.set_local(LM_HEAD_NAMES, GPTQConfig(dtype="fp32"))
+            self.set_local(LM_HEAD_NAMES, GPTQConfig(dtype="fp32", use_layer_wise=self.use_layer_wise, model_path=self.model_path))
         config_mapping = super().to_config_mapping(config_list, model_info)
         return config_mapping

@@ -385,7 +385,7 @@ def get_config_set_for_tuning(cls) -> Union[None, "GPTQConfig", List["GPTQConfig"]]:
     @classmethod
     def get_predefined_configs(cls) -> Dict[torch_utils.ProcessorType, "GPTQConfig"]:
         pre_defined_configs: Dict[torch_utils.ProcessorType, GPTQConfig] = {}
-        pre_defined_configs[torch_utils.ProcessorType.Client] = cls(use_layer_wise=True)
+        pre_defined_configs[torch_utils.ProcessorType.Client] = cls(use_layer_wise=True)#, model_path=self.model_path)
         pre_defined_configs[torch_utils.ProcessorType.Server] = cls()
         return pre_defined_configs

@@ -508,7 +508,7 @@ def to_config_mapping(
         self, config_list: List[BaseConfig] = None, model_info: List[Tuple[str, str]] = None
     ) -> OrderedDictType[Union[str, str], OrderedDictType[str, BaseConfig]]:
         if not self.quant_lm_head:
-            self.set_local(LM_HEAD_NAMES, AWQConfig(dtype="fp32"))
+            self.set_local(LM_HEAD_NAMES, AWQConfig(dtype="fp32", use_layer_wise=self.use_layer_wise, model_path=self.model_path))
         config_mapping = super().to_config_mapping(config_list, model_info)
         return config_mapping

@@ -815,7 +815,7 @@ def get_config_set_for_tuning(cls) -> Union[None, "AutoRoundConfig", List["AutoRoundConfig"]]:
     @classmethod
     def get_predefined_configs(cls) -> Dict[torch_utils.ProcessorType, "AutoRoundConfig"]:
         pre_defined_configs: Dict[torch_utils.ProcessorType, AutoRoundConfig] = {}
-        pre_defined_configs[torch_utils.ProcessorType.Client] = cls(use_layer_wise=True)
+        pre_defined_configs[torch_utils.ProcessorType.Client] = cls(use_layer_wise=True, model_path=self.model_path)
         pre_defined_configs[torch_utils.ProcessorType.Server] = cls()
         return pre_defined_configs

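The to_config_mapping changes matter when quant_lm_head is left at False: the lm_head modules receive a local fp32 override, and before this commit that override was a bare RTNConfig/GPTQConfig/AWQConfig(dtype="fp32"), which dropped use_layer_wise and model_path and could break layer-wise runs on the lm_head. A rough sketch of the effect (names as in config.py; the model id is the tiny test model, and passing quant_lm_head as a constructor keyword is an assumption):

    # Sketch only: the lm_head fp32 override now inherits the layer-wise settings.
    from neural_compressor.torch.quantization import RTNConfig

    cfg = RTNConfig(
        use_layer_wise=True,
        model_path="hf-internal-testing/tiny-random-GPTJForCausalLM",
        quant_lm_head=False,
    )
    # Inside to_config_mapping this now amounts to roughly:
    #   cfg.set_local(LM_HEAD_NAMES, RTNConfig(dtype="fp32",
    #                                          use_layer_wise=cfg.use_layer_wise,
    #                                          model_path=cfg.model_path))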

test/3x/torch/quantization/weight_only/test_rtn.py (+1, -2)
@@ -44,7 +44,7 @@ def setup_class(self):
         self.label = self.tiny_gptj(self.example_inputs)[0]
         # test_default_config
         model = copy.deepcopy(self.tiny_gptj)
-        quant_config = get_default_rtn_config()
+        quant_config = get_default_rtn_config("Server")
         model = prepare(model, quant_config)
         model = convert(model)
         # record q_label for comparison
@@ -172,7 +172,6 @@ def test_layer_wise(self):
         model = load_empty_model("hf-internal-testing/tiny-random-GPTJForCausalLM")
         quant_config = RTNConfig(
             use_layer_wise=True,
-            model_path="hf-internal-testing/tiny-random-GPTJForCausalLM",
         )
         model = prepare(model, quant_config)
         model = convert(model)
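
The setup_class change pins the reference quantization to the "Server" preset. A likely reason is that the processor-type presets diverge once layer-wise settings propagate: the Client presets for these algorithms (see get_predefined_configs in config.py above) enable use_layer_wise, which would change the q_label baseline on machines detected as clients. A small sketch; the import path is an assumption:

    # Sketch only: request the dense (non layer-wise) Server defaults explicitly.
    from neural_compressor.torch.quantization import get_default_rtn_config

    quant_config = get_default_rtn_config("Server")  # accepts a processor type, as used in the test above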
