
Commit 9ec53ce

Fixed pre-CI error

Signed-off-by: Cheng, Penghui <penghui.cheng@intel.com>
Parent: 3d28d4a

File tree: 4 files changed (+45 -52 lines)

.github/workflows/test_inc.yml (+1 -1)

@@ -32,7 +32,7 @@ jobs:
           python -m pip install --upgrade pip
           pip install cmake>=3.16
           pip install py-cpuinfo
-          pip install torch==2.1.0+cpu --extra-index-url https://download.pytorch.org/whl/cpu
+          pip install torch==2.1.0 torchaudio==2.1.0 torchvision==0.16 --extra-index-url https://download.pytorch.org/whl/cpu
           pip install .[neural-compressor,diffusers,tests]
           pip install intel-extension-for-pytorch==2.1.100
       - name: Test with Pytest
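
For context: the new pin installs torch, torchaudio, and torchvision together from the CPU wheel index, so a later dependency cannot drag in a mismatched (or CUDA) build of any of the three. A quick post-install sanity check, offered as a sketch rather than part of the workflow:

import torch
import torchaudio
import torchvision

# Versions should match the pins above: 2.1.0 / 2.1.0 / 0.16.x, CPU builds.
print(torch.__version__, torchaudio.__version__, torchvision.__version__)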

optimum/intel/neural_compressor/quantization.py (+3 -3)

@@ -287,17 +287,17 @@ def quantize(

         if not isinstance(quantization_config, PostTrainingQuantConfig):
             if use_cpu:
-                # will remove after intel-extension-for-transformers 1.3.3 released
+                # will remove after intel-extension-for-transformers 1.3.3 release.
                 quantization_config.device = "cpu"
                 quantization_config.post_init()
             elif use_xpu:
-                # will remove after intel-extension-for-transformers 1.3.3 released
+                # will remove after intel-extension-for-transformers 1.3.3 release.
                 quantization_config.device = "xpu"
                 quantization_config.post_init_xpu()
             self._quantized_model = convert_to_quantized_model(
                 self._original_model, quantization_config, device=quantization_config.device
             )
-            # will remove after intel-extension-for-transformers 1.3.3 released
+            # will remove after intel-extension-for-transformers 1.3.3 release.
             if hasattr(quantization_config, "calib_dataloader"):
                 quantization_config.calib_dataloader = None
         self._quantized_model.quantization_config = quantization_config
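
The patched lines sit on the weight-only path, taken when quantization_config is not a PostTrainingQuantConfig. A minimal usage sketch that exercises it, adapted from this commit's test file; the exact import paths for INCQuantizer and WeightOnlyQuantConfig are assumptions, since the diff does not show them:

from transformers import AutoModelForCausalLM
from optimum.intel import INCQuantizer
from intel_extension_for_transformers.transformers import WeightOnlyQuantConfig  # assumed path

model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-GPTNeoForCausalLM")
quantizer = INCQuantizer.from_pretrained(model, task="text-generation")
# Until the intel-extension-for-transformers 1.3.3 release, quantize() forces
# quantization_config.device to "cpu"/"xpu" and calls the matching post_init
# before handing the model to convert_to_quantized_model.
quantizer.quantize(
    quantization_config=WeightOnlyQuantConfig(weight_dtype="int8"),
    save_directory="quantized_model",
)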

setup.py (+8 -4)

@@ -49,9 +49,6 @@
         "rjieba",
         "timm",
         "invisible-watermark>=0.2.0",
-        # Will remove after intel-extension-for-transformers 1.3.3 released.
-        "intel-extension-for-transformers>=1.3",
-        "peft",
         "auto-gptq",
         "transformers_stream_generator",
         "einops",
@@ -60,7 +57,14 @@
 QUALITY_REQUIRE = ["black~=23.1", "ruff>=0.0.241"]

 EXTRAS_REQUIRE = {
-    "neural-compressor": ["neural-compressor>=2.2.0", "onnxruntime<1.15.0", "accelerate"],
+    "neural-compressor": [
+        "neural-compressor>=2.2.0",
+        "onnxruntime<1.15.0",
+        "accelerate",
+        # will remove after intel-extension-for-transformers 1.3.3 release.
+        "intel-extension-for-transformers>=1.3",
+        "peft",
+    ],
     "openvino": ["openvino>=2023.3", "nncf>=2.8.1"],
     "openvino-tokenizers": ["openvino-tokenizers[transformers]"],
     "nncf": ["nncf>=2.8.1"],

tests/neural_compressor/test_optimization.py (+33 -44)

@@ -88,6 +88,13 @@ class OptimizationTest(INCTestMixin):
         "hf-internal-testing/tiny-random-GPTNeoForCausalLM",
     )

+    WEIGHT_ONLY_CONFIG = (
+        (False, "RTN", "int4_clip"),
+        (False, "GPTQ", "int4_clip"),
+        (False, "RTN", "int8"),
+        (True, "", ""),
+    )
+
     @parameterized.expand(SUPPORTED_ARCHITECTURES_DYNAMIC)
     def test_dynamic_quantization(self, task, model_name, expected_quantized_matmuls):
         quantization_config = PostTrainingQuantConfig(approach="dynamic")
@@ -202,59 +209,41 @@ def test_ipex_static_quantization_with_smoothquant(self, task, model_name, expec
             load_ipex_model=True,
         )

+    @parameterized.expand(WEIGHT_ONLY_CONFIG)
     @unittest.skipIf(
         not is_intel_extension_for_transformers_available(), reason="Intel-extension-for-transformers not available!"
     )
-    def test_weight_only_quantization(self):
+    def test_weight_only_quantization(self, no_config, algo, weight_dtype):
         model_name = "hf-internal-testing/tiny-random-GPTNeoForCausalLM"
         model = AutoModelForCausalLM.from_pretrained(model_name)
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         tokenizer.add_special_tokens({"pad_token": "[PAD]"})
+        quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
+        calibration_dataset = _generate_dataset(quantizer, tokenizer, num_samples=2)

         with tempfile.TemporaryDirectory() as tmp_dir:
-            quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
-            calibration_dataset = _generate_dataset(quantizer, tokenizer, num_samples=2)
-            quantization_config = WeightOnlyQuantConfig(weight_dtype="int8")
-            q_model = quantizer.quantize(
-                quantization_config=quantization_config,
-                save_directory=tmp_dir,
-            )
-            q_model = ITREXAutoModelForCausalLM.from_pretrained(tmp_dir)
-            inp = torch.tensor([calibration_dataset[0]["input_ids"]])
-            out = model(inp)[0]
-            q_out = q_model(inp)[0]
-            self.assertTrue(torch.all(torch.isclose(out, q_out, atol=5e-1)))
-
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
-            calibration_dataset = _generate_dataset(quantizer, tokenizer, num_samples=2)
-            quantization_config = WeightOnlyQuantConfig(
-                algorithm="GPTQ",
-                algorithm_args={
-                    "percdamp": 0.01,
-                    "act_order": False,
-                    "scheme": "sym",
-                },
-                weight_dtype="int4_clip",
-            )
-            q_model = quantizer.quantize(
-                quantization_config=quantization_config,
-                calibration_dataset=calibration_dataset,
-                save_directory=tmp_dir,
-            )
-            q_model = ITREXAutoModelForCausalLM.from_pretrained(tmp_dir)
-            inp = torch.tensor([calibration_dataset[0]["input_ids"]])
-            out = model(inp)[0]
-            q_out = q_model(inp)[0]
-            self.assertTrue(torch.all(torch.isclose(out, q_out, atol=5e-1)))
-
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            quantizer = INCQuantizer.from_pretrained(copy.deepcopy(model), task="text-generation")
-            calibration_dataset = _generate_dataset(quantizer, tokenizer, num_samples=2)
-            q_model = quantizer.quantize(
-                weight_only=True,  # use RTN quantization method and NF4 weight data type is default.
-                save_directory=tmp_dir,
-            )
+            if not no_config:
+                if algo == "GPTQ":
+                    algorithm_args = {
+                        "percdamp": 0.01,
+                        "act_order": False,
+                        "scheme": "sym",
+                    }
+                quantization_config = WeightOnlyQuantConfig(
+                    algorithm=algo,
+                    algorithm_args=algorithm_args if algo == "GPTQ" else None,
+                    weight_dtype=weight_dtype,
+                )
+                q_model = quantizer.quantize(
+                    quantization_config=quantization_config,
+                    calibration_dataset=calibration_dataset if algo == "GPTQ" else None,
+                    save_directory=tmp_dir,
+                )
+            else:
+                q_model = quantizer.quantize(
+                    weight_only=True,  # use RTN quantization method and NF4 weight data type is default.
+                    save_directory=tmp_dir,
+                )
             q_model = ITREXAutoModelForCausalLM.from_pretrained(tmp_dir)
             inp = torch.tensor([calibration_dataset[0]["input_ids"]])
             out = model(inp)[0]
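
The refactor above collapses three near-identical with-blocks into one test parameterized over WEIGHT_ONLY_CONFIG, so each scenario (RTN int4/int8, GPTQ int4, and the no-config default) runs as its own test case. A standalone illustration of how parameterized.expand unpacks each tuple into the (no_config, algo, weight_dtype) arguments; the Demo class is hypothetical, not from the repo:

import unittest

from parameterized import parameterized

WEIGHT_ONLY_CONFIG = (
    (False, "RTN", "int4_clip"),
    (False, "GPTQ", "int4_clip"),
    (False, "RTN", "int8"),
    (True, "", ""),  # no explicit config: quantize(weight_only=True) defaults apply
)

class Demo(unittest.TestCase):
    @parameterized.expand(WEIGHT_ONLY_CONFIG)
    def test_case(self, no_config, algo, weight_dtype):
        # parameterized generates one test per tuple, unpacking it as arguments.
        self.assertIsInstance(no_config, bool)

if __name__ == "__main__":
    unittest.main()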
