Skip to content

Commit cda3fed

Browse files
Commit cda3fed — "Update"
Parent commit: 341c4a8

File tree

3 files changed: +34 additions, −18 deletions

nncf/quantization/algorithms/weight_compression/gptq.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -215,10 +215,7 @@ def _quantize_weights(
215215
"""
216216
if wc_params.node_with_weight.metatype in self._backend_entity.convolution_metatypes:
217217
msg = "Convolution metatypes are not supported"
218-
raise RuntimeError(msg)
219-
if not wc_params.node_with_weight.layer_attributes.constant_attributes[wc_params.weight_port_id]["transpose"]:
220-
msg = "Transpose is not supported"
221-
raise RuntimeError(msg)
218+
raise nncf.UnsupportedModelError(msg)
222219

223220
weight_tensor = self._backend_entity.get_weight(
224221
wc_params.node_with_weight, wc_params.weight_port_id, model, graph

nncf/quantization/algorithms/weight_compression/openvino_backend.py

+3
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,9 @@ def get_weight_names_and_port_ids(node: NNCFNode, graph: NNCFGraph) -> List[Tupl
130130
return result
131131

132132
def get_weight(self, node_with_weight: NNCFNode, weight_port_id: int, model: ov.Model, graph: NNCFGraph) -> Tensor:
133+
if not node_with_weight.layer_attributes.constant_attributes[weight_port_id]["transpose"]:
134+
msg = "Only transposed weights are supported"
135+
raise nncf.UnsupportedModelError(msg)
133136
weight_name = node_with_weight.layer_attributes.constant_attributes[weight_port_id]["name"]
134137
weight_node = self.name_to_node_mapping[weight_name]
135138
weight_tensor = get_const_value_as_numpy_tensor(weight_node)

tests/openvino/native/quantization/test_weights_compression.py

+30-14
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import inspect
1313
import os
14+
from contextlib import nullcontext
1415
from typing import Callable, Dict, List, Optional
1516

1617
import numpy as np
@@ -1457,9 +1458,19 @@ def test_compression_with_different_algo_combinations(input_shape, kwargs):
14571458
)
14581459

14591460

1461+
@pytest.mark.parametrize(
1462+
("transpose_a", "transpose_b", "raises_error"),
1463+
[
1464+
(False, True, False),
1465+
(True, True, False),
1466+
(False, False, True),
1467+
(True, False, True),
1468+
],
1469+
ids=["tb_nota", "ta_tb", "nota_notb", "ta_notb"],
1470+
)
14601471
@pytest.mark.parametrize(
14611472
"kwargs",
1462-
(
1473+
[
14631474
dict(scale_estimation=True),
14641475
dict(lora_correction=True),
14651476
dict(
@@ -1468,25 +1479,30 @@ def test_compression_with_different_algo_combinations(input_shape, kwargs):
14681479
scale_estimation=True,
14691480
advanced_parameters=CompressionParams(gptq_params=GPTQParams(subset_size=2)),
14701481
),
1471-
),
1482+
],
14721483
ids=["se", "lora", "gptq_se_awq"],
14731484
)
1474-
def test_compression_with_transpose(kwargs):
1485+
def test_compression_with_transpose(transpose_a, transpose_b, raises_error, kwargs):
14751486
dataset_size = 4
1476-
model = LMLinearModel(transpose_a=True, transpose_b=True).ov_model
1487+
model = LMLinearModel(transpose_a=transpose_a, transpose_b=transpose_b).ov_model
14771488
input_data = [np.ones(inp.shape) for inp in model.inputs] * dataset_size
14781489
dataset = Dataset(input_data)
14791490

1480-
compress_weights(
1481-
model,
1482-
mode=CompressWeightsMode.INT4_SYM,
1483-
ratio=1.0,
1484-
group_size=8,
1485-
subset_size=2,
1486-
dataset=dataset,
1487-
all_layers=True,
1488-
**kwargs,
1489-
)
1491+
with (
1492+
pytest.raises(nncf.UnsupportedModelError)
1493+
if raises_error and not kwargs.get("lora_correction", False)
1494+
else nullcontext()
1495+
):
1496+
compress_weights(
1497+
model,
1498+
mode=CompressWeightsMode.INT4_SYM,
1499+
ratio=1.0,
1500+
group_size=8,
1501+
subset_size=2,
1502+
dataset=dataset,
1503+
all_layers=True,
1504+
**kwargs,
1505+
)
14901506

14911507

14921508
class TestOVTemplateWeightCompression(TemplateWeightCompression):

0 commit comments

Comments (0)