|
30 | 30 | from nncf.parameters import BackupMode
|
31 | 31 | from nncf.quantization import compress_weights
|
32 | 32 | from nncf.quantization.advanced_parameters import AdvancedCompressionParameters as CompressionParams
|
| 33 | +from nncf.quantization.advanced_parameters import AdvancedGPTQParameters as GPTQParams |
33 | 34 | from nncf.quantization.advanced_parameters import AdvancedLoraCorrectionParameters as LoraParams
|
34 | 35 | from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig
|
35 | 36 | from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters
|
@@ -1377,29 +1378,42 @@ def test_data_aware_algo_with_different_activation_dimensions(n_extra_dims):
|
1377 | 1378 | group_size=-1,
|
1378 | 1379 | dataset=dataset,
|
1379 | 1380 | awq=True,
|
| 1381 | + ratio=0.5, |
| 1382 | + sensitivity_metric=SensitivityMetric.MEAN_ACTIVATION_MAGNITUDE, |
1380 | 1383 | )
|
1381 | 1384 |
|
1382 | 1385 |
|
1383 |
@pytest.mark.parametrize(
    "kwargs",
    [
        # Each entry is an extra set of keyword arguments layered on top of the
        # base compress_weights() call below, covering algorithm combinations.
        {"scale_estimation": True},
        {"lora_correction": True},
        {
            "gptq": True,
            "scale_estimation": True,
            "advanced_parameters": CompressionParams(gptq_params=GPTQParams(subset_size=2)),
        },
        {
            "awq": True,
            "gptq": True,
            "scale_estimation": True,
            "advanced_parameters": CompressionParams(gptq_params=GPTQParams(subset_size=2)),
        },
    ],
)
def test_compression_with_different_algo_combinations(kwargs):
    """Smoke-test compress_weights() with various data-aware algorithm combinations.

    Builds a small linear model and a tiny all-ones calibration dataset, then runs
    INT4 symmetric weight compression with the parametrized algorithm flags merged in.
    The test passes if compression completes without raising.
    """
    n_samples = 4
    model = LMLinearModel().ov_model
    # One all-ones tensor per model input, repeated to form the calibration set.
    calibration_inputs = [np.ones(inp.shape) for inp in model.inputs] * n_samples
    dataset = Dataset(calibration_inputs)

    compress_weights(
        model,
        mode=CompressWeightsMode.INT4_SYM,
        ratio=1.0,
        group_size=8,
        subset_size=2,
        dataset=dataset,
        all_layers=True,
        **kwargs,
    )
0 commit comments