
Commit 5f4378e

[QAT Lora 3/N] Introduced FQ with absorbable LoRA as a new weight compression format (#3322)
### Changes

As stated in the title.

### Reason for changes

Part of the QAT method with absorbable LoRA adapters to achieve better accuracy for 4-bit models.

![image](https://github.com/user-attachments/assets/b3019e9a-b67a-4aec-9781-e6f94905938b)

Tuning pipeline:

![image](https://github.com/user-attachments/assets/50738363-8c5a-492a-915d-1ee88982599b)

### Related tickets

154907

### Tests

- [x] tests/torch/ptq/test_fq_lora.py

microsoft/phi3.5-mini-instruct, seqlen=4096

| Method | Main Precision | Emb/Head Precision | Group Size | wikitext, word_ppl |
|--------|----------------|--------------------|------------|--------------------|
| Original model (OV) | BF16 | BF16 | | 9.98 |
| Original model (Torch) | BF16 | BF16 | | 10.00 |
| [QAT] Mergeable LoRA | INT4_ASYM | INT8_ASYM | 64 | 10.47 |
| [PTQ] AWQ + Scale Estimation + GPTQ | INT4_ASYM | INT8_ASYM | 64 | 10.71 |
| [QAT] Mergeable LoRA | INT4_SYM | INT8_SYM | 512 | 10.86 |
| [PTQ] AWQ + Scale Estimation + GPTQ | INT4_SYM | INT8_SYM | 512 | 11.32 |

HuggingFaceTB/SmolLM-1.7B-Instruct, seqlen=2048

| Method | Main Precision | Emb/Head Precision | Group Size | wikitext, word_ppl |
|--------|----------------|--------------------|------------|--------------------|
| Original model | BF16 | BF16 | | 19.11 |
| [QAT] Mergeable LoRA | INT4_ASYM | INT8_ASYM | 64 | 19.31 |
| [PTQ] AWQ + Scale Estimation + GPTQ | INT4_ASYM | INT8_ASYM | 64 | 19.68 |
1 parent 90f3727 commit 5f4378e

27 files changed (+809 / −155 lines)
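Before the per-file diffs, a minimal sketch of the intended workflow, assuming the public `nncf.compress_weights` exposes the new `compression_format` argument the way the backend signatures below do. `model`, `calibration_dataset`, and `train_loader` are assumed to exist; the fine-tuning loop is illustrative and not part of this commit.

```python
import nncf
import torch

# Compress weights with the new FQ_LORA format (a sketch, not the exact API).
compressed = nncf.compress_weights(
    model,  # assumed: a torch.nn.Module LLM
    mode=nncf.CompressWeightsMode.INT4_ASYM,
    group_size=64,
    dataset=calibration_dataset,  # assumed: an nncf.Dataset
    compression_format=nncf.CompressionFormat.FQ_LORA,
)

# Weights keep their original precision: FakeQuantize ops plus absorbable LoRA
# adapters are inserted, so the model can be tuned with a plain PyTorch loop.
optimizer = torch.optim.AdamW(
    (p for p in compressed.parameters() if p.requires_grad), lr=1e-4
)
for batch in train_loader:  # illustrative HF-style batches
    loss = compressed(**batch).loss
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
```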

`.ci/cspell_dict.txt` (+1)

```diff
@@ -220,6 +220,7 @@ logit
 loglikelihoods
 lstmsequence
 lstsq
+lspec
 lyalyushkin
 mapillary
 maskrcnn
```

`nncf/__init__.py` (+1)

```diff
@@ -34,6 +34,7 @@
 from nncf.errors import UnsupportedVersionError as UnsupportedVersionError
 from nncf.errors import ValidationError as ValidationError
 from nncf.parameters import BackupMode as BackupMode
+from nncf.parameters import CompressionFormat as CompressionFormat
 from nncf.parameters import CompressWeightsMode as CompressWeightsMode
 from nncf.parameters import DropType as DropType
 from nncf.parameters import ModelType as ModelType
```

`nncf/common/quantization/structs.py` (+12 / −4)

```diff
@@ -27,14 +27,22 @@
 @api()
 class QuantizationScheme(StrEnum):
     """
-    Basic enumeration for quantization scheme specification.
-
-    :param SYMMETRIC:
-    :param ASYMMETRIC:
+    Enumeration for specifying quantization schemes.
+
+    :param SYMMETRIC: Symmetric quantization where the range is defined by a single parameter - scale.
+        This range can include both negative and positive values if signed, or only positive values if unsigned.
+    :param ASYMMETRIC: Asymmetric quantization where the range is defined by two parameters - input_low and input_high,
+        representing the lower and upper boundaries of the range, respectively.
+    :param SYMMETRIC_LORA: Symmetric quantization with Low-Rank Adapters (LoRA), involving the sum of weights and
+        the multiplication of low-rank adapters.
+    :param ASYMMETRIC_LORA: Asymmetric quantization with Low-Rank Adapters (LoRA), involving the sum of weights and
+        the multiplication of low-rank adapters.
     """

     SYMMETRIC = "symmetric"
     ASYMMETRIC = "asymmetric"
+    SYMMETRIC_LORA = "symmetric_lora"
+    ASYMMETRIC_LORA = "asymmetric_lora"


 class QuantizerConfig:
```
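A rough sketch of what the new `SYMMETRIC_LORA` scheme computes, per the docstring above: the frozen weight and the low-rank product are summed before fake quantization. The function name and the quantizer formula are illustrative, not NNCF's actual implementation (which also handles per-channel scales and gradient flow through the rounding op).

```python
import torch

def symmetric_lora_fq(weight, lora_a, lora_b, scale, num_bits=4):
    """Fake-quantize the sum of a frozen weight and its LoRA product.

    Shapes: weight [out, in], lora_b [out, r], lora_a [r, in], r << min(out, in).
    B @ A has the same shape as the weight, so after tuning the adapters can be
    absorbed (merged) into the weight at no extra inference cost.
    """
    w = weight + lora_b @ lora_a
    level_low = -(2 ** (num_bits - 1))    # e.g. -8 for 4-bit
    level_high = 2 ** (num_bits - 1) - 1  # e.g. +7 for 4-bit
    q = torch.clamp(torch.round(w / scale), level_low, level_high)
    return q * scale  # quantize-dequantize: values snap to the int grid
```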

`nncf/experimental/torch/fx/quantization/quantize_model.py` (+3)

```diff
@@ -31,6 +31,7 @@
 from nncf.experimental.torch.fx.transformations import compress_post_quantize_transformation
 from nncf.experimental.torch.fx.transformations import fq_weights_transformation
 from nncf.parameters import BackupMode
+from nncf.parameters import CompressionFormat
 from nncf.parameters import CompressWeightsMode
 from nncf.parameters import ModelType
 from nncf.parameters import QuantizationMode
@@ -131,6 +132,7 @@ def compress_weights_impl(
     gptq: bool,
     lora_correction: bool,
     backup_mode: BackupMode,
+    compression_format: CompressionFormat,
     advanced_parameters: Optional[AdvancedCompressionParameters] = None,
 ) -> torch.fx.GraphModule:
     """
@@ -149,6 +151,7 @@ def compress_weights_impl(
         gptq,
         lora_correction,
         backup_mode,
+        compression_format,
         advanced_parameters,
     )
     graph = NNCFGraphFactory.create(model)
```

`nncf/openvino/quantization/quantize_model.py` (+3)

```diff
@@ -32,6 +32,7 @@
 from nncf.openvino.quantization.quantize_ifmodel import apply_algorithm_if_bodies
 from nncf.openvino.rt_info import dump_parameters
 from nncf.parameters import BackupMode
+from nncf.parameters import CompressionFormat
 from nncf.parameters import CompressWeightsMode
 from nncf.parameters import DropType
 from nncf.parameters import ModelType
@@ -376,6 +377,7 @@ def compress_weights_impl(
     gptq: bool,
     lora_correction: bool,
     backup_mode: BackupMode,
+    compression_format: CompressionFormat,
     advanced_parameters: Optional[AdvancedCompressionParameters] = None,
 ) -> ov.Model:
     """
@@ -396,6 +398,7 @@ def compress_weights_impl(
         gptq,
         lora_correction,
         backup_mode,
+        compression_format,
         advanced_parameters,
     )
```

`nncf/parameters.py` (+23)

```diff
@@ -96,6 +96,29 @@ class CompressWeightsMode(StrEnum):
     E2M1 = "e2m1"


+@api(canonical_alias="nncf.CompressionFormat")
+class CompressionFormat(StrEnum):
+    """
+    Describes the format in which the model is saved after weight compression.
+
+    :param DQ: Represents the 'dequantize' format, where weights are stored in low-bit precision,
+        and a dequantization subgraph is added to the model. This is the default format for post-training weight
+        compression methods.
+    :param FQ: Represents the 'fake_quantize' format, where quantization is simulated by applying
+        quantization and dequantization operations. Weights remain in the same precision. This format is
+        suitable for quantization-aware training (QAT).
+    :param FQ_LORA: Represents the 'fake_quantize_with_lora' format, which combines fake quantization
+        with absorbable low-rank adapters (LoRA). Quantization is applied to the sum of weights and
+        the multiplication of adapters. This makes quantization-aware training (QAT) more efficient in terms of
+        accuracy, as adapters can also be tuned and remain computationally affordable during training due to their
+        small dimensions.
+    """
+
+    DQ = "dequantize"
+    FQ = "fake_quantize"
+    FQ_LORA = "fake_quantize_with_lora"
+
+
 @api(canonical_alias="nncf.BackupMode")
 class BackupMode(StrEnum):
     """
```
`nncf/quantization/advanced_parameters.py` (+3)

```diff
@@ -384,6 +384,9 @@ class AdvancedCompressionParameters:
     # Advanced Lora Correction algorithm parameters
     lora_correction_params: AdvancedLoraCorrectionParameters = field(default_factory=AdvancedLoraCorrectionParameters)

+    # rank of lora adapters for FQ_LORA format. Defaults to 256.
+    lora_adapter_rank: int = 256
+

 @api()
 @dataclass
```
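A hedged example of overriding the new default rank; parameter names follow this diff, while `model` and `calibration_dataset` are assumed to exist:

```python
import nncf
from nncf.quantization.advanced_parameters import AdvancedCompressionParameters

# Smaller ranks mean fewer trainable parameters; larger ranks add capacity
# at the cost of more memory during tuning (256 is the default in this diff).
compressed = nncf.compress_weights(
    model,
    mode=nncf.CompressWeightsMode.INT4_ASYM,
    dataset=calibration_dataset,
    compression_format=nncf.CompressionFormat.FQ_LORA,
    advanced_parameters=AdvancedCompressionParameters(lora_adapter_rank=32),
)
```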

`nncf/quantization/algorithms/weight_compression/algorithm.py` (+11)

```diff
@@ -31,6 +31,7 @@
 from nncf.common.utils.helpers import create_table
 from nncf.experimental.common.tensor_statistics.statistics import WCTensorStatistic
 from nncf.parameters import BackupMode
+from nncf.parameters import CompressionFormat
 from nncf.parameters import CompressWeightsMode
 from nncf.parameters import SensitivityMetric
 from nncf.quantization.advanced_parameters import AdvancedCompressionParameters
@@ -122,6 +123,7 @@ def check_user_compression_configuration(
     ignored_scope: Optional[IgnoredScope],
     sensitivity_metric: Optional[SensitivityMetric],
     backup_mode: Optional[BackupMode],
+    compression_format: Optional[CompressionFormat],
     advanced_parameters: Optional[AdvancedCompressionParameters],
 ) -> None:
     """
@@ -172,6 +174,10 @@ def check_user_compression_configuration(
             requires a dataset, but it's not provided."
         raise nncf.ValidationError(msg)

+    if lora_correction and compression_format in [CompressionFormat.FQ, CompressionFormat.FQ_LORA]:
+        msg = "LoRA Correction algorithm is not compatible with FQ and FQ_LORA compression formats."
+        raise nncf.ValidationError(msg)
+

 class WeightCompression(Algorithm):
     """
@@ -195,6 +201,7 @@ def __init__(
         gptq: bool,
         lora_correction: bool,
         backup_mode: BackupMode = BackupMode.INT8_ASYM,
+        compression_format: CompressionFormat = CompressionFormat.DQ,
         advanced_parameters: Optional[AdvancedCompressionParameters] = None,
     ):
         """
@@ -233,6 +240,7 @@ def __init__(
             In this mode, weights are retained in their original precision without any quantization.
             INT8_SYM stands for 8-bit integer symmetric quantization without zero point.
             INT8_ASYM stands for 8-bit integer asymmetric quantization with a typical non-fixed zero point.
+        :param compression_format: Describes the format in which the model is saved after weight compression.
         :param advanced_parameters: advanced parameters for algorithms in compression pipeline.
         """
         super().__init__()
@@ -251,6 +259,7 @@ def __init__(
         self._gptq = gptq
         self._lora_correction = lora_correction
         self._backup_mode = backup_mode
+        self._compression_format = compression_format
         self._advanced_parameters = (
             advanced_parameters if advanced_parameters is not None else AdvancedCompressionParameters()
         )
@@ -646,6 +655,7 @@ def apply(
             scales,
             zero_points,
             lora_correction_algo,
+            self._compression_format,
         )

         self._backend_entity.dump_parameters(
@@ -662,6 +672,7 @@ def apply(
                 "gptq": self._gptq,
                 "lora_correction": self._lora_correction,
                 "backup_mode": self._backup_mode.value,
+                "compression_format": self._compression_format.value,
                 "advanced_parameters": convert_to_dict_recursively(self._advanced_parameters),
             },
             algo_name="weight_compression",
```
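Given the new validation check above, a call that combines the LoRA Correction algorithm with a trainable format should fail fast (a sketch; `model` is assumed):

```python
import nncf

try:
    nncf.compress_weights(
        model,
        mode=nncf.CompressWeightsMode.INT4_ASYM,
        lora_correction=True,  # post-hoc correction adapters
        compression_format=nncf.CompressionFormat.FQ_LORA,  # trainable adapters
    )
except nncf.ValidationError:
    # The two LoRA mechanisms are mutually exclusive by design.
    pass
```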

`nncf/quantization/algorithms/weight_compression/backend.py` (+14 / −3)

```diff
@@ -24,7 +24,10 @@
 from nncf.experimental.common.tensor_statistics.collectors import RawReducer
 from nncf.experimental.common.tensor_statistics.collectors import TensorCollector
 from nncf.experimental.common.tensor_statistics.statistics import HessianTensorStatistic
+from nncf.parameters import CompressionFormat
+from nncf.quantization.advanced_parameters import AdvancedCompressionParameters
 from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters
+from nncf.quantization.algorithms.weight_compression.lora_correction import LoraCorrectionAlgorithm
 from nncf.tensor import Tensor
 from nncf.tensor import TensorDataType

@@ -147,15 +150,23 @@ def transform_model(
         weight_compression_parameters: Iterable[WeightCompressionParameters],
         precomputed_scales: Dict[str, Tensor] = None,
         precomputed_zero_points: Dict[str, Tensor] = None,
+        lora_correction_algo: Optional[LoraCorrectionAlgorithm] = None,
+        compression_format: CompressionFormat = CompressionFormat.DQ,
+        advanced_parameters: AdvancedCompressionParameters = AdvancedCompressionParameters(),
     ) -> TModel:
         """
         Applies weight compression transformations to the model.

         :param model: Model in which the weights will be compressed according to the weight compression description.
         :param graph: The graph associated with the model.
-        :param weight_compression_parameters: List of weight compression parameters.
-        :param precomputed_scales: Precomputed scales for weights compression.
-        :param precomputed_zero_points: Precomputed zero points for weights compression.
+        :param weight_compression_parameters: An iterable of weight compression parameters.
+        :param precomputed_scales: Precomputed scales for weight compression.
+        :param precomputed_zero_points: Precomputed zero points for weight compression.
+        :param lora_correction_algo: An optional algorithm to reduce quantization noise after weight compression by
+            using low-rank adapters. This algorithm not only overrides weights with their quantized counterparts but
+            also expands the model's execution graph following the Low-Rank Adaptation (LoRA) concept.
+        :param compression_format: The format in which the model is saved after weight compression.
+        :param advanced_parameters: Describes advanced parameters of compression formats.
         :return: The transformed model.
         """
```
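For context on the new `lora_correction_algo` parameter: per the docstring, the algorithm reduces quantization noise by attaching low-rank adapters next to the quantized weight. A simplified SVD-based sketch of the idea (the real algorithm also uses activation statistics):

```python
import torch

def lora_correction_sketch(w_fp, w_q, rank=8):
    """Approximate the quantization error (w_fp - w_q) with a rank-`rank`
    product B @ A, to be executed as a parallel branch at inference time."""
    residual = w_fp - w_q
    u, s, vh = torch.linalg.svd(residual, full_matrices=False)
    b = u[:, :rank] * s[:rank]  # [out, rank]
    a = vh[:rank, :]            # [rank, in]
    # Forward pass becomes: x @ w_q.T + (x @ a.T) @ b.T  ~=  x @ w_fp.T
    return a, b
```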

`nncf/quantization/algorithms/weight_compression/openvino_backend.py` (+4)

```diff
@@ -46,7 +46,9 @@
 from nncf.openvino.statistics.collectors import OVMeanReducer
 from nncf.openvino.statistics.collectors import OVMeanVarianceReducer
 from nncf.openvino.statistics.collectors import OVShapeReducer
+from nncf.parameters import CompressionFormat
 from nncf.parameters import CompressWeightsMode
+from nncf.quantization.advanced_parameters import AdvancedCompressionParameters
 from nncf.quantization.algorithms.weight_compression.awq_patterns import get_awq_patterns
 from nncf.quantization.algorithms.weight_compression.backend import AWQAlgoBackend
 from nncf.quantization.algorithms.weight_compression.backend import MixedPrecisionAlgoBackend
@@ -283,6 +285,8 @@ def transform_model(
         precomputed_scales: Dict[str, Tensor] = None,
         precomputed_zero_points: Dict[str, Tensor] = None,
         lora_correction_algo: LoraCorrectionAlgorithm = None,
+        compression_format: CompressionFormat = CompressionFormat.DQ,
+        advanced_parameters: AdvancedCompressionParameters = AdvancedCompressionParameters(),
     ) -> ov.Model:
         for wc_params in weight_compression_parameters:
             const_attributes = wc_params.node_with_weight.layer_attributes.constant_attributes[wc_params.weight_port_id]
```

`nncf/quantization/algorithms/weight_compression/scale_estimation.py` (−1)

```diff
@@ -232,7 +232,6 @@ def calculate_quantization_params(
         X, _ = reshape_weight_for_grouped_quantization(X, 0, group_size)
         best_diffs = None
         result_scale = None
-
         fp_outs = fns.matmul(fns.transpose(original_weight, (1, 0, 2)), X)
         q_outs = fns.matmul(fns.transpose(q_weights, (1, 0, 2)), X)
```
