
Commit b49390e

Restoring state after inference only for stateful models (#2445)
### Changes

Restoring state after inference only for stateful models.

### Reason for changes

Inference optimization.

### Related tickets

131141

### Tests

test_examples 230
1 parent edb3987 commit b49390e

File tree: 8 files changed, +113 -74 lines changed

nncf/common/factory.py (-8)

@@ -18,9 +18,7 @@
 from nncf.common.graph.transformations.command_creation import CommandCreator
 from nncf.common.tensor_statistics import aggregator
 from nncf.common.utils.backend import BackendType
-from nncf.common.utils.backend import get_available_backends
 from nncf.common.utils.backend import get_backend
-from nncf.common.utils.backend import is_openvino_compiled_model
 from nncf.data.dataset import Dataset
 
 TModel = TypeVar("TModel")
@@ -88,12 +86,6 @@ def create(model: TModel) -> Engine:
         :param model: backend-specific model instance.
         :return: backend-specific Engine instance.
         """
-        available_backends = get_available_backends()
-        if BackendType.OPENVINO in available_backends and is_openvino_compiled_model(model):
-            from nncf.openvino.engine import OVCompiledModelEngine
-
-            return OVCompiledModelEngine(model)
-
         model_backend = get_backend(model)
         if model_backend == BackendType.ONNX:
             from nncf.onnx.engine import ONNXEngine

nncf/openvino/engine.py (+8 -7)

@@ -16,6 +16,7 @@
 
 import nncf
 from nncf.common.engine import Engine
+from nncf.openvino.graph.model_utils import model_has_state
 from nncf.parameters import TargetDevice
 
 
@@ -28,13 +29,12 @@ class OVCompiledModelEngine(Engine):
     to infer the compiled model.
     """
 
-    def __init__(self, model: ov.CompiledModel):
-        self.compiled_model = model
-        self.infer_request = model.create_infer_request()
-        self.reset_state = hasattr(self.infer_request, "reset_state")
+    def __init__(self, compiled_model: ov.CompiledModel, stateful: bool):
+        self.infer_request = compiled_model.create_infer_request()
+        self.reset_state = stateful and hasattr(self.infer_request, "reset_state")
         self.input_tensor_names = set()
-        self.number_of_inputs = len(model.inputs)
-        for model_input in model.inputs:
+        self.number_of_inputs = len(compiled_model.inputs)
+        for model_input in compiled_model.inputs:
             self.input_tensor_names.update(model_input.get_names())
 
     def _check_input_data_format(
@@ -95,8 +95,9 @@ def __init__(self, model: ov.Model, target_device: TargetDevice = TargetDevice.C
             target_device = TargetDevice.CPU
 
         ie = ov.Core()
+        stateful = model_has_state(model)
         compiled_model = ie.compile_model(model, target_device.value)
-        self.engine = OVCompiledModelEngine(compiled_model)
+        self.engine = OVCompiledModelEngine(compiled_model, stateful)
 
     def infer(
         self, input_data: Union[np.ndarray, List[np.ndarray], Tuple[np.ndarray], Dict[str, np.ndarray]]
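
With this change, the per-inference `reset_state()` call happens only when the wrapped model actually carries state; stateless models skip it, which is the inference optimization named in the commit message. A minimal usage sketch, assuming a model file on disk and a 1x3x224x224 input (both placeholders, not from this commit):

import numpy as np
import openvino.runtime as ov

from nncf.openvino.engine import OVCompiledModelEngine
from nncf.openvino.graph.model_utils import model_has_state

core = ov.Core()
model = core.read_model("model.xml")  # placeholder path
compiled_model = core.compile_model(model, "CPU")

# State is reset after inference only if the model is stateful.
engine = OVCompiledModelEngine(compiled_model, stateful=model_has_state(model))
outputs = engine.infer(np.zeros((1, 3, 224, 224), dtype=np.float32))  # placeholder input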

nncf/openvino/graph/model_utils.py (+10)

@@ -60,3 +60,13 @@ def get_start_nodes_for_activation_path_tracing(nncf_graph: NNCFGraph) -> List[N
     :return: Target NNCFGraph input nodes.
     """
     return nncf_graph.get_input_nodes() + nncf_graph.get_nodes_by_metatypes([OVReadValueMetatype])
+
+
+def model_has_state(model: ov.Model) -> bool:
+    """
+    Returns True if model has state else False
+
+    :param model: OpenVINO model
+    :return: True if model has state else False
+    """
+    return len(model.get_sinks()) > 0
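
For context: OpenVINO represents state as ReadValue/Assign operation pairs, and the Assign side is registered as a model sink, so a non-empty `get_sinks()` result implies a stateful model. A minimal sketch of that relationship, using the classic opset6 variable-id API (illustrative only, not code from this commit):

import numpy as np
import openvino.runtime as ov
from openvino.runtime import opset6

from nncf.openvino.graph.model_utils import model_has_state

# Tiny stateful model: ReadValue/Assign share the variable "var_0".
param = opset6.parameter([1, 3], np.float32, name="input")
read = opset6.read_value(param, "var_0")  # reads the variable, initialized from `param`
add = opset6.add(read, param)
assign = opset6.assign(add, "var_0")      # writes the variable back; Assign nodes are sinks
model = ov.Model([opset6.result(add)], [assign], [param])  # results, sinks, parameters

assert model_has_state(model)             # model.get_sinks() contains the Assign node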

nncf/quantization/algorithms/accuracy_control/backend.py (+30 -12)

@@ -13,6 +13,7 @@
 from abc import abstractmethod
 from typing import Any, List, Optional, TypeVar
 
+from nncf.common.engine import Engine
 from nncf.common.graph.graph import NNCFGraph
 from nncf.common.graph.graph import NNCFNode
 from nncf.common.graph.operator_metatypes import OperatorMetatype
@@ -21,6 +22,35 @@
 TPModel = TypeVar("TPModel")
 
 
+class PreparedModel(ABC):
+    @property
+    @abstractmethod
+    def model_for_inference(self) -> TPModel:
+        """
+        Returns prepared model for inference.
+
+        :return: Prepared model for inference.
+        """
+
+    @property
+    @abstractmethod
+    def engine(self) -> Engine:
+        """
+        Returns the engine for inference the prepared model.
+
+        :return: The engine for inference the prepared model.
+        """
+
+    def __call__(self, input_data: Any) -> Any:
+        """
+        Runs model on the provided input data and returns the raw model outputs.
+
+        :param input_data: inputs for the model
+        :return: raw model outputs
+        """
+        return self.engine.infer(input_data)
+
+
 class AccuracyControlAlgoBackend(ABC):
     # Metatypes
 
@@ -158,15 +188,3 @@ def get_model_size(model: TModel) -> int:
         :param model: A model
         :return: Model size (in bytes)
         """
-
-    # Preparation of model
-
-    @staticmethod
-    @abstractmethod
-    def prepare_for_inference(model: TModel) -> TPModel:
-        """
-        Prepares model for inference.
-
-        :param model: A model that should be prepared.
-        :return: Prepared model for inference.
-        """

nncf/quantization/algorithms/accuracy_control/evaluator.py (+26 -32)

@@ -13,15 +13,14 @@
 from typing import Any, Callable, Iterable, List, Optional, Tuple, TypeVar, Union
 
 import nncf
-from nncf.common.factory import EngineFactory
 from nncf.common.logging import nncf_logger
 from nncf.common.utils.backend import BackendType
 from nncf.common.utils.backend import get_backend
 from nncf.common.utils.timer import timer
 from nncf.data.dataset import Dataset
+from nncf.quantization.algorithms.accuracy_control.backend import PreparedModel
 
 TModel = TypeVar("TModel")
-TPModel = TypeVar("TPModel")
 TTensor = TypeVar("TTensor")
 
 
@@ -112,7 +111,7 @@ def is_metric_mode(self) -> bool:
         """
         return self._metric_mode
 
-    def prepare_model_for_inference(self, model: TModel) -> TPModel:
+    def prepare_model(self, model: TModel) -> PreparedModel:
         """
         Prepares model for inference.
 
@@ -122,21 +121,19 @@ def prepare_model_for_inference(self, model: TModel) -> TPModel:
         backend = get_backend(model)
 
         if backend == BackendType.OPENVINO:
-            import openvino.runtime as ov
+            from nncf.quantization.algorithms.accuracy_control.openvino_backend import OVPreparedModel
 
-            return ov.compile_model(model)
+            return OVPreparedModel(model)
 
-        raise NotImplementedError(
-            f"The `prepare_model_for_inference()` method is not implemented for the {backend} backend."
-        )
+        raise NotImplementedError(f"The `prepare_model()` method is not implemented for the {backend} backend.")
 
-    def validate_model_for_inference(
-        self, model_for_inference: TPModel, dataset: Dataset, indices: Optional[List[int]] = None
+    def validate_prepared_model(
+        self, prepared_model: PreparedModel, dataset: Dataset, indices: Optional[List[int]] = None
     ):
         """
         Validates prepared model for inference.
 
-        :param model: Prepared model to validate.
+        :param prepared_model: Prepared model to validate.
         :param dataset: Dataset to validate the model.
         :param indices: Zero-based indices of data items that should be selected from
             the dataset.
@@ -148,7 +145,7 @@ def validate_model_for_inference(
             item.
         """
         if self._metric_mode is None:
-            self._metric_mode = Evaluator.determine_mode(model_for_inference, dataset, self._validation_fn)
+            self._metric_mode = Evaluator.determine_mode(prepared_model, dataset, self._validation_fn)
 
         if not self.is_metric_mode() and indices is not None:
             raise ValueError("The `indices` parameter can be used only if Evaluator.is_metric_mode() = True")
@@ -157,7 +154,7 @@ def validate_model_for_inference(
         if self._enable_iteration_count:
             validation_dataset = IterationCounter(validation_dataset)
 
-        metric, values_for_each_item = self._validation_fn(model_for_inference, validation_dataset)
+        metric, values_for_each_item = self._validation_fn(prepared_model.model_for_inference, validation_dataset)
 
         self._num_passed_iterations = validation_dataset.num_iterations if self._enable_iteration_count else 0
 
@@ -190,20 +187,20 @@ def validate(
             Otherwise, if the condition is false, it represents list of logits for each
             item.
         """
-        model_for_inference = self.prepare_model_for_inference(model)
-        return self.validate_model_for_inference(model_for_inference, dataset, indices)
+        prepared_model = self.prepare_model(model)
+        return self.validate_prepared_model(prepared_model, dataset, indices)
 
     @staticmethod
     def determine_mode(
-        model_for_inference: TPModel,
+        prepared_model: PreparedModel,
         dataset: Dataset,
         validation_fn: Callable[[Any, Iterable[Any]], Tuple[float, Union[None, List[float], List[List[TTensor]]]]],
    ) -> bool:
        """
        Determines mode based on the type of returned value from the
        validation function.
 
-        :param model_for_inference: Model to validate.
+        :param prepared_model: Model to validate.
        :param dataset: Dataset to validate the model.
        :param validation_fn: Validation function to validate model.
        :return: A boolean indicator where `True` means that the `Evaluator` collects
@@ -215,7 +212,7 @@ def determine_mode(
        data_item = dataset.get_data([0])
 
        try:
-            metric_value, values_for_each_item = validation_fn(model_for_inference, data_item)
+            metric_value, values_for_each_item = validation_fn(prepared_model.model_for_inference, data_item)
        except Exception:
            metric_mode = False
 
@@ -262,15 +259,15 @@ def determine_mode(
 
        return metric_mode
 
-    def collect_values_for_each_item_using_model_for_inference(
-        self, model_for_inference: TPModel, dataset: Dataset, indices: Optional[List[int]] = None
+    def collect_values_for_each_item_using_prepared_model(
+        self, prepared_model: PreparedModel, dataset: Dataset, indices: Optional[List[int]] = None
    ) -> Union[List[float], List[List[TTensor]]]:
        """
        Collects value for each item from the dataset using prepared model for inference.
        If `is_metric_mode()` returns `True` then i-th value is a metric for i-th data item.
        It is an output of the model for i-th data item otherwise.
 
-        :param model: Model to infer.
+        :param prepared_model: Model to infer.
        :param dataset: Dataset to collect values.
        :param indices: The zero-based indices of data items that should be selected from
            the dataset.
@@ -279,15 +276,14 @@ def collect_values_for_each_item_using_model_for_inference(
        if self._metric_mode:
            # Collect metrics for each item
            values_for_each_item = [
-                self._validation_fn(model_for_inference, [data_item])[0] for data_item in dataset.get_data(indices)
+                self._validation_fn(prepared_model.model_for_inference, [data_item])[0]
+                for data_item in dataset.get_data(indices)
            ]
        else:
            # Collect outputs for each item
-            engine = EngineFactory.create(model_for_inference)
-
            values_for_each_item = []
            for data_item in dataset.get_inference_data(indices):
-                logits = engine.infer(data_item)
+                logits = prepared_model(data_item)
                values_for_each_item.append(list(logits.values()))
 
        self._num_passed_iterations = len(values_for_each_item) if self._enable_iteration_count else 0
@@ -308,8 +304,8 @@ def collect_values_for_each_item(
            the dataset.
        :return: Collected values.
        """
-        model_for_inference = self.prepare_model_for_inference(model)
-        return self.collect_values_for_each_item_using_model_for_inference(model_for_inference, dataset, indices)
+        prepared_model = self.prepare_model(model)
+        return self.collect_values_for_each_item_using_prepared_model(prepared_model, dataset, indices)
 
    def collect_metric_results(self, model: TModel, dataset: Dataset, model_name: str = "") -> MetricResults:
        """
@@ -323,18 +319,16 @@ def collect_metric_results(self, model: TModel, dataset: Dataset, model_name: st
        nncf_logger.info(f"Validation of {model_name} model was started")
 
        with timer() as preparation_time:
-            model_for_inference = self.prepare_model_for_inference(model)
+            prepared_model = self.prepare_model(model)
 
        with timer() as validation_time:
-            metric, values_for_each_item = self.validate_model_for_inference(model_for_inference, dataset)
+            metric, values_for_each_item = self.validate_prepared_model(prepared_model, dataset)
 
        nncf_logger.info(f"Metric of {model_name} model: {metric}")
 
        if values_for_each_item is None:
            nncf_logger.info(f"Collecting values for each data item using the {model_name} model")
            with timer():
-                values_for_each_item = self.collect_values_for_each_item_using_model_for_inference(
-                    model_for_inference, dataset
-                )
+                values_for_each_item = self.collect_values_for_each_item_using_prepared_model(prepared_model, dataset)
 
        return MetricResults(metric, values_for_each_item, preparation_time(), validation_time())
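
End to end, the evaluator now compiles a model once via `prepare_model()` and reuses the same `PreparedModel` for both validation and per-item output collection. A hedged usage sketch (the model path, input shape, validation function, and the `Evaluator(validation_fn)` constructor call are all assumptions for illustration):

import numpy as np
import openvino.runtime as ov

import nncf
from nncf.quantization.algorithms.accuracy_control.evaluator import Evaluator


def validation_fn(compiled_model: ov.CompiledModel, dataset):
    # Placeholder metric: mean of the first model output across data items.
    scores = [float(np.mean(compiled_model(item)[0])) for item in dataset]
    return sum(scores) / max(len(scores), 1), scores


model = ov.Core().read_model("model.xml")                         # placeholder path
dataset = nncf.Dataset([np.zeros((1, 3, 224, 224), np.float32)])  # placeholder shape

evaluator = Evaluator(validation_fn)       # constructor signature assumed
prepared = evaluator.prepare_model(model)  # compiles once, returns OVPreparedModel
metric, per_item = evaluator.validate_prepared_model(prepared, dataset)
outputs = evaluator.collect_values_for_each_item_using_prepared_model(prepared, dataset)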

nncf/quantization/algorithms/accuracy_control/openvino_backend.py (+24 -6)

@@ -16,6 +16,7 @@
 
 from nncf.common.graph import NNCFGraph
 from nncf.common.graph import NNCFNode
+from nncf.openvino.engine import OVCompiledModelEngine
 from nncf.openvino.graph.layer_attributes import OVLayerAttributes
 from nncf.openvino.graph.metatypes.groups import CONSTANT_OPERATIONS
 from nncf.openvino.graph.metatypes.groups import FAKE_QUANTIZE_OPERATIONS
@@ -26,10 +27,33 @@
 from nncf.openvino.graph.metatypes.openvino_metatypes import OVConcatMetatype
 from nncf.openvino.graph.metatypes.openvino_metatypes import OVOpMetatype
 from nncf.openvino.graph.model_utils import get_start_nodes_for_activation_path_tracing
+from nncf.openvino.graph.model_utils import model_has_state
 from nncf.openvino.graph.node_utils import get_bias_value
 from nncf.openvino.graph.node_utils import get_weight_value
 from nncf.openvino.graph.node_utils import is_node_with_bias
 from nncf.quantization.algorithms.accuracy_control.backend import AccuracyControlAlgoBackend
+from nncf.quantization.algorithms.accuracy_control.backend import PreparedModel
+
+
+class OVPreparedModel(PreparedModel):
+    """
+    Implementation of the `PreparedModel` for OpenVINO backend.
+    """
+
+    def __init__(self, model: ov.Model):
+        self._stateful = model_has_state(model)
+        self._compiled_model = ov.compile_model(model)
+        self._engine = None
+
+    @property
+    def model_for_inference(self) -> ov.CompiledModel:
+        return self._compiled_model
+
+    @property
+    def engine(self) -> OVCompiledModelEngine:
+        if self._engine is None:
+            self._engine = OVCompiledModelEngine(self._compiled_model, self._stateful)
+        return self._engine
 
 
 class OVAccuracyControlAlgoBackend(AccuracyControlAlgoBackend):
@@ -97,9 +121,3 @@ def get_model_size(model: ov.Model) -> int:
             model_size += op.data.nbytes
 
         return model_size
-
-    # Preparation of model
-
-    @staticmethod
-    def prepare_for_inference(model: ov.Model) -> ov.CompiledModel:
-        return ov.compile_model(model)
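
A design note on `OVPreparedModel`: the engine (and its infer request) is created lazily on first access, so metric-mode validation, which only touches `model_for_inference`, never constructs an infer request. A hedged usage sketch (placeholder model path and input):

import numpy as np
import openvino.runtime as ov

from nncf.quantization.algorithms.accuracy_control.openvino_backend import OVPreparedModel

model = ov.Core().read_model("model.xml")  # placeholder path
prepared = OVPreparedModel(model)

compiled = prepared.model_for_inference  # no infer request created yet
logits = prepared(np.zeros((1, 3, 224, 224), np.float32))  # first call builds the engine lazily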

nncf/quantization/algorithms/accuracy_control/ranker.py (+4 -4)

@@ -200,7 +200,7 @@ def _sequential_calculation_ranking_score(
             self._algo_backend.get_op_with_weights_metatypes(),
         )
 
-        prepared_model = self._algo_backend.prepare_for_inference(modified_model)
+        prepared_model = self._evaluator.prepare_model(modified_model)
         ranking_score = self._calculate_ranking_score(
             prepared_model, ranking_subset_indices, reference_values_for_each_item
         )
@@ -229,7 +229,7 @@ def _multithreading_calculation_ranking_score(
             self._algo_backend.get_op_with_weights_metatypes(),
         )
 
-        prepared_model_queue.append(executor.submit(self._algo_backend.prepare_for_inference, modified_model))
+        prepared_model_queue.append(executor.submit(self._evaluator.prepare_model, modified_model))
 
         if idx >= (self._num_workers - 1):
             prepared_model = prepared_model_queue.pop(0).result()
@@ -263,12 +263,12 @@ def _calculate_ranking_score(
         """
         if self._evaluator.is_metric_mode():
             # Calculate ranking score based on metric
-            ranking_score, _ = self._evaluator.validate_model_for_inference(
+            ranking_score, _ = self._evaluator.validate_prepared_model(
                 prepared_model, self._dataset, ranking_subset_indices
             )
         else:
             # Calculate ranking score based on differences in logits
-            approximate_outputs = self._evaluator.collect_values_for_each_item_using_model_for_inference(
+            approximate_outputs = self._evaluator.collect_values_for_each_item_using_prepared_model(
                 prepared_model, self._dataset, ranking_subset_indices
             )
             reference_outputs = [reference_values_for_each_item[i] for i in ranking_subset_indices]
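
In the multithreaded path above, model preparation (now `Evaluator.prepare_model`, which compiles the model) is submitted to a thread pool while the main thread consumes results in submission order, keeping a bounded number of compilations in flight. A simplified, self-contained sketch of that pattern (stand-in names, not the ranker's actual code):

from concurrent.futures import ThreadPoolExecutor


def prepare(model_id: int) -> str:
    # Stand-in for Evaluator.prepare_model(): imagine an expensive compile step.
    return f"prepared-{model_id}"


num_workers = 4
with ThreadPoolExecutor(max_workers=num_workers) as executor:
    prepared_model_queue = []
    for idx in range(10):
        prepared_model_queue.append(executor.submit(prepare, idx))
        # Keep at most `num_workers` preparations in flight.
        if idx >= num_workers - 1:
            print(prepared_model_queue.pop(0).result())
    # Drain the remaining futures.
    while prepared_model_queue:
        print(prepared_model_queue.pop(0).result())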
