@@ -93,12 +93,13 @@ class FunctionHookMode(TorchFunctionMode):
     This mode wraps the function calls in the model to allow custom hooks to be executed before
     and after the actual function calls.
 
-
     :param model: The PyTorch model to which the hooks will be applied.
     :param hook_storage: Storage for hooks to be executed.
     :param module_call_stack: A stack tracking the modules being called.
     :param nested_enter_count: A counter to track nested context manager entries.
     :param op_calls: A dictionary to track operation calls.
+    :param counter_reusing_shared_weights: A dictionary tracking how many more times each shared weight will be reused.
+    :param cache_parameters: A dictionary to cache modified shared parameters.
     """
 
     def __init__(self, model: nn.Module, hook_storage: HookStorage) -> None:
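For readers unfamiliar with torch function modes, here is a minimal sketch of the generic `TorchFunctionMode` mechanism that `FunctionHookMode` builds on. `LoggingMode` and its plain hook lists are made-up stand-ins for illustration only, not this repository's `HookStorage` API.

```python
import torch
from torch.overrides import TorchFunctionMode


class LoggingMode(TorchFunctionMode):
    """Runs user callables before and after every intercepted torch call."""

    def __init__(self, pre_hooks=None, post_hooks=None):
        super().__init__()
        self.pre_hooks = pre_hooks or []
        self.post_hooks = post_hooks or []

    def __torch_function__(self, func, types, args=(), kwargs=None):
        kwargs = kwargs or {}
        for hook in self.pre_hooks:
            hook(func, args, kwargs)      # inspect (or modify) the inputs
        output = func(*args, **kwargs)    # the actual function call
        for hook in self.post_hooks:
            output = hook(func, output)   # inspect (or replace) the output
        return output


# Every torch call inside the context is routed through the mode.
with LoggingMode(pre_hooks=[lambda func, args, kwargs: print("pre:", func)]):
    torch.add(torch.ones(2), torch.ones(2))
```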
@@ -127,6 +128,14 @@ def __init__(self, model: nn.Module, hook_storage: HookStorage) -> None:
         self._get_named_hooks(self.hook_storage.pre_hooks, "pre_hook")
         self._get_named_hooks(self.hook_storage.post_hooks, "post_hook")
 
+        # Count how many times each shared parameter is used
+        counter_shared_weights: Dict[int, int] = defaultdict(int)
+        for name, parameter in chain(self.model.named_parameters(remove_duplicate=False)):
+            counter_shared_weights[id(parameter)] += 1
+
+        self.counter_reusing_shared_weights = {k: v - 1 for k, v in counter_shared_weights.items() if v > 1}
+        self.cache_parameters: Dict[int, Tensor] = {}
+
     def _get_named_hooks(self, storage: nn.ModuleDict, prefix: str) -> None:
         """
         Associates named hooks from the given module storage with a group name, updating
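For context, a self-contained sketch of the counting idea used in the added `__init__` code: `named_parameters(remove_duplicate=False)` yields a tied weight once per use, so counting occurrences by `id()` reveals how many extra times each shared parameter will be encountered. The toy tied-weight model below is made up for illustration.

```python
from collections import defaultdict

import torch.nn as nn

embedding = nn.Embedding(10, 4)
head = nn.Linear(4, 10, bias=False)
head.weight = embedding.weight  # tie the two weights

model = nn.Sequential(embedding, head)

counter = defaultdict(int)
for _, parameter in model.named_parameters(remove_duplicate=False):
    counter[id(parameter)] += 1

# Parameters seen more than once are shared; keep how many *extra* times
# each one will be encountered (total uses minus one).
extra_uses = {k: v - 1 for k, v in counter.items() if v > 1}
print(extra_uses)  # one entry for the tied weight, with value 1
```

Keeping only the "extra uses" count means parameters that occur once never enter the bookkeeping at all.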
@@ -306,18 +315,41 @@ def execute_hooks_for_parameter(self, value: torch.Tensor) -> torch.Tensor:
         Executes post-hooks for a model parameter if a hook is defined for it.
         If the input is not a `torch.nn.Parameter`, or if no hook is defined, the original tensor is returned unchanged.
 
+        For shared parameters that are used more than once, the function caches the modified parameter,
+        so the hooks do not have to be re-applied on each subsequent use.
+
         :param value: The tensor to which the post-hook will be applied.
         :return: The processed tensor with the applied post-hook, if applicable.
         """
         if not isinstance(value, torch.nn.Parameter):
             return value
 
+        id_param = id(value)
+        if id_param in self.cache_parameters:
+            ret = self.cache_parameters[id_param]
+            self.counter_reusing_shared_weights[id_param] -= 1
+            if self.counter_reusing_shared_weights[id_param] == 0:
+                # Drop the cache entry after the parameter's last use
+                del self.cache_parameters[id_param]
+                del self.counter_reusing_shared_weights[id_param]
+            return ret
+
+        ret_value = value
         name_in_model = self.const_name_map.get(value, None)
         if name_in_model is not None and not self.in_process_const:
             self.in_process_const = True
-            value = self.hook_storage.execute_post_function_hooks(name_in_model.replace(".", ":"), 0, value)
+            ret_value = self.hook_storage.execute_post_function_hooks(name_in_model.replace(".", ":"), 0, value)
             self.in_process_const = False
-        return value
+
+        if self.counter_reusing_shared_weights.get(id_param):
+            if ret_value is value:
+                # Drop the counter for parameters that the hooks leave unchanged
+                del self.counter_reusing_shared_weights[id_param]
+            else:
+                # Cache the modified parameter for the remaining uses
+                self.cache_parameters[id_param] = ret_value
+
+        return ret_value
 
     def process_parameters(self, args: List[Any], kwargs: Dict[str, Any]) -> Tuple[List[Any], Dict[str, Any]]:
         """