 log = logging.getLogger(__name__)
 
 
-__all__ = ["InterpreterModule", "UnflattenedModule", "unflatten", "FlatArgsAdapter"]
+__all__ = [
+    "FlatArgsAdapter",
+    "InterpreterModule",
+    "InterpreterModuleDispatcher",
+    "UnflattenedModule",
+    "unflatten",
+]
 
 
 class _AttrKind(Enum):
@@ -195,6 +201,50 @@ def print_readable(
         )
 
 
+class InterpreterModuleDispatcher(torch.nn.Module):
+    """
+    A module that carries a sequence of InterpreterModules corresponding to
+    a sequence of calls of that module. Each call to the module dispatches
+    to the next InterpreterModule, and wraps back around after the last.
+    """
+
+    def __init__(self, call_modules: List[InterpreterModule]):
+        super().__init__()
+        assert call_modules
+        self._call_modules = call_modules
+        self._num_calls = 0
+
+    def forward(self, *args, **kwargs):
+        call_module = self._call_modules[self._num_calls]
+        self._num_calls = (self._num_calls + 1) % len(self._call_modules)
+        try:
+            return call_module(*args, **kwargs)
+        except Exception:
+            self._num_calls = 0
+            raise
+
+    def call_modules(self):
+        return self._call_modules
+
+    def print_readable(
+        self,
+        print_output=True,
+        include_stride=False,
+        include_device=False,
+        colored=False,
+    ):
+        outputs = [
+            mod.print_readable(
+                print_output,
+                include_stride,
+                include_device,
+                colored,
+            )
+            for mod in self._call_modules
+        ]
+        return "\n".join(outputs)
+
+
 class FlatArgsAdapter(abc.ABC):
     """
     Adapts input arguments with ``input_spec`` to align ``target_spec``.
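
To make the round-robin contract above concrete, here is a minimal usage sketch. It assumes the `InterpreterModuleDispatcher` class from this diff is in scope, and uses plain `nn.Module`s as stand-ins for `InterpreterModule`s, since the dispatcher only requires its entries to be callable:

```python
import torch

class _Scale(torch.nn.Module):
    """Stand-in for an InterpreterModule; multiplies by a fixed factor."""

    def __init__(self, factor):
        super().__init__()
        self.factor = factor

    def forward(self, x):
        return x * self.factor

# One entry per original call of the module.
dispatcher = InterpreterModuleDispatcher([_Scale(2), _Scale(3)])

x = torch.ones(2)
print(dispatcher(x))  # 1st call -> 1st module: tensor([2., 2.])
print(dispatcher(x))  # 2nd call -> 2nd module: tensor([3., 3.])
print(dispatcher(x))  # wraps around to the 1st module: tensor([2., 2.])
```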
@@ -415,7 +465,7 @@ def add_to_consts_map(obj_id, node_name, target_name):
             inputs_to_state[n] = targets
 
         _sink_params(self, inputs_to_state, [])
-        _deduplicate_modules(seen_modules.values())
+        redirected_call_indices = _deduplicate_modules(seen_modules.values())
 
         # Helper function to check that input nodes of `module` have been processed.
         def check_module_inputs(module, scope):
@@ -445,6 +495,7 @@ def check_module_inputs(module, scope):
 
         # Recursively check all input nodes have been processed.
         check_module_inputs(self, [])
+        self._dispatch_modules(redirected_call_indices)
 
         # Cache so we don't have to compute this every time.
         # NOTE: this needs to be kept in sync with the placeholders in
@@ -541,6 +592,49 @@ def forward(self, *args, **kwargs):
             )
         return pytree.tree_unflatten(tree_out, signature.out_spec)
 
+    def _dispatch_modules(self, redirected_call_indices):
+        """For a module whose call signatures are preserved, replace
+        multiple modules corresponding to multiple calls to that module
+        with a single dispatcher module that tracks which module to call.
+        """
+
+        # some modules were removed and their fqns redirected to other
+        # fqns during deduplication; make a consolidated fqn -> module map
+        all_modules = {}
+        for fqn, mod in self.named_modules(remove_duplicate=False):
+            all_modules[fqn] = mod
+        for fqn, fqn_ in redirected_call_indices.items():
+            all_modules[fqn] = all_modules[fqn_]
+
+        # for each fqn whose module call signature is preserved,
+        # map that fqn to a list of called modules
+        module_call_graph = {
+            entry.fqn
+            for entry in self.module_call_graph
+            if entry.fqn and entry.signature
+        }
+        called_modules = defaultdict(list)
+        for fqn, mod in sorted(all_modules.items()):
+            if fqn in module_call_graph:
+                called_modules[fqn.split("@")[0]].append(mod)
+
+        # replace multiple call modules with a single dispatcher module
+        for orig_fqn, call_modules in called_modules.items():
+            if len(call_modules) > 1:
+                for i, call_module in enumerate(call_modules):
+                    fqn = _call_name(orig_fqn, i + 1)
+                    if fqn not in redirected_call_indices:
+                        self._modules.pop(fqn)
+                self.set_submodule(orig_fqn, InterpreterModuleDispatcher(call_modules))
+
+        # elide call indices in call modules because they are
+        # tracked automatically inside the dispatcher module
+        for node in self.graph.nodes:
+            if node.op == "call_module":
+                fqn = node.target.split("@")[0]
+                if fqn in called_modules:
+                    node.target = fqn
+
     def print_readable(
         self,
         print_output=True,
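
The grouping step in `_dispatch_modules` may be easier to follow in isolation. Below is a minimal sketch with plain dicts and strings as stand-ins; the `name@<n>` fqn suffix for repeated calls is inferred from the `fqn.split("@")[0]` above, and the fqns here are invented for illustration:

```python
from collections import defaultdict

# Hypothetical state after deduplication: "layer" was called twice, and the
# second call's module was redirected onto the first ("layer@1" -> "layer").
all_modules = {"layer": "mod_a", "layer@1": "mod_a"}  # fqn -> module stand-in
preserved = {"layer", "layer@1"}  # fqns whose call signatures are preserved

called_modules = defaultdict(list)
for fqn, mod in sorted(all_modules.items()):
    if fqn in preserved:
        called_modules[fqn.split("@")[0]].append(mod)

print(dict(called_modules))  # {'layer': ['mod_a', 'mod_a']}
# "layer" has more than one recorded call, so it would be replaced by a
# single InterpreterModuleDispatcher wrapping both call modules.
```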
@@ -1340,6 +1434,7 @@ def _copy_graph_attrs(
 
 
 def _deduplicate_modules(partitions):
+    redirected_call_indices = {}
     for shared_submodules in partitions:
         for i, entry in enumerate(shared_submodules):
             child_fqn = _call_name(entry.fqn, entry.call_idx)
@@ -1364,6 +1459,7 @@ def _deduplicate_modules(partitions):
                         entry.parent_fqn, seen_child_fqn
                     )
                     entry.parent_call_module.target = seen_target  # type: ignore[union-attr]
+                    redirected_call_indices[child_fqn] = seen_child_fqn
                     break
                 elif not deduplicated:
                     # Case 2: The current module has a different fqn than the seen module.
@@ -1378,6 +1474,8 @@ def _deduplicate_modules(partitions):
                     entry.parent_module.set_submodule(target, seen.module)
                     deduplicated = True
 
+    return redirected_call_indices
+
 
 def _sink_params(
     module: torch.nn.Module,
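
Stepping back, here is a sketch of how this change surfaces to users of `torch.export`. It assumes `export`'s `preserve_module_call_signature` keyword, and that with this diff applied, a preserved module called more than once unflattens to a dispatcher; the model is invented for illustration:

```python
import torch
from torch.export import export, unflatten

class Block(torch.nn.Module):
    def forward(self, x):
        return x + 1

class Model(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.block = Block()

    def forward(self, x):
        # Two calls to the same submodule.
        return self.block(self.block(x))

ep = export(Model(), (torch.randn(4),), preserve_module_call_signature=("block",))
unflat = unflatten(ep)
# Expected with this change: unflat.block is an InterpreterModuleDispatcher
# carrying one InterpreterModule per original call, cycling on each forward.
print(type(unflat.block).__name__)
```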