
Commit 4fa7216

Feuermagier authored and ezyang committed
FlopCounterMode: Decompose ops for inference mode (pytorch#138508)
Fixes pytorch#126268

I've basically followed @ezyang's suggestion (I think) to use `func.decompose(...)`. Since `__torch_dispatch__` won't be called a second time for the same op, I've added a second `TorchDispatchMode` (`_DecomposedCounterMode`) that simply dispatches to the parent flop counter. Using `self` as the inner context manager is not possible, since the second call to `__enter__` would re-initialize the counter's tracking state.

Let me know if there's something wrong with this implementation, since I'm quite unsure how the decomposition thing actually works :D

Pull Request resolved: pytorch#138508
Approved by: https://github.com/ezyang
Co-authored-by: Edward Z. Yang <ezyang@meta.com>
1 parent cffeb83 commit 4fa7216
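
A minimal usage sketch of what this change enables (model and input names are illustrative; assumes torchvision is installed): with this patch, FLOP counts collected under torch.inference_mode() match those collected in ordinary eager mode, as the new test below verifies.

    import torch
    import torchvision.models as models
    from torch.utils.flop_counter import FlopCounterMode

    model = models.resnet18()
    x = torch.randn(1, 3, 224, 224)

    # Before this change, ops seen under inference mode could miss the flop
    # registry and go uncounted; now unregistered ops are decomposed until a
    # registered op (e.g. aten.convolution) is reached.
    with torch.inference_mode():
        with FlopCounterMode(display=False) as flop_counter:
            model(x)

    print(flop_counter.get_total_flops())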

File tree

2 files changed: +70 −8 lines


test/test_flop_counter.py

+24 lines

@@ -810,6 +810,30 @@ def formula(*args, **kwargs):
         self.assertEqual(called, 1)
         self.assertExpectedInline(get_total_flops(mode), """9001""")
 
+    @skipIfNoTorchVision
+    def test_inference_mode(self):
+        def get_flops(model):
+            with FlopCounterMode(model) as mode:
+                a = T(1, 3, 224, 224)
+                model(a).sum()
+            return mode
+
+        resnet18 = torchvision_models.resnet18()
+
+        mode_standard = get_flops(resnet18)
+
+        with torch.inference_mode():
+            mode_inference = get_flops(resnet18)
+
+        self.assertEqual(get_total_flops(mode_standard), get_total_flops(mode_inference))
+
+        layer1_conv_flops_standard = mode_standard.flop_counts["ResNet.layer1"][
+            torch.ops.aten.convolution
+        ]
+        layer1_conv_flops_inference = mode_inference.flop_counts["ResNet.layer1"][
+            torch.ops.aten.convolution
+        ]
+        self.assertEqual(layer1_conv_flops_standard, layer1_conv_flops_inference)
 
 if __name__ == "__main__":
     run_tests()

torch/utils/flop_counter.py

+46 −8 lines

@@ -593,7 +593,7 @@ def nf(args):
     return nf
 
 
-class FlopCounterMode(TorchDispatchMode):
+class FlopCounterMode:
     """
     ``FlopCounterMode`` is a context manager that counts the number of flops within its context.
 
@@ -623,6 +623,7 @@ def __init__(
         self.flop_counts: Dict[str, Dict[Any, int]] = defaultdict(lambda: defaultdict(int))
         self.depth = depth
         self.display = display
+        self.mode: Optional[_FlopCounterMode] = None
         if custom_mapping is None:
             custom_mapping = {}
         if mods is not None:
@@ -708,22 +709,22 @@ def process_mod(mod_name, depth):
 
         return tabulate.tabulate(values, headers=header, colalign=("left", "right", "right"))
 
+    # NB: This context manager is NOT reentrant
     def __enter__(self):
         self.flop_counts.clear()
         self.mod_tracker.__enter__()
-        super().__enter__()
+        self.mode = _FlopCounterMode(self)
+        self.mode.__enter__()
         return self
 
     def __exit__(self, *args):
-        super().__exit__(*args)
+        assert self.mode is not None
+        b = self.mode.__exit__(*args)
+        self.mode = None  # break cycles
         self.mod_tracker.__exit__()
         if self.display:
             print(self.get_table(self.depth))
-
-    def __torch_dispatch__(self, func, types, args=(), kwargs=None):
-        kwargs = kwargs if kwargs else {}
-        out = func(*args, **kwargs)
-        return self._count_flops(func._overloadpacket, out, args, kwargs)
+        return b
 
     def _count_flops(self, func_packet, out, args, kwargs):
         if func_packet in self.flop_registry:
@@ -733,3 +734,40 @@ def _count_flops(self, func_packet, out, args, kwargs):
             self.flop_counts[par][func_packet] += flop_count
 
         return out
+
+
+class _FlopCounterMode(TorchDispatchMode):
+    def __init__(self, counter: FlopCounterMode):
+        self.counter = counter
+
+    def __torch_dispatch__(self, func, types, args=(), kwargs=None):
+        kwargs = kwargs if kwargs else {}
+
+        # Skip ops from non-standard dispatch_sizes_strides_policy such as NJT
+        if func in {torch.ops.aten.is_contiguous.default,
+                    torch.ops.aten.is_contiguous.memory_format,
+                    torch.ops.aten.is_strides_like_format.default,
+                    torch.ops.aten.is_non_overlapping_and_dense.default,
+                    torch.ops.aten.size.default,
+                    torch.ops.aten.sym_size.default,
+                    torch.ops.aten.stride.default,
+                    torch.ops.aten.sym_stride.default,
+                    torch.ops.aten.storage_offset.default,
+                    torch.ops.aten.sym_storage_offset.default,
+                    torch.ops.aten.numel.default,
+                    torch.ops.aten.sym_numel.default,
+                    torch.ops.aten.dim.default,
+                    torch.ops.prim.layout.default}:
+
+            return NotImplemented
+
+        # If we don't have func in flop_registry, see if it can decompose
+        if func not in self.counter.flop_registry and func is not torch.ops.prim.device.default:
+            with self:
+                r = func.decompose(*args, **kwargs)
+                if r is not NotImplemented:
+                    return r
+
+        # no further decomposition; execute & count flops
+        out = func(*args, **kwargs)
+        return self.counter._count_flops(func._overloadpacket, out, args, kwargs)
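
The key mechanism in _FlopCounterMode is OpOverload.decompose(...): roughly, if the op has a CompositeImplicitAutograd decomposition registered, decompose runs it and returns the result; otherwise it returns NotImplemented. Because the decomposition runs under `with self:`, the simpler ops it produces are dispatched back through the same mode and eventually hit a registered flop formula. A rough standalone sketch of the same pattern (LoggingDecomposeMode is a hypothetical name, not part of this patch):

    import torch
    from torch.utils._python_dispatch import TorchDispatchMode

    class LoggingDecomposeMode(TorchDispatchMode):
        def __torch_dispatch__(self, func, types, args=(), kwargs=None):
            kwargs = kwargs or {}
            print("dispatched:", func)
            # Ask the op to decompose itself; the sub-ops are re-intercepted
            # because the mode is re-entered via `with self:`.
            with self:
                r = func.decompose(*args, **kwargs)
            if r is not NotImplemented:
                return r
            # No decomposition available: run the op as-is.
            return func(*args, **kwargs)

    with torch.inference_mode(), LoggingDecomposeMode():
        torch.nn.functional.linear(torch.randn(2, 3), torch.randn(4, 3))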
