From c4aebaa825a0705c7773aedc4205f89083c2ebe7 Mon Sep 17 00:00:00 2001
From: maxren <maxren@fb.com>
Date: Wed, 27 Sep 2023 22:41:37 -0700
Subject: [PATCH 1/6] add dump_artifact for debugging

Differential Revision: D49249578

fbshipit-source-id: b691d5584acbb350737663cc458a48d68050da4c
---
 backends/xnnpack/test/tester/tester.py | 74 ++++++++++++++++++++++----
 1 file changed, 65 insertions(+), 9 deletions(-)

diff --git a/backends/xnnpack/test/tester/tester.py b/backends/xnnpack/test/tester/tester.py
index 680c508ae9a..a49bcf49025 100644
--- a/backends/xnnpack/test/tester/tester.py
+++ b/backends/xnnpack/test/tester/tester.py
@@ -5,6 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import copy
+import sys
 from abc import ABC, abstractmethod
 from collections import OrderedDict
 from typing import Any, Dict, List, Optional, Tuple
@@ -25,6 +26,7 @@
 from executorch.exir.backend.backend_api import to_backend, validation_disabled
 from executorch.exir.backend.partitioner import Partitioner
 from executorch.exir.passes.spec_prop_pass import SpecPropPass
+from executorch.exir.print_program import pretty_print, print_program
 
 from executorch.extension.pybindings.portable_lib import (  # @manual
     _load_for_executorch_from_buffer,
@@ -69,6 +71,33 @@ def graph_module(self):
         """
         pass
 
+    # Debug Tools for stages
+    def artifact_str(self):
+        """
+        Return string printable artifact for this stage
+        """
+        if isinstance(self.artifact, ExirExportedProgram):
+            return self.artifact.exported_program
+        return self.artifact
+
+    def stage_banner(self):
+        """
+        Returns banner string for this stage
+        """
+        return "#" * 36 + " " + str(self.__class__.__name__) + " " + "#" * 36 + "\n"
+
+    def dump_artifact(self, path_to_dump: Optional[str]):
+        """
+        Dumps string printable artifact to path. If path_to_dump is None or empty, it is printed to terminal
+        """
+        if path_to_dump:
+            with open(path_to_dump, "a") as fp:
+                fp.write(str(self.stage_banner() + "\n"))
+                fp.write(str(self.artifact_str()))
+        else:
+            print(self.stage_banner() + "\n")
+            print(self.artifact_str())
+
 
 _stages_: Dict[str, Stage] = {}
 
@@ -207,31 +236,43 @@ def __init__(
         self.config = config or ExecutorchBackendConfig(
             passes=[SpecPropPass()],
         )
-        self.exported_program = None
+        self.executorch_program = None
 
     def run(self, artifact: ExirExportedProgram, inputs=None):
-        self.exported_program = artifact.to_executorch(self.config)
+        self.executorch_program = artifact.to_executorch(self.config)
 
     @property
     def artifact(self) -> ExecutorchProgram:
-        return self.exported_program
+        return self.executorch_program
 
     @property
     def graph_module(self) -> str:
-        return self.exported_program.graph_module
+        return self.executorch_program.graph_module
+
+    def dump_artifact(self, path_to_dump: Optional[str]):
+        """
+        dump_artifact is overridden to dump the program to path_to_dump (appended) or, if unset, stdout
+        """
+        program, original_stdout = self.artifact.program, sys.stdout
+        dump_file = open(path_to_dump, "a") if path_to_dump else None
+        try:
+            sys.stdout = dump_file if dump_file is not None else original_stdout
+            print(self.stage_banner() + "\n")
+            pretty_print(program)
+            print_program(program, show_meminfo=True, mark_dynamic_shape_tensor=True)
+        finally:
+            sys.stdout = original_stdout  # restore even if printing raises
+            if dump_file is not None:
+                dump_file.close()
 
 
 @register_stage
 class Serialize(Stage):
-    def __init__(self, filename: Optional[str] = None):
+    def __init__(self):
         self.buffer = None
-        self.filename = filename
 
     def run(self, artifact: ExecutorchProgram, inputs=None) -> None:
         self.buffer = artifact.buffer
-        if self.filename is not None:
-            with open(self.filename, "wb") as f:
-                f.write(self.buffer)
 
     @property
     def artifact(self) -> bytes:
@@ -241,6 +282,16 @@ def artifact(self) -> bytes:
     def graph_module(self) -> None:
         return None
 
+    def dump_artifact(self, path_to_dump: Optional[str]):
+        """
+        dump_artifact is overridden to dump the serialized bytes into pte file
+        """
+        if not path_to_dump:
+            raise RuntimeError("path_to_dump file not provided")
+        else:
+            with open(path_to_dump, "wb") as f:
+                f.write(self.artifact)
+
 
 class Tester:
     def __init__(
@@ -332,6 +383,11 @@ def serialize(self, serialize_stage: Optional[Serialize] = None):
         return self._run_stage(serialize_stage or Serialize())
 
     # Util functions
+    def dump_artifact(self, path: Optional[str] = None, stage: Optional[str] = None):
+        stage = stage or self.cur
+        self.stages[stage].dump_artifact(path)
+        return self
+
     def get_artifact(self, stage: Optional[str] = None):
         stage = stage or self.cur
         return self.stages[stage].artifact

From 21b275ecbaececcef6a95f747c57dded385a367a Mon Sep 17 00:00:00 2001
From: maxren <maxren@fb.com>
Date: Wed, 27 Sep 2023 22:41:37 -0700
Subject: [PATCH 2/6] run method on different stages

Differential Revision: D49708597

fbshipit-source-id: 91663da2f81407c6706dc491fb0ec1b9360282ac
---
 backends/xnnpack/test/tester/tester.py | 83 ++++++++++++++++----------
 1 file changed, 52 insertions(+), 31 deletions(-)

diff --git a/backends/xnnpack/test/tester/tester.py b/backends/xnnpack/test/tester/tester.py
index a49bcf49025..c03536e9e2c 100644
--- a/backends/xnnpack/test/tester/tester.py
+++ b/backends/xnnpack/test/tester/tester.py
@@ -71,6 +71,12 @@ def graph_module(self):
         """
         pass
 
+    def run_artifact(self, inputs):
+        """
+        Returns the output of calling the artifact generated by this stage with inputs
+        """
+        return self.artifact(*inputs)
+
     # Debug Tools for stages
     def artifact_str(self):
         """
@@ -282,6 +288,14 @@ def artifact(self) -> bytes:
     def graph_module(self) -> None:
         return None
 
+    def run_artifact(self, inputs):
+        inputs_flattened, _ = tree_flatten(inputs)
+        executorch_module = _load_for_executorch_from_buffer(self.buffer)
+        executorch_output = copy.deepcopy(
+            executorch_module.run_method("forward", tuple(inputs_flattened))
+        )
+        return executorch_output
+
     def dump_artifact(self, path_to_dump: Optional[str]):
         """
         dump_artifact is overridden to dump the serialized bytes into pte file
@@ -299,7 +313,7 @@ def __init__(
         module: torch.nn.Module,
         inputs: Tuple[torch.Tensor],
     ):
-        self.module = module
+        self.original_module = module
         self.inputs = inputs
         self.stages: Dict[str, Stage] = OrderedDict.fromkeys(list(_stages_.keys()))
         self.pipeline = {
@@ -327,8 +341,8 @@ def __init__(
         # Reference output from Eager mode
         self.reference_output = None
 
-        # Output by running a serialized/lowered module on ET
-        self.executorch_output = None
+        # Artifact output from stage
+        self.stage_output = None
 
     @staticmethod
     def _stage_name(stage) -> str:
@@ -339,7 +353,7 @@ def _pre(self, stage):
         name: str = self._stage_name(stage)
         assert isinstance(name, str) and name in self.stages and not self.stages[name]
 
-        last_artifact = self.module
+        last_artifact = self.original_module
         if self.cur:
             assert self.cur in self.pipeline, f"Invalid state: {self.cur}"
             allowed_next_stages = self.pipeline[self.cur]
@@ -410,18 +424,19 @@ def check_count(self, input: Dict[Any, int]):
             )
         return self
 
-    def run_method(self, method="forward"):
-        # Reference
-        delegated_module = self.get_artifact(self._stage_name(Partition))
-        self.reference_output = delegated_module(*self.inputs)
-
-        # ExecuTorch
-        inputs_flattened, _ = tree_flatten(self.inputs)
-        serialized_buffer = self.get_artifact(self._stage_name(Serialize))
-        executorch_module = _load_for_executorch_from_buffer(serialized_buffer)
-        self.executorch_output = copy.deepcopy(
-            executorch_module.run_method(method, tuple(inputs_flattened))
+    def run_method(
+        self, stage: Optional[str] = None, inputs: Optional[Tuple[torch.Tensor]] = None
+    ):
+        inputs_to_run = inputs or self.inputs
+        # Reference Output
+        self.reference_output = self.stages[self._stage_name(Export)].run_artifact(
+            inputs_to_run
         )
+
+        # Output from running artifact at stage
+        stage = stage or self.cur
+        self.stage_output = self.stages[stage].run_artifact(inputs_to_run)
+
         return self
 
     @staticmethod
@@ -433,25 +448,31 @@ def _assert_outputs_equal(model_output, ref_output, atol=1e-03, rtol=1e-03):
         relative tolerance is 1e-3.
         """
 
-        # Compare the result from executor and eager mode direclty
-        if isinstance(ref_output, tuple) or isinstance(ref_output, list):
-            # Multiple outputs executor always returns tuple, even if there is one output
-            assert len(ref_output) == len(model_output)
-            for i in range(len(ref_output)):
-                assert torch.allclose(
-                    model_output[i],
-                    ref_output[i],
-                    atol=atol,
-                    rtol=rtol,
-                )
-        else:
-            # If one output, eager returns tensor while executor returns a tuple(tensor) of size 1
-            assert torch.allclose(model_output[0], ref_output, atol=atol, rtol=rtol)
+        # Multiple outputs executor always returns tuple, even if there is one output
+        assert len(ref_output) == len(model_output)
+        for i in range(len(ref_output)):
+            assert torch.allclose(
+                model_output[i],
+                ref_output[i],
+                atol=atol,
+                rtol=rtol,
+            )
 
     def compare_outputs(self, atol=1e-03, rtol=1e-03):
+        """
+        Compares the reference output (from the Export stage) with the output of the generated artifact.
+        This requires calling run_method before calling compare_outputs, as that runs the generated
+        artifact on the sample inputs and sets the stage output to be compared against the reference
+        """
         assert self.reference_output is not None
-        assert self.executorch_output is not None
+        assert self.stage_output is not None
+
+        # Wrap both outputs as tuple, since executor output is always a tuple even if single tensor
+        if isinstance(self.reference_output, torch.Tensor):
+            self.reference_output = (self.reference_output,)
+        if isinstance(self.stage_output, torch.Tensor):
+            self.stage_output = (self.stage_output,)
         self._assert_outputs_equal(
-            self.executorch_output, self.reference_output, atol=atol, rtol=rtol
+            self.stage_output, self.reference_output, atol=atol, rtol=rtol
         )
         return self

From 222d152d006bc53cf36bd57e0e95cbb636a5eb88 Mon Sep 17 00:00:00 2001
From: maxren <maxren@fb.com>
Date: Wed, 27 Sep 2023 22:41:37 -0700
Subject: [PATCH 3/6] Pass manager should take in type[pass] instead of pass
 instance

Differential Revision: D49710932

fbshipit-source-id: 0865c6f0f2171e49c540b0c625b6cd3a994a9380
---
 backends/xnnpack/passes/__init__.py    | 6 ++++--
 backends/xnnpack/test/tester/tester.py | 4 ++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/backends/xnnpack/passes/__init__.py b/backends/xnnpack/passes/__init__.py
index c4374c006a1..9cecf5ea482 100644
--- a/backends/xnnpack/passes/__init__.py
+++ b/backends/xnnpack/passes/__init__.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-from typing import List, Optional
+from typing import List, Optional, Type
 
 from executorch.backends.xnnpack.passes.channels_last_tagged_reshape_pass import (
     ChannelsLastTaggedReshapePass,
@@ -29,7 +29,9 @@
 
 class XNNPACKPassManager:
     def __init__(
-        self, exported_program: ExportedProgram, passes: Optional[List[PassType]] = None
+        self,
+        exported_program: ExportedProgram,
+        passes: Optional[List[Type[PassType]]] = None,
     ) -> None:
         """
         A helper class to run multiple XNNPack passes on a program
diff --git a/backends/xnnpack/test/tester/tester.py b/backends/xnnpack/test/tester/tester.py
index c03536e9e2c..a2febf572ac 100644
--- a/backends/xnnpack/test/tester/tester.py
+++ b/backends/xnnpack/test/tester/tester.py
@@ -8,7 +8,7 @@
 import sys
 from abc import ABC, abstractmethod
 from collections import OrderedDict
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple, Type
 
 import torch
 import torch._export as export
@@ -193,7 +193,7 @@ def graph_module(self) -> str:
 
 @register_stage
 class RunPasses(Stage):
-    def __init__(self, pass_list: Optional[List[PassType]] = None):
+    def __init__(self, pass_list: Optional[List[Type[PassType]]] = None):
         self.pass_list = pass_list
         self.edge_dialect_program = None
 

From 09b640ec356e818f0523177e01f683941a6a05be Mon Sep 17 00:00:00 2001
From: maxren <maxren@fb.com>
Date: Wed, 27 Sep 2023 22:41:37 -0700
Subject: [PATCH 4/6] Move Channels Last Reshape Pass to Tester

Differential Revision: D49715863

fbshipit-source-id: 12111db3ae3872d6d289dcd97724ce57bab516a0
---
 backends/xnnpack/test/TARGETS                 |   7 +-
 .../test_channels_last_tagged_reshape.py      | 180 +++++++++++++
 backends/xnnpack/test/test_xnnpack_passes.py  | 240 ------------------
 3 files changed, 184 insertions(+), 243 deletions(-)
 create mode 100644 backends/xnnpack/test/passes/test_channels_last_tagged_reshape.py

diff --git a/backends/xnnpack/test/TARGETS b/backends/xnnpack/test/TARGETS
index 2a1fe984cf8..111d087f15f 100644
--- a/backends/xnnpack/test/TARGETS
+++ b/backends/xnnpack/test/TARGETS
@@ -92,13 +92,16 @@ python_unittest(
 
 python_unittest(
     name = "test_xnnpack_passes",
-    srcs = [
+    srcs = glob([
+        "passes/*.py",
+    ]) + [
         "test_xnnpack_passes.py",
         "test_xnnpack_utils_classes.py",
     ],
     deps = [
         "//caffe2:torch",
         "//executorch/backends/xnnpack/passes:xnnpack_passes",
+        "//executorch/backends/xnnpack/test/tester:tester",
         "//executorch/backends/xnnpack/utils:xnnpack_utils",
         "//executorch/exir:lib",
         "//executorch/exir:pass_base",
@@ -127,9 +130,7 @@ python_unittest(
     ]),
     deps = [
         "//caffe2:torch",
-        "//executorch/backends/xnnpack/partition:xnnpack_partitioner",
         "//executorch/backends/xnnpack/test/tester:tester",
-        "//executorch/exir:lib",
         "//pytorch/vision:torchvision",
     ],
 )
diff --git a/backends/xnnpack/test/passes/test_channels_last_tagged_reshape.py b/backends/xnnpack/test/passes/test_channels_last_tagged_reshape.py
new file mode 100644
index 00000000000..abb18a8c0b2
--- /dev/null
+++ b/backends/xnnpack/test/passes/test_channels_last_tagged_reshape.py
@@ -0,0 +1,180 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from executorch.backends.xnnpack.passes.channels_last_tagged_reshape_pass import (
+    ChannelsLastTaggedReshapePass,
+)
+from executorch.backends.xnnpack.test.test_xnnpack_utils_classes import (
+    OpSequencesAddConv2d,
+)
+from executorch.backends.xnnpack.test.tester import RunPasses, Tester
+
+
+class TestChannelsLastTaggedReshapePass(unittest.TestCase):
+    PassStage = RunPasses([ChannelsLastTaggedReshapePass])
+    # Dictionary mapping modules to expected number of reshapes
+    modules = {
+        OpSequencesAddConv2d(0, 0).eval(): 0,
+        OpSequencesAddConv2d(1, 1).eval(): 2,
+        OpSequencesAddConv2d(2, 2).eval(): 2,
+    }
+    to_copy_name = "executorch_exir_dialects_edge__ops_aten__to_copy_default"
+    quant_name = "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default"
+    dequant_name = "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default"
+    conv_name = "executorch_exir_dialects_edge__ops_aten_convolution_default"
+    relu_name = "executorch_exir_dialects_edge__ops_aten_relu_default"
+
+    def test_fp32_channels_last_tagged_reshape_pass(self):
+        for module, num_reshape in self.modules.items():
+            (
+                Tester(module, (torch.randn(1, 1, 6, 6),))
+                .export()
+                .to_edge()
+                .run_passes(self.PassStage)
+                .check_count(
+                    {
+                        self.to_copy_name: num_reshape,
+                    }
+                )
+                .run_method()
+                .compare_outputs()
+            )
+
+    def test_qs8_channels_last_tagged_reshape_pass(self):
+        for module, num_reshape in self.modules.items():
+            (
+                Tester(module, (torch.randn(1, 1, 6, 6),))
+                .quantize()
+                .export()
+                .to_edge()
+                .run_passes(self.PassStage)
+                .check(
+                    [
+                        self.quant_name,
+                        self.dequant_name,
+                        self.to_copy_name,
+                        self.quant_name,
+                        self.dequant_name,
+                    ]
+                    * num_reshape
+                )
+                .run_method()
+                .compare_outputs()
+            )
+
+    class ConvRelu(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.conv = torch.nn.Conv2d(1, 1, 1)
+            self.relu = torch.nn.ReLU()
+
+        def forward(self, x):
+            return self.relu(self.conv(x))
+
+    def test_fp32_channels_last_tagged_reshape_pass_conv_relu(self):
+        (
+            Tester(self.ConvRelu().eval(), (torch.randn(1, 1, 6, 6),))
+            .export()
+            .to_edge()
+            .run_passes(self.PassStage)
+            .check(
+                [self.to_copy_name, self.conv_name, self.relu_name, self.to_copy_name]
+            )
+            .run_method()
+            .compare_outputs()
+        )
+
+    def test_qs8_channels_last_tagged_reshape_pass_conv_relu(self):
+        (
+            Tester(self.ConvRelu().eval(), (torch.randn(1, 1, 6, 6),))
+            .quantize()
+            .export()
+            .to_edge()
+            .run_passes(self.PassStage)
+            .check(
+                [
+                    self.to_copy_name,
+                    self.quant_name,
+                    self.dequant_name,
+                    self.conv_name,
+                    self.relu_name,
+                    self.quant_name,
+                    self.dequant_name,
+                    self.to_copy_name,
+                ]
+            )
+            .run_method()
+            .compare_outputs()
+        )
+
+    class Conv2dBnHardtanhMeanSequenceModule(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.conv = torch.nn.Conv2d(
+                in_channels=1,
+                out_channels=1,
+                kernel_size=(3, 3),
+                stride=[2, 2],
+                padding=[1, 1],
+                groups=1,
+                dilation=[1, 1],
+                bias=True,
+            )
+            self.native_batchnorm = torch.nn.BatchNorm2d(1)
+            self.hardtanh = torch.nn.Hardtanh(min_val=0, max_val=6)
+            self.eval()
+
+        def forward(self, x):
+            x = self.conv(x)
+            x = self.native_batchnorm(x)
+            x = self.hardtanh(x)
+            x = torch.mean(x, (-1, -2), keepdim=True)
+            return x
+
+    def test_fp32_channels_last_tagged_reshape_pass_conv_bn_hardtanh_mean_seq(self):
+        # Copy #1 is for input to conv, nchw -> nhwc
+        # Copy #2 is for conv to _native_batch_norm_legit_no_training, nhwc -> nchw
+        # Copy #3 is for input to mean, nchw -> nhwc
+        # Copy #4 is for output, nhwc -> nchw
+
+        # The graph looks like:
+        # graph():
+        #     %arg0_1 : [#users=1] = placeholder[target=arg0_1]
+        #     %aten__to_copy_default : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten._to_copy.default](args = (%arg0_1,), kwargs = {memory_format: torch.channels_last})
+        #     %_param_constant0 : [#users=1] = get_attr[target=_param_constant0]
+        #     %_param_constant1 : [#users=1] = get_attr[target=_param_constant1]
+        #     %aten_convolution_default : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.convolution.default](args = (%aten__to_copy_default, %_param_constant0, %_param_constant1, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {})
+        #     %aten__to_copy_default_1 : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten._to_copy.default](args = (%aten_convolution_default,), kwargs = {memory_format: torch.contiguous_format})
+        #     %_param_constant2 : [#users=1] = get_attr[target=_param_constant2]
+        #     %_param_constant3 : [#users=1] = get_attr[target=_param_constant3]
+        #     %_tensor_constant0 : [#users=1] = get_attr[target=_tensor_constant0]
+        #     %_tensor_constant1 : [#users=1] = get_attr[target=_tensor_constant1]
+        #     %aten__native_batch_norm_legit_no_training_default : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten._native_batch_norm_legit_no_training.default](args = (%aten__to_copy_default_1, %_param_constant2, %_param_constant3, %_tensor_constant0, %_tensor_constant1, 0.1, 1e-05), kwargs = {})
+        #     %getitem : [#users=1] = call_function[target=operator.getitem](args = (%aten__native_batch_norm_legit_no_training_default, 0), kwargs = {})
+        #     %aten_hardtanh_default : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.hardtanh.default](args = (%getitem, 0, 6), kwargs = {})
+        #     %aten__to_copy_default_2 : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten._to_copy.default](args = (%aten_hardtanh_default,), kwargs = {memory_format: torch.channels_last})
+        #     %aten_mean_dim : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.mean.dim](args = (%aten__to_copy_default_2, [-1, -2], True), kwargs = {})
+        #     %aten__to_copy_default_3 : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten._to_copy.default](args = (%aten_mean_dim,), kwargs = {memory_format: torch.contiguous_format})
+        #     return [aten__to_copy_default_3]
+        (
+            Tester(
+                self.Conv2dBnHardtanhMeanSequenceModule().eval(),
+                (torch.randn(1, 1, 6, 6),),
+            )
+            .export()
+            .to_edge()
+            .run_passes(self.PassStage)
+            .check_count(
+                {
+                    self.to_copy_name: 4,
+                }
+            )
+            .run_method()
+            .compare_outputs()
+        )
diff --git a/backends/xnnpack/test/test_xnnpack_passes.py b/backends/xnnpack/test/test_xnnpack_passes.py
index a8b58fe7ef0..457c862846b 100644
--- a/backends/xnnpack/test/test_xnnpack_passes.py
+++ b/backends/xnnpack/test/test_xnnpack_passes.py
@@ -10,9 +10,6 @@
 import torch
 from executorch import exir
 from executorch.backends.xnnpack.passes import XNNPACKPassManager
-from executorch.backends.xnnpack.passes.channels_last_tagged_reshape_pass import (
-    ChannelsLastTaggedReshapePass,
-)
 from executorch.backends.xnnpack.passes.convert_to_linear import ConvertToLinearPass
 from executorch.backends.xnnpack.passes.fuse_batch_norm_with_conv import (
     FuseBatchNormWithConvPass,
@@ -20,9 +17,6 @@
 from executorch.backends.xnnpack.passes.remove_getitem_op import RemoveGetItemPass
 from executorch.backends.xnnpack.passes.tag_implicit_q_dq_pass import TagImplicitQDqPass
 
-from executorch.backends.xnnpack.test.test_xnnpack_utils_classes import (
-    OpSequencesAddConv2d,
-)
 from executorch.backends.xnnpack.utils.configs import get_xnnpack_capture_config
 from executorch.backends.xnnpack.utils.utils import capture_graph_for_xnnpack
 from executorch.exir.backend.canonical_partitioners.duplicate_dequant_node_pass import (
@@ -43,18 +37,6 @@
 
 
 class TestXNNPackPasses(unittest.TestCase):
-    class TwoOutputs(OpSequencesAddConv2d):
-        def __init__(self):
-            super().__init__(1, 2)
-            seq = self.op_sequence[0]
-            self.conv1 = seq[0]
-            self.conv2 = seq[1]
-
-        def forward(self, x):
-            y = self.conv1(x)
-            z = self.conv2(y)
-            return (y, z)
-
     class ReusedInput(torch.nn.Module):
         def __init__(self):
             super().__init__()
@@ -64,15 +46,6 @@ def __init__(self):
         def forward(self, x):
             return self.conv1(x) + self.conv2(x)
 
-    class ConvRelu(torch.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.conv = torch.nn.Conv2d(1, 1, 1)
-            self.relu = torch.nn.ReLU()
-
-        def forward(self, x):
-            return self.relu(self.conv(x))
-
     def capture_and_test_pass(
         self,
         module,
@@ -119,219 +92,6 @@ def capture_and_test_pass(
             )
         return new_exported_program
 
-    def test_channels_last_tagged_reshape_pass(self) -> None:
-        passes = [ChannelsLastTaggedReshapePass]
-
-        for enable_aot, unlift in [(False, None), (True, True), (True, False)]:
-            example_inputs = (torch.rand(1, 1, 6, 6),)
-            # No copies because no ops requiring NHWC format
-            single_add = OpSequencesAddConv2d(0, 0)
-            self.capture_and_test_pass(
-                single_add,
-                example_inputs,
-                passes,
-                0,
-                enable_aot=enable_aot,
-                unlift=unlift,
-            )
-
-            # One copy to NHWC before the conv, and one copy to NCHW at the end
-            single_conv = OpSequencesAddConv2d(1, 1).eval()
-            self.capture_and_test_pass(
-                single_conv,
-                example_inputs,
-                passes,
-                2,
-            )
-
-            # Still one copy to NHWC before the conv, and one copy to NCHW at the
-            # end
-            # Flaky - increased [ra]tol for tensor compare -TODO: look into this test
-            two_seq_two_convs = OpSequencesAddConv2d(2, 2)
-            self.capture_and_test_pass(
-                two_seq_two_convs,
-                example_inputs,
-                passes,
-                2,
-                rtol=1e-04,
-                atol=1e-04,
-            )
-
-    def test_channels_last_reshape_with_conv_relu(self) -> None:
-        passes = [ChannelsLastTaggedReshapePass]
-
-        sample_input = (torch.ones(1, 1, 6, 6),)
-        model = self.ConvRelu().eval()
-
-        for enable_aot, unlift in [(False, None), (True, True), (True, False)]:
-            new_exported_program = self.capture_and_test_pass(
-                model,
-                sample_input,
-                passes,
-                enable_aot=enable_aot,
-                unlift=unlift,
-            )
-            FileCheck().check(
-                "executorch_exir_dialects_edge__ops_aten__to_copy_default"
-            ).check(
-                "executorch_exir_dialects_edge__ops_aten_convolution_default"
-            ).check(
-                "executorch_exir_dialects_edge__ops_aten_relu_default"
-            ).check(
-                "executorch_exir_dialects_edge__ops_aten__to_copy_default"
-            ).run(
-                new_exported_program.graph_module.code
-            )
-
-            prepared = prepare_fx(
-                model,
-                _get_symmetric_qnnpack_qconfig_mapping(),
-                sample_input,
-                backend_config=get_executorch_backend_config(),
-            )
-
-            converted = _convert_to_reference_decomposed_fx(
-                prepared, backend_config=get_executorch_backend_config()
-            )
-            new_quantized_ep = self.capture_and_test_pass(
-                converted, sample_input, passes
-            )
-            FileCheck().check(
-                "executorch_exir_dialects_edge__ops_aten__to_copy_default"
-            ).check(
-                "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default"
-            ).check(
-                "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default"
-            ).check(
-                "executorch_exir_dialects_edge__ops_aten_convolution_default"
-            ).check(
-                "executorch_exir_dialects_edge__ops_aten_relu_default"
-            ).check(
-                "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default"
-            ).check(
-                "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default"
-            ).check(
-                "executorch_exir_dialects_edge__ops_aten__to_copy_default"
-            ).run(
-                new_quantized_ep.graph_module.code
-            )
-
-    def test_channels_last_tagged_reshape_pass_conv2d_bn_hardtanh_mean_sequence(
-        self,
-    ) -> None:
-        passes = [ChannelsLastTaggedReshapePass]
-
-        groups = 1
-        stride = [2, 2]
-        padding = [1, 1]
-        dilation = [1, 1]
-        in_channels = 1
-        out_channels = 1
-
-        class Conv2dBnHardtanhMeanSequenceModule(torch.nn.Module):
-            def __init__(self):
-                super(Conv2dBnHardtanhMeanSequenceModule, self).__init__()
-                self.conv = torch.nn.Conv2d(
-                    in_channels=in_channels,
-                    out_channels=out_channels,
-                    kernel_size=(3, 3),
-                    stride=stride,
-                    padding=padding,
-                    groups=groups,
-                    dilation=dilation,
-                    bias=True,
-                )
-                self.native_batchnorm = torch.nn.BatchNorm2d(out_channels)
-                self.hardtanh = torch.nn.Hardtanh(min_val=0, max_val=6)
-                self.eval()
-
-            def forward(self, x):
-                x = self.conv(x)
-                x = self.native_batchnorm(x)
-                x = self.hardtanh(x)
-                x = torch.mean(x, (-1, -2), keepdim=True)
-                return x
-
-        # Copy #1 is for input to conv, nchw -> nhwc
-        # Copy #2 is for conv to _native_batch_norm_legit_no_training, nhwc -> nchw
-        # Copy #3 is for input to mean, nchw -> nhwc
-        # Copy #4 is for output, nhwc -> nchw
-
-        # The graph looks like:
-        # graph():
-        #     %arg0_1 : [#users=1] = placeholder[target=arg0_1]
-        #     %aten__to_copy_default : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten._to_copy.default](args = (%arg0_1,), kwargs = {memory_format: torch.channels_last})
-        #     %_param_constant0 : [#users=1] = get_attr[target=_param_constant0]
-        #     %_param_constant1 : [#users=1] = get_attr[target=_param_constant1]
-        #     %aten_convolution_default : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.convolution.default](args = (%aten__to_copy_default, %_param_constant0, %_param_constant1, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {})
-        #     %aten__to_copy_default_1 : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten._to_copy.default](args = (%aten_convolution_default,), kwargs = {memory_format: torch.contiguous_format})
-        #     %_param_constant2 : [#users=1] = get_attr[target=_param_constant2]
-        #     %_param_constant3 : [#users=1] = get_attr[target=_param_constant3]
-        #     %_tensor_constant0 : [#users=1] = get_attr[target=_tensor_constant0]
-        #     %_tensor_constant1 : [#users=1] = get_attr[target=_tensor_constant1]
-        #     %aten__native_batch_norm_legit_no_training_default : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten._native_batch_norm_legit_no_training.default](args = (%aten__to_copy_default_1, %_param_constant2, %_param_constant3, %_tensor_constant0, %_tensor_constant1, 0.1, 1e-05), kwargs = {})
-        #     %getitem : [#users=1] = call_function[target=operator.getitem](args = (%aten__native_batch_norm_legit_no_training_default, 0), kwargs = {})
-        #     %aten_hardtanh_default : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.hardtanh.default](args = (%getitem, 0, 6), kwargs = {})
-        #     %aten__to_copy_default_2 : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten._to_copy.default](args = (%aten_hardtanh_default,), kwargs = {memory_format: torch.channels_last})
-        #     %aten_mean_dim : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.mean.dim](args = (%aten__to_copy_default_2, [-1, -2], True), kwargs = {})
-        #     %aten__to_copy_default_3 : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten._to_copy.default](args = (%aten_mean_dim,), kwargs = {memory_format: torch.contiguous_format})
-        #     return [aten__to_copy_default_3]
-
-        sample_input = (torch.ones(1, 1, 6, 6),)
-        for enable_aot, unlift in [(False, None), (True, True), (True, False)]:
-            self.capture_and_test_pass(
-                Conv2dBnHardtanhMeanSequenceModule(),
-                sample_input,
-                passes,
-                4,
-                enable_aot=enable_aot,
-                unlift=unlift,
-            )
-
-    def test_quantized_channels_last_tagged_reshape_pass(self) -> None:
-        passes = [ChannelsLastTaggedReshapePass]
-        prepared_conv = prepare_fx(
-            torch.nn.Conv2d(
-                in_channels=1,
-                out_channels=1,
-                kernel_size=(3, 3),
-                padding=1,
-                bias=False,
-            ).eval(),
-            _get_symmetric_qnnpack_qconfig_mapping(),
-            (torch.randn(1, 1, 3, 3),),
-            backend_config=get_executorch_backend_config(),
-        )
-
-        converted = _convert_to_reference_decomposed_fx(prepared_conv)
-
-        for enable_aot, unlift in [(False, None), (True, True), (True, False)]:
-            result = self.capture_and_test_pass(
-                converted,
-                (torch.randn(1, 1, 3, 3),),
-                passes,
-                enable_aot=enable_aot,
-                unlift=unlift,
-            )
-
-            FileCheck().check_count(
-                "executorch_exir_dialects_edge__ops_aten__to_copy_default",
-                2,
-                exactly=True,
-            ).run(result.graph_module.code)
-
-            FileCheck().check_count(
-                "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default",
-                5,  # 3 original q(input, weights, output) + 2 generated from to_copy
-                exactly=True,
-            ).run(result.graph_module.code)
-
-            FileCheck().check_count(
-                "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default",
-                5,  # 3 original q(input, weights, output) + 2 generated from to_copy
-                exactly=True,
-            ).run(result.graph_module.code)
-
     def test_conv_batch_norm_fusion(self) -> None:
         passes = [FuseBatchNormWithConvPass]
 

From 346d6baecd478a54825fd63eec69b4fc74d1ddc8 Mon Sep 17 00:00:00 2001
From: maxren <maxren@fb.com>
Date: Wed, 27 Sep 2023 22:41:37 -0700
Subject: [PATCH 5/6] Move BN fusion pass

Differential Revision: D49718566

fbshipit-source-id: 9420d42f81354a54b7b2b55e8a7d64ceba2349c0
---
 .../test/passes/test_batch_norm_fusion.py     | 57 +++++++++++++++++++
 backends/xnnpack/test/test_xnnpack_passes.py  | 38 -------------
 2 files changed, 57 insertions(+), 38 deletions(-)
 create mode 100644 backends/xnnpack/test/passes/test_batch_norm_fusion.py

diff --git a/backends/xnnpack/test/passes/test_batch_norm_fusion.py b/backends/xnnpack/test/passes/test_batch_norm_fusion.py
new file mode 100644
index 00000000000..4cadd9baf13
--- /dev/null
+++ b/backends/xnnpack/test/passes/test_batch_norm_fusion.py
@@ -0,0 +1,57 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+from typing import Tuple
+
+import torch
+from executorch.backends.xnnpack.passes.fuse_batch_norm_with_conv import (
+    FuseBatchNormWithConvPass,
+)
+from executorch.backends.xnnpack.test.tester import RunPasses, Tester
+
+
+class TestBatchNormFusion(unittest.TestCase):
+    """Exercises FuseBatchNormWithConvPass on fp32 and quantized models.
+
+    One batch norm is expected to remain because an add separates it from the conv.
+    """
+
+    PassStage = RunPasses([FuseBatchNormWithConvPass])
+    # The module runs in eval() mode, so the no_training batch norm variant is counted.
+    bn_name = "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default"
+
+    class ModelConvBN(torch.nn.Module):
+        """conv -> bn -> conv -> add -> bn, reusing one conv and one bn module."""
+
+        def __init__(
+            self, in_features: int, out_features: int, kernel_size: Tuple[int, int]
+        ):
+            super().__init__()
+            self.conv2d = torch.nn.Conv2d(in_features, out_features, kernel_size)
+            self.bn = torch.nn.BatchNorm2d(out_features)
+
+        def forward(self, x):
+            normalized = self.bn(self.conv2d(x))
+            convolved = self.conv2d(normalized)
+            return self.bn(convolved + convolved)
+
+    def _make_tester(self):
+        """Build a Tester around an eval-mode ModelConvBN and one random input."""
+        return Tester(self.ModelConvBN(2, 2, (2, 2)).eval(), (torch.randn(2, 2, 4, 4),))
+
+    def _assert_single_bn_left(self, tester):
+        """Export, lower to edge, run the pass, check one bn is counted, compare."""
+        lowered = tester.export().to_edge().run_passes(self.PassStage)
+        lowered.check_count({self.bn_name: 1}).run_method().compare_outputs()
+
+    def test_fp32_batch_norm_fusion(self):
+        """fp32 model: the batch norm count checked after the pass is 1."""
+        self._assert_single_bn_left(self._make_tester())
+
+    def test_q8_batch_norm_fusion(self):
+        """Same check, with quantize() applied to the Tester before export."""
+        self._assert_single_bn_left(self._make_tester().quantize())
diff --git a/backends/xnnpack/test/test_xnnpack_passes.py b/backends/xnnpack/test/test_xnnpack_passes.py
index 457c862846b..ff44b773cd4 100644
--- a/backends/xnnpack/test/test_xnnpack_passes.py
+++ b/backends/xnnpack/test/test_xnnpack_passes.py
@@ -11,9 +11,6 @@
 from executorch import exir
 from executorch.backends.xnnpack.passes import XNNPACKPassManager
 from executorch.backends.xnnpack.passes.convert_to_linear import ConvertToLinearPass
-from executorch.backends.xnnpack.passes.fuse_batch_norm_with_conv import (
-    FuseBatchNormWithConvPass,
-)
 from executorch.backends.xnnpack.passes.remove_getitem_op import RemoveGetItemPass
 from executorch.backends.xnnpack.passes.tag_implicit_q_dq_pass import TagImplicitQDqPass
 
@@ -92,41 +89,6 @@ def capture_and_test_pass(
             )
         return new_exported_program
 
-    def test_conv_batch_norm_fusion(self) -> None:
-        passes = [FuseBatchNormWithConvPass]
-
-        class ModelConvBN(torch.nn.Module):
-            def __init__(
-                self, in_features: int, out_features: int, kernel_size: Tuple[int, int]
-            ):
-                super().__init__()
-                self.conv2d = torch.nn.Conv2d(in_features, out_features, kernel_size)
-                self.bn = torch.nn.BatchNorm2d(out_features)
-
-            def forward(self, x):
-                y = self.conv2d(x)
-                y = self.bn(y)
-                y = self.conv2d(y)
-                y = y + y
-                return self.bn(y)
-
-        model = ModelConvBN(2, 2, (2, 2))
-        sample_input = (torch.randn(2, 2, 4, 4),)
-
-        for enable_aot, unlift in [(False, None), (True, True), (True, False)]:
-            # one batchnorm was not removed because it was separated by add
-            # Filecheck exir_ops.edge.aten.native_batch_norm_legit_no_training.default node.
-            # Since we are in eval() mode we should check for no_training variant
-            self.capture_and_test_pass(
-                model.eval(),
-                sample_input,
-                passes,
-                expected_copies=1,
-                expected_node="executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default",
-                enable_aot=enable_aot,
-                unlift=unlift,
-            )
-
     def test_max_pool2d_remove_getitem(self) -> None:
         passes = [RemoveGetItemPass()]
 

From f13a82fdaa5c373894a240ed81f2f46505ea37da Mon Sep 17 00:00:00 2001
From: Max Ren <maxren@meta.com>
Date: Wed, 27 Sep 2023 22:42:00 -0700
Subject: [PATCH 6/6] Move remove get item pass

Summary: Move the RemoveGetItemPass tests out of test_xnnpack_passes.py into test/passes/test_remove_get_item_pass.py, rewritten to use the new Tester-based testing infra

Differential Revision: D49718911

fbshipit-source-id: d961dc60d8d7494636bc29be57575a37afa53534
---
 .../test/passes/test_remove_get_item_pass.py  | 100 ++++++++++++++++++
 backends/xnnpack/test/test_xnnpack_passes.py  |  68 ------------
 2 files changed, 100 insertions(+), 68 deletions(-)
 create mode 100644 backends/xnnpack/test/passes/test_remove_get_item_pass.py

diff --git a/backends/xnnpack/test/passes/test_remove_get_item_pass.py b/backends/xnnpack/test/passes/test_remove_get_item_pass.py
new file mode 100644
index 00000000000..35bd4d8b966
--- /dev/null
+++ b/backends/xnnpack/test/passes/test_remove_get_item_pass.py
@@ -0,0 +1,100 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from executorch.backends.xnnpack.passes.remove_getitem_op import RemoveGetItemPass
+from executorch.backends.xnnpack.test.tester import RunPasses, Tester
+
+
+class TestRemoveGetItemPass(unittest.TestCase):
+    """Tests for RemoveGetItemPass on max_pool2d and max-over-dim modules.
+
+    Every test exports a small module, lowers it to edge, applies the pass and
+    checks that the expected edge op is counted once, then runs the method and
+    compares outputs. The q8 variants quantize the module before export.
+    """
+
+    # Stage wrapping only RemoveGetItemPass; shared by every test in this class.
+    PassStage = RunPasses([RemoveGetItemPass])
+    # Edge op names handed to check_count().
+    max_pool2d_name = "executorch_exir_dialects_edge__ops_aten_max_pool2d_default"
+    amax_name = "executorch_exir_dialects_edge__ops_aten_amax_default"
+
+    class MaxPool2dModule(torch.nn.Module):
+        """Thin wrapper that forwards its input through torch.nn.MaxPool2d."""
+
+        def __init__(
+            self,
+            kernel_size=3,
+            stride=1,
+            padding=0,
+            dilation=1,
+        ):
+            super().__init__()
+            self.max_pool2d_module = torch.nn.MaxPool2d(
+                kernel_size=kernel_size,
+                stride=stride,
+                padding=padding,
+                dilation=dilation,
+            )
+
+        def forward(self, x):
+            return self.max_pool2d_module(x)
+
+    class MaxModule(torch.nn.Module):
+        """Module that returns only the max values along dim 2.
+
+        The indices returned by torch.max are discarded.
+        """
+
+        def forward(self, x):
+            # Unpack (values, indices); only the values are returned.
+            max_vals, _ = torch.max(x, dim=2, keepdim=True)
+            return max_vals
+
+    @staticmethod
+    def _sample_inputs():
+        """Return the single random (4, 3, 24, 24) input used by every test."""
+        return (torch.randn(4, 3, 24, 24),)
+
+    def _lower_and_check(self, tester, op_name):
+        """Run export -> to_edge -> pass on `tester` and check `op_name`.
+
+        Args:
+            tester: A Tester on which export() has not been called yet; it may
+                already have been quantized.
+            op_name: Edge op name passed to check_count() with a count of 1.
+        """
+        (
+            tester.export()
+            .to_edge()
+            .run_passes(self.PassStage)
+            .check_count({op_name: 1})
+            .run_method()
+            .compare_outputs()
+        )
+
+    def test_fp32_max_pool2d_remove_getitem(self):
+        """fp32 max_pool2d: one max_pool2d edge op is counted after the pass."""
+        tester = Tester(self.MaxPool2dModule(), self._sample_inputs())
+        self._lower_and_check(tester, self.max_pool2d_name)
+
+    def test_q8_max_pool2d_remove_getitem(self):
+        """Quantized max_pool2d: one max_pool2d edge op is counted after the pass."""
+        tester = Tester(self.MaxPool2dModule(), self._sample_inputs()).quantize()
+        self._lower_and_check(tester, self.max_pool2d_name)
+
+    def test_fp32_max_remove_getitem(self):
+        """fp32 max over a dim: one amax edge op is counted after the pass."""
+        tester = Tester(self.MaxModule(), self._sample_inputs())
+        self._lower_and_check(tester, self.amax_name)
+
+    def test_q8_max_remove_getitem(self):
+        """Quantized max over a dim: one amax edge op is counted after the pass."""
+        tester = Tester(self.MaxModule(), self._sample_inputs()).quantize()
+        self._lower_and_check(tester, self.amax_name)
diff --git a/backends/xnnpack/test/test_xnnpack_passes.py b/backends/xnnpack/test/test_xnnpack_passes.py
index ff44b773cd4..06de7fc4c97 100644
--- a/backends/xnnpack/test/test_xnnpack_passes.py
+++ b/backends/xnnpack/test/test_xnnpack_passes.py
@@ -11,7 +11,6 @@
 from executorch import exir
 from executorch.backends.xnnpack.passes import XNNPACKPassManager
 from executorch.backends.xnnpack.passes.convert_to_linear import ConvertToLinearPass
-from executorch.backends.xnnpack.passes.remove_getitem_op import RemoveGetItemPass
 from executorch.backends.xnnpack.passes.tag_implicit_q_dq_pass import TagImplicitQDqPass
 
 from executorch.backends.xnnpack.utils.configs import get_xnnpack_capture_config
@@ -89,73 +88,6 @@ def capture_and_test_pass(
             )
         return new_exported_program
 
-    def test_max_pool2d_remove_getitem(self) -> None:
-        passes = [RemoveGetItemPass()]
-
-        class MaxPool2dModule(torch.nn.Module):
-            def __init__(
-                self,
-                kernel_size=3,
-                stride=1,
-                padding=0,
-                dilation=1,
-            ):
-                super().__init__()
-                self.max_pool2d_module = torch.nn.MaxPool2d(
-                    kernel_size=kernel_size,
-                    stride=stride,
-                    padding=padding,
-                    dilation=dilation,
-                )
-
-            def forward(self, x):
-                return self.max_pool2d_module(x)
-
-        maxpool2d_module = MaxPool2dModule(3, 1, 0, 1)
-        model_inputs = (torch.randn(4, 3, 24, 24),)
-
-        edge_ep = capture_graph_for_xnnpack(maxpool2d_module.eval(), model_inputs)
-        new_ep = edge_ep.transform(*passes)
-        result1 = edge_ep(model_inputs[0])[0]
-        result2 = new_ep(model_inputs[0])[0]
-
-        # Filecheck exir_ops.edge.aten.max_pool2d.default node.
-        FileCheck().check_count(
-            "executorch_exir_dialects_edge__ops_aten_max_pool2d_default",
-            1,
-            exactly=True,
-        ).run(new_ep.exported_program.graph_module.code)
-
-        self.assertTrue(torch.allclose(result1, result2))
-
-    def test_max_remove_getitem(self) -> None:
-        passes = [RemoveGetItemPass()]
-
-        class MaxModule(torch.nn.Module):
-            def __init__(
-                self,
-            ):
-                super().__init__()
-
-            def forward(self, x):
-                max_vals, indices = torch.max(x, dim=2, keepdim=True)
-                return max_vals
-
-        max_module = MaxModule()
-        model_inputs = (torch.randn(4, 3, 24, 24),)
-
-        edge_ep = capture_graph_for_xnnpack(max_module.eval(), model_inputs)
-
-        new_ep = edge_ep.transform(*passes)
-        result1 = edge_ep(model_inputs[0])[0]
-        result2 = new_ep(model_inputs[0])[0]
-
-        # Filecheck exir_ops.edge.aten.amax.default node.
-        FileCheck().check_count(
-            "executorch_exir_dialects_edge__ops_aten_amax_default", 1, exactly=True
-        ).run(new_ep.exported_program.graph_module.code)
-
-        self.assertTrue(torch.allclose(result1, result2))
 
     # TODO T154127848: Move this out of XNNPACK dir and into cannonical_partitioner dir
     def test_duplicate_dequant_node_pass(self) -> None: