From c4aebaa825a0705c7773aedc4205f89083c2ebe7 Mon Sep 17 00:00:00 2001 From: maxren Date: Wed, 27 Sep 2023 22:41:37 -0700 Subject: [PATCH 1/6] add dump_artifact for debugging Differential Revision: D49249578 fbshipit-source-id: b691d5584acbb350737663cc458a48d68050da4c --- backends/xnnpack/test/tester/tester.py | 74 ++++++++++++++++++++++---- 1 file changed, 65 insertions(+), 9 deletions(-) diff --git a/backends/xnnpack/test/tester/tester.py b/backends/xnnpack/test/tester/tester.py index 680c508ae9a..a49bcf49025 100644 --- a/backends/xnnpack/test/tester/tester.py +++ b/backends/xnnpack/test/tester/tester.py @@ -5,6 +5,7 @@ # LICENSE file in the root directory of this source tree. import copy +import sys from abc import ABC, abstractmethod from collections import OrderedDict from typing import Any, Dict, List, Optional, Tuple @@ -25,6 +26,7 @@ from executorch.exir.backend.backend_api import to_backend, validation_disabled from executorch.exir.backend.partitioner import Partitioner from executorch.exir.passes.spec_prop_pass import SpecPropPass +from executorch.exir.print_program import pretty_print, print_program from executorch.extension.pybindings.portable_lib import ( # @manual _load_for_executorch_from_buffer, @@ -69,6 +71,33 @@ def graph_module(self): """ pass + # Debug Tools for stages + def artifact_str(self): + """ + Return string printable artifact for this stage + """ + if isinstance(self.artifact, ExirExportedProgram): + return self.artifact.exported_program + return self.artifact + + def stage_banner(self): + """ + Returns banner string for this stage + """ + return "#" * 36 + " " + str(self.__class__.__name__) + " " + "#" * 36 + "\n" + + def dump_artifact(self, path_to_dump: Optional[str]): + """ + Dumps string printable artifact to path. If path_to_dump, then it is printed to terminal + """ + if path_to_dump: + with open(path_to_dump, "a") as fp: + fp.write(str(self.stage_banner() + "\n")) + fp.write(str(self.artifact_str())) + else: + print(self.stage_banner() + "\n") + print(self.artifact_str()) + _stages_: Dict[str, Stage] = {} @@ -207,31 +236,43 @@ def __init__( self.config = config or ExecutorchBackendConfig( passes=[SpecPropPass()], ) - self.exported_program = None + self.executorch_program = None def run(self, artifact: ExirExportedProgram, inputs=None): - self.exported_program = artifact.to_executorch(self.config) + self.executorch_program = artifact.to_executorch(self.config) @property def artifact(self) -> ExecutorchProgram: - return self.exported_program + return self.executorch_program @property def graph_module(self) -> str: - return self.exported_program.graph_module + return self.executorch_program.graph_module + + def dump_artifact(self, path_to_dump: Optional[str]): + """ + dump_artifact is overriden to dump the serialized program + """ + original_stdout = sys.stdout + + sys.stdout = open(path_to_dump, "a") if path_to_dump else sys.stdout + print(self.stage_banner() + "\n") + pretty_print(self.artifact.program) + print_program( + self.artifact.program, + show_meminfo=True, + mark_dynamic_shape_tensor=True, + ) + sys.stdout = original_stdout @register_stage class Serialize(Stage): - def __init__(self, filename: Optional[str] = None): + def __init__(self): self.buffer = None - self.filename = filename def run(self, artifact: ExecutorchProgram, inputs=None) -> None: self.buffer = artifact.buffer - if self.filename is not None: - with open(self.filename, "wb") as f: - f.write(self.buffer) @property def artifact(self) -> bytes: @@ -241,6 +282,16 @@ def artifact(self) -> bytes: def graph_module(self) -> None: return None + def dump_artifact(self, path_to_dump: Optional[str]): + """ + dump_artifact is overridden to dump the serialized bytes into pte file + """ + if not path_to_dump: + raise RuntimeError("path_to_dump file not provided") + else: + with open(path_to_dump, "wb") as f: + f.write(self.artifact) + class Tester: def __init__( @@ -332,6 +383,11 @@ def serialize(self, serialize_stage: Optional[Serialize] = None): return self._run_stage(serialize_stage or Serialize()) # Util functions + def dump_artifact(self, path: Optional[str] = None, stage: Optional[str] = None): + stage = stage or self.cur + self.stages[stage].dump_artifact(path) + return self + def get_artifact(self, stage: Optional[str] = None): stage = stage or self.cur return self.stages[stage].artifact From 21b275ecbaececcef6a95f747c57dded385a367a Mon Sep 17 00:00:00 2001 From: maxren Date: Wed, 27 Sep 2023 22:41:37 -0700 Subject: [PATCH 2/6] run method on different stages Differential Revision: D49708597 fbshipit-source-id: 91663da2f81407c6706dc491fb0ec1b9360282ac --- backends/xnnpack/test/tester/tester.py | 83 ++++++++++++++++---------- 1 file changed, 52 insertions(+), 31 deletions(-) diff --git a/backends/xnnpack/test/tester/tester.py b/backends/xnnpack/test/tester/tester.py index a49bcf49025..c03536e9e2c 100644 --- a/backends/xnnpack/test/tester/tester.py +++ b/backends/xnnpack/test/tester/tester.py @@ -71,6 +71,12 @@ def graph_module(self): """ pass + def run_artifact(self, inputs): + """ + Returns the output of calling the artifact generated by this stage with inputs + """ + return self.artifact(*inputs) + # Debug Tools for stages def artifact_str(self): """ @@ -282,6 +288,14 @@ def artifact(self) -> bytes: def graph_module(self) -> None: return None + def run_artifact(self, inputs): + inputs_flattened, _ = tree_flatten(inputs) + executorch_module = _load_for_executorch_from_buffer(self.buffer) + executorch_output = copy.deepcopy( + executorch_module.run_method("forward", tuple(inputs_flattened)) + ) + return executorch_output + def dump_artifact(self, path_to_dump: Optional[str]): """ dump_artifact is overridden to dump the serialized bytes into pte file @@ -299,7 +313,7 @@ def __init__( module: torch.nn.Module, inputs: Tuple[torch.Tensor], ): - self.module = module + self.original_module = module self.inputs = inputs self.stages: Dict[str, Stage] = OrderedDict.fromkeys(list(_stages_.keys())) self.pipeline = { @@ -327,8 +341,8 @@ def __init__( # Reference output from Eager mode self.reference_output = None - # Output by running a serialized/lowered module on ET - self.executorch_output = None + # Artifact output from stage + self.stage_output = None @staticmethod def _stage_name(stage) -> str: @@ -339,7 +353,7 @@ def _pre(self, stage): name: str = self._stage_name(stage) assert isinstance(name, str) and name in self.stages and not self.stages[name] - last_artifact = self.module + last_artifact = self.original_module if self.cur: assert self.cur in self.pipeline, f"Invalid state: {self.cur}" allowed_next_stages = self.pipeline[self.cur] @@ -410,18 +424,19 @@ def check_count(self, input: Dict[Any, int]): ) return self - def run_method(self, method="forward"): - # Reference - delegated_module = self.get_artifact(self._stage_name(Partition)) - self.reference_output = delegated_module(*self.inputs) - - # ExecuTorch - inputs_flattened, _ = tree_flatten(self.inputs) - serialized_buffer = self.get_artifact(self._stage_name(Serialize)) - executorch_module = _load_for_executorch_from_buffer(serialized_buffer) - self.executorch_output = copy.deepcopy( - executorch_module.run_method(method, tuple(inputs_flattened)) + def run_method( + self, stage: Optional[str] = None, inputs: Optional[Tuple[torch.Tensor]] = None + ): + inputs_to_run = inputs or self.inputs + # Reference Output + self.reference_output = self.stages[self._stage_name(Export)].run_artifact( + inputs_to_run ) + + # Output from running artifact at stage + stage = stage or self.cur + self.stage_output = self.stages[stage].run_artifact(inputs_to_run) + return self @staticmethod @@ -433,25 +448,31 @@ def _assert_outputs_equal(model_output, ref_output, atol=1e-03, rtol=1e-03): relative tolerance is 1e-3. """ - # Compare the result from executor and eager mode direclty - if isinstance(ref_output, tuple) or isinstance(ref_output, list): - # Multiple outputs executor always returns tuple, even if there is one output - assert len(ref_output) == len(model_output) - for i in range(len(ref_output)): - assert torch.allclose( - model_output[i], - ref_output[i], - atol=atol, - rtol=rtol, - ) - else: - # If one output, eager returns tensor while executor returns a tuple(tensor) of size 1 - assert torch.allclose(model_output[0], ref_output, atol=atol, rtol=rtol) + # Multiple outputs executor always returns tuple, even if there is one output + assert len(ref_output) == len(model_output) + for i in range(len(ref_output)): + assert torch.allclose( + model_output[i], + ref_output[i], + atol=atol, + rtol=rtol, + ) def compare_outputs(self, atol=1e-03, rtol=1e-03): + """ + Compares the original of the original nn module with the output of the generated artifact. + This requres calling run_method before calling compare_outputs. As that runs the generated + artifact on the sample inputs and sets the stage output to be compared against the reference + """ assert self.reference_output is not None - assert self.executorch_output is not None + assert self.stage_output is not None + + # Wrap both outputs as tuple, since executor output is always a tuple even if single tensor + if isinstance(self.reference_output, torch.Tensor): + self.reference_output = (self.reference_output,) + if isinstance(self.stage_output, torch.Tensor): + self.stage_output = (self.stage_output,) self._assert_outputs_equal( - self.executorch_output, self.reference_output, atol=atol, rtol=rtol + self.stage_output, self.reference_output, atol=atol, rtol=rtol ) return self From 222d152d006bc53cf36bd57e0e95cbb636a5eb88 Mon Sep 17 00:00:00 2001 From: maxren Date: Wed, 27 Sep 2023 22:41:37 -0700 Subject: [PATCH 3/6] Pass manager should take in type[pass] instead of pass instance Differential Revision: D49710932 fbshipit-source-id: 0865c6f0f2171e49c540b0c625b6cd3a994a9380 --- backends/xnnpack/passes/__init__.py | 6 ++++-- backends/xnnpack/test/tester/tester.py | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/backends/xnnpack/passes/__init__.py b/backends/xnnpack/passes/__init__.py index c4374c006a1..9cecf5ea482 100644 --- a/backends/xnnpack/passes/__init__.py +++ b/backends/xnnpack/passes/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -from typing import List, Optional +from typing import List, Optional, Type from executorch.backends.xnnpack.passes.channels_last_tagged_reshape_pass import ( ChannelsLastTaggedReshapePass, @@ -29,7 +29,9 @@ class XNNPACKPassManager: def __init__( - self, exported_program: ExportedProgram, passes: Optional[List[PassType]] = None + self, + exported_program: ExportedProgram, + passes: Optional[List[Type[PassType]]] = None, ) -> None: """ A helper class to run multiple XNNPack passes on a program diff --git a/backends/xnnpack/test/tester/tester.py b/backends/xnnpack/test/tester/tester.py index c03536e9e2c..a2febf572ac 100644 --- a/backends/xnnpack/test/tester/tester.py +++ b/backends/xnnpack/test/tester/tester.py @@ -8,7 +8,7 @@ import sys from abc import ABC, abstractmethod from collections import OrderedDict -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, Type import torch import torch._export as export @@ -193,7 +193,7 @@ def graph_module(self) -> str: @register_stage class RunPasses(Stage): - def __init__(self, pass_list: Optional[List[PassType]] = None): + def __init__(self, pass_list: Optional[List[Type[PassType]]] = None): self.pass_list = pass_list self.edge_dialect_program = None From 09b640ec356e818f0523177e01f683941a6a05be Mon Sep 17 00:00:00 2001 From: maxren Date: Wed, 27 Sep 2023 22:41:37 -0700 Subject: [PATCH 4/6] Move Channels Last Reshape Pass to Tester Differential Revision: D49715863 fbshipit-source-id: 12111db3ae3872d6d289dcd97724ce57bab516a0 --- backends/xnnpack/test/TARGETS | 7 +- .../test_channels_last_tagged_reshape.py | 180 +++++++++++++ backends/xnnpack/test/test_xnnpack_passes.py | 240 ------------------ 3 files changed, 184 insertions(+), 243 deletions(-) create mode 100644 backends/xnnpack/test/passes/test_channels_last_tagged_reshape.py diff --git a/backends/xnnpack/test/TARGETS b/backends/xnnpack/test/TARGETS index 2a1fe984cf8..111d087f15f 100644 --- a/backends/xnnpack/test/TARGETS +++ b/backends/xnnpack/test/TARGETS @@ -92,13 +92,16 @@ python_unittest( python_unittest( name = "test_xnnpack_passes", - srcs = [ + srcs = glob([ + "passes/*.py", + ]) + [ "test_xnnpack_passes.py", "test_xnnpack_utils_classes.py", ], deps = [ "//caffe2:torch", "//executorch/backends/xnnpack/passes:xnnpack_passes", + "//executorch/backends/xnnpack/test/tester:tester", "//executorch/backends/xnnpack/utils:xnnpack_utils", "//executorch/exir:lib", "//executorch/exir:pass_base", @@ -127,9 +130,7 @@ python_unittest( ]), deps = [ "//caffe2:torch", - "//executorch/backends/xnnpack/partition:xnnpack_partitioner", "//executorch/backends/xnnpack/test/tester:tester", - "//executorch/exir:lib", "//pytorch/vision:torchvision", ], ) diff --git a/backends/xnnpack/test/passes/test_channels_last_tagged_reshape.py b/backends/xnnpack/test/passes/test_channels_last_tagged_reshape.py new file mode 100644 index 00000000000..abb18a8c0b2 --- /dev/null +++ b/backends/xnnpack/test/passes/test_channels_last_tagged_reshape.py @@ -0,0 +1,180 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import unittest + +import torch +from executorch.backends.xnnpack.passes.channels_last_tagged_reshape_pass import ( + ChannelsLastTaggedReshapePass, +) +from executorch.backends.xnnpack.test.test_xnnpack_utils_classes import ( + OpSequencesAddConv2d, +) +from executorch.backends.xnnpack.test.tester import RunPasses, Tester + + +class TestChannelsLastTaggedReshapePass(unittest.TestCase): + PassStage = RunPasses([ChannelsLastTaggedReshapePass]) + # Dictionary mapping modules to expected number of reshapes + modules = { + OpSequencesAddConv2d(0, 0).eval(): 0, + OpSequencesAddConv2d(1, 1).eval(): 2, + OpSequencesAddConv2d(2, 2).eval(): 2, + } + to_copy_name = "executorch_exir_dialects_edge__ops_aten__to_copy_default" + quant_name = "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default" + dequant_name = "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default" + conv_name = "executorch_exir_dialects_edge__ops_aten_convolution_default" + relu_name = "executorch_exir_dialects_edge__ops_aten_relu_default" + + def test_fp32_channels_last_tagged_reshape_pass(self): + for module, num_reshape in self.modules.items(): + ( + Tester(module, (torch.randn(1, 1, 6, 6),)) + .export() + .to_edge() + .run_passes(self.PassStage) + .check_count( + { + self.to_copy_name: num_reshape, + } + ) + .run_method() + .compare_outputs() + ) + + def test_qs8_channels_last_tagged_reshape_pass(self): + for module, num_reshape in self.modules.items(): + ( + Tester(module, (torch.randn(1, 1, 6, 6),)) + .quantize() + .export() + .to_edge() + .run_passes(self.PassStage) + .check( + [ + self.quant_name, + self.dequant_name, + self.to_copy_name, + self.quant_name, + self.dequant_name, + ] + * num_reshape + ) + .run_method() + .compare_outputs() + ) + + class ConvRelu(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv = torch.nn.Conv2d(1, 1, 1) + self.relu = torch.nn.ReLU() + + def forward(self, x): + return self.relu(self.conv(x)) + + def test_fp32_channels_last_tagged_reshape_pass_conv_relu(self): + ( + Tester(self.ConvRelu().eval(), (torch.randn(1, 1, 6, 6),)) + .export() + .to_edge() + .run_passes(self.PassStage) + .check( + [self.to_copy_name, self.conv_name, self.relu_name, self.to_copy_name] + ) + .run_method() + .compare_outputs() + ) + + def test_qs8_channels_last_tagged_reshape_pass_conv_relu(self): + ( + Tester(self.ConvRelu().eval(), (torch.randn(1, 1, 6, 6),)) + .quantize() + .export() + .to_edge() + .run_passes(self.PassStage) + .check( + [ + self.to_copy_name, + self.quant_name, + self.dequant_name, + self.conv_name, + self.relu_name, + self.quant_name, + self.dequant_name, + self.to_copy_name, + ] + ) + .run_method() + .compare_outputs() + ) + + class Conv2dBnHardtanhMeanSequenceModule(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv = torch.nn.Conv2d( + in_channels=1, + out_channels=1, + kernel_size=(3, 3), + stride=[2, 2], + padding=[1, 1], + groups=1, + dilation=[1, 1], + bias=True, + ) + self.native_batchnorm = torch.nn.BatchNorm2d(1) + self.hardtanh = torch.nn.Hardtanh(min_val=0, max_val=6) + self.eval() + + def forward(self, x): + x = self.conv(x) + x = self.native_batchnorm(x) + x = self.hardtanh(x) + x = torch.mean(x, (-1, -2), keepdim=True) + return x + + def test_fp32_channels_last_tagged_reshape_pass_conv_bn_hardtanh_mean_seq(self): + # Copy #1 is for input to conv, nchw -> nhwc + # Copy #2 is for conv to _native_batch_norm_legit_no_training, nhwc -> nchw + # Copy #3 is for input to mean, nchw -> nhwc + # Copy #4 is for output, nhwc -> nchw + + # The graph looks like: + # graph(): + # %arg0_1 : [#users=1] = placeholder[target=arg0_1] + # %aten__to_copy_default : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten._to_copy.default](args = (%arg0_1,), kwargs = {memory_format: torch.channels_last}) + # %_param_constant0 : [#users=1] = get_attr[target=_param_constant0] + # %_param_constant1 : [#users=1] = get_attr[target=_param_constant1] + # %aten_convolution_default : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.convolution.default](args = (%aten__to_copy_default, %_param_constant0, %_param_constant1, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) + # %aten__to_copy_default_1 : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten._to_copy.default](args = (%aten_convolution_default,), kwargs = {memory_format: torch.contiguous_format}) + # %_param_constant2 : [#users=1] = get_attr[target=_param_constant2] + # %_param_constant3 : [#users=1] = get_attr[target=_param_constant3] + # %_tensor_constant0 : [#users=1] = get_attr[target=_tensor_constant0] + # %_tensor_constant1 : [#users=1] = get_attr[target=_tensor_constant1] + # %aten__native_batch_norm_legit_no_training_default : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten._native_batch_norm_legit_no_training.default](args = (%aten__to_copy_default_1, %_param_constant2, %_param_constant3, %_tensor_constant0, %_tensor_constant1, 0.1, 1e-05), kwargs = {}) + # %getitem : [#users=1] = call_function[target=operator.getitem](args = (%aten__native_batch_norm_legit_no_training_default, 0), kwargs = {}) + # %aten_hardtanh_default : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.hardtanh.default](args = (%getitem, 0, 6), kwargs = {}) + # %aten__to_copy_default_2 : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten._to_copy.default](args = (%aten_hardtanh_default,), kwargs = {memory_format: torch.channels_last}) + # %aten_mean_dim : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.mean.dim](args = (%aten__to_copy_default_2, [-1, -2], True), kwargs = {}) + # %aten__to_copy_default_3 : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten._to_copy.default](args = (%aten_mean_dim,), kwargs = {memory_format: torch.contiguous_format}) + # return [aten__to_copy_default_3] + ( + Tester( + self.Conv2dBnHardtanhMeanSequenceModule().eval(), + (torch.randn(1, 1, 6, 6),), + ) + .export() + .to_edge() + .run_passes(self.PassStage) + .check_count( + { + self.to_copy_name: 4, + } + ) + .run_method() + .compare_outputs() + ) diff --git a/backends/xnnpack/test/test_xnnpack_passes.py b/backends/xnnpack/test/test_xnnpack_passes.py index a8b58fe7ef0..457c862846b 100644 --- a/backends/xnnpack/test/test_xnnpack_passes.py +++ b/backends/xnnpack/test/test_xnnpack_passes.py @@ -10,9 +10,6 @@ import torch from executorch import exir from executorch.backends.xnnpack.passes import XNNPACKPassManager -from executorch.backends.xnnpack.passes.channels_last_tagged_reshape_pass import ( - ChannelsLastTaggedReshapePass, -) from executorch.backends.xnnpack.passes.convert_to_linear import ConvertToLinearPass from executorch.backends.xnnpack.passes.fuse_batch_norm_with_conv import ( FuseBatchNormWithConvPass, @@ -20,9 +17,6 @@ from executorch.backends.xnnpack.passes.remove_getitem_op import RemoveGetItemPass from executorch.backends.xnnpack.passes.tag_implicit_q_dq_pass import TagImplicitQDqPass -from executorch.backends.xnnpack.test.test_xnnpack_utils_classes import ( - OpSequencesAddConv2d, -) from executorch.backends.xnnpack.utils.configs import get_xnnpack_capture_config from executorch.backends.xnnpack.utils.utils import capture_graph_for_xnnpack from executorch.exir.backend.canonical_partitioners.duplicate_dequant_node_pass import ( @@ -43,18 +37,6 @@ class TestXNNPackPasses(unittest.TestCase): - class TwoOutputs(OpSequencesAddConv2d): - def __init__(self): - super().__init__(1, 2) - seq = self.op_sequence[0] - self.conv1 = seq[0] - self.conv2 = seq[1] - - def forward(self, x): - y = self.conv1(x) - z = self.conv2(y) - return (y, z) - class ReusedInput(torch.nn.Module): def __init__(self): super().__init__() @@ -64,15 +46,6 @@ def __init__(self): def forward(self, x): return self.conv1(x) + self.conv2(x) - class ConvRelu(torch.nn.Module): - def __init__(self): - super().__init__() - self.conv = torch.nn.Conv2d(1, 1, 1) - self.relu = torch.nn.ReLU() - - def forward(self, x): - return self.relu(self.conv(x)) - def capture_and_test_pass( self, module, @@ -119,219 +92,6 @@ def capture_and_test_pass( ) return new_exported_program - def test_channels_last_tagged_reshape_pass(self) -> None: - passes = [ChannelsLastTaggedReshapePass] - - for enable_aot, unlift in [(False, None), (True, True), (True, False)]: - example_inputs = (torch.rand(1, 1, 6, 6),) - # No copies because no ops requiring NHWC format - single_add = OpSequencesAddConv2d(0, 0) - self.capture_and_test_pass( - single_add, - example_inputs, - passes, - 0, - enable_aot=enable_aot, - unlift=unlift, - ) - - # One copy to NHWC before the conv, and one copy to NCHW at the end - single_conv = OpSequencesAddConv2d(1, 1).eval() - self.capture_and_test_pass( - single_conv, - example_inputs, - passes, - 2, - ) - - # Still one copy to NHWC before the conv, and one copy to NCHW at the - # end - # Flaky - increased [ra]tol for tensor compare -TODO: look into this test - two_seq_two_convs = OpSequencesAddConv2d(2, 2) - self.capture_and_test_pass( - two_seq_two_convs, - example_inputs, - passes, - 2, - rtol=1e-04, - atol=1e-04, - ) - - def test_channels_last_reshape_with_conv_relu(self) -> None: - passes = [ChannelsLastTaggedReshapePass] - - sample_input = (torch.ones(1, 1, 6, 6),) - model = self.ConvRelu().eval() - - for enable_aot, unlift in [(False, None), (True, True), (True, False)]: - new_exported_program = self.capture_and_test_pass( - model, - sample_input, - passes, - enable_aot=enable_aot, - unlift=unlift, - ) - FileCheck().check( - "executorch_exir_dialects_edge__ops_aten__to_copy_default" - ).check( - "executorch_exir_dialects_edge__ops_aten_convolution_default" - ).check( - "executorch_exir_dialects_edge__ops_aten_relu_default" - ).check( - "executorch_exir_dialects_edge__ops_aten__to_copy_default" - ).run( - new_exported_program.graph_module.code - ) - - prepared = prepare_fx( - model, - _get_symmetric_qnnpack_qconfig_mapping(), - sample_input, - backend_config=get_executorch_backend_config(), - ) - - converted = _convert_to_reference_decomposed_fx( - prepared, backend_config=get_executorch_backend_config() - ) - new_quantized_ep = self.capture_and_test_pass( - converted, sample_input, passes - ) - FileCheck().check( - "executorch_exir_dialects_edge__ops_aten__to_copy_default" - ).check( - "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default" - ).check( - "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default" - ).check( - "executorch_exir_dialects_edge__ops_aten_convolution_default" - ).check( - "executorch_exir_dialects_edge__ops_aten_relu_default" - ).check( - "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default" - ).check( - "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default" - ).check( - "executorch_exir_dialects_edge__ops_aten__to_copy_default" - ).run( - new_quantized_ep.graph_module.code - ) - - def test_channels_last_tagged_reshape_pass_conv2d_bn_hardtanh_mean_sequence( - self, - ) -> None: - passes = [ChannelsLastTaggedReshapePass] - - groups = 1 - stride = [2, 2] - padding = [1, 1] - dilation = [1, 1] - in_channels = 1 - out_channels = 1 - - class Conv2dBnHardtanhMeanSequenceModule(torch.nn.Module): - def __init__(self): - super(Conv2dBnHardtanhMeanSequenceModule, self).__init__() - self.conv = torch.nn.Conv2d( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=(3, 3), - stride=stride, - padding=padding, - groups=groups, - dilation=dilation, - bias=True, - ) - self.native_batchnorm = torch.nn.BatchNorm2d(out_channels) - self.hardtanh = torch.nn.Hardtanh(min_val=0, max_val=6) - self.eval() - - def forward(self, x): - x = self.conv(x) - x = self.native_batchnorm(x) - x = self.hardtanh(x) - x = torch.mean(x, (-1, -2), keepdim=True) - return x - - # Copy #1 is for input to conv, nchw -> nhwc - # Copy #2 is for conv to _native_batch_norm_legit_no_training, nhwc -> nchw - # Copy #3 is for input to mean, nchw -> nhwc - # Copy #4 is for output, nhwc -> nchw - - # The graph looks like: - # graph(): - # %arg0_1 : [#users=1] = placeholder[target=arg0_1] - # %aten__to_copy_default : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten._to_copy.default](args = (%arg0_1,), kwargs = {memory_format: torch.channels_last}) - # %_param_constant0 : [#users=1] = get_attr[target=_param_constant0] - # %_param_constant1 : [#users=1] = get_attr[target=_param_constant1] - # %aten_convolution_default : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.convolution.default](args = (%aten__to_copy_default, %_param_constant0, %_param_constant1, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) - # %aten__to_copy_default_1 : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten._to_copy.default](args = (%aten_convolution_default,), kwargs = {memory_format: torch.contiguous_format}) - # %_param_constant2 : [#users=1] = get_attr[target=_param_constant2] - # %_param_constant3 : [#users=1] = get_attr[target=_param_constant3] - # %_tensor_constant0 : [#users=1] = get_attr[target=_tensor_constant0] - # %_tensor_constant1 : [#users=1] = get_attr[target=_tensor_constant1] - # %aten__native_batch_norm_legit_no_training_default : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten._native_batch_norm_legit_no_training.default](args = (%aten__to_copy_default_1, %_param_constant2, %_param_constant3, %_tensor_constant0, %_tensor_constant1, 0.1, 1e-05), kwargs = {}) - # %getitem : [#users=1] = call_function[target=operator.getitem](args = (%aten__native_batch_norm_legit_no_training_default, 0), kwargs = {}) - # %aten_hardtanh_default : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.hardtanh.default](args = (%getitem, 0, 6), kwargs = {}) - # %aten__to_copy_default_2 : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten._to_copy.default](args = (%aten_hardtanh_default,), kwargs = {memory_format: torch.channels_last}) - # %aten_mean_dim : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.mean.dim](args = (%aten__to_copy_default_2, [-1, -2], True), kwargs = {}) - # %aten__to_copy_default_3 : [#users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten._to_copy.default](args = (%aten_mean_dim,), kwargs = {memory_format: torch.contiguous_format}) - # return [aten__to_copy_default_3] - - sample_input = (torch.ones(1, 1, 6, 6),) - for enable_aot, unlift in [(False, None), (True, True), (True, False)]: - self.capture_and_test_pass( - Conv2dBnHardtanhMeanSequenceModule(), - sample_input, - passes, - 4, - enable_aot=enable_aot, - unlift=unlift, - ) - - def test_quantized_channels_last_tagged_reshape_pass(self) -> None: - passes = [ChannelsLastTaggedReshapePass] - prepared_conv = prepare_fx( - torch.nn.Conv2d( - in_channels=1, - out_channels=1, - kernel_size=(3, 3), - padding=1, - bias=False, - ).eval(), - _get_symmetric_qnnpack_qconfig_mapping(), - (torch.randn(1, 1, 3, 3),), - backend_config=get_executorch_backend_config(), - ) - - converted = _convert_to_reference_decomposed_fx(prepared_conv) - - for enable_aot, unlift in [(False, None), (True, True), (True, False)]: - result = self.capture_and_test_pass( - converted, - (torch.randn(1, 1, 3, 3),), - passes, - enable_aot=enable_aot, - unlift=unlift, - ) - - FileCheck().check_count( - "executorch_exir_dialects_edge__ops_aten__to_copy_default", - 2, - exactly=True, - ).run(result.graph_module.code) - - FileCheck().check_count( - "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default", - 5, # 3 original q(input, weights, output) + 2 generated from to_copy - exactly=True, - ).run(result.graph_module.code) - - FileCheck().check_count( - "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default", - 5, # 3 original q(input, weights, output) + 2 generated from to_copy - exactly=True, - ).run(result.graph_module.code) - def test_conv_batch_norm_fusion(self) -> None: passes = [FuseBatchNormWithConvPass] From 346d6baecd478a54825fd63eec69b4fc74d1ddc8 Mon Sep 17 00:00:00 2001 From: maxren Date: Wed, 27 Sep 2023 22:41:37 -0700 Subject: [PATCH 5/6] Move BN fusion pass Differential Revision: D49718566 fbshipit-source-id: 9420d42f81354a54b7b2b55e8a7d64ceba2349c0 --- .../test/passes/test_batch_norm_fusion.py | 57 +++++++++++++++++++ backends/xnnpack/test/test_xnnpack_passes.py | 38 ------------- 2 files changed, 57 insertions(+), 38 deletions(-) create mode 100644 backends/xnnpack/test/passes/test_batch_norm_fusion.py diff --git a/backends/xnnpack/test/passes/test_batch_norm_fusion.py b/backends/xnnpack/test/passes/test_batch_norm_fusion.py new file mode 100644 index 00000000000..4cadd9baf13 --- /dev/null +++ b/backends/xnnpack/test/passes/test_batch_norm_fusion.py @@ -0,0 +1,57 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import unittest +from typing import Tuple + +import torch +from executorch.backends.xnnpack.passes.fuse_batch_norm_with_conv import ( + FuseBatchNormWithConvPass, +) +from executorch.backends.xnnpack.test.tester import RunPasses, Tester + + +class TestBatchNormFusion(unittest.TestCase): + PassStage = RunPasses([FuseBatchNormWithConvPass]) + bn_name = "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default" + + class ModelConvBN(torch.nn.Module): + def __init__( + self, in_features: int, out_features: int, kernel_size: Tuple[int, int] + ): + super().__init__() + self.conv2d = torch.nn.Conv2d(in_features, out_features, kernel_size) + self.bn = torch.nn.BatchNorm2d(out_features) + + def forward(self, x): + y = self.conv2d(x) + y = self.bn(y) + y = self.conv2d(y) + y = y + y + return self.bn(y) + + def test_fp32_batch_norm_fusion(self): + ( + Tester(self.ModelConvBN(2, 2, (2, 2)).eval(), (torch.randn(2, 2, 4, 4),)) + .export() + .to_edge() + .run_passes(self.PassStage) + .check_count({self.bn_name: 1}) + .run_method() + .compare_outputs() + ) + + def test_q8_batch_norm_fusion(self): + ( + Tester(self.ModelConvBN(2, 2, (2, 2)).eval(), (torch.randn(2, 2, 4, 4),)) + .quantize() + .export() + .to_edge() + .run_passes(self.PassStage) + .check_count({self.bn_name: 1}) + .run_method() + .compare_outputs() + ) diff --git a/backends/xnnpack/test/test_xnnpack_passes.py b/backends/xnnpack/test/test_xnnpack_passes.py index 457c862846b..ff44b773cd4 100644 --- a/backends/xnnpack/test/test_xnnpack_passes.py +++ b/backends/xnnpack/test/test_xnnpack_passes.py @@ -11,9 +11,6 @@ from executorch import exir from executorch.backends.xnnpack.passes import XNNPACKPassManager from executorch.backends.xnnpack.passes.convert_to_linear import ConvertToLinearPass -from executorch.backends.xnnpack.passes.fuse_batch_norm_with_conv import ( - FuseBatchNormWithConvPass, -) from executorch.backends.xnnpack.passes.remove_getitem_op import RemoveGetItemPass from executorch.backends.xnnpack.passes.tag_implicit_q_dq_pass import TagImplicitQDqPass @@ -92,41 +89,6 @@ def capture_and_test_pass( ) return new_exported_program - def test_conv_batch_norm_fusion(self) -> None: - passes = [FuseBatchNormWithConvPass] - - class ModelConvBN(torch.nn.Module): - def __init__( - self, in_features: int, out_features: int, kernel_size: Tuple[int, int] - ): - super().__init__() - self.conv2d = torch.nn.Conv2d(in_features, out_features, kernel_size) - self.bn = torch.nn.BatchNorm2d(out_features) - - def forward(self, x): - y = self.conv2d(x) - y = self.bn(y) - y = self.conv2d(y) - y = y + y - return self.bn(y) - - model = ModelConvBN(2, 2, (2, 2)) - sample_input = (torch.randn(2, 2, 4, 4),) - - for enable_aot, unlift in [(False, None), (True, True), (True, False)]: - # one batchnorm was not removed because it was separated by add - # Filecheck exir_ops.edge.aten.native_batch_norm_legit_no_training.default node. - # Since we are in eval() mode we should check for no_training variant - self.capture_and_test_pass( - model.eval(), - sample_input, - passes, - expected_copies=1, - expected_node="executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default", - enable_aot=enable_aot, - unlift=unlift, - ) - def test_max_pool2d_remove_getitem(self) -> None: passes = [RemoveGetItemPass()] From f13a82fdaa5c373894a240ed81f2f46505ea37da Mon Sep 17 00:00:00 2001 From: Max Ren Date: Wed, 27 Sep 2023 22:42:00 -0700 Subject: [PATCH 6/6] Move remove get item pass Summary: Moving the tests for remove get item pass to use the new testing infra Differential Revision: D49718911 fbshipit-source-id: d961dc60d8d7494636bc29be57575a37afa53534 --- .../test/passes/test_remove_get_item_pass.py | 100 ++++++++++++++++++ backends/xnnpack/test/test_xnnpack_passes.py | 68 ------------ 2 files changed, 100 insertions(+), 68 deletions(-) create mode 100644 backends/xnnpack/test/passes/test_remove_get_item_pass.py diff --git a/backends/xnnpack/test/passes/test_remove_get_item_pass.py b/backends/xnnpack/test/passes/test_remove_get_item_pass.py new file mode 100644 index 00000000000..35bd4d8b966 --- /dev/null +++ b/backends/xnnpack/test/passes/test_remove_get_item_pass.py @@ -0,0 +1,100 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import unittest + +import torch +from executorch.backends.xnnpack.passes.remove_getitem_op import RemoveGetItemPass +from executorch.backends.xnnpack.test.tester import RunPasses, Tester + + +class TestRemoveGetItemPass(unittest.TestCase): + PassStage = RunPasses([RemoveGetItemPass]) + max_pool2d_name = "executorch_exir_dialects_edge__ops_aten_max_pool2d_default" + amax_name = "executorch_exir_dialects_edge__ops_aten_amax_default" + + class MaxPool2dModule(torch.nn.Module): + def __init__( + self, + kernel_size=3, + stride=1, + padding=0, + dilation=1, + ): + super().__init__() + self.max_pool2d_module = torch.nn.MaxPool2d( + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + ) + + def forward(self, x): + return self.max_pool2d_module(x) + + def test_fp32_max_pool2d_remove_getitem(self): + ( + Tester(self.MaxPool2dModule(), (torch.randn(4, 3, 24, 24),)) + .export() + .to_edge() + .run_passes(self.PassStage) + .check_count({self.max_pool2d_name: 1}) + .run_method() + .compare_outputs() + ) + + def test_q8_max_pool2d_remove_getitem(self): + ( + Tester(self.MaxPool2dModule(), (torch.randn(4, 3, 24, 24),)) + .quantize() + .export() + .to_edge() + .run_passes(self.PassStage) + .check_count({self.max_pool2d_name: 1}) + .run_method() + .compare_outputs() + ) + + class MaxModule(torch.nn.Module): + def __init__( + self, + ): + super().__init__() + + def forward(self, x): + max_vals, indices = torch.max(x, dim=2, keepdim=True) + return max_vals + + def test_fp32_max_remove_getitem(self): + ( + Tester(self.MaxModule(), (torch.randn(4, 3, 24, 24),)) + .export() + .to_edge() + .run_passes(self.PassStage) + .check_count( + { + self.amax_name: 1, + } + ) + .run_method() + .compare_outputs() + ) + + def test_q8_max_remove_getitem(self): + ( + Tester(self.MaxModule(), (torch.randn(4, 3, 24, 24),)) + .quantize() + .export() + .to_edge() + .run_passes(self.PassStage) + .check_count( + { + self.amax_name: 1, + } + ) + .run_method() + .compare_outputs() + ) diff --git a/backends/xnnpack/test/test_xnnpack_passes.py b/backends/xnnpack/test/test_xnnpack_passes.py index ff44b773cd4..06de7fc4c97 100644 --- a/backends/xnnpack/test/test_xnnpack_passes.py +++ b/backends/xnnpack/test/test_xnnpack_passes.py @@ -11,7 +11,6 @@ from executorch import exir from executorch.backends.xnnpack.passes import XNNPACKPassManager from executorch.backends.xnnpack.passes.convert_to_linear import ConvertToLinearPass -from executorch.backends.xnnpack.passes.remove_getitem_op import RemoveGetItemPass from executorch.backends.xnnpack.passes.tag_implicit_q_dq_pass import TagImplicitQDqPass from executorch.backends.xnnpack.utils.configs import get_xnnpack_capture_config @@ -89,73 +88,6 @@ def capture_and_test_pass( ) return new_exported_program - def test_max_pool2d_remove_getitem(self) -> None: - passes = [RemoveGetItemPass()] - - class MaxPool2dModule(torch.nn.Module): - def __init__( - self, - kernel_size=3, - stride=1, - padding=0, - dilation=1, - ): - super().__init__() - self.max_pool2d_module = torch.nn.MaxPool2d( - kernel_size=kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - ) - - def forward(self, x): - return self.max_pool2d_module(x) - - maxpool2d_module = MaxPool2dModule(3, 1, 0, 1) - model_inputs = (torch.randn(4, 3, 24, 24),) - - edge_ep = capture_graph_for_xnnpack(maxpool2d_module.eval(), model_inputs) - new_ep = edge_ep.transform(*passes) - result1 = edge_ep(model_inputs[0])[0] - result2 = new_ep(model_inputs[0])[0] - - # Filecheck exir_ops.edge.aten.max_pool2d.default node. - FileCheck().check_count( - "executorch_exir_dialects_edge__ops_aten_max_pool2d_default", - 1, - exactly=True, - ).run(new_ep.exported_program.graph_module.code) - - self.assertTrue(torch.allclose(result1, result2)) - - def test_max_remove_getitem(self) -> None: - passes = [RemoveGetItemPass()] - - class MaxModule(torch.nn.Module): - def __init__( - self, - ): - super().__init__() - - def forward(self, x): - max_vals, indices = torch.max(x, dim=2, keepdim=True) - return max_vals - - max_module = MaxModule() - model_inputs = (torch.randn(4, 3, 24, 24),) - - edge_ep = capture_graph_for_xnnpack(max_module.eval(), model_inputs) - - new_ep = edge_ep.transform(*passes) - result1 = edge_ep(model_inputs[0])[0] - result2 = new_ep(model_inputs[0])[0] - - # Filecheck exir_ops.edge.aten.amax.default node. - FileCheck().check_count( - "executorch_exir_dialects_edge__ops_aten_amax_default", 1, exactly=True - ).run(new_ep.exported_program.graph_module.code) - - self.assertTrue(torch.allclose(result1, result2)) # TODO T154127848: Move this out of XNNPACK dir and into cannonical_partitioner dir def test_duplicate_dequant_node_pass(self) -> None: