From 102f34493bde6da600119b246071064a4eda5dd7 Mon Sep 17 00:00:00 2001 From: Roman Janik Date: Fri, 12 Sep 2025 14:22:33 +0200 Subject: [PATCH 1/2] Remove IR optimization in move_relu_before_concat.py --- .../optimizations/move_relu_before_concat.py | 107 ------------------ .../backend/ir/tflite_optimizer/optimizer.py | 8 -- 2 files changed, 115 deletions(-) delete mode 100755 backends/nxp/backend/ir/tflite_optimizer/optimizations/move_relu_before_concat.py diff --git a/backends/nxp/backend/ir/tflite_optimizer/optimizations/move_relu_before_concat.py b/backends/nxp/backend/ir/tflite_optimizer/optimizations/move_relu_before_concat.py deleted file mode 100755 index 4d10b7c80ae..00000000000 --- a/backends/nxp/backend/ir/tflite_optimizer/optimizations/move_relu_before_concat.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright 2024 NXP -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from collections import defaultdict -from copy import deepcopy - -from executorch.backends.nxp.backend.ir.tflite_generator import tflite_model -from executorch.backends.nxp.backend.ir.tflite_optimizer.operator_rules import ( - AllInputsComeFrom, -) -from executorch.backends.nxp.backend.ir.tflite_optimizer.optimizations.base_optimization import ( - BaseOptimization, -) -from executorch.backends.nxp.backend.ir.tflite_optimizer.pattern_matcher import ( - Op, - PatternMatcher, -) -from executorch.backends.nxp.backend.ir.tflite_optimizer.tensor_rules import ( - TensorHasOneConsumer, - TensorsHaveSameQuantization, -) - - -class MoveActivationBeforeConcatenation(BaseOptimization): - """ - Move some operators around in the following pattern. - This is a common pattern that emerges from the conversion of separable convolutions. - - │ │ │ │ - ┌───▼────┐ ┌───▼────┐ ┌───▼────┐ ┌───▼────┐ - │ Conv2D │ ... │ Conv2D │ │ Conv2D │ ... 
│ Conv2D │ - └───┬────┘ └───┬────┘ └───┬────┘ └───┬────┘ - └──┐ ┌──┘ │ │ - ┌──▼──────────▼─┐ ┌──▼───┐ ┌──▼───┐ - │ Concatenation │ ─────► │ Relu │ ... │ Relu │ - └───────┬───────┘ └──┬───┘ └──┬───┘ - │ 'x' └──┐ ┌──┘ - ┌──▼───┐ ┌──▼──────────▼─┐ - │ Relu │ │ Concatenation │ - └──┬───┘ └───────┬───────┘ - │ 'y' │ - """ - - activations = ["Relu", "ReluN1To1", "Relu6", "Tanh", "Sign"] - - def __call__(self) -> bool: - matcher = PatternMatcher( - self._builder, - [ - Op(["Concatenation"], None, ["x"], [AllInputsComeFrom("Conv2D")]), - Op(self.activations, ["x"], ["y"]), - ], - [ - TensorHasOneConsumer("x"), - # If the activation function is not changing the quantization parameters, it can be moved without - # messing with the quantization elsewhere. - TensorsHaveSameQuantization(["x", "y"]), - ], - ) - - to_remove = [] - - # Mapping an operator to a list of operators. These operators (value) will later be added into the TFLite - # model's `operators` in front of the specified operator (key). - to_add: dict[tflite_model.Operator, list[tflite_model.Operator]] = defaultdict( - lambda: [] - ) - - for [concat, activation], _, _, _ in matcher.match_patterns(): - new_concat_inputs = [] - for concat_input in concat.tmp_inputs: - # Create a new operator for the activation function. - new_activation = deepcopy(activation) - new_activation.tmp_inputs = [concat_input] - new_activation_output = self._builder.duplicate_tensor(concat_input) - new_activation.tmp_outputs = [new_activation_output] - - to_add[concat].append( - new_activation - ) # Insert the new activation into the model later. - - new_concat_inputs.append( - new_activation_output - ) # Connect the activation with the `Concatenation`. - - concat.tmp_inputs = new_concat_inputs - - # Tensor rule ensures that only the activation functions is using the output of the `Concatenation`. - # It is safe to bypass. 
- concat.tmp_outputs[0] = activation.tmp_outputs[0] - to_remove.append(activation) - - operators = self._builder.get_operators() - - # Add the new activations into the model. - for concat, activations in to_add.items(): - idx = operators.index(concat) - for activation in activations: - operators.insert(idx, activation) - - # Remove the old activations. - for activation in to_remove: - operators.remove(activation) - - return len(to_remove) != 0 diff --git a/backends/nxp/backend/ir/tflite_optimizer/optimizer.py b/backends/nxp/backend/ir/tflite_optimizer/optimizer.py index 3611c55e995..52de6f224eb 100755 --- a/backends/nxp/backend/ir/tflite_optimizer/optimizer.py +++ b/backends/nxp/backend/ir/tflite_optimizer/optimizer.py @@ -11,9 +11,6 @@ from executorch.backends.nxp.backend.ir import logger from executorch.backends.nxp.backend.ir.conversion_config import ConversionConfig -from executorch.backends.nxp.backend.ir.tflite_optimizer.optimizations.move_relu_before_concat import ( - MoveActivationBeforeConcatenation, -) from executorch.backends.nxp.backend.ir.tflite_optimizer.optimizations.permute_fully_connected_weights_after_reshape import ( PermuteFullyConnectedWeightsAfterReshape, ) @@ -29,8 +26,6 @@ class Optimization(Enum): PERMUTE_FULLY_CONNECTED_WEIGHTS_AFTER_RESHAPE = 12 - MOVE_ACTIVATION_BEFORE_CONCAT = 15 - class Optimizer: """ @@ -68,9 +63,6 @@ def __init__( Optimization.PERMUTE_FULLY_CONNECTED_WEIGHTS_AFTER_RESHAPE: PermuteFullyConnectedWeightsAfterReshape( builder, conversion_config ), - Optimization.MOVE_ACTIVATION_BEFORE_CONCAT: MoveActivationBeforeConcatenation( - builder, conversion_config - ), } def optimize( From 73fb5e23a176da8cb75bb29374e4cba5e224f568 Mon Sep 17 00:00:00 2001 From: Roman Janik Date: Wed, 24 Sep 2025 16:51:03 +0200 Subject: [PATCH 2/2] Add Move activation before concat pass, Concat cluster quantization --- .../move_activation_before_concat.py | 102 ++ .../aten_passes/neutron_aten_pass_manager.py | 9 +- 
backends/nxp/quantizer/neutron_quantizer.py | 10 +- backends/nxp/quantizer/patterns.py | 147 ++- backends/nxp/tests/test_batch_norm_fusion.py | 9 +- backends/nxp/tests/test_gru_splitting.py | 17 +- .../nxp/tests/test_linear_and_add_fusion.py | 55 +- ...st_move_activation_before_concatenation.py | 947 ++++++++++++++++++ .../test_removing_nodes_with_known_outputs.py | 13 +- .../nxp/tests/test_split_group_convolution.py | 20 +- 10 files changed, 1283 insertions(+), 46 deletions(-) create mode 100644 backends/nxp/aten_passes/move_activation_before_concat.py create mode 100644 backends/nxp/tests/test_move_activation_before_concatenation.py diff --git a/backends/nxp/aten_passes/move_activation_before_concat.py b/backends/nxp/aten_passes/move_activation_before_concat.py new file mode 100644 index 00000000000..8ba306d42e2 --- /dev/null +++ b/backends/nxp/aten_passes/move_activation_before_concat.py @@ -0,0 +1,102 @@ +# Copyright 2025 NXP +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + + +import torch + +from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec + +from torch.fx import GraphModule, Node +from torch.fx.passes.infra.pass_base import PassBase, PassResult + + +class MoveActivationBeforeConcat(PassBase): + """Move some operators around in the following pattern. + This is a common pattern that emerges from the conversion of separable convolutions. + This optimization works together with joint quantization of compute nodes and activations. Without it, + it is not beneficial. + + │ │ │ │ + ┌──────▼──────┐ ┌──────▼──────┐ ┌──────▼──────┐ ┌──────▼──────┐ + │ aten.conv2d │ ... │ aten.conv2d │ │ aten.conv2d │ ... │ aten.conv2d │ + └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ + └───────┐ ┌──────┘ │ │ + ┌──▼─────▼─┐ replace with ┌─────▼─────┐ ┌─────▼─────┐ + │ aten.cat │ ──────────────► │ aten.relu │ ... 
│ aten.relu │ + └────┬─────┘ └─────┬─────┘ └─────┬─────┘ + │ └───────┐ ┌───────┘ + ┌─────▼─────┐ ┌──▼─────▼─┐ + │ aten.relu │ │ aten.cat │ + └─────┬─────┘ └────┬─────┘ + │ │ + """ + + def __init__(self, neutron_target_spec: NeutronTargetSpec): + self.neutron_target_spec = neutron_target_spec + + def call(self, module: GraphModule) -> PassResult: + def _is_concat(node_: Node) -> bool: + return ( + node_.op == "call_function" + and node_.target == torch.ops.aten.cat.default + ) + + made_changes = False + + for node in module.graph.nodes: + if not _is_concat(node): + continue # Not cat node. + + cat_node = node + activation = next(iter(cat_node.users), None) # `None` when cat has no users; rejected by the user-count check below. + + # Check if all cat input nodes are conv 2D or linear 2D type and their only user is cat. + if not all( + self.neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear__aten( + input_node + ) + and len(input_node.users) == 1 + for input_node in cat_node.all_input_nodes + ): + continue + + # Check if following activation is supported on Neutron as fused activation. + if not ( + len(cat_node.users) == 1 + and self.neutron_target_spec.neutron_target_info.is_supported_fused_activation__aten( + activation + ) + ): + continue + + # Loop all Cat input nodes and insert new activation after node. + for input_node in cat_node.all_input_nodes: + with module.graph.inserting_after(input_node): + new_activation = module.graph.call_function( + activation.target, + args=(), + kwargs=activation.kwargs, + ) + + new_activation.meta["source_fn_stack"] = [ + ( + new_activation.name, + activation.meta["source_fn_stack"][-1][-1], + ) + ] + new_activation.meta["val"] = input_node.meta["val"] + + # Replace the uses of the input node with the new activation node. + input_node.replace_all_uses_with(new_activation) + new_activation.args = (input_node, *activation.args[1:]) + + # Replace the uses of the activation node with the cat node. 
+ activation.replace_all_uses_with(cat_node) + + module.graph.erase_node(activation) + + made_changes = True + + return PassResult(module, made_changes) diff --git a/backends/nxp/aten_passes/neutron_aten_pass_manager.py b/backends/nxp/aten_passes/neutron_aten_pass_manager.py index 407ebf5da61..35205c76c68 100644 --- a/backends/nxp/aten_passes/neutron_aten_pass_manager.py +++ b/backends/nxp/aten_passes/neutron_aten_pass_manager.py @@ -16,6 +16,9 @@ from executorch.backends.nxp.aten_passes.fuse_linear_and_add_pass import ( FuseLinearAndAddPass, ) +from executorch.backends.nxp.aten_passes.move_activation_before_concat import ( + MoveActivationBeforeConcat, +) from executorch.backends.nxp.aten_passes.remove_nodes_with_known_outputs import ( RemoveNodesWithKnownOutputs, ) @@ -25,6 +28,7 @@ from executorch.backends.nxp.aten_passes.split_gru_based_on_num_layers import ( SplitGRUBasedOnNumLayers, ) +from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec from executorch.exir.pass_manager import PassManager from torch import nn from torch.fx.passes.infra.pass_base import PassResult @@ -34,7 +38,9 @@ class NeutronAtenPassManager(PassManager): - def __init__(self, passes: list[PassType] = None): + def __init__( + self, neutron_target_spec: NeutronTargetSpec, passes: list[PassType] = None + ): passes: list[PassType] = passes or [ FuseBatchNormWithConvPass(), FuseBatchNormWithLinearPass(), @@ -42,6 +48,7 @@ def __init__(self, passes: list[PassType] = None): SplitGRUBasedOnNumLayers(), RemoveNodesWithKnownOutputs(), FuseLinearAndAddPass(), + MoveActivationBeforeConcat(neutron_target_spec), ] super().__init__(passes) diff --git a/backends/nxp/quantizer/neutron_quantizer.py b/backends/nxp/quantizer/neutron_quantizer.py index 6564c19d7b9..f476e16628e 100644 --- a/backends/nxp/quantizer/neutron_quantizer.py +++ b/backends/nxp/quantizer/neutron_quantizer.py @@ -12,6 +12,7 @@ from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
from executorch.backends.nxp.quantizer.patterns import ( AbsPattern, + ActivationsConcatClusterPattern, AdaptiveAvgPoolPattern, AddmmPattern, AddTensorPattern, @@ -225,13 +226,16 @@ def __init__(self, neutron_target_spec: NeutronTargetSpec): self.op_to_applied_quantizer = { pt: False for q in self.quantizers for pt in q.pattern.partition_types() } + self.cluster_quantizers = [ + NeutronAtenQuantizer(ActivationsConcatClusterPattern(self), static_qconfig) + ] def transform_for_annotation( self, model: torch.fx.GraphModule ) -> torch.fx.GraphModule: model.graph.eliminate_dead_code() # Remove dead code to simplify the graph for the passes. - model = NeutronAtenPassManager()(model).graph_module + model = NeutronAtenPassManager(self.neutron_target_spec)(model).graph_module model.graph.eliminate_dead_code() # Remove dead code again, in case it was created by the passes. @@ -240,6 +244,10 @@ def transform_for_annotation( def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: self._annotate_inputs(model) + # Annotate node clusters in model + for cluster_quantizer in self.cluster_quantizers: + cluster_quantizer.annotate(model) + nodes = list(model.graph.nodes) for node in nodes: if ( diff --git a/backends/nxp/quantizer/patterns.py b/backends/nxp/quantizer/patterns.py index ccd579d5c52..ee92cd42ef1 100644 --- a/backends/nxp/quantizer/patterns.py +++ b/backends/nxp/quantizer/patterns.py @@ -13,6 +13,7 @@ from executorch.backends.nxp.quantizer.utils import get_bias_qparams from torch import fx from torch._ops import OpOverload +from torch.fx import Node from torchao.quantization.pt2e import PerChannelMinMaxObserver from torchao.quantization.pt2e.quantizer import ( DerivedQuantizationSpec, @@ -20,6 +21,7 @@ QuantizationSpec, SharedQuantizationSpec, ) + from torchao.quantization.pt2e.quantizer.quantizer import Q_ANNOTATION_KEY @@ -199,7 +201,6 @@ def partition_types(self) -> list[OpOverload]: def get_anchors( self, gm: fx.GraphModule, fused_partition: 
list[fx.GraphModule] ) -> PartitionAnchors: - # pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge... addmm_node = fused_partition[0].nodes[-1] bias_qspec = DerivedQuantizationSpec( @@ -745,3 +746,147 @@ def get_anchors( return get_anchors_for_fixed_quant_specs( fused_partition, scale=1.0 / 128.0, zero_point=0 ) + + +class ActivationsConcatClusterPattern(QuantizationPattern): + """ + Quantizer for activations concat cluster pattern. + + The quantizer matches a pattern where concat node is preceded by activation nodes preceded by Conv 2D or Linear. + All activation nodes quantization parameters must be the same. Only activations that support fusion + into the preceding compute node on Neutron are allowed. This cluster is usually produced by MoveActivationBeforeConcat + pass. Cluster schema: + + │ │ + ┌──────▼──────┐ ┌──────▼──────┐ + │ aten.conv2d │ ... │ aten.conv2d │ + └──────┬──────┘ └──────┬──────┘ + │ │ + ┌─────▼─────┐ ┌─────▼─────┐ + │ aten.relu │ ... │ aten.relu │ + └─────┬─────┘ └─────┬─────┘ + └───────┐ ┌───────┘ + ┌──▼─────▼─┐ + │ aten.cat │ + └────┬─────┘ + │ + """ + + def __init__(self, neutron_quantizer): + self.neutron_quantizer = neutron_quantizer + self.neutron_target_info = ( + self.neutron_quantizer.neutron_target_spec.neutron_target_info + ) + + @staticmethod + def _all_activations_are_equal(activations: list[Node]) -> bool: + first_input_node = activations[0] + hardtanh_t = [ + torch.ops.aten.hardtanh.default, + torch.ops.aten.hardtanh_.default, + ] + relu_t = [ + torch.ops.aten.relu.default, + torch.ops.aten.relu_.default, + ] + tanh_t = [ + torch.ops.aten.tanh.default, + torch.ops.aten.tanh_.default, + ] + + def _activations_are_equal(activation1: Node, activation2: Node) -> bool: + if ( # Targets are equal also with their inplace variants + (activation1.target in hardtanh_t and activation2.target in hardtanh_t) + or (activation1.target in relu_t and activation2.target in relu_t) + or (activation1.target in tanh_t 
and activation2.target in tanh_t) + or ( + activation1.target == torch.ops.aten.sigmoid.default + and activation2.target == torch.ops.aten.sigmoid.default + ) + ): + return True + elif ( # Hardtanh with min_val 0 and max_val 'inf' is equal to Relu + activation1.target in hardtanh_t + and activation1.args[1:] == (0.0, float("inf")) + and activation2.target in relu_t + ) or ( + activation1.target in relu_t + and activation2.target in hardtanh_t + and activation2.args[1:] == (0.0, float("inf")) + ): + return True + else: + return False + + return all( + _activations_are_equal(activation, first_input_node) + for activation in activations + ) + + def partition_types(self) -> list[OpOverload]: + return [torch.ops.aten.cat.default] + + def get_anchors( + self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] + ) -> PartitionAnchors | None: + cat_node = fused_partition[0].nodes[-1] + + # Check all cat inputs are supported activations + if not all( + self.neutron_target_info.is_supported_fused_activation__aten(input_node) + for input_node in cat_node.all_input_nodes + ): + return None + + # Check all cat inputs are equal activations + if not self._all_activations_are_equal(cat_node.all_input_nodes): + return None + + # Check compute nodes are Conv 2D or Linear + if not all( + self.neutron_target_info.is_fusable_conv_or_linear__aten(compute_node) + for input_node in cat_node.all_input_nodes + for compute_node in input_node.all_input_nodes + ): + return None + + # Annotate compute nodes + for input_node in cat_node.all_input_nodes: + for compute_node in input_node.all_input_nodes: + if compute_node.target not in self.neutron_quantizer.op_to_quantizer: + return None + compute_node_quantizer = self.neutron_quantizer.op_to_quantizer[ + compute_node.target + ] + compute_node_quantizer.annotate(gm) + del compute_node.meta[Q_ANNOTATION_KEY].output_qspec + + # Annotate activations + for input_node in cat_node.all_input_nodes: + if input_node.target not in 
self.neutron_quantizer.op_to_quantizer: + return None + activation_quantizer = self.neutron_quantizer.op_to_quantizer[ + input_node.target + ] + activation_quantizer.annotate(gm) + input_node.meta[Q_ANNOTATION_KEY].input_qspec_map = {} + + # Annotate cat node + inputs = [] + first_input_node = cat_node.all_input_nodes[0] + for idx in range(len(cat_node.all_input_nodes)): + inputs.append( + ( + cat_node, + NodeArgsIdx(0, idx), + SharedQuantizationSpec(first_input_node), + ) + ) + outputs = [(cat_node, SharedQuantizationSpec(first_input_node))] + + return PartitionAnchors( + inputs=inputs, + weights=[], + biases=[], + output=outputs, + ) diff --git a/backends/nxp/tests/test_batch_norm_fusion.py b/backends/nxp/tests/test_batch_norm_fusion.py index fce11ce5aa2..eeb4b03d7a6 100644 --- a/backends/nxp/tests/test_batch_norm_fusion.py +++ b/backends/nxp/tests/test_batch_norm_fusion.py @@ -18,7 +18,10 @@ from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.view_copy_converter import ( ViewCopyConverter, ) -from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program +from executorch.backends.nxp.tests.executorch_pipeline import ( + neutron_target_spec, + to_quantized_edge_program, +) from executorch.backends.nxp.tests.executors import OverrideTargetSupportCheck from torch import nn @@ -98,7 +101,7 @@ def test_batch_norm_conv_fusing(bias: bool, input_shape: list[int]): program = torch.export.export(module, example_input, strict=True) og_module = program.module() - pm = NeutronAtenPassManager() + pm = NeutronAtenPassManager(neutron_target_spec) graph_module_out = pm(deepcopy(program.module())).graph_module # Make sure the fusion worked.
@@ -133,7 +136,7 @@ def test_batch_norm_linear_fusing(bias: bool): program = torch.export.export(module, example_input, strict=True) og_module = program.module() - pm = NeutronAtenPassManager() + pm = NeutronAtenPassManager(neutron_target_spec) graph_module_out = pm(deepcopy(program.module())).graph_module # Make sure the fusion worked. diff --git a/backends/nxp/tests/test_gru_splitting.py b/backends/nxp/tests/test_gru_splitting.py index a2e9d324f69..297f9677fb2 100644 --- a/backends/nxp/tests/test_gru_splitting.py +++ b/backends/nxp/tests/test_gru_splitting.py @@ -13,6 +13,7 @@ from executorch.backends.nxp.aten_passes.split_gru_based_on_num_layers import ( SplitGRUBasedOnNumLayers, ) +from executorch.backends.nxp.tests.executorch_pipeline import neutron_target_spec @pytest.fixture(autouse=True) @@ -94,7 +95,9 @@ def test_gru_splitting__with_bias(num_layers): ) # Just 1 `GRU` in the model. # Run pre-processing passes of the float32 aten dialect program. - pytorch_pass_manager = NeutronAtenPassManager([SplitGRUBasedOnNumLayers()]) + pytorch_pass_manager = NeutronAtenPassManager( + neutron_target_spec, [SplitGRUBasedOnNumLayers()] + ) pytorch_pass_manager(exir_program_aten) post_pass_output = [t.detach() for t in exir_program_aten(*example_input)] @@ -143,7 +146,9 @@ def test_gru_splitting__no_bias(num_layers): ) # Just 1 `GRU` in the model. # Run pre-processing passes of the float32 aten dialect program. - pytorch_pass_manager = NeutronAtenPassManager([SplitGRUBasedOnNumLayers()]) + pytorch_pass_manager = NeutronAtenPassManager( + neutron_target_spec, [SplitGRUBasedOnNumLayers()] + ) pytorch_pass_manager(exir_program_aten) post_pass_output = [t.detach() for t in exir_program_aten(*example_input)] @@ -193,7 +198,9 @@ def test_gru_splitting__bidirectional__no_bias(num_layers): ) # Just 1 `GRU` in the model. # Run pre-processing passes of the float32 aten dialect program. 
- pytorch_pass_manager = NeutronAtenPassManager([SplitGRUBasedOnNumLayers()]) + pytorch_pass_manager = NeutronAtenPassManager( + neutron_target_spec, [SplitGRUBasedOnNumLayers()] + ) pytorch_pass_manager(exir_program_aten) nodes = list(exir_program_aten.graph.nodes) @@ -239,7 +246,9 @@ def test_gru_splitting__bidirectional__with_bias(num_layers): ) # Just 1 `GRU` in the model. # Run pre-processing passes of the float32 aten dialect program. - pytorch_pass_manager = NeutronAtenPassManager([SplitGRUBasedOnNumLayers()]) + pytorch_pass_manager = NeutronAtenPassManager( + neutron_target_spec, [SplitGRUBasedOnNumLayers()] + ) pytorch_pass_manager(exir_program_aten) nodes = list(exir_program_aten.graph.nodes) diff --git a/backends/nxp/tests/test_linear_and_add_fusion.py b/backends/nxp/tests/test_linear_and_add_fusion.py index 16d3c4140a2..222d748001c 100644 --- a/backends/nxp/tests/test_linear_and_add_fusion.py +++ b/backends/nxp/tests/test_linear_and_add_fusion.py @@ -18,6 +18,7 @@ from executorch.backends.nxp.aten_passes.remove_nodes_with_known_outputs import ( RemoveNodesWithKnownOutputs, ) +from executorch.backends.nxp.tests.executorch_pipeline import neutron_target_spec from executorch.backends.nxp.tests.executors import graph_contains_any_of_ops from parameterized import parameterized @@ -121,10 +122,11 @@ def test_linear_add_fusing__static__no_bias__valid_shape( original_module = program.module() modified_module = NeutronAtenPassManager( + neutron_target_spec, [ RemoveNodesWithKnownOutputs(), # Make the added tensor static. FuseLinearAndAddPass(), - ] + ], )(deepcopy(program.module())).graph_module # Make sure the module wasn't broken. @@ -167,10 +169,11 @@ def test_linear_add_fusing__static__no_bias__invalid_shape( original_module = program.module() modified_module = NeutronAtenPassManager( + neutron_target_spec, [ RemoveNodesWithKnownOutputs(), # Make the added tensor static. 
FuseLinearAndAddPass(), - ] + ], )(deepcopy(program.module())).graph_module # Make sure the module wasn't broken. @@ -209,10 +212,11 @@ def test_linear_add_fusing__static__bias__valid_shape( original_module = program.module() modified_module = NeutronAtenPassManager( + neutron_target_spec, [ RemoveNodesWithKnownOutputs(), # Make the added tensor static. FuseLinearAndAddPass(), - ] + ], )(deepcopy(program.module())).graph_module # Make sure the module wasn't broken. @@ -253,10 +257,11 @@ def test_linear_add_fusing__static__no_bias__reverse_order(self): original_module = program.module() modified_module = NeutronAtenPassManager( + neutron_target_spec, [ RemoveNodesWithKnownOutputs(), # Make the added tensor static. FuseLinearAndAddPass(), - ] + ], )(deepcopy(program.module())).graph_module # Make sure the module wasn't broken. @@ -295,10 +300,11 @@ def test_linear_add_fusing__static__bias__reverse_order(self): original_module = program.module() modified_module = NeutronAtenPassManager( + neutron_target_spec, [ RemoveNodesWithKnownOutputs(), # Make the added tensor static. FuseLinearAndAddPass(), - ] + ], )(deepcopy(program.module())).graph_module # Make sure the module wasn't broken. @@ -340,10 +346,11 @@ def test_linear_add_fusing__static__alpha__no_bias(self): original_module = program.module() modified_module = NeutronAtenPassManager( + neutron_target_spec, [ RemoveNodesWithKnownOutputs(), # Make the added tensor static. FuseLinearAndAddPass(), - ] + ], )(deepcopy(program.module())).graph_module # Make sure the module wasn't broken. @@ -381,10 +388,11 @@ def test_linear_add_fusing__static__alpha__bias(self): original_module = program.module() modified_module = NeutronAtenPassManager( + neutron_target_spec, [ RemoveNodesWithKnownOutputs(), # Make the added tensor static. FuseLinearAndAddPass(), - ] + ], )(deepcopy(program.module())).graph_module # Make sure the module wasn't broken. 
@@ -424,10 +432,11 @@ def test_linear_add_fusing__static__alpha__reversed_add_inputs(self): original_module = program.module() modified_module = NeutronAtenPassManager( + neutron_target_spec, [ RemoveNodesWithKnownOutputs(), # Make the added tensor static. FuseLinearAndAddPass(), - ] + ], )(deepcopy(program.module())).graph_module # Make sure the module wasn't broken. @@ -474,9 +483,9 @@ def test_linear_add_fusing__dynamic__no_bias__valid_shape( program = torch.export.export(module, example_input, strict=True) original_module = program.module() - modified_module = NeutronAtenPassManager([FuseLinearAndAddPass()])( - deepcopy(program.module()) - ).graph_module + modified_module = NeutronAtenPassManager( + neutron_target_spec, [FuseLinearAndAddPass()] + )(deepcopy(program.module())).graph_module # Make sure the module wasn't broken. original_nodes = list(original_module.graph.nodes) @@ -513,9 +522,9 @@ def test_linear_add_fusing__dynamic__no_bias__invalid_shape( program = torch.export.export(module, example_input, strict=True) original_module = program.module() - modified_module = NeutronAtenPassManager([FuseLinearAndAddPass()])( - deepcopy(program.module()) - ).graph_module + modified_module = NeutronAtenPassManager( + neutron_target_spec, [FuseLinearAndAddPass()] + )(deepcopy(program.module())).graph_module # Make sure the module wasn't broken. original_nodes = list(original_module.graph.nodes) @@ -550,9 +559,9 @@ def test_linear_add_fusing__dynamic__bias__valid_shape( program = torch.export.export(module, example_input, strict=True) original_module = program.module() - modified_module = NeutronAtenPassManager([FuseLinearAndAddPass()])( - deepcopy(program.module()) - ).graph_module + modified_module = NeutronAtenPassManager( + neutron_target_spec, [FuseLinearAndAddPass()] + )(deepcopy(program.module())).graph_module # Make sure the module wasn't broken. 
original_nodes = list(original_module.graph.nodes) @@ -584,9 +593,9 @@ def test_linear_add_fusing__dynamic__reverse_order(self): program = torch.export.export(module, example_input, strict=True) original_module = program.module() - modified_module = NeutronAtenPassManager([FuseLinearAndAddPass()])( - deepcopy(program.module()) - ).graph_module + modified_module = NeutronAtenPassManager( + neutron_target_spec, [FuseLinearAndAddPass()] + )(deepcopy(program.module())).graph_module # Make sure the module wasn't broken. original_nodes = list(original_module.graph.nodes) @@ -618,9 +627,9 @@ def test_linear_add_fusing__dynamic__alpha(self): program = torch.export.export(module, example_input, strict=True) original_module = program.module() - modified_module = NeutronAtenPassManager([FuseLinearAndAddPass()])( - deepcopy(program.module()) - ).graph_module + modified_module = NeutronAtenPassManager( + neutron_target_spec, [FuseLinearAndAddPass()] + )(deepcopy(program.module())).graph_module # Make sure the module wasn't broken. original_nodes = list(original_module.graph.nodes) diff --git a/backends/nxp/tests/test_move_activation_before_concatenation.py b/backends/nxp/tests/test_move_activation_before_concatenation.py new file mode 100644 index 00000000000..779c958c049 --- /dev/null +++ b/backends/nxp/tests/test_move_activation_before_concatenation.py @@ -0,0 +1,947 @@ +# Copyright 2025 NXP +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +import math +import unittest + +import kgb +import numpy as np +import torch +from executorch.backends.nxp.aten_passes.move_activation_before_concat import ( + MoveActivationBeforeConcat, +) +from executorch.backends.nxp.aten_passes.neutron_aten_pass_manager import ( + NeutronAtenPassManager, +) +from executorch.backends.nxp.backend.edge_program_converter import ( + EdgeProgramToIRConverter, +) +from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer +from executorch.backends.nxp.tests.executorch_pipeline import ( + _quantize_model, + get_random_calibration_inputs, + neutron_target_spec, + to_model_input_spec, + to_quantized_edge_program, +) +from executorch.backends.nxp.tests.executors import ( + convert_run_compare, + graph_contains_any_of_ops, + ToChannelFirstPreprocess, + ToChannelLastPreprocess, +) +from executorch.backends.nxp.tests.models import get_activation +from executorch.exir.dialects._ops import ops as exir_ops +from parameterized import parameterized +from torch import nn +from torch.export import ExportedProgram +from torch.fx import GraphModule + +concat_cluster_ops = [ + exir_ops.edge.aten.addmm.default, + exir_ops.edge.aten.convolution.default, + exir_ops.edge.aten.hardtanh.default, + exir_ops.edge.aten.relu.default, + exir_ops.edge.aten.sigmoid.default, + exir_ops.edge.aten.tanh.default, + exir_ops.edge.aten.cat.default, +] + + +class ConvConcatActivationModule(torch.nn.Module): + def __init__(self, activation: str, inplace: bool, in_channels: int): + super().__init__() + self.conv = nn.Conv2d( + in_channels, + in_channels, + (3, 3), + padding=1, + ) + + self.activation = get_activation(activation, inplace) + self.eval() + + def forward(self, x): + x1 = self.conv(x) + x2 = self.conv(x) + x = torch.cat((x1, x2), dim=1) + return self.activation(x) + + +class LinearConcatActivationModule(nn.Module): + def __init__( + self, activation: str, inplace: bool, in_channels: int, mode: str = "linear" + ): + super().__init__() + 
self.mode = mode.lower() + assert self.mode in [ + "linear", + "addmm", + "mm", + ], "Mode must be 'linear', 'addmm', or 'mm'" + + if self.mode == "linear": + self.linear = nn.Linear(in_channels, in_channels) + else: + # Manual weight and bias for addmm/mm. + self.weight = nn.Parameter(torch.empty(in_channels, in_channels)) + self.bias = nn.Parameter(torch.empty(in_channels)) + nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5)) + fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight) + bound = 1 / math.sqrt(fan_in) + nn.init.uniform_(self.bias, -bound, bound) + + self.activation = get_activation(activation, inplace) + self.eval() + + def forward(self, x): + x1, x2 = None, None + + if self.mode == "linear": + x1 = self.linear(x) + x2 = self.linear(x) + if self.mode == "addmm": + x1 = torch.addmm(self.bias, x, self.weight) + x2 = torch.addmm(self.bias, x, self.weight) + elif self.mode == "mm": + x1 = torch.mm(x, self.weight) + x2 = torch.mm(x, self.weight) + + x = torch.cat((x1, x2), dim=1) + return self.activation(x) + + +class ConvActivationConcatModule(torch.nn.Module): + def __init__( + self, + activation1: str, + activation2: str, + act1_inplace: bool, + act2_inplace: bool, + in_channels: int, + ): + super().__init__() + self.conv = nn.Conv2d( + in_channels, + in_channels, + (3, 3), + padding=1, + ) + + self.activation1 = get_activation(activation1, act1_inplace) + self.activation2 = get_activation(activation2, act2_inplace) + self.eval() + + def forward(self, x): + x1 = self.conv(x) + x1 = self.activation1(x1) + x2 = self.conv(x) + x2 = self.activation2(x2) + return torch.cat((x1, x2), dim=1) + + +class LinearActivationConcatModule(torch.nn.Module): + def __init__( + self, + activation1: str, + activation2: str, + act1_inplace: bool, + act2_inplace: bool, + in_channels: int, + ): + super().__init__() + self.linear = nn.Linear(in_channels, in_channels) + + self.activation1 = get_activation(activation1, act1_inplace) + self.activation2 = 
get_activation(activation2, act2_inplace) + self.eval() + + def forward(self, x): + x1 = self.linear(x) + x1 = self.activation1(x1) + x2 = self.linear(x) + x2 = self.activation2(x2) + return torch.cat((x1, x2), dim=1) + + +class TestMoveActivationBeforeConcat(unittest.TestCase): + __test__ = False # Prevent interfering with PyTest tests. + + @classmethod + def setUpClass(cls): + torch.manual_seed(23) + np.random.seed(42) + + @parameterized.expand( + [ + ["relu", True], + ["relu", False], + ["relu6", True], + ["relu6", False], + ["tanh", True], + ["tanh", False], + ["sigmoid", False], + ] + ) + def test_move_activation_before_concat__conv(self, activation, inplace): + input_shape = (1, 3, 8, 8) + model = ConvConcatActivationModule( + activation=activation, inplace=inplace, in_channels=3 + ) + + calibration_inputs = get_random_calibration_inputs( + to_model_input_spec(input_shape) + ) + example_input = calibration_inputs[0] + + exir_program_aten = torch.export.export( + model, example_input, strict=True + ).module() + + outputs_before = [o.detach().numpy() for o in exir_program_aten(*example_input)] + nodes = list(exir_program_aten.graph.nodes) + assert len(nodes) == 8 + cat_node = nodes[5] + assert cat_node.target == torch.ops.aten.cat.default + assert all( + neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear__aten( + input_node + ) + and len(input_node.users) == 1 + for input_node in cat_node.all_input_nodes + ) + assert ( + neutron_target_spec.neutron_target_info.is_supported_fused_activation__aten( + nodes[6] + ) + ) + + # Apply the optimization. + NeutronAtenPassManager( + neutron_target_spec, [MoveActivationBeforeConcat(neutron_target_spec)] + )(exir_program_aten) + + nodes = list(exir_program_aten.graph.nodes) + + # Make sure the optimization was applied. 
+ assert len(nodes) == 9 + cat_node = nodes[7] + assert cat_node.target == torch.ops.aten.cat.default + assert all( + neutron_target_spec.neutron_target_info.is_supported_fused_activation__aten( + input_node + ) + and len(input_node.users) == 1 + for input_node in cat_node.all_input_nodes + ) + assert nodes[8].target == "output" + + outputs_after = [o.detach().numpy() for o in exir_program_aten(*example_input)] + + # Make sure the model still produces the exact same output. + assert np.allclose(outputs_before[0], outputs_after[0]) + + # Run pre-processing passes of the float32 aten dialect program. + neutron_aten_pass_manager = NeutronAtenPassManager(neutron_target_spec) + neutron_aten_pass_manager(exir_program_aten) # All passes by default. + + exir_program_aten_quant = _quantize_model( + exir_program_aten, + NeutronQuantizer(neutron_target_spec), + calibration_inputs, + ) + + # Check convolution and activation are in same QDQ cluster. + nodes = list(exir_program_aten_quant.graph.nodes) + assert len(nodes) == 26 + assert neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear__aten( + nodes[14] + ) + assert ( + neutron_target_spec.neutron_target_info.is_supported_fused_activation__aten( + nodes[15] + ) + ) + assert ( + nodes[16].target + == torch.ops.quantized_decomposed.quantize_per_tensor.default + ) + assert neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear__aten( + nodes[18] + ) + assert ( + neutron_target_spec.neutron_target_info.is_supported_fused_activation__aten( + nodes[19] + ) + ) + assert ( + nodes[20].target + == torch.ops.quantized_decomposed.quantize_per_tensor.default + ) + + @parameterized.expand( + [ + ["relu", True], + ["relu", False], + ["relu6", True], + ["relu6", False], + ["tanh", True], + ["tanh", False], + ["sigmoid", False], + ] + ) + def test_move_activation_before_concat__linear(self, activation, inplace): + input_shape = (1, 8) + model = LinearConcatActivationModule( + activation=activation, inplace=inplace, 
in_channels=8, mode="linear" + ) + + calibration_inputs = get_random_calibration_inputs( + to_model_input_spec(input_shape) + ) + example_input = calibration_inputs[0] + + exir_program_aten = torch.export.export( + model, example_input, strict=True + ).module() + + outputs_before = [o.detach().numpy() for o in exir_program_aten(*example_input)] + nodes = list(exir_program_aten.graph.nodes) + assert len(nodes) == 8 + cat_node = nodes[5] + assert cat_node.target == torch.ops.aten.cat.default + assert all( + neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear__aten( + input_node + ) + and len(input_node.users) == 1 + for input_node in cat_node.all_input_nodes + ) + assert ( + neutron_target_spec.neutron_target_info.is_supported_fused_activation__aten( + nodes[6] + ) + ) + + # Apply the optimization. + NeutronAtenPassManager( + neutron_target_spec, [MoveActivationBeforeConcat(neutron_target_spec)] + )(exir_program_aten) + + nodes = list(exir_program_aten.graph.nodes) + + # Make sure the optimization was applied. + assert len(nodes) == 9 + cat_node = nodes[7] + assert cat_node.target == torch.ops.aten.cat.default + assert all( + neutron_target_spec.neutron_target_info.is_supported_fused_activation__aten( + input_node + ) + and len(input_node.users) == 1 + for input_node in cat_node.all_input_nodes + ) + assert nodes[8].target == "output" + + outputs_after = [o.detach().numpy() for o in exir_program_aten(*example_input)] + + # Make sure the model still produces the exact same output. + assert np.allclose(outputs_before[0], outputs_after[0]) + + # Run pre-processing passes of the float32 aten dialect program. + neutron_aten_pass_manager = NeutronAtenPassManager(neutron_target_spec) + neutron_aten_pass_manager(exir_program_aten) # All passes by default. + + exir_program_aten_quant = _quantize_model( + exir_program_aten, + NeutronQuantizer(neutron_target_spec), + calibration_inputs, + ) + + # Check linear and activation are in same QDQ cluster. 
+ nodes = list(exir_program_aten_quant.graph.nodes) + assert len(nodes) == 22 + assert neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear__aten( + nodes[10] + ) + assert ( + neutron_target_spec.neutron_target_info.is_supported_fused_activation__aten( + nodes[11] + ) + ) + assert ( + nodes[12].target + == torch.ops.quantized_decomposed.quantize_per_tensor.default + ) + assert neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear__aten( + nodes[14] + ) + assert ( + neutron_target_spec.neutron_target_info.is_supported_fused_activation__aten( + nodes[15] + ) + ) + assert ( + nodes[16].target + == torch.ops.quantized_decomposed.quantize_per_tensor.default + ) + + @parameterized.expand( + [ + ["relu", True], + ["relu", False], + ["relu6", True], + ["relu6", False], + ["tanh", True], + ["tanh", False], + ["sigmoid", False], + ] + ) + def test_move_activation_before_concat__addmm(self, activation, inplace): + input_shape = (1, 8) + model = LinearConcatActivationModule( + activation=activation, inplace=inplace, in_channels=8, mode="addmm" + ) + + calibration_inputs = get_random_calibration_inputs( + to_model_input_spec(input_shape) + ) + example_input = calibration_inputs[0] + + exir_program_aten = torch.export.export( + model, example_input, strict=True + ).module() + + outputs_before = [o.detach().numpy() for o in exir_program_aten(*example_input)] + nodes = list(exir_program_aten.graph.nodes) + assert len(nodes) == 8 + cat_node = nodes[5] + assert cat_node.target == torch.ops.aten.cat.default + assert all( + neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear__aten( + input_node + ) + and len(input_node.users) == 1 + for input_node in cat_node.all_input_nodes + ) + assert ( + neutron_target_spec.neutron_target_info.is_supported_fused_activation__aten( + nodes[6] + ) + ) + + # Apply the optimization. 
+ NeutronAtenPassManager( + neutron_target_spec, [MoveActivationBeforeConcat(neutron_target_spec)] + )(exir_program_aten) + + nodes = list(exir_program_aten.graph.nodes) + + # Make sure the optimization was applied. + assert len(nodes) == 9 + cat_node = nodes[7] + assert cat_node.target == torch.ops.aten.cat.default + assert all( + neutron_target_spec.neutron_target_info.is_supported_fused_activation__aten( + input_node + ) + and len(input_node.users) == 1 + for input_node in cat_node.all_input_nodes + ) + assert nodes[8].target == "output" + + outputs_after = [o.detach().numpy() for o in exir_program_aten(*example_input)] + + # Make sure the model still produces the exact same output. + assert np.allclose(outputs_before[0], outputs_after[0]) + + # Run pre-processing passes of the float32 aten dialect program. + neutron_aten_pass_manager = NeutronAtenPassManager(neutron_target_spec) + neutron_aten_pass_manager(exir_program_aten) # All passes by default. + + exir_program_aten_quant = _quantize_model( + exir_program_aten, + NeutronQuantizer(neutron_target_spec), + calibration_inputs, + ) + + # Check addmm and activation are in same QDQ cluster. 
+ nodes = list(exir_program_aten_quant.graph.nodes) + assert len(nodes) == 22 + assert neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear__aten( + nodes[10] + ) + assert ( + neutron_target_spec.neutron_target_info.is_supported_fused_activation__aten( + nodes[11] + ) + ) + assert ( + nodes[12].target + == torch.ops.quantized_decomposed.quantize_per_tensor.default + ) + assert neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear__aten( + nodes[14] + ) + assert ( + neutron_target_spec.neutron_target_info.is_supported_fused_activation__aten( + nodes[15] + ) + ) + assert ( + nodes[16].target + == torch.ops.quantized_decomposed.quantize_per_tensor.default + ) + + @parameterized.expand( + [ + ["relu", True], + ["relu", False], + ["relu6", True], + ["relu6", False], + ["tanh", True], + ["tanh", False], + ["sigmoid", False], + ] + ) + def test_move_activation_before_concat__mm(self, activation, inplace): + input_shape = (1, 8) + model = LinearConcatActivationModule( + activation=activation, inplace=inplace, in_channels=8, mode="mm" + ) + + calibration_inputs = get_random_calibration_inputs( + to_model_input_spec(input_shape) + ) + example_input = calibration_inputs[0] + + exir_program_aten = torch.export.export( + model, example_input, strict=True + ).module() + + outputs_before = [o.detach().numpy() for o in exir_program_aten(*example_input)] + nodes = list(exir_program_aten.graph.nodes) + assert len(nodes) == 7 + cat_node = nodes[4] + assert cat_node.target == torch.ops.aten.cat.default + assert all( + neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear__aten( + input_node + ) + and len(input_node.users) == 1 + for input_node in cat_node.all_input_nodes + ) + assert ( + neutron_target_spec.neutron_target_info.is_supported_fused_activation__aten( + nodes[5] + ) + ) + + # Apply the optimization. 
+ NeutronAtenPassManager( + neutron_target_spec, [MoveActivationBeforeConcat(neutron_target_spec)] + )(exir_program_aten) + + nodes = list(exir_program_aten.graph.nodes) + + # Make sure the optimization was applied. + assert len(nodes) == 8 + cat_node = nodes[6] + assert cat_node.target == torch.ops.aten.cat.default + assert all( + neutron_target_spec.neutron_target_info.is_supported_fused_activation__aten( + input_node + ) + and len(input_node.users) == 1 + for input_node in cat_node.all_input_nodes + ) + assert nodes[7].target == "output" + + outputs_after = [o.detach().numpy() for o in exir_program_aten(*example_input)] + + # Make sure the model still produces the exact same output. + assert np.allclose(outputs_before[0], outputs_after[0]) + + # Run pre-processing passes of the float32 aten dialect program. + neutron_aten_pass_manager = NeutronAtenPassManager(neutron_target_spec) + neutron_aten_pass_manager(exir_program_aten) # All passes by default. + + exir_program_aten_quant = _quantize_model( + exir_program_aten, + NeutronQuantizer(neutron_target_spec), + calibration_inputs, + ) + + # Check mm and activation are in same QDQ cluster. 
+ nodes = list(exir_program_aten_quant.graph.nodes) + assert len(nodes) == 19 + assert neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear__aten( + nodes[7] + ) + assert ( + neutron_target_spec.neutron_target_info.is_supported_fused_activation__aten( + nodes[8] + ) + ) + assert ( + nodes[9].target + == torch.ops.quantized_decomposed.quantize_per_tensor.default + ) + assert neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear__aten( + nodes[11] + ) + assert ( + neutron_target_spec.neutron_target_info.is_supported_fused_activation__aten( + nodes[12] + ) + ) + assert ( + nodes[13].target + == torch.ops.quantized_decomposed.quantize_per_tensor.default + ) + + @parameterized.expand( + [ + ["relu", True], + ["relu", False], + ["relu6", True], + ["relu6", False], + ["tanh", True], + ["tanh", False], + ["sigmoid", False], + ] + ) + def test_move_activation_before_concat_quantization__conv( + self, activation, inplace + ): + with kgb.spy_on( + EdgeProgramToIRConverter.convert_program, + call_original=True, + owner=EdgeProgramToIRConverter, + ) as converter_spy: + input_shape = (1, 8, 8, 8) + model = ConvConcatActivationModule( + activation=activation, inplace=inplace, in_channels=8 + ) + + edge_program = to_quantized_edge_program( + model, input_shape + ).exported_program() + + # Make sure that all nodes were delegated. 
+ assert not graph_contains_any_of_ops( + graph=edge_program.graph, ops=concat_cluster_ops + ) + assert any( + "lowered_module" in node.name for node in edge_program.graph.nodes + ) + + tflite_flatbuffers_model, io_formats = converter_spy.calls[-1].return_value + exported_program: ExportedProgram = converter_spy.calls[-1].args[0] + input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype( + np.int8 + ) + convert_run_compare( + exported_program, + input_data, + tfl_model=tflite_flatbuffers_model, + tflite_input_preprocess=ToChannelLastPreprocess(), + tflite_output_preprocess=ToChannelFirstPreprocess(), + ) + + @parameterized.expand( + [ + ["relu", True], + ["relu", False], + ["relu6", True], + ["relu6", False], + ["tanh", True], + ["tanh", False], + ["sigmoid", False], + ] + ) + def test_move_activation_before_concat_quantization__linear( + self, activation, inplace + ): + with kgb.spy_on( + EdgeProgramToIRConverter.convert_program, + call_original=True, + owner=EdgeProgramToIRConverter, + ) as converter_spy: + input_shape = (1, 8) + model = LinearConcatActivationModule( + activation=activation, inplace=inplace, in_channels=8, mode="linear" + ) + + edge_program = to_quantized_edge_program( + model, input_shape + ).exported_program() + + # Make sure that all nodes were delegated. 
+ assert not graph_contains_any_of_ops( + graph=edge_program.graph, ops=concat_cluster_ops + ) + assert any( + "lowered_module" in node.name for node in edge_program.graph.nodes + ) + + tflite_flatbuffers_model, io_formats = converter_spy.calls[-1].return_value + exported_program: ExportedProgram = converter_spy.calls[-1].args[0] + input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype( + np.int8 + ) + convert_run_compare( + exported_program, + input_data, + tfl_model=tflite_flatbuffers_model, + ) + + @parameterized.expand( + [ + ["relu", True], + ["relu", False], + ["relu6", True], + ["relu6", False], + ["tanh", True], + ["tanh", False], + ["sigmoid", False], + ] + ) + def test_move_activation_before_concat_quantization__addmm( + self, activation, inplace + ): + torch.manual_seed(23) + with kgb.spy_on( + EdgeProgramToIRConverter.convert_program, + call_original=True, + owner=EdgeProgramToIRConverter, + ) as converter_spy: + input_shape = (1, 8) + model = LinearConcatActivationModule( + activation=activation, inplace=inplace, in_channels=8, mode="addmm" + ) + + edge_program = to_quantized_edge_program( + model, input_shape + ).exported_program() + + # Make sure that all nodes were delegated. 
+ assert not graph_contains_any_of_ops( + graph=edge_program.graph, ops=concat_cluster_ops + ) + assert any( + "lowered_module" in node.name for node in edge_program.graph.nodes + ) + + tflite_flatbuffers_model, io_formats = converter_spy.calls[-1].return_value + exported_program: ExportedProgram = converter_spy.calls[-1].args[0] + input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype( + np.int8 + ) + convert_run_compare( + exported_program, + input_data, + tfl_model=tflite_flatbuffers_model, + atol=1.0, + ) + + @parameterized.expand( + [ + ["relu", True], + ["relu", False], + ["relu6", True], + ["relu6", False], + ["tanh", True], + ["tanh", False], + ["sigmoid", False], + ] + ) + def test_move_activation_before_concat_quantization__mm(self, activation, inplace): + with kgb.spy_on( + EdgeProgramToIRConverter.convert_program, + call_original=True, + owner=EdgeProgramToIRConverter, + ) as converter_spy: + input_shape = (1, 8) + model = LinearConcatActivationModule( + activation=activation, inplace=inplace, in_channels=8, mode="mm" + ) + + edge_program = to_quantized_edge_program( + model, input_shape + ).exported_program() + + # Make sure that all nodes were delegated. 
+ assert not graph_contains_any_of_ops( + graph=edge_program.graph, ops=concat_cluster_ops + ) + assert any( + "lowered_module" in node.name for node in edge_program.graph.nodes + ) + + tflite_flatbuffers_model, io_formats = converter_spy.calls[-1].return_value + exported_program: ExportedProgram = converter_spy.calls[-1].args[0] + input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype( + np.int8 + ) + convert_run_compare( + exported_program, + input_data, + tfl_model=tflite_flatbuffers_model, + ) + + @parameterized.expand( + [ + ["relu", "relu", True, False], + ["relu6", "relu6", False, True], + ["tanh", "tanh", True, False], + ["sigmoid", "sigmoid", False, True], + ["relu", "relu_hardtanh", True, True], + ] + ) + def test_concat_cluster_quantization__conv( + self, activation1, activation2, act1_inplace, act2_inplace + ): + with kgb.spy_on( + EdgeProgramToIRConverter.convert_program, + call_original=True, + owner=EdgeProgramToIRConverter, + ) as converter_spy: + with kgb.spy_on(_quantize_model, call_original=True) as quantizer_spy: + input_shape = (1, 8, 8, 8) + model = ConvActivationConcatModule( + activation1, activation2, act1_inplace, act2_inplace, in_channels=8 + ) + + edge_program = to_quantized_edge_program( + model, input_shape + ).exported_program() + + # Make sure that all nodes were delegated. + assert not graph_contains_any_of_ops( + graph=edge_program.graph, + ops=concat_cluster_ops, + ) + assert any( + "lowered_module" in node.name for node in edge_program.graph.nodes + ) + + tflite_flatbuffers_model, io_formats = converter_spy.calls[ + -1 + ].return_value + exported_program: ExportedProgram = converter_spy.calls[-1].args[0] + exir_program_aten_quant: GraphModule = quantizer_spy.calls[-1].args[0] + + # Check convolution and activation are in same QDQ cluster. 
+ nodes = list(exir_program_aten_quant.graph.nodes) + assert len(nodes) == 26 + assert neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear__aten( + nodes[14] + ) + assert neutron_target_spec.neutron_target_info.is_supported_fused_activation__aten( + nodes[15] + ) + assert ( + nodes[16].target + == torch.ops.quantized_decomposed.quantize_per_tensor.default + ) + assert neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear__aten( + nodes[18] + ) + assert neutron_target_spec.neutron_target_info.is_supported_fused_activation__aten( + nodes[19] + ) + assert ( + nodes[20].target + == torch.ops.quantized_decomposed.quantize_per_tensor.default + ) + + input_data = ( + np.random.random(input_shape).astype(np.float32) * 50 + ).astype(np.int8) + convert_run_compare( + exported_program, + input_data, + tfl_model=tflite_flatbuffers_model, + tflite_input_preprocess=ToChannelLastPreprocess(), + tflite_output_preprocess=ToChannelFirstPreprocess(), + ) + + @parameterized.expand( + [ + ["relu", "relu", True, False], + ["relu6", "relu6", False, True], + ["tanh", "tanh", True, False], + ["sigmoid", "sigmoid", False, True], + ["relu", "relu_hardtanh", True, True], + ] + ) + def test_concat_cluster_quantization__linear( + self, activation1, activation2, act1_inplace, act2_inplace + ): + with kgb.spy_on( + EdgeProgramToIRConverter.convert_program, + call_original=True, + owner=EdgeProgramToIRConverter, + ) as converter_spy: + with kgb.spy_on(_quantize_model, call_original=True) as quantizer_spy: + input_shape = (1, 8) + model = LinearActivationConcatModule( + activation1, activation2, act1_inplace, act2_inplace, in_channels=8 + ) + + edge_program = to_quantized_edge_program( + model, input_shape + ).exported_program() + + # Make sure that all nodes were delegated. 
+ assert not graph_contains_any_of_ops( + graph=edge_program.graph, + ops=concat_cluster_ops, + ) + assert any( + "lowered_module" in node.name for node in edge_program.graph.nodes + ) + + tflite_flatbuffers_model, io_formats = converter_spy.calls[ + -1 + ].return_value + exported_program: ExportedProgram = converter_spy.calls[-1].args[0] + exir_program_aten_quant: GraphModule = quantizer_spy.calls[-1].args[0] + + # Check linear and activation are in same QDQ cluster. + nodes = list(exir_program_aten_quant.graph.nodes) + assert len(nodes) == 22 + assert neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear__aten( + nodes[10] + ) + assert neutron_target_spec.neutron_target_info.is_supported_fused_activation__aten( + nodes[11] + ) + assert ( + nodes[12].target + == torch.ops.quantized_decomposed.quantize_per_tensor.default + ) + assert neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear__aten( + nodes[14] + ) + assert neutron_target_spec.neutron_target_info.is_supported_fused_activation__aten( + nodes[15] + ) + assert ( + nodes[16].target + == torch.ops.quantized_decomposed.quantize_per_tensor.default + ) + + input_data = ( + np.random.random(input_shape).astype(np.float32) * 50 + ).astype(np.int8) + convert_run_compare( + exported_program, + input_data, + tfl_model=tflite_flatbuffers_model, + tflite_input_preprocess=ToChannelLastPreprocess(), + tflite_output_preprocess=ToChannelFirstPreprocess(), + ) diff --git a/backends/nxp/tests/test_removing_nodes_with_known_outputs.py b/backends/nxp/tests/test_removing_nodes_with_known_outputs.py index 8f5549c8526..0c496356791 100644 --- a/backends/nxp/tests/test_removing_nodes_with_known_outputs.py +++ b/backends/nxp/tests/test_removing_nodes_with_known_outputs.py @@ -17,6 +17,7 @@ from executorch.backends.nxp.aten_passes.split_gru_based_on_num_layers import ( SplitGRUBasedOnNumLayers, ) +from executorch.backends.nxp.tests.executorch_pipeline import neutron_target_spec from 
executorch.backends.nxp.tests.executors import graph_contains_any_of_ops from parameterized import parameterized from torch import nn @@ -57,7 +58,9 @@ def test_removing_nodes__zeros(self): outputs_before = [o.detach().numpy() for o in exir_program_aten(*example_input)] # Apply the optimization. - NeutronAtenPassManager([RemoveNodesWithKnownOutputs()])(exir_program_aten) + NeutronAtenPassManager(neutron_target_spec, [RemoveNodesWithKnownOutputs()])( + exir_program_aten + ) # Make sure the `aten.zeros` is no longer in the model. assert not graph_contains_any_of_ops( @@ -81,7 +84,9 @@ def test_removing_nodes__split(self, num_layers): exir_program_aten = torch.export.export(model, example_input).module() # Apply the pass to split the `aten.gru.input` into multiple instances, and add a `split` node. - NeutronAtenPassManager([SplitGRUBasedOnNumLayers()])(exir_program_aten) + NeutronAtenPassManager(neutron_target_spec, [SplitGRUBasedOnNumLayers()])( + exir_program_aten + ) # Make sure the `aten.zeros` and `torch.split` are in the model. assert graph_contains_any_of_ops( @@ -93,7 +98,9 @@ def test_removing_nodes__split(self, num_layers): outputs_before = [o.detach().numpy() for o in exir_program_aten(*example_input)] # Apply the optimization. - NeutronAtenPassManager([RemoveNodesWithKnownOutputs()])(exir_program_aten) + NeutronAtenPassManager(neutron_target_spec, [RemoveNodesWithKnownOutputs()])( + exir_program_aten + ) # Make sure the `aten.zeros` and `torch.split` are no longer in the model. 
assert not graph_contains_any_of_ops( diff --git a/backends/nxp/tests/test_split_group_convolution.py b/backends/nxp/tests/test_split_group_convolution.py index 8b2d5723dbb..6e084699307 100644 --- a/backends/nxp/tests/test_split_group_convolution.py +++ b/backends/nxp/tests/test_split_group_convolution.py @@ -88,9 +88,9 @@ def test_split_group_convolution__2d(self, _, input_shape: list[int], group: int graph_module = torch.export.export(module, example_input, strict=True).module() original_module = deepcopy(graph_module) - modified_module = NeutronAtenPassManager([SplitGroupConvolution()])( - graph_module - ).graph_module + modified_module = NeutronAtenPassManager( + neutron_target_spec, [SplitGroupConvolution()] + )(graph_module).graph_module # Make sure the fusion worked. original_nodes = list(original_module.graph.nodes) @@ -145,9 +145,9 @@ def test_split_group_convolution__1d(self, _, input_shape: list[int], group: int graph_module = torch.export.export(module, example_input).module() original_module = deepcopy(graph_module) - modified_module = NeutronAtenPassManager([SplitGroupConvolution()])( - graph_module - ).graph_module + modified_module = NeutronAtenPassManager( + neutron_target_spec, [SplitGroupConvolution()] + )(graph_module).graph_module # Make sure the fusion worked. original_nodes = list(original_module.graph.nodes) @@ -199,9 +199,9 @@ def test_split_group_convolution__3d(self, _, input_shape: list[int], group: int graph_module = torch.export.export(module, example_input).module() original_module = deepcopy(graph_module) - modified_module = NeutronAtenPassManager([SplitGroupConvolution()])( - graph_module - ).graph_module + modified_module = NeutronAtenPassManager( + neutron_target_spec, [SplitGroupConvolution()] + )(graph_module).graph_module # Verify that the pass has NOT made any changes, as it is disabled for 3D convolution. 
original_nodes = list(original_module.graph.nodes) @@ -233,7 +233,7 @@ def test_split_group_convolution__applied_by_default(self): graph_module = torch.export.export(module, example_input).module() original_module = deepcopy(graph_module) - modified_module = NeutronAtenPassManager()( + modified_module = NeutronAtenPassManager(neutron_target_spec)( graph_module ).graph_module # Default passes.