From 0b5b0e80f93309a7e017010fcccfef045cdec47b Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Mon, 7 Apr 2025 20:46:33 -0700 Subject: [PATCH 01/24] WIP: add initial support for dq 2D conv --- .../xnnpack/partition/config/gemm_configs.py | 6 ++ .../xnnpack/quantizer/xnnpack_quantizer.py | 1 + .../quantizer/xnnpack_quantizer_utils.py | 11 +++ backends/xnnpack/test/ops/test_conv2d.py | 86 ++++++++++++++++++- 4 files changed, 103 insertions(+), 1 deletion(-) diff --git a/backends/xnnpack/partition/config/gemm_configs.py b/backends/xnnpack/partition/config/gemm_configs.py index 8712c2709ac..a05bf623e05 100644 --- a/backends/xnnpack/partition/config/gemm_configs.py +++ b/backends/xnnpack/partition/config/gemm_configs.py @@ -358,6 +358,11 @@ def check_constraints(self, node: torch.fx.Node, ep: ExportedProgram) -> bool: why(node, "Only support 1D + 2D Conv") return False # Only support 1D + 2D Conv + precision = self._detect_precision(node) + if precision == ConfigPrecisionType.DYNAMIC_QUANT and len(conv_stride) != 2: + why(node, "Only support 2D Conv for dynamic quantization") + return False + kernel_node = get_input_node(node, 1) weight_quant_params = QuantParams.from_weights(kernel_node, ep) @@ -394,6 +399,7 @@ def supported_precision_types(self): return [ ConfigPrecisionType.FP32, ConfigPrecisionType.STATIC_QUANT, + ConfigPrecisionType.DYNAMIC_QUANT, ] diff --git a/backends/xnnpack/quantizer/xnnpack_quantizer.py b/backends/xnnpack/quantizer/xnnpack_quantizer.py index 0ddee53a41a..fdabd0383e6 100644 --- a/backends/xnnpack/quantizer/xnnpack_quantizer.py +++ b/backends/xnnpack/quantizer/xnnpack_quantizer.py @@ -265,6 +265,7 @@ class XNNPACKQuantizer(Quantizer): DYNAMIC_OPS = [ "linear", + "conv", ] def __init__(self) -> None: diff --git a/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py b/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py index ce459806c6e..4763e39fa2f 100644 --- a/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py +++ b/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py @@ -304,6 +304,17 @@ def _do_annotate_conv( for n in gm.graph.nodes: if not is_conv_node(n): continue + + # TODO: Check for dynamically quantized convs and check if nn.Conv2d is always lowered + # Only dynamically quantize 2D convolutions + # Handle both nn.Conv2d and aten.conv2d.default + if n.op == "call_module": + mod = gm.get_submodule(n.target) + if not hasattr(mod, "padding") or len(mod.padding) != 2: + continue + elif n.op == "call_function" and n.target != torch.ops.aten.conv2d.default: + continue + conv_node = n # This is hacky! diff --git a/backends/xnnpack/test/ops/test_conv2d.py b/backends/xnnpack/test/ops/test_conv2d.py index 80b731bd18e..5001d2b6e88 100644 --- a/backends/xnnpack/test/ops/test_conv2d.py +++ b/backends/xnnpack/test/ops/test_conv2d.py @@ -18,6 +18,10 @@ except: has_quantized_ops = False +from executorch.backends.xnnpack.partition.config.xnnpack_config import ( + ConfigPrecisionType, +) +from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import ( get_symmetric_quantization_config, ) @@ -26,7 +30,10 @@ ) from executorch.backends.xnnpack.test.test_xnnpack_utils import randomize_bn from executorch.backends.xnnpack.test.tester import Quantize, Tester - +from executorch.backends.xnnpack.test.tester.tester import ( + Partition, + ToEdgeTransformAndLower, +) from executorch.exir.dialects._ops import ops as exir_ops @@ -223,6 +230,61 @@ def _test( .run_method_and_compare_outputs(qtol=1) ) + def _test_dq_conv2d( + self, + m: torch.nn.Module, + inputs, + dynamic_shapes, + atol=5e-02, + ): + quant_config = get_symmetric_quantization_config( + is_per_channel=True, + is_dynamic=True, + act_qmin=-128, + act_qmax=127, + weight_qmin=-128, + weight_qmax=127, + ) + + DynamicallyQuantizedPartitioner = XnnpackPartitioner( + config_precisions=ConfigPrecisionType.DYNAMIC_QUANT, + per_op_mode=False, + ) + + tester = Tester(m, inputs, dynamic_shapes=dynamic_shapes) + tester = tester.quantize(Quantize(quantization_config=quant_config)) + + # Print after quantization + tester.stages["quantize"] = tester.stages[tester.cur] + print("\n----------Annotated Graph:") + print(tester.stages["quantize"].graph_module.code) + + exported = tester.export() + + # Print after exporting + tester.stages["export"] = exported.stages[exported.cur] + print("\n----------Exported Graph:") + print(tester.stages["export"].graph_module.code) + + # Check for choose_qparams + tester.check(["torch.ops.quantized_decomposed.choose_qparams"]) + + tester.to_edge_transform_and_lower( + ToEdgeTransformAndLower([DynamicallyQuantizedPartitioner]) + ) + + # Print after lower and partition + print("\n----------Lowered Graph:") + print(tester.stages[tester.cur].graph_module.code) + + tester.check(["executorch_exir_dialects_edge__ops_aten_convolution_default"]) + tester.check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + tester.check_not(["executorch_exir_dialects_edge__ops_aten_conv2d_default"]) + + tester.to_executorch() + tester.serialize() + tester.run_method_and_compare_outputs(atol=atol) + def test_fp16_conv2d(self) -> None: for transpose in (True, False): for has_bias in (True, False): @@ -699,3 +761,25 @@ def forward(self, x): .serialize() .run_method_and_compare_outputs(qtol=1) ) + + def test_dq_conv2d(self) -> None: + class SimpleConv2d(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv = torch.nn.Conv2d(1, 2, 3) + self.conv.weight.requires_grad = False + self.conv.bias.requires_grad = False + + def forward(self, x): + return self.conv(x) + + def get_inputs(self): + return (torch.randn(1, 1, 8, 8),) + + model = SimpleConv2d() + self._test_dq_conv2d( + model, + model.get_inputs(), + dynamic_shapes=None, + atol=5e-2, + ) From 8fcb1170dcddb39e70a2072227ac9e990773b477 Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Fri, 11 Apr 2025 17:25:31 -0700 Subject: [PATCH 02/24] Permute before quant --- .../channels_last_tagged_reshape_pass.py | 28 +++++++++++++++++-- .../xnnpack/quantizer/xnnpack_quantizer.py | 2 ++ .../quantizer/xnnpack_quantizer_utils.py | 10 ------- backends/xnnpack/runtime/XNNCompiler.cpp | 2 +- backends/xnnpack/test/ops/test_conv2d.py | 25 ++++------------- 5 files changed, 34 insertions(+), 33 deletions(-) diff --git a/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py b/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py index 89a44f303df..ca8de0c32df 100644 --- a/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py +++ b/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py @@ -282,16 +282,38 @@ def input_to_nhwc( ChannelsLastTaggedReshapePass.PARTNER_NODE ] else: - # Need to create NHWC node - with graph_module.graph.inserting_after(input_node): + # trace back to permute + origin = input_node + while hasattr(origin, "args") and isinstance(origin.args, tuple) and len(origin.args) > 0: + origin = origin.args[0] + + # at x choose_qparams and quantize insert permute + with graph_module.graph.inserting_after(origin): input_node_nhwc = self.create_call_function_node( graph_module=graph_module, target=exir_ops.edge.aten._to_copy.default, - args=(input_node,), + args=(origin,), memory_format=torch.channels_last, ) + + for user in list(origin.users): + if user != input_node_nhwc: + user.replace_input_with(origin, input_node_nhwc) + + graph_module.recompile() self.mark_as_nhwc_node(input_node_nhwc) + # TODO: uncomment, use case when permute not needed + # # Need to create NHWC node ----------------------------- CONVERSION HAPPENING ----->> + # with graph_module.graph.inserting_after(input_node): + # input_node_nhwc = self.create_call_function_node( + # graph_module=graph_module, + # target=exir_ops.edge.aten._to_copy.default, + # args=(input_node,), + # memory_format=torch.channels_last, + # ) + # self.mark_as_nhwc_node(input_node_nhwc) + self.insert_copy_and_assign_partner_nodes_quantization_sensitive( graph_module=graph_module, original_input=input_node, diff --git a/backends/xnnpack/quantizer/xnnpack_quantizer.py b/backends/xnnpack/quantizer/xnnpack_quantizer.py index fdabd0383e6..9e24d7b0030 100644 --- a/backends/xnnpack/quantizer/xnnpack_quantizer.py +++ b/backends/xnnpack/quantizer/xnnpack_quantizer.py @@ -71,8 +71,10 @@ def _supported_symmetric_quantized_operators() -> dict[str, list[OperatorPattern "conv2d": [ [torch.nn.Conv2d, torch.nn.ReLU], [torch.nn.Conv2d, F.relu], + [torch.nn.Conv2d], [F.conv2d, torch.nn.ReLU], [F.conv2d, F.relu], + [F.conv2d], ], "linear": [[torch.nn.Linear], [F.linear]], "add": [[torch.add]], diff --git a/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py b/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py index 4763e39fa2f..91babc26cc9 100644 --- a/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py +++ b/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py @@ -305,16 +305,6 @@ def _do_annotate_conv( if not is_conv_node(n): continue - # TODO: Check for dynamically quantized convs and check if nn.Conv2d is always lowered - # Only dynamically quantize 2D convolutions - # Handle both nn.Conv2d and aten.conv2d.default - if n.op == "call_module": - mod = gm.get_submodule(n.target) - if not hasattr(mod, "padding") or len(mod.padding) != 2: - continue - elif n.op == "call_function" and n.target != torch.ops.aten.conv2d.default: - continue - conv_node = n # This is hacky! diff --git a/backends/xnnpack/runtime/XNNCompiler.cpp b/backends/xnnpack/runtime/XNNCompiler.cpp index c0204831c07..988dab86ab7 100644 --- a/backends/xnnpack/runtime/XNNCompiler.cpp +++ b/backends/xnnpack/runtime/XNNCompiler.cpp @@ -1172,7 +1172,7 @@ Error defineStaticTransposeNode( ET_CHECK_OR_RETURN_ERROR( status == xnn_status_success, Internal, - "Failed to create sigmoid node %i with code: %s", + "Failed to create static transpose node %i with code: %s", node->debug_handle(), xnn_status_to_string(status)); diff --git a/backends/xnnpack/test/ops/test_conv2d.py b/backends/xnnpack/test/ops/test_conv2d.py index 5001d2b6e88..dbd9f75ecaf 100644 --- a/backends/xnnpack/test/ops/test_conv2d.py +++ b/backends/xnnpack/test/ops/test_conv2d.py @@ -240,10 +240,6 @@ def _test_dq_conv2d( quant_config = get_symmetric_quantization_config( is_per_channel=True, is_dynamic=True, - act_qmin=-128, - act_qmax=127, - weight_qmin=-128, - weight_qmax=127, ) DynamicallyQuantizedPartitioner = XnnpackPartitioner( @@ -254,35 +250,26 @@ def _test_dq_conv2d( tester = Tester(m, inputs, dynamic_shapes=dynamic_shapes) tester = tester.quantize(Quantize(quantization_config=quant_config)) - # Print after quantization tester.stages["quantize"] = tester.stages[tester.cur] - print("\n----------Annotated Graph:") - print(tester.stages["quantize"].graph_module.code) exported = tester.export() - # Print after exporting tester.stages["export"] = exported.stages[exported.cur] - print("\n----------Exported Graph:") - print(tester.stages["export"].graph_module.code) - # Check for choose_qparams tester.check(["torch.ops.quantized_decomposed.choose_qparams"]) tester.to_edge_transform_and_lower( ToEdgeTransformAndLower([DynamicallyQuantizedPartitioner]) ) - # Print after lower and partition - print("\n----------Lowered Graph:") - print(tester.stages[tester.cur].graph_module.code) - - tester.check(["executorch_exir_dialects_edge__ops_aten_convolution_default"]) tester.check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) tester.check_not(["executorch_exir_dialects_edge__ops_aten_conv2d_default"]) tester.to_executorch() - tester.serialize() + + #tester.serialize() + tester.serialize().dump_artifact("conv2d.pte") + tester.run_method_and_compare_outputs(atol=atol) def test_fp16_conv2d(self) -> None: @@ -766,7 +753,7 @@ def test_dq_conv2d(self) -> None: class SimpleConv2d(torch.nn.Module): def __init__(self): super().__init__() - self.conv = torch.nn.Conv2d(1, 2, 3) + self.conv = torch.nn.Conv2d(3, 10, 3, ) self.conv.weight.requires_grad = False self.conv.bias.requires_grad = False @@ -774,7 +761,7 @@ def forward(self, x): return self.conv(x) def get_inputs(self): - return (torch.randn(1, 1, 8, 8),) + return (torch.randn(1, 3, 8, 8),) model = SimpleConv2d() self._test_dq_conv2d( From 4d064da53ab0f3c39cb124b8f5f0865eb826e92d Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Fri, 11 Apr 2025 20:20:33 -0700 Subject: [PATCH 03/24] Refactor permute code --- .../channels_last_tagged_reshape_pass.py | 28 ++++++++----------- backends/xnnpack/test/ops/test_conv2d.py | 10 +++---- backends/xnnpack/xnnpack_preprocess.py | 2 +- 3 files changed, 16 insertions(+), 24 deletions(-) diff --git a/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py b/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py index ca8de0c32df..2bbc62fa588 100644 --- a/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py +++ b/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py @@ -282,12 +282,13 @@ def input_to_nhwc( ChannelsLastTaggedReshapePass.PARTNER_NODE ] else: - # trace back to permute + # Need to create NHWC node origin = input_node + # TODO: safe/correct to always trace back? + # Trace back to source node while hasattr(origin, "args") and isinstance(origin.args, tuple) and len(origin.args) > 0: origin = origin.args[0] - # at x choose_qparams and quantize insert permute with graph_module.graph.inserting_after(origin): input_node_nhwc = self.create_call_function_node( graph_module=graph_module, @@ -296,24 +297,17 @@ def input_to_nhwc( memory_format=torch.channels_last, ) - for user in list(origin.users): - if user != input_node_nhwc: - user.replace_input_with(origin, input_node_nhwc) + # If input_node was not source + if origin != input_node: + print("Permuted\n\n") + # Replace downstream source node with NHWC node + for user in list(origin.users): + if user != input_node_nhwc: + user.replace_input_with(origin, input_node_nhwc) + graph_module.recompile() - graph_module.recompile() self.mark_as_nhwc_node(input_node_nhwc) - # TODO: uncomment, use case when permute not needed - # # Need to create NHWC node ----------------------------- CONVERSION HAPPENING ----->> - # with graph_module.graph.inserting_after(input_node): - # input_node_nhwc = self.create_call_function_node( - # graph_module=graph_module, - # target=exir_ops.edge.aten._to_copy.default, - # args=(input_node,), - # memory_format=torch.channels_last, - # ) - # self.mark_as_nhwc_node(input_node_nhwc) - self.insert_copy_and_assign_partner_nodes_quantization_sensitive( graph_module=graph_module, original_input=input_node, diff --git a/backends/xnnpack/test/ops/test_conv2d.py b/backends/xnnpack/test/ops/test_conv2d.py index dbd9f75ecaf..902bdc5b50b 100644 --- a/backends/xnnpack/test/ops/test_conv2d.py +++ b/backends/xnnpack/test/ops/test_conv2d.py @@ -249,15 +249,14 @@ def _test_dq_conv2d( tester = Tester(m, inputs, dynamic_shapes=dynamic_shapes) tester = tester.quantize(Quantize(quantization_config=quant_config)) - - tester.stages["quantize"] = tester.stages[tester.cur] - exported = tester.export() - tester.stages["export"] = exported.stages[exported.cur] - tester.check(["torch.ops.quantized_decomposed.choose_qparams"]) + tester.stages["export"] = exported.stages[exported.cur] + print("\n----------Exported Graph:") + print(tester.stages["export"].graph_module.code) + tester.to_edge_transform_and_lower( ToEdgeTransformAndLower([DynamicallyQuantizedPartitioner]) ) @@ -266,7 +265,6 @@ def _test_dq_conv2d( tester.check_not(["executorch_exir_dialects_edge__ops_aten_conv2d_default"]) tester.to_executorch() - #tester.serialize() tester.serialize().dump_artifact("conv2d.pte") diff --git a/backends/xnnpack/xnnpack_preprocess.py b/backends/xnnpack/xnnpack_preprocess.py index 84cdfd69a48..086eeccbd58 100644 --- a/backends/xnnpack/xnnpack_preprocess.py +++ b/backends/xnnpack/xnnpack_preprocess.py @@ -144,7 +144,7 @@ def preprocess( graph_module = ep.graph_module node_to_external_map = generate_node_to_external_map(ep, graph_module) - + print("\n----------XNNPack Preprocess Graph:", graph_module) # Make sure all inputs are contiguous_format or NCHW or default dim order assert_default_dim_order(graph_module) From 2905b984015574ba0dd459a7a32ff92441ae6bce Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Fri, 11 Apr 2025 21:30:55 -0700 Subject: [PATCH 04/24] Corrects input to conv --- .../channels_last_tagged_reshape_pass.py | 29 ++++++++++--------- backends/xnnpack/test/ops/test_conv2d.py | 14 ++++----- backends/xnnpack/xnnpack_preprocess.py | 2 +- 3 files changed, 24 insertions(+), 21 deletions(-) diff --git a/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py b/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py index 2bbc62fa588..29de407defd 100644 --- a/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py +++ b/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py @@ -283,31 +283,34 @@ def input_to_nhwc( ] else: # Need to create NHWC node - origin = input_node + source_node = input_node + # TODO: safe/correct to always trace back? - # Trace back to source node - while hasattr(origin, "args") and isinstance(origin.args, tuple) and len(origin.args) > 0: - origin = origin.args[0] + # Trace back to find original source node + while ( + hasattr(source_node, "args") + and isinstance(source_node.args, tuple) + and len(source_node.args) > 0 + ): + source_node = source_node.args[0] - with graph_module.graph.inserting_after(origin): + with graph_module.graph.inserting_after(source_node): input_node_nhwc = self.create_call_function_node( graph_module=graph_module, target=exir_ops.edge.aten._to_copy.default, - args=(origin,), + args=(source_node,), memory_format=torch.channels_last, ) - # If input_node was not source - if origin != input_node: - print("Permuted\n\n") + # If input_node was not the original source node + if source_node != input_node: + input_node = source_node # Replace downstream source node with NHWC node - for user in list(origin.users): + for user in list(input_node.users): if user != input_node_nhwc: - user.replace_input_with(origin, input_node_nhwc) + user.replace_input_with(input_node, input_node_nhwc) graph_module.recompile() - self.mark_as_nhwc_node(input_node_nhwc) - self.insert_copy_and_assign_partner_nodes_quantization_sensitive( graph_module=graph_module, original_input=input_node, diff --git a/backends/xnnpack/test/ops/test_conv2d.py b/backends/xnnpack/test/ops/test_conv2d.py index 902bdc5b50b..64e3d5e53ad 100644 --- a/backends/xnnpack/test/ops/test_conv2d.py +++ b/backends/xnnpack/test/ops/test_conv2d.py @@ -249,14 +249,10 @@ def _test_dq_conv2d( tester = Tester(m, inputs, dynamic_shapes=dynamic_shapes) tester = tester.quantize(Quantize(quantization_config=quant_config)) - exported = tester.export() + tester.export() tester.check(["torch.ops.quantized_decomposed.choose_qparams"]) - tester.stages["export"] = exported.stages[exported.cur] - print("\n----------Exported Graph:") - print(tester.stages["export"].graph_module.code) - tester.to_edge_transform_and_lower( ToEdgeTransformAndLower([DynamicallyQuantizedPartitioner]) ) @@ -265,7 +261,7 @@ def _test_dq_conv2d( tester.check_not(["executorch_exir_dialects_edge__ops_aten_conv2d_default"]) tester.to_executorch() - #tester.serialize() + # tester.serialize() tester.serialize().dump_artifact("conv2d.pte") tester.run_method_and_compare_outputs(atol=atol) @@ -751,7 +747,11 @@ def test_dq_conv2d(self) -> None: class SimpleConv2d(torch.nn.Module): def __init__(self): super().__init__() - self.conv = torch.nn.Conv2d(3, 10, 3, ) + self.conv = torch.nn.Conv2d( + 3, + 10, + 3, + ) self.conv.weight.requires_grad = False self.conv.bias.requires_grad = False diff --git a/backends/xnnpack/xnnpack_preprocess.py b/backends/xnnpack/xnnpack_preprocess.py index 086eeccbd58..84cdfd69a48 100644 --- a/backends/xnnpack/xnnpack_preprocess.py +++ b/backends/xnnpack/xnnpack_preprocess.py @@ -144,7 +144,7 @@ def preprocess( graph_module = ep.graph_module node_to_external_map = generate_node_to_external_map(ep, graph_module) - print("\n----------XNNPack Preprocess Graph:", graph_module) + # Make sure all inputs are contiguous_format or NCHW or default dim order assert_default_dim_order(graph_module) From 0fef04a2ac4e9d350a8449cab2d9792d64305497 Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Sat, 12 Apr 2025 01:03:59 -0700 Subject: [PATCH 05/24] Add is_dequant check for trace back when inserting permute --- .../channels_last_tagged_reshape_pass.py | 35 ++++++++++--------- backends/xnnpack/test/ops/test_conv2d.py | 2 +- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py b/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py index 29de407defd..f02a5f816a1 100644 --- a/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py +++ b/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py @@ -283,29 +283,32 @@ def input_to_nhwc( ] else: # Need to create NHWC node - source_node = input_node - - # TODO: safe/correct to always trace back? - # Trace back to find original source node - while ( - hasattr(source_node, "args") - and isinstance(source_node.args, tuple) - and len(source_node.args) > 0 - ): - source_node = source_node.args[0] + # TODO: Best way to determine if trace back required? + is_dequant = ( + input_node.op == "call_function" + and getattr(input_node.target, "__name__", "") + == "quantized_decomposed.dequantize_per_tensor.tensor" + ) + + if is_dequant: + # Trace back to find original source node + while ( + hasattr(input_node, "args") + and isinstance(input_node.args, tuple) + and len(input_node.args) > 0 + ): + input_node = input_node.args[0] - with graph_module.graph.inserting_after(source_node): + with graph_module.graph.inserting_after(input_node): input_node_nhwc = self.create_call_function_node( graph_module=graph_module, target=exir_ops.edge.aten._to_copy.default, - args=(source_node,), + args=(input_node,), memory_format=torch.channels_last, ) - # If input_node was not the original source node - if source_node != input_node: - input_node = source_node - # Replace downstream source node with NHWC node + if is_dequant: + # Replace downstream input_nodes with NHWC node for user in list(input_node.users): if user != input_node_nhwc: user.replace_input_with(input_node, input_node_nhwc) diff --git a/backends/xnnpack/test/ops/test_conv2d.py b/backends/xnnpack/test/ops/test_conv2d.py index 64e3d5e53ad..b03108dbb48 100644 --- a/backends/xnnpack/test/ops/test_conv2d.py +++ b/backends/xnnpack/test/ops/test_conv2d.py @@ -248,7 +248,7 @@ def _test_dq_conv2d( ) tester = Tester(m, inputs, dynamic_shapes=dynamic_shapes) - tester = tester.quantize(Quantize(quantization_config=quant_config)) + tester.quantize(Quantize(quantization_config=quant_config)) tester.export() tester.check(["torch.ops.quantized_decomposed.choose_qparams"]) From f8f998c4f41c745a499b1171bcd4834de273e6c4 Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Sat, 12 Apr 2025 01:09:29 -0700 Subject: [PATCH 06/24] Fix node identity check --- backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py b/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py index f02a5f816a1..dd8eea754a0 100644 --- a/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py +++ b/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py @@ -310,7 +310,7 @@ def input_to_nhwc( if is_dequant: # Replace downstream input_nodes with NHWC node for user in list(input_node.users): - if user != input_node_nhwc: + if user is not input_node_nhwc: user.replace_input_with(input_node, input_node_nhwc) graph_module.recompile() From 2efe9bbf50daa2eeeb0a8ef1bd8b89b7b5e65e04 Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Sun, 13 Apr 2025 14:32:55 -0700 Subject: [PATCH 07/24] Use existing is_dequant check and update atol --- .../channels_last_tagged_reshape_pass.py | 20 ++++++------------- backends/xnnpack/test/ops/test_conv2d.py | 2 +- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py b/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py index dd8eea754a0..cbd16f7de08 100644 --- a/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py +++ b/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py @@ -9,10 +9,10 @@ import torch from executorch.backends.xnnpack._passes.xnnpack_pass import XNNPACKPass from executorch.backends.xnnpack.utils.utils import is_param_node +from executorch.backends.xnnpack.utils.quant_utils import is_dequant from executorch.exir.dialects._ops import ops as exir_ops from executorch.exir.pass_base import PassResult - # TODO(T151254305) use subgraph_rewriter class ChannelsLastTaggedReshapePass(XNNPACKPass): """ @@ -283,20 +283,12 @@ def input_to_nhwc( ] else: # Need to create NHWC node - # TODO: Best way to determine if trace back required? - is_dequant = ( - input_node.op == "call_function" - and getattr(input_node.target, "__name__", "") - == "quantized_decomposed.dequantize_per_tensor.tensor" - ) + # TODO: If input is dequant does that it's from dynamic quantization? + input_is_dequant = is_dequant(input_node) - if is_dequant: + if input_is_dequant: # Trace back to find original source node - while ( - hasattr(input_node, "args") - and isinstance(input_node.args, tuple) - and len(input_node.args) > 0 - ): + while getattr(input_node, "args", None): input_node = input_node.args[0] with graph_module.graph.inserting_after(input_node): @@ -307,7 +299,7 @@ def input_to_nhwc( memory_format=torch.channels_last, ) - if is_dequant: + if input_is_dequant: # Replace downstream input_nodes with NHWC node for user in list(input_node.users): if user is not input_node_nhwc: diff --git a/backends/xnnpack/test/ops/test_conv2d.py b/backends/xnnpack/test/ops/test_conv2d.py index b03108dbb48..77e1f0b3f89 100644 --- a/backends/xnnpack/test/ops/test_conv2d.py +++ b/backends/xnnpack/test/ops/test_conv2d.py @@ -766,5 +766,5 @@ def get_inputs(self): model, model.get_inputs(), dynamic_shapes=None, - atol=5e-2, + atol=3.0, ) From 3762e0d7bf70e22a7d0157053cf4a8e2adc8a6c0 Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Mon, 14 Apr 2025 19:35:25 -0700 Subject: [PATCH 08/24] Implement replace_all_uses_with function --- .../channels_last_tagged_reshape_pass.py | 8 +- backends/xnnpack/test/ops/test_conv2d.py | 6 +- .../spinquant/third-party/FFHT/Makefile | 236 +++++++++++++++++- 3 files changed, 230 insertions(+), 20 deletions(-) diff --git a/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py b/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py index cbd16f7de08..80d47b4630e 100644 --- a/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py +++ b/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py @@ -283,7 +283,7 @@ def input_to_nhwc( ] else: # Need to create NHWC node - # TODO: If input is dequant does that it's from dynamic quantization? + # TODO: Replace with check to determine if dynamic quant input_is_dequant = is_dequant(input_node) if input_is_dequant: @@ -301,10 +301,8 @@ def input_to_nhwc( if input_is_dequant: # Replace downstream input_nodes with NHWC node - for user in list(input_node.users): - if user is not input_node_nhwc: - user.replace_input_with(input_node, input_node_nhwc) - graph_module.recompile() + input_node.replace_all_uses_with(input_node_nhwc) + input_node_nhwc.args = (input_node,) self.insert_copy_and_assign_partner_nodes_quantization_sensitive( graph_module=graph_module, diff --git a/backends/xnnpack/test/ops/test_conv2d.py b/backends/xnnpack/test/ops/test_conv2d.py index 77e1f0b3f89..a33c9989770 100644 --- a/backends/xnnpack/test/ops/test_conv2d.py +++ b/backends/xnnpack/test/ops/test_conv2d.py @@ -244,7 +244,7 @@ def _test_dq_conv2d( DynamicallyQuantizedPartitioner = XnnpackPartitioner( config_precisions=ConfigPrecisionType.DYNAMIC_QUANT, - per_op_mode=False, + per_op_mode=True, ) tester = Tester(m, inputs, dynamic_shapes=dynamic_shapes) @@ -762,9 +762,11 @@ def get_inputs(self): return (torch.randn(1, 3, 8, 8),) model = SimpleConv2d() + inputs = model.get_inputs() + self._test_dq_conv2d( model, - model.get_inputs(), + inputs, dynamic_shapes=None, atol=3.0, ) diff --git a/extension/llm/custom_ops/spinquant/third-party/FFHT/Makefile b/extension/llm/custom_ops/spinquant/third-party/FFHT/Makefile index 7cbeb3ddae9..b06ca7909ae 100644 --- a/extension/llm/custom_ops/spinquant/third-party/FFHT/Makefile +++ b/extension/llm/custom_ops/spinquant/third-party/FFHT/Makefile @@ -1,21 +1,231 @@ -CC = gcc -CFLAGS = -O3 -march=native -std=c99 -pedantic -Wall -Wextra -Wshadow -Wpointer-arith -Wcast-qual -Wstrict-prototypes -Wmissing-prototypes +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.31 -all: test_float test_double fast_copy.o fht.o +# Default target executed when no arguments are given to make. +default_target: all +.PHONY : default_target -OBJ := dumb_fht.o fast_copy.o fht.o +# Allow only one "make -f Makefile2" at a time, but pass parallelism. +.NOTPARALLEL: -%.o: %.c - $(CC) $< -o $@ -c $(CFLAGS) +#============================================================================= +# Special targets provided by cmake. -test_%: test_%.c $(OBJ) - $(CC) $< $(OBJ) -o $@ $(CFLAGS) +# Disable implicit rules so canonical targets will work. +.SUFFIXES: -test_double_header_only: test_double_header_only.c - $(CC) $< -o $@ $(CFLAGS) +# Disable VCS-based implicit rules. +% : %,v -test_float_header_only: test_double_header_only.c - $(CC) $< -o $@ $(CFLAGS) +# Disable VCS-based implicit rules. +% : RCS/% +# Disable VCS-based implicit rules. +% : RCS/%,v + +# Disable VCS-based implicit rules. +% : SCCS/s.% + +# Disable VCS-based implicit rules. +% : s.% + +.SUFFIXES: .hpux_make_needs_suffix_list + +# Command-line flag to silence nested $(MAKE). +$(VERBOSE)MAKESILENT = -s + +#Suppress display of executed commands. +$(VERBOSE).SILENT: + +# A target that is always out of date. +cmake_force: +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /opt/anaconda3/envs/executorch/lib/python3.10/site-packages/cmake/data/bin/cmake + +# The command to remove a file. +RM = /opt/anaconda3/envs/executorch/lib/python3.10/site-packages/cmake/data/bin/cmake -E rm -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /Users/zuby/PycharmProjects/src/executorch + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /Users/zuby/PycharmProjects/src/executorch + +#============================================================================= +# Targets provided globally by CMake. + +# Special rule for the target edit_cache +edit_cache: + @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Running CMake cache editor..." + /opt/anaconda3/envs/executorch/lib/python3.10/site-packages/cmake/data/bin/ccmake -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) +.PHONY : edit_cache + +# Special rule for the target edit_cache +edit_cache/fast: edit_cache +.PHONY : edit_cache/fast + +# Special rule for the target rebuild_cache +rebuild_cache: + @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Running CMake to regenerate build system..." + /opt/anaconda3/envs/executorch/lib/python3.10/site-packages/cmake/data/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) +.PHONY : rebuild_cache + +# Special rule for the target rebuild_cache +rebuild_cache/fast: rebuild_cache +.PHONY : rebuild_cache/fast + +# Special rule for the target list_install_components +list_install_components: + @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Available install components are: \"Unspecified\"" +.PHONY : list_install_components + +# Special rule for the target list_install_components +list_install_components/fast: list_install_components +.PHONY : list_install_components/fast + +# Special rule for the target install +install: preinstall + @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Install the project..." + /opt/anaconda3/envs/executorch/lib/python3.10/site-packages/cmake/data/bin/cmake -P cmake_install.cmake +.PHONY : install + +# Special rule for the target install +install/fast: preinstall/fast + @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Install the project..." + /opt/anaconda3/envs/executorch/lib/python3.10/site-packages/cmake/data/bin/cmake -P cmake_install.cmake +.PHONY : install/fast + +# Special rule for the target install/local +install/local: preinstall + @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Installing only the local directory..." + /opt/anaconda3/envs/executorch/lib/python3.10/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake +.PHONY : install/local + +# Special rule for the target install/local +install/local/fast: preinstall/fast + @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Installing only the local directory..." + /opt/anaconda3/envs/executorch/lib/python3.10/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake +.PHONY : install/local/fast + +# Special rule for the target install/strip +install/strip: preinstall + @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Installing the project stripped..." + /opt/anaconda3/envs/executorch/lib/python3.10/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake +.PHONY : install/strip + +# Special rule for the target install/strip +install/strip/fast: preinstall/fast + @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Installing the project stripped..." + /opt/anaconda3/envs/executorch/lib/python3.10/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake +.PHONY : install/strip/fast + +# The main all target +all: cmake_check_build_system + cd /Users/zuby/PycharmProjects/src/executorch && $(CMAKE_COMMAND) -E cmake_progress_start /Users/zuby/PycharmProjects/src/executorch/CMakeFiles /Users/zuby/PycharmProjects/src/executorch/extension/llm/custom_ops/spinquant/third-party/FFHT//CMakeFiles/progress.marks + cd /Users/zuby/PycharmProjects/src/executorch && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 extension/llm/custom_ops/spinquant/third-party/FFHT/all + $(CMAKE_COMMAND) -E cmake_progress_start /Users/zuby/PycharmProjects/src/executorch/CMakeFiles 0 +.PHONY : all + +# The main clean target clean: - rm -f test_float test_double test_float_header_only test_double_header_only $(OBJ) + cd /Users/zuby/PycharmProjects/src/executorch && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 extension/llm/custom_ops/spinquant/third-party/FFHT/clean +.PHONY : clean + +# The main clean target +clean/fast: clean +.PHONY : clean/fast + +# Prepare targets for installation. +preinstall: all + cd /Users/zuby/PycharmProjects/src/executorch && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 extension/llm/custom_ops/spinquant/third-party/FFHT/preinstall +.PHONY : preinstall + +# Prepare targets for installation. +preinstall/fast: + cd /Users/zuby/PycharmProjects/src/executorch && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 extension/llm/custom_ops/spinquant/third-party/FFHT/preinstall +.PHONY : preinstall/fast + +# clear depends +depend: + cd /Users/zuby/PycharmProjects/src/executorch && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 +.PHONY : depend + +# Convenience name for target. +extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeFiles/dumb_fht.dir/rule: + cd /Users/zuby/PycharmProjects/src/executorch && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeFiles/dumb_fht.dir/rule +.PHONY : extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeFiles/dumb_fht.dir/rule + +# Convenience name for target. +dumb_fht: extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeFiles/dumb_fht.dir/rule +.PHONY : dumb_fht + +# fast build rule for target. +dumb_fht/fast: + cd /Users/zuby/PycharmProjects/src/executorch && $(MAKE) $(MAKESILENT) -f extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeFiles/dumb_fht.dir/build.make extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeFiles/dumb_fht.dir/build +.PHONY : dumb_fht/fast + +dumb_fht.o: dumb_fht.c.o +.PHONY : dumb_fht.o + +# target to build an object file +dumb_fht.c.o: + cd /Users/zuby/PycharmProjects/src/executorch && $(MAKE) $(MAKESILENT) -f extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeFiles/dumb_fht.dir/build.make extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeFiles/dumb_fht.dir/dumb_fht.c.o +.PHONY : dumb_fht.c.o + +dumb_fht.i: dumb_fht.c.i +.PHONY : dumb_fht.i + +# target to preprocess a source file +dumb_fht.c.i: + cd /Users/zuby/PycharmProjects/src/executorch && $(MAKE) $(MAKESILENT) -f extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeFiles/dumb_fht.dir/build.make extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeFiles/dumb_fht.dir/dumb_fht.c.i +.PHONY : dumb_fht.c.i + +dumb_fht.s: dumb_fht.c.s +.PHONY : dumb_fht.s + +# target to generate assembly for a file +dumb_fht.c.s: + cd /Users/zuby/PycharmProjects/src/executorch && $(MAKE) $(MAKESILENT) -f extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeFiles/dumb_fht.dir/build.make extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeFiles/dumb_fht.dir/dumb_fht.c.s +.PHONY : dumb_fht.c.s + +# Help Target +help: + @echo "The following are some of the valid targets for this Makefile:" + @echo "... all (the default if no target is provided)" + @echo "... clean" + @echo "... depend" + @echo "... edit_cache" + @echo "... install" + @echo "... install/local" + @echo "... install/strip" + @echo "... list_install_components" + @echo "... rebuild_cache" + @echo "... dumb_fht" + @echo "... dumb_fht.o" + @echo "... dumb_fht.i" + @echo "... dumb_fht.s" +.PHONY : help + + + +#============================================================================= +# Special targets to cleanup operation of make. + +# Special rule to run CMake to check the build system integrity. +# No rule that depends on this can have commands that come from listfiles +# because they might be regenerated. +cmake_check_build_system: + cd /Users/zuby/PycharmProjects/src/executorch && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 +.PHONY : cmake_check_build_system + From 4112c6a11fe2e2f9a7040072434732afc435c3d9 Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Mon, 14 Apr 2025 19:38:22 -0700 Subject: [PATCH 09/24] Remove cmake file --- .../spinquant/third-party/FFHT/Makefile | 236 +----------------- 1 file changed, 13 insertions(+), 223 deletions(-) diff --git a/extension/llm/custom_ops/spinquant/third-party/FFHT/Makefile b/extension/llm/custom_ops/spinquant/third-party/FFHT/Makefile index b06ca7909ae..7cbeb3ddae9 100644 --- a/extension/llm/custom_ops/spinquant/third-party/FFHT/Makefile +++ b/extension/llm/custom_ops/spinquant/third-party/FFHT/Makefile @@ -1,231 +1,21 @@ -# CMAKE generated file: DO NOT EDIT! -# Generated by "Unix Makefiles" Generator, CMake Version 3.31 +CC = gcc +CFLAGS = -O3 -march=native -std=c99 -pedantic -Wall -Wextra -Wshadow -Wpointer-arith -Wcast-qual -Wstrict-prototypes -Wmissing-prototypes -# Default target executed when no arguments are given to make. -default_target: all -.PHONY : default_target +all: test_float test_double fast_copy.o fht.o -# Allow only one "make -f Makefile2" at a time, but pass parallelism. -.NOTPARALLEL: +OBJ := dumb_fht.o fast_copy.o fht.o -#============================================================================= -# Special targets provided by cmake. +%.o: %.c + $(CC) $< -o $@ -c $(CFLAGS) -# Disable implicit rules so canonical targets will work. -.SUFFIXES: +test_%: test_%.c $(OBJ) + $(CC) $< $(OBJ) -o $@ $(CFLAGS) -# Disable VCS-based implicit rules. -% : %,v +test_double_header_only: test_double_header_only.c + $(CC) $< -o $@ $(CFLAGS) -# Disable VCS-based implicit rules. -% : RCS/% +test_float_header_only: test_double_header_only.c + $(CC) $< -o $@ $(CFLAGS) -# Disable VCS-based implicit rules. -% : RCS/%,v - -# Disable VCS-based implicit rules. -% : SCCS/s.% - -# Disable VCS-based implicit rules. -% : s.% - -.SUFFIXES: .hpux_make_needs_suffix_list - -# Command-line flag to silence nested $(MAKE). -$(VERBOSE)MAKESILENT = -s - -#Suppress display of executed commands. -$(VERBOSE).SILENT: - -# A target that is always out of date. -cmake_force: -.PHONY : cmake_force - -#============================================================================= -# Set environment variables for the build. - -# The shell in which to execute make rules. -SHELL = /bin/sh - -# The CMake executable. -CMAKE_COMMAND = /opt/anaconda3/envs/executorch/lib/python3.10/site-packages/cmake/data/bin/cmake - -# The command to remove a file. -RM = /opt/anaconda3/envs/executorch/lib/python3.10/site-packages/cmake/data/bin/cmake -E rm -f - -# Escaping for special characters. -EQUALS = = - -# The top-level source directory on which CMake was run. -CMAKE_SOURCE_DIR = /Users/zuby/PycharmProjects/src/executorch - -# The top-level build directory on which CMake was run. -CMAKE_BINARY_DIR = /Users/zuby/PycharmProjects/src/executorch - -#============================================================================= -# Targets provided globally by CMake. - -# Special rule for the target edit_cache -edit_cache: - @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Running CMake cache editor..." - /opt/anaconda3/envs/executorch/lib/python3.10/site-packages/cmake/data/bin/ccmake -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) -.PHONY : edit_cache - -# Special rule for the target edit_cache -edit_cache/fast: edit_cache -.PHONY : edit_cache/fast - -# Special rule for the target rebuild_cache -rebuild_cache: - @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Running CMake to regenerate build system..." - /opt/anaconda3/envs/executorch/lib/python3.10/site-packages/cmake/data/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) -.PHONY : rebuild_cache - -# Special rule for the target rebuild_cache -rebuild_cache/fast: rebuild_cache -.PHONY : rebuild_cache/fast - -# Special rule for the target list_install_components -list_install_components: - @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Available install components are: \"Unspecified\"" -.PHONY : list_install_components - -# Special rule for the target list_install_components -list_install_components/fast: list_install_components -.PHONY : list_install_components/fast - -# Special rule for the target install -install: preinstall - @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Install the project..." - /opt/anaconda3/envs/executorch/lib/python3.10/site-packages/cmake/data/bin/cmake -P cmake_install.cmake -.PHONY : install - -# Special rule for the target install -install/fast: preinstall/fast - @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Install the project..." - /opt/anaconda3/envs/executorch/lib/python3.10/site-packages/cmake/data/bin/cmake -P cmake_install.cmake -.PHONY : install/fast - -# Special rule for the target install/local -install/local: preinstall - @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Installing only the local directory..." - /opt/anaconda3/envs/executorch/lib/python3.10/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake -.PHONY : install/local - -# Special rule for the target install/local -install/local/fast: preinstall/fast - @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Installing only the local directory..." - /opt/anaconda3/envs/executorch/lib/python3.10/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake -.PHONY : install/local/fast - -# Special rule for the target install/strip -install/strip: preinstall - @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Installing the project stripped..." - /opt/anaconda3/envs/executorch/lib/python3.10/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake -.PHONY : install/strip - -# Special rule for the target install/strip -install/strip/fast: preinstall/fast - @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Installing the project stripped..." - /opt/anaconda3/envs/executorch/lib/python3.10/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake -.PHONY : install/strip/fast - -# The main all target -all: cmake_check_build_system - cd /Users/zuby/PycharmProjects/src/executorch && $(CMAKE_COMMAND) -E cmake_progress_start /Users/zuby/PycharmProjects/src/executorch/CMakeFiles /Users/zuby/PycharmProjects/src/executorch/extension/llm/custom_ops/spinquant/third-party/FFHT//CMakeFiles/progress.marks - cd /Users/zuby/PycharmProjects/src/executorch && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 extension/llm/custom_ops/spinquant/third-party/FFHT/all - $(CMAKE_COMMAND) -E cmake_progress_start /Users/zuby/PycharmProjects/src/executorch/CMakeFiles 0 -.PHONY : all - -# The main clean target clean: - cd /Users/zuby/PycharmProjects/src/executorch && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 extension/llm/custom_ops/spinquant/third-party/FFHT/clean -.PHONY : clean - -# The main clean target -clean/fast: clean -.PHONY : clean/fast - -# Prepare targets for installation. -preinstall: all - cd /Users/zuby/PycharmProjects/src/executorch && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 extension/llm/custom_ops/spinquant/third-party/FFHT/preinstall -.PHONY : preinstall - -# Prepare targets for installation. -preinstall/fast: - cd /Users/zuby/PycharmProjects/src/executorch && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 extension/llm/custom_ops/spinquant/third-party/FFHT/preinstall -.PHONY : preinstall/fast - -# clear depends -depend: - cd /Users/zuby/PycharmProjects/src/executorch && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 -.PHONY : depend - -# Convenience name for target. -extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeFiles/dumb_fht.dir/rule: - cd /Users/zuby/PycharmProjects/src/executorch && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeFiles/dumb_fht.dir/rule -.PHONY : extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeFiles/dumb_fht.dir/rule - -# Convenience name for target. -dumb_fht: extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeFiles/dumb_fht.dir/rule -.PHONY : dumb_fht - -# fast build rule for target. -dumb_fht/fast: - cd /Users/zuby/PycharmProjects/src/executorch && $(MAKE) $(MAKESILENT) -f extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeFiles/dumb_fht.dir/build.make extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeFiles/dumb_fht.dir/build -.PHONY : dumb_fht/fast - -dumb_fht.o: dumb_fht.c.o -.PHONY : dumb_fht.o - -# target to build an object file -dumb_fht.c.o: - cd /Users/zuby/PycharmProjects/src/executorch && $(MAKE) $(MAKESILENT) -f extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeFiles/dumb_fht.dir/build.make extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeFiles/dumb_fht.dir/dumb_fht.c.o -.PHONY : dumb_fht.c.o - -dumb_fht.i: dumb_fht.c.i -.PHONY : dumb_fht.i - -# target to preprocess a source file -dumb_fht.c.i: - cd /Users/zuby/PycharmProjects/src/executorch && $(MAKE) $(MAKESILENT) -f extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeFiles/dumb_fht.dir/build.make extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeFiles/dumb_fht.dir/dumb_fht.c.i -.PHONY : dumb_fht.c.i - -dumb_fht.s: dumb_fht.c.s -.PHONY : dumb_fht.s - -# target to generate assembly for a file -dumb_fht.c.s: - cd /Users/zuby/PycharmProjects/src/executorch && $(MAKE) $(MAKESILENT) -f extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeFiles/dumb_fht.dir/build.make extension/llm/custom_ops/spinquant/third-party/FFHT/CMakeFiles/dumb_fht.dir/dumb_fht.c.s -.PHONY : dumb_fht.c.s - -# Help Target -help: - @echo "The following are some of the valid targets for this Makefile:" - @echo "... all (the default if no target is provided)" - @echo "... clean" - @echo "... depend" - @echo "... edit_cache" - @echo "... install" - @echo "... install/local" - @echo "... install/strip" - @echo "... list_install_components" - @echo "... rebuild_cache" - @echo "... dumb_fht" - @echo "... dumb_fht.o" - @echo "... dumb_fht.i" - @echo "... dumb_fht.s" -.PHONY : help - - - -#============================================================================= -# Special targets to cleanup operation of make. - -# Special rule to run CMake to check the build system integrity. -# No rule that depends on this can have commands that come from listfiles -# because they might be regenerated. -cmake_check_build_system: - cd /Users/zuby/PycharmProjects/src/executorch && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 -.PHONY : cmake_check_build_system - + rm -f test_float test_double test_float_header_only test_double_header_only $(OBJ) From cdd6f2df68908350fca0f0eabdfcdaefa02bb227 Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Tue, 15 Apr 2025 18:31:05 -0700 Subject: [PATCH 10/24] Restore original supported conv2d operators --- backends/xnnpack/quantizer/xnnpack_quantizer.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/backends/xnnpack/quantizer/xnnpack_quantizer.py b/backends/xnnpack/quantizer/xnnpack_quantizer.py index 9e24d7b0030..fdabd0383e6 100644 --- a/backends/xnnpack/quantizer/xnnpack_quantizer.py +++ b/backends/xnnpack/quantizer/xnnpack_quantizer.py @@ -71,10 +71,8 @@ def _supported_symmetric_quantized_operators() -> dict[str, list[OperatorPattern "conv2d": [ [torch.nn.Conv2d, torch.nn.ReLU], [torch.nn.Conv2d, F.relu], - [torch.nn.Conv2d], [F.conv2d, torch.nn.ReLU], [F.conv2d, F.relu], - [F.conv2d], ], "linear": [[torch.nn.Linear], [F.linear]], "add": [[torch.add]], From 7150872bdec7102257366e246359f7610ccdefe1 Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Tue, 15 Apr 2025 18:34:05 -0700 Subject: [PATCH 11/24] Add dynamic quant check before NHWC permute --- .../_passes/channels_last_tagged_reshape_pass.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py b/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py index 80d47b4630e..1a52827418d 100644 --- a/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py +++ b/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py @@ -9,7 +9,7 @@ import torch from executorch.backends.xnnpack._passes.xnnpack_pass import XNNPACKPass from executorch.backends.xnnpack.utils.utils import is_param_node -from executorch.backends.xnnpack.utils.quant_utils import is_dequant +from executorch.backends.xnnpack.utils.quant_utils import is_dynamic_qdq from executorch.exir.dialects._ops import ops as exir_ops from executorch.exir.pass_base import PassResult @@ -283,11 +283,11 @@ def input_to_nhwc( ] else: # Need to create NHWC node - # TODO: Replace with check to determine if dynamic quant - input_is_dequant = is_dequant(input_node) + # Check if input uses dynamic quantization + is_dynamic_input = is_dynamic_qdq(input_node) - if input_is_dequant: - # Trace back to find original source node + if is_dynamic_input: + # Trace back to original source node while getattr(input_node, "args", None): input_node = input_node.args[0] @@ -299,7 +299,7 @@ def input_to_nhwc( memory_format=torch.channels_last, ) - if input_is_dequant: + if is_dynamic_input: # Replace downstream input_nodes with NHWC node input_node.replace_all_uses_with(input_node_nhwc) input_node_nhwc.args = (input_node,) From 6b44c4bbec825cd4a1626e8c5c8336dcd00407d9 Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Tue, 15 Apr 2025 19:24:51 -0700 Subject: [PATCH 12/24] Refactor dq conv2d test --- backends/xnnpack/test/ops/test_conv2d.py | 55 +++++++++--------------- 1 file changed, 21 insertions(+), 34 deletions(-) diff --git a/backends/xnnpack/test/ops/test_conv2d.py b/backends/xnnpack/test/ops/test_conv2d.py index a33c9989770..20a668ed623 100644 --- a/backends/xnnpack/test/ops/test_conv2d.py +++ b/backends/xnnpack/test/ops/test_conv2d.py @@ -176,6 +176,20 @@ def get_inputs(self): return (torch.randn(2, 2, 4, 4),) +class DQConv2d(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv = torch.nn.Conv2d(3, 10, 3) + self.conv.weight.requires_grad = False + self.conv.bias.requires_grad = False + + def forward(self, x): + return self.conv(x) + + def get_inputs(self): + return (torch.randn(1, 3, 8, 8),) + + class TestConv2d(unittest.TestCase): def setUp(self): torch._dynamo.reset() @@ -230,12 +244,11 @@ def _test( .run_method_and_compare_outputs(qtol=1) ) - def _test_dq_conv2d( + def _test_dq( self, m: torch.nn.Module, inputs, dynamic_shapes, - atol=5e-02, ): quant_config = get_symmetric_quantization_config( is_per_channel=True, @@ -250,21 +263,15 @@ def _test_dq_conv2d( tester = Tester(m, inputs, dynamic_shapes=dynamic_shapes) tester.quantize(Quantize(quantization_config=quant_config)) tester.export() - tester.check(["torch.ops.quantized_decomposed.choose_qparams"]) - tester.to_edge_transform_and_lower( ToEdgeTransformAndLower([DynamicallyQuantizedPartitioner]) ) - tester.check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) tester.check_not(["executorch_exir_dialects_edge__ops_aten_conv2d_default"]) - tester.to_executorch() - # tester.serialize() - tester.serialize().dump_artifact("conv2d.pte") - - tester.run_method_and_compare_outputs(atol=atol) + tester.serialize() + tester.run_method_and_compare_outputs(qtol=1) def test_fp16_conv2d(self) -> None: for transpose in (True, False): @@ -743,30 +750,10 @@ def forward(self, x): .run_method_and_compare_outputs(qtol=1) ) - def test_dq_conv2d(self) -> None: - class SimpleConv2d(torch.nn.Module): - def __init__(self): - super().__init__() - self.conv = torch.nn.Conv2d( - 3, - 10, - 3, - ) - self.conv.weight.requires_grad = False - self.conv.bias.requires_grad = False - - def forward(self, x): - return self.conv(x) - - def get_inputs(self): - return (torch.randn(1, 3, 8, 8),) - - model = SimpleConv2d() - inputs = model.get_inputs() - - self._test_dq_conv2d( + def test_qs8_dq_conv2d(self) -> None: + model = DQConv2d() + self._test_dq( model, - inputs, + model.get_inputs(), dynamic_shapes=None, - atol=3.0, ) From 7054f2ef59375878a4edbbfb4cbfbf808f11c570 Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Tue, 15 Apr 2025 19:31:54 -0700 Subject: [PATCH 13/24] Revert formatting --- backends/xnnpack/quantizer/xnnpack_quantizer_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py b/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py index 91babc26cc9..ce459806c6e 100644 --- a/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py +++ b/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py @@ -304,7 +304,6 @@ def _do_annotate_conv( for n in gm.graph.nodes: if not is_conv_node(n): continue - conv_node = n # This is hacky! From fc48e03b088399e7e80f58847f8ff2990e8b3b84 Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Tue, 15 Apr 2025 20:21:51 -0700 Subject: [PATCH 14/24] Add check to only annotate dq conv2d --- .../xnnpack/quantizer/xnnpack_quantizer_utils.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py b/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py index ce459806c6e..2c2c074815e 100644 --- a/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py +++ b/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py @@ -42,6 +42,8 @@ "propagate_annotation", ] +from pytorch.ao.test.dtypes.test_bitpacking import dimensions + # In the absence of better name, just winging it with QuantizationConfig @dataclass(eq=True, frozen=True) @@ -323,6 +325,17 @@ def _do_annotate_conv( assert isinstance(weight, Node) input_qspec_map[weight] = get_weight_qspec(quantization_config) + # Only annotate dynamically quantized conv if it's 2D + if ( + quantization_config + and quantization_config.input_activation + and quantization_config.input_activation.is_dynamic + ): + weight_val = weight.meta.get("val", None) + weight_shape = getattr(weight_val, "shape", None) + if weight_shape is not None and len(weight_shape) != 4: + continue + # adding weight node to the partition as well partition = [conv_node, conv_node.args[1]] From 84b3634cfaab3859463e067e7f0b8b902b8db153 Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Tue, 15 Apr 2025 20:24:35 -0700 Subject: [PATCH 15/24] Remove unused import --- backends/xnnpack/quantizer/xnnpack_quantizer_utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py b/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py index 2c2c074815e..f400f51a464 100644 --- a/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py +++ b/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py @@ -42,8 +42,6 @@ "propagate_annotation", ] -from pytorch.ao.test.dtypes.test_bitpacking import dimensions - # In the absence of better name, just winging it with QuantizationConfig @dataclass(eq=True, frozen=True) From 62e30e574cff026a7ac043c89a697f74af809242 Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Wed, 16 Apr 2025 15:15:24 -0700 Subject: [PATCH 16/24] Add computation for non-batch dims; remove non-batch dims check --- backends/xnnpack/operators/quant_params.py | 7 ++++++- backends/xnnpack/runtime/XNNCompiler.cpp | 5 ----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/backends/xnnpack/operators/quant_params.py b/backends/xnnpack/operators/quant_params.py index e695b151560..ddb9db865f3 100644 --- a/backends/xnnpack/operators/quant_params.py +++ b/backends/xnnpack/operators/quant_params.py @@ -145,9 +145,14 @@ def quantize_tensor(self, tensor: torch.Tensor) -> torch.Tensor: def _from_dynamic_input_node(cls, quant_node: torch.fx.Node) -> QuantParams: q_input = quant_node.args[0] # fp32 input assert isinstance(q_input, torch.fx.Node) - # TODO - materialize this from the quant_node scale count and val shape num_nonbatch_dims = 1 + # Compute non-batch dimensions (shape length - 1), defaulting to 1 + q_input_val = q_input.meta.get("val", None) + q_input_shape = getattr(q_input_val, "shape", None) + if q_input_shape is not None: + num_nonbatch_dims = max(len(q_input_shape) - 1, 1) + return cls( per_channel=False, # True is not valid q_input=q_input, diff --git a/backends/xnnpack/runtime/XNNCompiler.cpp b/backends/xnnpack/runtime/XNNCompiler.cpp index 988dab86ab7..0b187d05df0 100644 --- a/backends/xnnpack/runtime/XNNCompiler.cpp +++ b/backends/xnnpack/runtime/XNNCompiler.cpp @@ -512,11 +512,6 @@ Error defineTensor( buffer_ptr == nullptr, Internal, "Dynamically quantized tensor should not have constant data but found non-nullptr"); - // TODO(T179441835): Dynamic Quantization with num_nonbatch_dims > 1 - ET_CHECK_OR_RETURN_ERROR( - qparams->num_nonbatch_dims() == 1, - Internal, - "Dynamically Quantized Tensors currently only support per token quantization"); status = xnn_define_dynamically_quantized_tensor_value( /*subgraph=*/subgraph_ptr, /*datatype=*/getDataType(tensor_value->datatype()), From 3c7fe328af6c28c3af70c5c41784f098bf7f4706 Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Wed, 16 Apr 2025 15:21:21 -0700 Subject: [PATCH 17/24] Refactor test and imports --- .../_passes/channels_last_tagged_reshape_pass.py | 3 ++- backends/xnnpack/test/ops/test_conv2d.py | 11 ++++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py b/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py index 1a52827418d..768df1f4f04 100644 --- a/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py +++ b/backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py @@ -8,11 +8,12 @@ import torch from executorch.backends.xnnpack._passes.xnnpack_pass import XNNPACKPass -from executorch.backends.xnnpack.utils.utils import is_param_node from executorch.backends.xnnpack.utils.quant_utils import is_dynamic_qdq +from executorch.backends.xnnpack.utils.utils import is_param_node from executorch.exir.dialects._ops import ops as exir_ops from executorch.exir.pass_base import PassResult + # TODO(T151254305) use subgraph_rewriter class ChannelsLastTaggedReshapePass(XNNPACKPass): """ diff --git a/backends/xnnpack/test/ops/test_conv2d.py b/backends/xnnpack/test/ops/test_conv2d.py index 20a668ed623..0c456ebc21a 100644 --- a/backends/xnnpack/test/ops/test_conv2d.py +++ b/backends/xnnpack/test/ops/test_conv2d.py @@ -30,10 +30,7 @@ ) from executorch.backends.xnnpack.test.test_xnnpack_utils import randomize_bn from executorch.backends.xnnpack.test.tester import Quantize, Tester -from executorch.backends.xnnpack.test.tester.tester import ( - Partition, - ToEdgeTransformAndLower, -) +from executorch.backends.xnnpack.test.tester.tester import ToEdgeTransformAndLower from executorch.exir.dialects._ops import ops as exir_ops @@ -176,7 +173,7 @@ def get_inputs(self): return (torch.randn(2, 2, 4, 4),) -class DQConv2d(torch.nn.Module): +class Conv2dDynamicQuant(torch.nn.Module): def __init__(self): super().__init__() self.conv = torch.nn.Conv2d(3, 10, 3) @@ -750,8 +747,8 @@ def forward(self, x): .run_method_and_compare_outputs(qtol=1) ) - def test_qs8_dq_conv2d(self) -> None: - model = DQConv2d() + def test_dq_conv2d(self) -> None: + model = Conv2dDynamicQuant() self._test_dq( model, model.get_inputs(), From 064671b2c5c5b97804b3042d3eb629e181291d2a Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Wed, 16 Apr 2025 15:35:32 -0700 Subject: [PATCH 18/24] Update comments --- backends/xnnpack/quantizer/xnnpack_quantizer_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py b/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py index f400f51a464..92eff3d0d68 100644 --- a/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py +++ b/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py @@ -331,6 +331,8 @@ def _do_annotate_conv( ): weight_val = weight.meta.get("val", None) weight_shape = getattr(weight_val, "shape", None) + + # Skip if not a 4D weight tensor (i.e. not conv2d) if weight_shape is not None and len(weight_shape) != 4: continue From b29030ed21e921424adeb1da25f42243b0a7827a Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Sun, 20 Apr 2025 09:52:00 -0700 Subject: [PATCH 19/24] Add unit tests for dynamic quant sequential and parallel convs --- backends/xnnpack/test/ops/test_conv2d.py | 69 +++++++++++++++++++----- 1 file changed, 55 insertions(+), 14 deletions(-) diff --git a/backends/xnnpack/test/ops/test_conv2d.py b/backends/xnnpack/test/ops/test_conv2d.py index 0c456ebc21a..4d1b387b8ec 100644 --- a/backends/xnnpack/test/ops/test_conv2d.py +++ b/backends/xnnpack/test/ops/test_conv2d.py @@ -173,12 +173,10 @@ def get_inputs(self): return (torch.randn(2, 2, 4, 4),) -class Conv2dDynamicQuant(torch.nn.Module): +class Conv2dDQ(torch.nn.Module): def __init__(self): super().__init__() - self.conv = torch.nn.Conv2d(3, 10, 3) - self.conv.weight.requires_grad = False - self.conv.bias.requires_grad = False + self.conv = torch.nn.Conv2d(in_channels=3, out_channels=10, kernel_size=3) def forward(self, x): return self.conv(x) @@ -187,6 +185,43 @@ def get_inputs(self): return (torch.randn(1, 3, 8, 8),) +class Conv2dDQSeq(torch.nn.Module): + def __init__(self): + super().__init__() + self.first = torch.nn.Conv2d( + in_channels=3, out_channels=8, kernel_size=3, padding=1 + ) + self.second = torch.nn.Conv2d( + in_channels=8, out_channels=10, kernel_size=3, padding=1 + ) + + def forward(self, x): + y = self.first(x) + return self.second(y) + + def get_inputs(self): + return (torch.randn(1, 3, 8, 8),) + + +class Conv2dDQParallel(torch.nn.Module): + def __init__(self): + super().__init__() + self.first = torch.nn.Conv2d( + in_channels=3, out_channels=8, kernel_size=3, padding=1 + ) + self.second = torch.nn.Conv2d( + in_channels=3, out_channels=10, kernel_size=3, padding=1 + ) + + def forward(self, x): + first = self.first(x) + second = self.second(x) + return first, second + + def get_inputs(self): + return (torch.randn(1, 3, 8, 8),) + + class TestConv2d(unittest.TestCase): def setUp(self): torch._dynamo.reset() @@ -244,8 +279,8 @@ def _test( def _test_dq( self, m: torch.nn.Module, - inputs, - dynamic_shapes, + conv_count=1, + dynamic_shapes=None, ): quant_config = get_symmetric_quantization_config( is_per_channel=True, @@ -257,14 +292,16 @@ def _test_dq( per_op_mode=True, ) - tester = Tester(m, inputs, dynamic_shapes=dynamic_shapes) + tester = Tester(m, m.get_inputs(), dynamic_shapes=dynamic_shapes) tester.quantize(Quantize(quantization_config=quant_config)) tester.export() tester.check(["torch.ops.quantized_decomposed.choose_qparams"]) tester.to_edge_transform_and_lower( ToEdgeTransformAndLower([DynamicallyQuantizedPartitioner]) ) - tester.check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + tester.check_count( + {"torch.ops.higher_order.executorch_call_delegate": conv_count} + ) tester.check_not(["executorch_exir_dialects_edge__ops_aten_conv2d_default"]) tester.to_executorch() tester.serialize() @@ -748,9 +785,13 @@ def forward(self, x): ) def test_dq_conv2d(self) -> None: - model = Conv2dDynamicQuant() - self._test_dq( - model, - model.get_inputs(), - dynamic_shapes=None, - ) + model = Conv2dDQ() + self._test_dq(model) + + def test_dq_conv2d_seq(self) -> None: + model = Conv2dDQSeq() + self._test_dq(model, conv_count=2) + + def test_dq_conv2d_parallel(self) -> None: + model = Conv2dDQParallel() + self._test_dq(model, conv_count=2) From 6da8b7d5414e852d0cb229cae797f3ded0e77bc7 Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Sun, 20 Apr 2025 09:55:36 -0700 Subject: [PATCH 20/24] Add unit test for dynamic quant conv2d with channels-last permute --- .../test_channels_last_tagged_reshape.py | 43 ++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/backends/xnnpack/test/passes/test_channels_last_tagged_reshape.py b/backends/xnnpack/test/passes/test_channels_last_tagged_reshape.py index 6d60f9d76b5..a00209f4ea6 100644 --- a/backends/xnnpack/test/passes/test_channels_last_tagged_reshape.py +++ b/backends/xnnpack/test/passes/test_channels_last_tagged_reshape.py @@ -10,10 +10,13 @@ from executorch.backends.xnnpack._passes.channels_last_tagged_reshape_pass import ( ChannelsLastTaggedReshapePass, ) +from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import ( + get_symmetric_quantization_config, +) from executorch.backends.xnnpack.test.test_xnnpack_utils_classes import ( OpSequencesAddConv2d, ) -from executorch.backends.xnnpack.test.tester import RunPasses, Tester +from executorch.backends.xnnpack.test.tester import Quantize, RunPasses, Tester class TestChannelsLastTaggedReshapePass(unittest.TestCase): @@ -35,6 +38,10 @@ def setUp(self): dequant_name = "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default" conv_name = "executorch_exir_dialects_edge__ops_aten_convolution_default" relu_name = "executorch_exir_dialects_edge__ops_aten_relu_default" + choose_qparams_name = ( + "executorch_exir_dialects_edge__ops_quantized_decomposed_choose_qparams_tensor" + ) + dynamic_quant_name = "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_tensor" def test_fp32_channels_last_tagged_reshape_pass(self): for module, num_reshape in self.modules.items(): @@ -179,3 +186,37 @@ def test_fp32_channels_last_tagged_reshape_pass_conv_bn_hardtanh_mean_seq(self): ) .run_method_and_compare_outputs() ) + + class Conv2dDynamicQuant(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv = torch.nn.Conv2d(3, 10, 3) + + def forward(self, x): + return self.conv(x) + + def test_dq_conv2d_channels_last_tagged_reshape_pass(self) -> None: + ( + Tester(self.Conv2dDynamicQuant().eval(), (torch.randn(1, 3, 8, 8),)) + .quantize( + Quantize( + quantization_config=get_symmetric_quantization_config( + is_dynamic=True + ) + ) + ) + .export() + .to_edge() + .run_passes(self.PassStage) + .check( + [ + self.to_copy_name, + self.choose_qparams_name, + self.dynamic_quant_name, + self.dequant_name, + self.conv_name, + self.to_copy_name, + ] + ) + .run_method_and_compare_outputs() + ) From 7c534545344ed2b831e163ca0f4d207e253a7a23 Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Sun, 20 Apr 2025 09:57:54 -0700 Subject: [PATCH 21/24] Add check to determine if node feeds into conv and set non-batch dims accordingly --- backends/xnnpack/operators/quant_params.py | 24 +++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/backends/xnnpack/operators/quant_params.py b/backends/xnnpack/operators/quant_params.py index ddb9db865f3..fbee1d192cf 100644 --- a/backends/xnnpack/operators/quant_params.py +++ b/backends/xnnpack/operators/quant_params.py @@ -141,17 +141,27 @@ def quantize_tensor(self, tensor: torch.Tensor) -> torch.Tensor: tensor, self.scale, self.zp, self.qmin, self.qmax, self.dtype ) + # Temporary helper until non-batch dimensions can be inferred + # Detects if a node feeds into a conv op by checking all downstream users + @staticmethod + def _feeds_into_conv(node: torch.fx.Node) -> bool: + users_list = [node] + + while users_list: + current_user = users_list.pop() + if "convolution" in str(current_user.target): + return True + users_list.extend(current_user.users) + + return False + @classmethod def _from_dynamic_input_node(cls, quant_node: torch.fx.Node) -> QuantParams: q_input = quant_node.args[0] # fp32 input assert isinstance(q_input, torch.fx.Node) - num_nonbatch_dims = 1 - - # Compute non-batch dimensions (shape length - 1), defaulting to 1 - q_input_val = q_input.meta.get("val", None) - q_input_shape = getattr(q_input_val, "shape", None) - if q_input_shape is not None: - num_nonbatch_dims = max(len(q_input_shape) - 1, 1) + # TODO - materialize this from the quant_node scale count and val shape + # Set non-batch dims to 3 if node feeds into conv (only 2D is supported), otherwise set to 1 for linear + num_nonbatch_dims = 3 if cls._feeds_into_conv(quant_node) else 1 return cls( per_channel=False, # True is not valid From eaba81962d63390e4a24abe6bb3471443e425e0c Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Sun, 20 Apr 2025 10:01:43 -0700 Subject: [PATCH 22/24] Add depthwise conv checks for dynamic quant --- .../xnnpack/partition/config/gemm_configs.py | 34 ++++++++++++++----- .../quantizer/xnnpack_quantizer_utils.py | 18 +++++++++- 2 files changed, 43 insertions(+), 9 deletions(-) diff --git a/backends/xnnpack/partition/config/gemm_configs.py b/backends/xnnpack/partition/config/gemm_configs.py index a05bf623e05..b779f2e6204 100644 --- a/backends/xnnpack/partition/config/gemm_configs.py +++ b/backends/xnnpack/partition/config/gemm_configs.py @@ -9,6 +9,7 @@ from typing import cast, List, Optional, Tuple import torch +from executorch.backends.transforms import get_shape from executorch.backends.xnnpack.operators.quant_params import QuantParams from executorch.backends.xnnpack.partition.config.xnnpack_config import ( ConfigPrecisionType, @@ -358,18 +359,35 @@ def check_constraints(self, node: torch.fx.Node, ep: ExportedProgram) -> bool: why(node, "Only support 1D + 2D Conv") return False # Only support 1D + 2D Conv - precision = self._detect_precision(node) - if precision == ConfigPrecisionType.DYNAMIC_QUANT and len(conv_stride) != 2: - why(node, "Only support 2D Conv for dynamic quantization") - return False - kernel_node = get_input_node(node, 1) + kernel_shape = get_shape(kernel_node) weight_quant_params = QuantParams.from_weights(kernel_node, ep) - - is_transpose = node.args[6] groups = cast(int, node.args[8]) + is_transpose = node.args[6] + + if is_transpose: + group_input_channels = int(kernel_shape[0] / groups) + group_output_channels = kernel_shape[1] + else: + group_input_channels = kernel_shape[1] + group_output_channels = int(kernel_shape[0] / groups) + + is_depthwise = ( + group_input_channels == 1 + and group_output_channels % group_input_channels == 0 + ) + + # XNNPACK does not support dynamic quantization convs that are not 2D or are depthwise + if self._detect_precision(node) == ConfigPrecisionType.DYNAMIC_QUANT and ( + len(conv_stride) != 2 or is_depthwise + ): + why( + node, + "XNNPACK only supports standard 2D convolutions for dynamic quantization", + ) + return False - # XNNPack does not support non-zero output padding in transposed + # XNNPACK does not support non-zero output padding in transposed # convolutions. if is_transpose and any( out_pad != 0 for out_pad in cast(List[int], node.args[7]) diff --git a/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py b/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py index 92eff3d0d68..91cb816b795 100644 --- a/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py +++ b/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py @@ -323,7 +323,7 @@ def _do_annotate_conv( assert isinstance(weight, Node) input_qspec_map[weight] = get_weight_qspec(quantization_config) - # Only annotate dynamically quantized conv if it's 2D + # Only annotate dynamically quantized conv if it's 2D and not depthwise if ( quantization_config and quantization_config.input_activation @@ -336,6 +336,22 @@ def _do_annotate_conv( if weight_shape is not None and len(weight_shape) != 4: continue + # Default to 1 since groups is not available in the node + groups = 1 + if is_conv_transpose: + group_input_channels = int(weight_shape[0] / groups) + group_output_channels = weight_shape[1] + else: + group_input_channels = weight_shape[1] + group_output_channels = int(weight_shape[0] / groups) + + # Skip if depthwise + if ( + group_input_channels == 1 + and group_output_channels % group_input_channels == 0 + ): + continue + # adding weight node to the partition as well partition = [conv_node, conv_node.args[1]] From e336df6f8422d6be82f1458fa331af4613eeeaf0 Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Mon, 21 Apr 2025 16:38:56 -0700 Subject: [PATCH 23/24] Move depthwise conv check to helper function in utils --- .../xnnpack/partition/config/gemm_configs.py | 16 ++-------- .../quantizer/xnnpack_quantizer_utils.py | 18 ++--------- backends/xnnpack/utils/utils.py | 30 +++++++++++++++++++ 3 files changed, 36 insertions(+), 28 deletions(-) diff --git a/backends/xnnpack/partition/config/gemm_configs.py b/backends/xnnpack/partition/config/gemm_configs.py index b779f2e6204..67bccbc52d1 100644 --- a/backends/xnnpack/partition/config/gemm_configs.py +++ b/backends/xnnpack/partition/config/gemm_configs.py @@ -28,6 +28,7 @@ ) from executorch.backends.xnnpack.utils.utils import ( get_input_node, + is_depthwise_conv, is_getitem, is_node, is_param_node, @@ -365,21 +366,10 @@ def check_constraints(self, node: torch.fx.Node, ep: ExportedProgram) -> bool: groups = cast(int, node.args[8]) is_transpose = node.args[6] - if is_transpose: - group_input_channels = int(kernel_shape[0] / groups) - group_output_channels = kernel_shape[1] - else: - group_input_channels = kernel_shape[1] - group_output_channels = int(kernel_shape[0] / groups) - - is_depthwise = ( - group_input_channels == 1 - and group_output_channels % group_input_channels == 0 - ) - # XNNPACK does not support dynamic quantization convs that are not 2D or are depthwise if self._detect_precision(node) == ConfigPrecisionType.DYNAMIC_QUANT and ( - len(conv_stride) != 2 or is_depthwise + len(conv_stride) != 2 + or is_depthwise_conv(kernel_shape, groups, is_transpose) ): why( node, diff --git a/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py b/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py index 91cb816b795..4b961bef81d 100644 --- a/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py +++ b/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py @@ -6,6 +6,7 @@ import torch import torch.nn.functional as F +from executorch.backends.xnnpack.utils.utils import is_depthwise_conv from torch._subclasses import FakeTensor from torch.ao.quantization.fx.utils import get_new_attr_name_with_prefix from torch.ao.quantization.pt2e.export_utils import _WrapperModule @@ -29,7 +30,6 @@ ) from torch.fx.passes.utils.source_matcher_utils import get_source_partitions - __all__ = [ "OperatorConfig", "OperatorPatternType", @@ -336,20 +336,8 @@ def _do_annotate_conv( if weight_shape is not None and len(weight_shape) != 4: continue - # Default to 1 since groups is not available in the node - groups = 1 - if is_conv_transpose: - group_input_channels = int(weight_shape[0] / groups) - group_output_channels = weight_shape[1] - else: - group_input_channels = weight_shape[1] - group_output_channels = int(weight_shape[0] / groups) - - # Skip if depthwise - if ( - group_input_channels == 1 - and group_output_channels % group_input_channels == 0 - ): + # Skip if depthwise (default to groups=1 since it's not an arg) + if is_depthwise_conv(weight_shape, 1, is_conv_transpose): continue # adding weight node to the partition as well diff --git a/backends/xnnpack/utils/utils.py b/backends/xnnpack/utils/utils.py index fab95618807..b23fd444117 100644 --- a/backends/xnnpack/utils/utils.py +++ b/backends/xnnpack/utils/utils.py @@ -158,3 +158,33 @@ def get_source_fn(node: torch.fx.Node) -> Optional[torch.fx.Node]: return None source_fn = source_fn_st[-1] return source_fn[1] + + +def is_depthwise_conv( + kernel_shape: Tuple[int, ...], groups: int = 1, is_transpose: bool = False +) -> bool: + """ + A convolution is depthwise if: + 1) groups = input_channels (i.e. group_input_channels = 1) + 2) output_channels is a positive integer multiple of input channels + + For standard convolutions: + weight shape = (out_channels, in_channels_per_group, height, width) + For transposed convolutions: + weight shape = (in_channels, out_channels_per_group, height, width) + + Returns True if the convolution is depthwise + """ + if len(kernel_shape) < 2 or groups < 1: + return False + + if is_transpose: + group_input_channels = int(kernel_shape[0] / groups) + group_output_channels = kernel_shape[1] + else: + group_input_channels = kernel_shape[1] + group_output_channels = int(kernel_shape[0] / groups) + + return ( + group_input_channels == 1 and group_output_channels % group_input_channels == 0 + ) From d82e08071cc5c7df51372b12db6b92778135f331 Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Mon, 21 Apr 2025 16:40:22 -0700 Subject: [PATCH 24/24] Use existing Conv2d class; get conv count from model --- backends/xnnpack/test/ops/test_conv2d.py | 31 ++++++++++++------------ 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/backends/xnnpack/test/ops/test_conv2d.py b/backends/xnnpack/test/ops/test_conv2d.py index 4d1b387b8ec..92bb03c907a 100644 --- a/backends/xnnpack/test/ops/test_conv2d.py +++ b/backends/xnnpack/test/ops/test_conv2d.py @@ -173,18 +173,6 @@ def get_inputs(self): return (torch.randn(2, 2, 4, 4),) -class Conv2dDQ(torch.nn.Module): - def __init__(self): - super().__init__() - self.conv = torch.nn.Conv2d(in_channels=3, out_channels=10, kernel_size=3) - - def forward(self, x): - return self.conv(x) - - def get_inputs(self): - return (torch.randn(1, 3, 8, 8),) - - class Conv2dDQSeq(torch.nn.Module): def __init__(self): super().__init__() @@ -210,7 +198,7 @@ def __init__(self): in_channels=3, out_channels=8, kernel_size=3, padding=1 ) self.second = torch.nn.Conv2d( - in_channels=3, out_channels=10, kernel_size=3, padding=1 + in_channels=3, out_channels=8, kernel_size=3, padding=1 ) def forward(self, x): @@ -785,13 +773,24 @@ def forward(self, x): ) def test_dq_conv2d(self) -> None: - model = Conv2dDQ() + model = Conv2d( + in_channels=3, + out_channels=10, + kernel_size=(3, 3), + stride=(1, 1), + padding=(0, 0), + batches=1, + width=8, + height=8, + ) self._test_dq(model) def test_dq_conv2d_seq(self) -> None: model = Conv2dDQSeq() - self._test_dq(model, conv_count=2) + conv_count = sum(1 for m in model.modules() if type(m) is torch.nn.Conv2d) + self._test_dq(model, conv_count) def test_dq_conv2d_parallel(self) -> None: model = Conv2dDQParallel() - self._test_dq(model, conv_count=2) + conv_count = sum(1 for m in model.modules() if type(m) is torch.nn.Conv2d) + self._test_dq(model, conv_count)