diff --git a/backends/xnnpack/operators/op_avg_pooling2d.py b/backends/xnnpack/operators/op_avg_pooling2d.py index 18f981cb330..94cd06cc08e 100644 --- a/backends/xnnpack/operators/op_avg_pooling2d.py +++ b/backends/xnnpack/operators/op_avg_pooling2d.py @@ -16,6 +16,7 @@ XNNGraph, XNode, ) +from executorch.backends.xnnpack.utils.xnnpack_constants import XNN_FLAG_KEEP_DIMS @register_node_visitor @@ -67,7 +68,7 @@ def define_node( dilation_width=0, # Unused input_id=input_id, output_id=output_id, - flags=0, + flags=XNN_FLAG_KEEP_DIMS, ), debug_handle=debug_handle, ) diff --git a/backends/xnnpack/operators/op_max_pool2d.py b/backends/xnnpack/operators/op_max_pool2d.py index 6fb49d30d57..d1a010295ef 100644 --- a/backends/xnnpack/operators/op_max_pool2d.py +++ b/backends/xnnpack/operators/op_max_pool2d.py @@ -18,6 +18,7 @@ XNNMaxPooling2d, XNode, ) +from executorch.backends.xnnpack.utils.xnnpack_constants import XNN_FLAG_KEEP_DIMS @register_node_visitor @@ -80,7 +81,7 @@ def define_node( kwargs["dilation_height"] = dilation[0] kwargs["dilation_width"] = dilation[1] - kwargs["flags"] = 0 + kwargs["flags"] = XNN_FLAG_KEEP_DIMS ser_node = XNode( xnode_union=XNNMaxPooling2d( diff --git a/backends/xnnpack/operators/op_mean_dim.py b/backends/xnnpack/operators/op_mean_dim.py index fe9f2249631..663606a8880 100644 --- a/backends/xnnpack/operators/op_mean_dim.py +++ b/backends/xnnpack/operators/op_mean_dim.py @@ -18,6 +18,7 @@ XNNGraph, XNode, ) +from executorch.backends.xnnpack.utils.xnnpack_constants import XNN_FLAG_KEEP_DIMS @register_node_visitor @@ -70,7 +71,7 @@ def define_node( ser_node = XNode( xnode_union=XNNGlobalAvgPooling2d( - input_id=input_id, output_id=output_id, flags=0 + input_id=input_id, output_id=output_id, flags=XNN_FLAG_KEEP_DIMS ), debug_handle=debug_handle, ) diff --git a/backends/xnnpack/test/ops/linear.py b/backends/xnnpack/test/ops/linear.py index 85b760e38ad..06ca72e377c 100644 --- a/backends/xnnpack/test/ops/linear.py +++ b/backends/xnnpack/test/ops/linear.py @@ -48,6 +48,18 @@ def test_fp32_linear(self): num_batch_dims=num_batch_dims, ) + def test_qc8_linear(self): + for use_bias in (True, False): + for num_batch_dims in range(1, 3): + self._test_linear( + lambda in_size, out_size: torch.nn.Linear( + in_size, out_size, bias=use_bias # noqa + ), + uses_bias=use_bias, + quant_type="per_channel", + num_batch_dims=num_batch_dims, + ) + def test_fp32_addmm(self): """ Note that the ConvertToLinear pass requires the weight matrix to be transposed. @@ -107,7 +119,7 @@ def forward(self, x): ), num_batch_dims=num_batch_dims, uses_bias=use_bias, - quant=True, + quant_type="per_tensor", ) def test_qs8_linear(self): @@ -119,6 +131,7 @@ def test_qs8_linear(self): ), uses_bias=use_bias, num_batch_dims=num_batch_dims, + quant_type="per_tensor", ) @unittest.skip("XNNPACK currently only supports per-channel dynamic quantization.") @@ -726,7 +739,7 @@ def _test_linear( make_module, uses_bias, num_batch_dims=1, - quant=False, + quant_type=None, dtype: torch.dtype = torch.float, atol=1e-03, ): @@ -746,6 +759,8 @@ def _test_linear( input_sizes = [4, 37, 17] output_sizes = [4, 17, 37] + quant = quant_type is not None + """ Note that torch.nn.Linear maps to aten.mm.default (no bias) or aten.addmm.default (bias), which ares then transformed into aten.linear.default by the ConvertToLinear pass. @@ -769,7 +784,19 @@ def _test_linear( tester = Tester(module, inputs, dynamic_shapes=dynamic_shape) if quant: - tester.quantize() + if quant_type == "per_channel": + quant_config = get_symmetric_quantization_config( + is_per_channel=True, + is_dynamic=False, + ) + elif quant_type == "per_tensor": + quant_config = get_symmetric_quantization_config( + is_per_channel=False, + is_dynamic=False, + ) + else: + raise ValueError(f"Unsupported quant type {quant_type}") + tester.quantize(Quantize(quantization_config=quant_config)) tester.export() tester.check_count({aten_op: 1}) diff --git a/backends/xnnpack/third-party/XNNPACK b/backends/xnnpack/third-party/XNNPACK index 70bbd07c1de..20c0d886fb7 160000 --- a/backends/xnnpack/third-party/XNNPACK +++ b/backends/xnnpack/third-party/XNNPACK @@ -1 +1 @@ -Subproject commit 70bbd07c1de310a1f89379c746b8f24a506c3283 +Subproject commit 20c0d886fb78d6497362e8303b999bf5d67aaa02 diff --git a/backends/xnnpack/utils/xnnpack_constants.py b/backends/xnnpack/utils/xnnpack_constants.py index 63c8d6fdeef..351cc8ad897 100644 --- a/backends/xnnpack/utils/xnnpack_constants.py +++ b/backends/xnnpack/utils/xnnpack_constants.py @@ -8,21 +8,25 @@ UINT32_MAX = 4294967295 XNN_EXTRA_BYTES = 16 XNN_MAX_TENSOR_DIMS = 6 -XNN_FLAG_SPARSE_INFERENCE = 0x00000001 -XNN_FLAG_HINT_SPARSE_INFERENCE = XNN_FLAG_SPARSE_INFERENCE -XNN_FLAG_FP16_INFERENCE = 0x00000002 -XNN_FLAG_HINT_FP16_INFERENCE = XNN_FLAG_FP16_INFERENCE +XNN_FLAG_HINT_SPARSE_INFERENCE = 0x00000001 +XNN_FLAG_HINT_FP16_INFERENCE = 0x00000002 XNN_FLAG_FORCE_FP16_INFERENCE = 0x00000004 XNN_FLAG_BASIC_PROFILING = 0x00000008 +XNN_FLAG_JIT = 0x00000010 XNN_FLAG_DEPTHWISE_CONVOLUTION = 0x00000001 XNN_FLAG_TRANSPOSE_WEIGHTS = 0x00000001 XNN_FLAG_INPUT_NHWC = 0x00000002 XNN_FLAG_TENSORFLOW_SAME_PADDING = 0x00000004 +XNN_FLAG_TRANSPOSE_B = XNN_FLAG_TRANSPOSE_WEIGHTS +XNN_FLAG_TRANSPOSE_A = 0x00000002 XNN_FLAG_TENSORFLOW_RESHAPE_2D = 0x00000004 XNN_FLAG_TENSORFLOW_LEGACY_MODE = 0x00000004 XNN_FLAG_FP32_STATIC_WEIGHTS = 0x00000008 XNN_FLAG_ALIGN_CORNERS = 0x00000008 XNN_FLAG_YIELD_WORKERS = 0x00000010 +XNN_FLAG_TRANSIENT_INDIRECTION_BUFFER = 0x00000020 +XNN_FLAG_KEEP_DIMS = 0x00000040 +XNN_EXTRA_QUANTIZATION_PARAMS = 8 XNN_VALUE_FLAG_EXTERNAL_INPUT = 0x00000001 XNN_VALUE_FLAG_EXTERNAL_OUTPUT = 0x00000002 XNN_VALUE_FLAG_PERSISTENT = 0x00000004