Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion backends/xnnpack/operators/op_avg_pooling2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
XNNGraph,
XNode,
)
from executorch.backends.xnnpack.utils.xnnpack_constants import XNN_FLAG_KEEP_DIMS


@register_node_visitor
Expand Down Expand Up @@ -67,7 +68,7 @@ def define_node(
dilation_width=0, # Unused
input_id=input_id,
output_id=output_id,
flags=0,
flags=XNN_FLAG_KEEP_DIMS,
),
debug_handle=debug_handle,
)
Expand Down
3 changes: 2 additions & 1 deletion backends/xnnpack/operators/op_max_pool2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
XNNMaxPooling2d,
XNode,
)
from executorch.backends.xnnpack.utils.xnnpack_constants import XNN_FLAG_KEEP_DIMS


@register_node_visitor
Expand Down Expand Up @@ -80,7 +81,7 @@ def define_node(
kwargs["dilation_height"] = dilation[0]
kwargs["dilation_width"] = dilation[1]

kwargs["flags"] = 0
kwargs["flags"] = XNN_FLAG_KEEP_DIMS

ser_node = XNode(
xnode_union=XNNMaxPooling2d(
Expand Down
3 changes: 2 additions & 1 deletion backends/xnnpack/operators/op_mean_dim.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
XNNGraph,
XNode,
)
from executorch.backends.xnnpack.utils.xnnpack_constants import XNN_FLAG_KEEP_DIMS


@register_node_visitor
Expand Down Expand Up @@ -70,7 +71,7 @@ def define_node(

ser_node = XNode(
xnode_union=XNNGlobalAvgPooling2d(
input_id=input_id, output_id=output_id, flags=0
input_id=input_id, output_id=output_id, flags=XNN_FLAG_KEEP_DIMS
),
debug_handle=debug_handle,
)
Expand Down
33 changes: 30 additions & 3 deletions backends/xnnpack/test/ops/linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,18 @@ def test_fp32_linear(self):
num_batch_dims=num_batch_dims,
)

def test_qc8_linear(self):
for use_bias in (True, False):
for num_batch_dims in range(1, 3):
self._test_linear(
lambda in_size, out_size: torch.nn.Linear(
in_size, out_size, bias=use_bias # noqa
),
uses_bias=use_bias,
quant_type="per_channel",
num_batch_dims=num_batch_dims,
)

def test_fp32_addmm(self):
"""
Note that the ConvertToLinear pass requires the weight matrix to be transposed.
Expand Down Expand Up @@ -107,7 +119,7 @@ def forward(self, x):
),
num_batch_dims=num_batch_dims,
uses_bias=use_bias,
quant=True,
quant_type="per_tensor",
)

def test_qs8_linear(self):
Expand All @@ -119,6 +131,7 @@ def test_qs8_linear(self):
),
uses_bias=use_bias,
num_batch_dims=num_batch_dims,
quant_type="per_tensor",
)

@unittest.skip("XNNPACK currently only supports per-channel dynamic quantization.")
Expand Down Expand Up @@ -726,7 +739,7 @@ def _test_linear(
make_module,
uses_bias,
num_batch_dims=1,
quant=False,
quant_type=None,
dtype: torch.dtype = torch.float,
atol=1e-03,
):
Expand All @@ -746,6 +759,8 @@ def _test_linear(
input_sizes = [4, 37, 17]
output_sizes = [4, 17, 37]

quant = quant_type is not None

"""
Note that torch.nn.Linear maps to aten.mm.default (no bias) or aten.addmm.default (bias),
which are then transformed into aten.linear.default by the ConvertToLinear pass.
Expand All @@ -769,7 +784,19 @@ def _test_linear(
tester = Tester(module, inputs, dynamic_shapes=dynamic_shape)

if quant:
tester.quantize()
if quant_type == "per_channel":
quant_config = get_symmetric_quantization_config(
is_per_channel=True,
is_dynamic=False,
)
elif quant_type == "per_tensor":
quant_config = get_symmetric_quantization_config(
is_per_channel=False,
is_dynamic=False,
)
else:
raise ValueError(f"Unsupported quant type {quant_type}")
tester.quantize(Quantize(quantization_config=quant_config))

tester.export()
tester.check_count({aten_op: 1})
Expand Down
12 changes: 8 additions & 4 deletions backends/xnnpack/utils/xnnpack_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,25 @@
UINT32_MAX = 4294967295
XNN_EXTRA_BYTES = 16
XNN_MAX_TENSOR_DIMS = 6
XNN_FLAG_SPARSE_INFERENCE = 0x00000001
XNN_FLAG_HINT_SPARSE_INFERENCE = XNN_FLAG_SPARSE_INFERENCE
XNN_FLAG_FP16_INFERENCE = 0x00000002
XNN_FLAG_HINT_FP16_INFERENCE = XNN_FLAG_FP16_INFERENCE
XNN_FLAG_HINT_SPARSE_INFERENCE = 0x00000001
XNN_FLAG_HINT_FP16_INFERENCE = 0x00000002
XNN_FLAG_FORCE_FP16_INFERENCE = 0x00000004
XNN_FLAG_BASIC_PROFILING = 0x00000008
XNN_FLAG_JIT = 0x00000010
XNN_FLAG_DEPTHWISE_CONVOLUTION = 0x00000001
XNN_FLAG_TRANSPOSE_WEIGHTS = 0x00000001
XNN_FLAG_INPUT_NHWC = 0x00000002
XNN_FLAG_TENSORFLOW_SAME_PADDING = 0x00000004
XNN_FLAG_TRANSPOSE_B = XNN_FLAG_TRANSPOSE_WEIGHTS
XNN_FLAG_TRANSPOSE_A = 0x00000002
XNN_FLAG_TENSORFLOW_RESHAPE_2D = 0x00000004
XNN_FLAG_TENSORFLOW_LEGACY_MODE = 0x00000004
XNN_FLAG_FP32_STATIC_WEIGHTS = 0x00000008
XNN_FLAG_ALIGN_CORNERS = 0x00000008
XNN_FLAG_YIELD_WORKERS = 0x00000010
XNN_FLAG_TRANSIENT_INDIRECTION_BUFFER = 0x00000020
XNN_FLAG_KEEP_DIMS = 0x00000040
XNN_EXTRA_QUANTIZATION_PARAMS = 8
XNN_VALUE_FLAG_EXTERNAL_INPUT = 0x00000001
XNN_VALUE_FLAG_EXTERNAL_OUTPUT = 0x00000002
XNN_VALUE_FLAG_PERSISTENT = 0x00000004
Expand Down