From d65b3f4a1f530441ae191795b88c88b008b88103 Mon Sep 17 00:00:00 2001 From: Gavin Uberti Date: Thu, 1 Sep 2022 04:38:48 -0700 Subject: [PATCH 1/2] Allow int8 operations for Cortex-M cores --- python/tvm/relay/qnn/op/legalizations.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/python/tvm/relay/qnn/op/legalizations.py b/python/tvm/relay/qnn/op/legalizations.py index 2fcdaf362a22..1478bb40943f 100644 --- a/python/tvm/relay/qnn/op/legalizations.py +++ b/python/tvm/relay/qnn/op/legalizations.py @@ -417,6 +417,12 @@ def is_aarch64_arm(): return "aarch64" in target.attrs.get("mtriple", "") +def is_cortexm_arm(): + """Checks whether we are compiling for a Cortex-M target.""" + target = tvm.target.Target.current(allow_none=False) + return "cortex-m" in target.attrs.get("mcpu", "") + + ######################## # ARM CPU legalizations. ######################## @@ -433,7 +439,7 @@ def _qnn_conv2d_legalize_arm_cpu(attrs, inputs, types): attrs["groups"], ) use_int8_on_arm = (not is_depthwise) and is_aarch64_arm() and attrs["data_layout"] == "NHWC" - if use_int8_on_arm or is_fast_int8_on_arm(): + if use_int8_on_arm or is_fast_int8_on_arm() or is_cortexm_arm(): return helper_change_dtypes_to_be_same(attrs, inputs, types, relay.qnn.op.conv2d) return helper_no_fast_int8_hw_legalization(attrs, inputs, types, relay.nn.conv2d) @@ -441,7 +447,7 @@ def _qnn_conv2d_legalize_arm_cpu(attrs, inputs, types): @qnn_dense_legalize.register("arm_cpu") def _qnn_dense_legalize_arm_cpu(attrs, inputs, types): # ARM prefers the dtypes to be same. - if is_fast_int8_on_arm(): + if is_fast_int8_on_arm() or is_cortexm_arm(): return helper_change_dtypes_to_be_same(attrs, inputs, types, relay.qnn.op.dense) return helper_no_fast_int8_hw_legalization(attrs, inputs, types, relay.nn.dense) From 7a4c52dec2d0dacda17a500db3a9c23c4046f3fc Mon Sep 17 00:00:00 2001 From: Gavin Uberti Date: Thu, 1 Sep 2022 23:02:50 -0700 Subject: [PATCH 2/2] Fix conversion requirements Expand comment docstring Adjust int16 conversion requirements Adjust conversion requirements per code review --- python/tvm/relay/qnn/op/legalizations.py | 28 ++++++++++++------------ 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/python/tvm/relay/qnn/op/legalizations.py b/python/tvm/relay/qnn/op/legalizations.py index 1478bb40943f..9bc6efdad00f 100644 --- a/python/tvm/relay/qnn/op/legalizations.py +++ b/python/tvm/relay/qnn/op/legalizations.py @@ -417,12 +417,6 @@ def is_aarch64_arm(): return "aarch64" in target.attrs.get("mtriple", "") -def is_cortexm_arm(): - """Checks whether we are compiling for a Cortex-M target.""" - target = tvm.target.Target.current(allow_none=False) - return "cortex-m" in target.attrs.get("mcpu", "") - - ######################## # ARM CPU legalizations. ######################## @@ -430,7 +424,8 @@ def is_cortexm_arm(): @qnn_conv2d_legalize.register("arm_cpu") def _qnn_conv2d_legalize_arm_cpu(attrs, inputs, types): - # ARM prefers the dtypes to be same. + target = tvm.target.Target.current(allow_none=False) + has_asimd = is_aarch64_arm() or "+neon" in target.mattr is_depthwise = relay.op.strategy.is_depthwise_conv2d( types[0].shape, attrs["data_layout"], @@ -438,18 +433,23 @@ def _qnn_conv2d_legalize_arm_cpu(attrs, inputs, types): attrs["kernel_layout"], attrs["groups"], ) - use_int8_on_arm = (not is_depthwise) and is_aarch64_arm() and attrs["data_layout"] == "NHWC" - if use_int8_on_arm or is_fast_int8_on_arm() or is_cortexm_arm(): - return helper_change_dtypes_to_be_same(attrs, inputs, types, relay.qnn.op.conv2d) - return helper_no_fast_int8_hw_legalization(attrs, inputs, types, relay.nn.conv2d) + use_int8_on_arm = (not is_depthwise) and attrs["data_layout"] == "NHWC" + has_dotprod = is_fast_int8_on_arm() + other_options = use_int8_on_arm or has_dotprod + if has_asimd and not other_options: + return helper_no_fast_int8_hw_legalization(attrs, inputs, types, relay.nn.conv2d) + # ARM prefers the dtypes to be same. + return helper_change_dtypes_to_be_same(attrs, inputs, types, relay.qnn.op.conv2d) @qnn_dense_legalize.register("arm_cpu") def _qnn_dense_legalize_arm_cpu(attrs, inputs, types): + target = tvm.target.Target.current(allow_none=False) + has_asimd = is_aarch64_arm() or "+neon" in target.mattr + if has_asimd and not is_fast_int8_on_arm(): + return helper_no_fast_int8_hw_legalization(attrs, inputs, types, relay.nn.dense) # ARM prefers the dtypes to be same. - if is_fast_int8_on_arm() or is_cortexm_arm(): - return helper_change_dtypes_to_be_same(attrs, inputs, types, relay.qnn.op.dense) - return helper_no_fast_int8_hw_legalization(attrs, inputs, types, relay.nn.dense) + return helper_change_dtypes_to_be_same(attrs, inputs, types, relay.qnn.op.dense) ##########################