From d65b3f4a1f530441ae191795b88c88b008b88103 Mon Sep 17 00:00:00 2001
From: Gavin Uberti <gavin.uberti@gmail.com>
Date: Thu, 1 Sep 2022 04:38:48 -0700
Subject: [PATCH 1/2] Allow int8 operations for Cortex-M cores

---
 python/tvm/relay/qnn/op/legalizations.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/python/tvm/relay/qnn/op/legalizations.py b/python/tvm/relay/qnn/op/legalizations.py
index 2fcdaf362a22..1478bb40943f 100644
--- a/python/tvm/relay/qnn/op/legalizations.py
+++ b/python/tvm/relay/qnn/op/legalizations.py
@@ -417,6 +417,12 @@ def is_aarch64_arm():
     return "aarch64" in target.attrs.get("mtriple", "")
 
 
+def is_cortexm_arm():
+    """Checks whether we are compiling for a Cortex-M target."""
+    target = tvm.target.Target.current(allow_none=False)
+    return "cortex-m" in target.attrs.get("mcpu", "")
+
+
 ########################
 # ARM CPU legalizations.
 ########################
@@ -433,7 +439,7 @@ def _qnn_conv2d_legalize_arm_cpu(attrs, inputs, types):
         attrs["groups"],
     )
     use_int8_on_arm = (not is_depthwise) and is_aarch64_arm() and attrs["data_layout"] == "NHWC"
-    if use_int8_on_arm or is_fast_int8_on_arm():
+    if use_int8_on_arm or is_fast_int8_on_arm() or is_cortexm_arm():
         return helper_change_dtypes_to_be_same(attrs, inputs, types, relay.qnn.op.conv2d)
     return helper_no_fast_int8_hw_legalization(attrs, inputs, types, relay.nn.conv2d)
 
@@ -441,7 +447,7 @@ def _qnn_conv2d_legalize_arm_cpu(attrs, inputs, types):
 @qnn_dense_legalize.register("arm_cpu")
 def _qnn_dense_legalize_arm_cpu(attrs, inputs, types):
     # ARM prefers the dtypes to be same.
-    if is_fast_int8_on_arm():
+    if is_fast_int8_on_arm() or is_cortexm_arm():
         return helper_change_dtypes_to_be_same(attrs, inputs, types, relay.qnn.op.dense)
     return helper_no_fast_int8_hw_legalization(attrs, inputs, types, relay.nn.dense)
 

From 7a4c52dec2d0dacda17a500db3a9c23c4046f3fc Mon Sep 17 00:00:00 2001
From: Gavin Uberti <gavin.uberti@gmail.com>
Date: Thu, 1 Sep 2022 23:02:50 -0700
Subject: [PATCH 2/2] Fix conversion requirements

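Replace the Cortex-M-specific check added in the previous patch
(is_cortexm_arm) with a check for ASIMD availability: a target is
treated as having ASIMD when is_aarch64_arm() is true or "+neon" is
listed in target.mattr.

The arm_cpu conv2d and dense legalizations now use
helper_no_fast_int8_hw_legalization only when ASIMD is available and no
other option applies (is_fast_int8_on_arm(), or for conv2d a
non-depthwise NHWC layout). All other cases go through
helper_change_dtypes_to_be_same.

Squashed changes:

- Expand comment/docstring
- Adjust int16 conversion requirements
- Adjust conversion requirements per code review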
---
 python/tvm/relay/qnn/op/legalizations.py | 28 ++++++++++++------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/python/tvm/relay/qnn/op/legalizations.py b/python/tvm/relay/qnn/op/legalizations.py
index 1478bb40943f..9bc6efdad00f 100644
--- a/python/tvm/relay/qnn/op/legalizations.py
+++ b/python/tvm/relay/qnn/op/legalizations.py
@@ -417,12 +417,6 @@ def is_aarch64_arm():
     return "aarch64" in target.attrs.get("mtriple", "")
 
 
-def is_cortexm_arm():
-    """Checks whether we are compiling for a Cortex-M target."""
-    target = tvm.target.Target.current(allow_none=False)
-    return "cortex-m" in target.attrs.get("mcpu", "")
-
-
 ########################
 # ARM CPU legalizations.
 ########################
@@ -430,7 +424,8 @@ def is_cortexm_arm():
 
 @qnn_conv2d_legalize.register("arm_cpu")
 def _qnn_conv2d_legalize_arm_cpu(attrs, inputs, types):
-    # ARM prefers the dtypes to be same.
+    target = tvm.target.Target.current(allow_none=False)
+    has_asimd = is_aarch64_arm() or "+neon" in target.mattr
     is_depthwise = relay.op.strategy.is_depthwise_conv2d(
         types[0].shape,
         attrs["data_layout"],
@@ -438,18 +433,23 @@ def _qnn_conv2d_legalize_arm_cpu(attrs, inputs, types):
         attrs["kernel_layout"],
         attrs["groups"],
     )
-    use_int8_on_arm = (not is_depthwise) and is_aarch64_arm() and attrs["data_layout"] == "NHWC"
-    if use_int8_on_arm or is_fast_int8_on_arm() or is_cortexm_arm():
-        return helper_change_dtypes_to_be_same(attrs, inputs, types, relay.qnn.op.conv2d)
-    return helper_no_fast_int8_hw_legalization(attrs, inputs, types, relay.nn.conv2d)
+    use_int8_on_arm = (not is_depthwise) and attrs["data_layout"] == "NHWC"
+    has_dotprod = is_fast_int8_on_arm()
+    other_options = use_int8_on_arm or has_dotprod
+    if has_asimd and not other_options:
+        return helper_no_fast_int8_hw_legalization(attrs, inputs, types, relay.nn.conv2d)
+    # ARM prefers the dtypes to be the same.
+    return helper_change_dtypes_to_be_same(attrs, inputs, types, relay.qnn.op.conv2d)
 
 
 @qnn_dense_legalize.register("arm_cpu")
 def _qnn_dense_legalize_arm_cpu(attrs, inputs, types):
+    target = tvm.target.Target.current(allow_none=False)
+    has_asimd = is_aarch64_arm() or "+neon" in target.mattr
+    if has_asimd and not is_fast_int8_on_arm():
+        return helper_no_fast_int8_hw_legalization(attrs, inputs, types, relay.nn.dense)
     # ARM prefers the dtypes to be same.
-    if is_fast_int8_on_arm() or is_cortexm_arm():
-        return helper_change_dtypes_to_be_same(attrs, inputs, types, relay.qnn.op.dense)
-    return helper_no_fast_int8_hw_legalization(attrs, inputs, types, relay.nn.dense)
+    return helper_change_dtypes_to_be_same(attrs, inputs, types, relay.qnn.op.dense)
 
 
 ##########################