From 4d510c8bfb87736ade358b17a062429a95559d1a Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Sat, 6 Mar 2021 01:47:30 +0000 Subject: [PATCH 01/21] Add initial implementation of flexible simulated qnn ops. --- .../tvm/relay/transform/quantize/test_sim.py | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 python/tvm/relay/transform/quantize/test_sim.py diff --git a/python/tvm/relay/transform/quantize/test_sim.py b/python/tvm/relay/transform/quantize/test_sim.py new file mode 100644 index 000000000000..dad09b2f3db6 --- /dev/null +++ b/python/tvm/relay/transform/quantize/test_sim.py @@ -0,0 +1,83 @@ +import tvm +import numpy as np +from tvm import te, tir, topi + + +SQNN_FP32 = 0 +SQNN_INT8 = 1 +SQNN_UINT8 = 2 + +SQNN_DATATYPE_MAP = { + SQNN_FP32: 'float32', + SQNN_INT8: 'int8', + SQNN_UINT8: 'uint8', +} + + +def simulated_qnn(data, out_dtype, output_scale=None, output_zero_point=None, axis=-1): + + def _compute_fp32(value, *indices): + return value[indices] + + def _compute_int8(value, *indices): + assert output_scale is not None and output_zero_point is not None + const_min = tvm.tir.min_value(SQNN_DATATYPE_MAP[SQNN_INT8]) + const_max = tvm.tir.max_value(SQNN_DATATYPE_MAP[SQNN_INT8]) + # Use indexmod to handle both scalar and per-channel QNN parameters. + scale_idx = tir.indexmod(indices[axis], topi.shape(output_scale)[0]) + zp_idx = tir.indexmod(indices[axis], topi.shape(output_zero_point)[0]) + return te.max(te.min(te.round(value[indices] / output_scale[scale_idx]) + output_zero_point[zp_idx], const_max), const_min) + + def _compute_uint8(value, *indices): + assert output_scale is not None and output_zero_point is not None + const_min = tvm.tir.min_value(SQNN_DATATYPE_MAP[SQNN_UINT8]) + const_max = tvm.tir.max_value(SQNN_DATATYPE_MAP[SQNN_UINT8]) + # Use indexmod to handle both scalar and per-channel QNN parameters. 
+ scale_idx = tir.indexmod(indices[axis], topi.shape(output_scale)[0]) + zp_idx = tir.indexmod(indices[axis], topi.shape(output_zero_point)[0]) + return te.max(te.min(te.round(value[indices] / output_scale[scale_idx]) + output_zero_point[zp_idx], const_max), const_min) + + def _dispatch_sim_qnn(value): + fp32_value = te.compute(data.shape, lambda *indices: _compute_fp32(value, *indices)) + int8_value = te.compute( + data.shape, + lambda *indices: tir.if_then_else(out_dtype[0] == SQNN_INT8, _compute_int8(value, *indices), fp32_value[indices])) + uint8_value = te.compute( + data.shape, + lambda *indices: tir.if_then_else(out_dtype[0] == SQNN_UINT8, _compute_uint8(value, *indices), int8_value[indices])) + + return uint8_value + + + #return _dispatch_qnn() + return te.compute( + data.shape, + lambda *indices: _dispatch_sim_qnn(data)[indices] + ) + +def test_sim_qnn(): + V = te.placeholder([8], name="value") + D = te.placeholder([1], name="dtype", dtype='int32') + S = te.placeholder([te.size_var("dim")], name="scale", dtype='float32') + ZP = te.placeholder([te.size_var("dim")], name="zero_point", dtype='int32') + Q = simulated_qnn(V, D, output_scale=S, output_zero_point=ZP) + s = te.create_schedule([Q.op]) + a = tvm.nd.array(np.asarray([200] * 8).astype('float32'), tvm.cpu()) + q = tvm.nd.array(np.zeros((8), dtype='float32'), tvm.cpu()) + f = tvm.build(s, [V, D, S, ZP, Q], "llvm", name="sim_qnn") + for d_val in [[0], [1], [2]]: + d = tvm.nd.array(np.asarray(d_val).astype('int32'), tvm.cpu()) + s = tvm.nd.array(np.asarray([1, 2]*4).astype('float32'), tvm.cpu()) + zp = tvm.nd.array(np.asarray([0]*8).astype('int32'), tvm.cpu()) + f(a, d, s, zp, q) + print(q) + s = tvm.nd.array(np.asarray([1]).astype('float32'), tvm.cpu()) + zp = tvm.nd.array(np.asarray([0]).astype('int32'), tvm.cpu()) + f(a, d, s, zp, q) + print(q) + + +if __name__ == "__main__": + test_sim_qnn() + + From 61657f68d7c4462fbb46c92fd46d796b77a0c95c Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Sun, 7 Mar 2021 21:52:00 +0000 Subject: [PATCH 02/21] Added proper topi testing and fixed qnn axis bug. --- .../tvm/relay/transform/quantize/test_sim.py | 83 ------------ python/tvm/topi/nn/__init__.py | 1 + python/tvm/topi/nn/qnn.py | 120 ++++++++++++++++++ src/relay/qnn/op/quantize.cc | 26 ++-- tests/python/relay/test_op_qnn_quantize.py | 2 +- tests/python/topi/python/test_topi_qnn.py | 88 +++++++++++++ 6 files changed, 227 insertions(+), 93 deletions(-) delete mode 100644 python/tvm/relay/transform/quantize/test_sim.py create mode 100644 python/tvm/topi/nn/qnn.py create mode 100644 tests/python/topi/python/test_topi_qnn.py diff --git a/python/tvm/relay/transform/quantize/test_sim.py b/python/tvm/relay/transform/quantize/test_sim.py deleted file mode 100644 index dad09b2f3db6..000000000000 --- a/python/tvm/relay/transform/quantize/test_sim.py +++ /dev/null @@ -1,83 +0,0 @@ -import tvm -import numpy as np -from tvm import te, tir, topi - - -SQNN_FP32 = 0 -SQNN_INT8 = 1 -SQNN_UINT8 = 2 - -SQNN_DATATYPE_MAP = { - SQNN_FP32: 'float32', - SQNN_INT8: 'int8', - SQNN_UINT8: 'uint8', -} - - -def simulated_qnn(data, out_dtype, output_scale=None, output_zero_point=None, axis=-1): - - def _compute_fp32(value, *indices): - return value[indices] - - def _compute_int8(value, *indices): - assert output_scale is not None and output_zero_point is not None - const_min = tvm.tir.min_value(SQNN_DATATYPE_MAP[SQNN_INT8]) - const_max = tvm.tir.max_value(SQNN_DATATYPE_MAP[SQNN_INT8]) - # Use indexmod to handle both scalar and per-channel QNN parameters. 
- scale_idx = tir.indexmod(indices[axis], topi.shape(output_scale)[0]) - zp_idx = tir.indexmod(indices[axis], topi.shape(output_zero_point)[0]) - return te.max(te.min(te.round(value[indices] / output_scale[scale_idx]) + output_zero_point[zp_idx], const_max), const_min) - - def _compute_uint8(value, *indices): - assert output_scale is not None and output_zero_point is not None - const_min = tvm.tir.min_value(SQNN_DATATYPE_MAP[SQNN_UINT8]) - const_max = tvm.tir.max_value(SQNN_DATATYPE_MAP[SQNN_UINT8]) - # Use indexmod to handle both scalar and per-channel QNN parameters. - scale_idx = tir.indexmod(indices[axis], topi.shape(output_scale)[0]) - zp_idx = tir.indexmod(indices[axis], topi.shape(output_zero_point)[0]) - return te.max(te.min(te.round(value[indices] / output_scale[scale_idx]) + output_zero_point[zp_idx], const_max), const_min) - - def _dispatch_sim_qnn(value): - fp32_value = te.compute(data.shape, lambda *indices: _compute_fp32(value, *indices)) - int8_value = te.compute( - data.shape, - lambda *indices: tir.if_then_else(out_dtype[0] == SQNN_INT8, _compute_int8(value, *indices), fp32_value[indices])) - uint8_value = te.compute( - data.shape, - lambda *indices: tir.if_then_else(out_dtype[0] == SQNN_UINT8, _compute_uint8(value, *indices), int8_value[indices])) - - return uint8_value - - - #return _dispatch_qnn() - return te.compute( - data.shape, - lambda *indices: _dispatch_sim_qnn(data)[indices] - ) - -def test_sim_qnn(): - V = te.placeholder([8], name="value") - D = te.placeholder([1], name="dtype", dtype='int32') - S = te.placeholder([te.size_var("dim")], name="scale", dtype='float32') - ZP = te.placeholder([te.size_var("dim")], name="zero_point", dtype='int32') - Q = simulated_qnn(V, D, output_scale=S, output_zero_point=ZP) - s = te.create_schedule([Q.op]) - a = tvm.nd.array(np.asarray([200] * 8).astype('float32'), tvm.cpu()) - q = tvm.nd.array(np.zeros((8), dtype='float32'), tvm.cpu()) - f = tvm.build(s, [V, D, S, ZP, Q], "llvm", name="sim_qnn") - for d_val in [[0], [1], [2]]: - d = tvm.nd.array(np.asarray(d_val).astype('int32'), tvm.cpu()) - s = tvm.nd.array(np.asarray([1, 2]*4).astype('float32'), tvm.cpu()) - zp = tvm.nd.array(np.asarray([0]*8).astype('int32'), tvm.cpu()) - f(a, d, s, zp, q) - print(q) - s = tvm.nd.array(np.asarray([1]).astype('float32'), tvm.cpu()) - zp = tvm.nd.array(np.asarray([0]).astype('int32'), tvm.cpu()) - f(a, d, s, zp, q) - print(q) - - -if __name__ == "__main__": - test_sim_qnn() - - diff --git a/python/tvm/topi/nn/__init__.py b/python/tvm/topi/nn/__init__.py index 2ebbd1d67bd1..94a5b30c9b76 100644 --- a/python/tvm/topi/nn/__init__.py +++ b/python/tvm/topi/nn/__init__.py @@ -36,6 +36,7 @@ from .conv2d_transpose import * from .conv1d_transpose import * from .bnn import * +from .qnn import * from .upsampling import * from .local_response_norm import * from .bitserial_conv2d import * diff --git a/python/tvm/topi/nn/qnn.py b/python/tvm/topi/nn/qnn.py new file mode 100644 index 000000000000..01858ae69d2a --- /dev/null +++ b/python/tvm/topi/nn/qnn.py @@ -0,0 +1,120 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Quantized Neural Network (QNN) Operators""" +import tvm +from tvm import te, tir, topi + +SQNN_FP32 = 0 +SQNN_INT8 = 1 +SQNN_UINT8 = 2 +SQNN_INT32 = 3 + +SQNN_DTYPE_TO_CODE = { + "float32": SQNN_FP32, + "int8": SQNN_INT8, + "uint8": SQNN_UINT8, + "int32": SQNN_INT32, +} + +SQNN_CODE_TO_DTYPE = {v: k for k, v in SQNN_DTYPE_TO_CODE.items()} + + +def simulated_quantize(data, out_dtype, output_scale=None, output_zero_point=None, axis=-1): + """Simulated QNN quantize operator that mimics QNN outputs in floating point. The benefit + of this operator over true QNN quantize is that this operator allows dynamic datatype + selection and can operate on both per-channel and scalar scales and zero points while + QNN quantize requires both of these to be fixed at compile time. + + Parameters + ---------- + data: tvm.te.Tensor + An N-D input tensor to the operator. + + out_dtype: tvm.te.Tensor + A 1-D variable that indicates which datatype to simulate quantization with. Use + SQNN_DTYPE_TO_CODE to convert a dtype string into the corresponding variable + value. + + output_scale: tvm.te.Tensor, optional + A 1-D tensor representing the scale to use when quantizing to integer datatypes. + When it contains more than a single value, N must match the number of channels in data. + + output_zero_point: tvm.te.Tensor, optional + A 1-D tensor representing the zero point to use when quantizing to integer datatypes. + When it contains more than a single value, N must match the number of channels in data. + + axis: int, optional + The channel axis for quantization. Default value is -1 which corresponds to the last axis. + + """ + + # Since all simulated outputs are in float32, we can just return the input tensor for fp32. + def _compute_fp32(value, *indices): + return value[indices] + + # Simulate quantization for arbitrary integer datatypes. The computation for all datatypes is: + # Q_output = clip((round(input_tensor/output_scale) + output_zero_point), + # out_dtype::min, + # out_dtype::max) + def _compute_intn(dtype, value, *indices): + assert output_scale is not None and output_zero_point is not None + const_min = tvm.tir.min_value(dtype) + const_max = tvm.tir.max_value(dtype) + # Use indexmod to handle both scalar and per-channel QNN parameters. + scale_idx = tir.indexmod(indices[axis], topi.shape(output_scale)[0]) + zp_idx = tir.indexmod(indices[axis], topi.shape(output_zero_point)[0]) + return te.max( + te.min( + te.round(value[indices] / output_scale[scale_idx]) + output_zero_point[zp_idx], + const_max, + ), + const_min, + ) + + # Use an if chain to dynamically return the proper quantization based on the input datatype. + # This allows the op to compile once but apply different quantization approaches + # using a variable datatype input. 
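+    #
+    # A note on the indexmod trick in _compute_intn above (an illustration, not
+    # executed code): with a channel dimension of size C, a 1-element scale gives
+    # scale[i % 1] == scale[0] for every index (the scalar case), while a C-element
+    # scale gives scale[i % C] == scale[i] (the per-channel case), so one compiled
+    # kernel serves both layouts.
+    #
+    # Conceptually the chain below behaves like (pseudocode only):
+    #   out = intn(x, 'int32')  if code == SQNN_INT32
+    #         intn(x, 'uint8')  if code == SQNN_UINT8
+    #         intn(x, 'int8')   if code == SQNN_INT8
+    #         x                 otherwise (fp32 passthrough)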
+    def _dispatch_sim_qnn(value):
+        fp32_value = te.compute(data.shape, lambda *indices: _compute_fp32(value, *indices))
+        int8_value = te.compute(
+            data.shape,
+            lambda *indices: tir.if_then_else(
+                out_dtype[0] == SQNN_INT8,
+                _compute_intn(SQNN_CODE_TO_DTYPE[SQNN_INT8], value, *indices),
+                fp32_value[indices],
+            ),
+        )
+        uint8_value = te.compute(
+            data.shape,
+            lambda *indices: tir.if_then_else(
+                out_dtype[0] == SQNN_UINT8,
+                _compute_intn(SQNN_CODE_TO_DTYPE[SQNN_UINT8], value, *indices),
+                int8_value[indices],
+            ),
+        )
+        int32_value = te.compute(
+            data.shape,
+            lambda *indices: tir.if_then_else(
+                out_dtype[0] == SQNN_INT32,
+                _compute_intn(SQNN_CODE_TO_DTYPE[SQNN_INT32], value, *indices),
+                uint8_value[indices],
+            ),
+        )
+
+        return int32_value
+
+    return te.compute(data.shape, lambda *indices: _dispatch_sim_qnn(data)[indices])
diff --git a/src/relay/qnn/op/quantize.cc b/src/relay/qnn/op/quantize.cc
index 9829834f43a3..5c4705546f46 100644
--- a/src/relay/qnn/op/quantize.cc
+++ b/src/relay/qnn/op/quantize.cc
@@ -50,15 +50,18 @@ bool QuantizeRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
       << "Input type should be one of float32 but was " << input_dtype;
   const auto* quantize_attrs = attrs.as<QuantizeAttrs>();
 
-  int axis = quantize_attrs->axis;
-  axis = (axis == -1) ? data->shape.size() - 1 : axis;
-  ICHECK_LT(axis, static_cast<int>(data->shape.size()))
-      << "axis " << quantize_attrs->axis << " is out of range";
-  ICHECK_GE(axis, 0) << "axis " << quantize_attrs->axis << " is out of range";
 
-  // Check and assign types for scale and zero points.
-  AssignType(types[1], DataType::Float(32), data->shape[axis], reporter);  // scale
-  AssignType(types[2], DataType::Int(32), data->shape[axis], reporter);   // zero point
+  // Assign type to scale and zero point if they're channelwise.
+  if (data->shape.size() != 0) {
+    int axis = quantize_attrs->axis;
+    axis = (axis < 0) ? data->shape.size() + axis : axis;
+    ICHECK_LT(axis, static_cast<int>(data->shape.size()))
+        << "axis " << quantize_attrs->axis << " is out of range";
+    ICHECK_GE(axis, 0) << "axis " << quantize_attrs->axis << " is out of range";
+    // Check and assign types for scale and zero points.
+    AssignType(types[1], DataType::Float(32), data->shape[axis], reporter);  // scale
+    AssignType(types[2], DataType::Int(32), data->shape[axis], reporter);    // zero point
+  }
 
   const Array<tvm::PrimExpr> oshape = data->shape;
   const DataType out_dtype = quantize_attrs->out_dtype;
@@ -93,10 +96,15 @@ Expr QuantizeLower(const Expr& input_tensor, const Expr& output_scale,
   Array<IndexExpr> input_shape = in_tensor_type->shape;
 
   const auto out_dtype = attrs->out_dtype;
-  const auto axis = attrs->axis;
+  auto axis = attrs->axis;
 
   size_t n_dim = input_shape.size();
 
+  // Wrap axis from negative to positive if needed.
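+  // For example (illustrative): with n_dim = 4, axis = -1 wraps to 3, mirroring
+  // Python-style negative indexing.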
+ if (axis < 0) { + axis = ((int) n_dim) + axis; + } + auto expanded_output_scale = output_scale; if (!IsConstScalar(output_scale) && !IsScalarType(types[1])) { expanded_output_scale = ExpandBiasToMatchAxis(output_scale, n_dim, {axis}); diff --git a/tests/python/relay/test_op_qnn_quantize.py b/tests/python/relay/test_op_qnn_quantize.py index 2ef298679904..b300c5612174 100644 --- a/tests/python/relay/test_op_qnn_quantize.py +++ b/tests/python/relay/test_op_qnn_quantize.py @@ -127,7 +127,7 @@ def test_channelwise_axis_1(): quantize_test_driver( in_dtype="float32", quant_args=quant_args, - axis=1, + axis=-1, out_dtype="uint8", in_data=data, verify_output_data=output, diff --git a/tests/python/topi/python/test_topi_qnn.py b/tests/python/topi/python/test_topi_qnn.py new file mode 100644 index 000000000000..0e154c3d9fd9 --- /dev/null +++ b/tests/python/topi/python/test_topi_qnn.py @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Test code for QNN operators.""" +import numpy as np +import tvm +from tvm import topi, relay, te +from tvm.contrib import graph_runtime + + +def verify_simulated_quantize( + data_shape, out_dtype, channels, axis +): + # Create placeholder variables for all qnn inputs. + A = te.placeholder(data_shape, name='value', dtype='float32') + D = te.placeholder([1], name='dtype', dtype='int32') + S = te.placeholder([te.size_var("scale_dim")], name="scale", dtype='float32') + Z = te.placeholder([te.size_var("zp_dim")], name="zp", dtype='int32') + SIM_Q = topi.nn.simulated_quantize(A, D, output_scale=S, output_zero_point=Z, axis=axis) + + # Create random numpy values to assign to inputs. + a_np = np.random.uniform(size=data_shape).astype('float32') + d_np = np.asarray([topi.nn.SQNN_DTYPE_TO_CODE[out_dtype]]).astype('int32') + s_np = np.random.uniform(low=1e-4, high=.1, size=channels).astype('float32') + z_np = np.random.uniform(low=-10, high=10, size=channels).astype('int32') + q_np = np.zeros(shape=data_shape, dtype='float32') + + ctx = tvm.cpu() + + # Wrap the numpy arrays in nd arrays. + a = tvm.nd.array(a_np, ctx) + d = tvm.nd.array(d_np, ctx) + s = tvm.nd.array(s_np, ctx) + z = tvm.nd.array(z_np, ctx) + q = tvm.nd.array(q_np, ctx) + + # Construct equivalent relay graph. + per_channel = channels[0] != 1 + a_var = relay.var('a', shape=data_shape, dtype='float32') + if per_channel: + s_var = relay.const(s_np) + z_var = relay.const(z_np) + else: + s_var = relay.const(s_np[0]) + z_var = relay.const(z_np[0]) + real_q_op = relay.qnn.op.quantize(a_var, s_var, z_var, axis=axis, out_dtype=out_dtype) + with tvm.transform.PassContext(opt_level=3): + lib = relay.build(tvm.IRModule.from_expr(real_q_op), target='llvm') + + # Get real qnn quantize output. 
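+    # This relay graph lowers through the real qnn.quantize op, so its output is
+    # the reference the simulated op must reproduce exactly (in float32).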
+ m = graph_runtime.GraphModule(lib["default"](ctx)) + m.set_input('a', a_np) + + m.run() + real_q_out = m.get_output(0) + + # Compile the simulated quantize function. + sched = te.create_schedule([SIM_Q.op]) + func = tvm.build(sched, [A, D, S, Z, SIM_Q], 'llvm', name="sim_quantize") + func(a, d, s, z, q) + + # Check correctness against the true qnn output. + tvm.testing.assert_allclose(q.asnumpy(), real_q_out.asnumpy().astype('float32')) + + +def test_simulated_quantize(): + verify_simulated_quantize([1], 'int8', [1], -1) + verify_simulated_quantize([2, 5], 'int8', [5], 1) + verify_simulated_quantize([1, 32, 32, 32], 'int8', [32], -1) + verify_simulated_quantize([1, 32, 32, 32], 'uint8', [32], -2) + verify_simulated_quantize([2, 5], 'int32', [5], 1) + +if __name__ == "__main__": + test_simulated_quantize() + From cdc262c496ab02f128869b046d2f896abfd9ba46 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Sun, 7 Mar 2021 22:30:17 +0000 Subject: [PATCH 03/21] Add injective schedule wrapping. --- python/tvm/topi/nn/qnn.py | 1 + tests/python/topi/python/test_topi_qnn.py | 64 ++++++++++++----------- 2 files changed, 35 insertions(+), 30 deletions(-) diff --git a/python/tvm/topi/nn/qnn.py b/python/tvm/topi/nn/qnn.py index 01858ae69d2a..995fc1c53c59 100644 --- a/python/tvm/topi/nn/qnn.py +++ b/python/tvm/topi/nn/qnn.py @@ -33,6 +33,7 @@ SQNN_CODE_TO_DTYPE = {v: k for k, v in SQNN_DTYPE_TO_CODE.items()} +@tvm.te.tag_scope(tag=topi.tag.ELEMWISE) def simulated_quantize(data, out_dtype, output_scale=None, output_zero_point=None, axis=-1): """Simulated QNN quantize operator that mimics QNN outputs in floating point. The benefit of this operator over true QNN quantize is that this operator allows dynamic datatype diff --git a/tests/python/topi/python/test_topi_qnn.py b/tests/python/topi/python/test_topi_qnn.py index 0e154c3d9fd9..92e4c180a723 100644 --- a/tests/python/topi/python/test_topi_qnn.py +++ b/tests/python/topi/python/test_topi_qnn.py @@ -19,6 +19,7 @@ import tvm from tvm import topi, relay, te from tvm.contrib import graph_runtime +import tvm.topi.testing def verify_simulated_quantize( @@ -38,42 +39,45 @@ def verify_simulated_quantize( z_np = np.random.uniform(low=-10, high=10, size=channels).astype('int32') q_np = np.zeros(shape=data_shape, dtype='float32') - ctx = tvm.cpu() + def check_device(device, ctx): + # Wrap the numpy arrays in nd arrays. + a = tvm.nd.array(a_np, ctx) + d = tvm.nd.array(d_np, ctx) + s = tvm.nd.array(s_np, ctx) + z = tvm.nd.array(z_np, ctx) + q = tvm.nd.array(q_np, ctx) - # Wrap the numpy arrays in nd arrays. - a = tvm.nd.array(a_np, ctx) - d = tvm.nd.array(d_np, ctx) - s = tvm.nd.array(s_np, ctx) - z = tvm.nd.array(z_np, ctx) - q = tvm.nd.array(q_np, ctx) + # Construct equivalent relay graph. + per_channel = channels[0] != 1 + a_var = relay.var('a', shape=data_shape, dtype='float32') + if per_channel: + s_var = relay.const(s_np) + z_var = relay.const(z_np) + else: + s_var = relay.const(s_np[0]) + z_var = relay.const(z_np[0]) + real_q_op = relay.qnn.op.quantize(a_var, s_var, z_var, axis=axis, out_dtype=out_dtype) + with tvm.transform.PassContext(opt_level=3): + lib = relay.build(tvm.IRModule.from_expr(real_q_op), target=device) - # Construct equivalent relay graph. 
-    per_channel = channels[0] != 1
-    a_var = relay.var('a', shape=data_shape, dtype='float32')
-    if per_channel:
-        s_var = relay.const(s_np)
-        z_var = relay.const(z_np)
-    else:
-        s_var = relay.const(s_np[0])
-        z_var = relay.const(z_np[0])
-    real_q_op = relay.qnn.op.quantize(a_var, s_var, z_var, axis=axis, out_dtype=out_dtype)
-    with tvm.transform.PassContext(opt_level=3):
-        lib = relay.build(tvm.IRModule.from_expr(real_q_op), target='llvm')
+        # Get real qnn quantize output.
+        m = graph_runtime.GraphModule(lib["default"](ctx))
+        m.set_input('a', a_np)
 
-    # Get real qnn quantize output.
-    # This relay graph lowers through the real qnn.quantize op, so its output is
-    # the reference the simulated op must reproduce exactly (in float32).
-    m = graph_runtime.GraphModule(lib["default"](ctx))
-    m.set_input('a', a_np)
+        m.run()
+        real_q_out = m.get_output(0)
 
-    m.run()
-    real_q_out = m.get_output(0)
+        # Compile the simulated quantize function.
+        with tvm.target.Target(device):
+            sched = tvm.topi.testing.get_injective_schedule(device)(SIM_Q)
+            func = tvm.build(sched, [A, D, S, Z, SIM_Q], device, name="sim_quantize")
+            func(a, d, s, z, q)
 
-    # Compile the simulated quantize function.
-    sched = te.create_schedule([SIM_Q.op])
-    func = tvm.build(sched, [A, D, S, Z, SIM_Q], 'llvm', name="sim_quantize")
-    func(a, d, s, z, q)
+        # Check correctness against the true qnn output.
+        tvm.testing.assert_allclose(q.asnumpy(), real_q_out.asnumpy().astype('float32'))
 
-    # Check correctness against the true qnn output.
-    tvm.testing.assert_allclose(q.asnumpy(), real_q_out.asnumpy().astype('float32'))
+    for target, ctx in tvm.testing.enabled_targets():
+        check_device(target, ctx)
 
 
 def test_simulated_quantize():

From 477d244ca69cbe15c22ae052af44bf3e563aa89b Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Mon, 8 Mar 2021 04:58:05 +0000
Subject: [PATCH 04/21] Stuck on typerel problem.

---
 include/tvm/relay/qnn/attrs.h                 |  12 +
 python/tvm/relay/qnn/op/__init__.py           |   2 +-
 python/tvm/relay/qnn/op/_qnn.py               |  32 ++
 python/tvm/relay/qnn/op/qnn.py                |  32 +++
 python/tvm/topi/nn/qnn.py                     |  12 +-
 src/relay/qnn/op/simulated_quantize.cc        |  83 ++++++
 .../relay/test_op_qnn_simulated_quantize.py   |  92 ++++++
 7 files changed, 258 insertions(+), 7 deletions(-)
 create mode 100644 python/tvm/relay/qnn/op/_qnn.py
 create mode 100644 src/relay/qnn/op/simulated_quantize.cc
 create mode 100644 tests/python/relay/test_op_qnn_simulated_quantize.py

diff --git a/include/tvm/relay/qnn/attrs.h b/include/tvm/relay/qnn/attrs.h
index c5213fe07471..f0280a90c604 100644
--- a/include/tvm/relay/qnn/attrs.h
+++ b/include/tvm/relay/qnn/attrs.h
@@ -75,6 +75,18 @@ struct QuantizeAttrs : public tvm::AttrsNode<QuantizeAttrs> {
   }
 };
 
+struct SimulatedQuantizeAttrs : public tvm::AttrsNode<SimulatedQuantizeAttrs> {
+  int axis;
+
+  TVM_DECLARE_ATTRS(SimulatedQuantizeAttrs, "relay.attrs.SimulatedQuantizeAttrs") {
+    TVM_ATTR_FIELD(axis)
+        .describe(
+            "The output channel axis for channel-wise quantization. Default value is -1, "
+            "which corresponds to the last axis.")
+        .set_default(-1);
+  }
+};
+
 /*! \brief Attribute for dequantize operator */
 struct DequantizeAttrs : public tvm::AttrsNode<DequantizeAttrs> {
   int axis;
diff --git a/python/tvm/relay/qnn/op/__init__.py b/python/tvm/relay/qnn/op/__init__.py
index 6d66e12eeafc..848409360a9d 100644
--- a/python/tvm/relay/qnn/op/__init__.py
+++ b/python/tvm/relay/qnn/op/__init__.py
@@ -19,4 +19,4 @@
 from __future__ import absolute_import as _abs
 from .qnn import *
 from .op import register_qnn_legalize
-from . import legalizations, layout_conversions
+from . import _qnn, legalizations, layout_conversions
diff --git a/python/tvm/relay/qnn/op/_qnn.py b/python/tvm/relay/qnn/op/_qnn.py
new file mode 100644
index 000000000000..e66234fc9bc7
--- /dev/null
+++ b/python/tvm/relay/qnn/op/_qnn.py
@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, unused-argument, len-as-condition
+"""QNN operator feature registration"""
+
+from tvm import topi, relay
+
+from ...op.op import register_compute, register_shape_func
+from ...op.op import register_broadcast_schedule, register_injective_schedule
+from ...op.op import register_pattern, OpPattern
+
+
+@register_compute("qnn.simulated_quantize")
+def simulated_quantize_compute(attrs, inputs, output_type):
+    assert len(inputs) == 5
+    return [topi.nn.simulated_quantize(inputs[0], inputs[1], inputs[2], inputs[3], inputs[4], axis=attrs.get_int("axis"))]
+
+register_injective_schedule("qnn.simulated_quantize")
+register_pattern("qnn.simulated_quantize", OpPattern.ELEMWISE)
diff --git a/python/tvm/relay/qnn/op/qnn.py b/python/tvm/relay/qnn/op/qnn.py
index a5892f331f06..9fe8455009ea 100644
--- a/python/tvm/relay/qnn/op/qnn.py
+++ b/python/tvm/relay/qnn/op/qnn.py
@@ -18,9 +18,11 @@
 """QNN dialect operators."""
 from __future__ import absolute_import as _abs
+from tvm import relay
 from tvm.relay.expr import Tuple, TupleWrapper
 from tvm.relay.op.nn.utils import get_pad_tuple2d
 from . import _make
+from tvm.topi.nn.qnn import *
 from ... import op as reg
 from ...op import OpPattern
 
@@ -118,7 +120,38 @@ def quantize(data, output_scale, output_zero_point, axis=-1, out_dtype="int8"):
     return _make.quantize(data, output_scale, output_zero_point, axis, out_dtype)
 
 
+def simulated_quantize(data, output_scale, output_zero_point, axis=-1, out_dtype="int8"):
+    r"""Simulated Quantize op
+    Mimics the quantize op but has more flexibility in valid inputs and always
+    outputs float32. This can be useful for calibrating or training a quantized network.
+
+    Parameters
+    ----------
+    data : tvm.relay.Expr
+        The input tensor to be quantized. Can be of type float32.
+    output_scale : tvm.relay.Expr
+        The output scale.
+    output_zero_point : tvm.relay.Expr
+        The output zero_point.
+    axis : int
+        The channel axis for quantization. Default value is -1 which corresponds to the last axis.
+    out_dtype : string or tvm.relay.Expr
+        A string or tensor indicating which datatype to quantize to. Use
+        SQNN_DTYPE_TO_CODE to convert a dtype string into its corresponding code.
+
+    Returns
+    -------
+    result : tvm.relay.Expr
+        The computed result.
+    """
+    # Convert string dtype to a constant if needed.
+    if isinstance(out_dtype, str):
+        type_code = SQNN_DTYPE_TO_CODE[out_dtype]
+        out_dtype = relay.const([type_code], dtype='int32')
+    return _make.simulated_quantize(data, out_dtype, output_scale, output_zero_point, axis)
+
+
 def dequantize(data, input_scale, input_zero_point, axis=-1):
     r"""Dequantize op
     This operator takes quantized int8 and uint8 as input and produces
     dequantized float32 as output. The output shape is the same as input shape. The input
diff --git a/python/tvm/topi/nn/qnn.py b/python/tvm/topi/nn/qnn.py
index 995fc1c53c59..813111e86c11 100644
--- a/python/tvm/topi/nn/qnn.py
+++ b/python/tvm/topi/nn/qnn.py
@@ -94,24 +94,24 @@ def _dispatch_sim_qnn(value):
         int8_value = te.compute(
             data.shape,
             lambda *indices: tir.if_then_else(
-                out_dtype[0] == SQNN_INT8,
-                _compute_intn(SQNN_CODE_TO_DTYPE[SQNN_INT8], value, *indices),
+                out_dtype[0] == SQNN_DTYPE_TO_CODE['int8'],
+                _compute_intn('int8', value, *indices),
                 fp32_value[indices],
             ),
         )
         uint8_value = te.compute(
             data.shape,
             lambda *indices: tir.if_then_else(
-                out_dtype[0] == SQNN_UINT8,
-                _compute_intn(SQNN_CODE_TO_DTYPE[SQNN_UINT8], value, *indices),
+                out_dtype[0] == SQNN_DTYPE_TO_CODE['uint8'],
+                _compute_intn('uint8', value, *indices),
                 int8_value[indices],
             ),
        )
         int32_value = te.compute(
             data.shape,
             lambda *indices: tir.if_then_else(
-                out_dtype[0] == SQNN_INT32,
-                _compute_intn(SQNN_CODE_TO_DTYPE[SQNN_INT32], value, *indices),
+                out_dtype[0] == SQNN_DTYPE_TO_CODE['int32'],
+                _compute_intn('int32', value, *indices),
                 uint8_value[indices],
             ),
         )
diff --git a/src/relay/qnn/op/simulated_quantize.cc b/src/relay/qnn/op/simulated_quantize.cc
new file mode 100644
index 000000000000..b97cef83fa2d
--- /dev/null
+++ b/src/relay/qnn/op/simulated_quantize.cc
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/relay/qnn/op/simulated_quantize.cc
+ * \brief QNN simulated quantize operator. Mimics the behavior of QNN quantize
+ * in floating point with a dynamically selectable output datatype.
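+ *
+ * Type relation summary (see SimulatedQuantizeRel below): inputs are data
+ * (float32), a 1-D out_dtype code (int32), and 1-D scale (float32) and
+ * zero point (int32) tensors; the output is float32 with the shape of data.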
+ */
+
+#include <tvm/relay/analysis.h>
+#include <tvm/relay/op_attr_types.h>
+#include <tvm/relay/qnn/attrs.h>
+
+#include "../../transforms/pattern_utils.h"
+#include "../utils.h"
+
+namespace tvm {
+namespace relay {
+namespace qnn {
+
+TVM_REGISTER_NODE_TYPE(SimulatedQuantizeAttrs);
+
+bool SimulatedQuantizeRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
+                          const TypeReporter& reporter) {
+  // types = [data_type, datatype_type, scale_type, zp_type, ret_type]
+  ICHECK_EQ(types.size(), 5);
+  const auto* data = types[0].as<TensorTypeNode>();
+  const auto* dtype = types[1].as<TensorTypeNode>();
+
+  if ((data == nullptr) || (dtype == nullptr)) {
+    return false;
+  }
+
+  // assign output type
+  const Array<tvm::PrimExpr> oshape = data->shape;
+  reporter->Assign(types[4], TensorType(oshape, data->dtype));
+  return true;
+}
+
+Expr MakeSimulatedQuantize(Expr data, Expr out_dtype, Expr output_scale, Expr output_zero_point, int axis) {
+  auto attrs = make_object<SimulatedQuantizeAttrs>();
+  attrs->axis = axis;
+  static const Op& op = Op::Get("qnn.simulated_quantize");
+  auto out = Call(op, {data, out_dtype, output_scale, output_zero_point}, Attrs(attrs), {});
+
+  return out;
+}
+
+RELAY_REGISTER_OP("qnn.simulated_quantize")
+    .describe(R"code(Simulates the functionality of qnn.quantize but allows more flexible
+dynamic input type conversion and always outputs float values.
+)code" TVM_ADD_FILELINE)
+    .set_attrs_type<SimulatedQuantizeAttrs>()
+    .set_num_inputs(4)
+    .add_argument("data", "Tensor", "The tensor to quantize.")
+    .add_argument("out_dtype", "Tensor", "A code corresponding to the type of quantization to apply.")
+    .add_argument("output_scale", "Tensor", "The quantization scale of the output tensor.")
+    .add_argument("output_zero_point", "Tensor",
+                  "The quantization zero_point of the output tensor.")
+    .set_support_level(11)
+    .add_type_rel("SimulatedQuantize", SimulatedQuantizeRel);
+
+TVM_REGISTER_GLOBAL("relay.qnn.op._make.simulated_quantize").set_body_typed(MakeSimulatedQuantize);
+
+}  // namespace qnn
+}  // namespace relay
+}  // namespace tvm
diff --git a/tests/python/relay/test_op_qnn_simulated_quantize.py b/tests/python/relay/test_op_qnn_simulated_quantize.py
new file mode 100644
index 000000000000..b19d0bd2f0b0
--- /dev/null
+++ b/tests/python/relay/test_op_qnn_simulated_quantize.py
@@ -0,0 +1,92 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
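+
+# The tests below use quantize_test_driver, which runs the true qnn.quantize
+# lowering, to produce reference outputs for qnn.simulated_quantize.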
+ +import tvm +from tvm import te +import numpy as np +from tvm import relay +from tvm.contrib import graph_runtime +from tvm.relay.testing import run_infer_type + + +def quantize_test_driver(in_dtype, quant_args, axis, out_dtype, in_data): + shape = in_data.shape + input_data = relay.var("input_data", shape=shape, dtype=in_dtype) + output_zero_point = relay.const(quant_args["out_zero_point"]) + output_scale = relay.const(quant_args["out_scale"]) + quantized_output = relay.qnn.op.quantize( + input_data, + output_scale=output_scale, + output_zero_point=output_zero_point, + axis=axis, + out_dtype=out_dtype, + ) + mod = relay.Function(relay.analysis.free_vars(quantized_output), quantized_output) + mod = tvm.IRModule.from_expr(mod) + with tvm.transform.PassContext(opt_level=3): + graph, lib, params = relay.build(mod, "llvm", params=None) + rt_mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0)) + rt_mod.set_input(input_data=in_data) + rt_mod.set_input(**params) + rt_mod.run() + res = rt_mod.get_output(0).asnumpy() + return res + + +def test_float32_to_uint8(): + data = ( + np.array([-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64]) + .astype("float32") + .reshape((2, 5)) + ) + scale_np = np.float32(0.5) + zp_np = np.int32(127) + quant_args = {"out_zero_point": zp_np, "out_scale": scale_np} + q_out = quantize_test_driver( + in_dtype="float32", + quant_args=quant_args, + axis=-1, + out_dtype="uint8", + in_data=data, + ) + input_data = relay.var("input_data", shape=data.shape, dtype='float32') + scale = relay.var("scale", shape=[relay.Any()], dtype="float32") + zp = relay.var("zp", shape=[relay.Any()], dtype="int32") + dtype = relay.var("dtype", shape=[1], dtype='int32') + sim_q_out = relay.qnn.op.simulated_quantize( + input_data, + scale, + zp, + axis=-1, + out_dtype=dtype, + ) + mod = tvm.IRModule.from_expr(sim_q_out) + print(mod) + print(relay.transform.InferType()(mod)) + with tvm.transform.PassContext(opt_level=3): + graph, lib, params = relay.build(mod, "llvm", params=None) + rt_mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0)) + rt_mod.set_input(input_data=data, scale=scale_np, zp=zp_np) + rt_mod.set_input(**params) + rt_mod.run() + res = rt_mod.get_output(0).asnumpy() + + print(sim_q_out) + + +if __name__ == "__main__": + test_float32_to_uint8() \ No newline at end of file From 06a6b185e5b4a1f415ae48e475a06b424baf285c Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 8 Mar 2021 20:17:50 +0000 Subject: [PATCH 05/21] Relay integration fully working. 
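
qnn.simulated_quantize now lowers end to end through the standard relay
build path. A minimal usage sketch (illustrative only; it assumes this
patch is applied and mirrors the new test below):

    import numpy as np
    import tvm
    from tvm import relay
    from tvm.runtime.vm import VirtualMachine

    x = relay.var("x", shape=(2, 5), dtype="float32")
    # A string out_dtype is converted to a dtype-code constant internally.
    sim_q = relay.qnn.op.simulated_quantize(
        x, relay.const(0.5), relay.const(127), axis=-1, out_dtype="uint8"
    )
    mod = tvm.IRModule.from_expr(sim_q)
    with tvm.transform.PassContext(opt_level=3):
        vm_exec = relay.vm.compile(mod, "llvm")
    vm = VirtualMachine(vm_exec, tvm.cpu(0))
    out = vm.invoke("main", x=np.zeros((2, 5), dtype="float32"))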
--- python/tvm/relay/qnn/op/_qnn.py | 7 +- python/tvm/relay/qnn/op/qnn.py | 4 + python/tvm/topi/nn/qnn.py | 1 - src/relay/qnn/op/simulated_quantize.cc | 5 +- .../relay/test_op_qnn_simulated_quantize.py | 98 ++++++++++++++----- 5 files changed, 82 insertions(+), 33 deletions(-) diff --git a/python/tvm/relay/qnn/op/_qnn.py b/python/tvm/relay/qnn/op/_qnn.py index e66234fc9bc7..8f478704e887 100644 --- a/python/tvm/relay/qnn/op/_qnn.py +++ b/python/tvm/relay/qnn/op/_qnn.py @@ -25,8 +25,9 @@ @register_compute("qnn.simulated_quantize") def simulated_quantize_compute(attrs, inputs, output_type): - assert len(inputs) == 5 - return [topi.nn.simulated_quantize(inputs[0], inputs[1], inputs[2], inputs[3], inputs[4], axis=attrs.get_int("axis"))] + assert len(inputs) == 4 + return [topi.nn.simulated_quantize(inputs[0], inputs[1], inputs[2], inputs[3], axis=attrs.get_int("axis"))] register_injective_schedule("qnn.simulated_quantize") -register_pattern("qnn.simulated_quantize", OpPattern.ELEMWISE) +register_pattern("qnn.simulated_quantize", OpPattern.OPAQUE) + diff --git a/python/tvm/relay/qnn/op/qnn.py b/python/tvm/relay/qnn/op/qnn.py index 9fe8455009ea..ba61d296a8aa 100644 --- a/python/tvm/relay/qnn/op/qnn.py +++ b/python/tvm/relay/qnn/op/qnn.py @@ -150,6 +150,10 @@ def simulated_quantize(data, output_scale, output_zero_point, axis=-1, out_dtype if isinstance(out_dtype, str): type_code = SQNN_DTYPE_TO_CODE[out_dtype] out_dtype = relay.const([type_code], dtype='int32') + # Wrap reshapes around input tensors to guarantee shape compatibility. + out_dtype = relay.op.reshape(out_dtype, [1]) + output_scale = relay.op.reshape(output_scale, [-1]) + output_zero_point= relay.op.reshape(output_zero_point, [-1]) return _make.simulated_quantize(data, out_dtype, output_scale, output_zero_point, axis) diff --git a/python/tvm/topi/nn/qnn.py b/python/tvm/topi/nn/qnn.py index 813111e86c11..c8d8c9638838 100644 --- a/python/tvm/topi/nn/qnn.py +++ b/python/tvm/topi/nn/qnn.py @@ -62,7 +62,6 @@ def simulated_quantize(data, out_dtype, output_scale=None, output_zero_point=Non The channel axis for quantization. Default value is -1 which corresponds to the last axis. """ - # Since all simulated outputs are in float32, we can just return the input tensor for fp32. 
     def _compute_fp32(value, *indices):
         return value[indices]
diff --git a/src/relay/qnn/op/simulated_quantize.cc b/src/relay/qnn/op/simulated_quantize.cc
index b97cef83fa2d..275604b93533 100644
--- a/src/relay/qnn/op/simulated_quantize.cc
+++ b/src/relay/qnn/op/simulated_quantize.cc
@@ -48,8 +48,7 @@ bool SimulatedQuantizeRel(const Array<Type>& types, int num_inputs, const Attrs&
   }
 
   // assign output type
-  const Array<IndexExpr> oshape = data->shape;
-  reporter->Assign(types[4], TensorType(oshape, data->dtype));
+  reporter->Assign(types[4], TensorType(data->shape, data->dtype));
   return true;
 }
 
@@ -74,7 +73,7 @@ RELAY_REGISTER_OP("qnn.simulated_quantize")
     .add_argument("output_zero_point", "Tensor",
                   "The quantization zero_point of the output tensor.")
     .set_support_level(11)
-    .add_type_rel("SimulatedQuantize", SimulatedQuantizeRel);
+    .add_type_rel("QNNSimulatedQuantize", SimulatedQuantizeRel);
 
 TVM_REGISTER_GLOBAL("relay.qnn.op._make.simulated_quantize").set_body_typed(MakeSimulatedQuantize);
 
diff --git a/tests/python/relay/test_op_qnn_simulated_quantize.py b/tests/python/relay/test_op_qnn_simulated_quantize.py
index b19d0bd2f0b0..7b4e780bdd94 100644
--- a/tests/python/relay/test_op_qnn_simulated_quantize.py
+++ b/tests/python/relay/test_op_qnn_simulated_quantize.py
@@ -21,6 +21,8 @@
 from tvm import relay
 from tvm.contrib import graph_runtime
 from tvm.relay.testing import run_infer_type
+from tvm.runtime.vm import VirtualMachine
+from tvm.topi.nn.qnn import SQNN_DTYPE_TO_CODE
 
 
 def quantize_test_driver(in_dtype, quant_args, axis, out_dtype, in_data):
@@ -47,14 +49,26 @@ def quantize_test_driver(in_dtype, quant_args, axis, out_dtype, in_data):
     return res
 
 
-def test_float32_to_uint8():
-    data = (
-        np.array([-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64])
-        .astype("float32")
-        .reshape((2, 5))
+def build_simulated_quantize(input_data, scale, zp, dtype, axis=-1):
+    sim_q = relay.qnn.op.simulated_quantize(
+        input_data,
+        scale,
+        zp,
+        axis=axis,
+        out_dtype=dtype,
     )
+    mod = tvm.IRModule.from_expr(sim_q)
+    with tvm.transform.PassContext(opt_level=3):
+        vm_exec = relay.vm.compile(mod, "llvm", params=None)
+    vm = VirtualMachine(vm_exec, tvm.cpu(0))
+    return vm
+
+
+def test_float32_to_uint8_simple():
+    data = np.random.uniform(low=-128, high=127, size=[2, 5]).astype('float32')
     scale_np = np.float32(0.5)
     zp_np = np.int32(127)
+    dtype_np = np.int32(SQNN_DTYPE_TO_CODE['uint8'])
     quant_args = {"out_zero_point": zp_np, "out_scale": scale_np}
     q_out = quantize_test_driver(
         in_dtype="float32",
@@ -64,29 +78,61 @@ def test_float32_to_uint8():
         in_data=data,
     )
     input_data = relay.var("input_data", shape=data.shape, dtype='float32')
-    scale = relay.var("scale", shape=[relay.Any()], dtype="float32")
-    zp = relay.var("zp", shape=[relay.Any()], dtype="int32")
-    dtype = relay.var("dtype", shape=[1], dtype='int32')
-    sim_q_out = relay.qnn.op.simulated_quantize(
-        input_data,
-        scale,
-        zp,
-        axis=-1,
-        out_dtype=dtype,
+    scale = relay.var("scale", shape=[])
+    zp = relay.var("zp", shape=[])
+    dtype = relay.var("dtype", shape=[])
+    vm = build_simulated_quantize(input_data, scale, zp, dtype)
+    sim_q_out = vm.invoke("main", input_data=data, scale=scale_np, zp=zp_np, dtype=dtype_np)
+    np.testing.assert_equal(sim_q_out.asnumpy(), q_out)
+
+
+def test_dynamic_channels():
+    # Compile simulated quantize once but support either per-channel or scalar params.
+ #data = np.random.uniform(low=-64, high=64, size=[2, 5]).astype('float32') + data = ( + np.array([-63.5, -63, -62.5, -62, -61.5, 30, 31, 31.5, 31.75, 32]) + .astype("float32") + .reshape((2, 5)) ) - mod = tvm.IRModule.from_expr(sim_q_out) - print(mod) - print(relay.transform.InferType()(mod)) - with tvm.transform.PassContext(opt_level=3): - graph, lib, params = relay.build(mod, "llvm", params=None) - rt_mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0)) - rt_mod.set_input(input_data=data, scale=scale_np, zp=zp_np) - rt_mod.set_input(**params) - rt_mod.run() - res = rt_mod.get_output(0).asnumpy() + # Test scalar qnn params. + scale_np = np.asarray([0.5]).astype('float32') + zp_np = np.asarray([127]).astype('int32') + dtype_np = np.int32(SQNN_DTYPE_TO_CODE['uint8']) + quant_args = {"out_zero_point": zp_np[0], "out_scale": scale_np[0]} + q_out = quantize_test_driver( + in_dtype="float32", + quant_args=quant_args, + axis=0, + out_dtype="uint8", + in_data=data, + ) + # Create variables with undefined shape and run with scalar inputs. + input_data = relay.var("input_data", shape=data.shape, dtype='float32') + scale = relay.var("scale", shape=[relay.Any()], dtype='float32') + zp = relay.var("zp", shape=[relay.Any()], dtype='int32') + dtype = relay.var("dtype", shape=[]) + vm = build_simulated_quantize(input_data, scale, zp, dtype, axis=0) + sim_q_out = vm.invoke("main", input_data=data, scale=scale_np, zp=zp_np, dtype=dtype_np) + np.testing.assert_equal(sim_q_out.asnumpy(), q_out) - print(sim_q_out) + # Now get the perchannel quantize output and compare without recompiling. + scale_np = np.array([0.5, 0.25]).astype("float32") + zp_np = np.array([127, 123]).astype("int32") + + # Get the reference quantize output. + quant_args = {"out_zero_point": zp_np, "out_scale": scale_np} + q_out = quantize_test_driver( + in_dtype="float32", + quant_args=quant_args, + axis=0, + out_dtype="uint8", + in_data=data, + ) + # Run the simulated quantize without recompiling and confirm results match. + sim_q_out = vm.invoke("main", input_data=data, scale=scale_np, zp=zp_np, dtype=dtype_np) + np.testing.assert_equal(sim_q_out.asnumpy(), q_out) if __name__ == "__main__": - test_float32_to_uint8() \ No newline at end of file + test_float32_to_uint8_simple() + test_dynamic_channels() \ No newline at end of file From 7ec929b82c9712781b0b7290988a8f3e7e807158 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 8 Mar 2021 20:27:53 +0000 Subject: [PATCH 06/21] Simulated quantize totally finished. --- include/tvm/relay/qnn/attrs.h | 6 ++ python/tvm/relay/qnn/op/_qnn.py | 9 ++- python/tvm/relay/qnn/op/qnn.py | 5 +- python/tvm/topi/nn/qnn.py | 12 +-- src/relay/qnn/op/quantize.cc | 2 +- src/relay/qnn/op/simulated_quantize.cc | 8 +- .../relay/test_op_qnn_simulated_quantize.py | 77 ++++++++++++++----- tests/python/topi/python/test_topi_qnn.py | 40 +++++----- 8 files changed, 104 insertions(+), 55 deletions(-) diff --git a/include/tvm/relay/qnn/attrs.h b/include/tvm/relay/qnn/attrs.h index f0280a90c604..96e57be18af2 100644 --- a/include/tvm/relay/qnn/attrs.h +++ b/include/tvm/relay/qnn/attrs.h @@ -97,6 +97,12 @@ struct DequantizeAttrs : public tvm::AttrsNode { "The channel axis for channel wise dequantization. Default value is -1," "which corresponds to the last axis.") .set_default(-1); +<<<<<<< HEAD +======= + TVM_ATTR_FIELD(out_dtype) + .describe("The datatype we are dequantizing to (float32 or int32). Defaults to float32.") + .set_default(DataType::Float(32)); +>>>>>>> Simulated quantize totally finished. 
} }; diff --git a/python/tvm/relay/qnn/op/_qnn.py b/python/tvm/relay/qnn/op/_qnn.py index 8f478704e887..fb585efd0468 100644 --- a/python/tvm/relay/qnn/op/_qnn.py +++ b/python/tvm/relay/qnn/op/_qnn.py @@ -23,11 +23,16 @@ from ...op.op import register_broadcast_schedule, register_injective_schedule from ...op.op import register_pattern, OpPattern + @register_compute("qnn.simulated_quantize") def simulated_quantize_compute(attrs, inputs, output_type): assert len(inputs) == 4 - return [topi.nn.simulated_quantize(inputs[0], inputs[1], inputs[2], inputs[3], axis=attrs.get_int("axis"))] + return [ + topi.nn.simulated_quantize( + inputs[0], inputs[1], inputs[2], inputs[3], axis=attrs.get_int("axis") + ) + ] + register_injective_schedule("qnn.simulated_quantize") register_pattern("qnn.simulated_quantize", OpPattern.OPAQUE) - diff --git a/python/tvm/relay/qnn/op/qnn.py b/python/tvm/relay/qnn/op/qnn.py index ba61d296a8aa..69953336017a 100644 --- a/python/tvm/relay/qnn/op/qnn.py +++ b/python/tvm/relay/qnn/op/qnn.py @@ -27,6 +27,7 @@ from ...op import OpPattern + def requantize( data, input_scale, @@ -149,11 +150,11 @@ def simulated_quantize(data, output_scale, output_zero_point, axis=-1, out_dtype # Convert string dtype to a constant if needed. if isinstance(out_dtype, str): type_code = SQNN_DTYPE_TO_CODE[out_dtype] - out_dtype = relay.const([type_code], dtype='int32') + out_dtype = relay.const([type_code], dtype="int32") # Wrap reshapes around input tensors to guarantee shape compatibility. out_dtype = relay.op.reshape(out_dtype, [1]) output_scale = relay.op.reshape(output_scale, [-1]) - output_zero_point= relay.op.reshape(output_zero_point, [-1]) + output_zero_point = relay.op.reshape(output_zero_point, [-1]) return _make.simulated_quantize(data, out_dtype, output_scale, output_zero_point, axis) diff --git a/python/tvm/topi/nn/qnn.py b/python/tvm/topi/nn/qnn.py index c8d8c9638838..c98ea1b8e8d4 100644 --- a/python/tvm/topi/nn/qnn.py +++ b/python/tvm/topi/nn/qnn.py @@ -93,24 +93,24 @@ def _dispatch_sim_qnn(value): int8_value = te.compute( data.shape, lambda *indices: tir.if_then_else( - out_dtype[0] == SQNN_DTYPE_TO_CODE['int8'], - _compute_intn('int8', value, *indices), + out_dtype[0] == SQNN_DTYPE_TO_CODE["int8"], + _compute_intn("int8", value, *indices), fp32_value[indices], ), ) uint8_value = te.compute( data.shape, lambda *indices: tir.if_then_else( - out_dtype[0] == SQNN_DTYPE_TO_CODE['uint8'], - _compute_intn('uint8', value, *indices), + out_dtype[0] == SQNN_DTYPE_TO_CODE["uint8"], + _compute_intn("uint8", value, *indices), int8_value[indices], ), ) int32_value = te.compute( data.shape, lambda *indices: tir.if_then_else( - out_dtype[0] == SQNN_DTYPE_TO_CODE['int32'], - _compute_intn('int32', value, *indices), + out_dtype[0] == SQNN_DTYPE_TO_CODE["int32"], + _compute_intn("int32", value, *indices), uint8_value[indices], ), ) diff --git a/src/relay/qnn/op/quantize.cc b/src/relay/qnn/op/quantize.cc index 5c4705546f46..cd6ac8c84f4a 100644 --- a/src/relay/qnn/op/quantize.cc +++ b/src/relay/qnn/op/quantize.cc @@ -102,7 +102,7 @@ Expr QuantizeLower(const Expr& input_tensor, const Expr& output_scale, // Wrap axis from negative to positive if needed. 
if (axis < 0) { - axis = ((int) n_dim) + axis; + axis = ((int)n_dim) + axis; } auto expanded_output_scale = output_scale; diff --git a/src/relay/qnn/op/simulated_quantize.cc b/src/relay/qnn/op/simulated_quantize.cc index 275604b93533..98823c917190 100644 --- a/src/relay/qnn/op/simulated_quantize.cc +++ b/src/relay/qnn/op/simulated_quantize.cc @@ -52,12 +52,13 @@ bool SimulatedQuantizeRel(const Array& types, int num_inputs, const Attrs& return true; } -Expr MakeSimulatedQuantize(Expr data, Expr out_dtype, Expr output_scale, Expr output_zero_point, int axis) { +Expr MakeSimulatedQuantize(Expr data, Expr out_dtype, Expr output_scale, Expr output_zero_point, + int axis) { auto attrs = make_object(); attrs->axis = axis; static const Op& op = Op::Get("qnn.simulated_quantize"); auto out = Call(op, {data, out_dtype, output_scale, output_zero_point}, Attrs(attrs), {}); - + return out; } @@ -68,7 +69,8 @@ RELAY_REGISTER_OP("qnn.simulated_quantize") .set_attrs_type() .set_num_inputs(4) .add_argument("data", "Tensor", "The tensor to quantize.") - .add_argument("out_dtype", "Tensor", "A code corresponding to the type of quantization to apply.") + .add_argument("out_dtype", "Tensor", + "A code corresponding to the type of quantization to apply.") .add_argument("output_scale", "Tensor", "The quantization scale of the output tensor.") .add_argument("output_zero_point", "Tensor", "The quantization zero_point of the output tensor.") diff --git a/tests/python/relay/test_op_qnn_simulated_quantize.py b/tests/python/relay/test_op_qnn_simulated_quantize.py index 7b4e780bdd94..b1a8f58ba005 100644 --- a/tests/python/relay/test_op_qnn_simulated_quantize.py +++ b/tests/python/relay/test_op_qnn_simulated_quantize.py @@ -64,20 +64,20 @@ def build_simulated_quantize(input_data, scale, zp, dtype, axis=-1): return vm -def test_float32_to_uint8_simple(): - data = np.random.uniform(low=-128, high=127, size=[2, 5]).astype('float32') +def test_simulated_qnn_simple(dtype): + data = np.random.uniform(low=-128, high=127, size=[2, 5]).astype("float32") scale_np = np.float32(0.5) zp_np = np.int32(127) - dtype_np = np.int32(SQNN_DTYPE_TO_CODE['uint8']) + dtype_np = np.int32(SQNN_DTYPE_TO_CODE[dtype]) quant_args = {"out_zero_point": zp_np, "out_scale": scale_np} q_out = quantize_test_driver( in_dtype="float32", quant_args=quant_args, axis=-1, - out_dtype="uint8", + out_dtype=dtype, in_data=data, ) - input_data = relay.var("input_data", shape=data.shape, dtype='float32') + input_data = relay.var("input_data", shape=data.shape, dtype="float32") scale = relay.var("scale", shape=[]) zp = relay.var("zp", shape=[]) dtype = relay.var("dtype", shape=[]) @@ -88,16 +88,11 @@ def test_float32_to_uint8_simple(): def test_dynamic_channels(): # Compile simulated quantize once but support either per-channel or scalar params. - #data = np.random.uniform(low=-64, high=64, size=[2, 5]).astype('float32') - data = ( - np.array([-63.5, -63, -62.5, -62, -61.5, 30, 31, 31.5, 31.75, 32]) - .astype("float32") - .reshape((2, 5)) - ) + data = np.random.uniform(low=-64, high=64, size=[2, 5]).astype("float32") # Test scalar qnn params. 
- scale_np = np.asarray([0.5]).astype('float32') - zp_np = np.asarray([127]).astype('int32') - dtype_np = np.int32(SQNN_DTYPE_TO_CODE['uint8']) + scale_np = np.asarray([0.5]).astype("float32") + zp_np = np.asarray([127]).astype("int32") + dtype_np = np.int32(SQNN_DTYPE_TO_CODE["uint8"]) quant_args = {"out_zero_point": zp_np[0], "out_scale": scale_np[0]} q_out = quantize_test_driver( in_dtype="float32", @@ -107,9 +102,9 @@ def test_dynamic_channels(): in_data=data, ) # Create variables with undefined shape and run with scalar inputs. - input_data = relay.var("input_data", shape=data.shape, dtype='float32') - scale = relay.var("scale", shape=[relay.Any()], dtype='float32') - zp = relay.var("zp", shape=[relay.Any()], dtype='int32') + input_data = relay.var("input_data", shape=data.shape, dtype="float32") + scale = relay.var("scale", shape=[relay.Any()], dtype="float32") + zp = relay.var("zp", shape=[relay.Any()], dtype="int32") dtype = relay.var("dtype", shape=[]) vm = build_simulated_quantize(input_data, scale, zp, dtype, axis=0) sim_q_out = vm.invoke("main", input_data=data, scale=scale_np, zp=zp_np, dtype=dtype_np) @@ -118,7 +113,7 @@ def test_dynamic_channels(): # Now get the perchannel quantize output and compare without recompiling. scale_np = np.array([0.5, 0.25]).astype("float32") zp_np = np.array([127, 123]).astype("int32") - + # Get the reference quantize output. quant_args = {"out_zero_point": zp_np, "out_scale": scale_np} q_out = quantize_test_driver( @@ -133,6 +128,48 @@ def test_dynamic_channels(): np.testing.assert_equal(sim_q_out.asnumpy(), q_out) +def test_dynamic_dtype(): + # Compile simulated quantize once but support any type of quantization. + data = np.random.uniform(low=-64, high=64, size=[2, 5]).astype("float32") + # Test scalar float32 to uint8. + scale_np = np.asarray([0.5]).astype("float32") + zp_np = np.asarray([127]).astype("int32") + dtype_np = np.int32(SQNN_DTYPE_TO_CODE["uint8"]) + quant_args = {"out_zero_point": zp_np[0], "out_scale": scale_np[0]} + q_out = quantize_test_driver( + in_dtype="float32", + quant_args=quant_args, + axis=-1, + out_dtype="uint8", + in_data=data, + ) + # Create variables with undefined shape and run with scalar inputs. + input_data = relay.var("input_data", shape=data.shape, dtype="float32") + scale = relay.var("scale", shape=[relay.Any()], dtype="float32") + zp = relay.var("zp", shape=[relay.Any()], dtype="int32") + dtype = relay.var("dtype", shape=[]) + vm = build_simulated_quantize(input_data, scale, zp, dtype, axis=0) + sim_q_out = vm.invoke("main", input_data=data, scale=scale_np, zp=zp_np, dtype=dtype_np) + np.testing.assert_equal(sim_q_out.asnumpy(), q_out) + + # Now test float32 to int32 compilation. + # Get the reference quantize output. + q_out = quantize_test_driver( + in_dtype="float32", + quant_args=quant_args, + axis=-1, + out_dtype="int32", + in_data=data, + ) + # Run the simulated quantize without recompiling and confirm results match. 
+ dtype_np = np.int32(SQNN_DTYPE_TO_CODE["int32"]) + sim_q_out = vm.invoke("main", input_data=data, scale=scale_np, zp=zp_np, dtype=dtype_np) + np.testing.assert_equal(sim_q_out.asnumpy(), q_out) + + if __name__ == "__main__": - test_float32_to_uint8_simple() - test_dynamic_channels() \ No newline at end of file + test_simulated_qnn_simple("uint8") + test_simulated_qnn_simple("int8") + test_simulated_qnn_simple("int32") + test_dynamic_channels() + test_dynamic_dtype() diff --git a/tests/python/topi/python/test_topi_qnn.py b/tests/python/topi/python/test_topi_qnn.py index 92e4c180a723..df7682026ce1 100644 --- a/tests/python/topi/python/test_topi_qnn.py +++ b/tests/python/topi/python/test_topi_qnn.py @@ -22,22 +22,20 @@ import tvm.topi.testing -def verify_simulated_quantize( - data_shape, out_dtype, channels, axis -): +def verify_simulated_quantize(data_shape, out_dtype, channels, axis): # Create placeholder variables for all qnn inputs. - A = te.placeholder(data_shape, name='value', dtype='float32') - D = te.placeholder([1], name='dtype', dtype='int32') - S = te.placeholder([te.size_var("scale_dim")], name="scale", dtype='float32') - Z = te.placeholder([te.size_var("zp_dim")], name="zp", dtype='int32') + A = te.placeholder(data_shape, name="value", dtype="float32") + D = te.placeholder([1], name="dtype", dtype="int32") + S = te.placeholder([te.size_var("scale_dim")], name="scale", dtype="float32") + Z = te.placeholder([te.size_var("zp_dim")], name="zp", dtype="int32") SIM_Q = topi.nn.simulated_quantize(A, D, output_scale=S, output_zero_point=Z, axis=axis) # Create random numpy values to assign to inputs. - a_np = np.random.uniform(size=data_shape).astype('float32') - d_np = np.asarray([topi.nn.SQNN_DTYPE_TO_CODE[out_dtype]]).astype('int32') - s_np = np.random.uniform(low=1e-4, high=.1, size=channels).astype('float32') - z_np = np.random.uniform(low=-10, high=10, size=channels).astype('int32') - q_np = np.zeros(shape=data_shape, dtype='float32') + a_np = np.random.uniform(size=data_shape).astype("float32") + d_np = np.asarray([topi.nn.SQNN_DTYPE_TO_CODE[out_dtype]]).astype("int32") + s_np = np.random.uniform(low=1e-4, high=0.1, size=channels).astype("float32") + z_np = np.random.uniform(low=-10, high=10, size=channels).astype("int32") + q_np = np.zeros(shape=data_shape, dtype="float32") def check_device(device, ctx): # Wrap the numpy arrays in nd arrays. @@ -49,7 +47,7 @@ def check_device(device, ctx): # Construct equivalent relay graph. per_channel = channels[0] != 1 - a_var = relay.var('a', shape=data_shape, dtype='float32') + a_var = relay.var("a", shape=data_shape, dtype="float32") if per_channel: s_var = relay.const(s_np) z_var = relay.const(z_np) @@ -62,7 +60,7 @@ def check_device(device, ctx): # Get real qnn quantize output. m = graph_runtime.GraphModule(lib["default"](ctx)) - m.set_input('a', a_np) + m.set_input("a", a_np) m.run() real_q_out = m.get_output(0) @@ -74,19 +72,19 @@ def check_device(device, ctx): func(a, d, s, z, q) # Check correctness against the true qnn output. 
- tvm.testing.assert_allclose(q.asnumpy(), real_q_out.asnumpy().astype('float32')) + tvm.testing.assert_allclose(q.asnumpy(), real_q_out.asnumpy().astype("float32")) for target, ctx in tvm.testing.enabled_targets(): check_device(target, ctx) def test_simulated_quantize(): - verify_simulated_quantize([1], 'int8', [1], -1) - verify_simulated_quantize([2, 5], 'int8', [5], 1) - verify_simulated_quantize([1, 32, 32, 32], 'int8', [32], -1) - verify_simulated_quantize([1, 32, 32, 32], 'uint8', [32], -2) - verify_simulated_quantize([2, 5], 'int32', [5], 1) + verify_simulated_quantize([1], "int8", [1], -1) + verify_simulated_quantize([2, 5], "int8", [5], 1) + verify_simulated_quantize([1, 32, 32, 32], "int8", [32], -1) + verify_simulated_quantize([1, 32, 32, 32], "uint8", [32], -2) + verify_simulated_quantize([2, 5], "int32", [5], 1) + if __name__ == "__main__": test_simulated_quantize() - From a5b3211c5899b452146628cabec6f386c7e6b56d Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 8 Mar 2021 23:09:26 +0000 Subject: [PATCH 07/21] Change dtype to be a scalar rather than tensor. --- python/tvm/relay/qnn/op/qnn.py | 5 ++--- python/tvm/topi/nn/qnn.py | 8 ++++---- tests/python/topi/python/test_topi_qnn.py | 4 ++-- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/python/tvm/relay/qnn/op/qnn.py b/python/tvm/relay/qnn/op/qnn.py index 69953336017a..148a9f55e20b 100644 --- a/python/tvm/relay/qnn/op/qnn.py +++ b/python/tvm/relay/qnn/op/qnn.py @@ -150,9 +150,8 @@ def simulated_quantize(data, output_scale, output_zero_point, axis=-1, out_dtype # Convert string dtype to a constant if needed. if isinstance(out_dtype, str): type_code = SQNN_DTYPE_TO_CODE[out_dtype] - out_dtype = relay.const([type_code], dtype="int32") - # Wrap reshapes around input tensors to guarantee shape compatibility. - out_dtype = relay.op.reshape(out_dtype, [1]) + out_dtype = relay.const(type_code, dtype="int32") + # Wrap reshapes around qnn parameter tensors to guarantee shape compatibility. output_scale = relay.op.reshape(output_scale, [-1]) output_zero_point = relay.op.reshape(output_zero_point, [-1]) return _make.simulated_quantize(data, out_dtype, output_scale, output_zero_point, axis) diff --git a/python/tvm/topi/nn/qnn.py b/python/tvm/topi/nn/qnn.py index c98ea1b8e8d4..619f94dd1d3d 100644 --- a/python/tvm/topi/nn/qnn.py +++ b/python/tvm/topi/nn/qnn.py @@ -51,7 +51,7 @@ def simulated_quantize(data, out_dtype, output_scale=None, output_zero_point=Non value. output_scale: tvm.te.Tensor, optional - A 1-D tensor representing the scale to use when quantizing to integer datatypes. + A scalar tensor representing the scale to use when quantizing to integer datatypes. When it contains more than a single value, N must match the number of channels in data. 
output_zero_point: tvm.te.Tensor, optional @@ -93,7 +93,7 @@ def _dispatch_sim_qnn(value): int8_value = te.compute( data.shape, lambda *indices: tir.if_then_else( - out_dtype[0] == SQNN_DTYPE_TO_CODE["int8"], + out_dtype.equal(SQNN_DTYPE_TO_CODE["int8"]), _compute_intn("int8", value, *indices), fp32_value[indices], ), @@ -101,7 +101,7 @@ def _dispatch_sim_qnn(value): uint8_value = te.compute( data.shape, lambda *indices: tir.if_then_else( - out_dtype[0] == SQNN_DTYPE_TO_CODE["uint8"], + out_dtype.equal(SQNN_DTYPE_TO_CODE["uint8"]), _compute_intn("uint8", value, *indices), int8_value[indices], ), @@ -109,7 +109,7 @@ def _dispatch_sim_qnn(value): int32_value = te.compute( data.shape, lambda *indices: tir.if_then_else( - out_dtype[0] == SQNN_DTYPE_TO_CODE["int32"], + out_dtype.equal(SQNN_DTYPE_TO_CODE["int32"]), _compute_intn("int32", value, *indices), uint8_value[indices], ), diff --git a/tests/python/topi/python/test_topi_qnn.py b/tests/python/topi/python/test_topi_qnn.py index df7682026ce1..d397b99070a8 100644 --- a/tests/python/topi/python/test_topi_qnn.py +++ b/tests/python/topi/python/test_topi_qnn.py @@ -25,14 +25,14 @@ def verify_simulated_quantize(data_shape, out_dtype, channels, axis): # Create placeholder variables for all qnn inputs. A = te.placeholder(data_shape, name="value", dtype="float32") - D = te.placeholder([1], name="dtype", dtype="int32") + D = te.placeholder([], name="dtype", dtype="int32") S = te.placeholder([te.size_var("scale_dim")], name="scale", dtype="float32") Z = te.placeholder([te.size_var("zp_dim")], name="zp", dtype="int32") SIM_Q = topi.nn.simulated_quantize(A, D, output_scale=S, output_zero_point=Z, axis=axis) # Create random numpy values to assign to inputs. a_np = np.random.uniform(size=data_shape).astype("float32") - d_np = np.asarray([topi.nn.SQNN_DTYPE_TO_CODE[out_dtype]]).astype("int32") + d_np = np.int32(topi.nn.SQNN_DTYPE_TO_CODE[out_dtype]) s_np = np.random.uniform(low=1e-4, high=0.1, size=channels).astype("float32") z_np = np.random.uniform(low=-10, high=10, size=channels).astype("int32") q_np = np.zeros(shape=data_shape, dtype="float32") From 6a4745094a5fa05a20828a93dd23a29b542b6fcf Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 8 Mar 2021 23:22:30 +0000 Subject: [PATCH 08/21] Undo change to quantize. --- src/relay/qnn/op/quantize.cc | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/relay/qnn/op/quantize.cc b/src/relay/qnn/op/quantize.cc index cd6ac8c84f4a..ae3d1b4a544e 100644 --- a/src/relay/qnn/op/quantize.cc +++ b/src/relay/qnn/op/quantize.cc @@ -50,18 +50,15 @@ bool QuantizeRel(const Array& types, int num_inputs, const Attrs& attrs, << "Input type should be one of float32 but was " << input_dtype; const auto* quantize_attrs = attrs.as(); - - // Assign type to scale and zero point if they're channelwise. - if (data->shape.size() != 0) { - int axis = quantize_attrs->axis; - axis = (axis < 0) ? data->shape.size() + axis : axis; - ICHECK_LT(axis, static_cast(data->shape.size())) - << "axis " << quantize_attrs->axis << " is out of range"; - ICHECK_GE(axis, 0) << "axis " << quantize_attrs->axis << " is out of range"; - // Check and assign types for scale and zero points. - AssignType(types[1], DataType::Float(32), data->shape[axis], reporter); // scale - AssignType(types[2], DataType::Int(32), data->shape[axis], reporter); // zero point - } + int axis = quantize_attrs->axis; + axis = (axis < 0) ? 
data->shape.size() + axis : axis; + ICHECK_LT(axis, static_cast(data->shape.size())) + << "axis " << quantize_attrs->axis << " is out of range"; + ICHECK_GE(axis, 0) << "axis " << quantize_attrs->axis << " is out of range"; + + // Check and assign types for scale and zero points. + AssignType(types[1], DataType::Float(32), data->shape[axis], reporter); // scale + AssignType(types[2], DataType::Int(32), data->shape[axis], reporter); // zero point const Array oshape = data->shape; const DataType out_dtype = quantize_attrs->out_dtype; From 773f764c3b843790b4a8b9fa187de550fcb7a9d7 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 8 Mar 2021 23:23:33 +0000 Subject: [PATCH 09/21] formatting. --- python/tvm/relay/qnn/op/qnn.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/python/tvm/relay/qnn/op/qnn.py b/python/tvm/relay/qnn/op/qnn.py index 148a9f55e20b..03b7395d264d 100644 --- a/python/tvm/relay/qnn/op/qnn.py +++ b/python/tvm/relay/qnn/op/qnn.py @@ -27,7 +27,6 @@ from ...op import OpPattern - def requantize( data, input_scale, @@ -121,9 +120,6 @@ def quantize(data, output_scale, output_zero_point, axis=-1, out_dtype="int8"): return _make.quantize(data, output_scale, output_zero_point, axis, out_dtype) -<<<<<<< HEAD -def dequantize(data, input_scale, input_zero_point, axis=-1): -======= def simulated_quantize(data, output_scale, output_zero_point, axis=-1, out_dtype="int8"): r"""Simulated Quantize op Mimics the quantize op but has more flexibility in valid inputs and always @@ -158,7 +154,6 @@ def simulated_quantize(data, output_scale, output_zero_point, axis=-1, out_dtype def dequantize(data, input_scale, input_zero_point, axis=-1, out_dtype="float32"): ->>>>>>> Stuck on typerel problem. r"""Dequantize op This operator takes quantized int8 and unit8 as input and produces dequantized float32 as output. The output shape is the same as input shape. The input From 769c6de6c56ae550cb68c75060a66d583426b897 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 8 Mar 2021 23:31:55 +0000 Subject: [PATCH 10/21] Fix attritubes. --- include/tvm/relay/qnn/attrs.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/tvm/relay/qnn/attrs.h b/include/tvm/relay/qnn/attrs.h index 96e57be18af2..f0280a90c604 100644 --- a/include/tvm/relay/qnn/attrs.h +++ b/include/tvm/relay/qnn/attrs.h @@ -97,12 +97,6 @@ struct DequantizeAttrs : public tvm::AttrsNode { "The channel axis for channel wise dequantization. Default value is -1," "which corresponds to the last axis.") .set_default(-1); -<<<<<<< HEAD -======= - TVM_ATTR_FIELD(out_dtype) - .describe("The datatype we are dequantizing to (float32 or int32). Defaults to float32.") - .set_default(DataType::Float(32)); ->>>>>>> Simulated quantize totally finished. } }; From e465038f5da65ed55031d0ec09be4228ca7c25b1 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 9 Mar 2021 00:44:13 +0000 Subject: [PATCH 11/21] Fix negative axis dequantize bug. --- src/relay/qnn/op/dequantize.cc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/relay/qnn/op/dequantize.cc b/src/relay/qnn/op/dequantize.cc index 724441e0c523..d29106e28513 100644 --- a/src/relay/qnn/op/dequantize.cc +++ b/src/relay/qnn/op/dequantize.cc @@ -53,7 +53,7 @@ bool DequantizeRel(const Array& types, int num_inputs, const Attrs& attrs, const auto* dequantize_attrs = attrs.as(); int axis = dequantize_attrs->axis; - axis = (axis == -1) ? data->shape.size() - 1 : axis; + axis = (axis < 0) ? 
data->shape.size() + axis : axis; ICHECK_LT(axis, static_cast(data->shape.size())) << "axis " << dequantize_attrs->axis << " is out of range"; ICHECK_GE(axis, 0) << "axis " << dequantize_attrs->axis << " is out of range"; @@ -81,7 +81,7 @@ Expr MakeDequantize(Expr data, Expr input_scale, Expr input_zero_point, int axis Expr DequantizeLower(const Expr& input_tensor, const Expr& input_scale, const Expr& input_zero_point, const Array& types, const DequantizeAttrs* attrs) { - const auto axis = attrs->axis; + auto axis = attrs->axis; ICHECK_EQ(types.size(), 4); auto in_type = types[0]; @@ -92,6 +92,11 @@ Expr DequantizeLower(const Expr& input_tensor, const Expr& input_scale, size_t n_dim = input_shape.size(); + // Wrap axis from negative to positive if needed. + if (axis < 0) { + axis = ((int)n_dim) + axis; + } + // Expand scale and zero point if the input tensor is channel quantized auto expanded_input_scale = input_scale; if (!IsConstScalar(input_scale) && !IsScalarType(types[1])) { From c81968d9d5660c1944ec6972ba94871cdb1558ab Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 9 Mar 2021 00:50:47 +0000 Subject: [PATCH 12/21] Add topi simulated dequantize. --- python/tvm/relay/qnn/op/_qnn.py | 2 +- python/tvm/relay/qnn/op/qnn.py | 2 +- python/tvm/topi/nn/qnn.py | 70 ++++++++++++++++++++++- tests/python/topi/python/test_topi_qnn.py | 66 +++++++++++++++++++++ 4 files changed, 136 insertions(+), 4 deletions(-) diff --git a/python/tvm/relay/qnn/op/_qnn.py b/python/tvm/relay/qnn/op/_qnn.py index fb585efd0468..3194bab46430 100644 --- a/python/tvm/relay/qnn/op/_qnn.py +++ b/python/tvm/relay/qnn/op/_qnn.py @@ -35,4 +35,4 @@ def simulated_quantize_compute(attrs, inputs, output_type): register_injective_schedule("qnn.simulated_quantize") -register_pattern("qnn.simulated_quantize", OpPattern.OPAQUE) +register_pattern("qnn.simulated_quantize", OpPattern.ELEMWISE) diff --git a/python/tvm/relay/qnn/op/qnn.py b/python/tvm/relay/qnn/op/qnn.py index 03b7395d264d..3b6b350c66db 100644 --- a/python/tvm/relay/qnn/op/qnn.py +++ b/python/tvm/relay/qnn/op/qnn.py @@ -162,7 +162,7 @@ def dequantize(data, input_scale, input_zero_point, axis=-1, out_dtype="float32" Parameters ---------- data : tvm.relay.Expr - The input tensor to be dequantized. Can be of type [int8, uint8]. + The input tensor to be dequantized. Can be of type [int8, uint8, int32]. input_zero_point : tvm.relay.Expr The input zero_point. input_scale : tvm.relay.Expr diff --git a/python/tvm/topi/nn/qnn.py b/python/tvm/topi/nn/qnn.py index 619f94dd1d3d..ab0bf9cf00e4 100644 --- a/python/tvm/topi/nn/qnn.py +++ b/python/tvm/topi/nn/qnn.py @@ -88,7 +88,7 @@ def _compute_intn(dtype, value, *indices): # Use an if chain to dynamically return the proper quantization based on the input datatype. # This allows the op to compile once but apply different quantization approaches # using a variable datatype input. - def _dispatch_sim_qnn(value): + def _dispatch_sim_quantize(value): fp32_value = te.compute(data.shape, lambda *indices: _compute_fp32(value, *indices)) int8_value = te.compute( data.shape, @@ -117,4 +117,70 @@ def _dispatch_sim_qnn(value): return int32_value - return te.compute(data.shape, lambda *indices: _dispatch_sim_qnn(data)[indices]) + return te.compute(data.shape, lambda *indices: _dispatch_sim_quantize(data)[indices]) + + +@tvm.te.tag_scope(tag=topi.tag.ELEMWISE) +def simulated_dequantize(data, out_dtype, output_scale=None, output_zero_point=None, axis=-1): + """Simulated QNN dequantize operator that mimics QNN outputs in floating point. 
The benefit + of this operator over true QNN quantize is that this operator allows dynamic datatype + selection and can operate on both per-channel and scalar scales and zero points while + QNN quantize requires both of these to be fixed at compile time. + + Parameters + ---------- + data: tvm.te.Tensor + An N-D input tensor to the operator. + + out_dtype: tvm.te.Tensor + A 1-D variable that indicates which datatype to simulate quantization with. Use + SQNN_DTYPE_TO_CODE to convert a dtype string into the corresponding variable + value. + + output_scale: tvm.te.Tensor, optional + A scalar tensor representing the scale to use when quantizing to integer datatypes. + When it contains more than a single value, N must match the number of channels in data. + + output_zero_point: tvm.te.Tensor, optional + A 1-D tensor representing the zero point to use when quantizing to integer datatypes. + When it contains more than a single value, N must match the number of channels in data. + + axis: int, optional + The channel axis for quantization. Default value is -1 which corresponds to the last axis. + + """ + # Since all simulated inputs are in float32, we can just return the input tensor for fp32. + def _compute_fp32(value, *indices): + return value[indices] + + # Simulate dequantization for arbitrary integer datatypes. The computation for all datatypes is: + # DQ_output = (input - zero_point) * scale + def _compute_intn(value, *indices): + assert output_scale is not None and output_zero_point is not None + # Use indexmod to handle both scalar and per-channel QNN parameters. + scale_idx = tir.indexmod(indices[axis], topi.shape(output_scale)[0]) + zp_idx = tir.indexmod(indices[axis], topi.shape(output_zero_point)[0]) + return (value[indices] - output_zero_point[zp_idx]) * output_scale[scale_idx] + + # Use an if chain to dynamically return the proper dequantization based on the input datatype. + # This allows the op to compile once but apply different quantization approaches + # using a variable datatype input. + def _dispatch_sim_dequantize(value): + fp32_value = te.compute(data.shape, lambda *indices: _compute_fp32(value, *indices)) + intn_condition = tvm.te.any( + out_dtype.equal(SQNN_DTYPE_TO_CODE["int8"]), + out_dtype.equal(SQNN_DTYPE_TO_CODE["uint8"]), + out_dtype.equal(SQNN_DTYPE_TO_CODE["int32"]), + ) + intn_value = te.compute( + data.shape, + lambda *indices: tir.if_then_else( + intn_condition, + _compute_intn(value, *indices), + fp32_value[indices], + ), + ) + + return intn_value + + return te.compute(data.shape, lambda *indices: _dispatch_sim_dequantize(data)[indices]) diff --git a/tests/python/topi/python/test_topi_qnn.py b/tests/python/topi/python/test_topi_qnn.py index d397b99070a8..c4664b766f7f 100644 --- a/tests/python/topi/python/test_topi_qnn.py +++ b/tests/python/topi/python/test_topi_qnn.py @@ -86,5 +86,71 @@ def test_simulated_quantize(): verify_simulated_quantize([2, 5], "int32", [5], 1) +def verify_simulated_dequantize(data_shape, dtype, channels, axis): + # Create placeholder variables for all qnn inputs. + A = te.placeholder(data_shape, name="value", dtype="float32") + D = te.placeholder([], name="dtype", dtype="int32") + S = te.placeholder([te.size_var("scale_dim")], name="scale", dtype="float32") + Z = te.placeholder([te.size_var("zp_dim")], name="zp", dtype="int32") + SIM_DQ = topi.nn.simulated_dequantize(A, D, output_scale=S, output_zero_point=Z, axis=axis) + + # Create random numpy values to assign to inputs. 
+ a_np = np.random.uniform(low=-128, high=127, size=data_shape).astype(dtype) + a_np_f = a_np.astype('float32') + d_np = np.int32(topi.nn.SQNN_DTYPE_TO_CODE[dtype]) + s_np = np.random.uniform(low=1e-4, high=0.1, size=channels).astype("float32") + z_np = np.random.uniform(low=-10, high=10, size=channels).astype("int32") + dq_np = np.zeros(shape=data_shape, dtype="float32") + + def check_device(device, ctx): + # Wrap the numpy arrays in nd arrays. + a = tvm.nd.array(a_np_f, ctx) + d = tvm.nd.array(d_np, ctx) + s = tvm.nd.array(s_np, ctx) + z = tvm.nd.array(z_np, ctx) + dq = tvm.nd.array(dq_np, ctx) + + # Construct equivalent relay graph. + per_channel = channels[0] != 1 + a_var = relay.var("a", shape=data_shape, dtype=dtype) + if per_channel: + s_var = relay.const(s_np) + z_var = relay.const(z_np) + else: + s_var = relay.const(s_np[0]) + z_var = relay.const(z_np[0]) + real_dq_op = relay.qnn.op.dequantize(a_var, s_var, z_var, axis=axis, out_dtype=dtype) + with tvm.transform.PassContext(opt_level=3): + lib = relay.build(tvm.IRModule.from_expr(real_dq_op), target=device) + + # Get real qnn quantize output. + m = graph_runtime.GraphModule(lib["default"](ctx)) + m.set_input("a", a_np) + + m.run() + real_dq_out = m.get_output(0) + + # Compile the simulated quantize function. + with tvm.target.Target(device): + sched = tvm.topi.testing.get_injective_schedule(device)(SIM_DQ) + func = tvm.build(sched, [A, D, S, Z, SIM_DQ], device, name="sim_quantize") + func(a, d, s, z, dq) + + # Check correctness against the true qnn output. + tvm.testing.assert_allclose(dq.asnumpy(), real_dq_out.asnumpy().astype("float32")) + + for target, ctx in tvm.testing.enabled_targets(): + check_device(target, ctx) + + +def test_simulated_dequantize(): + verify_simulated_dequantize([1], "int8", [1], -1) + verify_simulated_dequantize([2, 5], "int8", [5], 1) + verify_simulated_dequantize([1, 32, 32, 32], "int8", [32], -1) + verify_simulated_dequantize([1, 32, 32, 32], "uint8", [32], -2) + verify_simulated_dequantize([2, 5], "int32", [5], 1) + + if __name__ == "__main__": test_simulated_quantize() + test_simulated_dequantize() From fefc2b01c6fe3479b8f09692f56d9a0605afafd9 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 9 Mar 2021 01:45:48 +0000 Subject: [PATCH 13/21] Add simulated_dequantize op to topi and relay. 
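
The TE kernel dispatches on a runtime datatype code: any integer code takes
the dequantize path, computing (input - zero_point) * scale with indexmod on
the channel axis so that length-1 and per-channel parameter tensors share one
kernel, while a float32 code passes the input through untouched. As a
reviewing aid, that math reduces to the following NumPy reference (a sketch
for clarity only; the helper name and the explicit fp32_code argument are
illustrative, not part of the diff):

    import numpy as np

    def simulated_dequantize_ref(data, dtype_code, scale, zero_point, fp32_code, axis=-1):
        # A float32 "dequantize" is the identity.
        if dtype_code == fp32_code:
            return data.copy()
        # Move the channel axis last so per-channel params broadcast.
        x = np.moveaxis(data, axis, -1)
        # indexmod semantics: a length-1 scale/zero_point applies to every
        # channel, a length-N one applies channel-wise.
        sdx = np.arange(x.shape[-1]) % len(scale)
        zdx = np.arange(x.shape[-1]) % len(zero_point)
        out = (x - zero_point[zdx]) * scale[sdx]
        return np.moveaxis(out, -1, axis)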
--- python/tvm/relay/qnn/op/_qnn.py | 20 +- python/tvm/relay/qnn/op/qnn.py | 37 +++- python/tvm/topi/nn/qnn.py | 30 +-- src/relay/qnn/op/quantize.cc | 4 +- src/relay/qnn/op/simulated_dequantize.cc | 82 +++++++++ src/relay/qnn/op/simulated_quantize.cc | 6 +- .../relay/test_op_qnn_simulated_dequantize.py | 173 ++++++++++++++++++ .../relay/test_op_qnn_simulated_quantize.py | 11 +- tests/python/topi/python/test_topi_qnn.py | 13 +- 9 files changed, 339 insertions(+), 37 deletions(-) create mode 100644 src/relay/qnn/op/simulated_dequantize.cc create mode 100644 tests/python/relay/test_op_qnn_simulated_dequantize.py diff --git a/python/tvm/relay/qnn/op/_qnn.py b/python/tvm/relay/qnn/op/_qnn.py index 3194bab46430..7890726d4fef 100644 --- a/python/tvm/relay/qnn/op/_qnn.py +++ b/python/tvm/relay/qnn/op/_qnn.py @@ -17,10 +17,10 @@ # pylint: disable=invalid-name, unused-argument, len-as-condition """QNN operator feature registration""" -from tvm import topi, relay +from tvm import topi -from ...op.op import register_compute, register_shape_func -from ...op.op import register_broadcast_schedule, register_injective_schedule +from ...op.op import register_compute +from ...op.op import register_injective_schedule from ...op.op import register_pattern, OpPattern @@ -36,3 +36,17 @@ def simulated_quantize_compute(attrs, inputs, output_type): register_injective_schedule("qnn.simulated_quantize") register_pattern("qnn.simulated_quantize", OpPattern.ELEMWISE) + + +@register_compute("qnn.simulated_dequantize") +def simulated_dequantize_compute(attrs, inputs, output_type): + assert len(inputs) == 4 + return [ + topi.nn.simulated_dequantize( + inputs[0], inputs[1], inputs[2], inputs[3], axis=attrs.get_int("axis") + ) + ] + + +register_injective_schedule("qnn.simulated_dequantize") +register_pattern("qnn.simulated_dequantize", OpPattern.ELEMWISE) diff --git a/python/tvm/relay/qnn/op/qnn.py b/python/tvm/relay/qnn/op/qnn.py index 3b6b350c66db..f9c84efa37a6 100644 --- a/python/tvm/relay/qnn/op/qnn.py +++ b/python/tvm/relay/qnn/op/qnn.py @@ -136,7 +136,7 @@ def simulated_quantize(data, output_scale, output_zero_point, axis=-1, out_dtype axis : int The channel axis for quantization. Default value is -1 which corresponds to the last axis. out_dtype : string or tvm.relay.Expr - A string or tensor indicating which datatype to quantize to. Uses + A string or tensor indicating which datatype to quantize to. Returns ------- @@ -153,7 +153,7 @@ def simulated_quantize(data, output_scale, output_zero_point, axis=-1, out_dtype return _make.simulated_quantize(data, out_dtype, output_scale, output_zero_point, axis) -def dequantize(data, input_scale, input_zero_point, axis=-1, out_dtype="float32"): +def dequantize(data, input_scale, input_zero_point, axis=-1): r"""Dequantize op This operator takes quantized int8 and unit8 as input and produces dequantized float32 as output. The output shape is the same as input shape. The input @@ -178,6 +178,39 @@ def dequantize(data, input_scale, input_zero_point, axis=-1, out_dtype="float32" return _make.dequantize(data, input_scale, input_zero_point, axis) +def simulated_dequantize(data, input_scale, input_zero_point, axis=-1, in_dtype="int8"): + r"""Simulated Quantize op + Mimics the quantize op but has more flexibility in valid inputs and always + outputs float32. This can be useful for calibrating or training a quantized network. + + Parameters + ---------- + data : tvm.relay.Expr + The input tensor to be quantized. Can be of type float32. 
+ input_zero_point : tvm.relay.Expr + The input zero_point. + input_scale : tvm.relay.Expr + The input scale. + axis : int + The channel axis for quantization. Default value is -1 which corresponds to the last axis. + in_dtype : string or tvm.relay.Expr + A string or tensor indicating which datatype to dequantize from. + + Returns + ------- + result : tvm.relay.Expr + The computed result. + """ + # Convert string dtype to a constant if needed. + if isinstance(in_dtype, str): + type_code = SQNN_DTYPE_TO_CODE[in_dtype] + in_dtype = relay.const(type_code, dtype="int32") + # Wrap reshapes around qnn parameter tensors to guarantee shape compatibility. + input_scale = relay.op.reshape(input_scale, [-1]) + input_zero_point = relay.op.reshape(input_zero_point, [-1]) + return _make.simulated_dequantize(data, in_dtype, input_scale, input_zero_point, axis) + + def concatenate(data, input_scales, input_zero_points, output_scale, output_zero_point, axis): """Concatenate the quantized input tensors along the given axis. diff --git a/python/tvm/topi/nn/qnn.py b/python/tvm/topi/nn/qnn.py index ab0bf9cf00e4..656916cad99d 100644 --- a/python/tvm/topi/nn/qnn.py +++ b/python/tvm/topi/nn/qnn.py @@ -46,7 +46,7 @@ def simulated_quantize(data, out_dtype, output_scale=None, output_zero_point=Non An N-D input tensor to the operator. out_dtype: tvm.te.Tensor - A 1-D variable that indicates which datatype to simulate quantization with. Use + A scalar variable that indicates which datatype to simulate quantization with. Use SQNN_DTYPE_TO_CODE to convert a dtype string into the corresponding variable value. @@ -121,7 +121,7 @@ def _dispatch_sim_quantize(value): @tvm.te.tag_scope(tag=topi.tag.ELEMWISE) -def simulated_dequantize(data, out_dtype, output_scale=None, output_zero_point=None, axis=-1): +def simulated_dequantize(data, in_dtype, input_scale=None, input_zero_point=None, axis=-1): """Simulated QNN dequantize operator that mimics QNN outputs in floating point. The benefit of this operator over true QNN quantize is that this operator allows dynamic datatype selection and can operate on both per-channel and scalar scales and zero points while @@ -132,17 +132,17 @@ def simulated_dequantize(data, out_dtype, output_scale=None, output_zero_point=N data: tvm.te.Tensor An N-D input tensor to the operator. - out_dtype: tvm.te.Tensor - A 1-D variable that indicates which datatype to simulate quantization with. Use + in_dtype: tvm.te.Tensor + A scalar variable that indicates which datatype to simulate dequantization with. Use SQNN_DTYPE_TO_CODE to convert a dtype string into the corresponding variable value. - output_scale: tvm.te.Tensor, optional - A scalar tensor representing the scale to use when quantizing to integer datatypes. + input_scale: tvm.te.Tensor, optional + A scalar tensor representing the scale to use when dequantizing from integer datatypes. When it contains more than a single value, N must match the number of channels in data. - output_zero_point: tvm.te.Tensor, optional - A 1-D tensor representing the zero point to use when quantizing to integer datatypes. + input_zero_point: tvm.te.Tensor, optional + A 1-D tensor representing the zero point to use when dequantizing from integer datatypes. When it contains more than a single value, N must match the number of channels in data. axis: int, optional @@ -156,11 +156,11 @@ def _compute_fp32(value, *indices): # Simulate dequantization for arbitrary integer datatypes. 
The computation for all datatypes is: # DQ_output = (input - zero_point) * scale def _compute_intn(value, *indices): - assert output_scale is not None and output_zero_point is not None + assert input_scale is not None and input_zero_point is not None # Use indexmod to handle both scalar and per-channel QNN parameters. - scale_idx = tir.indexmod(indices[axis], topi.shape(output_scale)[0]) - zp_idx = tir.indexmod(indices[axis], topi.shape(output_zero_point)[0]) - return (value[indices] - output_zero_point[zp_idx]) * output_scale[scale_idx] + scale_idx = tir.indexmod(indices[axis], topi.shape(input_scale)[0]) + zp_idx = tir.indexmod(indices[axis], topi.shape(input_zero_point)[0]) + return (value[indices] - input_zero_point[zp_idx]) * input_scale[scale_idx] # Use an if chain to dynamically return the proper dequantization based on the input datatype. # This allows the op to compile once but apply different quantization approaches @@ -168,9 +168,9 @@ def _compute_intn(value, *indices): def _dispatch_sim_dequantize(value): fp32_value = te.compute(data.shape, lambda *indices: _compute_fp32(value, *indices)) intn_condition = tvm.te.any( - out_dtype.equal(SQNN_DTYPE_TO_CODE["int8"]), - out_dtype.equal(SQNN_DTYPE_TO_CODE["uint8"]), - out_dtype.equal(SQNN_DTYPE_TO_CODE["int32"]), + in_dtype.equal(SQNN_DTYPE_TO_CODE["int8"]), + in_dtype.equal(SQNN_DTYPE_TO_CODE["uint8"]), + in_dtype.equal(SQNN_DTYPE_TO_CODE["int32"]), ) intn_value = te.compute( data.shape, diff --git a/src/relay/qnn/op/quantize.cc b/src/relay/qnn/op/quantize.cc index ae3d1b4a544e..6e291d2c97d5 100644 --- a/src/relay/qnn/op/quantize.cc +++ b/src/relay/qnn/op/quantize.cc @@ -19,8 +19,8 @@ /*! * \file src/relay/qnn/op/quantize.cc - * \brief QNN dequantize operator. Dequantize operator converts from quantized - * domain to unquantized domain. + * \brief QNN quantize operator. Quantize operator converts from unquantized + * domain to quantized domain. */ #include diff --git a/src/relay/qnn/op/simulated_dequantize.cc b/src/relay/qnn/op/simulated_dequantize.cc new file mode 100644 index 000000000000..f4c1291b9893 --- /dev/null +++ b/src/relay/qnn/op/simulated_dequantize.cc @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/relay/qnn/op/simulated_dequantize.cc + * \brief QNN simulated dequantize operator. Mimics the behavior + * of QNN dequantize in floating point with added flexibility. 
+ */
+
+#include <tvm/relay/analysis.h>
+#include <tvm/relay/op_attr_types.h>
+#include <tvm/relay/qnn/attrs.h>
+
+#include "../../transforms/pattern_utils.h"
+#include "../utils.h"
+
+namespace tvm {
+namespace relay {
+namespace qnn {
+
+bool SimulatedDequantizeRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
+                            const TypeReporter& reporter) {
+  // types = [data_type, datatype_type, scale_type, zp_type, ret_type]
+  ICHECK_EQ(types.size(), 5);
+  const auto* data = types[0].as<TensorTypeNode>();
+  const auto* dtype = types[1].as<TensorTypeNode>();
+
+  if ((data == nullptr) or (dtype == nullptr)) {
+    return false;
+  }
+
+  // assign output type
+  reporter->Assign(types[4], TensorType(data->shape, data->dtype));
+  return true;
+}
+
+Expr MakeSimulatedDequantize(Expr data, Expr in_dtype, Expr input_scale, Expr input_zero_point,
+                             int axis) {
+  auto attrs = make_object<DequantizeAttrs>();
+  attrs->axis = axis;
+  static const Op& op = Op::Get("qnn.simulated_dequantize");
+  auto out = Call(op, {data, in_dtype, input_scale, input_zero_point}, Attrs(attrs), {});
+
+  return out;
+}
+
+RELAY_REGISTER_OP("qnn.simulated_dequantize")
+    .describe(R"code(Simulates the functionality of qnn.dequantize but allows more flexible
+    dynamic input type conversion and always operates on float values.
+)code" TVM_ADD_FILELINE)
+    .set_attrs_type<DequantizeAttrs>()
+    .set_num_inputs(4)
+    .add_argument("data", "Tensor", "The tensor to dequantize.")
+    .add_argument("in_dtype", "Tensor",
+                  "A code corresponding to the type of quantization to convert from.")
+    .add_argument("input_scale", "Tensor", "The quantization scale of the input tensor.")
+    .add_argument("input_zero_point", "Tensor",
+                  "The quantization zero_point of the input tensor.")
+    .set_support_level(11)
+    .add_type_rel("QNNSimulatedDequantize", SimulatedDequantizeRel);
+
+TVM_REGISTER_GLOBAL("relay.qnn.op._make.simulated_dequantize").set_body_typed(MakeSimulatedDequantize);
+
+}  // namespace qnn
+}  // namespace relay
+}  // namespace tvm
diff --git a/tests/python/relay/test_op_qnn_simulated_dequantize.py b/tests/python/relay/test_op_qnn_simulated_dequantize.py
new file mode 100644
index 000000000000..b0236d40aa6f
--- /dev/null
+++ b/tests/python/relay/test_op_qnn_simulated_dequantize.py
@@ -0,0 +1,173 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+ +import tvm +from tvm import te +import numpy as np +from tvm import relay +from tvm.contrib import graph_runtime +from tvm.runtime.vm import VirtualMachine +from tvm.topi.nn.qnn import SQNN_DTYPE_TO_CODE + + +def dequantize_test_driver(in_dtype, quant_args, axis, in_data): + shape = in_data.shape + input_data = relay.var("input_data", shape=shape, dtype=in_dtype) + input_zero_point = relay.const(quant_args["in_zero_point"]) + input_scale = relay.const(quant_args["in_scale"]) + dequantized_output = relay.qnn.op.dequantize( + input_data, + input_scale=input_scale, + input_zero_point=input_zero_point, + axis=axis, + ) + mod = relay.Function(relay.analysis.free_vars(dequantized_output), dequantized_output) + mod = tvm.IRModule.from_expr(mod) + with tvm.transform.PassContext(opt_level=3): + graph, lib, params = relay.build(mod, "llvm", params=None) + rt_mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0)) + rt_mod.set_input(input_data=in_data) + rt_mod.set_input(**params) + rt_mod.run() + res = rt_mod.get_output(0).asnumpy() + return res + + +def build_simulated_dequantize(input_data, scale, zp, dtype, axis=-1): + sim_q = relay.qnn.op.simulated_dequantize( + input_data, + scale, + zp, + axis=axis, + in_dtype=dtype, + ) + mod = tvm.IRModule.from_expr(sim_q) + with tvm.transform.PassContext(opt_level=3): + vm_exec = relay.vm.compile(mod, "llvm", params=None) + vm = VirtualMachine(vm_exec, tvm.cpu(0)) + return vm + + +def test_simulated_dequantize_simple(dtype): + data = np.random.uniform(low=-128, high=127, size=[2, 5]).astype(dtype) + data_fp = data.astype('float32') + scale_np = np.float32(0.5) + zp_np = np.int32(127) + dtype_np = np.int32(SQNN_DTYPE_TO_CODE[dtype]) + quant_args = {"in_zero_point": zp_np, "in_scale": scale_np} + dq_out = dequantize_test_driver( + in_dtype=dtype, + quant_args=quant_args, + axis=-1, + in_data=data, + ) + input_data = relay.var("input_data", shape=data.shape, dtype="float32") + scale = relay.var("scale", shape=[]) + zp = relay.var("zp", shape=[]) + dtype = relay.var("dtype", shape=[]) + vm = build_simulated_dequantize(input_data, scale, zp, dtype) + sim_dq_out = vm.invoke("main", input_data=data_fp, scale=scale_np, zp=zp_np, dtype=dtype_np) + np.testing.assert_equal(sim_dq_out.asnumpy(), dq_out) + + +def test_dynamic_channels(): + # Compile simulated quantize once but support either per-channel or scalar params. + data = np.random.uniform(low=-64, high=64, size=[2, 5]).astype("int8") + data_fp = data.astype('float32') + # Test scalar qnn params. + scale_np = np.asarray([0.5]).astype("float32") + zp_np = np.asarray([0]).astype("int32") + dtype_np = np.int32(SQNN_DTYPE_TO_CODE["int8"]) + quant_args = {"in_zero_point": zp_np[0], "in_scale": scale_np[0]} + dq_out = dequantize_test_driver( + in_dtype="int8", + quant_args=quant_args, + axis=0, + in_data=data, + ) + # Create variables with undefined shape and run with scalar inputs. + input_data = relay.var("input_data", shape=data.shape, dtype="float32") + scale = relay.var("scale", shape=[relay.Any()], dtype="float32") + zp = relay.var("zp", shape=[relay.Any()], dtype="int32") + dtype = relay.var("dtype", shape=[]) + vm = build_simulated_dequantize(input_data, scale, zp, dtype, axis=0) + sim_dq_out = vm.invoke("main", input_data=data_fp, scale=scale_np, zp=zp_np, dtype=dtype_np) + np.testing.assert_equal(sim_dq_out.asnumpy(), dq_out) + + # Now get the perchannel quantize output and compare without recompiling. 
+    scale_np = np.array([0.5, 0.25]).astype("float32")
+    zp_np = np.array([127, 123]).astype("int32")
+
+    # Get the reference dequantize output.
+    quant_args = {"in_zero_point": zp_np, "in_scale": scale_np}
+    dq_out = dequantize_test_driver(
+        in_dtype="int8",
+        quant_args=quant_args,
+        axis=0,
+        in_data=data,
+    )
+    # Run the simulated dequantize without recompiling and confirm results match.
+    sim_dq_out = vm.invoke("main", input_data=data_fp, scale=scale_np, zp=zp_np, dtype=dtype_np)
+    np.testing.assert_equal(sim_dq_out.asnumpy(), dq_out)
+
+
+def test_dynamic_dtype():
+    # Compile simulated dequantize once but support any type of quantization.
+    data = np.random.uniform(low=0, high=255, size=[2, 5]).astype("uint8")
+    data_fp = data.astype('float32')
+    # Test scalar uint8 to fp32.
+    scale_np = np.asarray([0.5]).astype("float32")
+    zp_np = np.asarray([127]).astype("int32")
+    dtype_np = np.int32(SQNN_DTYPE_TO_CODE["uint8"])
+    quant_args = {"in_zero_point": zp_np[0], "in_scale": scale_np[0]}
+    dq_out = dequantize_test_driver(
+        in_dtype="uint8",
+        quant_args=quant_args,
+        axis=-1,
+        in_data=data,
+    )
+    # Create variables with undefined shape and run with scalar inputs.
+    input_data = relay.var("input_data", shape=data.shape, dtype="float32")
+    scale = relay.var("scale", shape=[relay.Any()], dtype="float32")
+    zp = relay.var("zp", shape=[relay.Any()], dtype="int32")
+    dtype = relay.var("dtype", shape=[])
+    vm = build_simulated_dequantize(input_data, scale, zp, dtype)
+    sim_dq_out = vm.invoke("main", input_data=data_fp, scale=scale_np, zp=zp_np, dtype=dtype_np)
+    np.testing.assert_equal(sim_dq_out.asnumpy(), dq_out)
+
+    # Now test int8 to float32 compilation.
+    data = np.random.uniform(low=0, high=255, size=[2, 5]).astype("int8")
+    data_fp = data.astype('float32')
+    # Get the reference dequantize output.
+    dq_out = dequantize_test_driver(
+        in_dtype="int8",
+        quant_args=quant_args,
+        axis=-1,
+        in_data=data,
+    )
+    # Run the simulated dequantize without recompiling and confirm results match.
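+    # Only the runtime dtype code changes from uint8 to int8 here; the VM
+    # built above is reused without recompiling.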
+ dtype_np = np.int32(SQNN_DTYPE_TO_CODE["int8"]) + sim_dq_out = vm.invoke("main", input_data=data_fp, scale=scale_np, zp=zp_np, dtype=dtype_np) + np.testing.assert_equal(sim_dq_out.asnumpy(), dq_out) + + +if __name__ == "__main__": + test_simulated_dequantize_simple("uint8") + test_simulated_dequantize_simple("int8") + test_simulated_dequantize_simple("int32") + test_dynamic_channels() + test_dynamic_dtype() diff --git a/tests/python/relay/test_op_qnn_simulated_quantize.py b/tests/python/relay/test_op_qnn_simulated_quantize.py index b1a8f58ba005..d6081baf333f 100644 --- a/tests/python/relay/test_op_qnn_simulated_quantize.py +++ b/tests/python/relay/test_op_qnn_simulated_quantize.py @@ -20,7 +20,6 @@ import numpy as np from tvm import relay from tvm.contrib import graph_runtime -from tvm.relay.testing import run_infer_type from tvm.runtime.vm import VirtualMachine from tvm.topi.nn.qnn import SQNN_DTYPE_TO_CODE @@ -64,7 +63,7 @@ def build_simulated_quantize(input_data, scale, zp, dtype, axis=-1): return vm -def test_simulated_qnn_simple(dtype): +def test_simulated_quantize_simple(dtype): data = np.random.uniform(low=-128, high=127, size=[2, 5]).astype("float32") scale_np = np.float32(0.5) zp_np = np.int32(127) @@ -148,7 +147,7 @@ def test_dynamic_dtype(): scale = relay.var("scale", shape=[relay.Any()], dtype="float32") zp = relay.var("zp", shape=[relay.Any()], dtype="int32") dtype = relay.var("dtype", shape=[]) - vm = build_simulated_quantize(input_data, scale, zp, dtype, axis=0) + vm = build_simulated_quantize(input_data, scale, zp, dtype) sim_q_out = vm.invoke("main", input_data=data, scale=scale_np, zp=zp_np, dtype=dtype_np) np.testing.assert_equal(sim_q_out.asnumpy(), q_out) @@ -168,8 +167,8 @@ def test_dynamic_dtype(): if __name__ == "__main__": - test_simulated_qnn_simple("uint8") - test_simulated_qnn_simple("int8") - test_simulated_qnn_simple("int32") + test_simulated_quantize_simple("uint8") + test_simulated_quantize_simple("int8") + test_simulated_quantize_simple("int32") test_dynamic_channels() test_dynamic_dtype() diff --git a/tests/python/topi/python/test_topi_qnn.py b/tests/python/topi/python/test_topi_qnn.py index c4664b766f7f..d9fd23b5138b 100644 --- a/tests/python/topi/python/test_topi_qnn.py +++ b/tests/python/topi/python/test_topi_qnn.py @@ -86,18 +86,18 @@ def test_simulated_quantize(): verify_simulated_quantize([2, 5], "int32", [5], 1) -def verify_simulated_dequantize(data_shape, dtype, channels, axis): +def verify_simulated_dequantize(data_shape, in_dtype, channels, axis): # Create placeholder variables for all qnn inputs. A = te.placeholder(data_shape, name="value", dtype="float32") D = te.placeholder([], name="dtype", dtype="int32") S = te.placeholder([te.size_var("scale_dim")], name="scale", dtype="float32") Z = te.placeholder([te.size_var("zp_dim")], name="zp", dtype="int32") - SIM_DQ = topi.nn.simulated_dequantize(A, D, output_scale=S, output_zero_point=Z, axis=axis) + SIM_DQ = topi.nn.simulated_dequantize(A, D, input_scale=S, input_zero_point=Z, axis=axis) # Create random numpy values to assign to inputs. 
- a_np = np.random.uniform(low=-128, high=127, size=data_shape).astype(dtype) + a_np = np.random.uniform(low=-128, high=127, size=data_shape).astype(in_dtype) a_np_f = a_np.astype('float32') - d_np = np.int32(topi.nn.SQNN_DTYPE_TO_CODE[dtype]) + d_np = np.int32(topi.nn.SQNN_DTYPE_TO_CODE[in_dtype]) s_np = np.random.uniform(low=1e-4, high=0.1, size=channels).astype("float32") z_np = np.random.uniform(low=-10, high=10, size=channels).astype("int32") dq_np = np.zeros(shape=data_shape, dtype="float32") @@ -112,14 +112,14 @@ def check_device(device, ctx): # Construct equivalent relay graph. per_channel = channels[0] != 1 - a_var = relay.var("a", shape=data_shape, dtype=dtype) + a_var = relay.var("a", shape=data_shape, dtype=in_dtype) if per_channel: s_var = relay.const(s_np) z_var = relay.const(z_np) else: s_var = relay.const(s_np[0]) z_var = relay.const(z_np[0]) - real_dq_op = relay.qnn.op.dequantize(a_var, s_var, z_var, axis=axis, out_dtype=dtype) + real_dq_op = relay.qnn.op.dequantize(a_var, s_var, z_var, axis=axis) with tvm.transform.PassContext(opt_level=3): lib = relay.build(tvm.IRModule.from_expr(real_dq_op), target=device) @@ -146,6 +146,7 @@ def check_device(device, ctx): def test_simulated_dequantize(): verify_simulated_dequantize([1], "int8", [1], -1) verify_simulated_dequantize([2, 5], "int8", [5], 1) + verify_simulated_dequantize([2, 5], "int8", [2], 0) verify_simulated_dequantize([1, 32, 32, 32], "int8", [32], -1) verify_simulated_dequantize([1, 32, 32, 32], "uint8", [32], -2) verify_simulated_dequantize([2, 5], "int32", [5], 1) From 90b3853693df50a2456534929038d6de9053fd7c Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 9 Mar 2021 01:46:02 +0000 Subject: [PATCH 14/21] Formatting. --- python/tvm/relay/qnn/op/_qnn.py | 2 +- src/relay/qnn/op/quantize.cc | 2 +- src/relay/qnn/op/simulated_dequantize.cc | 8 ++++---- tests/python/relay/test_op_qnn_simulated_dequantize.py | 8 ++++---- tests/python/topi/python/test_topi_qnn.py | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/python/tvm/relay/qnn/op/_qnn.py b/python/tvm/relay/qnn/op/_qnn.py index 7890726d4fef..a059c293a0f8 100644 --- a/python/tvm/relay/qnn/op/_qnn.py +++ b/python/tvm/relay/qnn/op/_qnn.py @@ -19,7 +19,7 @@ from tvm import topi -from ...op.op import register_compute +from ...op.op import register_compute from ...op.op import register_injective_schedule from ...op.op import register_pattern, OpPattern diff --git a/src/relay/qnn/op/quantize.cc b/src/relay/qnn/op/quantize.cc index 6e291d2c97d5..018c24ef3205 100644 --- a/src/relay/qnn/op/quantize.cc +++ b/src/relay/qnn/op/quantize.cc @@ -19,7 +19,7 @@ /*! * \file src/relay/qnn/op/quantize.cc - * \brief QNN quantize operator. Quantize operator converts from unquantized + * \brief QNN quantize operator. Quantize operator converts from unquantized * domain to quantized domain. 
  */
diff --git a/src/relay/qnn/op/simulated_dequantize.cc b/src/relay/qnn/op/simulated_dequantize.cc
index f4c1291b9893..13f9dde288d1 100644
--- a/src/relay/qnn/op/simulated_dequantize.cc
+++ b/src/relay/qnn/op/simulated_dequantize.cc
@@ -35,7 +35,7 @@ namespace relay {
 namespace qnn {
 
 bool SimulatedDequantizeRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
-                           const TypeReporter& reporter) {
+                            const TypeReporter& reporter) {
   // types = [data_type, datatype_type, scale_type, zp_type, ret_type]
   ICHECK_EQ(types.size(), 5);
   const auto* data = types[0].as<TensorTypeNode>();
@@ -70,12 +70,12 @@ RELAY_REGISTER_OP("qnn.simulated_dequantize")
     .add_argument("in_dtype", "Tensor",
                   "A code corresponding to the type of quantization to convert from.")
     .add_argument("input_scale", "Tensor", "The quantization scale of the input tensor.")
-    .add_argument("input_zero_point", "Tensor",
-                  "The quantization zero_point of the input tensor.")
+    .add_argument("input_zero_point", "Tensor", "The quantization zero_point of the input tensor.")
     .set_support_level(11)
     .add_type_rel("QNNSimulatedDequantize", SimulatedDequantizeRel);
 
-TVM_REGISTER_GLOBAL("relay.qnn.op._make.simulated_dequantize").set_body_typed(MakeSimulatedDequantize);
+TVM_REGISTER_GLOBAL("relay.qnn.op._make.simulated_dequantize")
+    .set_body_typed(MakeSimulatedDequantize);
 
 }  // namespace qnn
 }  // namespace relay
diff --git a/tests/python/relay/test_op_qnn_simulated_dequantize.py b/tests/python/relay/test_op_qnn_simulated_dequantize.py
index b0236d40aa6f..cf562abd9aca 100644
--- a/tests/python/relay/test_op_qnn_simulated_dequantize.py
+++ b/tests/python/relay/test_op_qnn_simulated_dequantize.py
@@ -64,7 +64,7 @@ def build_simulated_dequantize(input_data, scale, zp, dtype, axis=-1):
 
 def test_simulated_dequantize_simple(dtype):
     data = np.random.uniform(low=-128, high=127, size=[2, 5]).astype(dtype)
-    data_fp = data.astype('float32')
+    data_fp = data.astype("float32")
     scale_np = np.float32(0.5)
     zp_np = np.int32(127)
     dtype_np = np.int32(SQNN_DTYPE_TO_CODE[dtype])
@@ -87,7 +87,7 @@ def test_dynamic_channels():
     # Compile simulated dequantize once but support either per-channel or scalar params.
     data = np.random.uniform(low=-64, high=64, size=[2, 5]).astype("int8")
-    data_fp = data.astype('float32')
+    data_fp = data.astype("float32")
     # Test scalar qnn params.
     scale_np = np.asarray([0.5]).astype("float32")
     zp_np = np.asarray([0]).astype("int32")
@@ -128,7 +128,7 @@ def test_dynamic_dtype():
     # Compile simulated dequantize once but support any type of quantization.
     data = np.random.uniform(low=0, high=255, size=[2, 5]).astype("uint8")
-    data_fp = data.astype('float32')
+    data_fp = data.astype("float32")
     # Test scalar uint8 to fp32.
     scale_np = np.asarray([0.5]).astype("float32")
     zp_np = np.asarray([127]).astype("int32")
@@ -151,7 +151,7 @@ def test_dynamic_dtype():
     # Now test int8 to float32 compilation.
     data = np.random.uniform(low=0, high=255, size=[2, 5]).astype("int8")
-    data_fp = data.astype('float32')
+    data_fp = data.astype("float32")
     # Get the reference dequantize output.
     dq_out = dequantize_test_driver(
         in_dtype="int8",
diff --git a/tests/python/topi/python/test_topi_qnn.py b/tests/python/topi/python/test_topi_qnn.py
index d9fd23b5138b..a63f34fe08d0 100644
--- a/tests/python/topi/python/test_topi_qnn.py
+++ b/tests/python/topi/python/test_topi_qnn.py
@@ -96,7 +96,7 @@ def verify_simulated_dequantize(data_shape, in_dtype, channels, axis):
 
     # Create random numpy values to assign to inputs.
     a_np = np.random.uniform(low=-128, high=127, size=data_shape).astype(in_dtype)
-    a_np_f = a_np.astype('float32')
+    a_np_f = a_np.astype("float32")
     d_np = np.int32(topi.nn.SQNN_DTYPE_TO_CODE[in_dtype])
     s_np = np.random.uniform(low=1e-4, high=0.1, size=channels).astype("float32")
     z_np = np.random.uniform(low=-10, high=10, size=channels).astype("int32")

From 4d6f76390024ab33c628e345ecfce7b6aec509a5 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Tue, 9 Mar 2021 01:52:39 +0000
Subject: [PATCH 15/21] Test negative axis per-channel dequantization.

---
 src/relay/op/Assign                          | 273 -------------------
 tests/python/relay/test_op_qnn_dequantize.py |   2 +-
 2 files changed, 1 insertion(+), 274 deletions(-)
 delete mode 100644 src/relay/op/Assign

diff --git a/src/relay/op/Assign b/src/relay/op/Assign
deleted file mode 100644
index 2651fb5b68e9..000000000000
--- a/src/relay/op/Assign
+++ /dev/null
@@ -1,273 +0,0 @@
-algorithm/argsort.cc: reporter->Assign(types[1], TensorType(data->shape, param->dtype));
-algorithm/sort.cc: reporter->Assign(types[1], TensorType(data->shape, data->dtype));
-algorithm/topk.cc: reporter->Assign(types[1], TupleType({values_ty, indices_ty}));
-algorithm/topk.cc: reporter->Assign(types[1], values_ty);
-algorithm/topk.cc: reporter->Assign(types[1], indices_ty);
-type_relations.cc: reporter->Assign(types[i], types[0]);
-type_relations.cc: reporter->GetDiagCtx().Emit(Diagnostic::Error(t0->span)
-type_relations.cc: reporter->Assign(
-type_relations.cc: reporter->GetDiagCtx().Emit(Diagnostic::Error(t0->span)
-type_relations.cc: reporter->Assign(types[2], ConcreteBroadcast(GetRef(t0), GetRef(t1),
-type_relations.cc: reporter->Assign(types[1], out_type);
-type_relations.cc: reporter->Assign(types[1], TensorType(rank_shape, param->dtype));
-vision/nms.cc: reporter->Assign(types[2], TupleType(Array(fields)));
-vision/nms.cc: reporter->Assign(types[5], TupleType(Array(fields)));
-vision/nms.cc: reporter->Assign(types[5], TensorType(dshape, data->dtype));
-vision/multibox_op.cc: reporter->Assign(types[1], TensorType(oshape, data->dtype));
-vision/multibox_op.cc: ICHECK(reporter->AssertEQ(cls_shape[2], anchor_shape[1])) << "Number of anchors mismatch found";
-vision/multibox_op.cc: ICHECK(reporter->AssertEQ(cls_shape[2] * 4, loc_shape[1])) << "# anchors mismatch with # loc.";
-vision/multibox_op.cc: ICHECK(reporter->Assert(anchor_shape[1] > 0)) << "Number of anchors must > 0.";
-vision/multibox_op.cc: ICHECK(reporter->AssertEQ(anchor_shape[2], 4));
-vision/multibox_op.cc: reporter->Assign(types[3], TupleType(Array(fields)));
-vision/rcnn_op.cc: reporter->Assign(types[2], TensorType(oshape, data->dtype));
-vision/rcnn_op.cc: reporter->Assign(types[2], TensorType(oshape, data->dtype));
-vision/rcnn_op.cc: ICHECK(reporter->AssertEQ(im_info->shape[1], 3));
-vision/rcnn_op.cc: reporter->Assign(types[3], TensorType(oshape, cls_prob->dtype));
-vision/yolo.cc: reporter->Assign(types[1], TensorType(oshape, data->dtype));
-random/kernel.cc: reporter->Assign(types[0], ThreefryKeyType());
-random/kernel.cc: reporter->Assign(types[1],
-random/kernel.cc: reporter->Assign(types[0], ThreefryKeyType());
-random/kernel.cc: reporter->Assign(types[1], TupleType({ThreefryKeyType(), ThreefryKeyType()}));
-image/dilation2d.cc: reporter->Assign(types[2], TensorType(oshape, out_dtype));
-image/resize.cc: reporter->Assign(types[1], TensorType(layout_converter.BackwardShape(oshape), out_dtype));
-image/resize.cc: reporter->Assign(types[1], TensorType(layout_converter.BackwardShape(oshape), out_dtype));
-image/resize.cc:
reporter->Assign(types[3], TensorType(bshape, out_dtype)); -image/grid_sample.cc: ICHECK(data->shape.size() == 3U && reporter->AssertEQ(data->shape[1], 2) && -image/grid_sample.cc: reporter->AssertEQ(data->shape[2], 3)) -image/grid_sample.cc: reporter->Assign(types[1], TensorType(oshape, data->dtype)); -image/grid_sample.cc: reporter->Assign(types[2], TensorType(layout_converter.BackwardShape(oshape), data->dtype)); -tensor/transform.cc: reporter->Assign(types[1], TensorType(data->shape, param->dtype)); -tensor/transform.cc: reporter->Assign(types[2], TensorType(data->shape, dtype_like->dtype)); -tensor/transform.cc: reporter->Assign(types[1], TensorType(oshape, data->dtype)); -tensor/transform.cc: reporter->AssertEQ(first->shape[j], e->shape[j])) -tensor/transform.cc: reporter->Assign(types[1], TensorType(oshape, dtype)); -tensor/transform.cc: reporter->Assign(types[1], TensorType(oshape, data->dtype)); -tensor/transform.cc: reporter->Assign(types[1], TensorType(oshape, data->dtype)); -tensor/transform.cc: reporter->Assign(types[1], -tensor/transform.cc: ICHECK(reporter->AssertEQ(data->Size(), output_type->Size())) -tensor/transform.cc: reporter->Assign(types[2], output_type); -tensor/transform.cc: reporter->Assign(types[1], TensorType(result_shape, DataType::Int(32))); -tensor/transform.cc: reporter->Assign(types[3], TensorType(data->shape, data->dtype)); -tensor/transform.cc: reporter->Assign(types[3], TensorType(data->shape, data->dtype)); -tensor/transform.cc: reporter->AssertEQ(indices->shape[i + 1], data->shape[i]); -tensor/transform.cc: reporter->AssertEQ(data->shape[i - mdim->value + kdim], oshape[i]); -tensor/transform.cc: reporter->Assign(types[2], TensorType(oshape, data->dtype)); -tensor/transform.cc: reporter->Assign(types[2], TensorType(oshape, data->dtype)); -tensor/transform.cc: reporter->Assign(types[2], TensorType(oshape, data->dtype)); -tensor/transform.cc: reporter->Assign(types[1], TensorType(oshape, out_dtype)); -tensor/transform.cc: reporter->Assign(types[0], TensorType(oshape, out_dtype)); -tensor/transform.cc: reporter->Assign(types[2], TensorType(data->shape, data->dtype)); -tensor/transform.cc: reporter->Assign(types[0], types[1]); -tensor/transform.cc: reporter->Assign(types[1], types[2]); -tensor/transform.cc: reporter->Assign(types[2], TensorType({}, attrs->dtype)); -tensor/transform.cc: reporter->Assign(types[3], TensorType({num_elem}, attrs->dtype)); -tensor/transform.cc: reporter->Assign(types[3], TensorType({Any()}, attrs->dtype)); -tensor/transform.cc: reporter->Assign(types[1], TensorType(oshape, data->dtype)); -tensor/transform.cc: reporter->Assign(types[1], TupleType(Array(grids))); -tensor/transform.cc: reporter->Assign(types[1], TensorType(oshape, data->dtype)); -tensor/transform.cc: reporter->Assign(types[1], types[0]); -tensor/transform.cc: ICHECK(reporter->Assert(seq_lengths->shape[0] == data->shape[batch_axis])) -tensor/transform.cc: reporter->Assign(types[2], types[0]); -tensor/transform.cc: reporter->Assign(types[3], ret_ty); -tensor/transform.cc: reporter->Assign(types[1], TensorType(result_shape, data->dtype)); -tensor/transform.cc: reporter->Assign(types[2], types[1]); -tensor/transform.cc: reporter->Assign(types[2], TensorType(oshape, out_dtype)); -tensor/transform.cc: reporter->Assign(types[1], TensorType(oshape, out_dtype)); -tensor/transform.cc: reporter->Assign(types[2], types[1]); -tensor/transform.cc: reporter->Assign(types[1], TensorType(oshape, data->dtype)); -tensor/transform.cc: reporter->Assign(types[5], types[0]); 
-tensor/transform.cc: ICHECK(reporter->Assert(indexmod(data->shape[axis], sections->value) == -tensor/transform.cc: reporter->Assign(types[1], TupleType(Array(fields))); -tensor/transform.cc: ICHECK(reporter->Assert(Downcast(indices[i]) > begin)) -tensor/transform.cc: ICHECK(reporter->Assert(begin < data->shape[axis])) -tensor/transform.cc: reporter->Assign(types[1], TupleType(Array(fields))); -tensor/transform.cc: ICHECK(reporter->Assert(oshape[i] <= dshape[i])) -tensor/transform.cc: ICHECK(reporter->Assert(oshape[axis] <= dshape[axis])) -tensor/transform.cc: reporter->Assign(types[2], TensorType(oshape, data->dtype)); -tensor/transform.cc: reporter->Assign(types[1], TensorType(out_shape, data->dtype)); -tensor/transform.cc: reporter->Assign(types[1], TensorType(dst_shape, data->dtype)); -tensor/transform.cc: ICHECK(reporter->AssertEQ(indices->shape[i], data->shape[i])); -tensor/transform.cc: reporter->Assign(types[2], TensorType(oshape, data->dtype)); -tensor/transform.cc: reporter->Assign(types[2], TensorType(oshape, data->dtype)); -tensor/transform.cc: reporter->Assign(types[1], TensorType(valid_length_shape, valid_length->dtype)); -tensor/transform.cc: reporter->Assign(types[2], types[0]); -tensor/transform.cc: reporter->Assign(types[3], TensorType(oshape, param->dtype)); -tensor/transform.cc: reporter->Assign(types[2], TensorType(oshape, indices->dtype)); -tensor/transform.cc: reporter->Assign(types[3], TensorType(oshape, sparse_values->dtype)); -tensor/transform.cc: reporter->Assert(input->shape[i_ndims - 2] > -param->k1); -tensor/transform.cc: reporter->Assert(input->shape[i_ndims - 1] > param->k2); -tensor/transform.cc: reporter->AssertEQ(input->shape[i], diagonal->shape[i]); -tensor/transform.cc: reporter->AssertEQ(diagonal->shape[d_ndims - 2], param->k2 - param->k1 + 1); -tensor/transform.cc: reporter->AssertEQ(input->shape[d_ndims - 2], diagonal->shape[d_ndims - 2]); -tensor/transform.cc: reporter->AssertEQ(diagonal->shape[d_ndims - 1], max_diag_len); -tensor/transform.cc: reporter->Assign(types[2], TensorType(input->shape, input->dtype)); -tensor/transform.cc: reporter->Assign(types[1], TensorType(oshape, data->dtype)); -tensor/transform.cc: reporter->Assign(types[1], TensorType(data->shape, dtype)); -tensor/transform.cc: reporter->Assign(types[1], TensorType({prod}, dtype)); -tensor/transform.h: reporter->GetDiagCtx().EmitFatal( -tensor/transform.h: Diagnostic::Error(reporter->GetSpan()) -tensor/transform.h: reporter->GetDiagCtx().EmitFatal(Diagnostic::Error(reporter->GetSpan()) -tensor/transform.h: if (reporter->AssertEQ(non_any[0], non_any[k])) continue; -tensor/transform.h: reporter->Assign(types[1], rtype); -tensor/reduce.cc: ICHECK(reporter->Assert( -tensor/reduce.cc: reporter->Assign(types[1], TensorType(oshape, DataType::Int(32))); -tensor/reduce.cc: reporter->Assign(types[1], TensorType(oshape, data->dtype)); -tensor/reduce.cc: reporter->Assign(types[2], TensorType(oshape, data->dtype)); -tensor/unary.cc: reporter->Assign(types[1], TensorType({}, param->dtype)); -memory/memory.cc: auto mod = reporter->GetModule(); -memory/memory.cc: reporter->Assign(types[2], storage); -memory/memory.cc: auto mod = reporter->GetModule(); -memory/memory.cc: reporter->Assign(types[0], storage); -memory/memory.cc: reporter->Assign(types[3], alloc_type); -memory/memory.cc: reporter->Assign(types[1], TupleType::Empty()); -nn/nn.h: reporter->Assign(types[1], TensorType(wshape, weight_dtype)); -nn/nn.h: ICHECK(reporter->AssertEQ(data->shape[data->shape.size() - 1], weight->shape[1])) 
-nn/nn.h: reporter->Assign(types[2], TensorType(oshape, out_dtype)); -nn/nn.h: reporter->Assign(types[2], TensorType(oshape, out_dtype)); -nn/convolution.h: reporter->Assign(types[1], TensorType(wshape, weight_dtype)); -nn/convolution.h: ICHECK(reporter->AssertEQ(param->kernel_size[0], wshape[2])) -nn/convolution.h: ICHECK(reporter->AssertEQ(param->channels, wshape[0])) -nn/convolution.h: ICHECK(reporter->AssertEQ(dshape_ncw[1], wshape[1])); -nn/convolution.h: reporter->Assign(types[2], TensorType(oshape, out_dtype)); -nn/convolution.h: reporter->GetDiagCtx().Emit( -nn/convolution.h: Diagnostic::Error(reporter->GetSpan()) -nn/convolution.h: reporter->GetDiagCtx().Emit( -nn/convolution.h: Diagnostic::Error(reporter->GetSpan()) -nn/convolution.h: reporter->GetDiagCtx().Emit( -nn/convolution.h: Diagnostic::Error(reporter->GetSpan()) -nn/convolution.h: reporter->GetDiagCtx().Emit( -nn/convolution.h: Diagnostic::Error(reporter->GetSpan()) -nn/convolution.h: reporter->Assign(types[1], TensorType(wshape, weight_dtype)); -nn/convolution.h: if (!reporter->AssertEQ(param->kernel_size[0], wshape[2])) { -nn/convolution.h: reporter->GetDiagCtx().Emit(Diagnostic::Error(reporter->GetSpan()) -nn/convolution.h: if (!reporter->AssertEQ(param->kernel_size[1], wshape[3])) { -nn/convolution.h: reporter->GetDiagCtx().Emit(Diagnostic::Error(reporter->GetSpan()) -nn/convolution.h: if (param->channels.defined() && !reporter->AssertEQ(param->channels, wshape[0])) { -nn/convolution.h: reporter->GetDiagCtx().Emit( -nn/convolution.h: Diagnostic::Error(reporter->GetSpan()) -nn/convolution.h: if (!reporter->AssertEQ(indexdiv(dshape_nchw[1], param->groups), wshape[1])) { -nn/convolution.h: reporter->GetDiagCtx().Emit(Diagnostic::Error(reporter->GetSpan()) -nn/convolution.h: reporter->Assign(types[2], TensorType(oshape, out_dtype)); -nn/convolution.h: reporter->Assign(types[1], TensorType(wshape, weight_dtype)); -nn/convolution.h: ICHECK(reporter->AssertEQ(param->kernel_size[0], wshape[2]) && -nn/convolution.h: reporter->AssertEQ(param->kernel_size[1], wshape[3]) && -nn/convolution.h: reporter->AssertEQ(param->kernel_size[2], wshape[4])) -nn/convolution.h: ICHECK(reporter->AssertEQ(param->channels, wshape[0])) -nn/convolution.h: ICHECK(reporter->AssertEQ(indexdiv(dshape_ncdhw[1], param->groups), wshape[1])); -nn/convolution.h: reporter->Assign(types[2], TensorType(oshape, out_dtype)); -nn/convolution.h: reporter->Assign(types[1], TensorType(Array(oshape), data->dtype)); -nn/convolution.h: reporter->Assign(types[1], TensorType(oshape, weight->dtype)); -nn/convolution.h: reporter->Assign(types[1], TensorType(oshape, data->dtype)); -nn/convolution.h: reporter->Assign(types[1], TensorType(Array(oshape), out_dtype)); -nn/convolution.h: reporter->Assign(types[2], TensorType(oshape, out_dtype)); -nn/convolution.h: reporter->Assign(types[2], TensorType(oshape, out_dtype)); -nn/convolution.h: reporter->Assign(types[2], TensorType(oshape, out_dtype)); -nn/convolution.h: reporter->Assign(types[1], TensorType(wshape, data->dtype)); -nn/convolution.h: ICHECK(reporter->AssertEQ(param->kernel_size[0], wshape[2])) -nn/convolution.h: ICHECK(reporter->AssertEQ(param->channels, wshape[1])) -nn/convolution.h: ICHECK(reporter->AssertEQ(indexdiv(dshape_ncw[1], param->groups), wshape[0])); -nn/convolution.h: reporter->Assign(types[2], TensorType(oshape, out_dtype)); -nn/convolution.h: reporter->Assign(types[1], TensorType(wshape, data->dtype)); -nn/convolution.h: ICHECK(reporter->AssertEQ(param->kernel_size[0], wshape[2]) && -nn/convolution.h: 
reporter->AssertEQ(param->kernel_size[1], wshape[3]) && -nn/convolution.h: reporter->AssertEQ(param->kernel_size[2], wshape[4])) -nn/convolution.h: ICHECK(reporter->AssertEQ(param->channels, wshape[1])) -nn/convolution.h: ICHECK(reporter->AssertEQ(indexdiv(dshape_ncdhw[1], param->groups), wshape[0])); -nn/convolution.h: reporter->Assign(types[2], TensorType(oshape, out_dtype)); -nn/convolution.h: reporter->Assign(types[1], TensorType(wshape, data->dtype)); -nn/convolution.h: ICHECK(reporter->AssertEQ(param->kernel_size[0], wshape[2]) && -nn/convolution.h: reporter->AssertEQ(param->kernel_size[1], wshape[3])) -nn/convolution.h: ICHECK(reporter->AssertEQ(param->channels, wshape[1])) -nn/convolution.h: ICHECK(reporter->AssertEQ(indexdiv(dshape_nchw[1], param->groups), wshape[0])); -nn/convolution.h: reporter->Assign(types[2], TensorType(oshape, out_dtype)); -nn/convolution.h: reporter->GetDiagCtx().Emit( -nn/convolution.h: Diagnostic::Error(reporter->GetSpan()) -nn/convolution.h: reporter->GetDiagCtx().Emit( -nn/convolution.h: Diagnostic::Error(reporter->GetSpan()) -nn/convolution.h: reporter->GetDiagCtx().Emit( -nn/convolution.h: Diagnostic::Error(reporter->GetSpan()) -nn/convolution.h: reporter->Assign(types[2], TensorType(wshape, data->dtype)); -nn/convolution.h: ICHECK(reporter->AssertEQ(param->kernel_size[0], wshape[2]) && -nn/convolution.h: reporter->AssertEQ(param->kernel_size[1], wshape[3])) -nn/convolution.h: ICHECK(reporter->AssertEQ(param->channels, wshape[0])) -nn/convolution.h: ICHECK(reporter->AssertEQ(indexdiv(dshape_nchw[1], param->groups), wshape[1])); -nn/convolution.h: reporter->Assign(types[1], TensorType(offset_shape, data->dtype)); -nn/convolution.h: reporter->Assign(types[3], TensorType(oshape, out_dtype)); -nn/upsampling.cc: reporter->Assign(types[1], TensorType(layout_converter.BackwardShape(oshape), data->dtype)); -nn/upsampling.cc: reporter->Assign(types[1], TensorType(layout_converter.BackwardShape(oshape), data->dtype)); -nn/sparse.cc: reporter->Assign(types[4], TensorType(oshape, weight->dtype)); -nn/sparse.cc: reporter->Assign(types[4], TensorType(oshape, weight->dtype)); -nn/sparse.cc: reporter->Assign(types[4], TensorType(oshape, data->dtype)); -nn/sparse.cc: reporter->Assign(types[4], TensorType(oshape, data->dtype)); -nn/sparse.cc: reporter->Assign(types[3], TupleType(Array(output_types))); -nn/nn.cc: reporter->Assign(types[1], TensorType({data->shape[axis]}, data->dtype)); -nn/nn.cc: reporter->Assign(types[2], types[0]); -nn/nn.cc: reporter->Assert(buffer_axis < buffer->shape.size()); -nn/nn.cc: reporter->AssertEQ(input->shape[i], buffer->shape[i]); -nn/nn.cc: reporter->Assert(input->shape[buffer_axis] < buffer->shape[buffer_axis]); -nn/nn.cc: reporter->Assign(types[2], TensorType(oshape, buffer->dtype)); -nn/nn.cc: reporter->Assign(types[1], TensorType(alpha_shape, data->dtype)); -nn/nn.cc: reporter->Assign(types[2], TensorType(data->shape, data->dtype)); -nn/nn.cc: reporter->Assign(types[1], TensorType(oshape, data->dtype)); -nn/nn.cc: reporter->Assign(types[1], TupleType(Array({ret_type, ret_type}))); -nn/nn.cc: reporter->Assign(types[1], TensorType({axis_size}, data->dtype)); -nn/nn.cc: reporter->Assign(types[2], TensorType({axis_size}, data->dtype)); -nn/nn.cc: reporter->Assign(types[3], TensorType({axis_size}, data->dtype)); -nn/nn.cc: reporter->Assign(types[4], TensorType({axis_size}, data->dtype)); -nn/nn.cc: reporter->Assign(types[5], TupleType(Array(fields))); -nn/nn.cc: reporter->Assign(types[1], TensorType({data->shape[axis]}, data->dtype)); 
-nn/nn.cc: reporter->Assign(types[2], TensorType({data->shape[axis]}, data->dtype)); -nn/nn.cc: reporter->Assign(types[3], TensorType(data->shape, data->dtype)); -nn/nn.cc: reporter->Assign(types[1], TensorType({data->shape[axis]}, data->dtype)); -nn/nn.cc: reporter->Assign(types[2], TensorType({data->shape[axis]}, data->dtype)); -nn/nn.cc: reporter->Assign(types[3], TensorType(data->shape, data->dtype)); -nn/nn.cc: reporter->Assign(types[1], TensorType({data->shape[axis]}, data->dtype)); -nn/nn.cc: reporter->Assign(types[2], TensorType({data->shape[axis]}, data->dtype)); -nn/nn.cc: reporter->Assign(types[3], TensorType(data->shape, data->dtype)); -nn/nn.cc: ICHECK(reporter->AssertEQ(x->shape[0], y_shape[0]) || reporter->AssertEQ(x->shape[0], 1) || -nn/nn.cc: reporter->AssertEQ(y_shape[0], 1)) -nn/nn.cc: ICHECK(reporter->AssertEQ(x->shape[2], y_shape[2])) -nn/nn.cc: reporter->Assign(types[2], TensorType(oshape, x->dtype)); -nn/nn.cc: ICHECK(reporter->AssertEQ(x->shape[0], y->shape[0])) -nn/nn.cc: ICHECK(reporter->AssertEQ(x->shape[1], y->shape[1])) -nn/nn.cc: reporter->Assign(types[2], TensorType({}, x->dtype)); -nn/nn.cc: reporter->Assign(types[1], TensorType(Array(oshape), x->dtype)); -nn/nn.cc: reporter->Assign(types[1], TensorType(layout_converter.BackwardShape(oshape), data->dtype)); -nn/nn.cc: reporter->Assign(types[1], TensorType(layout_converter.BackwardShape(oshape), data->dtype)); -nn/nn.cc: reporter->Assign(types[1], TensorType(Array(out_shape), input->dtype)); -nn/nn.cc: reporter->Assign(types[1], TensorType(Array(out_shape), input->dtype)); -nn/correlation.cc: reporter->Assign(types[2], TensorType(oshape, data1->dtype)); -nn/pad.cc: reporter->Assign(types[1], TensorType(Array(oshape), data->dtype)); -nn/pad.cc: reporter->Assign(types[1], TensorType(Array(oshape), data->dtype)); -nn/bitserial.cc: reporter->Assign(types[1], TensorType(out_shape, pack_type)); -nn/bitserial.cc: reporter->Assign(types[2], TensorType(oshape, out_dtype)); -nn/bitserial.cc: reporter->Assign(types[2], TensorType(oshape, out_dtype)); -nn/pooling.cc: reporter->Assign(types[1], TensorType(oshape, data->dtype)); -nn/pooling.cc: reporter->Assign(types[1], TensorType(oshape, data->dtype)); -nn/pooling.cc: reporter->Assign(types[1], TensorType(oshape, data->dtype)); -nn/pooling.cc: reporter->Assign(types[1], TensorType(oshape, data->dtype)); -nn/pooling.cc: reporter->Assign(types[2], types[1]); -nn/pooling.cc: reporter->Assign(types[1], TensorType(oshape, data->dtype)); -nn/pooling.cc: reporter->Assign(types[1], TensorType(oshape, data->dtype)); -vm/vm.cc: reporter->Assign(types[1], input_type); -vm/vm.cc: reporter->Assign(types[2], output_type); -vm/vm.cc: reporter->Assign(types[3], TupleType::Empty()); -vm/vm.cc: reporter->Assign(ex_input, GetRef(input_type)); -vm/vm.cc: reporter->Assign(ex_output, GetRef(output_type)); -vm/vm.cc: reporter->Assign(types[3], TupleType::Empty()); -vm/vm.cc: reporter->Assign(types[2], TensorType(reshape_attrs->newshape, tt->dtype)); -dyn/algorithm/topk.cc: reporter->Assign(types[2], TupleType({values_ty, indices_ty})); -dyn/algorithm/topk.cc: reporter->Assign(types[2], values_ty); -dyn/algorithm/topk.cc: reporter->Assign(types[2], indices_ty); -dyn/image/resize.cc: reporter->Assign(types[2], TensorType(layout_converter.BackwardShape(oshape), out_dtype)); -dyn/tensor/transform.cc: reporter->Assign(types[2], TensorType(oshape, data->dtype)); -dyn/tensor/transform.cc: reporter->Assign(types[2], TensorType(oshape, data->dtype)); -dyn/tensor/transform.cc: reporter->Assign(types[2], 
TensorType(oshape, out_dtype));
-dyn/tensor/transform.cc: reporter->Assign(types[1], TensorType(oshape, out_dtype));
-dyn/tensor/transform.cc: reporter->Assign(types[4], TensorType(oshape, param->dtype));
-dyn/tensor/transform.cc: reporter->Assign(types[2], TensorType(oshape, out_dtype));
-dyn/tensor/transform.cc: reporter->Assign(types[4], TensorType(oshape, data->dtype));
-dyn/tensor/transform.cc: reporter->Assign(types[4], TensorType(oshape, sparse_values->dtype));
-dyn/nn/upsampling.cc: reporter->Assign(types[3], TensorType(oshape, data->dtype));
-dyn/nn/upsampling.cc: reporter->Assign(types[4], TensorType(oshape, data->dtype));
-dyn/nn/pad.cc: reporter->Assign(types[3], TensorType(oshape, data->dtype));
diff --git a/tests/python/relay/test_op_qnn_dequantize.py b/tests/python/relay/test_op_qnn_dequantize.py
index e7fb161a13cb..1833458fdb75 100644
--- a/tests/python/relay/test_op_qnn_dequantize.py
+++ b/tests/python/relay/test_op_qnn_dequantize.py
@@ -98,7 +98,7 @@ def test_channelwise_axis_1():
     }
 
     dequantize_test_driver(
-        in_dtype="uint8", quant_args=quant_args, in_data=data, verify_output_data=output, axis=1
+        in_dtype="uint8", quant_args=quant_args, in_data=data, verify_output_data=output, axis=-1
     )
 

From 7297198dc996b61a71d47584c6542575ae3076a5 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Tue, 9 Mar 2021 03:36:27 +0000
Subject: [PATCH 16/21] Lint formatting.

---
 python/tvm/relay/qnn/op/qnn.py           | 2 +-
 src/relay/qnn/op/dequantize.cc           | 2 +-
 src/relay/qnn/op/quantize.cc             | 2 +-
 src/relay/qnn/op/simulated_dequantize.cc | 2 +-
 src/relay/qnn/op/simulated_quantize.cc   | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/python/tvm/relay/qnn/op/qnn.py b/python/tvm/relay/qnn/op/qnn.py
index f9c84efa37a6..3cd4c6f9f5f4 100644
--- a/python/tvm/relay/qnn/op/qnn.py
+++ b/python/tvm/relay/qnn/op/qnn.py
@@ -22,7 +22,7 @@ from tvm.relay.expr import Tuple, TupleWrapper
 from tvm.relay.op.nn.utils import get_pad_tuple2d
 from . import _make
-from tvm.topi.nn.qnn import *
+from tvm.topi.nn.qnn import SQNN_DTYPE_TO_CODE
 from ... import op as reg
 from ...op import OpPattern
 
diff --git a/src/relay/qnn/op/dequantize.cc b/src/relay/qnn/op/dequantize.cc
index d29106e28513..b0fe9356a758 100644
--- a/src/relay/qnn/op/dequantize.cc
+++ b/src/relay/qnn/op/dequantize.cc
@@ -94,7 +94,7 @@ Expr DequantizeLower(const Expr& input_tensor, const Expr& input_scale,
 
   // Wrap axis from negative to positive if needed.
   if (axis < 0) {
-    axis = ((int)n_dim) + axis;
+    axis = static_cast<int>(n_dim) + axis;
   }
 
   // Expand scale and zero point if the input tensor is channel quantized
diff --git a/src/relay/qnn/op/quantize.cc b/src/relay/qnn/op/quantize.cc
index 6e291d2c97d5..018c24ef3205 100644
--- a/src/relay/qnn/op/quantize.cc
+++ b/src/relay/qnn/op/quantize.cc
@@ -99,7 +99,7 @@ Expr QuantizeLower(const Expr& input_tensor, const Expr& output_scale,
 
   // Wrap axis from negative to positive if needed.
   if (axis < 0) {
-    axis = ((int)n_dim) + axis;
+    axis = static_cast<int>(n_dim) + axis;
   }
 
   auto expanded_output_scale = output_scale;
diff --git a/src/relay/qnn/op/simulated_dequantize.cc b/src/relay/qnn/op/simulated_dequantize.cc
index 13f9dde288d1..fe69c746c7b2 100644
--- a/src/relay/qnn/op/simulated_dequantize.cc
+++ b/src/relay/qnn/op/simulated_dequantize.cc
@@ -41,7 +41,7 @@ bool SimulatedDequantizeRel(const Array<Type>& types, int num_inputs, const Attr
   const auto* data = types[0].as<TensorTypeNode>();
   const auto* dtype = types[1].as<TensorTypeNode>();
 
-  if ((data == nullptr) or (dtype == nullptr)) {
+  if ((data == nullptr) || (dtype == nullptr)) {
     return false;
   }
 
diff --git a/src/relay/qnn/op/simulated_quantize.cc b/src/relay/qnn/op/simulated_quantize.cc
index a00525a93191..2322ae1961b2 100644
--- a/src/relay/qnn/op/simulated_quantize.cc
+++ b/src/relay/qnn/op/simulated_quantize.cc
@@ -43,7 +43,7 @@ bool SimulatedQuantizeRel(const Array<Type>& types, int num_inputs, const Attrs&
   const auto* data = types[0].as<TensorTypeNode>();
   const auto* dtype = types[1].as<TensorTypeNode>();
 
-  if ((data == nullptr) or (dtype == nullptr)) {
+  if ((data == nullptr) || (dtype == nullptr)) {
     return false;
   }
 

From 46ed909330d3c341111cc4f30f0bfc8fee7953ff Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Tue, 9 Mar 2021 03:51:01 +0000
Subject: [PATCH 17/21] Change import order to make lint happy.

---
 python/tvm/relay/qnn/op/qnn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/tvm/relay/qnn/op/qnn.py b/python/tvm/relay/qnn/op/qnn.py
index 3cd4c6f9f5f4..5145bebc1905 100644
--- a/python/tvm/relay/qnn/op/qnn.py
+++ b/python/tvm/relay/qnn/op/qnn.py
@@ -21,8 +21,8 @@ from tvm import relay
 from tvm.relay.expr import Tuple, TupleWrapper
 from tvm.relay.op.nn.utils import get_pad_tuple2d
-from . import _make
 from tvm.topi.nn.qnn import SQNN_DTYPE_TO_CODE
+from . import _make
 from ... import op as reg
 from ...op import OpPattern
 

From e83c8331a2e7182935cd32d74e1ab2facd0c74db Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Tue, 9 Mar 2021 06:01:50 +0000
Subject: [PATCH 18/21] Fix pytest.

---
 .../python/relay/test_op_qnn_simulated_dequantize.py | 12 ++++++++----
 tests/python/relay/test_op_qnn_simulated_quantize.py | 12 ++++++++----
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/tests/python/relay/test_op_qnn_simulated_dequantize.py b/tests/python/relay/test_op_qnn_simulated_dequantize.py
index cf562abd9aca..0cc04e4998eb 100644
--- a/tests/python/relay/test_op_qnn_simulated_dequantize.py
+++ b/tests/python/relay/test_op_qnn_simulated_dequantize.py
@@ -62,7 +62,7 @@ def build_simulated_dequantize(input_data, scale, zp, dtype, axis=-1):
     return vm
 
 
-def test_simulated_dequantize_simple(dtype):
+def verify_simulated_dequantize_simple(dtype):
     data = np.random.uniform(low=-128, high=127, size=[2, 5]).astype(dtype)
     data_fp = data.astype("float32")
     scale_np = np.float32(0.5)
     zp_np = np.int32(127)
@@ -84,6 +84,12 @@ def verify_simulated_dequantize_simple(dtype):
     np.testing.assert_equal(sim_dq_out.asnumpy(), dq_out)
 
 
+def test_simulated_dequantize():
+    verify_simulated_dequantize_simple("uint8")
+    verify_simulated_dequantize_simple("int8")
+    verify_simulated_dequantize_simple("int32")
+
+
 def test_dynamic_channels():
     # Compile simulated dequantize once but support either per-channel or scalar params.
     data = np.random.uniform(low=-64, high=64, size=[2, 5]).astype("int8")
@@ -166,8 +172,6 @@ def test_dynamic_dtype():
 
 
 if __name__ == "__main__":
-    test_simulated_dequantize_simple("uint8")
-    test_simulated_dequantize_simple("int8")
-    test_simulated_dequantize_simple("int32")
+    test_simulated_dequantize()
     test_dynamic_channels()
     test_dynamic_dtype()
diff --git a/tests/python/relay/test_op_qnn_simulated_quantize.py b/tests/python/relay/test_op_qnn_simulated_quantize.py
index d6081baf333f..ee4ba209dcb8 100644
--- a/tests/python/relay/test_op_qnn_simulated_quantize.py
+++ b/tests/python/relay/test_op_qnn_simulated_quantize.py
@@ -63,7 +63,7 @@ def build_simulated_quantize(input_data, scale, zp, dtype, axis=-1):
     return vm
 
 
-def test_simulated_quantize_simple(dtype):
+def verify_simulated_quantize_simple(dtype):
     data = np.random.uniform(low=-128, high=127, size=[2, 5]).astype("float32")
     scale_np = np.float32(0.5)
     zp_np = np.int32(127)
@@ -85,6 +85,12 @@ def verify_simulated_quantize_simple(dtype):
     np.testing.assert_equal(sim_q_out.asnumpy(), q_out)
 
 
+def test_simulated_quantize():
+    verify_simulated_quantize_simple("uint8")
+    verify_simulated_quantize_simple("int8")
+    verify_simulated_quantize_simple("int32")
+
+
 def test_dynamic_channels():
     # Compile simulated quantize once but support either per-channel or scalar params.
     data = np.random.uniform(low=-64, high=64, size=[2, 5]).astype("float32")
@@ -167,8 +173,6 @@ def test_dynamic_dtype():
 
 
 if __name__ == "__main__":
-    test_simulated_quantize_simple("uint8")
-    test_simulated_quantize_simple("int8")
-    test_simulated_quantize_simple("int32")
+    test_simulated_quantize()
     test_dynamic_channels()
     test_dynamic_dtype()

From a1b767d83f37a0b5cebbbf84764d1ab0a8d8612a Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Tue, 9 Mar 2021 06:44:28 +0000
Subject: [PATCH 19/21] Directly return make call.

---
 src/relay/qnn/op/simulated_dequantize.cc | 4 +---
 src/relay/qnn/op/simulated_quantize.cc   | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/relay/qnn/op/simulated_dequantize.cc b/src/relay/qnn/op/simulated_dequantize.cc
index fe69c746c7b2..e1fc47d700c9 100644
--- a/src/relay/qnn/op/simulated_dequantize.cc
+++ b/src/relay/qnn/op/simulated_dequantize.cc
@@ -55,9 +55,7 @@ Expr MakeSimulatedDequantize(Expr data, Expr in_dtype, Expr input_scale, Expr in
   auto attrs = make_object<DequantizeAttrs>();
   attrs->axis = axis;
   static const Op& op = Op::Get("qnn.simulated_dequantize");
-  auto out = Call(op, {data, in_dtype, input_scale, input_zero_point}, Attrs(attrs), {});
-
-  return out;
+  return Call(op, {data, in_dtype, input_scale, input_zero_point}, Attrs(attrs), {});
 }
 
 RELAY_REGISTER_OP("qnn.simulated_dequantize")
diff --git a/src/relay/qnn/op/simulated_quantize.cc b/src/relay/qnn/op/simulated_quantize.cc
index 2322ae1961b2..089762a6ade0 100644
--- a/src/relay/qnn/op/simulated_quantize.cc
+++ b/src/relay/qnn/op/simulated_quantize.cc
@@ -57,9 +57,7 @@ Expr MakeSimulatedQuantize(Expr data, Expr out_dtype, Expr output_scale, Expr ou
   auto attrs = make_object<QuantizeAttrs>();
   attrs->axis = axis;
   static const Op& op = Op::Get("qnn.simulated_quantize");
-  auto out = Call(op, {data, out_dtype, output_scale, output_zero_point}, Attrs(attrs), {});
-
-  return out;
+  return Call(op, {data, out_dtype, output_scale, output_zero_point}, Attrs(attrs), {});
 }
 
 RELAY_REGISTER_OP("qnn.simulated_quantize")

From efc2e66cf690eac39a934edd85c1c8c6bffd3075 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Tue, 9 Mar 2021 22:13:47 +0000
Subject: [PATCH 20/21] Clarify disable mode for simulated qnn ops and fix typos.
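
SQNN_FP32 (code 0) is renamed to SQNN_DISABLE: rather than implying a
float32 output, code 0 now explicitly means "pass the input through
untouched", which is more accurate since the simulated ops always keep
the input datatype. A rough usage sketch (the dtype codes below come
from this patch; the surrounding setup is illustrative only):

    import numpy as np
    from tvm.topi.nn.qnn import SQNN_DTYPE_TO_CODE

    # Simulate int8 quantization: round, scale, offset, and clip.
    dtype_np = np.int32(SQNN_DTYPE_TO_CODE["int8"])
    # Disable quantization entirely: the simulated op becomes an identity.
    dtype_np = np.int32(SQNN_DTYPE_TO_CODE["disable"])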
--- python/tvm/relay/qnn/op/qnn.py | 12 +++++++----- python/tvm/topi/nn/qnn.py | 32 ++++++++++++++++++-------------- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/python/tvm/relay/qnn/op/qnn.py b/python/tvm/relay/qnn/op/qnn.py index 5145bebc1905..f02f8227e14a 100644 --- a/python/tvm/relay/qnn/op/qnn.py +++ b/python/tvm/relay/qnn/op/qnn.py @@ -123,7 +123,8 @@ def quantize(data, output_scale, output_zero_point, axis=-1, out_dtype="int8"): def simulated_quantize(data, output_scale, output_zero_point, axis=-1, out_dtype="int8"): r"""Simulated Quantize op Mimics the quantize op but has more flexibility in valid inputs and always - outputs float32. This can be useful for calibrating or training a quantized network. + outputs the same type as the input. This can be useful for + calibrating or training a quantized network. Parameters ---------- @@ -179,14 +180,15 @@ def dequantize(data, input_scale, input_zero_point, axis=-1): def simulated_dequantize(data, input_scale, input_zero_point, axis=-1, in_dtype="int8"): - r"""Simulated Quantize op - Mimics the quantize op but has more flexibility in valid inputs and always - outputs float32. This can be useful for calibrating or training a quantized network. + r"""Simulated Dequantize op + Mimics the dequantize op but has more flexibility in valid inputs and always + outputs the same type as the input. This can be useful for calibrating or + training a quantized network. Parameters ---------- data : tvm.relay.Expr - The input tensor to be quantized. Can be of type float32. + The input tensor to be dequantized. input_zero_point : tvm.relay.Expr The input zero_point. input_scale : tvm.relay.Expr diff --git a/python/tvm/topi/nn/qnn.py b/python/tvm/topi/nn/qnn.py index 656916cad99d..82fe359e03f7 100644 --- a/python/tvm/topi/nn/qnn.py +++ b/python/tvm/topi/nn/qnn.py @@ -18,13 +18,13 @@ import tvm from tvm import te, tir, topi -SQNN_FP32 = 0 +SQNN_DISABLE = 0 SQNN_INT8 = 1 SQNN_UINT8 = 2 SQNN_INT32 = 3 SQNN_DTYPE_TO_CODE = { - "float32": SQNN_FP32, + "disable": SQNN_DISABLE, "int8": SQNN_INT8, "uint8": SQNN_UINT8, "int32": SQNN_INT32, @@ -35,7 +35,7 @@ @tvm.te.tag_scope(tag=topi.tag.ELEMWISE) def simulated_quantize(data, out_dtype, output_scale=None, output_zero_point=None, axis=-1): - """Simulated QNN quantize operator that mimics QNN outputs in floating point. The benefit + """Simulated QNN quantize operator that mimics QNN outputs without changing datatype. The benefit of this operator over true QNN quantize is that this operator allows dynamic datatype selection and can operate on both per-channel and scalar scales and zero points while QNN quantize requires both of these to be fixed at compile time. @@ -62,8 +62,8 @@ def simulated_quantize(data, out_dtype, output_scale=None, output_zero_point=Non The channel axis for quantization. Default value is -1 which corresponds to the last axis. """ - # Since all simulated outputs are in float32, we can just return the input tensor for fp32. - def _compute_fp32(value, *indices): + # When disabled, just pass through the input values. + def _compute_pass_through(value, *indices): return value[indices] # Simulate quantization for arbitrary integer datatypes. The computation for all datatypes is: @@ -89,13 +89,15 @@ def _compute_intn(dtype, value, *indices): # This allows the op to compile once but apply different quantization approaches # using a variable datatype input. 
def _dispatch_sim_quantize(value): - fp32_value = te.compute(data.shape, lambda *indices: _compute_fp32(value, *indices)) + pass_through_value = te.compute( + data.shape, lambda *indices: _compute_pass_through(value, *indices) + ) int8_value = te.compute( data.shape, lambda *indices: tir.if_then_else( out_dtype.equal(SQNN_DTYPE_TO_CODE["int8"]), _compute_intn("int8", value, *indices), - fp32_value[indices], + pass_through_value[indices], ), ) uint8_value = te.compute( @@ -122,10 +124,10 @@ def _dispatch_sim_quantize(value): @tvm.te.tag_scope(tag=topi.tag.ELEMWISE) def simulated_dequantize(data, in_dtype, input_scale=None, input_zero_point=None, axis=-1): - """Simulated QNN dequantize operator that mimics QNN outputs in floating point. The benefit - of this operator over true QNN quantize is that this operator allows dynamic datatype + """Simulated QNN dequantize operator that mimics QNN outputs without changing datatype. The benefit + of this operator over true QNN dequantize is that this operator allows dynamic datatype selection and can operate on both per-channel and scalar scales and zero points while - QNN quantize requires both of these to be fixed at compile time. + QNN dequantize requires both of these to be fixed at compile time. Parameters ---------- @@ -149,8 +151,8 @@ def simulated_dequantize(data, in_dtype, input_scale=None, input_zero_point=None The channel axis for quantization. Default value is -1 which corresponds to the last axis. """ - # Since all simulated inputs are in float32, we can just return the input tensor for fp32. - def _compute_fp32(value, *indices): + # When disabled simply return the input tensor. + def _compute_pass_through(value, *indices): return value[indices] # Simulate dequantization for arbitrary integer datatypes. The computation for all datatypes is: @@ -166,7 +168,9 @@ def _compute_intn(value, *indices): # This allows the op to compile once but apply different quantization approaches # using a variable datatype input. def _dispatch_sim_dequantize(value): - fp32_value = te.compute(data.shape, lambda *indices: _compute_fp32(value, *indices)) + pass_through_value = te.compute( + data.shape, lambda *indices: _compute_pass_through(value, *indices) + ) intn_condition = tvm.te.any( in_dtype.equal(SQNN_DTYPE_TO_CODE["int8"]), in_dtype.equal(SQNN_DTYPE_TO_CODE["uint8"]), @@ -177,7 +181,7 @@ def _dispatch_sim_dequantize(value): lambda *indices: tir.if_then_else( intn_condition, _compute_intn(value, *indices), - fp32_value[indices], + pass_through_value[indices], ), ) From 79a03dda9d0650b8080511ac7f26549170fe169b Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 9 Mar 2021 22:32:01 +0000 Subject: [PATCH 21/21] Line too long oops. --- python/tvm/topi/nn/qnn.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/tvm/topi/nn/qnn.py b/python/tvm/topi/nn/qnn.py index 82fe359e03f7..caed28580037 100644 --- a/python/tvm/topi/nn/qnn.py +++ b/python/tvm/topi/nn/qnn.py @@ -35,9 +35,9 @@ @tvm.te.tag_scope(tag=topi.tag.ELEMWISE) def simulated_quantize(data, out_dtype, output_scale=None, output_zero_point=None, axis=-1): - """Simulated QNN quantize operator that mimics QNN outputs without changing datatype. The benefit - of this operator over true QNN quantize is that this operator allows dynamic datatype - selection and can operate on both per-channel and scalar scales and zero points while + """Simulated QNN quantize operator that mimics QNN outputs without changing datatype. 
+ The benefit of this operator over true QNN quantize is that this operator allows dynamic + datatype selection and can operate on both per-channel and scalar scales and zero points while QNN quantize requires both of these to be fixed at compile time. Parameters @@ -124,9 +124,9 @@ def _dispatch_sim_quantize(value): @tvm.te.tag_scope(tag=topi.tag.ELEMWISE) def simulated_dequantize(data, in_dtype, input_scale=None, input_zero_point=None, axis=-1): - """Simulated QNN dequantize operator that mimics QNN outputs without changing datatype. The benefit - of this operator over true QNN dequantize is that this operator allows dynamic datatype - selection and can operate on both per-channel and scalar scales and zero points while + """Simulated QNN dequantize operator that mimics QNN outputs without changing datatype. + The benefit of this operator over true QNN dequantize is that this operator allows dynamic + datatype selection and can operate on both per-channel and scalar scales and zero points while QNN dequantize requires both of these to be fixed at compile time. Parameters