From 8d9e317053b474dd12b2c76a3abf8c5a4a9fbd2f Mon Sep 17 00:00:00 2001
From: "shoubhikbhatti@gmail.com"
Date: Mon, 8 Jul 2019 12:12:40 -0700
Subject: [PATCH 1/8] [Relay] [Quantization] WIP - Common files for the
 quantization work.

---
 include/tvm/relay/attrs/qnn.h         |  37 +++++++
 include/tvm/relay/quantize_util.h     | 139 ++++++++++++++++++++++
 python/tvm/relay/op/__init__.py       |   1 +
 python/tvm/relay/op/qnn/__init__.py   |  20 ++++
 python/tvm/relay/op/qnn/_make.py      |  20 ++++
 python/tvm/relay/op/qnn/qnn.py        |  21 ++++
 python/tvm/relay/quantize/__init__.py |   1 +
 python/tvm/relay/quantize/rewrite.py  |  38 +++++++
 src/relay/pass/pattern_util.h         |  20 ++++
 src/relay/pass/quantize_rewrite.cc    |  38 +++++++
 10 files changed, 335 insertions(+)
 create mode 100644 include/tvm/relay/attrs/qnn.h
 create mode 100644 include/tvm/relay/quantize_util.h
 create mode 100644 python/tvm/relay/op/qnn/__init__.py
 create mode 100644 python/tvm/relay/op/qnn/_make.py
 create mode 100644 python/tvm/relay/op/qnn/qnn.py
 create mode 100644 python/tvm/relay/quantize/rewrite.py
 create mode 100644 src/relay/pass/quantize_rewrite.cc

diff --git a/include/tvm/relay/attrs/qnn.h b/include/tvm/relay/attrs/qnn.h
new file mode 100644
index 000000000000..c45a33c786f7
--- /dev/null
+++ b/include/tvm/relay/attrs/qnn.h
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file tvm/relay/attrs/nn.h
+ * \brief Auxiliary attributes for nn operators.
+ */
+#ifndef TVM_RELAY_ATTRS_NN_QUANTIZE_H_
+#define TVM_RELAY_ATTRS_NN_QUANTIZE_H_
+
+#include
+#include
+
+namespace tvm {
+namespace relay {
+
+
+
+} // namespace relay
+} // namespace tvm
+#endif // TVM_RELAY_ATTRS_NN_QUANTIZE_H_

diff --git a/include/tvm/relay/quantize_util.h b/include/tvm/relay/quantize_util.h
new file mode 100644
index 000000000000..bb054fb8fb65
--- /dev/null
+++ b/include/tvm/relay/quantize_util.h
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file nnvm/compiler/quantize_util.h + * \brief Utility methods needs for quantized ops that can be shared + */ + +#ifndef TVM_QUANTIZE_UTIL_H +#define TVM_QUANTIZE_UTIL_H + +#include +#include "./base.h" + +namespace tvm { +namespace relay { + +inline bool is_Int8(const DataType& dtype) { + return dtype == Int(8); +} + +inline bool is_UInt8(const DataType& dtype) { + return dtype == UInt(8); +} + + +inline bool is_Int16(const DataType& dtype) { + return dtype == Int(16); +} + +inline bool is_UInt16(const DataType& dtype) { + return dtype == UInt(16); +} + +inline bool is_Int32(const DataType& dtype) { + return dtype == Int(32); +} + +inline bool is_UInt32(const DataType& dtype) { + return dtype == UInt(32); +} + + + +inline bool is_Float32(const DataType& dtype) { + return dtype == Float(32); +} + +inline bool is_quantized_type(const DataType& dtype) { + return is_Int8(dtype) || is_UInt8(dtype) + || is_Int16(dtype) || is_UInt16(dtype); +} + +enum class QuantizeOpType : uint8_t { + Quantize_Requantize, + Dequantize, + Requantize +}; + +inline bool is_valid_quantized_op_input_type(const QuantizeOpType &op_type, const DataType &in_dtype) { + switch(op_type) { + case QuantizeOpType::Quantize_Requantize: + return is_Float32(in_dtype) || is_quantized_type(in_dtype); + case QuantizeOpType ::Dequantize: + return is_quantized_type(in_dtype); + case QuantizeOpType ::Requantize: + return is_Int16(in_dtype) || is_Int32(in_dtype); + default: + return false; + } +} + +inline bool is_valid_quantized_op_output_type(const QuantizeOpType &op_type, const DataType &in_dtype) { + switch(op_type) { + case QuantizeOpType::Quantize_Requantize: + return is_quantized_type(in_dtype); + case QuantizeOpType::Dequantize: + return is_Float32(in_dtype); + default: + return false; + } +} + +inline const int32_t get_qmin(const DataType& dtype) { + if (is_Int8(dtype)) { + return std::numeric_limits::min(); + } else if (is_UInt8(dtype)) { + return std::numeric_limits::min(); + } else if (is_Int16(dtype)) { + return std::numeric_limits::min(); + } else if (is_UInt16(dtype)) { + return std::numeric_limits::min(); + } else if (is_Int32(dtype)) { + return std::numeric_limits::min(); + } else if (is_UInt32(dtype)) { + return std::numeric_limits::min(); + } + LOG(FATAL) << "Type not supported\n"; + return -1; +} + + +inline const int32_t get_qmax(const DataType& dtype) { + if (is_Int8(dtype)) { + return std::numeric_limits::max(); + } else if (is_UInt8(dtype)) { + return std::numeric_limits::max(); + } else if (is_Int16(dtype)) { + return std::numeric_limits::max(); + } else if (is_UInt16(dtype)) { + return std::numeric_limits::max(); + } else if (is_Int32(dtype)) { + return std::numeric_limits::max(); + } else if (is_UInt32(dtype)) { + return std::numeric_limits::max(); + } + LOG(FATAL) << "Type not supported\n"; + return -1; +} + +} // namespace relay +} // namespace tvm +#endif //TVM_QUANTIZE_UTIL_H diff --git a/python/tvm/relay/op/__init__.py b/python/tvm/relay/op/__init__.py index a27ab1dc50ff..1d634ef18fc0 100644 --- a/python/tvm/relay/op/__init__.py +++ b/python/tvm/relay/op/__init__.py @@ -26,6 +26,7 @@ from .transform import * from .algorithm import * from . import nn +from . import qnn from . import annotation from . import image from . 
import vision diff --git a/python/tvm/relay/op/qnn/__init__.py b/python/tvm/relay/op/qnn/__init__.py new file mode 100644 index 000000000000..aef02300ab63 --- /dev/null +++ b/python/tvm/relay/op/qnn/__init__.py @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=wildcard-import +"""Neural network related operators.""" +from __future__ import absolute_import as _abs +from .qnn import * \ No newline at end of file diff --git a/python/tvm/relay/op/qnn/_make.py b/python/tvm/relay/op/qnn/_make.py new file mode 100644 index 000000000000..b1695629b8f9 --- /dev/null +++ b/python/tvm/relay/op/qnn/_make.py @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Constructor APIs""" +from ...._ffi.function import _init_api + +_init_api("relay.op.qnn._make", __name__) diff --git a/python/tvm/relay/op/qnn/qnn.py b/python/tvm/relay/op/qnn/qnn.py new file mode 100644 index 000000000000..008e6cbb7f80 --- /dev/null +++ b/python/tvm/relay/op/qnn/qnn.py @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +#pylint: disable=invalid-name, too-many-lines +"""Neural network operations.""" +from __future__ import absolute_import as _abs +from . 
import _make
+
diff --git a/python/tvm/relay/quantize/__init__.py b/python/tvm/relay/quantize/__init__.py
index 45bb62e66853..8da4e7953566 100644
--- a/python/tvm/relay/quantize/__init__.py
+++ b/python/tvm/relay/quantize/__init__.py
@@ -19,4 +19,5 @@
 from __future__ import absolute_import as _abs
 from .quantize import *
+from .rewrite import *
 from ._annotate import register_annotate_function

diff --git a/python/tvm/relay/quantize/rewrite.py b/python/tvm/relay/quantize/rewrite.py
new file mode 100644
index 000000000000..89429e522115
--- /dev/null
+++ b/python/tvm/relay/quantize/rewrite.py
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#pylint: disable=unused-argument
+"""Automatic quantization toolkit."""
+from __future__ import absolute_import
+
+from . import _quantize
+from .. import expr as _expr
+
+def rewrite(expr):
+    """
+    Rewrites the high-level quantized ops into low-level existing Relay ops.
+
+    Parameters
+    ----------
+    expr : tvm.relay.Expr
+        The input expression.
+
+    Returns
+    -------
+    expr : tvm.relay.Expr
+        The output expression.
+    """
+    return _quantize.rewrite(expr)

diff --git a/src/relay/pass/pattern_util.h b/src/relay/pass/pattern_util.h
index 5c303905968e..7249d1d4c086 100644
--- a/src/relay/pass/pattern_util.h
+++ b/src/relay/pass/pattern_util.h
@@ -373,6 +373,26 @@ inline Expr Copy(Expr data) {
 }
 
+inline Expr Where(const Expr& condition, const Expr& x, const Expr& y) {
+  static const Op& op = Op::Get("where");
+  return CallNode::make(op, {condition, x, y});
+}
+
+inline Expr GreaterEqual(const Expr& lhs, const Expr& rhs) {
+  static const Op& op = Op::Get("greater_equal");
+  return CallNode::make(op, {lhs, rhs}, Attrs(), {});
+}
+
+inline Expr Full(Expr fill_value,
+                 Array shape,
+                 DataType dtype) {
+  auto attrs = make_node();
+  attrs->shape = std::move(shape);
+  attrs->dtype = std::move(dtype);
+  static const Op& op = Op::Get("full");
+  return CallNode::make(op, {fill_value}, Attrs(attrs), {});
+}
+
 Expr MakeConcatenate(Expr data, int axis);
 
 Expr MakeStridedSlice(Expr data, Array begin, Array end, Array strides);

diff --git a/src/relay/pass/quantize_rewrite.cc b/src/relay/pass/quantize_rewrite.cc
new file mode 100644
index 000000000000..925c516b41ed
--- /dev/null
+++ b/src/relay/pass/quantize_rewrite.cc
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file quantize_rewrite.cc
+ * \brief Lower quantized ops to existing Relay ops.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "pattern_util.h"
+
+namespace tvm {
+namespace relay {
+
+
+} // namespace relay
+} // namespace tvm

From 7081694a266673797c093c53e5fe9f0c77b6c6e7 Mon Sep 17 00:00:00 2001
From: "shoubhikbhatti@gmail.com"
Date: Mon, 8 Jul 2019 13:45:14 -0700
Subject: [PATCH 2/8] [Relay] [Quantization] WIP - Adding the tests file.

---
 tests/python/unittest/test_quantized_ops.py | 41 +++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 tests/python/unittest/test_quantized_ops.py

diff --git a/tests/python/unittest/test_quantized_ops.py b/tests/python/unittest/test_quantized_ops.py
new file mode 100644
index 000000000000..11082f774c59
--- /dev/null
+++ b/tests/python/unittest/test_quantized_ops.py
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import tvm
+import numpy as np
+from tvm import relay
+from tvm.relay.testing import create_workload
+from tvm.contrib import graph_runtime
+
+# TODOs for janimesh before submitting this patch.
+# TODO - Add tests for int8 input/weight dtype
+# TODO - opt_level=0 fails mostly due to fusion.
+# TODO - opt_level=3 fails, likely culprit kernel layout for int8
+# compute. Work with Rankyung to see if this is the culprit. Handle
+# it in a separate patch.
+
+def run_infer_type(expr):
+    mod = relay.Module.from_expr(expr)
+    mod = relay.transform.InferType()(mod)
+    entry = mod["main"]
+    return entry if isinstance(expr, relay.Function) else entry.body
+
+
+
+if __name__ == "__main__":
+    # add your tests here.
+    pass

From c089ebcdf4b13f98b776bb0213779f6783fa6743 Mon Sep 17 00:00:00 2001
From: "shoubhikbhatti@gmail.com"
Date: Mon, 8 Jul 2019 13:43:24 -0700
Subject: [PATCH 3/8] [Relay] [Quantization] WIP - This is the continuation of
 pull request [#3367](https://github.com/dmlc/tvm/pull/3367)

In this PR I want to discuss the design and implementation of the
- Quantize op -> FP32 to i8/u8
- Dequantize Op -> i8/u8 -> fp32

I have added test cases to verify the correctness of the ops.
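For reference, here is the intended arithmetic in plain numpy terms. This is a
sketch of the semantics only (the `*_ref` helper names are illustrative, not
part of the patch); the real lowering happens in quantize_rewrite.cc via
existing Relay ops:

```python
import numpy as np

def quantize_ref(x, output_scale, output_zero_point, out_dtype):
    # out = clamp(round(x / scale) + zero_point, qmin, qmax), cast to out_dtype
    info = np.iinfo(out_dtype)
    q = np.round(x / output_scale) + output_zero_point
    return np.clip(q, info.min, info.max).astype(out_dtype)

def dequantize_ref(q, input_scale, input_zero_point):
    # out = (q - zero_point) * scale, computed in float32
    return (q.astype(np.int32) - input_zero_point).astype(np.float32) * input_scale

# Round trip with the scale/zero_point used in the tests below:
x = np.array([-63.5, -63.0, 62.5, 64.0], dtype=np.float32)
q = quantize_ref(x, output_scale=0.5, output_zero_point=127, out_dtype=np.uint8)
# q == [0, 1, 252, 255]; dequantize_ref(q, 0.5, 127) recovers x exactly here.
```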
---
 include/tvm/relay/attrs/qnn.h               | 31 +++++++
 include/tvm/relay/quantize_util.h           |  8 +-
 python/tvm/relay/op/qnn/qnn.py              | 45 ++++++++++
 src/relay/op/nn/dequantize.cc               | 79 +++++++++++++++++
 src/relay/op/nn/quantize_op.cc              | 94 +++++++++++++++++++++
 src/relay/pass/quantize_rewrite.cc          | 46 +++++++++-
 tests/python/unittest/test_quantized_ops.py | 94 ++++++++++++++++++++-
 7 files changed, 389 insertions(+), 8 deletions(-)
 create mode 100644 src/relay/op/nn/dequantize.cc
 create mode 100644 src/relay/op/nn/quantize_op.cc

diff --git a/include/tvm/relay/attrs/qnn.h b/include/tvm/relay/attrs/qnn.h
index c45a33c786f7..30ec16fd21d6 100644
--- a/include/tvm/relay/attrs/qnn.h
+++ b/include/tvm/relay/attrs/qnn.h
@@ -30,6 +30,37 @@
 namespace tvm {
 namespace relay {
 
+struct QuantizeAttrs : public tvm::AttrsNode {
+  int32_t output_zero_point;
+  double output_scale;
+  DataType out_dtype;
+
+  TVM_DECLARE_ATTRS(QuantizeAttrs, "relay.attrs.QuantizeAttrs") {
+    TVM_ATTR_FIELD(out_dtype)
+      .describe("Output data type, can be one of [int8 or uint8].");
+
+    TVM_ATTR_FIELD(output_zero_point)
+      .describe("The zero_point for the activation of this op.");
+
+    TVM_ATTR_FIELD(output_scale)
+      .describe("The scale for the activation of this op.");
+  }
+};
+
+struct DequantizeAttrs : public tvm::AttrsNode {
+  int32_t input_zero_point;
+  double input_scale;
+
+  TVM_DECLARE_ATTRS(DequantizeAttrs, "relay.attrs.DequantizeAttrs") {
+
+    TVM_ATTR_FIELD(input_zero_point)
+      .describe("The zero_point for the input tensor of this op.");
+
+    TVM_ATTR_FIELD(input_scale)
+      .describe("The scale for the input tensor of this op.");
+  }
+};
+
 } // namespace relay

diff --git a/include/tvm/relay/quantize_util.h b/include/tvm/relay/quantize_util.h
index bb054fb8fb65..312262c8cd67 100644
--- a/include/tvm/relay/quantize_util.h
+++ b/include/tvm/relay/quantize_util.h
@@ -68,15 +68,15 @@ inline bool is_quantized_type(const DataType& dtype) {
 }
 
 enum class QuantizeOpType : uint8_t {
-  Quantize_Requantize,
+  Quantize,
   Dequantize,
   Requantize
 };
 
 inline bool is_valid_quantized_op_input_type(const QuantizeOpType &op_type, const DataType &in_dtype) {
   switch(op_type) {
-    case QuantizeOpType::Quantize_Requantize:
-      return is_Float32(in_dtype) || is_quantized_type(in_dtype);
+    case QuantizeOpType::Quantize:
+      return is_Float32(in_dtype);
     case QuantizeOpType ::Dequantize:
       return is_quantized_type(in_dtype);
     case QuantizeOpType ::Requantize:
@@ -88,7 +88,7 @@ inline bool is_valid_quantized_op_input_type(const QuantizeOpType &op_type, cons
 
 inline bool is_valid_quantized_op_output_type(const QuantizeOpType &op_type, const DataType &in_dtype) {
   switch(op_type) {
-    case QuantizeOpType::Quantize_Requantize:
+    case QuantizeOpType::Quantize:
       return is_quantized_type(in_dtype);
     case QuantizeOpType::Dequantize:
       return is_Float32(in_dtype);

diff --git a/python/tvm/relay/op/qnn/qnn.py b/python/tvm/relay/op/qnn/qnn.py
index 008e6cbb7f80..810e2a8fad00 100644
--- a/python/tvm/relay/op/qnn/qnn.py
+++ b/python/tvm/relay/op/qnn/qnn.py
@@ -19,3 +19,48 @@
 from __future__ import absolute_import as _abs
 from . import _make
 
+def quantize(input_data, output_zero_point, output_scale, out_dtype='int8'):
+    r""" Quantize op
+    This operator takes floating point 32 or quantized int8 and uint8 as input and produces
+    quantized int8 or uint8 as output. The output shape is the same as input shape. The input
+    tensor can be of any shape.
+    .. math::
+        \mbox{out}[x] =
+        \mbox{clamp}(\mbox{round}(\mbox{input_tensor}/\mbox{output_scale}) + \mbox{output_zero_point}, \mbox{out_dtype::min}, \mbox{out_dtype::max})
+    Parameters
+    ----------
+    input_data : tvm.relay.Expr
+        The input tensor to be quantized. Can be of type [float32, int8, uint8].
+    output_zero_point :
+        The output zero_point.
+    output_scale:
+        The output scale.
+    out_dtype:
+        The data type of the output tensor. Can be [int8, uint8, float32]
+    Returns
+    -------
+    result : tvm.relay.Expr
+        The computed result.
+    """
+    return _make.quantize(input_data, output_zero_point, output_scale, out_dtype)
+
+
+def dequantize(input_data, input_zero_point, input_scale):
+    r""" Dequantize op
+    This operator takes quantized int8 and uint8 as input and produces
+    dequantized float32 as output. The output shape is the same as input shape. The input
+    tensor can be of any shape.
+    Parameters
+    ----------
+    input_data : tvm.relay.Expr
+        The input tensor to be quantized. Can be of type [float32, int8, uint8].
+    input_zero_point :
+        The input zero_point.
+    input_scale:
+        The input scale.
+    Returns
+    -------
+    result : tvm.relay.Expr
+        The computed result.
+    """
+    return _make.dequantize(input_data, input_zero_point, input_scale)
\ No newline at end of file

diff --git a/src/relay/op/nn/dequantize.cc b/src/relay/op/nn/dequantize.cc
new file mode 100644
index 000000000000..7cbab466830b
--- /dev/null
+++ b/src/relay/op/nn/dequantize.cc
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file dequantize.cc
+ * \brief Dequantize operator
+ */
+
+//#include
+//#include
+//#include
+#include
+#include
+#include
+
+namespace tvm {
+namespace relay {
+
+TVM_REGISTER_NODE_TYPE(DequantizeAttrs);
+
+bool DequantizeRel(const Array& types,
+                   int num_inputs,
+                   const Attrs& attrs,
+                   const TypeReporter& reporter) {
+  CHECK_EQ(types.size(), 2);
+  const auto* data = types[0].as();
+  const auto input_dtype = data->dtype;
+  CHECK(is_valid_quantized_op_input_type(QuantizeOpType::Dequantize, input_dtype))
+    << "Input type should be one of the quantized types [uint8, int8] but was " << input_dtype;
+  const Array oshape = data->shape;
+  // assign output type
+  reporter->Assign(types[1], TensorTypeNode::make(oshape, Float(32)));
+  return true;
+}
+
+Expr MakeDequantize(Expr data,
+                    int32_t input_zero_point,
+                    double input_scale) {
+  auto attrs = make_node();
+  attrs->input_scale = input_scale;
+  attrs->input_zero_point = input_zero_point;
+  static const Op& op = Op::Get("qnn.dequantize");
+  return CallNode::make(op, {data}, Attrs(attrs), {});
+}
+
+RELAY_REGISTER_OP("qnn.dequantize")
+.describe(R"code(Dequantizes the input and produces float32 output.
+
+The input is always quantized (int8, uint8) and will be converted to float32 given input scale and shift.
+- **data**: Quantized tensor of any shape to dequantize. The input data can be of floating point
+)code" TVM_ADD_FILELINE)
+.set_attrs_type_key("relay.attrs.DequantizeAttrs")
+.set_num_inputs(1)
+.add_argument("data", "Tensor", "The tensor to dequantize.")
+.set_support_level(10)
+.add_type_rel("Dequantize", DequantizeRel);
+
+TVM_REGISTER_API("relay.op.qnn._make.dequantize")
+.set_body_typed(MakeDequantize);
+
+} // namespace relay
+} // namespace tvm

diff --git a/src/relay/op/nn/quantize_op.cc b/src/relay/op/nn/quantize_op.cc
new file mode 100644
index 000000000000..51308e86a0da
--- /dev/null
+++ b/src/relay/op/nn/quantize_op.cc
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file quantize_op.cc
+ * \brief Quantize operator
+ */
+
+//#include
+//#include
+//#include
+//#include
+//#include
+#include
+#include
+#include
+
+namespace tvm {
+namespace relay {
+
+TVM_REGISTER_NODE_TYPE(QuantizeAttrs);
+
+bool QuantizeRel(const Array& types,
+                 int num_inputs,
+                 const Attrs& attrs,
+                 const TypeReporter& reporter) {
+  CHECK_EQ(types.size(), 2);
+  const auto* data = types[0].as();
+  const auto input_dtype = data->dtype;
+  CHECK(is_valid_quantized_op_input_type(QuantizeOpType::Quantize, input_dtype))
+    << "Input type should be float32 but was " << input_dtype;
+  const auto* param = attrs.as();
+  const Array oshape = data->shape;
+  const DataType out_dtype = param->out_dtype;
+  CHECK(is_valid_quantized_op_output_type(QuantizeOpType::Quantize, out_dtype))
+    << "Output type should be one of [int8, uint8] but was " << out_dtype;
+  // assign output type
+  reporter->Assign(types[1], TensorTypeNode::make(oshape, out_dtype));
+  return true;
+}
+
+Expr MakeQuantize(Expr data,
+                  int32_t output_zero_point,
+                  double output_scale,
+                  DataType out_dtype) {
+  auto attrs = make_node();
+  attrs->output_scale = output_scale;
+  attrs->output_zero_point = output_zero_point;
+  attrs->out_dtype = std::move(out_dtype);
+  static const Op& op = Op::Get("qnn.quantize");
+  return CallNode::make(op, {data}, Attrs(attrs), {});
+}
+
+RELAY_REGISTER_OP("qnn.quantize")
+.describe(R"code(Quantizes the input and produces quantized output.
+
+The input is a float32 tensor. Given the output scale and zero point,
+this op quantizes the float values to int8 or uint8: the input is
+divided by the scale and rounded, the zero point is added, and the
+result is clamped to the range of the output type before the cast.
+
+- **data**: Tensor of any shape to quantize. The input data must be of floating point type.
+)code" TVM_ADD_FILELINE)
+.set_attrs_type_key("relay.attrs.QuantizeAttrs")
+.set_num_inputs(1)
+.add_argument("data", "Tensor", "The tensor to quantize.")
+.set_support_level(10)
+.add_type_rel("Quantize", QuantizeRel);
+
+TVM_REGISTER_API("relay.op.qnn._make.quantize")
+.set_body_typed(MakeQuantize);
+
+} // namespace relay
+} // namespace tvm
\ No newline at end of file

diff --git a/src/relay/pass/quantize_rewrite.cc b/src/relay/pass/quantize_rewrite.cc
index 925c516b41ed..b602294f8c4a 100644
--- a/src/relay/pass/quantize_rewrite.cc
+++ b/src/relay/pass/quantize_rewrite.cc
@@ -24,15 +24,57 @@
  */
 
 #include
-#include
+#include
 #include
 #include
-#include
+#include
 #include "pattern_util.h"
 
 namespace tvm {
 namespace relay {
 
+Expr QuantizeForwardRewrite(const Call& ref_call,
+                            const Array& new_args,
+                            const NodeRef& ctx) {
+  CHECK_EQ(new_args.size(), 1);
+  Expr data = new_args[0];
+  const auto* attrs = ref_call->attrs.as();
+  const auto out_dtype = attrs->out_dtype;
+  const auto* new_tensor = data.operator->()->checked_type().as();
+  CHECK(new_tensor) << "Expected TensorTypeNode but was " << data.operator->()->checked_type();
+  const auto output_zero_point = MakeConstantScalar(Int(32), attrs->output_zero_point);
+  const auto scale = MakeConstantScalar(Float(32), attrs->output_scale);
+  const int32_t min_val = get_qmin(out_dtype);
+  const int32_t max_val = get_qmax(out_dtype);
+  auto scale_data = Cast(Round(Divide(data, scale)), Int(32));
+  // we are trying to do - std::min(std::max(unclamped, min_val), max_val);
+  auto unclamped = Cast(Add(scale_data, output_zero_point), out_dtype);
+  auto clamped_output = Clip(unclamped, min_val, max_val);
+  return clamped_output;
+}
+
+RELAY_REGISTER_OP("qnn.quantize")
+.set_attr("FQuantizeForwardRewrite", QuantizeForwardRewrite);
+
+Expr DequantizeForwardRewrite(const Call& ref_call,
+                              const Array& new_args,
+                              const NodeRef& ctx) {
+  CHECK_EQ(new_args.size(), 1);
+  Expr data = new_args[0];
+  const auto* attrs = ref_call->attrs.as();
+  const auto* new_tensor = data.operator->()->checked_type().as();
+  CHECK(new_tensor) << "Expected TensorTypeNode but was " << data.operator->()->checked_type();
+  const auto input_zero_point = MakeConstantScalar(Int(32), attrs->input_zero_point);
+  const auto input_scale = MakeConstantScalar(Float(32), attrs->input_scale);
+  auto shift = Subtract(Cast(data, Int(32)), input_zero_point);
+  auto scale = Multiply(Cast(shift, Float(32)), input_scale);
+  return scale;
+}
+
+RELAY_REGISTER_OP("qnn.dequantize")
+.set_attr("FQuantizeForwardRewrite", DequantizeForwardRewrite);
+
+TVM_REGISTER_API("relay._quantize.rewrite").set_body_typed([](const Expr& e) {
+  Expr ret = ForwardRewrite(e, "FQuantizeForwardRewrite", nullptr, nullptr);
+  return ret;
+});
 
 } // namespace relay
 } // namespace tvm

diff --git a/tests/python/unittest/test_quantized_ops.py b/tests/python/unittest/test_quantized_ops.py
index 11082f774c59..272f22aecb45 100644
--- a/tests/python/unittest/test_quantized_ops.py
+++ b/tests/python/unittest/test_quantized_ops.py
@@ -34,8 +34,98 @@ def run_infer_type(expr):
     entry = mod["main"]
     return entry if isinstance(expr, relay.Function) else entry.body
 
+def test_quantize_op():
+    def quantize_test_driver(in_dtype, quant_args, out_dtype, in_data, verify_output_data):
+        shape = in_data.shape
+        input_data = relay.var("input_data", shape=shape, dtype=in_dtype)
+        output_zero_point = quant_args['out_zero_point']
+        output_scale =
quant_args['out_scale'] + quantized_output = relay.op.qnn.quantize(input_data, output_zero_point=output_zero_point, + output_scale=output_scale, out_dtype=out_dtype) + func = relay.Function(relay.analysis.free_vars(quantized_output), quantized_output) + func = run_infer_type(func) + func = relay.quantize.rewrite(func) + func = run_infer_type(func) + graph, lib, params = relay.build(func, "llvm", params=None) + mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0)) + mod.set_input(input_data=in_data) + mod.run() + res = mod.get_output(0).asnumpy() + np.testing.assert_equal(res, verify_output_data) + assert res.dtype == out_dtype + + def test_float32_to_uint8(): + data = np.array([-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64]) \ + .astype('float32') \ + .reshape((2,5)) + output = np.array([0, 1, 2, 3, 4, 251, 252, 253, 254, 255]) \ + .astype('uint8') \ + .reshape((2,5)) + quant_args = {"out_zero_point":127, "out_scale":0.5} + quantize_test_driver(in_dtype='float32', quant_args=quant_args, out_dtype='uint8', in_data=data, + verify_output_data=output) + + def test_float32_to_int8(): + data = np.array([-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64]) \ + .astype('float32') \ + .reshape((2,5)) + output = np.array([-128, -127, -126, -125, -124, 123, 124, 125, 126, 127]) \ + .astype('int8') \ + .reshape((2,5)) + quant_args = {"out_zero_point":-1, "out_scale":0.5} + quantize_test_driver(in_dtype='float32', quant_args=quant_args, out_dtype='int8', in_data=data, + verify_output_data=output) + + test_float32_to_uint8() + test_float32_to_int8() + +def test_dequantize_op(): + + def quantize_test_driver(in_dtype, quant_args, in_data, verify_output_data): + shape = in_data.shape + input_data = relay.var("input_data", shape=shape, dtype=in_dtype) + input_zero_point = quant_args['in_zero_point'] + input_scale = quant_args['in_scale'] + quantized_output = relay.op.qnn.dequantize(input_data, input_zero_point=input_zero_point, + input_scale=input_scale) + func = relay.Function(relay.analysis.free_vars(quantized_output), quantized_output) + func = run_infer_type(func) + func = relay.quantize.rewrite(func) + func = run_infer_type(func) + graph, lib, params = relay.build(func, "llvm", params=None) + mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0)) + mod.set_input(input_data=in_data) + mod.run() + res = mod.get_output(0).asnumpy() + np.testing.assert_allclose(res, verify_output_data) + assert res.dtype == np.float32 + + def test_uint8_to_float32(): + data = np.array([0, 1, 2, 3, 4, 251, 252, 253, 254, 255]) \ + .astype('uint8') \ + .reshape((2,5)) + output = np.array([-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64]) \ + .astype('float32') \ + .reshape((2,5)) + quant_args = {"in_zero_point":127, "in_scale":0.5} + quantize_test_driver(in_dtype='uint8', quant_args=quant_args, in_data=data, + verify_output_data=output) + + def test_int8_to_float32(): + data = np.array([-128, -127, -126, -125, -124, 123, 124, 125, 126, 127]) \ + .astype('int8') \ + .reshape((2,5)) + output = np.array([-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64]) \ + .astype('float32') \ + .reshape((2,5)) + quant_args = {"in_zero_point":-1, "in_scale":0.5} + quantize_test_driver(in_dtype='int8', quant_args=quant_args, in_data=data, + verify_output_data=output) + + test_uint8_to_float32() + test_int8_to_float32() if __name__ == "__main__": - # add your tests here. 
-    pass
+    test_quantize_op()
+    test_dequantize_op()

From bcf003bbb6300b590df870522efbc57cd6ab6a1b Mon Sep 17 00:00:00 2001
From: "shoubhikbhatti@gmail.com"
Date: Mon, 8 Jul 2019 14:27:19 -0700
Subject: [PATCH 4/8] [Relay] [Quantization] Removing redundant code.

---
 src/relay/op/nn/dequantize.cc  | 3 ---
 src/relay/op/nn/quantize_op.cc | 5 -----
 2 files changed, 8 deletions(-)

diff --git a/src/relay/op/nn/dequantize.cc b/src/relay/op/nn/dequantize.cc
index 7cbab466830b..dfc7cade7d0b 100644
--- a/src/relay/op/nn/dequantize.cc
+++ b/src/relay/op/nn/dequantize.cc
@@ -23,9 +23,6 @@
  * \brief Dequantize operator
  */
 
-//#include
-//#include
-//#include
 #include
 #include
 #include

diff --git a/src/relay/op/nn/quantize_op.cc b/src/relay/op/nn/quantize_op.cc
index 51308e86a0da..106abc93364d 100644
--- a/src/relay/op/nn/quantize_op.cc
+++ b/src/relay/op/nn/quantize_op.cc
@@ -23,11 +23,6 @@
  * \brief Quantize operator
  */
 
-//#include
-//#include
-//#include
-//#include
-//#include
 #include
 #include
 #include

From 6766af9ced609e1840cac0e64ed27690768d5c00 Mon Sep 17 00:00:00 2001
From: "shoubhikbhatti@gmail.com"
Date: Fri, 12 Jul 2019 11:34:24 -0700
Subject: [PATCH 5/8] [Relay] [Quantization] 1. Correcting docs 2. Reordering
 Clip and Cast in the quantize op for stability.

---
 python/tvm/relay/op/qnn/qnn.py     | 11 +++++------
 src/relay/op/nn/dequantize.cc      |  2 +-
 src/relay/pass/quantize_rewrite.cc |  7 ++++---
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/python/tvm/relay/op/qnn/qnn.py b/python/tvm/relay/op/qnn/qnn.py
index 810e2a8fad00..45fb127159d0 100644
--- a/python/tvm/relay/op/qnn/qnn.py
+++ b/python/tvm/relay/op/qnn/qnn.py
@@ -21,22 +21,21 @@
 def quantize(input_data, output_zero_point, output_scale, out_dtype='int8'):
     r""" Quantize op
-    This operator takes floating point 32 or quantized int8 and uint8 as input and produces
-    quantized int8 or uint8 as output. The output shape is the same as input shape. The input
-    tensor can be of any shape.
+    This operator takes float32 as input and produces quantized int8 or uint8 as output. The input tensor can be of
+    any shape. The output shape is the same as input shape.
     .. math::
         \mbox{out}[x] =
         \mbox{clamp}(\mbox{round}(\mbox{input_tensor}/\mbox{output_scale}) + \mbox{output_zero_point}, \mbox{out_dtype::min}, \mbox{out_dtype::max})
     Parameters
     ----------
     input_data : tvm.relay.Expr
-        The input tensor to be quantized. Can be of type [float32, int8, uint8].
+        The input tensor to be quantized. Can be of type float32.
     output_zero_point :
         The output zero_point.
     output_scale:
         The output scale.
     out_dtype:
-        The data type of the output tensor. Can be [int8, uint8, float32]
+        The data type of the output tensor. Can be [int8, uint8]
     Returns
     -------
     result : tvm.relay.Expr
@@ -53,7 +53,7 @@ def dequantize(input_data, input_zero_point, input_scale):
     Parameters
     ----------
     input_data : tvm.relay.Expr
-        The input tensor to be quantized. Can be of type [float32, int8, uint8].
+        The input tensor to be dequantized. Can be of type [int8, uint8].
     input_zero_point :
         The input zero_point.
     input_scale:
         The input scale.

diff --git a/src/relay/op/nn/dequantize.cc b/src/relay/op/nn/dequantize.cc
index dfc7cade7d0b..55213f168b6d 100644
--- a/src/relay/op/nn/dequantize.cc
+++ b/src/relay/op/nn/dequantize.cc
@@ -60,7 +60,7 @@ Expr MakeDequantize(Expr data,
 RELAY_REGISTER_OP("qnn.dequantize")
 .describe(R"code(Dequantizes the input and produces float32 output.
-The input is always quantized (int8, uint8) and will be converted to float32 given input scale and shift.
+The input is always quantized (int8, uint8) and will be converted to float32 given input scale and zero_point. - **data**: Quantized tensor of any shape to dequantize. The input data can be of floating point )code" TVM_ADD_FILELINE) .set_attrs_type_key("relay.attrs.DequantizeAttrs") diff --git a/src/relay/pass/quantize_rewrite.cc b/src/relay/pass/quantize_rewrite.cc index b602294f8c4a..08c8a18c1ef3 100644 --- a/src/relay/pass/quantize_rewrite.cc +++ b/src/relay/pass/quantize_rewrite.cc @@ -46,9 +46,10 @@ Expr QuantizeForwardRewrite(const Call& ref_call, const Array& new_args, c const int32_t max_val = get_qmax(out_dtype); auto scale_data = Cast(Round(Divide(data, scale)), Int(32)); // we are trying to do - std::min(std::max(unclamped, min_val), max_val); - auto unclamped = Cast(Add(scale_data, output_zero_point), out_dtype); - auto clamped_output = Clip(unclamped, min_val, max_val); - return clamped_output; + auto add_zero_point = Add(scale_data, output_zero_point); + auto clamped_output = Clip(add_zero_point, min_val, max_val); + auto clamp_out_dtype = Cast(clamped_output, out_dtype); + return clamp_out_dtype; } RELAY_REGISTER_OP("qnn.quantize") From 186af5a3ea2f7b847ac8f6851b0b8e953c38763f Mon Sep 17 00:00:00 2001 From: "shoubhikbhatti@gmail.com" Date: Fri, 12 Jul 2019 11:34:24 -0700 Subject: [PATCH 6/8] [Relay] [Quantization] 1. Correcting the file paths as suggested in the reviews. --- .../tvm/relay/{attrs/qnn.h => qnn/attrs.h} | 6 +- src/relay/{ => qnn}/pass/quantize_rewrite.cc | 7 +- .../quantize_util.h => src/relay/qnn/util.h | 92 +++++++++---------- 3 files changed, 53 insertions(+), 52 deletions(-) rename include/tvm/relay/{attrs/qnn.h => qnn/attrs.h} (89%) rename src/relay/{ => qnn}/pass/quantize_rewrite.cc (92%) rename include/tvm/relay/quantize_util.h => src/relay/qnn/util.h (54%) diff --git a/include/tvm/relay/attrs/qnn.h b/include/tvm/relay/qnn/attrs.h similarity index 89% rename from include/tvm/relay/attrs/qnn.h rename to include/tvm/relay/qnn/attrs.h index c45a33c786f7..facc0937e2cf 100644 --- a/include/tvm/relay/attrs/qnn.h +++ b/include/tvm/relay/qnn/attrs.h @@ -21,8 +21,8 @@ * \file tvm/relay/attrs/nn.h * \brief Auxiliary attributes for nn operators. */ -#ifndef TVM_RELAY_ATTRS_NN_QUANTIZE_H_ -#define TVM_RELAY_ATTRS_NN_QUANTIZE_H_ +#ifndef TVM_RELAY_ATTRS_QNN_H_ +#define TVM_RELAY_ATTRS_QNN_H_ #include #include @@ -34,4 +34,4 @@ namespace relay { } // namespace relay } // namespace tvm -#endif // TVM_RELAY_ATTRS_NN_QUANTIZE_H_ +#endif // TVM_RELAY_ATTRS_QNN_H_ diff --git a/src/relay/pass/quantize_rewrite.cc b/src/relay/qnn/pass/quantize_rewrite.cc similarity index 92% rename from src/relay/pass/quantize_rewrite.cc rename to src/relay/qnn/pass/quantize_rewrite.cc index 925c516b41ed..eb4848c87928 100644 --- a/src/relay/pass/quantize_rewrite.cc +++ b/src/relay/qnn/pass/quantize_rewrite.cc @@ -26,13 +26,14 @@ #include #include #include -#include -#include -#include "pattern_util.h" +#include +#include "../util.h" +#include "../../pass/pattern_util.h" namespace tvm { namespace relay { + } // namespace relay } // namespace tvm diff --git a/include/tvm/relay/quantize_util.h b/src/relay/qnn/util.h similarity index 54% rename from include/tvm/relay/quantize_util.h rename to src/relay/qnn/util.h index bb054fb8fb65..61663b0da85e 100644 --- a/include/tvm/relay/quantize_util.h +++ b/src/relay/qnn/util.h @@ -18,97 +18,97 @@ */ /*! 
- * \file nnvm/compiler/quantize_util.h + * \file tvm/relay/quantize_util.h * \brief Utility methods needs for quantized ops that can be shared */ -#ifndef TVM_QUANTIZE_UTIL_H -#define TVM_QUANTIZE_UTIL_H +#ifndef TVM_RELAY_QUANTIZE_UTIL_H_ +#define TVM_RELAY_QUANTIZE_UTIL_H_ #include -#include "./base.h" +#include +#include namespace tvm { namespace relay { -inline bool is_Int8(const DataType& dtype) { +inline bool IsInt8(const DataType& dtype) { return dtype == Int(8); } -inline bool is_UInt8(const DataType& dtype) { +inline bool IsUint8(const DataType& dtype) { return dtype == UInt(8); } - -inline bool is_Int16(const DataType& dtype) { +inline bool IsInt16(const DataType& dtype) { return dtype == Int(16); } -inline bool is_UInt16(const DataType& dtype) { +inline bool IsUint16(const DataType& dtype) { return dtype == UInt(16); } -inline bool is_Int32(const DataType& dtype) { +inline bool IsInt32(const DataType& dtype) { return dtype == Int(32); } -inline bool is_UInt32(const DataType& dtype) { +inline bool IsUint32(const DataType& dtype) { return dtype == UInt(32); } - - -inline bool is_Float32(const DataType& dtype) { +inline bool IsFloat32(const DataType& dtype) { return dtype == Float(32); } -inline bool is_quantized_type(const DataType& dtype) { - return is_Int8(dtype) || is_UInt8(dtype) - || is_Int16(dtype) || is_UInt16(dtype); +inline bool IsQuantizedType(const DataType& dtype) { + return IsInt8(dtype) || IsUint8(dtype) + || IsInt16(dtype) || IsUint16(dtype); } enum class QuantizeOpType : uint8_t { - Quantize_Requantize, + Quantize, Dequantize, Requantize }; -inline bool is_valid_quantized_op_input_type(const QuantizeOpType &op_type, const DataType &in_dtype) { - switch(op_type) { - case QuantizeOpType::Quantize_Requantize: - return is_Float32(in_dtype) || is_quantized_type(in_dtype); +inline bool IsValidOpInputType(const QuantizeOpType& op_type, + const DataType& in_dtype) { + switch (op_type) { + case QuantizeOpType::Quantize: + return IsFloat32(in_dtype) || IsQuantizedType(in_dtype); case QuantizeOpType ::Dequantize: - return is_quantized_type(in_dtype); + return IsQuantizedType(in_dtype); case QuantizeOpType ::Requantize: - return is_Int16(in_dtype) || is_Int32(in_dtype); + return IsInt16(in_dtype) || IsInt32(in_dtype); default: return false; } } -inline bool is_valid_quantized_op_output_type(const QuantizeOpType &op_type, const DataType &in_dtype) { - switch(op_type) { - case QuantizeOpType::Quantize_Requantize: - return is_quantized_type(in_dtype); +inline bool IsValidOpOutputType(const QuantizeOpType& op_type, + const DataType& in_dtype) { + switch (op_type) { + case QuantizeOpType::Quantize: + return IsQuantizedType(in_dtype); case QuantizeOpType::Dequantize: - return is_Float32(in_dtype); + return IsFloat32(in_dtype); default: return false; } } -inline const int32_t get_qmin(const DataType& dtype) { - if (is_Int8(dtype)) { +inline const int32_t GetQmin(const DataType& dtype) { + if (IsInt8(dtype)) { return std::numeric_limits::min(); - } else if (is_UInt8(dtype)) { + } else if (IsUint8(dtype)) { return std::numeric_limits::min(); - } else if (is_Int16(dtype)) { + } else if (IsInt16(dtype)) { return std::numeric_limits::min(); - } else if (is_UInt16(dtype)) { + } else if (IsUint16(dtype)) { return std::numeric_limits::min(); - } else if (is_Int32(dtype)) { + } else if (IsInt32(dtype)) { return std::numeric_limits::min(); - } else if (is_UInt32(dtype)) { + } else if (IsUint32(dtype)) { return std::numeric_limits::min(); } LOG(FATAL) << "Type not supported\n"; @@ -116,24 
+116,24 @@ inline const int32_t get_qmin(const DataType& dtype) { } -inline const int32_t get_qmax(const DataType& dtype) { - if (is_Int8(dtype)) { +inline const int32_t GetQmax(const DataType& dtype) { + if (IsInt8(dtype)) { return std::numeric_limits::max(); - } else if (is_UInt8(dtype)) { + } else if (IsUint8(dtype)) { return std::numeric_limits::max(); - } else if (is_Int16(dtype)) { + } else if (IsInt16(dtype)) { return std::numeric_limits::max(); - } else if (is_UInt16(dtype)) { + } else if (IsUint16(dtype)) { return std::numeric_limits::max(); - } else if (is_Int32(dtype)) { + } else if (IsInt32(dtype)) { return std::numeric_limits::max(); - } else if (is_UInt32(dtype)) { + } else if (IsUint32(dtype)) { return std::numeric_limits::max(); } LOG(FATAL) << "Type not supported\n"; return -1; } -} // namespace relay -} // namespace tvm -#endif //TVM_QUANTIZE_UTIL_H +} // namespace relay +} // namespace tvm +#endif // TVM_RELAY_QUANTIZE_UTIL_H_ From e83704bb45eadc64de130af87bd43913e9545cf1 Mon Sep 17 00:00:00 2001 From: "shoubhikbhatti@gmail.com" Date: Fri, 12 Jul 2019 11:49:40 -0700 Subject: [PATCH 7/8] Adding the common python files. --- python/tvm/relay/qnn/__init__.py | 20 ++++++++++++++++ python/tvm/relay/qnn/_qnn.py | 22 +++++++++++++++++ python/tvm/relay/qnn/ir_pass.py | 37 +++++++++++++++++++++++++++++ python/tvm/relay/qnn/op/__init__.py | 20 ++++++++++++++++ python/tvm/relay/qnn/op/_make.py | 20 ++++++++++++++++ python/tvm/relay/qnn/op/qnn.py | 20 ++++++++++++++++ 6 files changed, 139 insertions(+) create mode 100644 python/tvm/relay/qnn/__init__.py create mode 100644 python/tvm/relay/qnn/_qnn.py create mode 100644 python/tvm/relay/qnn/ir_pass.py create mode 100644 python/tvm/relay/qnn/op/__init__.py create mode 100644 python/tvm/relay/qnn/op/_make.py create mode 100644 python/tvm/relay/qnn/op/qnn.py diff --git a/python/tvm/relay/qnn/__init__.py b/python/tvm/relay/qnn/__init__.py new file mode 100644 index 000000000000..5de0656eaaca --- /dev/null +++ b/python/tvm/relay/qnn/__init__.py @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Neural network related operators.""" +from __future__ import absolute_import as _abs +from . import op +from . import ir_pass diff --git a/python/tvm/relay/qnn/_qnn.py b/python/tvm/relay/qnn/_qnn.py new file mode 100644 index 000000000000..bd3cdbb976d6 --- /dev/null +++ b/python/tvm/relay/qnn/_qnn.py @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#pylint: disable=unused-argument
+"""Internal module for quantization."""
+from __future__ import absolute_import
+from tvm._ffi.function import _init_api
+
+_init_api("relay._qnn", __name__)

diff --git a/python/tvm/relay/qnn/ir_pass.py b/python/tvm/relay/qnn/ir_pass.py
new file mode 100644
index 000000000000..24e3329e961c
--- /dev/null
+++ b/python/tvm/relay/qnn/ir_pass.py
@@ -0,0 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#pylint: disable=unused-argument
+"""Automatic quantization toolkit."""
+from __future__ import absolute_import
+
+from . import _qnn
+
+def rewrite(expr):
+    """
+    Rewrites the high-level quantized ops into low-level existing Relay ops.
+
+    Parameters
+    ----------
+    expr : tvm.relay.Expr
+        The input expression.
+
+    Returns
+    -------
+    expr : tvm.relay.Expr
+        The output expression.
+    """
+    return _qnn.rewrite(expr)

diff --git a/python/tvm/relay/qnn/op/__init__.py b/python/tvm/relay/qnn/op/__init__.py
new file mode 100644
index 000000000000..e9adfa783f93
--- /dev/null
+++ b/python/tvm/relay/qnn/op/__init__.py
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=wildcard-import
+"""Neural network related operators."""
+from __future__ import absolute_import as _abs
+from .qnn import *

diff --git a/python/tvm/relay/qnn/op/_make.py b/python/tvm/relay/qnn/op/_make.py
new file mode 100644
index 000000000000..b1695629b8f9
--- /dev/null
+++ b/python/tvm/relay/qnn/op/_make.py
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Constructor APIs""" +from ...._ffi.function import _init_api + +_init_api("relay.op.qnn._make", __name__) diff --git a/python/tvm/relay/qnn/op/qnn.py b/python/tvm/relay/qnn/op/qnn.py new file mode 100644 index 000000000000..ff7d0876d56e --- /dev/null +++ b/python/tvm/relay/qnn/op/qnn.py @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +#pylint: disable=invalid-name, too-many-lines +"""Neural network operations.""" +from __future__ import absolute_import as _abs +from . import _make From 05b1ac6df8330318991bda3a5737281e5acb0174 Mon Sep 17 00:00:00 2001 From: "shoubhikbhatti@gmail.com" Date: Mon, 15 Jul 2019 10:53:54 -0700 Subject: [PATCH 8/8] Fixing lint issues. --- include/tvm/relay/qnn/attrs.h | 7 +++---- python/tvm/relay/qnn/__init__.py | 2 +- python/tvm/relay/qnn/_qnn.py | 2 +- python/tvm/relay/qnn/ir_pass.py | 2 +- python/tvm/relay/qnn/op/__init__.py | 1 - python/tvm/relay/qnn/op/_make.py | 2 +- python/tvm/relay/qnn/op/qnn.py | 2 +- src/relay/pass/pattern_util.h | 2 +- src/relay/qnn/op/dequantize.cc | 2 +- src/relay/qnn/op/quantize_op.cc | 2 +- src/relay/qnn/pass/quantize_rewrite.cc | 2 +- src/relay/qnn/util.h | 8 ++++---- 12 files changed, 16 insertions(+), 18 deletions(-) diff --git a/include/tvm/relay/qnn/attrs.h b/include/tvm/relay/qnn/attrs.h index 51be43154b59..9645b3cf587d 100644 --- a/include/tvm/relay/qnn/attrs.h +++ b/include/tvm/relay/qnn/attrs.h @@ -21,8 +21,8 @@ * \file tvm/relay/qnn/attrs.h * \brief Auxiliary attributes for quantized nn operators. 
*/
-#ifndef TVM_RELAY_ATTRS_QNN_H_
-#define TVM_RELAY_ATTRS_QNN_H_
+#ifndef TVM_RELAY_QNN_ATTRS_H_
+#define TVM_RELAY_QNN_ATTRS_H_
 
 #include
 #include
@@ -52,7 +52,6 @@ struct DequantizeAttrs : public tvm::AttrsNode {
   double input_scale;
 
   TVM_DECLARE_ATTRS(DequantizeAttrs, "relay.attrs.DequantizeAttrs") {
-
     TVM_ATTR_FIELD(input_zero_point)
       .describe("The zero_point for the input tensor of this op.");
 
@@ -64,4 +63,4 @@ struct DequantizeAttrs : public tvm::AttrsNode {
 
 } // namespace relay
 } // namespace tvm
-#endif // TVM_RELAY_ATTRS_QNN_H_
+#endif // TVM_RELAY_QNN_ATTRS_H_

diff --git a/python/tvm/relay/qnn/__init__.py b/python/tvm/relay/qnn/__init__.py
index d3dfd85d653f..236b094a6988 100644
--- a/python/tvm/relay/qnn/__init__.py
+++ b/python/tvm/relay/qnn/__init__.py
@@ -18,4 +18,4 @@
 """Neural network related operators."""
 from __future__ import absolute_import as _abs
 from . import op
-from . import ir_pass
\ No newline at end of file
+from . import ir_pass

diff --git a/python/tvm/relay/qnn/_qnn.py b/python/tvm/relay/qnn/_qnn.py
index 1187b632b3db..77ecc325ae18 100644
--- a/python/tvm/relay/qnn/_qnn.py
+++ b/python/tvm/relay/qnn/_qnn.py
@@ -20,4 +20,4 @@
 from __future__ import absolute_import
 from tvm._ffi.function import _init_api
 
-_init_api("relay._qnn", __name__)
\ No newline at end of file
+_init_api("relay._qnn", __name__)

diff --git a/python/tvm/relay/qnn/ir_pass.py b/python/tvm/relay/qnn/ir_pass.py
index 24e3329e961c..ea272dec429a 100644
--- a/python/tvm/relay/qnn/ir_pass.py
+++ b/python/tvm/relay/qnn/ir_pass.py
@@ -14,7 +14,7 @@
 # KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations
 # under the License.
-#pylint: disable=unused-argument
+
 """Automatic quantization toolkit."""
 from __future__ import absolute_import

diff --git a/python/tvm/relay/qnn/op/__init__.py b/python/tvm/relay/qnn/op/__init__.py
index cd2367b77d2c..f1c896489fd3 100644
--- a/python/tvm/relay/qnn/op/__init__.py
+++ b/python/tvm/relay/qnn/op/__init__.py
@@ -14,7 +14,6 @@
 # KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations
 # under the License.
-# pylint: disable=wildcard-import
 
 """Neural network related operators."""

diff --git a/python/tvm/relay/qnn/op/_make.py b/python/tvm/relay/qnn/op/_make.py
index c611becf36d4..82d5e5a9cdc3 100644
--- a/python/tvm/relay/qnn/op/_make.py
+++ b/python/tvm/relay/qnn/op/_make.py
@@ -19,4 +19,4 @@
 from ...._ffi.function import _init_api
 
-_init_api("relay.op.qnn._make", __name__)
\ No newline at end of file
+_init_api("relay.op.qnn._make", __name__)

diff --git a/python/tvm/relay/qnn/op/qnn.py b/python/tvm/relay/qnn/op/qnn.py
index e12769d688cc..290fb4912a17 100644
--- a/python/tvm/relay/qnn/op/qnn.py
+++ b/python/tvm/relay/qnn/op/qnn.py
@@ -63,4 +63,4 @@ def dequantize(input_data, input_zero_point, input_scale):
     result : tvm.relay.Expr
         The computed result.
""" - return _make.dequantize(input_data, input_zero_point, input_scale) \ No newline at end of file + return _make.dequantize(input_data, input_zero_point, input_scale) diff --git a/src/relay/pass/pattern_util.h b/src/relay/pass/pattern_util.h index 7249d1d4c086..906e3193729e 100644 --- a/src/relay/pass/pattern_util.h +++ b/src/relay/pass/pattern_util.h @@ -34,7 +34,7 @@ #include #include #include - +#include namespace tvm { namespace relay { diff --git a/src/relay/qnn/op/dequantize.cc b/src/relay/qnn/op/dequantize.cc index 7b346df198e6..cfaff3f23755 100644 --- a/src/relay/qnn/op/dequantize.cc +++ b/src/relay/qnn/op/dequantize.cc @@ -73,4 +73,4 @@ TVM_REGISTER_API("relay.op.qnn._make.dequantize") .set_body_typed(MakeDequantize); } // namespace relay -} // namespace tvm \ No newline at end of file +} // namespace tvm diff --git a/src/relay/qnn/op/quantize_op.cc b/src/relay/qnn/op/quantize_op.cc index 809cf33f58ae..b3be62742aae 100644 --- a/src/relay/qnn/op/quantize_op.cc +++ b/src/relay/qnn/op/quantize_op.cc @@ -86,4 +86,4 @@ TVM_REGISTER_API("relay.op.qnn._make.quantize") .set_body_typed(MakeQuantize); } // namespace relay -} // namespace tvm \ No newline at end of file +} // namespace tvm diff --git a/src/relay/qnn/pass/quantize_rewrite.cc b/src/relay/qnn/pass/quantize_rewrite.cc index fbdfda486ac7..1da4824772ea 100644 --- a/src/relay/qnn/pass/quantize_rewrite.cc +++ b/src/relay/qnn/pass/quantize_rewrite.cc @@ -77,4 +77,4 @@ TVM_REGISTER_API("relay._qnn.rewrite").set_body_typed([](const Expr& }); } // namespace relay -} // namespace tvm \ No newline at end of file +} // namespace tvm diff --git a/src/relay/qnn/util.h b/src/relay/qnn/util.h index 0f57f5564b43..c96227c3667a 100644 --- a/src/relay/qnn/util.h +++ b/src/relay/qnn/util.h @@ -22,12 +22,12 @@ * \brief Utility methods needs for quantized ops that can be shared */ -#ifndef TVM_RELAY_QUANTIZE_UTIL_H_ -#define TVM_RELAY_QUANTIZE_UTIL_H_ +#ifndef TVM_RELAY_QNN_UTIL_H_ +#define TVM_RELAY_QNN_UTIL_H_ #include -#include #include +#include namespace tvm { namespace relay { @@ -134,4 +134,4 @@ inline const int32_t GetQmax(const DataType& dtype) { } // namespace relay } // namespace tvm -#endif // TVM_RELAY_QUANTIZE_UTIL_H_ +#endif // TVM_RELAY_QNN_UTIL_H_