apache · shoubhik · Jun 27, 2019 · Jun 28, 2019 · Jul 1, 2019 · Jul 1, 2019
diff --git a/3rdparty/HalideIR b/3rdparty/HalideIR
diff --git a/include/tvm/relay/attrs/nn_quantize.h b/include/tvm/relay/attrs/nn_quantize.h
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file tvm/relay/attrs/nn_quantize.h
+ * \brief Auxiliary attributes for quantized nn operators.
+ */
+#ifndef TVM_RELAY_ATTRS_NN_QUANTIZE_H_
+#define TVM_RELAY_ATTRS_NN_QUANTIZE_H_
+
+#include <tvm/attrs.h>
+#include <string>
+
+namespace tvm {
+namespace relay {
+
+/*!
+ * \brief Attributes carrying the output quantization parameters and output
+ *        data type (see the per-field descriptions below).
+ */
+struct QuantizeAttrs : public tvm::AttrsNode<QuantizeAttrs> {
+  /*! \brief Zero point of the quantized output. */
+  int32_t output_zero_point;
+  /*! \brief Scale of the quantized output. */
+  double output_scale;
+  /*! \brief Data type of the output tensor. */
+  DataType out_dtype;
+
+  // Fields are declared in the same order as before; only the layout of the
+  // macro chains differs.
+  TVM_DECLARE_ATTRS(QuantizeAttrs, "relay.attrs.QuantizeAttrs") {
+    TVM_ATTR_FIELD(out_dtype).describe(
+        "Output data type, can be one of [int8 or uint8].");
+    TVM_ATTR_FIELD(output_zero_point).describe(
+        "The zero_point for the activation of this op.");
+    TVM_ATTR_FIELD(output_scale).describe(
+        "The scale for the activation of this op.");
+  }
+};
+
+/*!
+ * \brief Attributes of the dequantize operator: the quantization parameters
+ *        of its input tensor.
+ */
+struct DequantizeAttrs : public tvm::AttrsNode<DequantizeAttrs> {
+  /*! \brief Zero point of the quantized input tensor. */
+  int32_t input_zero_point;
+  /*! \brief Scale of the quantized input tensor. */
+  double input_scale;
+
+  // The declared class and type key must name DequantizeAttrs: the
+  // "nn_quantized.dequantize" op is registered with the attrs type key
+  // "relay.attrs.DequantizeAttrs", and reusing QuantizeAttrs' key here would
+  // clash with the QuantizeAttrs declaration.
+  TVM_DECLARE_ATTRS(DequantizeAttrs, "relay.attrs.DequantizeAttrs") {
+    TVM_ATTR_FIELD(input_zero_point)
+      .describe("The zero_point for the input tensor of this op.");
+
+    TVM_ATTR_FIELD(input_scale)
+      .describe("The scale for the input tensor of this op.");
+  }
+};
+
+
+}  // namespace relay
+}  // namespace tvm
+#endif  // TVM_RELAY_ATTRS_NN_QUANTIZE_H_
diff --git a/include/tvm/relay/quantize_util.h b/include/tvm/relay/quantize_util.h
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file tvm/relay/quantize_util.h
+ * \brief Utility methods needed by quantized ops that can be shared
+ */
+
+#ifndef TVM_QUANTIZE_UTIL_H
+#define TVM_QUANTIZE_UTIL_H
+
+#include <tvm/expr.h>
+#include <cstdint>
+#include <limits>
+#include "./base.h"
+
+namespace tvm {
+namespace relay {
+
+/*!
+ * \brief Check whether a data type equals Int(8).
+ * \param dtype The data type to test.
+ * \return true iff dtype compares equal to Int(8).
+ */
+inline bool is_Int8(const DataType& dtype) {
+  const DataType int8_type = Int(8);
+  return int8_type == dtype;
+}
+
+/*!
+ * \brief Check whether a data type equals UInt(8).
+ * \param dtype The data type to test.
+ * \return true iff dtype compares equal to UInt(8).
+ */
+inline bool is_UInt8(const DataType& dtype) {
+  const DataType uint8_type = UInt(8);
+  return uint8_type == dtype;
+}
+
+/*!
+ * \brief Check whether a data type equals Float(32).
+ * \param dtype The data type to test.
+ * \return true iff dtype compares equal to Float(32).
+ */
+inline bool is_Float32(const DataType& dtype) {
+  const DataType float32_type = Float(32);
+  return float32_type == dtype;
+}
+
+/*!
+ * \brief Check whether a data type is one of the quantized types (int8 or uint8).
+ * \param dtype The data type to test.
+ * \return true iff dtype is Int(8) or UInt(8).
+ */
+inline bool is_qauntized_type(const DataType& dtype) {
+  if (is_Int8(dtype)) {
+    return true;
+  }
+  return is_UInt8(dtype);
+}
+
+enum class QuantizeOpType : uint8_t {  // op families checked by the dtype validators below
+  Quantize_Requantize,  // input: float32/int8/uint8, output: int8/uint8
+  Dequantize  // input: int8/uint8, output: float32
+};
+
+/*!
+ * \brief Check whether an input data type is accepted by a quantized op family.
+ * \param op_type The op family being validated.
+ * \param in_dtype The data type of the op's input.
+ * \return For Quantize_Requantize: true for float32, int8 or uint8.
+ *         For Dequantize: true for int8 or uint8.
+ *         false for any other op_type value.
+ */
+inline bool is_valid_quantized_op_input_type(const QuantizeOpType &op_type,
+                                             const DataType &in_dtype) {
+  const bool quantized_input = is_qauntized_type(in_dtype);
+  if (op_type == QuantizeOpType::Quantize_Requantize) {
+    return quantized_input || is_Float32(in_dtype);
+  }
+  if (op_type == QuantizeOpType::Dequantize) {
+    return quantized_input;
+  }
+  return false;
+}
+
+/*!
+ * \brief Check whether a data type is an acceptable output type for a quantized op family.
+ * \param op_type The op family being validated.
+ * \param in_dtype The data type to validate. NOTE(review): despite the name this is
+ *        presumably the op's output data type - confirm against callers.
+ * \return For Quantize_Requantize: true for int8 or uint8.
+ *         For Dequantize: true for float32.
+ *         false for any other op_type value.
+ */
+inline bool is_valid_quantized_op_output_type(const QuantizeOpType &op_type,
+                                              const DataType &in_dtype) {
+  if (op_type == QuantizeOpType::Quantize_Requantize) {
+    return is_qauntized_type(in_dtype);
+  }
+  if (op_type == QuantizeOpType::Dequantize) {
+    return is_Float32(in_dtype);
+  }
+  return false;
+}
+
+/*!
+ * \brief Smallest value representable by a quantized data type.
+ * \param dtype The quantized data type; must be int8 or uint8 (enforced by CHECK).
+ * \return std::numeric_limits<int8_t>::min() for int8,
+ *         std::numeric_limits<uint8_t>::min() for uint8.
+ */
+// Returned by value without `const`: a const qualifier on a scalar return type
+// is ignored by the language and only triggers -Wignored-qualifiers.
+inline int32_t get_qmin(const DataType& dtype) {
+  CHECK(is_qauntized_type(dtype))
+    << "Expected quantized data type [int8, uint8] but was " << dtype;
+  if (is_Int8(dtype)) {
+    return std::numeric_limits<int8_t>::min();
+  }
+  // The CHECK above leaves uint8 as the only other possibility.
+  return std::numeric_limits<uint8_t>::min();
+}
+
+
+/*!
+ * \brief Largest value representable by a quantized data type.
+ * \param dtype The quantized data type; must be int8 or uint8 (enforced by CHECK).
+ * \return std::numeric_limits<int8_t>::max() for int8,
+ *         std::numeric_limits<uint8_t>::max() for uint8.
+ */
+// Returned by value without `const`: a const qualifier on a scalar return type
+// is ignored by the language and only triggers -Wignored-qualifiers.
+inline int32_t get_qmax(const DataType& dtype) {
+  CHECK(is_qauntized_type(dtype))
+    << "Expected quantized data type [int8, uint8] but was " << dtype;
+  // Uses the is_Int8 helper, matching get_qmin.
+  if (is_Int8(dtype)) {
+    return std::numeric_limits<int8_t>::max();
+  }
+  // The CHECK above leaves uint8 as the only other possibility.
+  return std::numeric_limits<uint8_t>::max();
+}
+
+} // namespace relay
+} // namespace tvm
+#endif //TVM_QUANTIZE_UTIL_H
diff --git a/python/tvm/relay/op/nn/__init__.py b/python/tvm/relay/op/nn/__init__.py
@@ -19,3 +19,4 @@
 from __future__ import absolute_import as _abs
 from .nn import *
 from . import _nn
+from . import _quantize
diff --git a/python/tvm/relay/op/nn/_make_quantize.py b/python/tvm/relay/op/nn/_make_quantize.py
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Constructor APIs"""
+from ...._ffi.function import _init_api
+
+_init_api("relay.op.nn._quantize._make", __name__)  # prefix of the C++ TVM_REGISTER_API names
diff --git a/python/tvm/relay/op/nn/_quantize.py b/python/tvm/relay/op/nn/_quantize.py
@@ -0,0 +1,73 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#pylint: disable=invalid-name, too-many-lines
+"""Neural network operations."""
+from __future__ import absolute_import as _abs
+from . import _make_quantize
+
+def quantize(input_data, output_zero_point, output_scale, out_dtype='int8'):
+    r""" Quantize op
+
+    This operator takes float32 or quantized int8/uint8 data as input and produces
+    quantized int8 or uint8 data as output. The output shape is the same as the input
+    shape. The input tensor can be of any shape.
+
+    .. math::
+
+        \mbox{out}[x] = \mbox{clamp}(
+            \mbox{round}(\mbox{input\_tensor}[x] / \mbox{output\_scale})
+            + \mbox{output\_zero\_point},
+            \mbox{out\_dtype::min}, \mbox{out\_dtype::max})
+
+    Parameters
+    ----------
+    input_data : tvm.relay.Expr
+        The input tensor to be quantized. Can be of type [float32, int8, uint8].
+
+    output_zero_point : int
+        The output zero_point.
+
+    output_scale : float
+        The output scale.
+
+    out_dtype : str, optional
+        The data type of the output tensor. Can be one of [int8, uint8].
+        Defaults to 'int8'.
+
+    Returns
+    -------
+    result : tvm.relay.Expr
+        The computed result.
+    """
+    return _make_quantize.quantize(input_data, output_zero_point, output_scale, out_dtype)
+
+
+def dequantize(input_data, input_zero_point, input_scale):
+    r""" Dequantize op
+
+    This operator takes quantized int8 or uint8 data as input and produces
+    dequantized float32 data as output. The output shape is the same as the input
+    shape. The input tensor can be of any shape.
+
+    Parameters
+    ----------
+    input_data : tvm.relay.Expr
+        The input tensor to be dequantized. Can be of type [int8, uint8].
+
+    input_zero_point : int
+        The zero_point of the input tensor.
+
+    input_scale : float
+        The scale of the input tensor.
+
+    Returns
+    -------
+    result : tvm.relay.Expr
+        The computed result.
+    """
+    return _make_quantize.dequantize(input_data, input_zero_point, input_scale)
+
+
diff --git a/python/tvm/relay/quantize/__init__.py b/python/tvm/relay/quantize/__init__.py
@@ -20,3 +20,4 @@
 
 from .quantize import *
 from ._annotate import register_annotate_function
+from ._quantize import *
diff --git a/src/relay/op/nn/dequantize.cc b/src/relay/op/nn/dequantize.cc
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2018 by Contributors
+ * \file dequantize.cc
+ * \brief Dequantize operator
+ */
+
+#include <tvm/data_layout.h>
+#include <tvm/relay/op_attr_types.h>
+#include <tvm/relay/pass.h>
+#include <tvm/relay/attrs/nn_quantize.h>
+#include <tvm/relay/quantize_util.h>
+
+namespace tvm {
+namespace relay {
+
+TVM_REGISTER_NODE_TYPE(DequantizeAttrs);  // attrs node type used by the dequantize op
+
+/*!
+ * \brief Type relation of the dequantize op.
+ *
+ * Requires the input tensor to be int8 or uint8 and assigns the output a
+ * float32 tensor type with the same shape as the input.
+ *
+ * \param types Exactly two entries: [input type, output type].
+ * \param num_inputs Unused.
+ * \param attrs Unused.
+ * \param reporter Used to assign the output type.
+ * \return false when the input type is not yet a tensor type (the relation
+ *         cannot be resolved yet), true once the output type is assigned.
+ */
+bool DequantizeRel(const Array<Type>& types,
+                   int num_inputs,
+                   const Attrs& attrs,
+                   const TypeReporter& reporter) {
+  CHECK_EQ(types.size(), 2);
+  const auto* data = types[0].as<TensorTypeNode>();
+  // as<>() yields nullptr while the input type is still unresolved; report
+  // "not resolved yet" instead of dereferencing a null pointer.
+  if (data == nullptr) {
+    return false;
+  }
+  const auto input_dtype = data->dtype;
+  CHECK(is_valid_quantized_op_input_type(QuantizeOpType::Dequantize, input_dtype))
+    << "Input type should be one of the quantized types [uint8, int8] but was " << input_dtype;
+  // The output keeps the input shape and is always float32.
+  const Array<tvm::Expr> oshape = data->shape;
+  reporter->Assign(types[1], TensorTypeNode::make(oshape, Float(32)));
+  return true;
+}
+
+/*!
+ * \brief Build a call to the "nn_quantized.dequantize" op.
+ * \param data Expression passed as the op's single argument.
+ * \param input_zero_point Stored in DequantizeAttrs::input_zero_point.
+ * \param input_scale Stored in DequantizeAttrs::input_scale.
+ * \return The resulting call expression.
+ */
+Expr MakeDequantize(Expr data,
+                    int32_t input_zero_point,
+                    double input_scale) {
+  auto dequantize_attrs = make_node<DequantizeAttrs>();
+  dequantize_attrs->input_zero_point = input_zero_point;
+  dequantize_attrs->input_scale = input_scale;
+  // Looked up once and cached for subsequent calls.
+  static const Op& dequantize_op = Op::Get("nn_quantized.dequantize");
+  return CallNode::make(dequantize_op, {data}, Attrs(dequantize_attrs), {});
+}
+
+// Operator registration for dequantize. The description states what the type
+// relation enforces: int8/uint8 input, float32 output.
+RELAY_REGISTER_OP("nn_quantized.dequantize")
+.describe(R"code(Dequantizes the quantized input and produces float32 output.
+
+The input is always quantized (int8, uint8) and will be converted to float32
+given the input scale and zero point.
+- **data**: Quantized tensor of any shape to dequantize. The input data must be int8 or uint8.
+)code" TVM_ADD_FILELINE)
+.set_attrs_type_key("relay.attrs.DequantizeAttrs")
+.set_num_inputs(1)
+.add_argument("data", "Tensor", "The tensor to dequantize.")
+.set_support_level(10)
+.add_type_rel("Dequantize", DequantizeRel);
+
+TVM_REGISTER_API("relay.op.nn._quantize._make.dequantize")  // called from Python as _make_quantize.dequantize
+.set_body_typed(MakeDequantize);
+
+}  // namespace relay
+}  // namespace tvm
Original file line number	Diff line number	Diff line change
Expand Up		@@ -20,3 +20,4 @@

		from .quantize import *
		from ._annotate import register_annotate_function
		from ._quantize import *