From 34d39adba23a6683d41f74b3fa89cb0a06c39ffc Mon Sep 17 00:00:00 2001
From: "shoubhikbhatti@gmail.com" <shoubhikbhatti@gmail.com>
Date: Thu, 27 Jun 2019 16:40:50 -0700
Subject: [PATCH 1/4] [Relay] [Quantization] WIP - Prototyping Quantize and
 Dequantize operators with type inference, lowering and test cases.

---
 include/tvm/relay/attrs/nn_quantize.h       | 156 ++++++++++++++++++++
 include/tvm/relay/quantize_util.h           |  98 ++++++++++++
 python/tvm/relay/op/nn/__init__.py          |   1 +
 python/tvm/relay/op/nn/_make_quantize.py    |  20 +++
 python/tvm/relay/op/nn/_quantize.py         |  73 +++++++++
 python/tvm/relay/quantize/__init__.py       |   1 +
 src/relay/op/nn/dequantize.cc               |  78 ++++++++++
 src/relay/op/nn/quantize_op.cc              |  91 ++++++++++++
 src/relay/pass/quantize_rewrite.cc          |  93 ++++++++++++
 tests/python/unittest/test_quantized_ops.py | 117 +++++++++++++++
 10 files changed, 728 insertions(+)
 create mode 100644 include/tvm/relay/attrs/nn_quantize.h
 create mode 100644 include/tvm/relay/quantize_util.h
 create mode 100644 python/tvm/relay/op/nn/_make_quantize.py
 create mode 100644 python/tvm/relay/op/nn/_quantize.py
 create mode 100644 src/relay/op/nn/dequantize.cc
 create mode 100644 src/relay/op/nn/quantize_op.cc
 create mode 100644 src/relay/pass/quantize_rewrite.cc
 create mode 100644 tests/python/unittest/test_quantized_ops.py

diff --git a/include/tvm/relay/attrs/nn_quantize.h b/include/tvm/relay/attrs/nn_quantize.h
new file mode 100644
index 000000000000..af420dca8139
--- /dev/null
+++ b/include/tvm/relay/attrs/nn_quantize.h
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file tvm/relay/attrs/nn_quantize.h
+ * \brief Auxiliary attributes for quantize operators.
+ */
+#ifndef TVM_RELAY_ATTRS_NN_QUANTIZE_H_
+#define TVM_RELAY_ATTRS_NN_QUANTIZE_H_
+
+#include <tvm/attrs.h>
+#include <string>
+
+namespace tvm {
+namespace relay {
+
+struct QuantizeAttrs : public tvm::AttrsNode<QuantizeAttrs> {
+  DataType input_dtype;
+  int32_t output_zero_point;
+  double output_scale;
+  DataType out_dtype;
+
+  TVM_DECLARE_ATTRS(QuantizeAttrs, "relay.attrs.QuantizeAttrs") {
+    TVM_ATTR_FIELD(out_dtype)
+      .describe("Output data type, can be one of [int8 or uint8].");
+
+    TVM_ATTR_FIELD(input_dtype)
+      .describe("Input data type, can be one of [float32, int8, uint8].");
+
+    TVM_ATTR_FIELD(output_zero_point)
+      .describe("The zero_point for the activation of this op.");
+
+    TVM_ATTR_FIELD(output_scale)
+      .describe("The scale for the activation of this op.");
+  }
+};
+
+struct DequantizeAttrs : public tvm::AttrsNode<DequantizeAttrs> {
+  int32_t input_zero_point;
+  double input_scale;
+
+  TVM_DECLARE_ATTRS(DequantizeAttrs, "relay.attrs.DequantizeAttrs") {
+    TVM_ATTR_FIELD(input_zero_point)
+      .describe("The zero_point for the input tensor of this op.");
+
+    TVM_ATTR_FIELD(input_scale)
+      .describe("The scale for the input tensor of this op.");
+  }
+};
+
+// TODO(anijain2305) - Copy of QuantizedConv2DAttrs. Should we inherit?
+/*! \brief Attribute for quantized conv2d operator */
+struct QuantizedConv2DAttrs : public tvm::AttrsNode<QuantizedConv2DAttrs> {
+  // Traditional conv2d attributes.
+  Array<IndexExpr> strides;
+  Array<IndexExpr> padding;
+  Array<IndexExpr> dilation;
+  int groups;
+  IndexExpr channels;
+  Array<IndexExpr> kernel_size;
+  std::string data_layout;
+  std::string kernel_layout;
+  std::string out_layout;
+  DataType out_dtype;
+
+  // Quantization related attributes.
+  int32_t input_zero_point;
+  int32_t kernel_zero_point;
+  int32_t output_zero_point;
+  double input_scale;
+  double kernel_scale;
+  double output_scale;
+  bool use_int_compute_for_requantize;
+  std::string rounding;
+
+  TVM_DECLARE_ATTRS(QuantizedConv2DAttrs, "relay.attrs.QuantizedConv2DAttrs") {
+    TVM_ATTR_FIELD(strides).set_default(Array<IndexExpr>({1, 1}))
+      .describe("Specifies the strides of the convolution.");
+    TVM_ATTR_FIELD(padding).set_default(Array<IndexExpr>({0, 0}))
+      .describe("If padding is non-zero, then the input is implicitly zero-padded"
+                " on both sides for padding number of points.");
+    TVM_ATTR_FIELD(dilation).set_default(Array<IndexExpr>({1, 1}))
+      .describe("Specifies the dilation rate to use for dilated convolution.");
+    TVM_ATTR_FIELD(groups).set_default(1)
+      .describe("Controls the connections between inputs and outputs."
+                " At groups=1, all inputs are convolved to all outputs."
+                " At groups=2, the operation becomes equivalent to having two convolution"
+                " layers side by side, each seeing half the input channels, and producing"
+                " half the output channels, and both subsequently concatenated.");
+    TVM_ATTR_FIELD(channels)
+      .describe("The number of output channels in the convolution."
+                " If it is not set, inferred by shape of the weight.")
+      .set_default(NullValue<IndexExpr>());
+    TVM_ATTR_FIELD(kernel_size)
+      .describe("Specifies the dimensions of the convolution window.")
+      .set_default(NullValue<Array<IndexExpr> >());
+    TVM_ATTR_FIELD(data_layout).set_default("NCHW")
+      .describe("Dimension ordering of input data. Can be 'NCHW', 'NHWC', etc."
+                " 'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
+                " dimensions respectively. Convolution is applied on the 'H' and"
+                " 'W' dimensions.");
+    TVM_ATTR_FIELD(kernel_layout).set_default("OIHW")
+      .describe("Dimension ordering of weight. Can be 'OIHW', 'OIHW16o16i', etc."
+                " 'O', 'I', 'H', 'W' stands for num_filter, input_channel, height, and width"
+                " dimensions respectively.");
+    TVM_ATTR_FIELD(out_layout).set_default("")
+      .describe("Dimension ordering of output. Can be 'NCHW', 'NHWC', etc."
+                " 'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
+                " dimensions respectively. Default to be same as input layout.");
+
+    // use 0 bits to indicate none.
+    TVM_ATTR_FIELD(out_dtype)
+      .set_default(NullValue<DataType>())
+      .describe("Output data type, set to explicit type under mixed precision setting.");
+
+    TVM_ATTR_FIELD(input_zero_point)
+      .describe("The zero point of the input tensor.");
+    TVM_ATTR_FIELD(kernel_zero_point)
+      .describe("The zero point of the kernel tensor.");
+    TVM_ATTR_FIELD(output_zero_point)
+      .describe("The zero point of the output tensor.");
+    TVM_ATTR_FIELD(input_scale)
+      .describe("The scale of the input tensor.");
+    TVM_ATTR_FIELD(kernel_scale)
+      .describe("The scale of the kernel tensor.");
+    TVM_ATTR_FIELD(output_scale)
+      .describe("The scale of the output tensor.");
+    TVM_ATTR_FIELD(use_int_compute_for_requantize).set_default(false)
+      .describe("When true, integer computation is used to handle the output scale.");
+    TVM_ATTR_FIELD(rounding).set_default("ceil")
+      .describe("The rounding that has to be used for handling scales.");
+  }
+};
+
+}  // namespace relay
+}  // namespace tvm
+#endif  // TVM_RELAY_ATTRS_NN_QUANTIZE_H_
\ No newline at end of file
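The two structs above carry the affine quantization parameters (a float scale and an integer zero point) that every op in this patch consumes. For reference, a minimal NumPy sketch of the forward and inverse mappings these attributes describe; the concrete scale/zero-point values below are illustrative only, not taken from the patch:

```python
import numpy as np

# Affine quantization: q = clamp(round(x / scale) + zero_point, qmin, qmax).
def quantize_np(x, scale, zero_point, dtype=np.uint8):
    info = np.iinfo(dtype)
    q = np.round(x / scale) + zero_point
    return np.clip(q, info.min, info.max).astype(dtype)

# Inverse mapping: x_hat = (q - zero_point) * scale.
def dequantize_np(q, scale, zero_point):
    return (q.astype(np.int32) - zero_point).astype(np.float32) * scale

x = np.array([-63.5, 0.0, 64.0], dtype=np.float32)
q = quantize_np(x, scale=0.5, zero_point=127)        # -> [0, 127, 255]
print(dequantize_np(q, scale=0.5, zero_point=127))   # round-trips to the inputs
```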
+ "'O', 'I', 'H', 'W' stands for num_filter, input_channel, height, and width" + "dimensions respectively."); + TVM_ATTR_FIELD(out_layout).set_default("") + .describe("Dimension ordering of output. Can be 'NCHW', 'NHWC', etc." + "'N', 'C', 'H', 'W' stands for batch, channel, height, and width" + "dimensions respectively. Default to be same as input layout."); + + // use 0 bits to indicate none. + TVM_ATTR_FIELD(out_dtype) + .set_default(NullValue()) + .describe("Output data type, set to explicit type under mixed precision setting"); + + + TVM_ATTR_FIELD(input_zero_point) + .describe("The zero point of the input tensor."); + TVM_ATTR_FIELD(kernel_zero_point) + .describe("The zero point of the kernel tensor."); + TVM_ATTR_FIELD(output_zero_point) + .describe("The zero point of the output tensor."); + TVM_ATTR_FIELD(input_scale) + .describe("The scale of the input tensor."); + TVM_ATTR_FIELD(kernel_scale) + .describe("The scale of the kernel tensor."); + TVM_ATTR_FIELD(output_scale) + .describe("The scale of the output tensor."); + TVM_ATTR_FIELD(use_int_compute_for_requantize).set_default(false) + .describe("When true, the integer computation is used to handle output scale"); + TVM_ATTR_FIELD(rounding).set_default("ceil") + .describe("The rounding that has to be used for handling scales."); + + } +}; + +} // namespace relay +} // namespace tvm +#endif // TVM_RELAY_ATTRS_NN_QUANTIZE_H_ \ No newline at end of file diff --git a/include/tvm/relay/quantize_util.h b/include/tvm/relay/quantize_util.h new file mode 100644 index 000000000000..85b7e55b4461 --- /dev/null +++ b/include/tvm/relay/quantize_util.h @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * \file nnvm/compiler/quantize_util.h + * \brief Utility methods needs for quantized ops that can be shared + */ + +#ifndef TVM_QUANTIZE_UTIL_H +#define TVM_QUANTIZE_UTIL_H + +#include +#include "./base.h" + +namespace tvm { +namespace relay { + +inline bool is_Int8(const DataType& dtype) { + return dtype == Int(8); +} + +inline bool is_UInt8(const DataType& dtype) { + return dtype == UInt(8); +} + +inline bool is_Float32(const DataType& dtype) { + return dtype == Float(32); +} + +inline bool is_qauntized_type(const DataType& dtype) { + return is_Int8(dtype) || is_UInt8(dtype); +} + +enum class QuantizeOpType : uint8_t { + Quantize_Requantize, + Dequantize +}; + +inline bool is_valid_quantized_op_input_type(const QuantizeOpType &op_type, const DataType &in_dtype) { + switch(op_type) { + case QuantizeOpType::Quantize_Requantize: + return is_Float32(in_dtype) || is_qauntized_type(in_dtype); + case QuantizeOpType ::Dequantize: + return is_qauntized_type(in_dtype); + default: + return false; + } +} + +inline bool is_valid_quantized_op_output_type(const QuantizeOpType &op_type, const DataType &in_dtype) { + switch(op_type) { + case QuantizeOpType::Quantize_Requantize: + return is_qauntized_type(in_dtype); + case QuantizeOpType::Dequantize: + return is_Float32(in_dtype); + default: + return false; + } +} + +inline const int32_t get_qmin(const DataType& dtype) { + CHECK(is_qauntized_type(dtype)) << "Expected quantized data type [int8, uint8] but was " << dtype; + if(is_Int8(dtype)) { + return std::numeric_limits::min(); + } else { + return std::numeric_limits::min(); + } +} + + +inline const int32_t get_qmax(const DataType& dtype) { + CHECK(is_qauntized_type(dtype)) << "Expected quantized data type [int8, uint8] but was " << dtype; + if(dtype == Int(8)) { + return std::numeric_limits::max(); + } else { + return std::numeric_limits::max(); + } +} + +} // namespace relay +} // namespace tvm +#endif //TVM_QUANTIZE_UTIL_H \ No newline at end of file diff --git a/python/tvm/relay/op/nn/__init__.py b/python/tvm/relay/op/nn/__init__.py index ebabbbcd9d3a..25ab07b48d67 100644 --- a/python/tvm/relay/op/nn/__init__.py +++ b/python/tvm/relay/op/nn/__init__.py @@ -19,3 +19,4 @@ from __future__ import absolute_import as _abs from .nn import * from . import _nn +from . import _quantize diff --git a/python/tvm/relay/op/nn/_make_quantize.py b/python/tvm/relay/op/nn/_make_quantize.py new file mode 100644 index 000000000000..2480c99068c4 --- /dev/null +++ b/python/tvm/relay/op/nn/_make_quantize.py @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
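The `get_qmin`/`get_qmax` helpers above reduce to the numeric limits of the quantized storage type. A NumPy equivalent for reference (the Python helper names here are hypothetical, mirroring the C++ ones):

```python
import numpy as np

def get_qmin(dtype):
    # Smallest representable value of the quantized type: int8 -> -128, uint8 -> 0.
    assert dtype in ('int8', 'uint8'), "expected a quantized type [int8, uint8]"
    return int(np.iinfo(dtype).min)

def get_qmax(dtype):
    # Largest representable value of the quantized type: int8 -> 127, uint8 -> 255.
    assert dtype in ('int8', 'uint8'), "expected a quantized type [int8, uint8]"
    return int(np.iinfo(dtype).max)
```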
+"""Constructor APIs""" +from ...._ffi.function import _init_api + +_init_api("relay.op.nn._quantize._make", __name__) diff --git a/python/tvm/relay/op/nn/_quantize.py b/python/tvm/relay/op/nn/_quantize.py new file mode 100644 index 000000000000..767628dee21e --- /dev/null +++ b/python/tvm/relay/op/nn/_quantize.py @@ -0,0 +1,73 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +#pylint: disable=invalid-name, too-many-lines +"""Neural network operations.""" +from __future__ import absolute_import as _abs +from . import _make_quantize + +def quantize(input_data, output_zero_point, output_scale, out_dtype='int8'): + r""" Quantize op + + This operator takes floating point 32 or quantized int8 and unit8 as input and produces + quantized int8 or unit8 as output. The output shape is the same as input shape. The input + tensor can be of any shape. + + ..math:: + \mbox{out}[x] = + \mbox{clamp(round(input_tensor/output_scale) + output_zero_point); out_dtype::min, out_dtype::max} + + Parameters + ---------- + input_data : tvm.relay.Expr + The input tensor to be quantized. Can be of type [float32, int8, uint8]. + output_zero_point : + The output zero_point. + output_scale: + The output scale. + input_dtype: + The data type of the input tensor. Can be [int8, uint8, float32] + Returns + ------- + result : tvm.relay.Expr + The computed result. + """ + return _make_quantize.quantize(input_data, output_zero_point, output_scale, out_dtype) + + +def dequantize(input_data, input_zero_point, input_scale): + r""" Dequantize op + + This operator takes quantized int8 and unit8 as input and produces + dequantized float32 as output. The output shape is the same as input shape. The input + tensor can be of any shape. + + Parameters + ---------- + input_data : tvm.relay.Expr + The input tensor to be quantized. Can be of type [float32, int8, uint8]. + input_zero_point : + The output zero_point. + input_scale: + The output scale. + Returns + ------- + result : tvm.relay.Expr + The computed result. + """ + return _make_quantize.dequantize(input_data, input_zero_point, input_scale) + + diff --git a/python/tvm/relay/quantize/__init__.py b/python/tvm/relay/quantize/__init__.py index 45bb62e66853..ddb99efe057c 100644 --- a/python/tvm/relay/quantize/__init__.py +++ b/python/tvm/relay/quantize/__init__.py @@ -20,3 +20,4 @@ from .quantize import * from ._annotate import register_annotate_function +from ._quantize import * diff --git a/src/relay/op/nn/dequantize.cc b/src/relay/op/nn/dequantize.cc new file mode 100644 index 000000000000..8fd25efd6bad --- /dev/null +++ b/src/relay/op/nn/dequantize.cc @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
diff --git a/src/relay/op/nn/dequantize.cc b/src/relay/op/nn/dequantize.cc
new file mode 100644
index 000000000000..8fd25efd6bad
--- /dev/null
+++ b/src/relay/op/nn/dequantize.cc
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file dequantize.cc
+ * \brief Dequantize operator
+ */
+
+#include <tvm/relay/base.h>
+#include <tvm/relay/op.h>
+#include <tvm/relay/op_attr_types.h>
+#include <tvm/relay/attrs/nn_quantize.h>
+#include <tvm/relay/quantize_util.h>
+
+namespace tvm {
+namespace relay {
+
+TVM_REGISTER_NODE_TYPE(DequantizeAttrs);
+
+bool DequantizeRel(const Array<Type>& types,
+                   int num_inputs,
+                   const Attrs& attrs,
+                   const TypeReporter& reporter) {
+  CHECK_EQ(types.size(), 2);
+  const auto* data = types[0].as<TensorTypeNode>();
+  const auto input_dtype = data->dtype;
+  CHECK(is_valid_quantized_op_input_type(QuantizeOpType::Dequantize, input_dtype))
+    << "Input type should be one of the quantized types [uint8, int8] but was " << input_dtype;
+  const Array<IndexExpr> oshape = data->shape;
+  // assign output type
+  reporter->Assign(types[1], TensorTypeNode::make(oshape, Float(32)));
+  return true;
+}
+
+Expr MakeDequantize(Expr data,
+                    int32_t input_zero_point,
+                    double input_scale) {
+  auto attrs = make_node<DequantizeAttrs>();
+  attrs->input_scale = input_scale;
+  attrs->input_zero_point = input_zero_point;
+  static const Op& op = Op::Get("nn_quantized.dequantize");
+  return CallNode::make(op, {data}, Attrs(attrs), {});
+}
+
+RELAY_REGISTER_OP("nn_quantized.dequantize")
+.describe(R"code(Dequantizes the input and produces float32 output.
+
+The input is always quantized (int8, uint8) and is converted to float32 given
+the input scale and zero point.
+
+- **data**: Quantized tensor of any shape to dequantize.
+)code" TVM_ADD_FILELINE)
+.set_attrs_type_key("relay.attrs.DequantizeAttrs")
+.set_num_inputs(1)
+.add_argument("data", "Tensor", "The tensor to dequantize.")
+.set_support_level(10)
+.add_type_rel("Dequantize", DequantizeRel);
+
+TVM_REGISTER_API("relay.op.nn._quantize._make.dequantize")
+.set_body_typed<Expr(Expr, int32_t, double)>(MakeDequantize);
+
+}  // namespace relay
+}  // namespace tvm
diff --git a/src/relay/op/nn/quantize_op.cc b/src/relay/op/nn/quantize_op.cc
new file mode 100644
index 000000000000..dd79895d1601
--- /dev/null
+++ b/src/relay/op/nn/quantize_op.cc
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file quantize_op.cc
+ * \brief Quantize and requantize operator
+ */
+
+#include <tvm/relay/base.h>
+#include <tvm/relay/op.h>
+#include <tvm/relay/op_attr_types.h>
+#include <tvm/relay/attrs/nn_quantize.h>
+#include <tvm/relay/quantize_util.h>
+
+namespace tvm {
+namespace relay {
+
+TVM_REGISTER_NODE_TYPE(QuantizeAttrs);
+
+bool QuantizeRel(const Array<Type>& types,
+                 int num_inputs,
+                 const Attrs& attrs,
+                 const TypeReporter& reporter) {
+  CHECK_EQ(types.size(), 2);
+  const auto* data = types[0].as<TensorTypeNode>();
+  const auto input_dtype = data->dtype;
+  CHECK(is_valid_quantized_op_input_type(QuantizeOpType::Quantize_Requantize, input_dtype))
+    << "Input type should be one of [float32, uint8, int8] but was " << input_dtype;
+  const auto* param = attrs.as<QuantizeAttrs>();
+  const Array<IndexExpr> oshape = data->shape;
+  const DataType out_dtype = param->out_dtype;
+  CHECK(is_valid_quantized_op_output_type(QuantizeOpType::Quantize_Requantize, out_dtype))
+    << "Output type should be one of [int8, uint8] but was " << out_dtype;
+  // assign output type. Note: types[0] cannot simply be reused here because
+  // the output dtype can differ from the input dtype.
+  reporter->Assign(types[1], TensorTypeNode::make(oshape, out_dtype));
+  return true;
+}
+
+Expr MakeQuantize(Expr data,
+                  int32_t output_zero_point,
+                  double output_scale,
+                  DataType out_dtype) {
+  auto attrs = make_node<QuantizeAttrs>();
+  attrs->output_scale = output_scale;
+  attrs->output_zero_point = output_zero_point;
+  attrs->out_dtype = std::move(out_dtype);
+  static const Op& op = Op::Get("nn_quantized.quantize");
+  return CallNode::make(op, {data}, Attrs(attrs), {});
+}
+
+RELAY_REGISTER_OP("nn_quantized.quantize")
+.describe(R"code(Quantizes the input and produces quantized output.
+
+The input can be either float or quantized (int8, uint8). If the input is
+float, this op takes a scale and a zero point and quantizes the float values
+to quantized output in int8 or uint8 format. If the input is already
+quantized, the op requantizes the input (of a given type, scale and zero
+point) to an output of the same or a different type, with the same or a
+different scale and zero point.
+
+- **data**: Tensor of any shape to quantize. The input data can be of floating
+  point or quantized type.
+)code" TVM_ADD_FILELINE)
+.set_attrs_type_key("relay.attrs.QuantizeAttrs")
+.set_num_inputs(1)
+.add_argument("data", "Tensor", "The tensor to quantize.")
+.set_support_level(10)
+.add_type_rel("Quantize", QuantizeRel);
+
+TVM_REGISTER_API("relay.op.nn._quantize._make.quantize")
+.set_body_typed<Expr(Expr, int32_t, double, DataType)>(MakeQuantize);
+
+}  // namespace relay
+}  // namespace tvm
\ No newline at end of file
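The requantize path described above is still a TODO in the lowering pass that follows, so nothing in this patch pins down its semantics yet. As an assumption about the intended arithmetic (rescale from one (scale, zero_point) pair to another), a NumPy sketch:

```python
import numpy as np

def requantize_np(q_in, in_scale, in_zp, out_scale, out_zp, out_dtype=np.uint8):
    # Assumed semantics, not implemented in this patch: map q_in from
    # (in_scale, in_zp) to (out_scale, out_zp) and clamp to the output range.
    info = np.iinfo(out_dtype)
    real = (q_in.astype(np.int32) - in_zp) * in_scale   # back to real values
    q_out = np.round(real / out_scale) + out_zp         # quantize with new params
    return np.clip(q_out, info.min, info.max).astype(out_dtype)
```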
diff --git a/src/relay/pass/quantize_rewrite.cc b/src/relay/pass/quantize_rewrite.cc
new file mode 100644
index 000000000000..7e8491c0da7e
--- /dev/null
+++ b/src/relay/pass/quantize_rewrite.cc
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file quantize_rewrite.cc
+ * \brief Lower quantized ops to existing Relay ops.
+ */
+
+#include <tvm/relay/pass.h>
+#include <tvm/relay/op_attr_types.h>
+#include <tvm/relay/attrs/nn_quantize.h>
+#include <tvm/relay/quantize_util.h>
+#include "pattern_util.h"
+
+namespace tvm {
+namespace relay {
+
+Expr QuantizeForwardRewrite(const Call& ref_call,
+                            const Array<Expr>& new_args,
+                            const NodeRef& ctx) {
+  CHECK_EQ(new_args.size(), 1);
+  Expr data = new_args[0];
+  const auto* attrs = ref_call->attrs.as<QuantizeAttrs>();
+  const auto out_dtype = attrs->out_dtype;
+  const auto* new_tesnor = data.operator->()->checked_type().as<TensorTypeNode>();
+  CHECK(new_tesnor) << "Expected TensorTypeNode but was " << data.operator->()->checked_type();
+  const auto input_dtype = new_tesnor->dtype;
+  if (is_Float32(input_dtype)) {  // this is quantization, float32 -> [int8, uint8]
+    const auto output_zero_point = MakeConstantScalar(Int(32), attrs->output_zero_point);
+    const auto scale = MakeConstantScalar(Float(32), attrs->output_scale);
+    const int32_t min_val = get_qmin(out_dtype);
+    const int32_t max_val = get_qmax(out_dtype);
+    auto scale_data = Cast(Round(Divide(data, scale)), Int(32));
+    // equivalent to std::min(std::max(unclamped, min_val), max_val);
+    // clamp in int32 before the final cast so out-of-range values cannot overflow
+    auto unclamped = Add(scale_data, output_zero_point);
+    auto clamped_output = Cast(Clip(unclamped, min_val, max_val), out_dtype);
+    return clamped_output;
+  } else {
+    // this is requantization, [int8, uint8] -> [int8, uint8] with possibly
+    // different scales. TODO: Implement requantization; see how to take the
+    // input scale and zero point as inputs.
+    return Expr();  // to hide the warning.
+  }
+}
+
+RELAY_REGISTER_OP("nn_quantized.quantize")
+.set_attr<FForwardRewrite>("FQuantizeForwardRewrite", QuantizeForwardRewrite);
+
+Expr DequantizeForwardRewrite(const Call& ref_call,
+                              const Array<Expr>& new_args,
+                              const NodeRef& ctx) {
+  CHECK_EQ(new_args.size(), 1);
+  Expr data = new_args[0];
+  const auto* attrs = ref_call->attrs.as<DequantizeAttrs>();
+  const auto* new_tesnor = data.operator->()->checked_type().as<TensorTypeNode>();
+  CHECK(new_tesnor) << "Expected TensorTypeNode but was " << data.operator->()->checked_type();
+  const auto input_zero_point = MakeConstantScalar(Int(32), attrs->input_zero_point);
+  const auto input_scale = MakeConstantScalar(Float(32), attrs->input_scale);
+  auto shift = Subtract(Cast(data, Int(32)), input_zero_point);
+  auto scale = Multiply(Cast(shift, Float(32)), input_scale);
+  return scale;
+}
+
+RELAY_REGISTER_OP("nn_quantized.dequantize")
+.set_attr<FForwardRewrite>("FQuantizeForwardRewrite", DequantizeForwardRewrite);
+
+
+TVM_REGISTER_API("relay._quantize.quantize_rewrite")
+.set_body_typed<Expr(Expr)>([](const Expr& e) {
+  Expr ret = ForwardRewrite(e, "FQuantizeForwardRewrite", nullptr, nullptr);
+  return ret;
+});
+
+
+}  // namespace relay
+}  // namespace tvm
\ No newline at end of file
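As a sanity check on the quantize lowering above (divide by scale, round, add the zero point, clamp, then cast), tracing one element of the float32 → uint8 test case that follows:

```python
# x = -63.5 with output_scale = 0.5, output_zero_point = 127 (first test below).
scale_data = round(-63.5 / 0.5)          # -127, Round(Divide(data, scale))
unclamped = scale_data + 127             # 0,    Add(scale_data, output_zero_point)
clamped = min(max(unclamped, 0), 255)    # 0,    Clip to [qmin, qmax] of uint8
```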
diff --git a/tests/python/unittest/test_quantized_ops.py b/tests/python/unittest/test_quantized_ops.py
new file mode 100644
index 000000000000..a8300d27136a
--- /dev/null
+++ b/tests/python/unittest/test_quantized_ops.py
@@ -0,0 +1,117 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import tvm
+from tvm import relay
+from tvm.contrib import graph_runtime
+import numpy as np
+
+
+def test_quantize_op():
+
+    def quantize_test_driver(in_dtype, quant_args, out_dtype, in_data, verify_output_data):
+        shape = in_data.shape
+        input_data = relay.var("input_data", shape=shape, dtype=in_dtype)
+        output_zero_point = quant_args['out_zero_point']
+        output_scale = quant_args['out_scale']
+        quantized_output = relay.op.nn._quantize.quantize(input_data,
+                                                          output_zero_point=output_zero_point,
+                                                          output_scale=output_scale,
+                                                          out_dtype=out_dtype)
+        func = relay.Function(relay.ir_pass.free_vars(quantized_output), quantized_output)
+        func = relay.ir_pass.infer_type(func)
+        func = relay.quantize.quantize_rewrite(func)
+        func = relay.ir_pass.infer_type(func)
+        graph, lib, params = relay.build(func, "llvm", params=None)
+        mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0))
+        mod.set_input(input_data=in_data)
+        mod.run()
+        res = mod.get_output(0).asnumpy()
+        np.testing.assert_equal(res, verify_output_data)
+        assert res.dtype == out_dtype
+
+    def test_float32_to_uint8():
+        data = np.array([-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64]) \
+            .astype('float32') \
+            .reshape((2, 5))
+        output = np.array([0, 1, 2, 3, 4, 251, 252, 253, 254, 255]) \
+            .astype('uint8') \
+            .reshape((2, 5))
+        quant_args = {"out_zero_point": 127, "out_scale": 0.5}
+        quantize_test_driver(in_dtype='float32', quant_args=quant_args, out_dtype='uint8',
+                             in_data=data, verify_output_data=output)
+
+    def test_float32_to_int8():
+        data = np.array([-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64]) \
+            .astype('float32') \
+            .reshape((2, 5))
+        output = np.array([-128, -127, -126, -125, -124, 123, 124, 125, 126, 127]) \
+            .astype('int8') \
+            .reshape((2, 5))
+        quant_args = {"out_zero_point": -1, "out_scale": 0.5}
+        quantize_test_driver(in_dtype='float32', quant_args=quant_args, out_dtype='int8',
+                             in_data=data, verify_output_data=output)
+
+    test_float32_to_uint8()
+    test_float32_to_int8()
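The dequantize tests below invert the vectors above using the lowering's `(q - zero_point) * scale` arithmetic; for the first uint8 element:

```python
# q = 0 with input_zero_point = 127, input_scale = 0.5 (first test below).
(0 - 127) * 0.5   # -> -63.5, the first element of the expected float32 output
```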
+
+
+def test_dequantize_op():
+
+    def dequantize_test_driver(in_dtype, quant_args, in_data, verify_output_data):
+        shape = in_data.shape
+        input_data = relay.var("input_data", shape=shape, dtype=in_dtype)
+        input_zero_point = quant_args['in_zero_point']
+        input_scale = quant_args['in_scale']
+        dequantized_output = relay.op.nn._quantize.dequantize(input_data,
+                                                              input_zero_point=input_zero_point,
+                                                              input_scale=input_scale)
+        func = relay.Function(relay.ir_pass.free_vars(dequantized_output), dequantized_output)
+        func = relay.ir_pass.infer_type(func)
+        func = relay.quantize.quantize_rewrite(func)
+        func = relay.ir_pass.infer_type(func)
+        graph, lib, params = relay.build(func, "llvm", params=None)
+        mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0))
+        mod.set_input(input_data=in_data)
+        mod.run()
+        res = mod.get_output(0).asnumpy()
+        np.testing.assert_allclose(res, verify_output_data)
+        assert res.dtype == np.float32
+
+    def test_uint8_to_float32():
+        data = np.array([0, 1, 2, 3, 4, 251, 252, 253, 254, 255]) \
+            .astype('uint8') \
+            .reshape((2, 5))
+        output = np.array([-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64]) \
+            .astype('float32') \
+            .reshape((2, 5))
+        quant_args = {"in_zero_point": 127, "in_scale": 0.5}
+        dequantize_test_driver(in_dtype='uint8', quant_args=quant_args, in_data=data,
+                               verify_output_data=output)
+
+    def test_int8_to_float32():
+        data = np.array([-128, -127, -126, -125, -124, 123, 124, 125, 126, 127]) \
+            .astype('int8') \
+            .reshape((2, 5))
+        output = np.array([-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64]) \
+            .astype('float32') \
+            .reshape((2, 5))
+        quant_args = {"in_zero_point": -1, "in_scale": 0.5}
+        dequantize_test_driver(in_dtype='int8', quant_args=quant_args, in_data=data,
+                               verify_output_data=output)
+
+    test_uint8_to_float32()
+    test_int8_to_float32()
+
+
+if __name__ == "__main__":
+    test_quantize_op()
+    test_dequantize_op()

From f9b043dfadc5d144d977e1092d6fe003983d28bc Mon Sep 17 00:00:00 2001
From: "shoubhikbhatti@gmail.com" <shoubhikbhatti@gmail.com>
Date: Fri, 28 Jun 2019 10:01:53 -0700
Subject: [PATCH 2/4] [Relay] [Quantization] WIP - Fixing typos and removing
 redundant code.

[Relay] [Quantization] WIP - Removing redundant code.
---
 include/tvm/relay/attrs/nn_quantize.h | 89 ---------------------------
 src/relay/pass/quantize_rewrite.cc    | 10 +--
 2 files changed, 5 insertions(+), 94 deletions(-)

diff --git a/include/tvm/relay/attrs/nn_quantize.h b/include/tvm/relay/attrs/nn_quantize.h
index af420dca8139..8f86d4019aaa 100644
--- a/include/tvm/relay/attrs/nn_quantize.h
+++ b/include/tvm/relay/attrs/nn_quantize.h
@@ -31,7 +31,6 @@ namespace tvm {
 namespace relay {
 
 struct QuantizeAttrs : public tvm::AttrsNode<QuantizeAttrs> {
-  DataType input_dtype;
   int32_t output_zero_point;
   double output_scale;
   DataType out_dtype;
@@ -40,9 +39,6 @@ struct QuantizeAttrs : public tvm::AttrsNode<QuantizeAttrs> {
     TVM_ATTR_FIELD(out_dtype)
       .describe("Output data type, can be one of [int8 or uint8].");
 
-    TVM_ATTR_FIELD(input_dtype)
-      .describe("Input data type, can be one of [float32, int8, uint8].");
-
     TVM_ATTR_FIELD(output_zero_point)
       .describe("The zero_point for the activation of this op.");
 
@@ -65,91 +61,6 @@ struct DequantizeAttrs : public tvm::AttrsNode<DequantizeAttrs> {
   }
 };
 
-// TODO(anijain2305) - Copy of QuantizedConv2DAttrs. Should we inherit?
-/*! \brief Attribute for quantized conv2d operator */
-struct QuantizedConv2DAttrs : public tvm::AttrsNode<QuantizedConv2DAttrs> {
-  // Traditional conv2d attributes.
-  Array<IndexExpr> strides;
-  Array<IndexExpr> padding;
-  Array<IndexExpr> dilation;
-  int groups;
-  IndexExpr channels;
-  Array<IndexExpr> kernel_size;
-  std::string data_layout;
-  std::string kernel_layout;
-  std::string out_layout;
-  DataType out_dtype;
-
-  // Quantization related attributes.
-  int32_t input_zero_point;
-  int32_t kernel_zero_point;
-  int32_t output_zero_point;
-  double input_scale;
-  double kernel_scale;
-  double output_scale;
-  bool use_int_compute_for_requantize;
-  std::string rounding;
-
-  TVM_DECLARE_ATTRS(QuantizedConv2DAttrs, "relay.attrs.QuantizedConv2DAttrs") {
-    TVM_ATTR_FIELD(strides).set_default(Array<IndexExpr>({1, 1}))
-      .describe("Specifies the strides of the convolution.");
-    TVM_ATTR_FIELD(padding).set_default(Array<IndexExpr>({0, 0}))
-      .describe("If padding is non-zero, then the input is implicitly zero-padded"
-                " on both sides for padding number of points.");
-    TVM_ATTR_FIELD(dilation).set_default(Array<IndexExpr>({1, 1}))
-      .describe("Specifies the dilation rate to use for dilated convolution.");
-    TVM_ATTR_FIELD(groups).set_default(1)
-      .describe("Controls the connections between inputs and outputs."
-                " At groups=1, all inputs are convolved to all outputs."
-                " At groups=2, the operation becomes equivalent to having two convolution"
-                " layers side by side, each seeing half the input channels, and producing"
-                " half the output channels, and both subsequently concatenated.");
-    TVM_ATTR_FIELD(channels)
-      .describe("The number of output channels in the convolution."
-                " If it is not set, inferred by shape of the weight.")
-      .set_default(NullValue<IndexExpr>());
-    TVM_ATTR_FIELD(kernel_size)
-      .describe("Specifies the dimensions of the convolution window.")
-      .set_default(NullValue<Array<IndexExpr> >());
-    TVM_ATTR_FIELD(data_layout).set_default("NCHW")
-      .describe("Dimension ordering of input data. Can be 'NCHW', 'NHWC', etc."
-                " 'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
-                " dimensions respectively. Convolution is applied on the 'H' and"
-                " 'W' dimensions.");
-    TVM_ATTR_FIELD(kernel_layout).set_default("OIHW")
-      .describe("Dimension ordering of weight. Can be 'OIHW', 'OIHW16o16i', etc."
-                " 'O', 'I', 'H', 'W' stands for num_filter, input_channel, height, and width"
-                " dimensions respectively.");
-    TVM_ATTR_FIELD(out_layout).set_default("")
-      .describe("Dimension ordering of output. Can be 'NCHW', 'NHWC', etc."
-                " 'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
-                " dimensions respectively. Default to be same as input layout.");
-
-    // use 0 bits to indicate none.
-    TVM_ATTR_FIELD(out_dtype)
-      .set_default(NullValue<DataType>())
-      .describe("Output data type, set to explicit type under mixed precision setting.");
-
-    TVM_ATTR_FIELD(input_zero_point)
-      .describe("The zero point of the input tensor.");
-    TVM_ATTR_FIELD(kernel_zero_point)
-      .describe("The zero point of the kernel tensor.");
-    TVM_ATTR_FIELD(output_zero_point)
-      .describe("The zero point of the output tensor.");
-    TVM_ATTR_FIELD(input_scale)
-      .describe("The scale of the input tensor.");
-    TVM_ATTR_FIELD(kernel_scale)
-      .describe("The scale of the kernel tensor.");
-    TVM_ATTR_FIELD(output_scale)
-      .describe("The scale of the output tensor.");
-    TVM_ATTR_FIELD(use_int_compute_for_requantize).set_default(false)
-      .describe("When true, integer computation is used to handle the output scale.");
-    TVM_ATTR_FIELD(rounding).set_default("ceil")
-      .describe("The rounding that has to be used for handling scales.");
-  }
-};
-
 
 }  // namespace relay
 }  // namespace tvm
diff --git a/src/relay/pass/quantize_rewrite.cc b/src/relay/pass/quantize_rewrite.cc
index 7e8491c0da7e..f93658bee4c1 100644
--- a/src/relay/pass/quantize_rewrite.cc
+++ b/src/relay/pass/quantize_rewrite.cc
@@ -39,9 +39,9 @@ Expr QuantizeForwardRewrite(const Call& ref_call,
   Expr data = new_args[0];
   const auto* attrs = ref_call->attrs.as<QuantizeAttrs>();
   const auto out_dtype = attrs->out_dtype;
-  const auto* new_tesnor = data.operator->()->checked_type().as<TensorTypeNode>();
-  CHECK(new_tesnor) << "Expected TensorTypeNode but was " << data.operator->()->checked_type();
-  const auto input_dtype = new_tesnor->dtype;
+  const auto* new_tensor = data.operator->()->checked_type().as<TensorTypeNode>();
+  CHECK(new_tensor) << "Expected TensorTypeNode but was " << data.operator->()->checked_type();
+  const auto input_dtype = new_tensor->dtype;
   if (is_Float32(input_dtype)) {  // this is quantization, float32 -> [int8, uint8]
     const auto output_zero_point = MakeConstantScalar(Int(32), attrs->output_zero_point);
     const auto scale = MakeConstantScalar(Float(32), attrs->output_scale);
@@ -69,8 +69,8 @@ Expr DequantizeForwardRewrite(const Call& ref_call,
   CHECK_EQ(new_args.size(), 1);
   Expr data = new_args[0];
   const auto* attrs = ref_call->attrs.as<DequantizeAttrs>();
-  const auto* new_tesnor = data.operator->()->checked_type().as<TensorTypeNode>();
-  CHECK(new_tesnor) << "Expected TensorTypeNode but was " << data.operator->()->checked_type();
+  const auto* new_tensor = data.operator->()->checked_type().as<TensorTypeNode>();
+  CHECK(new_tensor) << "Expected TensorTypeNode but was " << data.operator->()->checked_type();
   const auto input_zero_point = MakeConstantScalar(Int(32), attrs->input_zero_point);
   const auto input_scale = MakeConstantScalar(Float(32), attrs->input_scale);
   auto shift = Subtract(Cast(data, Int(32)), input_zero_point);

From 0b00e96c8ed01e046239f70a4e75f007e9b58de4 Mon Sep 17 00:00:00 2001
From: "shoubhikbhatti@gmail.com" <shoubhikbhatti@gmail.com>
Date: Sun, 30 Jun 2019 20:47:06 -0700
Subject: [PATCH 3/4] merge from upstream

---
 3rdparty/HalideIR | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/3rdparty/HalideIR b/3rdparty/HalideIR
index c4e5bc77bd7b..32057b53eee8 160000
--- a/3rdparty/HalideIR
+++ b/3rdparty/HalideIR
@@ -1 +1 @@
-Subproject commit c4e5bc77bd7bca05e45664b35c6ce88246c43b1b
+Subproject commit 32057b53eee870d73c6c21dc820d6546b4d9a13f

From 6e1f5af0faa7e9ab8827f0eea760a6f1190a1c65 Mon Sep 17 00:00:00 2001
From: "shoubhikbhatti@gmail.com" <shoubhikbhatti@gmail.com>
Date: Sun, 30 Jun 2019 21:13:17 -0700
Subject: [PATCH 4/4] merge from upstream/mainline

---
 3rdparty/HalideIR | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/3rdparty/HalideIR b/3rdparty/HalideIR
index c4e5bc77bd7b..32057b53eee8 160000
--- a/3rdparty/HalideIR
+++ b/3rdparty/HalideIR
@@ -1 +1 @@
-Subproject commit c4e5bc77bd7bca05e45664b35c6ce88246c43b1b
+Subproject commit 32057b53eee870d73c6c21dc820d6546b4d9a13f