From 8d9e317053b474dd12b2c76a3abf8c5a4a9fbd2f Mon Sep 17 00:00:00 2001
From: "shoubhikbhatti@gmail.com"
Date: Mon, 8 Jul 2019 12:12:40 -0700
Subject: [PATCH 1/8] [Relay] [Quantization] WIP - Common files for the
 quantization work.

---
 include/tvm/relay/attrs/qnn.h         |  37 +++++++
 include/tvm/relay/quantize_util.h     | 139 ++++++++++++++++++++++
 python/tvm/relay/op/__init__.py       |   1 +
 python/tvm/relay/op/qnn/__init__.py   |  20 ++++
 python/tvm/relay/op/qnn/_make.py      |  20 ++++
 python/tvm/relay/op/qnn/qnn.py        |  21 ++++
 python/tvm/relay/quantize/__init__.py |   1 +
 python/tvm/relay/quantize/rewrite.py  |  38 +++++++
 src/relay/pass/pattern_util.h         |  20 ++++
 src/relay/pass/quantize_rewrite.cc    |  38 +++++++
 10 files changed, 335 insertions(+)
 create mode 100644 include/tvm/relay/attrs/qnn.h
 create mode 100644 include/tvm/relay/quantize_util.h
 create mode 100644 python/tvm/relay/op/qnn/__init__.py
 create mode 100644 python/tvm/relay/op/qnn/_make.py
 create mode 100644 python/tvm/relay/op/qnn/qnn.py
 create mode 100644 python/tvm/relay/quantize/rewrite.py
 create mode 100644 src/relay/pass/quantize_rewrite.cc

diff --git a/include/tvm/relay/attrs/qnn.h b/include/tvm/relay/attrs/qnn.h
new file mode 100644
index 000000000000..c45a33c786f7
--- /dev/null
+++ b/include/tvm/relay/attrs/qnn.h
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file tvm/relay/attrs/nn.h
+ * \brief Auxiliary attributes for nn operators.
+ */
+#ifndef TVM_RELAY_ATTRS_NN_QUANTIZE_H_
+#define TVM_RELAY_ATTRS_NN_QUANTIZE_H_
+
+#include
+#include
+
+namespace tvm {
+namespace relay {
+
+
+
+} // namespace relay
+} // namespace tvm
+#endif // TVM_RELAY_ATTRS_NN_QUANTIZE_H_

diff --git a/include/tvm/relay/quantize_util.h b/include/tvm/relay/quantize_util.h
new file mode 100644
index 000000000000..bb054fb8fb65
--- /dev/null
+++ b/include/tvm/relay/quantize_util.h
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file nnvm/compiler/quantize_util.h + * \brief Utility methods needs for quantized ops that can be shared + */ + +#ifndef TVM_QUANTIZE_UTIL_H +#define TVM_QUANTIZE_UTIL_H + +#include +#include "./base.h" + +namespace tvm { +namespace relay { + +inline bool is_Int8(const DataType& dtype) { + return dtype == Int(8); +} + +inline bool is_UInt8(const DataType& dtype) { + return dtype == UInt(8); +} + + +inline bool is_Int16(const DataType& dtype) { + return dtype == Int(16); +} + +inline bool is_UInt16(const DataType& dtype) { + return dtype == UInt(16); +} + +inline bool is_Int32(const DataType& dtype) { + return dtype == Int(32); +} + +inline bool is_UInt32(const DataType& dtype) { + return dtype == UInt(32); +} + + + +inline bool is_Float32(const DataType& dtype) { + return dtype == Float(32); +} + +inline bool is_quantized_type(const DataType& dtype) { + return is_Int8(dtype) || is_UInt8(dtype) + || is_Int16(dtype) || is_UInt16(dtype); +} + +enum class QuantizeOpType : uint8_t { + Quantize_Requantize, + Dequantize, + Requantize +}; + +inline bool is_valid_quantized_op_input_type(const QuantizeOpType &op_type, const DataType &in_dtype) { + switch(op_type) { + case QuantizeOpType::Quantize_Requantize: + return is_Float32(in_dtype) || is_quantized_type(in_dtype); + case QuantizeOpType ::Dequantize: + return is_quantized_type(in_dtype); + case QuantizeOpType ::Requantize: + return is_Int16(in_dtype) || is_Int32(in_dtype); + default: + return false; + } +} + +inline bool is_valid_quantized_op_output_type(const QuantizeOpType &op_type, const DataType &in_dtype) { + switch(op_type) { + case QuantizeOpType::Quantize_Requantize: + return is_quantized_type(in_dtype); + case QuantizeOpType::Dequantize: + return is_Float32(in_dtype); + default: + return false; + } +} + +inline const int32_t get_qmin(const DataType& dtype) { + if (is_Int8(dtype)) { + return std::numeric_limits::min(); + } else if (is_UInt8(dtype)) { + return std::numeric_limits::min(); + } else if (is_Int16(dtype)) { + return std::numeric_limits::min(); + } else if (is_UInt16(dtype)) { + return std::numeric_limits::min(); + } else if (is_Int32(dtype)) { + return std::numeric_limits::min(); + } else if (is_UInt32(dtype)) { + return std::numeric_limits::min(); + } + LOG(FATAL) << "Type not supported\n"; + return -1; +} + + +inline const int32_t get_qmax(const DataType& dtype) { + if (is_Int8(dtype)) { + return std::numeric_limits::max(); + } else if (is_UInt8(dtype)) { + return std::numeric_limits::max(); + } else if (is_Int16(dtype)) { + return std::numeric_limits::max(); + } else if (is_UInt16(dtype)) { + return std::numeric_limits::max(); + } else if (is_Int32(dtype)) { + return std::numeric_limits::max(); + } else if (is_UInt32(dtype)) { + return std::numeric_limits::max(); + } + LOG(FATAL) << "Type not supported\n"; + return -1; +} + +} // namespace relay +} // namespace tvm +#endif //TVM_QUANTIZE_UTIL_H diff --git a/python/tvm/relay/op/__init__.py b/python/tvm/relay/op/__init__.py index a27ab1dc50ff..1d634ef18fc0 100644 --- a/python/tvm/relay/op/__init__.py +++ b/python/tvm/relay/op/__init__.py @@ -26,6 +26,7 @@ from .transform import * from .algorithm import * from . import nn +from . import qnn from . import annotation from . import image from . 
import vision diff --git a/python/tvm/relay/op/qnn/__init__.py b/python/tvm/relay/op/qnn/__init__.py new file mode 100644 index 000000000000..aef02300ab63 --- /dev/null +++ b/python/tvm/relay/op/qnn/__init__.py @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=wildcard-import +"""Neural network related operators.""" +from __future__ import absolute_import as _abs +from .qnn import * \ No newline at end of file diff --git a/python/tvm/relay/op/qnn/_make.py b/python/tvm/relay/op/qnn/_make.py new file mode 100644 index 000000000000..b1695629b8f9 --- /dev/null +++ b/python/tvm/relay/op/qnn/_make.py @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Constructor APIs""" +from ...._ffi.function import _init_api + +_init_api("relay.op.qnn._make", __name__) diff --git a/python/tvm/relay/op/qnn/qnn.py b/python/tvm/relay/op/qnn/qnn.py new file mode 100644 index 000000000000..008e6cbb7f80 --- /dev/null +++ b/python/tvm/relay/op/qnn/qnn.py @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +#pylint: disable=invalid-name, too-many-lines +"""Neural network operations.""" +from __future__ import absolute_import as _abs +from . 
import _make
+
diff --git a/python/tvm/relay/quantize/__init__.py b/python/tvm/relay/quantize/__init__.py
index 45bb62e66853..8da4e7953566 100644
--- a/python/tvm/relay/quantize/__init__.py
+++ b/python/tvm/relay/quantize/__init__.py
@@ -19,4 +19,5 @@
 from __future__ import absolute_import as _abs
 from .quantize import *
+from .rewrite import *
 from ._annotate import register_annotate_function

diff --git a/python/tvm/relay/quantize/rewrite.py b/python/tvm/relay/quantize/rewrite.py
new file mode 100644
index 000000000000..89429e522115
--- /dev/null
+++ b/python/tvm/relay/quantize/rewrite.py
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#pylint: disable=unused-argument
+"""Automatic quantization toolkit."""
+from __future__ import absolute_import
+
+from . import _quantize
+from .. import expr as _expr
+
+def rewrite(expr):
+    """
+    Rewrites the high-level quantized ops into low-level existing Relay ops.
+
+    Parameters
+    ----------
+    expr : tvm.relay.Expr
+        The input expression.
+
+    Returns
+    -------
+    expr : tvm.relay.Expr
+        The output expression.
+    """
+    return _quantize.rewrite(expr)

diff --git a/src/relay/pass/pattern_util.h b/src/relay/pass/pattern_util.h
index 5c303905968e..7249d1d4c086 100644
--- a/src/relay/pass/pattern_util.h
+++ b/src/relay/pass/pattern_util.h
@@ -373,6 +373,26 @@ inline Expr Copy(Expr data) {
 }
 
+inline Expr Where(const Expr& condition, const Expr& x, const Expr& y) {
+  static const Op& op = Op::Get("where");
+  return CallNode::make(op, {condition, x, y});
+}
+
+inline Expr GreaterEqual(const Expr& lhs, const Expr& rhs) {
+  static const Op& op = Op::Get("greater_equal");
+  return CallNode::make(op, {lhs, rhs}, Attrs(), {});
+}
+
+inline Expr Full(Expr fill_value,
+                 Array shape,
+                 DataType dtype) {
+  auto attrs = make_node();
+  attrs->shape = std::move(shape);
+  attrs->dtype = std::move(dtype);
+  static const Op& op = Op::Get("full");
+  return CallNode::make(op, {fill_value}, Attrs(attrs), {});
+}
+
 Expr MakeConcatenate(Expr data, int axis);
 
 Expr MakeStridedSlice(Expr data, Array begin, Array end, Array strides);

diff --git a/src/relay/pass/quantize_rewrite.cc b/src/relay/pass/quantize_rewrite.cc
new file mode 100644
index 000000000000..925c516b41ed
--- /dev/null
+++ b/src/relay/pass/quantize_rewrite.cc
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file quantize_rewrite.cc
+ * \brief Lower quantized ops to existing Relay ops.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "pattern_util.h"
+
+namespace tvm {
+namespace relay {
+
+
+} // namespace relay
+} // namespace tvm

From 7081694a266673797c093c53e5fe9f0c77b6c6e7 Mon Sep 17 00:00:00 2001
From: "shoubhikbhatti@gmail.com"
Date: Mon, 8 Jul 2019 13:45:14 -0700
Subject: [PATCH 2/8] [Relay] [Quantization] WIP - Adding the tests file.

---
 tests/python/unittest/test_quantized_ops.py | 41 +++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 tests/python/unittest/test_quantized_ops.py

diff --git a/tests/python/unittest/test_quantized_ops.py b/tests/python/unittest/test_quantized_ops.py
new file mode 100644
index 000000000000..11082f774c59
--- /dev/null
+++ b/tests/python/unittest/test_quantized_ops.py
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import tvm
+import numpy as np
+from tvm import relay
+from tvm.relay.testing import create_workload
+from tvm.contrib import graph_runtime
+
+# TODOs for janimesh before submitting this patch.
+# TODO - Add tests for int8 input/weight dtype
+# TODO - opt_level=0 fails mostly due to fusion.
+# TODO - opt_level=3 fails, likely culprit kernel layout for int8
+# compute. Work with Rankyung to see if this is the culprit. Handle
+# it in a separate patch.
+
+def run_infer_type(expr):
+    mod = relay.Module.from_expr(expr)
+    mod = relay.transform.InferType()(mod)
+    entry = mod["main"]
+    return entry if isinstance(expr, relay.Function) else entry.body
+
+
+
+if __name__ == "__main__":
+    # add your tests here.
+    pass

From c089ebcdf4b13f98b776bb0213779f6783fa6743 Mon Sep 17 00:00:00 2001
From: "shoubhikbhatti@gmail.com"
Date: Mon, 8 Jul 2019 13:43:24 -0700
Subject: [PATCH 3/8] [Relay] [Quantization] WIP - This is the continuation of
 pull request [#3367](https://github.com/dmlc/tvm/pull/3367)

In this PR I want to discuss the design and implementation of the
- Quantize op -> FP32 to i8/u8
- Dequantize Op -> i8/u8 -> fp32

I have added test cases to verify the correctness of the ops.
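For reference, here is the intended arithmetic in plain numpy terms. This is a
sketch of the semantics only (the `*_ref` helper names are illustrative, not
part of the patch); the real lowering happens in quantize_rewrite.cc via
existing Relay ops:

```python
import numpy as np

def quantize_ref(x, output_scale, output_zero_point, out_dtype):
    # out = clamp(round(x / scale) + zero_point, qmin, qmax), cast to out_dtype
    info = np.iinfo(out_dtype)
    q = np.round(x / output_scale) + output_zero_point
    return np.clip(q, info.min, info.max).astype(out_dtype)

def dequantize_ref(q, input_scale, input_zero_point):
    # out = (q - zero_point) * scale, computed in float32
    return (q.astype(np.int32) - input_zero_point).astype(np.float32) * input_scale

# Round trip with the scale/zero_point used in the tests below:
x = np.array([-63.5, -63.0, 62.5, 64.0], dtype=np.float32)
q = quantize_ref(x, output_scale=0.5, output_zero_point=127, out_dtype=np.uint8)
# q == [0, 1, 252, 255]; dequantize_ref(q, 0.5, 127) recovers x exactly here.
```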
---
 include/tvm/relay/attrs/qnn.h               | 31 +++++++
 include/tvm/relay/quantize_util.h           |  8 +-
 python/tvm/relay/op/qnn/qnn.py              | 45 ++++++++++
 src/relay/op/nn/dequantize.cc               | 79 +++++++++++++++++
 src/relay/op/nn/quantize_op.cc              | 94 +++++++++++++++++++++
 src/relay/pass/quantize_rewrite.cc          | 46 +++++++++-
 tests/python/unittest/test_quantized_ops.py | 94 ++++++++++++++++++++-
 7 files changed, 389 insertions(+), 8 deletions(-)
 create mode 100644 src/relay/op/nn/dequantize.cc
 create mode 100644 src/relay/op/nn/quantize_op.cc

diff --git a/include/tvm/relay/attrs/qnn.h b/include/tvm/relay/attrs/qnn.h
index c45a33c786f7..30ec16fd21d6 100644
--- a/include/tvm/relay/attrs/qnn.h
+++ b/include/tvm/relay/attrs/qnn.h
@@ -30,6 +30,37 @@
 namespace tvm {
 namespace relay {
 
+struct QuantizeAttrs : public tvm::AttrsNode {
+  int32_t output_zero_point;
+  double output_scale;
+  DataType out_dtype;
+
+  TVM_DECLARE_ATTRS(QuantizeAttrs, "relay.attrs.QuantizeAttrs") {
+    TVM_ATTR_FIELD(out_dtype)
+      .describe("Output data type, can be one of [int8 or uint8].");
+
+    TVM_ATTR_FIELD(output_zero_point)
+      .describe("The zero_point for the activation of this op.");
+
+    TVM_ATTR_FIELD(output_scale)
+      .describe("The scale for the activation of this op.");
+  }
+};
+
+struct DequantizeAttrs : public tvm::AttrsNode {
+  int32_t input_zero_point;
+  double input_scale;
+
+  TVM_DECLARE_ATTRS(DequantizeAttrs, "relay.attrs.DequantizeAttrs") {
+
+    TVM_ATTR_FIELD(input_zero_point)
+      .describe("The zero_point for the input tensor of this op.");
+
+    TVM_ATTR_FIELD(input_scale)
+      .describe("The scale for the input tensor of this op.");
+  }
+};
+
 } // namespace relay

diff --git a/include/tvm/relay/quantize_util.h b/include/tvm/relay/quantize_util.h
index bb054fb8fb65..312262c8cd67 100644
--- a/include/tvm/relay/quantize_util.h
+++ b/include/tvm/relay/quantize_util.h
@@ -68,15 +68,15 @@ inline bool is_quantized_type(const DataType& dtype) {
 }
 
 enum class QuantizeOpType : uint8_t {
-  Quantize_Requantize,
+  Quantize,
   Dequantize,
   Requantize
 };
 
 inline bool is_valid_quantized_op_input_type(const QuantizeOpType &op_type, const DataType &in_dtype) {
   switch(op_type) {
-    case QuantizeOpType::Quantize_Requantize:
-      return is_Float32(in_dtype) || is_quantized_type(in_dtype);
+    case QuantizeOpType::Quantize:
+      return is_Float32(in_dtype);
     case QuantizeOpType ::Dequantize:
       return is_quantized_type(in_dtype);
     case QuantizeOpType ::Requantize:
@@ -88,7 +88,7 @@ inline bool is_valid_quantized_op_input_type(const QuantizeOpType &op_type, cons
 
 inline bool is_valid_quantized_op_output_type(const QuantizeOpType &op_type, const DataType &in_dtype) {
   switch(op_type) {
-    case QuantizeOpType::Quantize_Requantize:
+    case QuantizeOpType::Quantize:
       return is_quantized_type(in_dtype);
     case QuantizeOpType::Dequantize:
       return is_Float32(in_dtype);

diff --git a/python/tvm/relay/op/qnn/qnn.py b/python/tvm/relay/op/qnn/qnn.py
index 008e6cbb7f80..810e2a8fad00 100644
--- a/python/tvm/relay/op/qnn/qnn.py
+++ b/python/tvm/relay/op/qnn/qnn.py
@@ -19,3 +19,48 @@
 from __future__ import absolute_import as _abs
 from . import _make
 
+def quantize(input_data, output_zero_point, output_scale, out_dtype='int8'):
+    r""" Quantize op
+    This operator takes floating point 32 or quantized int8 and uint8 as input and produces
+    quantized int8 or uint8 as output. The output shape is the same as input shape. The input
+    tensor can be of any shape.
+    .. math::
+        \mbox{out}[x] =
+        \mbox{clamp}(\mbox{round}(\mbox{input_tensor}/\mbox{output_scale}) + \mbox{output_zero_point}, \mbox{out_dtype::min}, \mbox{out_dtype::max})
+    Parameters
+    ----------
+    input_data : tvm.relay.Expr
+        The input tensor to be quantized. Can be of type [float32, int8, uint8].
+    output_zero_point :
+        The output zero_point.
+    output_scale:
+        The output scale.
+    out_dtype:
+        The data type of the output tensor. Can be [int8, uint8, float32]
+    Returns
+    -------
+    result : tvm.relay.Expr
+        The computed result.
+    """
+    return _make.quantize(input_data, output_zero_point, output_scale, out_dtype)
+
+
+def dequantize(input_data, input_zero_point, input_scale):
+    r""" Dequantize op
+    This operator takes quantized int8 and uint8 as input and produces
+    dequantized float32 as output. The output shape is the same as input shape. The input
+    tensor can be of any shape.
+    Parameters
+    ----------
+    input_data : tvm.relay.Expr
+        The input tensor to be quantized. Can be of type [float32, int8, uint8].
+    input_zero_point :
+        The input zero_point.
+    input_scale:
+        The input scale.
+    Returns
+    -------
+    result : tvm.relay.Expr
+        The computed result.
+    """
+    return _make.dequantize(input_data, input_zero_point, input_scale)
\ No newline at end of file

diff --git a/src/relay/op/nn/dequantize.cc b/src/relay/op/nn/dequantize.cc
new file mode 100644
index 000000000000..7cbab466830b
--- /dev/null
+++ b/src/relay/op/nn/dequantize.cc
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file dequantize.cc
+ * \brief Dequantize operator
+ */
+
+//#include
+//#include
+//#include
+#include
+#include
+#include
+
+namespace tvm {
+namespace relay {
+
+TVM_REGISTER_NODE_TYPE(DequantizeAttrs);
+
+bool DequantizeRel(const Array& types,
+                   int num_inputs,
+                   const Attrs& attrs,
+                   const TypeReporter& reporter) {
+  CHECK_EQ(types.size(), 2);
+  const auto* data = types[0].as();
+  const auto input_dtype = data->dtype;
+  CHECK(is_valid_quantized_op_input_type(QuantizeOpType::Dequantize, input_dtype))
+    << "Input type should be one of the quantized types [uint8, int8] but was " << input_dtype;
+  const Array oshape = data->shape;
+  // assign output type
+  reporter->Assign(types[1], TensorTypeNode::make(oshape, Float(32)));
+  return true;
+}
+
+Expr MakeDequantize(Expr data,
+                    int32_t input_zero_point,
+                    double input_scale) {
+  auto attrs = make_node();
+  attrs->input_scale = input_scale;
+  attrs->input_zero_point = input_zero_point;
+  static const Op& op = Op::Get("qnn.dequantize");
+  return CallNode::make(op, {data}, Attrs(attrs), {});
+}
+
+RELAY_REGISTER_OP("qnn.dequantize")
+.describe(R"code(Dequantizes the input and produces float32 output.
+
+The input is always quantized (int8, uint8) and will be converted to float32 given input scale and shift.
+- **data**: Quantized tensor of any shape to dequantize. The input data can be of floating point
+)code" TVM_ADD_FILELINE)
+.set_attrs_type_key("relay.attrs.DequantizeAttrs")
+.set_num_inputs(1)
+.add_argument("data", "Tensor", "The tensor to dequantize.")
+.set_support_level(10)
+.add_type_rel("Dequantize", DequantizeRel);
+
+TVM_REGISTER_API("relay.op.qnn._make.dequantize")
+.set_body_typed(MakeDequantize);
+
+} // namespace relay
+} // namespace tvm

diff --git a/src/relay/op/nn/quantize_op.cc b/src/relay/op/nn/quantize_op.cc
new file mode 100644
index 000000000000..51308e86a0da
--- /dev/null
+++ b/src/relay/op/nn/quantize_op.cc
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file quantize_op.cc
+ * \brief Quantize operator
+ */
+
+//#include
+//#include
+//#include
+//#include
+//#include
+#include
+#include
+#include
+
+namespace tvm {
+namespace relay {
+
+TVM_REGISTER_NODE_TYPE(QuantizeAttrs);
+
+bool QuantizeRel(const Array& types,
+                 int num_inputs,
+                 const Attrs& attrs,
+                 const TypeReporter& reporter) {
+  CHECK_EQ(types.size(), 2);
+  const auto* data = types[0].as();
+  const auto input_dtype = data->dtype;
+  CHECK(is_valid_quantized_op_input_type(QuantizeOpType::Quantize, input_dtype))
+    << "Input type should be float32 but was " << input_dtype;
+  const auto* param = attrs.as();
+  const Array oshape = data->shape;
+  const DataType out_dtype = param->out_dtype;
+  CHECK(is_valid_quantized_op_output_type(QuantizeOpType::Quantize, out_dtype))
+    << "Output type should be one of [int8, uint8] but was " << out_dtype;
+  // assign output type
+  reporter->Assign(types[1], TensorTypeNode::make(oshape, out_dtype));
+  return true;
+}
+
+Expr MakeQuantize(Expr data,
+                  int32_t output_zero_point,
+                  double output_scale,
+                  DataType out_dtype) {
+  auto attrs = make_node();
+  attrs->output_scale = output_scale;
+  attrs->output_zero_point = output_zero_point;
+  attrs->out_dtype = std::move(out_dtype);
+  static const Op& op = Op::Get("qnn.quantize");
+  return CallNode::make(op, {data}, Attrs(attrs), {});
+}
+
+RELAY_REGISTER_OP("qnn.quantize")
+.describe(R"code(Quantizes the input and produces quantized output.
+
+The input is a float32 tensor. Given the output scale and zero point,
+this op quantizes the float values to int8 or uint8: the input is
+divided by the scale and rounded, the zero point is added, and the
+result is clamped to the range of the output type before the cast.
+
+- **data**: Tensor of any shape to quantize. The input data must be of floating point type.
+)code" TVM_ADD_FILELINE)
+.set_attrs_type_key("relay.attrs.QuantizeAttrs")
+.set_num_inputs(1)
+.add_argument("data", "Tensor", "The tensor to quantize.")
+.set_support_level(10)
+.add_type_rel("Quantize", QuantizeRel);
+
+TVM_REGISTER_API("relay.op.qnn._make.quantize")
+.set_body_typed(MakeQuantize);
+
+} // namespace relay
+} // namespace tvm
\ No newline at end of file

diff --git a/src/relay/pass/quantize_rewrite.cc b/src/relay/pass/quantize_rewrite.cc
index 925c516b41ed..b602294f8c4a 100644
--- a/src/relay/pass/quantize_rewrite.cc
+++ b/src/relay/pass/quantize_rewrite.cc
@@ -24,15 +24,57 @@
  */
 
 #include
-#include
+#include
 #include
 #include
-#include
+#include
 #include "pattern_util.h"
 
 namespace tvm {
 namespace relay {
 
+Expr QuantizeForwardRewrite(const Call& ref_call,
+                            const Array& new_args,
+                            const NodeRef& ctx) {
+  CHECK_EQ(new_args.size(), 1);
+  Expr data = new_args[0];
+  const auto* attrs = ref_call->attrs.as();
+  const auto out_dtype = attrs->out_dtype;
+  const auto* new_tensor = data.operator->()->checked_type().as();
+  CHECK(new_tensor) << "Expected TensorTypeNode but was " << data.operator->()->checked_type();
+  const auto output_zero_point = MakeConstantScalar(Int(32), attrs->output_zero_point);
+  const auto scale = MakeConstantScalar(Float(32), attrs->output_scale);
+  const int32_t min_val = get_qmin(out_dtype);
+  const int32_t max_val = get_qmax(out_dtype);
+  auto scale_data = Cast(Round(Divide(data, scale)), Int(32));
+  // we are trying to do - std::min(std::max(unclamped, min_val), max_val);
+  auto unclamped = Cast(Add(scale_data, output_zero_point), out_dtype);
+  auto clamped_output = Clip(unclamped, min_val, max_val);
+  return clamped_output;
+}
+
+RELAY_REGISTER_OP("qnn.quantize")
+.set_attr("FQuantizeForwardRewrite", QuantizeForwardRewrite);
+
+Expr DequantizeForwardRewrite(const Call& ref_call,
+                              const Array& new_args,
+                              const NodeRef& ctx) {
+  CHECK_EQ(new_args.size(), 1);
+  Expr data = new_args[0];
+  const auto* attrs = ref_call->attrs.as();
+  const auto* new_tensor = data.operator->()->checked_type().as();
+  CHECK(new_tensor) << "Expected TensorTypeNode but was " << data.operator->()->checked_type();
+  const auto input_zero_point = MakeConstantScalar(Int(32), attrs->input_zero_point);
+  const auto input_scale = MakeConstantScalar(Float(32), attrs->input_scale);
+  auto shift = Subtract(Cast(data, Int(32)), input_zero_point);
+  auto scale = Multiply(Cast(shift, Float(32)), input_scale);
+  return scale;
+}
+
+RELAY_REGISTER_OP("qnn.dequantize")
+.set_attr("FQuantizeForwardRewrite", DequantizeForwardRewrite);
+
+TVM_REGISTER_API("relay._quantize.rewrite").set_body_typed([](const Expr& e) {
+  Expr ret = ForwardRewrite(e, "FQuantizeForwardRewrite", nullptr, nullptr);
+  return ret;
+});
 
 } // namespace relay
 } // namespace tvm

diff --git a/tests/python/unittest/test_quantized_ops.py b/tests/python/unittest/test_quantized_ops.py
index 11082f774c59..272f22aecb45 100644
--- a/tests/python/unittest/test_quantized_ops.py
+++ b/tests/python/unittest/test_quantized_ops.py
@@ -34,8 +34,98 @@ def run_infer_type(expr):
     entry = mod["main"]
     return entry if isinstance(expr, relay.Function) else entry.body
 
+def test_quantize_op():
+    def quantize_test_driver(in_dtype, quant_args, out_dtype, in_data, verify_output_data):
+        shape = in_data.shape
+        input_data = relay.var("input_data", shape=shape, dtype=in_dtype)
+        output_zero_point = quant_args['out_zero_point']
+        output_scale =
quant_args['out_scale'] + quantized_output = relay.op.qnn.quantize(input_data, output_zero_point=output_zero_point, + output_scale=output_scale, out_dtype=out_dtype) + func = relay.Function(relay.analysis.free_vars(quantized_output), quantized_output) + func = run_infer_type(func) + func = relay.quantize.rewrite(func) + func = run_infer_type(func) + graph, lib, params = relay.build(func, "llvm", params=None) + mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0)) + mod.set_input(input_data=in_data) + mod.run() + res = mod.get_output(0).asnumpy() + np.testing.assert_equal(res, verify_output_data) + assert res.dtype == out_dtype + + def test_float32_to_uint8(): + data = np.array([-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64]) \ + .astype('float32') \ + .reshape((2,5)) + output = np.array([0, 1, 2, 3, 4, 251, 252, 253, 254, 255]) \ + .astype('uint8') \ + .reshape((2,5)) + quant_args = {"out_zero_point":127, "out_scale":0.5} + quantize_test_driver(in_dtype='float32', quant_args=quant_args, out_dtype='uint8', in_data=data, + verify_output_data=output) + + def test_float32_to_int8(): + data = np.array([-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64]) \ + .astype('float32') \ + .reshape((2,5)) + output = np.array([-128, -127, -126, -125, -124, 123, 124, 125, 126, 127]) \ + .astype('int8') \ + .reshape((2,5)) + quant_args = {"out_zero_point":-1, "out_scale":0.5} + quantize_test_driver(in_dtype='float32', quant_args=quant_args, out_dtype='int8', in_data=data, + verify_output_data=output) + + test_float32_to_uint8() + test_float32_to_int8() + +def test_dequantize_op(): + + def quantize_test_driver(in_dtype, quant_args, in_data, verify_output_data): + shape = in_data.shape + input_data = relay.var("input_data", shape=shape, dtype=in_dtype) + input_zero_point = quant_args['in_zero_point'] + input_scale = quant_args['in_scale'] + quantized_output = relay.op.qnn.dequantize(input_data, input_zero_point=input_zero_point, + input_scale=input_scale) + func = relay.Function(relay.analysis.free_vars(quantized_output), quantized_output) + func = run_infer_type(func) + func = relay.quantize.rewrite(func) + func = run_infer_type(func) + graph, lib, params = relay.build(func, "llvm", params=None) + mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0)) + mod.set_input(input_data=in_data) + mod.run() + res = mod.get_output(0).asnumpy() + np.testing.assert_allclose(res, verify_output_data) + assert res.dtype == np.float32 + + def test_uint8_to_float32(): + data = np.array([0, 1, 2, 3, 4, 251, 252, 253, 254, 255]) \ + .astype('uint8') \ + .reshape((2,5)) + output = np.array([-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64]) \ + .astype('float32') \ + .reshape((2,5)) + quant_args = {"in_zero_point":127, "in_scale":0.5} + quantize_test_driver(in_dtype='uint8', quant_args=quant_args, in_data=data, + verify_output_data=output) + + def test_int8_to_float32(): + data = np.array([-128, -127, -126, -125, -124, 123, 124, 125, 126, 127]) \ + .astype('int8') \ + .reshape((2,5)) + output = np.array([-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64]) \ + .astype('float32') \ + .reshape((2,5)) + quant_args = {"in_zero_point":-1, "in_scale":0.5} + quantize_test_driver(in_dtype='int8', quant_args=quant_args, in_data=data, + verify_output_data=output) + + test_uint8_to_float32() + test_int8_to_float32() if __name__ == "__main__": - # add your tests here. 
-    pass
+    test_quantize_op()
+    test_dequantize_op()

From bcf003bbb6300b590df870522efbc57cd6ab6a1b Mon Sep 17 00:00:00 2001
From: "shoubhikbhatti@gmail.com"
Date: Mon, 8 Jul 2019 14:27:19 -0700
Subject: [PATCH 4/8] [Relay] [Quantization] Removing redundant code.

---
 src/relay/op/nn/dequantize.cc  | 3 ---
 src/relay/op/nn/quantize_op.cc | 5 -----
 2 files changed, 8 deletions(-)

diff --git a/src/relay/op/nn/dequantize.cc b/src/relay/op/nn/dequantize.cc
index 7cbab466830b..dfc7cade7d0b 100644
--- a/src/relay/op/nn/dequantize.cc
+++ b/src/relay/op/nn/dequantize.cc
@@ -23,9 +23,6 @@
  * \brief Dequantize operator
  */
 
-//#include
-//#include
-//#include
 #include
 #include
 #include

diff --git a/src/relay/op/nn/quantize_op.cc b/src/relay/op/nn/quantize_op.cc
index 51308e86a0da..106abc93364d 100644
--- a/src/relay/op/nn/quantize_op.cc
+++ b/src/relay/op/nn/quantize_op.cc
@@ -23,11 +23,6 @@
  * \brief Quantize operator
  */
 
-//#include
-//#include
-//#include
-//#include
-//#include
 #include
 #include
 #include

From 6766af9ced609e1840cac0e64ed27690768d5c00 Mon Sep 17 00:00:00 2001
From: "shoubhikbhatti@gmail.com"
Date: Fri, 12 Jul 2019 11:34:24 -0700
Subject: [PATCH 5/8] [Relay] [Quantization] 1. Correcting docs 2. Reordering
 Clip and Cast in the quantize op for stability.

---
 python/tvm/relay/op/qnn/qnn.py     | 11 +++++------
 src/relay/op/nn/dequantize.cc      |  2 +-
 src/relay/pass/quantize_rewrite.cc |  7 ++++---
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/python/tvm/relay/op/qnn/qnn.py b/python/tvm/relay/op/qnn/qnn.py
index 810e2a8fad00..45fb127159d0 100644
--- a/python/tvm/relay/op/qnn/qnn.py
+++ b/python/tvm/relay/op/qnn/qnn.py
@@ -21,22 +21,21 @@
 def quantize(input_data, output_zero_point, output_scale, out_dtype='int8'):
     r""" Quantize op
-    This operator takes floating point 32 or quantized int8 and uint8 as input and produces
-    quantized int8 or uint8 as output. The output shape is the same as input shape. The input
-    tensor can be of any shape.
+    This operator takes float32 as input and produces quantized int8 or uint8 as output. The input tensor can be of
+    any shape. The output shape is the same as input shape.
     .. math::
         \mbox{out}[x] =
         \mbox{clamp}(\mbox{round}(\mbox{input_tensor}/\mbox{output_scale}) + \mbox{output_zero_point}, \mbox{out_dtype::min}, \mbox{out_dtype::max})
     Parameters
     ----------
     input_data : tvm.relay.Expr
-        The input tensor to be quantized. Can be of type [float32, int8, uint8].
+        The input tensor to be quantized. Can be of type float32.
     output_zero_point :
         The output zero_point.
     output_scale:
         The output scale.
     out_dtype:
-        The data type of the output tensor. Can be [int8, uint8, float32]
+        The data type of the output tensor. Can be [int8, uint8]
     Returns
     -------
     result : tvm.relay.Expr
@@ -53,7 +53,7 @@ def dequantize(input_data, input_zero_point, input_scale):
     Parameters
     ----------
     input_data : tvm.relay.Expr
-        The input tensor to be quantized. Can be of type [float32, int8, uint8].
+        The input tensor to be dequantized. Can be of type [int8, uint8].
     input_zero_point :
         The input zero_point.
     input_scale:
         The input scale.

diff --git a/src/relay/op/nn/dequantize.cc b/src/relay/op/nn/dequantize.cc
index dfc7cade7d0b..55213f168b6d 100644
--- a/src/relay/op/nn/dequantize.cc
+++ b/src/relay/op/nn/dequantize.cc
@@ -60,7 +60,7 @@ Expr MakeDequantize(Expr data,
 RELAY_REGISTER_OP("qnn.dequantize")
 .describe(R"code(Dequantizes the input and produces float32 output.
-The input is always quantized (int8, uint8) and will be converted to float32 given input scale and shift.
+The input is always quantized (int8, uint8) and will be converted to float32 given input scale and zero_point. - **data**: Quantized tensor of any shape to dequantize. The input data can be of floating point )code" TVM_ADD_FILELINE) .set_attrs_type_key("relay.attrs.DequantizeAttrs") diff --git a/src/relay/pass/quantize_rewrite.cc b/src/relay/pass/quantize_rewrite.cc index b602294f8c4a..08c8a18c1ef3 100644 --- a/src/relay/pass/quantize_rewrite.cc +++ b/src/relay/pass/quantize_rewrite.cc @@ -46,9 +46,10 @@ Expr QuantizeForwardRewrite(const Call& ref_call, const Array& new_args, c const int32_t max_val = get_qmax(out_dtype); auto scale_data = Cast(Round(Divide(data, scale)), Int(32)); // we are trying to do - std::min(std::max(unclamped, min_val), max_val); - auto unclamped = Cast(Add(scale_data, output_zero_point), out_dtype); - auto clamped_output = Clip(unclamped, min_val, max_val); - return clamped_output; + auto add_zero_point = Add(scale_data, output_zero_point); + auto clamped_output = Clip(add_zero_point, min_val, max_val); + auto clamp_out_dtype = Cast(clamped_output, out_dtype); + return clamp_out_dtype; } RELAY_REGISTER_OP("qnn.quantize") From 186af5a3ea2f7b847ac8f6851b0b8e953c38763f Mon Sep 17 00:00:00 2001 From: "shoubhikbhatti@gmail.com" Date: Fri, 12 Jul 2019 11:34:24 -0700 Subject: [PATCH 6/8] [Relay] [Quantization] 1. Correcting the file paths as suggested in the reviews. --- .../tvm/relay/{attrs/qnn.h => qnn/attrs.h} | 6 +- src/relay/{ => qnn}/pass/quantize_rewrite.cc | 7 +- .../quantize_util.h => src/relay/qnn/util.h | 92 +++++++++---------- 3 files changed, 53 insertions(+), 52 deletions(-) rename include/tvm/relay/{attrs/qnn.h => qnn/attrs.h} (89%) rename src/relay/{ => qnn}/pass/quantize_rewrite.cc (92%) rename include/tvm/relay/quantize_util.h => src/relay/qnn/util.h (54%) diff --git a/include/tvm/relay/attrs/qnn.h b/include/tvm/relay/qnn/attrs.h similarity index 89% rename from include/tvm/relay/attrs/qnn.h rename to include/tvm/relay/qnn/attrs.h index c45a33c786f7..facc0937e2cf 100644 --- a/include/tvm/relay/attrs/qnn.h +++ b/include/tvm/relay/qnn/attrs.h @@ -21,8 +21,8 @@ * \file tvm/relay/attrs/nn.h * \brief Auxiliary attributes for nn operators. */ -#ifndef TVM_RELAY_ATTRS_NN_QUANTIZE_H_ -#define TVM_RELAY_ATTRS_NN_QUANTIZE_H_ +#ifndef TVM_RELAY_ATTRS_QNN_H_ +#define TVM_RELAY_ATTRS_QNN_H_ #include #include @@ -34,4 +34,4 @@ namespace relay { } // namespace relay } // namespace tvm -#endif // TVM_RELAY_ATTRS_NN_QUANTIZE_H_ +#endif // TVM_RELAY_ATTRS_QNN_H_ diff --git a/src/relay/pass/quantize_rewrite.cc b/src/relay/qnn/pass/quantize_rewrite.cc similarity index 92% rename from src/relay/pass/quantize_rewrite.cc rename to src/relay/qnn/pass/quantize_rewrite.cc index 925c516b41ed..eb4848c87928 100644 --- a/src/relay/pass/quantize_rewrite.cc +++ b/src/relay/qnn/pass/quantize_rewrite.cc @@ -26,13 +26,14 @@ #include #include #include -#include -#include -#include "pattern_util.h" +#include +#include "../util.h" +#include "../../pass/pattern_util.h" namespace tvm { namespace relay { + } // namespace relay } // namespace tvm diff --git a/include/tvm/relay/quantize_util.h b/src/relay/qnn/util.h similarity index 54% rename from include/tvm/relay/quantize_util.h rename to src/relay/qnn/util.h index bb054fb8fb65..61663b0da85e 100644 --- a/include/tvm/relay/quantize_util.h +++ b/src/relay/qnn/util.h @@ -18,97 +18,97 @@ */ /*! 
- * \file nnvm/compiler/quantize_util.h + * \file tvm/relay/quantize_util.h * \brief Utility methods needs for quantized ops that can be shared */ -#ifndef TVM_QUANTIZE_UTIL_H -#define TVM_QUANTIZE_UTIL_H +#ifndef TVM_RELAY_QUANTIZE_UTIL_H_ +#define TVM_RELAY_QUANTIZE_UTIL_H_ #include -#include "./base.h" +#include +#include namespace tvm { namespace relay { -inline bool is_Int8(const DataType& dtype) { +inline bool IsInt8(const DataType& dtype) { return dtype == Int(8); } -inline bool is_UInt8(const DataType& dtype) { +inline bool IsUint8(const DataType& dtype) { return dtype == UInt(8); } - -inline bool is_Int16(const DataType& dtype) { +inline bool IsInt16(const DataType& dtype) { return dtype == Int(16); } -inline bool is_UInt16(const DataType& dtype) { +inline bool IsUint16(const DataType& dtype) { return dtype == UInt(16); } -inline bool is_Int32(const DataType& dtype) { +inline bool IsInt32(const DataType& dtype) { return dtype == Int(32); } -inline bool is_UInt32(const DataType& dtype) { +inline bool IsUint32(const DataType& dtype) { return dtype == UInt(32); } - - -inline bool is_Float32(const DataType& dtype) { +inline bool IsFloat32(const DataType& dtype) { return dtype == Float(32); } -inline bool is_quantized_type(const DataType& dtype) { - return is_Int8(dtype) || is_UInt8(dtype) - || is_Int16(dtype) || is_UInt16(dtype); +inline bool IsQuantizedType(const DataType& dtype) { + return IsInt8(dtype) || IsUint8(dtype) + || IsInt16(dtype) || IsUint16(dtype); } enum class QuantizeOpType : uint8_t { - Quantize_Requantize, + Quantize, Dequantize, Requantize }; -inline bool is_valid_quantized_op_input_type(const QuantizeOpType &op_type, const DataType &in_dtype) { - switch(op_type) { - case QuantizeOpType::Quantize_Requantize: - return is_Float32(in_dtype) || is_quantized_type(in_dtype); +inline bool IsValidOpInputType(const QuantizeOpType& op_type, + const DataType& in_dtype) { + switch (op_type) { + case QuantizeOpType::Quantize: + return IsFloat32(in_dtype) || IsQuantizedType(in_dtype); case QuantizeOpType ::Dequantize: - return is_quantized_type(in_dtype); + return IsQuantizedType(in_dtype); case QuantizeOpType ::Requantize: - return is_Int16(in_dtype) || is_Int32(in_dtype); + return IsInt16(in_dtype) || IsInt32(in_dtype); default: return false; } } -inline bool is_valid_quantized_op_output_type(const QuantizeOpType &op_type, const DataType &in_dtype) { - switch(op_type) { - case QuantizeOpType::Quantize_Requantize: - return is_quantized_type(in_dtype); +inline bool IsValidOpOutputType(const QuantizeOpType& op_type, + const DataType& in_dtype) { + switch (op_type) { + case QuantizeOpType::Quantize: + return IsQuantizedType(in_dtype); case QuantizeOpType::Dequantize: - return is_Float32(in_dtype); + return IsFloat32(in_dtype); default: return false; } } -inline const int32_t get_qmin(const DataType& dtype) { - if (is_Int8(dtype)) { +inline const int32_t GetQmin(const DataType& dtype) { + if (IsInt8(dtype)) { return std::numeric_limits::min(); - } else if (is_UInt8(dtype)) { + } else if (IsUint8(dtype)) { return std::numeric_limits::min(); - } else if (is_Int16(dtype)) { + } else if (IsInt16(dtype)) { return std::numeric_limits::min(); - } else if (is_UInt16(dtype)) { + } else if (IsUint16(dtype)) { return std::numeric_limits::min(); - } else if (is_Int32(dtype)) { + } else if (IsInt32(dtype)) { return std::numeric_limits::min(); - } else if (is_UInt32(dtype)) { + } else if (IsUint32(dtype)) { return std::numeric_limits::min(); } LOG(FATAL) << "Type not supported\n"; @@ -116,24 
+116,24 @@ inline const int32_t get_qmin(const DataType& dtype) { } -inline const int32_t get_qmax(const DataType& dtype) { - if (is_Int8(dtype)) { +inline const int32_t GetQmax(const DataType& dtype) { + if (IsInt8(dtype)) { return std::numeric_limits::max(); - } else if (is_UInt8(dtype)) { + } else if (IsUint8(dtype)) { return std::numeric_limits::max(); - } else if (is_Int16(dtype)) { + } else if (IsInt16(dtype)) { return std::numeric_limits::max(); - } else if (is_UInt16(dtype)) { + } else if (IsUint16(dtype)) { return std::numeric_limits::max(); - } else if (is_Int32(dtype)) { + } else if (IsInt32(dtype)) { return std::numeric_limits::max(); - } else if (is_UInt32(dtype)) { + } else if (IsUint32(dtype)) { return std::numeric_limits::max(); } LOG(FATAL) << "Type not supported\n"; return -1; } -} // namespace relay -} // namespace tvm -#endif //TVM_QUANTIZE_UTIL_H +} // namespace relay +} // namespace tvm +#endif // TVM_RELAY_QUANTIZE_UTIL_H_ From e83704bb45eadc64de130af87bd43913e9545cf1 Mon Sep 17 00:00:00 2001 From: "shoubhikbhatti@gmail.com" Date: Fri, 12 Jul 2019 11:49:40 -0700 Subject: [PATCH 7/8] Adding the common python files. --- python/tvm/relay/qnn/__init__.py | 20 ++++++++++++++++ python/tvm/relay/qnn/_qnn.py | 22 +++++++++++++++++ python/tvm/relay/qnn/ir_pass.py | 37 +++++++++++++++++++++++++++++ python/tvm/relay/qnn/op/__init__.py | 20 ++++++++++++++++ python/tvm/relay/qnn/op/_make.py | 20 ++++++++++++++++ python/tvm/relay/qnn/op/qnn.py | 20 ++++++++++++++++ 6 files changed, 139 insertions(+) create mode 100644 python/tvm/relay/qnn/__init__.py create mode 100644 python/tvm/relay/qnn/_qnn.py create mode 100644 python/tvm/relay/qnn/ir_pass.py create mode 100644 python/tvm/relay/qnn/op/__init__.py create mode 100644 python/tvm/relay/qnn/op/_make.py create mode 100644 python/tvm/relay/qnn/op/qnn.py diff --git a/python/tvm/relay/qnn/__init__.py b/python/tvm/relay/qnn/__init__.py new file mode 100644 index 000000000000..5de0656eaaca --- /dev/null +++ b/python/tvm/relay/qnn/__init__.py @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Neural network related operators.""" +from __future__ import absolute_import as _abs +from . import op +from . import ir_pass diff --git a/python/tvm/relay/qnn/_qnn.py b/python/tvm/relay/qnn/_qnn.py new file mode 100644 index 000000000000..bd3cdbb976d6 --- /dev/null +++ b/python/tvm/relay/qnn/_qnn.py @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#pylint: disable=unused-argument
+"""Internal module for quantization."""
+from __future__ import absolute_import
+from tvm._ffi.function import _init_api
+
+_init_api("relay._qnn", __name__)

diff --git a/python/tvm/relay/qnn/ir_pass.py b/python/tvm/relay/qnn/ir_pass.py
new file mode 100644
index 000000000000..24e3329e961c
--- /dev/null
+++ b/python/tvm/relay/qnn/ir_pass.py
@@ -0,0 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#pylint: disable=unused-argument
+"""Automatic quantization toolkit."""
+from __future__ import absolute_import
+
+from . import _qnn
+
+def rewrite(expr):
+    """
+    Rewrites the high-level quantized ops into low-level existing Relay ops.
+
+    Parameters
+    ----------
+    expr : tvm.relay.Expr
+        The input expression.
+
+    Returns
+    -------
+    expr : tvm.relay.Expr
+        The output expression.
+    """
+    return _qnn.rewrite(expr)

diff --git a/python/tvm/relay/qnn/op/__init__.py b/python/tvm/relay/qnn/op/__init__.py
new file mode 100644
index 000000000000..e9adfa783f93
--- /dev/null
+++ b/python/tvm/relay/qnn/op/__init__.py
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=wildcard-import
+"""Neural network related operators."""
+from __future__ import absolute_import as _abs
+from .qnn import *

diff --git a/python/tvm/relay/qnn/op/_make.py b/python/tvm/relay/qnn/op/_make.py
new file mode 100644
index 000000000000..b1695629b8f9
--- /dev/null
+++ b/python/tvm/relay/qnn/op/_make.py
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Constructor APIs""" +from ...._ffi.function import _init_api + +_init_api("relay.op.qnn._make", __name__) diff --git a/python/tvm/relay/qnn/op/qnn.py b/python/tvm/relay/qnn/op/qnn.py new file mode 100644 index 000000000000..ff7d0876d56e --- /dev/null +++ b/python/tvm/relay/qnn/op/qnn.py @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +#pylint: disable=invalid-name, too-many-lines +"""Neural network operations.""" +from __future__ import absolute_import as _abs +from . import _make From 05b1ac6df8330318991bda3a5737281e5acb0174 Mon Sep 17 00:00:00 2001 From: "shoubhikbhatti@gmail.com" Date: Mon, 15 Jul 2019 10:53:54 -0700 Subject: [PATCH 8/8] Fixing lint issues. --- include/tvm/relay/qnn/attrs.h | 7 +++---- python/tvm/relay/qnn/__init__.py | 2 +- python/tvm/relay/qnn/_qnn.py | 2 +- python/tvm/relay/qnn/ir_pass.py | 2 +- python/tvm/relay/qnn/op/__init__.py | 1 - python/tvm/relay/qnn/op/_make.py | 2 +- python/tvm/relay/qnn/op/qnn.py | 2 +- src/relay/pass/pattern_util.h | 2 +- src/relay/qnn/op/dequantize.cc | 2 +- src/relay/qnn/op/quantize_op.cc | 2 +- src/relay/qnn/pass/quantize_rewrite.cc | 2 +- src/relay/qnn/util.h | 8 ++++---- 12 files changed, 16 insertions(+), 18 deletions(-) diff --git a/include/tvm/relay/qnn/attrs.h b/include/tvm/relay/qnn/attrs.h index 51be43154b59..9645b3cf587d 100644 --- a/include/tvm/relay/qnn/attrs.h +++ b/include/tvm/relay/qnn/attrs.h @@ -21,8 +21,8 @@ * \file tvm/relay/qnn/attrs.h * \brief Auxiliary attributes for quantized nn operators. 
*/
-#ifndef TVM_RELAY_ATTRS_QNN_H_
-#define TVM_RELAY_ATTRS_QNN_H_
+#ifndef TVM_RELAY_QNN_ATTRS_H_
+#define TVM_RELAY_QNN_ATTRS_H_
 
 #include
 #include
@@ -52,7 +52,6 @@ struct DequantizeAttrs : public tvm::AttrsNode {
   double input_scale;
 
   TVM_DECLARE_ATTRS(DequantizeAttrs, "relay.attrs.DequantizeAttrs") {
-
     TVM_ATTR_FIELD(input_zero_point)
       .describe("The zero_point for the input tensor of this op.");
 
@@ -64,4 +63,4 @@ struct DequantizeAttrs : public tvm::AttrsNode {
 
 } // namespace relay
 } // namespace tvm
-#endif // TVM_RELAY_ATTRS_QNN_H_
+#endif // TVM_RELAY_QNN_ATTRS_H_

diff --git a/python/tvm/relay/qnn/__init__.py b/python/tvm/relay/qnn/__init__.py
index d3dfd85d653f..236b094a6988 100644
--- a/python/tvm/relay/qnn/__init__.py
+++ b/python/tvm/relay/qnn/__init__.py
@@ -18,4 +18,4 @@
 """Neural network related operators."""
 from __future__ import absolute_import as _abs
 from . import op
-from . import ir_pass
\ No newline at end of file
+from . import ir_pass

diff --git a/python/tvm/relay/qnn/_qnn.py b/python/tvm/relay/qnn/_qnn.py
index 1187b632b3db..77ecc325ae18 100644
--- a/python/tvm/relay/qnn/_qnn.py
+++ b/python/tvm/relay/qnn/_qnn.py
@@ -20,4 +20,4 @@
 from __future__ import absolute_import
 from tvm._ffi.function import _init_api
 
-_init_api("relay._qnn", __name__)
\ No newline at end of file
+_init_api("relay._qnn", __name__)

diff --git a/python/tvm/relay/qnn/ir_pass.py b/python/tvm/relay/qnn/ir_pass.py
index 24e3329e961c..ea272dec429a 100644
--- a/python/tvm/relay/qnn/ir_pass.py
+++ b/python/tvm/relay/qnn/ir_pass.py
@@ -14,7 +14,7 @@
 # KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations
 # under the License.
-#pylint: disable=unused-argument
+
 """Automatic quantization toolkit."""
 from __future__ import absolute_import

diff --git a/python/tvm/relay/qnn/op/__init__.py b/python/tvm/relay/qnn/op/__init__.py
index cd2367b77d2c..f1c896489fd3 100644
--- a/python/tvm/relay/qnn/op/__init__.py
+++ b/python/tvm/relay/qnn/op/__init__.py
@@ -14,7 +14,6 @@
 # KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations
 # under the License.
-# pylint: disable=wildcard-import
 
 """Neural network related operators."""

diff --git a/python/tvm/relay/qnn/op/_make.py b/python/tvm/relay/qnn/op/_make.py
index c611becf36d4..82d5e5a9cdc3 100644
--- a/python/tvm/relay/qnn/op/_make.py
+++ b/python/tvm/relay/qnn/op/_make.py
@@ -19,4 +19,4 @@
 from ...._ffi.function import _init_api
 
-_init_api("relay.op.qnn._make", __name__)
\ No newline at end of file
+_init_api("relay.op.qnn._make", __name__)

diff --git a/python/tvm/relay/qnn/op/qnn.py b/python/tvm/relay/qnn/op/qnn.py
index e12769d688cc..290fb4912a17 100644
--- a/python/tvm/relay/qnn/op/qnn.py
+++ b/python/tvm/relay/qnn/op/qnn.py
@@ -63,4 +63,4 @@ def dequantize(input_data, input_zero_point, input_scale):
     result : tvm.relay.Expr
         The computed result.
""" - return _make.dequantize(input_data, input_zero_point, input_scale) \ No newline at end of file + return _make.dequantize(input_data, input_zero_point, input_scale) diff --git a/src/relay/pass/pattern_util.h b/src/relay/pass/pattern_util.h index 7249d1d4c086..906e3193729e 100644 --- a/src/relay/pass/pattern_util.h +++ b/src/relay/pass/pattern_util.h @@ -34,7 +34,7 @@ #include #include #include - +#include namespace tvm { namespace relay { diff --git a/src/relay/qnn/op/dequantize.cc b/src/relay/qnn/op/dequantize.cc index 7b346df198e6..cfaff3f23755 100644 --- a/src/relay/qnn/op/dequantize.cc +++ b/src/relay/qnn/op/dequantize.cc @@ -73,4 +73,4 @@ TVM_REGISTER_API("relay.op.qnn._make.dequantize") .set_body_typed(MakeDequantize); } // namespace relay -} // namespace tvm \ No newline at end of file +} // namespace tvm diff --git a/src/relay/qnn/op/quantize_op.cc b/src/relay/qnn/op/quantize_op.cc index 809cf33f58ae..b3be62742aae 100644 --- a/src/relay/qnn/op/quantize_op.cc +++ b/src/relay/qnn/op/quantize_op.cc @@ -86,4 +86,4 @@ TVM_REGISTER_API("relay.op.qnn._make.quantize") .set_body_typed(MakeQuantize); } // namespace relay -} // namespace tvm \ No newline at end of file +} // namespace tvm diff --git a/src/relay/qnn/pass/quantize_rewrite.cc b/src/relay/qnn/pass/quantize_rewrite.cc index fbdfda486ac7..1da4824772ea 100644 --- a/src/relay/qnn/pass/quantize_rewrite.cc +++ b/src/relay/qnn/pass/quantize_rewrite.cc @@ -77,4 +77,4 @@ TVM_REGISTER_API("relay._qnn.rewrite").set_body_typed([](const Expr& }); } // namespace relay -} // namespace tvm \ No newline at end of file +} // namespace tvm diff --git a/src/relay/qnn/util.h b/src/relay/qnn/util.h index 0f57f5564b43..c96227c3667a 100644 --- a/src/relay/qnn/util.h +++ b/src/relay/qnn/util.h @@ -22,12 +22,12 @@ * \brief Utility methods needs for quantized ops that can be shared */ -#ifndef TVM_RELAY_QUANTIZE_UTIL_H_ -#define TVM_RELAY_QUANTIZE_UTIL_H_ +#ifndef TVM_RELAY_QNN_UTIL_H_ +#define TVM_RELAY_QNN_UTIL_H_ #include -#include #include +#include namespace tvm { namespace relay { @@ -134,4 +134,4 @@ inline const int32_t GetQmax(const DataType& dtype) { } // namespace relay } // namespace tvm -#endif // TVM_RELAY_QUANTIZE_UTIL_H_ +#endif // TVM_RELAY_QNN_UTIL_H_