From e38bd2bed62257aea949222edaf6b3ec7ff3c97a Mon Sep 17 00:00:00 2001
From: Nicola Lancellotti
Date: Wed, 10 Aug 2022 15:04:51 +0000
Subject: [PATCH 1/2] [ETHOSN] Add support for Requantize

Change-Id: I4a2c900a2ab2a5066152c60d7aeb10a12c144d4e
---
 python/tvm/relay/op/contrib/ethosn.py         | 14 +++
 src/relay/backend/contrib/ethosn/codegen.cc   | 38 ++++++++
 .../backend/contrib/ethosn/codegen_ethosn.h   |  1 +
 .../backend/contrib/ethosn/ethosn_api.cc      | 30 +++++++
 src/relay/backend/contrib/ethosn/ethosn_api.h |  8 ++
 .../contrib/test_ethosn/test_requantize.py    | 86 +++++++++++++++++++
 6 files changed, 177 insertions(+)
 create mode 100644 tests/python/contrib/test_ethosn/test_requantize.py

diff --git a/python/tvm/relay/op/contrib/ethosn.py b/python/tvm/relay/op/contrib/ethosn.py
index 2b1d7b13ce43..8db5aaca6605 100644
--- a/python/tvm/relay/op/contrib/ethosn.py
+++ b/python/tvm/relay/op/contrib/ethosn.py
@@ -153,6 +153,12 @@ def qnn_leaky_relu_pattern():
         pattern = is_op("qnn.quantize")(pattern, is_constant(), is_constant())
         return pattern
 
+    def qnn_requantize_pattern():
+        pattern = is_op("qnn.requantize")(
+            wildcard(), is_constant(), is_constant(), is_constant(), is_constant()
+        )
+        return pattern
+
     def check_conv2d(extract):
         """Check if a conv2d is supported by Ethos-N."""
         if not ethosn_available():
@@ -202,6 +208,13 @@ def check_leaky_relu(extract):
 
         return support.leaky_relu(extract)
 
+    def check_requantize(extract):
+        """Check if requantize is supported."""
+        if not ethosn_available():
+            return False
+
+        return support.requantize(extract)
+
     return [
         ("ethos-n.qnn_conv2d", qnn_conv_pattern(), check_conv2d),
         ("ethos-n.qnn_avg_pool2d", qnn_avg_pool2d_pattern(), check_avg_pool2d),
@@ -210,6 +223,7 @@ def check_leaky_relu(extract):
         ("ethos-n.qnn_mean", qnn_mean_pattern(), check_mean),
         ("ethos-n.qnn_tanh", qnn_tanh_pattern(), check_tanh),
         ("ethos-n.qnn_leaky_relu", qnn_leaky_relu_pattern(), check_leaky_relu),
+        ("ethos-n.qnn_requantize", qnn_requantize_pattern(), check_requantize),
     ]
 
 
diff --git a/src/relay/backend/contrib/ethosn/codegen.cc b/src/relay/backend/contrib/ethosn/codegen.cc
index 9fb8fcd4cdeb..d74b4f829098 100644
--- a/src/relay/backend/contrib/ethosn/codegen.cc
+++ b/src/relay/backend/contrib/ethosn/codegen.cc
@@ -143,6 +143,10 @@ void InferTensorsVisitor::InferCall(const CallNode* cn) {
     params.input_info = GetTensorInfo(tensor_table_, call);
     err = EthosnAPI::Relu(call, &params);
     tensor_table_[cn->args[0]] = {params.input_info};
+  } else if (IsEthosnFunc(call, "ethos-n.qnn_requantize")) {
+    RequantizeParams params;
+    err += EthosnAPI::Requantize(cn->op.as<FunctionNode>()->body, &params);
+    tensor_table_[cn->args[0]] = {params.input_info};
   } else {
     err = EthosnError("unknown operator");
   }
@@ -314,6 +318,9 @@ sl::TensorsAndId ConstructNetworkVisitor::HandleCall(const CallNode* cn) {
   } else if (IsEthosnOp(call, "clip")) {
     if ((err = MakeReluLayer(call, &tensor))) ReportFatalError(call, err);
     return MakeOps(tensor);
+  } else if (IsEthosnFunc(call, "ethos-n.qnn_requantize")) {
+    if ((err = MakeRequantizeLayer(call, &tensor))) ReportFatalError(call, err);
+    return MakeOps(tensor);
   } else {
     ReportFatalError(call, EthosnError("unknown operator"));
     return {};
@@ -596,6 +603,24 @@ EthosnError ConstructNetworkVisitor::MakeReluLayer(const Call& call,
   return EthosnError();
 }
 
+EthosnError ConstructNetworkVisitor::MakeRequantizeLayer(const Call& call,
+                                                         sl::TensorAndId<sl::Operand>* out) {
+  RequantizeParams params;
+  params.input_info = GetTensorInfo(tensor_table_, call);
+  if (auto err = EthosnAPI::Requantize(call->op.as<FunctionNode>()->body, &params)) {
+    return err;
+  }
+
+  auto input = operand_table_[call->args[0]][0];
+
+  try {
+    *out = AddRequantize(network_, *input, params.requantize_info);
+  } catch (const sl::NotSupportedException& e) {
+    return EthosnError(e.what());
+  }
+  return EthosnError();
+}
+
 runtime::Module EthosnCompiler::CreateRuntimeModule(const ObjectRef& ref) {
   std::vector<runtime::ethosn::OrderedCompiledNetwork> cmms;
   if (ref->IsInstance<FunctionNode>()) {
@@ -918,6 +943,19 @@ TVM_REGISTER_GLOBAL("relay.ethos-n.support.relu")
       err += EthosnError(reason);
     });
 
+TVM_REGISTER_GLOBAL("relay.ethos-n.support.requantize")
+    .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) {
+      Call call = args[0];
+      RequantizeParams params;
+      auto err = EthosnAPI::Requantize(call, &params);
+      err += EthosnCompiler::SupportedSetup();
+      char reason[kReasonMaxLength];
+      reason[0] = '\0';
+      *rv = !err && EthosnCompiler::GetSupported()->IsRequantizeSupported(
+                        params.requantize_info, params.input_info, nullptr, reason, sizeof(reason));
+      err += EthosnError(reason);
+    });
+
 TVM_REGISTER_GLOBAL("relay.ethos-n.query").set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) {
 #if defined ETHOSN_HW
   *rv = true;
diff --git a/src/relay/backend/contrib/ethosn/codegen_ethosn.h b/src/relay/backend/contrib/ethosn/codegen_ethosn.h
index 6d26cc7daacc..3fbf5a77a70b 100644
--- a/src/relay/backend/contrib/ethosn/codegen_ethosn.h
+++ b/src/relay/backend/contrib/ethosn/codegen_ethosn.h
@@ -212,6 +212,7 @@ class ConstructNetworkVisitor : public MixedModeVisitor, private ErrorReportingP
   EthosnError MakeDepthToSpaceLayer(const Call& call, sl::TensorAndId<sl::Operand>* out);
   EthosnError MakeReluLayer(const Call& call, sl::TensorAndId<sl::Operand>* out);
   EthosnError MakeLeakyReLULayer(const Call& call, sl::TensorAndId<sl::Operand>* out);
+  EthosnError MakeRequantizeLayer(const Call& call, sl::TensorAndId<sl::Operand>* out);
 
   /*! \brief A look-up table from Expr to layers. */
   std::map<Expr, std::vector<std::shared_ptr<sl::Operand>>> operand_table_;
diff --git a/src/relay/backend/contrib/ethosn/ethosn_api.cc b/src/relay/backend/contrib/ethosn/ethosn_api.cc
index 493b827c2868..2a7e252f5720 100644
--- a/src/relay/backend/contrib/ethosn/ethosn_api.cc
+++ b/src/relay/backend/contrib/ethosn/ethosn_api.cc
@@ -39,6 +39,7 @@
 #include "ethosn_api_version.h"
 #include "ethosn_support_library/Support.hpp"
 #include "ethosn_support_library/SupportQueries.hpp"
+#include "tvm/relay/qnn/attrs.h"
 
 namespace tvm {
 namespace relay {
@@ -676,6 +677,35 @@ EthosnError EthosnAPI::Relu(const Expr& expr, ReluParams* params) {
   return err;
 }
 
+EthosnError EthosnAPI::Requantize(const Expr& expr, RequantizeParams* params) {
+  Call call = Downcast<Call>(expr);
+  const auto* input_dtype = call->args[0]->checked_type().as<TensorTypeNode>();
+  sl::TensorShape input_tensor_shape = {1, 1, 1, 1};
+  sl::DataType input_data_type;
+  EthosnError err = Tvm2Npu(input_dtype->shape, &input_tensor_shape);
+  err += Tvm2Npu(input_dtype->dtype, &input_data_type);
+
+  float input_sc, output_sc;
+  int input_zp, output_zp;
+  err += AsConstant(call->args[1], &input_sc);
+  err += AsConstant(call->args[2], &input_zp);
+  err += AsConstant(call->args[3], &output_sc);
+  err += AsConstant(call->args[4], &output_zp);
+
+  params->requantize_info = sl::RequantizeInfo(sl::QuantizationInfo(output_zp, output_sc));
+  params->input_info = sl::TensorInfo(input_tensor_shape, input_data_type, sl::DataFormat::NHWC,
+                                      sl::QuantizationInfo(input_zp, input_sc));
+
+  sl::QuantizationInfo output_q_info;
+  err += Tvm2Npu(output_zp, output_sc, &output_q_info);
+  sl::TensorInfo output_tensor_info;
+  err += Tvm2Npu(call->checked_type(), &output_tensor_info);
+  output_tensor_info.m_QuantizationInfo = output_q_info;
+  params->output_info = output_tensor_info;
+
+  return err;
+}
+
 EthosnError EthosnAPI::Tvm2Npu(const Array<IndexExpr>& padding, sl::Padding* npu_padding) {
   std::array<uint32_t, 4> dim;
   if (EthosnError err = AsArray(padding, &dim)) {
diff --git a/src/relay/backend/contrib/ethosn/ethosn_api.h b/src/relay/backend/contrib/ethosn/ethosn_api.h
index 3adb2981cc8c..bb1cd29a5bc4 100644
--- a/src/relay/backend/contrib/ethosn/ethosn_api.h
+++ b/src/relay/backend/contrib/ethosn/ethosn_api.h
@@ -140,6 +140,12 @@ struct ReluParams {
   sl::TensorInfo output_info;
 };
 
+struct RequantizeParams {
+  sl::RequantizeInfo requantize_info;
+  sl::TensorInfo input_info;
+  sl::TensorInfo output_info;
+};
+
 /*!
  * \brief A wrapper around std::stringstream to build an EthosnError.
  */
@@ -233,6 +239,8 @@ class EthosnAPI {
   static EthosnError DepthToSpace(const Expr& expr, DepthToSpaceParams* params);
   /*! \brief Extract the Support Library relu params from a Relay relu call */
   static EthosnError Relu(const Expr& expr, ReluParams* params);
+  /*! \brief Extract the Support Library requantize params from a Relay qnn.requantize call */
+  static EthosnError Requantize(const Expr& expr, RequantizeParams* params);
 
  private:
   /*! \brief Convert a TVM IndexExpr array to a SL tensor shape */
diff --git a/tests/python/contrib/test_ethosn/test_requantize.py b/tests/python/contrib/test_ethosn/test_requantize.py
new file mode 100644
index 000000000000..4626a0d92bc1
--- /dev/null
+++ b/tests/python/contrib/test_ethosn/test_requantize.py
@@ -0,0 +1,86 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Arm(R) Ethos(TM)-N integration requantize tests"""
+
+import pytest
+import numpy as np
+import tvm
+from tvm import relay
+from tvm.testing import requires_ethosn
+from . import infrastructure as tei
+
+
+def _get_model(shape, input_zp, input_sc, output_zp, output_sc, in_dtype, out_dtype):
+    a = relay.var("a", shape=shape, dtype=in_dtype)
+    model = relay.qnn.op.requantize(
+        data=a,
+        input_scale=relay.const(input_sc, "float32"),
+        input_zero_point=relay.const(input_zp, "int32"),
+        output_scale=relay.const(output_sc, "float32"),
+        output_zero_point=relay.const(output_zp, "int32"),
+        out_dtype=out_dtype,
+    )
+    return model
+
+
+@requires_ethosn
+@pytest.mark.parametrize("in_dtype", ["int8", "uint8"])
+@pytest.mark.parametrize("out_dtype", ["int8", "uint8"])
+@pytest.mark.parametrize("shape", [(1, 52, 52, 3)])
+def test_requantize(in_dtype, out_dtype, shape):
+    np.random.seed(0)
+    low = 0 if in_dtype == "uint8" else -5
+    high = low + 10
+    input_zp = (high + low) / 2
+    inputs = {
+        "a": tvm.nd.array(np.random.randint(low=low, high=high, size=shape, dtype=in_dtype)),
+    }
+    outputs = []
+    for npu in [False, True]:
+        model = _get_model(
+            shape=shape,
+            input_zp=input_zp,
+            input_sc=0.002,
+            output_zp=10,
+            output_sc=0.008,
+            in_dtype=in_dtype,
+            out_dtype=out_dtype,
+        )
+        mod = tei.make_module(model, [])
+        x = tei.build_and_run(mod, inputs, 1, {}, npu=npu)
+        outputs.append(x)
+
+    tei.verify(outputs, out_dtype, 1)
+
+
+@requires_ethosn
+def test_requantize_failure():
+    input_sc = 0.8
+    output_sc = (input_sc / 128) - 0.0001
+    model = _get_model(
+        shape=(1, 52, 52, 3),
+        input_zp=0,
+        input_sc=input_sc,
+        output_zp=0,
+        output_sc=output_sc,
+        in_dtype="int8",
+        out_dtype="int8",
+    )
+    model = tei.make_ethosn_composite(model, "ethos-n.qnn_requantize")
+    mod = tei.make_ethosn_partition(model)
+    tei.test_error(mod, {}, "Output scale must be bigger than input scale / 128")

From 6f0c3637da740cd9d2a94c300abee66f2d7773e3 Mon Sep 17 00:00:00 2001
From: Nicola Lancellotti
Date: Mon, 15 Aug 2022 15:54:33 +0100
Subject: [PATCH 2/2] Address comments

---
 src/relay/backend/contrib/ethosn/codegen.cc |  3 ++-
 .../backend/contrib/ethosn/ethosn_api.cc    | 23 +++++++++++--------
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/src/relay/backend/contrib/ethosn/codegen.cc b/src/relay/backend/contrib/ethosn/codegen.cc
index d74b4f829098..5ae580861aaa 100644
--- a/src/relay/backend/contrib/ethosn/codegen.cc
+++ b/src/relay/backend/contrib/ethosn/codegen.cc
@@ -952,7 +952,8 @@ TVM_REGISTER_GLOBAL("relay.ethos-n.support.requantize")
       char reason[kReasonMaxLength];
       reason[0] = '\0';
       *rv = !err && EthosnCompiler::GetSupported()->IsRequantizeSupported(
-                        params.requantize_info, params.input_info, nullptr, reason, sizeof(reason));
+                        params.requantize_info, params.input_info, &params.output_info, reason,
+                        sizeof(reason));
       err += EthosnError(reason);
     });
 
diff --git a/src/relay/backend/contrib/ethosn/ethosn_api.cc b/src/relay/backend/contrib/ethosn/ethosn_api.cc
index 2a7e252f5720..2a9dd330e230 100644
--- a/src/relay/backend/contrib/ethosn/ethosn_api.cc
+++ b/src/relay/backend/contrib/ethosn/ethosn_api.cc
@@ -692,16 +692,21 @@ EthosnError EthosnAPI::Requantize(const Expr& expr, RequantizeParams* params) {
   err += AsConstant(call->args[3], &output_sc);
   err += AsConstant(call->args[4], &output_zp);
 
-  params->requantize_info = sl::RequantizeInfo(sl::QuantizationInfo(output_zp, output_sc));
-  params->input_info = sl::TensorInfo(input_tensor_shape, input_data_type, sl::DataFormat::NHWC,
-                                      sl::QuantizationInfo(input_zp, input_sc));
+  sl::QuantizationInfo input_q_info;
+  err += Tvm2Npu(input_zp, input_sc, &input_q_info);
+  params->input_info =
+      sl::TensorInfo(input_tensor_shape, input_data_type, sl::DataFormat::NHWC, input_q_info);
 
-  sl::QuantizationInfo output_q_info;
-  err += Tvm2Npu(output_zp, output_sc, &output_q_info);
-  sl::TensorInfo output_tensor_info;
-  err += Tvm2Npu(call->checked_type(), &output_tensor_info);
-  output_tensor_info.m_QuantizationInfo = output_q_info;
-  params->output_info = output_tensor_info;
+  sl::QuantizationInfo requantize_q_info;
+  err += Tvm2Npu(output_zp, output_sc, &requantize_q_info);
+  params->requantize_info = sl::RequantizeInfo(requantize_q_info);
+
+  sl::TensorInfo output_info = params->input_info;
+  output_info.m_QuantizationInfo = params->requantize_info.m_OutputQuantizationInfo;
+  if (params->requantize_info.m_OutputDataType.has_value()) {
+    output_info.m_DataType = params->requantize_info.m_OutputDataType.value();
+  }
+  params->output_info = output_info;
 
   return err;
 }
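
Reviewer note (illustrative only, not part of the patch): the qnn.requantize operation offloaded
above maps tensor values from one affine quantization space to another. A minimal NumPy reference
of that mapping, reusing the quantization parameters from test_requantize, is sketched below;
the exact rounding and saturation behaviour of TVM's lowering and of the Ethos-N NPU may differ,
so treat this purely as an aid for reading the tests.

    import numpy as np

    def reference_requantize(q_in, input_sc, input_zp, output_sc, output_zp, out_dtype="int8"):
        """Map q_in from (input_sc, input_zp) to (output_sc, output_zp), clipped to out_dtype."""
        real = input_sc * (q_in.astype(np.int32) - input_zp)  # dequantize to real values
        q_out = np.round(real / output_sc) + output_zp        # quantize with the output parameters
        info = np.iinfo(out_dtype)
        return np.clip(q_out, info.min, info.max).astype(out_dtype)

    # Same parameters as the int8 -> int8 case of test_requantize above.
    x = np.random.randint(low=-5, high=5, size=(1, 52, 52, 3), dtype="int8")
    y = reference_requantize(x, input_sc=0.002, input_zp=0, output_sc=0.008, output_zp=10)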