From d47aff4ec65b7b2386b8ecec0187e3e00ff847ce Mon Sep 17 00:00:00 2001 From: Ashutosh Parkhi Date: Fri, 30 Sep 2022 09:23:28 +0100 Subject: [PATCH 1/3] [CMSIS-NN] Support for int16 conv2d -Pattern matching and RelayToTIR introduce int16 support -Added new context buffer size APIs for int16 Conv2d -Added int16 variants to integration and buffer size tests Change-Id: I6083ea20b9125a9700a69a93c52c07eb463618b2 --- python/tvm/relay/op/contrib/cmsisnn.py | 29 +++- .../backend/contrib/cmsisnn/buffer_size.cc | 80 ++++++++++- .../backend/contrib/cmsisnn/buffer_size.h | 36 ++++- .../backend/contrib/cmsisnn/relay_to_tir.cc | 52 ++++--- .../backend/contrib/cmsisnn/tir_to_runtime.cc | 4 +- .../contrib/cmsisnn/buffer_size_test.cc | 86 +++++++++--- .../contrib/test_cmsisnn/test_conv2d.py | 130 +++++++++++------- tests/python/contrib/test_cmsisnn/utils.py | 65 +++++---- 8 files changed, 352 insertions(+), 130 deletions(-) diff --git a/python/tvm/relay/op/contrib/cmsisnn.py b/python/tvm/relay/op/contrib/cmsisnn.py index b887fafd7e00..1ec70195c9e0 100644 --- a/python/tvm/relay/op/contrib/cmsisnn.py +++ b/python/tvm/relay/op/contrib/cmsisnn.py @@ -121,11 +121,9 @@ def check_qnn_conv2d(pattern): requantize = pattern requantize_input = requantize.args[0] bias_add = None - bias_dtype = "int32" if str(requantize_input.op.name) == "nn.bias_add": bias_add = requantize_input conv2d = bias_add.args[0] - bias_dtype = bias_add.args[1].checked_type.dtype else: conv2d = requantize_input conv2d_input = conv2d.args[0] @@ -145,12 +143,29 @@ def check_qnn_conv2d(pattern): ): is_depthwise = True + # check if dtypes are supported for the following entities + # (input_dtype, weight_dtype, bias_dtype, out_dtype, pattern_dtype) + are_dtypes_valid = False + if bias_add: + bias_dtype = bias_add.args[1].checked_type.dtype + else: + bias_dtype = "int32" if conv2d_input.checked_type.dtype == "int8" else "int64" + valid_dtypes = None + if conv2d_input.checked_type.dtype == "int8": + valid_dtypes = ("int8", "int8", "int32", "int32", "int8") + elif conv2d_input.checked_type.dtype == "int16": + valid_dtypes = ("int16", "int8", "int64", "int64", "int16") + if ( + conv2d_input.checked_type.dtype, + conv2d_weight.checked_type.dtype, + bias_dtype, + conv2d.attrs.out_dtype, + pattern.checked_type.dtype, + ) == valid_dtypes: + are_dtypes_valid = True + ret = ( - conv2d.attrs.out_dtype == "int32" - and conv2d_input.checked_type.dtype == "int8" - and conv2d_weight.checked_type.dtype == "int8" - and pattern.checked_type.dtype == "int8" - and bias_dtype == "int32" + are_dtypes_valid and all([zp == 0 for zp in kernel_zp]) and (not is_depthwise or bias_add is not None) ) diff --git a/src/relay/backend/contrib/cmsisnn/buffer_size.cc b/src/relay/backend/contrib/cmsisnn/buffer_size.cc index 25f4d054e810..d5ac80cdfc26 100644 --- a/src/relay/backend/contrib/cmsisnn/buffer_size.cc +++ b/src/relay/backend/contrib/cmsisnn/buffer_size.cc @@ -29,10 +29,27 @@ namespace relay { namespace contrib { namespace cmsisnn { -int Conv2dBufferSize(Target target, int32_t padding_w, int32_t padding_h, int32_t input_n, - int32_t input_h, int32_t input_c, int32_t output_h, int32_t output_w, - int32_t stride_w, int32_t stride_h, int32_t dilation_w, int32_t dilation_h, - int32_t filter_w, int32_t filter_h) { +int Conv2dBufferSize(bool is_int16, Target target, int32_t padding_w, int32_t padding_h, + int32_t input_n, int32_t input_h, int32_t input_c, int32_t output_h, + int32_t output_w, int32_t stride_w, int32_t stride_h, int32_t dilation_w, + int32_t dilation_h, int32_t 
filter_w, int32_t filter_h) {
+  int size = -1;
+  if (is_int16) {
+    size = Conv2dBufferSizeInt16(target, padding_w, padding_h, input_n, input_h, input_c, output_h,
+                                 output_w, stride_w, stride_h, dilation_w, dilation_h, filter_w,
+                                 filter_h);
+  } else {
+    size = Conv2dBufferSizeInt8(target, padding_w, padding_h, input_n, input_h, input_c, output_h,
+                                output_w, stride_w, stride_h, dilation_w, dilation_h, filter_w,
+                                filter_h);
+  }
+  return size;
+}
+
+int Conv2dBufferSizeInt8(Target target, int32_t padding_w, int32_t padding_h, int32_t input_n,
+                         int32_t input_h, int32_t input_c, int32_t output_h, int32_t output_w,
+                         int32_t stride_w, int32_t stride_h, int32_t dilation_w, int32_t dilation_h,
+                         int32_t filter_w, int32_t filter_h) {
   bool is1x1 = (padding_w == 0) && (padding_h == 0) && (input_c % 4 == 0) && (stride_w == 1) &&
                (stride_h == 1) && (filter_w == 1) && (filter_h == 1) && (dilation_w == 1) &&
                (dilation_h == 1);
@@ -62,9 +79,38 @@ int Conv2dBufferSize(Target target, int32_t padding_w, int32_t padding_h, int32_
   return 0;
 }
 
-int DepthwiseConv2dBufferSize(Target target, int32_t input_n, int32_t input_c, int32_t output_c,
-                              int32_t filter_w, int32_t filter_h, int32_t dilation_w,
-                              int32_t dilation_h) {
+int Conv2dBufferSizeInt16(Target target, int32_t padding_w, int32_t padding_h, int32_t input_n,
+                          int32_t input_h, int32_t input_c, int32_t output_h, int32_t output_w,
+                          int32_t stride_w, int32_t stride_h, int32_t dilation_w,
+                          int32_t dilation_h, int32_t filter_w, int32_t filter_h) {
+  bool has_mve = target->GetFeature<Bool>("has_mve").value_or(Bool(false));
+  bool has_dsp = target->GetFeature<Bool>("has_dsp").value_or(Bool(false));
+
+  if (has_dsp && !has_mve) {
+    if ((filter_w * filter_h * input_c < 512) && dilation_w == 1 && dilation_h == 1) {
+      return (2 * input_c * filter_w * filter_h) * (int32_t)sizeof(int16_t);
+    }
+  }
+  return 0;
+}
+
+int DepthwiseConv2dBufferSize(bool is_int16, Target target, int32_t input_n, int32_t input_c,
+                              int32_t output_c, int32_t filter_w, int32_t filter_h,
+                              int32_t dilation_w, int32_t dilation_h, int32_t depth_multiplier) {
+  int size = -1;
+  if (is_int16) {
+    size = DepthwiseConv2dBufferSizeInt16(target, input_n, input_c, output_c, filter_w, filter_h,
+                                          dilation_w, dilation_h, depth_multiplier);
+  } else {
+    size = DepthwiseConv2dBufferSizeInt8(target, input_n, input_c, output_c, filter_w, filter_h,
+                                         dilation_w, dilation_h, depth_multiplier);
+  }
+  return size;
+}
+
+int DepthwiseConv2dBufferSizeInt8(Target target, int32_t input_n, int32_t input_c, int32_t output_c,
+                                  int32_t filter_w, int32_t filter_h, int32_t dilation_w,
+                                  int32_t dilation_h, int32_t depth_multiplier) {
   bool has_mve = target->GetFeature<Bool>("has_mve").value_or(Bool(false));
   bool has_dsp = target->GetFeature<Bool>("has_dsp").value_or(Bool(false));
 
@@ -78,6 +124,26 @@ int DepthwiseConv2dBufferSize(Target target, int32_t input_n, int32_t input_c, i
   return 0;
 }
 
+int DepthwiseConv2dBufferSizeInt16(Target target, int32_t input_n, int32_t input_c,
+                                   int32_t output_c, int32_t filter_w, int32_t filter_h,
+                                   int32_t dilation_w, int32_t dilation_h,
+                                   int32_t depth_multiplier) {
+  bool has_mve = target->GetFeature<Bool>("has_mve").value_or(Bool(false));
+  bool has_dsp = target->GetFeature<Bool>("has_dsp").value_or(Bool(false));
+
+  if (depth_multiplier == 1 && dilation_w == 1 && dilation_h == 1 &&
+      filter_w * filter_h * input_c < 512) {
+    if (has_dsp) {
+      if (has_mve) {
+        return 4 * input_c * filter_w * filter_h * (int32_t)sizeof(int16_t) + 8;
+      } else {
+        return input_c * filter_w * filter_h * (int32_t)sizeof(int16_t);
+      }
+    }
+  }
+  return 0;
+}
+
 int AvgPoolBufferSize(Target target, int32_t input_c) {
   bool has_mve = target->GetFeature<Bool>("has_mve").value_or(Bool(false));
   bool has_dsp = target->GetFeature<Bool>("has_dsp").value_or(Bool(false));
diff --git a/src/relay/backend/contrib/cmsisnn/buffer_size.h b/src/relay/backend/contrib/cmsisnn/buffer_size.h
index 9dae17c0a220..5cf8c309cc5e 100644
--- a/src/relay/backend/contrib/cmsisnn/buffer_size.h
+++ b/src/relay/backend/contrib/cmsisnn/buffer_size.h
@@ -41,6 +41,7 @@ namespace cmsisnn {
 * See:
 * https://github.com/ARM-software/CMSIS_5/blob/8c60448c0e1e50e426180b26db9bc31ddf774361/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L108-L127
 *
+ * \param is_int16 - Whether the convolution operates on int16 data
 * \param target - CMSIS-NN Target
 * \param padding_w - Width padding
 * \param padding_h - Height padding
@@ -56,16 +57,27 @@ namespace cmsisnn {
 *
 * \return Size of buffer to allocate for convolution
 */
-int Conv2dBufferSize(Target target, int32_t padding_w, int32_t padding_h, int32_t input_n,
-                     int32_t input_h, int32_t input_c, int32_t output_h, int32_t output_w,
-                     int32_t stride_w, int32_t stride_h, int32_t dilation_w, int32_t dilation_h,
-                     int32_t filter_w, int32_t filter_h);
+int Conv2dBufferSize(bool is_int16, Target target, int32_t padding_w, int32_t padding_h,
+                     int32_t input_n, int32_t input_h, int32_t input_c, int32_t output_h,
+                     int32_t output_w, int32_t stride_w, int32_t stride_h, int32_t dilation_w,
+                     int32_t dilation_h, int32_t filter_w, int32_t filter_h);
+
+int Conv2dBufferSizeInt8(Target target, int32_t padding_w, int32_t padding_h, int32_t input_n,
+                         int32_t input_h, int32_t input_c, int32_t output_h, int32_t output_w,
+                         int32_t stride_w, int32_t stride_h, int32_t dilation_w, int32_t dilation_h,
+                         int32_t filter_w, int32_t filter_h);
+
+int Conv2dBufferSizeInt16(Target target, int32_t padding_w, int32_t padding_h, int32_t input_n,
+                          int32_t input_h, int32_t input_c, int32_t output_h, int32_t output_w,
+                          int32_t stride_w, int32_t stride_h, int32_t dilation_w,
+                          int32_t dilation_h, int32_t filter_w, int32_t filter_h);
 
 /*!
 * \brief Calculates the appropriate buffer size for CMSIS-NN Depthwise Convolutions
 * See:
 * https://github.com/ARM-software/CMSIS_5/blob/325443e52637b6c7eedbd160d238a6c462e89c9f/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c#L115-L129
 *
+ * \param is_int16 - Whether the convolution operates on int16 data
 * \param target - CMSIS-NN Target
 * \param input_n - Input batch size
 * \param input_c - Input channels
@@ -74,12 +86,22 @@ int Conv2dBufferSize(Target target, int32_t padding_w, int32_t padding_h, int32_
 * \param filter_h - Filter height
 * \param dilation_w - Dilation width
 * \param dilation_h - Dilation height
+ * \param depth_multiplier - Depth multiplier for depthwise convolution
 *
 * \return Size of buffer to allocate for depthwise convolution
 */
-int DepthwiseConv2dBufferSize(Target target, int32_t input_n, int32_t input_c, int32_t output_c,
-                              int32_t filter_w, int32_t filter_h, int32_t dilation_w,
-                              int32_t dilation_h);
+int DepthwiseConv2dBufferSize(bool is_int16, Target target, int32_t input_n, int32_t input_c,
+                              int32_t output_c, int32_t filter_w, int32_t filter_h,
+                              int32_t dilation_w, int32_t dilation_h, int32_t depth_multiplier);
+
+int DepthwiseConv2dBufferSizeInt8(Target target, int32_t input_n, int32_t input_c, int32_t output_c,
+                                  int32_t filter_w, int32_t filter_h, int32_t dilation_w,
+                                  int32_t dilation_h, int32_t depth_multiplier);
+
+int DepthwiseConv2dBufferSizeInt16(Target target, int32_t input_n, int32_t input_c,
+                                   int32_t output_c, int32_t filter_w, int32_t filter_h,
+                                   int32_t dilation_w, int32_t dilation_h,
+                                   int32_t depth_multiplier);
 
 /*!
 * \brief Calculates the appropriate buffer size for CMSIS-NN Average Pooling
diff --git a/src/relay/backend/contrib/cmsisnn/relay_to_tir.cc b/src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
index a5cdfd570fea..da51e6b762dd 100644
--- a/src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
+++ b/src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
@@ -93,17 +93,17 @@ class RelayToTIRVisitor : public MixedModeMutator {
                                const Map<tir::Var, tir::Buffer>& buffer_map,
                                tvm::Array<PrimExpr> call_extern_args,
                                PrimExpr context_buffer_var = PrimExpr(),
-                               int context_buffer_size = 0) {
+                               int context_buffer_size = 0, int num_bits = 8) {
     Map<String, ObjectRef> dict_attrs;
     dict_attrs.Set(tvm::attr::kGlobalSymbol, global_var->name_hint);
     dict_attrs.Set(tvm::attr::kTarget, target_);
     dict_attrs.Set("tir.noalias", Bool(true));
 
     tir::Stmt body = tir::Evaluate(
-        tvm::tir::Call(DataType::Int(8), tir::builtin::call_extern(), call_extern_args));
+        tvm::tir::Call(DataType::Int(num_bits), tir::builtin::call_extern(), call_extern_args));
 
     if (context_buffer_size) {
-      body = tir::Allocate(Downcast<tir::Var>(context_buffer_var), DataType::Int(8),
+      body = tir::Allocate(Downcast<tir::Var>(context_buffer_var), DataType::Int(num_bits),
                            {context_buffer_size}, tir::const_true(), body);
     }
 
@@ -133,6 +133,22 @@ class RelayToTIRVisitor : public MixedModeMutator {
     } else {
       conv2d_call = requantize_input;
     }
+    int32_t dtype_bits = conv2d_call->args[0]->type_as<TensorTypeNode>()->dtype.bits();
+
+    // Determine bitwidth of buffers based on input dtype
+    int32_t input_bits = 8;
+    int32_t filter_bits = 8;
+    int32_t bias_bits = 32;
+    int32_t output_bits = 8;
+    int32_t context_buffer_bits = 8;
+    bool is_int16 = false;
+    if (dtype_bits == 16) {
+      is_int16 = true;
+      input_bits = 16;
+      bias_bits = 64;
+      output_bits = 16;
+      context_buffer_bits = 16;
+    }
 
     // TIR variables are created in the order they appear in the Relay partitioned function
     // %1 = qnn.conv2d(%input, %weight_const_0, input_zero_point_scalar,
@@ -145,14 +161,14 @@
     const int filter_scale_pos = 3;
     const int input_scale_pos = bias_add_call ? 5 : 4;
     BufferCreator buffer_creator;
-    tir::Var input = buffer_creator.CreateBufferVar("input", DataType::Handle(8));
-    tir::Var filter = buffer_creator.CreateBufferVar("filter", DataType::Handle(8));
+    tir::Var input = buffer_creator.CreateBufferVar("input", DataType::Handle(input_bits));
+    tir::Var filter = buffer_creator.CreateBufferVar("filter", DataType::Handle(filter_bits));
     tir::Var multiplier = buffer_creator.CreateBufferVar("multiplier", DataType::Handle(32));
     if (bias_add_call) {
-      buffer_creator.CreateBufferVar("bias", DataType::Handle(32));
+      buffer_creator.CreateBufferVar("bias", DataType::Handle(bias_bits));
     }
     tir::Var shift = buffer_creator.CreateBufferVar("shift", DataType::Handle(32));
-    tir::Var output = buffer_creator.CreateBufferVar("output", DataType::Handle(8));
+    tir::Var output = buffer_creator.CreateBufferVar("output", DataType::Handle(output_bits));
 
     // Relay function contains input_scale and filter_scale as function parameters at the following
     // locations in the global partitioned function for Conv2D
@@ -217,10 +233,10 @@ class RelayToTIRVisitor : public MixedModeMutator {
       scalar_args.push_back(ToArg(depth_multiplier));
 
     // original filter_layout for depthwise is HWOI
-    std::string cmsisnn_api = "arm_convolve_wrapper_s8";
+    std::string cmsisnn_api = is_int16 ? "arm_convolve_wrapper_s16" : "arm_convolve_wrapper_s8";
     bool is_depthwise = depth_multiplier != -1;
     if (is_depthwise) {
-      cmsisnn_api = "arm_depthwise_conv_wrapper_s8";
+      cmsisnn_api = is_int16 ? "arm_depthwise_conv_wrapper_s16" : "arm_depthwise_conv_wrapper_s8";
       int filter_pos_h = kernel_layout.find("H");
       int filter_pos_w = kernel_layout.find("W");
       Array<PrimExpr> depthwise_filter_shape{1, filter_shape[filter_pos_h],
@@ -242,18 +258,20 @@ class RelayToTIRVisitor : public MixedModeMutator {
     Target target = CreateTarget(transform::PassContext::Current());
     size_t context_buffer_size;
     if (is_depthwise) {
-      context_buffer_size = DepthwiseConv2dBufferSize(target, input_n, input_c, output_c, filter_w,
-                                                      filter_h, dilation_w, dilation_h);
+      context_buffer_size =
+          DepthwiseConv2dBufferSize(is_int16, target, input_n, input_c, output_c, filter_w,
+                                    filter_h, dilation_w, dilation_h, depth_multiplier);
     } else {
-      context_buffer_size = Conv2dBufferSize(target, padding_w, padding_h, input_n, input_h,
-                                             input_c, output_h, output_w, stride_w, stride_h,
-                                             dilation_w, dilation_h, filter_w, filter_h);
+      context_buffer_size = Conv2dBufferSize(is_int16, target, padding_w, padding_h, input_n,
+                                             input_h, input_c, output_h, output_w, stride_w,
+                                             stride_h, dilation_w, dilation_h, filter_w, filter_h);
     }
 
     if (context_buffer_size) {
       String context_buffer_name = "context_buffer_" + std::to_string(context_buffer_id_++);
-      context_buffer_var = tir::Var(context_buffer_name,
-                                    PointerType(PrimType(DataType::Int(8)), "global.workspace"));
+      context_buffer_var =
+          tir::Var(context_buffer_name,
+                   PointerType(PrimType(DataType::Int(context_buffer_bits)), "global.workspace"));
     }
     tvm::Array<PrimExpr> context_buffer_args = {context_buffer_var, ToArg(context_buffer_size)};
@@ -266,7 +284,7 @@ class RelayToTIRVisitor : public MixedModeMutator {
     CreatePrimFuncForExtern(global_var, buffer_creator.GetPrimFuncParams(),
                             buffer_creator.GetBufferMap(), call_ext_args, context_buffer_var,
-                            context_buffer_size);
+                            context_buffer_size, context_buffer_bits);
   }
 
   void EmitFullyConnected(const GlobalVar& global_var, const Expr& expr) {
diff --git a/src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc
b/src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc index 50fa3821b7fa..ae9f195ca509 100644 --- a/src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc +++ b/src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc @@ -111,7 +111,9 @@ class CodeGenCMSISNN : public codegen::CodeGenCHost { cmsis_func_name == "arm_elementwise_add_s8") { CodeGenC::VisitExpr_(op, os); } else if (cmsis_func_name == "arm_convolve_wrapper_s8" || - cmsis_func_name == "arm_depthwise_conv_wrapper_s8") { + cmsis_func_name == "arm_convolve_wrapper_s16" || + cmsis_func_name == "arm_depthwise_conv_wrapper_s8" || + cmsis_func_name == "arm_depthwise_conv_wrapper_s16") { EmitConv2D(op); } else if (cmsis_func_name == "arm_fully_connected_s8") { EmitFullyConnected(op); diff --git a/tests/cpp/relay/backend/contrib/cmsisnn/buffer_size_test.cc b/tests/cpp/relay/backend/contrib/cmsisnn/buffer_size_test.cc index d8870fa71525..2094b70eb872 100644 --- a/tests/cpp/relay/backend/contrib/cmsisnn/buffer_size_test.cc +++ b/tests/cpp/relay/backend/contrib/cmsisnn/buffer_size_test.cc @@ -46,10 +46,10 @@ static const Target kNoExt("cmsis-nn -mcpu=cortex-m55 -mattr=+nodsp,+nomve"); class CMSISNNCalculatedBufferSize : public testing::TestWithParam> {}; -TEST(CMSISNNConv2dBufferSize, Conv1x1) { +TEST(CMSISNNConv2dBufferSizeInt8, Conv1x1) { int32_t any = fake_parameters(gen); auto conv2d_1x1 = [=](Target target, int32_t input_c) { - return Conv2dBufferSize(target, 0, 0, any, any, input_c, any, any, 1, 1, 1, 1, 1, 1); + return Conv2dBufferSizeInt8(target, 0, 0, any, any, input_c, any, any, 1, 1, 1, 1, 1, 1); }; ASSERT_EQ(conv2d_1x1(kNoExt, 4), 0); @@ -71,7 +71,7 @@ TEST(CMSISNNConv2dBufferSize, Conv1x1) { ASSERT_EQ(conv2d_1x1(kHasMVE, 32), 0); } -TEST(CMSISNNConv2dBufferSize, Conv1xN) { +TEST(CMSISNNConv2dBufferSizeInt8, Conv1xN) { int32_t any = fake_parameters(gen); int32_t input_c = fake_parameters(gen); int32_t filter_w = fake_parameters(gen); @@ -79,8 +79,8 @@ TEST(CMSISNNConv2dBufferSize, Conv1xN) { int32_t calculated_buffer = (2 * input_c * filter_w * filter_h) * (int32_t)sizeof(int16_t); auto conv2d_1xn = [=](Target target, int32_t output_w) { - return Conv2dBufferSize(target, any, any, 1, 1, input_c, 1, output_w, any, any, 1, 1, filter_w, - filter_h); + return Conv2dBufferSizeInt8(target, any, any, 1, 1, input_c, 1, output_w, any, any, 1, 1, + filter_w, filter_h); }; ASSERT_EQ(conv2d_1xn(kNoExt, 4), calculated_buffer); @@ -102,7 +102,7 @@ TEST(CMSISNNConv2dBufferSize, Conv1xN) { ASSERT_EQ(conv2d_1xn(kHasMVE, 32), 0); } -TEST(CMSISNNConv2dBufferSize, Default) { +TEST(CMSISNNConv2dBufferSizeInt8, Default) { int32_t any = fake_parameters(gen); int32_t input_c = fake_parameters(gen); @@ -114,8 +114,8 @@ TEST(CMSISNNConv2dBufferSize, Default) { int32_t calculated_buffer_mve = 4 * col_length * 8 * (int32_t)sizeof(int8_t); auto conv2d = [=](Target target, int32_t output_w) { - return Conv2dBufferSize(target, any, any, 1, 1, input_c, 1, output_w, any, any, any, any, - filter_w, filter_h); + return Conv2dBufferSizeInt8(target, any, any, 1, 1, input_c, 1, output_w, any, any, any, any, + filter_w, filter_h); }; ASSERT_EQ(conv2d(kNoExt, 4), calculated_buffer); @@ -137,13 +137,39 @@ TEST(CMSISNNConv2dBufferSize, Default) { ASSERT_EQ(conv2d(kHasMVE, 32), calculated_buffer_mve); } -TEST(CMSISNNDepthwiseConv2dBufferSize, UnEvenChannels) { +TEST(CMSISNNConv2dBufferSizeInt16, Default) { + int32_t any = fake_parameters(gen); + + auto conv2d_int16_buffer = [=](Target target, int32_t input_c, int32_t filter_w, + int32_t filter_h) { + return 
Conv2dBufferSizeInt16(target, any, any, 1, 1, input_c, any, any, any, any, 1, 1, + filter_w, filter_h); + }; + + auto calculated_buffer = [=](int32_t input_c, int32_t filter_w, int32_t filter_h) { + return (2 * input_c * filter_w * filter_h) * (int32_t)sizeof(int16_t); + }; + + ASSERT_EQ(conv2d_int16_buffer(kNoExt, 3, 5, 5), 0); + ASSERT_EQ(conv2d_int16_buffer(kNoExt, 32, 3, 3), 0); + + ASSERT_EQ(conv2d_int16_buffer(kHasDSP, 3, 3, 3), calculated_buffer(3, 3, 3)); + ASSERT_EQ(conv2d_int16_buffer(kHasDSP, 12, 5, 5), calculated_buffer(12, 5, 5)); + ASSERT_EQ(conv2d_int16_buffer(kHasDSP, 24, 5, 5), 0); + + ASSERT_EQ(conv2d_int16_buffer(kHasMVE, 3, 3, 3), 0); + ASSERT_EQ(conv2d_int16_buffer(kHasMVE, 12, 5, 5), 0); + ASSERT_EQ(conv2d_int16_buffer(kHasMVE, 24, 5, 5), 0); +} + +TEST(CMSISNNDepthwiseConv2dBufferSizeInt8, UnEvenChannels) { int32_t filter_w = fake_parameters(gen); int32_t filter_h = fake_parameters(gen); int32_t input_n = 1; auto depthwise_conv2d_with_channels = [=](Target target, int32_t input_c, int32_t output_c) { - return DepthwiseConv2dBufferSize(target, input_n, input_c, output_c, filter_w, filter_h, 1, 1); + return DepthwiseConv2dBufferSizeInt8(target, input_n, input_c, output_c, filter_w, filter_h, 1, + 1, 1); }; ASSERT_EQ(depthwise_conv2d_with_channels(kNoExt, 4, 6), 0); @@ -154,14 +180,14 @@ TEST(CMSISNNDepthwiseConv2dBufferSize, UnEvenChannels) { ASSERT_EQ(depthwise_conv2d_with_channels(kHasMVE, 8, 7), 0); } -TEST(CMSISNNDepthwiseConv2dBufferSize, MultipleBatches) { +TEST(CMSISNNDepthwiseConv2dBufferSizeInt8, MultipleBatches) { int32_t input_output_c = fake_parameters(gen); int32_t filter_w = fake_parameters(gen); int32_t filter_h = fake_parameters(gen); auto depthwise_conv2d_with_batch = [=](Target target, int32_t input_n) { - return DepthwiseConv2dBufferSize(target, input_n, input_output_c, input_output_c, filter_w, - filter_h, 1, 1); + return DepthwiseConv2dBufferSizeInt8(target, input_n, input_output_c, input_output_c, filter_w, + filter_h, 1, 1, 1); }; ASSERT_EQ(depthwise_conv2d_with_batch(kNoExt, 4), 0); @@ -172,7 +198,7 @@ TEST(CMSISNNDepthwiseConv2dBufferSize, MultipleBatches) { ASSERT_EQ(depthwise_conv2d_with_batch(kHasMVE, 7), 0); } -TEST(CMSISNNDepthwiseConv2dBufferSize, Default) { +TEST(CMSISNNDepthwiseConv2dBufferSizeInt8, Default) { int32_t input_output_c = fake_parameters(gen); int32_t filter_w = fake_parameters(gen); int32_t filter_h = fake_parameters(gen); @@ -183,8 +209,8 @@ TEST(CMSISNNDepthwiseConv2dBufferSize, Default) { int32_t dsp_calculated_buffer = (input_output_c * filter_w * filter_h) * (int32_t)sizeof(int16_t); auto depthwise_conv2d = [=](Target target) { - return DepthwiseConv2dBufferSize(target, input_n, input_output_c, input_output_c, filter_w, - filter_h, 1, 1); + return DepthwiseConv2dBufferSizeInt8(target, input_n, input_output_c, input_output_c, filter_w, + filter_h, 1, 1, 1); }; ASSERT_EQ(depthwise_conv2d(kNoExt), 0); @@ -195,6 +221,34 @@ TEST(CMSISNNDepthwiseConv2dBufferSize, Default) { ASSERT_EQ(depthwise_conv2d(kHasMVE), mve_calculated_buffer); } +TEST(CMSISNNDepthwiseConv2dBufferSizeInt16, Default) { + int32_t any = fake_parameters(gen); + + auto depthwise_int16_buffer = [=](Target target, int32_t input_c, int32_t filter_w, + int32_t filter_h) { + return DepthwiseConv2dBufferSizeInt16(target, any, input_c, any, filter_w, filter_h, 1, 1, 1); + }; + + auto dsp_only_buffer = [=](int32_t input_c, int32_t filter_w, int32_t filter_h) { + return (input_c * filter_w * filter_h) * (int32_t)sizeof(int16_t); + }; + + auto dsp_mve_buffer = 
[=](int32_t input_c, int32_t filter_w, int32_t filter_h) { + return (4 * input_c * filter_w * filter_h) * (int32_t)sizeof(int16_t) + 8; + }; + + ASSERT_EQ(depthwise_int16_buffer(kNoExt, 3, 5, 5), 0); + ASSERT_EQ(depthwise_int16_buffer(kNoExt, 32, 3, 3), 0); + + ASSERT_EQ(depthwise_int16_buffer(kHasDSP, 3, 3, 3), dsp_only_buffer(3, 3, 3)); + ASSERT_EQ(depthwise_int16_buffer(kHasDSP, 12, 5, 5), dsp_only_buffer(12, 5, 5)); + ASSERT_EQ(depthwise_int16_buffer(kHasDSP, 24, 5, 5), 0); + + ASSERT_EQ(depthwise_int16_buffer(kHasMVE, 3, 3, 3), dsp_mve_buffer(3, 3, 3)); + ASSERT_EQ(depthwise_int16_buffer(kHasMVE, 12, 5, 5), dsp_mve_buffer(12, 5, 5)); + ASSERT_EQ(depthwise_int16_buffer(kHasMVE, 24, 5, 5), 0); +} + TEST(CMSISNNAvgPoolBufferSize, Default) { int32_t input_c = fake_parameters(gen); int32_t calculated_buffer = (input_c * sizeof(int32_t)); diff --git a/tests/python/contrib/test_cmsisnn/test_conv2d.py b/tests/python/contrib/test_cmsisnn/test_conv2d.py index d33d71261613..9ff55c952c88 100644 --- a/tests/python/contrib/test_cmsisnn/test_conv2d.py +++ b/tests/python/contrib/test_cmsisnn/test_conv2d.py @@ -36,6 +36,7 @@ get_range_for_dtype_str, get_same_padding, get_conv2d_qnn_params, + get_kernel_bias_dtype, make_qnn_relu, assert_partitioned_function, assert_no_external_function, @@ -59,8 +60,9 @@ def make_model( groups, dtype, kernel_dtype, + bias_dtype, out_channels, - weight_format, + kernel_layout, enable_bias, relu_type, input_op=None, @@ -71,8 +73,8 @@ def make_model( else: op = relay.var("input", shape=shape, dtype=dtype) - h_index = weight_format.index("H") - w_index = weight_format.index("W") + h_index = kernel_layout.index("H") + w_index = kernel_layout.index("W") kernel_h = kernel_shape[h_index] kernel_w = kernel_shape[w_index] p = (0, 0, 0, 0) @@ -80,7 +82,7 @@ def make_model( p = get_same_padding((shape[1], shape[2]), (kernel_h, kernel_w), dilation, strides) rng = np.random.default_rng(12321) - weight = tvm.nd.array( + kernel = tvm.nd.array( rng.integers( np.iinfo(kernel_dtype).min, high=np.iinfo(kernel_dtype).max, @@ -88,27 +90,27 @@ def make_model( dtype=kernel_dtype, ) ) - weight_const = relay.const(weight, kernel_dtype) + kernel_const = relay.const(kernel, kernel_dtype) conv2d_kernel_sc = kernel_scale[0] if out_channels == 1 else kernel_scale conv = relay.qnn.op.conv2d( op, - weight_const, + kernel_const, input_zero_point=relay.const(input_zero_point, "int32"), kernel_zero_point=relay.const(kernel_zero_point, "int32"), input_scale=relay.const(input_scale, "float32"), kernel_scale=relay.const(conv2d_kernel_sc, "float32"), kernel_size=(kernel_h, kernel_w), data_layout="NHWC", - kernel_layout=weight_format, + kernel_layout=kernel_layout, dilation=dilation, strides=strides, groups=groups, channels=out_channels, padding=p, - out_dtype="int32", + out_dtype=bias_dtype, ) - bias = tvm.nd.array(rng.integers(0, high=10, size=(out_channels,), dtype="int32")) - bias_const = relay.const(bias, "int32") + bias = tvm.nd.array(rng.integers(0, high=10, size=(out_channels,), dtype=bias_dtype)) + bias_const = relay.const(bias, bias_dtype) last_op = relay.nn.bias_add(conv, bias_const, axis=3) if enable_bias else conv requant_input_sc = [sc * input_scale for sc in kernel_scale] requant_input_sc = requant_input_sc[0] if out_channels == 1 else requant_input_sc @@ -121,7 +123,7 @@ def make_model( out_dtype=dtype, ) last_op = make_qnn_relu(last_op, relu_type, output_scale, output_zero_point, dtype) - params = {"w": weight, "b": bias} + params = {"w": kernel, "b": bias} return last_op, params @@ -150,7 
+152,7 @@ def test_conv2d_number_primfunc_args( dilation = (1, 1) dtype = "int8" groups = 1 - weight_format = "HWIO" + kernel_layout = "HWIO" kernel_h = kernel_size[0] kernel_w = kernel_size[1] kernel_shape = (kernel_h, kernel_w, ifm_shape[3] // groups, out_channels) @@ -158,6 +160,8 @@ def test_conv2d_number_primfunc_args( in_min, in_max = get_range_for_dtype_str(dtype) relu_type = "RELU" + kernel_dtype, bias_dtype = get_kernel_bias_dtype(dtype) + output_scale, output_zero_point = get_conv2d_qnn_params( kernel_shape, input_scale, @@ -165,7 +169,7 @@ def test_conv2d_number_primfunc_args( kernel_scale, kernel_zero_point, input_dtype=dtype, - weights_dtype=dtype, + kernel_dtype=kernel_dtype, output_dtype=dtype, ) @@ -183,9 +187,10 @@ def test_conv2d_number_primfunc_args( dilation, groups, dtype, - dtype, + kernel_dtype, + bias_dtype, out_channels, - weight_format, + kernel_layout, enable_bias, relu_type, ) @@ -220,6 +225,7 @@ def test_conv2d_number_primfunc_args( @tvm.testing.requires_cmsisnn +@pytest.mark.parametrize("dtype", ["int8", "int16"]) @pytest.mark.parametrize("padding", ["SAME", "VALID"]) @pytest.mark.parametrize("relu_type", ["RELU"]) @pytest.mark.parametrize("enable_bias", [True, False]) @@ -230,7 +236,8 @@ def test_conv2d_number_primfunc_args( @pytest.mark.parametrize( "compiler_cpu, cpu_flags", [("cortex-m55", "+nomve"), ("cortex-m55", ""), ("cortex-m7", "")] ) -def test_conv2d_symmetric_padding_int8( +def test_conv2d_symmetric_padding( + dtype, padding, enable_bias, relu_type, @@ -249,15 +256,17 @@ def test_conv2d_symmetric_padding_int8( kernel_size = (3, 3) strides = (1, 1) dilation = (1, 1) - dtype = "int8" groups = 1 - weight_format = "HWIO" + input_zero_point = input_zero_point if dtype == "int8" else 0 + kernel_layout = "HWIO" kernel_h = kernel_size[0] kernel_w = kernel_size[1] kernel_shape = (kernel_h, kernel_w, ifm_shape[3] // groups, out_channels) kernel_zero_point = 0 in_min, in_max = get_range_for_dtype_str(dtype) + kernel_dtype, bias_dtype = get_kernel_bias_dtype(dtype) + output_scale, output_zero_point = get_conv2d_qnn_params( kernel_shape, input_scale, @@ -265,7 +274,7 @@ def test_conv2d_symmetric_padding_int8( kernel_scale, kernel_zero_point, input_dtype=dtype, - weights_dtype=dtype, + kernel_dtype=kernel_dtype, output_dtype=dtype, ) @@ -283,9 +292,10 @@ def test_conv2d_symmetric_padding_int8( dilation, groups, dtype, - dtype, + kernel_dtype, + bias_dtype, out_channels, - weight_format, + kernel_layout, enable_bias, relu_type, ) @@ -321,7 +331,7 @@ def test_conv2d_symmetric_padding_int8( "input_zero_point, input_scale, kernel_scale, out_channels", [(10, 0.0128, [0.11, 0.22], 2), (-64, 1, [1, 0.0256, 1.37], 3)], ) -def test_conv2d_asymmetric_padding_int8( +def test_conv2d_asymmetric_padding( padding, enable_bias, relu_type, @@ -335,19 +345,22 @@ def test_conv2d_asymmetric_padding_int8( use_unpacked_api = True test_runner = AOT_USMP_CORSTONE300_RUNNER + dtype = "int8" ifm_shape = (1, 25, 25, 12) kernel_size = (5, 5) strides = (2, 2) dilation = (1, 1) - dtype = "int8" groups = 1 - weight_format = "HWIO" + input_zero_point = input_zero_point if dtype == "int8" else 0 + kernel_layout = "HWIO" kernel_h = kernel_size[0] kernel_w = kernel_size[1] kernel_shape = (kernel_h, kernel_w, ifm_shape[3] // groups, out_channels) kernel_zero_point = 0 in_min, in_max = get_range_for_dtype_str(dtype) + kernel_dtype, bias_dtype = get_kernel_bias_dtype(dtype) + output_scale, output_zero_point = get_conv2d_qnn_params( kernel_shape, input_scale, @@ -355,7 +368,7 @@ def 
test_conv2d_asymmetric_padding_int8( kernel_scale, kernel_zero_point, input_dtype=dtype, - weights_dtype=dtype, + kernel_dtype=kernel_dtype, output_dtype=dtype, ) @@ -373,9 +386,10 @@ def test_conv2d_asymmetric_padding_int8( dilation, groups, dtype, - dtype, + kernel_dtype, + bias_dtype, out_channels, - weight_format, + kernel_layout, enable_bias, relu_type, ) @@ -434,13 +448,14 @@ def test_pad_conv2d_fusion_int8( kernel_scale = [0.11, 0.22] out_channels = 2 groups = 1 - weight_format = "HWIO" + kernel_layout = "HWIO" kernel_h = kernel_size[0] kernel_w = kernel_size[1] kernel_shape = (kernel_h, kernel_w, ifm_shape[3] // groups, out_channels) kernel_zero_point = 0 in_min, in_max = get_range_for_dtype_str(dtype) + kernel_dtype, bias_dtype = get_kernel_bias_dtype(dtype) output_scale, output_zero_point = get_conv2d_qnn_params( kernel_shape, input_scale, @@ -448,7 +463,7 @@ def test_pad_conv2d_fusion_int8( kernel_scale, kernel_zero_point, input_dtype=dtype, - weights_dtype=dtype, + kernel_dtype=kernel_dtype, output_dtype=dtype, ) @@ -474,9 +489,10 @@ def test_pad_conv2d_fusion_int8( dilation, groups, dtype, - dtype, + kernel_dtype, + bias_dtype, out_channels, - weight_format, + kernel_layout, enable_bias, relu_type, input_op=pad, @@ -545,13 +561,15 @@ def test_invalid_pad_conv2d_fusion_int8( kernel_scale = [0.11, 0.22] out_channels = 2 groups = 1 - weight_format = "HWIO" + kernel_layout = "HWIO" kernel_h = kernel_size[0] kernel_w = kernel_size[1] kernel_shape = (kernel_h, kernel_w, ifm_shape[3] // groups, out_channels) kernel_zero_point = 0 in_min, in_max = get_range_for_dtype_str(dtype) + kernel_dtype, bias_dtype = get_kernel_bias_dtype(dtype) + output_scale, output_zero_point = get_conv2d_qnn_params( kernel_shape, input_scale, @@ -559,7 +577,7 @@ def test_invalid_pad_conv2d_fusion_int8( kernel_scale, kernel_zero_point, input_dtype=dtype, - weights_dtype=dtype, + kernel_dtype=kernel_dtype, output_dtype=dtype, ) @@ -585,9 +603,10 @@ def test_invalid_pad_conv2d_fusion_int8( dilation, groups, dtype, - dtype, + kernel_dtype, + bias_dtype, out_channels, - weight_format, + kernel_layout, enable_bias, relu_type, input_op=pad, @@ -675,6 +694,7 @@ def test_conv2d_int8_tflite(ifm_shape, kernel_shape, strides, dilation, padding, @tvm.testing.requires_cmsisnn +@pytest.mark.parametrize("dtype", ["int8", "int16"]) @pytest.mark.parametrize("ifm_shape", [(1, 28, 28, 12), (1, 64, 100, 4)]) @pytest.mark.parametrize("kernel_size", [(3, 3)]) @pytest.mark.parametrize("padding", ["SAME", "VALID"]) @@ -691,7 +711,8 @@ def test_conv2d_int8_tflite(ifm_shape, kernel_shape, strides, dilation, padding, @pytest.mark.parametrize( "compiler_cpu, cpu_flags", [("cortex-m55", "+nomve"), ("cortex-m55", ""), ("cortex-m7", "")] ) -def test_depthwise_int8( +def test_depthwise( + dtype, ifm_shape, kernel_size, padding, @@ -711,9 +732,9 @@ def test_depthwise_int8( interface_api = "c" use_unpacked_api = True - dtype = "int8" groups = 1 - weight_format = "HWIO" + input_zero_point = input_zero_point if dtype == "int8" else 0 + kernel_layout = "HWIO" kernel_h = kernel_size[0] kernel_w = kernel_size[1] kernel_shape = (kernel_h, kernel_w, ifm_shape[3] // groups, out_channels) @@ -721,12 +742,14 @@ def test_depthwise_int8( in_min, in_max = get_range_for_dtype_str(dtype) groups = ifm_shape[3] - weight_format = "HWOI" + kernel_layout = "HWOI" kernel_shape = (kernel_h, kernel_w, ifm_shape[3], depth_multiplier) out_channels = ifm_shape[3] * depth_multiplier ks_len = len(kernel_scale) kernel_scale = [kernel_scale[i % ks_len] for i in 
range(out_channels)] + kernel_dtype, bias_dtype = get_kernel_bias_dtype(dtype) + output_scale, output_zero_point = get_conv2d_qnn_params( kernel_shape, input_scale, @@ -734,7 +757,7 @@ def test_depthwise_int8( kernel_scale, kernel_zero_point, input_dtype=dtype, - weights_dtype=dtype, + kernel_dtype=kernel_dtype, output_dtype=dtype, is_depthwise=True, ) @@ -753,9 +776,10 @@ def test_depthwise_int8( dilation, groups, dtype, - dtype, + kernel_dtype, + bias_dtype, out_channels, - weight_format, + kernel_layout, enable_bias, relu_type, ) @@ -823,7 +847,8 @@ def test_relay_conv2d_cmsisnn_depthwise_int8( ifm_shape = (1, 24, 24, 1) groups = ifm_shape[3] - weight_format = "HWIO" + input_zero_point = input_zero_point if dtype == "int8" else 0 + kernel_layout = "HWIO" (kernel_h, kernel_w) = (3, 3) kernel_shape = (kernel_h, kernel_w, ifm_shape[3], depth_multiplier) out_channels = ifm_shape[3] * depth_multiplier @@ -832,6 +857,8 @@ def test_relay_conv2d_cmsisnn_depthwise_int8( kernel_zero_point = 0 kernel_scale = [kernel_scale[i % ks_len] for i in range(out_channels)] + kernel_dtype, bias_dtype = get_kernel_bias_dtype(dtype) + output_scale, output_zero_point = get_conv2d_qnn_params( kernel_shape, input_scale, @@ -839,7 +866,7 @@ def test_relay_conv2d_cmsisnn_depthwise_int8( kernel_scale, kernel_zero_point, input_dtype=dtype, - weights_dtype=dtype, + kernel_dtype=kernel_dtype, output_dtype=dtype, is_depthwise=True, ) @@ -858,9 +885,10 @@ def test_relay_conv2d_cmsisnn_depthwise_int8( dilation, groups, dtype, - dtype, + kernel_dtype, + bias_dtype, out_channels, - weight_format, + kernel_layout, enable_bias, relu_type, ) @@ -915,13 +943,15 @@ def test_relay_conv2d_cmsisnn_depthwise_int8( def parameterize_for_invalid_model(test): """Generates non int8 inputs""" - in_dtype = ["uint8", "int8"] + in_dtype = ["uint8", "int8", "int16"] kernel_dtype = ["uint8", "int8"] kernel_zero_point = [-33, 10, 0] all_combinations = itertools.product(in_dtype, kernel_dtype, kernel_zero_point) all_combinations = filter( lambda parameters: not ( - parameters[0] == "int8" and parameters[1] == "int8" and parameters[2] == 0 + (parameters[0] == "int8" or parameters[0] == "int16") + and parameters[1] == "int8" + and parameters[2] == 0 ), all_combinations, ) @@ -947,6 +977,7 @@ def test_invalid_parameters( kernel_layout = "HWIO" kernel_shape = [3, 3, ifm_shape[3], out_channels] + _, bias_dtype = get_kernel_bias_dtype(in_dtype) output_scale, output_zero_point = get_conv2d_qnn_params( kernel_shape, input_scale, @@ -973,8 +1004,9 @@ def test_invalid_parameters( groups=1, dtype=in_dtype, kernel_dtype=kernel_dtype, + bias_dtype=bias_dtype, out_channels=out_channels, - weight_format=kernel_layout, + kernel_layout=kernel_layout, enable_bias=True, relu_type="NONE", ) diff --git a/tests/python/contrib/test_cmsisnn/utils.py b/tests/python/contrib/test_cmsisnn/utils.py index 9fdb89289aff..f3a6b0c1343b 100644 --- a/tests/python/contrib/test_cmsisnn/utils.py +++ b/tests/python/contrib/test_cmsisnn/utils.py @@ -137,39 +137,52 @@ def get_same_padding(in_shape, kernel, dilation, stride): return [pad_top, pad_left, pad_bottom, pad_right] +def get_kernel_bias_dtype(input_dtype): + """ + Returns (kernel_dtype, bias_dtype) based on input's dtype. 
+ """ + # uint8 corresponds to an invalid case, so returning int types + # does not cause tests to break + if input_dtype in ("int8", "uint8"): + return ("int8", "int32") + elif input_dtype == "int16": + return ("int8", "int64") + raise ValueError("Invalid dtype provided to get_kernel_bias_dtype()") + + def get_conv2d_qnn_params( - weight_shape: List[int], + kernel_shape: List[int], input_scale: float, input_zp: int, - weights_scale: Union[float, List[float]], - weights_zp: int, + kernel_scale: Union[float, List[float]], + kernel_zp: int, input_dtype: str = "int8", - weights_dtype: str = "int8", + kernel_dtype: str = "int8", output_dtype: str = "int8", is_depthwise: bool = False, ) -> Tuple[float, int]: """ Calculate the output quantization parameters for convolution based on the input and - weights quantization paramters and the data types. + kernel quantization paramters and the data types. Parameters ---------- - weight_shape : List[int] - shape of the weights + kernel_shape : List[int] + shape of the kernel input_scale : float scale of the input tensor input_zp : int zero point of the input tensor - weights_scale : Union[float, List[float]] - scale(s) of the weights tensor - weights_zp : int - zero point of the weights tensor + kernel_scale : Union[float, List[float]] + scale(s) of the kernel tensor + kernel_zp : int + zero point of the kernel tensor is_depthwise : bool whether it is a depthwise convolution input_dtype : str data type of the input tensor - weights_dtype : str - data type of the weights tensor + kernel_dtype : str + data type of the kernel tensor output_dtype : str data type of the output tensor @@ -184,27 +197,27 @@ def get_conv2d_qnn_params( input_max = input_scale * (input_dtype_max - input_zp) input_min = input_scale * (input_dtype_min - input_zp) - weights_dtype_min, weights_dtype_max = get_range_for_dtype_str(weights_dtype) - weights_sc_max = np.max(weights_scale) - weights_max = weights_sc_max * (weights_dtype_max - weights_zp) + kernel_dtype_min, kernel_dtype_max = get_range_for_dtype_str(kernel_dtype) + kernel_sc_max = np.max(kernel_scale) + kernel_max = kernel_sc_max * (kernel_dtype_max - kernel_zp) - weights_sc_min = np.min(weights_scale) - weights_min = weights_sc_min * (weights_dtype_min - weights_zp) + kernel_sc_min = np.min(kernel_scale) + kernel_min = kernel_sc_min * (kernel_dtype_min - kernel_zp) - weights_h = weight_shape[1] - weights_w = weight_shape[2] - channels = weight_shape[3] - num_elements = weights_h * weights_w * channels + kernel_h = kernel_shape[1] + kernel_w = kernel_shape[2] + channels = kernel_shape[3] + num_elements = kernel_h * kernel_w * channels # Adjust the result if it is a depthwise convolution if is_depthwise: num_elements = num_elements / channels # The smallest and largest possible values in the unquantized output tensor output_limits = [ - weights_max * input_max * num_elements, - weights_min * input_max * num_elements, - weights_min * input_min * num_elements, - weights_max * input_min * num_elements, + kernel_max * input_max * num_elements, + kernel_min * input_max * num_elements, + kernel_min * input_min * num_elements, + kernel_max * input_min * num_elements, ] output_max = max(output_limits) From 8300e01a923ad2cf3e0bc5c06f14a07441f926f1 Mon Sep 17 00:00:00 2001 From: Ashutosh Parkhi Date: Fri, 30 Sep 2022 19:56:12 +0100 Subject: [PATCH 2/3] Added few comments and simplified variable assignment Change-Id: I1956107ff4a52437a525aa34c746acc5e3a31631 --- python/tvm/relay/op/contrib/cmsisnn.py | 11 +++++++---- 
 tests/python/contrib/test_cmsisnn/test_conv2d.py |  3 ++-
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/python/tvm/relay/op/contrib/cmsisnn.py b/python/tvm/relay/op/contrib/cmsisnn.py
index 1ec70195c9e0..0e82ab8be3db 100644
--- a/python/tvm/relay/op/contrib/cmsisnn.py
+++ b/python/tvm/relay/op/contrib/cmsisnn.py
@@ -146,17 +146,19 @@ def check_qnn_conv2d(pattern):
     # check if dtypes are supported for the following entities
     # (input_dtype, weight_dtype, bias_dtype, out_dtype, pattern_dtype)
     are_dtypes_valid = False
+    conv2d_input_dtype = conv2d_input.checked_type.dtype
     if bias_add:
         bias_dtype = bias_add.args[1].checked_type.dtype
     else:
-        bias_dtype = "int32" if conv2d_input.checked_type.dtype == "int8" else "int64"
+        # this is only to enable the following check that validates all sorts of dtypes
+        bias_dtype = "int32" if conv2d_input_dtype == "int8" else "int64"
     valid_dtypes = None
-    if conv2d_input.checked_type.dtype == "int8":
+    if conv2d_input_dtype == "int8":
         valid_dtypes = ("int8", "int8", "int32", "int32", "int8")
-    elif conv2d_input.checked_type.dtype == "int16":
+    elif conv2d_input_dtype == "int16":
         valid_dtypes = ("int16", "int8", "int64", "int64", "int16")
     if (
-        conv2d_input.checked_type.dtype,
+        conv2d_input_dtype,
         conv2d_weight.checked_type.dtype,
         bias_dtype,
         conv2d.attrs.out_dtype,
@@ -164,6 +166,7 @@ def check_qnn_conv2d(pattern):
         pattern.checked_type.dtype,
     ) == valid_dtypes:
         are_dtypes_valid = True
+    # combination of all checks to decide if pattern is eligible for partitioning
     ret = (
         are_dtypes_valid
         and all([zp == 0 for zp in kernel_zp])
diff --git a/tests/python/contrib/test_cmsisnn/test_conv2d.py b/tests/python/contrib/test_cmsisnn/test_conv2d.py
index 9ff55c952c88..63cbb4d2518b 100644
--- a/tests/python/contrib/test_cmsisnn/test_conv2d.py
+++ b/tests/python/contrib/test_cmsisnn/test_conv2d.py
@@ -257,6 +257,7 @@ def test_conv2d_symmetric_padding(
     strides = (1, 1)
     dilation = (1, 1)
     groups = 1
+    # input_zero_point is not handled by TFLM when int16
     input_zero_point = input_zero_point if dtype == "int8" else 0
     kernel_layout = "HWIO"
@@ -942,7 +943,7 @@ def test_relay_conv2d_cmsisnn_depthwise_int8(

 def parameterize_for_invalid_model(test):
-    """Generates non int8 inputs"""
+    """Generates non-int8 non-int16 inputs"""
     in_dtype = ["uint8", "int8", "int16"]
     kernel_dtype = ["uint8", "int8"]
     kernel_zero_point = [-33, 10, 0]

From b5d1995b0a30ca30e538dc0feb94ef7e026aa950 Mon Sep 17 00:00:00 2001
From: Ashutosh Parkhi
Date: Mon, 3 Oct 2022 18:04:19 +0100
Subject: [PATCH 3/3] Added check that input_zero_point is zero

Change-Id: I62e67fdbe2781c90e55028ff2da88789623f269a
---
 python/tvm/relay/op/contrib/cmsisnn.py           | 15 +++++++++++----
 tests/python/contrib/test_cmsisnn/test_conv2d.py | 11 +++++++----
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/python/tvm/relay/op/contrib/cmsisnn.py b/python/tvm/relay/op/contrib/cmsisnn.py
index 0e82ab8be3db..8964937469c4 100644
--- a/python/tvm/relay/op/contrib/cmsisnn.py
+++ b/python/tvm/relay/op/contrib/cmsisnn.py
@@ -129,10 +129,6 @@ def check_qnn_conv2d(pattern):
     conv2d_input = conv2d.args[0]
     conv2d_weight = conv2d.args[1]

-    # kernel zero_point should be 0
-    kernel_zp = conv2d.args[3].data.numpy()
-    kernel_zp = [kernel_zp] if kernel_zp.ndim == 0 else kernel_zp
-
     # check if depthwise Conv2D
     kernel_layout = conv2d.attrs.kernel_layout
     pos_o = kernel_layout.index("O")
@@ -157,6 +153,7 @@ def check_qnn_conv2d(pattern):
         valid_dtypes = ("int8", "int8", "int32", "int32", "int8")
     elif conv2d_input_dtype == "int16":
valid_dtypes = ("int16", "int8", "int64", "int64", "int16") + if ( conv2d_input_dtype, conv2d_weight.checked_type.dtype, @@ -166,9 +163,19 @@ def check_qnn_conv2d(pattern): ) == valid_dtypes: are_dtypes_valid = True + # input_zero_point should be 0 when int16 + valid_input_zp = True + if conv2d_input_dtype == "int16" and conv2d.args[2].data.numpy().item(0) != 0: + valid_input_zp = False + + # kernel zero_point should be 0 + kernel_zp = conv2d.args[3].data.numpy() + kernel_zp = [kernel_zp] if kernel_zp.ndim == 0 else kernel_zp + # combination of all checks to decide if pattern is eligible for partitioning ret = ( are_dtypes_valid + and valid_input_zp and all([zp == 0 for zp in kernel_zp]) and (not is_depthwise or bias_add is not None) ) diff --git a/tests/python/contrib/test_cmsisnn/test_conv2d.py b/tests/python/contrib/test_cmsisnn/test_conv2d.py index 63cbb4d2518b..66ff5d793880 100644 --- a/tests/python/contrib/test_cmsisnn/test_conv2d.py +++ b/tests/python/contrib/test_cmsisnn/test_conv2d.py @@ -947,17 +947,20 @@ def parameterize_for_invalid_model(test): in_dtype = ["uint8", "int8", "int16"] kernel_dtype = ["uint8", "int8"] kernel_zero_point = [-33, 10, 0] - all_combinations = itertools.product(in_dtype, kernel_dtype, kernel_zero_point) + input_zero_point = [64, 0] + all_combinations = itertools.product( + in_dtype, kernel_dtype, kernel_zero_point, input_zero_point + ) all_combinations = filter( lambda parameters: not ( - (parameters[0] == "int8" or parameters[0] == "int16") + (parameters[0] == "int8" or (parameters[0] == "int16" and parameters[3] == 0)) and parameters[1] == "int8" and parameters[2] == 0 ), all_combinations, ) return pytest.mark.parametrize( - ["in_dtype", "kernel_dtype", "kernel_zero_point"], + ["in_dtype", "kernel_dtype", "kernel_zero_point", "input_zero_point"], all_combinations, )(test) @@ -968,12 +971,12 @@ def test_invalid_parameters( in_dtype, kernel_dtype, kernel_zero_point, + input_zero_point, ): """Tests Depthwise op for non int8 inputs""" ifm_shape = (1, 28, 28, 12) out_channels = 2 input_scale = 1 - input_zero_point = 24 kernel_scale = [0.11, 0.0237] kernel_layout = "HWIO"