From a9b24f8ff55eb4072d95b49f70ea65289c0a1e57 Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Wed, 21 Apr 2021 12:09:42 -0700 Subject: [PATCH 1/5] [CoreML EP]Add gemm/matmul support --- .../core/providers/coreml/builders/helper.cc | 4 +- .../coreml/builders/impl/builder_utils.cc | 20 +- .../coreml/builders/impl/builder_utils.h | 4 + .../coreml/builders/impl/gemm_op_builder.cc | 231 ++++++++++++++++++ .../coreml/builders/op_builder_factory.cc | 5 + .../coreml/builders/op_builder_factory.h | 2 +- .../core/providers/get_execution_providers.cc | 8 + .../test/providers/cpu/math/gemm_test.cc | 198 ++++++++------- 8 files changed, 367 insertions(+), 105 deletions(-) create mode 100644 onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc diff --git a/onnxruntime/core/providers/coreml/builders/helper.cc b/onnxruntime/core/providers/coreml/builders/helper.cc index e94a94c53ab33..a2b88f5008763 100644 --- a/onnxruntime/core/providers/coreml/builders/helper.cc +++ b/onnxruntime/core/providers/coreml/builders/helper.cc @@ -73,8 +73,8 @@ bool IsInputSupported(const NodeArg& input, const std::string& parent_name, cons return false; } - // For some undocuemented reason, apple CoreML lib will fail loading the model if the model has - // dimension > 16384 + // For some undocumented reason, Apple CoreML framework will fail loading the model if the model + // input has dimension > 16384 // See this issue, https://github.com/apple/coremltools/issues/1003 if (dim.dim_value() > 16384) { LOGS(logger, WARNING) << "CoreML does not support input dim > 16384, input:" << input_name diff --git a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc index 90455f72458c3..646f2d8897eb0 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc @@ -1,7 +1,9 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +#include #include +#include "core/providers/shared/utils/utils.h" #include "builder_utils.h" #include "coreml/NeuralNetwork.pb.h" @@ -80,18 +82,20 @@ common::Status HandleAutoPad(const std::vector input_shape, return Status::OK(); } +void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, + const float* data, size_t num_elements) { + weight.mutable_floatvalue()->Clear(); + std::copy(data, data + num_elements, + google::protobuf::RepeatedFieldBackInserter(weight.mutable_floatvalue())); +} + common::Status CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, const ONNX_NAMESPACE::TensorProto& tensor) { auto data_type = tensor.data_type(); if (data_type = ONNX_NAMESPACE::TensorProto_DataType_FLOAT) { - const float* data = - tensor.float_data().empty() ? reinterpret_cast(tensor.raw_data().data()) - : tensor.float_data().data(); - - weight.mutable_floatvalue()->Clear(); - auto num_elements = Product(tensor.dims()); - std::copy(data, data + num_elements, - google::protobuf::RepeatedFieldBackInserter(weight.mutable_floatvalue())); + const float* data = GetTensorFloatData(tensor); + auto num_elements = SafeInt(Product(tensor.dims())); + CreateCoreMLWeight(weight, data, num_elements); } else { // TODO: support other type return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, diff --git a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h index ef789489c6aa6..9263c20d38ea4 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h +++ b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h @@ -33,5 +33,9 @@ common::Status HandleAutoPad(const std::vector input_shape, common::Status CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, const ONNX_NAMESPACE::TensorProto& tensor); +// Copy the float array to a coreml weight +void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, + const float* data, size_t num_elements); + } // namespace coreml } // namespace onnxruntime diff --git a/onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc new file mode 100644 index 0000000000000..f6662fa275a20 --- /dev/null +++ b/onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc @@ -0,0 +1,231 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include +#include "core/providers/common.h" +#include "core/providers/shared/utils/utils.h" +#include "core/providers/coreml/builders/helper.h" +#include "core/providers/coreml/builders/model_builder.h" +#include "core/providers/coreml/builders/op_builder_factory.h" + +#include "base_op_builder.h" +#include "builder_utils.h" + +namespace onnxruntime { +namespace coreml { + +class GemmOpBuilder : public BaseOpBuilder { + // Add operator related + public: + void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const override; + + private: + Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, + const logging::Logger& logger) const override ORT_MUST_USE_RESULT; + + // Operator support related + private: + bool IsOpSupportedImpl(const InitializedTensorSet& /* initializers */, const Node& /* node */, + const logging::Logger& /* logger */) const override; +}; + +// Add operator related + +void GemmOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const { + const auto& op = node.OpType(); + const auto& input_defs(node.InputDefs()); + model_builder.AddInitializerToSkip(input_defs[1]->Name()); + if (op == "Gemm" && input_defs.size() > 2) { + model_builder.AddInitializerToSkip(input_defs[2]->Name()); + } +} + +// This is an internal function, requires input tensor to be 2d float tensor +// TODO, add support of other data types +static std::vector GetTensorFloatDataTransposed(const ONNX_NAMESPACE::TensorProto& tensor) { + const float* src_data = GetTensorFloatData(tensor); + const auto& tensor_shape = tensor.dims(); + auto x_t = SafeInt(tensor_shape[0]); + auto y_t = SafeInt(tensor_shape[1]); + std::vector transposed_data(x_t * y_t); + for (size_t x = 0; x < x_t; x++) { + for (size_t y = 0; y < y_t; y++) { + transposed_data[y * x_t + x] = src_data[x * y_t + y]; + } + } + + return transposed_data; +} + +Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, + const logging::Logger& /* logger */) const { + std::unique_ptr layer = CreateNNLayer(node); + + const auto& op_type = node.OpType(); + const auto& input_defs = node.InputDefs(); + const auto& b_tensor = *model_builder.GetInitializerTensors().at(input_defs[1]->Name()); + const auto& b_shape = b_tensor.dims(); + + auto* coreml_inner_product = layer->mutable_innerproduct(); + + // The coreml innerproduct weight (matrix B) is stored transposed + // - for MatMul and Gemm (transB = 0), the coreml weight is B' + // - for Gemm (transB = 1), the coreml weight is B + if (op_type == "MatMul") { + coreml_inner_product->set_inputchannels(b_shape[0]); + coreml_inner_product->set_outputchannels(b_shape[1]); + // Add weight (b of MatMul) + const auto b_transposed = GetTensorFloatDataTransposed(b_tensor); + CreateCoreMLWeight(*coreml_inner_product->mutable_weights(), b_transposed.data(), b_transposed.size()); + } else { // Gemm + NodeAttrHelper helper(node); + const auto transB = helper.Get("transB", 0); + if (transB == 0) { + coreml_inner_product->set_inputchannels(b_shape[0]); + coreml_inner_product->set_outputchannels(b_shape[1]); + const auto b_transposed = GetTensorFloatDataTransposed(b_tensor); + CreateCoreMLWeight(*coreml_inner_product->mutable_weights(), b_transposed.data(), b_transposed.size()); + } else { + coreml_inner_product->set_inputchannels(b_shape[1]); + coreml_inner_product->set_outputchannels(b_shape[0]); + // Add weight (b of MatMul) + CreateCoreMLWeight(*coreml_inner_product->mutable_weights(), b_tensor); + } + + // Add bias if present + if (input_defs.size() > 2) { + coreml_inner_product->set_hasbias(true); + const auto& bias_tensor = *model_builder.GetInitializerTensors().at(input_defs[2]->Name()); + CreateCoreMLWeight(*coreml_inner_product->mutable_bias(), bias_tensor); + } + } + + *layer->mutable_input()->Add() = input_defs[0]->Name(); + *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name(); + + model_builder.AddLayer(std::move(layer)); + return Status::OK(); +} + +// Operator support related + +bool GemmOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node, + const logging::Logger& logger) const { + const auto& op_type = node.OpType(); + const auto& input_defs(node.InputDefs()); + size_t a_idx = 0, b_idx = 1, c_idx = 2; // A*B+C + + if (!Contains(initializers, input_defs[b_idx]->Name())) { + LOGS(logger, VERBOSE) << "B of Gemm/Matmul must be an initializer tensor"; + return false; + } + + std::vector a_shape; + { + if (!GetShape(*input_defs[a_idx], a_shape, logger)) + return false; + + if (a_shape.size() != 2) { + LOGS(logger, VERBOSE) << "A must be 2D"; + return false; + } + + if (Product(a_shape) == 0) { + LOGS(logger, VERBOSE) << "A must be non-empty"; + return false; + } + } + + std::vector b_shape; + { + if (!GetShape(*input_defs[b_idx], b_shape, logger)) + return false; + + if (b_shape.size() != 2) { + LOGS(logger, VERBOSE) << "B must be 2D"; + return false; + } + + if (Product(b_shape) == 0) { + LOGS(logger, VERBOSE) << "B must be non-empty"; + return false; + } + } + + if (op_type == "Gemm") { + NodeAttrHelper helper(node); + const auto transA = helper.Get("transA", 0); + const auto transB = helper.Get("transB", 0); + const auto alpha = helper.Get("alpha", 1.0f); + const auto beta = helper.Get("beta", 1.0f); + if (!(transA == 0 && alpha == 1.f && beta == 1.f)) { + LOGS(logger, VERBOSE) << "Only transA == 0, alpha == 1.0 " + << "and beta == 1.0 is supported." + << " transA " << transA + << " alpha " << alpha + << " beta " << beta; + return false; + } + + // C of Gemm + // For now we only support {n} or {1,n} tensor + if (input_defs.size() == 3) { + if (!Contains(initializers, input_defs[c_idx]->Name())) { + LOGS(logger, VERBOSE) << "C of Gemm must be an initializer tensor"; + return false; + } + + std::vector c_shape; + if (!GetShape(*input_defs[c_idx], c_shape, logger)) + return false; + + size_t c_dim = c_shape.size(); + + if (c_dim == 0) { + LOGS(logger, VERBOSE) << "C of Gemm cannot be a scalar"; + return false; + } + + if (c_dim != 1) { + // If C is a (2+)d tensor, it must have the format {1, 1, ..., 1, n} + // where every except the last dimension should be 1 + for (size_t i = 0; i < c_dim - 1; ++i) { + if (c_shape[i] != 1) { + LOGS(logger, VERBOSE) << "C of Gemm must be a vector or a tensor with only last dimension != 1"; + return false; + } + } + } + + auto c_size = c_shape[c_dim - 1]; + if (c_size != (transB == 0 ? b_shape[1] : b_shape[0])) { + LOGS(logger, VERBOSE) << "C of Gemm must be a vector of b_shape[" + << (transB == 0 ? "1" : "0") << "]" + << " b_shape: [" << b_shape[0] << ", " << b_shape[1] << "]" + << " c_size: " << c_size; + + return false; + } + } + } + + return true; +} + +void CreateGemmOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) { + if (op_registrations.op_builder_map.find(op_type) != op_registrations.op_builder_map.cend()) + return; + + static std::vector op_types = + { + "Gemm", + "MatMul", + }; + + op_registrations.builders.push_back(onnxruntime::make_unique()); + for (const auto& op_type : op_types) { + op_registrations.op_builder_map.emplace(op_type, op_registrations.builders.back().get()); + } +} +} // namespace coreml +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/coreml/builders/op_builder_factory.cc b/onnxruntime/core/providers/coreml/builders/op_builder_factory.cc index bceb84fe08cff..bc7d34d1ed4d7 100644 --- a/onnxruntime/core/providers/coreml/builders/op_builder_factory.cc +++ b/onnxruntime/core/providers/coreml/builders/op_builder_factory.cc @@ -56,6 +56,11 @@ static OpBuilderRegistrations CreateOpBuilderRegistrations() { CreateResizeOpBuilder("Resize", op_registrations); } + { // Gemm/MatMul + CreateGemmOpBuilder("Gemm", op_registrations); + CreateGemmOpBuilder("MatMul", op_registrations); + } + return op_registrations; } diff --git a/onnxruntime/core/providers/coreml/builders/op_builder_factory.h b/onnxruntime/core/providers/coreml/builders/op_builder_factory.h index 17cff76bbc3cb..cb1ab6dbdc219 100644 --- a/onnxruntime/core/providers/coreml/builders/op_builder_factory.h +++ b/onnxruntime/core/providers/coreml/builders/op_builder_factory.h @@ -28,6 +28,6 @@ void CreateConcatOpBuilder(const std::string& op_type, OpBuilderRegistrations& o void CreateActivationOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); void CreatePoolOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); - +void CreateGemmOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); } // namespace coreml } // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/core/providers/get_execution_providers.cc b/onnxruntime/core/providers/get_execution_providers.cc index 866d71feef490..53dcf67e922e1 100644 --- a/onnxruntime/core/providers/get_execution_providers.cc +++ b/onnxruntime/core/providers/get_execution_providers.cc @@ -119,6 +119,14 @@ constexpr ProviderInfo kProvidersInPriorityOrder[] = true, #else false, +#endif + }, + { + kCoreMLExecutionProvider, +#ifdef USE_COREML + true, +#else + false, #endif }, {kCpuExecutionProvider, true}, // kCpuExecutionProvider is always last diff --git a/onnxruntime/test/providers/cpu/math/gemm_test.cc b/onnxruntime/test/providers/cpu/math/gemm_test.cc index bc86b3b23bcd2..c3eef92126b2d 100644 --- a/onnxruntime/test/providers/cpu/math/gemm_test.cc +++ b/onnxruntime/test/providers/cpu/math/gemm_test.cc @@ -9,37 +9,39 @@ namespace onnxruntime { namespace test { template -void TestGemmNoTrans(bool b_is_initializer) { - OpTester test("Gemm"); - - test.AddAttribute("transA", (int64_t)0); - test.AddAttribute("transB", (int64_t)0); - test.AddAttribute("alpha", 1.0f); - test.AddAttribute("beta", 1.0f); - - test.AddInput("A", {2, 4}, - {1.0f, 2.0f, 3.0f, 4.0f, - -1.0f, -2.0f, -3.0f, -4.0f}); - test.AddInput("B", {4, 3}, std::vector(12, 1.0f), b_is_initializer); - test.AddInput("C", {2, 3}, std::vector(6, 1.0f)); - test.AddOutput("Y", {2, 3}, - {11.0f, 11.0f, 11.0f, - -9.0f, -9.0f, -9.0f}); - test.Run(); +void TestGemmNoTrans() { + auto run_test = [](bool b_is_initializer, bool c_is_initializer = false) { + OpTester test("Gemm"); + + test.AddAttribute("transA", (int64_t)0); + test.AddAttribute("transB", (int64_t)0); + test.AddAttribute("alpha", 1.0f); + test.AddAttribute("beta", 1.0f); + + test.AddInput("A", {2, 4}, + {1.0f, 2.0f, 3.0f, 4.0f, + -1.0f, -2.0f, -3.0f, -4.0f}); + test.AddInput("B", {4, 3}, std::vector(12, 1.0f), b_is_initializer); + test.AddInput("C", {2, 3}, std::vector(6, 1.0f), c_is_initializer); + test.AddOutput("Y", {2, 3}, + {11.0f, 11.0f, 11.0f, + -9.0f, -9.0f, -9.0f}); + test.Run(); + }; + + run_test(false, false); + // NNAPI EP requires weight to be an initializer + run_test(true, false); + // CoreML EP requires weight and bias both to be initializers + run_test(true, true); } TEST(GemmOpTest, GemmNoTrans_float) { - TestGemmNoTrans(false); + TestGemmNoTrans(); } TEST(GemmOpTest, GemmNoTrans_double) { - TestGemmNoTrans(false); -} - -// NNAPI EP requires weight to be an initializer -TEST(GemmOpTest, GemmNoTransBIsInitializer) { - TestGemmNoTrans(true); - TestGemmNoTrans(true); + TestGemmNoTrans(); } // Only CUDA kernel has float 16 support @@ -84,41 +86,44 @@ TEST(GemmOpTest, GemmNoTrans_f16) { #endif template -void TestGemmBroadcast(bool b_is_initializer) { - OpTester test("Gemm"); - - test.AddAttribute("transA", (int64_t)0); - test.AddAttribute("transB", (int64_t)0); - test.AddAttribute("alpha", 1.0f); - test.AddAttribute("beta", 1.0f); - - test.AddInput("A", {2, 4}, - {1.0f, 2.0f, 3.0f, 4.0f, - -1.0f, -2.0f, -3.0f, -4.0f}); - test.AddInput("B", {4, 3}, std::vector(12, 1.0f), b_is_initializer); - test.AddInput("C", {3}, std::vector{1.0f, 2.0f, 3.0f}); - test.AddOutput("Y", {2, 3}, - {11.0f, 12.0f, 13.0f, - -9.0f, -8.0f, -7.0f}); +void TestGemmBroadcast() { + auto run_test = [](bool b_is_initializer, bool c_is_initializer) { + OpTester test("Gemm"); + + test.AddAttribute("transA", (int64_t)0); + test.AddAttribute("transB", (int64_t)0); + test.AddAttribute("alpha", 1.0f); + test.AddAttribute("beta", 1.0f); + + test.AddInput("A", {2, 4}, + {1.0f, 2.0f, 3.0f, 4.0f, + -1.0f, -2.0f, -3.0f, -4.0f}); + test.AddInput("B", {4, 3}, std::vector(12, 1.0f), b_is_initializer); + test.AddInput("C", {3}, std::vector{1.0f, 2.0f, 3.0f}, c_is_initializer); + test.AddOutput("Y", {2, 3}, + {11.0f, 12.0f, 13.0f, + -9.0f, -8.0f, -7.0f}); #if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) - test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); // OpenVINO : Temporarily disabled due to accuracy issues + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); // OpenVINO : Temporarily disabled due to accuracy issues #else - test.Run(); + test.Run(); #endif -} + }; -TEST(GemmOpTest, GemmBroadcast) { - TestGemmBroadcast(false); - TestGemmBroadcast(false); + run_test(false, false); + // NNAPI EP requires weight to be an initializer + run_test(true, false); + // CoreML EP requires weight and bias both to be initializers + run_test(true, true); } -TEST(GemmOpTest, GemmBroadcastBIsInitializer) { - TestGemmBroadcast(true); - TestGemmBroadcast(true); +TEST(GemmOpTest, GemmBroadcast) { + TestGemmBroadcast(); + TestGemmBroadcast(); } template -static void TestGemmTrans(bool b_is_initializer) { +static void TestGemmTrans() { OpTester test("Gemm"); test.AddAttribute("transA", (int64_t)1); @@ -131,7 +136,7 @@ static void TestGemmTrans(bool b_is_initializer) { 2.0f, -2.0f, 3.0f, -3.0f, 4.0f, -4.0f}); - test.AddInput("B", {3, 4}, std::vector(12, 1.0f), b_is_initializer); + test.AddInput("B", {3, 4}, std::vector(12, 1.0f)); test.AddInput("C", {3}, std::vector(3, 1.0f)); test.AddOutput("Y", {2, 3}, {11.0f, 11.0f, 11.0f, @@ -144,39 +149,39 @@ static void TestGemmTrans(bool b_is_initializer) { } TEST(GemmOpTest, GemmTrans) { - TestGemmTrans(false); - TestGemmTrans(false); -} - -TEST(GemmOpTest, GemmTransBIsInitializer) { - TestGemmTrans(true); - TestGemmTrans(true); + TestGemmTrans(); + TestGemmTrans(); } // NNAPI EP's GEMM only works as A*B', add case only B is transposed // Also test NNAPI EP's handling of non-1D bias (C of Gemm) template static void TestGemmTransB() { - OpTester test("Gemm"); - - test.AddAttribute("transA", (int64_t)0); - test.AddAttribute("transB", (int64_t)1); - test.AddAttribute("alpha", 1.0f); - test.AddAttribute("beta", 1.0f); - - test.AddInput("A", {2, 4}, - {1.0f, 2.0f, 3.0f, 4.0f, - -1.0f, -2.0f, -3.0f, -4.0f}); - test.AddInput("B", {3, 4}, std::vector(12, 1.0f)); - test.AddInput("C", {1, 3}, std::vector(3, 1.0f)); - test.AddOutput("Y", {2, 3}, - {11.0f, 11.0f, 11.0f, - -9.0f, -9.0f, -9.0f}); + auto run_test = [](bool b_is_initializer, bool c_is_initializer = false) { + OpTester test("Gemm"); + + test.AddAttribute("transA", (int64_t)0); + test.AddAttribute("transB", (int64_t)1); + test.AddAttribute("alpha", 1.0f); + test.AddAttribute("beta", 1.0f); + + test.AddInput("A", {2, 4}, + {1.0f, 2.0f, 3.0f, 4.0f, + -1.0f, -2.0f, -3.0f, -4.0f}); + test.AddInput("B", {3, 4}, std::vector(12, 1.0f), b_is_initializer); + test.AddInput("C", {1, 3}, std::vector(3, 1.0f), c_is_initializer); + test.AddOutput("Y", {2, 3}, + {11.0f, 11.0f, 11.0f, + -9.0f, -9.0f, -9.0f}); #if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) - test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); // OpenVINO: Temporarily disabled due to accuracy issues + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); // OpenVINO: Temporarily disabled due to accuracy issues #else - test.Run(); + test.Run(); #endif + }; + run_test(false, false); + // CoreML EP requires weight and bias both to be initializers + run_test(true, true); } TEST(GemmOpTest, GemmTransB) { @@ -187,27 +192,32 @@ TEST(GemmOpTest, GemmTransB) { // NNAPI EP's GEMM only works as A*B', add case only B is transposed // Also test NNAPI EP's handling of non-1D bias (C of Gemm) which is broadcastable but not valid for NNAPI template -void TestGemmTransB_1() { - OpTester test("Gemm"); - - test.AddAttribute("transA", (int64_t)0); - test.AddAttribute("transB", (int64_t)1); - test.AddAttribute("alpha", 1.0f); - test.AddAttribute("beta", 1.0f); - - test.AddInput("A", {2, 4}, - {1.0f, 2.0f, 3.0f, 4.0f, - -1.0f, -2.0f, -3.0f, -4.0f}); - test.AddInput("B", {3, 4}, std::vector(12, 1.0f)); - test.AddInput("C", {2, 1}, std::vector(2, 1.0f)); - test.AddOutput("Y", {2, 3}, - {11.0f, 11.0f, 11.0f, - -9.0f, -9.0f, -9.0f}); +static void TestGemmTransB_1() { + auto run_test = [](bool b_is_initializer, bool c_is_initializer = false) { + OpTester test("Gemm"); + + test.AddAttribute("transA", (int64_t)0); + test.AddAttribute("transB", (int64_t)1); + test.AddAttribute("alpha", 1.0f); + test.AddAttribute("beta", 1.0f); + + test.AddInput("A", {2, 4}, + {1.0f, 2.0f, 3.0f, 4.0f, + -1.0f, -2.0f, -3.0f, -4.0f}); + test.AddInput("B", {3, 4}, std::vector(12, 1.0f), b_is_initializer); + test.AddInput("C", {2, 1}, std::vector(2, 1.0f), c_is_initializer); + test.AddOutput("Y", {2, 3}, + {11.0f, 11.0f, 11.0f, + -9.0f, -9.0f, -9.0f}); #if defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_GPU_FP32) - test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); // OpenVINO: Temporarily disabled due to accuracy issues + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); // OpenVINO: Temporarily disabled due to accuracy issues #else - test.Run(); + test.Run(); #endif + }; + run_test(false, false); + // CoreML EP requires weight and bias both to be initializers + run_test(true, true); } TEST(GemmOpTest, GemmTransB_1) { From b808c280a0e70110ca2b1a53c299d067602a71fa Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Wed, 21 Apr 2021 14:23:58 -0700 Subject: [PATCH 2/5] remove changes in get_execution_providers --- onnxruntime/core/providers/get_execution_providers.cc | 8 -------- 1 file changed, 8 deletions(-) diff --git a/onnxruntime/core/providers/get_execution_providers.cc b/onnxruntime/core/providers/get_execution_providers.cc index 53dcf67e922e1..866d71feef490 100644 --- a/onnxruntime/core/providers/get_execution_providers.cc +++ b/onnxruntime/core/providers/get_execution_providers.cc @@ -119,14 +119,6 @@ constexpr ProviderInfo kProvidersInPriorityOrder[] = true, #else false, -#endif - }, - { - kCoreMLExecutionProvider, -#ifdef USE_COREML - true, -#else - false, #endif }, {kCpuExecutionProvider, true}, // kCpuExecutionProvider is always last From dae2ecd37b41b292a24ce64213fd2b0b6f594c4e Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Wed, 21 Apr 2021 15:50:56 -0700 Subject: [PATCH 3/5] Address CR comments --- .../core/providers/coreml/builders/impl/builder_utils.cc | 1 + .../core/providers/coreml/builders/impl/gemm_op_builder.cc | 2 ++ 2 files changed, 3 insertions(+) diff --git a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc index 646f2d8897eb0..d3031d682f387 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc @@ -85,6 +85,7 @@ common::Status HandleAutoPad(const std::vector input_shape, void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, const float* data, size_t num_elements) { weight.mutable_floatvalue()->Clear(); + weight.mutable_floatvalue()->Reserve(num_elements); std::copy(data, data + num_elements, google::protobuf::RepeatedFieldBackInserter(weight.mutable_floatvalue())); } diff --git a/onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc index f6662fa275a20..339587b8f1dd3 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc @@ -34,6 +34,8 @@ class GemmOpBuilder : public BaseOpBuilder { void GemmOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const { const auto& op = node.OpType(); const auto& input_defs(node.InputDefs()); + // We have already embedded the weights (matrix B and C(if any)) into the coreml layer + // No need to copy them later to reduce memory consumption model_builder.AddInitializerToSkip(input_defs[1]->Name()); if (op == "Gemm" && input_defs.size() > 2) { model_builder.AddInitializerToSkip(input_defs[2]->Name()); From 0079ff1d6ec0972b1728d3d42b7d8011d4040753 Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Wed, 21 Apr 2021 16:08:37 -0700 Subject: [PATCH 4/5] Switch to list initialization --- .../core/providers/coreml/builders/impl/builder_utils.cc | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc index d3031d682f387..4ea4367cc82ad 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc @@ -84,10 +84,7 @@ common::Status HandleAutoPad(const std::vector input_shape, void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, const float* data, size_t num_elements) { - weight.mutable_floatvalue()->Clear(); - weight.mutable_floatvalue()->Reserve(num_elements); - std::copy(data, data + num_elements, - google::protobuf::RepeatedFieldBackInserter(weight.mutable_floatvalue())); + *weight.mutable_floatvalue() = {data, data + num_elements}; } common::Status CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, From 2bd99001aa0883129cf854d52968f81ddcd371a6 Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Wed, 21 Apr 2021 16:15:11 -0700 Subject: [PATCH 5/5] Minor update --- .../providers/coreml/builders/impl/reshape_op_builder.cc | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/onnxruntime/core/providers/coreml/builders/impl/reshape_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/reshape_op_builder.cc index abfee1ea4f434..126ecd82cb7ee 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/reshape_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/reshape_op_builder.cc @@ -51,16 +51,11 @@ Status ReshapeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, : target_shape_tensor.int64_data().data(); const auto size = target_shape_tensor.dims()[0]; - std::vector target_shape; - std::copy(raw_target_shape, raw_target_shape + size, std::back_inserter(target_shape)); - + std::vector target_shape{raw_target_shape, raw_target_shape + size}; std::vector input_shape; ORT_RETURN_IF_NOT(GetShape(*input_defs[0], input_shape, logger), "Cannot get shape"); ReshapeHelper helper(TensorShape(input_shape), target_shape); - std::copy(target_shape.cbegin(), target_shape.cend(), - google::protobuf::RepeatedFieldBackInserter( - layer->mutable_reshapestatic()->mutable_targetshape())); - + *layer->mutable_reshapestatic()->mutable_targetshape() = {target_shape.cbegin(), target_shape.cend()}; *layer->mutable_input()->Add() = input_defs[0]->Name(); *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();