diff --git a/BUILD.md b/BUILD.md index e4f31151a8d00..9af6d500ffdd6 100644 --- a/BUILD.md +++ b/BUILD.md @@ -249,7 +249,7 @@ See more information on the OpenVINO Execution Provider [here](./docs/execution_ --build_server: Using this flag in addition to --use_openvino builds the OpenVINO Execution Provider with ONNX Runtime Server. -* ``: Specifies the hardware target for building OpenVINO Execution Provider. Below are the options for different Intel target devices. +* ``: Specifies the default hardware target for building OpenVINO Execution Provider. This can be overridden dynamically at runtime with another option (refer to [OpenVINO-ExecutionProvider.md](./docs/execution_providers/OpenVINO-ExecutionProvider.md) for more details on dynamic device selection). Below are the options for different Intel target devices. | Hardware Option | Target Device | | --------------- | ------------------------| diff --git a/docs/execution_providers/OpenVINO-ExecutionProvider.md b/docs/execution_providers/OpenVINO-ExecutionProvider.md index 60fe328fa15a4..6ca0ca9c499f9 100644 --- a/docs/execution_providers/OpenVINO-ExecutionProvider.md +++ b/docs/execution_providers/OpenVINO-ExecutionProvider.md @@ -5,6 +5,21 @@ OpenVINO Execution Provider enables deep learning inference on Intel CPUs, Intel ## Build For build instructions, please see the [BUILD page](../../BUILD.md#openvino). +## Dynamic device selection +When ONNX Runtime is built with OpenVINO Execution Provider, a target hardware option needs to be provided. This build-time option becomes the default target hardware the EP schedules inference on. However, this target may be overridden at runtime to schedule inference on a different hardware as shown below. + +Note: This dynamic hardware selection is optional. The EP falls back to the build-time default selection if no dynamic hardware option value is specified. +1. 
Python API +``` +import onnxruntime +onnxruntime.capi._pybind_state.set_openvino_device("") +# Create session after this +``` +2. C/C++ API +``` +Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_OpenVINO(sf, "")); +``` + ## ONNX Layers supported using OpenVINO The table below shows the ONNX layers supported using OpenVINO Execution Provider and the mapping between ONNX layers and OpenVINO layers. The below table also lists the Intel hardware support for each of the layers. CPU refers to Intel® diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index f3ef7400bdd53..bc0b2fd772260 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -14,31 +14,20 @@ namespace onnxruntime { namespace openvino_ep { -BackendManager::BackendManager(const onnxruntime::Node* fused_node, const logging::Logger& logger) { +BackendManager::BackendManager(const onnxruntime::Node* fused_node, + const logging::Logger& logger,std::string dev_id, + std::string prec_str) : device_id_(dev_id), precision_str_(prec_str) { device_id_ = "CPU"; - precision_ = InferenceEngine::Precision::FP32; std::string precision_str_ = "FP32"; - -#ifdef OPENVINO_CONFIG_CPU_FP32 -#endif -#ifdef OPENVINO_CONFIG_GPU_FP32 - device_id_ = "GPU"; -#endif -#ifdef OPENVINO_CONFIG_GPU_FP16 - device_id_ = "GPU"; - precision_ = InferenceEngine::Precision::FP16; - precision_str_ = "FP16"; -#endif -#ifdef OPENVINO_CONFIG_MYRIAD - device_id_ = "MYRIAD"; - precision_ = InferenceEngine::Precision::FP16; - precision_str_ = "FP16"; -#endif -#ifdef OPENVINO_CONFIG_VAD_M - device_id_ = "HDDL"; - precision_ = InferenceEngine::Precision::FP16; - precision_str_ = "FP16"; -#endif + if(precision_str_ == "FP32") { + precision_ = InferenceEngine::Precision::FP32; + } else if (precision_str_ == "FP16") + { + precision_ = InferenceEngine::Precision::FP16; + } else { + ORT_THROW("Invalid 
OpenVINO Precision type: " + precision_str_); + } + // Save the indexes of graph inputs among fused_node's inputDefs // (which also contains initializers). diff --git a/onnxruntime/core/providers/openvino/backend_manager.h b/onnxruntime/core/providers/openvino/backend_manager.h index 2abef02b379af..51849713e3662 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.h +++ b/onnxruntime/core/providers/openvino/backend_manager.h @@ -18,7 +18,7 @@ namespace openvino_ep { // Singleton class that manages all the backends class BackendManager { public: - BackendManager(const onnxruntime::Node* fused_node, const logging::Logger& logger); + BackendManager(const onnxruntime::Node* fused_node, const logging::Logger& logger, std::string dev_id, std::string prec_str); void Compute(Ort::CustomOpApi api, OrtKernelContext* context); void ShutdownBackendManager(); diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc index a18ad2a4ac36f..fa37f8612641d 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc @@ -36,8 +36,7 @@ namespace onnxruntime { constexpr const char* OpenVINO = "OpenVINO"; OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProviderInfo& info) - : IExecutionProvider{onnxruntime::kOpenVINOExecutionProvider} { - ORT_UNUSED_PARAMETER(info); + : IExecutionProvider{onnxruntime::kOpenVINOExecutionProvider}, info_(info) { DeviceAllocatorRegistrationInfo device_info({OrtMemTypeDefault, [](int) { return std::make_unique(std::make_unique(OpenVINO, OrtDeviceAllocator)); }, std::numeric_limits::max()}); InsertAllocator(CreateAllocator(device_info)); } @@ -176,7 +175,7 @@ bool IsUnsupportedOp(std::string name, std::string device) { std::merge(unsupported_ops_cpu.begin(), unsupported_ops_cpu.end(), unsupported_ops_gpu.begin(), unsupported_ops_gpu.end(), 
std::inserter(unsupported_ops, unsupported_ops.begin())); - } else if (device == "VPU") { + } else if (device == "MYRIAD" || device == "HDDL") { std::merge(unsupported_ops_cpu.begin(), unsupported_ops_cpu.end(), unsupported_ops_vpu.begin(), unsupported_ops_vpu.end(), std::inserter(unsupported_ops, unsupported_ops.begin())); @@ -468,7 +467,7 @@ static bool IsTypeSupported(const NodeArg* node_arg, bool is_initializer, const }; auto dtype = type_proto->tensor_type().elem_type(); - if(device_id == "CPU" || device_id == "VPU"){ + if(device_id == "CPU" || device_id == "MYRIAD" || device_id == "HDDL"){ if(supported_types_cpu.find(dtype) != supported_types_cpu.end()) return true; @@ -494,7 +493,7 @@ static bool IsTypeSupported(const NodeArg* node_arg, bool is_initializer, const static bool IsNodeSupported(const std::map>& op_map, const onnxruntime::GraphViewer& graph_viewer, - const NodeIndex node_idx) { + const NodeIndex node_idx, std::string& device_id) { const auto& node = graph_viewer.GetNode(node_idx); const auto& optype = node->OpType(); @@ -502,16 +501,6 @@ static bool IsNodeSupported(const std::map>& std::cout << "Node " << optype << std::endl; const auto& domain = node->Domain(); - std::string device_id = "CPU"; - -#if defined(OPENVINO_CONFIG_GPU_FP32) || defined(OPENVINO_CONFIG_GPU_FP16) - device_id = "GPU"; -#endif - -#if defined(OPENVINO_CONFIG_MYRIAD) || defined(OPENVINO_CONFIG_VAD_M) - device_id = "VPU"; -#endif - /* 0. Check if node is in the unsupported list 1. Check input and output data types are supported. 
@@ -631,13 +620,13 @@ static std::map> GetNgSupportedOps(const int } static std::vector -GetUnsupportedNodeIndices(const GraphViewer& graph_viewer, /*out*/ std::unordered_set& ng_required_initializers) { +GetUnsupportedNodeIndices(const GraphViewer& graph_viewer, std::string device, /*out*/ std::unordered_set& ng_required_initializers) { const auto ng_supported_ops = GetNgSupportedOps(GetOnnxOpSet(graph_viewer)); std::vector unsupported_nodes_idx; for (const auto& node_idx : graph_viewer.GetNodesInTopologicalOrder()) { - if (IsNodeSupported(ng_supported_ops, graph_viewer, node_idx)) { + if (IsNodeSupported(ng_supported_ops, graph_viewer, node_idx, device)) { // Collect inputs that are initializers graph_viewer.GetNode(node_idx)->ForEachDef([&ng_required_initializers, &graph_viewer](const onnxruntime::NodeArg& node_arg, bool is_input) { if(is_input && graph_viewer.GetAllInitializedTensors().count(node_arg.Name())) { @@ -825,7 +814,7 @@ OpenVINOExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_v TODO: Support overridable initializers */ std::unordered_set ng_required_initializers; - const auto unsupported_nodes = GetUnsupportedNodeIndices(graph_viewer, ng_required_initializers); + const auto unsupported_nodes = GetUnsupportedNodeIndices(graph_viewer, info_.device_id_, ng_required_initializers); //If all ops are supported, no partitioning is required. Short-circuit and avoid splitting. 
if (unsupported_nodes.empty()) { @@ -893,7 +882,7 @@ common::Status OpenVINOExecutionProvider::Compile( std::vector& node_compute_funcs) { for (const auto& fused_node : fused_nodes) { NodeComputeInfo compute_info; - std::shared_ptr backend_manager = std::make_shared(fused_node, *GetLogger()); + std::shared_ptr backend_manager = std::make_shared(fused_node, *GetLogger(), info_.device_id_, info_.precision_); compute_info.create_state_func = [backend_manager](ComputeContext* context, FunctionState* state) { diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.h b/onnxruntime/core/providers/openvino/openvino_execution_provider.h index bcb00ba97b893..7eea824c79c8a 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.h +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.h @@ -11,21 +11,58 @@ #include "backend_manager.h" #include -namespace ngraph { -namespace runtime { -class Backend; -} -} // namespace ngraph - namespace onnxruntime { // Information needed to construct OpenVINO execution providers. 
struct OpenVINOExecutionProviderInfo { - const char* device{"CPU_FP32"}; + std::string device_id_; + std::string precision_; - explicit OpenVINOExecutionProviderInfo(const char* dev) : device(dev) { + explicit OpenVINOExecutionProviderInfo(std::string dev_id) { + if(dev_id == "") { + LOGS_DEFAULT(INFO) << "[OpenVINO-EP]" << "No runtime device selection option provided."; + #ifdef OPENVINO_CONFIG_CPU_FP32 + device_id_ = "CPU"; + precision_ = "FP32"; + #endif + #ifdef OPENVINO_CONFIG_GPU_FP32 + device_id_ = "GPU"; + precision_ = "FP32"; + #endif + #ifdef OPENVINO_CONFIG_GPU_FP16 + device_id_ = "GPU"; + precision_ = "FP16"; + #endif + #ifdef OPENVINO_CONFIG_MYRIAD + device_id_ = "MYRIAD"; + precision_ = "FP16"; + #endif + #ifdef OPENVINO_CONFIG_VAD_M + device_id_ = "HDDL"; + precision_ = "FP16"; + #endif + } else if (dev_id == "CPU_FP32") { + device_id_ = "CPU"; + precision_ = "FP32"; + } else if (dev_id == "GPU_FP32") { + device_id_ = "GPU"; + precision_ = "FP32"; + } else if (dev_id == "GPU_FP16") { + device_id_ = "GPU"; + precision_ = "FP16"; + } else if (dev_id == "MYRIAD_FP16") { + device_id_ = "MYRIAD"; + precision_ = "FP16"; + } else if (dev_id == "VAD-M_FP16") { + device_id_ = "VAD-M"; + precision_ = "FP16"; + } else { + ORT_THROW("Invalid device string: " + dev_id); + } + LOGS_DEFAULT(INFO) << "[OpenVINO-EP]" << "Choosing Device: " << device_id_ << " , Precision: " << precision_; } OpenVINOExecutionProviderInfo() { + OpenVINOExecutionProviderInfo(""); } }; diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc index 1717e2e8f21bf..84ec83713e437 100644 --- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc +++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc @@ -7,7 +7,12 @@ namespace onnxruntime { struct OpenVINOProviderFactory : IExecutionProviderFactory { - OpenVINOProviderFactory(const char* device) : device_(device) { + 
OpenVINOProviderFactory(const char* device) { + if(device == nullptr) { + device_ = ""; + } else { + device_ = device; + } } ~OpenVINOProviderFactory() override { } @@ -19,8 +24,7 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory { }; std::unique_ptr OpenVINOProviderFactory::CreateProvider() { - OpenVINOExecutionProviderInfo info; - //info.device = device_; + OpenVINOExecutionProviderInfo info(device_); return std::make_unique(info); } diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index ff9219cde66a5..44612a08ad3a3 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -48,26 +48,27 @@ #define BACKEND_NGRAPH "" #endif -#if OPENVINO_CONFIG_CPU_FP32 -#define BACKEND_OPENVINO "-OPENVINO_CPU_FP32" +#ifdef USE_OPENVINO + #if OPENVINO_CONFIG_CPU_FP32 + #define BACKEND_OPENVINO "-OPENVINO_CPU_FP32" -#elif OPENVINO_CONFIG_GPU_FP32 -#define BACKEND_OPENVINO "-OPENVINO_GPU_FP32" + #elif OPENVINO_CONFIG_GPU_FP32 + #define BACKEND_OPENVINO "-OPENVINO_GPU_FP32" -#elif OPENVINO_CONFIG_GPU_FP16 -#define BACKEND_OPENVINO "-OPENVINO_GPU_FP16" + #elif OPENVINO_CONFIG_GPU_FP16 + #define BACKEND_OPENVINO "-OPENVINO_GPU_FP16" -#elif OPENVINO_CONFIG_MYRIAD -#define BACKEND_OPENVINO "-OPENVINO_MYRIAD" + #elif OPENVINO_CONFIG_MYRIAD + #define BACKEND_OPENVINO "-OPENVINO_MYRIAD" -#elif OPENVINO_CONFIG_VAD_M -#define BACKEND_OPENVINO "-OPENVINO_VAD_M" + #elif OPENVINO_CONFIG_VAD_M + #define BACKEND_OPENVINO "-OPENVINO_VAD_M" -#else -#define BACKEND_OPENVINO "" + #else + #define BACKEND_OPENVINO "" + #endif #endif - #ifdef USE_NUPHAR #define BACKEND_NUPHAR "-NUPHAR" #else @@ -100,6 +101,7 @@ #endif #ifdef USE_OPENVINO #include "core/providers/openvino/openvino_provider_factory.h" +std::string openvino_device; #endif #ifdef USE_NUPHAR #include "core/providers/nuphar/nuphar_provider_factory.h" @@ -323,9 +325,9 @@ void 
RegisterExecutionProviders(InferenceSession* sess, const std::vector std::string { + return openvino_device; + }, "" + ); +#endif + #ifdef onnxruntime_PYBIND_EXPORT_OPSCHEMA m.def( "get_all_operator_schema", []() -> const std::vector { @@ -415,7 +429,7 @@ void addGlobalMethods(py::module& m) { onnxruntime::CreateExecutionProviderFactory_NGraph("CPU"), #endif #ifdef USE_OPENVINO - onnxruntime::CreateExecutionProviderFactory_OpenVINO("CPU"), + onnxruntime::CreateExecutionProviderFactory_OpenVINO(openvino_device), #endif #ifdef USE_TENSORRT onnxruntime::CreateExecutionProviderFactory_Tensorrt(0) diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc index 40bbce9812fba..1fe3a5fb32282 100644 --- a/onnxruntime/test/onnx/main.cc +++ b/onnxruntime/test/onnx/main.cc @@ -282,7 +282,7 @@ int real_main(int argc, char* argv[], Ort::Env& env) { if (enable_openvino) { #ifdef USE_OPENVINO sf.SetGraphOptimizationLevel(ORT_DISABLE_ALL); - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_OpenVINO(sf, "CPU")); + Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_OpenVINO(sf, "")); #else fprintf(stderr, "OpenVINO is not supported in this build"); return -1; diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index e1ea85f482534..d718d241cee61 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -63,7 +63,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device #endif } else if (provider_name == onnxruntime::kOpenVINOExecutionProvider) { #ifdef USE_OPENVINO - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_OpenVINO(session_options, "CPU")); + Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_OpenVINO(session_options, "")); #else ORT_THROW("OpenVINO is not supported in this build\n"); #endif diff --git a/server/environment.cc b/server/environment.cc index be12314722d6b..b2aa1dbb874fb 
100644 --- a/server/environment.cc +++ b/server/environment.cc @@ -80,7 +80,7 @@ void ServerEnvironment::RegisterExecutionProviders(){ #endif #ifdef USE_OPENVINO - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_OpenVINO(options_, "CPU")); + Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_OpenVINO(options_, "")); #endif