From 55d4facc1a9b9aa1c2694b044d3d20b2a90999a7 Mon Sep 17 00:00:00 2001 From: Srirammaswamy Date: Tue, 27 Aug 2024 14:07:38 +0530 Subject: [PATCH 1/5] Implements blob compatibility check for NPU * OVEP catches the NPU driver exception and return failure status * NPU to CPU fallback is disabled when inferencing with blob --- .../providers/openvino/backend_manager.cc | 3 +- .../openvino/openvino_execution_provider.cc | 78 ++++++++++--------- 2 files changed, 45 insertions(+), 36 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 6466eb8880b38..78f77f9b3dc48 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -111,7 +111,8 @@ BackendManager::BackendManager(const GlobalContext& global_context, ORT_THROW(ex.what()); #else if (device_type.find("NPU") != std::string::npos && - !GetGlobalContext().disable_cpu_fallback) { + !GetGlobalContext().disable_cpu_fallback && + !ep_ctx_handle_.IsValidOVEPCtxGraph()) { LOGS_DEFAULT(WARNING) << ex.what(); LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU." << "Falling back to OV CPU for execution"; diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc index 29c45916795d3..b4269b8262b1f 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc @@ -139,42 +139,50 @@ common::Status OpenVINOExecutionProvider::Compile( // During backend creation, we check if user wants to use precompiled blob onnx model or the original model // For precompiled blob, directly load the model instead of compiling the model // For original model, check if the user wants to export a model with pre-compiled blob - - std::shared_ptr backend_manager = - std::make_shared(*global_context_, - fused_node, - graph_body_viewer, - *GetLogger(), - ep_ctx_handle_); - - compute_info.create_state_func = - [backend_manager](ComputeContext* context, FunctionState* state) { - OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState(); - p->allocate_func = context->allocate_func; - p->destroy_func = context->release_func; - p->allocator_handle = context->allocator_handle; - p->backend_manager = backend_manager; - *state = static_cast(p); - return 0; - }; - compute_info.compute_func = [](FunctionState state, const OrtApi* /* api */, OrtKernelContext* context) { - auto function_state = static_cast(state); - try { - function_state->backend_manager->Compute(context); - } catch (const std::exception& ex) { - return common::Status(common::ONNXRUNTIME, common::FAIL, ex.what()); + try { + std::shared_ptr backend_manager = + std::make_shared(*global_context_, + fused_node, + graph_body_viewer, + *GetLogger(), + ep_ctx_handle_); + compute_info.create_state_func = + [backend_manager](ComputeContext* context, FunctionState* state) { + OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState(); + p->allocate_func = context->allocate_func; + p->destroy_func = context->release_func; + p->allocator_handle = context->allocator_handle; + p->backend_manager = backend_manager; + *state = static_cast(p); + return 0; + }; + compute_info.compute_func = [](FunctionState state, const OrtApi* /* api */, OrtKernelContext* context) { + auto function_state = static_cast(state); + try { + function_state->backend_manager->Compute(context); + } catch (const std::exception& ex) { + return common::Status(common::ONNXRUNTIME, common::FAIL, ex.what()); + } + return Status::OK(); + }; + + compute_info.release_state_func = + [](FunctionState state) { + if (state) { + OpenVINOEPFunctionState* function_state = static_cast(state); + delete function_state; + } + }; + node_compute_funcs.push_back(compute_info); + } catch (const OnnxRuntimeException& ex) { + std::string exception_str = ex.what(); + if (exception_str.find("ZE_RESULT_ERROR_UNKNOWN") != std::string::npos || + exception_str.find("ZE_RESULT_ERROR_UNINITIALIZED") != std::string::npos) { + return Status(common::ONNXRUNTIME, common::EP_FAIL, "Model needs to be recompiled"); + } else { + ORT_THROW(exception_str); } - return Status::OK(); - }; - - compute_info.release_state_func = - [](FunctionState state) { - if (state) { - OpenVINOEPFunctionState* function_state = static_cast(state); - delete function_state; - } - }; - node_compute_funcs.push_back(compute_info); + } } return Status::OK(); From e4ff4babb558e75a1fb7832c68b24375b1eada17 Mon Sep 17 00:00:00 2001 From: Srirammaswamy Date: Fri, 30 Aug 2024 12:07:00 +0530 Subject: [PATCH 2/5] Update NPU device exception handling approach * Changes failure status code to exception (std::runtime_error) * Capture all NPU related errors * Throw minimal error message with error type and error code for Release builds --- .../providers/openvino/backend_manager.cc | 24 +++++- .../openvino/openvino_execution_provider.cc | 78 +++++++++---------- 2 files changed, 57 insertions(+), 45 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 78f77f9b3dc48..fc4e5eb9a5283 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -107,13 +108,14 @@ BackendManager::BackendManager(const GlobalContext& global_context, subgraph_context_, ep_ctx_handle_); } catch (const OnnxRuntimeException& ex) { + std::string exception_str = ex.what(); #if defined(OPENVINO_DISABLE_NPU_FALLBACK) - ORT_THROW(ex.what()); + ORT_THROW(exception_str); #else if (device_type.find("NPU") != std::string::npos && !GetGlobalContext().disable_cpu_fallback && !ep_ctx_handle_.IsValidOVEPCtxGraph()) { - LOGS_DEFAULT(WARNING) << ex.what(); + LOGS_DEFAULT(WARNING) << exception_str; LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU." << "Falling back to OV CPU for execution"; GetGlobalContext().device_type = "CPU"; @@ -126,6 +128,24 @@ BackendManager::BackendManager(const GlobalContext& global_context, } catch (std::string const& msg) { ORT_THROW(msg); } + } else if (device_type.find("NPU") != std::string::npos && + exception_str.find("intel_npu") != std::string::npos) { + // Handle NPU device related errors +#ifndef NDEBUG + ORT_THROW(exception_str + "\nModel needs to be recompiled\n"); +#endif + std::string error_message = "UNKNOWN NPU ERROR"; + std::string error_code = "code 0x0"; + std::regex error_message_pattern(R"(\bZE_\w*\b)"); + std::regex error_code_pattern("code 0x[0-9a-fA-F]+"); + std::smatch matches; + if (std::regex_search(exception_str, matches, error_message_pattern)) { + error_message = matches[0]; + } + if (std::regex_search(exception_str, matches, error_code_pattern)) { + error_code = matches[0]; + } + throw std::runtime_error(error_message + ", " + error_code + "\nModel needs to be recompiled\n"); } else { ORT_THROW(ex.what()); } diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc index b4269b8262b1f..29c45916795d3 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc @@ -139,50 +139,42 @@ common::Status OpenVINOExecutionProvider::Compile( // During backend creation, we check if user wants to use precompiled blob onnx model or the original model // For precompiled blob, directly load the model instead of compiling the model // For original model, check if the user wants to export a model with pre-compiled blob - try { - std::shared_ptr backend_manager = - std::make_shared(*global_context_, - fused_node, - graph_body_viewer, - *GetLogger(), - ep_ctx_handle_); - compute_info.create_state_func = - [backend_manager](ComputeContext* context, FunctionState* state) { - OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState(); - p->allocate_func = context->allocate_func; - p->destroy_func = context->release_func; - p->allocator_handle = context->allocator_handle; - p->backend_manager = backend_manager; - *state = static_cast(p); - return 0; - }; - compute_info.compute_func = [](FunctionState state, const OrtApi* /* api */, OrtKernelContext* context) { - auto function_state = static_cast(state); - try { - function_state->backend_manager->Compute(context); - } catch (const std::exception& ex) { - return common::Status(common::ONNXRUNTIME, common::FAIL, ex.what()); - } - return Status::OK(); - }; - - compute_info.release_state_func = - [](FunctionState state) { - if (state) { - OpenVINOEPFunctionState* function_state = static_cast(state); - delete function_state; - } - }; - node_compute_funcs.push_back(compute_info); - } catch (const OnnxRuntimeException& ex) { - std::string exception_str = ex.what(); - if (exception_str.find("ZE_RESULT_ERROR_UNKNOWN") != std::string::npos || - exception_str.find("ZE_RESULT_ERROR_UNINITIALIZED") != std::string::npos) { - return Status(common::ONNXRUNTIME, common::EP_FAIL, "Model needs to be recompiled"); - } else { - ORT_THROW(exception_str); + + std::shared_ptr backend_manager = + std::make_shared(*global_context_, + fused_node, + graph_body_viewer, + *GetLogger(), + ep_ctx_handle_); + + compute_info.create_state_func = + [backend_manager](ComputeContext* context, FunctionState* state) { + OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState(); + p->allocate_func = context->allocate_func; + p->destroy_func = context->release_func; + p->allocator_handle = context->allocator_handle; + p->backend_manager = backend_manager; + *state = static_cast(p); + return 0; + }; + compute_info.compute_func = [](FunctionState state, const OrtApi* /* api */, OrtKernelContext* context) { + auto function_state = static_cast(state); + try { + function_state->backend_manager->Compute(context); + } catch (const std::exception& ex) { + return common::Status(common::ONNXRUNTIME, common::FAIL, ex.what()); } - } + return Status::OK(); + }; + + compute_info.release_state_func = + [](FunctionState state) { + if (state) { + OpenVINOEPFunctionState* function_state = static_cast(state); + delete function_state; + } + }; + node_compute_funcs.push_back(compute_info); } return Status::OK(); From b5400cbab5a00408a7dc6d5ebf8f8e088f39bac5 Mon Sep 17 00:00:00 2001 From: "S, Srirammaswamy" Date: Mon, 2 Sep 2024 17:13:47 +0530 Subject: [PATCH 3/5] Fix lint issues --- .../providers/openvino/backend_manager.cc | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index fc4e5eb9a5283..b86444927740a 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -75,24 +75,24 @@ BackendManager::BackendManager(const GlobalContext& global_context, "QDQ stripping should not be enabled for models with dynamic input shapes. " "Set enable_qdq_optimizer to False"); if ((GetGlobalContext().device_type.find("CPU") != std::string::npos || - GetGlobalContext().device_type.find("GPU") != std::string::npos) && + GetGlobalContext().device_type.find("GPU") != std::string::npos) && !GetGlobalContext().disable_dynamic_shapes) { - LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Starting backend initialization. " - << "Creating backend Dynamic Shapes"; - try { - concrete_backend_ = BackendFactory::MakeBackend(model_proto, - GetGlobalContext(), - subgraph_context_, - ep_ctx_handle_); - } catch (std::string const& msg) { - ORT_THROW(msg); - } - LOGS_DEFAULT(INFO) << "[OpenVINO-EP] " - << "Backend created for graph " << subgraph_context_.subgraph_name; + LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Starting backend initialization. " + << "Creating backend Dynamic Shapes"; + try { + concrete_backend_ = BackendFactory::MakeBackend(model_proto, + GetGlobalContext(), + subgraph_context_, + ep_ctx_handle_); + } catch (std::string const& msg) { + ORT_THROW(msg); + } + LOGS_DEFAULT(INFO) << "[OpenVINO-EP] " + << "Backend created for graph " << subgraph_context_.subgraph_name; } else { - // Only cache model_proto in global to rewrite the model with input shapes at runtime. - // For dynamic backend creation - model_proto_ = std::move(model_proto); + // Only cache model_proto in global to rewrite the model with input shapes at runtime. + // For dynamic backend creation + model_proto_ = std::move(model_proto); } } else { LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has concrete input dims. " From 574e82a9143e178fb4e094fec6b0c62b27e368ef Mon Sep 17 00:00:00 2001 From: "S, Srirammaswamy" Date: Tue, 3 Sep 2024 14:49:48 +0530 Subject: [PATCH 4/5] Address review comments --- .../core/providers/openvino/backend_manager.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index b86444927740a..a4e1a9367be80 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -109,12 +109,12 @@ BackendManager::BackendManager(const GlobalContext& global_context, ep_ctx_handle_); } catch (const OnnxRuntimeException& ex) { std::string exception_str = ex.what(); + bool enable_cpu_fallback = !GetGlobalContext().disable_cpu_fallback; #if defined(OPENVINO_DISABLE_NPU_FALLBACK) - ORT_THROW(exception_str); -#else + enable_cpu_fallback = false; +#endif if (device_type.find("NPU") != std::string::npos && - !GetGlobalContext().disable_cpu_fallback && - !ep_ctx_handle_.IsValidOVEPCtxGraph()) { + enable_cpu_fallback && !ep_ctx_handle_.IsValidOVEPCtxGraph()) { LOGS_DEFAULT(WARNING) << exception_str; LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU." << "Falling back to OV CPU for execution"; @@ -147,9 +147,8 @@ BackendManager::BackendManager(const GlobalContext& global_context, } throw std::runtime_error(error_message + ", " + error_code + "\nModel needs to be recompiled\n"); } else { - ORT_THROW(ex.what()); + ORT_THROW(exception_str); } -#endif } } if (global_context_.export_ep_ctx_blob && !ep_ctx_handle_.IsValidOVEPCtxGraph()) { From 4203dcb1135f90a8a4629a9afca14fab28eaa3c2 Mon Sep 17 00:00:00 2001 From: "S, Srirammaswamy" Date: Tue, 3 Sep 2024 16:46:45 +0530 Subject: [PATCH 5/5] Address review comments --- .../providers/openvino/backend_manager.cc | 52 +++++++++++-------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index a4e1a9367be80..4fca4037301fb 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -109,13 +109,14 @@ BackendManager::BackendManager(const GlobalContext& global_context, ep_ctx_handle_); } catch (const OnnxRuntimeException& ex) { std::string exception_str = ex.what(); - bool enable_cpu_fallback = !GetGlobalContext().disable_cpu_fallback; + bool eligible_for_cpu_fallback = device_type.find("NPU") != std::string::npos && + !GetGlobalContext().disable_cpu_fallback && + !ep_ctx_handle_.IsValidOVEPCtxGraph(); #if defined(OPENVINO_DISABLE_NPU_FALLBACK) - enable_cpu_fallback = false; -#endif - if (device_type.find("NPU") != std::string::npos && - enable_cpu_fallback && !ep_ctx_handle_.IsValidOVEPCtxGraph()) { - LOGS_DEFAULT(WARNING) << exception_str; + eligible_for_cpu_fallback = false; +#else + if (eligible_for_cpu_fallback) { + LOGS_DEFAULT(VERBOSE) << exception_str; LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU." << "Falling back to OV CPU for execution"; GetGlobalContext().device_type = "CPU"; @@ -128,26 +129,31 @@ BackendManager::BackendManager(const GlobalContext& global_context, } catch (std::string const& msg) { ORT_THROW(msg); } - } else if (device_type.find("NPU") != std::string::npos && - exception_str.find("intel_npu") != std::string::npos) { - // Handle NPU device related errors + } +#endif + if (!eligible_for_cpu_fallback) { + if (device_type.find("NPU") != std::string::npos && + exception_str.find("intel_npu") != std::string::npos) { + // Handle NPU device related errors #ifndef NDEBUG - ORT_THROW(exception_str + "\nModel needs to be recompiled\n"); + ORT_THROW(exception_str + "\nModel needs to be recompiled\n"); +#else + std::string error_message = "UNKNOWN NPU ERROR"; + std::string error_code = "code 0x0"; + std::regex error_message_pattern(R"(\bZE_\w*\b)"); + std::regex error_code_pattern("code 0x[0-9a-fA-F]+"); + std::smatch matches; + if (std::regex_search(exception_str, matches, error_message_pattern)) { + error_message = matches[0]; + } + if (std::regex_search(exception_str, matches, error_code_pattern)) { + error_code = matches[0]; + } + throw std::runtime_error(error_message + ", " + error_code + "\nModel needs to be recompiled\n"); #endif - std::string error_message = "UNKNOWN NPU ERROR"; - std::string error_code = "code 0x0"; - std::regex error_message_pattern(R"(\bZE_\w*\b)"); - std::regex error_code_pattern("code 0x[0-9a-fA-F]+"); - std::smatch matches; - if (std::regex_search(exception_str, matches, error_message_pattern)) { - error_message = matches[0]; - } - if (std::regex_search(exception_str, matches, error_code_pattern)) { - error_code = matches[0]; + } else { + ORT_THROW(exception_str); } - throw std::runtime_error(error_message + ", " + error_code + "\nModel needs to be recompiled\n"); - } else { - ORT_THROW(exception_str); } } }