From 11e7ee50bacb51d37307b91917d07294a5e24db6 Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Tue, 5 Nov 2024 20:46:36 -0800 Subject: [PATCH 001/188] First commit for openvino backend --- backends/openvino/CMakeLists.txt | 52 +++++++ backends/openvino/__init__.py | 4 + backends/openvino/partitioner.py | 112 +++++++++++++++ backends/openvino/preprocess.py | 45 ++++++ backends/openvino/requirements.txt | 8 ++ backends/openvino/runtime/OpenvinoBackend.cpp | 134 ++++++++++++++++++ 6 files changed, 355 insertions(+) create mode 100644 backends/openvino/CMakeLists.txt create mode 100644 backends/openvino/__init__.py create mode 100644 backends/openvino/partitioner.py create mode 100644 backends/openvino/preprocess.py create mode 100644 backends/openvino/requirements.txt create mode 100644 backends/openvino/runtime/OpenvinoBackend.cpp diff --git a/backends/openvino/CMakeLists.txt b/backends/openvino/CMakeLists.txt new file mode 100644 index 00000000000..9bb67fc97eb --- /dev/null +++ b/backends/openvino/CMakeLists.txt @@ -0,0 +1,52 @@ +cmake_minimum_required(VERSION 3.19) +project(openvino_backend) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +# Source root directory for executorch. +if(NOT EXECUTORCH_ROOT) + set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..) +endif() + +include(${EXECUTORCH_ROOT}/build/Utils.cmake) + +set(_common_include_directories ${EXECUTORCH_ROOT}/..) 
+ +# Set openvino directory from environment +set(OPENVINO_DIR "$ENV{INTEL_OPENVINO_DIR}") +set(OPENVINO_INCLUDE_DIRS ${OPENVINO_DIR}/deployment_tools/inference_engine/include ${OPENVINO_DIR}/runtime/include) +message("${OPENVINO_DIR}/runtime/include/openvino") + +# Define the source files for the OpenVINO backend +set(_openvino_backend_sources backends/openvino/runtime/OpenvinoBackend.cpp) + +list(TRANSFORM _openvino_backend_sources PREPEND "${EXECUTORCH_ROOT}/") + +# Add the OpenVINO backend library +add_library(openvino_backend STATIC ${_openvino_backend_sources}) + +# Include directories for ExecuteTorch and OpenVINO +target_include_directories( + openvino_backend PUBLIC ${_common_include_directories} +) + +target_include_directories( + openvino_backend PUBLIC ${OPENVINO_INCLUDE_DIRS} +) + +set(OPENVINO_LIB_PATH ${OPENVINO_DIR}/runtime/lib/intel64) +set(OPENVINO_LIBS + ${OPENVINO_LIB_PATH}/libopenvino.so + ${OPENVINO_LIB_PATH}/libopenvino_ir_frontend.so.2430 + ${OPENVINO_LIB_PATH}/libopenvino_c.so + ${OPENVINO_LIB_PATH}/libopenvino_intel_cpu_plugin.so + ${OPENVINO_LIB_PATH}/libopenvino_intel_gpu_plugin.so + ${OPENVINO_LIB_PATH}/libopenvino_auto_plugin.so +) + +# Link the OpenVINO library to the backend +target_link_libraries(openvino_backend PRIVATE OPENVINO_LIBS) + diff --git a/backends/openvino/__init__.py b/backends/openvino/__init__.py new file mode 100644 index 00000000000..dac275d3f12 --- /dev/null +++ b/backends/openvino/__init__.py @@ -0,0 +1,4 @@ +from .partitioner import OpenvinoPartitioner +from .preprocess import OpenvinoBackend + +__all__ = [OpenvinoBackend, OpenvinoPartitioner] diff --git a/backends/openvino/partitioner.py b/backends/openvino/partitioner.py new file mode 100644 index 00000000000..2fa20bd8831 --- /dev/null +++ b/backends/openvino/partitioner.py @@ -0,0 +1,112 @@ +# Copyright (c) 2024 MediaTek Inc. +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. 
See the license file in the root +# directory of this source tree for more details. + +from typing import Callable, final, List, Optional, Tuple + +import torch +from executorch.backends.openvino.preprocess import OpenvinoBackend +from executorch.exir.backend.backend_details import CompileSpec +from executorch.exir.backend.partitioner import ( + DelegationSpec, + Partitioner, + PartitionResult, +) +from executorch.exir.backend.utils import tag_constant_data + +from torch.export.exported_program import ExportedProgram +from torch.fx.passes.infra.partitioner import CapabilityBasedPartitioner +from torch.fx.passes.operator_support import OperatorSupportBase +import torch.fx as fx +from openvino.frontend.pytorch.torchdynamo.op_support import OperatorSupport + +class OpenvinoOperatorsSupport(OperatorSupportBase): + + def __init__( + self, + op_types_to_skip: Optional[set] = None, + op_names_to_skip: Optional[set] = None, + ) -> None: + if op_types_to_skip is None: + op_types_to_skip = set() + if op_names_to_skip is None: + op_names_to_skip = set() + + self._op_types_to_skip = op_types_to_skip + self._op_names_to_skip = op_names_to_skip + + def is_node_supported(self, _, node: torch.fx.Node) -> bool: + if node.op != "call_function": + return False + + options = [] + op_type = node.target.__name__ + supported_ops = OperatorSupport(options)._support_dict + if (op_type == "getitem"): + return True + + if ("torch.ops." + str(op_type) in supported_ops): + return True + else: + print("Op not supported: ", "torch.ops." + str(op_type)) + + if op_type in self._op_types_to_skip or node.name in self._op_names_to_skip: + print( + f"[OpenVINO Backend] The {op_type} operator with name '{node.name}' is skipped." 
+ ) + return False + + return False + + +@final +class OpenvinoPartitioner(Partitioner): + + def __init__( + self, + compile_spec: List[CompileSpec], + op_types_to_skip: Optional[set] = None, + op_names_to_skip: Optional[set] = None, + ) -> None: + self.delegation_spec = DelegationSpec(OpenvinoBackend.__name__, compile_spec) + self._op_types_to_skip = op_types_to_skip + self._op_names_to_skip = op_names_to_skip + + def ops_to_not_decompose( + self, + ep: ExportedProgram, + ) -> Tuple[List[torch._ops.OpOverload], Optional[Callable[[torch.fx.Node], bool]]]: + ops_not_decompose = [ + torch.ops.aten.pixel_shuffle.default, + torch.ops.aten.upsample_bilinear2d.default, + torch.ops.aten.upsample_bilinear2d.vec, + torch.ops.aten.upsample_nearest2d.default, + torch.ops.aten.upsample_nearest2d.vec, + ] + return (ops_not_decompose, None) + + def partition(self, exported_program: ExportedProgram) -> PartitionResult: + options = {} + gm = fx.symbolic_trace(exported_program.graph_module) + + partitioner = CapabilityBasedPartitioner( + exported_program.graph_module, + OpenvinoOperatorsSupport(self._op_types_to_skip, self._op_names_to_skip), + allows_single_node_partition=True + ) + partition_list = partitioner.propose_partitions() + + partition_tags = {} + for partition in partition_list: + for node in partition.nodes: + tag = f"tag{partition.id}" + node.meta["delegation_tag"] = tag + partition_tags[tag] = self.delegation_spec + + tag_constant_data(exported_program) + + return PartitionResult( + tagged_exported_program=exported_program, partition_tags=partition_tags + ) diff --git a/backends/openvino/preprocess.py b/backends/openvino/preprocess.py new file mode 100644 index 00000000000..bfb38474797 --- /dev/null +++ b/backends/openvino/preprocess.py @@ -0,0 +1,45 @@ +# Copyright (c) 2024 MediaTek Inc. +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. 
See the license file in the root +# directory of this source tree for more details. + +import contextlib +import struct + +from typing import final, List, cast + +import torch +from executorch.exir.backend.backend_details import ( + BackendDetails, + ExportedProgram, + PreprocessResult, +) +from executorch.exir.backend.compile_spec_schema import CompileSpec +from openvino.frontend.pytorch.torchdynamo.compile import openvino_compile + +SKIP_COMPILE_SPEC_KEYS = {"ImportForever"} + + +@final +class OpenvinoBackend(BackendDetails): + + @classmethod + def preprocess( + cls, edge_program: ExportedProgram, module_compile_spec: List[CompileSpec] + ) -> PreprocessResult: + name_to_node_mappings = {node.name: node for node in edge_program.graph.nodes} + input_names = edge_program.graph_signature.user_inputs + output_names = edge_program.graph_signature.user_outputs + args = [] + for node in edge_program.graph.nodes: + if (node.target in input_names): + args.append( node.meta["val"]) + + input_shapes = [] + output_shapes = [] + + compiled = openvino_compile(edge_program.module(), *args) + model_bytes = compiled.export_model() + + return PreprocessResult(processed_bytes=model_bytes) diff --git a/backends/openvino/requirements.txt b/backends/openvino/requirements.txt new file mode 100644 index 00000000000..7c3de886e27 --- /dev/null +++ b/backends/openvino/requirements.txt @@ -0,0 +1,8 @@ +datasets +huggingface-hub +safetensors +sentencepiece +tokenizers +transformers +piq +pillow diff --git a/backends/openvino/runtime/OpenvinoBackend.cpp b/backends/openvino/runtime/OpenvinoBackend.cpp new file mode 100644 index 00000000000..491335b43b4 --- /dev/null +++ b/backends/openvino/runtime/OpenvinoBackend.cpp @@ -0,0 +1,134 @@ +#include +#include + +#include + +#include +#include +#include +#include +#include + +using namespace std; +using executorch::aten::ScalarType; +using executorch::runtime::ArrayRef; +using executorch::runtime::Backend; +using 
executorch::runtime::BackendExecutionContext; +using executorch::runtime::BackendInitContext; +using executorch::runtime::CompileSpec; +using executorch::runtime::DelegateHandle; +using executorch::runtime::Error; +using executorch::runtime::EValue; +using executorch::runtime::FreeableBuffer; +using executorch::runtime::MemoryAllocator; +using executorch::runtime::Result; + +namespace executorch { +namespace backends { +namespace openvino { + +typedef struct { + std::shared_ptr compiled_model; + std::shared_ptr infer_request; +} ExecutionHandle; + +class OpenvinoBackend final : public ::executorch::runtime::BackendInterface { + public: + OpenvinoBackend() {} + + ~OpenvinoBackend() = default; + + virtual bool is_available() const override { + // Check if OpenVINO runtime is available + return true; + } + + Result init( + BackendInitContext& context, + FreeableBuffer* processed, + ArrayRef compile_specs) const override { + ET_LOG(Info, "OpenvinoBackend::init %p", processed->data()); + + ov::Core core; + const char* data_ptr = static_cast(processed->data()); + size_t data_size = processed->size(); // Use appropriate size function here + + // Copy data to a string or vector + std::string data_string(data_ptr, data_size); + + // Wrap the data in a stream + std::istringstream compiled_stream(data_string); + + auto compiled_model = core.import_model(compiled_stream, "CPU"); //target_device); + + // Allocate an infer request + std::shared_ptr infer_request = std::make_shared(compiled_model.create_infer_request()); + + // Allocate execution handle + MemoryAllocator* allocator = context.get_runtime_allocator(); + ExecutionHandle* handle = ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(allocator, ExecutionHandle); + handle->compiled_model = std::make_shared(compiled_model); //compiled_model; + handle->infer_request = infer_request; + + return handle; + } + + Error execute( + BackendExecutionContext& context, + DelegateHandle* input_handle, + EValue** args) const override { + 
ExecutionHandle* execution_handle = (ExecutionHandle*)input_handle; + + auto infer_request = execution_handle->infer_request; + + // Assume first argument is the input tensor + auto input_tensor = args[0]->toTensor(); + ov::Shape input_shape(input_tensor.sizes().begin(), input_tensor.sizes().end()); + + // Convert input tensor to OpenVINO tensor + ov::element::Type ov_type = convert_to_openvino_type(input_tensor.scalar_type()); + ov::Tensor ov_input_tensor(ov_type, input_shape, input_tensor.mutable_data_ptr()); + + infer_request->set_tensor("input", ov_input_tensor); + + // Execute the inference + infer_request->infer(); + + // Retrieve and copy output + auto output_tensor = args[1]->toTensor(); // Assume second argument is the output + ov::Tensor ov_output_tensor = infer_request->get_tensor("output"); + + std::memcpy(output_tensor.mutable_data_ptr(), ov_output_tensor.data(), ov_output_tensor.get_byte_size()); + + return Error::Ok; + } + + void destroy(DelegateHandle* handle) const override { + return; + } + + private: + ov::element::Type convert_to_openvino_type(ScalarType scalar_type) const { + // Convert ExecuteTorch scalar types to OpenVINO element types + switch (scalar_type) { + case ScalarType::Float: + return ov::element::f32; + case ScalarType::Int: + return ov::element::i32; + case ScalarType::Char: + return ov::element::i8; + default: + throw std::runtime_error("Unsupported scalar type"); + } + } +}; + +namespace { +auto backend = OpenvinoBackend(); +Backend backend_id{"OpenvinoBackend", &backend}; +static auto registered = register_backend(backend_id); +} // namespace + +} // namespace openvino +} // namespace backends +} // namespace executorch From be927aa4160a8893341d0ea2c31cc6d5b2e81213 Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Mon, 11 Nov 2024 21:38:06 -0800 Subject: [PATCH 002/188] Added example for openvino backend --- backends/openvino/CMakeLists.txt | 30 ++-- backends/openvino/runtime/OpenvinoBackend.cpp | 29 ++-- 
examples/openvino/CMakeLists.txt | 83 ++++++++++ .../openvino_executor_runner.cpp | 145 ++++++++++++++++++ .../executor_runner/ov_executor_runner.cpp | 120 +++++++++++++++ examples/openvino/openvino_build.sh | 53 +++++++ 6 files changed, 436 insertions(+), 24 deletions(-) create mode 100644 examples/openvino/CMakeLists.txt create mode 100644 examples/openvino/executor_runner/openvino_executor_runner.cpp create mode 100644 examples/openvino/executor_runner/ov_executor_runner.cpp create mode 100755 examples/openvino/openvino_build.sh diff --git a/backends/openvino/CMakeLists.txt b/backends/openvino/CMakeLists.txt index 9bb67fc97eb..e6be4f14d79 100644 --- a/backends/openvino/CMakeLists.txt +++ b/backends/openvino/CMakeLists.txt @@ -1,11 +1,12 @@ -cmake_minimum_required(VERSION 3.19) -project(openvino_backend) - set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +set(_common_include_directories ${CMAKE_CURRENT_SOURCE_DIR}/../../..) + +include_directories(BEFORE ${_common_include_directories}) + # Source root directory for executorch. if(NOT EXECUTORCH_ROOT) set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..) @@ -18,15 +19,10 @@ set(_common_include_directories ${EXECUTORCH_ROOT}/..) 
# Set openvino directory from environment set(OPENVINO_DIR "$ENV{INTEL_OPENVINO_DIR}") set(OPENVINO_INCLUDE_DIRS ${OPENVINO_DIR}/deployment_tools/inference_engine/include ${OPENVINO_DIR}/runtime/include) -message("${OPENVINO_DIR}/runtime/include/openvino") - -# Define the source files for the OpenVINO backend -set(_openvino_backend_sources backends/openvino/runtime/OpenvinoBackend.cpp) - -list(TRANSFORM _openvino_backend_sources PREPEND "${EXECUTORCH_ROOT}/") # Add the OpenVINO backend library -add_library(openvino_backend STATIC ${_openvino_backend_sources}) +add_library(openvino_backend SHARED) +target_compile_options(openvino_backend PRIVATE "-frtti" "-fexceptions") # Include directories for ExecuteTorch and OpenVINO target_include_directories( @@ -40,7 +36,7 @@ target_include_directories( set(OPENVINO_LIB_PATH ${OPENVINO_DIR}/runtime/lib/intel64) set(OPENVINO_LIBS ${OPENVINO_LIB_PATH}/libopenvino.so - ${OPENVINO_LIB_PATH}/libopenvino_ir_frontend.so.2430 + ${OPENVINO_LIB_PATH}/libopenvino_ir_frontend.so.2450 ${OPENVINO_LIB_PATH}/libopenvino_c.so ${OPENVINO_LIB_PATH}/libopenvino_intel_cpu_plugin.so ${OPENVINO_LIB_PATH}/libopenvino_intel_gpu_plugin.so @@ -48,5 +44,15 @@ set(OPENVINO_LIBS ) # Link the OpenVINO library to the backend -target_link_libraries(openvino_backend PRIVATE OPENVINO_LIBS) +target_link_libraries(openvino_backend PRIVATE ${OPENVINO_LIBS} executorch_core) + +target_sources( + openvino_backend + PRIVATE ${CMAKE_CURRENT_LIST_DIR}/runtime/OpenvinoBackend.cpp +) + +target_link_options_shared_lib(openvino_backend) +install(TARGETS openvino_backend DESTINATION lib) + + diff --git a/backends/openvino/runtime/OpenvinoBackend.cpp b/backends/openvino/runtime/OpenvinoBackend.cpp index 491335b43b4..baf2915e59d 100644 --- a/backends/openvino/runtime/OpenvinoBackend.cpp +++ b/backends/openvino/runtime/OpenvinoBackend.cpp @@ -1,5 +1,6 @@ #include #include +#include #include @@ -34,7 +35,7 @@ typedef struct { class OpenvinoBackend final : public 
::executorch::runtime::BackendInterface { public: - OpenvinoBackend() {} + OpenvinoBackend() {std::cout << "In OV Backend constructor" << std::endl;} ~OpenvinoBackend() = default; @@ -50,8 +51,9 @@ class OpenvinoBackend final : public ::executorch::runtime::BackendInterface { ET_LOG(Info, "OpenvinoBackend::init %p", processed->data()); ov::Core core; + const char* data_ptr = static_cast(processed->data()); - size_t data_size = processed->size(); // Use appropriate size function here + size_t data_size = processed->size(); // Copy data to a string or vector std::string data_string(data_ptr, data_size); @@ -59,7 +61,7 @@ class OpenvinoBackend final : public ::executorch::runtime::BackendInterface { // Wrap the data in a stream std::istringstream compiled_stream(data_string); - auto compiled_model = core.import_model(compiled_stream, "CPU"); //target_device); + auto compiled_model = core.import_model(compiled_stream, "CPU"); // Allocate an infer request std::shared_ptr infer_request = std::make_shared(compiled_model.create_infer_request()); @@ -67,7 +69,7 @@ class OpenvinoBackend final : public ::executorch::runtime::BackendInterface { // Allocate execution handle MemoryAllocator* allocator = context.get_runtime_allocator(); ExecutionHandle* handle = ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(allocator, ExecutionHandle); - handle->compiled_model = std::make_shared(compiled_model); //compiled_model; + handle->compiled_model = std::make_shared(compiled_model); handle->infer_request = infer_request; return handle; @@ -89,14 +91,15 @@ class OpenvinoBackend final : public ::executorch::runtime::BackendInterface { ov::element::Type ov_type = convert_to_openvino_type(input_tensor.scalar_type()); ov::Tensor ov_input_tensor(ov_type, input_shape, input_tensor.mutable_data_ptr()); - infer_request->set_tensor("input", ov_input_tensor); + //infer_request->set_tensor("input", ov_input_tensor); + infer_request->set_input_tensor(0, ov_input_tensor); // Execute the inference 
infer_request->infer(); // Retrieve and copy output auto output_tensor = args[1]->toTensor(); // Assume second argument is the output - ov::Tensor ov_output_tensor = infer_request->get_tensor("output"); + ov::Tensor ov_output_tensor = infer_request->get_output_tensor(0); //get_tensor("output"); std::memcpy(output_tensor.mutable_data_ptr(), ov_output_tensor.data(), ov_output_tensor.get_byte_size()); @@ -123,12 +126,14 @@ class OpenvinoBackend final : public ::executorch::runtime::BackendInterface { } }; -namespace { -auto backend = OpenvinoBackend(); -Backend backend_id{"OpenvinoBackend", &backend}; -static auto registered = register_backend(backend_id); -} // namespace - } // namespace openvino } // namespace backends } // namespace executorch + +namespace { +auto backend = executorch::backends::openvino::OpenvinoBackend(); +executorch::runtime::Backend backend_id{"OpenvinoBackend", &backend}; +static auto registered = executorch::runtime::register_backend(backend_id); +} // namespace + + diff --git a/examples/openvino/CMakeLists.txt b/examples/openvino/CMakeLists.txt new file mode 100644 index 00000000000..31903042c04 --- /dev/null +++ b/examples/openvino/CMakeLists.txt @@ -0,0 +1,83 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +set(CMAKE_CXX_STANDARD 17) + +cmake_minimum_required(VERSION 3.19) +project(openvino_runner_example) + +# Source root directory for executorch. +if(NOT EXECUTORCH_ROOT) + set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..) +endif() + +include(${EXECUTORCH_ROOT}/build/Utils.cmake) +include(${EXECUTORCH_ROOT}/build/Codegen.cmake) + +if(NOT PYTHON_EXECUTABLE) + resolve_python_executable() +endif() + +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Debug) +endif() + +set(_common_compile_options -Wno-deprecated-declarations -fPIC) + +# Let files say "include ". 
+set(_common_include_directories ${EXECUTORCH_ROOT}/..) + +# +# The `__srcs` lists are defined by including ${EXECUTORCH_SRCS_FILE}. +# +set(EXECUTORCH_SRCS_FILE + "${CMAKE_CURRENT_BINARY_DIR}/../../../build/executorch_srcs.cmake" +) +extract_sources(${EXECUTORCH_SRCS_FILE}) +include(${EXECUTORCH_SRCS_FILE}) + +set(_openvino_executor_runner__srcs ${CMAKE_CURRENT_LIST_DIR}/../openvino/executor_runner/openvino_executor_runner.cpp) + +# preprocess executor runner src files +list(PREPEND _openvino_executor_runner__srcs + ${CMAKE_CURRENT_LIST_DIR}/../openvino/executor_runner/openvino_executor_runner.cpp +) + +# build executor runner +add_executable(openvino_executor_runner ${_openvino_executor_runner__srcs}) +target_include_directories( + openvino_executor_runner PUBLIC ${_common_include_directories} +) + +# Set the path to the library directory +set(LIBRARY_DIR "/home/icx-6338/ynimmaga/executorch_new/executorch/cmake-openvino-out/lib/") + +# List the libraries you want to link (without the 'lib' prefix and file extension) +set(LIBRARIES_TO_LINK ${LIBRARY_DIR}/libopenvino_backend.so + ${LIBRARY_DIR}/libexecutorch.a + ${LIBRARY_DIR}/libexecutorch_core.a + ${EXECUTORCH_ROOT}/third-party/gflags/build/lib/libgflags_nothreads.a + ${LIBRARY_DIR}/libpthreadpool.a + ${LIBRARY_DIR}/libextension_data_loader.a + ${LIBRARY_DIR}/libextension_runner_util.a +) + +# Add the library directory to the link search path +link_directories(${LIBRARY_DIR}) + +# Link all libraries at once +target_link_libraries(openvino_executor_runner PRIVATE ${LIBRARIES_TO_LINK}) + +set_target_properties( + openvino_executor_runner PROPERTIES LINK_FLAGS "-Wl,-rpath='$ORIGIN'" +) + + +get_filename_component( + EXECUTORCH_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../.." 
ABSOLUTE +) + + diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp new file mode 100644 index 00000000000..86b975fe007 --- /dev/null +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -0,0 +1,145 @@ +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +static uint8_t method_allocator_pool[4 * 1024U * 1024U]; // 4 MB + +DEFINE_string( + model_path, + "/home/icx-6338/ynimmaga/delegate.pte", //"model.pte", + "Model serialized in flatbuffer format."); +DEFINE_int32(iteration, 1, "Iterations of inference."); + +using executorch::extension::FileDataLoader; +using executorch::extension::prepare_input_tensors; +using executorch::runtime::Error; +using executorch::runtime::EValue; +using executorch::runtime::HierarchicalAllocator; +using executorch::runtime::MemoryAllocator; +using executorch::runtime::MemoryManager; +using executorch::runtime::Method; +using executorch::runtime::MethodMeta; +using executorch::runtime::Program; +using executorch::runtime::Result; +using executorch::runtime::Span; + +int main(int argc, char** argv) { + executorch::runtime::runtime_init(); + + gflags::ParseCommandLineFlags(&argc, &argv, true); + if (argc != 1) { + std::string msg = "Extra commandline args:"; + for (int i = 1; i < argc; i++) { + msg += " " + std::string(argv[i]); + } + ET_LOG(Error, "%s", msg.c_str()); + return 1; + } + + const char* model_path = FLAGS_model_path.c_str(); + Result loader = FileDataLoader::from(model_path); + ET_CHECK_MSG( + loader.ok(), + "FileDataLoader::from() failed: 0x%" PRIx32, + static_cast(loader.error())); + + Result program = Program::load(&loader.get()); + if (!program.ok()) { + ET_LOG(Error, "Failed to parse model file %s", model_path); + return 1; + } + ET_LOG(Info, "Model file %s is loaded.", model_path); + + const char* method_name = nullptr; + { + const auto 
method_name_result = program->get_method_name(0); + ET_CHECK_MSG(method_name_result.ok(), "Program has no methods"); + method_name = *method_name_result; + } + ET_LOG(Info, "Using method %s", method_name); + + Result method_meta = program->method_meta(method_name); + ET_CHECK_MSG( + method_meta.ok(), + "Failed to get method_meta for %s: 0x%" PRIx32, + method_name, + static_cast(method_meta.error())); + + MemoryAllocator method_allocator{ + MemoryAllocator(sizeof(method_allocator_pool), method_allocator_pool)}; + + std::vector> planned_buffers; + std::vector> planned_spans; + size_t num_memory_planned_buffers = method_meta->num_memory_planned_buffers(); + for (size_t id = 0; id < num_memory_planned_buffers; ++id) { + size_t buffer_size = + static_cast(method_meta->memory_planned_buffer_size(id).get()); + ET_LOG(Info, "Setting up planned buffer %zu, size %zu.", id, buffer_size); + planned_buffers.push_back(std::make_unique(buffer_size)); + planned_spans.push_back({planned_buffers.back().get(), buffer_size}); + } + HierarchicalAllocator planned_memory( + {planned_spans.data(), planned_spans.size()}); + + MemoryManager memory_manager(&method_allocator, &planned_memory); + + Result method = program->load_method(method_name, &memory_manager); + ET_CHECK_MSG( + method.ok(), + "Loading of method %s failed with status 0x%" PRIx32, + method_name, + static_cast(method.error())); + ET_LOG(Info, "Method loaded."); + + auto inputs = prepare_input_tensors(*method); + ET_CHECK_MSG( + inputs.ok(), + "Could not prepare inputs: 0x%" PRIx32, + static_cast(inputs.error())); + ET_LOG(Info, "Inputs prepared."); + + auto before_exec = std::chrono::high_resolution_clock::now(); + Error status = Error::Ok; + for (int i = 0; i < FLAGS_iteration; ++i) { + status = method->execute(); + } + auto after_exec = std::chrono::high_resolution_clock::now(); + double elapsed_time = std::chrono::duration_cast( + after_exec - before_exec) + .count() / 1000.0; + + ET_LOG( + Info, + "%d inference took %f 
ms, avg %f ms", + FLAGS_iteration, + elapsed_time, + elapsed_time / static_cast(FLAGS_iteration)); + ET_CHECK_MSG( + status == Error::Ok, + "Execution of method %s failed with status 0x%" PRIx32, + method_name, + static_cast(status)); + ET_LOG(Info, "Model executed successfully."); + + std::vector outputs(method->outputs_size()); + ET_LOG(Info, "%zu outputs: ", outputs.size()); + status = method->get_outputs(outputs.data(), outputs.size()); + ET_CHECK(status == Error::Ok); + //std::cout << executorch::extension::evalue_edge_items(100); + //for (int i = 0; i < outputs.size(); ++i) { + // std::cout << "Output " << i << ": " << outputs[i] << std::endl; + //} + + return 0; +} diff --git a/examples/openvino/executor_runner/ov_executor_runner.cpp b/examples/openvino/executor_runner/ov_executor_runner.cpp new file mode 100644 index 00000000000..d0be48fdcc9 --- /dev/null +++ b/examples/openvino/executor_runner/ov_executor_runner.cpp @@ -0,0 +1,120 @@ +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +/* +using executorch::extension::FileDataLoader; +using executorch::extension::prepare_input_tensors; +using executorch::runtime::Error; +using executorch::runtime::EValue; +using executorch::runtime::HierarchicalAllocator; +using executorch::runtime::MemoryAllocator; +using executorch::runtime::MemoryManager; +using executorch::runtime::Method; +using executorch::runtime::MethodMeta; +using executorch::runtime::Program; +using executorch::runtime::Result; +using executorch::runtime::Span; +*/ +using executorch::aten::Tensor; +using executorch::aten::TensorImpl; +using executorch::extension::FileDataLoader; +//using executorch::extension::MallocMemoryAllocator; +using executorch::extension::prepare_input_tensors; +using executorch::runtime::Error; +using executorch::runtime::EValue; +using executorch::runtime::HierarchicalAllocator; +using executorch::runtime::MemoryAllocator; +using 
executorch::runtime::MemoryManager; +using executorch::runtime::Method; +using executorch::runtime::MethodMeta; +using executorch::runtime::Program; +using executorch::runtime::Result; +using executorch::runtime::Span; + +int main() { +Result loader = + FileDataLoader::from("/home/icx-6338/ynimmaga/delegate.pte"); +assert(loader.ok()); + +Result program = Program::load(&loader.get()); +assert(program.ok()); + +// Method names map back to Python nn.Module method names. Most users will only +// have the singular method "forward". +const char* method_name = "forward"; + +// MethodMeta is a lightweight structure that lets us gather metadata +// information about a specific method. In this case we are looking to get the +// required size of the memory planned buffers for the method "forward". +Result method_meta = program->method_meta(method_name); +assert(method_meta.ok()); + +std::vector> planned_buffers; // Owns the Memory +std::vector> planned_arenas; // Passed to the allocator + +size_t num_memory_planned_buffers = method_meta->num_memory_planned_buffers(); + +// It is possible to have multiple layers in our memory hierarchy; for example, +// SRAM and DRAM. +for (size_t id = 0; id < num_memory_planned_buffers; ++id) { + // .get() will always succeed because id < num_memory_planned_buffers. + size_t buffer_size = + static_cast(method_meta->memory_planned_buffer_size(id).get()); + planned_buffers.push_back(std::make_unique(buffer_size)); + planned_arenas.push_back({planned_buffers.back().get(), buffer_size}); +} + +HierarchicalAllocator planned_memory( + {planned_arenas.data(), planned_arenas.size()}); + +// Version of MemoryAllocator that uses malloc to handle allocations rather then +// a fixed buffer. +//MallocMemoryAllocator method_allocator; +MemoryAllocator method_allocator{ + MemoryAllocator(sizeof(method_allocator_pool), method_allocator_pool)}; + +// Assemble all of the allocators into the MemoryManager that the Executor will +// use. 
+MemoryManager memory_manager(&method_allocator, &planned_memory); + +Result method = program->load_method(method_name); +assert(method.ok()); + +// Create our input tensor. +float data[1 * 3 * 256 * 256]; +Tensor::SizesType sizes[] = {1, 3, 256, 256}; +Tensor::DimOrderType dim_order = {0, 1, 2, 3}; +TensorImpl impl( + ScalarType::Float, // dtype + 4, // number of dimensions + sizes, + data, + dim_order); +Tensor t(&impl); + +// Implicitly casts t to EValue +Error set_input_error = method->set_input(t, 0); +assert(set_input_error == Error::Ok); + +Error execute_error = method->execute(); +assert(execute_error == Error::Ok); + +EValue output = method->get_output(0); +assert(output.isTensor()); + +return 0; + +} diff --git a/examples/openvino/openvino_build.sh b/examples/openvino/openvino_build.sh new file mode 100755 index 00000000000..f53679cc910 --- /dev/null +++ b/examples/openvino/openvino_build.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +# Exit immediately if a command exits with a non-zero status. 
+set -e + +# Define the directory where CMakeLists.txt is located +EXECUTORCH_ROOT=$(realpath "$(dirname "$0")/../..") +echo EXECUTORCH_ROOT=${EXECUTORCH_ROOT} + +main() { + # Set build directory + local build_dir="cmake-openvino-out" + + # Create and enter the build directory + cd "$EXECUTORCH_ROOT" + rm -rf "${build_dir}" + + # Configure the project with CMake + # Note: Add any additional configuration options you need here + cmake -DCMAKE_INSTALL_PREFIX="${build_dir}" \ + -DEXECUTORCH_BUILD_OPENVINO=ON \ + -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ + -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ + -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \ + -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ + -B"${build_dir}" + + + # Build the project + cmake --build cmake-openvino-out --target install --config Release -j5 + + ## Build example + local example_dir=examples/openvino + local example_build_dir="${build_dir}/${example_dir}" + local cmake_prefix_path="${PWD}/${build_dir}/lib/cmake/ExecuTorch;${PWD}/${build_dir}/third-party/gflags;" + rm -rf "${example_build_dir}" + + ## MTK original + cmake -DCMAKE_PREFIX_PATH="${cmake_prefix_path}" \ + -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \ + -B"${example_build_dir}" \ + $EXECUTORCH_ROOT/$example_dir + + cmake --build "${example_build_dir}" -j5 + + # Switch back to the original directory + cd - > /dev/null + + # Print a success message + echo "Build successfully completed." 
+} + +main "$@" From 06759effd9bed0cb4e49423e7e6db6b8c5bcbf79 Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Mon, 11 Nov 2024 21:49:19 -0800 Subject: [PATCH 003/188] Updated CMakeLists.txt to add openvino build option --- CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 156fb24e6b6..4db56ab71ca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -191,6 +191,8 @@ option(EXECUTORCH_BUILD_MPS "Build the MPS backend" OFF) option(EXECUTORCH_BUILD_NEURON "Build the backends/mediatek directory" OFF) +option(EXECUTORCH_BUILD_OPENVINO "Build the Openvino backend" ON) + option(EXECUTORCH_BUILD_PYBIND "Build the Python Bindings" OFF) option(EXECUTORCH_BUILD_QNN "Build the Qualcomm backend" OFF) @@ -621,6 +623,10 @@ if(EXECUTORCH_BUILD_NEURON) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/mediatek) endif() +if(EXECUTORCH_BUILD_OPENVINO) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/openvino) +endif() + if(EXECUTORCH_BUILD_QNN) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/qualcomm) endif() From 2b5f599a23e0d87a98735a7e4701e45757e2e33f Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Tue, 12 Nov 2024 22:02:01 -0800 Subject: [PATCH 004/188] Updated headers for openvino aot steps --- backends/openvino/partitioner.py | 6 - backends/openvino/preprocess.py | 6 - build/executorch_srcs.cmake | 448 +++++++++++++++++++++++++++++++ 3 files changed, 448 insertions(+), 12 deletions(-) create mode 100644 build/executorch_srcs.cmake diff --git a/backends/openvino/partitioner.py b/backends/openvino/partitioner.py index 2fa20bd8831..b0d0e18a0d7 100644 --- a/backends/openvino/partitioner.py +++ b/backends/openvino/partitioner.py @@ -1,9 +1,3 @@ -# Copyright (c) 2024 MediaTek Inc. -# -# Licensed under the BSD License (the "License"); you may not use this file -# except in compliance with the License. See the license file in the root -# directory of this source tree for more details. 
- from typing import Callable, final, List, Optional, Tuple import torch diff --git a/backends/openvino/preprocess.py b/backends/openvino/preprocess.py index bfb38474797..4dd89b2f8a3 100644 --- a/backends/openvino/preprocess.py +++ b/backends/openvino/preprocess.py @@ -1,9 +1,3 @@ -# Copyright (c) 2024 MediaTek Inc. -# -# Licensed under the BSD License (the "License"); you may not use this file -# except in compliance with the License. See the license file in the root -# directory of this source tree for more details. - import contextlib import struct diff --git a/build/executorch_srcs.cmake b/build/executorch_srcs.cmake new file mode 100644 index 00000000000..a44fe650da2 --- /dev/null +++ b/build/executorch_srcs.cmake @@ -0,0 +1,448 @@ +# @generated by extract_sources.py + +set(_executorch__srcs + kernels/prim_ops/et_copy_index.cpp + kernels/prim_ops/et_view.cpp + kernels/prim_ops/register_prim_ops.cpp +) + +set(_executorch_core__srcs + runtime/backend/interface.cpp + runtime/core/evalue.cpp + runtime/core/exec_aten/util/tensor_util_portable.cpp + runtime/core/portable_type/tensor_impl.cpp + runtime/executor/method.cpp + runtime/executor/method_meta.cpp + runtime/executor/program.cpp + runtime/executor/tensor_parser_exec_aten.cpp + runtime/executor/tensor_parser_portable.cpp + runtime/kernel/operator_registry.cpp + runtime/platform/abort.cpp + runtime/platform/default/posix.cpp + runtime/platform/log.cpp + runtime/platform/profiler.cpp + runtime/platform/runtime.cpp + schema/extended_header.cpp +) + +set(_portable_kernels__srcs + kernels/portable/cpu/op__to_dim_order_copy.cpp + kernels/portable/cpu/op_abs.cpp + kernels/portable/cpu/op_acos.cpp + kernels/portable/cpu/op_acosh.cpp + kernels/portable/cpu/op_add.cpp + kernels/portable/cpu/op_addmm.cpp + kernels/portable/cpu/op_alias_copy.cpp + kernels/portable/cpu/op_allclose.cpp + kernels/portable/cpu/op_amax.cpp + kernels/portable/cpu/op_amin.cpp + kernels/portable/cpu/op_any.cpp + kernels/portable/cpu/op_arange.cpp 
+ kernels/portable/cpu/op_argmax.cpp + kernels/portable/cpu/op_argmin.cpp + kernels/portable/cpu/op_as_strided_copy.cpp + kernels/portable/cpu/op_asin.cpp + kernels/portable/cpu/op_asinh.cpp + kernels/portable/cpu/op_atan.cpp + kernels/portable/cpu/op_atan2.cpp + kernels/portable/cpu/op_atanh.cpp + kernels/portable/cpu/op_avg_pool2d.cpp + kernels/portable/cpu/op_bitwise_and.cpp + kernels/portable/cpu/op_bitwise_not.cpp + kernels/portable/cpu/op_bitwise_or.cpp + kernels/portable/cpu/op_bitwise_xor.cpp + kernels/portable/cpu/op_bmm.cpp + kernels/portable/cpu/op_cat.cpp + kernels/portable/cpu/op_cdist_forward.cpp + kernels/portable/cpu/op_ceil.cpp + kernels/portable/cpu/op_clamp.cpp + kernels/portable/cpu/op_clone.cpp + kernels/portable/cpu/op_constant_pad_nd.cpp + kernels/portable/cpu/op_convolution.cpp + kernels/portable/cpu/op_convolution_backward.cpp + kernels/portable/cpu/op_copy.cpp + kernels/portable/cpu/op_cos.cpp + kernels/portable/cpu/op_cosh.cpp + kernels/portable/cpu/op_cumsum.cpp + kernels/portable/cpu/op_detach_copy.cpp + kernels/portable/cpu/op_diagonal_copy.cpp + kernels/portable/cpu/op_div.cpp + kernels/portable/cpu/op_embedding.cpp + kernels/portable/cpu/op_empty.cpp + kernels/portable/cpu/op_eq.cpp + kernels/portable/cpu/op_erf.cpp + kernels/portable/cpu/op_exp.cpp + kernels/portable/cpu/op_expand_copy.cpp + kernels/portable/cpu/op_expm1.cpp + kernels/portable/cpu/op_fill.cpp + kernels/portable/cpu/op_flip.cpp + kernels/portable/cpu/op_floor.cpp + kernels/portable/cpu/op_floor_divide.cpp + kernels/portable/cpu/op_fmod.cpp + kernels/portable/cpu/op_full.cpp + kernels/portable/cpu/op_full_like.cpp + kernels/portable/cpu/op_gather.cpp + kernels/portable/cpu/op_ge.cpp + kernels/portable/cpu/op_gelu.cpp + kernels/portable/cpu/op_glu.cpp + kernels/portable/cpu/op_gt.cpp + kernels/portable/cpu/op_hardtanh.cpp + kernels/portable/cpu/op_index.cpp + kernels/portable/cpu/op_index_put.cpp + kernels/portable/cpu/op_index_select.cpp + 
kernels/portable/cpu/op_isinf.cpp + kernels/portable/cpu/op_isnan.cpp + kernels/portable/cpu/op_le.cpp + kernels/portable/cpu/op_leaky_relu.cpp + kernels/portable/cpu/op_lift_fresh_copy.cpp + kernels/portable/cpu/op_linear_scratch_example.cpp + kernels/portable/cpu/op_log.cpp + kernels/portable/cpu/op_log10.cpp + kernels/portable/cpu/op_log1p.cpp + kernels/portable/cpu/op_log2.cpp + kernels/portable/cpu/op_log_softmax.cpp + kernels/portable/cpu/op_logical_and.cpp + kernels/portable/cpu/op_logical_not.cpp + kernels/portable/cpu/op_logical_or.cpp + kernels/portable/cpu/op_logical_xor.cpp + kernels/portable/cpu/op_logit.cpp + kernels/portable/cpu/op_lt.cpp + kernels/portable/cpu/op_masked_fill.cpp + kernels/portable/cpu/op_masked_scatter.cpp + kernels/portable/cpu/op_max.cpp + kernels/portable/cpu/op_max_pool2d_with_indices.cpp + kernels/portable/cpu/op_maximum.cpp + kernels/portable/cpu/op_mean.cpp + kernels/portable/cpu/op_min.cpp + kernels/portable/cpu/op_minimum.cpp + kernels/portable/cpu/op_mm.cpp + kernels/portable/cpu/op_mul.cpp + kernels/portable/cpu/op_narrow_copy.cpp + kernels/portable/cpu/op_native_batch_norm.cpp + kernels/portable/cpu/op_native_group_norm.cpp + kernels/portable/cpu/op_native_layer_norm.cpp + kernels/portable/cpu/op_ne.cpp + kernels/portable/cpu/op_neg.cpp + kernels/portable/cpu/op_nonzero.cpp + kernels/portable/cpu/op_ones.cpp + kernels/portable/cpu/op_pdist_forward.cpp + kernels/portable/cpu/op_permute_copy.cpp + kernels/portable/cpu/op_pixel_shuffle.cpp + kernels/portable/cpu/op_pixel_unshuffle.cpp + kernels/portable/cpu/op_pow.cpp + kernels/portable/cpu/op_prod.cpp + kernels/portable/cpu/op_reciprocal.cpp + kernels/portable/cpu/op_reflection_pad1d.cpp + kernels/portable/cpu/op_reflection_pad2d.cpp + kernels/portable/cpu/op_reflection_pad3d.cpp + kernels/portable/cpu/op_relu.cpp + kernels/portable/cpu/op_remainder.cpp + kernels/portable/cpu/op_repeat.cpp + kernels/portable/cpu/op_replication_pad1d.cpp + 
kernels/portable/cpu/op_replication_pad2d.cpp + kernels/portable/cpu/op_replication_pad3d.cpp + kernels/portable/cpu/op_roll.cpp + kernels/portable/cpu/op_round.cpp + kernels/portable/cpu/op_rsqrt.cpp + kernels/portable/cpu/op_rsub.cpp + kernels/portable/cpu/op_scalar_tensor.cpp + kernels/portable/cpu/op_scatter.cpp + kernels/portable/cpu/op_scatter_add.cpp + kernels/portable/cpu/op_select_copy.cpp + kernels/portable/cpu/op_select_scatter.cpp + kernels/portable/cpu/op_sigmoid.cpp + kernels/portable/cpu/op_sign.cpp + kernels/portable/cpu/op_sin.cpp + kernels/portable/cpu/op_sinh.cpp + kernels/portable/cpu/op_slice_copy.cpp + kernels/portable/cpu/op_slice_scatter.cpp + kernels/portable/cpu/op_softmax.cpp + kernels/portable/cpu/op_split_copy.cpp + kernels/portable/cpu/op_split_with_sizes_copy.cpp + kernels/portable/cpu/op_sqrt.cpp + kernels/portable/cpu/op_squeeze_copy.cpp + kernels/portable/cpu/op_stack.cpp + kernels/portable/cpu/op_sub.cpp + kernels/portable/cpu/op_sum.cpp + kernels/portable/cpu/op_t_copy.cpp + kernels/portable/cpu/op_tan.cpp + kernels/portable/cpu/op_tanh.cpp + kernels/portable/cpu/op_to_copy.cpp + kernels/portable/cpu/op_topk.cpp + kernels/portable/cpu/op_transpose_copy.cpp + kernels/portable/cpu/op_tril.cpp + kernels/portable/cpu/op_trunc.cpp + kernels/portable/cpu/op_unbind_copy.cpp + kernels/portable/cpu/op_unsqueeze_copy.cpp + kernels/portable/cpu/op_var.cpp + kernels/portable/cpu/op_view_copy.cpp + kernels/portable/cpu/op_where.cpp + kernels/portable/cpu/op_zeros.cpp + kernels/portable/cpu/pattern/unary_ufunc_realh.cpp + kernels/portable/cpu/pattern/unary_ufunc_realhb_to_bool.cpp + kernels/portable/cpu/pattern/unary_ufunc_realhbbf16_to_floathbf16.cpp + kernels/portable/cpu/util/activation_ops_util.cpp + kernels/portable/cpu/util/advanced_index_util.cpp + kernels/portable/cpu/util/broadcast_util.cpp + kernels/portable/cpu/util/copy_ops_util.cpp + kernels/portable/cpu/util/distance_util.cpp + kernels/portable/cpu/util/dtype_util.cpp + 
kernels/portable/cpu/util/index_util.cpp + kernels/portable/cpu/util/kernel_ops_util.cpp + kernels/portable/cpu/util/matmul_ops_util.cpp + kernels/portable/cpu/util/normalization_ops_util.cpp + kernels/portable/cpu/util/padding_util.cpp + kernels/portable/cpu/util/reduce_util.cpp + kernels/portable/cpu/util/repeat_util.cpp + kernels/portable/cpu/util/select_copy_util.cpp + kernels/portable/cpu/util/slice_util.cpp +) + +set(_optimized_kernels__srcs + extension/parallel/thread_parallel.cpp + kernels/optimized/blas/BlasKernel.cpp + kernels/optimized/blas/CPUBlas.cpp + kernels/optimized/cpu/op_add.cpp + kernels/optimized/cpu/op_bmm.cpp + kernels/optimized/cpu/op_div.cpp + kernels/optimized/cpu/op_exp.cpp + kernels/optimized/cpu/op_le.cpp + kernels/optimized/cpu/op_linear.cpp + kernels/optimized/cpu/op_mm.cpp + kernels/optimized/cpu/op_mul.cpp + kernels/optimized/cpu/op_native_layer_norm.cpp + kernels/optimized/cpu/op_neg.cpp + kernels/optimized/cpu/op_sub.cpp +) + +set(_quantized_kernels__srcs + kernels/quantized/cpu/embeddingxb.cpp + kernels/quantized/cpu/op_add.cpp + kernels/quantized/cpu/op_choose_qparams.cpp + kernels/quantized/cpu/op_dequantize.cpp + kernels/quantized/cpu/op_embedding.cpp + kernels/quantized/cpu/op_embedding2b.cpp + kernels/quantized/cpu/op_embedding4b.cpp + kernels/quantized/cpu/op_mixed_linear.cpp + kernels/quantized/cpu/op_mixed_mm.cpp + kernels/quantized/cpu/op_quantize.cpp +) + +set(_program_schema__srcs + schema/program.fbs + schema/scalar_type.fbs +) + +set(_optimized_cpublas__srcs + extension/parallel/thread_parallel.cpp + extension/threadpool/threadpool.cpp + extension/threadpool/threadpool_guard.cpp + kernels/optimized/blas/BlasKernel.cpp + kernels/optimized/blas/CPUBlas.cpp +) + +set(_optimized_native_cpu_ops_oss__srcs + codegen/templates/RegisterCodegenUnboxedKernels.cpp + codegen/templates/RegisterDispatchKeyCustomOps.cpp + codegen/templates/RegisterKernels.cpp + codegen/templates/RegisterSchema.cpp + 
extension/parallel/thread_parallel.cpp + extension/threadpool/threadpool.cpp + extension/threadpool/threadpool_guard.cpp + kernels/optimized/blas/BlasKernel.cpp + kernels/optimized/blas/CPUBlas.cpp + kernels/optimized/cpu/op_add.cpp + kernels/optimized/cpu/op_bmm.cpp + kernels/optimized/cpu/op_div.cpp + kernels/optimized/cpu/op_exp.cpp + kernels/optimized/cpu/op_le.cpp + kernels/optimized/cpu/op_linear.cpp + kernels/optimized/cpu/op_mm.cpp + kernels/optimized/cpu/op_mul.cpp + kernels/optimized/cpu/op_native_layer_norm.cpp + kernels/optimized/cpu/op_neg.cpp + kernels/optimized/cpu/op_sub.cpp +) + +set(_extension_data_loader__srcs + extension/data_loader/file_data_loader.cpp + extension/data_loader/mmap_data_loader.cpp +) + +set(_extension_module__srcs + extension/module/module.cpp +) + +set(_extension_runner_util__srcs + extension/runner_util/inputs.cpp + extension/runner_util/inputs_portable.cpp +) + +set(_extension_llm_runner__srcs + extension/data_loader/file_data_loader.cpp + extension/data_loader/mmap_data_loader.cpp + extension/llm/runner/text_decoder_runner.cpp + extension/llm/runner/text_prefiller.cpp + extension/llm/sampler/sampler.cpp + extension/tensor/tensor_ptr.cpp + extension/tensor/tensor_ptr_maker.cpp +) + +set(_extension_tensor__srcs + extension/tensor/tensor_ptr.cpp + extension/tensor/tensor_ptr_maker.cpp +) + +set(_extension_threadpool__srcs + extension/threadpool/threadpool.cpp + extension/threadpool/threadpool_guard.cpp +) + +set(_extension_training__srcs + extension/data_loader/file_data_loader.cpp + extension/data_loader/mmap_data_loader.cpp + extension/module/module.cpp + extension/training/module/training_module.cpp + extension/training/optimizer/sgd.cpp + kernels/prim_ops/et_copy_index.cpp + kernels/prim_ops/et_view.cpp + kernels/prim_ops/register_prim_ops.cpp +) + +set(_train_xor__srcs + extension/data_loader/file_data_loader.cpp + extension/data_loader/mmap_data_loader.cpp + extension/module/module.cpp + extension/tensor/tensor_ptr.cpp + 
extension/tensor/tensor_ptr_maker.cpp + extension/training/examples/XOR/train.cpp + extension/training/module/training_module.cpp + extension/training/optimizer/sgd.cpp +) + +set(_executor_runner__srcs + examples/portable/executor_runner/executor_runner.cpp + extension/data_loader/file_data_loader.cpp + extension/evalue_util/print_evalue.cpp + extension/runner_util/inputs.cpp + extension/runner_util/inputs_portable.cpp + runtime/executor/test/test_backend_compiler_lib.cpp +) + +set(_size_test__srcs + extension/data_loader/file_data_loader.cpp + test/size_test.cpp +) + +set(_mps_executor_runner__srcs + backends/apple/mps/runtime/MPSBackend.mm + backends/apple/mps/runtime/MPSCompiler.mm + backends/apple/mps/runtime/MPSDelegateHeader.mm + backends/apple/mps/runtime/MPSDevice.mm + backends/apple/mps/runtime/MPSExecutor.mm + backends/apple/mps/runtime/MPSGraphBuilder.mm + backends/apple/mps/runtime/MPSStream.mm + backends/apple/mps/runtime/operations/ActivationOps.mm + backends/apple/mps/runtime/operations/BinaryOps.mm + backends/apple/mps/runtime/operations/ClampOps.mm + backends/apple/mps/runtime/operations/ConstantOps.mm + backends/apple/mps/runtime/operations/ConvolutionOps.mm + backends/apple/mps/runtime/operations/IndexingOps.mm + backends/apple/mps/runtime/operations/LinearAlgebra.mm + backends/apple/mps/runtime/operations/NormalizationOps.mm + backends/apple/mps/runtime/operations/OperationUtils.mm + backends/apple/mps/runtime/operations/PadOps.mm + backends/apple/mps/runtime/operations/PoolingOps.mm + backends/apple/mps/runtime/operations/QuantDequant.mm + backends/apple/mps/runtime/operations/RangeOps.mm + backends/apple/mps/runtime/operations/ReduceOps.mm + backends/apple/mps/runtime/operations/ShapeOps.mm + backends/apple/mps/runtime/operations/UnaryOps.mm + devtools/bundled_program/bundled_program.cpp + devtools/etdump/emitter.cpp + devtools/etdump/etdump_flatcc.cpp + examples/apple/mps/executor_runner/mps_executor_runner.mm + 
extension/data_loader/file_data_loader.cpp + extension/evalue_util/print_evalue.cpp + extension/runner_util/inputs.cpp + extension/runner_util/inputs_portable.cpp +) + +set(_mps_backend__srcs + backends/apple/mps/runtime/MPSBackend.mm + backends/apple/mps/runtime/MPSCompiler.mm + backends/apple/mps/runtime/MPSDelegateHeader.mm + backends/apple/mps/runtime/MPSDevice.mm + backends/apple/mps/runtime/MPSExecutor.mm + backends/apple/mps/runtime/MPSGraphBuilder.mm + backends/apple/mps/runtime/MPSStream.mm + backends/apple/mps/runtime/operations/ActivationOps.mm + backends/apple/mps/runtime/operations/BinaryOps.mm + backends/apple/mps/runtime/operations/ClampOps.mm + backends/apple/mps/runtime/operations/ConstantOps.mm + backends/apple/mps/runtime/operations/ConvolutionOps.mm + backends/apple/mps/runtime/operations/IndexingOps.mm + backends/apple/mps/runtime/operations/LinearAlgebra.mm + backends/apple/mps/runtime/operations/NormalizationOps.mm + backends/apple/mps/runtime/operations/OperationUtils.mm + backends/apple/mps/runtime/operations/PadOps.mm + backends/apple/mps/runtime/operations/PoolingOps.mm + backends/apple/mps/runtime/operations/QuantDequant.mm + backends/apple/mps/runtime/operations/RangeOps.mm + backends/apple/mps/runtime/operations/ReduceOps.mm + backends/apple/mps/runtime/operations/ShapeOps.mm + backends/apple/mps/runtime/operations/UnaryOps.mm +) + +set(_mps_schema__srcs + backends/apple/mps/serialization/schema.fbs +) + +set(_xnn_executor_runner__srcs + examples/portable/executor_runner/executor_runner.cpp + extension/data_loader/file_data_loader.cpp + extension/evalue_util/print_evalue.cpp + extension/runner_util/inputs.cpp + extension/runner_util/inputs_portable.cpp +) + +set(_xnnpack_backend__srcs + backends/xnnpack/runtime/XNNCompiler.cpp + backends/xnnpack/runtime/XNNExecutor.cpp + backends/xnnpack/runtime/XNNHeader.cpp + backends/xnnpack/runtime/XNNPACKBackend.cpp + backends/xnnpack/runtime/profiling/XNNProfiler.cpp + 
extension/threadpool/threadpool.cpp + extension/threadpool/threadpool_guard.cpp +) + +set(_xnnpack_schema__srcs + backends/xnnpack/serialization/runtime_schema.fbs +) + +set(_vulkan_schema__srcs + backends/vulkan/serialization/schema.fbs +) + +set(_custom_ops__srcs + extension/llm/custom_ops/op_fallback.cpp + extension/llm/custom_ops/op_fast_hadamard_transform.cpp + extension/llm/custom_ops/op_sdpa.cpp + extension/llm/custom_ops/op_update_quantized_cache.cpp + extension/llm/custom_ops/spinquant/fast_hadamard_transform.cpp + extension/llm/custom_ops/spinquant/third-party/FFHT/fht_avx.c + kernels/portable/cpu/util/reduce_util.cpp +) + +set(_llama_runner__srcs + examples/models/llama/runner/runner.cpp + examples/models/llama/tokenizer/llama_tiktoken.cpp + extension/evalue_util/print_evalue.cpp + extension/llm/runner/text_decoder_runner.cpp + extension/llm/runner/text_prefiller.cpp + extension/llm/sampler/sampler.cpp + extension/llm/tokenizer/bpe_tokenizer.cpp + extension/llm/tokenizer/tiktoken.cpp + extension/tensor/tensor_ptr.cpp + extension/tensor/tensor_ptr_maker.cpp +) \ No newline at end of file From 86f685eb301a064bbefef4b27157dfc317b54172 Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Wed, 13 Nov 2024 14:52:10 -0800 Subject: [PATCH 005/188] Fixed library path errors in cmake --- backends/openvino/CMakeLists.txt | 4 ++-- examples/openvino/CMakeLists.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/backends/openvino/CMakeLists.txt b/backends/openvino/CMakeLists.txt index e6be4f14d79..08fccf2faae 100644 --- a/backends/openvino/CMakeLists.txt +++ b/backends/openvino/CMakeLists.txt @@ -35,8 +35,8 @@ target_include_directories( set(OPENVINO_LIB_PATH ${OPENVINO_DIR}/runtime/lib/intel64) set(OPENVINO_LIBS - ${OPENVINO_LIB_PATH}/libopenvino.so - ${OPENVINO_LIB_PATH}/libopenvino_ir_frontend.so.2450 + ${OPENVINO_LIB_PATH}/libopenvino.so.2025.0.0 + ${OPENVINO_LIB_PATH}/libopenvino_ir_frontend.so.2025.0.0 ${OPENVINO_LIB_PATH}/libopenvino_c.so 
${OPENVINO_LIB_PATH}/libopenvino_intel_cpu_plugin.so ${OPENVINO_LIB_PATH}/libopenvino_intel_gpu_plugin.so diff --git a/examples/openvino/CMakeLists.txt b/examples/openvino/CMakeLists.txt index 31903042c04..0a37f777e2d 100644 --- a/examples/openvino/CMakeLists.txt +++ b/examples/openvino/CMakeLists.txt @@ -53,7 +53,7 @@ target_include_directories( ) # Set the path to the library directory -set(LIBRARY_DIR "/home/icx-6338/ynimmaga/executorch_new/executorch/cmake-openvino-out/lib/") +set(LIBRARY_DIR "${CMAKE_CURRENT_LIST_DIR}/../../cmake-openvino-out/lib/") # List the libraries you want to link (without the 'lib' prefix and file extension) set(LIBRARIES_TO_LINK ${LIBRARY_DIR}/libopenvino_backend.so From bb12f60da69b6a83af96b8beb6d70caa8f7500fc Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Wed, 20 Nov 2024 14:23:58 -0800 Subject: [PATCH 006/188] Configure device with compile spec --- backends/openvino/preprocess.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/backends/openvino/preprocess.py b/backends/openvino/preprocess.py index 4dd89b2f8a3..96df9faba85 100644 --- a/backends/openvino/preprocess.py +++ b/backends/openvino/preprocess.py @@ -22,6 +22,7 @@ class OpenvinoBackend(BackendDetails): def preprocess( cls, edge_program: ExportedProgram, module_compile_spec: List[CompileSpec] ) -> PreprocessResult: + name_to_node_mappings = {node.name: node for node in edge_program.graph.nodes} input_names = edge_program.graph_signature.user_inputs output_names = edge_program.graph_signature.user_outputs @@ -33,7 +34,11 @@ def preprocess( input_shapes = [] output_shapes = [] - compiled = openvino_compile(edge_program.module(), *args) + compile_options = {} + for spec in module_compile_spec: + compile_options[spec.key] = spec.value.decode() + + compiled = openvino_compile(edge_program.module(), *args, options=compile_options) model_bytes = compiled.export_model() return PreprocessResult(processed_bytes=model_bytes) From 
c9c896747c76a862360b7003534788ebcca8c317 Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Thu, 21 Nov 2024 14:51:01 -0800 Subject: [PATCH 007/188] Added aot compiler script for openvino --- examples/openvino/aot/README.md | 88 +++++++++++++++++++ .../openvino/aot/aot_openvino_compiler.py | 74 ++++++++++++++++ 2 files changed, 162 insertions(+) create mode 100644 examples/openvino/aot/README.md create mode 100644 examples/openvino/aot/aot_openvino_compiler.py diff --git a/examples/openvino/aot/README.md b/examples/openvino/aot/README.md new file mode 100644 index 00000000000..6c59f1dad41 --- /dev/null +++ b/examples/openvino/aot/README.md @@ -0,0 +1,88 @@ +# **Model Export Script for Executorch** + +This script allows users to export deep learning models from various model suites (TIMM, Torchvision, Hugging Face) to a openvino backend using **Executorch**. Users can dynamically specify the model, input shape, and target device. + + +## **Usage** + +### **Command Structure** +```bash +python aot_openvino_compiler.py --suite --model --input_shape --device +``` + +### **Arguments** +- **`--suite`** (required): + Specifies the model suite to use. + Supported values: + - `timm` (e.g., VGG16, ResNet50) + - `torchvision` (e.g., resnet18, mobilenet_v2) + - `huggingface` (e.g., bert-base-uncased) + +- **`--model`** (required): + Name of the model to export. + Examples: + - For `timm`: `vgg16`, `resnet50` + - For `torchvision`: `resnet18`, `mobilenet_v2` + - For `huggingface`: `bert-base-uncased`, `distilbert-base-uncased` + +- **`--input_shape`** (required): + Input shape for the model. Provide this as a **list** or **tuple**. + Examples: + - `[1, 3, 224, 224]` (Zsh users: wrap in quotes) + - `(1, 3, 224, 224)` + +- **`--device`** (optional): + Target device for the compiled model. Default is `CPU`. 
+ Examples: `CPU`, `GPU` + +## **Examples** + +### Export a TIMM VGG16 model for the CPU +```bash +python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU +``` + +### Export a Torchvision ResNet50 model for the GPU +```bash +python aot_openvino_compiler.py --suite torchvision --model resnet50 --input_shape "(1, 3, 256, 256)" --device GPU +``` + +### Export a Hugging Face BERT model for the CPU +```bash +python aot_openvino_compiler.py --suite huggingface --model bert-base-uncased --input_shape "(1, 512)" --device CPU +``` + +## **Notes** +1. **Input Shape in Zsh**: + If you are using Zsh, wrap `--input_shape` in quotes or use a tuple: + ```bash + --input_shape '[1, 3, 224, 224]' + --input_shape "(1, 3, 224, 224)" + ``` + +2. **Model Compatibility**: + Ensure the specified `model_name` exists in the selected `suite`. Use the corresponding library's documentation to verify model availability. + +3. **Output File**: + The exported model will be saved as `.pte` in the current directory. + +4. **Dependencies**: + - Python 3.8+ + - PyTorch + - Executorch + - TIMM (`pip install timm`) + - Torchvision + - Transformers (`pip install transformers`) + +## **Error Handling** +- **Model Not Found**: + If the script raises an error such as: + ```bash + ValueError: Model not found + ``` + Verify that the model name is correct for the chosen suite. + +- **Unsupported Input Shape**: + Ensure `--input_shape` is provided as a valid list or tuple. 
+ + diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py new file mode 100644 index 00000000000..f2f26b03951 --- /dev/null +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -0,0 +1,74 @@ +import executorch +import timm +import torch +import torchvision.models as torchvision_models +from transformers import AutoModel +from executorch.exir.backend.backend_details import CompileSpec +from executorch.backends.openvino.preprocess import OpenvinoBackend +from executorch.backends.openvino.partitioner import OpenvinoPartitioner +from executorch.exir import EdgeProgramManager, to_edge +from torch.export import export, ExportedProgram +from torch.export.exported_program import ExportedProgram +import argparse + +# Function to load a model based on the selected suite +def load_model(suite: str, model_name: str): + if suite == "timm": + return timm.create_model(model_name, pretrained=True) + elif suite == "torchvision": + if not hasattr(torchvision_models, model_name): + raise ValueError(f"Model {model_name} not found in torchvision.") + return getattr(torchvision_models, model_name)(pretrained=True) + elif suite == "huggingface": + return AutoModel.from_pretrained(model_name) + else: + raise ValueError(f"Unsupported model suite: {suite}") + +def main(suite: str, model_name: str, input_shape, device: str): + # Ensure input_shape is a tuple + if isinstance(input_shape, list): + input_shape = tuple(input_shape) + elif not isinstance(input_shape, tuple): + raise ValueError("Input shape must be a list or tuple.") + + # Load the selected model + model = load_model(suite, model_name) + model = model.eval() + + # Provide input + example_args = (torch.randn(*input_shape), ) + + # Export to aten dialect using torch.export + aten_dialect: ExportedProgram = export(model, example_args) + + # Convert to edge dialect + edge_program: EdgeProgramManager = to_edge(aten_dialect) + to_be_lowered_module = 
edge_program.exported_program() + + # Lower the module to the backend with a custom partitioner + compile_spec = [CompileSpec("device", device.encode())] + lowered_module = edge_program.to_backend(OpenvinoPartitioner(compile_spec)) + + # Apply backend-specific passes + exec_prog = lowered_module.to_executorch(config=executorch.exir.ExecutorchBackendConfig()) + + # Serialize and save it to a file + with open(f"{model_name}.pte", "wb") as file: + exec_prog.write_to_file(file) + print(f"Model exported and saved as {model_name}.pte on {device}.") + +if __name__ == "__main__": + # Argument parser for dynamic inputs + parser = argparse.ArgumentParser(description="Export models with executorch.") + parser.add_argument("--suite", type=str, required=True, choices=["timm", "torchvision", "huggingface"], + help="Select the model suite (timm, torchvision, huggingface).") + parser.add_argument("--model", type=str, required=True, help="Model name to be loaded.") + parser.add_argument("--input_shape", type=eval, required=True, + help="Input shape for the model as a list or tuple (e.g., [1, 3, 224, 224] or (1, 3, 224, 224)).") + parser.add_argument("--device", type=str, default="CPU", + help="Target device for compiling the model (e.g., CPU, GPU). 
Default is CPU.")
+
+    args = parser.parse_args()
+
+    # Run the main function with parsed arguments
+    main(args.suite, args.model, args.input_shape, args.device)

From f5bd8324f1a738d579f0d119b236b4a23e55703f Mon Sep 17 00:00:00 2001
From: Yamini Nimmagadda
Date: Tue, 10 Dec 2024 12:25:41 -0800
Subject: [PATCH 008/188] Temporary build instructions

---
 examples/openvino/ReadMe.md | 64 +++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 examples/openvino/ReadMe.md

diff --git a/examples/openvino/ReadMe.md b/examples/openvino/ReadMe.md
new file mode 100644
index 00000000000..13196f5151c
--- /dev/null
+++ b/examples/openvino/ReadMe.md
@@ -0,0 +1,64 @@
+# TODO: Delete and reformat later
+
+## Build Executorch
+
+```bash
+git clone -b openvino_backend https://github.com/ynimmaga/executorch
+cd executorch
+git submodule update --init --recursive
+./install_requirements.sh
+(If not successful) pkill -f buck && ./install_requirements.sh
+```
+
+## Build OpenVINO and source environment variables:
+
+```bash
+git clone -b executorch_ov_backend https://github.com/ynimmaga/openvino
+cd openvino
+git submodule update --init --recursive
+mkdir build
+cd build
+cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_PYTHON=ON -DENABLE_WHEEL=ON
+make -j
+cd wheels
+pip install <openvino_wheel_file>.whl
+
+cd ../..
+cmake --install build --prefix <openvino_install_dir>
+cd <openvino_install_dir>
+source setupvars.sh
+```
+
+## Build gflags:
+
+```bash
+cd third-party/gflags
+mkdir build
+cd build
+cmake ..
+make -j12
+```
+
+## Build OpenVINO example:
+
+```bash
+cd ../../../examples/openvino
+./openvino_build.sh
+```
+
+### AOT step:
+```bash
+cd aot
+python aot_openvino_compiler.py --suite torchvision --model resnet50 --input_shape "(1, 3, 256, 256)" --device CPU
+```
+
+### Update the model.pte in executorch example and rebuild
+```bash
+cd <executorch_root>
+cd examples/openvino/executor_runner
+Update the path of model.pte in openvino_executor_runner.cpp at https://github.com/ynimmaga/executorch/blob/openvino_backend/examples/openvino/executor_runner/openvino_executor_runner.cpp#L20
+
+Rebuild the example using "./openvino_build.sh"
+The executable is in <executorch_root>/cmake-openvino-out/examples/openvino
+./openvino_executor_runner
+```

From f3dc62cc874e62bc0b98fcf081aa678a30b19c20 Mon Sep 17 00:00:00 2001
From: ynimmaga
Date: Tue, 10 Dec 2024 18:12:32 -0800
Subject: [PATCH 009/188] Added standalone build script for openvino backend

---
 backends/openvino/scripts/build.sh  | 40 +++++++++++++++++++++++++++++
 examples/openvino/openvino_build.sh |  2 +-
 2 files changed, 41 insertions(+), 1 deletion(-)
 create mode 100755 backends/openvino/scripts/build.sh

diff --git a/backends/openvino/scripts/build.sh b/backends/openvino/scripts/build.sh
new file mode 100755
index 00000000000..0c07a5bb729
--- /dev/null
+++ b/backends/openvino/scripts/build.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Exit immediately if a command exits with a non-zero status.
+set -e + +# Define the directory where CMakeLists.txt is located +EXECUTORCH_ROOT=$(realpath "$(dirname "$0")/../../..") +echo EXECUTORCH_ROOT=${EXECUTORCH_ROOT} + +main() { + # Set build directory + local build_dir="cmake-openvino-out" + + # Create and enter the build directory + cd "$EXECUTORCH_ROOT" + rm -rf "${build_dir}" + + # Configure the project with CMake + # Note: Add any additional configuration options you need here + cmake -DCMAKE_INSTALL_PREFIX="${build_dir}" \ + -DEXECUTORCH_BUILD_OPENVINO=ON \ + -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ + -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ + -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \ + -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ + -B"${build_dir}" + + + # Build the project + cmake --build cmake-openvino-out --target install --config Release -j5 + + # Switch back to the original directory + cd - > /dev/null + + # Print a success message + echo "Build successfully completed." + +} + +main "$@" diff --git a/examples/openvino/openvino_build.sh b/examples/openvino/openvino_build.sh index f53679cc910..0d2703e5646 100755 --- a/examples/openvino/openvino_build.sh +++ b/examples/openvino/openvino_build.sh @@ -35,7 +35,7 @@ main() { local cmake_prefix_path="${PWD}/${build_dir}/lib/cmake/ExecuTorch;${PWD}/${build_dir}/third-party/gflags;" rm -rf "${example_build_dir}" - ## MTK original + ## OpenVINO original cmake -DCMAKE_PREFIX_PATH="${cmake_prefix_path}" \ -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \ -B"${example_build_dir}" \ From ca852de7332b220686e8f5dcd36ea680286d0c56 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Fri, 13 Dec 2024 15:09:15 -0800 Subject: [PATCH 010/188] Handling multiple inputs/outputs with zero-copy --- backends/openvino/runtime/OpenvinoBackend.cpp | 38 ++++++++++++------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/backends/openvino/runtime/OpenvinoBackend.cpp b/backends/openvino/runtime/OpenvinoBackend.cpp index baf2915e59d..0c2a6c290a1 100644 --- 
a/backends/openvino/runtime/OpenvinoBackend.cpp +++ b/backends/openvino/runtime/OpenvinoBackend.cpp @@ -83,25 +83,35 @@ class OpenvinoBackend final : public ::executorch::runtime::BackendInterface { auto infer_request = execution_handle->infer_request; - // Assume first argument is the input tensor - auto input_tensor = args[0]->toTensor(); - ov::Shape input_shape(input_tensor.sizes().begin(), input_tensor.sizes().end()); + size_t num_inputs = infer_request->get_compiled_model().inputs().size(); + size_t num_outputs = infer_request->get_compiled_model().outputs().size(); - // Convert input tensor to OpenVINO tensor - ov::element::Type ov_type = convert_to_openvino_type(input_tensor.scalar_type()); - ov::Tensor ov_input_tensor(ov_type, input_shape, input_tensor.mutable_data_ptr()); + // Set inputs + for (size_t i = 0; i < num_inputs; i++) { + auto input_tensor = args[i]->toTensor(); + ov::Shape input_shape(input_tensor.sizes().begin(), input_tensor.sizes().end()); - //infer_request->set_tensor("input", ov_input_tensor); - infer_request->set_input_tensor(0, ov_input_tensor); + // Convert input tensor to OpenVINO tensor + ov::element::Type ov_type = convert_to_openvino_type(input_tensor.scalar_type()); + ov::Tensor ov_input_tensor(ov_type, input_shape, input_tensor.mutable_data_ptr()); - // Execute the inference - infer_request->infer(); + infer_request->set_input_tensor(i, ov_input_tensor); + } + + // Set outputs + for (size_t i = 0; i < num_outputs; i++) { + auto output_tensor = args[num_inputs+i]->toTensor(); + ov::Shape output_shape(output_tensor.sizes().begin(), output_tensor.sizes().end()); - // Retrieve and copy output - auto output_tensor = args[1]->toTensor(); // Assume second argument is the output - ov::Tensor ov_output_tensor = infer_request->get_output_tensor(0); //get_tensor("output"); + // Convert input tensor to OpenVINO tensor + ov::element::Type ov_type = convert_to_openvino_type(output_tensor.scalar_type()); + ov::Tensor ov_output_tensor(ov_type, 
output_shape, output_tensor.mutable_data_ptr()); - std::memcpy(output_tensor.mutable_data_ptr(), ov_output_tensor.data(), ov_output_tensor.get_byte_size()); + infer_request->set_output_tensor(i, ov_output_tensor); + } + + // Execute the inference + infer_request->infer(); return Error::Ok; } From 0703814f658383f0856d94fe21783ca543da10d8 Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Mon, 20 Jan 2025 18:38:22 -0800 Subject: [PATCH 011/188] Added fallback with portable kernels --- examples/openvino/CMakeLists.txt | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/examples/openvino/CMakeLists.txt b/examples/openvino/CMakeLists.txt index 0a37f777e2d..d2a2cf89b77 100644 --- a/examples/openvino/CMakeLists.txt +++ b/examples/openvino/CMakeLists.txt @@ -46,6 +46,27 @@ list(PREPEND _openvino_executor_runner__srcs ${CMAKE_CURRENT_LIST_DIR}/../openvino/executor_runner/openvino_executor_runner.cpp ) +find_package(executorch CONFIG REQUIRED) +target_include_directories(executorch INTERFACE ${_common_include_directories}) +target_compile_options(executorch INTERFACE ${_common_compile_options}) + +# portable_ops_lib +gen_selected_ops(LIB_NAME "openvino_portable_ops_lib" INCLUDE_ALL_OPS "ON") +generate_bindings_for_kernels( + LIB_NAME "openvino_portable_ops_lib" FUNCTIONS_YAML + ${EXECUTORCH_ROOT}/kernels/portable/functions.yaml +) +gen_operators_lib( + LIB_NAME "openvino_portable_ops_lib" KERNEL_LIBS portable_kernels DEPS executorch +) +target_compile_options( + openvino_portable_ops_lib INTERFACE -DET_EVENT_TRACER_ENABLED +) +target_include_directories( + openvino_portable_ops_lib PUBLIC ${_common_include_directories} +) + + # build executor runner add_executable(openvino_executor_runner ${_openvino_executor_runner__srcs}) target_include_directories( @@ -69,7 +90,7 @@ set(LIBRARIES_TO_LINK ${LIBRARY_DIR}/libopenvino_backend.so link_directories(${LIBRARY_DIR}) # Link all libraries at once -target_link_libraries(openvino_executor_runner 
PRIVATE ${LIBRARIES_TO_LINK}) +target_link_libraries(openvino_executor_runner PRIVATE ${LIBRARIES_TO_LINK} openvino_portable_ops_lib) set_target_properties( openvino_executor_runner PROPERTIES LINK_FLAGS "-Wl,-rpath='$ORIGIN'" From 0a769bed35cf22bfab835e2100786433c0cde1c6 Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Tue, 21 Jan 2025 14:01:25 -0800 Subject: [PATCH 012/188] Added openvino_functions.yaml containing only unsupported ops --- backends/openvino/openvino_functions.yaml | 258 ++++++++++++++++++++++ examples/openvino/CMakeLists.txt | 2 +- 2 files changed, 259 insertions(+), 1 deletion(-) create mode 100644 backends/openvino/openvino_functions.yaml diff --git a/backends/openvino/openvino_functions.yaml b/backends/openvino/openvino_functions.yaml new file mode 100644 index 00000000000..5e83141a1bb --- /dev/null +++ b/backends/openvino/openvino_functions.yaml @@ -0,0 +1,258 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This yaml file contains operators that are also defined by the ATen library. +# For lean mode: +# - Codegen'd target `executorch_generated_lib` will be reading all the information +# from this file, including operator schema and kernel metadata. +# - Selective build target `codegen:executorch_defined_ops` now is selecting all the +# operators in this file, by dumping all the op names into `selected_operators.yaml`. +# +# For ATen mode: +# - Codegen'd tagret `executorch_generated_lib` will read information from +# `native_functions.yaml` in ATen library, for both operator schema and kernel metadat. +# - Selective build will also look for operator names listed in this file and use them to +# filter the entries in `native_functions.yaml`. Kernel metadata defined in this yaml +# will be ignored. +# +# See the README.md file in this directory for a description of the syntax used +# by this file. 
+ +- op: _cdist_forward.out + kernels: + - arg_meta: null + kernel_name: torch::executor::_cdist_forward_out + +- op: _pdist_forward.out + kernels: + - arg_meta: null + kernel_name: torch::executor::_pdist_forward_out + +- op: alias_copy.out + kernels: + - arg_meta: null + kernel_name: torch::executor::alias_copy_out + +- op: any.all_out + kernels: + - arg_meta: null + kernel_name: torch::executor::any_all_out + +- op: any.dims_out + kernels: + - arg_meta: null + kernel_name: torch::executor::any_dims_out + +- op: atan.out + kernels: + - arg_meta: null + kernel_name: torch::executor::atan_out + +- op: atan2.out + kernels: + - arg_meta: null + kernel_name: torch::executor::atan2_out + +- op: bitwise_or.Scalar_out + kernels: + - arg_meta: null + kernel_name: torch::executor::bitwise_or_Scalar_out + +- op: bitwise_xor.Scalar_out + kernels: + - arg_meta: null + kernel_name: torch::executor::bitwise_xor_Scalar_out + +- op: clamp.Tensor_out + kernels: + - arg_meta: null + kernel_name: torch::executor::clamp_tensor_out + +- op: convolution_backward.out + kernels: + - arg_meta: null + kernel_name: torch::executor::convolution_backward_out + +- op: detach_copy.out + kernels: + - arg_meta: null + kernel_name: torch::executor::detach_copy_out + +- op: diagonal_copy.out + kernels: + - arg_meta: null + kernel_name: torch::executor::diagonal_copy_out + +- op: expm1.out + kernels: + - arg_meta: null + kernel_name: torch::executor::expm1_out + +- op: floor_divide.out + kernels: + - arg_meta: null + kernel_name: torch::executor::floor_divide_out + +- op: index_put.out + kernels: + - arg_meta: null + kernel_name: torch::executor::index_put_out + +- op: logical_and.out + kernels: + - arg_meta: null + kernel_name: torch::executor::logical_and_out + +- op: logical_or.out + kernels: + - arg_meta: null + kernel_name: torch::executor::logical_or_out + +- op: logical_xor.out + kernels: + - arg_meta: null + kernel_name: torch::executor::logical_xor_out + +- op: logit.out + kernels: + - 
arg_meta: null + kernel_name: torch::executor::logit_out + +- op: masked_scatter.out + kernels: + - arg_meta: null + kernel_name: torch::executor::masked_scatter_out + +- op: masked_select.out + kernels: + - arg_meta: null + kernel_name: torch::executor::masked_select_out + +- op: narrow_copy.out + kernels: + - arg_meta: null + kernel_name: torch::executor::narrow_copy_out + +- op: nonzero.out + kernels: + - arg_meta: null + kernel_name: torch::executor::nonzero_out + +- op: pixel_shuffle.out + kernels: + - arg_meta: null + kernel_name: torch::executor::pixel_shuffle_out + +- op: pixel_unshuffle.out + kernels: + - arg_meta: null + kernel_name: torch::executor::pixel_unshuffle_out + +- op: prod.int_out + kernels: + - arg_meta: null + kernel_name: torch::executor::prod_int_out + +- op: prod.out + kernels: + - arg_meta: null + kernel_name: torch::executor::prod_out + +- op: remainder.Tensor_out + kernels: + - arg_meta: null + kernel_name: torch::executor::remainder_Tensor_out + +- op: remainder.Scalar_out + kernels: + - arg_meta: null + kernel_name: torch::executor::remainder_Scalar_out + +- op: repeat_interleave.Tensor_out + kernels: + - arg_meta: null + kernel_name: torch::executor::repeat_interleave_Tensor_out + +- op: reflection_pad1d.out + kernels: + - arg_meta: null + kernel_name: torch::executor::reflection_pad1d_out + +- op: reflection_pad3d.out + kernels: + - arg_meta: null + kernel_name: torch::executor::reflection_pad3d_out + +- op: replication_pad1d.out + kernels: + - arg_meta: null + kernel_name: torch::executor::replication_pad1d_out + +- op: replication_pad2d.out + kernels: + - arg_meta: null + kernel_name: torch::executor::replication_pad2d_out + +- op: replication_pad3d.out + kernels: + - arg_meta: null + kernel_name: torch::executor::replication_pad3d_out + +- op: round.out + kernels: + - arg_meta: null + kernel_name: torch::executor::round_out + +- op: scatter_add.out + kernels: + - arg_meta: null + kernel_name: torch::executor::scatter_add_out + +- 
op: split_copy.Tensor_out + kernels: + - arg_meta: null + kernel_name: torch::executor::split_copy_Tensor_out + +- op: squeeze_copy.dim_out + kernels: + - arg_meta: null + kernel_name: torch::executor::squeeze_copy_dim_out + +- op: sub.Scalar_out + kernels: + - arg_meta: null + kernel_name: torch::executor::sub_scalar_out + +- op: t_copy.out + kernels: + - arg_meta: null + kernel_name: torch::executor::t_copy_out + +- op: transpose_copy.int_out + kernels: + - arg_meta: null + kernel_name: torch::executor::transpose_copy_int_out + +- op: trunc.out + kernels: + - arg_meta: null + kernel_name: torch::executor::trunc_out + +- op: unbind_copy.int_out + kernels: + - arg_meta: null + kernel_name: torch::executor::unbind_copy_int_out + +- op: upsample_bilinear2d.vec_out + kernels: + - arg_meta: null + kernel_name: torch::executor::upsample_bilinear2d_vec_out + +- func: dim_order_ops::_empty_dim_order.out(int[] size, *, int[]? dim_order=None, Tensor(a!) out) -> Tensor(a!) + kernels: + - arg_meta: null + kernel_name: torch::executor::_empty_dim_order_out + +- func: dim_order_ops::_to_dim_order_copy.out(Tensor self, *, bool non_blocking=False, int[]? dim_order=None, Tensor(a!) out) -> Tensor(a!) 
+ kernels: + - arg_meta: null + kernel_name: torch::executor::_to_dim_order_copy_out diff --git a/examples/openvino/CMakeLists.txt b/examples/openvino/CMakeLists.txt index d2a2cf89b77..346f8f3c852 100644 --- a/examples/openvino/CMakeLists.txt +++ b/examples/openvino/CMakeLists.txt @@ -54,7 +54,7 @@ target_compile_options(executorch INTERFACE ${_common_compile_options}) gen_selected_ops(LIB_NAME "openvino_portable_ops_lib" INCLUDE_ALL_OPS "ON") generate_bindings_for_kernels( LIB_NAME "openvino_portable_ops_lib" FUNCTIONS_YAML - ${EXECUTORCH_ROOT}/kernels/portable/functions.yaml + ${EXECUTORCH_ROOT}/backends/openvino/openvino_functions.yaml ) gen_operators_lib( LIB_NAME "openvino_portable_ops_lib" KERNEL_LIBS portable_kernels DEPS executorch From 0e6707d71bbc1e7a0f7bcdd91fb09bc6289df063 Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Tue, 21 Jan 2025 14:28:03 -0800 Subject: [PATCH 013/188] Updated the unsupported ops for fallback --- backends/openvino/openvino_functions.yaml | 20 ++------------------ examples/openvino/CMakeLists.txt | 6 ------ 2 files changed, 2 insertions(+), 24 deletions(-) diff --git a/backends/openvino/openvino_functions.yaml b/backends/openvino/openvino_functions.yaml index 5e83141a1bb..296d57d7320 100644 --- a/backends/openvino/openvino_functions.yaml +++ b/backends/openvino/openvino_functions.yaml @@ -1,21 +1,5 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This yaml file contains operators that are also defined by the ATen library. -# For lean mode: -# - Codegen'd target `executorch_generated_lib` will be reading all the information -# from this file, including operator schema and kernel metadata. -# - Selective build target `codegen:executorch_defined_ops` now is selecting all the -# operators in this file, by dumping all the op names into `selected_operators.yaml`. 
-# -# For ATen mode: -# - Codegen'd tagret `executorch_generated_lib` will read information from -# `native_functions.yaml` in ATen library, for both operator schema and kernel metadat. -# - Selective build will also look for operator names listed in this file and use them to -# filter the entries in `native_functions.yaml`. Kernel metadata defined in this yaml -# will be ignored. -# -# See the README.md file in this directory for a description of the syntax used -# by this file. +# This yaml file contains operators that are unsupported with openvino backend and +# will use portable kernels for fall back - op: _cdist_forward.out kernels: diff --git a/examples/openvino/CMakeLists.txt b/examples/openvino/CMakeLists.txt index 346f8f3c852..961b34efd41 100644 --- a/examples/openvino/CMakeLists.txt +++ b/examples/openvino/CMakeLists.txt @@ -1,9 +1,3 @@ -# Copyright (c) Qualcomm Innovation Center, Inc. -# All rights reserved -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- set(CMAKE_CXX_STANDARD 17) cmake_minimum_required(VERSION 3.19) From 68a1cd44b3d98d0c018d3133043912f8b3bdfdca Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Tue, 21 Jan 2025 15:00:22 -0800 Subject: [PATCH 014/188] Removed redundant example for openvino backend --- .../executor_runner/ov_executor_runner.cpp | 120 ------------------ 1 file changed, 120 deletions(-) delete mode 100644 examples/openvino/executor_runner/ov_executor_runner.cpp diff --git a/examples/openvino/executor_runner/ov_executor_runner.cpp b/examples/openvino/executor_runner/ov_executor_runner.cpp deleted file mode 100644 index d0be48fdcc9..00000000000 --- a/examples/openvino/executor_runner/ov_executor_runner.cpp +++ /dev/null @@ -1,120 +0,0 @@ -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include - -/* -using executorch::extension::FileDataLoader; -using executorch::extension::prepare_input_tensors; -using executorch::runtime::Error; -using executorch::runtime::EValue; -using executorch::runtime::HierarchicalAllocator; -using executorch::runtime::MemoryAllocator; -using executorch::runtime::MemoryManager; -using executorch::runtime::Method; -using executorch::runtime::MethodMeta; -using executorch::runtime::Program; -using executorch::runtime::Result; -using executorch::runtime::Span; -*/ -using executorch::aten::Tensor; -using executorch::aten::TensorImpl; -using executorch::extension::FileDataLoader; -//using executorch::extension::MallocMemoryAllocator; -using executorch::extension::prepare_input_tensors; -using executorch::runtime::Error; -using executorch::runtime::EValue; -using executorch::runtime::HierarchicalAllocator; -using executorch::runtime::MemoryAllocator; -using executorch::runtime::MemoryManager; -using executorch::runtime::Method; -using executorch::runtime::MethodMeta; -using executorch::runtime::Program; -using executorch::runtime::Result; -using executorch::runtime::Span; - -int main() { -Result 
loader = - FileDataLoader::from("/home/icx-6338/ynimmaga/delegate.pte"); -assert(loader.ok()); - -Result program = Program::load(&loader.get()); -assert(program.ok()); - -// Method names map back to Python nn.Module method names. Most users will only -// have the singular method "forward". -const char* method_name = "forward"; - -// MethodMeta is a lightweight structure that lets us gather metadata -// information about a specific method. In this case we are looking to get the -// required size of the memory planned buffers for the method "forward". -Result method_meta = program->method_meta(method_name); -assert(method_meta.ok()); - -std::vector> planned_buffers; // Owns the Memory -std::vector> planned_arenas; // Passed to the allocator - -size_t num_memory_planned_buffers = method_meta->num_memory_planned_buffers(); - -// It is possible to have multiple layers in our memory hierarchy; for example, -// SRAM and DRAM. -for (size_t id = 0; id < num_memory_planned_buffers; ++id) { - // .get() will always succeed because id < num_memory_planned_buffers. - size_t buffer_size = - static_cast(method_meta->memory_planned_buffer_size(id).get()); - planned_buffers.push_back(std::make_unique(buffer_size)); - planned_arenas.push_back({planned_buffers.back().get(), buffer_size}); -} - -HierarchicalAllocator planned_memory( - {planned_arenas.data(), planned_arenas.size()}); - -// Version of MemoryAllocator that uses malloc to handle allocations rather then -// a fixed buffer. -//MallocMemoryAllocator method_allocator; -MemoryAllocator method_allocator{ - MemoryAllocator(sizeof(method_allocator_pool), method_allocator_pool)}; - -// Assemble all of the allocators into the MemoryManager that the Executor will -// use. -MemoryManager memory_manager(&method_allocator, &planned_memory); - -Result method = program->load_method(method_name); -assert(method.ok()); - -// Create our input tensor. 
-float data[1 * 3 * 256 * 256]; -Tensor::SizesType sizes[] = {1, 3, 256, 256}; -Tensor::DimOrderType dim_order = {0, 1, 2, 3}; -TensorImpl impl( - ScalarType::Float, // dtype - 4, // number of dimensions - sizes, - data, - dim_order); -Tensor t(&impl); - -// Implicitly casts t to EValue -Error set_input_error = method->set_input(t, 0); -assert(set_input_error == Error::Ok); - -Error execute_error = method->execute(); -assert(execute_error == Error::Ok); - -EValue output = method->get_output(0); -assert(output.isTensor()); - -return 0; - -} From 4d1b4eb634f382871076fc4a57f535b06c04ad4b Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Tue, 21 Jan 2025 21:49:13 -0800 Subject: [PATCH 015/188] Reconfigured openvino backend functions --- backends/openvino/runtime/OpenvinoBackend.cpp | 134 ++++++++++++------ backends/openvino/runtime/OpenvinoBackend.hpp | 61 ++++++++ 2 files changed, 150 insertions(+), 45 deletions(-) create mode 100644 backends/openvino/runtime/OpenvinoBackend.hpp diff --git a/backends/openvino/runtime/OpenvinoBackend.cpp b/backends/openvino/runtime/OpenvinoBackend.cpp index baf2915e59d..00be8d9a323 100644 --- a/backends/openvino/runtime/OpenvinoBackend.cpp +++ b/backends/openvino/runtime/OpenvinoBackend.cpp @@ -10,6 +10,8 @@ #include #include +#include "OpenvinoBackend.hpp" + using namespace std; using executorch::aten::ScalarType; using executorch::runtime::ArrayRef; @@ -28,30 +30,44 @@ namespace executorch { namespace backends { namespace openvino { -typedef struct { - std::shared_ptr compiled_model; - std::shared_ptr infer_request; -} ExecutionHandle; +OpenvinoBackend::OpenvinoBackend() { + if (!is_available()) { + //ET_LOG(Error, "OpenVINO runtime is not available. 
Initialization failed."); + throw std::runtime_error("OpenVINO runtime not available"); + } -class OpenvinoBackend final : public ::executorch::runtime::BackendInterface { - public: - OpenvinoBackend() {std::cout << "In OV Backend constructor" << std::endl;} + //ET_LOG(Info, "OpenVINO runtime successfully verified and initialized."); +} + +bool OpenvinoBackend::is_available() const { + try { + // Create an OpenVINO Core object to verify runtime availability + ov::Core core; + + // Check if at least one device is available + auto devices = core.get_available_devices(); + if (!devices.empty()) { + return true; // OpenVINO is available + } + } catch (const std::exception& e) { + // Log the exception if OpenVINO runtime is not available + ET_LOG(Error, "OpenVINO is not available: %s", e.what()); + } catch (...) { + // Handle any unexpected errors + ET_LOG(Error, "OpenVINO availability check failed due to an unknown error."); + } - ~OpenvinoBackend() = default; + return false; // OpenVINO is not available +} - virtual bool is_available() const override { - // Check if OpenVINO runtime is available - return true; - } +Result OpenvinoBackend::init( + BackendInitContext& context, + FreeableBuffer* processed, + ArrayRef compile_specs) const { - Result init( - BackendInitContext& context, - FreeableBuffer* processed, - ArrayRef compile_specs) const override { ET_LOG(Info, "OpenvinoBackend::init %p", processed->data()); ov::Core core; - const char* data_ptr = static_cast(processed->data()); size_t data_size = processed->size(); @@ -61,6 +77,7 @@ class OpenvinoBackend final : public ::executorch::runtime::BackendInterface { // Wrap the data in a stream std::istringstream compiled_stream(data_string); + // Import the model auto compiled_model = core.import_model(compiled_stream, "CPU"); // Allocate an infer request @@ -73,46 +90,74 @@ class OpenvinoBackend final : public ::executorch::runtime::BackendInterface { handle->infer_request = infer_request; return handle; - } +} + 
+Error OpenvinoBackend::execute( + BackendExecutionContext& context, + DelegateHandle* input_handle, + EValue** args) const { - Error execute( - BackendExecutionContext& context, - DelegateHandle* input_handle, - EValue** args) const override { ExecutionHandle* execution_handle = (ExecutionHandle*)input_handle; auto infer_request = execution_handle->infer_request; - // Assume first argument is the input tensor - auto input_tensor = args[0]->toTensor(); - ov::Shape input_shape(input_tensor.sizes().begin(), input_tensor.sizes().end()); + size_t num_inputs = infer_request->get_compiled_model().inputs().size(); + size_t num_outputs = infer_request->get_compiled_model().outputs().size(); + + // Set inputs + for (size_t i = 0; i < num_inputs; i++) { + auto input_tensor = args[i]->toTensor(); + ov::Shape input_shape(input_tensor.sizes().begin(), input_tensor.sizes().end()); + + // Convert input tensor to OpenVINO tensor + ov::element::Type ov_type = convert_to_openvino_type(input_tensor.scalar_type()); + ov::Tensor ov_input_tensor(ov_type, input_shape, input_tensor.mutable_data_ptr()); + + infer_request->set_input_tensor(i, ov_input_tensor); + } + + // Set outputs + for (size_t i = 0; i < num_outputs; i++) { + auto output_tensor = args[num_inputs+i]->toTensor(); + ov::Shape output_shape(output_tensor.sizes().begin(), output_tensor.sizes().end()); - // Convert input tensor to OpenVINO tensor - ov::element::Type ov_type = convert_to_openvino_type(input_tensor.scalar_type()); - ov::Tensor ov_input_tensor(ov_type, input_shape, input_tensor.mutable_data_ptr()); + // Convert input tensor to OpenVINO tensor + ov::element::Type ov_type = convert_to_openvino_type(output_tensor.scalar_type()); + ov::Tensor ov_output_tensor(ov_type, output_shape, output_tensor.mutable_data_ptr()); - //infer_request->set_tensor("input", ov_input_tensor); - infer_request->set_input_tensor(0, ov_input_tensor); + infer_request->set_output_tensor(i, ov_output_tensor); + } // Execute the inference 
infer_request->infer(); - // Retrieve and copy output - auto output_tensor = args[1]->toTensor(); // Assume second argument is the output - ov::Tensor ov_output_tensor = infer_request->get_output_tensor(0); //get_tensor("output"); + return Error::Ok; +} - std::memcpy(output_tensor.mutable_data_ptr(), ov_output_tensor.data(), ov_output_tensor.get_byte_size()); +void OpenvinoBackend::destroy(DelegateHandle* handle) const { + if (!handle) { + ET_LOG(Info, "Attempted to destroy a null handle."); + return; + } - return Error::Ok; - } + // Cast the handle to the appropriate type + ExecutionHandle* execution_handle = static_cast(handle); + + // Clean up resources + if (execution_handle->infer_request) { + execution_handle->infer_request.reset(); // Release the infer request + ET_LOG(Info, "Infer request successfully destroyed."); + } + + if (execution_handle->compiled_model) { + execution_handle->compiled_model.reset(); // Release the compiled model + ET_LOG(Info, "Compiled model successfully destroyed."); + } - void destroy(DelegateHandle* handle) const override { - return; - } + ET_LOG(Info, "Delegate handle destroyed successfully."); +} - private: - ov::element::Type convert_to_openvino_type(ScalarType scalar_type) const { - // Convert ExecuteTorch scalar types to OpenVINO element types +ov::element::Type OpenvinoBackend::convert_to_openvino_type(ScalarType scalar_type) const { switch (scalar_type) { case ScalarType::Float: return ov::element::f32; @@ -123,8 +168,7 @@ class OpenvinoBackend final : public ::executorch::runtime::BackendInterface { default: throw std::runtime_error("Unsupported scalar type"); } - } -}; +} } // namespace openvino } // namespace backends @@ -133,7 +177,7 @@ class OpenvinoBackend final : public ::executorch::runtime::BackendInterface { namespace { auto backend = executorch::backends::openvino::OpenvinoBackend(); executorch::runtime::Backend backend_id{"OpenvinoBackend", &backend}; -static auto registered = 
executorch::runtime::register_backend(backend_id); +static auto registered = executorch::runtime::register_backend(backend_id); } // namespace diff --git a/backends/openvino/runtime/OpenvinoBackend.hpp b/backends/openvino/runtime/OpenvinoBackend.hpp new file mode 100644 index 00000000000..a116aa5349c --- /dev/null +++ b/backends/openvino/runtime/OpenvinoBackend.hpp @@ -0,0 +1,61 @@ +#ifndef OPENVINO_BACKEND_HPP +#define OPENVINO_BACKEND_HPP + +#include +#include +#include + +#include +#include +#include +#include +#include + +using namespace std; +using executorch::aten::ScalarType; +using executorch::runtime::ArrayRef; +using executorch::runtime::Backend; +using executorch::runtime::BackendExecutionContext; +using executorch::runtime::BackendInitContext; +using executorch::runtime::CompileSpec; +using executorch::runtime::DelegateHandle; +using executorch::runtime::Error; +using executorch::runtime::EValue; +using executorch::runtime::FreeableBuffer; +using executorch::runtime::MemoryAllocator; +using executorch::runtime::Result; + +namespace executorch { +namespace backends { +namespace openvino { + +typedef struct { + std::shared_ptr compiled_model; + std::shared_ptr infer_request; +} ExecutionHandle; + +class OpenvinoBackend final : public ::executorch::runtime::BackendInterface { + public: + OpenvinoBackend(); + ~OpenvinoBackend() = default; + + virtual bool is_available() const override; + Result init( + BackendInitContext& context, + FreeableBuffer* processed, + ArrayRef compile_specs) const override; + Error execute( + BackendExecutionContext& context, + DelegateHandle* input_handle, + EValue** args) const override; + void destroy(DelegateHandle* handle) const override; + + private: + ov::element::Type convert_to_openvino_type(ScalarType scalar_type) const; +}; + +} // namespace openvino +} // namespace backends +} // namespace executorch + +#endif // OPENVINO_BACKEND_HPP From 5c55a5647c4950a87302e3afc1b69175dd8180f5 Mon Sep 17 00:00:00 2001 From: ynimmaga 
Date: Tue, 21 Jan 2025 22:08:31 -0800 Subject: [PATCH 016/188] Updated openvino backend cmake file --- backends/openvino/CMakeLists.txt | 69 +++++++++++++++++++------------- 1 file changed, 41 insertions(+), 28 deletions(-) diff --git a/backends/openvino/CMakeLists.txt b/backends/openvino/CMakeLists.txt index 08fccf2faae..129ab0435ac 100644 --- a/backends/openvino/CMakeLists.txt +++ b/backends/openvino/CMakeLists.txt @@ -1,58 +1,71 @@ +# Set C++ standard set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) +# Ensure compile_commands are generated set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -set(_common_include_directories ${CMAKE_CURRENT_SOURCE_DIR}/../../..) +# Define common include directories +set(COMMON_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/../../..) -include_directories(BEFORE ${_common_include_directories}) +# Include common directories before others to ensure proper order +include_directories(BEFORE ${COMMON_INCLUDE_DIRS}) -# Source root directory for executorch. +# Set up EXECUTORCH_ROOT if not already set if(NOT EXECUTORCH_ROOT) set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..) endif() +# Include utility cmake script from the executorch repository include(${EXECUTORCH_ROOT}/build/Utils.cmake) -set(_common_include_directories ${EXECUTORCH_ROOT}/..) +# Update common include directory for ExecuteTorch +set(COMMON_INCLUDE_DIRS ${EXECUTORCH_ROOT}/..) 
-# Set openvino directory from environment +# Set OpenVINO directory and include directories from environment variable set(OPENVINO_DIR "$ENV{INTEL_OPENVINO_DIR}") -set(OPENVINO_INCLUDE_DIRS ${OPENVINO_DIR}/deployment_tools/inference_engine/include ${OPENVINO_DIR}/runtime/include) +if(NOT OPENVINO_DIR) + message(FATAL_ERROR "INTEL_OPENVINO_DIR environment variable is not set.") +endif() + +set(OPENVINO_INCLUDE_DIRS + ${OPENVINO_DIR}/deployment_tools/inference_engine/include + ${OPENVINO_DIR}/runtime/include +) -# Add the OpenVINO backend library +# Define OpenVINO library path +set(OPENVINO_LIB_PATH ${OPENVINO_DIR}/runtime/lib/intel64) + +# Define OpenVINO libraries +set(OPENVINO_LIB ${OPENVINO_LIB_PATH}/libopenvino.so) + +# Add the OpenVINO backend library as a shared library add_library(openvino_backend SHARED) + +# Enable exceptions and RTTI for OpenVINO backend target_compile_options(openvino_backend PRIVATE "-frtti" "-fexceptions") # Include directories for ExecuteTorch and OpenVINO target_include_directories( - openvino_backend PUBLIC ${_common_include_directories} + openvino_backend PUBLIC + ${COMMON_INCLUDE_DIRS} + ${OPENVINO_INCLUDE_DIRS} ) -target_include_directories( - openvino_backend PUBLIC ${OPENVINO_INCLUDE_DIRS} +# Link OpenVINO libraries and executorch core to the backend +target_link_libraries(openvino_backend PRIVATE + ${OPENVINO_LIB} + executorch_core ) -set(OPENVINO_LIB_PATH ${OPENVINO_DIR}/runtime/lib/intel64) -set(OPENVINO_LIBS - ${OPENVINO_LIB_PATH}/libopenvino.so.2025.0.0 - ${OPENVINO_LIB_PATH}/libopenvino_ir_frontend.so.2025.0.0 - ${OPENVINO_LIB_PATH}/libopenvino_c.so - ${OPENVINO_LIB_PATH}/libopenvino_intel_cpu_plugin.so - ${OPENVINO_LIB_PATH}/libopenvino_intel_gpu_plugin.so - ${OPENVINO_LIB_PATH}/libopenvino_auto_plugin.so +# Add source files to the OpenVINO backend library +target_sources(openvino_backend PRIVATE + ${CMAKE_CURRENT_LIST_DIR}/runtime/OpenvinoBackend.cpp ) -# Link the OpenVINO library to the backend 
-target_link_libraries(openvino_backend PRIVATE ${OPENVINO_LIBS} executorch_core) - -target_sources( - openvino_backend - PRIVATE ${CMAKE_CURRENT_LIST_DIR}/runtime/OpenvinoBackend.cpp -) +# Set additional link options for shared library +target_link_options(openvino_backend PRIVATE -Wl,-rpath=${OPENVINO_LIB_PATH}) -target_link_options_shared_lib(openvino_backend) +# Install the OpenVINO backend library to the lib directory install(TARGETS openvino_backend DESTINATION lib) - - From 29d840062a289df87101631f06e5e22cf7ab223b Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Wed, 29 Jan 2025 17:03:43 -0800 Subject: [PATCH 017/188] Added arguments for model path and num iters to openvino example --- .../openvino_executor_runner.cpp | 55 +++++++++++++------ 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp index 86b975fe007..67bb35d9701 100644 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -13,13 +13,18 @@ #include #include +// Define a fixed-size memory pool for the method allocator (4 MB) static uint8_t method_allocator_pool[4 * 1024U * 1024U]; // 4 MB +// Define command-line flags for model path and the number of iterations DEFINE_string( model_path, - "/home/icx-6338/ynimmaga/delegate.pte", //"model.pte", - "Model serialized in flatbuffer format."); -DEFINE_int32(iteration, 1, "Iterations of inference."); + "", + "Path to the model serialized in flatbuffer format (required)."); +DEFINE_int32( + num_iter, + 1, + "Number of inference iterations (default is 1)."); using executorch::extension::FileDataLoader; using executorch::extension::prepare_input_tensors; @@ -35,25 +40,34 @@ using executorch::runtime::Result; using executorch::runtime::Span; int main(int argc, char** argv) { + // Initialize the runtime environment 
executorch::runtime::runtime_init(); + // Parse command-line arguments and flags gflags::ParseCommandLineFlags(&argc, &argv, true); - if (argc != 1) { - std::string msg = "Extra commandline args:"; - for (int i = 1; i < argc; i++) { - msg += " " + std::string(argv[i]); - } - ET_LOG(Error, "%s", msg.c_str()); + + // Check if the model path is provided + if (FLAGS_model_path.empty()) { + std::cerr << "Error: --model_path is required." << std::endl; + std::cerr << "Usage: " << argv[0] + << " --model_path= --num_iter=" << std::endl; return 1; } + // Retrieve the model path and number of iterations const char* model_path = FLAGS_model_path.c_str(); + int num_iterations = FLAGS_num_iter; + std::cout << "Model path: " << model_path << std::endl; + std::cout << "Number of iterations: " << num_iterations << std::endl; + + // Load the model using FileDataLoader Result loader = FileDataLoader::from(model_path); ET_CHECK_MSG( loader.ok(), "FileDataLoader::from() failed: 0x%" PRIx32, static_cast(loader.error())); + // Load the program from the loaded model Result program = Program::load(&loader.get()); if (!program.ok()) { ET_LOG(Error, "Failed to parse model file %s", model_path); @@ -61,6 +75,7 @@ int main(int argc, char** argv) { } ET_LOG(Info, "Model file %s is loaded.", model_path); + // Retrieve the method name from the program (assumes the first method is used) const char* method_name = nullptr; { const auto method_name_result = program->get_method_name(0); @@ -69,6 +84,7 @@ int main(int argc, char** argv) { } ET_LOG(Info, "Using method %s", method_name); + // Retrieve metadata about the method Result method_meta = program->method_meta(method_name); ET_CHECK_MSG( method_meta.ok(), @@ -76,9 +92,11 @@ int main(int argc, char** argv) { method_name, static_cast(method_meta.error())); + // Set up a memory allocator for the method MemoryAllocator method_allocator{ MemoryAllocator(sizeof(method_allocator_pool), method_allocator_pool)}; + // Prepare planned buffers for memory 
planning std::vector> planned_buffers; std::vector> planned_spans; size_t num_memory_planned_buffers = method_meta->num_memory_planned_buffers(); @@ -92,8 +110,10 @@ int main(int argc, char** argv) { HierarchicalAllocator planned_memory( {planned_spans.data(), planned_spans.size()}); + // Set up a memory manager using the method allocator and planned memory MemoryManager memory_manager(&method_allocator, &planned_memory); + // Load the method into the program Result method = program->load_method(method_name, &memory_manager); ET_CHECK_MSG( method.ok(), @@ -102,6 +122,7 @@ int main(int argc, char** argv) { static_cast(method.error())); ET_LOG(Info, "Method loaded."); + // Prepare the input tensors for the method auto inputs = prepare_input_tensors(*method); ET_CHECK_MSG( inputs.ok(), @@ -109,9 +130,10 @@ int main(int argc, char** argv) { static_cast(inputs.error())); ET_LOG(Info, "Inputs prepared."); + // Measure execution time for inference auto before_exec = std::chrono::high_resolution_clock::now(); Error status = Error::Ok; - for (int i = 0; i < FLAGS_iteration; ++i) { + for (int i = 0; i < num_iterations; ++i) { status = method->execute(); } auto after_exec = std::chrono::high_resolution_clock::now(); @@ -119,12 +141,13 @@ int main(int argc, char** argv) { after_exec - before_exec) .count() / 1000.0; + // Log execution time and average time per iteration ET_LOG( Info, "%d inference took %f ms, avg %f ms", - FLAGS_iteration, + num_iterations, elapsed_time, - elapsed_time / static_cast(FLAGS_iteration)); + elapsed_time / static_cast(num_iterations)); ET_CHECK_MSG( status == Error::Ok, "Execution of method %s failed with status 0x%" PRIx32, @@ -132,14 +155,12 @@ int main(int argc, char** argv) { static_cast(status)); ET_LOG(Info, "Model executed successfully."); + // Retrieve and print the method outputs std::vector outputs(method->outputs_size()); - ET_LOG(Info, "%zu outputs: ", outputs.size()); + ET_LOG(Info, "%zu Number of outputs: ", outputs.size()); status = 
method->get_outputs(outputs.data(), outputs.size()); ET_CHECK(status == Error::Ok); - //std::cout << executorch::extension::evalue_edge_items(100); - //for (int i = 0; i < outputs.size(); ++i) { - // std::cout << "Output " << i << ": " << outputs[i] << std::endl; - //} return 0; } + From 5806788fcc5de97bd311720ed68d92de6c9c67b4 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Tue, 28 Jan 2025 19:26:16 -0800 Subject: [PATCH 018/188] Initial unit tests for OpenVINO backend --- backends/openvino/tests/models.py | 1109 +++++++++++++++++ .../openvino/tests/test_openvino_delegate.py | 192 +++ 2 files changed, 1301 insertions(+) create mode 100644 backends/openvino/tests/models.py create mode 100644 backends/openvino/tests/test_openvino_delegate.py diff --git a/backends/openvino/tests/models.py b/backends/openvino/tests/models.py new file mode 100644 index 00000000000..dfdeeb98655 --- /dev/null +++ b/backends/openvino/tests/models.py @@ -0,0 +1,1109 @@ +import torch + +# module with related operator only +class Add(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x, y): + return torch.add(x, y) + + +class AddConstantFloat(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return 10.0 + x + + +class AddConstantLong(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return 10 + x + + +class Arange(torch.nn.Module): + def __init__(self, x): + super().__init__() + self.x = x + + def forward(self, y): + return torch.arange(self.x, dtype=torch.float32) + y + + +class AvgPoolModule(torch.nn.Module): + def __init__(self): + super().__init__() + self.avgPool = torch.nn.AvgPool2d( + kernel_size=(2, 2), + padding=(1, 1), + stride=(1, 1), + count_include_pad=False, + ) + + def forward(self, x): + return self.avgPool(x) + + +class BatchNorm(torch.nn.Module): + def __init__(self, n_features): + super().__init__() + self.native_batchnorm = torch.nn.BatchNorm2d(n_features) + 
self.eval() + + def forward(self, x): + return self.native_batchnorm(x) + + +class Bmm(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x, y): + return torch.matmul(x, y) + + +class Cast(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return x.type(torch.IntTensor) + + +class Cat2(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x, y): + return torch.cat((x, y), axis=2) + + +class Cat3(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x, y): + return torch.concat((y, y, x), axis=2) + + +class Cat4(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x, y): + return torch.cat((y, y, x, x), axis=2) + + +class Ceil(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return torch.ceil(x) + + +class Chunk(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return torch.chunk(x, chunks=2, dim=-1) + + +class ChunkAdd(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + c1, c2 = torch.chunk(x, chunks=2, dim=-1) + return torch.add(c1, c2) + + +class Clamp(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return torch.clamp(x, max=0) + + +class CompositeDelegateModule(torch.nn.Module): + def __init__( + self, + compiler_specs, + partitioner_type, + capture_method, + lowered_method, + quantize_method=None, + ) -> None: + super().__init__() + self.modules = [ + Conv2dSequential(), + Conv2dSequential(), + Add(), + Relu(), + ] + self.sample_inputs = [ + (torch.randn([1, 1, 3, 3]),), + (torch.randn([1, 1, 3, 3]),), + (torch.randn([1, 2, 3, 3]), torch.randn([1, 2, 3, 3])), + (torch.randn([1, 2, 3, 3]),), + ] + self.lowered_modules = [] + for module, sample_input in zip(self.modules, self.sample_inputs): + partitioner = partitioner_type(compiler_specs) + if 
quantize_method: + module = quantize_method(module, sample_input) + edge_prog = capture_method(module, sample_input) + edge_prog.exported_program = lowered_method( + edge_prog.exported_program, partitioner + ) + self.lowered_modules.append( + edge_prog.exported_program.graph_module._modules.get("lowered_module_0") + ) + + def forward(self, x, y): + x1 = self.lowered_modules[0](x) + x2 = self.lowered_modules[1](y) + x3 = self.lowered_modules[2](x1[0], x2[0]) + x4 = self.lowered_modules[3](x3[0]) + return x4[0] + + def get_random_input(self): + return (torch.randn([1, 1, 3, 3]), torch.randn([1, 1, 3, 3])) + + def get_reference_module(self): + class CompositeReferenceModule(torch.nn.Module): + def __init__(self, modules): + super().__init__() + self.modules = modules + + def forward(self, x, y): + x1 = self.modules[0](x) + x2 = self.modules[1](y) + x3 = self.modules[2](x1, x2) + x4 = self.modules[3](x3) + return x4 + + return CompositeReferenceModule(self.modules) + + +class ContextBinaryExample(torch.nn.Module): + def forward(self, x, y): + x = torch.nn.functional.relu(x) + y = torch.nn.functional.relu(y) + return x, y + + def example_inputs(self): + return { + "x": torch.randn((1, 3, 3, 3)), + "y": torch.randn((2, 1, 5, 5)), + } + + +class Conv1dSequential(torch.nn.Module): + def __init__(self, bias=True): + super().__init__() + self.first = torch.nn.Conv1d( + in_channels=1, + out_channels=3, + kernel_size=(3), + padding=1, + bias=bias, + ) + + self.second = torch.nn.Conv1d( + in_channels=3, + out_channels=2, + kernel_size=(3), + padding=1, + bias=bias, + ) + + def forward(self, x): + return self.second(self.first(x)) + + +# small models +class Conv1dReluLogSoftmax(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv = torch.nn.Conv1d( + in_channels=2, out_channels=2, kernel_size=1, stride=1, padding=1 + ) + self.logsoftmax = torch.nn.LogSoftmax(dim=1) + + def forward(self, x): + x = torch.nn.functional.relu(self.conv(x)) + x = self.logsoftmax(x) 
+ return x + + +class Conv2dAvgPool2d(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv = torch.nn.Conv2d( + 3, 16, 7, bias=True, stride=2, padding=3, dilation=1 + ) + self.pool = torch.nn.AvgPool2d(3, stride=2, padding=1) + + def forward(self, x): + return self.pool(self.conv(x)) + + +class Conv2dBnHardtanhMean(torch.nn.Module): + def __init__(self): + super(Conv2dBnHardtanhMean, self).__init__() + groups = 1 + stride = [2, 2] + padding = [1, 1] + dilation = [1, 1] + in_channels = 1 + out_channels = 1 + + self.conv = torch.nn.Conv2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=(3, 3), + stride=stride, + padding=padding, + groups=groups, + dilation=dilation, + bias=True, + ) + self.conv.weight = torch.nn.Parameter(torch.randn(self.conv.weight.size())) + self.native_batchnorm = torch.nn.BatchNorm2d(out_channels) + self.hardtanh = torch.nn.Hardtanh(min_val=0, max_val=6) + self.eval() + + def forward(self, x): + x1 = self.conv(x) + x2 = self.native_batchnorm(x1) + x3 = self.hardtanh(x2) + x4 = torch.mean(x3, (1), keepdim=True) + return x4 + + +class Conv2dCat(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv1 = torch.nn.Conv2d(3, 3, 3) + self.conv2 = torch.nn.Conv2d(3, 3, 3) + + def forward(self, x, y): + x = self.conv1(x) + y = self.conv2(y) + z = torch.cat([x, y], dim=1) + return z + + +class Conv2dMaxPool2d(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv = torch.nn.Conv2d( + in_channels=2, + out_channels=2, + kernel_size=(1, 1), + padding=1, + bias=True, + ) + self.pool = torch.nn.MaxPool2d(1, 1) + + def forward(self, x): + return self.pool(self.conv(x)) + + +class Conv2dSequential(torch.nn.Module): + def __init__(self, bias=True, channel_last=False): + super().__init__() + self.first = torch.nn.Conv2d( + in_channels=1, + out_channels=3, + kernel_size=(3, 3), + padding=1, + bias=bias, + ) + self.second = torch.nn.Conv2d( + in_channels=3, + out_channels=2, + 
kernel_size=(3, 3), + padding=1, + bias=bias, + ) + self.channel_last = channel_last + + def forward(self, x): + x = x.to(memory_format=torch.channels_last) if self.channel_last else x + return self.second(self.first(x)) + + +class Conv2dSingle(torch.nn.Module): + def __init__(self, bias=True): + super().__init__() + self.conv = torch.nn.Conv2d( + in_channels=1, + out_channels=3, + kernel_size=(3, 3), + padding=1, + bias=bias, + ) + + def forward(self, x): + return self.conv(x) + + +class ConvTranspose2dSingle(torch.nn.Module): + def __init__(self, bias=True): + super().__init__() + self.conv_transpose = torch.nn.ConvTranspose2d( + in_channels=1, + out_channels=3, + kernel_size=3, + stride=2, + padding=1, + bias=bias, + ) + + def forward(self, x): + return self.conv_transpose(x) + + +class Conv2dDownUpSample(torch.nn.Module): + def __init__(self, bias=True): + super().__init__() + self.conv = torch.nn.Conv2d( + in_channels=16, + out_channels=16, + kernel_size=3, + stride=2, + padding=1, + bias=bias, + ) + self.conv_transpose = torch.nn.ConvTranspose2d( + in_channels=16, + out_channels=16, + kernel_size=3, + stride=2, + padding=1, + bias=bias, + ) + + def forward(self, x): + return self.conv_transpose(self.conv(x)) + + +class Conv2dSumReduceDim(torch.nn.Module): + def __init__(self): + super().__init__() + self.first = torch.nn.Conv2d( + in_channels=1, + out_channels=3, + kernel_size=(3, 3), + padding=1, + bias=True, + ) + + def forward(self, x): + return torch.sum(self.first(x), dim=(2, 3), keepdim=False) + + +class Conv2dTopK(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv = torch.nn.Conv2d(3, 16, 3) + + def forward(self, x): + x = self.conv(x) + topk_values, topk_indices = torch.topk(x, 5, dim=1) + return topk_values + + +class Cos(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return torch.cos(x) + + +class Div(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x, 
y): + return torch.divide(x, y) + + +class DivConstantFloat(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return x / 10.0 + + +class DivConstantLong(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return x / 10 + + +class DrawGraphModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.relu1 = torch.nn.ReLU() + self.relu2 = torch.nn.ReLU() + kernel_sz = 32 + self.conv1 = torch.nn.Conv2d(kernel_sz, kernel_sz, 3, padding=1, bias=True) + self.conv2 = torch.nn.Conv2d(kernel_sz, kernel_sz, 3, padding=1, bias=True) + + def forward(self, x): + x1 = self.conv1(x) + x2 = self.conv2(x) + y1 = self.relu1(x1) + y2 = self.relu1(x2) + return y1 + y2 + + +class EinsumBilinear(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, bn, anm, bm): + return torch.einsum("bn,anm,bm->ba", bn, anm, bm) + + +class EinsumOuterProduct(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, i, j): + return torch.einsum("i,j->ij", i, j) + + +class EinsumOuterProductRelu(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, i, j): + return torch.relu(torch.einsum("i,j->ij", i, j)) + + +class Embedding(torch.nn.Module): + def __init__(self): + super().__init__() + self.embedding = torch.nn.Embedding(10, 3) + + def forward(self, x): + return self.embedding(x) + + +class ExpandCopy(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return x.expand(3, 4) + + +class Gelu(torch.nn.Module): + def __init__(self): + super().__init__() + self.gelu = torch.nn.GELU() + + def forward(self, x): + return self.gelu(x) + + +class GroupNorm(torch.nn.Module): + def __init__(self, bias=True): + super().__init__() + self.conv = torch.nn.Conv2d( + 32, + 256, + kernel_size=3, + stride=1, + padding=1, + bias=bias, + ) + self.norm = torch.nn.GroupNorm(32, 256) + + def forward(self, x): + y = 
self.conv(x) + return y, self.norm(y) + + +class HardSigmoid(torch.nn.Module): + def __init__(self): + super().__init__() + self.hardsigmoid = torch.nn.Hardsigmoid() + + def forward(self, x): + return self.hardsigmoid(x) + + +class HardSwish(torch.nn.Module): + def __init__(self): + super().__init__() + self.hardswish = torch.nn.Hardswish() + + def forward(self, x): + return self.hardswish(x) + + +class HardTanh(torch.nn.Module): + def __init__(self): + super().__init__() + self.hardtanh = torch.nn.Hardtanh(min_val=0, max_val=6) + + def forward(self, x): + return self.hardtanh(x) + + +class Index(torch.nn.Module): + def __init__(self): + super().__init__() + self.idx0 = torch.tensor([[0, 1], [2, 3], [4, 5]], dtype=torch.int32) + self.idx1 = torch.tensor([[1, 2], [3, 4], [5, 6]], dtype=torch.int32) + + def forward(self, x): + return x[self.idx0] + x[self.idx1] + + +class IndexPut(torch.nn.Module): + def __init__(self): + super().__init__() + self.register_buffer( + "k_cache", + torch.zeros((1, 1024, 12, 64), dtype=torch.float32), + ) + + def forward(self, input_pos, k_val): + k_out = torch.ops.aten.index_put_(self.k_cache, [None, input_pos], k_val) + return k_out + + +class LayerNorm(torch.nn.Module): + def __init__(self): + super().__init__() + self.layer_norm = torch.nn.LayerNorm([768], eps=1e-6) + self.linear = torch.nn.Linear(768, 196) + + def forward(self, x): + return self.linear(self.layer_norm(x)) + + +class LeakyReLUDefault(torch.nn.Module): + def __init__(self): + super().__init__() + self.leaky_relu = torch.nn.LeakyReLU() + + def forward(self, x): + return self.leaky_relu(x) + + +class LeakyReLUCustom(torch.nn.Module): + def __init__(self, coeff): + super().__init__() + self.leaky_relu = torch.nn.LeakyReLU(coeff) + + def forward(self, x): + return self.leaky_relu(x) + + +class Linear(torch.nn.Module): + def __init__(self, use_bias: bool = True): + super().__init__() + self.linear = torch.nn.Linear(4, 5, use_bias).eval() + + def forward(self, x): + return 
self.linear(x) + + +class LogSoftmax(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return torch.nn.functional.log_softmax(x, dim=-1) + + +class MaxPool2d(torch.nn.Module): + def __init__(self): + super().__init__() + self.max_pool2d = torch.nn.MaxPool2d( + kernel_size=3, + stride=1, + padding=1, + dilation=1, + ceil_mode=True, + ) + + def forward(self, x): + return self.max_pool2d(x) + + +class MeanWKeppDim(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return torch.mean(x, (-1, -2), keepdim=True) + + +class MeanWOKeppDim(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return torch.mean(x, (-1, -2)) + + +class Mul(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x, y): + return torch.mul(x, y) + + +class MulConstantFloat(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return 10.0 * x + + +class MulConstantLong(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return 10 * x + + +class MulScalar(torch.nn.Module): + def __init__(self): + super().__init__() + self._scalar = 3.14 + + def forward(self, x): + out1 = torch.ops.aten.mul.Scalar(x, self._scalar) + return out1 + + +class MultiheadAttention(torch.nn.Module): + def __init__(self): + super().__init__() + self.multi_head_attention = torch.nn.MultiheadAttention( + 96, 12, dropout=0.0, batch_first=True + ) + + def forward(self, x): + attn_output, _ = self.multi_head_attention(x, x, x, need_weights=False) + return attn_output + + +class Pad(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return torch.nn.functional.pad( + x[:, 1:], [0, 0, 0, 1, 0, 0], value=0.0, mode="constant" + ) + + +class PixelShuffle(torch.nn.Module): + def __init__(self, scale): + super().__init__() + self.pixel_shuffle = torch.nn.PixelShuffle(scale) + + def 
forward(self, x): + return self.pixel_shuffle(x) + + +class PixelUnshuffle(torch.nn.Module): + def __init__(self, scale): + super().__init__() + self.pixel_unshuffle = torch.nn.PixelUnshuffle(scale) + + def forward(self, x): + return self.pixel_unshuffle(x) + + +class PixelUnshuffleMathEquivalent(torch.nn.Module): + def __init__(self, scale): + super().__init__() + self.scale = scale + + def forward(self, x): + b, c, hh, hw = x.size() + out_channel = c * (self.scale**2) + h = hh // self.scale + w = hw // self.scale + x_view = x.view(b, c, h, self.scale, w, self.scale) + return x_view.permute(0, 1, 3, 5, 2, 4).reshape(b, out_channel, h, w) + + +class PowTensorScalar(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return torch.pow(x, 2) + + +class PReLUDefault(torch.nn.Module): + def __init__(self): + super().__init__() + self.prelu = torch.nn.PReLU() + + def forward(self, x): + return self.prelu(x) + + +class PReLUPerChannel(torch.nn.Module): + def __init__(self, channels): + super().__init__() + self.prelu = torch.nn.PReLU(channels) + + def forward(self, x): + return self.prelu(x) + + +class Relu(torch.nn.Module): + def __init__(self): + super().__init__() + self.relu = torch.nn.ReLU() + + def forward(self, x): + return self.relu(x) + + +class Reshape(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return x.reshape(1, 12) + + +class ResidualBlockModule(torch.nn.Module): + def __init__(self): + super(ResidualBlockModule, self).__init__() + groups = 1 + stride = [1, 1] + padding = [1, 1] + dilation = [1, 1] + in_channels = 32 + out_channels = 32 + + self.conv = torch.nn.Conv2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=(3, 3), + stride=stride, + padding=padding, + groups=groups, + dilation=dilation, + bias=True, + ) + self.native_batchnorm = torch.nn.BatchNorm2d(out_channels) + self.hardtanh = torch.nn.Hardtanh(min_val=0, max_val=6.0) + self.eval() + + def 
forward(self, x): + x1 = self.conv(x) + x2 = self.native_batchnorm(x1) + x3 = self.conv(x2) + x4 = self.native_batchnorm(x3) + x5 = self.hardtanh(x4) + x6 = torch.add(x5, x2) + return x6 + + +class ResizeBilinear2D(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + output_shape = [dim * 2 for dim in x.shape[-2:]] + return torch.nn.functional.interpolate( + x, + size=list(torch.randn(output_shape).shape), + mode="bilinear", + align_corners=False, + ) + + +class ResizeNearest2D(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + output_shape = [dim * 2 for dim in x.shape[-2:]] + return torch.nn.functional.interpolate( + x, + size=list(torch.randn(output_shape).shape), + mode="nearest", + ) + + +class RmsNorm(torch.nn.Module): + def __init__(self): + super().__init__() + self.eps = 1e-5 + self.rms = torch.nn.RMSNorm([4], 1e-5) + + def forward(self, x): + return self.rms(x) + + +class Rsqrt(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return torch.rsqrt(x) + + +class ScaledDotProductAttention(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, query_layer, key_layer, value_layer, attn_mask): + attn_output = torch.nn.functional.scaled_dot_product_attention( + query_layer, key_layer, value_layer, attn_mask + ) + return attn_output + + +class SelectCopy(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv = torch.nn.Conv2d( + in_channels=3, + out_channels=2, + kernel_size=(3, 3), + padding=1, + bias=True, + ) + + def forward(self, x): + return self.conv(x)[0, 1, 1:2] + + +class Sigmoid(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return torch.sigmoid(x) + + +class Sin(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return torch.sin(x) + + +class SimpleModel(torch.nn.Module): + def __init__(self): + super().__init__() + 
kernel_sz = 32 + self.conv1 = torch.nn.Conv2d(kernel_sz, kernel_sz, 3, padding=1, bias=True) + self.conv2 = torch.nn.Conv2d(kernel_sz, kernel_sz, 3, padding=1, bias=True) + self.conv3 = torch.nn.Conv2d(kernel_sz, kernel_sz, 3, padding=1, bias=False) + self.conv4 = torch.nn.Conv2d(kernel_sz, kernel_sz, 3, padding=1, bias=False) + self.hardtanh = torch.nn.Hardtanh(min_val=0, max_val=6) + self.relu = torch.nn.ReLU() + self.batch_norm = torch.nn.BatchNorm2d(kernel_sz) + self.add = torch.add + self.mean = torch.mean + self.reshape = torch.reshape + self.linear = torch.nn.Linear(4, 10) + self.permute = torch.permute + self.eval() + + def forward(self, x, y): + x1 = self.conv1(x) + x2 = self.batch_norm(x1) + x3 = self.relu(x2) + x4 = self.conv2(x3) + x5 = self.relu(x4) + y1 = self.conv3(y) + y2 = self.batch_norm(y1) + y3 = self.relu(y2) + y4 = self.conv4(y3) + y5 = self.relu(y4) + z = self.add(x5, y5) + z1 = self.permute(z, (0, 3, 2, 1)) + z2 = torch.mean(z1, [1, 2], True) + z3 = self.reshape(z2, (8, -1)) + z4 = self.linear(z3) + z5 = self.hardtanh(z4) + return z5 + + +class SliceCopy(torch.nn.Module): + def __init__(self): + super().__init__() + self.position_ids = torch.randn([1, 512]) + + def forward(self, x, y): + seq_length = y.size()[1] + return x[:, :seq_length] + self.position_ids[:, :seq_length] + + +class SliceCopyWithStep(torch.nn.Module): + def __init__(self): + super().__init__() + self.position_ids = torch.randn([1, 512]) + self.step = 2 + + def forward(self, x, y): + seq_length = y.size()[1] + return ( + x[:, : seq_length : self.step] + + self.position_ids[:, : seq_length : self.step] + ) + + +class Softmax(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return torch.nn.functional.softmax(x, dim=-1) + + +class Sqrt(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return torch.sqrt(x) + + +class SqrtConstant(torch.nn.Module): + def __init__(self): + super().__init__() + + def 
forward(self, x): + return x / torch.sqrt(torch.tensor([64.0])) + + +class Squeeze(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return x.squeeze() + + +class Stack(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x, y): + return torch.stack((x, y)) + + +class Sub(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x, y): + return torch.sub(x, y) + + +class SubConstantFloat(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return 10.0 - x + + +class SubConstantLong(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return 10 - x + + +class SumIntList(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return torch.sum(x, dim=(2, 3), keepdim=True) + + +class Tanh(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return torch.tanh(x) + + +class TopKandIndex(torch.nn.Module): + def __init__(self): + super().__init__() + self.idx_source = torch.rand(10, 3) + + def forward(self, x): + a, b = torch.topk(x, 3) + return a + self.idx_source[b] + + +class Unbind(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return torch.unbind(x) + + +class Unsqueeze(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return x.unsqueeze(0) + + +class View(torch.nn.Module): + def __init__(self): + super().__init__() + self.first_size = 2 + self.second_size = 256 + + def forward(self, x, y): + new_shape = x.size()[:-1] + (self.first_size, self.second_size) + return x.view(new_shape) + + +class ViewPermuteMatMul(torch.nn.Module): + def __init__(self): + super().__init__() + self.first_size = 2 + self.second_size = 256 + + def forward(self, x, y): + new_shape = x.size()[:-1] + (self.first_size, self.second_size) + x = x.view(new_shape) + x = x.permute(0, 2, 1, 
3) + return torch.matmul(x, y.transpose(-1, -2)) diff --git a/backends/openvino/tests/test_openvino_delegate.py b/backends/openvino/tests/test_openvino_delegate.py new file mode 100644 index 00000000000..5525b6ca5f3 --- /dev/null +++ b/backends/openvino/tests/test_openvino_delegate.py @@ -0,0 +1,192 @@ +import io +import json +import subprocess +import sys +import tempfile +import unittest +from multiprocessing.connection import Listener +from pathlib import Path + +import numpy as np + +import torch +import executorch +from executorch.backends.openvino.partitioner import OpenvinoPartitioner +from executorch.exir.backend.backend_details import CompileSpec +from torch.export import export, ExportedProgram +from executorch.exir import EdgeProgramManager, to_edge +from executorch.backends.openvino.preprocess import OpenvinoBackend +from executorch.exir.program import ExecutorchProgram, ExecutorchProgramManager +from executorch.backends.openvino.tests.models import * + +import os +import random + +from collections import defaultdict +from typing import List + +import argparse + +class TestOpenVINO(unittest.TestCase): + device = "CPU" + build_folder = None + + def execute_layer_test( + self, + module: torch.nn.Module, + sample_inputs: tuple[torch.Tensor], + expected_partitions: int = 1, + assert_output_equal: bool = True, + ): + + module = module.eval() + aten_dialect: ExportedProgram = export(module, sample_inputs) + + edge_program: EdgeProgramManager = to_edge(aten_dialect) + to_be_lowered_module = edge_program.exported_program() + + compile_spec = [CompileSpec("device", self.device.encode())] + lowered_module = edge_program.to_backend(OpenvinoPartitioner(compile_spec)) + + exec_prog = lowered_module.to_executorch(config=executorch.exir.ExecutorchBackendConfig()) + + self.assertEqual( + len(exec_prog.executorch_program.execution_plan[0].delegates), + expected_partitions, + ) + for i in range(expected_partitions): + self.assertEqual( + 
exec_prog.executorch_program.execution_plan[0].delegates[i].id, + OpenvinoBackend.__name__, + ) + + if (assert_output_equal): + with tempfile.TemporaryDirectory() as tmp_dir: + input_list = "" + for idx, _ in enumerate(sample_inputs): + input_name = f"input_0_{idx}.raw" + input_list += input_name + " " + input_list = input_list.strip() + "\n" + + output_dir = f"{tmp_dir}/outputs" + + ref_output = module(*sample_inputs) + if isinstance(ref_output, torch.Tensor): + ref_output = [ref_output,] + + pte_fname = f"{tmp_dir}/openvino_executorch_test.pte" + with open(pte_fname, "wb") as file: + exec_prog.write_to_file(file) + + + self.generate_inputs(tmp_dir, "input_list.txt", [sample_inputs], input_list) + self.make_output_dir(output_dir) + + cmd = [ + # openvino_executor_runner + f"{self.build_folder}/examples/openvino/openvino_executor_runner", + pte_fname, + #"--input_list_path", + f"{tmp_dir}/input_list.txt", + #"--output_folder_path", + output_dir, + #"--method_index", + #str(method_index), + ] + + env = dict(os.environ) + proc = subprocess.run( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + env=env, + cwd=tmp_dir, + ) + + stdout_str = proc.stdout.decode('utf-8') + self.assertIn("Model executed successfully.", stdout_str) + + output_dir = f"{tmp_dir}/outputs" + outputs = [] + + for i, f in enumerate(sorted(os.listdir(output_dir))): + filename = os.path.join(output_dir, f) + output = np.fromfile(filename, dtype=ref_output[i].numpy().dtype) + output = torch.from_numpy(output).reshape(ref_output[i].shape) + outputs.append(output) + + self.assertTrue(len(ref_output) == len(outputs)) + for i in range(len(ref_output)): + self.assertTrue( + torch.allclose( + outputs[i], ref_output[i], atol=self.atol, rtol=self.rtol + ), + msg=f"ref_output:\n{ref_output[i]}\n\ntest_output:\n{outputs[i]}", + ) + + def generate_inputs(self, dest_path: str, file_name: str, inputs=None, input_list=None): + input_list_file = None + input_files = [] + + # Prepare input list + if 
input_list is not None: + input_list_file = f"{dest_path}/{file_name}" + with open(input_list_file, "w") as f: + f.write(input_list) + f.flush() + + # Prepare input data + if inputs is not None: + for idx, data in enumerate(inputs): + for i, d in enumerate(data): + file_name = f"{dest_path}/input_{idx}_{i}.raw" + d.detach().numpy().tofile(file_name) + input_files.append(file_name) + + return input_list_file, input_files + + def make_output_dir(self, path: str): + if os.path.exists(path): + for f in os.listdir(path): + os.remove(os.path.join(path, f)) + os.removedirs(path) + os.makedirs(path) + + +class TestOpenVINOloatingPointOperator(TestOpenVINO): + atol = 1e-1 + rtol = 1e-1 + + def test_openvino_backend_arange(self): + module = Arange(5) # noqa: F405 + #sample_input = (torch.randn(5),) + sample_input = (torch.ones(5),) + self.execute_layer_test(module, sample_input) + + +def setup_environment(): + parser = argparse.ArgumentParser() + + parser.add_argument( + "-b", + "--build_folder", + help="path to cmake binary directory", + type=str, + required=True, + ) + parser.add_argument( + "-s", + "--device", + help="OpenVINO device to execute the model on", + type=str, + default="CPU", + ) + + args, ns_args = parser.parse_known_args(namespace=unittest) + TestOpenVINO.device = args.device + TestOpenVINO.build_folder = args.build_folder + return sys.argv[:1] + ns_args + +if __name__ == "__main__": + ut_args = setup_environment() + unittest.main(argv=ut_args) From 916ba64b6b9f1818283ffbb5662d13d47a396576 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Wed, 29 Jan 2025 17:34:15 -0800 Subject: [PATCH 019/188] Unit test update and cleanup --- backends/openvino/tests/models.py | 1109 ----------------- .../tests/ops/base_openvino_op_test.py | 153 +++ backends/openvino/tests/ops/test_add.py | 19 + backends/openvino/tests/ops/test_arange.py | 21 + .../openvino/tests/test_openvino_delegate.py | 192 +-- 5 files changed, 221 insertions(+), 1273 deletions(-) delete mode 100644 
backends/openvino/tests/models.py create mode 100644 backends/openvino/tests/ops/base_openvino_op_test.py create mode 100644 backends/openvino/tests/ops/test_add.py create mode 100644 backends/openvino/tests/ops/test_arange.py diff --git a/backends/openvino/tests/models.py b/backends/openvino/tests/models.py deleted file mode 100644 index dfdeeb98655..00000000000 --- a/backends/openvino/tests/models.py +++ /dev/null @@ -1,1109 +0,0 @@ -import torch - -# module with related operator only -class Add(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x, y): - return torch.add(x, y) - - -class AddConstantFloat(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return 10.0 + x - - -class AddConstantLong(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return 10 + x - - -class Arange(torch.nn.Module): - def __init__(self, x): - super().__init__() - self.x = x - - def forward(self, y): - return torch.arange(self.x, dtype=torch.float32) + y - - -class AvgPoolModule(torch.nn.Module): - def __init__(self): - super().__init__() - self.avgPool = torch.nn.AvgPool2d( - kernel_size=(2, 2), - padding=(1, 1), - stride=(1, 1), - count_include_pad=False, - ) - - def forward(self, x): - return self.avgPool(x) - - -class BatchNorm(torch.nn.Module): - def __init__(self, n_features): - super().__init__() - self.native_batchnorm = torch.nn.BatchNorm2d(n_features) - self.eval() - - def forward(self, x): - return self.native_batchnorm(x) - - -class Bmm(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x, y): - return torch.matmul(x, y) - - -class Cast(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return x.type(torch.IntTensor) - - -class Cat2(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x, y): - return torch.cat((x, y), axis=2) - - -class Cat3(torch.nn.Module): - def 
__init__(self): - super().__init__() - - def forward(self, x, y): - return torch.concat((y, y, x), axis=2) - - -class Cat4(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x, y): - return torch.cat((y, y, x, x), axis=2) - - -class Ceil(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return torch.ceil(x) - - -class Chunk(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return torch.chunk(x, chunks=2, dim=-1) - - -class ChunkAdd(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - c1, c2 = torch.chunk(x, chunks=2, dim=-1) - return torch.add(c1, c2) - - -class Clamp(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return torch.clamp(x, max=0) - - -class CompositeDelegateModule(torch.nn.Module): - def __init__( - self, - compiler_specs, - partitioner_type, - capture_method, - lowered_method, - quantize_method=None, - ) -> None: - super().__init__() - self.modules = [ - Conv2dSequential(), - Conv2dSequential(), - Add(), - Relu(), - ] - self.sample_inputs = [ - (torch.randn([1, 1, 3, 3]),), - (torch.randn([1, 1, 3, 3]),), - (torch.randn([1, 2, 3, 3]), torch.randn([1, 2, 3, 3])), - (torch.randn([1, 2, 3, 3]),), - ] - self.lowered_modules = [] - for module, sample_input in zip(self.modules, self.sample_inputs): - partitioner = partitioner_type(compiler_specs) - if quantize_method: - module = quantize_method(module, sample_input) - edge_prog = capture_method(module, sample_input) - edge_prog.exported_program = lowered_method( - edge_prog.exported_program, partitioner - ) - self.lowered_modules.append( - edge_prog.exported_program.graph_module._modules.get("lowered_module_0") - ) - - def forward(self, x, y): - x1 = self.lowered_modules[0](x) - x2 = self.lowered_modules[1](y) - x3 = self.lowered_modules[2](x1[0], x2[0]) - x4 = self.lowered_modules[3](x3[0]) - return x4[0] - - def 
get_random_input(self): - return (torch.randn([1, 1, 3, 3]), torch.randn([1, 1, 3, 3])) - - def get_reference_module(self): - class CompositeReferenceModule(torch.nn.Module): - def __init__(self, modules): - super().__init__() - self.modules = modules - - def forward(self, x, y): - x1 = self.modules[0](x) - x2 = self.modules[1](y) - x3 = self.modules[2](x1, x2) - x4 = self.modules[3](x3) - return x4 - - return CompositeReferenceModule(self.modules) - - -class ContextBinaryExample(torch.nn.Module): - def forward(self, x, y): - x = torch.nn.functional.relu(x) - y = torch.nn.functional.relu(y) - return x, y - - def example_inputs(self): - return { - "x": torch.randn((1, 3, 3, 3)), - "y": torch.randn((2, 1, 5, 5)), - } - - -class Conv1dSequential(torch.nn.Module): - def __init__(self, bias=True): - super().__init__() - self.first = torch.nn.Conv1d( - in_channels=1, - out_channels=3, - kernel_size=(3), - padding=1, - bias=bias, - ) - - self.second = torch.nn.Conv1d( - in_channels=3, - out_channels=2, - kernel_size=(3), - padding=1, - bias=bias, - ) - - def forward(self, x): - return self.second(self.first(x)) - - -# small models -class Conv1dReluLogSoftmax(torch.nn.Module): - def __init__(self): - super().__init__() - self.conv = torch.nn.Conv1d( - in_channels=2, out_channels=2, kernel_size=1, stride=1, padding=1 - ) - self.logsoftmax = torch.nn.LogSoftmax(dim=1) - - def forward(self, x): - x = torch.nn.functional.relu(self.conv(x)) - x = self.logsoftmax(x) - return x - - -class Conv2dAvgPool2d(torch.nn.Module): - def __init__(self): - super().__init__() - self.conv = torch.nn.Conv2d( - 3, 16, 7, bias=True, stride=2, padding=3, dilation=1 - ) - self.pool = torch.nn.AvgPool2d(3, stride=2, padding=1) - - def forward(self, x): - return self.pool(self.conv(x)) - - -class Conv2dBnHardtanhMean(torch.nn.Module): - def __init__(self): - super(Conv2dBnHardtanhMean, self).__init__() - groups = 1 - stride = [2, 2] - padding = [1, 1] - dilation = [1, 1] - in_channels = 1 - 
out_channels = 1 - - self.conv = torch.nn.Conv2d( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=(3, 3), - stride=stride, - padding=padding, - groups=groups, - dilation=dilation, - bias=True, - ) - self.conv.weight = torch.nn.Parameter(torch.randn(self.conv.weight.size())) - self.native_batchnorm = torch.nn.BatchNorm2d(out_channels) - self.hardtanh = torch.nn.Hardtanh(min_val=0, max_val=6) - self.eval() - - def forward(self, x): - x1 = self.conv(x) - x2 = self.native_batchnorm(x1) - x3 = self.hardtanh(x2) - x4 = torch.mean(x3, (1), keepdim=True) - return x4 - - -class Conv2dCat(torch.nn.Module): - def __init__(self): - super().__init__() - self.conv1 = torch.nn.Conv2d(3, 3, 3) - self.conv2 = torch.nn.Conv2d(3, 3, 3) - - def forward(self, x, y): - x = self.conv1(x) - y = self.conv2(y) - z = torch.cat([x, y], dim=1) - return z - - -class Conv2dMaxPool2d(torch.nn.Module): - def __init__(self): - super().__init__() - self.conv = torch.nn.Conv2d( - in_channels=2, - out_channels=2, - kernel_size=(1, 1), - padding=1, - bias=True, - ) - self.pool = torch.nn.MaxPool2d(1, 1) - - def forward(self, x): - return self.pool(self.conv(x)) - - -class Conv2dSequential(torch.nn.Module): - def __init__(self, bias=True, channel_last=False): - super().__init__() - self.first = torch.nn.Conv2d( - in_channels=1, - out_channels=3, - kernel_size=(3, 3), - padding=1, - bias=bias, - ) - self.second = torch.nn.Conv2d( - in_channels=3, - out_channels=2, - kernel_size=(3, 3), - padding=1, - bias=bias, - ) - self.channel_last = channel_last - - def forward(self, x): - x = x.to(memory_format=torch.channels_last) if self.channel_last else x - return self.second(self.first(x)) - - -class Conv2dSingle(torch.nn.Module): - def __init__(self, bias=True): - super().__init__() - self.conv = torch.nn.Conv2d( - in_channels=1, - out_channels=3, - kernel_size=(3, 3), - padding=1, - bias=bias, - ) - - def forward(self, x): - return self.conv(x) - - -class 
ConvTranspose2dSingle(torch.nn.Module): - def __init__(self, bias=True): - super().__init__() - self.conv_transpose = torch.nn.ConvTranspose2d( - in_channels=1, - out_channels=3, - kernel_size=3, - stride=2, - padding=1, - bias=bias, - ) - - def forward(self, x): - return self.conv_transpose(x) - - -class Conv2dDownUpSample(torch.nn.Module): - def __init__(self, bias=True): - super().__init__() - self.conv = torch.nn.Conv2d( - in_channels=16, - out_channels=16, - kernel_size=3, - stride=2, - padding=1, - bias=bias, - ) - self.conv_transpose = torch.nn.ConvTranspose2d( - in_channels=16, - out_channels=16, - kernel_size=3, - stride=2, - padding=1, - bias=bias, - ) - - def forward(self, x): - return self.conv_transpose(self.conv(x)) - - -class Conv2dSumReduceDim(torch.nn.Module): - def __init__(self): - super().__init__() - self.first = torch.nn.Conv2d( - in_channels=1, - out_channels=3, - kernel_size=(3, 3), - padding=1, - bias=True, - ) - - def forward(self, x): - return torch.sum(self.first(x), dim=(2, 3), keepdim=False) - - -class Conv2dTopK(torch.nn.Module): - def __init__(self): - super().__init__() - self.conv = torch.nn.Conv2d(3, 16, 3) - - def forward(self, x): - x = self.conv(x) - topk_values, topk_indices = torch.topk(x, 5, dim=1) - return topk_values - - -class Cos(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return torch.cos(x) - - -class Div(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x, y): - return torch.divide(x, y) - - -class DivConstantFloat(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return x / 10.0 - - -class DivConstantLong(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return x / 10 - - -class DrawGraphModel(torch.nn.Module): - def __init__(self): - super().__init__() - self.relu1 = torch.nn.ReLU() - self.relu2 = torch.nn.ReLU() - kernel_sz = 32 - self.conv1 = 
torch.nn.Conv2d(kernel_sz, kernel_sz, 3, padding=1, bias=True) - self.conv2 = torch.nn.Conv2d(kernel_sz, kernel_sz, 3, padding=1, bias=True) - - def forward(self, x): - x1 = self.conv1(x) - x2 = self.conv2(x) - y1 = self.relu1(x1) - y2 = self.relu1(x2) - return y1 + y2 - - -class EinsumBilinear(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, bn, anm, bm): - return torch.einsum("bn,anm,bm->ba", bn, anm, bm) - - -class EinsumOuterProduct(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, i, j): - return torch.einsum("i,j->ij", i, j) - - -class EinsumOuterProductRelu(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, i, j): - return torch.relu(torch.einsum("i,j->ij", i, j)) - - -class Embedding(torch.nn.Module): - def __init__(self): - super().__init__() - self.embedding = torch.nn.Embedding(10, 3) - - def forward(self, x): - return self.embedding(x) - - -class ExpandCopy(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return x.expand(3, 4) - - -class Gelu(torch.nn.Module): - def __init__(self): - super().__init__() - self.gelu = torch.nn.GELU() - - def forward(self, x): - return self.gelu(x) - - -class GroupNorm(torch.nn.Module): - def __init__(self, bias=True): - super().__init__() - self.conv = torch.nn.Conv2d( - 32, - 256, - kernel_size=3, - stride=1, - padding=1, - bias=bias, - ) - self.norm = torch.nn.GroupNorm(32, 256) - - def forward(self, x): - y = self.conv(x) - return y, self.norm(y) - - -class HardSigmoid(torch.nn.Module): - def __init__(self): - super().__init__() - self.hardsigmoid = torch.nn.Hardsigmoid() - - def forward(self, x): - return self.hardsigmoid(x) - - -class HardSwish(torch.nn.Module): - def __init__(self): - super().__init__() - self.hardswish = torch.nn.Hardswish() - - def forward(self, x): - return self.hardswish(x) - - -class HardTanh(torch.nn.Module): - def __init__(self): - super().__init__() - 
self.hardtanh = torch.nn.Hardtanh(min_val=0, max_val=6) - - def forward(self, x): - return self.hardtanh(x) - - -class Index(torch.nn.Module): - def __init__(self): - super().__init__() - self.idx0 = torch.tensor([[0, 1], [2, 3], [4, 5]], dtype=torch.int32) - self.idx1 = torch.tensor([[1, 2], [3, 4], [5, 6]], dtype=torch.int32) - - def forward(self, x): - return x[self.idx0] + x[self.idx1] - - -class IndexPut(torch.nn.Module): - def __init__(self): - super().__init__() - self.register_buffer( - "k_cache", - torch.zeros((1, 1024, 12, 64), dtype=torch.float32), - ) - - def forward(self, input_pos, k_val): - k_out = torch.ops.aten.index_put_(self.k_cache, [None, input_pos], k_val) - return k_out - - -class LayerNorm(torch.nn.Module): - def __init__(self): - super().__init__() - self.layer_norm = torch.nn.LayerNorm([768], eps=1e-6) - self.linear = torch.nn.Linear(768, 196) - - def forward(self, x): - return self.linear(self.layer_norm(x)) - - -class LeakyReLUDefault(torch.nn.Module): - def __init__(self): - super().__init__() - self.leaky_relu = torch.nn.LeakyReLU() - - def forward(self, x): - return self.leaky_relu(x) - - -class LeakyReLUCustom(torch.nn.Module): - def __init__(self, coeff): - super().__init__() - self.leaky_relu = torch.nn.LeakyReLU(coeff) - - def forward(self, x): - return self.leaky_relu(x) - - -class Linear(torch.nn.Module): - def __init__(self, use_bias: bool = True): - super().__init__() - self.linear = torch.nn.Linear(4, 5, use_bias).eval() - - def forward(self, x): - return self.linear(x) - - -class LogSoftmax(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return torch.nn.functional.log_softmax(x, dim=-1) - - -class MaxPool2d(torch.nn.Module): - def __init__(self): - super().__init__() - self.max_pool2d = torch.nn.MaxPool2d( - kernel_size=3, - stride=1, - padding=1, - dilation=1, - ceil_mode=True, - ) - - def forward(self, x): - return self.max_pool2d(x) - - -class MeanWKeppDim(torch.nn.Module): - def 
__init__(self): - super().__init__() - - def forward(self, x): - return torch.mean(x, (-1, -2), keepdim=True) - - -class MeanWOKeppDim(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return torch.mean(x, (-1, -2)) - - -class Mul(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x, y): - return torch.mul(x, y) - - -class MulConstantFloat(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return 10.0 * x - - -class MulConstantLong(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return 10 * x - - -class MulScalar(torch.nn.Module): - def __init__(self): - super().__init__() - self._scalar = 3.14 - - def forward(self, x): - out1 = torch.ops.aten.mul.Scalar(x, self._scalar) - return out1 - - -class MultiheadAttention(torch.nn.Module): - def __init__(self): - super().__init__() - self.multi_head_attention = torch.nn.MultiheadAttention( - 96, 12, dropout=0.0, batch_first=True - ) - - def forward(self, x): - attn_output, _ = self.multi_head_attention(x, x, x, need_weights=False) - return attn_output - - -class Pad(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return torch.nn.functional.pad( - x[:, 1:], [0, 0, 0, 1, 0, 0], value=0.0, mode="constant" - ) - - -class PixelShuffle(torch.nn.Module): - def __init__(self, scale): - super().__init__() - self.pixel_shuffle = torch.nn.PixelShuffle(scale) - - def forward(self, x): - return self.pixel_shuffle(x) - - -class PixelUnshuffle(torch.nn.Module): - def __init__(self, scale): - super().__init__() - self.pixel_unshuffle = torch.nn.PixelUnshuffle(scale) - - def forward(self, x): - return self.pixel_unshuffle(x) - - -class PixelUnshuffleMathEquivalent(torch.nn.Module): - def __init__(self, scale): - super().__init__() - self.scale = scale - - def forward(self, x): - b, c, hh, hw = x.size() - out_channel = c * (self.scale**2) - h = hh // 
self.scale - w = hw // self.scale - x_view = x.view(b, c, h, self.scale, w, self.scale) - return x_view.permute(0, 1, 3, 5, 2, 4).reshape(b, out_channel, h, w) - - -class PowTensorScalar(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return torch.pow(x, 2) - - -class PReLUDefault(torch.nn.Module): - def __init__(self): - super().__init__() - self.prelu = torch.nn.PReLU() - - def forward(self, x): - return self.prelu(x) - - -class PReLUPerChannel(torch.nn.Module): - def __init__(self, channels): - super().__init__() - self.prelu = torch.nn.PReLU(channels) - - def forward(self, x): - return self.prelu(x) - - -class Relu(torch.nn.Module): - def __init__(self): - super().__init__() - self.relu = torch.nn.ReLU() - - def forward(self, x): - return self.relu(x) - - -class Reshape(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return x.reshape(1, 12) - - -class ResidualBlockModule(torch.nn.Module): - def __init__(self): - super(ResidualBlockModule, self).__init__() - groups = 1 - stride = [1, 1] - padding = [1, 1] - dilation = [1, 1] - in_channels = 32 - out_channels = 32 - - self.conv = torch.nn.Conv2d( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=(3, 3), - stride=stride, - padding=padding, - groups=groups, - dilation=dilation, - bias=True, - ) - self.native_batchnorm = torch.nn.BatchNorm2d(out_channels) - self.hardtanh = torch.nn.Hardtanh(min_val=0, max_val=6.0) - self.eval() - - def forward(self, x): - x1 = self.conv(x) - x2 = self.native_batchnorm(x1) - x3 = self.conv(x2) - x4 = self.native_batchnorm(x3) - x5 = self.hardtanh(x4) - x6 = torch.add(x5, x2) - return x6 - - -class ResizeBilinear2D(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - output_shape = [dim * 2 for dim in x.shape[-2:]] - return torch.nn.functional.interpolate( - x, - size=list(torch.randn(output_shape).shape), - mode="bilinear", - align_corners=False, - 
) - - -class ResizeNearest2D(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - output_shape = [dim * 2 for dim in x.shape[-2:]] - return torch.nn.functional.interpolate( - x, - size=list(torch.randn(output_shape).shape), - mode="nearest", - ) - - -class RmsNorm(torch.nn.Module): - def __init__(self): - super().__init__() - self.eps = 1e-5 - self.rms = torch.nn.RMSNorm([4], 1e-5) - - def forward(self, x): - return self.rms(x) - - -class Rsqrt(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return torch.rsqrt(x) - - -class ScaledDotProductAttention(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, query_layer, key_layer, value_layer, attn_mask): - attn_output = torch.nn.functional.scaled_dot_product_attention( - query_layer, key_layer, value_layer, attn_mask - ) - return attn_output - - -class SelectCopy(torch.nn.Module): - def __init__(self): - super().__init__() - self.conv = torch.nn.Conv2d( - in_channels=3, - out_channels=2, - kernel_size=(3, 3), - padding=1, - bias=True, - ) - - def forward(self, x): - return self.conv(x)[0, 1, 1:2] - - -class Sigmoid(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return torch.sigmoid(x) - - -class Sin(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return torch.sin(x) - - -class SimpleModel(torch.nn.Module): - def __init__(self): - super().__init__() - kernel_sz = 32 - self.conv1 = torch.nn.Conv2d(kernel_sz, kernel_sz, 3, padding=1, bias=True) - self.conv2 = torch.nn.Conv2d(kernel_sz, kernel_sz, 3, padding=1, bias=True) - self.conv3 = torch.nn.Conv2d(kernel_sz, kernel_sz, 3, padding=1, bias=False) - self.conv4 = torch.nn.Conv2d(kernel_sz, kernel_sz, 3, padding=1, bias=False) - self.hardtanh = torch.nn.Hardtanh(min_val=0, max_val=6) - self.relu = torch.nn.ReLU() - self.batch_norm = torch.nn.BatchNorm2d(kernel_sz) - self.add = torch.add 
- self.mean = torch.mean - self.reshape = torch.reshape - self.linear = torch.nn.Linear(4, 10) - self.permute = torch.permute - self.eval() - - def forward(self, x, y): - x1 = self.conv1(x) - x2 = self.batch_norm(x1) - x3 = self.relu(x2) - x4 = self.conv2(x3) - x5 = self.relu(x4) - y1 = self.conv3(y) - y2 = self.batch_norm(y1) - y3 = self.relu(y2) - y4 = self.conv4(y3) - y5 = self.relu(y4) - z = self.add(x5, y5) - z1 = self.permute(z, (0, 3, 2, 1)) - z2 = torch.mean(z1, [1, 2], True) - z3 = self.reshape(z2, (8, -1)) - z4 = self.linear(z3) - z5 = self.hardtanh(z4) - return z5 - - -class SliceCopy(torch.nn.Module): - def __init__(self): - super().__init__() - self.position_ids = torch.randn([1, 512]) - - def forward(self, x, y): - seq_length = y.size()[1] - return x[:, :seq_length] + self.position_ids[:, :seq_length] - - -class SliceCopyWithStep(torch.nn.Module): - def __init__(self): - super().__init__() - self.position_ids = torch.randn([1, 512]) - self.step = 2 - - def forward(self, x, y): - seq_length = y.size()[1] - return ( - x[:, : seq_length : self.step] - + self.position_ids[:, : seq_length : self.step] - ) - - -class Softmax(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return torch.nn.functional.softmax(x, dim=-1) - - -class Sqrt(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return torch.sqrt(x) - - -class SqrtConstant(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return x / torch.sqrt(torch.tensor([64.0])) - - -class Squeeze(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return x.squeeze() - - -class Stack(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x, y): - return torch.stack((x, y)) - - -class Sub(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x, y): - return torch.sub(x, y) - - -class 
SubConstantFloat(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return 10.0 - x - - -class SubConstantLong(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return 10 - x - - -class SumIntList(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return torch.sum(x, dim=(2, 3), keepdim=True) - - -class Tanh(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return torch.tanh(x) - - -class TopKandIndex(torch.nn.Module): - def __init__(self): - super().__init__() - self.idx_source = torch.rand(10, 3) - - def forward(self, x): - a, b = torch.topk(x, 3) - return a + self.idx_source[b] - - -class Unbind(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return torch.unbind(x) - - -class Unsqueeze(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - return x.unsqueeze(0) - - -class View(torch.nn.Module): - def __init__(self): - super().__init__() - self.first_size = 2 - self.second_size = 256 - - def forward(self, x, y): - new_shape = x.size()[:-1] + (self.first_size, self.second_size) - return x.view(new_shape) - - -class ViewPermuteMatMul(torch.nn.Module): - def __init__(self): - super().__init__() - self.first_size = 2 - self.second_size = 256 - - def forward(self, x, y): - new_shape = x.size()[:-1] + (self.first_size, self.second_size) - x = x.view(new_shape) - x = x.permute(0, 2, 1, 3) - return torch.matmul(x, y.transpose(-1, -2)) diff --git a/backends/openvino/tests/ops/base_openvino_op_test.py b/backends/openvino/tests/ops/base_openvino_op_test.py new file mode 100644 index 00000000000..c0c31e57e1b --- /dev/null +++ b/backends/openvino/tests/ops/base_openvino_op_test.py @@ -0,0 +1,153 @@ +import os +import subprocess +import tempfile +import unittest + +import numpy as np +import torch +import executorch +from 
executorch.backends.openvino.partitioner import OpenvinoPartitioner +from executorch.exir.backend.backend_details import CompileSpec +from torch.export import export, ExportedProgram +from executorch.exir import EdgeProgramManager, to_edge +from executorch.backends.openvino.preprocess import OpenvinoBackend + + +class BaseOpenvinoOpTest(unittest.TestCase): + device = "CPU" + build_folder = "" + + atol = 1e-1 + rtol = 1e-1 + + def execute_layer_test( + self, + module: torch.nn.Module, + sample_inputs: tuple[torch.Tensor], + expected_partitions: int = 1, + assert_output_equal: bool = True, + ): + + module = module.eval() + # Export to aten dialect using torch.export + aten_dialect: ExportedProgram = export(module, sample_inputs) + + # Convert to edge dialect + edge_program: EdgeProgramManager = to_edge(aten_dialect) + to_be_lowered_module = edge_program.exported_program() + + # Lower the module to the backend with a custom partitioner + compile_spec = [CompileSpec("device", self.device.encode())] + lowered_module = edge_program.to_backend(OpenvinoPartitioner(compile_spec)) + + # Apply backend-specific passes + exec_prog = lowered_module.to_executorch(config=executorch.exir.ExecutorchBackendConfig()) + + # Check if the number of partitions created matches the expected number of partitions + self.assertEqual( + len(exec_prog.executorch_program.execution_plan[0].delegates), + expected_partitions, + ) + # Check if the individual partitions are assigned to Openvino backend + for i in range(expected_partitions): + self.assertEqual( + exec_prog.executorch_program.execution_plan[0].delegates[i].id, + OpenvinoBackend.__name__, + ) + + # Execute the model and compare the outputs with the reference outputs + if (assert_output_equal): + with tempfile.TemporaryDirectory() as tmp_dir: + input_list = "" + for idx, _ in enumerate(sample_inputs): + input_name = f"input_0_{idx}.raw" + input_list += input_name + " " + input_list = input_list.strip() + "\n" + + output_dir = 
f"{tmp_dir}/outputs" + + # Execute the module in eager mode to calculate the reference outputs + ref_output = module(*sample_inputs) + if isinstance(ref_output, torch.Tensor): + ref_output = [ref_output,] + + # Serialize the executorch model and save into a temporary file + pte_fname = f"{tmp_dir}/openvino_executorch_test.pte" + with open(pte_fname, "wb") as file: + exec_prog.write_to_file(file) + + # Save inputs into a temporary file + self.generate_inputs(tmp_dir, "input_list.txt", [sample_inputs], input_list) + self.make_output_dir(output_dir) + + # Start a subprocess to execute model with openvino_executor_runner + cmd = [ + f"{self.build_folder}/examples/openvino/openvino_executor_runner", + pte_fname, + #"--input_list_path", + f"{tmp_dir}/input_list.txt", + #"--output_folder_path", + output_dir, + ] + + env = dict(os.environ) + proc = subprocess.run( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + env=env, + cwd=tmp_dir, + ) + + stdout_str = proc.stdout.decode('utf-8') + + # Check if execution completed successfully + self.assertIn("Model executed successfully.", stdout_str) + + # Read the outputs from the temporary files + output_dir = f"{tmp_dir}/outputs" + outputs = [] + + for i, f in enumerate(sorted(os.listdir(output_dir))): + filename = os.path.join(output_dir, f) + output = np.fromfile(filename, dtype=ref_output[i].numpy().dtype) + output = torch.from_numpy(output).reshape(ref_output[i].shape) + outputs.append(output) + + # Compare the outputs with the reference outputs + self.assertTrue(len(ref_output) == len(outputs)) + for i in range(len(ref_output)): + self.assertTrue( + torch.allclose( + outputs[i], ref_output[i], atol=self.atol, rtol=self.rtol + ), + msg=f"ref_output:\n{ref_output[i]}\n\ntest_output:\n{outputs[i]}", + ) + + def generate_inputs(self, dest_path: str, file_name: str, inputs=None, input_list=None): + input_list_file = None + input_files = [] + + # Prepare input list + if input_list is not None: + input_list_file = 
f"{dest_path}/{file_name}" + with open(input_list_file, "w") as f: + f.write(input_list) + f.flush() + + # Prepare input data + if inputs is not None: + for idx, data in enumerate(inputs): + for i, d in enumerate(data): + file_name = f"{dest_path}/input_{idx}_{i}.raw" + d.detach().numpy().tofile(file_name) + input_files.append(file_name) + + return input_list_file, input_files + + def make_output_dir(self, path: str): + if os.path.exists(path): + for f in os.listdir(path): + os.remove(os.path.join(path, f)) + os.removedirs(path) + os.makedirs(path) diff --git a/backends/openvino/tests/ops/test_add.py b/backends/openvino/tests/ops/test_add.py new file mode 100644 index 00000000000..baccea29851 --- /dev/null +++ b/backends/openvino/tests/ops/test_add.py @@ -0,0 +1,19 @@ +from executorch.backends.openvino.tests.ops.base_openvino_op_test import BaseOpenvinoOpTest +import torch + +class TestAddOperator(BaseOpenvinoOpTest): + + def create_model(self): + class Add(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x, y): + return torch.add(x, y) + + return Add() + + def test_add(self): + module = self.create_model() + sample_input = (torch.ones(2, 5, 1, 3), torch.ones(2, 5, 1, 3)) + self.execute_layer_test(module, sample_input) diff --git a/backends/openvino/tests/ops/test_arange.py b/backends/openvino/tests/ops/test_arange.py new file mode 100644 index 00000000000..c425841df49 --- /dev/null +++ b/backends/openvino/tests/ops/test_arange.py @@ -0,0 +1,21 @@ +from executorch.backends.openvino.tests.ops.base_openvino_op_test import BaseOpenvinoOpTest +import torch + +class TestArangeOperator(BaseOpenvinoOpTest): + + def create_model(self, x): + class Arange(torch.nn.Module): + def __init__(self, x): + super().__init__() + self.x = x + + def forward(self, y): + return torch.arange(self.x, dtype=torch.float32) + y + + return Arange(5) + + def test_arange(self): + module = self.create_model(5) + #sample_input = (torch.randn(5),) + sample_input = 
(torch.ones(5),) + self.execute_layer_test(module, sample_input) diff --git a/backends/openvino/tests/test_openvino_delegate.py b/backends/openvino/tests/test_openvino_delegate.py index 5525b6ca5f3..a64c7bd1127 100644 --- a/backends/openvino/tests/test_openvino_delegate.py +++ b/backends/openvino/tests/test_openvino_delegate.py @@ -1,170 +1,26 @@ -import io -import json -import subprocess -import sys -import tempfile import unittest -from multiprocessing.connection import Listener -from pathlib import Path - -import numpy as np - -import torch -import executorch -from executorch.backends.openvino.partitioner import OpenvinoPartitioner -from executorch.exir.backend.backend_details import CompileSpec -from torch.export import export, ExportedProgram -from executorch.exir import EdgeProgramManager, to_edge -from executorch.backends.openvino.preprocess import OpenvinoBackend -from executorch.exir.program import ExecutorchProgram, ExecutorchProgramManager -from executorch.backends.openvino.tests.models import * - -import os -import random - -from collections import defaultdict -from typing import List - import argparse -class TestOpenVINO(unittest.TestCase): - device = "CPU" - build_folder = None - - def execute_layer_test( - self, - module: torch.nn.Module, - sample_inputs: tuple[torch.Tensor], - expected_partitions: int = 1, - assert_output_equal: bool = True, - ): - - module = module.eval() - aten_dialect: ExportedProgram = export(module, sample_inputs) - - edge_program: EdgeProgramManager = to_edge(aten_dialect) - to_be_lowered_module = edge_program.exported_program() - - compile_spec = [CompileSpec("device", self.device.encode())] - lowered_module = edge_program.to_backend(OpenvinoPartitioner(compile_spec)) - - exec_prog = lowered_module.to_executorch(config=executorch.exir.ExecutorchBackendConfig()) - - self.assertEqual( - len(exec_prog.executorch_program.execution_plan[0].delegates), - expected_partitions, - ) - for i in range(expected_partitions): - 
self.assertEqual( - exec_prog.executorch_program.execution_plan[0].delegates[i].id, - OpenvinoBackend.__name__, - ) - - if (assert_output_equal): - with tempfile.TemporaryDirectory() as tmp_dir: - input_list = "" - for idx, _ in enumerate(sample_inputs): - input_name = f"input_0_{idx}.raw" - input_list += input_name + " " - input_list = input_list.strip() + "\n" - - output_dir = f"{tmp_dir}/outputs" - - ref_output = module(*sample_inputs) - if isinstance(ref_output, torch.Tensor): - ref_output = [ref_output,] - - pte_fname = f"{tmp_dir}/openvino_executorch_test.pte" - with open(pte_fname, "wb") as file: - exec_prog.write_to_file(file) - - - self.generate_inputs(tmp_dir, "input_list.txt", [sample_inputs], input_list) - self.make_output_dir(output_dir) - - cmd = [ - # openvino_executor_runner - f"{self.build_folder}/examples/openvino/openvino_executor_runner", - pte_fname, - #"--input_list_path", - f"{tmp_dir}/input_list.txt", - #"--output_folder_path", - output_dir, - #"--method_index", - #str(method_index), - ] - - env = dict(os.environ) - proc = subprocess.run( - cmd, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - env=env, - cwd=tmp_dir, - ) - - stdout_str = proc.stdout.decode('utf-8') - self.assertIn("Model executed successfully.", stdout_str) - - output_dir = f"{tmp_dir}/outputs" - outputs = [] - - for i, f in enumerate(sorted(os.listdir(output_dir))): - filename = os.path.join(output_dir, f) - output = np.fromfile(filename, dtype=ref_output[i].numpy().dtype) - output = torch.from_numpy(output).reshape(ref_output[i].shape) - outputs.append(output) - - self.assertTrue(len(ref_output) == len(outputs)) - for i in range(len(ref_output)): - self.assertTrue( - torch.allclose( - outputs[i], ref_output[i], atol=self.atol, rtol=self.rtol - ), - msg=f"ref_output:\n{ref_output[i]}\n\ntest_output:\n{outputs[i]}", - ) - - def generate_inputs(self, dest_path: str, file_name: str, inputs=None, input_list=None): - input_list_file = None - input_files = [] - - # Prepare 
input list - if input_list is not None: - input_list_file = f"{dest_path}/{file_name}" - with open(input_list_file, "w") as f: - f.write(input_list) - f.flush() - - # Prepare input data - if inputs is not None: - for idx, data in enumerate(inputs): - for i, d in enumerate(data): - file_name = f"{dest_path}/input_{idx}_{i}.raw" - d.detach().numpy().tofile(file_name) - input_files.append(file_name) - - return input_list_file, input_files - - def make_output_dir(self, path: str): - if os.path.exists(path): - for f in os.listdir(path): - os.remove(os.path.join(path, f)) - os.removedirs(path) - os.makedirs(path) +class OpenvinoTestSuite(unittest.TestSuite): + test_params = {} -class TestOpenVINOloatingPointOperator(TestOpenVINO): - atol = 1e-1 - rtol = 1e-1 + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) - def test_openvino_backend_arange(self): - module = Arange(5) # noqa: F405 - #sample_input = (torch.randn(5),) - sample_input = (torch.ones(5),) - self.execute_layer_test(module, sample_input) + def addTest(self, test): + # Set test parameters if this is an instance of TestOpenvino + from executorch.backends.openvino.tests.ops.base_openvino_op_test import BaseOpenvinoOpTest + if isinstance(test, BaseOpenvinoOpTest): + if "device" in self.test_params: + test.device = self.test_params["device"] + if "build_folder" in self.test_params: + test.build_folder = self.test_params["build_folder"] + # Call the original addTest method to actually add the test to the suite + super().addTest(test) -def setup_environment(): +def parse_arguments(): parser = argparse.ArgumentParser() parser.add_argument( @@ -183,10 +39,18 @@ def setup_environment(): ) args, ns_args = parser.parse_known_args(namespace=unittest) - TestOpenVINO.device = args.device - TestOpenVINO.build_folder = args.build_folder - return sys.argv[:1] + ns_args + test_params = {} + test_params["device"] = args.device + test_params["build_folder"] = args.build_folder + return test_params if 
__name__ == "__main__": - ut_args = setup_environment() - unittest.main(argv=ut_args) + loader = unittest.TestLoader() + # Replace the default test suite with a custom test suite to be able to + # pass test parameter to the test cases + loader.suiteClass = OpenvinoTestSuite + loader.suiteClass.test_params = parse_arguments() + # Discover all existing op tests in "ops" folder + suite = loader.discover("ops", pattern='test_*.py') + # Start running tests + unittest.TextTestRunner().run(suite) From e0b1bb7fca6a1f0991e5329eb0b13acc9d93d1d5 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Thu, 30 Jan 2025 18:43:03 -0800 Subject: [PATCH 020/188] Input/Output processing for example and unit tests --- .../tests/ops/base_openvino_op_test.py | 5 +- backends/openvino/tests/ops/test_add.py | 2 +- backends/openvino/tests/ops/test_arange.py | 3 +- .../openvino/tests/test_openvino_delegate.py | 13 ++- .../openvino_executor_runner.cpp | 95 ++++++++++++++++++- 5 files changed, 110 insertions(+), 8 deletions(-) diff --git a/backends/openvino/tests/ops/base_openvino_op_test.py b/backends/openvino/tests/ops/base_openvino_op_test.py index c0c31e57e1b..4d03096b51e 100644 --- a/backends/openvino/tests/ops/base_openvino_op_test.py +++ b/backends/openvino/tests/ops/base_openvino_op_test.py @@ -83,10 +83,11 @@ def execute_layer_test( # Start a subprocess to execute model with openvino_executor_runner cmd = [ f"{self.build_folder}/examples/openvino/openvino_executor_runner", + "--model_path", pte_fname, - #"--input_list_path", + "--input_list_path", f"{tmp_dir}/input_list.txt", - #"--output_folder_path", + "--output_folder_path", output_dir, ] diff --git a/backends/openvino/tests/ops/test_add.py b/backends/openvino/tests/ops/test_add.py index baccea29851..e70d7f65afe 100644 --- a/backends/openvino/tests/ops/test_add.py +++ b/backends/openvino/tests/ops/test_add.py @@ -15,5 +15,5 @@ def forward(self, x, y): def test_add(self): module = self.create_model() - sample_input = (torch.ones(2, 5, 
1, 3), torch.ones(2, 5, 1, 3)) + sample_input = (torch.rand(2, 5, 1, 3), torch.rand(2, 5, 1, 3)) self.execute_layer_test(module, sample_input) diff --git a/backends/openvino/tests/ops/test_arange.py b/backends/openvino/tests/ops/test_arange.py index c425841df49..0dd739a2585 100644 --- a/backends/openvino/tests/ops/test_arange.py +++ b/backends/openvino/tests/ops/test_arange.py @@ -16,6 +16,5 @@ def forward(self, y): def test_arange(self): module = self.create_model(5) - #sample_input = (torch.randn(5),) - sample_input = (torch.ones(5),) + sample_input = (torch.randn(5),) self.execute_layer_test(module, sample_input) diff --git a/backends/openvino/tests/test_openvino_delegate.py b/backends/openvino/tests/test_openvino_delegate.py index a64c7bd1127..bbf61d1ea09 100644 --- a/backends/openvino/tests/test_openvino_delegate.py +++ b/backends/openvino/tests/test_openvino_delegate.py @@ -37,11 +37,19 @@ def parse_arguments(): type=str, default="CPU", ) + parser.add_argument( + "-p", + "--pattern", + help="Pattern to match test files. 
Provide complete file name to run individual op tests", + type=str, + default="test_*.py", + ) args, ns_args = parser.parse_known_args(namespace=unittest) test_params = {} test_params["device"] = args.device test_params["build_folder"] = args.build_folder + test_params["pattern"] = args.pattern return test_params if __name__ == "__main__": @@ -49,8 +57,9 @@ def parse_arguments(): # Replace the default test suite with a custom test suite to be able to # pass test parameter to the test cases loader.suiteClass = OpenvinoTestSuite - loader.suiteClass.test_params = parse_arguments() + test_params = parse_arguments() + loader.suiteClass.test_params = test_params # Discover all existing op tests in "ops" folder - suite = loader.discover("ops", pattern='test_*.py') + suite = loader.discover("ops", pattern=test_params['pattern']) # Start running tests unittest.TextTestRunner().run(suite) diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp index 67bb35d9701..b6e13218773 100644 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -16,7 +17,7 @@ // Define a fixed-size memory pool for the method allocator (4 MB) static uint8_t method_allocator_pool[4 * 1024U * 1024U]; // 4 MB -// Define command-line flags for model path and the number of iterations +// Define command-line flags for model path, the number of iterations, input list path, and output folder path DEFINE_string( model_path, "", @@ -25,6 +26,14 @@ DEFINE_int32( num_iter, 1, "Number of inference iterations (default is 1)."); +DEFINE_string( + input_list_path, + "", + "Path to the input list file which includes the list of raw input tensor files (optional)."); +DEFINE_string( + output_folder_path, + "", + "Path to the output folder to save raw output tensor files (optional)."); using 
executorch::extension::FileDataLoader; using executorch::extension::prepare_input_tensors; @@ -38,6 +47,7 @@ using executorch::runtime::MethodMeta; using executorch::runtime::Program; using executorch::runtime::Result; using executorch::runtime::Span; +using executorch::runtime::TensorInfo; int main(int argc, char** argv) { // Initialize the runtime environment @@ -128,6 +138,72 @@ int main(int argc, char** argv) { inputs.ok(), "Could not prepare inputs: 0x%" PRIx32, static_cast(inputs.error())); + + // If the input path list is provided, read input tensors from the files + if (!(FLAGS_input_list_path.empty())) { + const char* input_list_path = FLAGS_input_list_path.c_str(); + ET_LOG(Info, "Loading input tensors from the list provided in %s.", input_list_path); + Error status = Error::Ok; + std::vector inputs(method->inputs_size()); + ET_LOG(Info, "%zu inputs: ", inputs.size()); + status = method->get_inputs(inputs.data(), inputs.size()); + ET_CHECK(status == Error::Ok); + + auto split = [](std::string s, std::string delimiter) { + size_t pos_start = 0, pos_end, delim_len = delimiter.length(); + std::string token; + std::vector res; + + while ((pos_end = s.find(delimiter, pos_start)) != std::string::npos) { + token = s.substr(pos_start, pos_end - pos_start); + pos_start = pos_end + delim_len; + res.push_back(token); + } + res.push_back(s.substr(pos_start)); + return res; + }; + + // Read raw input tensor file names from input list file and + // iterate each raw input tensor file to read values + std::ifstream input_list(input_list_path); + if (input_list.is_open()) { + size_t num_inputs = method->inputs_size(); + std::string file_path; + while (std::getline(input_list, file_path)) { + auto input_files = split(file_path, " "); + if (input_files.size() == 0) { + break; + } + for (int input_index = 0; input_index < num_inputs; ++input_index) { + MethodMeta method_meta = method->method_meta(); + Result tensor_meta = + method_meta.input_tensor_meta(input_index); + auto 
input_data_ptr = inputs[input_index].toTensor().data_ptr(); + + std::ifstream fin(input_files[input_index], std::ios::binary); + fin.seekg(0, fin.end); + size_t file_size = fin.tellg(); + + ET_CHECK_MSG( + file_size == tensor_meta->nbytes(), + "Input(%d) size mismatch. file bytes: %zu, tensor bytes: %zu", + input_index, + file_size, + tensor_meta->nbytes()); + + fin.seekg(0, fin.beg); + fin.read( + static_cast(input_data_ptr), + file_size); + fin.close(); + } + } + } else { + ET_CHECK_MSG(false, + "Failed to read input list file: %s", + input_list_path); + } + } ET_LOG(Info, "Inputs prepared."); // Measure execution time for inference @@ -161,6 +237,23 @@ int main(int argc, char** argv) { status = method->get_outputs(outputs.data(), outputs.size()); ET_CHECK(status == Error::Ok); + // If output folder path is provided, save output tensors + // into raw tensor files. + if (!(FLAGS_output_folder_path.empty())) { + const char* output_folder_path = FLAGS_output_folder_path.c_str(); + ET_LOG(Info, "Saving output tensors into the output folder: %s.", output_folder_path); + for (size_t output_index = 0; output_index < method->outputs_size(); + output_index++) { + auto output_tensor = outputs[output_index].toTensor(); + auto output_file_name = std::string(output_folder_path) + "/output_" + + std::to_string(output_index) + ".raw"; + std::ofstream fout(output_file_name.c_str(), std::ios::binary); + fout.write( + output_tensor.const_data_ptr(), output_tensor.nbytes()); + fout.close(); + } + } + return 0; } From 91087706938f7ec82d50262309f1a6741a8abe24 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Fri, 31 Jan 2025 12:32:21 -0800 Subject: [PATCH 021/188] Added executorch parameter to openvino_compile call --- backends/openvino/preprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/openvino/preprocess.py b/backends/openvino/preprocess.py index 96df9faba85..8f6991afdd3 100644 --- a/backends/openvino/preprocess.py +++ 
b/backends/openvino/preprocess.py @@ -38,7 +38,7 @@ def preprocess( for spec in module_compile_spec: compile_options[spec.key] = spec.value.decode() - compiled = openvino_compile(edge_program.module(), *args, options=compile_options) + compiled = openvino_compile(edge_program.module(), *args, options=compile_options, executorch=True) model_bytes = compiled.export_model() return PreprocessResult(processed_bytes=model_bytes) From ecbe5e259c4dbefd497f1f0236552899d0718135 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Fri, 31 Jan 2025 19:40:29 -0800 Subject: [PATCH 022/188] New op unit tests added --- .../tests/ops/base_openvino_op_test.py | 4 +- backends/openvino/tests/ops/test_add.py | 2 +- backends/openvino/tests/ops/test_addmm.py | 25 +++++ .../openvino/tests/ops/test_batch_norm.py | 51 +++++++++ .../openvino/tests/ops/test_convolution.py | 105 ++++++++++++++++++ backends/openvino/tests/ops/test_mean.py | 59 ++++++++++ backends/openvino/tests/ops/test_permute.py | 30 +++++ backends/openvino/tests/ops/test_pooling.py | 65 +++++++++++ backends/openvino/tests/ops/test_unary_ops.py | 36 ++++++ backends/openvino/tests/ops/test_view.py | 32 ++++++ 10 files changed, 406 insertions(+), 3 deletions(-) create mode 100644 backends/openvino/tests/ops/test_addmm.py create mode 100644 backends/openvino/tests/ops/test_batch_norm.py create mode 100644 backends/openvino/tests/ops/test_convolution.py create mode 100644 backends/openvino/tests/ops/test_mean.py create mode 100644 backends/openvino/tests/ops/test_permute.py create mode 100644 backends/openvino/tests/ops/test_pooling.py create mode 100644 backends/openvino/tests/ops/test_unary_ops.py create mode 100644 backends/openvino/tests/ops/test_view.py diff --git a/backends/openvino/tests/ops/base_openvino_op_test.py b/backends/openvino/tests/ops/base_openvino_op_test.py index 4d03096b51e..a51b99e8eca 100644 --- a/backends/openvino/tests/ops/base_openvino_op_test.py +++ b/backends/openvino/tests/ops/base_openvino_op_test.py 
@@ -111,7 +111,7 @@ def execute_layer_test( for i, f in enumerate(sorted(os.listdir(output_dir))): filename = os.path.join(output_dir, f) - output = np.fromfile(filename, dtype=ref_output[i].numpy().dtype) + output = np.fromfile(filename, dtype=ref_output[i].detach().numpy().dtype) output = torch.from_numpy(output).reshape(ref_output[i].shape) outputs.append(output) @@ -120,7 +120,7 @@ def execute_layer_test( for i in range(len(ref_output)): self.assertTrue( torch.allclose( - outputs[i], ref_output[i], atol=self.atol, rtol=self.rtol + outputs[i], ref_output[i], atol=self.atol, rtol=self.rtol, equal_nan=True ), msg=f"ref_output:\n{ref_output[i]}\n\ntest_output:\n{outputs[i]}", ) diff --git a/backends/openvino/tests/ops/test_add.py b/backends/openvino/tests/ops/test_add.py index e70d7f65afe..d298f77e792 100644 --- a/backends/openvino/tests/ops/test_add.py +++ b/backends/openvino/tests/ops/test_add.py @@ -15,5 +15,5 @@ def forward(self, x, y): def test_add(self): module = self.create_model() - sample_input = (torch.rand(2, 5, 1, 3), torch.rand(2, 5, 1, 3)) + sample_input = (torch.randn(2, 5, 1, 3), torch.randn(2, 5, 1, 3)) self.execute_layer_test(module, sample_input) diff --git a/backends/openvino/tests/ops/test_addmm.py b/backends/openvino/tests/ops/test_addmm.py new file mode 100644 index 00000000000..32f09ebdc29 --- /dev/null +++ b/backends/openvino/tests/ops/test_addmm.py @@ -0,0 +1,25 @@ +from executorch.backends.openvino.tests.ops.base_openvino_op_test import BaseOpenvinoOpTest +import torch + +class TestAddMMOperator(BaseOpenvinoOpTest): + + def create_model(self): + class AddMM(torch.nn.Module): + def __init__(self): + super().__init__() + self.alpha = 1. + self.beta = 1. 
+ + def forward(self, x, y, z): + #return torch.add(x, y) + return torch.addmm(x, y, z, alpha=self.alpha, beta=self.beta) + + return AddMM() + + def test_addmm(self): + module = self.create_model() + input_x = torch.randn(4,4, dtype=torch.float32) + input_y = torch.randn(4,4, dtype=torch.float32) + input_z = torch.randn(4,4, dtype=torch.float32) + sample_input = (input_x, input_y, input_z) + self.execute_layer_test(module, sample_input) diff --git a/backends/openvino/tests/ops/test_batch_norm.py b/backends/openvino/tests/ops/test_batch_norm.py new file mode 100644 index 00000000000..ecb76860434 --- /dev/null +++ b/backends/openvino/tests/ops/test_batch_norm.py @@ -0,0 +1,51 @@ +from executorch.backends.openvino.tests.ops.base_openvino_op_test import BaseOpenvinoOpTest +import torch + +op_params = [{'weights': True, 'bias': True, 'eps': 1.0 }, + {'weights': True, 'bias': True, 'eps': 0.00005 }, + {'weights': True, 'bias': True, 'eps': 0.5 }, + {'weights': True, 'bias': True, 'eps': 0.042 }, + {'weights': True, 'bias': False, 'eps': 1.0 }, + {'weights': True, 'bias': False, 'eps': 0.00005 }, + {'weights': True, 'bias': False, 'eps': 0.5 }, + {'weights': True, 'bias': False, 'eps': 0.042 }, + {'weights': False, 'bias': True, 'eps': 1.0 }, + {'weights': False, 'bias': True, 'eps': 0.00005 }, + {'weights': False, 'bias': True, 'eps': 0.5 }, + {'weights': False, 'bias': True, 'eps': 0.042 }, + {'weights': False, 'bias': False, 'eps': 1.0 }, + {'weights': False, 'bias': False, 'eps': 0.00005 }, + {'weights': False, 'bias': False, 'eps': 0.5 }, + {'weights': False, 'bias': False, 'eps': 0.042 }, + ] + + +class TestBatchNormOperator(BaseOpenvinoOpTest): + + def create_model(self, weights, bias, eps): + + class BatchNorm(torch.nn.Module): + def __init__(self, weights=True, bias=True, eps=1e-05): + super(BatchNorm, self).__init__() + self.weight = torch.nn.Parameter(torch.randn(6)) if weights else None + self.bias = torch.nn.Parameter(torch.randn(6)) if bias else None + 
self.running_mean = torch.randn(6) + self.running_var = torch.randn(6) + self.eps = eps + + def forward(self, x): + return torch.nn.functional.batch_norm(x, self.running_mean, self.running_var, self.weight, self.bias, eps=self.eps, training=False) + + return BatchNorm(weights, bias, eps) + + + def test_batch_norm(self): + for params in op_params: + with self.subTest(params=params): + module = self.create_model(weights=params['weights'], + bias=params['bias'], + eps=params['eps']) + + sample_input = (torch.randn(20, 6, 10),) + + self.execute_layer_test(module, sample_input) diff --git a/backends/openvino/tests/ops/test_convolution.py b/backends/openvino/tests/ops/test_convolution.py new file mode 100644 index 00000000000..83a80282089 --- /dev/null +++ b/backends/openvino/tests/ops/test_convolution.py @@ -0,0 +1,105 @@ +from executorch.backends.openvino.tests.ops.base_openvino_op_test import BaseOpenvinoOpTest +import torch + +d2_params = [{'weights_shape': [3, 3, 2, 2], 'strides': [1, 1], 'pads': [0, 0], 'dilations': [1, 1], 'groups': 1, + 'output_padding': [0, 0], 'transposed': True}, + {'weights_shape': [3, 3, 2, 2], 'strides': [1, 1], 'pads': [0, 0], 'dilations': [ + 1, 1], 'groups': 1, 'output_padding': [0, 0], 'transposed': False}, + {'weights_shape': [3, 1, 1, 1], 'strides': [1, 1], 'pads': [0, 0], 'dilations': [ + 1, 1], 'groups': 3, 'output_padding': [0, 0], 'transposed': True}, + {'weights_shape': [3, 1, 1, 1], 'strides': [1, 1], 'pads': [0, 0], 'dilations': [ + 1, 1], 'groups': 3, 'output_padding': [0, 0], 'transposed': False}, + {'weights_shape': [3, 1, 1, 1], 'strides': [1, 1], 'bias_shape': [1], 'pads': [ + 1, 1], 'dilations': [1, 1], 'groups': 1, 'output_padding': [0, 0], 'transposed': True}, + {'weights_shape': [3, 3, 1, 1], 'strides': [1, 1], 'pads': [ + 1, 1], 'dilations': [1, 1], 'groups': 1, 'output_padding': [0, 0], 'transposed': False}, + {'weights_shape': [3, 1, 1, 1], 'strides': [1, 1], 'bias_shape': [1], 'pads': [ + 3, 1], 'dilations': [1, 
1], 'groups': 1, 'output_padding': [0, 0], 'transposed': True}, + {'weights_shape': [3, 3, 1, 1], 'strides': [1, 1], 'pads': [ + 3, 1], 'dilations': [1, 1], 'groups': 1, 'output_padding': [0, 0], 'transposed': False}, + {'weights_shape': [3, 1, 1, 1], 'strides': [1, 1], 'bias_shape': [1], 'pads': [ + 1, 0], 'dilations': [1, 1], 'groups': 1, 'output_padding': [0, 0], 'transposed': True}, + {'weights_shape': [3, 3, 1, 1], 'strides': [1, 1], 'pads': [ + 0, 1], 'dilations': [1, 1], 'groups': 1, 'output_padding': [0, 0], 'transposed': False}, + {'weights_shape': [3, 1, 1, 1], 'strides': [1, 1], 'pads': [ + 1, 0], 'dilations': [1, 1], 'groups': 3, 'output_padding': [0, 0], 'transposed': True}, + {'weights_shape': [3, 1, 1, 1], 'strides': [1, 1], 'pads': [ + 0, 1], 'dilations': [1, 1], 'groups': 3, 'output_padding': [0, 0], 'transposed': False}, + {'weights_shape': [3, 1, 1, 1], 'strides': [1, 1], 'pads': [ + 1, 0], 'dilations': [2, 2], 'groups': 3, 'output_padding': [0, 0], 'transposed': True}, + {'weights_shape': [3, 1, 1, 1], 'strides': [1, 1], 'pads': [ + 0, 0], 'dilations': [2, 2], 'groups': 3, 'output_padding': [0, 0], 'transposed': False}, + {'weights_shape': [3, 1, 1, 1], 'strides': [2, 1], 'bias_shape': [1], 'pads': [ + 1, 0], 'dilations': [1, 1], 'groups': 1, 'output_padding': [0, 0], 'transposed': True}, + {'weights_shape': [3, 3, 1, 1], 'strides': [2, 1], 'pads': [ + 0, 0], 'dilations': [1, 1], 'groups': 1, 'output_padding': [0, 0], 'transposed': False}, + {'weights_shape': [3, 1, 1, 1], 'strides': [2, 2], 'bias_shape': [1], 'pads': [ + 0, 0], 'dilations': [1, 1], 'groups': 1, 'output_padding': [0, 0], 'transposed': True}, + {'weights_shape': [3, 3, 1, 1], 'strides': [2, 2], 'pads': [ + 0, 0], 'dilations': [1, 1], 'groups': 1, 'output_padding': [0, 0], 'transposed': False}, + {'weights_shape': [3, 3, 1, 1], 'strides': [2, 1], 'pads': [ + 0, 0], 'dilations': [1, 1], 'groups': 1, 'output_padding': [0, 0], 'transposed': False}, + {'weights_shape': [3, 1, 1, 1], 
'strides': [2, 2], 'bias_shape': [1], 'pads': [ + 0, 0], 'dilations': [1, 1], 'groups': 1, 'output_padding': [0, 0], 'transposed': True}, + {'weights_shape': [3, 1, 1, 1], 'strides': [2, 2], 'bias_shape': [1], 'pads': [ + 1, 1], 'dilations': [2, 2], 'groups': 1, 'output_padding': [1, 1], 'transposed': True}, + ] + +class TestConvolutionOperator(BaseOpenvinoOpTest): + + def create_model(self, weights_shape, strides, pads, dilations, groups, bias, transposed, output_padding=0, + bias_shape=None, underscore=False): + + bias_dim = 0 + + class Convolution(torch.nn.Module): + def __init__(self): + super().__init__() + self.weight = torch.nn.Parameter(torch.randn(weights_shape)) + self.bias_shape = bias_shape + if self.bias_shape is None: + self.bias_shape = weights_shape[bias_dim] + self.bias = torch.nn.Parameter(torch.randn(self.bias_shape)) if bias else None + self.strides = strides + self.pads = pads + self.dilations = dilations + self.groups = groups + self.transposed = transposed + self.output_padding = output_padding + if underscore: + self.forward = self.forward_ + + def forward(self, x): + return torch.convolution( + x, self.weight, self.bias, self.strides, self.pads, self.dilations, self.transposed, + self.output_padding, self.groups + ) + + def forward_(self, x): + return torch._convolution( + x, self.weight, self.bias, self.strides, self.pads, self.dilations, self.transposed, + self.output_padding, self.groups, False, False, False, False + ) + + return Convolution() + + def test_convolution(self): + bias_underscore_config = [(False, False), (True, False)] + for bias, underscore in bias_underscore_config: + for params in d2_params: + with self.subTest(params=params, bias=bias, underscore=underscore): + bias_shape = None + if 'bias_shape' in params: + bias_shape = params['bias_shape'] + module = self.create_model(weights_shape=params['weights_shape'], + strides=params['strides'], + pads=params['pads'], + dilations=params['dilations'], + groups=params['groups'], 
+ output_padding=params['output_padding'], + transposed=params['transposed'], + bias_shape=bias_shape, + bias=bias, + underscore=underscore) + sample_input = (torch.randn(1, 3, 10, 10),) + self.execute_layer_test(module, sample_input) diff --git a/backends/openvino/tests/ops/test_mean.py b/backends/openvino/tests/ops/test_mean.py new file mode 100644 index 00000000000..3315fd1e61d --- /dev/null +++ b/backends/openvino/tests/ops/test_mean.py @@ -0,0 +1,59 @@ +from executorch.backends.openvino.tests.ops.base_openvino_op_test import BaseOpenvinoOpTest +import torch + +op_params = [{'axes': None, 'keep_dim': None, 'dtype': None, }, + {'axes': None, 'keep_dim': None, 'dtype': "float64",}, + {'axes': None, 'keep_dim': None, 'dtype': "float32",}, + {'axes': None, 'keep_dim': None, 'dtype': "int32", }, + {'axes': 0, 'keep_dim': False, 'dtype': None, }, + {'axes': 0, 'keep_dim': False, 'dtype': None, }, + ] + +dtypes = { + "float32": torch.float32, + "float64": torch.float64, + "int32": torch.int32, + "int64": torch.int64, + "int8": torch.int8, + "uint8": torch.uint8 +} + +class TestMeanOperator(BaseOpenvinoOpTest): + + def create_model(self, axes, keep_dims, dtype): + + pt_dtype = dtypes.get(dtype) + + class Mean(torch.nn.Module): + def __init__(self, axes=None, keep_dims=None, dtype=None): + super(Mean, self).__init__() + self.axes = axes + self.keep_dims = keep_dims + self.dtype = dtype + + def forward(self, x): + if self.axes is None and self.keep_dims is None: + if self.dtype is None: + return torch.mean(x, dtype=self.dtype) + return torch.mean(x) + if self.axes is not None and self.keep_dims is None: + if self.dtype is None: + return torch.mean(x, self.axes) + return torch.mean(x, self.axes, dtype=self.dtype) + if self.dtype is None: + return torch.mean(x, self.axes, self.keep_dims) + return torch.mean(x, self.axes, self.keep_dims, dtype=self.dtype) + + return Mean(axes, keep_dims, pt_dtype) + + + def test_mean(self): + for params in op_params: + with 
self.subTest(params=params): + module = self.create_model(axes=params['axes'], + keep_dims=params['keep_dim'], + dtype=params['dtype']) + + sample_input = (torch.randint(-10, 10, (1, 3, 224, 224)).to(dtype=torch.float32),) + + self.execute_layer_test(module, sample_input) diff --git a/backends/openvino/tests/ops/test_permute.py b/backends/openvino/tests/ops/test_permute.py new file mode 100644 index 00000000000..1de60db3965 --- /dev/null +++ b/backends/openvino/tests/ops/test_permute.py @@ -0,0 +1,30 @@ +from executorch.backends.openvino.tests.ops.base_openvino_op_test import BaseOpenvinoOpTest +import torch + +op_params = [{'order': [0, 2, 3, 1] }, + {'order': [0, 3, 1, 2] }, + ] + +class TestPermuteOperator(BaseOpenvinoOpTest): + + def create_model(self, order): + + class Permute(torch.nn.Module): + def __init__(self, order): + super(Permute, self).__init__() + self.order = order + + def forward(self, x): + return torch.permute(x, self.order) + + return Permute(order) + + + def test_permute(self): + for params in op_params: + with self.subTest(params=params): + module = self.create_model(order=params['order']) + + sample_input = (torch.randn(1, 3, 224, 224),) + + self.execute_layer_test(module, sample_input) diff --git a/backends/openvino/tests/ops/test_pooling.py b/backends/openvino/tests/ops/test_pooling.py new file mode 100644 index 00000000000..60ab2f9edfa --- /dev/null +++ b/backends/openvino/tests/ops/test_pooling.py @@ -0,0 +1,65 @@ +from executorch.backends.openvino.tests.ops.base_openvino_op_test import BaseOpenvinoOpTest +import torch + +d2_params = [{'kernel_size': [3, 3], 'stride': 1, 'padding': 0}, + {'kernel_size': [3, 3], 'stride': [1, 1], 'padding': 1}, + {'kernel_size': [3, 3], 'stride': [1, 1], 'padding': [0, 1]}, + {'kernel_size': [3, 3], 'stride': [1, 1], 'padding': [1, 0]}, + {'kernel_size': [3, 3], 'stride': [2, 1], 'padding': 0}, + {'kernel_size': [2, 1], 'stride': [2, 1], 'padding': 0}, + {'kernel_size': [2, 1], 'stride': None, 'padding': 
0}, + {'kernel_size': [2, 1], 'stride': [], 'padding': 0}, + {'kernel_size': [8, 8], 'stride': [8, 4], 'padding': 1}, + ] + +class TestPoolingOperator(BaseOpenvinoOpTest): + + def create_model(self, op_type, kernel_size, stride, padding, dilation=1, ceil_mode=True, count_include_pad=True, dtype=torch.float32): + + class MaxPoolingBase(torch.nn.Module): + def __init__(self): + super().__init__() + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + self.dilation = dilation + self.ceil_mode = ceil_mode + self.dtype = dtype + + def forward(self, x): + pass + + class MaxPool2D(MaxPoolingBase): + def forward(self, x): + return torch.nn.functional.max_pool2d(x.to(self.dtype), self.kernel_size, self.stride, self.padding, self.dilation, + self.ceil_mode) + + class MaxPool2DIndices(MaxPoolingBase): + def forward(self, x): + return torch.nn.functional.max_pool2d(x, self.kernel_size, self.stride, self.padding, self.dilation, + self.ceil_mode, return_indices=True) + + ops = { + "MaxPool2D": MaxPool2D, + "MaxPool2DIndices": MaxPool2DIndices, + } + + aten_pooling = ops[op_type] + + return aten_pooling() + + def test_pooling2d(self): + for params in d2_params: + with self.subTest(params=params): + bias_shape = None + if 'bias_shape' in params: + bias_shape = params['bias_shape'] + module = self.create_model(op_type='MaxPool2D', + kernel_size=params['kernel_size'], + stride=params['stride'], + padding=params['padding'], + dilation=1, + ceil_mode=True, + count_include_pad=True) + sample_input = (torch.randn(1, 3, 15, 15),) + self.execute_layer_test(module, sample_input) diff --git a/backends/openvino/tests/ops/test_unary_ops.py b/backends/openvino/tests/ops/test_unary_ops.py new file mode 100644 index 00000000000..9a5866d6e65 --- /dev/null +++ b/backends/openvino/tests/ops/test_unary_ops.py @@ -0,0 +1,36 @@ +from executorch.backends.openvino.tests.ops.base_openvino_op_test import BaseOpenvinoOpTest +import torch + + +OPS = [ + torch.relu, +] + + +class 
TestUnaryOperator(BaseOpenvinoOpTest): + + def create_model(self, op, dtype): + + class UnaryOp(torch.nn.Module): + def __init__(self, op, dtype): + super().__init__() + self.dtype = dtype + self.op = op + + def forward(self, x): + x1 = x.to(self.dtype) + y = self.op(x1) + return y, x1 + + return UnaryOp(op, dtype) + + + def test_unary_op(self): + for op in OPS: + with self.subTest(op=OPS): + + module = self.create_model(op, dtype=torch.float32) + + sample_input = (torch.rand(2, 10) * 10 + 1,) + + self.execute_layer_test(module, sample_input) diff --git a/backends/openvino/tests/ops/test_view.py b/backends/openvino/tests/ops/test_view.py new file mode 100644 index 00000000000..f5450a10af9 --- /dev/null +++ b/backends/openvino/tests/ops/test_view.py @@ -0,0 +1,32 @@ +from executorch.backends.openvino.tests.ops.base_openvino_op_test import BaseOpenvinoOpTest +import torch + +op_params = [{'input_shape': [2, 3, 2], 'target_shape': [2, 6] }, + {'input_shape': [4], 'target_shape': [2, 2] }, + ] + +class TestViewOperator(BaseOpenvinoOpTest): + + def create_model(self, target_shape): + + class View(torch.nn.Module): + + def __init__(self, target_shape) -> None: + super().__init__() + self.target_shape = target_shape + + def forward(self, input_tensor): + return input_tensor.view(self.target_shape) + + return View(target_shape) + + + def test_view(self): + for params in op_params: + with self.subTest(params=params): + + module = self.create_model(params['target_shape']) + + sample_input = (torch.randn(params['input_shape']),) + + self.execute_layer_test(module, sample_input) From a4d7458a6065716a666358c07dc9e2e1dd6d6c52 Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Sat, 1 Feb 2025 08:50:41 -0800 Subject: [PATCH 023/188] Added license headers to the openvino files --- backends/openvino/CMakeLists.txt | 6 ++++++ backends/openvino/partitioner.py | 6 ++++++ backends/openvino/preprocess.py | 6 ++++++ backends/openvino/runtime/OpenvinoBackend.cpp | 8 ++++++++ 
backends/openvino/runtime/OpenvinoBackend.hpp | 8 ++++++++ examples/openvino/CMakeLists.txt | 6 ++++++ examples/openvino/aot/aot_openvino_compiler.py | 6 ++++++ .../openvino/executor_runner/openvino_executor_runner.cpp | 8 ++++++++ 8 files changed, 54 insertions(+) diff --git a/backends/openvino/CMakeLists.txt b/backends/openvino/CMakeLists.txt index 129ab0435ac..4df2015a8d7 100644 --- a/backends/openvino/CMakeLists.txt +++ b/backends/openvino/CMakeLists.txt @@ -1,3 +1,9 @@ +# Copyright (c) Intel Corporation +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. + # Set C++ standard set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) diff --git a/backends/openvino/partitioner.py b/backends/openvino/partitioner.py index b0d0e18a0d7..8e621f56508 100644 --- a/backends/openvino/partitioner.py +++ b/backends/openvino/partitioner.py @@ -1,3 +1,9 @@ +# Copyright (c) Intel Corporation +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. + from typing import Callable, final, List, Optional, Tuple import torch diff --git a/backends/openvino/preprocess.py b/backends/openvino/preprocess.py index 8f6991afdd3..6af45ff63f9 100644 --- a/backends/openvino/preprocess.py +++ b/backends/openvino/preprocess.py @@ -1,3 +1,9 @@ +# Copyright (c) Intel Corporation +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. 
+ import contextlib import struct diff --git a/backends/openvino/runtime/OpenvinoBackend.cpp b/backends/openvino/runtime/OpenvinoBackend.cpp index 00be8d9a323..95d85445f7e 100644 --- a/backends/openvino/runtime/OpenvinoBackend.cpp +++ b/backends/openvino/runtime/OpenvinoBackend.cpp @@ -1,3 +1,11 @@ +/* + * Copyright (c) Intel Corporation + * + * Licensed under the BSD License (the "License"); you may not use this file + * except in compliance with the License. See the license file in the root + * directory of this source tree for more details. + */ + #include #include #include diff --git a/backends/openvino/runtime/OpenvinoBackend.hpp b/backends/openvino/runtime/OpenvinoBackend.hpp index a116aa5349c..e6f0e8659fb 100644 --- a/backends/openvino/runtime/OpenvinoBackend.hpp +++ b/backends/openvino/runtime/OpenvinoBackend.hpp @@ -1,3 +1,11 @@ +/* + * Copyright (c) Intel Corporation + * + * Licensed under the BSD License (the "License"); you may not use this file + * except in compliance with the License. See the license file in the root + * directory of this source tree for more details. + */ + #ifndef OPENVINO_BACKEND_HPP #define OPENVINO_BACKEND_HPP diff --git a/examples/openvino/CMakeLists.txt b/examples/openvino/CMakeLists.txt index 961b34efd41..64f1e8d5463 100644 --- a/examples/openvino/CMakeLists.txt +++ b/examples/openvino/CMakeLists.txt @@ -1,3 +1,9 @@ +# Copyright (c) Intel Corporation +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. 
+ set(CMAKE_CXX_STANDARD 17) cmake_minimum_required(VERSION 3.19) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index f2f26b03951..4674fbbd755 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -1,3 +1,9 @@ +# Copyright (c) Intel Corporation +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. + import executorch import timm import torch diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp index b6e13218773..7615b63649a 100644 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -1,3 +1,11 @@ +/* + * Copyright (c) Intel Corporation + * + * Licensed under the BSD License (the "License"); you may not use this file + * except in compliance with the License. See the license file in the root + * directory of this source tree for more details. 
+ */ + #include #include #include From 8302911b8085ec09a1432998ac808c32f47fb056 Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Tue, 4 Feb 2025 14:16:13 -0800 Subject: [PATCH 024/188] Get openvino backend device from compile specs --- backends/openvino/preprocess.py | 3 --- backends/openvino/runtime/OpenvinoBackend.cpp | 9 ++++++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/backends/openvino/preprocess.py b/backends/openvino/preprocess.py index 96df9faba85..cd6d5150cf3 100644 --- a/backends/openvino/preprocess.py +++ b/backends/openvino/preprocess.py @@ -12,9 +12,6 @@ from executorch.exir.backend.compile_spec_schema import CompileSpec from openvino.frontend.pytorch.torchdynamo.compile import openvino_compile -SKIP_COMPILE_SPEC_KEYS = {"ImportForever"} - - @final class OpenvinoBackend(BackendDetails): diff --git a/backends/openvino/runtime/OpenvinoBackend.cpp b/backends/openvino/runtime/OpenvinoBackend.cpp index 00be8d9a323..15007b38792 100644 --- a/backends/openvino/runtime/OpenvinoBackend.cpp +++ b/backends/openvino/runtime/OpenvinoBackend.cpp @@ -77,8 +77,15 @@ Result OpenvinoBackend::init( // Wrap the data in a stream std::istringstream compiled_stream(data_string); + auto device = "CPU"; + // Get the device value, if provided in compile sepcs + for (auto& compile_spec : compile_specs) { + if (std::strcmp(compile_spec.key, "device") == 0) + device = static_cast(compile_spec.value.buffer); + } + // Import the model - auto compiled_model = core.import_model(compiled_stream, "CPU"); + auto compiled_model = core.import_model(compiled_stream, device); // Allocate an infer request std::shared_ptr infer_request = std::make_shared(compiled_model.create_infer_request()); From 2cda72b7a0176766a4993488d008ff8e0d2391e7 Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Tue, 4 Feb 2025 15:34:21 -0800 Subject: [PATCH 025/188] Fixed formatting issues --- backends/openvino/CMakeLists.txt | 1 - backends/openvino/__init__.py | 2 +- backends/openvino/partitioner.py | 
14 +- backends/openvino/preprocess.py | 11 +- backends/openvino/runtime/OpenvinoBackend.cpp | 223 +++++++++--------- examples/openvino/CMakeLists.txt | 2 - examples/openvino/aot/README.md | 2 - .../openvino/aot/aot_openvino_compiler.py | 48 +++- .../openvino_executor_runner.cpp | 15 +- 9 files changed, 172 insertions(+), 146 deletions(-) diff --git a/backends/openvino/CMakeLists.txt b/backends/openvino/CMakeLists.txt index 129ab0435ac..f3fda7fe5ca 100644 --- a/backends/openvino/CMakeLists.txt +++ b/backends/openvino/CMakeLists.txt @@ -68,4 +68,3 @@ target_link_options(openvino_backend PRIVATE -Wl,-rpath=${OPENVINO_LIB_PATH}) # Install the OpenVINO backend library to the lib directory install(TARGETS openvino_backend DESTINATION lib) - diff --git a/backends/openvino/__init__.py b/backends/openvino/__init__.py index dac275d3f12..7460fe46b0b 100644 --- a/backends/openvino/__init__.py +++ b/backends/openvino/__init__.py @@ -1,4 +1,4 @@ from .partitioner import OpenvinoPartitioner from .preprocess import OpenvinoBackend -__all__ = [OpenvinoBackend, OpenvinoPartitioner] +__all__ = [OpenvinoBackend, OpenvinoPartitioner] diff --git a/backends/openvino/partitioner.py b/backends/openvino/partitioner.py index b0d0e18a0d7..88e94fedb03 100644 --- a/backends/openvino/partitioner.py +++ b/backends/openvino/partitioner.py @@ -1,6 +1,7 @@ from typing import Callable, final, List, Optional, Tuple import torch +import torch.fx as fx from executorch.backends.openvino.preprocess import OpenvinoBackend from executorch.exir.backend.backend_details import CompileSpec from executorch.exir.backend.partitioner import ( @@ -9,12 +10,12 @@ PartitionResult, ) from executorch.exir.backend.utils import tag_constant_data +from openvino.frontend.pytorch.torchdynamo.op_support import OperatorSupport from torch.export.exported_program import ExportedProgram from torch.fx.passes.infra.partitioner import CapabilityBasedPartitioner from torch.fx.passes.operator_support import OperatorSupportBase -import 
torch.fx as fx -from openvino.frontend.pytorch.torchdynamo.op_support import OperatorSupport + class OpenvinoOperatorsSupport(OperatorSupportBase): @@ -38,10 +39,10 @@ def is_node_supported(self, _, node: torch.fx.Node) -> bool: options = [] op_type = node.target.__name__ supported_ops = OperatorSupport(options)._support_dict - if (op_type == "getitem"): + if op_type == "getitem": return True - if ("torch.ops." + str(op_type) in supported_ops): + if "torch.ops." + str(op_type) in supported_ops: return True else: print("Op not supported: ", "torch.ops." + str(op_type)) @@ -52,7 +53,7 @@ def is_node_supported(self, _, node: torch.fx.Node) -> bool: ) return False - return False + return False @final @@ -82,13 +83,12 @@ def ops_to_not_decompose( return (ops_not_decompose, None) def partition(self, exported_program: ExportedProgram) -> PartitionResult: - options = {} gm = fx.symbolic_trace(exported_program.graph_module) partitioner = CapabilityBasedPartitioner( exported_program.graph_module, OpenvinoOperatorsSupport(self._op_types_to_skip, self._op_names_to_skip), - allows_single_node_partition=True + allows_single_node_partition=True, ) partition_list = partitioner.propose_partitions() diff --git a/backends/openvino/preprocess.py b/backends/openvino/preprocess.py index cd6d5150cf3..e2aefe7d729 100644 --- a/backends/openvino/preprocess.py +++ b/backends/openvino/preprocess.py @@ -1,7 +1,7 @@ import contextlib import struct -from typing import final, List, cast +from typing import cast, final, List import torch from executorch.exir.backend.backend_details import ( @@ -12,6 +12,7 @@ from executorch.exir.backend.compile_spec_schema import CompileSpec from openvino.frontend.pytorch.torchdynamo.compile import openvino_compile + @final class OpenvinoBackend(BackendDetails): @@ -25,8 +26,8 @@ def preprocess( output_names = edge_program.graph_signature.user_outputs args = [] for node in edge_program.graph.nodes: - if (node.target in input_names): - args.append( 
node.meta["val"]) + if node.target in input_names: + args.append(node.meta["val"]) input_shapes = [] output_shapes = [] @@ -35,7 +36,9 @@ def preprocess( for spec in module_compile_spec: compile_options[spec.key] = spec.value.decode() - compiled = openvino_compile(edge_program.module(), *args, options=compile_options) + compiled = openvino_compile( + edge_program.module(), *args, options=compile_options + ) model_bytes = compiled.export_model() return PreprocessResult(processed_bytes=model_bytes) diff --git a/backends/openvino/runtime/OpenvinoBackend.cpp b/backends/openvino/runtime/OpenvinoBackend.cpp index 15007b38792..f9c7a67ef78 100644 --- a/backends/openvino/runtime/OpenvinoBackend.cpp +++ b/backends/openvino/runtime/OpenvinoBackend.cpp @@ -1,6 +1,6 @@ #include -#include #include +#include #include @@ -31,150 +31,159 @@ namespace backends { namespace openvino { OpenvinoBackend::OpenvinoBackend() { - if (!is_available()) { - //ET_LOG(Error, "OpenVINO runtime is not available. Initialization failed."); - throw std::runtime_error("OpenVINO runtime not available"); - } + if (!is_available()) { + // ET_LOG(Error, "OpenVINO runtime is not available. Initialization + // failed."); + throw std::runtime_error("OpenVINO runtime not available"); + } - //ET_LOG(Info, "OpenVINO runtime successfully verified and initialized."); + // ET_LOG(Info, "OpenVINO runtime successfully verified and initialized."); } bool OpenvinoBackend::is_available() const { - try { - // Create an OpenVINO Core object to verify runtime availability - ov::Core core; - - // Check if at least one device is available - auto devices = core.get_available_devices(); - if (!devices.empty()) { - return true; // OpenVINO is available - } - } catch (const std::exception& e) { - // Log the exception if OpenVINO runtime is not available - ET_LOG(Error, "OpenVINO is not available: %s", e.what()); - } catch (...) 
{ - // Handle any unexpected errors - ET_LOG(Error, "OpenVINO availability check failed due to an unknown error."); - } + try { + // Create an OpenVINO Core object to verify runtime availability + ov::Core core; - return false; // OpenVINO is not available + // Check if at least one device is available + auto devices = core.get_available_devices(); + if (!devices.empty()) { + return true; // OpenVINO is available + } + } catch (const std::exception& e) { + // Log the exception if OpenVINO runtime is not available + ET_LOG(Error, "OpenVINO is not available: %s", e.what()); + } catch (...) { + // Handle any unexpected errors + ET_LOG( + Error, "OpenVINO availability check failed due to an unknown error."); + } + + return false; // OpenVINO is not available } Result OpenvinoBackend::init( BackendInitContext& context, FreeableBuffer* processed, ArrayRef compile_specs) const { + ET_LOG(Info, "OpenvinoBackend::init %p", processed->data()); - ET_LOG(Info, "OpenvinoBackend::init %p", processed->data()); + ov::Core core; + const char* data_ptr = static_cast(processed->data()); + size_t data_size = processed->size(); - ov::Core core; - const char* data_ptr = static_cast(processed->data()); - size_t data_size = processed->size(); - - // Copy data to a string or vector - std::string data_string(data_ptr, data_size); + // Copy data to a string or vector + std::string data_string(data_ptr, data_size); - // Wrap the data in a stream - std::istringstream compiled_stream(data_string); + // Wrap the data in a stream + std::istringstream compiled_stream(data_string); - auto device = "CPU"; - // Get the device value, if provided in compile sepcs - for (auto& compile_spec : compile_specs) { - if (std::strcmp(compile_spec.key, "device") == 0) - device = static_cast(compile_spec.value.buffer); - } + auto device = "CPU"; + // Get the device value, if provided in compile sepcs + for (auto& compile_spec : compile_specs) { + if (std::strcmp(compile_spec.key, "device") == 0) + device = 
static_cast(compile_spec.value.buffer); + } - // Import the model - auto compiled_model = core.import_model(compiled_stream, device); + // Import the model + auto compiled_model = core.import_model(compiled_stream, device); - // Allocate an infer request - std::shared_ptr infer_request = std::make_shared(compiled_model.create_infer_request()); + // Allocate an infer request + std::shared_ptr infer_request = + std::make_shared(compiled_model.create_infer_request()); - // Allocate execution handle - MemoryAllocator* allocator = context.get_runtime_allocator(); - ExecutionHandle* handle = ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(allocator, ExecutionHandle); - handle->compiled_model = std::make_shared(compiled_model); - handle->infer_request = infer_request; + // Allocate execution handle + MemoryAllocator* allocator = context.get_runtime_allocator(); + ExecutionHandle* handle = + ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(allocator, ExecutionHandle); + handle->compiled_model = std::make_shared(compiled_model); + handle->infer_request = infer_request; - return handle; + return handle; } Error OpenvinoBackend::execute( BackendExecutionContext& context, DelegateHandle* input_handle, EValue** args) const { + ExecutionHandle* execution_handle = (ExecutionHandle*)input_handle; - ExecutionHandle* execution_handle = (ExecutionHandle*)input_handle; + auto infer_request = execution_handle->infer_request; - auto infer_request = execution_handle->infer_request; + size_t num_inputs = infer_request->get_compiled_model().inputs().size(); + size_t num_outputs = infer_request->get_compiled_model().outputs().size(); - size_t num_inputs = infer_request->get_compiled_model().inputs().size(); - size_t num_outputs = infer_request->get_compiled_model().outputs().size(); + // Set inputs + for (size_t i = 0; i < num_inputs; i++) { + auto input_tensor = args[i]->toTensor(); + ov::Shape input_shape( + input_tensor.sizes().begin(), input_tensor.sizes().end()); - // Set inputs - for (size_t i = 0; i < 
num_inputs; i++) { - auto input_tensor = args[i]->toTensor(); - ov::Shape input_shape(input_tensor.sizes().begin(), input_tensor.sizes().end()); + // Convert input tensor to OpenVINO tensor + ov::element::Type ov_type = + convert_to_openvino_type(input_tensor.scalar_type()); + ov::Tensor ov_input_tensor( + ov_type, input_shape, input_tensor.mutable_data_ptr()); - // Convert input tensor to OpenVINO tensor - ov::element::Type ov_type = convert_to_openvino_type(input_tensor.scalar_type()); - ov::Tensor ov_input_tensor(ov_type, input_shape, input_tensor.mutable_data_ptr()); - - infer_request->set_input_tensor(i, ov_input_tensor); - } + infer_request->set_input_tensor(i, ov_input_tensor); + } - // Set outputs - for (size_t i = 0; i < num_outputs; i++) { - auto output_tensor = args[num_inputs+i]->toTensor(); - ov::Shape output_shape(output_tensor.sizes().begin(), output_tensor.sizes().end()); + // Set outputs + for (size_t i = 0; i < num_outputs; i++) { + auto output_tensor = args[num_inputs + i]->toTensor(); + ov::Shape output_shape( + output_tensor.sizes().begin(), output_tensor.sizes().end()); - // Convert input tensor to OpenVINO tensor - ov::element::Type ov_type = convert_to_openvino_type(output_tensor.scalar_type()); - ov::Tensor ov_output_tensor(ov_type, output_shape, output_tensor.mutable_data_ptr()); + // Convert input tensor to OpenVINO tensor + ov::element::Type ov_type = + convert_to_openvino_type(output_tensor.scalar_type()); + ov::Tensor ov_output_tensor( + ov_type, output_shape, output_tensor.mutable_data_ptr()); - infer_request->set_output_tensor(i, ov_output_tensor); - } + infer_request->set_output_tensor(i, ov_output_tensor); + } - // Execute the inference - infer_request->infer(); + // Execute the inference + infer_request->infer(); - return Error::Ok; + return Error::Ok; } void OpenvinoBackend::destroy(DelegateHandle* handle) const { - if (!handle) { - ET_LOG(Info, "Attempted to destroy a null handle."); - return; - } - - // Cast the handle to the 
appropriate type - ExecutionHandle* execution_handle = static_cast(handle); - - // Clean up resources - if (execution_handle->infer_request) { - execution_handle->infer_request.reset(); // Release the infer request - ET_LOG(Info, "Infer request successfully destroyed."); - } - - if (execution_handle->compiled_model) { - execution_handle->compiled_model.reset(); // Release the compiled model - ET_LOG(Info, "Compiled model successfully destroyed."); - } - - ET_LOG(Info, "Delegate handle destroyed successfully."); + if (!handle) { + ET_LOG(Info, "Attempted to destroy a null handle."); + return; + } + + // Cast the handle to the appropriate type + ExecutionHandle* execution_handle = static_cast(handle); + + // Clean up resources + if (execution_handle->infer_request) { + execution_handle->infer_request.reset(); // Release the infer request + ET_LOG(Info, "Infer request successfully destroyed."); + } + + if (execution_handle->compiled_model) { + execution_handle->compiled_model.reset(); // Release the compiled model + ET_LOG(Info, "Compiled model successfully destroyed."); + } + + ET_LOG(Info, "Delegate handle destroyed successfully."); } -ov::element::Type OpenvinoBackend::convert_to_openvino_type(ScalarType scalar_type) const { - switch (scalar_type) { - case ScalarType::Float: - return ov::element::f32; - case ScalarType::Int: - return ov::element::i32; - case ScalarType::Char: - return ov::element::i8; - default: - throw std::runtime_error("Unsupported scalar type"); - } +ov::element::Type OpenvinoBackend::convert_to_openvino_type( + ScalarType scalar_type) const { + switch (scalar_type) { + case ScalarType::Float: + return ov::element::f32; + case ScalarType::Int: + return ov::element::i32; + case ScalarType::Char: + return ov::element::i8; + default: + throw std::runtime_error("Unsupported scalar type"); + } } } // namespace openvino @@ -184,7 +193,5 @@ ov::element::Type OpenvinoBackend::convert_to_openvino_type(ScalarType scalar_ty namespace { auto backend = 
executorch::backends::openvino::OpenvinoBackend(); executorch::runtime::Backend backend_id{"OpenvinoBackend", &backend}; -static auto registered = executorch::runtime::register_backend(backend_id); +static auto registered = executorch::runtime::register_backend(backend_id); } // namespace - - diff --git a/examples/openvino/CMakeLists.txt b/examples/openvino/CMakeLists.txt index 961b34efd41..af626ae56d9 100644 --- a/examples/openvino/CMakeLists.txt +++ b/examples/openvino/CMakeLists.txt @@ -94,5 +94,3 @@ set_target_properties( get_filename_component( EXECUTORCH_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../.." ABSOLUTE ) - - diff --git a/examples/openvino/aot/README.md b/examples/openvino/aot/README.md index 6c59f1dad41..873f380810c 100644 --- a/examples/openvino/aot/README.md +++ b/examples/openvino/aot/README.md @@ -84,5 +84,3 @@ python aot_openvino_compiler.py --suite huggingface --model bert-base-uncased -- - **Unsupported Input Shape**: Ensure `--input_shape` is provided as a valid list or tuple. 
- - diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index f2f26b03951..9a413380f56 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -1,15 +1,18 @@ +import argparse + import executorch import timm import torch import torchvision.models as torchvision_models -from transformers import AutoModel -from executorch.exir.backend.backend_details import CompileSpec -from executorch.backends.openvino.preprocess import OpenvinoBackend from executorch.backends.openvino.partitioner import OpenvinoPartitioner +from executorch.backends.openvino.preprocess import OpenvinoBackend from executorch.exir import EdgeProgramManager, to_edge +from executorch.exir.backend.backend_details import CompileSpec from torch.export import export, ExportedProgram + from torch.export.exported_program import ExportedProgram -import argparse +from transformers import AutoModel + # Function to load a model based on the selected suite def load_model(suite: str, model_name: str): @@ -24,6 +27,7 @@ def load_model(suite: str, model_name: str): else: raise ValueError(f"Unsupported model suite: {suite}") + def main(suite: str, model_name: str, input_shape, device: str): # Ensure input_shape is a tuple if isinstance(input_shape, list): @@ -36,7 +40,7 @@ def main(suite: str, model_name: str, input_shape, device: str): model = model.eval() # Provide input - example_args = (torch.randn(*input_shape), ) + example_args = (torch.randn(*input_shape),) # Export to aten dialect using torch.export aten_dialect: ExportedProgram = export(model, example_args) @@ -50,23 +54,41 @@ def main(suite: str, model_name: str, input_shape, device: str): lowered_module = edge_program.to_backend(OpenvinoPartitioner(compile_spec)) # Apply backend-specific passes - exec_prog = lowered_module.to_executorch(config=executorch.exir.ExecutorchBackendConfig()) + exec_prog = lowered_module.to_executorch( + 
config=executorch.exir.ExecutorchBackendConfig() + ) # Serialize and save it to a file with open(f"{model_name}.pte", "wb") as file: exec_prog.write_to_file(file) print(f"Model exported and saved as {model_name}.pte on {device}.") + if __name__ == "__main__": # Argument parser for dynamic inputs parser = argparse.ArgumentParser(description="Export models with executorch.") - parser.add_argument("--suite", type=str, required=True, choices=["timm", "torchvision", "huggingface"], - help="Select the model suite (timm, torchvision, huggingface).") - parser.add_argument("--model", type=str, required=True, help="Model name to be loaded.") - parser.add_argument("--input_shape", type=eval, required=True, - help="Input shape for the model as a list or tuple (e.g., [1, 3, 224, 224] or (1, 3, 224, 224)).") - parser.add_argument("--device", type=str, default="CPU", - help="Target device for compiling the model (e.g., CPU, GPU). Default is CPU.") + parser.add_argument( + "--suite", + type=str, + required=True, + choices=["timm", "torchvision", "huggingface"], + help="Select the model suite (timm, torchvision, huggingface).", + ) + parser.add_argument( + "--model", type=str, required=True, help="Model name to be loaded." + ) + parser.add_argument( + "--input_shape", + type=eval, + required=True, + help="Input shape for the model as a list or tuple (e.g., [1, 3, 224, 224] or (1, 3, 224, 224)).", + ) + parser.add_argument( + "--device", + type=str, + default="CPU", + help="Target device for compiling the model (e.g., CPU, GPU). 
Default is CPU.", + ) args = parser.parse_args() diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp index 67bb35d9701..27b5bf6568f 100644 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -21,10 +21,7 @@ DEFINE_string( model_path, "", "Path to the model serialized in flatbuffer format (required)."); -DEFINE_int32( - num_iter, - 1, - "Number of inference iterations (default is 1)."); +DEFINE_int32(num_iter, 1, "Number of inference iterations (default is 1)."); using executorch::extension::FileDataLoader; using executorch::extension::prepare_input_tensors; @@ -50,7 +47,8 @@ int main(int argc, char** argv) { if (FLAGS_model_path.empty()) { std::cerr << "Error: --model_path is required." << std::endl; std::cerr << "Usage: " << argv[0] - << " --model_path= --num_iter=" << std::endl; + << " --model_path= --num_iter=" + << std::endl; return 1; } @@ -75,7 +73,8 @@ int main(int argc, char** argv) { } ET_LOG(Info, "Model file %s is loaded.", model_path); - // Retrieve the method name from the program (assumes the first method is used) + // Retrieve the method name from the program (assumes the first method is + // used) const char* method_name = nullptr; { const auto method_name_result = program->get_method_name(0); @@ -139,7 +138,8 @@ int main(int argc, char** argv) { auto after_exec = std::chrono::high_resolution_clock::now(); double elapsed_time = std::chrono::duration_cast( after_exec - before_exec) - .count() / 1000.0; + .count() / + 1000.0; // Log execution time and average time per iteration ET_LOG( @@ -163,4 +163,3 @@ int main(int argc, char** argv) { return 0; } - From f7dc3e3b2a4d9053f9697a6b15cf6f29b9ef9fd4 Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Tue, 4 Feb 2025 15:42:36 -0800 Subject: [PATCH 026/188] Fixed formatting issues --- backends/openvino/preprocess.py | 2 +- 
examples/openvino/aot/aot_openvino_compiler.py | 6 ++++++ .../openvino/executor_runner/openvino_executor_runner.cpp | 8 ++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/backends/openvino/preprocess.py b/backends/openvino/preprocess.py index e2aefe7d729..c2e69883c3b 100644 --- a/backends/openvino/preprocess.py +++ b/backends/openvino/preprocess.py @@ -37,7 +37,7 @@ def preprocess( compile_options[spec.key] = spec.value.decode() compiled = openvino_compile( - edge_program.module(), *args, options=compile_options + edge_program.module(), *args, options=compile_options, executorch=True ) model_bytes = compiled.export_model() diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 9a413380f56..eebc39e89de 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -1,3 +1,9 @@ +# Copyright (c) Intel Corporation +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. 
+ import argparse import executorch diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp index 27b5bf6568f..1886e4f7847 100644 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -22,6 +22,14 @@ DEFINE_string( "", "Path to the model serialized in flatbuffer format (required)."); DEFINE_int32(num_iter, 1, "Number of inference iterations (default is 1)."); +DEFINE_string( + input_list_path, + "", + "Path to the input list file which includes the list of raw input tensor files (optional)."); +DEFINE_string( + output_folder_path, + "", + "Path to the output folder to save raw output tensor files (optional)."); using executorch::extension::FileDataLoader; using executorch::extension::prepare_input_tensors; From c5ca5c4d8256d0964f62f0de39fc56d638100b19 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Wed, 5 Feb 2025 18:55:50 -0800 Subject: [PATCH 027/188] Openvino backend model tests added --- .../tests/models/test_classification.py | 34 +++++++++++++++++++ .../openvino/tests/test_openvino_delegate.py | 13 +++++-- 2 files changed, 45 insertions(+), 2 deletions(-) create mode 100644 backends/openvino/tests/models/test_classification.py diff --git a/backends/openvino/tests/models/test_classification.py b/backends/openvino/tests/models/test_classification.py new file mode 100644 index 00000000000..59558b397ab --- /dev/null +++ b/backends/openvino/tests/models/test_classification.py @@ -0,0 +1,34 @@ +from executorch.backends.openvino.tests.ops.base_openvino_op_test import BaseOpenvinoOpTest +import torch +import timm +import torchvision.models as torchvision_models +from transformers import AutoModel + +classifier_params = [ + {'model': ['torchvision', 'resnet50', (1, 3, 224, 224)] }, + {'model': ['torchvision', 'mobilenet_v2', (1, 3, 224, 224)] }, + ] + +# Function to load a model based on the 
selected suite +def load_model(suite: str, model_name: str): + if suite == "timm": + return timm.create_model(model_name, pretrained=True) + elif suite == "torchvision": + if not hasattr(torchvision_models, model_name): + raise ValueError(f"Model {model_name} not found in torchvision.") + return getattr(torchvision_models, model_name)(pretrained=True) + elif suite == "huggingface": + return AutoModel.from_pretrained(model_name) + else: + raise ValueError(f"Unsupported model suite: {suite}") + +class TestClassifier(BaseOpenvinoOpTest): + + def test_classifier(self): + for params in classifier_params: + with self.subTest(params=params): + module = load_model(params['model'][0], params['model'][1]) + + sample_input = (torch.randn(params['model'][2]),) + + self.execute_layer_test(module, sample_input) diff --git a/backends/openvino/tests/test_openvino_delegate.py b/backends/openvino/tests/test_openvino_delegate.py index bbf61d1ea09..eaabcf2603b 100644 --- a/backends/openvino/tests/test_openvino_delegate.py +++ b/backends/openvino/tests/test_openvino_delegate.py @@ -40,16 +40,25 @@ def parse_arguments(): parser.add_argument( "-p", "--pattern", - help="Pattern to match test files. Provide complete file name to run individual op tests", + help="Pattern to match test files. 
Provide complete file name to run individual tests", type=str, default="test_*.py", ) + parser.add_argument( + "-t", + "--test_type", + help="Specify the type of tests ('ops' or 'models')", + type=str, + default="ops", + choices={"ops", "models"}, + ) args, ns_args = parser.parse_known_args(namespace=unittest) test_params = {} test_params["device"] = args.device test_params["build_folder"] = args.build_folder test_params["pattern"] = args.pattern + test_params["test_type"] = args.test_type return test_params if __name__ == "__main__": @@ -60,6 +69,6 @@ def parse_arguments(): test_params = parse_arguments() loader.suiteClass.test_params = test_params # Discover all existing op tests in "ops" folder - suite = loader.discover("ops", pattern=test_params['pattern']) + suite = loader.discover(test_params['test_type'], pattern=test_params['pattern']) # Start running tests unittest.TextTestRunner().run(suite) From 54fac03b63b668608de73600bf5029a54468e28a Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Thu, 6 Feb 2025 16:10:11 -0800 Subject: [PATCH 028/188] removed executorch option into openvino_compile --- backends/openvino/preprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/openvino/preprocess.py b/backends/openvino/preprocess.py index 6af45ff63f9..6380d0d3b78 100644 --- a/backends/openvino/preprocess.py +++ b/backends/openvino/preprocess.py @@ -44,7 +44,7 @@ def preprocess( for spec in module_compile_spec: compile_options[spec.key] = spec.value.decode() - compiled = openvino_compile(edge_program.module(), *args, options=compile_options, executorch=True) + compiled = openvino_compile(edge_program.module(), *args, options=compile_options) model_bytes = compiled.export_model() return PreprocessResult(processed_bytes=model_bytes) From 8a5abed9f944b14dc75c120d99c550c0fdb08a5a Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Sun, 9 Feb 2025 14:42:44 -0800 Subject: [PATCH 029/188] cmake updates for openvino --- 
backends/openvino/CMakeLists.txt | 67 ++++++++-------- .../scripts/{build.sh => openvino_build.sh} | 0 examples/openvino/CMakeLists.txt | 77 ++++++++----------- ...ino_build.sh => openvino_build_example.sh} | 4 +- 4 files changed, 65 insertions(+), 83 deletions(-) rename backends/openvino/scripts/{build.sh => openvino_build.sh} (100%) rename examples/openvino/{openvino_build.sh => openvino_build_example.sh} (95%) diff --git a/backends/openvino/CMakeLists.txt b/backends/openvino/CMakeLists.txt index 4df2015a8d7..3f2199b634d 100644 --- a/backends/openvino/CMakeLists.txt +++ b/backends/openvino/CMakeLists.txt @@ -4,74 +4,69 @@ # except in compliance with the License. See the license file in the root # directory of this source tree for more details. +# Set minimum required CMake version +cmake_minimum_required(VERSION 3.19) + +# Set project name +project(openvino_backend_project) + # Set C++ standard set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) -# Ensure compile_commands are generated +# Ensure compile_commands.json is generated set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -# Define common include directories -set(COMMON_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/../../..) - -# Include common directories before others to ensure proper order -include_directories(BEFORE ${COMMON_INCLUDE_DIRS}) - # Set up EXECUTORCH_ROOT if not already set if(NOT EXECUTORCH_ROOT) set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..) endif() -# Include utility cmake script from the executorch repository -include(${EXECUTORCH_ROOT}/build/Utils.cmake) - -# Update common include directory for ExecuteTorch +# Define common include directories set(COMMON_INCLUDE_DIRS ${EXECUTORCH_ROOT}/..) 
-# Set OpenVINO directory and include directories from environment variable +# Include utility CMake scripts from ExecuteTorch +include(${EXECUTORCH_ROOT}/build/Utils.cmake) + +# Set OpenVINO directory from environment variable set(OPENVINO_DIR "$ENV{INTEL_OPENVINO_DIR}") if(NOT OPENVINO_DIR) - message(FATAL_ERROR "INTEL_OPENVINO_DIR environment variable is not set.") + message(FATAL_ERROR "ERROR: INTEL_OPENVINO_DIR environment variable is not set.") endif() +# Set OpenVINO include directories set(OPENVINO_INCLUDE_DIRS - ${OPENVINO_DIR}/deployment_tools/inference_engine/include ${OPENVINO_DIR}/runtime/include + ${OPENVINO_DIR}/deployment_tools/inference_engine/include ) -# Define OpenVINO library path +# Set OpenVINO library path set(OPENVINO_LIB_PATH ${OPENVINO_DIR}/runtime/lib/intel64) -# Define OpenVINO libraries -set(OPENVINO_LIB ${OPENVINO_LIB_PATH}/libopenvino.so) +# Try to locate OpenVINO automatically +find_library(OPENVINO_LIB NAMES openvino PATHS ${OPENVINO_LIB_PATH} NO_DEFAULT_PATH) +if(NOT OPENVINO_LIB) + message(FATAL_ERROR "ERROR: OpenVINO library (libopenvino.so) not found in ${OPENVINO_LIB_PATH}") +endif() -# Add the OpenVINO backend library as a shared library +# Define OpenVINO backend as a shared library add_library(openvino_backend SHARED) # Enable exceptions and RTTI for OpenVINO backend -target_compile_options(openvino_backend PRIVATE "-frtti" "-fexceptions") +target_compile_options(openvino_backend PRIVATE -frtti -fexceptions) -# Include directories for ExecuteTorch and OpenVINO -target_include_directories( - openvino_backend PUBLIC - ${COMMON_INCLUDE_DIRS} - ${OPENVINO_INCLUDE_DIRS} -) +# Include ExecuteTorch and OpenVINO directories +target_include_directories(openvino_backend PUBLIC ${COMMON_INCLUDE_DIRS} ${OPENVINO_INCLUDE_DIRS}) -# Link OpenVINO libraries and executorch core to the backend -target_link_libraries(openvino_backend PRIVATE - ${OPENVINO_LIB} - executorch_core -) +# Link OpenVINO and ExecuteTorch core libraries 
+target_link_libraries(openvino_backend PRIVATE ${OPENVINO_LIB} executorch_core) -# Add source files to the OpenVINO backend library -target_sources(openvino_backend PRIVATE - ${CMAKE_CURRENT_LIST_DIR}/runtime/OpenvinoBackend.cpp -) +# Add source files for OpenVINO backend +target_sources(openvino_backend PRIVATE ${CMAKE_CURRENT_LIST_DIR}/runtime/OpenvinoBackend.cpp) -# Set additional link options for shared library +# Set runtime library path for OpenVINO target_link_options(openvino_backend PRIVATE -Wl,-rpath=${OPENVINO_LIB_PATH}) -# Install the OpenVINO backend library to the lib directory +# Install OpenVINO backend library to the lib directory install(TARGETS openvino_backend DESTINATION lib) diff --git a/backends/openvino/scripts/build.sh b/backends/openvino/scripts/openvino_build.sh similarity index 100% rename from backends/openvino/scripts/build.sh rename to backends/openvino/scripts/openvino_build.sh diff --git a/examples/openvino/CMakeLists.txt b/examples/openvino/CMakeLists.txt index 64f1e8d5463..761de51cf28 100644 --- a/examples/openvino/CMakeLists.txt +++ b/examples/openvino/CMakeLists.txt @@ -4,11 +4,13 @@ # except in compliance with the License. See the license file in the root # directory of this source tree for more details. -set(CMAKE_CXX_STANDARD 17) - cmake_minimum_required(VERSION 3.19) project(openvino_runner_example) +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + # Source root directory for executorch. if(NOT EXECUTORCH_ROOT) set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..) @@ -26,31 +28,20 @@ if(NOT CMAKE_BUILD_TYPE) endif() set(_common_compile_options -Wno-deprecated-declarations -fPIC) - -# Let files say "include ". set(_common_include_directories ${EXECUTORCH_ROOT}/..) -# -# The `__srcs` lists are defined by including ${EXECUTORCH_SRCS_FILE}. 
-# -set(EXECUTORCH_SRCS_FILE - "${CMAKE_CURRENT_BINARY_DIR}/../../../build/executorch_srcs.cmake" -) +set(EXECUTORCH_SRCS_FILE "${CMAKE_CURRENT_BINARY_DIR}/../../../build/executorch_srcs.cmake") extract_sources(${EXECUTORCH_SRCS_FILE}) include(${EXECUTORCH_SRCS_FILE}) -set(_openvino_executor_runner__srcs ${CMAKE_CURRENT_LIST_DIR}/../openvino/executor_runner/openvino_executor_runner.cpp) - -# preprocess executor runner src files -list(PREPEND _openvino_executor_runner__srcs - ${CMAKE_CURRENT_LIST_DIR}/../openvino/executor_runner/openvino_executor_runner.cpp +set(_openvino_executor_runner__srcs + ${CMAKE_CURRENT_LIST_DIR}/../openvino/executor_runner/openvino_executor_runner.cpp ) find_package(executorch CONFIG REQUIRED) -target_include_directories(executorch INTERFACE ${_common_include_directories}) -target_compile_options(executorch INTERFACE ${_common_compile_options}) +include_directories(${EXECUTORCH_INCLUDE_DIRS}) -# portable_ops_lib +# Portable Ops Library gen_selected_ops(LIB_NAME "openvino_portable_ops_lib" INCLUDE_ALL_OPS "ON") generate_bindings_for_kernels( LIB_NAME "openvino_portable_ops_lib" FUNCTIONS_YAML @@ -59,43 +50,39 @@ generate_bindings_for_kernels( gen_operators_lib( LIB_NAME "openvino_portable_ops_lib" KERNEL_LIBS portable_kernels DEPS executorch ) -target_compile_options( - openvino_portable_ops_lib INTERFACE -DET_EVENT_TRACER_ENABLED -) -target_include_directories( - openvino_portable_ops_lib PUBLIC ${_common_include_directories} -) - +target_compile_options(openvino_portable_ops_lib INTERFACE -DET_EVENT_TRACER_ENABLED) +target_include_directories(openvino_portable_ops_lib PUBLIC ${_common_include_directories}) -# build executor runner +# Build Executor Runner add_executable(openvino_executor_runner ${_openvino_executor_runner__srcs}) target_include_directories( - openvino_executor_runner PUBLIC ${_common_include_directories} + openvino_executor_runner PUBLIC ${_common_include_directories} ${EXECUTORCH_ROOT}/third-party/gflags/include ) -# Set 
the path to the library directory +# Set Library Directory set(LIBRARY_DIR "${CMAKE_CURRENT_LIST_DIR}/../../cmake-openvino-out/lib/") +message(STATUS "Library directory path: ${LIBRARY_DIR}") -# List the libraries you want to link (without the 'lib' prefix and file extension) -set(LIBRARIES_TO_LINK ${LIBRARY_DIR}/libopenvino_backend.so - ${LIBRARY_DIR}/libexecutorch.a - ${LIBRARY_DIR}/libexecutorch_core.a - ${EXECUTORCH_ROOT}/third-party/gflags/build/lib/libgflags_nothreads.a - ${LIBRARY_DIR}/libpthreadpool.a - ${LIBRARY_DIR}/libextension_data_loader.a - ${LIBRARY_DIR}/libextension_runner_util.a -) - -# Add the library directory to the link search path -link_directories(${LIBRARY_DIR}) - -# Link all libraries at once -target_link_libraries(openvino_executor_runner PRIVATE ${LIBRARIES_TO_LINK} openvino_portable_ops_lib) +# Locate OpenVINO Backend Library +find_library(OPENVINO_BACKEND_LIB NAMES openvino_backend PATHS ${LIBRARY_DIR} NO_DEFAULT_PATH) +if(NOT OPENVINO_BACKEND_LIB) + message(FATAL_ERROR "OpenVINO backend library not found in ${LIBRARY_DIR}") +endif() -set_target_properties( - openvino_executor_runner PROPERTIES LINK_FLAGS "-Wl,-rpath='$ORIGIN'" +# Link Libraries +target_link_libraries(openvino_executor_runner PRIVATE + ${OPENVINO_BACKEND_LIB} + executorch + executorch_core + openvino_portable_ops_lib + extension_data_loader + extension_runner_util + gflags + pthreadpool ) +# Ensure Proper RPATH Handling +set_target_properties(openvino_executor_runner PROPERTIES INSTALL_RPATH "$ORIGIN") get_filename_component( EXECUTORCH_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../.." 
ABSOLUTE diff --git a/examples/openvino/openvino_build.sh b/examples/openvino/openvino_build_example.sh similarity index 95% rename from examples/openvino/openvino_build.sh rename to examples/openvino/openvino_build_example.sh index 0d2703e5646..de6e585a1ab 100755 --- a/examples/openvino/openvino_build.sh +++ b/examples/openvino/openvino_build_example.sh @@ -27,7 +27,7 @@ main() { # Build the project - cmake --build cmake-openvino-out --target install --config Release -j5 + cmake --build cmake-openvino-out --target install --config Release -j$(nproc) ## Build example local example_dir=examples/openvino @@ -41,7 +41,7 @@ main() { -B"${example_build_dir}" \ $EXECUTORCH_ROOT/$example_dir - cmake --build "${example_build_dir}" -j5 + cmake --build "${example_build_dir}" -j$(nproc) # Switch back to the original directory cd - > /dev/null From 2424c72ecbccc4c3d4ee07ba73783bba714f6b1e Mon Sep 17 00:00:00 2001 From: Yamini Nimmagadda Date: Sun, 9 Feb 2025 18:38:23 -0800 Subject: [PATCH 030/188] Added ReadMe for openvino backend --- backends/openvino/README.md | 56 +++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 backends/openvino/README.md diff --git a/backends/openvino/README.md b/backends/openvino/README.md new file mode 100644 index 00000000000..7ea63af477d --- /dev/null +++ b/backends/openvino/README.md @@ -0,0 +1,56 @@ +# OpenVINO Backend on ExecuTorch +The OpenVINO backend enables optimized execution of deep learning models on Intel hardware, leveraging Intel's [OpenVINO toolkit](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/overview.html) for inference acceleration. 
+ +## Supported Hardware + +OpenVINO backend supports the following hardware: + +- Intel CPUs +- Intel integrated GPUs +- Intel discrete GPUs +- Intel NPUs + +## Build Instructions + +### Prerequisites + +Before you begin, ensure you have openvino installed and configured on your system: + +```bash +git clone -b executorch_ov_backend https://github.com/ynimmaga/openvino +cd openvino +git submodule update --init --recursive +mkdir build +cd build +cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_PYTHON=ON -DENABLE_WHEEL=ON +make -j + +cd ../.. +cmake --install build --prefix +cd +source setupvars.sh +``` + +### Setup + +Follow the steps below to setup your build environment: + +1. **Setup ExecuTorch Environment**: Refer to the [Setting up ExecuTorch](https://pytorch.org/executorch/stable/getting-started-setup) guide for detailed instructions on setting up the ExecuTorch environment. + +2. **Setup OpenVINO Backend Environment** +- Install the dependent libs. Ensure that you are inside backends/openvino/ directory + ```bash + pip install -r requirements.txt + ``` + +3. Navigate to `scripts/` directory. + +4. **Build OpenVINO Backend**: Once the prerequisites are in place, run the `openvino_build.sh` script to start the build process, OpenVINO backend will be built under `cmake-openvino-out/backends/openvino/` as `libneuron_backend.so` + + ```bash + ./openvino_build.sh + ``` + +### Run + +Please refer to `executorch/examples/openvino/` for aot optimization and execution examples of various of models. 
From 11218a357181707f7453598fc3fc81899d6dc7b5 Mon Sep 17 00:00:00 2001 From: Yamini Nimmagadda Date: Mon, 10 Feb 2025 14:55:44 -0800 Subject: [PATCH 031/188] Update README.md for openvino backend --- backends/openvino/README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/backends/openvino/README.md b/backends/openvino/README.md index 7ea63af477d..414c30ecb3c 100644 --- a/backends/openvino/README.md +++ b/backends/openvino/README.md @@ -16,6 +16,9 @@ OpenVINO backend supports the following hardware: Before you begin, ensure you have openvino installed and configured on your system: +## TODO: Update with the openvino commit/Release tag once the changes in OpenVINO are merged +## TODO: Add instructions for support with OpenVINO release package + ```bash git clone -b executorch_ov_backend https://github.com/ynimmaga/openvino cd openvino From f7e03c5f5879686f259507c470a6e00dde8f4b11 Mon Sep 17 00:00:00 2001 From: Yamini Nimmagadda Date: Mon, 10 Feb 2025 15:11:06 -0800 Subject: [PATCH 032/188] Added directory structure for openvino backend in ReadMe --- backends/openvino/README.md | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/backends/openvino/README.md b/backends/openvino/README.md index 414c30ecb3c..31d1c1b1c4a 100644 --- a/backends/openvino/README.md +++ b/backends/openvino/README.md @@ -1,4 +1,4 @@ -# OpenVINO Backend on ExecuTorch +# OpenVINO Backend for ExecuTorch The OpenVINO backend enables optimized execution of deep learning models on Intel hardware, leveraging Intel's [OpenVINO toolkit](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/overview.html) for inference acceleration. 
## Supported Hardware @@ -10,6 +10,37 @@ OpenVINO backend supports the following hardware: - Intel discrete GPUs - Intel NPUs +## Directory Structure + +``` +executorch +├── backends +│ └── openvino +│ ├── runtime +│ ├── OpenvinoBackend.cpp +│ └── OpenvinoBackend.hpp +│ ├── scripts +│ └── openvino_build.sh +│ ├── tests +│ ├── CMakeLists.txt +│ ├── README.md +│ ├── __init__.py +│ ├── openvino_functions.yaml +│ ├── partitioner.py +│ ├── preprocess.py +│ └── requirements.txt +└── examples +│ └── openvino +│ ├── aot +│ ├── README.md +│ └── aot_openvino_compiler.py +│ └── executor_runner +│ └── openvino_executor_runner.cpp +│ ├── CMakeLists.txt +│ ├── README.md +└── └── openvino_build_example.sh +``` + ## Build Instructions ### Prerequisites From b49e43988868317a90d3418a5b58365ec62fd5ea Mon Sep 17 00:00:00 2001 From: Yamini Nimmagadda Date: Mon, 10 Feb 2025 15:24:51 -0800 Subject: [PATCH 033/188] Minor updates for README.md --- backends/openvino/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/openvino/README.md b/backends/openvino/README.md index 31d1c1b1c4a..06cce4a6211 100644 --- a/backends/openvino/README.md +++ b/backends/openvino/README.md @@ -87,4 +87,4 @@ Follow the steps below to setup your build environment: ### Run -Please refer to `executorch/examples/openvino/` for aot optimization and execution examples of various of models. +Please refer to [README.md](../../examples/openvino/README.md) for instructions on running examples of various of models with openvino backend. 
From d122f51ea2da5d4cc87f356aea57e178387296ee Mon Sep 17 00:00:00 2001 From: Yamini Nimmagadda Date: Mon, 10 Feb 2025 17:43:36 -0800 Subject: [PATCH 034/188] Created README.md for examples --- examples/openvino/README.md | 85 +++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 examples/openvino/README.md diff --git a/examples/openvino/README.md b/examples/openvino/README.md new file mode 100644 index 00000000000..10d8c3343cf --- /dev/null +++ b/examples/openvino/README.md @@ -0,0 +1,85 @@ +# OpenVINO Backend Examples + +This guide provides detailed instructions on how to export models for Executorch and execute them on the OpenVINO backend. The examples demonstrate how to export a model, load a model, prepare input tensors, execute inference, and save the output results. + +## Directory Structure + +Below is the layout of the `examples/openvino` directory, which includes the necessary files for the example applications: + +``` +examples/openvino +├── aot # Directory with scripts and instructions for AoT export + ├── README.md # Instructions to export models to '.pte' + └── aot_openvino_compiler.py # Example script for AoT export +├── executor_runner # Directory with examples for C++ execution + └── openvino_executor_runner.cpp # Example C++ file for execution +├── CMakeLists.txt # CMake build configuration to build examples +├── README.md # Documentation for examples (this file) +└── openvino_build_example.sh # Script to build examples for openvino backend +``` + +# Build Instructions for Examples + +## Environment Setup +Follow the [instructions](../../backends/openvino/README.md) of **Prerequisites** and **Setup** in `backends/openvino/README.md` to set up the OpenVINO backend. + +## AOT step: +Refer to the [README.md](aot/README.md) in the `aot` folder for detailed instructions on exporting deep learning models from various model suites (TIMM, Torchvision, Hugging Face) to openvino backend using Executorch. 
Users can dynamically specify the model, input shape, and target device. + +Below is an example to export a ResNet50 model from Torchvision model suite for CPU device with an input shape of `[1, 3, 256, 256]` + +```bash +cd aot +python aot_openvino_compiler.py --suite torchvision --model resnet50 --input_shape "(1, 3, 256, 256)" --device CPU +``` +The exported model will be saved as 'resnet50.pte' in the current directory. + +## Build OpenVINO Examples +Build the backend and the examples by executing the script: +```bash +./openvino_build_example.sh +``` +The executable is saved in `/cmake-openvino-out/examples/openvino/` + +### Run the example + +Now, run the example using the executable generated in the above step. The executable requires a model file (`.pte` file generated in the aot step), number of inference iterations, and optional input/output paths. + +#### Command Syntax: + +``` +cd ../../cmake-openvino-out/examples/openvino + +./openvino_executor_runner \ + --model_path= \ + --num_iter= \ + [--input_list_path=] \ + [--output_folder_path=] +``` +#### Command-Line Arguments + +- `--model_path`: (Required) Path to the model serialized in `.pte` format. +- `--num_iter`: (Optional) Number of times to run inference (default: 1). +- `--input_list_path`: (Optional) Path to a file containing the list of raw input tensor files. +- `--output_folder_path`: (Optional) Path to a folder where output tensor files will be saved. 
+ +#### Example Usage + +Run inference with a given model for 10 iterations and save outputs: + +``` +./openvino_executor_runner \ + --model_path=model.pte \ + --num_iter=10 \ + --output_folder_path=outputs/ +``` + +Run inference with an input tensor file: + +``` +./openvino_executor_runner \ + --model_path=model.pte \ + --num_iter=5 \ + --input_list_path=input_list.txt \ + --output_folder_path=outputs/ +``` From aad2ac790790694ca842870567f88f39264847ea Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Mon, 10 Feb 2025 18:04:26 -0800 Subject: [PATCH 035/188] delete old Readme file for openvino examples --- examples/openvino/ReadMe.md | 64 ------------------------------------- 1 file changed, 64 deletions(-) delete mode 100644 examples/openvino/ReadMe.md diff --git a/examples/openvino/ReadMe.md b/examples/openvino/ReadMe.md deleted file mode 100644 index 13196f5151c..00000000000 --- a/examples/openvino/ReadMe.md +++ /dev/null @@ -1,64 +0,0 @@ -# TODO: Delete and reformat later - -## Build Executorch - -```bash -git clone -b openvino_backend https://github.com/ynimmaga/executorch -cd executorch -git submodule update --init –recursive -./install_requirements.sh -(If not successful) pkill -f buck && ./install_requirements.sh -``` - -## Build OpenVINO and source environment variables: - -```bash -git clone -b executorch_ov_backend https://github.com/ynimmaga/openvino -cd openvino -git submodule update --init --recursive -mkdir build -cd build -cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_PYTHON=ON -DENABLE_WHEEL=ON -make -j -cd wheels -pip install - -cd ../.. -cmake --install build --prefix -cd -source setupvars.sh -``` - -## Build gflags: - -```bash -cd third-party/gflags -mkdir build -cd build -cmake .. 
-make -j12 -``` - -## Build OpenVINO example: - -```bash -cd ../../../examples/openvino -./openvino_build.sh -``` - -### AOT step: -```bash -cd aot -python aot_openvino_compiler.py --suite torchvision --model resnet50 --input_shape "(1, 3, 256, 256)" --device CPU -``` - -### Update the model.pte in executorch example and rebuild -```bash -cd -cd examples/openvino/executor_runner -Update the path of model.pte in openvino_executor_runner.cpp at https://github.com/ynimmaga/executorch/blob/openvino_backend/examples/openvino/executor_runner/openvino_executor_runner.cpp#L20 - -Rebuild the example using “./openvino_build.sh” -The executable is in /cmake-openvino-out/examples/openvino -./openvino_executor_runner -``` From 050259bc612a8bc63f0fb0e38bbfaf8993d630fe Mon Sep 17 00:00:00 2001 From: Yamini Nimmagadda Date: Mon, 10 Feb 2025 20:28:27 -0800 Subject: [PATCH 036/188] Added OpenVINO tutorial in docs --- docs/source/build-run-openvino.md | 202 ++++++++++++++++++++++++++++++ 1 file changed, 202 insertions(+) create mode 100644 docs/source/build-run-openvino.md diff --git a/docs/source/build-run-openvino.md b/docs/source/build-run-openvino.md new file mode 100644 index 00000000000..ec3330d94cb --- /dev/null +++ b/docs/source/build-run-openvino.md @@ -0,0 +1,202 @@ +# Building and Running ExecuTorch with OpenVINO Backend + +In this tutorial we will walk you through the process of setting up the prerequisites, building OpenVINO backend library, exporting `.pte` models with OpenVINO optimizations, and executing the exported models on Intel hardware. + + +::::{grid} 2 +:::{grid-item-card} What you will learn in this tutorial: +:class-card: card-prerequisites +* In this tutorial you will learn how to lower and deploy a model with OpenVINO. 
+::: +:::{grid-item-card} Tutorials we recommend you complete before this: +:class-card: card-prerequisites +* [Introduction to ExecuTorch](intro-how-it-works.md) +* [Setting up ExecuTorch](getting-started-setup.md) +* [Building ExecuTorch with CMake](runtime-build-and-cross-compilation.md) +::: +:::: + +## Introduction to OpenVINO + +[OpenVINO](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/overview.html) is an open-source toolkit designed to enhance AI inference on Intel hardware by reducing latency and increasing throughput while preserving accuracy. It optimizes hardware utilization and simplifies AI development and deep learning integration across domains such as computer vision, large language models (LLMs), and generative AI. + +OpenVINO is integrated as an Executorch delegate to accelerate AI applications deployed with Executorch APIs. + +## Supported Hardware + +OpenVINO backend supports the following hardware: + +- Intel CPUs +- Intel integrated GPUs +- Intel discrete GPUs +- Intel NPUs + +## Directory Structure + +``` +executorch +├── backends +│ └── openvino +│ ├── runtime +│ ├── OpenvinoBackend.cpp +│ └── OpenvinoBackend.hpp +│ ├── scripts +│ └── openvino_build.sh +│ ├── tests +│ ├── CMakeLists.txt +│ ├── README.md +│ ├── __init__.py +│ ├── openvino_functions.yaml +│ ├── partitioner.py +│ ├── preprocess.py +│ └── requirements.txt +└── examples +│ └── openvino +│ ├── aot +│ ├── README.md +│ └── aot_openvino_compiler.py +│ └── executor_runner +│ └── openvino_executor_runner.cpp +│ ├── CMakeLists.txt +│ ├── README.md +└── └── openvino_build_example.sh +``` + +## Instructions for Building OpenVINO Backend + +### Prerequisites + +Before you begin, ensure you have openvino installed and configured on your system: + +#### TODO: Update with the openvino commit/Release tag once the changes in OpenVINO are merged +#### TODO: Add instructions for support with OpenVINO release package + +```bash +git clone -b executorch_ov_backend 
https://github.com/ynimmaga/openvino +cd openvino +git submodule update --init --recursive +mkdir build +cd build +cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_PYTHON=ON -DENABLE_WHEEL=ON +make -j + +cd ../.. +cmake --install build --prefix +cd +source setupvars.sh +``` + +### Setup + +Follow the steps below to setup your build environment: + +1. **Setup ExecuTorch Environment**: Refer to the [Setting up ExecuTorch](https://pytorch.org/executorch/stable/getting-started-setup) guide for detailed instructions on setting up the ExecuTorch environment. + +2. **Setup OpenVINO Backend Environment** +- Install the dependent libs. Ensure that you are inside backends/openvino/ directory + ```bash + pip install -r requirements.txt + ``` + +3. Navigate to `scripts/` directory. + +4. **Build OpenVINO Backend**: Once the prerequisites are in place, run the `openvino_build.sh` script to start the build process, OpenVINO backend will be built under `cmake-openvino-out/backends/openvino/` as `libneuron_backend.so` + + ```bash + ./openvino_build.sh + ``` + +## Build Instructions for Examples + +### AOT step: +Refer to the [README.md](aot/README.md) in the `aot` folder for detailed instructions on exporting deep learning models from various model suites (TIMM, Torchvision, Hugging Face) to openvino backend using Executorch. Users can dynamically specify the model, input shape, and target device. + +Below is an example to export a ResNet50 model from Torchvision model suite for CPU device with an input shape of `[1, 3, 256, 256]` + +```bash +cd aot +python aot_openvino_compiler.py --suite torchvision --model resnet50 --input_shape "(1, 3, 256, 256)" --device CPU +``` +The exported model will be saved as 'resnet50.pte' in the current directory. + +#### **Arguments** +- **`--suite`** (required): + Specifies the model suite to use. 
+ Supported values: + - `timm` (e.g., VGG16, ResNet50) + - `torchvision` (e.g., resnet18, mobilenet_v2) + - `huggingface` (e.g., bert-base-uncased) + +- **`--model`** (required): + Name of the model to export. + Examples: + - For `timm`: `vgg16`, `resnet50` + - For `torchvision`: `resnet18`, `mobilenet_v2` + - For `huggingface`: `bert-base-uncased`, `distilbert-base-uncased` + +- **`--input_shape`** (required): + Input shape for the model. Provide this as a **list** or **tuple**. + Examples: + - `[1, 3, 224, 224]` (Zsh users: wrap in quotes) + - `(1, 3, 224, 224)` + +- **`--device`** (optional): + Target device for the compiled model. Default is `CPU`. + Examples: `CPU`, `GPU` + +### Build C++ OpenVINO Examples +Build the backend and the examples by executing the script: +```bash +./openvino_build_example.sh +``` +The executable is saved in `/cmake-openvino-out/examples/openvino/` + +Now, run the example using the executable generated in the above step. The executable requires a model file (`.pte` file generated in the aot step), number of inference iterations, and optional input/output paths. + +#### Command Syntax: + +``` +cd ../../cmake-openvino-out/examples/openvino + +./openvino_executor_runner \ + --model_path= \ + --num_iter= \ + [--input_list_path=] \ + [--output_folder_path=] +``` +#### Command-Line Arguments + +- `--model_path`: (Required) Path to the model serialized in `.pte` format. +- `--num_iter`: (Optional) Number of times to run inference (default: 1). +- `--input_list_path`: (Optional) Path to a file containing the list of raw input tensor files. +- `--output_folder_path`: (Optional) Path to a folder where output tensor files will be saved. 
+ +#### Example Usage + +Run inference with a given model for 10 iterations and save outputs: + +``` +./openvino_executor_runner \ + --model_path=model.pte \ + --num_iter=10 \ + --output_folder_path=outputs/ +``` + +Run inference with an input tensor file: + +``` +./openvino_executor_runner \ + --model_path=model.pte \ + --num_iter=5 \ + --input_list_path=input_list.txt \ + --output_folder_path=outputs/ +``` + +## Supported model list + +### TODO + +## FAQ + +If you encounter any issues while reproducing the tutorial, please file a github +issue on ExecuTorch repo and tag use `#openvino` tag From 81460f395ccc351411c5d441b235f392125f1a80 Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Tue, 11 Feb 2025 14:46:07 -0800 Subject: [PATCH 037/188] Updated cmake and build scripts to link against gflags --- backends/openvino/scripts/openvino_build.sh | 1 + examples/openvino/CMakeLists.txt | 12 +++++++++--- examples/openvino/openvino_build_example.sh | 1 + 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/backends/openvino/scripts/openvino_build.sh b/backends/openvino/scripts/openvino_build.sh index 0c07a5bb729..2a8a25511ac 100755 --- a/backends/openvino/scripts/openvino_build.sh +++ b/backends/openvino/scripts/openvino_build.sh @@ -18,6 +18,7 @@ main() { # Configure the project with CMake # Note: Add any additional configuration options you need here cmake -DCMAKE_INSTALL_PREFIX="${build_dir}" \ + -DCMAKE_BUILD_TYPE=Release \ -DEXECUTORCH_BUILD_OPENVINO=ON \ -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ diff --git a/examples/openvino/CMakeLists.txt b/examples/openvino/CMakeLists.txt index 761de51cf28..4a1917fa3af 100644 --- a/examples/openvino/CMakeLists.txt +++ b/examples/openvino/CMakeLists.txt @@ -56,11 +56,11 @@ target_include_directories(openvino_portable_ops_lib PUBLIC ${_common_include_di # Build Executor Runner add_executable(openvino_executor_runner ${_openvino_executor_runner__srcs}) target_include_directories( - 
openvino_executor_runner PUBLIC ${_common_include_directories} ${EXECUTORCH_ROOT}/third-party/gflags/include + openvino_executor_runner PUBLIC ${_common_include_directories} ${EXECUTORCH_ROOT}/cmake-openvino-out/third-party/gflags/include ) # Set Library Directory -set(LIBRARY_DIR "${CMAKE_CURRENT_LIST_DIR}/../../cmake-openvino-out/lib/") +set(LIBRARY_DIR "${CMAKE_CURRENT_LIST_DIR}/../../cmake-openvino-out/lib/;${CMAKE_CURRENT_LIST_DIR}/../../cmake-openvino-out/third-party/gflags") message(STATUS "Library directory path: ${LIBRARY_DIR}") # Locate OpenVINO Backend Library @@ -69,15 +69,21 @@ if(NOT OPENVINO_BACKEND_LIB) message(FATAL_ERROR "OpenVINO backend library not found in ${LIBRARY_DIR}") endif() +# Locate OpenVINO Backend Library +find_library(GFLAGS_LIB NAMES gflags_nothreads PATHS ${LIBRARY_DIR} NO_DEFAULT_PATH) +if(NOT GFLAGS_LIB) + message(FATAL_ERROR "Gflags library not found in ${LIBRARY_DIR}") +endif() + # Link Libraries target_link_libraries(openvino_executor_runner PRIVATE ${OPENVINO_BACKEND_LIB} + ${GFLAGS_LIB} executorch executorch_core openvino_portable_ops_lib extension_data_loader extension_runner_util - gflags pthreadpool ) diff --git a/examples/openvino/openvino_build_example.sh b/examples/openvino/openvino_build_example.sh index de6e585a1ab..ee16658941d 100755 --- a/examples/openvino/openvino_build_example.sh +++ b/examples/openvino/openvino_build_example.sh @@ -18,6 +18,7 @@ main() { # Configure the project with CMake # Note: Add any additional configuration options you need here cmake -DCMAKE_INSTALL_PREFIX="${build_dir}" \ + -DCMAKE_BUILD_TYPE=Release \ -DEXECUTORCH_BUILD_OPENVINO=ON \ -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ From 7139bac7c9d93cad10e6a7645eee33dcd98fcb32 Mon Sep 17 00:00:00 2001 From: Yamini Nimmagadda Date: Tue, 11 Feb 2025 19:02:12 -0800 Subject: [PATCH 038/188] Update backends/openvino/README.md Co-authored-by: Mustafa Cavus --- backends/openvino/README.md | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/openvino/README.md b/backends/openvino/README.md index 06cce4a6211..a1eb5a47b75 100644 --- a/backends/openvino/README.md +++ b/backends/openvino/README.md @@ -59,7 +59,7 @@ cd build cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_PYTHON=ON -DENABLE_WHEEL=ON make -j -cd ../.. +cd .. cmake --install build --prefix cd source setupvars.sh From 58152a2c324075825fa1653116cb1668d77f2232 Mon Sep 17 00:00:00 2001 From: Yamini Nimmagadda Date: Tue, 11 Feb 2025 19:02:48 -0800 Subject: [PATCH 039/188] remove enable_wheel from openvino build instructions Co-authored-by: Mustafa Cavus --- backends/openvino/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/openvino/README.md b/backends/openvino/README.md index a1eb5a47b75..7e627ddd33d 100644 --- a/backends/openvino/README.md +++ b/backends/openvino/README.md @@ -56,7 +56,7 @@ cd openvino git submodule update --init --recursive mkdir build cd build -cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_PYTHON=ON -DENABLE_WHEEL=ON +cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_PYTHON=ON make -j cd .. From 541e44da41e3798ea0e52e141d39965696d0c86f Mon Sep 17 00:00:00 2001 From: Yamini Nimmagadda Date: Tue, 11 Feb 2025 19:14:06 -0800 Subject: [PATCH 040/188] Update backends/openvino/README.md Co-authored-by: Mustafa Cavus --- backends/openvino/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/openvino/README.md b/backends/openvino/README.md index 7e627ddd33d..bb2b1459282 100644 --- a/backends/openvino/README.md +++ b/backends/openvino/README.md @@ -79,7 +79,7 @@ Follow the steps below to setup your build environment: 3. Navigate to `scripts/` directory. -4. **Build OpenVINO Backend**: Once the prerequisites are in place, run the `openvino_build.sh` script to start the build process, OpenVINO backend will be built under `cmake-openvino-out/backends/openvino/` as `libneuron_backend.so` +4. 
**Build OpenVINO Backend**: Once the prerequisites are in place, run the `openvino_build.sh` script to start the build process, OpenVINO backend will be built under `cmake-openvino-out/backends/openvino/` as `libopenvino_backend.so` ```bash ./openvino_build.sh From 99681789a6baa3271fd33cb986e61be6be6b1833 Mon Sep 17 00:00:00 2001 From: Yamini Nimmagadda Date: Tue, 11 Feb 2025 19:56:00 -0800 Subject: [PATCH 041/188] Updated executorch environment setup in README.md --- backends/openvino/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/openvino/README.md b/backends/openvino/README.md index bb2b1459282..4141588aeae 100644 --- a/backends/openvino/README.md +++ b/backends/openvino/README.md @@ -69,10 +69,10 @@ source setupvars.sh Follow the steps below to setup your build environment: -1. **Setup ExecuTorch Environment**: Refer to the [Setting up ExecuTorch](https://pytorch.org/executorch/stable/getting-started-setup) guide for detailed instructions on setting up the ExecuTorch environment. +1. **Setup ExecuTorch Environment**: Refer to the [Environment Setup](https://pytorch.org/executorch/stable/getting-started-setup#environment-setup) guide for detailed instructions on setting up the ExecuTorch environment. 2. **Setup OpenVINO Backend Environment** -- Install the dependent libs. Ensure that you are inside backends/openvino/ directory +- Install the dependent libs. 
Ensure that you are inside `executorch/backends/openvino/` directory ```bash pip install -r requirements.txt ``` From 32e7cc792c7ecc827e6f861c344a960a30f87c17 Mon Sep 17 00:00:00 2001 From: Yamini Nimmagadda Date: Tue, 11 Feb 2025 19:59:08 -0800 Subject: [PATCH 042/188] Update environment setup in build-run-openvino.md --- docs/source/build-run-openvino.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/build-run-openvino.md b/docs/source/build-run-openvino.md index ec3330d94cb..08882c448a7 100644 --- a/docs/source/build-run-openvino.md +++ b/docs/source/build-run-openvino.md @@ -90,17 +90,17 @@ source setupvars.sh Follow the steps below to setup your build environment: -1. **Setup ExecuTorch Environment**: Refer to the [Setting up ExecuTorch](https://pytorch.org/executorch/stable/getting-started-setup) guide for detailed instructions on setting up the ExecuTorch environment. +1. **Setup ExecuTorch Environment**: Refer to the [Environment Setup](https://pytorch.org/executorch/stable/getting-started-setup#environment-setup) guide for detailed instructions on setting up the ExecuTorch environment. 2. **Setup OpenVINO Backend Environment** -- Install the dependent libs. Ensure that you are inside backends/openvino/ directory +- Install the dependent libs. Ensure that you are inside `executorch/backends/openvino/` directory ```bash pip install -r requirements.txt ``` 3. Navigate to `scripts/` directory. -4. **Build OpenVINO Backend**: Once the prerequisites are in place, run the `openvino_build.sh` script to start the build process, OpenVINO backend will be built under `cmake-openvino-out/backends/openvino/` as `libneuron_backend.so` +4. 
**Build OpenVINO Backend**: Once the prerequisites are in place, run the `openvino_build.sh` script to start the build process, OpenVINO backend will be built under `cmake-openvino-out/backends/openvino/` as `libopenvino_backend.so` ```bash ./openvino_build.sh From 09a4ffb67bbcdfb5345c3268f14cd60630e45b08 Mon Sep 17 00:00:00 2001 From: Yamini Nimmagadda Date: Tue, 11 Feb 2025 20:02:39 -0800 Subject: [PATCH 043/188] Added OpenVINO in the main README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index d722887eff7..3dda2a1a452 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,7 @@ Platform Support: - Arm - Cadence - MediaTek + - OpenVINO - Qualcomm - Vulkan - XNNPACK From 5d2784d9a76cbf2dd5ad5ec5e116b9e9ceccfa1f Mon Sep 17 00:00:00 2001 From: Aleksandr Suslov Date: Wed, 5 Feb 2025 10:47:24 +0400 Subject: [PATCH 044/188] added init integration of quantization --- backends/openvino/__init__.py | 3 +- backends/openvino/quantizer/__init__.py | 3 + backends/openvino/quantizer/quantizer.py | 309 ++++++++++++++++++ backends/openvino/requirements.txt | 1 + examples/openvino/CMakeLists.txt | 1 + examples/openvino/aot/README.md | 7 + .../openvino/aot/aot_openvino_compiler.py | 71 +++- examples/openvino/openvino_build_example.sh | 1 + 8 files changed, 392 insertions(+), 4 deletions(-) create mode 100644 backends/openvino/quantizer/__init__.py create mode 100644 backends/openvino/quantizer/quantizer.py diff --git a/backends/openvino/__init__.py b/backends/openvino/__init__.py index dac275d3f12..4a69f6b75ff 100644 --- a/backends/openvino/__init__.py +++ b/backends/openvino/__init__.py @@ -1,4 +1,5 @@ from .partitioner import OpenvinoPartitioner from .preprocess import OpenvinoBackend +from .quantizer.quantizer import OpenVINOQuantizer -__all__ = [OpenvinoBackend, OpenvinoPartitioner] +__all__ = [OpenvinoBackend, OpenvinoPartitioner, OpenVINOQuantizer] diff --git a/backends/openvino/quantizer/__init__.py 
b/backends/openvino/quantizer/__init__.py new file mode 100644 index 00000000000..03ea98e2c5b --- /dev/null +++ b/backends/openvino/quantizer/__init__.py @@ -0,0 +1,3 @@ +from .quantizer import OpenVINOQuantizer + +__all__ = [OpenVINOQuantizer] diff --git a/backends/openvino/quantizer/quantizer.py b/backends/openvino/quantizer/quantizer.py new file mode 100644 index 00000000000..58fde3e23f1 --- /dev/null +++ b/backends/openvino/quantizer/quantizer.py @@ -0,0 +1,309 @@ +# Copyright (c) 2025 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from collections import defaultdict +from typing import Dict, List, Optional, Tuple, Union + +import torch.fx +from torch.ao.quantization.observer import HistogramObserver +from torch.ao.quantization.observer import PerChannelMinMaxObserver +from torch.ao.quantization.quantizer.quantizer import EdgeOrNode +from torch.ao.quantization.quantizer.quantizer import QuantizationAnnotation +from torch.ao.quantization.quantizer.quantizer import QuantizationSpec +from torch.ao.quantization.quantizer.quantizer import QuantizationSpecBase +from torch.ao.quantization.quantizer.quantizer import Quantizer +from torch.ao.quantization.quantizer.quantizer import SharedQuantizationSpec + +import nncf +from nncf.common.graph.graph import NNCFGraph +from nncf.common.logging import nncf_logger +from nncf.common.quantization.quantizer_propagation.solver import QuantizerPropagationRule +from nncf.common.quantization.quantizer_setup import QuantizationPointBase +from nncf.common.quantization.quantizer_setup import SingleConfigQuantizerSetup +from nncf.common.quantization.structs import QuantizationPreset +from nncf.common.quantization.structs import QuantizationScheme +from nncf.experimental.torch.fx.nncf_graph_builder import GraphConverter +from nncf.experimental.torch.fx.node_utils import get_graph_node_by_name +from nncf.experimental.torch.fx.transformations import fold_constant_except_qdq +from nncf.parameters import ModelType +from nncf.parameters import QuantizationMode +from nncf.parameters import TargetDevice +from nncf.quantization.advanced_parameters import FP8QuantizationParameters +from nncf.quantization.advanced_parameters import OverflowFix +from nncf.quantization.advanced_parameters import QuantizationParameters +from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization +from nncf.scopes import IgnoredScope +from nncf.torch.model_graph_manager import get_weight_tensor_port_ids + +QUANT_ANNOTATION_KEY = "quantization_annotation" + + +class 
OpenVINOQuantizer(Quantizer): + """ + Implementation of the Torch AO quantizer which annotates models with quantization annotations + optimally for the inference via OpenVINO. + """ + + def __init__( + self, + *, + mode: Optional[QuantizationMode] = None, + preset: Optional[QuantizationPreset] = None, + target_device: TargetDevice = TargetDevice.ANY, + model_type: Optional[ModelType] = None, + ignored_scope: Optional[IgnoredScope] = None, + overflow_fix: Optional[OverflowFix] = None, + quantize_outputs: bool = False, + activations_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None, + weights_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None, + quantizer_propagation_rule: QuantizerPropagationRule = QuantizerPropagationRule.MERGE_ALL_IN_ONE, + ): + """ + :param mode: Defines optimization mode for the algorithm. None by default. + :param preset: A preset controls the quantization mode (symmetric and asymmetric). + It can take the following values: + - `performance`: Symmetric quantization of weights and activations. + - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations. + Default value is None. In this case, `mixed` preset is used for `transformer` + model type otherwise `performance`. + :param target_device: A target device the specificity of which will be taken + into account while compressing in order to obtain the best performance + for this type of device, defaults to TargetDevice.ANY. + :param model_type: Model type is needed to specify additional patterns + in the model. Supported only `transformer` now. + :param ignored_scope: An ignored scope that defined the list of model control + flow graph nodes to be ignored during quantization. + :param overflow_fix: This option controls whether to apply the overflow issue + fix for the 8-bit quantization. 
+ :param quantize_outputs: Whether to insert additional quantizers right before + each of the model outputs. + :param activations_quantization_params: Quantization parameters for model + activations. + :param weights_quantization_params: Quantization parameters for model weights. + :param quantizer_propagation_rule: The strategy to be used while propagating and merging quantizers. + MERGE_ALL_IN_ONE by default. + """ + self._min_max_algo = MinMaxQuantization( + mode=mode, + preset=preset, + target_device=target_device, + model_type=model_type, + ignored_scope=ignored_scope, + overflow_fix=overflow_fix, + quantize_outputs=quantize_outputs, + activations_quantization_params=activations_quantization_params, + weights_quantization_params=weights_quantization_params, + quantizer_propagation_rule=quantizer_propagation_rule, + ) + + def get_quantization_setup(self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph) -> SingleConfigQuantizerSetup: + self._min_max_algo._set_backend_entity(model) + return self._min_max_algo.find_quantization_setup(model, nncf_graph) + + def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: + nncf_graph = GraphConverter.create_nncf_graph(model) + quantization_setup = self.get_quantization_setup(model, nncf_graph) + + graph = model.graph + node_vs_torch_annotation = defaultdict(QuantizationAnnotation) + + for qp in quantization_setup.quantization_points.values(): + edge_or_node, annotation = self._get_edge_or_node_and_annotation( + graph, nncf_graph, qp, node_vs_torch_annotation + ) + qspec = self._get_torch_ao_qspec_from_qp(qp) + self._fill_torch_ao_annotation(edge_or_node, qspec, annotation) + + for quantizer_ids in quantization_setup.unified_scale_groups.values(): + + root_quantizer_id = self._get_unified_scales_root_quantizer_id( + nncf_graph, quantizer_ids, quantization_setup + ) + root_qp = quantization_setup.quantization_points[root_quantizer_id] + + if any(root_qp.qconfig != 
quantization_setup.quantization_points[q_id].qconfig for q_id in quantizer_ids): + qps = [quantization_setup.quantization_points[q_id] for q_id in quantizer_ids] + msg = ( + "Different quantization configs are set to one unified scale group:" + f"{[(qp.insertion_point.__dict__, str(qp.qconfig)) for qp in qps]}" + ) + raise nncf.InternalError(msg) + + root_target_node = get_graph_node_by_name(graph, root_qp.insertion_point.target_node_name) + root_edge_or_node = self._get_edge_or_node(root_target_node, root_qp, nncf_graph) + + for quantizer_id in quantizer_ids: + if quantizer_id == root_quantizer_id: + continue + + qspec = SharedQuantizationSpec(root_edge_or_node) + qp = quantization_setup.quantization_points[quantizer_id] + edge_or_node, annotation = self._get_edge_or_node_and_annotation( + graph, nncf_graph, qp, node_vs_torch_annotation + ) + self._fill_torch_ao_annotation(edge_or_node, qspec, annotation) + + for node, annotation in node_vs_torch_annotation.items(): + assert QUANT_ANNOTATION_KEY not in node.meta + node.meta[QUANT_ANNOTATION_KEY] = annotation + + @staticmethod + def _get_unified_scales_root_quantizer_id( + nncf_graph: NNCFGraph, quantizer_ids: List[int], quantizer_setup: SingleConfigQuantizerSetup + ) -> int: + """ + Identifies the earliest quantizer node ID based on the corresponding `nncf_node.node_id` + in the given NNCFGraph. This is required by the `_get_obs_or_fq_map` function. + Refer to: https://github.com/pytorch/pytorch/blob/main/torch/ao/quantization/pt2e/prepare.py#L291 + + :param nncf_graph: The NNCFGraph instance. + :param quantizer_ids: The list of quantizer IDs to evaluate. + :param quantizer_setup: The instance of SingleConfigQuantizerSetup. + :return: The ID of the earliest quantizer node in terms of `nncf_node.node_id`. 
+ """ + nncf_node_quantizer_id = None + root_quantizer_id = None + for quantizer_id in quantizer_ids: + target_node_name = quantizer_setup.quantization_points[quantizer_id].insertion_point.target_node_name + nncf_node = nncf_graph.get_node_by_name(target_node_name) + if nncf_node_quantizer_id is None or nncf_node.node_id < nncf_node_quantizer_id: + root_quantizer_id = quantizer_id + nncf_node_quantizer_id = nncf_node.node_id + return root_quantizer_id + + @staticmethod + def _get_edge_or_node_and_annotation( + graph: torch.fx.Graph, + nncf_graph: NNCFGraph, + qp: QuantizationPointBase, + node_vs_torch_annotation: Dict[torch.fx.Node, QuantizationAnnotation], + ) -> Tuple[EdgeOrNode, QuantizationAnnotation]: + """ + Retrieves the edge or node and its corresponding QuantizationAnnotation based on the given graph, + quantization point, and node-to-annotation mapping. + + :param graph: torch.fx.Graph instance. + :param nncf_graph: NNCFGraph instance. + :param qp: QuantizationPointBase instance. + :param node_vs_torch_annotation: A dictionary mapping torch.fx.GraphNode objects to their respective + QuantizationAnnotations. + :return: A tuple containing the EdgeOrNode and its associated QuantizationAnnotation. + """ + target_node = get_graph_node_by_name(graph, qp.insertion_point.target_node_name) + annotation = node_vs_torch_annotation[target_node] + edge_or_node = OpenVINOQuantizer._get_edge_or_node(target_node, qp, nncf_graph) + return edge_or_node, annotation + + @staticmethod + def _get_edge_or_node(target_node: torch.fx.Node, qp: QuantizationPointBase, nncf_graph: NNCFGraph) -> EdgeOrNode: + """ + Returns the edge or node based on the given target node and quantization point. + + :param target_node: Target node instance. + :param qp: QuantizationPointBase instance. + :param graph: NNCFGraph instance. + :return: The corresponding EdgeOrNode derived from the target node and quantization point. 
+ """ + ip = qp.insertion_point + if qp.is_weight_quantization_point(): + nncf_node = nncf_graph.get_node_by_name(target_node.name) + weights_ports_ids = get_weight_tensor_port_ids(nncf_node, nncf_graph) + if len(weights_ports_ids) > 1: + # TODO(dlyakhov): support quantization for nodes with several weights + nncf_logger.warning( + f"Quantization of the weighted node {target_node.name}" + " is not yet supported by the OpenVINOQuantizer." + f" Only the weight on port ID {weights_ports_ids[0]} will be quantized." + f" Quantizable weights are located on ports: {weights_ports_ids}." + ) + weight_node = target_node.all_input_nodes[weights_ports_ids[0]] + return (weight_node, target_node) + + if ip.input_port_id is None: + return target_node + + node = target_node.all_input_nodes[ip.input_port_id] + return (node, target_node) + + @staticmethod + def _fill_torch_ao_annotation( + edge_or_node: EdgeOrNode, + qspec: QuantizationSpecBase, + annotation_to_update: QuantizationAnnotation, + ) -> None: + """ + Helper method to update the annotation_to_update based on the specified edge_or_node and qspec. + + :param edge_or_node: The target EdgeOrNode to be used for the update. + :param qspec: An instance of QuantizationSpecBase representing the quantization specification to apply. + :param annotation_to_update: The annotation to update based on the edge_or_node and qspec. + """ + if isinstance(edge_or_node, torch.fx.Node): + annotation_to_update.output_qspec = qspec + else: + annotation_to_update.input_qspec_map[edge_or_node[0]] = qspec + + @staticmethod + def _get_torch_ao_qspec_from_qp(qp: QuantizationPointBase) -> QuantizationSpec: + """ + Retrieves the quantization configuration from the given quantization point and + converts it into a QuantizationSpec. + + :param qp: An instance of QuantizationPointBase. + :return: A QuantizationSpec retrieved and converted from the quantization point. 
+ """ + # Eps value is copied from nncf/torch/quantization/layers.py + extra_args = {"eps": 1e-16} + qconfig = qp.qconfig + is_weight = qp.is_weight_quantization_point() + + if qconfig.per_channel: + torch_qscheme = ( + torch.per_channel_symmetric + if qconfig.mode is QuantizationScheme.SYMMETRIC + else torch.per_channel_affine + ) + else: + torch_qscheme = ( + torch.per_tensor_symmetric if qconfig.mode is QuantizationScheme.SYMMETRIC else torch.per_tensor_affine + ) + if is_weight: + observer = PerChannelMinMaxObserver + quant_min = -128 + quant_max = 127 + dtype = torch.int8 + channel_axis = 0 + else: + observer = ( + HistogramObserver + if torch_qscheme in [torch.per_tensor_symmetric, torch.per_tensor_affine] + else PerChannelMinMaxObserver + ) + quant_min = 0 + quant_max = 255 + dtype = torch.int8 if qconfig.signedness_to_force else torch.uint8 + channel_axis = 1 # channel dim for activations + return QuantizationSpec( + dtype=dtype, + observer_or_fake_quant_ctr=observer.with_args(**extra_args), + quant_min=quant_min, + quant_max=quant_max, + qscheme=torch_qscheme, + ch_axis=channel_axis, + is_dynamic=False, + ) + + def validate(self, model: torch.fx.GraphModule) -> None: + pass + + def transform_for_annotation(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: + fold_constant_except_qdq(model) + return model diff --git a/backends/openvino/requirements.txt b/backends/openvino/requirements.txt index 7c3de886e27..f00257127a3 100644 --- a/backends/openvino/requirements.txt +++ b/backends/openvino/requirements.txt @@ -6,3 +6,4 @@ tokenizers transformers piq pillow +nncf @ https://github.com/openvinotoolkit/nncf.git diff --git a/examples/openvino/CMakeLists.txt b/examples/openvino/CMakeLists.txt index 4a1917fa3af..10638a7b5f7 100644 --- a/examples/openvino/CMakeLists.txt +++ b/examples/openvino/CMakeLists.txt @@ -55,6 +55,7 @@ target_include_directories(openvino_portable_ops_lib PUBLIC ${_common_include_di # Build Executor Runner 
add_executable(openvino_executor_runner ${_openvino_executor_runner__srcs}) + target_include_directories( openvino_executor_runner PUBLIC ${_common_include_directories} ${EXECUTORCH_ROOT}/cmake-openvino-out/third-party/gflags/include ) diff --git a/examples/openvino/aot/README.md b/examples/openvino/aot/README.md index 6c59f1dad41..46e476a8408 100644 --- a/examples/openvino/aot/README.md +++ b/examples/openvino/aot/README.md @@ -31,10 +31,17 @@ python aot_openvino_compiler.py --suite --model --inp - `[1, 3, 224, 224]` (Zsh users: wrap in quotes) - `(1, 3, 224, 224)` +- **`--quantize`** (optional): + Enable model quantization: Default is False. + +- **`--dataset`** (optional): + Path to the calibration dataset. TODO: It is necessary to think in what form to support the dataset. For the experiment, tiny-imagenet is used, which can be downloaded from here http://cs231n.stanford.edu/tiny-imagenet-200.zip and specify the path to it. + - **`--device`** (optional): Target device for the compiled model. Default is `CPU`. Examples: `CPU`, `GPU` + ## **Examples** ### Export a TIMM VGG16 model for the CPU diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 4674fbbd755..cc31e011e38 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -4,10 +4,15 @@ # except in compliance with the License. See the license file in the root # directory of this source tree for more details. 
+import nncf.experimental +import nncf.experimental.torch import executorch +import nncf import timm import torch +import torchvision.datasets as datasets import torchvision.models as torchvision_models +import torchvision.transforms as transforms from transformers import AutoModel from executorch.exir.backend.backend_details import CompileSpec from executorch.backends.openvino.preprocess import OpenvinoBackend @@ -16,6 +21,12 @@ from torch.export import export, ExportedProgram from torch.export.exported_program import ExportedProgram import argparse +from executorch.backends.openvino import OpenVINOQuantizer +from torch.ao.quantization.quantize_pt2e import ( + convert_pt2e, + prepare_pt2e, +) + # Function to load a model based on the selected suite def load_model(suite: str, model_name: str): @@ -30,7 +41,48 @@ def load_model(suite: str, model_name: str): else: raise ValueError(f"Unsupported model suite: {suite}") -def main(suite: str, model_name: str, input_shape, device: str): + +def load_calibration_dataset(dataset_path: str): + val_dir = f"{dataset_path}/val" + + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + + val_dataset = datasets.ImageFolder( + val_dir, + transforms.Compose( + [ + transforms.Resize(64), # for tiny imagenet + transforms.ToTensor(), + normalize, + ] + ), + ) + + calibration_dataset = torch.utils.data.DataLoader( + val_dataset, batch_size=1, shuffle=False, num_workers=0, pin_memory=True + ) + + return calibration_dataset + + +def quantize_model(model: torch.fx.GraphModule, calibration_dataset: torch.utils.data.DataLoader, subset_size=300): + quantizer = OpenVINOQuantizer() + + print("PTQ: Annotate the model...") + annotated_model = prepare_pt2e(model, quantizer) + + print("PTQ: Calibrate the model...") + for idx, data in enumerate(calibration_dataset): + if idx >= subset_size: + break + annotated_model(data[0]) + + print("PTQ: Convert the quantized model...") + quantized_model = 
convert_pt2e(annotated_model) + return quantized_model + + +def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: str, device: str): # Ensure input_shape is a tuple if isinstance(input_shape, list): input_shape = tuple(input_shape) @@ -44,9 +96,19 @@ def main(suite: str, model_name: str, input_shape, device: str): # Provide input example_args = (torch.randn(*input_shape), ) - # Export to aten dialect using torch.export + # Export the model to the aten dialect aten_dialect: ExportedProgram = export(model, example_args) + if quantize: + # Quantize model + if not dataset_path: + raise ValueError("Quantization requires a calibration dataset.") + calibration_dataset = load_calibration_dataset(dataset_path) + + captured_model = aten_dialect.module() + quantized_model = quantize_model(captured_model, calibration_dataset) + aten_dialect: ExportedProgram = export(quantized_model, example_args) + # Convert to edge dialect edge_program: EdgeProgramManager = to_edge(aten_dialect) to_be_lowered_module = edge_program.exported_program() @@ -71,10 +133,13 @@ def main(suite: str, model_name: str, input_shape, device: str): parser.add_argument("--model", type=str, required=True, help="Model name to be loaded.") parser.add_argument("--input_shape", type=eval, required=True, help="Input shape for the model as a list or tuple (e.g., [1, 3, 224, 224] or (1, 3, 224, 224)).") + parser.add_argument("--quantize", action="store_true", help="Enable model quantization.") + parser.add_argument("--dataset", type=str, help="Path to the calibration dataset.") parser.add_argument("--device", type=str, default="CPU", help="Target device for compiling the model (e.g., CPU, GPU). 
Default is CPU.") args = parser.parse_args() # Run the main function with parsed arguments - main(args.suite, args.model, args.input_shape, args.device) + with nncf.torch.disable_patching(): + main(args.suite, args.model, args.input_shape, args.quantize, args.dataset, args.device) diff --git a/examples/openvino/openvino_build_example.sh b/examples/openvino/openvino_build_example.sh index ee16658941d..52c508d8ee2 100755 --- a/examples/openvino/openvino_build_example.sh +++ b/examples/openvino/openvino_build_example.sh @@ -34,6 +34,7 @@ main() { local example_dir=examples/openvino local example_build_dir="${build_dir}/${example_dir}" local cmake_prefix_path="${PWD}/${build_dir}/lib/cmake/ExecuTorch;${PWD}/${build_dir}/third-party/gflags;" + rm -rf "${example_build_dir}" ## OpenVINO original From 61488d5a9d77ebf86658392c8ee0e24b6eb9f550 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Fri, 7 Feb 2025 18:12:06 +0100 Subject: [PATCH 045/188] deit3_small_patch16_224_in21ft1k --- backends/openvino/quantizer/quantizer.py | 2 ++ .../openvino/aot/aot_openvino_compiler.py | 25 +++++++++++-------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/backends/openvino/quantizer/quantizer.py b/backends/openvino/quantizer/quantizer.py index 58fde3e23f1..aefa91f7455 100644 --- a/backends/openvino/quantizer/quantizer.py +++ b/backends/openvino/quantizer/quantizer.py @@ -15,6 +15,7 @@ import torch.fx from torch.ao.quantization.observer import HistogramObserver from torch.ao.quantization.observer import PerChannelMinMaxObserver +from torch.ao.quantization.observer import MinMaxObserver from torch.ao.quantization.quantizer.quantizer import EdgeOrNode from torch.ao.quantization.quantizer.quantizer import QuantizationAnnotation from torch.ao.quantization.quantizer.quantizer import QuantizationSpec @@ -276,6 +277,7 @@ def _get_torch_ao_qspec_from_qp(qp: QuantizationPointBase) -> QuantizationSpec: torch.per_tensor_symmetric if qconfig.mode is QuantizationScheme.SYMMETRIC else 
torch.per_tensor_affine ) if is_weight: + observer = PerChannelMinMaxObserver if qconfig.per_channel else MinMaxObserver observer = PerChannelMinMaxObserver quant_min = -128 quant_max = 127 diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index cc31e011e38..dabf1c964fa 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -65,20 +65,17 @@ def load_calibration_dataset(dataset_path: str): return calibration_dataset -def quantize_model(model: torch.fx.GraphModule, calibration_dataset: torch.utils.data.DataLoader, subset_size=300): - quantizer = OpenVINOQuantizer() +def quantize_model(model: torch.fx.GraphModule, example_args, subset_size=300): + quantizer = OpenVINOQuantizer(ignored_scope=nncf.IgnoredScope(types=["__getitem__", "layer_norm"])) print("PTQ: Annotate the model...") annotated_model = prepare_pt2e(model, quantizer) print("PTQ: Calibrate the model...") - for idx, data in enumerate(calibration_dataset): - if idx >= subset_size: - break - annotated_model(data[0]) + annotated_model(*example_args) print("PTQ: Convert the quantized model...") - quantized_model = convert_pt2e(annotated_model) + quantized_model = convert_pt2e(annotated_model, fold_quantize=False) return quantized_model @@ -106,7 +103,9 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: calibration_dataset = load_calibration_dataset(dataset_path) captured_model = aten_dialect.module() - quantized_model = quantize_model(captured_model, calibration_dataset) + visualize_fx_model(captured_model, f"{model_name}_fp32.svg") + quantized_model = quantize_model(captured_model, example_args) + visualize_fx_model(quantized_model, f"{model_name}_int8.svg") aten_dialect: ExportedProgram = export(quantized_model, example_args) # Convert to edge dialect @@ -121,9 +120,15 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: 
exec_prog = lowered_module.to_executorch(config=executorch.exir.ExecutorchBackendConfig()) # Serialize and save it to a file - with open(f"{model_name}.pte", "wb") as file: + model_name = f"{model_name}_{'int8' if quantize else 'fp32'}.pte" + with open(model_name, "wb") as file: exec_prog.write_to_file(file) - print(f"Model exported and saved as {model_name}.pte on {device}.") + print(f"Model exported and saved as {model_name} on {device}.") + +from torch.fx.passes.graph_drawer import FxGraphDrawer +def visualize_fx_model(model: torch.fx.GraphModule, output_svg_path: str): + g = FxGraphDrawer(model, output_svg_path) + g.get_dot_graph().write_svg(output_svg_path) if __name__ == "__main__": # Argument parser for dynamic inputs From 42155a1d433d87428781099b9c1ba276e7aebb55 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Fri, 7 Feb 2025 18:28:57 +0100 Subject: [PATCH 046/188] Resnet-like model checked --- examples/openvino/aot/aot_openvino_compiler.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index dabf1c964fa..a062af4d001 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -66,7 +66,8 @@ def load_calibration_dataset(dataset_path: str): def quantize_model(model: torch.fx.GraphModule, example_args, subset_size=300): - quantizer = OpenVINOQuantizer(ignored_scope=nncf.IgnoredScope(types=["__getitem__", "layer_norm"])) + #quantizer = OpenVINOQuantizer(ignored_scope=nncf.IgnoredScope(types=["__getitem__", "layer_norm"])) + quantizer = OpenVINOQuantizer() print("PTQ: Annotate the model...") annotated_model = prepare_pt2e(model, quantizer) @@ -100,12 +101,12 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: # Quantize model if not dataset_path: raise ValueError("Quantization requires a calibration dataset.") - calibration_dataset = 
load_calibration_dataset(dataset_path) + #calibration_dataset = load_calibration_dataset(dataset_path) captured_model = aten_dialect.module() - visualize_fx_model(captured_model, f"{model_name}_fp32.svg") + #visualize_fx_model(captured_model, f"{model_name}_fp32.svg") quantized_model = quantize_model(captured_model, example_args) - visualize_fx_model(quantized_model, f"{model_name}_int8.svg") + #visualize_fx_model(quantized_model, f"{model_name}_int8.svg") aten_dialect: ExportedProgram = export(quantized_model, example_args) # Convert to edge dialect From 7c66314296db63523872df6407bfbc271d4d8e4c Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Mon, 10 Feb 2025 19:39:26 +0100 Subject: [PATCH 047/188] WIP --- backends/openvino/quantizer/quantizer.py | 87 +++++------- .../openvino/aot/aot_openvino_compiler.py | 132 ++++++++++++++---- .../openvino_executor_runner.cpp | 1 + 3 files changed, 142 insertions(+), 78 deletions(-) diff --git a/backends/openvino/quantizer/quantizer.py b/backends/openvino/quantizer/quantizer.py index aefa91f7455..b5f43251426 100644 --- a/backends/openvino/quantizer/quantizer.py +++ b/backends/openvino/quantizer/quantizer.py @@ -10,12 +10,11 @@ # limitations under the License. 
from collections import defaultdict -from typing import Dict, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Tuple import torch.fx from torch.ao.quantization.observer import HistogramObserver from torch.ao.quantization.observer import PerChannelMinMaxObserver -from torch.ao.quantization.observer import MinMaxObserver from torch.ao.quantization.quantizer.quantizer import EdgeOrNode from torch.ao.quantization.quantizer.quantizer import QuantizationAnnotation from torch.ao.quantization.quantizer.quantizer import QuantizationSpec @@ -24,25 +23,11 @@ from torch.ao.quantization.quantizer.quantizer import SharedQuantizationSpec import nncf +import nncf.common.quantization as q +import nncf.experimental.torch.fx as nncf_fx +import nncf.parameters as p +import nncf.quantization.advanced_parameters as advanced_p from nncf.common.graph.graph import NNCFGraph -from nncf.common.logging import nncf_logger -from nncf.common.quantization.quantizer_propagation.solver import QuantizerPropagationRule -from nncf.common.quantization.quantizer_setup import QuantizationPointBase -from nncf.common.quantization.quantizer_setup import SingleConfigQuantizerSetup -from nncf.common.quantization.structs import QuantizationPreset -from nncf.common.quantization.structs import QuantizationScheme -from nncf.experimental.torch.fx.nncf_graph_builder import GraphConverter -from nncf.experimental.torch.fx.node_utils import get_graph_node_by_name -from nncf.experimental.torch.fx.transformations import fold_constant_except_qdq -from nncf.parameters import ModelType -from nncf.parameters import QuantizationMode -from nncf.parameters import TargetDevice -from nncf.quantization.advanced_parameters import FP8QuantizationParameters -from nncf.quantization.advanced_parameters import OverflowFix -from nncf.quantization.advanced_parameters import QuantizationParameters -from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization -from nncf.scopes import IgnoredScope 
-from nncf.torch.model_graph_manager import get_weight_tensor_port_ids QUANT_ANNOTATION_KEY = "quantization_annotation" @@ -56,16 +41,15 @@ class OpenVINOQuantizer(Quantizer): def __init__( self, *, - mode: Optional[QuantizationMode] = None, - preset: Optional[QuantizationPreset] = None, - target_device: TargetDevice = TargetDevice.ANY, - model_type: Optional[ModelType] = None, - ignored_scope: Optional[IgnoredScope] = None, - overflow_fix: Optional[OverflowFix] = None, + mode: Optional[p.QuantizationMode] = None, + preset: Optional[q.structs.QuantizationPreset] = None, + target_device: p.TargetDevice = p.TargetDevice.ANY, + transformer_model: bool = False, + ignored_scope: Optional[nncf.IgnoredScope] = None, + overflow_fix: Optional[advanced_p.OverflowFix] = None, quantize_outputs: bool = False, - activations_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None, - weights_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None, - quantizer_propagation_rule: QuantizerPropagationRule = QuantizerPropagationRule.MERGE_ALL_IN_ONE, + activations_quantization_params: Optional[advanced_p.QuantizationParameters] = None, + weights_quantization_params: Optional[advanced_p.QuantizationParameters] = None, ): """ :param mode: Defines optimization mode for the algorithm. None by default. @@ -89,29 +73,28 @@ def __init__( :param activations_quantization_params: Quantization parameters for model activations. :param weights_quantization_params: Quantization parameters for model weights. - :param quantizer_propagation_rule: The strategy to be used while propagating and merging quantizers. - MERGE_ALL_IN_ONE by default. 
""" - self._min_max_algo = MinMaxQuantization( + self._min_max_algo = nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization( mode=mode, preset=preset, target_device=target_device, - model_type=model_type, + model_type=p.ModelType.TRANSFORMER if transformer_model else None, ignored_scope=ignored_scope, overflow_fix=overflow_fix, quantize_outputs=quantize_outputs, activations_quantization_params=activations_quantization_params, weights_quantization_params=weights_quantization_params, - quantizer_propagation_rule=quantizer_propagation_rule, ) - def get_quantization_setup(self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph) -> SingleConfigQuantizerSetup: + def get_nncf_quantization_setup( + self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph + ) -> q.quantizer_setup.SingleConfigQuantizerSetup: self._min_max_algo._set_backend_entity(model) return self._min_max_algo.find_quantization_setup(model, nncf_graph) def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: - nncf_graph = GraphConverter.create_nncf_graph(model) - quantization_setup = self.get_quantization_setup(model, nncf_graph) + nncf_graph = nncf_fx.nncf_graph_builder.GraphConverter.create_nncf_graph(model) + quantization_setup = self.get_nncf_quantization_setup(model, nncf_graph) graph = model.graph node_vs_torch_annotation = defaultdict(QuantizationAnnotation) @@ -138,7 +121,9 @@ def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: ) raise nncf.InternalError(msg) - root_target_node = get_graph_node_by_name(graph, root_qp.insertion_point.target_node_name) + root_target_node = nncf_fx.node_utils.get_graph_node_by_name( + graph, root_qp.insertion_point.target_node_name + ) root_edge_or_node = self._get_edge_or_node(root_target_node, root_qp, nncf_graph) for quantizer_id in quantizer_ids: @@ -155,10 +140,11 @@ def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: for node, annotation in node_vs_torch_annotation.items(): assert 
QUANT_ANNOTATION_KEY not in node.meta node.meta[QUANT_ANNOTATION_KEY] = annotation + return model @staticmethod def _get_unified_scales_root_quantizer_id( - nncf_graph: NNCFGraph, quantizer_ids: List[int], quantizer_setup: SingleConfigQuantizerSetup + nncf_graph: NNCFGraph, quantizer_ids: List[int], quantizer_setup: q.quantizer_setup.SingleConfigQuantizerSetup ) -> int: """ Identifies the earliest quantizer node ID based on the corresponding `nncf_node.node_id` @@ -184,7 +170,7 @@ def _get_unified_scales_root_quantizer_id( def _get_edge_or_node_and_annotation( graph: torch.fx.Graph, nncf_graph: NNCFGraph, - qp: QuantizationPointBase, + qp: q.quantizer_setup.QuantizationPointBase, node_vs_torch_annotation: Dict[torch.fx.Node, QuantizationAnnotation], ) -> Tuple[EdgeOrNode, QuantizationAnnotation]: """ @@ -198,13 +184,15 @@ def _get_edge_or_node_and_annotation( QuantizationAnnotations. :return: A tuple containing the EdgeOrNode and its associated QuantizationAnnotation. """ - target_node = get_graph_node_by_name(graph, qp.insertion_point.target_node_name) + target_node = nncf_fx.node_utils.get_graph_node_by_name(graph, qp.insertion_point.target_node_name) annotation = node_vs_torch_annotation[target_node] edge_or_node = OpenVINOQuantizer._get_edge_or_node(target_node, qp, nncf_graph) return edge_or_node, annotation @staticmethod - def _get_edge_or_node(target_node: torch.fx.Node, qp: QuantizationPointBase, nncf_graph: NNCFGraph) -> EdgeOrNode: + def _get_edge_or_node( + target_node: torch.fx.Node, qp: q.quantizer_setup.QuantizationPointBase, nncf_graph: NNCFGraph + ) -> EdgeOrNode: """ Returns the edge or node based on the given target node and quantization point. 
@@ -216,10 +204,10 @@ def _get_edge_or_node(target_node: torch.fx.Node, qp: QuantizationPointBase, nnc ip = qp.insertion_point if qp.is_weight_quantization_point(): nncf_node = nncf_graph.get_node_by_name(target_node.name) - weights_ports_ids = get_weight_tensor_port_ids(nncf_node, nncf_graph) + weights_ports_ids = nncf.torch.model_graph_manager.get_weight_tensor_port_ids(nncf_node, nncf_graph) if len(weights_ports_ids) > 1: # TODO(dlyakhov): support quantization for nodes with several weights - nncf_logger.warning( + nncf.common.logging.nncf_logger.warning( f"Quantization of the weighted node {target_node.name}" " is not yet supported by the OpenVINOQuantizer." f" Only the weight on port ID {weights_ports_ids[0]} will be quantized." @@ -253,7 +241,7 @@ def _fill_torch_ao_annotation( annotation_to_update.input_qspec_map[edge_or_node[0]] = qspec @staticmethod - def _get_torch_ao_qspec_from_qp(qp: QuantizationPointBase) -> QuantizationSpec: + def _get_torch_ao_qspec_from_qp(qp: q.quantizer_setup.QuantizationPointBase) -> QuantizationSpec: """ Retrieves the quantization configuration from the given quantization point and converts it into a QuantizationSpec. 
@@ -269,15 +257,16 @@ def _get_torch_ao_qspec_from_qp(qp: QuantizationPointBase) -> QuantizationSpec: if qconfig.per_channel: torch_qscheme = ( torch.per_channel_symmetric - if qconfig.mode is QuantizationScheme.SYMMETRIC + if qconfig.mode is q.structs.QuantizationScheme.SYMMETRIC else torch.per_channel_affine ) else: torch_qscheme = ( - torch.per_tensor_symmetric if qconfig.mode is QuantizationScheme.SYMMETRIC else torch.per_tensor_affine + torch.per_tensor_symmetric + if qconfig.mode is q.structs.QuantizationScheme.SYMMETRIC + else torch.per_tensor_affine ) if is_weight: - observer = PerChannelMinMaxObserver if qconfig.per_channel else MinMaxObserver observer = PerChannelMinMaxObserver quant_min = -128 quant_max = 127 @@ -307,5 +296,5 @@ def validate(self, model: torch.fx.GraphModule) -> None: pass def transform_for_annotation(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: - fold_constant_except_qdq(model) + nncf_fx.transformations.fold_constant_except_qdq(model) return model diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index a062af4d001..928757c32e2 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -22,11 +22,15 @@ from torch.export.exported_program import ExportedProgram import argparse from executorch.backends.openvino import OpenVINOQuantizer +#from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer +from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e from torch.ao.quantization.quantize_pt2e import ( convert_pt2e, prepare_pt2e, ) - +from sklearn.metrics import accuracy_score +from timm.data import resolve_data_config +from timm.data.transforms_factory import create_transform # Function to load a model based on the selected suite def load_model(suite: str, model_name: str): @@ -42,20 +46,17 @@ def load_model(suite: str, model_name: str): raise 
ValueError(f"Unsupported model suite: {suite}") -def load_calibration_dataset(dataset_path: str): +def load_calibration_dataset(dataset_path: str, suite: str, model: torch.nn.Module): val_dir = f"{dataset_path}/val" - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + if suite == "torchvision": + transform = torchvision_models.get_model_weights(model.name).transforms() + else: + transform = create_transform(**resolve_data_config(model.pretrained_cfg, model=model)) val_dataset = datasets.ImageFolder( val_dir, - transforms.Compose( - [ - transforms.Resize(64), # for tiny imagenet - transforms.ToTensor(), - normalize, - ] - ), + transform=transform ) calibration_dataset = torch.utils.data.DataLoader( @@ -65,21 +66,6 @@ def load_calibration_dataset(dataset_path: str): return calibration_dataset -def quantize_model(model: torch.fx.GraphModule, example_args, subset_size=300): - #quantizer = OpenVINOQuantizer(ignored_scope=nncf.IgnoredScope(types=["__getitem__", "layer_norm"])) - quantizer = OpenVINOQuantizer() - - print("PTQ: Annotate the model...") - annotated_model = prepare_pt2e(model, quantizer) - - print("PTQ: Calibrate the model...") - annotated_model(*example_args) - - print("PTQ: Convert the quantized model...") - quantized_model = convert_pt2e(annotated_model, fold_quantize=False) - return quantized_model - - def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: str, device: str): # Ensure input_shape is a tuple if isinstance(input_shape, list): @@ -98,15 +84,24 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: aten_dialect: ExportedProgram = export(model, example_args) if quantize: + if suite == "huggingface": + raise ValueError("Quantization of {suite} models did not support yet.") + # Quantize model if not dataset_path: raise ValueError("Quantization requires a calibration dataset.") - #calibration_dataset = load_calibration_dataset(dataset_path) + 
calibration_dataset = load_calibration_dataset(dataset_path, suite, model) captured_model = aten_dialect.module() #visualize_fx_model(captured_model, f"{model_name}_fp32.svg") - quantized_model = quantize_model(captured_model, example_args) - #visualize_fx_model(quantized_model, f"{model_name}_int8.svg") + quantizer = OpenVINOQuantizer() + + print("PTQ: Quantize the model") + def transform(x): + return x[0] + + quantized_model = quantize_pt2e(captured_model, quantizer, calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform), fold_quantize=False) + aten_dialect: ExportedProgram = export(quantized_model, example_args) # Convert to edge dialect @@ -121,16 +116,95 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: exec_prog = lowered_module.to_executorch(config=executorch.exir.ExecutorchBackendConfig()) # Serialize and save it to a file - model_name = f"{model_name}_{'int8' if quantize else 'fp32'}.pte" + model_name = f"{model_name}_{'int8' if quantize else 'fp32'}.pte" with open(model_name, "wb") as file: exec_prog.write_to_file(file) print(f"Model exported and saved as {model_name} on {device}.") + if quantize: + print("Start validation of the quantized model:") + + # 1: Dump inputs + import os + import shutil + + dest_path = "tmp_inputs" + out_path = "tmp_outputs" + targets, input_files = [], [] + for d in [dest_path, out_path]: + if os.path.exists(d): + shutil.rmtree(d) + os.makedirs(d) + input_list = "" + for idx, data in enumerate(calibration_dataset): + feature, target = data + targets.append(target) + file_name = f"{dest_path}/input_{idx}_0.raw" + input_list += file_name + " " + if not isinstance(feature, torch.Tensor): + feature = torch.tensor(feature) + feature.detach().numpy().tofile(file_name) + input_files.append(file_name) + + inp_list_file = os.path.join(dest_path, "in_list.txt") + with open(inp_list_file, "w") as f: + input_list = input_list.strip() + "\n" + f.write(input_list) + + # 2: Run the 
executor + print("Run openvino_executor_runner...") + import subprocess + breakpoint() + subprocess.run(["../../../cmake-openvino-out/examples/openvino/openvino_executor_runner", + f"--model_path={model_name}", + f"--input_list_path={inp_list_file}", + f"--output_folder_path={out_path}", + #f"--num_iter={len(input_files)}" + ]) + + # 3: load the outputs and compare with the targets + import numpy as np + predictions = [] + for i in range(len(input_files)): + predictions.append( + np.fromfile( + os.path.join(out_path, f"output_{i}.raw"), dtype=np.float32 + ) + ) + + k_val = [1, 5] + acc_top1 = accuracy_score(predictions, targets) + print(f"acc@1: {acc_top1}") + + from torch.fx.passes.graph_drawer import FxGraphDrawer def visualize_fx_model(model: torch.fx.GraphModule, output_svg_path: str): g = FxGraphDrawer(model, output_svg_path) g.get_dot_graph().write_svg(output_svg_path) +def generate_inputs(dest_path: str, file_name: str, inputs=None, input_list=None): + input_list_file = None + input_files = [] + + # Prepare input list + if input_list is not None: + input_list_file = f"{dest_path}/{file_name}" + with open(input_list_file, "w") as f: + f.write(input_list) + f.flush() + + # Prepare input data + if inputs is not None: + for idx, data in enumerate(inputs): + for i, d in enumerate(data): + file_name = f"{dest_path}/input_{idx}_{i}.raw" + if not isinstance(d, torch.Tensor): + d = torch.tensor(d) + d.detach().numpy().tofile(file_name) + input_files.append(file_name) + + return input_list_file, input_files + if __name__ == "__main__": # Argument parser for dynamic inputs parser = argparse.ArgumentParser(description="Export models with executorch.") diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp index 7615b63649a..b0d3a9004c2 100644 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ 
-179,6 +179,7 @@ int main(int argc, char** argv) { std::string file_path; while (std::getline(input_list, file_path)) { auto input_files = split(file_path, " "); + ET_LOG(Info, "INPUT_FILES.SIZE: %ld", input_files.size()); if (input_files.size() == 0) { break; } From c1fa9e25851b5819dea18b0070c9ab46cc2e0c3a Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Tue, 11 Feb 2025 10:31:15 +0100 Subject: [PATCH 048/188] Formating --- backends/openvino/quantizer/quantizer.py | 15 +- .../openvino/aot/aot_openvino_compiler.py | 128 ++++++++---------- .../openvino_executor_runner.cpp | 2 + 3 files changed, 63 insertions(+), 82 deletions(-) diff --git a/backends/openvino/quantizer/quantizer.py b/backends/openvino/quantizer/quantizer.py index b5f43251426..63da8325e4f 100644 --- a/backends/openvino/quantizer/quantizer.py +++ b/backends/openvino/quantizer/quantizer.py @@ -1,13 +1,8 @@ -# Copyright (c) 2025 Intel Corporation -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Copyright (c) Intel Corporation +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. 
from collections import defaultdict from typing import Dict, List, Optional, Tuple diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 928757c32e2..91df971403c 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -4,33 +4,31 @@ # except in compliance with the License. See the license file in the root # directory of this source tree for more details. -import nncf.experimental -import nncf.experimental.torch +import argparse + import executorch -import nncf import timm import torch import torchvision.datasets as datasets import torchvision.models as torchvision_models -import torchvision.transforms as transforms -from transformers import AutoModel -from executorch.exir.backend.backend_details import CompileSpec -from executorch.backends.openvino.preprocess import OpenvinoBackend -from executorch.backends.openvino.partitioner import OpenvinoPartitioner -from executorch.exir import EdgeProgramManager, to_edge -from torch.export import export, ExportedProgram -from torch.export.exported_program import ExportedProgram -import argparse from executorch.backends.openvino import OpenVINOQuantizer -#from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer -from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e -from torch.ao.quantization.quantize_pt2e import ( - convert_pt2e, - prepare_pt2e, -) +from executorch.backends.openvino.partitioner import OpenvinoPartitioner +from executorch.exir import EdgeProgramManager +from executorch.exir import to_edge +from executorch.exir.backend.backend_details import CompileSpec from sklearn.metrics import accuracy_score from timm.data import resolve_data_config from timm.data.transforms_factory import create_transform +from torch.export import ExportedProgram +from torch.export import export +from torch.export.exported_program import ExportedProgram 
+from transformers import AutoModel + +import nncf +import nncf.experimental +import nncf.experimental.torch +from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e + # Function to load a model based on the selected suite def load_model(suite: str, model_name: str): @@ -54,10 +52,7 @@ def load_calibration_dataset(dataset_path: str, suite: str, model: torch.nn.Modu else: transform = create_transform(**resolve_data_config(model.pretrained_cfg, model=model)) - val_dataset = datasets.ImageFolder( - val_dir, - transform=transform - ) + val_dataset = datasets.ImageFolder(val_dir, transform=transform) calibration_dataset = torch.utils.data.DataLoader( val_dataset, batch_size=1, shuffle=False, num_workers=0, pin_memory=True @@ -78,7 +73,7 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: model = model.eval() # Provide input - example_args = (torch.randn(*input_shape), ) + example_args = (torch.randn(*input_shape),) # Export the model to the aten dialect aten_dialect: ExportedProgram = export(model, example_args) @@ -93,14 +88,19 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: calibration_dataset = load_calibration_dataset(dataset_path, suite, model) captured_model = aten_dialect.module() - #visualize_fx_model(captured_model, f"{model_name}_fp32.svg") quantizer = OpenVINOQuantizer() print("PTQ: Quantize the model") + def transform(x): return x[0] - quantized_model = quantize_pt2e(captured_model, quantizer, calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform), fold_quantize=False) + quantized_model = quantize_pt2e( + captured_model, + quantizer, + calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform), + fold_quantize=False, + ) aten_dialect: ExportedProgram = export(quantized_model, example_args) @@ -154,69 +154,53 @@ def transform(x): # 2: Run the executor print("Run openvino_executor_runner...") import subprocess - breakpoint() 
- subprocess.run(["../../../cmake-openvino-out/examples/openvino/openvino_executor_runner", - f"--model_path={model_name}", - f"--input_list_path={inp_list_file}", - f"--output_folder_path={out_path}", - #f"--num_iter={len(input_files)}" - ]) + + subprocess.run( + [ + "../../../cmake-openvino-out/examples/openvino/openvino_executor_runner", + f"--model_path={model_name}", + f"--input_list_path={inp_list_file}", + f"--output_folder_path={out_path}", + # f"--num_iter={len(input_files)}" + ] + ) # 3: load the outputs and compare with the targets import numpy as np + predictions = [] for i in range(len(input_files)): - predictions.append( - np.fromfile( - os.path.join(out_path, f"output_{i}.raw"), dtype=np.float32 - ) - ) + predictions.append(np.fromfile(os.path.join(out_path, f"output_{i}.raw"), dtype=np.float32)) - k_val = [1, 5] acc_top1 = accuracy_score(predictions, targets) print(f"acc@1: {acc_top1}") -from torch.fx.passes.graph_drawer import FxGraphDrawer -def visualize_fx_model(model: torch.fx.GraphModule, output_svg_path: str): - g = FxGraphDrawer(model, output_svg_path) - g.get_dot_graph().write_svg(output_svg_path) - -def generate_inputs(dest_path: str, file_name: str, inputs=None, input_list=None): - input_list_file = None - input_files = [] - - # Prepare input list - if input_list is not None: - input_list_file = f"{dest_path}/{file_name}" - with open(input_list_file, "w") as f: - f.write(input_list) - f.flush() - - # Prepare input data - if inputs is not None: - for idx, data in enumerate(inputs): - for i, d in enumerate(data): - file_name = f"{dest_path}/input_{idx}_{i}.raw" - if not isinstance(d, torch.Tensor): - d = torch.tensor(d) - d.detach().numpy().tofile(file_name) - input_files.append(file_name) - - return input_list_file, input_files - if __name__ == "__main__": # Argument parser for dynamic inputs parser = argparse.ArgumentParser(description="Export models with executorch.") - parser.add_argument("--suite", type=str, required=True, 
choices=["timm", "torchvision", "huggingface"], - help="Select the model suite (timm, torchvision, huggingface).") + parser.add_argument( + "--suite", + type=str, + required=True, + choices=["timm", "torchvision", "huggingface"], + help="Select the model suite (timm, torchvision, huggingface).", + ) parser.add_argument("--model", type=str, required=True, help="Model name to be loaded.") - parser.add_argument("--input_shape", type=eval, required=True, - help="Input shape for the model as a list or tuple (e.g., [1, 3, 224, 224] or (1, 3, 224, 224)).") + parser.add_argument( + "--input_shape", + type=eval, + required=True, + help="Input shape for the model as a list or tuple (e.g., [1, 3, 224, 224] or (1, 3, 224, 224)).", + ) parser.add_argument("--quantize", action="store_true", help="Enable model quantization.") parser.add_argument("--dataset", type=str, help="Path to the calibration dataset.") - parser.add_argument("--device", type=str, default="CPU", - help="Target device for compiling the model (e.g., CPU, GPU). Default is CPU.") + parser.add_argument( + "--device", + type=str, + default="CPU", + help="Target device for compiling the model (e.g., CPU, GPU). 
Default is CPU.", + ) args = parser.parse_args() diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp index b0d3a9004c2..41268751b2f 100644 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -180,6 +180,7 @@ int main(int argc, char** argv) { while (std::getline(input_list, file_path)) { auto input_files = split(file_path, " "); ET_LOG(Info, "INPUT_FILES.SIZE: %ld", input_files.size()); + ET_LOG(Info, "NUM_INPUTS: %ld", num_inputs); if (input_files.size() == 0) { break; } @@ -189,6 +190,7 @@ int main(int argc, char** argv) { method_meta.input_tensor_meta(input_index); auto input_data_ptr = inputs[input_index].toTensor().data_ptr(); + ET_LOG(Info, "READ FILE %s", std::string(input_files[input_index])); std::ifstream fin(input_files[input_index], std::ios::binary); fin.seekg(0, fin.end); size_t file_size = fin.tellg(); From e2415afba91eaf52eda3b9f8a1e20c739f3183f8 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Tue, 11 Feb 2025 12:10:03 +0100 Subject: [PATCH 049/188] openvino_executor_runner.cpp can run on several inputs --- .../openvino/aot/aot_openvino_compiler.py | 75 ++-- .../openvino_executor_runner.cpp | 321 ++++++++++-------- 2 files changed, 214 insertions(+), 182 deletions(-) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 91df971403c..64f2ca2b955 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -5,8 +5,13 @@ # directory of this source tree for more details. 
import argparse +import os +import shutil +import subprocess +from pathlib import Path import executorch +import numpy as np import timm import torch import torchvision.datasets as datasets @@ -19,9 +24,9 @@ from sklearn.metrics import accuracy_score from timm.data import resolve_data_config from timm.data.transforms_factory import create_transform -from torch.export import ExportedProgram from torch.export import export from torch.export.exported_program import ExportedProgram +from torch.fx.passes.graph_drawer import FxGraphDrawer from transformers import AutoModel import nncf @@ -36,12 +41,14 @@ def load_model(suite: str, model_name: str): return timm.create_model(model_name, pretrained=True) elif suite == "torchvision": if not hasattr(torchvision_models, model_name): - raise ValueError(f"Model {model_name} not found in torchvision.") + msg = f"Model {model_name} not found in torchvision." + raise ValueError(msg) return getattr(torchvision_models, model_name)(pretrained=True) elif suite == "huggingface": return AutoModel.from_pretrained(model_name) else: - raise ValueError(f"Unsupported model suite: {suite}") + msg = f"Unsupported model suite: {suite}" + raise ValueError(msg) def load_calibration_dataset(dataset_path: str, suite: str, model: torch.nn.Module): @@ -61,12 +68,32 @@ def load_calibration_dataset(dataset_path: str, suite: str, model: torch.nn.Modu return calibration_dataset +def visualize_fx_model(model: torch.fx.GraphModule, output_svg_path: str): + g = FxGraphDrawer(model, output_svg_path) + g.get_dot_graph().write_svg(output_svg_path) + + +def dump_inputs(calibration_dataset, dest_path): + input_files, targets = [], [] + for idx, data in enumerate(calibration_dataset): + feature, target = data + targets.append(target) + file_name = f"{dest_path}/input_{idx}_0.raw" + if not isinstance(feature, torch.Tensor): + feature = torch.tensor(feature) + feature.detach().numpy().tofile(file_name) + input_files.append(file_name) + + return input_files, targets 
+ + def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: str, device: str): # Ensure input_shape is a tuple if isinstance(input_shape, list): input_shape = tuple(input_shape) elif not isinstance(input_shape, tuple): - raise ValueError("Input shape must be a list or tuple.") + msg = "Input shape must be a list or tuple." + raise ValueError(msg) # Load the selected model model = load_model(suite, model_name) @@ -80,11 +107,13 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: if quantize: if suite == "huggingface": - raise ValueError("Quantization of {suite} models did not support yet.") + msg = f"Quantization of {suite} models did not support yet." + raise ValueError(msg) # Quantize model if not dataset_path: - raise ValueError("Quantization requires a calibration dataset.") + msg = "Quantization requires a calibration dataset." + raise ValueError(msg) calibration_dataset = load_calibration_dataset(dataset_path, suite, model) captured_model = aten_dialect.module() @@ -101,6 +130,7 @@ def transform(x): calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform), fold_quantize=False, ) + visualize_fx_model(quantized_model, f"{model_name}_int8.svg") aten_dialect: ExportedProgram = export(quantized_model, example_args) @@ -123,37 +153,21 @@ def transform(x): if quantize: print("Start validation of the quantized model:") - # 1: Dump inputs - import os - import shutil - - dest_path = "tmp_inputs" - out_path = "tmp_outputs" - targets, input_files = [], [] + dest_path = Path("tmp_inputs") + out_path = Path("tmp_outputs") for d in [dest_path, out_path]: if os.path.exists(d): shutil.rmtree(d) os.makedirs(d) - input_list = "" - for idx, data in enumerate(calibration_dataset): - feature, target = data - targets.append(target) - file_name = f"{dest_path}/input_{idx}_0.raw" - input_list += file_name + " " - if not isinstance(feature, torch.Tensor): - feature = torch.tensor(feature) - 
feature.detach().numpy().tofile(file_name) - input_files.append(file_name) - - inp_list_file = os.path.join(dest_path, "in_list.txt") + + input_files, targets = dump_inputs(calibration_dataset, dest_path) + inp_list_file = dest_path / "in_list.txt" with open(inp_list_file, "w") as f: - input_list = input_list.strip() + "\n" - f.write(input_list) + f.write("\n".join(input_files) + "\n") # 2: Run the executor print("Run openvino_executor_runner...") - import subprocess subprocess.run( [ @@ -161,16 +175,15 @@ def transform(x): f"--model_path={model_name}", f"--input_list_path={inp_list_file}", f"--output_folder_path={out_path}", - # f"--num_iter={len(input_files)}" ] ) # 3: load the outputs and compare with the targets - import numpy as np predictions = [] for i in range(len(input_files)): - predictions.append(np.fromfile(os.path.join(out_path, f"output_{i}.raw"), dtype=np.float32)) + tensor = np.fromfile(out_path / f"output_{i}_0.raw", dtype=np.float32) + predictions.append(torch.tensor(np.argmax(tensor))) acc_top1 = accuracy_score(predictions, targets) print(f"acc@1: {acc_top1}") diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp index 41268751b2f..f9a85c03a53 100644 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include @@ -25,22 +26,16 @@ // Define a fixed-size memory pool for the method allocator (4 MB) static uint8_t method_allocator_pool[4 * 1024U * 1024U]; // 4 MB -// Define command-line flags for model path, the number of iterations, input list path, and output folder path +// Define command-line flags for model path, the number of iterations, input +// list path, and output folder path +DEFINE_string(model_path, "", + "Path to the model serialized in flatbuffer format (required)."); +DEFINE_int32(num_iter, 1, "Number of 
inference iterations (default is 1)."); +DEFINE_string(input_list_path, "", + "Path to the input list file which includes the list of raw " + "input tensor files (optional)."); DEFINE_string( - model_path, - "", - "Path to the model serialized in flatbuffer format (required)."); -DEFINE_int32( - num_iter, - 1, - "Number of inference iterations (default is 1)."); -DEFINE_string( - input_list_path, - "", - "Path to the input list file which includes the list of raw input tensor files (optional)."); -DEFINE_string( - output_folder_path, - "", + output_folder_path, "", "Path to the output folder to save raw output tensor files (optional)."); using executorch::extension::FileDataLoader; @@ -57,7 +52,119 @@ using executorch::runtime::Result; using executorch::runtime::Span; using executorch::runtime::TensorInfo; -int main(int argc, char** argv) { +std::pair benchmark_method(Result &method, + int num_iterations) { + Error status = Error::Ok; + auto before_exec = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < num_iterations; ++i) { + status = method->execute(); + } + auto after_exec = std::chrono::high_resolution_clock::now(); + double elapsed_time = std::chrono::duration_cast( + after_exec - before_exec) + .count() / + 1000.0; + return std::make_pair(elapsed_time, status); +} + +void dump_outputs(Result &method, const char *output_folder_path, + size_t index = 0) { + std::vector outputs(method->outputs_size()); + Error status = Error::Ok; + status = method->get_outputs(outputs.data(), outputs.size()); + ET_CHECK(status == Error::Ok); + for (size_t output_index = 0; output_index < method->outputs_size(); + output_index++) { + auto output_tensor = outputs[output_index].toTensor(); + auto output_file_name = std::string(output_folder_path) + "/output_" + + std::to_string(index) + "_" + + std::to_string(output_index) + ".raw"; + std::ofstream fout(output_file_name.c_str(), std::ios::binary); + fout.write(output_tensor.const_data_ptr(), 
output_tensor.nbytes()); + fout.close(); + ET_LOG(Info, "Write outputs to file %s", output_file_name.c_str()); + } +} + +struct ProcessInputsResult { + double total_time; + size_t num_iter; + Error status; +}; + +ProcessInputsResult process_inputs(Result &method, + const char *input_list_path, + const char *output_folder_path) { + std::vector inputs(method->inputs_size()); + ET_LOG(Info, "%zu inputs: ", inputs.size()); + double total_time_elapsed = 0.; + size_t idx = 0; + + Error status = Error::Ok; + status = method->get_inputs(inputs.data(), inputs.size()); + ET_CHECK(status == Error::Ok); + + auto split = [](std::string s, std::string delimiter) { + size_t pos_start = 0, pos_end, delim_len = delimiter.length(); + std::string token; + std::vector res; + + while ((pos_end = s.find(delimiter, pos_start)) != std::string::npos) { + token = s.substr(pos_start, pos_end - pos_start); + pos_start = pos_end + delim_len; + res.push_back(token); + } + res.push_back(s.substr(pos_start)); + return res; + }; + + // Read raw input tensor file names from input list file and + // iterate each raw input tensor file to read values + std::ifstream input_list(input_list_path); + if (input_list.is_open()) { + size_t num_inputs = method->inputs_size(); + std::string file_path; + while (std::getline(input_list, file_path)) { + auto input_files = split(file_path, " "); + if (input_files.size() == 0) { + break; + } + for (int input_index = 0; input_index < num_inputs; ++input_index) { + MethodMeta method_meta = method->method_meta(); + Result tensor_meta = + method_meta.input_tensor_meta(input_index); + auto input_data_ptr = inputs[input_index].toTensor().data_ptr(); + + ET_LOG(Info, "Read inputs from file %s", + input_files[input_index].c_str()); + std::ifstream fin(input_files[input_index], std::ios::binary); + fin.seekg(0, fin.end); + size_t file_size = fin.tellg(); + + ET_CHECK_MSG( + file_size == tensor_meta->nbytes(), + "Input(%d) size mismatch. 
file bytes: %zu, tensor bytes: %zu", + input_index, file_size, tensor_meta->nbytes()); + + fin.seekg(0, fin.beg); + fin.read(static_cast(input_data_ptr), file_size); + fin.close(); + } + double time_elapsed; + std::tie(time_elapsed, status) = benchmark_method(method, 1); + if (status != Error::Ok) { + return {total_time_elapsed, idx, status}; + } + total_time_elapsed += time_elapsed; + dump_outputs(method, output_folder_path, idx++); + } + } else { + ET_CHECK_MSG(false, "Failed to read input list file: %s", input_list_path); + } + return {total_time_elapsed, idx, status}; +} + +int main(int argc, char **argv) { // Initialize the runtime environment executorch::runtime::runtime_init(); @@ -68,22 +175,21 @@ int main(int argc, char** argv) { if (FLAGS_model_path.empty()) { std::cerr << "Error: --model_path is required." << std::endl; std::cerr << "Usage: " << argv[0] - << " --model_path= --num_iter=" << std::endl; + << " --model_path= --num_iter=" + << std::endl; return 1; } // Retrieve the model path and number of iterations - const char* model_path = FLAGS_model_path.c_str(); + const char *model_path = FLAGS_model_path.c_str(); int num_iterations = FLAGS_num_iter; std::cout << "Model path: " << model_path << std::endl; std::cout << "Number of iterations: " << num_iterations << std::endl; // Load the model using FileDataLoader Result loader = FileDataLoader::from(model_path); - ET_CHECK_MSG( - loader.ok(), - "FileDataLoader::from() failed: 0x%" PRIx32, - static_cast(loader.error())); + ET_CHECK_MSG(loader.ok(), "FileDataLoader::from() failed: 0x%" PRIx32, + static_cast(loader.error())); // Load the program from the loaded model Result program = Program::load(&loader.get()); @@ -93,8 +199,9 @@ int main(int argc, char** argv) { } ET_LOG(Info, "Model file %s is loaded.", model_path); - // Retrieve the method name from the program (assumes the first method is used) - const char* method_name = nullptr; + // Retrieve the method name from the program (assumes the first 
method is + // used) + const char *method_name = nullptr; { const auto method_name_result = program->get_method_name(0); ET_CHECK_MSG(method_name_result.ok(), "Program has no methods"); @@ -104,11 +211,8 @@ int main(int argc, char** argv) { // Retrieve metadata about the method Result method_meta = program->method_meta(method_name); - ET_CHECK_MSG( - method_meta.ok(), - "Failed to get method_meta for %s: 0x%" PRIx32, - method_name, - static_cast(method_meta.error())); + ET_CHECK_MSG(method_meta.ok(), "Failed to get method_meta for %s: 0x%" PRIx32, + method_name, static_cast(method_meta.error())); // Set up a memory allocator for the method MemoryAllocator method_allocator{ @@ -133,138 +237,53 @@ int main(int argc, char** argv) { // Load the method into the program Result method = program->load_method(method_name, &memory_manager); - ET_CHECK_MSG( - method.ok(), - "Loading of method %s failed with status 0x%" PRIx32, - method_name, - static_cast(method.error())); + ET_CHECK_MSG(method.ok(), + "Loading of method %s failed with status 0x%" PRIx32, + method_name, static_cast(method.error())); ET_LOG(Info, "Method loaded."); // Prepare the input tensors for the method auto inputs = prepare_input_tensors(*method); - ET_CHECK_MSG( - inputs.ok(), - "Could not prepare inputs: 0x%" PRIx32, - static_cast(inputs.error())); + ET_CHECK_MSG(inputs.ok(), "Could not prepare inputs: 0x%" PRIx32, + static_cast(inputs.error())); + + double elapsed_time; + Error status = Error::Ok; // If the input path list is provided, read input tensors from the files - if (!(FLAGS_input_list_path.empty())) { - const char* input_list_path = FLAGS_input_list_path.c_str(); - ET_LOG(Info, "Loading input tensors from the list provided in %s.", input_list_path); - Error status = Error::Ok; - std::vector inputs(method->inputs_size()); - ET_LOG(Info, "%zu inputs: ", inputs.size()); - status = method->get_inputs(inputs.data(), inputs.size()); - ET_CHECK(status == Error::Ok); - - auto split = [](std::string 
s, std::string delimiter) { - size_t pos_start = 0, pos_end, delim_len = delimiter.length(); - std::string token; - std::vector res; - - while ((pos_end = s.find(delimiter, pos_start)) != std::string::npos) { - token = s.substr(pos_start, pos_end - pos_start); - pos_start = pos_end + delim_len; - res.push_back(token); - } - res.push_back(s.substr(pos_start)); - return res; - }; - - // Read raw input tensor file names from input list file and - // iterate each raw input tensor file to read values - std::ifstream input_list(input_list_path); - if (input_list.is_open()) { - size_t num_inputs = method->inputs_size(); - std::string file_path; - while (std::getline(input_list, file_path)) { - auto input_files = split(file_path, " "); - ET_LOG(Info, "INPUT_FILES.SIZE: %ld", input_files.size()); - ET_LOG(Info, "NUM_INPUTS: %ld", num_inputs); - if (input_files.size() == 0) { - break; - } - for (int input_index = 0; input_index < num_inputs; ++input_index) { - MethodMeta method_meta = method->method_meta(); - Result tensor_meta = - method_meta.input_tensor_meta(input_index); - auto input_data_ptr = inputs[input_index].toTensor().data_ptr(); - - ET_LOG(Info, "READ FILE %s", std::string(input_files[input_index])); - std::ifstream fin(input_files[input_index], std::ios::binary); - fin.seekg(0, fin.end); - size_t file_size = fin.tellg(); - - ET_CHECK_MSG( - file_size == tensor_meta->nbytes(), - "Input(%d) size mismatch. 
file bytes: %zu, tensor bytes: %zu", - input_index, - file_size, - tensor_meta->nbytes()); - - fin.seekg(0, fin.beg); - fin.read( - static_cast(input_data_ptr), - file_size); - fin.close(); - } - } - } else { - ET_CHECK_MSG(false, - "Failed to read input list file: %s", - input_list_path); + if (!(FLAGS_input_list_path.empty()) and + !(FLAGS_output_folder_path.empty())) { + const char *input_list_path = FLAGS_input_list_path.c_str(); + ET_LOG(Info, "Loading input tensors from the list provided in %s.", + input_list_path); + const char *output_folder_path = FLAGS_output_folder_path.c_str(); + auto res = process_inputs(method, input_list_path, output_folder_path); + elapsed_time = res.total_time; + status = res.status; + num_iterations = res.num_iter; + } else { + + // Measure execution time for inference + std::tie(elapsed_time, status) = benchmark_method(method, num_iterations); + // Retrieve and print the method outputs + ET_LOG(Info, "%zu Number of outputs: ", method->outputs_size()); + + // If output folder path is provided, save output tensors + // into raw tensor files. 
+ if (!(FLAGS_output_folder_path.empty())) { + const char *output_folder_path = FLAGS_output_folder_path.c_str(); + ET_LOG(Info, "Saving output tensors into the output folder: %s.", + output_folder_path); + dump_outputs(method, output_folder_path); } } - ET_LOG(Info, "Inputs prepared."); - - // Measure execution time for inference - auto before_exec = std::chrono::high_resolution_clock::now(); - Error status = Error::Ok; - for (int i = 0; i < num_iterations; ++i) { - status = method->execute(); - } - auto after_exec = std::chrono::high_resolution_clock::now(); - double elapsed_time = std::chrono::duration_cast( - after_exec - before_exec) - .count() / 1000.0; - // Log execution time and average time per iteration - ET_LOG( - Info, - "%d inference took %f ms, avg %f ms", - num_iterations, - elapsed_time, - elapsed_time / static_cast(num_iterations)); - ET_CHECK_MSG( - status == Error::Ok, - "Execution of method %s failed with status 0x%" PRIx32, - method_name, - static_cast(status)); + ET_LOG(Info, "%d inference took %f ms, avg %f ms", num_iterations, + elapsed_time, elapsed_time / static_cast(num_iterations)); + ET_CHECK_MSG(status == Error::Ok, + "Execution of method %s failed with status 0x%" PRIx32, + method_name, static_cast(status)); ET_LOG(Info, "Model executed successfully."); - // Retrieve and print the method outputs - std::vector outputs(method->outputs_size()); - ET_LOG(Info, "%zu Number of outputs: ", outputs.size()); - status = method->get_outputs(outputs.data(), outputs.size()); - ET_CHECK(status == Error::Ok); - - // If output folder path is provided, save output tensors - // into raw tensor files. 
- if (!(FLAGS_output_folder_path.empty())) { - const char* output_folder_path = FLAGS_output_folder_path.c_str(); - ET_LOG(Info, "Saving output tensors into the output folder: %s.", output_folder_path); - for (size_t output_index = 0; output_index < method->outputs_size(); - output_index++) { - auto output_tensor = outputs[output_index].toTensor(); - auto output_file_name = std::string(output_folder_path) + "/output_" + - std::to_string(output_index) + ".raw"; - std::ofstream fout(output_file_name.c_str(), std::ios::binary); - fout.write( - output_tensor.const_data_ptr(), output_tensor.nbytes()); - fout.close(); - } - } - return 0; } - From 8cbb1175902efab402d814805e4348b9c817b1f1 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Tue, 11 Feb 2025 14:28:25 +0100 Subject: [PATCH 050/188] Validate option / minor --- .../openvino/aot/aot_openvino_compiler.py | 33 ++++++++++++------- .../openvino_executor_runner.cpp | 3 -- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 64f2ca2b955..3bdaf947a69 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -51,11 +51,11 @@ def load_model(suite: str, model_name: str): raise ValueError(msg) -def load_calibration_dataset(dataset_path: str, suite: str, model: torch.nn.Module): +def load_calibration_dataset(dataset_path: str, suite: str, model: torch.nn.Module, model_name: str): val_dir = f"{dataset_path}/val" if suite == "torchvision": - transform = torchvision_models.get_model_weights(model.name).transforms() + transform = torchvision_models.get_model_weights(model_name).DEFAULT.transforms() else: transform = create_transform(**resolve_data_config(model.pretrained_cfg, model=model)) @@ -87,7 +87,7 @@ def dump_inputs(calibration_dataset, dest_path): return input_files, targets -def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: 
str, device: str): +def main(suite: str, model_name: str, input_shape, quantize: bool, validate: bool, dataset_path: str, device: str): # Ensure input_shape is a tuple if isinstance(input_shape, list): input_shape = tuple(input_shape) @@ -95,6 +95,8 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: msg = "Input shape must be a list or tuple." raise ValueError(msg) + calibration_dataset = None + # Load the selected model model = load_model(suite, model_name) model = model.eval() @@ -114,7 +116,7 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: if not dataset_path: msg = "Quantization requires a calibration dataset." raise ValueError(msg) - calibration_dataset = load_calibration_dataset(dataset_path, suite, model) + calibration_dataset = load_calibration_dataset(dataset_path, suite, model, model_name) captured_model = aten_dialect.module() quantizer = OpenVINOQuantizer() @@ -146,12 +148,15 @@ def transform(x): exec_prog = lowered_module.to_executorch(config=executorch.exir.ExecutorchBackendConfig()) # Serialize and save it to a file - model_name = f"{model_name}_{'int8' if quantize else 'fp32'}.pte" - with open(model_name, "wb") as file: + model_file_name = f"{model_name}_{'int8' if quantize else 'fp32'}.pte" + with open(model_file_name, "wb") as file: exec_prog.write_to_file(file) - print(f"Model exported and saved as {model_name} on {device}.") + print(f"Model exported and saved as {model_file_name} on {device}.") + + if validate: + if calibration_dataset is None: + calibration_dataset = load_calibration_dataset(dataset_path, suite, model, model_name) - if quantize: print("Start validation of the quantized model:") # 1: Dump inputs dest_path = Path("tmp_inputs") @@ -172,18 +177,17 @@ def transform(x): subprocess.run( [ "../../../cmake-openvino-out/examples/openvino/openvino_executor_runner", - f"--model_path={model_name}", + f"--model_path={model_file_name}", f"--input_list_path={inp_list_file}", 
f"--output_folder_path={out_path}", ] ) # 3: load the outputs and compare with the targets - predictions = [] for i in range(len(input_files)): tensor = np.fromfile(out_path / f"output_{i}_0.raw", dtype=np.float32) - predictions.append(torch.tensor(np.argmax(tensor))) + predictions.append(torch.argmax(torch.tensor(tensor))) acc_top1 = accuracy_score(predictions, targets) print(f"acc@1: {acc_top1}") @@ -207,6 +211,11 @@ def transform(x): help="Input shape for the model as a list or tuple (e.g., [1, 3, 224, 224] or (1, 3, 224, 224)).", ) parser.add_argument("--quantize", action="store_true", help="Enable model quantization.") + parser.add_argument( + "--validate", + action="store_true", + help="Enable model validation. --dataset argument is requred for the validation.", + ) parser.add_argument("--dataset", type=str, help="Path to the calibration dataset.") parser.add_argument( "--device", @@ -219,4 +228,4 @@ def transform(x): # Run the main function with parsed arguments with nncf.torch.disable_patching(): - main(args.suite, args.model, args.input_shape, args.quantize, args.dataset, args.device) + main(args.suite, args.model, args.input_shape, args.quantize, args.validate, args.dataset, args.device) diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp index f9a85c03a53..36c957bc433 100644 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -82,7 +82,6 @@ void dump_outputs(Result &method, const char *output_folder_path, std::ofstream fout(output_file_name.c_str(), std::ios::binary); fout.write(output_tensor.const_data_ptr(), output_tensor.nbytes()); fout.close(); - ET_LOG(Info, "Write outputs to file %s", output_file_name.c_str()); } } @@ -135,8 +134,6 @@ ProcessInputsResult process_inputs(Result &method, method_meta.input_tensor_meta(input_index); auto input_data_ptr = 
inputs[input_index].toTensor().data_ptr(); - ET_LOG(Info, "Read inputs from file %s", - input_files[input_index].c_str()); std::ifstream fin(input_files[input_index], std::ios::binary); fin.seekg(0, fin.end); size_t file_size = fin.tellg(); From 4b60fb4934d39c683f323e2bd526d422bf39fcd5 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Tue, 11 Feb 2025 14:56:18 +0100 Subject: [PATCH 051/188] Input shape from the input dataset --- .../openvino/aot/aot_openvino_compiler.py | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 3bdaf947a69..e4ef955b40c 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -88,19 +88,20 @@ def dump_inputs(calibration_dataset, dest_path): def main(suite: str, model_name: str, input_shape, quantize: bool, validate: bool, dataset_path: str, device: str): - # Ensure input_shape is a tuple - if isinstance(input_shape, list): - input_shape = tuple(input_shape) - elif not isinstance(input_shape, tuple): - msg = "Input shape must be a list or tuple." - raise ValueError(msg) - - calibration_dataset = None - # Load the selected model model = load_model(suite, model_name) model = model.eval() + if dataset_path: + calibration_dataset = load_calibration_dataset(dataset_path, suite, model, model_name) + input_shape = tuple(next(iter(calibration_dataset))[0].shape) + print(f"Input shape retrieved from the model config: {input_shape}") + # Ensure input_shape is a tuple + elif isinstance(input_shape, list): + input_shape = tuple(input_shape) + else: + msg = "Input shape must be a list or tuple." + raise ValueError(msg) # Provide input example_args = (torch.randn(*input_shape),) @@ -116,7 +117,6 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, validate: boo if not dataset_path: msg = "Quantization requires a calibration dataset." 
raise ValueError(msg)
-        calibration_dataset = load_calibration_dataset(dataset_path, suite, model, model_name)
 
         captured_model = aten_dialect.module()
         quantizer = OpenVINOQuantizer()
@@ -154,8 +154,13 @@ def transform(x):
     print(f"Model exported and saved as {model_file_name} on {device}.")
 
     if validate:
-        if calibration_dataset is None:
-            calibration_dataset = load_calibration_dataset(dataset_path, suite, model, model_name)
+        if suite == "huggingface":
+            msg = f"Validation of {suite} models did not support yet."
+            raise ValueError(msg)
+
+        if not dataset_path:
+            msg = "Validation requires a calibration dataset."
+            raise ValueError(msg)
 
         print("Start validation of the quantized model:")
         # 1: Dump inputs
@@ -207,7 +212,6 @@ def transform(x):
     parser.add_argument(
         "--input_shape",
         type=eval,
-        required=True,
         help="Input shape for the model as a list or tuple (e.g., [1, 3, 224, 224] or (1, 3, 224, 224)).",
     )
     parser.add_argument("--quantize", action="store_true", help="Enable model quantization.")

From e0cd6448ef57210b2e91f5aa93393b0860371e48 Mon Sep 17 00:00:00 2001
From: dlyakhov
Date: Tue, 11 Feb 2025 15:23:27 +0100
Subject: [PATCH 052/188] --batch_size

---
 .../openvino/aot/aot_openvino_compiler.py     | 39 +++++++++++++++----
 1 file changed, 32 insertions(+), 7 deletions(-)

diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py
index e4ef955b40c..dba47c0dde3 100644
--- a/examples/openvino/aot/aot_openvino_compiler.py
+++ b/examples/openvino/aot/aot_openvino_compiler.py
@@ -51,7 +51,7 @@ def load_model(suite: str, model_name: str):
         raise ValueError(msg)
 
 
-def load_calibration_dataset(dataset_path: str, suite: str, model: torch.nn.Module, model_name: str):
+def load_calibration_dataset(dataset_path: str, batch_size: int, suite: str, model: torch.nn.Module, model_name: str):
     val_dir = f"{dataset_path}/val"
 
     if suite == "torchvision":
@@ -62,7 +62,7 @@ def load_calibration_dataset(dataset_path: str, suite: str, model:
torch.nn.Modu val_dataset = datasets.ImageFolder(val_dir, transform=transform) calibration_dataset = torch.utils.data.DataLoader( - val_dataset, batch_size=1, shuffle=False, num_workers=0, pin_memory=True + val_dataset, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True ) return calibration_dataset @@ -77,7 +77,7 @@ def dump_inputs(calibration_dataset, dest_path): input_files, targets = [], [] for idx, data in enumerate(calibration_dataset): feature, target = data - targets.append(target) + targets.extend(target) file_name = f"{dest_path}/input_{idx}_0.raw" if not isinstance(feature, torch.Tensor): feature = torch.tensor(feature) @@ -87,13 +87,22 @@ def dump_inputs(calibration_dataset, dest_path): return input_files, targets -def main(suite: str, model_name: str, input_shape, quantize: bool, validate: bool, dataset_path: str, device: str): +def main( + suite: str, + model_name: str, + input_shape, + quantize: bool, + validate: bool, + dataset_path: str, + device: str, + batch_size: int, +): # Load the selected model model = load_model(suite, model_name) model = model.eval() if dataset_path: - calibration_dataset = load_calibration_dataset(dataset_path, suite, model, model_name) + calibration_dataset = load_calibration_dataset(dataset_path, batch_size, suite, model, model_name) input_shape = tuple(next(iter(calibration_dataset))[0].shape) print(f"Input shape retrieved from the model config: {input_shape}") # Ensure input_shape is a tuple @@ -192,7 +201,7 @@ def transform(x): predictions = [] for i in range(len(input_files)): tensor = np.fromfile(out_path / f"output_{i}_0.raw", dtype=np.float32) - predictions.append(torch.argmax(torch.tensor(tensor))) + predictions.extend(torch.tensor(tensor).reshape(-1, 1000).argmax(-1)) acc_top1 = accuracy_score(predictions, targets) print(f"acc@1: {acc_top1}") @@ -214,6 +223,13 @@ def transform(x): type=eval, help="Input shape for the model as a list or tuple (e.g., [1, 3, 224, 224] or (1, 3, 224, 224)).", ) + 
parser.add_argument( + "--batch_size", + type=int, + default=1, + help="Batch size for the validation. Default batch_size == 1." + " The dataset length must be evenly divisible by the batch size.", + ) parser.add_argument("--quantize", action="store_true", help="Enable model quantization.") parser.add_argument( "--validate", @@ -232,4 +248,13 @@ def transform(x): # Run the main function with parsed arguments with nncf.torch.disable_patching(): - main(args.suite, args.model, args.input_shape, args.quantize, args.validate, args.dataset, args.device) + main( + args.suite, + args.model, + args.input_shape, + args.quantize, + args.validate, + args.dataset, + args.device, + args.batch_size, + ) From 2a04ee6a6d27357c71086761e02be2ef66904076 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Tue, 11 Feb 2025 16:20:53 +0100 Subject: [PATCH 053/188] Adapt subset size to keep +- 300 pics for calibration --- examples/openvino/aot/aot_openvino_compiler.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index dba47c0dde3..909eabe3677 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -135,9 +135,12 @@ def main( def transform(x): return x[0] + default_subset_size = 300 + batch_size = calibration_dataset.batch_size quantized_model = quantize_pt2e( captured_model, quantizer, + subset_size=(default_subset_size // batch_size) + int(default_subset_size % batch_size > 0), calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform), fold_quantize=False, ) From db7dc1318c9e54a64ebc54e7f5b5cd1d945e42ac Mon Sep 17 00:00:00 2001 From: Daniil Lyakhov Date: Tue, 11 Feb 2025 16:35:59 +0100 Subject: [PATCH 054/188] Apply suggestions from code review Co-authored-by: Alexander Suslov --- examples/openvino/aot/aot_openvino_compiler.py | 4 +--- examples/openvino/openvino_build_example.sh | 8 +++----- 2 files changed, 4 
insertions(+), 8 deletions(-) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 909eabe3677..cf41ff318fd 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -30,8 +30,6 @@ from transformers import AutoModel import nncf -import nncf.experimental -import nncf.experimental.torch from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e @@ -239,7 +237,7 @@ def transform(x): action="store_true", help="Enable model validation. --dataset argument is requred for the validation.", ) - parser.add_argument("--dataset", type=str, help="Path to the calibration dataset.") + parser.add_argument("--dataset", type=str, help="Path to the validation dataset.") parser.add_argument( "--device", type=str, diff --git a/examples/openvino/openvino_build_example.sh b/examples/openvino/openvino_build_example.sh index 52c508d8ee2..a490ff30154 100755 --- a/examples/openvino/openvino_build_example.sh +++ b/examples/openvino/openvino_build_example.sh @@ -34,7 +34,6 @@ main() { local example_dir=examples/openvino local example_build_dir="${build_dir}/${example_dir}" local cmake_prefix_path="${PWD}/${build_dir}/lib/cmake/ExecuTorch;${PWD}/${build_dir}/third-party/gflags;" - rm -rf "${example_build_dir}" ## OpenVINO original @@ -43,11 +42,10 @@ main() { -B"${example_build_dir}" \ $EXECUTORCH_ROOT/$example_dir +<<<<<<< HEAD:examples/openvino/openvino_build_example.sh cmake --build "${example_build_dir}" -j$(nproc) - - # Switch back to the original directory - cd - > /dev/null - +======= + cmake --build "${example_build_dir}" -j5 # Print a success message echo "Build successfully completed." 
} From de3f50b5d33f79acd28b37be64f7b40de7e04278 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Tue, 11 Feb 2025 17:17:06 +0100 Subject: [PATCH 055/188] Comments --- examples/openvino/aot/README.md | 46 ++++-- .../openvino/aot/aot_openvino_compiler.py | 146 +++++++++++------- 2 files changed, 125 insertions(+), 67 deletions(-) diff --git a/examples/openvino/aot/README.md b/examples/openvino/aot/README.md index 46e476a8408..5fd97dba21e 100644 --- a/examples/openvino/aot/README.md +++ b/examples/openvino/aot/README.md @@ -11,34 +11,41 @@ python aot_openvino_compiler.py --suite --model --inp ``` ### **Arguments** -- **`--suite`** (required): - Specifies the model suite to use. +- **`--suite`** (required): + Specifies the model suite to use. Supported values: - `timm` (e.g., VGG16, ResNet50) - `torchvision` (e.g., resnet18, mobilenet_v2) - `huggingface` (e.g., bert-base-uncased) -- **`--model`** (required): - Name of the model to export. +- **`--model`** (required): + Name of the model to export. Examples: - For `timm`: `vgg16`, `resnet50` - For `torchvision`: `resnet18`, `mobilenet_v2` - For `huggingface`: `bert-base-uncased`, `distilbert-base-uncased` -- **`--input_shape`** (required): - Input shape for the model. Provide this as a **list** or **tuple**. +- **`--input_shape`**: + Input shape for the model. Provide this as a **list** or **tuple**. Examples: - `[1, 3, 224, 224]` (Zsh users: wrap in quotes) - `(1, 3, 224, 224)` +- **`--batch_size`** : + Batch size for the validation. Default batch_size == 1. + The dataset length must be evenly divisible by the batch size. + - **`--quantize`** (optional): Enable model quantization: Default is False. +- **`--quantize`** (optional): + Enable model validation. --dataset argument is requred for the validation. + - **`--dataset`** (optional): - Path to the calibration dataset. TODO: It is necessary to think in what form to support the dataset. 
For the experiment, tiny-imagenet is used, which can be downloaded from here http://cs231n.stanford.edu/tiny-imagenet-200.zip and specify the path to it. + Path to the imagenet-like calibration dataset. -- **`--device`** (optional): - Target device for the compiled model. Default is `CPU`. +- **`--device`** (optional) + Target device for the compiled model. Default is `CPU`. Examples: `CPU`, `GPU` @@ -58,22 +65,31 @@ python aot_openvino_compiler.py --suite torchvision --model resnet50 --input_sha ```bash python aot_openvino_compiler.py --suite huggingface --model bert-base-uncased --input_shape "(1, 512)" --device CPU ``` +### Export and validate TIMM Resnet50d model for the CPU +```bash +python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU --validate --dataset /path/to/dataset +``` + +### Export, quantize and validate TIMM Resnet50d model for the CPU +```bash +python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU --validate --dataset /path/to/dataset --quantize +``` ## **Notes** -1. **Input Shape in Zsh**: +1. **Input Shape in Zsh**: If you are using Zsh, wrap `--input_shape` in quotes or use a tuple: ```bash --input_shape '[1, 3, 224, 224]' --input_shape "(1, 3, 224, 224)" ``` -2. **Model Compatibility**: +2. **Model Compatibility**: Ensure the specified `model_name` exists in the selected `suite`. Use the corresponding library's documentation to verify model availability. -3. **Output File**: +3. **Output File**: The exported model will be saved as `.pte` in the current directory. -4. **Dependencies**: +4. 
**Dependencies**: - Python 3.8+ - PyTorch - Executorch @@ -82,14 +98,14 @@ python aot_openvino_compiler.py --suite huggingface --model bert-base-uncased -- - Transformers (`pip install transformers`) ## **Error Handling** -- **Model Not Found**: +- **Model Not Found**: If the script raises an error such as: ```bash ValueError: Model not found ``` Verify that the model name is correct for the chosen suite. -- **Unsupported Input Shape**: +- **Unsupported Input Shape**: Ensure `--input_shape` is provided as a valid list or tuple. diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index cf41ff318fd..4f45fc1d426 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -8,6 +8,7 @@ import os import shutil import subprocess +from itertools import islice from pathlib import Path import executorch @@ -24,6 +25,8 @@ from sklearn.metrics import accuracy_score from timm.data import resolve_data_config from timm.data.transforms_factory import create_transform +from torch.ao.quantization.quantize_pt2e import convert_pt2e +from torch.ao.quantization.quantize_pt2e import prepare_pt2e from torch.export import export from torch.export.exported_program import ExportedProgram from torch.fx.passes.graph_drawer import FxGraphDrawer @@ -54,8 +57,11 @@ def load_calibration_dataset(dataset_path: str, batch_size: int, suite: str, mod if suite == "torchvision": transform = torchvision_models.get_model_weights(model_name).DEFAULT.transforms() - else: + elif suite == "timm": transform = create_transform(**resolve_data_config(model.pretrained_cfg, model=model)) + else: + msg = f"Validation is not supported yet for the suite {suite}" + raise ValueError(msg) val_dataset = datasets.ImageFolder(val_dir, transform=transform) @@ -85,6 +91,76 @@ def dump_inputs(calibration_dataset, dest_path): return input_files, targets +def quantize_model( + captured_model: torch.fx.GraphModule, 
calibration_dataset: torch.utils.data.DataLoader, use_nncf: bool +) -> torch.fx.GraphModule: + quantizer = OpenVINOQuantizer() + + print("PTQ: Quantize the model") + default_subset_size = 300 + batch_size = calibration_dataset.batch_size + subset_size = (default_subset_size // batch_size) + int(default_subset_size % batch_size > 0) + + def transform(x): + return x[0] + + if use_nncf: + + quantized_model = quantize_pt2e( + captured_model, + quantizer, + subset_size=subset_size, + calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform), + fold_quantize=False, + ) + else: + annotated_model = prepare_pt2e(captured_model, quantizer) + + print("PTQ: Calibrate the model...") + for data in islice(calibration_dataset, subset_size): + annotated_model(transform(data)) + + print("PTQ: Convert the quantized model...") + quantized_model = convert_pt2e(annotated_model, fold_quantize=False) + + return quantized_model + + +def validate_model(model_file_name: str, calibration_dataset: torch.utils.data.DataLoader) -> float: + # 1: Dump inputs + dest_path = Path("tmp_inputs") + out_path = Path("tmp_outputs") + for d in [dest_path, out_path]: + if os.path.exists(d): + shutil.rmtree(d) + os.makedirs(d) + + input_files, targets = dump_inputs(calibration_dataset, dest_path) + inp_list_file = dest_path / "in_list.txt" + with open(inp_list_file, "w") as f: + f.write("\n".join(input_files) + "\n") + + # 2: Run the executor + print("Run openvino_executor_runner...") + + subprocess.run( + [ + "../../../cmake-openvino-out/examples/openvino/openvino_executor_runner", + f"--model_path={model_file_name}", + f"--input_list_path={inp_list_file}", + f"--output_folder_path={out_path}", + ] + ) + + # 3: load the outputs and compare with the targets + predictions = [] + for i in range(len(input_files)): + tensor = np.fromfile(out_path / f"output_{i}_0.raw", dtype=np.float32) + predictions.extend(torch.tensor(tensor).reshape(-1, 1000).argmax(-1)) + + return 
accuracy_score(predictions, targets) + + def main( suite: str, model_name: str, @@ -94,6 +170,7 @@ def main( dataset_path: str, device: str, batch_size: int, + quantization_flow: str, ): # Load the selected model model = load_model(suite, model_name) @@ -104,7 +181,7 @@ def main( input_shape = tuple(next(iter(calibration_dataset))[0].shape) print(f"Input shape retrieved from the model config: {input_shape}") # Ensure input_shape is a tuple - elif isinstance(input_shape, list): + elif isinstance(input_shape, (list, tuple)): input_shape = tuple(input_shape) else: msg = "Input shape must be a list or tuple." @@ -124,23 +201,8 @@ def main( if not dataset_path: msg = "Quantization requires a calibration dataset." raise ValueError(msg) - - captured_model = aten_dialect.module() - quantizer = OpenVINOQuantizer() - - print("PTQ: Quantize the model") - - def transform(x): - return x[0] - - default_subset_size = 300 - batch_size = calibration_dataset.batch_size - quantized_model = quantize_pt2e( - captured_model, - quantizer, - subset_size=(default_subset_size // batch_size) + int(default_subset_size % batch_size > 0), - calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform), - fold_quantize=False, + quantized_model = quantize_model( + aten_dialect.module(), calibration_dataset, use_nncf=quantization_flow == "nncf" ) visualize_fx_model(quantized_model, f"{model_name}_int8.svg") @@ -172,39 +234,8 @@ def transform(x): msg = "Validateion requires a calibration dataset." 
raise ValueError(msg) - print("Start validation of the quantized model:") - # 1: Dump inputs - dest_path = Path("tmp_inputs") - out_path = Path("tmp_outputs") - for d in [dest_path, out_path]: - if os.path.exists(d): - shutil.rmtree(d) - os.makedirs(d) - - input_files, targets = dump_inputs(calibration_dataset, dest_path) - inp_list_file = dest_path / "in_list.txt" - with open(inp_list_file, "w") as f: - f.write("\n".join(input_files) + "\n") - - # 2: Run the executor - print("Run openvino_executor_runner...") - - subprocess.run( - [ - "../../../cmake-openvino-out/examples/openvino/openvino_executor_runner", - f"--model_path={model_file_name}", - f"--input_list_path={inp_list_file}", - f"--output_folder_path={out_path}", - ] - ) - - # 3: load the outputs and compare with the targets - predictions = [] - for i in range(len(input_files)): - tensor = np.fromfile(out_path / f"output_{i}_0.raw", dtype=np.float32) - predictions.extend(torch.tensor(tensor).reshape(-1, 1000).argmax(-1)) - - acc_top1 = accuracy_score(predictions, targets) + print("Start validation of the model:") + acc_top1 = validate_model(model_file_name, calibration_dataset) print(f"acc@1: {acc_top1}") @@ -244,10 +275,20 @@ def transform(x): default="CPU", help="Target device for compiling the model (e.g., CPU, GPU). Default is CPU.", ) + parser.add_argument( + "--quantization_flow", + type=str, + choices=["pt2e", "nncf"], + default="nncf", + help="Select the quantization flow (nncf or pt2e):" + " pt2e is the default torch.ao quantization flow, while" + " nncf is a custom method with additional algorithms to improve model performance.", + ) args = parser.parse_args() # Run the main function with parsed arguments + # Disable nncf patching as export of the patched model is not supported. 
with nncf.torch.disable_patching(): main( args.suite, @@ -258,4 +299,5 @@ def transform(x): args.dataset, args.device, args.batch_size, + args.quantization_flow, ) From 17fe62f001fd731be97b5242d8f41893c144944a Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Tue, 11 Feb 2025 18:02:54 +0100 Subject: [PATCH 056/188] OpenVINOQuantizer: constructor arguments have been refined --- backends/openvino/quantizer/quantizer.py | 71 +++++++++++------------- 1 file changed, 33 insertions(+), 38 deletions(-) diff --git a/backends/openvino/quantizer/quantizer.py b/backends/openvino/quantizer/quantizer.py index 63da8325e4f..8ce1ce6dda1 100644 --- a/backends/openvino/quantizer/quantizer.py +++ b/backends/openvino/quantizer/quantizer.py @@ -5,6 +5,7 @@ # directory of this source tree for more details. from collections import defaultdict +from enum import Enum from typing import Dict, List, Optional, Tuple import torch.fx @@ -20,13 +21,25 @@ import nncf import nncf.common.quantization as q import nncf.experimental.torch.fx as nncf_fx -import nncf.parameters as p -import nncf.quantization.advanced_parameters as advanced_p from nncf.common.graph.graph import NNCFGraph QUANT_ANNOTATION_KEY = "quantization_annotation" +class QuantizationMode(Enum): + """ + Defines special quantization modes. + + - INT8_SYM: INT8 symmetric quantization for both activations and weights. + - INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights. 
+ - INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models + """ + + INT8_SYM = "int8_sym" + INT8_MIXED = "int8_mixed" + INT8_TRANSFORMER = "int8_transformer" + + class OpenVINOQuantizer(Quantizer): """ Implementation of the Torch AO quantizer which annotates models with quantization annotations @@ -36,49 +49,31 @@ class OpenVINOQuantizer(Quantizer): def __init__( self, *, - mode: Optional[p.QuantizationMode] = None, - preset: Optional[q.structs.QuantizationPreset] = None, - target_device: p.TargetDevice = p.TargetDevice.ANY, - transformer_model: bool = False, + mode: Optional[QuantizationMode] = QuantizationMode.INT8_SYM, ignored_scope: Optional[nncf.IgnoredScope] = None, - overflow_fix: Optional[advanced_p.OverflowFix] = None, - quantize_outputs: bool = False, - activations_quantization_params: Optional[advanced_p.QuantizationParameters] = None, - weights_quantization_params: Optional[advanced_p.QuantizationParameters] = None, + **kwargs, ): """ - :param mode: Defines optimization mode for the algorithm. None by default. - :param preset: A preset controls the quantization mode (symmetric and asymmetric). - It can take the following values: - - `performance`: Symmetric quantization of weights and activations. - - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations. - Default value is None. In this case, `mixed` preset is used for `transformer` - model type otherwise `performance`. - :param target_device: A target device the specificity of which will be taken - into account while compressing in order to obtain the best performance - for this type of device, defaults to TargetDevice.ANY. - :param model_type: Model type is needed to specify additional patterns - in the model. Supported only `transformer` now. + :param mode: Defines special quantization modes. + - INT8_SYM: INT8 symmetric quantization for both activations and weights. 
+ - INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights. + - INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models + Default value is INT8_SYM. :param ignored_scope: An ignored scope that defined the list of model control flow graph nodes to be ignored during quantization. - :param overflow_fix: This option controls whether to apply the overflow issue - fix for the 8-bit quantization. - :param quantize_outputs: Whether to insert additional quantizers right before - each of the model outputs. - :param activations_quantization_params: Quantization parameters for model - activations. - :param weights_quantization_params: Quantization parameters for model weights. + :param kwargs: Arguments to pass to the NNCF MinMaxQuantization algorithm. """ + if mode == QuantizationMode.INT8_SYM: + preset = q.structs.QuantizationPreset.PERFORMANCE + model_type = None + elif mode == QuantizationMode.INT8_MIXED: + preset = q.structs.QuantizationPreset.MIXED + model_type = None + else: + preset = None + model_type = nncf.parameters.ModelType.TRANSFORMER self._min_max_algo = nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization( - mode=mode, - preset=preset, - target_device=target_device, - model_type=p.ModelType.TRANSFORMER if transformer_model else None, - ignored_scope=ignored_scope, - overflow_fix=overflow_fix, - quantize_outputs=quantize_outputs, - activations_quantization_params=activations_quantization_params, - weights_quantization_params=weights_quantization_params, + preset=preset, model_type=model_type, ignored_scope=ignored_scope, **kwargs ) def get_nncf_quantization_setup( From c7e07586a7d639d4f252efdbc77d4768f7ef5278 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Wed, 12 Feb 2025 15:04:16 +0100 Subject: [PATCH 057/188] set_ignored_scope | readme updates --- backends/openvino/quantizer/quantizer.py | 56 +++++++++++++++------ examples/openvino/aot/README.md | 10 ++-- examples/openvino/openvino_build_example.sh | 7 
+-- 3 files changed, 52 insertions(+), 21 deletions(-) diff --git a/backends/openvino/quantizer/quantizer.py b/backends/openvino/quantizer/quantizer.py index 8ce1ce6dda1..480faeee635 100644 --- a/backends/openvino/quantizer/quantizer.py +++ b/backends/openvino/quantizer/quantizer.py @@ -19,7 +19,7 @@ from torch.ao.quantization.quantizer.quantizer import SharedQuantizationSpec import nncf -import nncf.common.quantization as q +import nncf.common.quantization as quantization import nncf.experimental.torch.fx as nncf_fx from nncf.common.graph.graph import NNCFGraph @@ -50,7 +50,6 @@ def __init__( self, *, mode: Optional[QuantizationMode] = QuantizationMode.INT8_SYM, - ignored_scope: Optional[nncf.IgnoredScope] = None, **kwargs, ): """ @@ -59,26 +58,53 @@ def __init__( - INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights. - INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models Default value is INT8_SYM. - :param ignored_scope: An ignored scope that defined the list of model control - flow graph nodes to be ignored during quantization. :param kwargs: Arguments to pass to the NNCF MinMaxQuantization algorithm. 
""" if mode == QuantizationMode.INT8_SYM: - preset = q.structs.QuantizationPreset.PERFORMANCE + preset = quantization.structs.QuantizationPreset.PERFORMANCE model_type = None elif mode == QuantizationMode.INT8_MIXED: - preset = q.structs.QuantizationPreset.MIXED + preset = quantization.structs.QuantizationPreset.MIXED model_type = None else: preset = None model_type = nncf.parameters.ModelType.TRANSFORMER self._min_max_algo = nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization( - preset=preset, model_type=model_type, ignored_scope=ignored_scope, **kwargs + preset=preset, model_type=model_type, **kwargs + ) + + def set_ignored_scope( + self, + names: Optional[List[str]] = None, + patterns: Optional[List[str]] = None, + types: Optional[List[str]] = None, + subgraphs: Optional[List[Tuple[List[str], List[str]]]] = None, + validate: bool = True, + ) -> None: + """ + Provides an option to specify portions of model to be excluded from compression. + The ignored scope defines model sub-graphs that should be excluded from the quantization process. + + :param names: List of ignored node names. + :param patterns: List of regular expressions that define patterns for names of ignored nodes. + :param types: List of ignored operation types. + :param subgraphs: List of ignored subgraphs. + :param validate: If set to True, then a RuntimeError will be raised if any ignored scope does not match + in the model graph. 
+ """ + self._min_max_algo.set_ignored_scope( + nncf.IgnoredScope( + names=names or [], + patterns=patterns or [], + types=types or [], + subgraphs=subgraphs or [], + validate=validate, + ) ) def get_nncf_quantization_setup( self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph - ) -> q.quantizer_setup.SingleConfigQuantizerSetup: + ) -> quantization.quantizer_setup.SingleConfigQuantizerSetup: self._min_max_algo._set_backend_entity(model) return self._min_max_algo.find_quantization_setup(model, nncf_graph) @@ -134,7 +160,9 @@ def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: @staticmethod def _get_unified_scales_root_quantizer_id( - nncf_graph: NNCFGraph, quantizer_ids: List[int], quantizer_setup: q.quantizer_setup.SingleConfigQuantizerSetup + nncf_graph: NNCFGraph, + quantizer_ids: List[int], + quantizer_setup: quantization.quantizer_setup.SingleConfigQuantizerSetup, ) -> int: """ Identifies the earliest quantizer node ID based on the corresponding `nncf_node.node_id` @@ -160,7 +188,7 @@ def _get_unified_scales_root_quantizer_id( def _get_edge_or_node_and_annotation( graph: torch.fx.Graph, nncf_graph: NNCFGraph, - qp: q.quantizer_setup.QuantizationPointBase, + qp: quantization.quantizer_setup.QuantizationPointBase, node_vs_torch_annotation: Dict[torch.fx.Node, QuantizationAnnotation], ) -> Tuple[EdgeOrNode, QuantizationAnnotation]: """ @@ -181,7 +209,7 @@ def _get_edge_or_node_and_annotation( @staticmethod def _get_edge_or_node( - target_node: torch.fx.Node, qp: q.quantizer_setup.QuantizationPointBase, nncf_graph: NNCFGraph + target_node: torch.fx.Node, qp: quantization.quantizer_setup.QuantizationPointBase, nncf_graph: NNCFGraph ) -> EdgeOrNode: """ Returns the edge or node based on the given target node and quantization point. 
@@ -231,7 +259,7 @@ def _fill_torch_ao_annotation( annotation_to_update.input_qspec_map[edge_or_node[0]] = qspec @staticmethod - def _get_torch_ao_qspec_from_qp(qp: q.quantizer_setup.QuantizationPointBase) -> QuantizationSpec: + def _get_torch_ao_qspec_from_qp(qp: quantization.quantizer_setup.QuantizationPointBase) -> QuantizationSpec: """ Retrieves the quantization configuration from the given quantization point and converts it into a QuantizationSpec. @@ -247,13 +275,13 @@ def _get_torch_ao_qspec_from_qp(qp: q.quantizer_setup.QuantizationPointBase) -> if qconfig.per_channel: torch_qscheme = ( torch.per_channel_symmetric - if qconfig.mode is q.structs.QuantizationScheme.SYMMETRIC + if qconfig.mode is quantization.structs.QuantizationScheme.SYMMETRIC else torch.per_channel_affine ) else: torch_qscheme = ( torch.per_tensor_symmetric - if qconfig.mode is q.structs.QuantizationScheme.SYMMETRIC + if qconfig.mode is quantization.structs.QuantizationScheme.SYMMETRIC else torch.per_tensor_affine ) if is_weight: diff --git a/examples/openvino/aot/README.md b/examples/openvino/aot/README.md index 5fd97dba21e..900a5b6cbe0 100644 --- a/examples/openvino/aot/README.md +++ b/examples/openvino/aot/README.md @@ -16,7 +16,7 @@ python aot_openvino_compiler.py --suite --model --inp Supported values: - `timm` (e.g., VGG16, ResNet50) - `torchvision` (e.g., resnet18, mobilenet_v2) - - `huggingface` (e.g., bert-base-uncased) + - `huggingface` (e.g., bert-base-uncased). NB: Quantization and validation is not supported yet. - **`--model`** (required): Name of the model to export. @@ -36,10 +36,12 @@ python aot_openvino_compiler.py --suite --model --inp The dataset length must be evenly divisible by the batch size. - **`--quantize`** (optional): - Enable model quantization: Default is False. + Enable model quantization. --dataset argument is requred for the quantization. `huggingface` suite does not supported yet. + + +- **`--validate`** (optional): + Enable model validation. 
--dataset argument is requred for the validation. `huggingface` suite does not supported yet. -- **`--quantize`** (optional): - Enable model validation. --dataset argument is requred for the validation. - **`--dataset`** (optional): Path to the imagenet-like calibration dataset. diff --git a/examples/openvino/openvino_build_example.sh b/examples/openvino/openvino_build_example.sh index a490ff30154..ee16658941d 100755 --- a/examples/openvino/openvino_build_example.sh +++ b/examples/openvino/openvino_build_example.sh @@ -42,10 +42,11 @@ main() { -B"${example_build_dir}" \ $EXECUTORCH_ROOT/$example_dir -<<<<<<< HEAD:examples/openvino/openvino_build_example.sh cmake --build "${example_build_dir}" -j$(nproc) -======= - cmake --build "${example_build_dir}" -j5 + + # Switch back to the original directory + cd - > /dev/null + # Print a success message echo "Build successfully completed." } From 70a3b2a04ca08162f22597764e6c209d2bb477e5 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Wed, 12 Feb 2025 14:24:36 -0800 Subject: [PATCH 058/188] Use to_edge_transform_and_lower in aot_openvino_compiler --- examples/openvino/aot/aot_openvino_compiler.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 4674fbbd755..242a4fa1532 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -12,7 +12,7 @@ from executorch.exir.backend.backend_details import CompileSpec from executorch.backends.openvino.preprocess import OpenvinoBackend from executorch.backends.openvino.partitioner import OpenvinoPartitioner -from executorch.exir import EdgeProgramManager, to_edge +from executorch.exir import EdgeProgramManager, to_edge_transform_and_lower from torch.export import export, ExportedProgram from torch.export.exported_program import ExportedProgram import argparse @@ -47,13 +47,9 @@ def main(suite: str, 
model_name: str, input_shape, device: str): # Export to aten dialect using torch.export aten_dialect: ExportedProgram = export(model, example_args) - # Convert to edge dialect - edge_program: EdgeProgramManager = to_edge(aten_dialect) - to_be_lowered_module = edge_program.exported_program() - - # Lower the module to the backend with a custom partitioner + # Convert to edge dialect and lower the module to the backend with a custom partitioner compile_spec = [CompileSpec("device", device.encode())] - lowered_module = edge_program.to_backend(OpenvinoPartitioner(compile_spec)) + lowered_module: EdgeProgramManager = to_edge_transform_and_lower(aten_dialect, partitioner=[OpenvinoPartitioner(compile_spec),]) # Apply backend-specific passes exec_prog = lowered_module.to_executorch(config=executorch.exir.ExecutorchBackendConfig()) From 5ea37c7ef5b7268277e11cb9d07dad447b252e37 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Wed, 12 Feb 2025 14:26:18 -0800 Subject: [PATCH 059/188] Fix input tensor file reading bug --- .../executor_runner/openvino_executor_runner.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp index 7615b63649a..1c58a49fbb0 100644 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -175,6 +175,11 @@ int main(int argc, char** argv) { // iterate each raw input tensor file to read values std::ifstream input_list(input_list_path); if (input_list.is_open()) { + std::string inputs_dir = ""; + size_t last_pos = std::string(input_list_path).rfind('/'); + if (last_pos != std::string::npos) { + inputs_dir = std::string(input_list_path).substr(0, last_pos+1); + } size_t num_inputs = method->inputs_size(); std::string file_path; while (std::getline(input_list, file_path)) { @@ -188,7 +193,12 @@ int main(int argc, char** 
argv) { method_meta.input_tensor_meta(input_index); auto input_data_ptr = inputs[input_index].toTensor().data_ptr(); - std::ifstream fin(input_files[input_index], std::ios::binary); + std::ifstream fin(inputs_dir+input_files[input_index], std::ios::binary); + if (!(fin.good())) { + ET_CHECK_MSG(false, + "Failed to read input tensor file: %s", + inputs_dir+input_files[input_index]); + } fin.seekg(0, fin.end); size_t file_size = fin.tellg(); From 6beadb3c63ee7574abaa3d8c1e7965d84d761de8 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Wed, 12 Feb 2025 14:27:16 -0800 Subject: [PATCH 060/188] Update unit tests --- backends/openvino/tests/ops/base_openvino_op_test.py | 3 +-- backends/openvino/tests/test_openvino_delegate.py | 6 +++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/backends/openvino/tests/ops/base_openvino_op_test.py b/backends/openvino/tests/ops/base_openvino_op_test.py index a51b99e8eca..88fafc25fe4 100644 --- a/backends/openvino/tests/ops/base_openvino_op_test.py +++ b/backends/openvino/tests/ops/base_openvino_op_test.py @@ -97,13 +97,12 @@ def execute_layer_test( stdout=subprocess.PIPE, stderr=subprocess.STDOUT, env=env, - cwd=tmp_dir, ) stdout_str = proc.stdout.decode('utf-8') # Check if execution completed successfully - self.assertIn("Model executed successfully.", stdout_str) + self.assertTrue(proc.returncode == 0) # Read the outputs from the temporary files output_dir = f"{tmp_dir}/outputs" diff --git a/backends/openvino/tests/test_openvino_delegate.py b/backends/openvino/tests/test_openvino_delegate.py index eaabcf2603b..89763d1d960 100644 --- a/backends/openvino/tests/test_openvino_delegate.py +++ b/backends/openvino/tests/test_openvino_delegate.py @@ -71,4 +71,8 @@ def parse_arguments(): # Discover all existing op tests in "ops" folder suite = loader.discover(test_params['test_type'], pattern=test_params['pattern']) # Start running tests - unittest.TextTestRunner().run(suite) + result = unittest.TextTestRunner().run(suite) + 
if result.wasSuccessful(): + print("OpenVINO backend tests completed successfully") + else: + print("OpenVINO backend tests completed with failures") From 4dbe4444baa7eb27fb1ee24f164606d4f2e12008 Mon Sep 17 00:00:00 2001 From: Yamini Nimmagadda Date: Wed, 12 Feb 2025 14:41:07 -0800 Subject: [PATCH 061/188] Update build-run-openvino.md --- docs/source/build-run-openvino.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/build-run-openvino.md b/docs/source/build-run-openvino.md index 08882c448a7..4cc15b05edb 100644 --- a/docs/source/build-run-openvino.md +++ b/docs/source/build-run-openvino.md @@ -77,10 +77,10 @@ cd openvino git submodule update --init --recursive mkdir build cd build -cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_PYTHON=ON -DENABLE_WHEEL=ON +cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_PYTHON=ON make -j -cd ../.. +cd .. cmake --install build --prefix cd source setupvars.sh From 0c07b701aca0871cb3db455eb5ef7363042364d9 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Wed, 12 Feb 2025 15:59:06 -0800 Subject: [PATCH 062/188] temp changes for debugging --- .../tests/ops/base_openvino_op_test.py | 4 +++- .../openvino_executor_runner.cpp | 24 +++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/backends/openvino/tests/ops/base_openvino_op_test.py b/backends/openvino/tests/ops/base_openvino_op_test.py index 88fafc25fe4..57b58c4ba32 100644 --- a/backends/openvino/tests/ops/base_openvino_op_test.py +++ b/backends/openvino/tests/ops/base_openvino_op_test.py @@ -77,7 +77,7 @@ def execute_layer_test( exec_prog.write_to_file(file) # Save inputs into a temporary file - self.generate_inputs(tmp_dir, "input_list.txt", [sample_inputs], input_list) + #self.generate_inputs(tmp_dir, "input_list.txt", [sample_inputs], input_list) self.make_output_dir(output_dir) # Start a subprocess to execute model with openvino_executor_runner @@ -100,6 +100,8 @@ def execute_layer_test( ) stdout_str = 
proc.stdout.decode('utf-8') + print("STDOUT:") + print(stdout_str) # Check if execution completed successfully self.assertTrue(proc.returncode == 0) diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp index 1c58a49fbb0..a9cf4106149 100644 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -80,6 +80,7 @@ int main(int argc, char** argv) { // Load the model using FileDataLoader Result loader = FileDataLoader::from(model_path); + std::cout << "AAA - 1" << std::endl; ET_CHECK_MSG( loader.ok(), "FileDataLoader::from() failed: 0x%" PRIx32, @@ -88,22 +89,27 @@ int main(int argc, char** argv) { // Load the program from the loaded model Result program = Program::load(&loader.get()); if (!program.ok()) { + std::cout << "AAA - 2" << std::endl; ET_LOG(Error, "Failed to parse model file %s", model_path); return 1; } + std::cout << "AAA - 3" << std::endl; ET_LOG(Info, "Model file %s is loaded.", model_path); // Retrieve the method name from the program (assumes the first method is used) const char* method_name = nullptr; { const auto method_name_result = program->get_method_name(0); + std::cout << "AAA - 4" << std::endl; ET_CHECK_MSG(method_name_result.ok(), "Program has no methods"); method_name = *method_name_result; } + std::cout << "AAA - 5" << std::endl; ET_LOG(Info, "Using method %s", method_name); // Retrieve metadata about the method Result method_meta = program->method_meta(method_name); + std::cout << "AAA - 6" << std::endl; ET_CHECK_MSG( method_meta.ok(), "Failed to get method_meta for %s: 0x%" PRIx32, @@ -121,6 +127,7 @@ int main(int argc, char** argv) { for (size_t id = 0; id < num_memory_planned_buffers; ++id) { size_t buffer_size = static_cast(method_meta->memory_planned_buffer_size(id).get()); + std::cout << "AAA - 7" << std::endl; ET_LOG(Info, "Setting up planned buffer %zu, size %zu.", 
id, buffer_size); planned_buffers.push_back(std::make_unique(buffer_size)); planned_spans.push_back({planned_buffers.back().get(), buffer_size}); @@ -133,15 +140,18 @@ int main(int argc, char** argv) { // Load the method into the program Result method = program->load_method(method_name, &memory_manager); + std::cout << "AAA - 8" << std::endl; ET_CHECK_MSG( method.ok(), "Loading of method %s failed with status 0x%" PRIx32, method_name, static_cast(method.error())); + std::cout << "AAA - 9" << std::endl; ET_LOG(Info, "Method loaded."); // Prepare the input tensors for the method auto inputs = prepare_input_tensors(*method); + std::cout << "AAA - 10" << std::endl; ET_CHECK_MSG( inputs.ok(), "Could not prepare inputs: 0x%" PRIx32, @@ -150,11 +160,14 @@ int main(int argc, char** argv) { // If the input path list is provided, read input tensors from the files if (!(FLAGS_input_list_path.empty())) { const char* input_list_path = FLAGS_input_list_path.c_str(); + std::cout << "AAA - 11" << std::endl; ET_LOG(Info, "Loading input tensors from the list provided in %s.", input_list_path); Error status = Error::Ok; std::vector inputs(method->inputs_size()); + std::cout << "AAA - 12" << std::endl; ET_LOG(Info, "%zu inputs: ", inputs.size()); status = method->get_inputs(inputs.data(), inputs.size()); + std::cout << "AAA - 13" << std::endl; ET_CHECK(status == Error::Ok); auto split = [](std::string s, std::string delimiter) { @@ -195,6 +208,7 @@ int main(int argc, char** argv) { std::ifstream fin(inputs_dir+input_files[input_index], std::ios::binary); if (!(fin.good())) { + std::cout << "AAA - 14" << std::endl; ET_CHECK_MSG(false, "Failed to read input tensor file: %s", inputs_dir+input_files[input_index]); @@ -202,6 +216,7 @@ int main(int argc, char** argv) { fin.seekg(0, fin.end); size_t file_size = fin.tellg(); + std::cout << "AAA - 15" << std::endl; ET_CHECK_MSG( file_size == tensor_meta->nbytes(), "Input(%d) size mismatch. 
file bytes: %zu, tensor bytes: %zu", @@ -217,11 +232,13 @@ int main(int argc, char** argv) { } } } else { + std::cout << "AAA - 16" << std::endl; ET_CHECK_MSG(false, "Failed to read input list file: %s", input_list_path); } } + std::cout << "AAA - 17" << std::endl; ET_LOG(Info, "Inputs prepared."); // Measure execution time for inference @@ -236,29 +253,35 @@ int main(int argc, char** argv) { .count() / 1000.0; // Log execution time and average time per iteration + std::cout << "AAA - 18" << std::endl; ET_LOG( Info, "%d inference took %f ms, avg %f ms", num_iterations, elapsed_time, elapsed_time / static_cast(num_iterations)); + std::cout << "AAA - 19" << std::endl; ET_CHECK_MSG( status == Error::Ok, "Execution of method %s failed with status 0x%" PRIx32, method_name, static_cast(status)); + std::cout << "AAA - 20" << std::endl; ET_LOG(Info, "Model executed successfully."); // Retrieve and print the method outputs std::vector outputs(method->outputs_size()); + std::cout << "AAA - 21" << std::endl; ET_LOG(Info, "%zu Number of outputs: ", outputs.size()); status = method->get_outputs(outputs.data(), outputs.size()); + std::cout << "AAA - 22" << std::endl; ET_CHECK(status == Error::Ok); // If output folder path is provided, save output tensors // into raw tensor files. 
if (!(FLAGS_output_folder_path.empty())) { const char* output_folder_path = FLAGS_output_folder_path.c_str(); + std::cout << "AAA - 23" << std::endl; ET_LOG(Info, "Saving output tensors into the output folder: %s.", output_folder_path); for (size_t output_index = 0; output_index < method->outputs_size(); output_index++) { @@ -271,6 +294,7 @@ int main(int argc, char** argv) { fout.close(); } } + std::cout << "AAA - 24" << std::endl; return 0; } From b0862027bdc74f0986b98e726c2d19cbab7227af Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Wed, 12 Feb 2025 17:28:45 -0800 Subject: [PATCH 063/188] Initial document for openvino backend tests --- backends/openvino/tests/README.md | 52 +++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 backends/openvino/tests/README.md diff --git a/backends/openvino/tests/README.md b/backends/openvino/tests/README.md new file mode 100644 index 00000000000..964357c58e0 --- /dev/null +++ b/backends/openvino/tests/README.md @@ -0,0 +1,52 @@ +# Unit Tests for OpenVINO Backend + +## Directory Structure + +Below is the layout of the `backends/openvino/tests` directory, which includes the necessary files for the example applications: + +``` +backends/openvino/tests +├── ops # Directory with base op test script and individual op tests. + ├── base_openvino_op_test.py # Script which contains the base class for all op tests. + └── test_.py # Individual op tests scripts. +├── models # Directory with model test scripts. + └── test_classification.py # Test script for classification models. +├── README.md # Documentation for unit tests (this file) +└── test_openvino_delegate.py # Script to execute unit tests. +``` + +## Executing Unit Tests + +### Prerequisites + +Before you begin, refer to instructions provided in [OpenVINO Backend for ExecuTorch](./README.md) to install openvino and setup executorch environment. 
+Once openvino is installed and executorch environment is set, refer to [asdfasdf](asdfasdf) to build openvino_example_runner. + +### Usage + +test_openvino_delegate.py allows to run op or model tests for openvino backend. + +### **Arguments** +- **`--build_folder`** (required): + Path to cmake binary directory. (Refer to [asdf](asdf)) + Examples: + - `../../../cmake-openvino-out` (Relative path from `backends/openvino/tests` directory) + - `/cmake-openvino-out` (Absolute path to the default build folder) + +- **`--test_type`** (optional): + Type of the tests to run. + Supported values: + - `ops` (default) + - `models` + +- **`--pattern`** (optional): + Pattern to match test files. Provide complete file name to run individual tests. The default value is `test_*.py` + Examples: + - `test_convolution.py` (Assuming `--test_type` parameter is provided as `ops`, this will run only convolution tests) + - `test_add*.py` (Assuming `--test_type` parameter is provided as `ops`, this will run add and addmm op tests) + +- **`--device`** (optional): + Target device to compile and run tests. Default is `CPU`. + Examples: `CPU`, `GPU` + + From cb91596c511dedeb3754896e22a36783eeb1546d Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Wed, 12 Feb 2025 17:38:39 -0800 Subject: [PATCH 064/188] Update openvino backend test documentation --- backends/openvino/tests/README.md | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/backends/openvino/tests/README.md b/backends/openvino/tests/README.md index 964357c58e0..61e959f2db1 100644 --- a/backends/openvino/tests/README.md +++ b/backends/openvino/tests/README.md @@ -19,8 +19,8 @@ backends/openvino/tests ### Prerequisites -Before you begin, refer to instructions provided in [OpenVINO Backend for ExecuTorch](./README.md) to install openvino and setup executorch environment. -Once openvino is installed and executorch environment is set, refer to [asdfasdf](asdfasdf) to build openvino_example_runner. 
+Before you begin, refer to instructions provided in [OpenVINO Backend for ExecuTorch](../README.md) to install openvino and setup executorch environment. +Once openvino is installed and executorch environment is set, refer to [OpenVINO Backend Examples](../../../examples/openvino/README.md) to build openvino_example_runner. ### Usage @@ -28,7 +28,7 @@ test_openvino_delegate.py allows to run op or model tests for openvino backend. ### **Arguments** - **`--build_folder`** (required): - Path to cmake binary directory. (Refer to [asdf](asdf)) + Path to cmake binary directory. (Refer to [OpenVINO Backend Examples](../../../examples/openvino/README.md)) Examples: - `../../../cmake-openvino-out` (Relative path from `backends/openvino/tests` directory) - `/cmake-openvino-out` (Absolute path to the default build folder) @@ -50,3 +50,18 @@ test_openvino_delegate.py allows to run op or model tests for openvino backend. Examples: `CPU`, `GPU` +## **Examples** + +### Execute Tests for All Ops on CPU +```bash +python test_openvino_delegate.py --build_folder ../../../cmake-openvino-out --device CPU --test_type ops +``` + +### Execute Convolution Op Tests on CPU +```bash +python test_openvino_delegate.py --build_folder ../../../cmake-openvino-out --device CPU --test_type ops --pattern test_convolution.py +``` + +### Execute Tests for all Models on GPU +```bash +python test_openvino_delegate.py --build_folder ../../../cmake-openvino-out --device GPU --test_type models From d7567277978ab4b49461e72e7590a11bf0686be0 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Wed, 12 Feb 2025 17:39:46 -0800 Subject: [PATCH 065/188] Update README.md --- backends/openvino/tests/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/openvino/tests/README.md b/backends/openvino/tests/README.md index 61e959f2db1..e573408d197 100644 --- a/backends/openvino/tests/README.md +++ b/backends/openvino/tests/README.md @@ -24,7 +24,7 @@ Once openvino is installed and executorch 
environment is set, refer to [OpenVINO ### Usage -test_openvino_delegate.py allows to run op or model tests for openvino backend. +`test_openvino_delegate.py` allows to run op or model tests for openvino backend. ### **Arguments** - **`--build_folder`** (required): From 09b2dbdd950da8ce777ffd15ed1ef8fd50f203dd Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Wed, 12 Feb 2025 18:04:39 -0800 Subject: [PATCH 066/188] Removed debugging lines --- .../tests/ops/base_openvino_op_test.py | 4 ---- .../openvino_executor_runner.cpp | 24 ------------------- 2 files changed, 28 deletions(-) diff --git a/backends/openvino/tests/ops/base_openvino_op_test.py b/backends/openvino/tests/ops/base_openvino_op_test.py index 57b58c4ba32..5ee04734918 100644 --- a/backends/openvino/tests/ops/base_openvino_op_test.py +++ b/backends/openvino/tests/ops/base_openvino_op_test.py @@ -99,10 +99,6 @@ def execute_layer_test( env=env, ) - stdout_str = proc.stdout.decode('utf-8') - print("STDOUT:") - print(stdout_str) - # Check if execution completed successfully self.assertTrue(proc.returncode == 0) diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp index a9cf4106149..1c58a49fbb0 100644 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -80,7 +80,6 @@ int main(int argc, char** argv) { // Load the model using FileDataLoader Result loader = FileDataLoader::from(model_path); - std::cout << "AAA - 1" << std::endl; ET_CHECK_MSG( loader.ok(), "FileDataLoader::from() failed: 0x%" PRIx32, @@ -89,27 +88,22 @@ int main(int argc, char** argv) { // Load the program from the loaded model Result program = Program::load(&loader.get()); if (!program.ok()) { - std::cout << "AAA - 2" << std::endl; ET_LOG(Error, "Failed to parse model file %s", model_path); return 1; } - std::cout << "AAA - 3" << std::endl; ET_LOG(Info, "Model file %s is 
loaded.", model_path); // Retrieve the method name from the program (assumes the first method is used) const char* method_name = nullptr; { const auto method_name_result = program->get_method_name(0); - std::cout << "AAA - 4" << std::endl; ET_CHECK_MSG(method_name_result.ok(), "Program has no methods"); method_name = *method_name_result; } - std::cout << "AAA - 5" << std::endl; ET_LOG(Info, "Using method %s", method_name); // Retrieve metadata about the method Result method_meta = program->method_meta(method_name); - std::cout << "AAA - 6" << std::endl; ET_CHECK_MSG( method_meta.ok(), "Failed to get method_meta for %s: 0x%" PRIx32, @@ -127,7 +121,6 @@ int main(int argc, char** argv) { for (size_t id = 0; id < num_memory_planned_buffers; ++id) { size_t buffer_size = static_cast(method_meta->memory_planned_buffer_size(id).get()); - std::cout << "AAA - 7" << std::endl; ET_LOG(Info, "Setting up planned buffer %zu, size %zu.", id, buffer_size); planned_buffers.push_back(std::make_unique(buffer_size)); planned_spans.push_back({planned_buffers.back().get(), buffer_size}); @@ -140,18 +133,15 @@ int main(int argc, char** argv) { // Load the method into the program Result method = program->load_method(method_name, &memory_manager); - std::cout << "AAA - 8" << std::endl; ET_CHECK_MSG( method.ok(), "Loading of method %s failed with status 0x%" PRIx32, method_name, static_cast(method.error())); - std::cout << "AAA - 9" << std::endl; ET_LOG(Info, "Method loaded."); // Prepare the input tensors for the method auto inputs = prepare_input_tensors(*method); - std::cout << "AAA - 10" << std::endl; ET_CHECK_MSG( inputs.ok(), "Could not prepare inputs: 0x%" PRIx32, @@ -160,14 +150,11 @@ int main(int argc, char** argv) { // If the input path list is provided, read input tensors from the files if (!(FLAGS_input_list_path.empty())) { const char* input_list_path = FLAGS_input_list_path.c_str(); - std::cout << "AAA - 11" << std::endl; ET_LOG(Info, "Loading input tensors from the list 
provided in %s.", input_list_path); Error status = Error::Ok; std::vector inputs(method->inputs_size()); - std::cout << "AAA - 12" << std::endl; ET_LOG(Info, "%zu inputs: ", inputs.size()); status = method->get_inputs(inputs.data(), inputs.size()); - std::cout << "AAA - 13" << std::endl; ET_CHECK(status == Error::Ok); auto split = [](std::string s, std::string delimiter) { @@ -208,7 +195,6 @@ int main(int argc, char** argv) { std::ifstream fin(inputs_dir+input_files[input_index], std::ios::binary); if (!(fin.good())) { - std::cout << "AAA - 14" << std::endl; ET_CHECK_MSG(false, "Failed to read input tensor file: %s", inputs_dir+input_files[input_index]); @@ -216,7 +202,6 @@ int main(int argc, char** argv) { fin.seekg(0, fin.end); size_t file_size = fin.tellg(); - std::cout << "AAA - 15" << std::endl; ET_CHECK_MSG( file_size == tensor_meta->nbytes(), "Input(%d) size mismatch. file bytes: %zu, tensor bytes: %zu", @@ -232,13 +217,11 @@ int main(int argc, char** argv) { } } } else { - std::cout << "AAA - 16" << std::endl; ET_CHECK_MSG(false, "Failed to read input list file: %s", input_list_path); } } - std::cout << "AAA - 17" << std::endl; ET_LOG(Info, "Inputs prepared."); // Measure execution time for inference @@ -253,35 +236,29 @@ int main(int argc, char** argv) { .count() / 1000.0; // Log execution time and average time per iteration - std::cout << "AAA - 18" << std::endl; ET_LOG( Info, "%d inference took %f ms, avg %f ms", num_iterations, elapsed_time, elapsed_time / static_cast(num_iterations)); - std::cout << "AAA - 19" << std::endl; ET_CHECK_MSG( status == Error::Ok, "Execution of method %s failed with status 0x%" PRIx32, method_name, static_cast(status)); - std::cout << "AAA - 20" << std::endl; ET_LOG(Info, "Model executed successfully."); // Retrieve and print the method outputs std::vector outputs(method->outputs_size()); - std::cout << "AAA - 21" << std::endl; ET_LOG(Info, "%zu Number of outputs: ", outputs.size()); status = 
method->get_outputs(outputs.data(), outputs.size()); - std::cout << "AAA - 22" << std::endl; ET_CHECK(status == Error::Ok); // If output folder path is provided, save output tensors // into raw tensor files. if (!(FLAGS_output_folder_path.empty())) { const char* output_folder_path = FLAGS_output_folder_path.c_str(); - std::cout << "AAA - 23" << std::endl; ET_LOG(Info, "Saving output tensors into the output folder: %s.", output_folder_path); for (size_t output_index = 0; output_index < method->outputs_size(); output_index++) { @@ -294,7 +271,6 @@ int main(int argc, char** argv) { fout.close(); } } - std::cout << "AAA - 24" << std::endl; return 0; } From 49530338ca23c7434fc91460932d6ce36cc00f7a Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Wed, 12 Feb 2025 18:08:55 -0800 Subject: [PATCH 067/188] Removed comment which was added for debugging --- backends/openvino/tests/ops/base_openvino_op_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/openvino/tests/ops/base_openvino_op_test.py b/backends/openvino/tests/ops/base_openvino_op_test.py index 5ee04734918..46c0b63fc37 100644 --- a/backends/openvino/tests/ops/base_openvino_op_test.py +++ b/backends/openvino/tests/ops/base_openvino_op_test.py @@ -77,7 +77,7 @@ def execute_layer_test( exec_prog.write_to_file(file) # Save inputs into a temporary file - #self.generate_inputs(tmp_dir, "input_list.txt", [sample_inputs], input_list) + self.generate_inputs(tmp_dir, "input_list.txt", [sample_inputs], input_list) self.make_output_dir(output_dir) # Start a subprocess to execute model with openvino_executor_runner From 19cbc69adbb6310f266b9f6bdfaeb47e6eeb18ff Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Fri, 14 Feb 2025 14:10:02 +0100 Subject: [PATCH 068/188] openvino_executor_runner.cpp: comments --- .../openvino_executor_runner.cpp | 210 +++++++++--------- 1 file changed, 108 insertions(+), 102 deletions(-) diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp 
b/examples/openvino/executor_runner/openvino_executor_runner.cpp index 36c957bc433..c3922c793a3 100644 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -52,57 +53,54 @@ using executorch::runtime::Result; using executorch::runtime::Span; using executorch::runtime::TensorInfo; -std::pair benchmark_method(Result &method, - int num_iterations) { - Error status = Error::Ok; - auto before_exec = std::chrono::high_resolution_clock::now(); - for (int i = 0; i < num_iterations; ++i) { - status = method->execute(); - } - auto after_exec = std::chrono::high_resolution_clock::now(); - double elapsed_time = std::chrono::duration_cast( - after_exec - before_exec) - .count() / - 1000.0; - return std::make_pair(elapsed_time, status); +std::function build_set_input_tensor( + Result &method, std::vector &inputs, + const std::vector> input_paths) { + return [&inputs, &method, input_paths](size_t idx) -> void { + const MethodMeta method_meta = method->method_meta(); + for (int input_index = 0; input_index < method->inputs_size(); + ++input_index) { + + Result tensor_meta = + method_meta.input_tensor_meta(input_index); + auto input_data_ptr = inputs[input_index].toTensor().data_ptr(); + + std::ifstream fin(input_paths[idx][input_index], std::ios::binary); + fin.seekg(0, fin.end); + size_t file_size = fin.tellg(); + + ET_CHECK_MSG( + file_size == tensor_meta->nbytes(), + "Input(%d) size mismatch. 
file bytes: %zu, tensor bytes: %zu", + input_index, file_size, tensor_meta->nbytes()); + + fin.seekg(0, fin.beg); + fin.read(static_cast(input_data_ptr), file_size); + fin.close(); + } + }; } -void dump_outputs(Result &method, const char *output_folder_path, - size_t index = 0) { - std::vector outputs(method->outputs_size()); - Error status = Error::Ok; - status = method->get_outputs(outputs.data(), outputs.size()); - ET_CHECK(status == Error::Ok); - for (size_t output_index = 0; output_index < method->outputs_size(); - output_index++) { - auto output_tensor = outputs[output_index].toTensor(); - auto output_file_name = std::string(output_folder_path) + "/output_" + - std::to_string(index) + "_" + - std::to_string(output_index) + ".raw"; - std::ofstream fout(output_file_name.c_str(), std::ios::binary); - fout.write(output_tensor.const_data_ptr(), output_tensor.nbytes()); - fout.close(); - } +std::function +build_dump_outputs(std::vector &outputs, const size_t output_size, + const std::string output_folder_path) { + return [&outputs, output_folder_path, output_size](size_t idx) -> void { + for (size_t output_index = 0; output_index < output_size; output_index++) { + auto output_tensor = outputs[output_index].toTensor(); + auto output_file_name = output_folder_path + "/output_" + + std::to_string(idx) + "_" + + std::to_string(output_index) + ".raw"; + std::ofstream fout(output_file_name.c_str(), std::ios::binary); + fout.write(output_tensor.const_data_ptr(), output_tensor.nbytes()); + fout.close(); + } + }; } -struct ProcessInputsResult { - double total_time; - size_t num_iter; - Error status; -}; - -ProcessInputsResult process_inputs(Result &method, - const char *input_list_path, - const char *output_folder_path) { - std::vector inputs(method->inputs_size()); - ET_LOG(Info, "%zu inputs: ", inputs.size()); - double total_time_elapsed = 0.; +std::vector> +get_inputs_paths(const char *input_list_path) { size_t idx = 0; - Error status = Error::Ok; - status = 
method->get_inputs(inputs.data(), inputs.size()); - ET_CHECK(status == Error::Ok); - auto split = [](std::string s, std::string delimiter) { size_t pos_start = 0, pos_end, delim_len = delimiter.length(); std::string token; @@ -120,45 +118,19 @@ ProcessInputsResult process_inputs(Result &method, // Read raw input tensor file names from input list file and // iterate each raw input tensor file to read values std::ifstream input_list(input_list_path); - if (input_list.is_open()) { - size_t num_inputs = method->inputs_size(); - std::string file_path; - while (std::getline(input_list, file_path)) { - auto input_files = split(file_path, " "); - if (input_files.size() == 0) { - break; - } - for (int input_index = 0; input_index < num_inputs; ++input_index) { - MethodMeta method_meta = method->method_meta(); - Result tensor_meta = - method_meta.input_tensor_meta(input_index); - auto input_data_ptr = inputs[input_index].toTensor().data_ptr(); - - std::ifstream fin(input_files[input_index], std::ios::binary); - fin.seekg(0, fin.end); - size_t file_size = fin.tellg(); - - ET_CHECK_MSG( - file_size == tensor_meta->nbytes(), - "Input(%d) size mismatch. 
file bytes: %zu, tensor bytes: %zu", - input_index, file_size, tensor_meta->nbytes()); - - fin.seekg(0, fin.beg); - fin.read(static_cast(input_data_ptr), file_size); - fin.close(); - } - double time_elapsed; - std::tie(time_elapsed, status) = benchmark_method(method, 1); - if (status != Error::Ok) { - return {total_time_elapsed, idx, status}; - } - total_time_elapsed += time_elapsed; - dump_outputs(method, output_folder_path, idx++); - } - } else { + if (!input_list.is_open()) { ET_CHECK_MSG(false, "Failed to read input list file: %s", input_list_path); } - return {total_time_elapsed, idx, status}; + std::string file_path; + auto retval = std::vector>(); + while (std::getline(input_list, file_path)) { + auto input_files = split(file_path, " "); + if (input_files.size() == 0) { + break; + } + retval.push_back(input_files); + } + return retval; } int main(int argc, char **argv) { @@ -240,43 +212,77 @@ int main(int argc, char **argv) { ET_LOG(Info, "Method loaded."); // Prepare the input tensors for the method - auto inputs = prepare_input_tensors(*method); - ET_CHECK_MSG(inputs.ok(), "Could not prepare inputs: 0x%" PRIx32, - static_cast(inputs.error())); + auto method_inputs = prepare_input_tensors(*method); + ET_CHECK_MSG(method_inputs.ok(), "Could not prepare inputs: 0x%" PRIx32, + static_cast(method_inputs.error())); - double elapsed_time; Error status = Error::Ok; + std::vector inputs(method->inputs_size()); + ET_LOG(Info, "Number of input layers: %zu", inputs.size()); + + status = method->get_inputs(inputs.data(), inputs.size()); + ET_CHECK(status == Error::Ok); // If the input path list is provided, read input tensors from the files - if (!(FLAGS_input_list_path.empty()) and - !(FLAGS_output_folder_path.empty())) { + std::function set_input_tensor; + if (!FLAGS_input_list_path.empty()) { const char *input_list_path = FLAGS_input_list_path.c_str(); ET_LOG(Info, "Loading input tensors from the list provided in %s.", input_list_path); - const char 
*output_folder_path = FLAGS_output_folder_path.c_str(); - auto res = process_inputs(method, input_list_path, output_folder_path); - elapsed_time = res.total_time; - status = res.status; - num_iterations = res.num_iter; + const auto input_paths = get_inputs_paths(input_list_path); + num_iterations = input_paths.size(); + ET_LOG(Info, "Number of iters is set to the len of the inputs: %u.", + num_iterations); + + set_input_tensor = build_set_input_tensor(method, inputs, input_paths); } else { + set_input_tensor = [](size_t idx) -> void {}; + } + + ET_LOG(Info, "%zu Number of output layers: ", method->outputs_size()); + + std::vector outputs(method->outputs_size()); + status = method->get_outputs(outputs.data(), outputs.size()); + ET_CHECK(status == Error::Ok); - // Measure execution time for inference - std::tie(elapsed_time, status) = benchmark_method(method, num_iterations); + std::function dump_outputs; + if (!FLAGS_output_folder_path.empty()) { // Retrieve and print the method outputs - ET_LOG(Info, "%zu Number of outputs: ", method->outputs_size()); // If output folder path is provided, save output tensors // into raw tensor files. 
- if (!(FLAGS_output_folder_path.empty())) { - const char *output_folder_path = FLAGS_output_folder_path.c_str(); - ET_LOG(Info, "Saving output tensors into the output folder: %s.", - output_folder_path); - dump_outputs(method, output_folder_path); + const char *output_folder_path = FLAGS_output_folder_path.c_str(); + ET_LOG(Info, "Saving output tensors into the output folder: %s.", + output_folder_path); + dump_outputs = build_dump_outputs(outputs, outputs.size(), + std::string(output_folder_path)); + + } else { + dump_outputs = [](size_t idx) {}; + } + + // Measure execution time for inference + + double total_time_elapsed = 0.; + for (int i = 0; (i < num_iterations and status == Error::Ok); ++i) { + set_input_tensor(i); + auto before_exec = std::chrono::high_resolution_clock::now(); + status = method->execute(); + auto after_exec = std::chrono::high_resolution_clock::now(); + if (status == Error::Ok) { + dump_outputs(i); } + double elapsed_time = std::chrono::duration_cast( + after_exec - before_exec) + .count() / + 1000.0; + total_time_elapsed += elapsed_time; } + // Log execution time and average time per iteration ET_LOG(Info, "%d inference took %f ms, avg %f ms", num_iterations, - elapsed_time, elapsed_time / static_cast(num_iterations)); + total_time_elapsed, + total_time_elapsed / static_cast(num_iterations)); ET_CHECK_MSG(status == Error::Ok, "Execution of method %s failed with status 0x%" PRIx32, method_name, static_cast(status)); From 0892b9d47760d330d06e1f2e816872f01ef2fbed Mon Sep 17 00:00:00 2001 From: Daniil Lyakhov Date: Fri, 14 Feb 2025 15:52:53 +0100 Subject: [PATCH 069/188] Apply suggestions from code review Co-authored-by: Yamini Nimmagadda --- examples/openvino/aot/aot_openvino_compiler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 4f45fc1d426..25537910fe2 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py 
+++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -231,7 +231,7 @@ def main( raise ValueError(msg) if not dataset_path: - msg = "Validateion requires a calibration dataset." + msg = "Validation requires a calibration dataset." raise ValueError(msg) print("Start validation of the model:") @@ -266,7 +266,7 @@ def main( parser.add_argument( "--validate", action="store_true", - help="Enable model validation. --dataset argument is requred for the validation.", + help="Enable model validation. --dataset argument is required for the validation.", ) parser.add_argument("--dataset", type=str, help="Path to the validation dataset.") parser.add_argument( From d1aa42556665eb837368e2f74faf286fc52ba562 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Fri, 14 Feb 2025 16:09:24 +0100 Subject: [PATCH 070/188] aot_openvino_compiler.py: comments --- examples/openvino/aot/aot_openvino_compiler.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 25537910fe2..f0844289580 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -29,7 +29,6 @@ from torch.ao.quantization.quantize_pt2e import prepare_pt2e from torch.export import export from torch.export.exported_program import ExportedProgram -from torch.fx.passes.graph_drawer import FxGraphDrawer from transformers import AutoModel import nncf @@ -72,11 +71,6 @@ def load_calibration_dataset(dataset_path: str, batch_size: int, suite: str, mod return calibration_dataset -def visualize_fx_model(model: torch.fx.GraphModule, output_svg_path: str): - g = FxGraphDrawer(model, output_svg_path) - g.get_dot_graph().write_svg(output_svg_path) - - def dump_inputs(calibration_dataset, dest_path): input_files, targets = [], [] for idx, data in enumerate(calibration_dataset): @@ -204,7 +198,6 @@ def main( quantized_model = quantize_model( aten_dialect.module(), calibration_dataset, 
use_nncf=quantization_flow == "nncf" ) - visualize_fx_model(quantized_model, f"{model_name}_int8.svg") aten_dialect: ExportedProgram = export(quantized_model, example_args) From b9b604d8ed231355ed437fff05a0d213010f793e Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Fri, 14 Feb 2025 17:24:28 +0100 Subject: [PATCH 071/188] README --- examples/openvino/aot/README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/examples/openvino/aot/README.md b/examples/openvino/aot/README.md index 900a5b6cbe0..884ed55849f 100644 --- a/examples/openvino/aot/README.md +++ b/examples/openvino/aot/README.md @@ -25,7 +25,7 @@ python aot_openvino_compiler.py --suite --model --inp - For `torchvision`: `resnet18`, `mobilenet_v2` - For `huggingface`: `bert-base-uncased`, `distilbert-base-uncased` -- **`--input_shape`**: +- **`--input_shape`**(optional): Input shape for the model. Provide this as a **list** or **tuple**. Examples: - `[1, 3, 224, 224]` (Zsh users: wrap in quotes) @@ -38,11 +38,15 @@ python aot_openvino_compiler.py --suite --model --inp - **`--quantize`** (optional): Enable model quantization. --dataset argument is requred for the quantization. `huggingface` suite does not supported yet. +- **`--quantization_flow`** (optional): + Specifies the way to quantize torch.fx.GraphModule. + Supported values: + - `nncf`: `nncf quantize_pt2e` API (default) + - `pt2e`: torch ao quantization pipeline. - **`--validate`** (optional): Enable model validation. --dataset argument is requred for the validation. `huggingface` suite does not supported yet. - - **`--dataset`** (optional): Path to the imagenet-like calibration dataset. 
From a9800995235770af8f13231c50777c5a022f7251 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Fri, 14 Feb 2025 16:23:26 -0800 Subject: [PATCH 072/188] Fix for input file path bug --- .../executor_runner/openvino_executor_runner.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp index c3922c793a3..17cc91ba3e9 100644 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -101,7 +101,7 @@ std::vector> get_inputs_paths(const char *input_list_path) { size_t idx = 0; - auto split = [](std::string s, std::string delimiter) { + auto split_and_add_prefix = [](std::string s, std::string delimiter, std::string prefix = "") { size_t pos_start = 0, pos_end, delim_len = delimiter.length(); std::string token; std::vector res; @@ -109,9 +109,9 @@ get_inputs_paths(const char *input_list_path) { while ((pos_end = s.find(delimiter, pos_start)) != std::string::npos) { token = s.substr(pos_start, pos_end - pos_start); pos_start = pos_end + delim_len; - res.push_back(token); + res.push_back(prefix + token); } - res.push_back(s.substr(pos_start)); + res.push_back(prefix + s.substr(pos_start)); return res; }; @@ -121,10 +121,15 @@ get_inputs_paths(const char *input_list_path) { if (!input_list.is_open()) { ET_CHECK_MSG(false, "Failed to read input list file: %s", input_list_path); } + std::string inputs_dir = ""; + size_t last_pos = std::string(input_list_path).rfind('/'); + if (last_pos != std::string::npos) { + inputs_dir = std::string(input_list_path).substr(0, last_pos+1); + } std::string file_path; auto retval = std::vector>(); while (std::getline(input_list, file_path)) { - auto input_files = split(file_path, " "); + auto input_files = split_and_add_prefix(file_path, " ", inputs_dir); if (input_files.size() == 0) { break; } From 
209132b586950d6078c9aaf698a47b6c9cf47b39 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Fri, 14 Feb 2025 16:37:30 -0800 Subject: [PATCH 073/188] Test script name change --- .../openvino/tests/{test_openvino_delegate.py => test_runner.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename backends/openvino/tests/{test_openvino_delegate.py => test_runner.py} (100%) diff --git a/backends/openvino/tests/test_openvino_delegate.py b/backends/openvino/tests/test_runner.py similarity index 100% rename from backends/openvino/tests/test_openvino_delegate.py rename to backends/openvino/tests/test_runner.py From 4a27c36e1eeb8ed60b4e5cfed4d27c749618f793 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Fri, 14 Feb 2025 16:18:42 -0800 Subject: [PATCH 074/188] Update README.md --- backends/openvino/tests/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/backends/openvino/tests/README.md b/backends/openvino/tests/README.md index e573408d197..3ad109274f5 100644 --- a/backends/openvino/tests/README.md +++ b/backends/openvino/tests/README.md @@ -12,7 +12,7 @@ backends/openvino/tests ├── models # Directory with model test scripts. └── test_classification.py # Test script for classification models. ├── README.md # Documentation for unit tests (this file) -└── test_openvino_delegate.py # Script to execute unit tests. +└── test_runner.py # Script to execute unit tests. ``` ## Executing Unit Tests @@ -24,7 +24,7 @@ Once openvino is installed and executorch environment is set, refer to [OpenVINO ### Usage -`test_openvino_delegate.py` allows to run op or model tests for openvino backend. +`test_runner.py` allows to run op or model tests for openvino backend. 
### **Arguments** - **`--build_folder`** (required): @@ -54,14 +54,14 @@ Once openvino is installed and executorch environment is set, refer to [OpenVINO ### Execute Tests for All Ops on CPU ```bash -python test_openvino_delegate.py --build_folder ../../../cmake-openvino-out --device CPU --test_type ops +python test_runner.py --build_folder ../../../cmake-openvino-out --device CPU --test_type ops ``` ### Execute Convolution Op Tests on CPU ```bash -python test_openvino_delegate.py --build_folder ../../../cmake-openvino-out --device CPU --test_type ops --pattern test_convolution.py +python test_runner.py --build_folder ../../../cmake-openvino-out --device CPU --test_type ops --pattern test_convolution.py ``` ### Execute Tests for all Models on GPU ```bash -python test_openvino_delegate.py --build_folder ../../../cmake-openvino-out --device GPU --test_type models +python test_runner.py --build_folder ../../../cmake-openvino-out --device GPU --test_type models From 0a981a6a8d2554de78232a3cfb886381aed6185d Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Fri, 14 Feb 2025 17:23:12 -0800 Subject: [PATCH 075/188] Code formatting --- backends/openvino/__init__.py | 2 +- backends/openvino/preprocess.py | 4 +- backends/openvino/quantizer/quantizer.py | 75 ++-- .../tests/models/test_classification.py | 18 +- .../tests/ops/base_openvino_op_test.py | 41 ++- backends/openvino/tests/ops/test_add.py | 7 +- backends/openvino/tests/ops/test_addmm.py | 19 +- backends/openvino/tests/ops/test_arange.py | 7 +- .../openvino/tests/ops/test_batch_norm.py | 58 ++-- .../openvino/tests/ops/test_convolution.py | 319 ++++++++++++++---- backends/openvino/tests/ops/test_mean.py | 59 +++- backends/openvino/tests/ops/test_permute.py | 15 +- backends/openvino/tests/ops/test_pooling.py | 81 +++-- backends/openvino/tests/ops/test_unary_ops.py | 7 +- backends/openvino/tests/ops/test_view.py | 17 +- backends/openvino/tests/test_runner.py | 11 +- .../openvino/aot/aot_openvino_compiler.py | 74 ++-- 
.../openvino_executor_runner.cpp | 148 ++++---- 18 files changed, 677 insertions(+), 285 deletions(-) diff --git a/backends/openvino/__init__.py b/backends/openvino/__init__.py index 4a69f6b75ff..52bcce31807 100644 --- a/backends/openvino/__init__.py +++ b/backends/openvino/__init__.py @@ -2,4 +2,4 @@ from .preprocess import OpenvinoBackend from .quantizer.quantizer import OpenVINOQuantizer -__all__ = [OpenvinoBackend, OpenvinoPartitioner, OpenVINOQuantizer] +__all__ = [OpenvinoBackend, OpenvinoPartitioner, OpenVINOQuantizer] diff --git a/backends/openvino/preprocess.py b/backends/openvino/preprocess.py index 6224debc440..057702b162e 100644 --- a/backends/openvino/preprocess.py +++ b/backends/openvino/preprocess.py @@ -42,7 +42,9 @@ def preprocess( for spec in module_compile_spec: compile_options[spec.key] = spec.value.decode() - compiled = openvino_compile(edge_program.module(), *args, options=compile_options) + compiled = openvino_compile( + edge_program.module(), *args, options=compile_options + ) model_bytes = compiled.export_model() return PreprocessResult(processed_bytes=model_bytes) diff --git a/backends/openvino/quantizer/quantizer.py b/backends/openvino/quantizer/quantizer.py index 480faeee635..f82d0745e8b 100644 --- a/backends/openvino/quantizer/quantizer.py +++ b/backends/openvino/quantizer/quantizer.py @@ -8,20 +8,21 @@ from enum import Enum from typing import Dict, List, Optional, Tuple -import torch.fx -from torch.ao.quantization.observer import HistogramObserver -from torch.ao.quantization.observer import PerChannelMinMaxObserver -from torch.ao.quantization.quantizer.quantizer import EdgeOrNode -from torch.ao.quantization.quantizer.quantizer import QuantizationAnnotation -from torch.ao.quantization.quantizer.quantizer import QuantizationSpec -from torch.ao.quantization.quantizer.quantizer import QuantizationSpecBase -from torch.ao.quantization.quantizer.quantizer import Quantizer -from torch.ao.quantization.quantizer.quantizer import 
SharedQuantizationSpec - import nncf import nncf.common.quantization as quantization import nncf.experimental.torch.fx as nncf_fx + +import torch.fx from nncf.common.graph.graph import NNCFGraph +from torch.ao.quantization.observer import HistogramObserver, PerChannelMinMaxObserver +from torch.ao.quantization.quantizer.quantizer import ( + EdgeOrNode, + QuantizationAnnotation, + QuantizationSpec, + QuantizationSpecBase, + Quantizer, + SharedQuantizationSpec, +) QUANT_ANNOTATION_KEY = "quantization_annotation" @@ -69,8 +70,10 @@ def __init__( else: preset = None model_type = nncf.parameters.ModelType.TRANSFORMER - self._min_max_algo = nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization( - preset=preset, model_type=model_type, **kwargs + self._min_max_algo = ( + nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization( + preset=preset, model_type=model_type, **kwargs + ) ) def set_ignored_scope( @@ -129,8 +132,14 @@ def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: ) root_qp = quantization_setup.quantization_points[root_quantizer_id] - if any(root_qp.qconfig != quantization_setup.quantization_points[q_id].qconfig for q_id in quantizer_ids): - qps = [quantization_setup.quantization_points[q_id] for q_id in quantizer_ids] + if any( + root_qp.qconfig != quantization_setup.quantization_points[q_id].qconfig + for q_id in quantizer_ids + ): + qps = [ + quantization_setup.quantization_points[q_id] + for q_id in quantizer_ids + ] msg = ( "Different quantization configs are set to one unified scale group:" f"{[(qp.insertion_point.__dict__, str(qp.qconfig)) for qp in qps]}" @@ -140,7 +149,9 @@ def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: root_target_node = nncf_fx.node_utils.get_graph_node_by_name( graph, root_qp.insertion_point.target_node_name ) - root_edge_or_node = self._get_edge_or_node(root_target_node, root_qp, nncf_graph) + root_edge_or_node = self._get_edge_or_node( + root_target_node, root_qp, 
nncf_graph + ) for quantizer_id in quantizer_ids: if quantizer_id == root_quantizer_id: @@ -177,9 +188,14 @@ def _get_unified_scales_root_quantizer_id( nncf_node_quantizer_id = None root_quantizer_id = None for quantizer_id in quantizer_ids: - target_node_name = quantizer_setup.quantization_points[quantizer_id].insertion_point.target_node_name + target_node_name = quantizer_setup.quantization_points[ + quantizer_id + ].insertion_point.target_node_name nncf_node = nncf_graph.get_node_by_name(target_node_name) - if nncf_node_quantizer_id is None or nncf_node.node_id < nncf_node_quantizer_id: + if ( + nncf_node_quantizer_id is None + or nncf_node.node_id < nncf_node_quantizer_id + ): root_quantizer_id = quantizer_id nncf_node_quantizer_id = nncf_node.node_id return root_quantizer_id @@ -202,14 +218,18 @@ def _get_edge_or_node_and_annotation( QuantizationAnnotations. :return: A tuple containing the EdgeOrNode and its associated QuantizationAnnotation. """ - target_node = nncf_fx.node_utils.get_graph_node_by_name(graph, qp.insertion_point.target_node_name) + target_node = nncf_fx.node_utils.get_graph_node_by_name( + graph, qp.insertion_point.target_node_name + ) annotation = node_vs_torch_annotation[target_node] edge_or_node = OpenVINOQuantizer._get_edge_or_node(target_node, qp, nncf_graph) return edge_or_node, annotation @staticmethod def _get_edge_or_node( - target_node: torch.fx.Node, qp: quantization.quantizer_setup.QuantizationPointBase, nncf_graph: NNCFGraph + target_node: torch.fx.Node, + qp: quantization.quantizer_setup.QuantizationPointBase, + nncf_graph: NNCFGraph, ) -> EdgeOrNode: """ Returns the edge or node based on the given target node and quantization point. 
@@ -222,7 +242,11 @@ def _get_edge_or_node( ip = qp.insertion_point if qp.is_weight_quantization_point(): nncf_node = nncf_graph.get_node_by_name(target_node.name) - weights_ports_ids = nncf.torch.model_graph_manager.get_weight_tensor_port_ids(nncf_node, nncf_graph) + weights_ports_ids = ( + nncf.torch.model_graph_manager.get_weight_tensor_port_ids( + nncf_node, nncf_graph + ) + ) if len(weights_ports_ids) > 1: # TODO(dlyakhov): support quantization for nodes with several weights nncf.common.logging.nncf_logger.warning( @@ -259,7 +283,9 @@ def _fill_torch_ao_annotation( annotation_to_update.input_qspec_map[edge_or_node[0]] = qspec @staticmethod - def _get_torch_ao_qspec_from_qp(qp: quantization.quantizer_setup.QuantizationPointBase) -> QuantizationSpec: + def _get_torch_ao_qspec_from_qp( + qp: quantization.quantizer_setup.QuantizationPointBase, + ) -> QuantizationSpec: """ Retrieves the quantization configuration from the given quantization point and converts it into a QuantizationSpec. 
@@ -293,7 +319,8 @@ def _get_torch_ao_qspec_from_qp(qp: quantization.quantizer_setup.QuantizationPoi else: observer = ( HistogramObserver - if torch_qscheme in [torch.per_tensor_symmetric, torch.per_tensor_affine] + if torch_qscheme + in [torch.per_tensor_symmetric, torch.per_tensor_affine] else PerChannelMinMaxObserver ) quant_min = 0 @@ -313,6 +340,8 @@ def _get_torch_ao_qspec_from_qp(qp: quantization.quantizer_setup.QuantizationPoi def validate(self, model: torch.fx.GraphModule) -> None: pass - def transform_for_annotation(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: + def transform_for_annotation( + self, model: torch.fx.GraphModule + ) -> torch.fx.GraphModule: nncf_fx.transformations.fold_constant_except_qdq(model) return model diff --git a/backends/openvino/tests/models/test_classification.py b/backends/openvino/tests/models/test_classification.py index 59558b397ab..c9c71af777d 100644 --- a/backends/openvino/tests/models/test_classification.py +++ b/backends/openvino/tests/models/test_classification.py @@ -1,13 +1,16 @@ -from executorch.backends.openvino.tests.ops.base_openvino_op_test import BaseOpenvinoOpTest -import torch import timm +import torch import torchvision.models as torchvision_models +from executorch.backends.openvino.tests.ops.base_openvino_op_test import ( + BaseOpenvinoOpTest, +) from transformers import AutoModel classifier_params = [ - {'model': ['torchvision', 'resnet50', (1, 3, 224, 224)] }, - {'model': ['torchvision', 'mobilenet_v2', (1, 3, 224, 224)] }, - ] + {"model": ["torchvision", "resnet50", (1, 3, 224, 224)]}, + {"model": ["torchvision", "mobilenet_v2", (1, 3, 224, 224)]}, +] + # Function to load a model based on the selected suite def load_model(suite: str, model_name: str): @@ -22,13 +25,14 @@ def load_model(suite: str, model_name: str): else: raise ValueError(f"Unsupported model suite: {suite}") + class TestClassifier(BaseOpenvinoOpTest): def test_classifier(self): for params in classifier_params: with 
self.subTest(params=params): - module = load_model(params['model'][0], params['model'][1]) + module = load_model(params["model"][0], params["model"][1]) - sample_input = (torch.randn(params['model'][2]),) + sample_input = (torch.randn(params["model"][2]),) self.execute_layer_test(module, sample_input) diff --git a/backends/openvino/tests/ops/base_openvino_op_test.py b/backends/openvino/tests/ops/base_openvino_op_test.py index 46c0b63fc37..8b92fc8bc17 100644 --- a/backends/openvino/tests/ops/base_openvino_op_test.py +++ b/backends/openvino/tests/ops/base_openvino_op_test.py @@ -3,14 +3,15 @@ import tempfile import unittest +import executorch + import numpy as np import torch -import executorch from executorch.backends.openvino.partitioner import OpenvinoPartitioner +from executorch.backends.openvino.preprocess import OpenvinoBackend +from executorch.exir import EdgeProgramManager, to_edge from executorch.exir.backend.backend_details import CompileSpec from torch.export import export, ExportedProgram -from executorch.exir import EdgeProgramManager, to_edge -from executorch.backends.openvino.preprocess import OpenvinoBackend class BaseOpenvinoOpTest(unittest.TestCase): @@ -41,7 +42,9 @@ def execute_layer_test( lowered_module = edge_program.to_backend(OpenvinoPartitioner(compile_spec)) # Apply backend-specific passes - exec_prog = lowered_module.to_executorch(config=executorch.exir.ExecutorchBackendConfig()) + exec_prog = lowered_module.to_executorch( + config=executorch.exir.ExecutorchBackendConfig() + ) # Check if the number of partitions created matches the expected number of partitions self.assertEqual( @@ -56,7 +59,7 @@ def execute_layer_test( ) # Execute the model and compare the outputs with the reference outputs - if (assert_output_equal): + if assert_output_equal: with tempfile.TemporaryDirectory() as tmp_dir: input_list = "" for idx, _ in enumerate(sample_inputs): @@ -69,7 +72,9 @@ def execute_layer_test( # Execute the module in eager mode to calculate the 
reference outputs ref_output = module(*sample_inputs) if isinstance(ref_output, torch.Tensor): - ref_output = [ref_output,] + ref_output = [ + ref_output, + ] # Serialize the executorch model and save into a temporary file pte_fname = f"{tmp_dir}/openvino_executorch_test.pte" @@ -77,7 +82,9 @@ def execute_layer_test( exec_prog.write_to_file(file) # Save inputs into a temporary file - self.generate_inputs(tmp_dir, "input_list.txt", [sample_inputs], input_list) + self.generate_inputs( + tmp_dir, "input_list.txt", [sample_inputs], input_list + ) self.make_output_dir(output_dir) # Start a subprocess to execute model with openvino_executor_runner @@ -108,7 +115,9 @@ def execute_layer_test( for i, f in enumerate(sorted(os.listdir(output_dir))): filename = os.path.join(output_dir, f) - output = np.fromfile(filename, dtype=ref_output[i].detach().numpy().dtype) + output = np.fromfile( + filename, dtype=ref_output[i].detach().numpy().dtype + ) output = torch.from_numpy(output).reshape(ref_output[i].shape) outputs.append(output) @@ -117,22 +126,28 @@ def execute_layer_test( for i in range(len(ref_output)): self.assertTrue( torch.allclose( - outputs[i], ref_output[i], atol=self.atol, rtol=self.rtol, equal_nan=True + outputs[i], + ref_output[i], + atol=self.atol, + rtol=self.rtol, + equal_nan=True, ), msg=f"ref_output:\n{ref_output[i]}\n\ntest_output:\n{outputs[i]}", ) - def generate_inputs(self, dest_path: str, file_name: str, inputs=None, input_list=None): + def generate_inputs( + self, dest_path: str, file_name: str, inputs=None, input_list=None + ): input_list_file = None input_files = [] - + # Prepare input list if input_list is not None: input_list_file = f"{dest_path}/{file_name}" with open(input_list_file, "w") as f: f.write(input_list) f.flush() - + # Prepare input data if inputs is not None: for idx, data in enumerate(inputs): @@ -140,7 +155,7 @@ def generate_inputs(self, dest_path: str, file_name: str, inputs=None, input_lis file_name = 
f"{dest_path}/input_{idx}_{i}.raw" d.detach().numpy().tofile(file_name) input_files.append(file_name) - + return input_list_file, input_files def make_output_dir(self, path: str): diff --git a/backends/openvino/tests/ops/test_add.py b/backends/openvino/tests/ops/test_add.py index d298f77e792..5b68d0ff149 100644 --- a/backends/openvino/tests/ops/test_add.py +++ b/backends/openvino/tests/ops/test_add.py @@ -1,5 +1,8 @@ -from executorch.backends.openvino.tests.ops.base_openvino_op_test import BaseOpenvinoOpTest import torch +from executorch.backends.openvino.tests.ops.base_openvino_op_test import ( + BaseOpenvinoOpTest, +) + class TestAddOperator(BaseOpenvinoOpTest): @@ -7,7 +10,7 @@ def create_model(self): class Add(torch.nn.Module): def __init__(self): super().__init__() - + def forward(self, x, y): return torch.add(x, y) diff --git a/backends/openvino/tests/ops/test_addmm.py b/backends/openvino/tests/ops/test_addmm.py index 32f09ebdc29..51c1314db0d 100644 --- a/backends/openvino/tests/ops/test_addmm.py +++ b/backends/openvino/tests/ops/test_addmm.py @@ -1,5 +1,8 @@ -from executorch.backends.openvino.tests.ops.base_openvino_op_test import BaseOpenvinoOpTest import torch +from executorch.backends.openvino.tests.ops.base_openvino_op_test import ( + BaseOpenvinoOpTest, +) + class TestAddMMOperator(BaseOpenvinoOpTest): @@ -7,19 +10,19 @@ def create_model(self): class AddMM(torch.nn.Module): def __init__(self): super().__init__() - self.alpha = 1. - self.beta = 1. 
- + self.alpha = 1.0 + self.beta = 1.0 + def forward(self, x, y, z): - #return torch.add(x, y) + # return torch.add(x, y) return torch.addmm(x, y, z, alpha=self.alpha, beta=self.beta) return AddMM() def test_addmm(self): module = self.create_model() - input_x = torch.randn(4,4, dtype=torch.float32) - input_y = torch.randn(4,4, dtype=torch.float32) - input_z = torch.randn(4,4, dtype=torch.float32) + input_x = torch.randn(4, 4, dtype=torch.float32) + input_y = torch.randn(4, 4, dtype=torch.float32) + input_z = torch.randn(4, 4, dtype=torch.float32) sample_input = (input_x, input_y, input_z) self.execute_layer_test(module, sample_input) diff --git a/backends/openvino/tests/ops/test_arange.py b/backends/openvino/tests/ops/test_arange.py index 0dd739a2585..b2aeb9c2100 100644 --- a/backends/openvino/tests/ops/test_arange.py +++ b/backends/openvino/tests/ops/test_arange.py @@ -1,5 +1,8 @@ -from executorch.backends.openvino.tests.ops.base_openvino_op_test import BaseOpenvinoOpTest import torch +from executorch.backends.openvino.tests.ops.base_openvino_op_test import ( + BaseOpenvinoOpTest, +) + class TestArangeOperator(BaseOpenvinoOpTest): @@ -8,7 +11,7 @@ class Arange(torch.nn.Module): def __init__(self, x): super().__init__() self.x = x - + def forward(self, y): return torch.arange(self.x, dtype=torch.float32) + y diff --git a/backends/openvino/tests/ops/test_batch_norm.py b/backends/openvino/tests/ops/test_batch_norm.py index ecb76860434..05d529163f9 100644 --- a/backends/openvino/tests/ops/test_batch_norm.py +++ b/backends/openvino/tests/ops/test_batch_norm.py @@ -1,23 +1,26 @@ -from executorch.backends.openvino.tests.ops.base_openvino_op_test import BaseOpenvinoOpTest import torch - -op_params = [{'weights': True, 'bias': True, 'eps': 1.0 }, - {'weights': True, 'bias': True, 'eps': 0.00005 }, - {'weights': True, 'bias': True, 'eps': 0.5 }, - {'weights': True, 'bias': True, 'eps': 0.042 }, - {'weights': True, 'bias': False, 'eps': 1.0 }, - {'weights': True, 'bias': 
False, 'eps': 0.00005 }, - {'weights': True, 'bias': False, 'eps': 0.5 }, - {'weights': True, 'bias': False, 'eps': 0.042 }, - {'weights': False, 'bias': True, 'eps': 1.0 }, - {'weights': False, 'bias': True, 'eps': 0.00005 }, - {'weights': False, 'bias': True, 'eps': 0.5 }, - {'weights': False, 'bias': True, 'eps': 0.042 }, - {'weights': False, 'bias': False, 'eps': 1.0 }, - {'weights': False, 'bias': False, 'eps': 0.00005 }, - {'weights': False, 'bias': False, 'eps': 0.5 }, - {'weights': False, 'bias': False, 'eps': 0.042 }, - ] +from executorch.backends.openvino.tests.ops.base_openvino_op_test import ( + BaseOpenvinoOpTest, +) + +op_params = [ + {"weights": True, "bias": True, "eps": 1.0}, + {"weights": True, "bias": True, "eps": 0.00005}, + {"weights": True, "bias": True, "eps": 0.5}, + {"weights": True, "bias": True, "eps": 0.042}, + {"weights": True, "bias": False, "eps": 1.0}, + {"weights": True, "bias": False, "eps": 0.00005}, + {"weights": True, "bias": False, "eps": 0.5}, + {"weights": True, "bias": False, "eps": 0.042}, + {"weights": False, "bias": True, "eps": 1.0}, + {"weights": False, "bias": True, "eps": 0.00005}, + {"weights": False, "bias": True, "eps": 0.5}, + {"weights": False, "bias": True, "eps": 0.042}, + {"weights": False, "bias": False, "eps": 1.0}, + {"weights": False, "bias": False, "eps": 0.00005}, + {"weights": False, "bias": False, "eps": 0.5}, + {"weights": False, "bias": False, "eps": 0.042}, +] class TestBatchNormOperator(BaseOpenvinoOpTest): @@ -34,17 +37,24 @@ def __init__(self, weights=True, bias=True, eps=1e-05): self.eps = eps def forward(self, x): - return torch.nn.functional.batch_norm(x, self.running_mean, self.running_var, self.weight, self.bias, eps=self.eps, training=False) + return torch.nn.functional.batch_norm( + x, + self.running_mean, + self.running_var, + self.weight, + self.bias, + eps=self.eps, + training=False, + ) return BatchNorm(weights, bias, eps) - def test_batch_norm(self): for params in op_params: with 
self.subTest(params=params): - module = self.create_model(weights=params['weights'], - bias=params['bias'], - eps=params['eps']) + module = self.create_model( + weights=params["weights"], bias=params["bias"], eps=params["eps"] + ) sample_input = (torch.randn(20, 6, 10),) diff --git a/backends/openvino/tests/ops/test_convolution.py b/backends/openvino/tests/ops/test_convolution.py index 83a80282089..45d785d3612 100644 --- a/backends/openvino/tests/ops/test_convolution.py +++ b/backends/openvino/tests/ops/test_convolution.py @@ -1,54 +1,223 @@ -from executorch.backends.openvino.tests.ops.base_openvino_op_test import BaseOpenvinoOpTest import torch +from executorch.backends.openvino.tests.ops.base_openvino_op_test import ( + BaseOpenvinoOpTest, +) + +d2_params = [ + { + "weights_shape": [3, 3, 2, 2], + "strides": [1, 1], + "pads": [0, 0], + "dilations": [1, 1], + "groups": 1, + "output_padding": [0, 0], + "transposed": True, + }, + { + "weights_shape": [3, 3, 2, 2], + "strides": [1, 1], + "pads": [0, 0], + "dilations": [1, 1], + "groups": 1, + "output_padding": [0, 0], + "transposed": False, + }, + { + "weights_shape": [3, 1, 1, 1], + "strides": [1, 1], + "pads": [0, 0], + "dilations": [1, 1], + "groups": 3, + "output_padding": [0, 0], + "transposed": True, + }, + { + "weights_shape": [3, 1, 1, 1], + "strides": [1, 1], + "pads": [0, 0], + "dilations": [1, 1], + "groups": 3, + "output_padding": [0, 0], + "transposed": False, + }, + { + "weights_shape": [3, 1, 1, 1], + "strides": [1, 1], + "bias_shape": [1], + "pads": [1, 1], + "dilations": [1, 1], + "groups": 1, + "output_padding": [0, 0], + "transposed": True, + }, + { + "weights_shape": [3, 3, 1, 1], + "strides": [1, 1], + "pads": [1, 1], + "dilations": [1, 1], + "groups": 1, + "output_padding": [0, 0], + "transposed": False, + }, + { + "weights_shape": [3, 1, 1, 1], + "strides": [1, 1], + "bias_shape": [1], + "pads": [3, 1], + "dilations": [1, 1], + "groups": 1, + "output_padding": [0, 0], + "transposed": True, + }, 
+ { + "weights_shape": [3, 3, 1, 1], + "strides": [1, 1], + "pads": [3, 1], + "dilations": [1, 1], + "groups": 1, + "output_padding": [0, 0], + "transposed": False, + }, + { + "weights_shape": [3, 1, 1, 1], + "strides": [1, 1], + "bias_shape": [1], + "pads": [1, 0], + "dilations": [1, 1], + "groups": 1, + "output_padding": [0, 0], + "transposed": True, + }, + { + "weights_shape": [3, 3, 1, 1], + "strides": [1, 1], + "pads": [0, 1], + "dilations": [1, 1], + "groups": 1, + "output_padding": [0, 0], + "transposed": False, + }, + { + "weights_shape": [3, 1, 1, 1], + "strides": [1, 1], + "pads": [1, 0], + "dilations": [1, 1], + "groups": 3, + "output_padding": [0, 0], + "transposed": True, + }, + { + "weights_shape": [3, 1, 1, 1], + "strides": [1, 1], + "pads": [0, 1], + "dilations": [1, 1], + "groups": 3, + "output_padding": [0, 0], + "transposed": False, + }, + { + "weights_shape": [3, 1, 1, 1], + "strides": [1, 1], + "pads": [1, 0], + "dilations": [2, 2], + "groups": 3, + "output_padding": [0, 0], + "transposed": True, + }, + { + "weights_shape": [3, 1, 1, 1], + "strides": [1, 1], + "pads": [0, 0], + "dilations": [2, 2], + "groups": 3, + "output_padding": [0, 0], + "transposed": False, + }, + { + "weights_shape": [3, 1, 1, 1], + "strides": [2, 1], + "bias_shape": [1], + "pads": [1, 0], + "dilations": [1, 1], + "groups": 1, + "output_padding": [0, 0], + "transposed": True, + }, + { + "weights_shape": [3, 3, 1, 1], + "strides": [2, 1], + "pads": [0, 0], + "dilations": [1, 1], + "groups": 1, + "output_padding": [0, 0], + "transposed": False, + }, + { + "weights_shape": [3, 1, 1, 1], + "strides": [2, 2], + "bias_shape": [1], + "pads": [0, 0], + "dilations": [1, 1], + "groups": 1, + "output_padding": [0, 0], + "transposed": True, + }, + { + "weights_shape": [3, 3, 1, 1], + "strides": [2, 2], + "pads": [0, 0], + "dilations": [1, 1], + "groups": 1, + "output_padding": [0, 0], + "transposed": False, + }, + { + "weights_shape": [3, 3, 1, 1], + "strides": [2, 1], + "pads": [0, 
0], + "dilations": [1, 1], + "groups": 1, + "output_padding": [0, 0], + "transposed": False, + }, + { + "weights_shape": [3, 1, 1, 1], + "strides": [2, 2], + "bias_shape": [1], + "pads": [0, 0], + "dilations": [1, 1], + "groups": 1, + "output_padding": [0, 0], + "transposed": True, + }, + { + "weights_shape": [3, 1, 1, 1], + "strides": [2, 2], + "bias_shape": [1], + "pads": [1, 1], + "dilations": [2, 2], + "groups": 1, + "output_padding": [1, 1], + "transposed": True, + }, +] -d2_params = [{'weights_shape': [3, 3, 2, 2], 'strides': [1, 1], 'pads': [0, 0], 'dilations': [1, 1], 'groups': 1, - 'output_padding': [0, 0], 'transposed': True}, - {'weights_shape': [3, 3, 2, 2], 'strides': [1, 1], 'pads': [0, 0], 'dilations': [ - 1, 1], 'groups': 1, 'output_padding': [0, 0], 'transposed': False}, - {'weights_shape': [3, 1, 1, 1], 'strides': [1, 1], 'pads': [0, 0], 'dilations': [ - 1, 1], 'groups': 3, 'output_padding': [0, 0], 'transposed': True}, - {'weights_shape': [3, 1, 1, 1], 'strides': [1, 1], 'pads': [0, 0], 'dilations': [ - 1, 1], 'groups': 3, 'output_padding': [0, 0], 'transposed': False}, - {'weights_shape': [3, 1, 1, 1], 'strides': [1, 1], 'bias_shape': [1], 'pads': [ - 1, 1], 'dilations': [1, 1], 'groups': 1, 'output_padding': [0, 0], 'transposed': True}, - {'weights_shape': [3, 3, 1, 1], 'strides': [1, 1], 'pads': [ - 1, 1], 'dilations': [1, 1], 'groups': 1, 'output_padding': [0, 0], 'transposed': False}, - {'weights_shape': [3, 1, 1, 1], 'strides': [1, 1], 'bias_shape': [1], 'pads': [ - 3, 1], 'dilations': [1, 1], 'groups': 1, 'output_padding': [0, 0], 'transposed': True}, - {'weights_shape': [3, 3, 1, 1], 'strides': [1, 1], 'pads': [ - 3, 1], 'dilations': [1, 1], 'groups': 1, 'output_padding': [0, 0], 'transposed': False}, - {'weights_shape': [3, 1, 1, 1], 'strides': [1, 1], 'bias_shape': [1], 'pads': [ - 1, 0], 'dilations': [1, 1], 'groups': 1, 'output_padding': [0, 0], 'transposed': True}, - {'weights_shape': [3, 3, 1, 1], 'strides': [1, 1], 'pads': [ - 0, 
1], 'dilations': [1, 1], 'groups': 1, 'output_padding': [0, 0], 'transposed': False}, - {'weights_shape': [3, 1, 1, 1], 'strides': [1, 1], 'pads': [ - 1, 0], 'dilations': [1, 1], 'groups': 3, 'output_padding': [0, 0], 'transposed': True}, - {'weights_shape': [3, 1, 1, 1], 'strides': [1, 1], 'pads': [ - 0, 1], 'dilations': [1, 1], 'groups': 3, 'output_padding': [0, 0], 'transposed': False}, - {'weights_shape': [3, 1, 1, 1], 'strides': [1, 1], 'pads': [ - 1, 0], 'dilations': [2, 2], 'groups': 3, 'output_padding': [0, 0], 'transposed': True}, - {'weights_shape': [3, 1, 1, 1], 'strides': [1, 1], 'pads': [ - 0, 0], 'dilations': [2, 2], 'groups': 3, 'output_padding': [0, 0], 'transposed': False}, - {'weights_shape': [3, 1, 1, 1], 'strides': [2, 1], 'bias_shape': [1], 'pads': [ - 1, 0], 'dilations': [1, 1], 'groups': 1, 'output_padding': [0, 0], 'transposed': True}, - {'weights_shape': [3, 3, 1, 1], 'strides': [2, 1], 'pads': [ - 0, 0], 'dilations': [1, 1], 'groups': 1, 'output_padding': [0, 0], 'transposed': False}, - {'weights_shape': [3, 1, 1, 1], 'strides': [2, 2], 'bias_shape': [1], 'pads': [ - 0, 0], 'dilations': [1, 1], 'groups': 1, 'output_padding': [0, 0], 'transposed': True}, - {'weights_shape': [3, 3, 1, 1], 'strides': [2, 2], 'pads': [ - 0, 0], 'dilations': [1, 1], 'groups': 1, 'output_padding': [0, 0], 'transposed': False}, - {'weights_shape': [3, 3, 1, 1], 'strides': [2, 1], 'pads': [ - 0, 0], 'dilations': [1, 1], 'groups': 1, 'output_padding': [0, 0], 'transposed': False}, - {'weights_shape': [3, 1, 1, 1], 'strides': [2, 2], 'bias_shape': [1], 'pads': [ - 0, 0], 'dilations': [1, 1], 'groups': 1, 'output_padding': [0, 0], 'transposed': True}, - {'weights_shape': [3, 1, 1, 1], 'strides': [2, 2], 'bias_shape': [1], 'pads': [ - 1, 1], 'dilations': [2, 2], 'groups': 1, 'output_padding': [1, 1], 'transposed': True}, - ] class TestConvolutionOperator(BaseOpenvinoOpTest): - def create_model(self, weights_shape, strides, pads, dilations, groups, bias, transposed, 
output_padding=0, - bias_shape=None, underscore=False): + def create_model( + self, + weights_shape, + strides, + pads, + dilations, + groups, + bias, + transposed, + output_padding=0, + bias_shape=None, + underscore=False, + ): bias_dim = 0 @@ -59,7 +228,9 @@ def __init__(self): self.bias_shape = bias_shape if self.bias_shape is None: self.bias_shape = weights_shape[bias_dim] - self.bias = torch.nn.Parameter(torch.randn(self.bias_shape)) if bias else None + self.bias = ( + torch.nn.Parameter(torch.randn(self.bias_shape)) if bias else None + ) self.strides = strides self.pads = pads self.dilations = dilations @@ -68,17 +239,35 @@ def __init__(self): self.output_padding = output_padding if underscore: self.forward = self.forward_ - + def forward(self, x): return torch.convolution( - x, self.weight, self.bias, self.strides, self.pads, self.dilations, self.transposed, - self.output_padding, self.groups + x, + self.weight, + self.bias, + self.strides, + self.pads, + self.dilations, + self.transposed, + self.output_padding, + self.groups, ) def forward_(self, x): return torch._convolution( - x, self.weight, self.bias, self.strides, self.pads, self.dilations, self.transposed, - self.output_padding, self.groups, False, False, False, False + x, + self.weight, + self.bias, + self.strides, + self.pads, + self.dilations, + self.transposed, + self.output_padding, + self.groups, + False, + False, + False, + False, ) return Convolution() @@ -89,17 +278,19 @@ def test_convolution(self): for params in d2_params: with self.subTest(params=params, bias=bias, underscore=underscore): bias_shape = None - if 'bias_shape' in params: - bias_shape = params['bias_shape'] - module = self.create_model(weights_shape=params['weights_shape'], - strides=params['strides'], - pads=params['pads'], - dilations=params['dilations'], - groups=params['groups'], - output_padding=params['output_padding'], - transposed=params['transposed'], - bias_shape=bias_shape, - bias=bias, - underscore=underscore) + if 
"bias_shape" in params: + bias_shape = params["bias_shape"] + module = self.create_model( + weights_shape=params["weights_shape"], + strides=params["strides"], + pads=params["pads"], + dilations=params["dilations"], + groups=params["groups"], + output_padding=params["output_padding"], + transposed=params["transposed"], + bias_shape=bias_shape, + bias=bias, + underscore=underscore, + ) sample_input = (torch.randn(1, 3, 10, 10),) self.execute_layer_test(module, sample_input) diff --git a/backends/openvino/tests/ops/test_mean.py b/backends/openvino/tests/ops/test_mean.py index 3315fd1e61d..9050ceb90af 100644 --- a/backends/openvino/tests/ops/test_mean.py +++ b/backends/openvino/tests/ops/test_mean.py @@ -1,13 +1,40 @@ -from executorch.backends.openvino.tests.ops.base_openvino_op_test import BaseOpenvinoOpTest import torch +from executorch.backends.openvino.tests.ops.base_openvino_op_test import ( + BaseOpenvinoOpTest, +) -op_params = [{'axes': None, 'keep_dim': None, 'dtype': None, }, - {'axes': None, 'keep_dim': None, 'dtype': "float64",}, - {'axes': None, 'keep_dim': None, 'dtype': "float32",}, - {'axes': None, 'keep_dim': None, 'dtype': "int32", }, - {'axes': 0, 'keep_dim': False, 'dtype': None, }, - {'axes': 0, 'keep_dim': False, 'dtype': None, }, - ] +op_params = [ + { + "axes": None, + "keep_dim": None, + "dtype": None, + }, + { + "axes": None, + "keep_dim": None, + "dtype": "float64", + }, + { + "axes": None, + "keep_dim": None, + "dtype": "float32", + }, + { + "axes": None, + "keep_dim": None, + "dtype": "int32", + }, + { + "axes": 0, + "keep_dim": False, + "dtype": None, + }, + { + "axes": 0, + "keep_dim": False, + "dtype": None, + }, +] dtypes = { "float32": torch.float32, @@ -15,9 +42,10 @@ "int32": torch.int32, "int64": torch.int64, "int8": torch.int8, - "uint8": torch.uint8 + "uint8": torch.uint8, } + class TestMeanOperator(BaseOpenvinoOpTest): def create_model(self, axes, keep_dims, dtype): @@ -46,14 +74,17 @@ def forward(self, x): return Mean(axes, 
keep_dims, pt_dtype) - def test_mean(self): for params in op_params: with self.subTest(params=params): - module = self.create_model(axes=params['axes'], - keep_dims=params['keep_dim'], - dtype=params['dtype']) + module = self.create_model( + axes=params["axes"], + keep_dims=params["keep_dim"], + dtype=params["dtype"], + ) - sample_input = (torch.randint(-10, 10, (1, 3, 224, 224)).to(dtype=torch.float32),) + sample_input = ( + torch.randint(-10, 10, (1, 3, 224, 224)).to(dtype=torch.float32), + ) self.execute_layer_test(module, sample_input) diff --git a/backends/openvino/tests/ops/test_permute.py b/backends/openvino/tests/ops/test_permute.py index 1de60db3965..28ef5ab4369 100644 --- a/backends/openvino/tests/ops/test_permute.py +++ b/backends/openvino/tests/ops/test_permute.py @@ -1,9 +1,13 @@ -from executorch.backends.openvino.tests.ops.base_openvino_op_test import BaseOpenvinoOpTest import torch +from executorch.backends.openvino.tests.ops.base_openvino_op_test import ( + BaseOpenvinoOpTest, +) + +op_params = [ + {"order": [0, 2, 3, 1]}, + {"order": [0, 3, 1, 2]}, +] -op_params = [{'order': [0, 2, 3, 1] }, - {'order': [0, 3, 1, 2] }, - ] class TestPermuteOperator(BaseOpenvinoOpTest): @@ -19,11 +23,10 @@ def forward(self, x): return Permute(order) - def test_permute(self): for params in op_params: with self.subTest(params=params): - module = self.create_model(order=params['order']) + module = self.create_model(order=params["order"]) sample_input = (torch.randn(1, 3, 224, 224),) diff --git a/backends/openvino/tests/ops/test_pooling.py b/backends/openvino/tests/ops/test_pooling.py index 60ab2f9edfa..0315c8fc8b8 100644 --- a/backends/openvino/tests/ops/test_pooling.py +++ b/backends/openvino/tests/ops/test_pooling.py @@ -1,20 +1,34 @@ -from executorch.backends.openvino.tests.ops.base_openvino_op_test import BaseOpenvinoOpTest import torch +from executorch.backends.openvino.tests.ops.base_openvino_op_test import ( + BaseOpenvinoOpTest, +) + +d2_params = [ + 
{"kernel_size": [3, 3], "stride": 1, "padding": 0}, + {"kernel_size": [3, 3], "stride": [1, 1], "padding": 1}, + {"kernel_size": [3, 3], "stride": [1, 1], "padding": [0, 1]}, + {"kernel_size": [3, 3], "stride": [1, 1], "padding": [1, 0]}, + {"kernel_size": [3, 3], "stride": [2, 1], "padding": 0}, + {"kernel_size": [2, 1], "stride": [2, 1], "padding": 0}, + {"kernel_size": [2, 1], "stride": None, "padding": 0}, + {"kernel_size": [2, 1], "stride": [], "padding": 0}, + {"kernel_size": [8, 8], "stride": [8, 4], "padding": 1}, +] -d2_params = [{'kernel_size': [3, 3], 'stride': 1, 'padding': 0}, - {'kernel_size': [3, 3], 'stride': [1, 1], 'padding': 1}, - {'kernel_size': [3, 3], 'stride': [1, 1], 'padding': [0, 1]}, - {'kernel_size': [3, 3], 'stride': [1, 1], 'padding': [1, 0]}, - {'kernel_size': [3, 3], 'stride': [2, 1], 'padding': 0}, - {'kernel_size': [2, 1], 'stride': [2, 1], 'padding': 0}, - {'kernel_size': [2, 1], 'stride': None, 'padding': 0}, - {'kernel_size': [2, 1], 'stride': [], 'padding': 0}, - {'kernel_size': [8, 8], 'stride': [8, 4], 'padding': 1}, - ] class TestPoolingOperator(BaseOpenvinoOpTest): - def create_model(self, op_type, kernel_size, stride, padding, dilation=1, ceil_mode=True, count_include_pad=True, dtype=torch.float32): + def create_model( + self, + op_type, + kernel_size, + stride, + padding, + dilation=1, + ceil_mode=True, + count_include_pad=True, + dtype=torch.float32, + ): class MaxPoolingBase(torch.nn.Module): def __init__(self): @@ -31,14 +45,27 @@ def forward(self, x): class MaxPool2D(MaxPoolingBase): def forward(self, x): - return torch.nn.functional.max_pool2d(x.to(self.dtype), self.kernel_size, self.stride, self.padding, self.dilation, - self.ceil_mode) + return torch.nn.functional.max_pool2d( + x.to(self.dtype), + self.kernel_size, + self.stride, + self.padding, + self.dilation, + self.ceil_mode, + ) class MaxPool2DIndices(MaxPoolingBase): def forward(self, x): - return torch.nn.functional.max_pool2d(x, self.kernel_size, 
self.stride, self.padding, self.dilation, - self.ceil_mode, return_indices=True) - + return torch.nn.functional.max_pool2d( + x, + self.kernel_size, + self.stride, + self.padding, + self.dilation, + self.ceil_mode, + return_indices=True, + ) + ops = { "MaxPool2D": MaxPool2D, "MaxPool2DIndices": MaxPool2DIndices, @@ -52,14 +79,16 @@ def test_pooling2d(self): for params in d2_params: with self.subTest(params=params): bias_shape = None - if 'bias_shape' in params: - bias_shape = params['bias_shape'] - module = self.create_model(op_type='MaxPool2D', - kernel_size=params['kernel_size'], - stride=params['stride'], - padding=params['padding'], - dilation=1, - ceil_mode=True, - count_include_pad=True) + if "bias_shape" in params: + bias_shape = params["bias_shape"] + module = self.create_model( + op_type="MaxPool2D", + kernel_size=params["kernel_size"], + stride=params["stride"], + padding=params["padding"], + dilation=1, + ceil_mode=True, + count_include_pad=True, + ) sample_input = (torch.randn(1, 3, 15, 15),) self.execute_layer_test(module, sample_input) diff --git a/backends/openvino/tests/ops/test_unary_ops.py b/backends/openvino/tests/ops/test_unary_ops.py index 9a5866d6e65..99787e587b3 100644 --- a/backends/openvino/tests/ops/test_unary_ops.py +++ b/backends/openvino/tests/ops/test_unary_ops.py @@ -1,5 +1,7 @@ -from executorch.backends.openvino.tests.ops.base_openvino_op_test import BaseOpenvinoOpTest import torch +from executorch.backends.openvino.tests.ops.base_openvino_op_test import ( + BaseOpenvinoOpTest, +) OPS = [ @@ -16,7 +18,7 @@ def __init__(self, op, dtype): super().__init__() self.dtype = dtype self.op = op - + def forward(self, x): x1 = x.to(self.dtype) y = self.op(x1) @@ -24,7 +26,6 @@ def forward(self, x): return UnaryOp(op, dtype) - def test_unary_op(self): for op in OPS: with self.subTest(op=OPS): diff --git a/backends/openvino/tests/ops/test_view.py b/backends/openvino/tests/ops/test_view.py index f5450a10af9..8aef13fffa0 100644 --- 
a/backends/openvino/tests/ops/test_view.py +++ b/backends/openvino/tests/ops/test_view.py @@ -1,9 +1,13 @@ -from executorch.backends.openvino.tests.ops.base_openvino_op_test import BaseOpenvinoOpTest import torch +from executorch.backends.openvino.tests.ops.base_openvino_op_test import ( + BaseOpenvinoOpTest, +) + +op_params = [ + {"input_shape": [2, 3, 2], "target_shape": [2, 6]}, + {"input_shape": [4], "target_shape": [2, 2]}, +] -op_params = [{'input_shape': [2, 3, 2], 'target_shape': [2, 6] }, - {'input_shape': [4], 'target_shape': [2, 2] }, - ] class TestViewOperator(BaseOpenvinoOpTest): @@ -20,13 +24,12 @@ def forward(self, input_tensor): return View(target_shape) - def test_view(self): for params in op_params: with self.subTest(params=params): - module = self.create_model(params['target_shape']) + module = self.create_model(params["target_shape"]) - sample_input = (torch.randn(params['input_shape']),) + sample_input = (torch.randn(params["input_shape"]),) self.execute_layer_test(module, sample_input) diff --git a/backends/openvino/tests/test_runner.py b/backends/openvino/tests/test_runner.py index 89763d1d960..4021114f60f 100644 --- a/backends/openvino/tests/test_runner.py +++ b/backends/openvino/tests/test_runner.py @@ -1,5 +1,6 @@ -import unittest import argparse +import unittest + class OpenvinoTestSuite(unittest.TestSuite): @@ -10,7 +11,10 @@ def __init__(self, *args, **kwargs): def addTest(self, test): # Set test parameters if this is an instance of TestOpenvino - from executorch.backends.openvino.tests.ops.base_openvino_op_test import BaseOpenvinoOpTest + from executorch.backends.openvino.tests.ops.base_openvino_op_test import ( + BaseOpenvinoOpTest, + ) + if isinstance(test, BaseOpenvinoOpTest): if "device" in self.test_params: test.device = self.test_params["device"] @@ -61,6 +65,7 @@ def parse_arguments(): test_params["test_type"] = args.test_type return test_params + if __name__ == "__main__": loader = unittest.TestLoader() # Replace the default 
test suite with a custom test suite to be able to @@ -69,7 +74,7 @@ def parse_arguments(): test_params = parse_arguments() loader.suiteClass.test_params = test_params # Discover all existing op tests in "ops" folder - suite = loader.discover(test_params['test_type'], pattern=test_params['pattern']) + suite = loader.discover(test_params["test_type"], pattern=test_params["pattern"]) # Start running tests result = unittest.TextTestRunner().run(suite) if result.wasSuccessful(): diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 17e2623d14f..8bf1d4a1e88 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -12,6 +12,8 @@ from pathlib import Path import executorch + +import nncf import numpy as np import timm import torch @@ -19,21 +21,17 @@ import torchvision.models as torchvision_models from executorch.backends.openvino import OpenVINOQuantizer from executorch.backends.openvino.partitioner import OpenvinoPartitioner -from executorch.exir import EdgeProgramManager -from executorch.exir import to_edge_transform_and_lower +from executorch.exir import EdgeProgramManager, to_edge_transform_and_lower from executorch.exir.backend.backend_details import CompileSpec +from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e from sklearn.metrics import accuracy_score from timm.data import resolve_data_config from timm.data.transforms_factory import create_transform -from torch.ao.quantization.quantize_pt2e import convert_pt2e -from torch.ao.quantization.quantize_pt2e import prepare_pt2e +from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e from torch.export import export from torch.export.exported_program import ExportedProgram from transformers import AutoModel -import nncf -from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e - # Function to load a model based on the selected suite 
def load_model(suite: str, model_name: str): @@ -51,13 +49,23 @@ def load_model(suite: str, model_name: str): raise ValueError(msg) -def load_calibration_dataset(dataset_path: str, batch_size: int, suite: str, model: torch.nn.Module, model_name: str): +def load_calibration_dataset( + dataset_path: str, + batch_size: int, + suite: str, + model: torch.nn.Module, + model_name: str, +): val_dir = f"{dataset_path}/val" if suite == "torchvision": - transform = torchvision_models.get_model_weights(model_name).DEFAULT.transforms() + transform = torchvision_models.get_model_weights( + model_name + ).DEFAULT.transforms() elif suite == "timm": - transform = create_transform(**resolve_data_config(model.pretrained_cfg, model=model)) + transform = create_transform( + **resolve_data_config(model.pretrained_cfg, model=model) + ) else: msg = f"Validation is not supported yet for the suite {suite}" raise ValueError(msg) @@ -65,7 +73,11 @@ def load_calibration_dataset(dataset_path: str, batch_size: int, suite: str, mod val_dataset = datasets.ImageFolder(val_dir, transform=transform) calibration_dataset = torch.utils.data.DataLoader( - val_dataset, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True + val_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=0, + pin_memory=True, ) return calibration_dataset @@ -86,14 +98,18 @@ def dump_inputs(calibration_dataset, dest_path): def quantize_model( - captured_model: torch.fx.GraphModule, calibration_dataset: torch.utils.data.DataLoader, use_nncf: bool + captured_model: torch.fx.GraphModule, + calibration_dataset: torch.utils.data.DataLoader, + use_nncf: bool, ) -> torch.fx.GraphModule: quantizer = OpenVINOQuantizer() print("PTQ: Quantize the model") default_subset_size = 300 batch_size = calibration_dataset.batch_size - subset_size = (default_subset_size // batch_size) + int(default_subset_size % batch_size > 0) + subset_size = (default_subset_size // batch_size) + int( + default_subset_size % batch_size > 0 + 
) def transform(x): return x[0] @@ -104,7 +120,9 @@ def transform(x): captured_model, quantizer, subset_size=subset_size, - calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform), + calibration_dataset=nncf.Dataset( + calibration_dataset, transform_func=transform + ), fold_quantize=False, ) else: @@ -120,7 +138,9 @@ def transform(x): return quantized_model -def validate_model(model_file_name: str, calibration_dataset: torch.utils.data.DataLoader) -> float: +def validate_model( + model_file_name: str, calibration_dataset: torch.utils.data.DataLoader +) -> float: # 1: Dump inputs dest_path = Path("tmp_inputs") out_path = Path("tmp_outputs") @@ -171,7 +191,9 @@ def main( model = model.eval() if dataset_path: - calibration_dataset = load_calibration_dataset(dataset_path, batch_size, suite, model, model_name) + calibration_dataset = load_calibration_dataset( + dataset_path, batch_size, suite, model, model_name + ) input_shape = tuple(next(iter(calibration_dataset))[0].shape) print(f"Input shape retrieved from the model config: {input_shape}") # Ensure input_shape is a tuple @@ -196,14 +218,21 @@ def main( msg = "Quantization requires a calibration dataset." 
raise ValueError(msg) quantized_model = quantize_model( - aten_dialect.module(), calibration_dataset, use_nncf=quantization_flow == "nncf" + aten_dialect.module(), + calibration_dataset, + use_nncf=quantization_flow == "nncf", ) aten_dialect: ExportedProgram = export(quantized_model, example_args) # Convert to edge dialect and lower the module to the backend with a custom partitioner compile_spec = [CompileSpec("device", device.encode())] - lowered_module: EdgeProgramManager = to_edge_transform_and_lower(aten_dialect, partitioner=[OpenvinoPartitioner(compile_spec),]) + lowered_module: EdgeProgramManager = to_edge_transform_and_lower( + aten_dialect, + partitioner=[ + OpenvinoPartitioner(compile_spec), + ], + ) # Apply backend-specific passes exec_prog = lowered_module.to_executorch( @@ -230,7 +259,6 @@ def main( print(f"acc@1: {acc_top1}") - if __name__ == "__main__": # Argument parser for dynamic inputs parser = argparse.ArgumentParser(description="Export models with executorch.") @@ -241,7 +269,9 @@ def main( choices=["timm", "torchvision", "huggingface"], help="Select the model suite (timm, torchvision, huggingface).", ) - parser.add_argument("--model", type=str, required=True, help="Model name to be loaded.") + parser.add_argument( + "--model", type=str, required=True, help="Model name to be loaded." + ) parser.add_argument( "--input_shape", type=eval, @@ -254,7 +284,9 @@ def main( help="Batch size for the validation. Default batch_size == 1." " The dataset length must be evenly divisible by the batch size.", ) - parser.add_argument("--quantize", action="store_true", help="Enable model quantization.") + parser.add_argument( + "--quantize", action="store_true", help="Enable model quantization." 
+ ) parser.add_argument( "--validate", action="store_true", diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp index 17cc91ba3e9..92b358ef24d 100644 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -29,14 +29,19 @@ static uint8_t method_allocator_pool[4 * 1024U * 1024U]; // 4 MB // Define command-line flags for model path, the number of iterations, input // list path, and output folder path -DEFINE_string(model_path, "", - "Path to the model serialized in flatbuffer format (required)."); +DEFINE_string( + model_path, + "", + "Path to the model serialized in flatbuffer format (required)."); DEFINE_int32(num_iter, 1, "Number of inference iterations (default is 1)."); -DEFINE_string(input_list_path, "", - "Path to the input list file which includes the list of raw " - "input tensor files (optional)."); DEFINE_string( - output_folder_path, "", + input_list_path, + "", + "Path to the input list file which includes the list of raw " + "input tensor files (optional)."); +DEFINE_string( + output_folder_path, + "", "Path to the output folder to save raw output tensor files (optional)."); using executorch::extension::FileDataLoader; @@ -54,13 +59,13 @@ using executorch::runtime::Span; using executorch::runtime::TensorInfo; std::function build_set_input_tensor( - Result &method, std::vector &inputs, + Result& method, + std::vector& inputs, const std::vector> input_paths) { return [&inputs, &method, input_paths](size_t idx) -> void { const MethodMeta method_meta = method->method_meta(); for (int input_index = 0; input_index < method->inputs_size(); ++input_index) { - Result tensor_meta = method_meta.input_tensor_meta(input_index); auto input_data_ptr = inputs[input_index].toTensor().data_ptr(); @@ -72,24 +77,26 @@ std::function build_set_input_tensor( ET_CHECK_MSG( file_size == 
tensor_meta->nbytes(), "Input(%d) size mismatch. file bytes: %zu, tensor bytes: %zu", - input_index, file_size, tensor_meta->nbytes()); + input_index, + file_size, + tensor_meta->nbytes()); fin.seekg(0, fin.beg); - fin.read(static_cast(input_data_ptr), file_size); + fin.read(static_cast(input_data_ptr), file_size); fin.close(); } }; } -std::function -build_dump_outputs(std::vector &outputs, const size_t output_size, - const std::string output_folder_path) { +std::function build_dump_outputs( + std::vector& outputs, + const size_t output_size, + const std::string output_folder_path) { return [&outputs, output_folder_path, output_size](size_t idx) -> void { for (size_t output_index = 0; output_index < output_size; output_index++) { auto output_tensor = outputs[output_index].toTensor(); auto output_file_name = output_folder_path + "/output_" + - std::to_string(idx) + "_" + - std::to_string(output_index) + ".raw"; + std::to_string(idx) + "_" + std::to_string(output_index) + ".raw"; std::ofstream fout(output_file_name.c_str(), std::ios::binary); fout.write(output_tensor.const_data_ptr(), output_tensor.nbytes()); fout.close(); @@ -97,23 +104,24 @@ build_dump_outputs(std::vector &outputs, const size_t output_size, }; } -std::vector> -get_inputs_paths(const char *input_list_path) { +std::vector> get_inputs_paths( + const char* input_list_path) { size_t idx = 0; - auto split_and_add_prefix = [](std::string s, std::string delimiter, std::string prefix = "") { - size_t pos_start = 0, pos_end, delim_len = delimiter.length(); - std::string token; - std::vector res; - - while ((pos_end = s.find(delimiter, pos_start)) != std::string::npos) { - token = s.substr(pos_start, pos_end - pos_start); - pos_start = pos_end + delim_len; - res.push_back(prefix + token); - } - res.push_back(prefix + s.substr(pos_start)); - return res; - }; + auto split_and_add_prefix = + [](std::string s, std::string delimiter, std::string prefix = "") { + size_t pos_start = 0, pos_end, delim_len = 
delimiter.length(); + std::string token; + std::vector res; + + while ((pos_end = s.find(delimiter, pos_start)) != std::string::npos) { + token = s.substr(pos_start, pos_end - pos_start); + pos_start = pos_end + delim_len; + res.push_back(prefix + token); + } + res.push_back(prefix + s.substr(pos_start)); + return res; + }; // Read raw input tensor file names from input list file and // iterate each raw input tensor file to read values @@ -124,7 +132,7 @@ get_inputs_paths(const char *input_list_path) { std::string inputs_dir = ""; size_t last_pos = std::string(input_list_path).rfind('/'); if (last_pos != std::string::npos) { - inputs_dir = std::string(input_list_path).substr(0, last_pos+1); + inputs_dir = std::string(input_list_path).substr(0, last_pos + 1); } std::string file_path; auto retval = std::vector>(); @@ -138,7 +146,7 @@ get_inputs_paths(const char *input_list_path) { return retval; } -int main(int argc, char **argv) { +int main(int argc, char** argv) { // Initialize the runtime environment executorch::runtime::runtime_init(); @@ -155,15 +163,17 @@ int main(int argc, char **argv) { } // Retrieve the model path and number of iterations - const char *model_path = FLAGS_model_path.c_str(); + const char* model_path = FLAGS_model_path.c_str(); int num_iterations = FLAGS_num_iter; std::cout << "Model path: " << model_path << std::endl; std::cout << "Number of iterations: " << num_iterations << std::endl; // Load the model using FileDataLoader Result loader = FileDataLoader::from(model_path); - ET_CHECK_MSG(loader.ok(), "FileDataLoader::from() failed: 0x%" PRIx32, - static_cast(loader.error())); + ET_CHECK_MSG( + loader.ok(), + "FileDataLoader::from() failed: 0x%" PRIx32, + static_cast(loader.error())); // Load the program from the loaded model Result program = Program::load(&loader.get()); @@ -175,7 +185,7 @@ int main(int argc, char **argv) { // Retrieve the method name from the program (assumes the first method is // used) - const char *method_name = nullptr; 
+ const char* method_name = nullptr; { const auto method_name_result = program->get_method_name(0); ET_CHECK_MSG(method_name_result.ok(), "Program has no methods"); @@ -185,8 +195,11 @@ int main(int argc, char **argv) { // Retrieve metadata about the method Result method_meta = program->method_meta(method_name); - ET_CHECK_MSG(method_meta.ok(), "Failed to get method_meta for %s: 0x%" PRIx32, - method_name, static_cast(method_meta.error())); + ET_CHECK_MSG( + method_meta.ok(), + "Failed to get method_meta for %s: 0x%" PRIx32, + method_name, + static_cast(method_meta.error())); // Set up a memory allocator for the method MemoryAllocator method_allocator{ @@ -211,15 +224,19 @@ int main(int argc, char **argv) { // Load the method into the program Result method = program->load_method(method_name, &memory_manager); - ET_CHECK_MSG(method.ok(), - "Loading of method %s failed with status 0x%" PRIx32, - method_name, static_cast(method.error())); + ET_CHECK_MSG( + method.ok(), + "Loading of method %s failed with status 0x%" PRIx32, + method_name, + static_cast(method.error())); ET_LOG(Info, "Method loaded."); // Prepare the input tensors for the method auto method_inputs = prepare_input_tensors(*method); - ET_CHECK_MSG(method_inputs.ok(), "Could not prepare inputs: 0x%" PRIx32, - static_cast(method_inputs.error())); + ET_CHECK_MSG( + method_inputs.ok(), + "Could not prepare inputs: 0x%" PRIx32, + static_cast(method_inputs.error())); Error status = Error::Ok; std::vector inputs(method->inputs_size()); @@ -231,13 +248,17 @@ int main(int argc, char **argv) { // If the input path list is provided, read input tensors from the files std::function set_input_tensor; if (!FLAGS_input_list_path.empty()) { - const char *input_list_path = FLAGS_input_list_path.c_str(); - ET_LOG(Info, "Loading input tensors from the list provided in %s.", - input_list_path); + const char* input_list_path = FLAGS_input_list_path.c_str(); + ET_LOG( + Info, + "Loading input tensors from the list provided in 
%s.", + input_list_path); const auto input_paths = get_inputs_paths(input_list_path); num_iterations = input_paths.size(); - ET_LOG(Info, "Number of iters is set to the len of the inputs: %u.", - num_iterations); + ET_LOG( + Info, + "Number of iters is set to the len of the inputs: %u.", + num_iterations); set_input_tensor = build_set_input_tensor(method, inputs, input_paths); } else { @@ -256,11 +277,13 @@ int main(int argc, char **argv) { // If output folder path is provided, save output tensors // into raw tensor files. - const char *output_folder_path = FLAGS_output_folder_path.c_str(); - ET_LOG(Info, "Saving output tensors into the output folder: %s.", - output_folder_path); - dump_outputs = build_dump_outputs(outputs, outputs.size(), - std::string(output_folder_path)); + const char* output_folder_path = FLAGS_output_folder_path.c_str(); + ET_LOG( + Info, + "Saving output tensors into the output folder: %s.", + output_folder_path); + dump_outputs = build_dump_outputs( + outputs, outputs.size(), std::string(output_folder_path)); } else { dump_outputs = [](size_t idx) {}; @@ -280,17 +303,22 @@ int main(int argc, char **argv) { double elapsed_time = std::chrono::duration_cast( after_exec - before_exec) .count() / - 1000.0; + 1000.0; total_time_elapsed += elapsed_time; } // Log execution time and average time per iteration - ET_LOG(Info, "%d inference took %f ms, avg %f ms", num_iterations, - total_time_elapsed, - total_time_elapsed / static_cast(num_iterations)); - ET_CHECK_MSG(status == Error::Ok, - "Execution of method %s failed with status 0x%" PRIx32, - method_name, static_cast(status)); + ET_LOG( + Info, + "%d inference took %f ms, avg %f ms", + num_iterations, + total_time_elapsed, + total_time_elapsed / static_cast(num_iterations)); + ET_CHECK_MSG( + status == Error::Ok, + "Execution of method %s failed with status 0x%" PRIx32, + method_name, + static_cast(status)); ET_LOG(Info, "Model executed successfully."); return 0; From 
ae661a18e2e623f3a850e3d1d7cd5cae64d3728a Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Tue, 18 Feb 2025 12:03:15 -0800 Subject: [PATCH 076/188] Fix for input path bug in validation --- examples/openvino/aot/aot_openvino_compiler.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 8bf1d4a1e88..ddcf43b9658 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -88,10 +88,11 @@ def dump_inputs(calibration_dataset, dest_path): for idx, data in enumerate(calibration_dataset): feature, target = data targets.extend(target) - file_name = f"{dest_path}/input_{idx}_0.raw" + file_name = f"input_{idx}_0.raw" + file_path = f"{dest_path}/{file_name}" if not isinstance(feature, torch.Tensor): feature = torch.tensor(feature) - feature.detach().numpy().tofile(file_name) + feature.detach().numpy().tofile(file_path) input_files.append(file_name) return input_files, targets From 9de858e15f88fa38a88a139a7ae2678bd0abe74f Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Tue, 18 Feb 2025 14:05:48 -0800 Subject: [PATCH 077/188] Code formatting --- backends/openvino/partitioner.py | 3 --- backends/openvino/preprocess.py | 11 +---------- .../openvino/tests/ops/base_openvino_op_test.py | 15 ++++++++------- backends/openvino/tests/ops/test_pooling.py | 3 --- examples/openvino/aot/aot_openvino_compiler.py | 2 +- 5 files changed, 10 insertions(+), 24 deletions(-) diff --git a/backends/openvino/partitioner.py b/backends/openvino/partitioner.py index f07be7c8410..9acde3dff5e 100644 --- a/backends/openvino/partitioner.py +++ b/backends/openvino/partitioner.py @@ -7,7 +7,6 @@ from typing import Callable, final, List, Optional, Tuple import torch -import torch.fx as fx from executorch.backends.openvino.preprocess import OpenvinoBackend from executorch.exir.backend.backend_details import CompileSpec from 
executorch.exir.backend.partitioner import ( @@ -89,8 +88,6 @@ def ops_to_not_decompose( return (ops_not_decompose, None) def partition(self, exported_program: ExportedProgram) -> PartitionResult: - gm = fx.symbolic_trace(exported_program.graph_module) - partitioner = CapabilityBasedPartitioner( exported_program.graph_module, OpenvinoOperatorsSupport(self._op_types_to_skip, self._op_names_to_skip), diff --git a/backends/openvino/preprocess.py b/backends/openvino/preprocess.py index 057702b162e..930904d8f90 100644 --- a/backends/openvino/preprocess.py +++ b/backends/openvino/preprocess.py @@ -4,12 +4,8 @@ # except in compliance with the License. See the license file in the root # directory of this source tree for more details. -import contextlib -import struct +from typing import final, List -from typing import cast, final, List - -import torch from executorch.exir.backend.backend_details import ( BackendDetails, ExportedProgram, @@ -27,17 +23,12 @@ def preprocess( cls, edge_program: ExportedProgram, module_compile_spec: List[CompileSpec] ) -> PreprocessResult: - name_to_node_mappings = {node.name: node for node in edge_program.graph.nodes} input_names = edge_program.graph_signature.user_inputs - output_names = edge_program.graph_signature.user_outputs args = [] for node in edge_program.graph.nodes: if node.target in input_names: args.append(node.meta["val"]) - input_shapes = [] - output_shapes = [] - compile_options = {} for spec in module_compile_spec: compile_options[spec.key] = spec.value.decode() diff --git a/backends/openvino/tests/ops/base_openvino_op_test.py b/backends/openvino/tests/ops/base_openvino_op_test.py index 8b92fc8bc17..4a18b2995f8 100644 --- a/backends/openvino/tests/ops/base_openvino_op_test.py +++ b/backends/openvino/tests/ops/base_openvino_op_test.py @@ -9,7 +9,7 @@ import torch from executorch.backends.openvino.partitioner import OpenvinoPartitioner from executorch.backends.openvino.preprocess import OpenvinoBackend -from executorch.exir 
import EdgeProgramManager, to_edge +from executorch.exir import EdgeProgramManager, to_edge_transform_and_lower from executorch.exir.backend.backend_details import CompileSpec from torch.export import export, ExportedProgram @@ -33,13 +33,14 @@ def execute_layer_test( # Export to aten dialect using torch.export aten_dialect: ExportedProgram = export(module, sample_inputs) - # Convert to edge dialect - edge_program: EdgeProgramManager = to_edge(aten_dialect) - to_be_lowered_module = edge_program.exported_program() - - # Lower the module to the backend with a custom partitioner + # Convert to edge dialect and lower the module to the backend with a custom partitioner compile_spec = [CompileSpec("device", self.device.encode())] - lowered_module = edge_program.to_backend(OpenvinoPartitioner(compile_spec)) + lowered_module: EdgeProgramManager = to_edge_transform_and_lower( + aten_dialect, + partitioner=[ + OpenvinoPartitioner(compile_spec), + ], + ) # Apply backend-specific passes exec_prog = lowered_module.to_executorch( diff --git a/backends/openvino/tests/ops/test_pooling.py b/backends/openvino/tests/ops/test_pooling.py index 0315c8fc8b8..bc42b52faaa 100644 --- a/backends/openvino/tests/ops/test_pooling.py +++ b/backends/openvino/tests/ops/test_pooling.py @@ -78,9 +78,6 @@ def forward(self, x): def test_pooling2d(self): for params in d2_params: with self.subTest(params=params): - bias_shape = None - if "bias_shape" in params: - bias_shape = params["bias_shape"] module = self.create_model( op_type="MaxPool2D", kernel_size=params["kernel_size"], diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index ddcf43b9658..f77ddcb75c7 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -17,7 +17,6 @@ import numpy as np import timm import torch -import torchvision.datasets as datasets import torchvision.models as torchvision_models from 
executorch.backends.openvino import OpenVINOQuantizer from executorch.backends.openvino.partitioner import OpenvinoPartitioner @@ -30,6 +29,7 @@ from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e from torch.export import export from torch.export.exported_program import ExportedProgram +from torchvision import datasets from transformers import AutoModel From 88f482382cba14d6470057a99afdf2ccaf78c122 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Tue, 18 Feb 2025 14:54:26 -0800 Subject: [PATCH 078/188] Disable openvino build by default --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e9fae7ab18f..61c4ee64a9a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -204,7 +204,7 @@ option(EXECUTORCH_BUILD_MPS "Build the MPS backend" OFF) option(EXECUTORCH_BUILD_NEURON "Build the backends/mediatek directory" OFF) -option(EXECUTORCH_BUILD_OPENVINO "Build the Openvino backend" ON) +option(EXECUTORCH_BUILD_OPENVINO "Build the Openvino backend" OFF) option(EXECUTORCH_BUILD_PYBIND "Build the Python Bindings" OFF) From 685ceb1e68beb742eea81d96da0334e46ed07fee Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Tue, 18 Feb 2025 15:07:59 -0800 Subject: [PATCH 079/188] Enabled logging for example --- examples/openvino/openvino_build_example.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/openvino/openvino_build_example.sh b/examples/openvino/openvino_build_example.sh index ee16658941d..c1b6224ec21 100755 --- a/examples/openvino/openvino_build_example.sh +++ b/examples/openvino/openvino_build_example.sh @@ -24,6 +24,7 @@ main() { -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \ -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ + -DEXECUTORCH_ENABLE_LOGGING=ON \ -B"${build_dir}" From f1db517079253408d6211803d9ece5a95f8c4e86 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Tue, 18 Feb 2025 15:02:55 -0800 Subject: [PATCH 080/188] Update 
openvino commit id --- backends/openvino/README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/backends/openvino/README.md b/backends/openvino/README.md index 4141588aeae..368c25e228d 100644 --- a/backends/openvino/README.md +++ b/backends/openvino/README.md @@ -47,12 +47,11 @@ executorch Before you begin, ensure you have openvino installed and configured on your system: -## TODO: Update with the openvino commit/Release tag once the changes in OpenVINO are merged ## TODO: Add instructions for support with OpenVINO release package ```bash -git clone -b executorch_ov_backend https://github.com/ynimmaga/openvino -cd openvino +git clone https://github.com/openvinotoolkit/openvino.git +cd openvino && git checkout 20ad7cb git submodule update --init --recursive mkdir build cd build From 44b897924c45b8e57e3d8d7abc9ea48e8a62d1ae Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Tue, 18 Feb 2025 15:05:18 -0800 Subject: [PATCH 081/188] Update build-run-openvino.md --- docs/source/build-run-openvino.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/build-run-openvino.md b/docs/source/build-run-openvino.md index 4cc15b05edb..586a1b27fe8 100644 --- a/docs/source/build-run-openvino.md +++ b/docs/source/build-run-openvino.md @@ -72,8 +72,8 @@ Before you begin, ensure you have openvino installed and configured on your syst #### TODO: Add instructions for support with OpenVINO release package ```bash -git clone -b executorch_ov_backend https://github.com/ynimmaga/openvino -cd openvino +git clone https://github.com/openvinotoolkit/openvino.git +cd openvino && git checkout 20ad7cb git submodule update --init --recursive mkdir build cd build From b7d302e6ae73e92d7cb27eeb34df3087b2345216 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Tue, 18 Feb 2025 15:05:54 -0800 Subject: [PATCH 082/188] Update build-run-openvino.md --- docs/source/build-run-openvino.md | 1 - 1 file changed, 1 deletion(-) diff --git 
a/docs/source/build-run-openvino.md b/docs/source/build-run-openvino.md index 586a1b27fe8..2bcc3e6007d 100644 --- a/docs/source/build-run-openvino.md +++ b/docs/source/build-run-openvino.md @@ -68,7 +68,6 @@ executorch Before you begin, ensure you have openvino installed and configured on your system: -#### TODO: Update with the openvino commit/Release tag once the changes in OpenVINO are merged #### TODO: Add instructions for support with OpenVINO release package ```bash From d3daf538ab7c9e5959d66e873aac71b27c9259f1 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Tue, 18 Feb 2025 15:07:36 -0800 Subject: [PATCH 083/188] Update build-run-openvino.md --- docs/source/build-run-openvino.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/source/build-run-openvino.md b/docs/source/build-run-openvino.md index 2bcc3e6007d..142f872f2a0 100644 --- a/docs/source/build-run-openvino.md +++ b/docs/source/build-run-openvino.md @@ -74,8 +74,7 @@ Before you begin, ensure you have openvino installed and configured on your syst git clone https://github.com/openvinotoolkit/openvino.git cd openvino && git checkout 20ad7cb git submodule update --init --recursive -mkdir build -cd build +mkdir build && cd build cmake .. 
-DCMAKE_BUILD_TYPE=Release -DENABLE_PYTHON=ON make -j From c159421c70f32765d4d4ec8341a301922fbafe92 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Tue, 18 Feb 2025 15:08:26 -0800 Subject: [PATCH 084/188] Update README.md --- backends/openvino/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backends/openvino/README.md b/backends/openvino/README.md index 368c25e228d..da1d501b5c2 100644 --- a/backends/openvino/README.md +++ b/backends/openvino/README.md @@ -53,8 +53,7 @@ Before you begin, ensure you have openvino installed and configured on your syst git clone https://github.com/openvinotoolkit/openvino.git cd openvino && git checkout 20ad7cb git submodule update --init --recursive -mkdir build -cd build +mkdir build && cd build cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_PYTHON=ON make -j From f4d2ce30998e7f16d0a1f821f800c615a71c9f85 Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Tue, 18 Feb 2025 15:27:52 -0800 Subject: [PATCH 085/188] Added function comments with inputs and outputs --- backends/openvino/partitioner.py | 32 +++++++++- backends/openvino/preprocess.py | 9 +++ .../openvino/aot/aot_openvino_compiler.py | 60 +++++++++++++++++++ 3 files changed, 100 insertions(+), 1 deletion(-) diff --git a/backends/openvino/partitioner.py b/backends/openvino/partitioner.py index f07be7c8410..2360305735d 100644 --- a/backends/openvino/partitioner.py +++ b/backends/openvino/partitioner.py @@ -30,6 +30,12 @@ def __init__( op_types_to_skip: Optional[set] = None, op_names_to_skip: Optional[set] = None, ) -> None: + """ + Initializes the OpenvinoOperatorsSupport class. + + :param op_types_to_skip: A set of operator types to skip during support checking. + :param op_names_to_skip: A set of operator names to skip during support checking. 
+ """ if op_types_to_skip is None: op_types_to_skip = set() if op_names_to_skip is None: @@ -39,6 +45,12 @@ def __init__( self._op_names_to_skip = op_names_to_skip def is_node_supported(self, _, node: torch.fx.Node) -> bool: + """ + Checks if a given node is supported by OpenVINO. + + :param node: The FX graph node representing an operation. + :return: True if the node is supported, otherwise False. + """ if node.op != "call_function": return False @@ -71,6 +83,13 @@ def __init__( op_types_to_skip: Optional[set] = None, op_names_to_skip: Optional[set] = None, ) -> None: + """ + Initializes the OpenvinoPartitioner class. + + :param compile_spec: A list of compile specifications for OpenVINO. + :param op_types_to_skip: A set of operator types to skip during partitioning. + :param op_names_to_skip: A set of operator names to skip during partitioning. + """ self.delegation_spec = DelegationSpec(OpenvinoBackend.__name__, compile_spec) self._op_types_to_skip = op_types_to_skip self._op_names_to_skip = op_names_to_skip @@ -79,6 +98,13 @@ def ops_to_not_decompose( self, ep: ExportedProgram, ) -> Tuple[List[torch._ops.OpOverload], Optional[Callable[[torch.fx.Node], bool]]]: + """ + Returns a tuple containing a list of operations that should not be decomposed + and an optional function to filter nodes. + + :param ep: The exported program. + :return: A tuple consisting of a list of ops to keep and an optional filtering function. + """ ops_not_decompose = [ torch.ops.aten.pixel_shuffle.default, torch.ops.aten.upsample_bilinear2d.default, @@ -89,8 +115,12 @@ def ops_to_not_decompose( return (ops_not_decompose, None) def partition(self, exported_program: ExportedProgram) -> PartitionResult: - gm = fx.symbolic_trace(exported_program.graph_module) + """ + Partitions an exported program into supported and unsupported segments. + :param exported_program: The exported program. + :return: A PartitionResult containing the partitioned graph and delegation tags. 
+ """ partitioner = CapabilityBasedPartitioner( exported_program.graph_module, OpenvinoOperatorsSupport(self._op_types_to_skip, self._op_names_to_skip), diff --git a/backends/openvino/preprocess.py b/backends/openvino/preprocess.py index 057702b162e..9538560805b 100644 --- a/backends/openvino/preprocess.py +++ b/backends/openvino/preprocess.py @@ -26,7 +26,16 @@ class OpenvinoBackend(BackendDetails): def preprocess( cls, edge_program: ExportedProgram, module_compile_spec: List[CompileSpec] ) -> PreprocessResult: + """ + Preprocesses the exported program and compiles it for the OpenVINO backend. + Args: + edge_program (ExportedProgram): The exported program representing the model. + module_compile_spec (List[CompileSpec]): A list of compile specifications for the OpenVINO backend. + + Returns: + PreprocessResult: The result of preprocessing, including the compiled model bytes. + """ name_to_node_mappings = {node.name: node for node in edge_program.graph.nodes} input_names = edge_program.graph_signature.user_inputs output_names = edge_program.graph_signature.user_outputs diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 8bf1d4a1e88..0ace2df30c1 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -35,6 +35,17 @@ # Function to load a model based on the selected suite def load_model(suite: str, model_name: str): + """ + Loads a pre-trained model from the specified model suite. + + :param suite: The suite from which to load the model. Supported values are: + - "timm": Uses `timm.create_model` to load the model. + - "torchvision": Loads a model from `torchvision.models`. Raises an error if the model does not exist. + - "huggingface": Loads a transformer model using `AutoModel.from_pretrained`. + :param model_name: The name of the model to load. + :return: The loaded model instance. 
+ :raises ValueError: If the specified model suite is unsupported or the model is not found. + """ if suite == "timm": return timm.create_model(model_name, pretrained=True) elif suite == "torchvision": @@ -56,6 +67,19 @@ def load_calibration_dataset( model: torch.nn.Module, model_name: str, ): + """ + Loads a calibration dataset for model quantization. + + :param dataset_path: Path to the dataset directory. + :param batch_size: Number of samples per batch. + :param suite: The model suite used for preprocessing transformations. Supported values are: + - "torchvision": Uses predefined transformations for torchvision models. + - "timm": Uses dataset transformations based on the model's pretrained configuration. + :param model: The model instance, required for timm transformation resolution. + :param model_name: The model name, required for torchvision transformations. + :return: A DataLoader instance for the calibration dataset. + :raises ValueError: If the suite is unsupported for validation. + """ val_dir = f"{dataset_path}/val" if suite == "torchvision": @@ -84,6 +108,13 @@ def load_calibration_dataset( def dump_inputs(calibration_dataset, dest_path): + """ + Dumps the input data from a calibration dataset to raw files. + + :param calibration_dataset: The dataset containing calibration inputs. + :param dest_path: The destination directory to save the raw input files. + :return: A tuple containing a list of input file paths and the corresponding target labels. + """ input_files, targets = [], [] for idx, data in enumerate(calibration_dataset): feature, target = data @@ -102,6 +133,14 @@ def quantize_model( calibration_dataset: torch.utils.data.DataLoader, use_nncf: bool, ) -> torch.fx.GraphModule: + """ + Quantizes a model using either NNCF-based or PTQ-based quantization. + + :param captured_model: The model to be quantized, represented as a torch.fx.GraphModule. + :param calibration_dataset: A DataLoader containing calibration data for quantization. 
+ :param use_nncf: Whether to use NNCF-based quantization (True) or standard PTQ (False). + :return: The quantized model as a torch.fx.GraphModule. + """ quantizer = OpenVINOQuantizer() print("PTQ: Quantize the model") @@ -141,6 +180,13 @@ def transform(x): def validate_model( model_file_name: str, calibration_dataset: torch.utils.data.DataLoader ) -> float: + """ + Validates the model using the calibration dataset. + + :param model_file_name: The path to the quantized model file. + :param calibration_dataset: A DataLoader containing calibration data. + :return: The accuracy score of the model. + """ # 1: Dump inputs dest_path = Path("tmp_inputs") out_path = Path("tmp_outputs") @@ -186,6 +232,20 @@ def main( batch_size: int, quantization_flow: str, ): + """ + Main function to load, quantize, and validate a model. + + :param suite: The model suite to use (e.g., "timm", "torchvision", "huggingface"). + :param model_name: The name of the model to load. + :param input_shape: The input shape for the model. + :param quantize: Whether to quantize the model. + :param validate: Whether to validate the model. + :param dataset_path: Path to the dataset for calibration/validation. + :param device: The device to run the model on (e.g., "cpu", "gpu"). + :param batch_size: Batch size for dataset loading. + :param quantization_flow: The quantization method to use. 
+ """ + # Load the selected model model = load_model(suite, model_name) model = model.eval() From 1a74ed94aad4568ad2bb7d08c80d459a9358c524 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Wed, 19 Feb 2025 09:58:17 -0800 Subject: [PATCH 086/188] nncf requirement syntax updated --- backends/openvino/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/openvino/requirements.txt b/backends/openvino/requirements.txt index f00257127a3..91f0c0e802f 100644 --- a/backends/openvino/requirements.txt +++ b/backends/openvino/requirements.txt @@ -6,4 +6,4 @@ tokenizers transformers piq pillow -nncf @ https://github.com/openvinotoolkit/nncf.git +git+https://github.com/openvinotoolkit/nncf@191b53d#egg=nncf From 2baa31630ac0b543e6733ff671866eef276bb672 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Wed, 19 Feb 2025 10:01:28 -0800 Subject: [PATCH 087/188] Update README.md --- backends/openvino/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/openvino/README.md b/backends/openvino/README.md index da1d501b5c2..4d125ad155c 100644 --- a/backends/openvino/README.md +++ b/backends/openvino/README.md @@ -74,7 +74,7 @@ Follow the steps below to setup your build environment: ```bash pip install -r requirements.txt ``` - + Note: To achieve optimal performance with NNCF quantization, you should install the latest development version of NNCF (version 2.16.0.dev0+191b53d9 or higher). 3. Navigate to `scripts/` directory. 4. 
**Build OpenVINO Backend**: Once the prerequisites are in place, run the `openvino_build.sh` script to start the build process, OpenVINO backend will be built under `cmake-openvino-out/backends/openvino/` as `libopenvino_backend.so` From 4b55597d3b18deb72f719ee6a34618401cd2b99b Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Thu, 20 Feb 2025 16:48:21 -0800 Subject: [PATCH 088/188] Build updates for openvino backend --- backends/openvino/CMakeLists.txt | 34 ++++++-------------------------- examples/openvino/CMakeLists.txt | 4 ---- 2 files changed, 6 insertions(+), 32 deletions(-) diff --git a/backends/openvino/CMakeLists.txt b/backends/openvino/CMakeLists.txt index 017da3d92fe..0cc70bcc384 100644 --- a/backends/openvino/CMakeLists.txt +++ b/backends/openvino/CMakeLists.txt @@ -28,44 +28,22 @@ set(COMMON_INCLUDE_DIRS ${EXECUTORCH_ROOT}/..) # Include utility CMake scripts from ExecuteTorch include(${EXECUTORCH_ROOT}/build/Utils.cmake) -# Set OpenVINO directory from environment variable -set(OPENVINO_DIR "$ENV{INTEL_OPENVINO_DIR}") -if(NOT OPENVINO_DIR) - message(FATAL_ERROR "ERROR: INTEL_OPENVINO_DIR environment variable is not set.") -endif() - -# Set OpenVINO include directories -set(OPENVINO_INCLUDE_DIRS - ${OPENVINO_DIR}/runtime/include - ${OPENVINO_DIR}/deployment_tools/inference_engine/include -) - -# Set OpenVINO library path -set(OPENVINO_LIB_PATH ${OPENVINO_DIR}/runtime/lib/intel64) - -# Try to locate OpenVINO automatically -find_library(OPENVINO_LIB NAMES openvino PATHS ${OPENVINO_LIB_PATH} NO_DEFAULT_PATH) -if(NOT OPENVINO_LIB) - message(FATAL_ERROR "ERROR: OpenVINO library (libopenvino.so) not found in ${OPENVINO_LIB_PATH}") -endif() +find_package(OpenVINO REQUIRED) # Define OpenVINO backend as a shared library -add_library(openvino_backend SHARED) +add_library(openvino_backend SHARED .) 
# Enable exceptions and RTTI for OpenVINO backend target_compile_options(openvino_backend PRIVATE -frtti -fexceptions) -# Include ExecuteTorch and OpenVINO directories -target_include_directories(openvino_backend PUBLIC ${COMMON_INCLUDE_DIRS} ${OPENVINO_INCLUDE_DIRS}) +# Include ExecuteTorch directories +target_include_directories(openvino_backend PUBLIC ${COMMON_INCLUDE_DIRS}) # Link OpenVINO and ExecuteTorch core libraries -target_link_libraries(openvino_backend PRIVATE ${OPENVINO_LIB} executorch_core) +target_link_libraries(openvino_backend PRIVATE openvino::runtime executorch_core) # Add source files for OpenVINO backend target_sources(openvino_backend PRIVATE ${CMAKE_CURRENT_LIST_DIR}/runtime/OpenvinoBackend.cpp) -# Set runtime library path for OpenVINO -target_link_options(openvino_backend PRIVATE -Wl,-rpath=${OPENVINO_LIB_PATH}) - -# Install OpenVINO backend library to the lib directory +## Install OpenVINO backend library to the lib directory install(TARGETS openvino_backend DESTINATION lib) diff --git a/examples/openvino/CMakeLists.txt b/examples/openvino/CMakeLists.txt index bce54af1b4a..af68880d426 100644 --- a/examples/openvino/CMakeLists.txt +++ b/examples/openvino/CMakeLists.txt @@ -30,10 +30,6 @@ endif() set(_common_compile_options -Wno-deprecated-declarations -fPIC) set(_common_include_directories ${EXECUTORCH_ROOT}/..) 
-set(EXECUTORCH_SRCS_FILE "${CMAKE_CURRENT_BINARY_DIR}/../../../build/executorch_srcs.cmake") -extract_sources(${EXECUTORCH_SRCS_FILE}) -include(${EXECUTORCH_SRCS_FILE}) - set(_openvino_executor_runner__srcs ${CMAKE_CURRENT_LIST_DIR}/../openvino/executor_runner/openvino_executor_runner.cpp ) From 1a928d1adb9df63dd4737c5b071c1ad667d9bec1 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Thu, 20 Feb 2025 17:15:33 -0800 Subject: [PATCH 089/188] Removed executorch_srcs.cmake file --- build/executorch_srcs.cmake | 448 ------------------------------------ 1 file changed, 448 deletions(-) delete mode 100644 build/executorch_srcs.cmake diff --git a/build/executorch_srcs.cmake b/build/executorch_srcs.cmake deleted file mode 100644 index a44fe650da2..00000000000 --- a/build/executorch_srcs.cmake +++ /dev/null @@ -1,448 +0,0 @@ -# @generated by extract_sources.py - -set(_executorch__srcs - kernels/prim_ops/et_copy_index.cpp - kernels/prim_ops/et_view.cpp - kernels/prim_ops/register_prim_ops.cpp -) - -set(_executorch_core__srcs - runtime/backend/interface.cpp - runtime/core/evalue.cpp - runtime/core/exec_aten/util/tensor_util_portable.cpp - runtime/core/portable_type/tensor_impl.cpp - runtime/executor/method.cpp - runtime/executor/method_meta.cpp - runtime/executor/program.cpp - runtime/executor/tensor_parser_exec_aten.cpp - runtime/executor/tensor_parser_portable.cpp - runtime/kernel/operator_registry.cpp - runtime/platform/abort.cpp - runtime/platform/default/posix.cpp - runtime/platform/log.cpp - runtime/platform/profiler.cpp - runtime/platform/runtime.cpp - schema/extended_header.cpp -) - -set(_portable_kernels__srcs - kernels/portable/cpu/op__to_dim_order_copy.cpp - kernels/portable/cpu/op_abs.cpp - kernels/portable/cpu/op_acos.cpp - kernels/portable/cpu/op_acosh.cpp - kernels/portable/cpu/op_add.cpp - kernels/portable/cpu/op_addmm.cpp - kernels/portable/cpu/op_alias_copy.cpp - kernels/portable/cpu/op_allclose.cpp - kernels/portable/cpu/op_amax.cpp - 
kernels/portable/cpu/op_amin.cpp - kernels/portable/cpu/op_any.cpp - kernels/portable/cpu/op_arange.cpp - kernels/portable/cpu/op_argmax.cpp - kernels/portable/cpu/op_argmin.cpp - kernels/portable/cpu/op_as_strided_copy.cpp - kernels/portable/cpu/op_asin.cpp - kernels/portable/cpu/op_asinh.cpp - kernels/portable/cpu/op_atan.cpp - kernels/portable/cpu/op_atan2.cpp - kernels/portable/cpu/op_atanh.cpp - kernels/portable/cpu/op_avg_pool2d.cpp - kernels/portable/cpu/op_bitwise_and.cpp - kernels/portable/cpu/op_bitwise_not.cpp - kernels/portable/cpu/op_bitwise_or.cpp - kernels/portable/cpu/op_bitwise_xor.cpp - kernels/portable/cpu/op_bmm.cpp - kernels/portable/cpu/op_cat.cpp - kernels/portable/cpu/op_cdist_forward.cpp - kernels/portable/cpu/op_ceil.cpp - kernels/portable/cpu/op_clamp.cpp - kernels/portable/cpu/op_clone.cpp - kernels/portable/cpu/op_constant_pad_nd.cpp - kernels/portable/cpu/op_convolution.cpp - kernels/portable/cpu/op_convolution_backward.cpp - kernels/portable/cpu/op_copy.cpp - kernels/portable/cpu/op_cos.cpp - kernels/portable/cpu/op_cosh.cpp - kernels/portable/cpu/op_cumsum.cpp - kernels/portable/cpu/op_detach_copy.cpp - kernels/portable/cpu/op_diagonal_copy.cpp - kernels/portable/cpu/op_div.cpp - kernels/portable/cpu/op_embedding.cpp - kernels/portable/cpu/op_empty.cpp - kernels/portable/cpu/op_eq.cpp - kernels/portable/cpu/op_erf.cpp - kernels/portable/cpu/op_exp.cpp - kernels/portable/cpu/op_expand_copy.cpp - kernels/portable/cpu/op_expm1.cpp - kernels/portable/cpu/op_fill.cpp - kernels/portable/cpu/op_flip.cpp - kernels/portable/cpu/op_floor.cpp - kernels/portable/cpu/op_floor_divide.cpp - kernels/portable/cpu/op_fmod.cpp - kernels/portable/cpu/op_full.cpp - kernels/portable/cpu/op_full_like.cpp - kernels/portable/cpu/op_gather.cpp - kernels/portable/cpu/op_ge.cpp - kernels/portable/cpu/op_gelu.cpp - kernels/portable/cpu/op_glu.cpp - kernels/portable/cpu/op_gt.cpp - kernels/portable/cpu/op_hardtanh.cpp - kernels/portable/cpu/op_index.cpp - 
kernels/portable/cpu/op_index_put.cpp - kernels/portable/cpu/op_index_select.cpp - kernels/portable/cpu/op_isinf.cpp - kernels/portable/cpu/op_isnan.cpp - kernels/portable/cpu/op_le.cpp - kernels/portable/cpu/op_leaky_relu.cpp - kernels/portable/cpu/op_lift_fresh_copy.cpp - kernels/portable/cpu/op_linear_scratch_example.cpp - kernels/portable/cpu/op_log.cpp - kernels/portable/cpu/op_log10.cpp - kernels/portable/cpu/op_log1p.cpp - kernels/portable/cpu/op_log2.cpp - kernels/portable/cpu/op_log_softmax.cpp - kernels/portable/cpu/op_logical_and.cpp - kernels/portable/cpu/op_logical_not.cpp - kernels/portable/cpu/op_logical_or.cpp - kernels/portable/cpu/op_logical_xor.cpp - kernels/portable/cpu/op_logit.cpp - kernels/portable/cpu/op_lt.cpp - kernels/portable/cpu/op_masked_fill.cpp - kernels/portable/cpu/op_masked_scatter.cpp - kernels/portable/cpu/op_max.cpp - kernels/portable/cpu/op_max_pool2d_with_indices.cpp - kernels/portable/cpu/op_maximum.cpp - kernels/portable/cpu/op_mean.cpp - kernels/portable/cpu/op_min.cpp - kernels/portable/cpu/op_minimum.cpp - kernels/portable/cpu/op_mm.cpp - kernels/portable/cpu/op_mul.cpp - kernels/portable/cpu/op_narrow_copy.cpp - kernels/portable/cpu/op_native_batch_norm.cpp - kernels/portable/cpu/op_native_group_norm.cpp - kernels/portable/cpu/op_native_layer_norm.cpp - kernels/portable/cpu/op_ne.cpp - kernels/portable/cpu/op_neg.cpp - kernels/portable/cpu/op_nonzero.cpp - kernels/portable/cpu/op_ones.cpp - kernels/portable/cpu/op_pdist_forward.cpp - kernels/portable/cpu/op_permute_copy.cpp - kernels/portable/cpu/op_pixel_shuffle.cpp - kernels/portable/cpu/op_pixel_unshuffle.cpp - kernels/portable/cpu/op_pow.cpp - kernels/portable/cpu/op_prod.cpp - kernels/portable/cpu/op_reciprocal.cpp - kernels/portable/cpu/op_reflection_pad1d.cpp - kernels/portable/cpu/op_reflection_pad2d.cpp - kernels/portable/cpu/op_reflection_pad3d.cpp - kernels/portable/cpu/op_relu.cpp - kernels/portable/cpu/op_remainder.cpp - kernels/portable/cpu/op_repeat.cpp - 
kernels/portable/cpu/op_replication_pad1d.cpp - kernels/portable/cpu/op_replication_pad2d.cpp - kernels/portable/cpu/op_replication_pad3d.cpp - kernels/portable/cpu/op_roll.cpp - kernels/portable/cpu/op_round.cpp - kernels/portable/cpu/op_rsqrt.cpp - kernels/portable/cpu/op_rsub.cpp - kernels/portable/cpu/op_scalar_tensor.cpp - kernels/portable/cpu/op_scatter.cpp - kernels/portable/cpu/op_scatter_add.cpp - kernels/portable/cpu/op_select_copy.cpp - kernels/portable/cpu/op_select_scatter.cpp - kernels/portable/cpu/op_sigmoid.cpp - kernels/portable/cpu/op_sign.cpp - kernels/portable/cpu/op_sin.cpp - kernels/portable/cpu/op_sinh.cpp - kernels/portable/cpu/op_slice_copy.cpp - kernels/portable/cpu/op_slice_scatter.cpp - kernels/portable/cpu/op_softmax.cpp - kernels/portable/cpu/op_split_copy.cpp - kernels/portable/cpu/op_split_with_sizes_copy.cpp - kernels/portable/cpu/op_sqrt.cpp - kernels/portable/cpu/op_squeeze_copy.cpp - kernels/portable/cpu/op_stack.cpp - kernels/portable/cpu/op_sub.cpp - kernels/portable/cpu/op_sum.cpp - kernels/portable/cpu/op_t_copy.cpp - kernels/portable/cpu/op_tan.cpp - kernels/portable/cpu/op_tanh.cpp - kernels/portable/cpu/op_to_copy.cpp - kernels/portable/cpu/op_topk.cpp - kernels/portable/cpu/op_transpose_copy.cpp - kernels/portable/cpu/op_tril.cpp - kernels/portable/cpu/op_trunc.cpp - kernels/portable/cpu/op_unbind_copy.cpp - kernels/portable/cpu/op_unsqueeze_copy.cpp - kernels/portable/cpu/op_var.cpp - kernels/portable/cpu/op_view_copy.cpp - kernels/portable/cpu/op_where.cpp - kernels/portable/cpu/op_zeros.cpp - kernels/portable/cpu/pattern/unary_ufunc_realh.cpp - kernels/portable/cpu/pattern/unary_ufunc_realhb_to_bool.cpp - kernels/portable/cpu/pattern/unary_ufunc_realhbbf16_to_floathbf16.cpp - kernels/portable/cpu/util/activation_ops_util.cpp - kernels/portable/cpu/util/advanced_index_util.cpp - kernels/portable/cpu/util/broadcast_util.cpp - kernels/portable/cpu/util/copy_ops_util.cpp - kernels/portable/cpu/util/distance_util.cpp - 
kernels/portable/cpu/util/dtype_util.cpp - kernels/portable/cpu/util/index_util.cpp - kernels/portable/cpu/util/kernel_ops_util.cpp - kernels/portable/cpu/util/matmul_ops_util.cpp - kernels/portable/cpu/util/normalization_ops_util.cpp - kernels/portable/cpu/util/padding_util.cpp - kernels/portable/cpu/util/reduce_util.cpp - kernels/portable/cpu/util/repeat_util.cpp - kernels/portable/cpu/util/select_copy_util.cpp - kernels/portable/cpu/util/slice_util.cpp -) - -set(_optimized_kernels__srcs - extension/parallel/thread_parallel.cpp - kernels/optimized/blas/BlasKernel.cpp - kernels/optimized/blas/CPUBlas.cpp - kernels/optimized/cpu/op_add.cpp - kernels/optimized/cpu/op_bmm.cpp - kernels/optimized/cpu/op_div.cpp - kernels/optimized/cpu/op_exp.cpp - kernels/optimized/cpu/op_le.cpp - kernels/optimized/cpu/op_linear.cpp - kernels/optimized/cpu/op_mm.cpp - kernels/optimized/cpu/op_mul.cpp - kernels/optimized/cpu/op_native_layer_norm.cpp - kernels/optimized/cpu/op_neg.cpp - kernels/optimized/cpu/op_sub.cpp -) - -set(_quantized_kernels__srcs - kernels/quantized/cpu/embeddingxb.cpp - kernels/quantized/cpu/op_add.cpp - kernels/quantized/cpu/op_choose_qparams.cpp - kernels/quantized/cpu/op_dequantize.cpp - kernels/quantized/cpu/op_embedding.cpp - kernels/quantized/cpu/op_embedding2b.cpp - kernels/quantized/cpu/op_embedding4b.cpp - kernels/quantized/cpu/op_mixed_linear.cpp - kernels/quantized/cpu/op_mixed_mm.cpp - kernels/quantized/cpu/op_quantize.cpp -) - -set(_program_schema__srcs - schema/program.fbs - schema/scalar_type.fbs -) - -set(_optimized_cpublas__srcs - extension/parallel/thread_parallel.cpp - extension/threadpool/threadpool.cpp - extension/threadpool/threadpool_guard.cpp - kernels/optimized/blas/BlasKernel.cpp - kernels/optimized/blas/CPUBlas.cpp -) - -set(_optimized_native_cpu_ops_oss__srcs - codegen/templates/RegisterCodegenUnboxedKernels.cpp - codegen/templates/RegisterDispatchKeyCustomOps.cpp - codegen/templates/RegisterKernels.cpp - 
codegen/templates/RegisterSchema.cpp - extension/parallel/thread_parallel.cpp - extension/threadpool/threadpool.cpp - extension/threadpool/threadpool_guard.cpp - kernels/optimized/blas/BlasKernel.cpp - kernels/optimized/blas/CPUBlas.cpp - kernels/optimized/cpu/op_add.cpp - kernels/optimized/cpu/op_bmm.cpp - kernels/optimized/cpu/op_div.cpp - kernels/optimized/cpu/op_exp.cpp - kernels/optimized/cpu/op_le.cpp - kernels/optimized/cpu/op_linear.cpp - kernels/optimized/cpu/op_mm.cpp - kernels/optimized/cpu/op_mul.cpp - kernels/optimized/cpu/op_native_layer_norm.cpp - kernels/optimized/cpu/op_neg.cpp - kernels/optimized/cpu/op_sub.cpp -) - -set(_extension_data_loader__srcs - extension/data_loader/file_data_loader.cpp - extension/data_loader/mmap_data_loader.cpp -) - -set(_extension_module__srcs - extension/module/module.cpp -) - -set(_extension_runner_util__srcs - extension/runner_util/inputs.cpp - extension/runner_util/inputs_portable.cpp -) - -set(_extension_llm_runner__srcs - extension/data_loader/file_data_loader.cpp - extension/data_loader/mmap_data_loader.cpp - extension/llm/runner/text_decoder_runner.cpp - extension/llm/runner/text_prefiller.cpp - extension/llm/sampler/sampler.cpp - extension/tensor/tensor_ptr.cpp - extension/tensor/tensor_ptr_maker.cpp -) - -set(_extension_tensor__srcs - extension/tensor/tensor_ptr.cpp - extension/tensor/tensor_ptr_maker.cpp -) - -set(_extension_threadpool__srcs - extension/threadpool/threadpool.cpp - extension/threadpool/threadpool_guard.cpp -) - -set(_extension_training__srcs - extension/data_loader/file_data_loader.cpp - extension/data_loader/mmap_data_loader.cpp - extension/module/module.cpp - extension/training/module/training_module.cpp - extension/training/optimizer/sgd.cpp - kernels/prim_ops/et_copy_index.cpp - kernels/prim_ops/et_view.cpp - kernels/prim_ops/register_prim_ops.cpp -) - -set(_train_xor__srcs - extension/data_loader/file_data_loader.cpp - extension/data_loader/mmap_data_loader.cpp - 
extension/module/module.cpp - extension/tensor/tensor_ptr.cpp - extension/tensor/tensor_ptr_maker.cpp - extension/training/examples/XOR/train.cpp - extension/training/module/training_module.cpp - extension/training/optimizer/sgd.cpp -) - -set(_executor_runner__srcs - examples/portable/executor_runner/executor_runner.cpp - extension/data_loader/file_data_loader.cpp - extension/evalue_util/print_evalue.cpp - extension/runner_util/inputs.cpp - extension/runner_util/inputs_portable.cpp - runtime/executor/test/test_backend_compiler_lib.cpp -) - -set(_size_test__srcs - extension/data_loader/file_data_loader.cpp - test/size_test.cpp -) - -set(_mps_executor_runner__srcs - backends/apple/mps/runtime/MPSBackend.mm - backends/apple/mps/runtime/MPSCompiler.mm - backends/apple/mps/runtime/MPSDelegateHeader.mm - backends/apple/mps/runtime/MPSDevice.mm - backends/apple/mps/runtime/MPSExecutor.mm - backends/apple/mps/runtime/MPSGraphBuilder.mm - backends/apple/mps/runtime/MPSStream.mm - backends/apple/mps/runtime/operations/ActivationOps.mm - backends/apple/mps/runtime/operations/BinaryOps.mm - backends/apple/mps/runtime/operations/ClampOps.mm - backends/apple/mps/runtime/operations/ConstantOps.mm - backends/apple/mps/runtime/operations/ConvolutionOps.mm - backends/apple/mps/runtime/operations/IndexingOps.mm - backends/apple/mps/runtime/operations/LinearAlgebra.mm - backends/apple/mps/runtime/operations/NormalizationOps.mm - backends/apple/mps/runtime/operations/OperationUtils.mm - backends/apple/mps/runtime/operations/PadOps.mm - backends/apple/mps/runtime/operations/PoolingOps.mm - backends/apple/mps/runtime/operations/QuantDequant.mm - backends/apple/mps/runtime/operations/RangeOps.mm - backends/apple/mps/runtime/operations/ReduceOps.mm - backends/apple/mps/runtime/operations/ShapeOps.mm - backends/apple/mps/runtime/operations/UnaryOps.mm - devtools/bundled_program/bundled_program.cpp - devtools/etdump/emitter.cpp - devtools/etdump/etdump_flatcc.cpp - 
examples/apple/mps/executor_runner/mps_executor_runner.mm - extension/data_loader/file_data_loader.cpp - extension/evalue_util/print_evalue.cpp - extension/runner_util/inputs.cpp - extension/runner_util/inputs_portable.cpp -) - -set(_mps_backend__srcs - backends/apple/mps/runtime/MPSBackend.mm - backends/apple/mps/runtime/MPSCompiler.mm - backends/apple/mps/runtime/MPSDelegateHeader.mm - backends/apple/mps/runtime/MPSDevice.mm - backends/apple/mps/runtime/MPSExecutor.mm - backends/apple/mps/runtime/MPSGraphBuilder.mm - backends/apple/mps/runtime/MPSStream.mm - backends/apple/mps/runtime/operations/ActivationOps.mm - backends/apple/mps/runtime/operations/BinaryOps.mm - backends/apple/mps/runtime/operations/ClampOps.mm - backends/apple/mps/runtime/operations/ConstantOps.mm - backends/apple/mps/runtime/operations/ConvolutionOps.mm - backends/apple/mps/runtime/operations/IndexingOps.mm - backends/apple/mps/runtime/operations/LinearAlgebra.mm - backends/apple/mps/runtime/operations/NormalizationOps.mm - backends/apple/mps/runtime/operations/OperationUtils.mm - backends/apple/mps/runtime/operations/PadOps.mm - backends/apple/mps/runtime/operations/PoolingOps.mm - backends/apple/mps/runtime/operations/QuantDequant.mm - backends/apple/mps/runtime/operations/RangeOps.mm - backends/apple/mps/runtime/operations/ReduceOps.mm - backends/apple/mps/runtime/operations/ShapeOps.mm - backends/apple/mps/runtime/operations/UnaryOps.mm -) - -set(_mps_schema__srcs - backends/apple/mps/serialization/schema.fbs -) - -set(_xnn_executor_runner__srcs - examples/portable/executor_runner/executor_runner.cpp - extension/data_loader/file_data_loader.cpp - extension/evalue_util/print_evalue.cpp - extension/runner_util/inputs.cpp - extension/runner_util/inputs_portable.cpp -) - -set(_xnnpack_backend__srcs - backends/xnnpack/runtime/XNNCompiler.cpp - backends/xnnpack/runtime/XNNExecutor.cpp - backends/xnnpack/runtime/XNNHeader.cpp - backends/xnnpack/runtime/XNNPACKBackend.cpp - 
backends/xnnpack/runtime/profiling/XNNProfiler.cpp - extension/threadpool/threadpool.cpp - extension/threadpool/threadpool_guard.cpp -) - -set(_xnnpack_schema__srcs - backends/xnnpack/serialization/runtime_schema.fbs -) - -set(_vulkan_schema__srcs - backends/vulkan/serialization/schema.fbs -) - -set(_custom_ops__srcs - extension/llm/custom_ops/op_fallback.cpp - extension/llm/custom_ops/op_fast_hadamard_transform.cpp - extension/llm/custom_ops/op_sdpa.cpp - extension/llm/custom_ops/op_update_quantized_cache.cpp - extension/llm/custom_ops/spinquant/fast_hadamard_transform.cpp - extension/llm/custom_ops/spinquant/third-party/FFHT/fht_avx.c - kernels/portable/cpu/util/reduce_util.cpp -) - -set(_llama_runner__srcs - examples/models/llama/runner/runner.cpp - examples/models/llama/tokenizer/llama_tiktoken.cpp - extension/evalue_util/print_evalue.cpp - extension/llm/runner/text_decoder_runner.cpp - extension/llm/runner/text_prefiller.cpp - extension/llm/sampler/sampler.cpp - extension/llm/tokenizer/bpe_tokenizer.cpp - extension/llm/tokenizer/tiktoken.cpp - extension/tensor/tensor_ptr.cpp - extension/tensor/tensor_ptr_maker.cpp -) \ No newline at end of file From cbd067183555f65a017c9f87e17e7f1220708e4e Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Thu, 20 Feb 2025 17:16:50 -0800 Subject: [PATCH 090/188] Update backends/openvino/CMakeLists.txt Co-authored-by: Yamini Nimmagadda --- backends/openvino/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/openvino/CMakeLists.txt b/backends/openvino/CMakeLists.txt index 0cc70bcc384..772168e783b 100644 --- a/backends/openvino/CMakeLists.txt +++ b/backends/openvino/CMakeLists.txt @@ -36,7 +36,7 @@ add_library(openvino_backend SHARED .) 
# Enable exceptions and RTTI for OpenVINO backend target_compile_options(openvino_backend PRIVATE -frtti -fexceptions) -# Include ExecuteTorch directories +# Include Executorch directories target_include_directories(openvino_backend PUBLIC ${COMMON_INCLUDE_DIRS}) # Link OpenVINO and ExecuteTorch core libraries From 8ae15e2fef7821aeaaf6c6948c3ea74b6e38e532 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Thu, 20 Feb 2025 17:16:56 -0800 Subject: [PATCH 091/188] Update backends/openvino/CMakeLists.txt Co-authored-by: Yamini Nimmagadda --- backends/openvino/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/openvino/CMakeLists.txt b/backends/openvino/CMakeLists.txt index 772168e783b..9b834d5e31e 100644 --- a/backends/openvino/CMakeLists.txt +++ b/backends/openvino/CMakeLists.txt @@ -45,5 +45,5 @@ target_link_libraries(openvino_backend PRIVATE openvino::runtime executorch_core # Add source files for OpenVINO backend target_sources(openvino_backend PRIVATE ${CMAKE_CURRENT_LIST_DIR}/runtime/OpenvinoBackend.cpp) -## Install OpenVINO backend library to the lib directory +# Install OpenVINO backend library to the lib directory install(TARGETS openvino_backend DESTINATION lib) From 514d4c0f985a43aab77d5f19d3cc89760729bc72 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Thu, 20 Feb 2025 18:00:51 -0800 Subject: [PATCH 092/188] Logging added for building openvino backend --- build/Utils.cmake | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/build/Utils.cmake b/build/Utils.cmake index 3d4e9c76005..3bb62fdaf0f 100644 --- a/build/Utils.cmake +++ b/build/Utils.cmake @@ -115,6 +115,10 @@ function(executorch_print_configuration_summary) STATUS " EXECUTORCH_BUILD_NEURON : ${EXECUTORCH_BUILD_NEURON}" ) + message( + STATUS + " EXECUTORCH_BUILD_OPENVINO : ${EXECUTORCH_BUILD_OPENVINO}" + ) message( STATUS " EXECUTORCH_BUILD_PTHREADPOOL : ${EXECUTORCH_BUILD_PTHREADPOOL}" From a28fcf34816362606c4ce1ba5c9b404fe55f97a1 Mon Sep 17 00:00:00 2001 
From: Mustafa Cavus Date: Fri, 21 Feb 2025 15:02:33 -0800 Subject: [PATCH 093/188] Update examples/openvino/aot/README.md Co-authored-by: Kimish Patel --- examples/openvino/aot/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/openvino/aot/README.md b/examples/openvino/aot/README.md index adfed35eefc..24c3cc35b98 100644 --- a/examples/openvino/aot/README.md +++ b/examples/openvino/aot/README.md @@ -36,7 +36,7 @@ python aot_openvino_compiler.py --suite --model --inp The dataset length must be evenly divisible by the batch size. - **`--quantize`** (optional): - Enable model quantization. --dataset argument is requred for the quantization. `huggingface` suite does not supported yet. + Enable model quantization. --dataset argument is requred for the quantization. `huggingface` suite is not supported yet. - **`--quantization_flow`** (optional): Specifies the way to quantize torch.fx.GraphModule. From fa8feac077ce554948f5095e97361cc55347730a Mon Sep 17 00:00:00 2001 From: daniil-lyakhov Date: Fri, 21 Feb 2025 18:18:07 +0100 Subject: [PATCH 094/188] Comments --- backends/openvino/quantizer/quantizer.py | 36 ++++++++++ .../openvino/aot/aot_openvino_compiler.py | 67 +------------------ 2 files changed, 37 insertions(+), 66 deletions(-) diff --git a/backends/openvino/quantizer/quantizer.py b/backends/openvino/quantizer/quantizer.py index f82d0745e8b..501bf5142dd 100644 --- a/backends/openvino/quantizer/quantizer.py +++ b/backends/openvino/quantizer/quantizer.py @@ -6,6 +6,7 @@ from collections import defaultdict from enum import Enum +from itertools import islice from typing import Dict, List, Optional, Tuple import nncf @@ -13,6 +14,7 @@ import nncf.experimental.torch.fx as nncf_fx import torch.fx + from nncf.common.graph.graph import NNCFGraph from torch.ao.quantization.observer import HistogramObserver, PerChannelMinMaxObserver from torch.ao.quantization.quantizer.quantizer import ( @@ -343,5 +345,39 @@ def validate(self, model: 
torch.fx.GraphModule) -> None: def transform_for_annotation( self, model: torch.fx.GraphModule ) -> torch.fx.GraphModule: + # Fold constant branches to avoid their quantization nncf_fx.transformations.fold_constant_except_qdq(model) return model + + +def quantize_model( + captured_model: torch.fx.GraphModule, + calibration_dataset: torch.utils.data.DataLoader, +) -> torch.fx.GraphModule: + """ + Quantizes a model using either NNCF-based or PTQ-based quantization. + + :param captured_model: The model to be quantized, represented as a torch.fx.GraphModule. + :param calibration_dataset: A DataLoader containing calibration data for quantization. + :return: The quantized model as a torch.fx.GraphModule. + """ + quantizer = OpenVINOQuantizer() + + print("PTQ: Quantize the model") + default_subset_size = 300 + batch_size = calibration_dataset.batch_size + subset_size = (default_subset_size // batch_size) + int( + default_subset_size % batch_size > 0 + ) + + def transform(x): + return x[0] + + quantized_model = nncf_fx.quantize_pt2e( + captured_model, + quantizer, + subset_size=subset_size, + calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform), + fold_quantize=False, + ) + return quantized_model diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 61251dc0ef5..257c8fb50b4 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -8,7 +8,6 @@ import os import shutil import subprocess -from itertools import islice from pathlib import Path import executorch @@ -18,15 +17,13 @@ import timm import torch import torchvision.models as torchvision_models -from executorch.backends.openvino import OpenVINOQuantizer from executorch.backends.openvino.partitioner import OpenvinoPartitioner +from executorch.backends.openvino.quantizer.quantizer import quantize_model from executorch.exir import EdgeProgramManager, to_edge_transform_and_lower 
from executorch.exir.backend.backend_details import CompileSpec -from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e from sklearn.metrics import accuracy_score from timm.data import resolve_data_config from timm.data.transforms_factory import create_transform -from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e from torch.export import export from torch.export.exported_program import ExportedProgram from torchvision import datasets @@ -129,55 +126,6 @@ def dump_inputs(calibration_dataset, dest_path): return input_files, targets -def quantize_model( - captured_model: torch.fx.GraphModule, - calibration_dataset: torch.utils.data.DataLoader, - use_nncf: bool, -) -> torch.fx.GraphModule: - """ - Quantizes a model using either NNCF-based or PTQ-based quantization. - - :param captured_model: The model to be quantized, represented as a torch.fx.GraphModule. - :param calibration_dataset: A DataLoader containing calibration data for quantization. - :param use_nncf: Whether to use NNCF-based quantization (True) or standard PTQ (False). - :return: The quantized model as a torch.fx.GraphModule. 
- """ - quantizer = OpenVINOQuantizer() - - print("PTQ: Quantize the model") - default_subset_size = 300 - batch_size = calibration_dataset.batch_size - subset_size = (default_subset_size // batch_size) + int( - default_subset_size % batch_size > 0 - ) - - def transform(x): - return x[0] - - if use_nncf: - - quantized_model = quantize_pt2e( - captured_model, - quantizer, - subset_size=subset_size, - calibration_dataset=nncf.Dataset( - calibration_dataset, transform_func=transform - ), - fold_quantize=False, - ) - else: - annotated_model = prepare_pt2e(captured_model, quantizer) - - print("PTQ: Calibrate the model...") - for data in islice(calibration_dataset, subset_size): - annotated_model(transform(data)) - - print("PTQ: Convert the quantized model...") - quantized_model = convert_pt2e(annotated_model, fold_quantize=False) - - return quantized_model - - def validate_model( model_file_name: str, calibration_dataset: torch.utils.data.DataLoader ) -> float: @@ -231,7 +179,6 @@ def main( dataset_path: str, device: str, batch_size: int, - quantization_flow: str, ): """ Main function to load, quantize, and validate a model. @@ -244,7 +191,6 @@ def main( :param dataset_path: Path to the dataset for calibration/validation. :param device: The device to run the model on (e.g., "cpu", "gpu"). :param batch_size: Batch size for dataset loading. - :param quantization_flow: The quantization method to use. """ # Load the selected model @@ -281,7 +227,6 @@ def main( quantized_model = quantize_model( aten_dialect.module(), calibration_dataset, - use_nncf=quantization_flow == "nncf", ) aten_dialect: ExportedProgram = export(quantized_model, example_args) @@ -360,15 +305,6 @@ def main( default="CPU", help="Target device for compiling the model (e.g., CPU, GPU). 
Default is CPU.", ) - parser.add_argument( - "--quantization_flow", - type=str, - choices=["pt2e", "nncf"], - default="nncf", - help="Select the quantization flow (nncf or pt2e):" - " pt2e is the default torch.ao quantization flow, while" - " nncf is a custom method with additional algorithms to improve model performance.", - ) args = parser.parse_args() @@ -384,5 +320,4 @@ def main( args.dataset, args.device, args.batch_size, - args.quantization_flow, ) From bb51412aa698917465f3f6cea0f9eb149f64cc95 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Mon, 24 Feb 2025 12:55:16 -0800 Subject: [PATCH 095/188] renamed OpenvinoBackend.h to OpenvinoBackend.hpp --- backends/openvino/README.md | 2 +- backends/openvino/runtime/OpenvinoBackend.cpp | 2 +- .../openvino/runtime/{OpenvinoBackend.hpp => OpenvinoBackend.h} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename backends/openvino/runtime/{OpenvinoBackend.hpp => OpenvinoBackend.h} (100%) diff --git a/backends/openvino/README.md b/backends/openvino/README.md index 4d125ad155c..3ac66370c88 100644 --- a/backends/openvino/README.md +++ b/backends/openvino/README.md @@ -18,7 +18,7 @@ executorch │ └── openvino │ ├── runtime │ ├── OpenvinoBackend.cpp -│ └── OpenvinoBackend.hpp +│ └── OpenvinoBackend.h │ ├── scripts │ └── openvino_build.sh │ ├── tests diff --git a/backends/openvino/runtime/OpenvinoBackend.cpp b/backends/openvino/runtime/OpenvinoBackend.cpp index 890941e8c89..3e4198d7668 100644 --- a/backends/openvino/runtime/OpenvinoBackend.cpp +++ b/backends/openvino/runtime/OpenvinoBackend.cpp @@ -18,7 +18,7 @@ #include #include -#include "OpenvinoBackend.hpp" +#include "OpenvinoBackend.h" using namespace std; using executorch::aten::ScalarType; diff --git a/backends/openvino/runtime/OpenvinoBackend.hpp b/backends/openvino/runtime/OpenvinoBackend.h similarity index 100% rename from backends/openvino/runtime/OpenvinoBackend.hpp rename to backends/openvino/runtime/OpenvinoBackend.h From 
935e9372d3b64fd867d7296b7ba3b2732dfe3e07 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Mon, 24 Feb 2025 13:47:43 -0800 Subject: [PATCH 096/188] Removed using blanket namespaces --- backends/openvino/runtime/OpenvinoBackend.cpp | 44 +++++++----------- backends/openvino/runtime/OpenvinoBackend.h | 45 ++++++++----------- 2 files changed, 33 insertions(+), 56 deletions(-) diff --git a/backends/openvino/runtime/OpenvinoBackend.cpp b/backends/openvino/runtime/OpenvinoBackend.cpp index 3e4198d7668..e5ee60ffb16 100644 --- a/backends/openvino/runtime/OpenvinoBackend.cpp +++ b/backends/openvino/runtime/OpenvinoBackend.cpp @@ -20,20 +20,6 @@ #include "OpenvinoBackend.h" -using namespace std; -using executorch::aten::ScalarType; -using executorch::runtime::ArrayRef; -using executorch::runtime::Backend; -using executorch::runtime::BackendExecutionContext; -using executorch::runtime::BackendInitContext; -using executorch::runtime::CompileSpec; -using executorch::runtime::DelegateHandle; -using executorch::runtime::Error; -using executorch::runtime::EValue; -using executorch::runtime::FreeableBuffer; -using executorch::runtime::MemoryAllocator; -using executorch::runtime::Result; - namespace executorch { namespace backends { namespace openvino { @@ -70,10 +56,10 @@ bool OpenvinoBackend::is_available() const { return false; // OpenVINO is not available } -Result OpenvinoBackend::init( - BackendInitContext& context, - FreeableBuffer* processed, - ArrayRef compile_specs) const { +exr::Result OpenvinoBackend::init( + exr::BackendInitContext& context, + exr::FreeableBuffer* processed, + exr::ArrayRef compile_specs) const { ET_LOG(Info, "OpenvinoBackend::init %p", processed->data()); ov::Core core; @@ -101,7 +87,7 @@ Result OpenvinoBackend::init( std::make_shared(compiled_model.create_infer_request()); // Allocate execution handle - MemoryAllocator* allocator = context.get_runtime_allocator(); + exr::MemoryAllocator* allocator = context.get_runtime_allocator(); ExecutionHandle* 
handle = ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(allocator, ExecutionHandle); handle->compiled_model = std::make_shared(compiled_model); @@ -110,10 +96,10 @@ Result OpenvinoBackend::init( return handle; } -Error OpenvinoBackend::execute( - BackendExecutionContext& context, - DelegateHandle* input_handle, - EValue** args) const { +exr::Error OpenvinoBackend::execute( + exr::BackendExecutionContext& context, + exr::DelegateHandle* input_handle, + exr::EValue** args) const { ExecutionHandle* execution_handle = (ExecutionHandle*)input_handle; auto infer_request = execution_handle->infer_request; @@ -154,10 +140,10 @@ Error OpenvinoBackend::execute( // Execute the inference infer_request->infer(); - return Error::Ok; + return exr::Error::Ok; } -void OpenvinoBackend::destroy(DelegateHandle* handle) const { +void OpenvinoBackend::destroy(exr::DelegateHandle* handle) const { if (!handle) { ET_LOG(Info, "Attempted to destroy a null handle."); return; @@ -181,13 +167,13 @@ void OpenvinoBackend::destroy(DelegateHandle* handle) const { } ov::element::Type OpenvinoBackend::convert_to_openvino_type( - ScalarType scalar_type) const { + exa::ScalarType scalar_type) const { switch (scalar_type) { - case ScalarType::Float: + case exa::ScalarType::Float: return ov::element::f32; - case ScalarType::Int: + case exa::ScalarType::Int: return ov::element::i32; - case ScalarType::Char: + case exa::ScalarType::Char: return ov::element::i8; default: throw std::runtime_error("Unsupported scalar type"); diff --git a/backends/openvino/runtime/OpenvinoBackend.h b/backends/openvino/runtime/OpenvinoBackend.h index e6f0e8659fb..f285374e5dd 100644 --- a/backends/openvino/runtime/OpenvinoBackend.h +++ b/backends/openvino/runtime/OpenvinoBackend.h @@ -9,9 +9,9 @@ #ifndef OPENVINO_BACKEND_HPP #define OPENVINO_BACKEND_HPP -#include -#include #include +#include +#include #include #include @@ -19,47 +19,38 @@ #include #include +namespace exr = executorch::runtime; +namespace exa = executorch::aten; + using 
namespace std; -using executorch::aten::ScalarType; -using executorch::runtime::ArrayRef; -using executorch::runtime::Backend; -using executorch::runtime::BackendExecutionContext; -using executorch::runtime::BackendInitContext; -using executorch::runtime::CompileSpec; -using executorch::runtime::DelegateHandle; -using executorch::runtime::Error; -using executorch::runtime::EValue; -using executorch::runtime::FreeableBuffer; -using executorch::runtime::MemoryAllocator; -using executorch::runtime::Result; namespace executorch { namespace backends { namespace openvino { typedef struct { - std::shared_ptr compiled_model; - std::shared_ptr infer_request; + std::shared_ptr compiled_model; + std::shared_ptr infer_request; } ExecutionHandle; -class OpenvinoBackend final : public ::executorch::runtime::BackendInterface { +class OpenvinoBackend final : public ::exr::BackendInterface { public: OpenvinoBackend(); ~OpenvinoBackend() = default; virtual bool is_available() const override; - Result init( - BackendInitContext& context, - FreeableBuffer* processed, - ArrayRef compile_specs) const override; - Error execute( - BackendExecutionContext& context, - DelegateHandle* input_handle, - EValue** args) const override; - void destroy(DelegateHandle* handle) const override; + exr::Result init( + exr::BackendInitContext& context, + exr::FreeableBuffer* processed, + exr::ArrayRef compile_specs) const override; + exr::Error execute( + exr::BackendExecutionContext& context, + exr::DelegateHandle* input_handle, + exr::EValue** args) const override; + void destroy(exr::DelegateHandle* handle) const override; private: - ov::element::Type convert_to_openvino_type(ScalarType scalar_type) const; + ov::element::Type convert_to_openvino_type(exa::ScalarType scalar_type) const; }; } // namespace openvino From 0a18afd39823e87cd0743ec09e93f772156624ba Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Mon, 24 Feb 2025 16:11:17 -0800 Subject: [PATCH 097/188] Free processed data after compiling 
model --- backends/openvino/runtime/OpenvinoBackend.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/backends/openvino/runtime/OpenvinoBackend.cpp b/backends/openvino/runtime/OpenvinoBackend.cpp index e5ee60ffb16..e7f4888c6ed 100644 --- a/backends/openvino/runtime/OpenvinoBackend.cpp +++ b/backends/openvino/runtime/OpenvinoBackend.cpp @@ -82,6 +82,9 @@ exr::Result OpenvinoBackend::init( // Import the model auto compiled_model = core.import_model(compiled_stream, device); + // The processed data can be freed since the model is compiled + processed->Free(); + // Allocate an infer request std::shared_ptr infer_request = std::make_shared(compiled_model.create_infer_request()); From 1620154f10c4cf832ac4d95a13c7935008a5237f Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Mon, 24 Feb 2025 15:55:44 -0800 Subject: [PATCH 098/188] Remove directory structure from tutorial readme --- docs/source/build-run-openvino.md | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/docs/source/build-run-openvino.md b/docs/source/build-run-openvino.md index 142f872f2a0..d62467ad346 100644 --- a/docs/source/build-run-openvino.md +++ b/docs/source/build-run-openvino.md @@ -31,37 +31,6 @@ OpenVINO backend supports the following hardware: - Intel discrete GPUs - Intel NPUs -## Directory Structure - -``` -executorch -├── backends -│ └── openvino -│ ├── runtime -│ ├── OpenvinoBackend.cpp -│ └── OpenvinoBackend.hpp -│ ├── scripts -│ └── openvino_build.sh -│ ├── tests -│ ├── CMakeLists.txt -│ ├── README.md -│ ├── __init__.py -│ ├── openvino_functions.yaml -│ ├── partitioner.py -│ ├── preprocess.py -│ └── requirements.txt -└── examples -│ └── openvino -│ ├── aot -│ ├── README.md -│ └── aot_openvino_compiler.py -│ └── executor_runner -│ └── openvino_executor_runner.cpp -│ ├── CMakeLists.txt -│ ├── README.md -└── └── openvino_build_example.sh -``` - ## Instructions for Building OpenVINO Backend ### Prerequisites From 83d19468a0d887bd5a81d2e717ae43df59d1ba02 Mon 
Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Mon, 24 Feb 2025 16:00:27 -0800 Subject: [PATCH 099/188] Fix the path for aot readme file --- docs/source/build-run-openvino.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/build-run-openvino.md b/docs/source/build-run-openvino.md index d62467ad346..0332e11ee60 100644 --- a/docs/source/build-run-openvino.md +++ b/docs/source/build-run-openvino.md @@ -76,7 +76,7 @@ Follow the steps below to setup your build environment: ## Build Instructions for Examples ### AOT step: -Refer to the [README.md](aot/README.md) in the `aot` folder for detailed instructions on exporting deep learning models from various model suites (TIMM, Torchvision, Hugging Face) to openvino backend using Executorch. Users can dynamically specify the model, input shape, and target device. +Refer to the [README.md](../../examples/openvino/aot/README.md) in the `examples/openvino/aot` folder for detailed instructions on exporting deep learning models from various model suites (TIMM, Torchvision, Hugging Face) to openvino backend using Executorch. Users can dynamically specify the model, input shape, and target device. 
Below is an example to export a ResNet50 model from Torchvision model suite for CPU device with an input shape of `[1, 3, 256, 256]` From e854ee0756cd6bf0d75bcc3ae1b7d8722545d6d0 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Mon, 24 Feb 2025 16:03:59 -0800 Subject: [PATCH 100/188] Fix the path into aot folder --- docs/source/build-run-openvino.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/build-run-openvino.md b/docs/source/build-run-openvino.md index 0332e11ee60..e64d1e34618 100644 --- a/docs/source/build-run-openvino.md +++ b/docs/source/build-run-openvino.md @@ -76,12 +76,12 @@ Follow the steps below to setup your build environment: ## Build Instructions for Examples ### AOT step: -Refer to the [README.md](../../examples/openvino/aot/README.md) in the `examples/openvino/aot` folder for detailed instructions on exporting deep learning models from various model suites (TIMM, Torchvision, Hugging Face) to openvino backend using Executorch. Users can dynamically specify the model, input shape, and target device. +Refer to the [README.md](../../examples/openvino/aot/README.md) in the `executorch/examples/openvino/aot` folder for detailed instructions on exporting deep learning models from various model suites (TIMM, Torchvision, Hugging Face) to openvino backend using Executorch. Users can dynamically specify the model, input shape, and target device. Below is an example to export a ResNet50 model from Torchvision model suite for CPU device with an input shape of `[1, 3, 256, 256]` ```bash -cd aot +cd executorch/examples/openvino/aot python aot_openvino_compiler.py --suite torchvision --model resnet50 --input_shape "(1, 3, 256, 256)" --device CPU ``` The exported model will be saved as 'resnet50.pte' in the current directory. 
From 3e1a06111d62683b172a6322fe01692f4ddfe2f3 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Mon, 24 Feb 2025 17:54:16 -0800 Subject: [PATCH 101/188] Move aot instructions to the main example readme file --- examples/openvino/README.md | 117 ++++++++++++++++++++++++++++++++++-- 1 file changed, 112 insertions(+), 5 deletions(-) diff --git a/examples/openvino/README.md b/examples/openvino/README.md index 10d8c3343cf..48993e9fa3b 100644 --- a/examples/openvino/README.md +++ b/examples/openvino/README.md @@ -24,15 +24,122 @@ examples/openvino Follow the [instructions](../../backends/openvino/README.md) of **Prerequisites** and **Setup** in `backends/openvino/README.md` to set up the OpenVINO backend. ## AOT step: -Refer to the [README.md](aot/README.md) in the `aot` folder for detailed instructions on exporting deep learning models from various model suites (TIMM, Torchvision, Hugging Face) to openvino backend using Executorch. Users can dynamically specify the model, input shape, and target device. -Below is an example to export a ResNet50 model from Torchvision model suite for CPU device with an input shape of `[1, 3, 256, 256]` +Within the `aot` folder, you'll find the model export script called `aot_openvino_compiler.py`. This script allows users to export deep learning models from various model suites (TIMM, Torchvision, Hugging Face) to an openvino backend using **Executorch**. Users can dynamically specify the model, input shape, and target device. +### **Usage** + +First, navigate to the `aot` directory by running the command `cd aot`. Then, refer to the instructions provided below. + +#### **Command Structure** ```bash -cd aot -python aot_openvino_compiler.py --suite torchvision --model resnet50 --input_shape "(1, 3, 256, 256)" --device CPU +python aot_openvino_compiler.py --suite --model --input_shape --device ``` -The exported model will be saved as 'resnet50.pte' in the current directory. 
+ +#### **Arguments** +- **`--suite`** (required): + Specifies the model suite to use. + Supported values: + - `timm` (e.g., VGG16, ResNet50) + - `torchvision` (e.g., resnet18, mobilenet_v2) + - `huggingface` (e.g., bert-base-uncased). NB: Quantization and validation is not supported yet. + +- **`--model`** (required): + Name of the model to export. + Examples: + - For `timm`: `vgg16`, `resnet50` + - For `torchvision`: `resnet18`, `mobilenet_v2` + - For `huggingface`: `bert-base-uncased`, `distilbert-base-uncased` + +- **`--input_shape`**(optional): + Input shape for the model. Provide this as a **list** or **tuple**. + Examples: + - `[1, 3, 224, 224]` (Zsh users: wrap in quotes) + - `(1, 3, 224, 224)` + +- **`--batch_size`** : + Batch size for the validation. Default batch_size == 1. + The dataset length must be evenly divisible by the batch size. + +- **`--quantize`** (optional): + Enable model quantization. --dataset argument is required for the quantization. `huggingface` suite is not supported yet. + +- **`--quantization_flow`** (optional): + Specifies the way to quantize torch.fx.GraphModule. + Supported values: + - `nncf`: `nncf quantize_pt2e` API (default) + - `pt2e`: torch ao quantization pipeline. + +- **`--validate`** (optional): + Enable model validation. --dataset argument is required for the validation. `huggingface` suite is not supported yet. + +- **`--dataset`** (optional): + Path to the imagenet-like calibration dataset. + +- **`--device`** (optional) + Target device for the compiled model. Default is `CPU`. 
+ Examples: `CPU`, `GPU` + + +### **Examples** + +#### Export a TIMM VGG16 model for the CPU +```bash +python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU +``` + +#### Export a Torchvision ResNet50 model for the GPU +```bash +python aot_openvino_compiler.py --suite torchvision --model resnet50 --input_shape "(1, 3, 256, 256)" --device GPU +``` + +#### Export a Hugging Face BERT model for the CPU +```bash +python aot_openvino_compiler.py --suite huggingface --model bert-base-uncased --input_shape "(1, 512)" --device CPU +``` +#### Export and validate TIMM Resnet50d model for the CPU +```bash +python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU --validate --dataset /path/to/dataset +``` + +#### Export, quantize and validate TIMM Resnet50d model for the CPU +```bash +python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU --validate --dataset /path/to/dataset --quantize +``` + +### **Notes** +1. **Input Shape in Zsh**: + If you are using Zsh, wrap `--input_shape` in quotes or use a tuple: + ```bash + --input_shape '[1, 3, 224, 224]' + --input_shape "(1, 3, 224, 224)" + ``` + +2. **Model Compatibility**: + Ensure the specified `model_name` exists in the selected `suite`. Use the corresponding library's documentation to verify model availability. + +3. **Output File**: + The exported model will be saved as `.pte` in the current directory. + +4. **Dependencies**: + - Python 3.8+ + - PyTorch + - Executorch + - TIMM (`pip install timm`) + - Torchvision + - Transformers (`pip install transformers`) + +### **Error Handling** +- **Model Not Found**: + If the script raises an error such as: + ```bash + ValueError: Model not found + ``` + Verify that the model name is correct for the chosen suite. + +- **Unsupported Input Shape**: + Ensure `--input_shape` is provided as a valid list or tuple. 
+ ## Build OpenVINO Examples Build the backend and the examples by executing the script: From 5627c5ed7a32efb1820f845b02d4deb1c11ad255 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Mon, 24 Feb 2025 17:55:19 -0800 Subject: [PATCH 102/188] Delete examples/openvino/aot/README.md --- examples/openvino/aot/README.md | 115 -------------------------------- 1 file changed, 115 deletions(-) delete mode 100644 examples/openvino/aot/README.md diff --git a/examples/openvino/aot/README.md b/examples/openvino/aot/README.md deleted file mode 100644 index adfed35eefc..00000000000 --- a/examples/openvino/aot/README.md +++ /dev/null @@ -1,115 +0,0 @@ -# **Model Export Script for Executorch** - -This script allows users to export deep learning models from various model suites (TIMM, Torchvision, Hugging Face) to a openvino backend using **Executorch**. Users can dynamically specify the model, input shape, and target device. - - -## **Usage** - -### **Command Structure** -```bash -python aot_openvino_compiler.py --suite --model --input_shape --device -``` - -### **Arguments** -- **`--suite`** (required): - Specifies the model suite to use. - Supported values: - - `timm` (e.g., VGG16, ResNet50) - - `torchvision` (e.g., resnet18, mobilenet_v2) - - `huggingface` (e.g., bert-base-uncased). NB: Quantization and validation is not supported yet. - -- **`--model`** (required): - Name of the model to export. - Examples: - - For `timm`: `vgg16`, `resnet50` - - For `torchvision`: `resnet18`, `mobilenet_v2` - - For `huggingface`: `bert-base-uncased`, `distilbert-base-uncased` - -- **`--input_shape`**(optional): - Input shape for the model. Provide this as a **list** or **tuple**. - Examples: - - `[1, 3, 224, 224]` (Zsh users: wrap in quotes) - - `(1, 3, 224, 224)` - -- **`--batch_size`** : - Batch size for the validation. Default batch_size == 1. - The dataset length must be evenly divisible by the batch size. - -- **`--quantize`** (optional): - Enable model quantization. 
--dataset argument is requred for the quantization. `huggingface` suite does not supported yet. - -- **`--quantization_flow`** (optional): - Specifies the way to quantize torch.fx.GraphModule. - Supported values: - - `nncf`: `nncf quantize_pt2e` API (default) - - `pt2e`: torch ao quantization pipeline. - -- **`--validate`** (optional): - Enable model validation. --dataset argument is requred for the validation. `huggingface` suite does not supported yet. - -- **`--dataset`** (optional): - Path to the imagenet-like calibration dataset. - -- **`--device`** (optional) - Target device for the compiled model. Default is `CPU`. - Examples: `CPU`, `GPU` - - -## **Examples** - -### Export a TIMM VGG16 model for the CPU -```bash -python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU -``` - -### Export a Torchvision ResNet50 model for the GPU -```bash -python aot_openvino_compiler.py --suite torchvision --model resnet50 --input_shape "(1, 3, 256, 256)" --device GPU -``` - -### Export a Hugging Face BERT model for the CPU -```bash -python aot_openvino_compiler.py --suite huggingface --model bert-base-uncased --input_shape "(1, 512)" --device CPU -``` -### Export and validate TIMM Resnet50d model for the CPU -```bash -python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU --validate --dataset /path/to/dataset -``` - -### Export, quantize and validate TIMM Resnet50d model for the CPU -```bash -python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU --validate --dataset /path/to/dataset --quantize -``` - -## **Notes** -1. **Input Shape in Zsh**: - If you are using Zsh, wrap `--input_shape` in quotes or use a tuple: - ```bash - --input_shape '[1, 3, 224, 224]' - --input_shape "(1, 3, 224, 224)" - ``` - -2. **Model Compatibility**: - Ensure the specified `model_name` exists in the selected `suite`. 
Use the corresponding library's documentation to verify model availability. - -3. **Output File**: - The exported model will be saved as `.pte` in the current directory. - -4. **Dependencies**: - - Python 3.8+ - - PyTorch - - Executorch - - TIMM (`pip install timm`) - - Torchvision - - Transformers (`pip install transformers`) - -## **Error Handling** -- **Model Not Found**: - If the script raises an error such as: - ```bash - ValueError: Model not found - ``` - Verify that the model name is correct for the chosen suite. - -- **Unsupported Input Shape**: - Ensure `--input_shape` is provided as a valid list or tuple. From a993931a1225fac5bcd97a3b9e44c10e7bd29134 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Mon, 24 Feb 2025 18:05:34 -0800 Subject: [PATCH 103/188] Added link for openvino supported hardware --- backends/openvino/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backends/openvino/README.md b/backends/openvino/README.md index 3ac66370c88..7a0ed1b7402 100644 --- a/backends/openvino/README.md +++ b/backends/openvino/README.md @@ -10,6 +10,8 @@ OpenVINO backend supports the following hardware: - Intel discrete GPUs - Intel NPUs +For more information on the supported hardware, please refer to [OpenVINO System Requirements](https://docs.openvino.ai/2025/about-openvino/release-notes-openvino/system-requirements.html) page. 
+ ## Directory Structure ``` From 1e63eff4510310d52bda296ea73554f134c47528 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Mon, 24 Feb 2025 18:36:07 -0800 Subject: [PATCH 104/188] code formatting --- backends/openvino/runtime/OpenvinoBackend.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/openvino/runtime/OpenvinoBackend.cpp b/backends/openvino/runtime/OpenvinoBackend.cpp index e7f4888c6ed..16bdc279a45 100644 --- a/backends/openvino/runtime/OpenvinoBackend.cpp +++ b/backends/openvino/runtime/OpenvinoBackend.cpp @@ -83,7 +83,7 @@ exr::Result OpenvinoBackend::init( auto compiled_model = core.import_model(compiled_stream, device); // The processed data can be freed since the model is compiled - processed->Free(); + processed->Free(); // Allocate an infer request std::shared_ptr infer_request = From 0e76d9382f26ee5f37d4aef6ac03521b13783af6 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Mon, 24 Feb 2025 18:41:54 -0800 Subject: [PATCH 105/188] add aot readme file --- examples/openvino/aot/README.md | 115 ++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 examples/openvino/aot/README.md diff --git a/examples/openvino/aot/README.md b/examples/openvino/aot/README.md new file mode 100644 index 00000000000..adfed35eefc --- /dev/null +++ b/examples/openvino/aot/README.md @@ -0,0 +1,115 @@ +# **Model Export Script for Executorch** + +This script allows users to export deep learning models from various model suites (TIMM, Torchvision, Hugging Face) to a openvino backend using **Executorch**. Users can dynamically specify the model, input shape, and target device. + + +## **Usage** + +### **Command Structure** +```bash +python aot_openvino_compiler.py --suite --model --input_shape --device +``` + +### **Arguments** +- **`--suite`** (required): + Specifies the model suite to use. 
+ Supported values: + - `timm` (e.g., VGG16, ResNet50) + - `torchvision` (e.g., resnet18, mobilenet_v2) + - `huggingface` (e.g., bert-base-uncased). NB: Quantization and validation is not supported yet. + +- **`--model`** (required): + Name of the model to export. + Examples: + - For `timm`: `vgg16`, `resnet50` + - For `torchvision`: `resnet18`, `mobilenet_v2` + - For `huggingface`: `bert-base-uncased`, `distilbert-base-uncased` + +- **`--input_shape`**(optional): + Input shape for the model. Provide this as a **list** or **tuple**. + Examples: + - `[1, 3, 224, 224]` (Zsh users: wrap in quotes) + - `(1, 3, 224, 224)` + +- **`--batch_size`** : + Batch size for the validation. Default batch_size == 1. + The dataset length must be evenly divisible by the batch size. + +- **`--quantize`** (optional): + Enable model quantization. --dataset argument is requred for the quantization. `huggingface` suite does not supported yet. + +- **`--quantization_flow`** (optional): + Specifies the way to quantize torch.fx.GraphModule. + Supported values: + - `nncf`: `nncf quantize_pt2e` API (default) + - `pt2e`: torch ao quantization pipeline. + +- **`--validate`** (optional): + Enable model validation. --dataset argument is requred for the validation. `huggingface` suite does not supported yet. + +- **`--dataset`** (optional): + Path to the imagenet-like calibration dataset. + +- **`--device`** (optional) + Target device for the compiled model. Default is `CPU`. 
+ Examples: `CPU`, `GPU` + + +## **Examples** + +### Export a TIMM VGG16 model for the CPU +```bash +python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU +``` + +### Export a Torchvision ResNet50 model for the GPU +```bash +python aot_openvino_compiler.py --suite torchvision --model resnet50 --input_shape "(1, 3, 256, 256)" --device GPU +``` + +### Export a Hugging Face BERT model for the CPU +```bash +python aot_openvino_compiler.py --suite huggingface --model bert-base-uncased --input_shape "(1, 512)" --device CPU +``` +### Export and validate TIMM Resnet50d model for the CPU +```bash +python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU --validate --dataset /path/to/dataset +``` + +### Export, quantize and validate TIMM Resnet50d model for the CPU +```bash +python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU --validate --dataset /path/to/dataset --quantize +``` + +## **Notes** +1. **Input Shape in Zsh**: + If you are using Zsh, wrap `--input_shape` in quotes or use a tuple: + ```bash + --input_shape '[1, 3, 224, 224]' + --input_shape "(1, 3, 224, 224)" + ``` + +2. **Model Compatibility**: + Ensure the specified `model_name` exists in the selected `suite`. Use the corresponding library's documentation to verify model availability. + +3. **Output File**: + The exported model will be saved as `.pte` in the current directory. + +4. **Dependencies**: + - Python 3.8+ + - PyTorch + - Executorch + - TIMM (`pip install timm`) + - Torchvision + - Transformers (`pip install transformers`) + +## **Error Handling** +- **Model Not Found**: + If the script raises an error such as: + ```bash + ValueError: Model not found + ``` + Verify that the model name is correct for the chosen suite. + +- **Unsupported Input Shape**: + Ensure `--input_shape` is provided as a valid list or tuple. 
From 3b6fd88553dc22e528ce069efb708d50d1989fde Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Mon, 24 Feb 2025 18:42:31 -0800 Subject: [PATCH 106/188] remove aot readme file --- examples/openvino/aot/README.md | 115 -------------------------------- 1 file changed, 115 deletions(-) delete mode 100644 examples/openvino/aot/README.md diff --git a/examples/openvino/aot/README.md b/examples/openvino/aot/README.md deleted file mode 100644 index adfed35eefc..00000000000 --- a/examples/openvino/aot/README.md +++ /dev/null @@ -1,115 +0,0 @@ -# **Model Export Script for Executorch** - -This script allows users to export deep learning models from various model suites (TIMM, Torchvision, Hugging Face) to a openvino backend using **Executorch**. Users can dynamically specify the model, input shape, and target device. - - -## **Usage** - -### **Command Structure** -```bash -python aot_openvino_compiler.py --suite --model --input_shape --device -``` - -### **Arguments** -- **`--suite`** (required): - Specifies the model suite to use. - Supported values: - - `timm` (e.g., VGG16, ResNet50) - - `torchvision` (e.g., resnet18, mobilenet_v2) - - `huggingface` (e.g., bert-base-uncased). NB: Quantization and validation is not supported yet. - -- **`--model`** (required): - Name of the model to export. - Examples: - - For `timm`: `vgg16`, `resnet50` - - For `torchvision`: `resnet18`, `mobilenet_v2` - - For `huggingface`: `bert-base-uncased`, `distilbert-base-uncased` - -- **`--input_shape`**(optional): - Input shape for the model. Provide this as a **list** or **tuple**. - Examples: - - `[1, 3, 224, 224]` (Zsh users: wrap in quotes) - - `(1, 3, 224, 224)` - -- **`--batch_size`** : - Batch size for the validation. Default batch_size == 1. - The dataset length must be evenly divisible by the batch size. - -- **`--quantize`** (optional): - Enable model quantization. --dataset argument is requred for the quantization. `huggingface` suite does not supported yet. 
- -- **`--quantization_flow`** (optional): - Specifies the way to quantize torch.fx.GraphModule. - Supported values: - - `nncf`: `nncf quantize_pt2e` API (default) - - `pt2e`: torch ao quantization pipeline. - -- **`--validate`** (optional): - Enable model validation. --dataset argument is requred for the validation. `huggingface` suite does not supported yet. - -- **`--dataset`** (optional): - Path to the imagenet-like calibration dataset. - -- **`--device`** (optional) - Target device for the compiled model. Default is `CPU`. - Examples: `CPU`, `GPU` - - -## **Examples** - -### Export a TIMM VGG16 model for the CPU -```bash -python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU -``` - -### Export a Torchvision ResNet50 model for the GPU -```bash -python aot_openvino_compiler.py --suite torchvision --model resnet50 --input_shape "(1, 3, 256, 256)" --device GPU -``` - -### Export a Hugging Face BERT model for the CPU -```bash -python aot_openvino_compiler.py --suite huggingface --model bert-base-uncased --input_shape "(1, 512)" --device CPU -``` -### Export and validate TIMM Resnet50d model for the CPU -```bash -python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU --validate --dataset /path/to/dataset -``` - -### Export, quantize and validate TIMM Resnet50d model for the CPU -```bash -python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU --validate --dataset /path/to/dataset --quantize -``` - -## **Notes** -1. **Input Shape in Zsh**: - If you are using Zsh, wrap `--input_shape` in quotes or use a tuple: - ```bash - --input_shape '[1, 3, 224, 224]' - --input_shape "(1, 3, 224, 224)" - ``` - -2. **Model Compatibility**: - Ensure the specified `model_name` exists in the selected `suite`. Use the corresponding library's documentation to verify model availability. - -3. 
**Output File**: - The exported model will be saved as `.pte` in the current directory. - -4. **Dependencies**: - - Python 3.8+ - - PyTorch - - Executorch - - TIMM (`pip install timm`) - - Torchvision - - Transformers (`pip install transformers`) - -## **Error Handling** -- **Model Not Found**: - If the script raises an error such as: - ```bash - ValueError: Model not found - ``` - Verify that the model name is correct for the chosen suite. - -- **Unsupported Input Shape**: - Ensure `--input_shape` is provided as a valid list or tuple. From 99c832bbdde4e9a066c81a1396d69aa8ca3093c1 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Mon, 24 Feb 2025 18:43:19 -0800 Subject: [PATCH 107/188] add aot readme file --- examples/openvino/aot/README.md | 115 ++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 examples/openvino/aot/README.md diff --git a/examples/openvino/aot/README.md b/examples/openvino/aot/README.md new file mode 100644 index 00000000000..adfed35eefc --- /dev/null +++ b/examples/openvino/aot/README.md @@ -0,0 +1,115 @@ +# **Model Export Script for Executorch** + +This script allows users to export deep learning models from various model suites (TIMM, Torchvision, Hugging Face) to a openvino backend using **Executorch**. Users can dynamically specify the model, input shape, and target device. + + +## **Usage** + +### **Command Structure** +```bash +python aot_openvino_compiler.py --suite --model --input_shape --device +``` + +### **Arguments** +- **`--suite`** (required): + Specifies the model suite to use. + Supported values: + - `timm` (e.g., VGG16, ResNet50) + - `torchvision` (e.g., resnet18, mobilenet_v2) + - `huggingface` (e.g., bert-base-uncased). NB: Quantization and validation is not supported yet. + +- **`--model`** (required): + Name of the model to export. 
+ Examples: + - For `timm`: `vgg16`, `resnet50` + - For `torchvision`: `resnet18`, `mobilenet_v2` + - For `huggingface`: `bert-base-uncased`, `distilbert-base-uncased` + +- **`--input_shape`**(optional): + Input shape for the model. Provide this as a **list** or **tuple**. + Examples: + - `[1, 3, 224, 224]` (Zsh users: wrap in quotes) + - `(1, 3, 224, 224)` + +- **`--batch_size`** : + Batch size for the validation. Default batch_size == 1. + The dataset length must be evenly divisible by the batch size. + +- **`--quantize`** (optional): + Enable model quantization. --dataset argument is requred for the quantization. `huggingface` suite does not supported yet. + +- **`--quantization_flow`** (optional): + Specifies the way to quantize torch.fx.GraphModule. + Supported values: + - `nncf`: `nncf quantize_pt2e` API (default) + - `pt2e`: torch ao quantization pipeline. + +- **`--validate`** (optional): + Enable model validation. --dataset argument is requred for the validation. `huggingface` suite does not supported yet. + +- **`--dataset`** (optional): + Path to the imagenet-like calibration dataset. + +- **`--device`** (optional) + Target device for the compiled model. Default is `CPU`. 
+ Examples: `CPU`, `GPU` + + +## **Examples** + +### Export a TIMM VGG16 model for the CPU +```bash +python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU +``` + +### Export a Torchvision ResNet50 model for the GPU +```bash +python aot_openvino_compiler.py --suite torchvision --model resnet50 --input_shape "(1, 3, 256, 256)" --device GPU +``` + +### Export a Hugging Face BERT model for the CPU +```bash +python aot_openvino_compiler.py --suite huggingface --model bert-base-uncased --input_shape "(1, 512)" --device CPU +``` +### Export and validate TIMM Resnet50d model for the CPU +```bash +python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU --validate --dataset /path/to/dataset +``` + +### Export, quantize and validate TIMM Resnet50d model for the CPU +```bash +python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU --validate --dataset /path/to/dataset --quantize +``` + +## **Notes** +1. **Input Shape in Zsh**: + If you are using Zsh, wrap `--input_shape` in quotes or use a tuple: + ```bash + --input_shape '[1, 3, 224, 224]' + --input_shape "(1, 3, 224, 224)" + ``` + +2. **Model Compatibility**: + Ensure the specified `model_name` exists in the selected `suite`. Use the corresponding library's documentation to verify model availability. + +3. **Output File**: + The exported model will be saved as `.pte` in the current directory. + +4. **Dependencies**: + - Python 3.8+ + - PyTorch + - Executorch + - TIMM (`pip install timm`) + - Torchvision + - Transformers (`pip install transformers`) + +## **Error Handling** +- **Model Not Found**: + If the script raises an error such as: + ```bash + ValueError: Model not found + ``` + Verify that the model name is correct for the chosen suite. + +- **Unsupported Input Shape**: + Ensure `--input_shape` is provided as a valid list or tuple. 
From 73431cf84dbc11986b3255451baf2a12451c8b8e Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Mon, 24 Feb 2025 19:21:13 -0800 Subject: [PATCH 108/188] change make command to use max threads available --- backends/openvino/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/openvino/README.md b/backends/openvino/README.md index 7a0ed1b7402..36fb19c783c 100644 --- a/backends/openvino/README.md +++ b/backends/openvino/README.md @@ -57,7 +57,7 @@ cd openvino && git checkout 20ad7cb git submodule update --init --recursive mkdir build && cd build cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_PYTHON=ON -make -j +make -j$(nproc) cd .. cmake --install build --prefix From bb284e4e3afae8712c3629020d1d9cce1e5a8963 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Mon, 24 Feb 2025 19:48:54 -0800 Subject: [PATCH 109/188] Renamed yaml file name for unsupported ops --- backends/openvino/README.md | 2 +- ...nvino_functions.yaml => unsupported_openvino_functions.yaml} | 0 examples/openvino/CMakeLists.txt | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename backends/openvino/{openvino_functions.yaml => unsupported_openvino_functions.yaml} (100%) diff --git a/backends/openvino/README.md b/backends/openvino/README.md index 36fb19c783c..bf050a1084b 100644 --- a/backends/openvino/README.md +++ b/backends/openvino/README.md @@ -27,7 +27,7 @@ executorch │ ├── CMakeLists.txt │ ├── README.md │ ├── __init__.py -│ ├── openvino_functions.yaml +│ ├── unsupported_openvino_functions.yaml │ ├── partitioner.py │ ├── preprocess.py │ └── requirements.txt diff --git a/backends/openvino/openvino_functions.yaml b/backends/openvino/unsupported_openvino_functions.yaml similarity index 100% rename from backends/openvino/openvino_functions.yaml rename to backends/openvino/unsupported_openvino_functions.yaml diff --git a/examples/openvino/CMakeLists.txt b/examples/openvino/CMakeLists.txt index af68880d426..4b4d82ffd90 100644 --- a/examples/openvino/CMakeLists.txt 
+++ b/examples/openvino/CMakeLists.txt @@ -41,7 +41,7 @@ include_directories(${EXECUTORCH_INCLUDE_DIRS}) gen_selected_ops(LIB_NAME "openvino_portable_ops_lib" INCLUDE_ALL_OPS "ON") generate_bindings_for_kernels( LIB_NAME "openvino_portable_ops_lib" FUNCTIONS_YAML - ${EXECUTORCH_ROOT}/backends/openvino/openvino_functions.yaml + ${EXECUTORCH_ROOT}/backends/openvino/unsupported_openvino_functions.yaml ) gen_operators_lib( LIB_NAME "openvino_portable_ops_lib" KERNEL_LIBS portable_kernels DEPS executorch From ac8df240446391a8ba38be165f6bf76aad684728 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Tue, 25 Feb 2025 12:16:25 -0800 Subject: [PATCH 110/188] License comment line update --- backends/openvino/CMakeLists.txt | 4 ++-- backends/openvino/partitioner.py | 4 ++-- backends/openvino/preprocess.py | 4 ++-- backends/openvino/quantizer/quantizer.py | 4 ++-- backends/openvino/runtime/OpenvinoBackend.cpp | 11 +++++------ backends/openvino/runtime/OpenvinoBackend.h | 11 +++++------ examples/openvino/CMakeLists.txt | 4 ++-- examples/openvino/aot/aot_openvino_compiler.py | 4 ++-- .../executor_runner/openvino_executor_runner.cpp | 11 +++++------ 9 files changed, 27 insertions(+), 30 deletions(-) diff --git a/backends/openvino/CMakeLists.txt b/backends/openvino/CMakeLists.txt index 9b834d5e31e..4046265f21f 100644 --- a/backends/openvino/CMakeLists.txt +++ b/backends/openvino/CMakeLists.txt @@ -1,8 +1,8 @@ # Copyright (c) Intel Corporation # # Licensed under the BSD License (the "License"); you may not use this file -# except in compliance with the License. See the license file in the root -# directory of this source tree for more details. +# except in compliance with the License. See the license file found in the +# LICENSE file in the root directory of this source tree. 
# Set minimum required CMake version cmake_minimum_required(VERSION 3.19) diff --git a/backends/openvino/partitioner.py b/backends/openvino/partitioner.py index 78dfb117bee..7aa8348e680 100644 --- a/backends/openvino/partitioner.py +++ b/backends/openvino/partitioner.py @@ -1,8 +1,8 @@ # Copyright (c) Intel Corporation # # Licensed under the BSD License (the "License"); you may not use this file -# except in compliance with the License. See the license file in the root -# directory of this source tree for more details. +# except in compliance with the License. See the license file found in the +# LICENSE file in the root directory of this source tree. from typing import Callable, final, List, Optional, Tuple diff --git a/backends/openvino/preprocess.py b/backends/openvino/preprocess.py index 7ea5c8c825d..5c8398bc51d 100644 --- a/backends/openvino/preprocess.py +++ b/backends/openvino/preprocess.py @@ -1,8 +1,8 @@ # Copyright (c) Intel Corporation # # Licensed under the BSD License (the "License"); you may not use this file -# except in compliance with the License. See the license file in the root -# directory of this source tree for more details. +# except in compliance with the License. See the license file found in the +# LICENSE file in the root directory of this source tree. from typing import final, List diff --git a/backends/openvino/quantizer/quantizer.py b/backends/openvino/quantizer/quantizer.py index f82d0745e8b..f7bf8ad2786 100644 --- a/backends/openvino/quantizer/quantizer.py +++ b/backends/openvino/quantizer/quantizer.py @@ -1,8 +1,8 @@ # Copyright (c) Intel Corporation # # Licensed under the BSD License (the "License"); you may not use this file -# except in compliance with the License. See the license file in the root -# directory of this source tree for more details. +# except in compliance with the License. See the license file found in the +# LICENSE file in the root directory of this source tree. 
from collections import defaultdict from enum import Enum diff --git a/backends/openvino/runtime/OpenvinoBackend.cpp b/backends/openvino/runtime/OpenvinoBackend.cpp index 16bdc279a45..bd9ab893880 100644 --- a/backends/openvino/runtime/OpenvinoBackend.cpp +++ b/backends/openvino/runtime/OpenvinoBackend.cpp @@ -1,9 +1,8 @@ -/* - * Copyright (c) Intel Corporation - * - * Licensed under the BSD License (the "License"); you may not use this file - * except in compliance with the License. See the license file in the root - * directory of this source tree for more details. +/* Copyright (c) Intel Corporation + * + * Licensed under the BSD License (the "License"); you may not use this file + * except in compliance with the License. See the license file found in the + * LICENSE file in the root directory of this source tree. */ #include diff --git a/backends/openvino/runtime/OpenvinoBackend.h b/backends/openvino/runtime/OpenvinoBackend.h index f285374e5dd..7599c0d3b8b 100644 --- a/backends/openvino/runtime/OpenvinoBackend.h +++ b/backends/openvino/runtime/OpenvinoBackend.h @@ -1,9 +1,8 @@ -/* - * Copyright (c) Intel Corporation - * - * Licensed under the BSD License (the "License"); you may not use this file - * except in compliance with the License. See the license file in the root - * directory of this source tree for more details. +/* Copyright (c) Intel Corporation + * + * Licensed under the BSD License (the "License"); you may not use this file + * except in compliance with the License. See the license file found in the + * LICENSE file in the root directory of this source tree. 
*/ #ifndef OPENVINO_BACKEND_HPP diff --git a/examples/openvino/CMakeLists.txt b/examples/openvino/CMakeLists.txt index 4b4d82ffd90..3143b8641f7 100644 --- a/examples/openvino/CMakeLists.txt +++ b/examples/openvino/CMakeLists.txt @@ -1,8 +1,8 @@ # Copyright (c) Intel Corporation # # Licensed under the BSD License (the "License"); you may not use this file -# except in compliance with the License. See the license file in the root -# directory of this source tree for more details. +# except in compliance with the License. See the license file found in the +# LICENSE file in the root directory of this source tree. cmake_minimum_required(VERSION 3.19) project(openvino_runner_example) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 61251dc0ef5..f4016e8742d 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -1,8 +1,8 @@ # Copyright (c) Intel Corporation # # Licensed under the BSD License (the "License"); you may not use this file -# except in compliance with the License. See the license file in the root -# directory of this source tree for more details. +# except in compliance with the License. See the license file found in the +# LICENSE file in the root directory of this source tree. import argparse import os diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp index 92b358ef24d..f3300d70802 100644 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -1,9 +1,8 @@ -/* - * Copyright (c) Intel Corporation - * - * Licensed under the BSD License (the "License"); you may not use this file - * except in compliance with the License. See the license file in the root - * directory of this source tree for more details. 
+/* Copyright (c) Intel Corporation + * + * Licensed under the BSD License (the "License"); you may not use this file + * except in compliance with the License. See the license file found in the + * LICENSE file in the root directory of this source tree. */ #include From 60512751f8284f2b509db73e5f97066b314fb4c9 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Tue, 25 Feb 2025 14:44:38 -0800 Subject: [PATCH 111/188] atol and rtol update --- backends/openvino/tests/ops/base_openvino_op_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/openvino/tests/ops/base_openvino_op_test.py b/backends/openvino/tests/ops/base_openvino_op_test.py index 4a18b2995f8..36d1a702538 100644 --- a/backends/openvino/tests/ops/base_openvino_op_test.py +++ b/backends/openvino/tests/ops/base_openvino_op_test.py @@ -18,8 +18,8 @@ class BaseOpenvinoOpTest(unittest.TestCase): device = "CPU" build_folder = "" - atol = 1e-1 - rtol = 1e-1 + atol = 1e-5 + rtol = 1e-5 def execute_layer_test( self, From 0275c47f78b7947455b31126a01fb00bb1ad1cab Mon Sep 17 00:00:00 2001 From: daniil-lyakhov Date: Wed, 26 Feb 2025 11:17:40 +0100 Subject: [PATCH 112/188] Comments --- backends/openvino/quantizer/quantizer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backends/openvino/quantizer/quantizer.py b/backends/openvino/quantizer/quantizer.py index 501bf5142dd..b5de6bc240e 100644 --- a/backends/openvino/quantizer/quantizer.py +++ b/backends/openvino/quantizer/quantizer.py @@ -6,7 +6,6 @@ from collections import defaultdict from enum import Enum -from itertools import islice from typing import Dict, List, Optional, Tuple import nncf From cec2fb06b1b0cbd9bf3ba86560d15e4290d573ea Mon Sep 17 00:00:00 2001 From: suryasidd Date: Wed, 26 Feb 2025 05:28:05 -0800 Subject: [PATCH 113/188] Update cmake-out directory --- backends/openvino/scripts/openvino_build.sh | 4 ++-- examples/openvino/CMakeLists.txt | 6 +++--- examples/openvino/aot/aot_openvino_compiler.py | 2 +- 
examples/openvino/openvino_build_example.sh | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/backends/openvino/scripts/openvino_build.sh b/backends/openvino/scripts/openvino_build.sh index 2a8a25511ac..836e976278c 100755 --- a/backends/openvino/scripts/openvino_build.sh +++ b/backends/openvino/scripts/openvino_build.sh @@ -9,7 +9,7 @@ echo EXECUTORCH_ROOT=${EXECUTORCH_ROOT} main() { # Set build directory - local build_dir="cmake-openvino-out" + local build_dir="cmake-out" # Create and enter the build directory cd "$EXECUTORCH_ROOT" @@ -28,7 +28,7 @@ main() { # Build the project - cmake --build cmake-openvino-out --target install --config Release -j5 + cmake --build cmake-out --target install --config Release -j5 # Switch back to the original directory cd - > /dev/null diff --git a/examples/openvino/CMakeLists.txt b/examples/openvino/CMakeLists.txt index af68880d426..15791bfeb29 100644 --- a/examples/openvino/CMakeLists.txt +++ b/examples/openvino/CMakeLists.txt @@ -30,7 +30,7 @@ endif() set(_common_compile_options -Wno-deprecated-declarations -fPIC) set(_common_include_directories ${EXECUTORCH_ROOT}/..) 
-set(_openvino_executor_runner__srcs +set(_openvino_executor_runner__srcs ${CMAKE_CURRENT_LIST_DIR}/../openvino/executor_runner/openvino_executor_runner.cpp ) @@ -53,11 +53,11 @@ target_include_directories(openvino_portable_ops_lib PUBLIC ${_common_include_di add_executable(openvino_executor_runner ${_openvino_executor_runner__srcs}) target_include_directories( - openvino_executor_runner PUBLIC ${_common_include_directories} ${EXECUTORCH_ROOT}/cmake-openvino-out/third-party/gflags/include + openvino_executor_runner PUBLIC ${_common_include_directories} ${EXECUTORCH_ROOT}/cmake-out/third-party/gflags/include ) # Set Library Directory -set(LIBRARY_DIR "${CMAKE_CURRENT_LIST_DIR}/../../cmake-openvino-out/lib/;${CMAKE_CURRENT_LIST_DIR}/../../cmake-openvino-out/third-party/gflags") +set(LIBRARY_DIR "${CMAKE_CURRENT_LIST_DIR}/../../cmake-out/lib/;${CMAKE_CURRENT_LIST_DIR}/../../cmake-out/third-party/gflags") message(STATUS "Library directory path: ${LIBRARY_DIR}") # Locate OpenVINO Backend Library diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 61251dc0ef5..6e92d8436ee 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -206,7 +206,7 @@ def validate_model( subprocess.run( [ - "../../../cmake-openvino-out/examples/openvino/openvino_executor_runner", + "../../../cmake-out/examples/openvino/openvino_executor_runner", f"--model_path={model_file_name}", f"--input_list_path={inp_list_file}", f"--output_folder_path={out_path}", diff --git a/examples/openvino/openvino_build_example.sh b/examples/openvino/openvino_build_example.sh index c1b6224ec21..0c585c25288 100755 --- a/examples/openvino/openvino_build_example.sh +++ b/examples/openvino/openvino_build_example.sh @@ -9,7 +9,7 @@ echo EXECUTORCH_ROOT=${EXECUTORCH_ROOT} main() { # Set build directory - local build_dir="cmake-openvino-out" + local build_dir="cmake-out" # Create and enter the build 
directory cd "$EXECUTORCH_ROOT" @@ -29,7 +29,7 @@ main() { # Build the project - cmake --build cmake-openvino-out --target install --config Release -j$(nproc) + cmake --build cmake-out --target install --config Release -j$(nproc) ## Build example local example_dir=examples/openvino From 53e7c172ac38b20a0bd470719dbf6794e88a5f86 Mon Sep 17 00:00:00 2001 From: suryasidd Date: Wed, 26 Feb 2025 13:13:23 -0800 Subject: [PATCH 114/188] Removed unused packages --- backends/openvino/requirements.txt | 7 ------- 1 file changed, 7 deletions(-) diff --git a/backends/openvino/requirements.txt b/backends/openvino/requirements.txt index 91f0c0e802f..50fb2b68816 100644 --- a/backends/openvino/requirements.txt +++ b/backends/openvino/requirements.txt @@ -1,9 +1,2 @@ -datasets -huggingface-hub -safetensors -sentencepiece -tokenizers transformers -piq -pillow git+https://github.com/openvinotoolkit/nncf@191b53d#egg=nncf From ee54b2fbecada1c57ce57e03fd085c466131cbda Mon Sep 17 00:00:00 2001 From: daniil-lyakhov Date: Thu, 27 Feb 2025 11:13:08 +0100 Subject: [PATCH 115/188] import nncf.torch --- examples/openvino/aot/aot_openvino_compiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 257c8fb50b4..d8de7397287 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -12,7 +12,7 @@ import executorch -import nncf +import nncf.torch import numpy as np import timm import torch From bc29b461c7dcc5288099246807e81c36120bccf9 Mon Sep 17 00:00:00 2001 From: suryasidd Date: Thu, 27 Feb 2025 16:35:27 -0800 Subject: [PATCH 116/188] Code cleanup --- backends/openvino/scripts/openvino_build.sh | 2 +- examples/openvino/openvino_build_example.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/openvino/scripts/openvino_build.sh b/backends/openvino/scripts/openvino_build.sh index 
836e976278c..357231e9692 100755 --- a/backends/openvino/scripts/openvino_build.sh +++ b/backends/openvino/scripts/openvino_build.sh @@ -28,7 +28,7 @@ main() { # Build the project - cmake --build cmake-out --target install --config Release -j5 + cmake --build ${build_dir} --target install --config Release -j$(nproc) # Switch back to the original directory cd - > /dev/null diff --git a/examples/openvino/openvino_build_example.sh b/examples/openvino/openvino_build_example.sh index 0c585c25288..5a99b27f08c 100755 --- a/examples/openvino/openvino_build_example.sh +++ b/examples/openvino/openvino_build_example.sh @@ -29,7 +29,7 @@ main() { # Build the project - cmake --build cmake-out --target install --config Release -j$(nproc) + cmake --build ${build_dir} --target install --config Release -j$(nproc) ## Build example local example_dir=examples/openvino From 751f087a93f9cf6471ad6158c13020962ddc8c22 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Mon, 3 Mar 2025 21:50:27 -0800 Subject: [PATCH 117/188] openvino backend: static lib, pybinding, and test updates --- CMakeLists.txt | 4 + backends/openvino/CMakeLists.txt | 7 +- backends/openvino/runtime/OpenvinoBackend.cpp | 13 +- backends/openvino/runtime/OpenvinoBackend.h | 2 +- backends/openvino/scripts/openvino_build.sh | 75 +++++++--- .../tests/ops/base_openvino_op_test.py | 132 ++++-------------- backends/openvino/tests/test_runner.py | 10 -- examples/openvino/CMakeLists.txt | 12 +- .../openvino_executor_runner.cpp | 2 +- install_executorch.py | 2 +- 10 files changed, 105 insertions(+), 154 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 61c4ee64a9a..7951adc5cda 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -798,6 +798,10 @@ if(EXECUTORCH_BUILD_PYBIND) list(APPEND _dep_libs mpsdelegate) endif() + if(EXECUTORCH_BUILD_OPENVINO) + list(APPEND _dep_libs openvino_backend) + endif() + if(EXECUTORCH_BUILD_XNNPACK) # need to explicitly specify XNNPACK and microkernels-prod # here otherwise uses XNNPACK 
and microkernel-prod symbols from libtorch_cpu diff --git a/backends/openvino/CMakeLists.txt b/backends/openvino/CMakeLists.txt index 4046265f21f..38767526ff6 100644 --- a/backends/openvino/CMakeLists.txt +++ b/backends/openvino/CMakeLists.txt @@ -28,10 +28,11 @@ set(COMMON_INCLUDE_DIRS ${EXECUTORCH_ROOT}/..) # Include utility CMake scripts from ExecuteTorch include(${EXECUTORCH_ROOT}/build/Utils.cmake) +# Find OpenVINO libraries find_package(OpenVINO REQUIRED) -# Define OpenVINO backend as a shared library -add_library(openvino_backend SHARED .) +# Define OpenVINO backend as a static library +add_library(openvino_backend STATIC .) # Enable exceptions and RTTI for OpenVINO backend target_compile_options(openvino_backend PRIVATE -frtti -fexceptions) @@ -45,5 +46,7 @@ target_link_libraries(openvino_backend PRIVATE openvino::runtime executorch_core # Add source files for OpenVINO backend target_sources(openvino_backend PRIVATE ${CMAKE_CURRENT_LIST_DIR}/runtime/OpenvinoBackend.cpp) +target_link_options_shared_lib(openvino_backend) + # Install OpenVINO backend library to the lib directory install(TARGETS openvino_backend DESTINATION lib) diff --git a/backends/openvino/runtime/OpenvinoBackend.cpp b/backends/openvino/runtime/OpenvinoBackend.cpp index bd9ab893880..8aff2f25c39 100644 --- a/backends/openvino/runtime/OpenvinoBackend.cpp +++ b/backends/openvino/runtime/OpenvinoBackend.cpp @@ -1,5 +1,5 @@ /* Copyright (c) Intel Corporation - * + * * Licensed under the BSD License (the "License"); you may not use this file * except in compliance with the License. See the license file found in the * LICENSE file in the root directory of this source tree. @@ -23,15 +23,7 @@ namespace executorch { namespace backends { namespace openvino { -OpenvinoBackend::OpenvinoBackend() { - if (!is_available()) { - // ET_LOG(Error, "OpenVINO runtime is not available. 
Initialization - // failed."); - throw std::runtime_error("OpenVINO runtime not available"); - } - - // ET_LOG(Info, "OpenVINO runtime successfully verified and initialized."); -} +OpenvinoBackend::OpenvinoBackend() {} bool OpenvinoBackend::is_available() const { try { @@ -92,6 +84,7 @@ exr::Result OpenvinoBackend::init( exr::MemoryAllocator* allocator = context.get_runtime_allocator(); ExecutionHandle* handle = ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(allocator, ExecutionHandle); + new (handle) ExecutionHandle; handle->compiled_model = std::make_shared(compiled_model); handle->infer_request = infer_request; diff --git a/backends/openvino/runtime/OpenvinoBackend.h b/backends/openvino/runtime/OpenvinoBackend.h index 7599c0d3b8b..5956d6d90f7 100644 --- a/backends/openvino/runtime/OpenvinoBackend.h +++ b/backends/openvino/runtime/OpenvinoBackend.h @@ -1,5 +1,5 @@ /* Copyright (c) Intel Corporation - * + * * Licensed under the BSD License (the "License"); you may not use this file * except in compliance with the License. See the license file found in the * LICENSE file in the root directory of this source tree. 
diff --git a/backends/openvino/scripts/openvino_build.sh b/backends/openvino/scripts/openvino_build.sh index 357231e9692..5a1c5712563 100755 --- a/backends/openvino/scripts/openvino_build.sh +++ b/backends/openvino/scripts/openvino_build.sh @@ -8,27 +8,60 @@ EXECUTORCH_ROOT=$(realpath "$(dirname "$0")/../../..") echo EXECUTORCH_ROOT=${EXECUTORCH_ROOT} main() { - # Set build directory - local build_dir="cmake-out" - - # Create and enter the build directory - cd "$EXECUTORCH_ROOT" - rm -rf "${build_dir}" - - # Configure the project with CMake - # Note: Add any additional configuration options you need here - cmake -DCMAKE_INSTALL_PREFIX="${build_dir}" \ - -DCMAKE_BUILD_TYPE=Release \ - -DEXECUTORCH_BUILD_OPENVINO=ON \ - -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ - -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ - -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \ - -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ - -B"${build_dir}" - - - # Build the project - cmake --build ${build_dir} --target install --config Release -j$(nproc) + build_type=${1:-"cpp_runtime"} + + # If the first arguments is cpp_runtime (default), build libraries for C++ runtime + if [[ -z "$build_type" || "$build_type" == "cpp_runtime" ]]; then + echo "Building C++ Runtime Libraries" + + # Set build directory + local build_dir="cmake-out" + + # Create and enter the build directory + cd "$EXECUTORCH_ROOT" + rm -rf "${build_dir}" + + # Configure the project with CMake + # Note: Add any additional configuration options you need here + cmake -DCMAKE_INSTALL_PREFIX="${build_dir}" \ + -DCMAKE_BUILD_TYPE=Release \ + -DEXECUTORCH_BUILD_OPENVINO=ON \ + -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ + -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ + -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \ + -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ + -B"${build_dir}" + + + # Build the project + cmake --build ${build_dir} --target install --config Release -j$(nproc) + + # If the first arguments is pybinding, build python package with pybinding 
+ elif [[ "$build_type" == "pybinding" ]]; then + echo "Building Python Package with Pybinding" + + # Create and enter the build directory + cd "$EXECUTORCH_ROOT" + ./install_executorch.sh --clean + + # Set parameters to configure the project with CMake + # Note: Add any additional configuration options you need here + export CMAKE_ARGS="-DEXECUTORCH_BUILD_OPENVINO=ON \ + -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ + -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ + -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \ + -DEXECUTORCH_ENABLE_LOGGING=ON \ + -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ + -DEXECUTORCH_BUILD_PYBIND=ON" + export CMAKE_BUILD_ARGS="--target openvino_backend" + + # Build the package + pip install . --no-build-isolation + + else + echo "Error: Argument is not valid: $build_type" + exit 1 # Exit the script with an error code + fi # Switch back to the original directory cd - > /dev/null diff --git a/backends/openvino/tests/ops/base_openvino_op_test.py b/backends/openvino/tests/ops/base_openvino_op_test.py index 36d1a702538..397664f027a 100644 --- a/backends/openvino/tests/ops/base_openvino_op_test.py +++ b/backends/openvino/tests/ops/base_openvino_op_test.py @@ -1,22 +1,20 @@ -import os -import subprocess -import tempfile import unittest import executorch - -import numpy as np import torch from executorch.backends.openvino.partitioner import OpenvinoPartitioner from executorch.backends.openvino.preprocess import OpenvinoBackend from executorch.exir import EdgeProgramManager, to_edge_transform_and_lower from executorch.exir.backend.backend_details import CompileSpec + +from executorch.extension.pybindings.portable_lib import ( # @manual + _load_for_executorch_from_buffer, +) from torch.export import export, ExportedProgram class BaseOpenvinoOpTest(unittest.TestCase): device = "CPU" - build_folder = "" atol = 1e-5 rtol = 1e-5 @@ -61,107 +59,27 @@ def execute_layer_test( # Execute the model and compare the outputs with the reference outputs if 
assert_output_equal: - with tempfile.TemporaryDirectory() as tmp_dir: - input_list = "" - for idx, _ in enumerate(sample_inputs): - input_name = f"input_0_{idx}.raw" - input_list += input_name + " " - input_list = input_list.strip() + "\n" - - output_dir = f"{tmp_dir}/outputs" - - # Execute the module in eager mode to calculate the reference outputs - ref_output = module(*sample_inputs) - if isinstance(ref_output, torch.Tensor): - ref_output = [ - ref_output, - ] - - # Serialize the executorch model and save into a temporary file - pte_fname = f"{tmp_dir}/openvino_executorch_test.pte" - with open(pte_fname, "wb") as file: - exec_prog.write_to_file(file) - - # Save inputs into a temporary file - self.generate_inputs( - tmp_dir, "input_list.txt", [sample_inputs], input_list - ) - self.make_output_dir(output_dir) - - # Start a subprocess to execute model with openvino_executor_runner - cmd = [ - f"{self.build_folder}/examples/openvino/openvino_executor_runner", - "--model_path", - pte_fname, - "--input_list_path", - f"{tmp_dir}/input_list.txt", - "--output_folder_path", - output_dir, + # Execute the module in eager mode to calculate the reference outputs + ref_output = module(*sample_inputs) + if isinstance(ref_output, torch.Tensor): + ref_output = [ + ref_output, ] - env = dict(os.environ) - proc = subprocess.run( - cmd, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - env=env, + # Load model from buffer and execute + executorch_module = _load_for_executorch_from_buffer(exec_prog.buffer) + outputs = executorch_module.run_method("forward", sample_inputs) + + # Compare the outputs with the reference outputs + self.assertTrue(len(ref_output) == len(outputs)) + for i in range(len(ref_output)): + self.assertTrue( + torch.allclose( + outputs[i], + ref_output[i], + atol=self.atol, + rtol=self.rtol, + equal_nan=True, + ), + msg=f"ref_output:\n{ref_output[i]}\n\ntest_output:\n{outputs[i]}", ) - - # Check if execution completed successfully - 
self.assertTrue(proc.returncode == 0) - - # Read the outputs from the temporary files - output_dir = f"{tmp_dir}/outputs" - outputs = [] - - for i, f in enumerate(sorted(os.listdir(output_dir))): - filename = os.path.join(output_dir, f) - output = np.fromfile( - filename, dtype=ref_output[i].detach().numpy().dtype - ) - output = torch.from_numpy(output).reshape(ref_output[i].shape) - outputs.append(output) - - # Compare the outputs with the reference outputs - self.assertTrue(len(ref_output) == len(outputs)) - for i in range(len(ref_output)): - self.assertTrue( - torch.allclose( - outputs[i], - ref_output[i], - atol=self.atol, - rtol=self.rtol, - equal_nan=True, - ), - msg=f"ref_output:\n{ref_output[i]}\n\ntest_output:\n{outputs[i]}", - ) - - def generate_inputs( - self, dest_path: str, file_name: str, inputs=None, input_list=None - ): - input_list_file = None - input_files = [] - - # Prepare input list - if input_list is not None: - input_list_file = f"{dest_path}/{file_name}" - with open(input_list_file, "w") as f: - f.write(input_list) - f.flush() - - # Prepare input data - if inputs is not None: - for idx, data in enumerate(inputs): - for i, d in enumerate(data): - file_name = f"{dest_path}/input_{idx}_{i}.raw" - d.detach().numpy().tofile(file_name) - input_files.append(file_name) - - return input_list_file, input_files - - def make_output_dir(self, path: str): - if os.path.exists(path): - for f in os.listdir(path): - os.remove(os.path.join(path, f)) - os.removedirs(path) - os.makedirs(path) diff --git a/backends/openvino/tests/test_runner.py b/backends/openvino/tests/test_runner.py index 4021114f60f..8bf103530d4 100644 --- a/backends/openvino/tests/test_runner.py +++ b/backends/openvino/tests/test_runner.py @@ -18,8 +18,6 @@ def addTest(self, test): if isinstance(test, BaseOpenvinoOpTest): if "device" in self.test_params: test.device = self.test_params["device"] - if "build_folder" in self.test_params: - test.build_folder = self.test_params["build_folder"] # 
Call the original addTest method to actually add the test to the suite super().addTest(test) @@ -27,13 +25,6 @@ def addTest(self, test): def parse_arguments(): parser = argparse.ArgumentParser() - parser.add_argument( - "-b", - "--build_folder", - help="path to cmake binary directory", - type=str, - required=True, - ) parser.add_argument( "-s", "--device", @@ -60,7 +51,6 @@ def parse_arguments(): args, ns_args = parser.parse_known_args(namespace=unittest) test_params = {} test_params["device"] = args.device - test_params["build_folder"] = args.build_folder test_params["pattern"] = args.pattern test_params["test_type"] = args.test_type return test_params diff --git a/examples/openvino/CMakeLists.txt b/examples/openvino/CMakeLists.txt index 256e0b369e5..170805919a0 100644 --- a/examples/openvino/CMakeLists.txt +++ b/examples/openvino/CMakeLists.txt @@ -66,6 +66,13 @@ if(NOT OPENVINO_BACKEND_LIB) message(FATAL_ERROR "OpenVINO backend library not found in ${LIBRARY_DIR}") endif() +# Locate OpenVINO Library +find_package(OpenVINO REQUIRED) + +# Add OpenVINO Backend Library +add_library(openvino_backend STATIC IMPORTED) +set_property(TARGET openvino_backend PROPERTY IMPORTED_LOCATION ${OPENVINO_BACKEND_LIB}) + # Locate OpenVINO Backend Library find_library(GFLAGS_LIB NAMES gflags_nothreads PATHS ${LIBRARY_DIR} NO_DEFAULT_PATH) if(NOT GFLAGS_LIB) @@ -74,7 +81,8 @@ endif() # Link Libraries target_link_libraries(openvino_executor_runner PRIVATE - ${OPENVINO_BACKEND_LIB} + openvino_backend + openvino::runtime ${GFLAGS_LIB} executorch executorch_core @@ -84,6 +92,8 @@ target_link_libraries(openvino_executor_runner PRIVATE pthreadpool ) +target_link_options_shared_lib(openvino_backend) + # Ensure Proper RPATH Handling set_target_properties(openvino_executor_runner PROPERTIES INSTALL_RPATH "$ORIGIN") diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp index f3300d70802..06482d5beb3 100644 
--- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -1,5 +1,5 @@ /* Copyright (c) Intel Corporation - * + * * Licensed under the BSD License (the "License"); you may not use this file * except in compliance with the License. See the license file found in the * LICENSE file in the root directory of this source tree. diff --git a/install_executorch.py b/install_executorch.py index b35f5668eb2..4797f5b2e2c 100644 --- a/install_executorch.py +++ b/install_executorch.py @@ -39,7 +39,7 @@ def clean(): print("Done cleaning build artifacts.") -VALID_PYBINDS = ["coreml", "mps", "xnnpack", "training"] +VALID_PYBINDS = ["coreml", "mps", "xnnpack", "training", "openvino"] ################################################################################ From 629d5a40285336b15faff3bda0bea0e1f52f3269 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Mon, 3 Mar 2025 21:54:51 -0800 Subject: [PATCH 118/188] Typo fix in openvino backend header file --- backends/openvino/runtime/OpenvinoBackend.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backends/openvino/runtime/OpenvinoBackend.h b/backends/openvino/runtime/OpenvinoBackend.h index 5956d6d90f7..069e4659d37 100644 --- a/backends/openvino/runtime/OpenvinoBackend.h +++ b/backends/openvino/runtime/OpenvinoBackend.h @@ -5,8 +5,8 @@ * LICENSE file in the root directory of this source tree. 
*/ -#ifndef OPENVINO_BACKEND_HPP -#define OPENVINO_BACKEND_HPP +#ifndef OPENVINO_BACKEND_H +#define OPENVINO_BACKEND_H #include #include @@ -56,4 +56,4 @@ class OpenvinoBackend final : public ::exr::BackendInterface { } // namespace backends } // namespace executorch -#endif // OPENVINO_BACKEND_HPP +#endif // OPENVINO_BACKEND_H From 63fd3a2d0dbdd2b193fda5ca4db8c55cfa5cd54e Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Tue, 4 Mar 2025 09:52:15 -0800 Subject: [PATCH 119/188] Update README.md --- backends/openvino/tests/README.md | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/backends/openvino/tests/README.md b/backends/openvino/tests/README.md index 3ad109274f5..868a8a62b2b 100644 --- a/backends/openvino/tests/README.md +++ b/backends/openvino/tests/README.md @@ -20,19 +20,12 @@ backends/openvino/tests ### Prerequisites Before you begin, refer to instructions provided in [OpenVINO Backend for ExecuTorch](../README.md) to install openvino and setup executorch environment. -Once openvino is installed and executorch environment is set, refer to [OpenVINO Backend Examples](../../../examples/openvino/README.md) to build openvino_example_runner. ### Usage `test_runner.py` allows to run op or model tests for openvino backend. ### **Arguments** -- **`--build_folder`** (required): - Path to cmake binary directory. (Refer to [OpenVINO Backend Examples](../../../examples/openvino/README.md)) - Examples: - - `../../../cmake-openvino-out` (Relative path from `backends/openvino/tests` directory) - - `/cmake-openvino-out` (Absolute path to the default build folder) - - **`--test_type`** (optional): Type of the tests to run. 
Supported values: @@ -54,14 +47,14 @@ Once openvino is installed and executorch environment is set, refer to [OpenVINO ### Execute Tests for All Ops on CPU ```bash -python test_runner.py --build_folder ../../../cmake-openvino-out --device CPU --test_type ops +python test_runner.py --device CPU --test_type ops ``` ### Execute Convolution Op Tests on CPU ```bash -python test_runner.py --build_folder ../../../cmake-openvino-out --device CPU --test_type ops --pattern test_convolution.py +python test_runner.py --device CPU --test_type ops --pattern test_convolution.py ``` ### Execute Tests for all Models on GPU ```bash -python test_runner.py --build_folder ../../../cmake-openvino-out --device GPU --test_type models +python test_runner.py --device GPU --test_type models From 6ce932142b67ecfbf98c7f518d5f8ac299ff91f8 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Tue, 4 Mar 2025 10:02:25 -0800 Subject: [PATCH 120/188] Update README.md --- backends/openvino/README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/backends/openvino/README.md b/backends/openvino/README.md index bf050a1084b..0af4c5ff506 100644 --- a/backends/openvino/README.md +++ b/backends/openvino/README.md @@ -79,11 +79,16 @@ Follow the steps below to setup your build environment: Note: To achieve optimal performance with NNCF quantization, you should install the latest development version of NNCF (version 2.16.0.dev0+191b53d9 or higher). 3. Navigate to `scripts/` directory. -4. **Build OpenVINO Backend**: Once the prerequisites are in place, run the `openvino_build.sh` script to start the build process, OpenVINO backend will be built under `cmake-openvino-out/backends/openvino/` as `libopenvino_backend.so` +4. **Build OpenVINO Backend**: Once the prerequisites are in place, run the `openvino_build.sh` script to start the build process. 
By default, OpenVINO backend will be built under `cmake-out/backends/openvino/` as `libopenvino_backend.a` ```bash ./openvino_build.sh ``` + **Build OpenVINO Backend with Pybinding**: To build and install the OpenVINO backend with Python bindings, run the `openvino_build.sh` script with the `pybinding` argument. This will compile and install the ExecuTorch Python package with the OpenVINO backend into your Python environment. + + ```bash + ./openvino_build.sh pybinding + ``` ### Run From 173970fb4e922d97588a69620182e029b280fff5 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Tue, 4 Mar 2025 10:05:09 -0800 Subject: [PATCH 121/188] Update README.md --- backends/openvino/tests/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/openvino/tests/README.md b/backends/openvino/tests/README.md index 868a8a62b2b..0aad14e04a0 100644 --- a/backends/openvino/tests/README.md +++ b/backends/openvino/tests/README.md @@ -19,7 +19,7 @@ backends/openvino/tests ### Prerequisites -Before you begin, refer to instructions provided in [OpenVINO Backend for ExecuTorch](../README.md) to install openvino and setup executorch environment. +Before you begin, refer to instructions provided in [OpenVINO Backend for ExecuTorch](../README.md) to install OpenVINO and ExecuTorch Python package with the OpenVINO backend into your Python environment. 
### Usage From 213a018f28d5338acb318597d95074cedac13fee Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Tue, 4 Mar 2025 16:19:16 -0800 Subject: [PATCH 122/188] pybinding example added for openvino backend --- .../openvino/openvino_pybinding_example.py | 192 ++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 examples/openvino/openvino_pybinding_example.py diff --git a/examples/openvino/openvino_pybinding_example.py b/examples/openvino/openvino_pybinding_example.py new file mode 100644 index 00000000000..a94083d92eb --- /dev/null +++ b/examples/openvino/openvino_pybinding_example.py @@ -0,0 +1,192 @@ +# Copyright (c) Intel Corporation +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file found in the +# LICENSE file in the root directory of this source tree. + +import argparse +import sys +import time + +import timm + +import torch +import torchvision.models as torchvision_models + +from executorch.backends.openvino.partitioner import OpenvinoPartitioner +from executorch.exir import EdgeProgramManager, to_edge_transform_and_lower +from executorch.exir.backend.backend_details import CompileSpec + +from executorch.extension.pybindings.portable_lib import ( # @manual + _load_for_executorch_from_buffer, +) +from torch.export import export, ExportedProgram +from transformers import AutoModel + + +# Function to load a model based on the selected suite +def load_model(suite: str, model_name: str): + """ + Loads a pre-trained model from the specified model suite. + + :param suite: The suite from which to load the model. Supported values are: + - "timm": Uses `timm.create_model` to load the model. + - "torchvision": Loads a model from `torchvision.models`. Raises an error if the model does not exist. + - "huggingface": Loads a transformer model using `AutoModel.from_pretrained`. + :param model_name: The name of the model to load. 
+ :return: The loaded model instance. + :raises ValueError: If the specified model suite is unsupported or the model is not found. + """ + if suite == "timm": + return timm.create_model(model_name, pretrained=True) + elif suite == "torchvision": + if not hasattr(torchvision_models, model_name): + msg = f"Model {model_name} not found in torchvision." + raise ValueError(msg) + return getattr(torchvision_models, model_name)(pretrained=True) + elif suite == "huggingface": + return AutoModel.from_pretrained(model_name) + else: + msg = f"Unsupported model suite: {suite}" + raise ValueError(msg) + + +def main( + suite: str, + model_name: str, + model_path: str, + input_shape, + device: str, + num_iterations: int, + warmup_iterations: int, + input_path: str, + output_path: str, +): + """ + Main function to load, quantize, and validate a model. + + :param suite: The model suite to use (e.g., "timm", "torchvision", "huggingface"). + :param model_name: The name of the model to load. + :param input_shape: The input shape for the model. + :param device: The device to run the model on (e.g., "cpu", "gpu"). + :param num_iterations: Number of iterations to execute inference. 
+ """ + # Custom check to ensure suite is provided with model name + if model_name and not suite: + print("Error: --suite argument should be provided with --model") + sys.exit(1) + + if input_path: + print("Loading input tensor from ", input_path) + sample_inputs = (torch.load(input_path, weights_only=False),) + else: + print("Generating random input tensor with shape of ", input_shape) + sample_inputs = (torch.randn(input_shape),) + + if model_name: + print("Downloading model") + print("suite: ", suite) + print("model: ", model_name) + model = load_model(suite, model_name) + model = model.eval() + + exported_program: ExportedProgram = export(model, sample_inputs) + compile_spec = [CompileSpec("device", device.encode())] + edge: EdgeProgramManager = to_edge_transform_and_lower( + exported_program, + partitioner=[ + OpenvinoPartitioner(compile_spec), + ], + ) + + exec_prog = edge.to_executorch() + executorch_module = _load_for_executorch_from_buffer(exec_prog.buffer) + else: + print("Loading model from ", model_path) + with open(model_path, "rb") as f: + model_buffer = f.read() # Read model file into buffer + executorch_module = _load_for_executorch_from_buffer(model_buffer) + + if warmup_iterations > 0: + print("Warmup begins for ", warmup_iterations, " iterations") + for _i in range(warmup_iterations): + out = executorch_module.run_method("forward", sample_inputs) + + print("Execution begins for ", num_iterations, " iterations") + time_total = 0 + for _i in range(num_iterations): + time_start = time.time() + out = executorch_module.run_method("forward", sample_inputs) + time_end = time.time() + time_total += time_end - time_start + + print("Average inference time: ", (time_total / float(num_iterations)), " secs") + + if output_path: + torch.save(out, output_path) + + +if __name__ == "__main__": + # Argument parser for dynamic inputs + parser = argparse.ArgumentParser(description="Export models with executorch.") + parser.add_argument( + "--suite", + type=str, + 
required=False, + choices=["timm", "torchvision", "huggingface"], + help="Select the model suite (timm, torchvision, huggingface).", + ) + model_group = parser.add_mutually_exclusive_group(required=True) + model_group.add_argument("--model", type=str, help="Model name to be loaded.") + model_group.add_argument( + "--model_path", type=str, help="Model path to .pte file to be loaded." + ) + input_group = parser.add_mutually_exclusive_group(required=True) + input_group.add_argument( + "--input_shape", + type=eval, + help="Input shape for the model as a list or tuple (e.g., [1, 3, 224, 224] or (1, 3, 224, 224)).", + ) + parser.add_argument( + "--device", + type=str, + default="CPU", + help="Target device for compiling the model (e.g., CPU, GPU). Default is CPU.", + ) + parser.add_argument( + "--num_iter", + type=int, + default=1, + help="Number of iterations to execute inference", + ) + parser.add_argument( + "--warmup_iter", + type=int, + default=0, + help="Number of iterations to execute for warmup", + ) + input_group.add_argument( + "--input_tensor_path", + type=str, + help="Optional raw tensor input file to load the input from", + ) + parser.add_argument( + "--output_tensor_path", + type=str, + help="Optional output file path to save raw output tensor", + ) + + args = parser.parse_args() + + # Run the main function with parsed arguments + main( + args.suite, + args.model, + args.model_path, + args.input_shape, + args.device, + args.num_iter, + args.warmup_iter, + args.input_tensor_path, + args.output_tensor_path, + ) From 2fcceb2e8bf03c727975acce5c881e312c91476a Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Tue, 4 Mar 2025 16:39:04 -0800 Subject: [PATCH 123/188] Update README.md --- examples/openvino/README.md | 69 +++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/examples/openvino/README.md b/examples/openvino/README.md index 48993e9fa3b..0b3ff914df1 100644 --- a/examples/openvino/README.md +++ b/examples/openvino/README.md 
@@ -190,3 +190,72 @@ Run inference with an input tensor file: --input_list_path=input_list.txt \ --output_folder_path=outputs/ ``` + +## Running Pybinding Example: + +You can use the `openvino_pybinding_example.py` script to run models with the OpenVINO backend through the Python bindings. + +### **Usage** + +#### **Command Structure** +```bash +python openvino_pybinding_example.py +``` + +#### **Arguments** +- **`--suite`** (required if `--model_path` argument is not used): + Specifies the model suite to use. Needs to be used with `--model` argument. + Supported values: + - `timm` (e.g., VGG16, ResNet50) + - `torchvision` (e.g., resnet18, mobilenet_v2) + - `huggingface` (e.g., bert-base-uncased). NB: Quantization and validation is not supported yet. + +- **`--model`** (required if `--model_path` argument is not used): + Name of the model to export. Needs to be used with `--suite` argument. + Examples: + - For `timm`: `vgg16`, `resnet50` + - For `torchvision`: `resnet18`, `mobilenet_v2` + - For `huggingface`: `bert-base-uncased`, `distilbert-base-uncased` + +- **`--model_path`** (required if `--suite` and `--model` arguments are not used): + Path to the saved model file. This argument allows you to load the compiled model from a file, instead of downloading it from the model suites using the `--suite` and `--model` arguments. + Example: `/resnet50_fp32.pte` + +- **`--input_shape`**(required for random inputs): + Input shape for the model. Provide this as a **list** or **tuple**. + Examples: + - `[1, 3, 224, 224]` (Zsh users: wrap in quotes) + - `(1, 3, 224, 224)` + + - **`--input_tensor_path`**(optional): + Path to the raw input tensor file. If this argument is not provided, a random input tensor will be generated with the input shape provided with `--input_shape` argument. + Example: `/input_tensor.pt` + + - **`--output_tensor_path`**(optional): + Path to the file where the output raw tensor will be saved. 
+ Example: `/output_tensor.pt` + +- **`--device`** (optional) + Target device for the compiled model. Default is `CPU`. + Examples: `CPU`, `GPU` + +- **`--num_iter`** (optional) + Number of iterations to execute inference for evaluation. The default value is `1`. + Examples: `100`, `1000` + +- **`--warmup_iter`** (optional) + Number of warmup iterations to execute inference before evaluation. The default value is `0`. + Examples: `5`, `10` + + +### **Examples** + +#### Execute Torchvision ResNet50 model for the GPU with Random Inputs +```bash +python openvino_pybinding_example.py --suite torchvision --model resnet50 --input_shape "(1, 3, 256, 256)" --device GPU +``` + +#### Run a Precompiled Model for the CPU Using an Existing Input Tensor File and Save the Output. +```bash +python openvino_pybinding_example.py --model_path /path/to/model/folder/resnet50_fp32.pte --input_tensor_file /path/to/input/folder/input.pt --output_tensor_file /path/to/output/folder/output.pt --device CPU +``` From 34d1cf32df153af3e493d55f27254b4aa525e87c Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Tue, 4 Mar 2025 19:12:11 -0800 Subject: [PATCH 124/188] use pybinding for validation --- .../openvino/aot/aot_openvino_compiler.py | 70 +++++-------------- 1 file changed, 17 insertions(+), 53 deletions(-) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 97430549f0d..767f9fc0874 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -28,6 +28,9 @@ from torch.export.exported_program import ExportedProgram from torchvision import datasets from transformers import AutoModel +from executorch.extension.pybindings.portable_lib import ( # @manual + _load_for_executorch_from_buffer, +) # Function to load a model based on the selected suite @@ -104,69 +107,29 @@ def load_calibration_dataset( return calibration_dataset -def dump_inputs(calibration_dataset, dest_path): - """ - 
Dumps the input data from a calibration dataset to raw files. - - :param calibration_dataset: The dataset containing calibration inputs. - :param dest_path: The destination directory to save the raw input files. - :return: A tuple containing a list of input file paths and the corresponding target labels. - """ - input_files, targets = [], [] - for idx, data in enumerate(calibration_dataset): - feature, target = data - targets.extend(target) - file_name = f"input_{idx}_0.raw" - file_path = f"{dest_path}/{file_name}" - if not isinstance(feature, torch.Tensor): - feature = torch.tensor(feature) - feature.detach().numpy().tofile(file_path) - input_files.append(file_name) - - return input_files, targets - - def validate_model( - model_file_name: str, calibration_dataset: torch.utils.data.DataLoader + exec_prog: EdgeProgramManager, calibration_dataset: torch.utils.data.DataLoader ) -> float: """ Validates the model using the calibration dataset. - :param model_file_name: The path to the quantized model file. + :param exec_prog: EdgeProgramManager of the lowered model :param calibration_dataset: A DataLoader containing calibration data. :return: The accuracy score of the model. 
""" - # 1: Dump inputs - dest_path = Path("tmp_inputs") - out_path = Path("tmp_outputs") - for d in [dest_path, out_path]: - if os.path.exists(d): - shutil.rmtree(d) - os.makedirs(d) - - input_files, targets = dump_inputs(calibration_dataset, dest_path) - inp_list_file = dest_path / "in_list.txt" - with open(inp_list_file, "w") as f: - f.write("\n".join(input_files) + "\n") - - # 2: Run the executor - print("Run openvino_executor_runner...") - - subprocess.run( - [ - "../../../cmake-out/examples/openvino/openvino_executor_runner", - f"--model_path={model_file_name}", - f"--input_list_path={inp_list_file}", - f"--output_folder_path={out_path}", - ] - ) + # 1: Load model from buffer + executorch_module = _load_for_executorch_from_buffer(exec_prog.buffer) - # 3: load the outputs and compare with the targets + # 2: Iterate over the dataset and run the executor predictions = [] - for i in range(len(input_files)): - tensor = np.fromfile(out_path / f"output_{i}_0.raw", dtype=np.float32) - predictions.extend(torch.tensor(tensor).reshape(-1, 1000).argmax(-1)) + targets = [] + for idx, data in enumerate(calibration_dataset): + feature, target = data + targets.extend(target) + out = executorch_module.run_method("forward", (feature,)) + predictions.extend(torch.stack(out).reshape(-1, 1000).argmax(-1)) + # 1: Check accuracy return accuracy_score(predictions, targets) @@ -261,7 +224,8 @@ def main( raise ValueError(msg) print("Start validation of the model:") - acc_top1 = validate_model(model_file_name, calibration_dataset) + #acc_top1 = validate_model(model_file_name, calibration_dataset) + acc_top1 = validate_model(exec_prog, calibration_dataset) print(f"acc@1: {acc_top1}") From 4f3df9a9ac90a9392abbe771908e9bc7ce4aac3f Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Tue, 4 Mar 2025 19:30:20 -0800 Subject: [PATCH 125/188] Use common executor, remove custom openvino executor runner, use pybinding for validation --- backends/openvino/CMakeLists.txt | 23 ++ 
backends/openvino/scripts/openvino_build.sh | 1 + .../openvino_executor_runner.cpp | 324 ------------------ examples/openvino/openvino_build_example.sh | 55 --- 4 files changed, 24 insertions(+), 379 deletions(-) delete mode 100644 examples/openvino/executor_runner/openvino_executor_runner.cpp delete mode 100755 examples/openvino/openvino_build_example.sh diff --git a/backends/openvino/CMakeLists.txt b/backends/openvino/CMakeLists.txt index 38767526ff6..7348ac94a6e 100644 --- a/backends/openvino/CMakeLists.txt +++ b/backends/openvino/CMakeLists.txt @@ -48,5 +48,28 @@ target_sources(openvino_backend PRIVATE ${CMAKE_CURRENT_LIST_DIR}/runtime/Openvi target_link_options_shared_lib(openvino_backend) +if(EXECUTORCH_BUILD_OPENVINO_EXECUTOR_RUNNER) + # Build executor runner binary for openvino backend + list(APPEND openvino_executor_runner_libs openvino_backend executorch) + + set(_openvino_executor_runner__srcs + ${EXECUTORCH_ROOT}/examples/portable/executor_runner/executor_runner.cpp + ${EXECUTORCH_ROOT}/extension/data_loader/file_data_loader.cpp + ${EXECUTORCH_ROOT}/extension/evalue_util/print_evalue.cpp + ${EXECUTORCH_ROOT}/extension/runner_util/inputs.cpp + ${EXECUTORCH_ROOT}/extension/runner_util/inputs_portable.cpp + ) + add_executable(openvino_executor_runner ${_openvino_executor_runner__srcs}) + + list(APPEND openvino_executor_runner_libs) + + target_link_libraries( + openvino_executor_runner gflags portable_ops_lib ${openvino_executor_runner_libs} + ) + target_compile_options(openvino_executor_runner PUBLIC ${_common_compile_options}) +endif() + + + # Install OpenVINO backend library to the lib directory install(TARGETS openvino_backend DESTINATION lib) diff --git a/backends/openvino/scripts/openvino_build.sh b/backends/openvino/scripts/openvino_build.sh index 5a1c5712563..7cb3545aedb 100755 --- a/backends/openvino/scripts/openvino_build.sh +++ b/backends/openvino/scripts/openvino_build.sh @@ -30,6 +30,7 @@ main() { -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ 
-DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \ -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ + -DEXECUTORCH_BUILD_OPENVINO_EXECUTOR_RUNNER=ON \ -B"${build_dir}" diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp deleted file mode 100644 index 06482d5beb3..00000000000 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ /dev/null @@ -1,324 +0,0 @@ -/* Copyright (c) Intel Corporation - * - * Licensed under the BSD License (the "License"); you may not use this file - * except in compliance with the License. See the license file found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include - -// Define a fixed-size memory pool for the method allocator (4 MB) -static uint8_t method_allocator_pool[4 * 1024U * 1024U]; // 4 MB - -// Define command-line flags for model path, the number of iterations, input -// list path, and output folder path -DEFINE_string( - model_path, - "", - "Path to the model serialized in flatbuffer format (required)."); -DEFINE_int32(num_iter, 1, "Number of inference iterations (default is 1)."); -DEFINE_string( - input_list_path, - "", - "Path to the input list file which includes the list of raw " - "input tensor files (optional)."); -DEFINE_string( - output_folder_path, - "", - "Path to the output folder to save raw output tensor files (optional)."); - -using executorch::extension::FileDataLoader; -using executorch::extension::prepare_input_tensors; -using executorch::runtime::Error; -using executorch::runtime::EValue; -using executorch::runtime::HierarchicalAllocator; -using executorch::runtime::MemoryAllocator; -using executorch::runtime::MemoryManager; -using executorch::runtime::Method; -using executorch::runtime::MethodMeta; -using executorch::runtime::Program; -using 
executorch::runtime::Result; -using executorch::runtime::Span; -using executorch::runtime::TensorInfo; - -std::function build_set_input_tensor( - Result& method, - std::vector& inputs, - const std::vector> input_paths) { - return [&inputs, &method, input_paths](size_t idx) -> void { - const MethodMeta method_meta = method->method_meta(); - for (int input_index = 0; input_index < method->inputs_size(); - ++input_index) { - Result tensor_meta = - method_meta.input_tensor_meta(input_index); - auto input_data_ptr = inputs[input_index].toTensor().data_ptr(); - - std::ifstream fin(input_paths[idx][input_index], std::ios::binary); - fin.seekg(0, fin.end); - size_t file_size = fin.tellg(); - - ET_CHECK_MSG( - file_size == tensor_meta->nbytes(), - "Input(%d) size mismatch. file bytes: %zu, tensor bytes: %zu", - input_index, - file_size, - tensor_meta->nbytes()); - - fin.seekg(0, fin.beg); - fin.read(static_cast(input_data_ptr), file_size); - fin.close(); - } - }; -} - -std::function build_dump_outputs( - std::vector& outputs, - const size_t output_size, - const std::string output_folder_path) { - return [&outputs, output_folder_path, output_size](size_t idx) -> void { - for (size_t output_index = 0; output_index < output_size; output_index++) { - auto output_tensor = outputs[output_index].toTensor(); - auto output_file_name = output_folder_path + "/output_" + - std::to_string(idx) + "_" + std::to_string(output_index) + ".raw"; - std::ofstream fout(output_file_name.c_str(), std::ios::binary); - fout.write(output_tensor.const_data_ptr(), output_tensor.nbytes()); - fout.close(); - } - }; -} - -std::vector> get_inputs_paths( - const char* input_list_path) { - size_t idx = 0; - - auto split_and_add_prefix = - [](std::string s, std::string delimiter, std::string prefix = "") { - size_t pos_start = 0, pos_end, delim_len = delimiter.length(); - std::string token; - std::vector res; - - while ((pos_end = s.find(delimiter, pos_start)) != std::string::npos) { - token = 
s.substr(pos_start, pos_end - pos_start); - pos_start = pos_end + delim_len; - res.push_back(prefix + token); - } - res.push_back(prefix + s.substr(pos_start)); - return res; - }; - - // Read raw input tensor file names from input list file and - // iterate each raw input tensor file to read values - std::ifstream input_list(input_list_path); - if (!input_list.is_open()) { - ET_CHECK_MSG(false, "Failed to read input list file: %s", input_list_path); - } - std::string inputs_dir = ""; - size_t last_pos = std::string(input_list_path).rfind('/'); - if (last_pos != std::string::npos) { - inputs_dir = std::string(input_list_path).substr(0, last_pos + 1); - } - std::string file_path; - auto retval = std::vector>(); - while (std::getline(input_list, file_path)) { - auto input_files = split_and_add_prefix(file_path, " ", inputs_dir); - if (input_files.size() == 0) { - break; - } - retval.push_back(input_files); - } - return retval; -} - -int main(int argc, char** argv) { - // Initialize the runtime environment - executorch::runtime::runtime_init(); - - // Parse command-line arguments and flags - gflags::ParseCommandLineFlags(&argc, &argv, true); - - // Check if the model path is provided - if (FLAGS_model_path.empty()) { - std::cerr << "Error: --model_path is required." 
<< std::endl; - std::cerr << "Usage: " << argv[0] - << " --model_path= --num_iter=" - << std::endl; - return 1; - } - - // Retrieve the model path and number of iterations - const char* model_path = FLAGS_model_path.c_str(); - int num_iterations = FLAGS_num_iter; - std::cout << "Model path: " << model_path << std::endl; - std::cout << "Number of iterations: " << num_iterations << std::endl; - - // Load the model using FileDataLoader - Result loader = FileDataLoader::from(model_path); - ET_CHECK_MSG( - loader.ok(), - "FileDataLoader::from() failed: 0x%" PRIx32, - static_cast(loader.error())); - - // Load the program from the loaded model - Result program = Program::load(&loader.get()); - if (!program.ok()) { - ET_LOG(Error, "Failed to parse model file %s", model_path); - return 1; - } - ET_LOG(Info, "Model file %s is loaded.", model_path); - - // Retrieve the method name from the program (assumes the first method is - // used) - const char* method_name = nullptr; - { - const auto method_name_result = program->get_method_name(0); - ET_CHECK_MSG(method_name_result.ok(), "Program has no methods"); - method_name = *method_name_result; - } - ET_LOG(Info, "Using method %s", method_name); - - // Retrieve metadata about the method - Result method_meta = program->method_meta(method_name); - ET_CHECK_MSG( - method_meta.ok(), - "Failed to get method_meta for %s: 0x%" PRIx32, - method_name, - static_cast(method_meta.error())); - - // Set up a memory allocator for the method - MemoryAllocator method_allocator{ - MemoryAllocator(sizeof(method_allocator_pool), method_allocator_pool)}; - - // Prepare planned buffers for memory planning - std::vector> planned_buffers; - std::vector> planned_spans; - size_t num_memory_planned_buffers = method_meta->num_memory_planned_buffers(); - for (size_t id = 0; id < num_memory_planned_buffers; ++id) { - size_t buffer_size = - static_cast(method_meta->memory_planned_buffer_size(id).get()); - ET_LOG(Info, "Setting up planned buffer %zu, size 
%zu.", id, buffer_size); - planned_buffers.push_back(std::make_unique(buffer_size)); - planned_spans.push_back({planned_buffers.back().get(), buffer_size}); - } - HierarchicalAllocator planned_memory( - {planned_spans.data(), planned_spans.size()}); - - // Set up a memory manager using the method allocator and planned memory - MemoryManager memory_manager(&method_allocator, &planned_memory); - - // Load the method into the program - Result method = program->load_method(method_name, &memory_manager); - ET_CHECK_MSG( - method.ok(), - "Loading of method %s failed with status 0x%" PRIx32, - method_name, - static_cast(method.error())); - ET_LOG(Info, "Method loaded."); - - // Prepare the input tensors for the method - auto method_inputs = prepare_input_tensors(*method); - ET_CHECK_MSG( - method_inputs.ok(), - "Could not prepare inputs: 0x%" PRIx32, - static_cast(method_inputs.error())); - - Error status = Error::Ok; - std::vector inputs(method->inputs_size()); - ET_LOG(Info, "Number of input layers: %zu", inputs.size()); - - status = method->get_inputs(inputs.data(), inputs.size()); - ET_CHECK(status == Error::Ok); - - // If the input path list is provided, read input tensors from the files - std::function set_input_tensor; - if (!FLAGS_input_list_path.empty()) { - const char* input_list_path = FLAGS_input_list_path.c_str(); - ET_LOG( - Info, - "Loading input tensors from the list provided in %s.", - input_list_path); - const auto input_paths = get_inputs_paths(input_list_path); - num_iterations = input_paths.size(); - ET_LOG( - Info, - "Number of iters is set to the len of the inputs: %u.", - num_iterations); - - set_input_tensor = build_set_input_tensor(method, inputs, input_paths); - } else { - set_input_tensor = [](size_t idx) -> void {}; - } - - ET_LOG(Info, "%zu Number of output layers: ", method->outputs_size()); - - std::vector outputs(method->outputs_size()); - status = method->get_outputs(outputs.data(), outputs.size()); - ET_CHECK(status == Error::Ok); - - 
std::function dump_outputs; - if (!FLAGS_output_folder_path.empty()) { - // Retrieve and print the method outputs - - // If output folder path is provided, save output tensors - // into raw tensor files. - const char* output_folder_path = FLAGS_output_folder_path.c_str(); - ET_LOG( - Info, - "Saving output tensors into the output folder: %s.", - output_folder_path); - dump_outputs = build_dump_outputs( - outputs, outputs.size(), std::string(output_folder_path)); - - } else { - dump_outputs = [](size_t idx) {}; - } - - // Measure execution time for inference - - double total_time_elapsed = 0.; - for (int i = 0; (i < num_iterations and status == Error::Ok); ++i) { - set_input_tensor(i); - auto before_exec = std::chrono::high_resolution_clock::now(); - status = method->execute(); - auto after_exec = std::chrono::high_resolution_clock::now(); - if (status == Error::Ok) { - dump_outputs(i); - } - double elapsed_time = std::chrono::duration_cast( - after_exec - before_exec) - .count() / - 1000.0; - total_time_elapsed += elapsed_time; - } - - // Log execution time and average time per iteration - ET_LOG( - Info, - "%d inference took %f ms, avg %f ms", - num_iterations, - total_time_elapsed, - total_time_elapsed / static_cast(num_iterations)); - ET_CHECK_MSG( - status == Error::Ok, - "Execution of method %s failed with status 0x%" PRIx32, - method_name, - static_cast(status)); - ET_LOG(Info, "Model executed successfully."); - - return 0; -} diff --git a/examples/openvino/openvino_build_example.sh b/examples/openvino/openvino_build_example.sh deleted file mode 100755 index 5a99b27f08c..00000000000 --- a/examples/openvino/openvino_build_example.sh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/bash - -# Exit immediately if a command exits with a non-zero status. 
-set -e - -# Define the directory where CMakeLists.txt is located -EXECUTORCH_ROOT=$(realpath "$(dirname "$0")/../..") -echo EXECUTORCH_ROOT=${EXECUTORCH_ROOT} - -main() { - # Set build directory - local build_dir="cmake-out" - - # Create and enter the build directory - cd "$EXECUTORCH_ROOT" - rm -rf "${build_dir}" - - # Configure the project with CMake - # Note: Add any additional configuration options you need here - cmake -DCMAKE_INSTALL_PREFIX="${build_dir}" \ - -DCMAKE_BUILD_TYPE=Release \ - -DEXECUTORCH_BUILD_OPENVINO=ON \ - -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ - -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ - -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \ - -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ - -DEXECUTORCH_ENABLE_LOGGING=ON \ - -B"${build_dir}" - - - # Build the project - cmake --build ${build_dir} --target install --config Release -j$(nproc) - - ## Build example - local example_dir=examples/openvino - local example_build_dir="${build_dir}/${example_dir}" - local cmake_prefix_path="${PWD}/${build_dir}/lib/cmake/ExecuTorch;${PWD}/${build_dir}/third-party/gflags;" - rm -rf "${example_build_dir}" - - ## OpenVINO original - cmake -DCMAKE_PREFIX_PATH="${cmake_prefix_path}" \ - -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \ - -B"${example_build_dir}" \ - $EXECUTORCH_ROOT/$example_dir - - cmake --build "${example_build_dir}" -j$(nproc) - - # Switch back to the original directory - cd - > /dev/null - - # Print a success message - echo "Build successfully completed." 
-} - -main "$@" From d89429b9ca8b8cc9d69169b37753639d651f7f73 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Tue, 4 Mar 2025 19:41:24 -0800 Subject: [PATCH 126/188] code formatting --- examples/openvino/aot/aot_openvino_compiler.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 767f9fc0874..203d01fe73b 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -5,15 +5,10 @@ # LICENSE file in the root directory of this source tree. import argparse -import os -import shutil -import subprocess -from pathlib import Path import executorch import nncf.torch -import numpy as np import timm import torch import torchvision.models as torchvision_models @@ -21,6 +16,9 @@ from executorch.backends.openvino.quantizer.quantizer import quantize_model from executorch.exir import EdgeProgramManager, to_edge_transform_and_lower from executorch.exir.backend.backend_details import CompileSpec +from executorch.extension.pybindings.portable_lib import ( # @manual + _load_for_executorch_from_buffer, +) from sklearn.metrics import accuracy_score from timm.data import resolve_data_config from timm.data.transforms_factory import create_transform @@ -28,9 +26,6 @@ from torch.export.exported_program import ExportedProgram from torchvision import datasets from transformers import AutoModel -from executorch.extension.pybindings.portable_lib import ( # @manual - _load_for_executorch_from_buffer, -) # Function to load a model based on the selected suite @@ -123,7 +118,7 @@ def validate_model( # 2: Iterate over the dataset and run the executor predictions = [] targets = [] - for idx, data in enumerate(calibration_dataset): + for _idx, data in enumerate(calibration_dataset): feature, target = data targets.extend(target) out = executorch_module.run_method("forward", (feature,)) @@ -224,7 +219,6 @@ def 
main( raise ValueError(msg) print("Start validation of the model:") - #acc_top1 = validate_model(model_file_name, calibration_dataset) acc_top1 = validate_model(exec_prog, calibration_dataset) print(f"acc@1: {acc_top1}") From d81f921b681064087baa855f2d7ea216ed941ef5 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Tue, 4 Mar 2025 20:13:19 -0800 Subject: [PATCH 127/188] renamed openvino_pybinding_example.py to export_and_infer_openvino.py --- ...openvino_pybinding_example.py => export_and_infer_openvino.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename examples/openvino/{openvino_pybinding_example.py => export_and_infer_openvino.py} (100%) diff --git a/examples/openvino/openvino_pybinding_example.py b/examples/openvino/export_and_infer_openvino.py similarity index 100% rename from examples/openvino/openvino_pybinding_example.py rename to examples/openvino/export_and_infer_openvino.py From fb3685cade302c4a3f466289754c79887564789b Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Wed, 5 Mar 2025 13:32:21 -0800 Subject: [PATCH 128/188] Removed yaml file for unsupported ops (not needed) --- .../unsupported_openvino_functions.yaml | 242 ------------------ 1 file changed, 242 deletions(-) delete mode 100644 backends/openvino/unsupported_openvino_functions.yaml diff --git a/backends/openvino/unsupported_openvino_functions.yaml b/backends/openvino/unsupported_openvino_functions.yaml deleted file mode 100644 index 296d57d7320..00000000000 --- a/backends/openvino/unsupported_openvino_functions.yaml +++ /dev/null @@ -1,242 +0,0 @@ -# This yaml file contains operators that are unsupported with openvino backend and -# will use portable kernels for fall back - -- op: _cdist_forward.out - kernels: - - arg_meta: null - kernel_name: torch::executor::_cdist_forward_out - -- op: _pdist_forward.out - kernels: - - arg_meta: null - kernel_name: torch::executor::_pdist_forward_out - -- op: alias_copy.out - kernels: - - arg_meta: null - kernel_name: 
torch::executor::alias_copy_out - -- op: any.all_out - kernels: - - arg_meta: null - kernel_name: torch::executor::any_all_out - -- op: any.dims_out - kernels: - - arg_meta: null - kernel_name: torch::executor::any_dims_out - -- op: atan.out - kernels: - - arg_meta: null - kernel_name: torch::executor::atan_out - -- op: atan2.out - kernels: - - arg_meta: null - kernel_name: torch::executor::atan2_out - -- op: bitwise_or.Scalar_out - kernels: - - arg_meta: null - kernel_name: torch::executor::bitwise_or_Scalar_out - -- op: bitwise_xor.Scalar_out - kernels: - - arg_meta: null - kernel_name: torch::executor::bitwise_xor_Scalar_out - -- op: clamp.Tensor_out - kernels: - - arg_meta: null - kernel_name: torch::executor::clamp_tensor_out - -- op: convolution_backward.out - kernels: - - arg_meta: null - kernel_name: torch::executor::convolution_backward_out - -- op: detach_copy.out - kernels: - - arg_meta: null - kernel_name: torch::executor::detach_copy_out - -- op: diagonal_copy.out - kernels: - - arg_meta: null - kernel_name: torch::executor::diagonal_copy_out - -- op: expm1.out - kernels: - - arg_meta: null - kernel_name: torch::executor::expm1_out - -- op: floor_divide.out - kernels: - - arg_meta: null - kernel_name: torch::executor::floor_divide_out - -- op: index_put.out - kernels: - - arg_meta: null - kernel_name: torch::executor::index_put_out - -- op: logical_and.out - kernels: - - arg_meta: null - kernel_name: torch::executor::logical_and_out - -- op: logical_or.out - kernels: - - arg_meta: null - kernel_name: torch::executor::logical_or_out - -- op: logical_xor.out - kernels: - - arg_meta: null - kernel_name: torch::executor::logical_xor_out - -- op: logit.out - kernels: - - arg_meta: null - kernel_name: torch::executor::logit_out - -- op: masked_scatter.out - kernels: - - arg_meta: null - kernel_name: torch::executor::masked_scatter_out - -- op: masked_select.out - kernels: - - arg_meta: null - kernel_name: torch::executor::masked_select_out - -- op: 
narrow_copy.out - kernels: - - arg_meta: null - kernel_name: torch::executor::narrow_copy_out - -- op: nonzero.out - kernels: - - arg_meta: null - kernel_name: torch::executor::nonzero_out - -- op: pixel_shuffle.out - kernels: - - arg_meta: null - kernel_name: torch::executor::pixel_shuffle_out - -- op: pixel_unshuffle.out - kernels: - - arg_meta: null - kernel_name: torch::executor::pixel_unshuffle_out - -- op: prod.int_out - kernels: - - arg_meta: null - kernel_name: torch::executor::prod_int_out - -- op: prod.out - kernels: - - arg_meta: null - kernel_name: torch::executor::prod_out - -- op: remainder.Tensor_out - kernels: - - arg_meta: null - kernel_name: torch::executor::remainder_Tensor_out - -- op: remainder.Scalar_out - kernels: - - arg_meta: null - kernel_name: torch::executor::remainder_Scalar_out - -- op: repeat_interleave.Tensor_out - kernels: - - arg_meta: null - kernel_name: torch::executor::repeat_interleave_Tensor_out - -- op: reflection_pad1d.out - kernels: - - arg_meta: null - kernel_name: torch::executor::reflection_pad1d_out - -- op: reflection_pad3d.out - kernels: - - arg_meta: null - kernel_name: torch::executor::reflection_pad3d_out - -- op: replication_pad1d.out - kernels: - - arg_meta: null - kernel_name: torch::executor::replication_pad1d_out - -- op: replication_pad2d.out - kernels: - - arg_meta: null - kernel_name: torch::executor::replication_pad2d_out - -- op: replication_pad3d.out - kernels: - - arg_meta: null - kernel_name: torch::executor::replication_pad3d_out - -- op: round.out - kernels: - - arg_meta: null - kernel_name: torch::executor::round_out - -- op: scatter_add.out - kernels: - - arg_meta: null - kernel_name: torch::executor::scatter_add_out - -- op: split_copy.Tensor_out - kernels: - - arg_meta: null - kernel_name: torch::executor::split_copy_Tensor_out - -- op: squeeze_copy.dim_out - kernels: - - arg_meta: null - kernel_name: torch::executor::squeeze_copy_dim_out - -- op: sub.Scalar_out - kernels: - - arg_meta: null - 
kernel_name: torch::executor::sub_scalar_out - -- op: t_copy.out - kernels: - - arg_meta: null - kernel_name: torch::executor::t_copy_out - -- op: transpose_copy.int_out - kernels: - - arg_meta: null - kernel_name: torch::executor::transpose_copy_int_out - -- op: trunc.out - kernels: - - arg_meta: null - kernel_name: torch::executor::trunc_out - -- op: unbind_copy.int_out - kernels: - - arg_meta: null - kernel_name: torch::executor::unbind_copy_int_out - -- op: upsample_bilinear2d.vec_out - kernels: - - arg_meta: null - kernel_name: torch::executor::upsample_bilinear2d_vec_out - -- func: dim_order_ops::_empty_dim_order.out(int[] size, *, int[]? dim_order=None, Tensor(a!) out) -> Tensor(a!) - kernels: - - arg_meta: null - kernel_name: torch::executor::_empty_dim_order_out - -- func: dim_order_ops::_to_dim_order_copy.out(Tensor self, *, bool non_blocking=False, int[]? dim_order=None, Tensor(a!) out) -> Tensor(a!) - kernels: - - arg_meta: null - kernel_name: torch::executor::_to_dim_order_copy_out From 7d5dd96071a43afc6eebebcc098e074341c4f420 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Wed, 5 Mar 2025 13:53:11 -0800 Subject: [PATCH 129/188] Update README.md --- examples/openvino/README.md | 45 +++++++++++++------------------------ 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/examples/openvino/README.md b/examples/openvino/README.md index 0b3ff914df1..418b442c9d5 100644 --- a/examples/openvino/README.md +++ b/examples/openvino/README.md @@ -142,64 +142,49 @@ python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, ## Build OpenVINO Examples -Build the backend and the examples by executing the script: +Build the backend libraries and executor runner by executing the script below in `/backends/openvino/scripts` folder: ```bash -./openvino_build_example.sh +./openvino_build.sh ``` -The executable is saved in `/cmake-openvino-out/examples/openvino/` +The executable is saved in `/cmake-out/backends/openvino/` -### Run the 
example +### Run the Example with Executor Runner -Now, run the example using the executable generated in the above step. The executable requires a model file (`.pte` file generated in the aot step), number of inference iterations, and optional input/output paths. +Now, run the example using the executable generated in the above step. The executable requires a model file (`.pte` file generated in the aot step), and optional number of inference executions. #### Command Syntax: ``` -cd ../../cmake-openvino-out/examples/openvino +cd ../../cmake-out/backends/openvino ./openvino_executor_runner \ --model_path= \ - --num_iter= \ - [--input_list_path=] \ - [--output_folder_path=] + --num_executions= ``` #### Command-Line Arguments - `--model_path`: (Required) Path to the model serialized in `.pte` format. -- `--num_iter`: (Optional) Number of times to run inference (default: 1). -- `--input_list_path`: (Optional) Path to a file containing the list of raw input tensor files. -- `--output_folder_path`: (Optional) Path to a folder where output tensor files will be saved. +- `--num_executions`: (Optional) Number of times to run inference (default: 1). #### Example Usage -Run inference with a given model for 10 iterations and save outputs: +Run inference with a given model for 10 iterations: ``` ./openvino_executor_runner \ --model_path=model.pte \ - --num_iter=10 \ - --output_folder_path=outputs/ + --num_executions=10 ``` -Run inference with an input tensor file: +## Running Python Example with Pybinding: -``` -./openvino_executor_runner \ - --model_path=model.pte \ - --num_iter=5 \ - --input_list_path=input_list.txt \ - --output_folder_path=outputs/ -``` - -## Running Pybinding Example: - -You can use the `openvino_pybinding_example.py` script to run models with the OpenVINO backend through the Python bindings. +You can use the `export_and_infer_openvino.py` script to run models with the OpenVINO backend through the Python bindings. 
### **Usage** #### **Command Structure** ```bash -python openvino_pybinding_example.py +python export_and_infer_openvino.py ``` #### **Arguments** @@ -252,10 +237,10 @@ python openvino_pybinding_example.py #### Execute Torchvision ResNet50 model for the GPU with Random Inputs ```bash -python openvino_pybinding_example.py --suite torchvision --model resnet50 --input_shape "(1, 3, 256, 256)" --device GPU +python export_and_infer_openvino.py --suite torchvision --model resnet50 --input_shape "(1, 3, 256, 256)" --device GPU ``` #### Run a Precompiled Model for the CPU Using an Existing Input Tensor File and Save the Output. ```bash -python openvino_pybinding_example.py --model_path /path/to/model/folder/resnet50_fp32.pte --input_tensor_file /path/to/input/folder/input.pt --output_tensor_file /path/to/output/folder/output.pt --device CPU +python export_and_infer_openvino.py --model_path /path/to/model/folder/resnet50_fp32.pte --input_tensor_file /path/to/input/folder/input.pt --output_tensor_file /path/to/output/folder/output.pt --device CPU ``` From 79e58396ae96372fc7f93492caec525b4d7db8b7 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Wed, 5 Mar 2025 16:06:34 -0800 Subject: [PATCH 130/188] Update README.md --- backends/openvino/README.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/backends/openvino/README.md b/backends/openvino/README.md index 0af4c5ff506..c7961d75823 100644 --- a/backends/openvino/README.md +++ b/backends/openvino/README.md @@ -27,7 +27,6 @@ executorch │ ├── CMakeLists.txt │ ├── README.md │ ├── __init__.py -│ ├── unsupported_openvino_functions.yaml │ ├── partitioner.py │ ├── preprocess.py │ └── requirements.txt @@ -36,11 +35,8 @@ executorch │ ├── aot │ ├── README.md │ └── aot_openvino_compiler.py -│ └── executor_runner -│ └── openvino_executor_runner.cpp -│ ├── CMakeLists.txt +│ ├── export_and_infer_openvino.py │ ├── README.md -└── └── openvino_build_example.sh ``` ## Build Instructions From 
ead3a85b7852323efc82e002aff9281861f09eef Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Wed, 5 Mar 2025 16:08:58 -0800 Subject: [PATCH 131/188] Update README.md --- backends/openvino/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/openvino/README.md b/backends/openvino/README.md index c7961d75823..0a036f27d8b 100644 --- a/backends/openvino/README.md +++ b/backends/openvino/README.md @@ -80,10 +80,10 @@ Follow the steps below to setup your build environment: ```bash ./openvino_build.sh ``` - **Build OpenVINO Backend with Pybinding**: To build and install the OpenVINO backend with Python bindings, run the `openvino_build.sh` script with the `pybinding` argument. This will compile and install the ExecuTorch Python package with the OpenVINO backend into your Python environment. + **Build OpenVINO Backend with Pybinding**: To build and install the OpenVINO backend with Python bindings, run the `openvino_build.sh` script with the `--pybind` argument. This will compile and install the ExecuTorch Python package with the OpenVINO backend into your Python environment. 
```bash - ./openvino_build.sh pybinding + ./openvino_build.sh --pybind ``` ### Run From e30cc4cfd6bd02a02b149752f409082f108be484 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Wed, 5 Mar 2025 16:33:20 -0800 Subject: [PATCH 132/188] build argument update for openvino backend --- backends/openvino/scripts/openvino_build.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/backends/openvino/scripts/openvino_build.sh b/backends/openvino/scripts/openvino_build.sh index 7cb3545aedb..ea6c1e3ec35 100755 --- a/backends/openvino/scripts/openvino_build.sh +++ b/backends/openvino/scripts/openvino_build.sh @@ -8,9 +8,9 @@ EXECUTORCH_ROOT=$(realpath "$(dirname "$0")/../../..") echo EXECUTORCH_ROOT=${EXECUTORCH_ROOT} main() { - build_type=${1:-"cpp_runtime"} + build_type=${1:-"--cpp_runtime"} - # If the first arguments is cpp_runtime (default), build libraries for C++ runtime + # If the first arguments is --cpp_runtime (default), build libraries for C++ runtime if [[ -z "$build_type" || "$build_type" == "cpp_runtime" ]]; then echo "Building C++ Runtime Libraries" @@ -37,8 +37,8 @@ main() { # Build the project cmake --build ${build_dir} --target install --config Release -j$(nproc) - # If the first arguments is pybinding, build python package with pybinding - elif [[ "$build_type" == "pybinding" ]]; then + # If the first arguments is --pybind, build python package with pybinding + elif [[ "$build_type" == "--pybind" ]]; then echo "Building Python Package with Pybinding" # Create and enter the build directory From 08205208501e131316d2aa39de16d52bd2f7817b Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Wed, 5 Mar 2025 16:53:33 -0800 Subject: [PATCH 133/188] add is_available call into contructor back --- backends/openvino/runtime/OpenvinoBackend.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/backends/openvino/runtime/OpenvinoBackend.cpp b/backends/openvino/runtime/OpenvinoBackend.cpp index 8aff2f25c39..373df5d7de8 100644 
--- a/backends/openvino/runtime/OpenvinoBackend.cpp +++ b/backends/openvino/runtime/OpenvinoBackend.cpp @@ -23,7 +23,14 @@ namespace executorch { namespace backends { namespace openvino { -OpenvinoBackend::OpenvinoBackend() {} +OpenvinoBackend::OpenvinoBackend() { + if (!is_available()) { + ET_LOG(Error, "OpenVINO runtime is not available. Initialization failed."); + throw std::runtime_error("OpenVINO runtime not available"); + } + + ET_LOG(Info, "OpenVINO runtime successfully verified and initialized."); +} bool OpenvinoBackend::is_available() const { try { From 3e8e9a152778dcfecdd81743386ba720dab3d23b Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Wed, 5 Mar 2025 17:02:03 -0800 Subject: [PATCH 134/188] Fix typo in openvino build script --- backends/openvino/scripts/openvino_build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/openvino/scripts/openvino_build.sh b/backends/openvino/scripts/openvino_build.sh index ea6c1e3ec35..2f424dea61c 100755 --- a/backends/openvino/scripts/openvino_build.sh +++ b/backends/openvino/scripts/openvino_build.sh @@ -11,7 +11,7 @@ main() { build_type=${1:-"--cpp_runtime"} # If the first arguments is --cpp_runtime (default), build libraries for C++ runtime - if [[ -z "$build_type" || "$build_type" == "cpp_runtime" ]]; then + if [[ -z "$build_type" || "$build_type" == "--cpp_runtime" ]]; then echo "Building C++ Runtime Libraries" # Set build directory From 1d37c5c7ed317e2a7ff8952703fdcd9bd538d435 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Wed, 5 Mar 2025 17:25:52 -0800 Subject: [PATCH 135/188] Fix pybinding build issue and remove is_available call from constructor --- backends/openvino/runtime/OpenvinoBackend.cpp | 9 +-------- backends/openvino/scripts/openvino_build.sh | 4 ++-- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/backends/openvino/runtime/OpenvinoBackend.cpp b/backends/openvino/runtime/OpenvinoBackend.cpp index 373df5d7de8..8aff2f25c39 100644 --- 
a/backends/openvino/runtime/OpenvinoBackend.cpp +++ b/backends/openvino/runtime/OpenvinoBackend.cpp @@ -23,14 +23,7 @@ namespace executorch { namespace backends { namespace openvino { -OpenvinoBackend::OpenvinoBackend() { - if (!is_available()) { - ET_LOG(Error, "OpenVINO runtime is not available. Initialization failed."); - throw std::runtime_error("OpenVINO runtime not available"); - } - - ET_LOG(Info, "OpenVINO runtime successfully verified and initialized."); -} +OpenvinoBackend::OpenvinoBackend() {} bool OpenvinoBackend::is_available() const { try { diff --git a/backends/openvino/scripts/openvino_build.sh b/backends/openvino/scripts/openvino_build.sh index 2f424dea61c..4a4827638ab 100755 --- a/backends/openvino/scripts/openvino_build.sh +++ b/backends/openvino/scripts/openvino_build.sh @@ -52,11 +52,11 @@ main() { -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \ -DEXECUTORCH_ENABLE_LOGGING=ON \ - -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ - -DEXECUTORCH_BUILD_PYBIND=ON" + -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON" export CMAKE_BUILD_ARGS="--target openvino_backend" # Build the package + EXECUTORCH_BUILD_PYBIND=ON \ pip install . 
--no-build-isolation else From 574d2b07225b1d14f716a65638f8a064864e8f6e Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Wed, 5 Mar 2025 17:34:16 -0800 Subject: [PATCH 136/188] remove aot folder --- examples/openvino/aot/README.md | 115 ------------------ .../{aot => }/aot_openvino_compiler.py | 0 2 files changed, 115 deletions(-) delete mode 100644 examples/openvino/aot/README.md rename examples/openvino/{aot => }/aot_openvino_compiler.py (100%) diff --git a/examples/openvino/aot/README.md b/examples/openvino/aot/README.md deleted file mode 100644 index 24c3cc35b98..00000000000 --- a/examples/openvino/aot/README.md +++ /dev/null @@ -1,115 +0,0 @@ -# **Model Export Script for Executorch** - -This script allows users to export deep learning models from various model suites (TIMM, Torchvision, Hugging Face) to a openvino backend using **Executorch**. Users can dynamically specify the model, input shape, and target device. - - -## **Usage** - -### **Command Structure** -```bash -python aot_openvino_compiler.py --suite --model --input_shape --device -``` - -### **Arguments** -- **`--suite`** (required): - Specifies the model suite to use. - Supported values: - - `timm` (e.g., VGG16, ResNet50) - - `torchvision` (e.g., resnet18, mobilenet_v2) - - `huggingface` (e.g., bert-base-uncased). NB: Quantization and validation is not supported yet. - -- **`--model`** (required): - Name of the model to export. - Examples: - - For `timm`: `vgg16`, `resnet50` - - For `torchvision`: `resnet18`, `mobilenet_v2` - - For `huggingface`: `bert-base-uncased`, `distilbert-base-uncased` - -- **`--input_shape`**(optional): - Input shape for the model. Provide this as a **list** or **tuple**. - Examples: - - `[1, 3, 224, 224]` (Zsh users: wrap in quotes) - - `(1, 3, 224, 224)` - -- **`--batch_size`** : - Batch size for the validation. Default batch_size == 1. - The dataset length must be evenly divisible by the batch size. - -- **`--quantize`** (optional): - Enable model quantization. 
--dataset argument is requred for the quantization. `huggingface` suite is not supported yet. - -- **`--quantization_flow`** (optional): - Specifies the way to quantize torch.fx.GraphModule. - Supported values: - - `nncf`: `nncf quantize_pt2e` API (default) - - `pt2e`: torch ao quantization pipeline. - -- **`--validate`** (optional): - Enable model validation. --dataset argument is requred for the validation. `huggingface` suite does not supported yet. - -- **`--dataset`** (optional): - Path to the imagenet-like calibration dataset. - -- **`--device`** (optional) - Target device for the compiled model. Default is `CPU`. - Examples: `CPU`, `GPU` - - -## **Examples** - -### Export a TIMM VGG16 model for the CPU -```bash -python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU -``` - -### Export a Torchvision ResNet50 model for the GPU -```bash -python aot_openvino_compiler.py --suite torchvision --model resnet50 --input_shape "(1, 3, 256, 256)" --device GPU -``` - -### Export a Hugging Face BERT model for the CPU -```bash -python aot_openvino_compiler.py --suite huggingface --model bert-base-uncased --input_shape "(1, 512)" --device CPU -``` -### Export and validate TIMM Resnet50d model for the CPU -```bash -python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU --validate --dataset /path/to/dataset -``` - -### Export, quantize and validate TIMM Resnet50d model for the CPU -```bash -python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU --validate --dataset /path/to/dataset --quantize -``` - -## **Notes** -1. **Input Shape in Zsh**: - If you are using Zsh, wrap `--input_shape` in quotes or use a tuple: - ```bash - --input_shape '[1, 3, 224, 224]' - --input_shape "(1, 3, 224, 224)" - ``` - -2. **Model Compatibility**: - Ensure the specified `model_name` exists in the selected `suite`. 
Use the corresponding library's documentation to verify model availability. - -3. **Output File**: - The exported model will be saved as `.pte` in the current directory. - -4. **Dependencies**: - - Python 3.8+ - - PyTorch - - Executorch - - TIMM (`pip install timm`) - - Torchvision - - Transformers (`pip install transformers`) - -## **Error Handling** -- **Model Not Found**: - If the script raises an error such as: - ```bash - ValueError: Model not found - ``` - Verify that the model name is correct for the chosen suite. - -- **Unsupported Input Shape**: - Ensure `--input_shape` is provided as a valid list or tuple. diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot_openvino_compiler.py similarity index 100% rename from examples/openvino/aot/aot_openvino_compiler.py rename to examples/openvino/aot_openvino_compiler.py From 9251dcfad5ca5fc93303ffd9ff2623ba60cbe75f Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Wed, 5 Mar 2025 17:28:02 -0800 Subject: [PATCH 137/188] Update README.md --- examples/openvino/README.md | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/examples/openvino/README.md b/examples/openvino/README.md index 418b442c9d5..a31883bc93e 100644 --- a/examples/openvino/README.md +++ b/examples/openvino/README.md @@ -8,14 +8,10 @@ Below is the layout of the `examples/openvino` directory, which includes the nec ``` examples/openvino -├── aot # Directory with scripts and instructions for AoT export - ├── README.md # Instructions to export models to '.pte' - └── aot_openvino_compiler.py # Example script for AoT export -├── executor_runner # Directory with examples for C++ execution - └── openvino_executor_runner.cpp # Example C++ file for execution ├── CMakeLists.txt # CMake build configuration to build examples ├── README.md # Documentation for examples (this file) -└── openvino_build_example.sh # Script to build examples for openvino backend +├── aot_openvino_compiler.py # Example script for 
AoT export +└── export_and_infer_openvino.py # Example script to export and execute models with python bindings ``` # Build Instructions for Examples @@ -25,12 +21,10 @@ Follow the [instructions](../../backends/openvino/README.md) of **Prerequisites* ## AOT step: -Within the `aot` folder, you'll find the model export script called `aot_openvino_compiler.py`. This script allows users to export deep learning models from various model suites (TIMM, Torchvision, Hugging Face) to a openvino backend using **Executorch**. Users can dynamically specify the model, input shape, and target device. +The export script called `aot_openvino_compiler.py` allows users to export deep learning models from various model suites (TIMM, Torchvision, Hugging Face) to a openvino backend using **Executorch**. Users can dynamically specify the model, input shape, and target device. ### **Usage** -First, navigate to the `aot` directory by running the command `cd aot`. Then, refer to the instructions provided below. 
- #### **Command Structure** ```bash python aot_openvino_compiler.py --suite --model --input_shape --device From 634b7e8a4ec110dcd2901a173d6d2a867938f1f3 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Wed, 5 Mar 2025 17:52:54 -0800 Subject: [PATCH 138/188] --pybind changed to --enable_python --- backends/openvino/scripts/openvino_build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/openvino/scripts/openvino_build.sh b/backends/openvino/scripts/openvino_build.sh index 4a4827638ab..26eacc982f9 100755 --- a/backends/openvino/scripts/openvino_build.sh +++ b/backends/openvino/scripts/openvino_build.sh @@ -37,8 +37,8 @@ main() { # Build the project cmake --build ${build_dir} --target install --config Release -j$(nproc) - # If the first arguments is --pybind, build python package with pybinding - elif [[ "$build_type" == "--pybind" ]]; then + # If the first arguments is --enable_python, build python package with python bindings + elif [[ "$build_type" == "--enable_python" ]]; then echo "Building Python Package with Pybinding" # Create and enter the build directory From 473b92e6c3d2f24e539ddbbd3f9c236be73c49fa Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Wed, 5 Mar 2025 17:32:59 -0800 Subject: [PATCH 139/188] Update README.md --- examples/openvino/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/openvino/README.md b/examples/openvino/README.md index a31883bc93e..70d22de642f 100644 --- a/examples/openvino/README.md +++ b/examples/openvino/README.md @@ -8,7 +8,6 @@ Below is the layout of the `examples/openvino` directory, which includes the nec ``` examples/openvino -├── CMakeLists.txt # CMake build configuration to build examples ├── README.md # Documentation for examples (this file) ├── aot_openvino_compiler.py # Example script for AoT export └── export_and_infer_openvino.py # Example script to export and execute models with python bindings From ba711cfd54fc2b6f94453da356cef418c0c13b0c Mon Sep 17 00:00:00 2001 
From: Mustafa Cavus Date: Wed, 5 Mar 2025 17:41:03 -0800 Subject: [PATCH 140/188] Update README.md --- backends/openvino/README.md | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/backends/openvino/README.md b/backends/openvino/README.md index 0a036f27d8b..752cc2a5056 100644 --- a/backends/openvino/README.md +++ b/backends/openvino/README.md @@ -32,11 +32,9 @@ executorch │ └── requirements.txt └── examples │ └── openvino -│ ├── aot -│ ├── README.md -│ └── aot_openvino_compiler.py +│ ├── aot_openvino_compiler.py │ ├── export_and_infer_openvino.py -│ ├── README.md +│ └── README.md ``` ## Build Instructions @@ -75,15 +73,15 @@ Follow the steps below to setup your build environment: Note: To achieve optimal performance with NNCF quantization, you should install the latest development version of NNCF (version 2.16.0.dev0+191b53d9 or higher). 3. Navigate to `scripts/` directory. -4. **Build OpenVINO Backend**: Once the prerequisites are in place, run the `openvino_build.sh` script to start the build process. By default, OpenVINO backend will be built under `cmake-out/backends/openvino/` as `libopenvino_backend.a` +4. **Build OpenVINO Backend C++ Libraries and Executor Runner**: Once the prerequisites are in place, run the `openvino_build.sh` script to start the build process. By default, OpenVINO backend will be built under `cmake-out/backends/openvino/` as `libopenvino_backend.a` ```bash ./openvino_build.sh ``` - **Build OpenVINO Backend with Pybinding**: To build and install the OpenVINO backend with Python bindings, run the `openvino_build.sh` script with the `--pybind` argument. This will compile and install the ExecuTorch Python package with the OpenVINO backend into your Python environment. + **Build OpenVINO Backend Python Package with Pybindings**: To build and install the OpenVINO backend Python package with Python bindings, run the `openvino_build.sh` script with the `--enable_python` argument. 
This will compile and install the ExecuTorch Python package with the OpenVINO backend into your Python environment. This options will also enable pybindings which is required to execute OpenVINO backend tests and `export_and_infer_openvino.py` script inside `executorch/examples/openvino` folder. ```bash - ./openvino_build.sh --pybind + ./openvino_build.sh --enable_python ``` ### Run From 8a8b3db18a9973dc3b5e4a9ed5cb15d78dbcea5f Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Wed, 5 Mar 2025 18:06:20 -0800 Subject: [PATCH 141/188] removed cmake file for openvino example --- examples/openvino/CMakeLists.txt | 102 ------------------------------- 1 file changed, 102 deletions(-) delete mode 100644 examples/openvino/CMakeLists.txt diff --git a/examples/openvino/CMakeLists.txt b/examples/openvino/CMakeLists.txt deleted file mode 100644 index 170805919a0..00000000000 --- a/examples/openvino/CMakeLists.txt +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright (c) Intel Corporation -# -# Licensed under the BSD License (the "License"); you may not use this file -# except in compliance with the License. See the license file found in the -# LICENSE file in the root directory of this source tree. - -cmake_minimum_required(VERSION 3.19) -project(openvino_runner_example) - -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS OFF) - -# Source root directory for executorch. -if(NOT EXECUTORCH_ROOT) - set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..) -endif() - -include(${EXECUTORCH_ROOT}/build/Utils.cmake) -include(${EXECUTORCH_ROOT}/build/Codegen.cmake) - -if(NOT PYTHON_EXECUTABLE) - resolve_python_executable() -endif() - -if(NOT CMAKE_BUILD_TYPE) - set(CMAKE_BUILD_TYPE Debug) -endif() - -set(_common_compile_options -Wno-deprecated-declarations -fPIC) -set(_common_include_directories ${EXECUTORCH_ROOT}/..) 
- -set(_openvino_executor_runner__srcs - ${CMAKE_CURRENT_LIST_DIR}/../openvino/executor_runner/openvino_executor_runner.cpp -) - -find_package(executorch CONFIG REQUIRED) -include_directories(${EXECUTORCH_INCLUDE_DIRS}) - -# Portable Ops Library -gen_selected_ops(LIB_NAME "openvino_portable_ops_lib" INCLUDE_ALL_OPS "ON") -generate_bindings_for_kernels( - LIB_NAME "openvino_portable_ops_lib" FUNCTIONS_YAML - ${EXECUTORCH_ROOT}/backends/openvino/unsupported_openvino_functions.yaml -) -gen_operators_lib( - LIB_NAME "openvino_portable_ops_lib" KERNEL_LIBS portable_kernels DEPS executorch -) -target_compile_options(openvino_portable_ops_lib INTERFACE -DET_EVENT_TRACER_ENABLED) -target_include_directories(openvino_portable_ops_lib PUBLIC ${_common_include_directories}) - -# Build Executor Runner -add_executable(openvino_executor_runner ${_openvino_executor_runner__srcs}) - -target_include_directories( - openvino_executor_runner PUBLIC ${_common_include_directories} ${EXECUTORCH_ROOT}/cmake-out/third-party/gflags/include -) - -# Set Library Directory -set(LIBRARY_DIR "${CMAKE_CURRENT_LIST_DIR}/../../cmake-out/lib/;${CMAKE_CURRENT_LIST_DIR}/../../cmake-out/third-party/gflags") -message(STATUS "Library directory path: ${LIBRARY_DIR}") - -# Locate OpenVINO Backend Library -find_library(OPENVINO_BACKEND_LIB NAMES openvino_backend PATHS ${LIBRARY_DIR} NO_DEFAULT_PATH) -if(NOT OPENVINO_BACKEND_LIB) - message(FATAL_ERROR "OpenVINO backend library not found in ${LIBRARY_DIR}") -endif() - -# Locate OpenVINO Library -find_package(OpenVINO REQUIRED) - -# Add OpenVINO Backend Library -add_library(openvino_backend STATIC IMPORTED) -set_property(TARGET openvino_backend PROPERTY IMPORTED_LOCATION ${OPENVINO_BACKEND_LIB}) - -# Locate OpenVINO Backend Library -find_library(GFLAGS_LIB NAMES gflags_nothreads PATHS ${LIBRARY_DIR} NO_DEFAULT_PATH) -if(NOT GFLAGS_LIB) - message(FATAL_ERROR "Gflags library not found in ${LIBRARY_DIR}") -endif() - -# Link Libraries 
-target_link_libraries(openvino_executor_runner PRIVATE - openvino_backend - openvino::runtime - ${GFLAGS_LIB} - executorch - executorch_core - openvino_portable_ops_lib - extension_data_loader - extension_runner_util - pthreadpool -) - -target_link_options_shared_lib(openvino_backend) - -# Ensure Proper RPATH Handling -set_target_properties(openvino_executor_runner PROPERTIES INSTALL_RPATH "$ORIGIN") - -get_filename_component( - EXECUTORCH_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../.." ABSOLUTE -) From 49e27921425cd98e3ec1deba14b8d30ae7ff5159 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Wed, 5 Mar 2025 17:52:38 -0800 Subject: [PATCH 142/188] Update backends/openvino/README.md Co-authored-by: Yamini Nimmagadda --- backends/openvino/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/openvino/README.md b/backends/openvino/README.md index 752cc2a5056..fffa4fac8a1 100644 --- a/backends/openvino/README.md +++ b/backends/openvino/README.md @@ -78,7 +78,7 @@ Follow the steps below to setup your build environment: ```bash ./openvino_build.sh ``` - **Build OpenVINO Backend Python Package with Pybindings**: To build and install the OpenVINO backend Python package with Python bindings, run the `openvino_build.sh` script with the `--enable_python` argument. This will compile and install the ExecuTorch Python package with the OpenVINO backend into your Python environment. This options will also enable pybindings which is required to execute OpenVINO backend tests and `export_and_infer_openvino.py` script inside `executorch/examples/openvino` folder. + **Build OpenVINO Backend Python Package with Pybindings**: To build and install the OpenVINO backend Python package with Python bindings, run the `openvino_build.sh` script with the `--enable_python` argument. This will compile and install the ExecuTorch Python package with the OpenVINO backend into your Python environment. 
This option will also enable python bindings required to execute OpenVINO backend tests and `export_and_infer_openvino.py` script inside `executorch/examples/openvino` folder. ```bash ./openvino_build.sh --enable_python From e783581261c2a96a000800a492a0f1e91c4c7e8d Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Wed, 5 Mar 2025 17:53:18 -0800 Subject: [PATCH 143/188] Update examples/openvino/export_and_infer_openvino.py Co-authored-by: Yamini Nimmagadda --- examples/openvino/export_and_infer_openvino.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/openvino/export_and_infer_openvino.py b/examples/openvino/export_and_infer_openvino.py index a94083d92eb..9528a98565e 100644 --- a/examples/openvino/export_and_infer_openvino.py +++ b/examples/openvino/export_and_infer_openvino.py @@ -63,7 +63,7 @@ def main( output_path: str, ): """ - Main function to load, quantize, and validate a model. + Main function to load, quantize, and infer a model. :param suite: The model suite to use (e.g., "timm", "torchvision", "huggingface"). :param model_name: The name of the model to load. 
From 62d719e82514544938d0853777ee0832393ac971 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Wed, 5 Mar 2025 18:01:29 -0800 Subject: [PATCH 144/188] Update backends/openvino/README.md Co-authored-by: Yamini Nimmagadda --- backends/openvino/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backends/openvino/README.md b/backends/openvino/README.md index fffa4fac8a1..8702cbf38c9 100644 --- a/backends/openvino/README.md +++ b/backends/openvino/README.md @@ -32,9 +32,9 @@ executorch │ └── requirements.txt └── examples │ └── openvino -│ ├── aot_openvino_compiler.py -│ ├── export_and_infer_openvino.py -│ └── README.md + ├── aot_openvino_compiler.py + ├── export_and_infer_openvino.py + └── README.md ``` ## Build Instructions From 5154ad6850a7c4590a90dee4983ca383684e70e9 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Thu, 6 Mar 2025 11:56:38 +0100 Subject: [PATCH 145/188] [OpenVINO] Constant folding is removed from the OpenVINOQuantizer --- backends/openvino/quantizer/quantizer.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/backends/openvino/quantizer/quantizer.py b/backends/openvino/quantizer/quantizer.py index f2306bfffc7..4db0002860d 100644 --- a/backends/openvino/quantizer/quantizer.py +++ b/backends/openvino/quantizer/quantizer.py @@ -341,13 +341,6 @@ def _get_torch_ao_qspec_from_qp( def validate(self, model: torch.fx.GraphModule) -> None: pass - def transform_for_annotation( - self, model: torch.fx.GraphModule - ) -> torch.fx.GraphModule: - # Fold constant branches to avoid their quantization - nncf_fx.transformations.fold_constant_except_qdq(model) - return model - def quantize_model( captured_model: torch.fx.GraphModule, From 4685a375926e2a9acff4f9495b5a9128bda213cd Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Wed, 5 Mar 2025 15:29:29 +0100 Subject: [PATCH 146/188] quantize_model cleanup --- backends/openvino/quantizer/quantizer.py | 33 ++++++++++++++-------- examples/openvino/aot_openvino_compiler.py | 18 ++++++++++-- 
2 files changed, 38 insertions(+), 13 deletions(-) diff --git a/backends/openvino/quantizer/quantizer.py b/backends/openvino/quantizer/quantizer.py index f2306bfffc7..1005d41b075 100644 --- a/backends/openvino/quantizer/quantizer.py +++ b/backends/openvino/quantizer/quantizer.py @@ -6,7 +6,7 @@ from collections import defaultdict from enum import Enum -from typing import Dict, List, Optional, Tuple +from typing import Dict, List, Optional, Tuple, Callable, Any import nncf import nncf.common.quantization as quantization @@ -351,32 +351,43 @@ def transform_for_annotation( def quantize_model( captured_model: torch.fx.GraphModule, + quantizer: Quantizer, calibration_dataset: torch.utils.data.DataLoader, + subset_size: int, + fast_bias_correction: Optional[bool] = True, + smooth_quant: bool = False, + transform_fn: Optional[Callable[[Any], Any]]= None, + **kwargs, ) -> torch.fx.GraphModule: """ - Quantizes a model using either NNCF-based or PTQ-based quantization. + Quantizes a model using NNCF quantize_pt2e API. :param captured_model: The model to be quantized, represented as a torch.fx.GraphModule. + :param quantizer: Torch ao quantizer to annotate nodes in the graph with quantization setups :param calibration_dataset: A DataLoader containing calibration data for quantization. + :param subset_size: Size of a subset to calculate activations + statistics used for quantization. + :param fast_bias_correction: Setting this option to `False` enables a different + bias correction method which is more accurate, in general, and takes + more time but requires less memory. None disables the bias correction algorithm. + :param smooth_quant: Setting this option to `True` enables the SmoothQuant algorithm. + :param kwargs: The keyword arguments for the nncf quantize_pt2e function. :return: The quantized model as a torch.fx.GraphModule. 
""" quantizer = OpenVINOQuantizer() print("PTQ: Quantize the model") - default_subset_size = 300 - batch_size = calibration_dataset.batch_size - subset_size = (default_subset_size // batch_size) + int( - default_subset_size % batch_size > 0 - ) - def transform(x): - return x[0] + if "fold_quantize" not in kwargs: + kwargs["fold_quantize"] = False quantized_model = nncf_fx.quantize_pt2e( captured_model, quantizer, subset_size=subset_size, - calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform), - fold_quantize=False, + calibration_dataset=nncf.Dataset(calibration_dataset, transform_fn), + fast_bias_correction=fast_bias_correction, + smooth_quant=smooth_quant, + **kwargs ) return quantized_model diff --git a/examples/openvino/aot_openvino_compiler.py b/examples/openvino/aot_openvino_compiler.py index 203d01fe73b..6e431aa991f 100644 --- a/examples/openvino/aot_openvino_compiler.py +++ b/examples/openvino/aot_openvino_compiler.py @@ -13,7 +13,10 @@ import torch import torchvision.models as torchvision_models from executorch.backends.openvino.partitioner import OpenvinoPartitioner -from executorch.backends.openvino.quantizer.quantizer import quantize_model +from executorch.backends.openvino.quantizer.quantizer import ( + OpenVINOQuantizer, + quantize_model, +) from executorch.exir import EdgeProgramManager, to_edge_transform_and_lower from executorch.exir.backend.backend_details import CompileSpec from executorch.extension.pybindings.portable_lib import ( # @manual @@ -182,9 +185,20 @@ def main( if not dataset_path: msg = "Quantization requires a calibration dataset." 
raise ValueError(msg) + + subset_size = 300 + batch_size = calibration_dataset.batch_size + subset_size = (subset_size // batch_size) + int(subset_size % batch_size > 0) + + quantizer = OpenVINOQuantizer() + + transform_fn = lambda x: x[0] quantized_model = quantize_model( aten_dialect.module(), - calibration_dataset, + quantizer=quantizer, + calibration_dataset=calibration_dataset, + subset_size=subset_size, + transform_fn=transform_fn, ) aten_dialect: ExportedProgram = export(quantized_model, example_args) From 0efa875b88def668a71d9357f4d75079e06f1e3c Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Thu, 6 Mar 2025 10:21:12 -0800 Subject: [PATCH 147/188] Update README.md --- backends/openvino/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/openvino/README.md b/backends/openvino/README.md index 8702cbf38c9..c9fd17162c3 100644 --- a/backends/openvino/README.md +++ b/backends/openvino/README.md @@ -31,7 +31,7 @@ executorch │ ├── preprocess.py │ └── requirements.txt └── examples -│ └── openvino + └── openvino ├── aot_openvino_compiler.py ├── export_and_infer_openvino.py └── README.md From db281b6b3b8aa504c620ffe36f603b097b74dd8b Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Thu, 6 Mar 2025 10:42:41 -0800 Subject: [PATCH 148/188] updated atol and rtol --- backends/openvino/tests/ops/base_openvino_op_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/openvino/tests/ops/base_openvino_op_test.py b/backends/openvino/tests/ops/base_openvino_op_test.py index 397664f027a..3b2d622d517 100644 --- a/backends/openvino/tests/ops/base_openvino_op_test.py +++ b/backends/openvino/tests/ops/base_openvino_op_test.py @@ -16,8 +16,8 @@ class BaseOpenvinoOpTest(unittest.TestCase): device = "CPU" - atol = 1e-5 - rtol = 1e-5 + atol = 1e-3 + rtol = 1e-3 def execute_layer_test( self, From 94a351e09c043f2ccbb48dfa58c43271229cb2f1 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Thu, 6 Mar 2025 19:03:42 +0100 Subject: 
[PATCH 149/188] Comments --- backends/openvino/quantizer/__init__.py | 4 +-- backends/openvino/quantizer/quantizer.py | 29 ++++++++++++++++------ examples/openvino/aot_openvino_compiler.py | 10 ++------ 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/backends/openvino/quantizer/__init__.py b/backends/openvino/quantizer/__init__.py index 03ea98e2c5b..44992b4f269 100644 --- a/backends/openvino/quantizer/__init__.py +++ b/backends/openvino/quantizer/__init__.py @@ -1,3 +1,3 @@ -from .quantizer import OpenVINOQuantizer +from .quantizer import OpenVINOQuantizer, quantize_model -__all__ = [OpenVINOQuantizer] +__all__ = [OpenVINOQuantizer, quantize_model] diff --git a/backends/openvino/quantizer/quantizer.py b/backends/openvino/quantizer/quantizer.py index 1005d41b075..e12af6ff8a1 100644 --- a/backends/openvino/quantizer/quantizer.py +++ b/backends/openvino/quantizer/quantizer.py @@ -6,7 +6,7 @@ from collections import defaultdict from enum import Enum -from typing import Dict, List, Optional, Tuple, Callable, Any +from typing import Any, Callable, Dict, List, Optional, Tuple import nncf import nncf.common.quantization as quantization @@ -351,30 +351,45 @@ def transform_for_annotation( def quantize_model( captured_model: torch.fx.GraphModule, - quantizer: Quantizer, calibration_dataset: torch.utils.data.DataLoader, - subset_size: int, + *, + mode: QuantizationMode = QuantizationMode.INT8_SYM, + subset_size: int = 300, fast_bias_correction: Optional[bool] = True, smooth_quant: bool = False, - transform_fn: Optional[Callable[[Any], Any]]= None, + transform_fn: Optional[Callable[[Any], Any]] = None, + extra_quantizer_options: Optional[Dict[str, Any]] = None, **kwargs, ) -> torch.fx.GraphModule: """ Quantizes a model using NNCF quantize_pt2e API. :param captured_model: The model to be quantized, represented as a torch.fx.GraphModule. 
- :param quantizer: Torch ao quantizer to annotate nodes in the graph with quantization setups :param calibration_dataset: A DataLoader containing calibration data for quantization. + :param mode: Defines special quantization modes. + - INT8_SYM: INT8 symmetric quantization for both activations and weights. + - INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights. + - INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models + Default value is INT8_SYM. :param subset_size: Size of a subset to calculate activations statistics used for quantization. :param fast_bias_correction: Setting this option to `False` enables a different bias correction method which is more accurate, in general, and takes more time but requires less memory. None disables the bias correction algorithm. :param smooth_quant: Setting this option to `True` enables the SmoothQuant algorithm. + :param extra_quantizer_options: A dictionary containing additional configuration options + for the OpenVINOQuantizer. :param kwargs: The keyword arguments for the nncf quantize_pt2e function. :return: The quantized model as a torch.fx.GraphModule. """ - quantizer = OpenVINOQuantizer() + extra_quantizer_options = extra_quantizer_options or {} + if "mode" in extra_quantizer_options: + print( + f'Ignoring "mode" from the quantizer_config. 
Using parameter mode = {mode}' + ) + del extra_quantizer_options["mode"] + + quantizer = OpenVINOQuantizer(mode=mode, **extra_quantizer_options) print("PTQ: Quantize the model") @@ -388,6 +403,6 @@ def quantize_model( calibration_dataset=nncf.Dataset(calibration_dataset, transform_fn), fast_bias_correction=fast_bias_correction, smooth_quant=smooth_quant, - **kwargs + **kwargs, ) return quantized_model diff --git a/examples/openvino/aot_openvino_compiler.py b/examples/openvino/aot_openvino_compiler.py index 6e431aa991f..711f4277817 100644 --- a/examples/openvino/aot_openvino_compiler.py +++ b/examples/openvino/aot_openvino_compiler.py @@ -13,10 +13,7 @@ import torch import torchvision.models as torchvision_models from executorch.backends.openvino.partitioner import OpenvinoPartitioner -from executorch.backends.openvino.quantizer.quantizer import ( - OpenVINOQuantizer, - quantize_model, -) +from executorch.backends.openvino.quantizer import quantize_model from executorch.exir import EdgeProgramManager, to_edge_transform_and_lower from executorch.exir.backend.backend_details import CompileSpec from executorch.extension.pybindings.portable_lib import ( # @manual @@ -190,13 +187,10 @@ def main( batch_size = calibration_dataset.batch_size subset_size = (subset_size // batch_size) + int(subset_size % batch_size > 0) - quantizer = OpenVINOQuantizer() - transform_fn = lambda x: x[0] quantized_model = quantize_model( aten_dialect.module(), - quantizer=quantizer, - calibration_dataset=calibration_dataset, + calibration_dataset, subset_size=subset_size, transform_fn=transform_fn, ) From 4f8ca404a1f93e4055c77488ff4cf18f9559ac40 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Fri, 7 Mar 2025 11:54:16 +0100 Subject: [PATCH 150/188] Fix executorch/examples/openvino/README.md --- examples/openvino/README.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/examples/openvino/README.md b/examples/openvino/README.md index 70d22de642f..29ac3bc5ca3 100644 --- 
a/examples/openvino/README.md +++ b/examples/openvino/README.md @@ -57,12 +57,6 @@ python aot_openvino_compiler.py --suite --model --inp - **`--quantize`** (optional): Enable model quantization. --dataset argument is requred for the quantization. `huggingface` suite does not supported yet. -- **`--quantization_flow`** (optional): - Specifies the way to quantize torch.fx.GraphModule. - Supported values: - - `nncf`: `nncf quantize_pt2e` API (default) - - `pt2e`: torch ao quantization pipeline. - - **`--validate`** (optional): Enable model validation. --dataset argument is requred for the validation. `huggingface` suite does not supported yet. From d47e3d920b5540fff402c14fc796dc6000d33484 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Fri, 7 Mar 2025 14:50:50 +0100 Subject: [PATCH 151/188] Update NNCF version --- backends/openvino/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/openvino/requirements.txt b/backends/openvino/requirements.txt index 50fb2b68816..316633e9004 100644 --- a/backends/openvino/requirements.txt +++ b/backends/openvino/requirements.txt @@ -1,2 +1,2 @@ transformers -git+https://github.com/openvinotoolkit/nncf@191b53d#egg=nncf +git+https://github.com/openvinotoolkit/nncf@6b0fc1c#egg=nncf From 4263a12c975022eadb2f3af58751d3c0c5d3c8f8 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Fri, 7 Mar 2025 15:17:25 -0800 Subject: [PATCH 152/188] Added inference option into aot_openvino_compiler.py, removed inference sample --- examples/openvino/aot_openvino_compiler.py | 99 +++++++++ .../openvino/export_and_infer_openvino.py | 192 ------------------ 2 files changed, 99 insertions(+), 192 deletions(-) delete mode 100644 examples/openvino/export_and_infer_openvino.py diff --git a/examples/openvino/aot_openvino_compiler.py b/examples/openvino/aot_openvino_compiler.py index 203d01fe73b..ca0069c85b6 100644 --- a/examples/openvino/aot_openvino_compiler.py +++ b/examples/openvino/aot_openvino_compiler.py @@ -5,6 +5,7 @@ 
# LICENSE file in the root directory of this source tree. import argparse +import time import executorch @@ -102,6 +103,54 @@ def load_calibration_dataset( return calibration_dataset +def infer_model( + exec_prog: EdgeProgramManager, + input_shape, + num_iter: int, + warmup_iter: int, + input_path: str, + output_path: str, +) -> float: + """ + Executes inference and reports the average timing. + + :param exec_prog: EdgeProgramManager of the lowered model + :param input_shape: The input shape for the model. + :param num_iter: The number of iterations to execute inference for timing. + :param warmup_iter: The number of iterations to execute inference for warmup before timing. + :param input_path: Path to the input tensor file to read the input for inference. + :param output_path: Path to the output tensor file to save the output of inference.. + :return: The average inference timing. + """ + # 1: Load model from buffer + executorch_module = _load_for_executorch_from_buffer(exec_prog.buffer) + + # 2: Initialize inputs + if input_path: + inputs = (torch.load(input_path, weights_only=False),) + else: + inputs = (torch.randn(input_shape),) + + # 3: Execute warmup + for _i in range(warmup_iter): + out = executorch_module.run_method("forward", inputs) + + # 4: Execute inference and measure timing + time_total = 0.0 + for _i in range(num_iter): + time_start = time.time() + out = executorch_module.run_method("forward", inputs) + time_end = time.time() + time_total += time_end - time_start + + # 5: Save output tensor as raw tensor file + if output_path: + torch.save(out, output_path) + + # 6: Return average inference timing + return time_total / float(num_iter) + + def validate_model( exec_prog: EdgeProgramManager, calibration_dataset: torch.utils.data.DataLoader ) -> float: @@ -137,6 +186,11 @@ def main( dataset_path: str, device: str, batch_size: int, + infer: bool, + num_iter: int, + warmup_iter: int, + input_path: str, + output_path: str, ): """ Main function to load, 
quantize, and validate a model. @@ -149,6 +203,12 @@ def main( :param dataset_path: Path to the dataset for calibration/validation. :param device: The device to run the model on (e.g., "cpu", "gpu"). :param batch_size: Batch size for dataset loading. + :param infer: Whether to execute inference and report timing. + :param num_iter: The number of iterations to execute inference for timing. + :param warmup_iter: The number of iterations to execute inference for warmup before timing. + :param input_path: Path to the input tensor file to read the input for inference. + :param output_path: Path to the output tensor file to save the output of inference.. + """ # Load the selected model @@ -222,6 +282,13 @@ def main( acc_top1 = validate_model(exec_prog, calibration_dataset) print(f"acc@1: {acc_top1}") + if infer: + print("Start inference of the model:") + avg_time = infer_model( + exec_prog, input_shape, num_iter, warmup_iter, input_path, output_path + ) + print(f"Average inference time: {avg_time}") + if __name__ == "__main__": # Argument parser for dynamic inputs @@ -256,6 +323,33 @@ def main( action="store_true", help="Enable model validation. 
--dataset argument is required for the validation.", ) + parser.add_argument( + "--infer", + action="store_true", + help="Run inference and report timing.", + ) + parser.add_argument( + "--num_iter", + type=int, + default=1, + help="The number of iterations to execute inference for timing.", + ) + parser.add_argument( + "--warmup_iter", + type=int, + default=0, + help="The number of iterations to execute inference for warmup before timing.", + ) + parser.add_argument( + "--input_tensor_path", + type=str, + help="Path to the input tensor file to read the input for inference.", + ) + parser.add_argument( + "--output_tensor_path", + type=str, + help="Path to the output tensor file to save the output of inference.", + ) parser.add_argument("--dataset", type=str, help="Path to the validation dataset.") parser.add_argument( "--device", @@ -278,4 +372,9 @@ def main( args.dataset, args.device, args.batch_size, + args.infer, + args.num_iter, + args.warmup_iter, + args.input_tensor_path, + args.output_tensor_path, ) diff --git a/examples/openvino/export_and_infer_openvino.py b/examples/openvino/export_and_infer_openvino.py deleted file mode 100644 index 9528a98565e..00000000000 --- a/examples/openvino/export_and_infer_openvino.py +++ /dev/null @@ -1,192 +0,0 @@ -# Copyright (c) Intel Corporation -# -# Licensed under the BSD License (the "License"); you may not use this file -# except in compliance with the License. See the license file found in the -# LICENSE file in the root directory of this source tree. 
- -import argparse -import sys -import time - -import timm - -import torch -import torchvision.models as torchvision_models - -from executorch.backends.openvino.partitioner import OpenvinoPartitioner -from executorch.exir import EdgeProgramManager, to_edge_transform_and_lower -from executorch.exir.backend.backend_details import CompileSpec - -from executorch.extension.pybindings.portable_lib import ( # @manual - _load_for_executorch_from_buffer, -) -from torch.export import export, ExportedProgram -from transformers import AutoModel - - -# Function to load a model based on the selected suite -def load_model(suite: str, model_name: str): - """ - Loads a pre-trained model from the specified model suite. - - :param suite: The suite from which to load the model. Supported values are: - - "timm": Uses `timm.create_model` to load the model. - - "torchvision": Loads a model from `torchvision.models`. Raises an error if the model does not exist. - - "huggingface": Loads a transformer model using `AutoModel.from_pretrained`. - :param model_name: The name of the model to load. - :return: The loaded model instance. - :raises ValueError: If the specified model suite is unsupported or the model is not found. - """ - if suite == "timm": - return timm.create_model(model_name, pretrained=True) - elif suite == "torchvision": - if not hasattr(torchvision_models, model_name): - msg = f"Model {model_name} not found in torchvision." - raise ValueError(msg) - return getattr(torchvision_models, model_name)(pretrained=True) - elif suite == "huggingface": - return AutoModel.from_pretrained(model_name) - else: - msg = f"Unsupported model suite: {suite}" - raise ValueError(msg) - - -def main( - suite: str, - model_name: str, - model_path: str, - input_shape, - device: str, - num_iterations: int, - warmup_iterations: int, - input_path: str, - output_path: str, -): - """ - Main function to load, quantize, and infer a model. 
- - :param suite: The model suite to use (e.g., "timm", "torchvision", "huggingface"). - :param model_name: The name of the model to load. - :param input_shape: The input shape for the model. - :param device: The device to run the model on (e.g., "cpu", "gpu"). - :param num_iterations: Number of iterations to execute inference. - """ - # Custom check to ensure suite is provided with model name - if model_name and not suite: - print("Error: --suite argument should be provided with --model") - sys.exit(1) - - if input_path: - print("Loading input tensor from ", input_path) - sample_inputs = (torch.load(input_path, weights_only=False),) - else: - print("Generating random input tensor with shape of ", input_shape) - sample_inputs = (torch.randn(input_shape),) - - if model_name: - print("Downloading model") - print("suite: ", suite) - print("model: ", model_name) - model = load_model(suite, model_name) - model = model.eval() - - exported_program: ExportedProgram = export(model, sample_inputs) - compile_spec = [CompileSpec("device", device.encode())] - edge: EdgeProgramManager = to_edge_transform_and_lower( - exported_program, - partitioner=[ - OpenvinoPartitioner(compile_spec), - ], - ) - - exec_prog = edge.to_executorch() - executorch_module = _load_for_executorch_from_buffer(exec_prog.buffer) - else: - print("Loading model from ", model_path) - with open(model_path, "rb") as f: - model_buffer = f.read() # Read model file into buffer - executorch_module = _load_for_executorch_from_buffer(model_buffer) - - if warmup_iterations > 0: - print("Warmup begins for ", warmup_iterations, " iterations") - for _i in range(warmup_iterations): - out = executorch_module.run_method("forward", sample_inputs) - - print("Execution begins for ", num_iterations, " iterations") - time_total = 0 - for _i in range(num_iterations): - time_start = time.time() - out = executorch_module.run_method("forward", sample_inputs) - time_end = time.time() - time_total += time_end - time_start - - 
print("Average inference time: ", (time_total / float(num_iterations)), " secs") - - if output_path: - torch.save(out, output_path) - - -if __name__ == "__main__": - # Argument parser for dynamic inputs - parser = argparse.ArgumentParser(description="Export models with executorch.") - parser.add_argument( - "--suite", - type=str, - required=False, - choices=["timm", "torchvision", "huggingface"], - help="Select the model suite (timm, torchvision, huggingface).", - ) - model_group = parser.add_mutually_exclusive_group(required=True) - model_group.add_argument("--model", type=str, help="Model name to be loaded.") - model_group.add_argument( - "--model_path", type=str, help="Model path to .pte file to be loaded." - ) - input_group = parser.add_mutually_exclusive_group(required=True) - input_group.add_argument( - "--input_shape", - type=eval, - help="Input shape for the model as a list or tuple (e.g., [1, 3, 224, 224] or (1, 3, 224, 224)).", - ) - parser.add_argument( - "--device", - type=str, - default="CPU", - help="Target device for compiling the model (e.g., CPU, GPU). 
Default is CPU.", - ) - parser.add_argument( - "--num_iter", - type=int, - default=1, - help="Number of iterations to execute inference", - ) - parser.add_argument( - "--warmup_iter", - type=int, - default=0, - help="Number of iterations to execute for warmup", - ) - input_group.add_argument( - "--input_tensor_path", - type=str, - help="Optional raw tensor input file to load the input from", - ) - parser.add_argument( - "--output_tensor_path", - type=str, - help="Optional output file path to save raw output tensor", - ) - - args = parser.parse_args() - - # Run the main function with parsed arguments - main( - args.suite, - args.model, - args.model_path, - args.input_shape, - args.device, - args.num_iter, - args.warmup_iter, - args.input_tensor_path, - args.output_tensor_path, - ) From 1d975c7fb6cfbb7dd8a6967fa5d413f340ea3383 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Fri, 7 Mar 2025 15:21:06 -0800 Subject: [PATCH 153/188] Update README.md --- examples/openvino/README.md | 89 +++++++++---------------------------- 1 file changed, 20 insertions(+), 69 deletions(-) diff --git a/examples/openvino/README.md b/examples/openvino/README.md index 70d22de642f..f3f1dc6dad5 100644 --- a/examples/openvino/README.md +++ b/examples/openvino/README.md @@ -69,6 +69,21 @@ python aot_openvino_compiler.py --suite --model --inp - **`--dataset`** (optional): Path to the imagenet-like calibration dataset. +- **`--infer`** (optional): + Execute inference with the compiled model and report average inference timing. + +- **`--num_iter`** (optional): + Number of iterations to execute inference. Default value for the number of iterations is `1`. + +- **`--warmup_iter`** (optional): + Number of warmup iterations to execute inference before timing begins. Default value for the warmup iterations is `0`. + +- **`--input_tensor_path`** (optional): + Path to the raw tensor file to be used as input for inference. If this argument is not provided, a random input tensor will be generated. 
+ +- **`--output_tensor_path`** (optional): + Path to the raw tensor file which the output of the inference to be saved. + - **`--device`** (optional) Target device for the compiled model. Default is `CPU`. Examples: `CPU`, `GPU` @@ -100,6 +115,11 @@ python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU --validate --dataset /path/to/dataset --quantize ``` +#### Export a Torchvision Inception V3 model for the CPU and Execute Inference +```bash +python aot_openvino_compiler.py --suite torchvision --model inception_v3 --infer --warmup_iter 10 --num_iter 100 --input_shape "(1, 3, 256, 256)" --device CPU +``` + ### **Notes** 1. **Input Shape in Zsh**: If you are using Zsh, wrap `--input_shape` in quotes or use a tuple: @@ -168,72 +188,3 @@ Run inference with a given model for 10 iterations: --model_path=model.pte \ --num_executions=10 ``` - -## Running Python Example with Pybinding: - -You can use the `export_and_infer_openvino.py` script to run models with the OpenVINO backend through the Python bindings. - -### **Usage** - -#### **Command Structure** -```bash -python export_and_infer_openvino.py -``` - -#### **Arguments** -- **`--suite`** (required if `--model_path` argument is not used): - Specifies the model suite to use. Needs to be used with `--model` argument. - Supported values: - - `timm` (e.g., VGG16, ResNet50) - - `torchvision` (e.g., resnet18, mobilenet_v2) - - `huggingface` (e.g., bert-base-uncased). NB: Quantization and validation is not supported yet. - -- **`--model`** (required if `--model_path` argument is not used): - Name of the model to export. Needs to be used with `--suite` argument. 
- Examples: - - For `timm`: `vgg16`, `resnet50` - - For `torchvision`: `resnet18`, `mobilenet_v2` - - For `huggingface`: `bert-base-uncased`, `distilbert-base-uncased` - -- **`--model_path`** (required if `--suite` and `--model` arguments are not used): - Path to the saved model file. This argument allows you to load the compiled model from a file, instead of downloading it from the model suites using the `--suite` and `--model` arguments. - Example: `/resnet50_fp32.pte` - -- **`--input_shape`**(required for random inputs): - Input shape for the model. Provide this as a **list** or **tuple**. - Examples: - - `[1, 3, 224, 224]` (Zsh users: wrap in quotes) - - `(1, 3, 224, 224)` - - - **`--input_tensor_path`**(optional): - Path to the raw input tensor file. If this argument is not provided, a random input tensor will be generated with the input shape provided with `--input_shape` argument. - Example: `/input_tensor.pt` - - - **`--output_tensor_path`**(optional): - Path to the file where the output raw tensor will be saved. - Example: `/output_tensor.pt` - -- **`--device`** (optional) - Target device for the compiled model. Default is `CPU`. - Examples: `CPU`, `GPU` - -- **`--num_iter`** (optional) - Number of iterations to execute inference for evaluation. The default value is `1`. - Examples: `100`, `1000` - -- **`--warmup_iter`** (optional) - Number of warmup iterations to execute inference before evaluation. The default value is `0`. - Examples: `5`, `10` - - -### **Examples** - -#### Execute Torchvision ResNet50 model for the GPU with Random Inputs -```bash -python export_and_infer_openvino.py --suite torchvision --model resnet50 --input_shape "(1, 3, 256, 256)" --device GPU -``` - -#### Run a Precompiled Model for the CPU Using an Existing Input Tensor File and Save the Output. 
-```bash -python export_and_infer_openvino.py --model_path /path/to/model/folder/resnet50_fp32.pte --input_tensor_file /path/to/input/folder/input.pt --output_tensor_file /path/to/output/folder/output.pt --device CPU -``` From ae3a9e71b2a3496c6a61f52332b416fd82b7c7de Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Fri, 7 Mar 2025 19:03:36 -0800 Subject: [PATCH 154/188] aot script updated --- ..._compiler.py => aot_optimize_and_infer.py} | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) rename examples/openvino/{aot_openvino_compiler.py => aot_optimize_and_infer.py} (94%) diff --git a/examples/openvino/aot_openvino_compiler.py b/examples/openvino/aot_optimize_and_infer.py similarity index 94% rename from examples/openvino/aot_openvino_compiler.py rename to examples/openvino/aot_optimize_and_infer.py index ca0069c85b6..b95ea5cad79 100644 --- a/examples/openvino/aot_openvino_compiler.py +++ b/examples/openvino/aot_optimize_and_infer.py @@ -181,6 +181,8 @@ def main( suite: str, model_name: str, input_shape, + save_model: bool, + model_file_name: str, quantize: bool, validate: bool, dataset_path: str, @@ -198,6 +200,8 @@ def main( :param suite: The model suite to use (e.g., "timm", "torchvision", "huggingface"). :param model_name: The name of the model to load. :param input_shape: The input shape for the model. + :param save_model: Whether to save the compiled model as a .pte file. + :param model_file_name: Custom file name to save the exported model. :param quantize: Whether to quantize the model. :param validate: Whether to validate the model. :param dataset_path: Path to the dataset for calibration/validation. 
@@ -264,10 +268,12 @@ def main( ) # Serialize and save it to a file - model_file_name = f"{model_name}_{'int8' if quantize else 'fp32'}.pte" - with open(model_file_name, "wb") as file: - exec_prog.write_to_file(file) - print(f"Model exported and saved as {model_file_name} on {device}.") + if save_model: + if not model_file_name: + model_file_name = f"{model_name}_{'int8' if quantize else 'fp32'}.pte" + with open(model_file_name, "wb") as file: + exec_prog.write_to_file(file) + print(f"Model exported and saved as {model_file_name} on {device}.") if validate: if suite == "huggingface": @@ -315,6 +321,14 @@ def main( help="Batch size for the validation. Default batch_size == 1." " The dataset length must be evenly divisible by the batch size.", ) + parser.add_argument( + "--export", action="store_true", help="Export the compiled model as .pte file." + ) + parser.add_argument( + "--model_file_name", + type=str, + help="Custom file name to save the exported model.", + ) parser.add_argument( "--quantize", action="store_true", help="Enable model quantization." 
) @@ -367,6 +381,8 @@ def main( args.suite, args.model, args.input_shape, + args.export, + args.model_file_name, args.quantize, args.validate, args.dataset, From 59c90eadc37b40ad7f87c7538fd3c7f580537797 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Fri, 7 Mar 2025 18:49:19 -0800 Subject: [PATCH 155/188] Update README.md --- examples/openvino/README.md | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/examples/openvino/README.md b/examples/openvino/README.md index f3f1dc6dad5..c7c474ec938 100644 --- a/examples/openvino/README.md +++ b/examples/openvino/README.md @@ -9,8 +9,7 @@ Below is the layout of the `examples/openvino` directory, which includes the nec ``` examples/openvino ├── README.md # Documentation for examples (this file) -├── aot_openvino_compiler.py # Example script for AoT export -└── export_and_infer_openvino.py # Example script to export and execute models with python bindings +└── aot_optimize_and_infer.py # Example script to export and execute models ``` # Build Instructions for Examples @@ -20,13 +19,13 @@ Follow the [instructions](../../backends/openvino/README.md) of **Prerequisites* ## AOT step: -The export script called `aot_openvino_compiler.py` allows users to export deep learning models from various model suites (TIMM, Torchvision, Hugging Face) to a openvino backend using **Executorch**. Users can dynamically specify the model, input shape, and target device. +The export script called `aot_optimize_and_infer.py` allows users to export deep learning models from various model suites (TIMM, Torchvision, Hugging Face) to a openvino backend using **Executorch**. Users can dynamically specify the model, input shape, and target device. 
### **Usage** #### **Command Structure** ```bash -python aot_openvino_compiler.py --suite --model --input_shape --device +python aot_optimize_and_infer.py --suite --model --input_shape --device ``` #### **Arguments** @@ -50,6 +49,12 @@ python aot_openvino_compiler.py --suite --model --inp - `[1, 3, 224, 224]` (Zsh users: wrap in quotes) - `(1, 3, 224, 224)` +- **`--export`** (optional): + Save the exported model as a `.pte` file. + +- **`--model_file_name`** (optional): + Specify a custom file name to save the exported model. + - **`--batch_size`** : Batch size for the validation. Default batch_size == 1. The dataset length must be evenly divisible by the batch size. @@ -93,31 +98,31 @@ python aot_openvino_compiler.py --suite --model --inp #### Export a TIMM VGG16 model for the CPU ```bash -python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU +python aot_optimize_and_infer.py --export --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU ``` #### Export a Torchvision ResNet50 model for the GPU ```bash -python aot_openvino_compiler.py --suite torchvision --model resnet50 --input_shape "(1, 3, 256, 256)" --device GPU +python aot_optimize_and_infer.py --export --suite torchvision --model resnet50 --input_shape "(1, 3, 256, 256)" --device GPU ``` #### Export a Hugging Face BERT model for the CPU ```bash -python aot_openvino_compiler.py --suite huggingface --model bert-base-uncased --input_shape "(1, 512)" --device CPU +python aot_optimize_and_infer.py --export --suite huggingface --model bert-base-uncased --input_shape "(1, 512)" --device CPU ``` #### Export and validate TIMM Resnet50d model for the CPU ```bash -python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU --validate --dataset /path/to/dataset +python aot_optimize_and_infer.py --export --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU --validate --dataset /path/to/dataset ``` 
#### Export, quantize and validate TIMM Resnet50d model for the CPU ```bash -python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU --validate --dataset /path/to/dataset --quantize +python aot_optimize_and_infer.py --export --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU --validate --dataset /path/to/dataset --quantize ``` -#### Export a Torchvision Inception V3 model for the CPU and Execute Inference +#### Execute Inference with Torchvision Inception V3 model for the CPU ```bash -python aot_openvino_compiler.py --suite torchvision --model inception_v3 --infer --warmup_iter 10 --num_iter 100 --input_shape "(1, 3, 256, 256)" --device CPU +python aot_optimize_and_infer.py --suite torchvision --model inception_v3 --infer --warmup_iter 10 --num_iter 100 --input_shape "(1, 3, 256, 256)" --device CPU ``` ### **Notes** From 1d756e69f57c6eb6a4da9b73ce9f9d5db511ea26 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Fri, 7 Mar 2025 18:51:04 -0800 Subject: [PATCH 156/188] Update README.md --- backends/openvino/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backends/openvino/README.md b/backends/openvino/README.md index c9fd17162c3..36fabbeeb1f 100644 --- a/backends/openvino/README.md +++ b/backends/openvino/README.md @@ -32,8 +32,7 @@ executorch │ └── requirements.txt └── examples └── openvino - ├── aot_openvino_compiler.py - ├── export_and_infer_openvino.py + ├── aot_optimize_and_infer.py └── README.md ``` From 37b56f86ea41e0c8b3015844993e1d278aefdbd5 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Fri, 7 Mar 2025 18:53:17 -0800 Subject: [PATCH 157/188] Update README.md --- examples/openvino/README.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/examples/openvino/README.md b/examples/openvino/README.md index c7c474ec938..a5a8d47be0f 100644 --- a/examples/openvino/README.md +++ b/examples/openvino/README.md @@ -23,10 +23,6 @@ The export script called 
`aot_optimize_and_infer.py` allows users to export deep ### **Usage** -#### **Command Structure** -```bash -python aot_optimize_and_infer.py --suite --model --input_shape --device -``` #### **Arguments** - **`--suite`** (required): From 4902c8a5da2fc66ccaacf154bff0bebf1a5b8810 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Fri, 7 Mar 2025 18:54:19 -0800 Subject: [PATCH 158/188] Update README.md --- examples/openvino/README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/openvino/README.md b/examples/openvino/README.md index a5a8d47be0f..153adeaac89 100644 --- a/examples/openvino/README.md +++ b/examples/openvino/README.md @@ -90,33 +90,33 @@ The export script called `aot_optimize_and_infer.py` allows users to export deep Examples: `CPU`, `GPU` -### **Examples** +#### **Examples** -#### Export a TIMM VGG16 model for the CPU +##### Export a TIMM VGG16 model for the CPU ```bash python aot_optimize_and_infer.py --export --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU ``` -#### Export a Torchvision ResNet50 model for the GPU +##### Export a Torchvision ResNet50 model for the GPU ```bash python aot_optimize_and_infer.py --export --suite torchvision --model resnet50 --input_shape "(1, 3, 256, 256)" --device GPU ``` -#### Export a Hugging Face BERT model for the CPU +##### Export a Hugging Face BERT model for the CPU ```bash python aot_optimize_and_infer.py --export --suite huggingface --model bert-base-uncased --input_shape "(1, 512)" --device CPU ``` -#### Export and validate TIMM Resnet50d model for the CPU +##### Export and validate TIMM Resnet50d model for the CPU ```bash python aot_optimize_and_infer.py --export --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU --validate --dataset /path/to/dataset ``` -#### Export, quantize and validate TIMM Resnet50d model for the CPU +##### Export, quantize and validate TIMM Resnet50d model for the CPU ```bash python 
aot_optimize_and_infer.py --export --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU --validate --dataset /path/to/dataset --quantize ``` -#### Execute Inference with Torchvision Inception V3 model for the CPU +##### Execute Inference with Torchvision Inception V3 model for the CPU ```bash python aot_optimize_and_infer.py --suite torchvision --model inception_v3 --infer --warmup_iter 10 --num_iter 100 --input_shape "(1, 3, 256, 256)" --device CPU ``` From f30ee7657ab8d539f9c95dfea9a3b0aff4742be1 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Mon, 10 Mar 2025 11:18:37 -0700 Subject: [PATCH 159/188] Update README.md --- examples/openvino/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/openvino/README.md b/examples/openvino/README.md index 153adeaac89..5da6f6b20af 100644 --- a/examples/openvino/README.md +++ b/examples/openvino/README.md @@ -19,7 +19,7 @@ Follow the [instructions](../../backends/openvino/README.md) of **Prerequisites* ## AOT step: -The export script called `aot_optimize_and_infer.py` allows users to export deep learning models from various model suites (TIMM, Torchvision, Hugging Face) to a openvino backend using **Executorch**. Users can dynamically specify the model, input shape, and target device. +The python script called `aot_optimize_and_infer.py` allows users to export deep learning models from various model suites (TIMM, Torchvision, Hugging Face) to a openvino backend using **Executorch**. Users can dynamically specify the model, input shape, and target device. 
### **Usage** From ee032ac732e2c67e76b75a74311e759c18bd6a4d Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Mon, 10 Mar 2025 12:55:12 -0700 Subject: [PATCH 160/188] Update examples/openvino/aot_openvino_compiler.py --- examples/openvino/aot_openvino_compiler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/openvino/aot_openvino_compiler.py b/examples/openvino/aot_openvino_compiler.py index 711f4277817..317618d042b 100644 --- a/examples/openvino/aot_openvino_compiler.py +++ b/examples/openvino/aot_openvino_compiler.py @@ -187,7 +187,8 @@ def main( batch_size = calibration_dataset.batch_size subset_size = (subset_size // batch_size) + int(subset_size % batch_size > 0) - transform_fn = lambda x: x[0] + def transform_fn(x): + return x[0] quantized_model = quantize_model( aten_dialect.module(), calibration_dataset, From e76c440bfc365761221e3fa3bcd41f69f4c282dc Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Mon, 10 Mar 2025 13:01:34 -0700 Subject: [PATCH 161/188] code formatting --- examples/openvino/aot_openvino_compiler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/openvino/aot_openvino_compiler.py b/examples/openvino/aot_openvino_compiler.py index 317618d042b..9478fe88c40 100644 --- a/examples/openvino/aot_openvino_compiler.py +++ b/examples/openvino/aot_openvino_compiler.py @@ -189,6 +189,7 @@ def main( def transform_fn(x): return x[0] + quantized_model = quantize_model( aten_dialect.module(), calibration_dataset, From a02855f3c4ba06df5e1ed800f06debae532de0cf Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Mon, 10 Mar 2025 13:52:19 -0700 Subject: [PATCH 162/188] code formatting --- examples/openvino/aot_optimize_and_infer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/openvino/aot_optimize_and_infer.py b/examples/openvino/aot_optimize_and_infer.py index 5ff4ff2d5ce..9eaeb529026 100644 --- a/examples/openvino/aot_optimize_and_infer.py +++ 
b/examples/openvino/aot_optimize_and_infer.py @@ -177,7 +177,7 @@ def validate_model( return accuracy_score(predictions, targets) -def main( +def main( # noqa: C901 suite: str, model_name: str, input_shape, From 7e7711fec329732960b4bd732be3ff965ab45945 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Wed, 12 Mar 2025 16:28:27 -0700 Subject: [PATCH 163/188] temp fix for unit tests with nncf --- backends/openvino/tests/test_runner.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/backends/openvino/tests/test_runner.py b/backends/openvino/tests/test_runner.py index 8bf103530d4..0bda8189b0d 100644 --- a/backends/openvino/tests/test_runner.py +++ b/backends/openvino/tests/test_runner.py @@ -1,6 +1,8 @@ import argparse import unittest +import nncf.torch + class OpenvinoTestSuite(unittest.TestSuite): @@ -66,7 +68,8 @@ def parse_arguments(): # Discover all existing op tests in "ops" folder suite = loader.discover(test_params["test_type"], pattern=test_params["pattern"]) # Start running tests - result = unittest.TextTestRunner().run(suite) + with nncf.torch.disable_patching(): + result = unittest.TextTestRunner().run(suite) if result.wasSuccessful(): print("OpenVINO backend tests completed successfully") else: From 93adab7a26042fb357cae5b75d30cd0ea7ae4789 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Mon, 17 Mar 2025 12:59:52 -0700 Subject: [PATCH 164/188] preprocess update for latest ov release --- backends/openvino/preprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/openvino/preprocess.py b/backends/openvino/preprocess.py index 5c8398bc51d..0e145d369c9 100644 --- a/backends/openvino/preprocess.py +++ b/backends/openvino/preprocess.py @@ -47,4 +47,4 @@ def preprocess( ) model_bytes = compiled.export_model() - return PreprocessResult(processed_bytes=model_bytes) + return PreprocessResult(processed_bytes=model_bytes.getvalue()) From d743bc047d98eea45b0ba679cacd323299c74ba7 Mon Sep 17 00:00:00 2001 From: 
Mustafa Cavus Date: Mon, 17 Mar 2025 12:40:55 -0700 Subject: [PATCH 165/188] Update README.md --- backends/openvino/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/openvino/README.md b/backends/openvino/README.md index 36fabbeeb1f..5c69e03f11c 100644 --- a/backends/openvino/README.md +++ b/backends/openvino/README.md @@ -46,7 +46,7 @@ Before you begin, ensure you have openvino installed and configured on your syst ```bash git clone https://github.com/openvinotoolkit/openvino.git -cd openvino && git checkout 20ad7cb +cd openvino && git checkout releases/2025/1 git submodule update --init --recursive mkdir build && cd build cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_PYTHON=ON From 293b1d5534e4a6a8dcbfd5321324869c9ec460e8 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Mon, 17 Mar 2025 12:41:38 -0700 Subject: [PATCH 166/188] Update build-run-openvino.md --- docs/source/build-run-openvino.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/build-run-openvino.md b/docs/source/build-run-openvino.md index e64d1e34618..8f6424e7d65 100644 --- a/docs/source/build-run-openvino.md +++ b/docs/source/build-run-openvino.md @@ -41,7 +41,7 @@ Before you begin, ensure you have openvino installed and configured on your syst ```bash git clone https://github.com/openvinotoolkit/openvino.git -cd openvino && git checkout 20ad7cb +cd openvino && git checkout releases/2025/1 git submodule update --init --recursive mkdir build && cd build cmake .. 
-DCMAKE_BUILD_TYPE=Release -DENABLE_PYTHON=ON From ca3ec9c7d2a309b7fc18f57282d56ee7be6a73ba Mon Sep 17 00:00:00 2001 From: suryasidd Date: Tue, 18 Mar 2025 22:11:45 -0700 Subject: [PATCH 167/188] Fix inputs for hf models --- backends/openvino/runtime/OpenvinoBackend.cpp | 4 ++++ examples/openvino/aot_optimize_and_infer.py | 17 +++++++++++------ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/backends/openvino/runtime/OpenvinoBackend.cpp b/backends/openvino/runtime/OpenvinoBackend.cpp index 8aff2f25c39..431ffcff67c 100644 --- a/backends/openvino/runtime/OpenvinoBackend.cpp +++ b/backends/openvino/runtime/OpenvinoBackend.cpp @@ -170,6 +170,10 @@ ov::element::Type OpenvinoBackend::convert_to_openvino_type( return ov::element::i32; case exa::ScalarType::Char: return ov::element::i8; + case exa::ScalarType::Long: + return ov::element::i64; + case exa::ScalarType::Bool: + return ov::element::boolean; default: throw std::runtime_error("Unsupported scalar type"); } diff --git a/examples/openvino/aot_optimize_and_infer.py b/examples/openvino/aot_optimize_and_infer.py index 9eaeb529026..825682007ee 100644 --- a/examples/openvino/aot_optimize_and_infer.py +++ b/examples/openvino/aot_optimize_and_infer.py @@ -105,7 +105,7 @@ def load_calibration_dataset( def infer_model( exec_prog: EdgeProgramManager, - input_shape, + inputs, num_iter: int, warmup_iter: int, input_path: str, @@ -115,7 +115,7 @@ def infer_model( Executes inference and reports the average timing. :param exec_prog: EdgeProgramManager of the lowered model - :param input_shape: The input shape for the model. + :param inputs: The inputs for the model. :param num_iter: The number of iterations to execute inference for timing. :param warmup_iter: The number of iterations to execute inference for warmup before timing. :param input_path: Path to the input tensor file to read the input for inference. 
@@ -128,8 +128,6 @@ def infer_model( # 2: Initialize inputs if input_path: inputs = (torch.load(input_path, weights_only=False),) - else: - inputs = (torch.randn(input_shape),) # 3: Execute warmup for _i in range(warmup_iter): @@ -232,7 +230,14 @@ def main( # noqa: C901 msg = "Input shape must be a list or tuple." raise ValueError(msg) # Provide input - example_args = (torch.randn(*input_shape),) + if suite == "huggingface": + if hasattr(model, 'config') and hasattr(model.config, 'vocab_size'): + vocab_size = model.config.vocab_size + else: + vocab_size = 30522 + example_args = (torch.randint(0, vocab_size, input_shape, dtype=torch.int64), ) + else: + example_args = (torch.randn(*input_shape),) # Export the model to the aten dialect aten_dialect: ExportedProgram = export(model, example_args) @@ -301,7 +306,7 @@ def transform_fn(x): if infer: print("Start inference of the model:") avg_time = infer_model( - exec_prog, input_shape, num_iter, warmup_iter, input_path, output_path + exec_prog, example_args, num_iter, warmup_iter, input_path, output_path ) print(f"Average inference time: {avg_time}") From ab0cb88e95bf5f635ea6f197a1fff2201ab15d3c Mon Sep 17 00:00:00 2001 From: suryasidd Date: Wed, 19 Mar 2025 13:28:48 -0700 Subject: [PATCH 168/188] Addressed PR comments --- examples/openvino/aot_optimize_and_infer.py | 32 +++++++++------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/examples/openvino/aot_optimize_and_infer.py b/examples/openvino/aot_optimize_and_infer.py index 825682007ee..8ae1763ea5b 100644 --- a/examples/openvino/aot_optimize_and_infer.py +++ b/examples/openvino/aot_optimize_and_infer.py @@ -108,7 +108,6 @@ def infer_model( inputs, num_iter: int, warmup_iter: int, - input_path: str, output_path: str, ) -> float: """ @@ -118,22 +117,17 @@ def infer_model( :param inputs: The inputs for the model. :param num_iter: The number of iterations to execute inference for timing. 
:param warmup_iter: The number of iterations to execute inference for warmup before timing. - :param input_path: Path to the input tensor file to read the input for inference. :param output_path: Path to the output tensor file to save the output of inference.. :return: The average inference timing. """ - # 1: Load model from buffer + # Load model from buffer executorch_module = _load_for_executorch_from_buffer(exec_prog.buffer) - # 2: Initialize inputs - if input_path: - inputs = (torch.load(input_path, weights_only=False),) - - # 3: Execute warmup + # Execute warmup for _i in range(warmup_iter): out = executorch_module.run_method("forward", inputs) - # 4: Execute inference and measure timing + # Execute inference and measure timing time_total = 0.0 for _i in range(num_iter): time_start = time.time() @@ -141,11 +135,11 @@ def infer_model( time_end = time.time() time_total += time_end - time_start - # 5: Save output tensor as raw tensor file + # Save output tensor as raw tensor file if output_path: torch.save(out, output_path) - # 6: Return average inference timing + # Return average inference timing return time_total / float(num_iter) @@ -159,10 +153,10 @@ def validate_model( :param calibration_dataset: A DataLoader containing calibration data. :return: The accuracy score of the model. """ - # 1: Load model from buffer + # Load model from buffer executorch_module = _load_for_executorch_from_buffer(exec_prog.buffer) - # 2: Iterate over the dataset and run the executor + # Iterate over the dataset and run the executor predictions = [] targets = [] for _idx, data in enumerate(calibration_dataset): @@ -171,7 +165,7 @@ def validate_model( out = executorch_module.run_method("forward", (feature,)) predictions.extend(torch.stack(out).reshape(-1, 1000).argmax(-1)) - # 1: Check accuracy + # Check accuracy return accuracy_score(predictions, targets) @@ -230,12 +224,14 @@ def main( # noqa: C901 msg = "Input shape must be a list or tuple." 
raise ValueError(msg) # Provide input - if suite == "huggingface": - if hasattr(model, 'config') and hasattr(model.config, 'vocab_size'): + if input_path: + example_args = (torch.load(input_path, weights_only=False),) + elif suite == "huggingface": + if hasattr(model, "config") and hasattr(model.config, "vocab_size"): vocab_size = model.config.vocab_size else: vocab_size = 30522 - example_args = (torch.randint(0, vocab_size, input_shape, dtype=torch.int64), ) + example_args = (torch.randint(0, vocab_size, input_shape, dtype=torch.int64),) else: example_args = (torch.randn(*input_shape),) @@ -306,7 +302,7 @@ def transform_fn(x): if infer: print("Start inference of the model:") avg_time = infer_model( - exec_prog, example_args, num_iter, warmup_iter, input_path, output_path + exec_prog, example_args, num_iter, warmup_iter, output_path ) print(f"Average inference time: {avg_time}") From 24e823f9de2ccbf3013a10b0e837611f6c1626e0 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Thu, 20 Mar 2025 18:04:42 -0700 Subject: [PATCH 169/188] remove openvino pybind option from executorch install script --- install_executorch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/install_executorch.py b/install_executorch.py index 4797f5b2e2c..b35f5668eb2 100644 --- a/install_executorch.py +++ b/install_executorch.py @@ -39,7 +39,7 @@ def clean(): print("Done cleaning build artifacts.") -VALID_PYBINDS = ["coreml", "mps", "xnnpack", "training", "openvino"] +VALID_PYBINDS = ["coreml", "mps", "xnnpack", "training"] ################################################################################ From e0bd72ab2fd3e6de0f366f1a7c1135f9ee83db24 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Thu, 20 Mar 2025 17:50:41 -0700 Subject: [PATCH 170/188] Update build-run-openvino.md --- docs/source/build-run-openvino.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/build-run-openvino.md b/docs/source/build-run-openvino.md index 
8f6424e7d65..60888cfb72b 100644 --- a/docs/source/build-run-openvino.md +++ b/docs/source/build-run-openvino.md @@ -37,7 +37,6 @@ OpenVINO backend supports the following hardware: Before you begin, ensure you have openvino installed and configured on your system: -#### TODO: Add instructions for support with OpenVINO release package ```bash git clone https://github.com/openvinotoolkit/openvino.git @@ -52,6 +51,7 @@ cmake --install build --prefix cd source setupvars.sh ``` +Note: The OpenVINO backend is not yet supported in the current OpenVINO release. Therefore, it is recommended to build from source. ### Setup From b1756d14500d05da804f5bdca4d39d99a87ca4e4 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Thu, 20 Mar 2025 17:51:55 -0700 Subject: [PATCH 171/188] Update build-run-openvino.md --- docs/source/build-run-openvino.md | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/docs/source/build-run-openvino.md b/docs/source/build-run-openvino.md index 60888cfb72b..0b2eed0dcb0 100644 --- a/docs/source/build-run-openvino.md +++ b/docs/source/build-run-openvino.md @@ -86,31 +86,6 @@ python aot_openvino_compiler.py --suite torchvision --model resnet50 --input_sha ``` The exported model will be saved as 'resnet50.pte' in the current directory. -#### **Arguments** -- **`--suite`** (required): - Specifies the model suite to use. - Supported values: - - `timm` (e.g., VGG16, ResNet50) - - `torchvision` (e.g., resnet18, mobilenet_v2) - - `huggingface` (e.g., bert-base-uncased) - -- **`--model`** (required): - Name of the model to export. - Examples: - - For `timm`: `vgg16`, `resnet50` - - For `torchvision`: `resnet18`, `mobilenet_v2` - - For `huggingface`: `bert-base-uncased`, `distilbert-base-uncased` - -- **`--input_shape`** (required): - Input shape for the model. Provide this as a **list** or **tuple**. 
- Examples: - - `[1, 3, 224, 224]` (Zsh users: wrap in quotes) - - `(1, 3, 224, 224)` - -- **`--device`** (optional): - Target device for the compiled model. Default is `CPU`. - Examples: `CPU`, `GPU` - ### Build C++ OpenVINO Examples Build the backend and the examples by executing the script: ```bash From 2cf3ecfa8188f94e1fe1a7c8d0513de6f97199da Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Thu, 20 Mar 2025 17:53:53 -0700 Subject: [PATCH 172/188] Update build-run-openvino.md --- docs/source/build-run-openvino.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/build-run-openvino.md b/docs/source/build-run-openvino.md index 0b2eed0dcb0..9d04f74d280 100644 --- a/docs/source/build-run-openvino.md +++ b/docs/source/build-run-openvino.md @@ -31,6 +31,8 @@ OpenVINO backend supports the following hardware: - Intel discrete GPUs - Intel NPUs +For more information on the supported hardware, please refer to [OpenVINO System Requirements](https://docs.openvino.ai/2025/about-openvino/release-notes-openvino/system-requirements.html) page. + ## Instructions for Building OpenVINO Backend ### Prerequisites From e3ceba02ee5195198603fff3dadb2d866084709d Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Thu, 20 Mar 2025 18:02:36 -0700 Subject: [PATCH 173/188] Update build-run-openvino.md --- docs/source/build-run-openvino.md | 40 +++++-------------------------- 1 file changed, 6 insertions(+), 34 deletions(-) diff --git a/docs/source/build-run-openvino.md b/docs/source/build-run-openvino.md index 9d04f74d280..412e1b79643 100644 --- a/docs/source/build-run-openvino.md +++ b/docs/source/build-run-openvino.md @@ -69,7 +69,7 @@ Follow the steps below to setup your build environment: 3. Navigate to `scripts/` directory. -4. **Build OpenVINO Backend**: Once the prerequisites are in place, run the `openvino_build.sh` script to start the build process, OpenVINO backend will be built under `cmake-openvino-out/backends/openvino/` as `libopenvino_backend.so` +4. 
**Build OpenVINO Backend**: Once the prerequisites are in place, run the `openvino_build.sh` script to start the build process, OpenVINO backend will be built under `cmake-out/backends/openvino/` as `libopenvino_backend.a` ```bash ./openvino_build.sh @@ -84,7 +84,7 @@ Below is an example to export a ResNet50 model from Torchvision model suite for ```bash cd executorch/examples/openvino/aot -python aot_openvino_compiler.py --suite torchvision --model resnet50 --input_shape "(1, 3, 256, 256)" --device CPU +python aot_openvino_compiler.py --export --suite torchvision --model resnet50 --input_shape "(1, 3, 256, 256)" --device CPU ``` The exported model will be saved as 'resnet50.pte' in the current directory. @@ -93,48 +93,20 @@ Build the backend and the examples by executing the script: ```bash ./openvino_build_example.sh ``` -The executable is saved in `/cmake-openvino-out/examples/openvino/` +The executable is saved in `/cmake-openvino-out/backends/openvino/` -Now, run the example using the executable generated in the above step. The executable requires a model file (`.pte` file generated in the aot step), number of inference iterations, and optional input/output paths. - -#### Command Syntax: - -``` -cd ../../cmake-openvino-out/examples/openvino - -./openvino_executor_runner \ - --model_path= \ - --num_iter= \ - [--input_list_path=] \ - [--output_folder_path=] -``` -#### Command-Line Arguments - -- `--model_path`: (Required) Path to the model serialized in `.pte` format. -- `--num_iter`: (Optional) Number of times to run inference (default: 1). -- `--input_list_path`: (Optional) Path to a file containing the list of raw input tensor files. -- `--output_folder_path`: (Optional) Path to a folder where output tensor files will be saved. +Now, run the example using the executable generated in the above step. The executable requires a model file (`.pte` file generated in the aot step), number of inference executions. 
#### Example Usage -Run inference with a given model for 10 iterations and save outputs: +Run inference with a given model for 10 executions: ``` ./openvino_executor_runner \ --model_path=model.pte \ - --num_iter=10 \ - --output_folder_path=outputs/ + --num_executions=10 ``` -Run inference with an input tensor file: - -``` -./openvino_executor_runner \ - --model_path=model.pte \ - --num_iter=5 \ - --input_list_path=input_list.txt \ - --output_folder_path=outputs/ -``` ## Supported model list From 3d538de2fe933bdad1ea22a31928cc8437ef72a1 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Thu, 20 Mar 2025 18:03:23 -0700 Subject: [PATCH 174/188] Update build-run-openvino.md --- docs/source/build-run-openvino.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/build-run-openvino.md b/docs/source/build-run-openvino.md index 412e1b79643..be2d97257f7 100644 --- a/docs/source/build-run-openvino.md +++ b/docs/source/build-run-openvino.md @@ -93,7 +93,7 @@ Build the backend and the examples by executing the script: ```bash ./openvino_build_example.sh ``` -The executable is saved in `/cmake-openvino-out/backends/openvino/` +The executable is saved in `/cmake-out/backends/openvino/` Now, run the example using the executable generated in the above step. The executable requires a model file (`.pte` file generated in the aot step), number of inference executions. 
From c884129ea6abe914c47847b91abd1aa7c7ee51f2 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Thu, 20 Mar 2025 18:04:52 -0700 Subject: [PATCH 175/188] Update build-run-openvino.md --- docs/source/build-run-openvino.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/source/build-run-openvino.md b/docs/source/build-run-openvino.md index be2d97257f7..18fb87898c9 100644 --- a/docs/source/build-run-openvino.md +++ b/docs/source/build-run-openvino.md @@ -108,9 +108,6 @@ Run inference with a given model for 10 executions: ``` -## Supported model list - -### TODO ## FAQ From e04c874bf376f51d3265a9faa16cf636cad4aca1 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Thu, 20 Mar 2025 18:13:33 -0700 Subject: [PATCH 176/188] Update README.md --- backends/openvino/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backends/openvino/README.md b/backends/openvino/README.md index 5c69e03f11c..a9ef84ac37e 100644 --- a/backends/openvino/README.md +++ b/backends/openvino/README.md @@ -42,8 +42,6 @@ executorch Before you begin, ensure you have openvino installed and configured on your system: -## TODO: Add instructions for support with OpenVINO release package - ```bash git clone https://github.com/openvinotoolkit/openvino.git cd openvino && git checkout releases/2025/1 @@ -57,6 +55,7 @@ cmake --install build --prefix cd source setupvars.sh ``` +Note: The OpenVINO backend is not yet supported in the current OpenVINO release. Therefore, it is recommended to build from source. 
### Setup From f5619e15fba1a437d3c82724deb04d8b2c162000 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Thu, 20 Mar 2025 18:24:55 -0700 Subject: [PATCH 177/188] Update build-run-openvino.md --- docs/source/build-run-openvino.md | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/docs/source/build-run-openvino.md b/docs/source/build-run-openvino.md index 18fb87898c9..5f098235b28 100644 --- a/docs/source/build-run-openvino.md +++ b/docs/source/build-run-openvino.md @@ -78,22 +78,19 @@ Follow the steps below to setup your build environment: ## Build Instructions for Examples ### AOT step: -Refer to the [README.md](../../examples/openvino/aot/README.md) in the `executorch/examples/openvino/aot` folder for detailed instructions on exporting deep learning models from various model suites (TIMM, Torchvision, Hugging Face) to openvino backend using Executorch. Users can dynamically specify the model, input shape, and target device. +Refer to the [README.md](../../examples/openvino/README.md) in the `executorch/examples/openvino` folder for detailed instructions on exporting deep learning models from various model suites (TIMM, Torchvision, Hugging Face) to openvino backend using Executorch. Users can dynamically specify the model, input shape, and target device. Below is an example to export a ResNet50 model from Torchvision model suite for CPU device with an input shape of `[1, 3, 256, 256]` ```bash -cd executorch/examples/openvino/aot -python aot_openvino_compiler.py --export --suite torchvision --model resnet50 --input_shape "(1, 3, 256, 256)" --device CPU +cd executorch/examples/openvino +python aot_optimize_and_infer.py --export --suite torchvision --model resnet50 --input_shape "(1, 3, 256, 256)" --device CPU ``` The exported model will be saved as 'resnet50.pte' in the current directory. 
### Build C++ OpenVINO Examples -Build the backend and the examples by executing the script: -```bash -./openvino_build_example.sh -``` -The executable is saved in `/cmake-out/backends/openvino/` + +After building the OpenVINO backend following the [instructions](#setup) above, the executable will be saved in `/cmake-out/backends/openvino/`. Now, run the example using the executable generated in the above step. The executable requires a model file (`.pte` file generated in the aot step), number of inference executions. From d1be27e89cb8294225fa88caf73abdf5adba4c7a Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Thu, 20 Mar 2025 18:25:49 -0700 Subject: [PATCH 178/188] Update build-run-openvino.md --- docs/source/build-run-openvino.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/build-run-openvino.md b/docs/source/build-run-openvino.md index 5f098235b28..85730c9dd3a 100644 --- a/docs/source/build-run-openvino.md +++ b/docs/source/build-run-openvino.md @@ -92,7 +92,7 @@ The exported model will be saved as 'resnet50.pte' in the current directory. After building the OpenVINO backend following the [instructions](#setup) above, the executable will be saved in `/cmake-out/backends/openvino/`. -Now, run the example using the executable generated in the above step. The executable requires a model file (`.pte` file generated in the aot step), number of inference executions. +The executable requires a model file (`.pte` file generated in the aot step) and the number of inference executions. 
#### Example Usage From 1f0cefc1243d0e30fd4135b7ae83439cb023cb1d Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Thu, 20 Mar 2025 19:45:14 -0700 Subject: [PATCH 179/188] add openvino pybind option into install_executorch.py --- install_executorch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/install_executorch.py b/install_executorch.py index b35f5668eb2..4797f5b2e2c 100644 --- a/install_executorch.py +++ b/install_executorch.py @@ -39,7 +39,7 @@ def clean(): print("Done cleaning build artifacts.") -VALID_PYBINDS = ["coreml", "mps", "xnnpack", "training"] +VALID_PYBINDS = ["coreml", "mps", "xnnpack", "training", "openvino"] ################################################################################ From 65aa7055f9636a812b7613c7b879a3f5b096fe16 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Fri, 21 Mar 2025 10:57:31 -0700 Subject: [PATCH 180/188] Update backends/openvino/README.md Co-authored-by: Yamini Nimmagadda --- backends/openvino/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/openvino/README.md b/backends/openvino/README.md index a9ef84ac37e..f46f944b48c 100644 --- a/backends/openvino/README.md +++ b/backends/openvino/README.md @@ -55,7 +55,7 @@ cmake --install build --prefix cd source setupvars.sh ``` -Note: The OpenVINO backend is not yet supported in the current OpenVINO release. Therefore, it is recommended to build from source. +Note: The OpenVINO backend is not yet supported with the current OpenVINO release packages. It is recommended to build from source. The instructions for using OpenVINO release packages will be added soon. 
### Setup From 6f52a9af90cab87e149160fefaf53560ce4a8a74 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Fri, 21 Mar 2025 10:57:38 -0700 Subject: [PATCH 181/188] Update docs/source/build-run-openvino.md Co-authored-by: Yamini Nimmagadda --- docs/source/build-run-openvino.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/build-run-openvino.md b/docs/source/build-run-openvino.md index 85730c9dd3a..db3890969e7 100644 --- a/docs/source/build-run-openvino.md +++ b/docs/source/build-run-openvino.md @@ -53,7 +53,7 @@ cmake --install build --prefix cd source setupvars.sh ``` -Note: The OpenVINO backend is not yet supported in the current OpenVINO release. Therefore, it is recommended to build from source. +Note: The OpenVINO backend is not yet supported with the current OpenVINO release packages. It is recommended to build from source. The instructions for using OpenVINO release packages will be added soon. ### Setup From 15178ce9055828145976131d67213daff1b7cadc Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Fri, 21 Mar 2025 11:18:39 -0700 Subject: [PATCH 182/188] Update build-run-openvino.md --- docs/source/build-run-openvino.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/build-run-openvino.md b/docs/source/build-run-openvino.md index db3890969e7..e0d1be9278d 100644 --- a/docs/source/build-run-openvino.md +++ b/docs/source/build-run-openvino.md @@ -106,7 +106,7 @@ Run inference with a given model for 10 executions: -## FAQ +## Support If you encounter any issues while reproducing the tutorial, please file a github issue on ExecuTorch repo and tag use `#openvino` tag From 30c68219a7e5d62872de03e4762bc2ec74a05b4f Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Mon, 24 Mar 2025 15:44:20 -0700 Subject: [PATCH 183/188] Resolved conflicts with main branch --- backends/openvino/CMakeLists.txt | 2 +- backends/openvino/runtime/OpenvinoBackend.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff 
--git a/backends/openvino/CMakeLists.txt b/backends/openvino/CMakeLists.txt index 7348ac94a6e..8d07cd9a366 100644 --- a/backends/openvino/CMakeLists.txt +++ b/backends/openvino/CMakeLists.txt @@ -26,7 +26,7 @@ endif() set(COMMON_INCLUDE_DIRS ${EXECUTORCH_ROOT}/..) # Include utility CMake scripts from ExecuteTorch -include(${EXECUTORCH_ROOT}/build/Utils.cmake) +include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake) # Find OpenVINO libraries find_package(OpenVINO REQUIRED) diff --git a/backends/openvino/runtime/OpenvinoBackend.cpp b/backends/openvino/runtime/OpenvinoBackend.cpp index 431ffcff67c..09fb53d829c 100644 --- a/backends/openvino/runtime/OpenvinoBackend.cpp +++ b/backends/openvino/runtime/OpenvinoBackend.cpp @@ -83,7 +83,7 @@ exr::Result OpenvinoBackend::init( // Allocate execution handle exr::MemoryAllocator* allocator = context.get_runtime_allocator(); ExecutionHandle* handle = - ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(allocator, ExecutionHandle); + allocator->allocateInstance(); new (handle) ExecutionHandle; handle->compiled_model = std::make_shared(compiled_model); handle->infer_request = infer_request; From 682ae80ac7e71f838a95f42b11fc0cd8ea1d9a40 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Mon, 24 Mar 2025 15:54:43 -0700 Subject: [PATCH 184/188] code formatting --- backends/openvino/runtime/OpenvinoBackend.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backends/openvino/runtime/OpenvinoBackend.cpp b/backends/openvino/runtime/OpenvinoBackend.cpp index 09fb53d829c..a3134f72b4b 100644 --- a/backends/openvino/runtime/OpenvinoBackend.cpp +++ b/backends/openvino/runtime/OpenvinoBackend.cpp @@ -82,8 +82,7 @@ exr::Result OpenvinoBackend::init( // Allocate execution handle exr::MemoryAllocator* allocator = context.get_runtime_allocator(); - ExecutionHandle* handle = - allocator->allocateInstance(); + ExecutionHandle* handle = allocator->allocateInstance(); new (handle) ExecutionHandle; handle->compiled_model = 
std::make_shared(compiled_model); handle->infer_request = infer_request; From b4964254896a5a5f8b571097908daa562b518768 Mon Sep 17 00:00:00 2001 From: suryasidd Date: Tue, 25 Mar 2025 12:20:34 -0700 Subject: [PATCH 185/188] Updated build dependencies in docs --- backends/openvino/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backends/openvino/README.md b/backends/openvino/README.md index f46f944b48c..95a5f4c364e 100644 --- a/backends/openvino/README.md +++ b/backends/openvino/README.md @@ -46,6 +46,7 @@ Before you begin, ensure you have openvino installed and configured on your syst git clone https://github.com/openvinotoolkit/openvino.git cd openvino && git checkout releases/2025/1 git submodule update --init --recursive +sudo ./install_build_dependencies.sh mkdir build && cd build cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_PYTHON=ON make -j$(nproc) @@ -56,6 +57,7 @@ cd source setupvars.sh ``` Note: The OpenVINO backend is not yet supported with the current OpenVINO release packages. It is recommended to build from source. The instructions for using OpenVINO release packages will be added soon. +For more information about OpenVINO build, refer to the [OpenVINO Build Instructions](https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/build_linux.md). 
### Setup From de4cef24f9f612dd1be9b9f5810b10ec9ac958d5 Mon Sep 17 00:00:00 2001 From: suryasidd Date: Tue, 25 Mar 2025 12:54:49 -0700 Subject: [PATCH 186/188] Updated docs --- docs/source/build-run-openvino.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/source/build-run-openvino.md b/docs/source/build-run-openvino.md index e0d1be9278d..f9ea5df0862 100644 --- a/docs/source/build-run-openvino.md +++ b/docs/source/build-run-openvino.md @@ -20,7 +20,7 @@ In this tutorial we will walk you through the process of setting up the prerequi [OpenVINO](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/overview.html) is an open-source toolkit designed to enhance AI inference on Intel hardware by reducing latency and increasing throughput while preserving accuracy. It optimizes hardware utilization and simplifies AI development and deep learning integration across domains such as computer vision, large language models (LLMs), and generative AI. -OpenVINO is integrated as an Executorch delegate to accelerate AI applications deployed with Executorch APIs. +OpenVINO is integrated as an Executorch delegate to accelerate AI applications deployed with Executorch APIs. ## Supported Hardware @@ -44,6 +44,7 @@ Before you begin, ensure you have openvino installed and configured on your syst git clone https://github.com/openvinotoolkit/openvino.git cd openvino && git checkout releases/2025/1 git submodule update --init --recursive +sudo ./install_build_dependencies.sh mkdir build && cd build cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_PYTHON=ON make -j @@ -54,6 +55,7 @@ cd source setupvars.sh ``` Note: The OpenVINO backend is not yet supported with the current OpenVINO release packages. It is recommended to build from source. The instructions for using OpenVINO release packages will be added soon. 
+For more information about OpenVINO build, refer to the [OpenVINO Build Instructions](https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/build_linux.md). ### Setup @@ -78,7 +80,7 @@ Follow the steps below to setup your build environment: ## Build Instructions for Examples ### AOT step: -Refer to the [README.md](../../examples/openvino/README.md) in the `executorch/examples/openvino` folder for detailed instructions on exporting deep learning models from various model suites (TIMM, Torchvision, Hugging Face) to openvino backend using Executorch. Users can dynamically specify the model, input shape, and target device. +Refer to the [README.md](../../examples/openvino/README.md) in the `executorch/examples/openvino` folder for detailed instructions on exporting deep learning models from various model suites (TIMM, Torchvision, Hugging Face) to openvino backend using Executorch. Users can dynamically specify the model, input shape, and target device. Below is an example to export a ResNet50 model from Torchvision model suite for CPU device with an input shape of `[1, 3, 256, 256]` From 3a3e9122c0deb7ce446c756f3872fa505bce1e96 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Wed, 26 Mar 2025 12:52:17 -0700 Subject: [PATCH 187/188] pybind update to use new api --- .../tests/ops/base_openvino_op_test.py | 10 +++++----- examples/openvino/aot_optimize_and_infer.py | 18 ++++++++++-------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/backends/openvino/tests/ops/base_openvino_op_test.py b/backends/openvino/tests/ops/base_openvino_op_test.py index 3b2d622d517..b46682de443 100644 --- a/backends/openvino/tests/ops/base_openvino_op_test.py +++ b/backends/openvino/tests/ops/base_openvino_op_test.py @@ -6,10 +6,8 @@ from executorch.backends.openvino.preprocess import OpenvinoBackend from executorch.exir import EdgeProgramManager, to_edge_transform_and_lower from executorch.exir.backend.backend_details import CompileSpec +from executorch.runtime 
import Runtime -from executorch.extension.pybindings.portable_lib import ( # @manual - _load_for_executorch_from_buffer, -) from torch.export import export, ExportedProgram @@ -67,8 +65,10 @@ def execute_layer_test( ] # Load model from buffer and execute - executorch_module = _load_for_executorch_from_buffer(exec_prog.buffer) - outputs = executorch_module.run_method("forward", sample_inputs) + runtime = Runtime.get() + program = runtime.load_program(exec_prog.buffer) + method = program.load_method("forward") + outputs = method.execute(sample_inputs) # Compare the outputs with the reference outputs self.assertTrue(len(ref_output) == len(outputs)) diff --git a/examples/openvino/aot_optimize_and_infer.py b/examples/openvino/aot_optimize_and_infer.py index 8ae1763ea5b..ea4fc6399ec 100644 --- a/examples/openvino/aot_optimize_and_infer.py +++ b/examples/openvino/aot_optimize_and_infer.py @@ -17,9 +17,7 @@ from executorch.backends.openvino.quantizer import quantize_model from executorch.exir import EdgeProgramManager, to_edge_transform_and_lower from executorch.exir.backend.backend_details import CompileSpec -from executorch.extension.pybindings.portable_lib import ( # @manual - _load_for_executorch_from_buffer, -) +from executorch.runtime import Runtime from sklearn.metrics import accuracy_score from timm.data import resolve_data_config from timm.data.transforms_factory import create_transform @@ -121,17 +119,19 @@ def infer_model( :return: The average inference timing. 
""" # Load model from buffer - executorch_module = _load_for_executorch_from_buffer(exec_prog.buffer) + runtime = Runtime.get() + program = runtime.load_program(exec_prog.buffer) + method = program.load_method("forward") # Execute warmup for _i in range(warmup_iter): - out = executorch_module.run_method("forward", inputs) + out = method.execute(inputs) # Execute inference and measure timing time_total = 0.0 for _i in range(num_iter): time_start = time.time() - out = executorch_module.run_method("forward", inputs) + out = method.execute(inputs) time_end = time.time() time_total += time_end - time_start @@ -154,7 +154,9 @@ def validate_model( :return: The accuracy score of the model. """ # Load model from buffer - executorch_module = _load_for_executorch_from_buffer(exec_prog.buffer) + runtime = Runtime.get() + program = runtime.load_program(exec_prog.buffer) + method = program.load_method("forward") # Iterate over the dataset and run the executor predictions = [] @@ -162,7 +164,7 @@ def validate_model( for _idx, data in enumerate(calibration_dataset): feature, target = data targets.extend(target) - out = executorch_module.run_method("forward", (feature,)) + out = method.execute((feature,)) predictions.extend(torch.stack(out).reshape(-1, 1000).argmax(-1)) # Check accuracy From 6258b42a62b3090caa37f8f4168ffac7a012e28f Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Wed, 26 Mar 2025 16:47:02 -0700 Subject: [PATCH 188/188] pybind openvino merge update --- setup.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/setup.py b/setup.py index 871fdf329c2..76fbbbd9025 100644 --- a/setup.py +++ b/setup.py @@ -121,6 +121,7 @@ def pybindings(cls) -> bool: [ cls.coreml(), cls.mps(), + cls.openvino(), cls.xnnpack(), cls.training(), ] @@ -135,6 +136,10 @@ def coreml(cls) -> bool: def mps(cls) -> bool: return cls._is_cmake_arg_enabled("EXECUTORCH_BUILD_MPS", default=False) + @classmethod + def openvino(cls) -> bool: + return 
cls._is_cmake_arg_enabled("EXECUTORCH_BUILD_OPENVINO", default=False) + @classmethod def xnnpack(cls) -> bool: return cls._is_cmake_arg_enabled("EXECUTORCH_BUILD_XNNPACK", default=False)