From 64b0045ac32e4fcaee32d671444289f2d5b8700f Mon Sep 17 00:00:00 2001 From: Lucy Qiu Date: Tue, 7 Oct 2025 11:06:27 -0700 Subject: [PATCH] Introduce public MergedDataMap (#14861) Summary: Pull Request resolved: https://github.com/pytorch/executorch/pull/14861 Add a public merged data map. Module can use this to resolve multiple named data maps. It is created as a separate dependency rather than inside module/ so that it can be used independently of Module (there may be other internal usages soon). Add support for BUCK and CMake. On the CMake side, the new dependency has to be enabled for every build that uses the module extension. Reviewed By: swolchok Differential Revision: D83527299 --- .ci/scripts/build-qnn-sdk.sh | 1 + .ci/scripts/test_llama_torchao_lowbit.sh | 1 + .../test_torchao_huggingface_checkpoints.sh | 1 + .ci/scripts/test_yolo12.sh | 4 + .github/workflows/trunk.yml | 1 + CMakeLists.txt | 5 + backends/mediatek/scripts/mtk_build.sh | 1 + backends/qualcomm/scripts/build.sh | 2 + backends/samsung/build.sh | 2 + backends/vulkan/test/scripts/test_model.sh | 1 + backends/vulkan/test/scripts/test_op.sh | 1 + extension/named_data_map/CMakeLists.txt | 46 +++++ extension/named_data_map/TARGETS | 8 + extension/named_data_map/merged_data_map.cpp | 117 ++++++++++++ extension/named_data_map/merged_data_map.h | 106 +++++++++++ extension/named_data_map/targets.bzl | 21 +++ extension/named_data_map/test/CMakeLists.txt | 60 ++++++ extension/named_data_map/test/TARGETS | 8 + .../test/merged_data_map_test.cpp | 174 ++++++++++++++++++ extension/named_data_map/test/targets.bzl | 26 +++ scripts/build_wasm_tests.sh | 1 + .../executorch/build/build_variables.bzl | 4 + test/run_oss_cpp_tests.sh | 1 + tools/cmake/Codegen.cmake | 2 + tools/cmake/preset/android.cmake | 1 + tools/cmake/preset/apple_common.cmake | 1 + tools/cmake/preset/default.cmake | 10 + tools/cmake/preset/llm.cmake | 1 + tools/cmake/preset/profiling.cmake | 1 + tools/cmake/preset/pybind.cmake | 3 +- tools/cmake/preset/windows.cmake | 1 + 31 files 
changed, 611 insertions(+), 1 deletion(-) create mode 100644 extension/named_data_map/CMakeLists.txt create mode 100644 extension/named_data_map/TARGETS create mode 100644 extension/named_data_map/merged_data_map.cpp create mode 100644 extension/named_data_map/merged_data_map.h create mode 100644 extension/named_data_map/targets.bzl create mode 100644 extension/named_data_map/test/CMakeLists.txt create mode 100644 extension/named_data_map/test/TARGETS create mode 100644 extension/named_data_map/test/merged_data_map_test.cpp create mode 100644 extension/named_data_map/test/targets.bzl diff --git a/.ci/scripts/build-qnn-sdk.sh b/.ci/scripts/build-qnn-sdk.sh index 7f34e8afb63..30835cf5085 100755 --- a/.ci/scripts/build-qnn-sdk.sh +++ b/.ci/scripts/build-qnn-sdk.sh @@ -38,6 +38,7 @@ set_up_aot() { -DEXECUTORCH_BUILD_EXTENSION_EXTENSION_LLM=ON \ -DEXECUTORCH_BUILD_EXTENSION_EXTENSION_LLM_RUNNER=ON \ -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ + -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \ -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \ -DPYTHON_EXECUTABLE=python3 diff --git a/.ci/scripts/test_llama_torchao_lowbit.sh b/.ci/scripts/test_llama_torchao_lowbit.sh index 5f472fad63b..a7ded52ccc6 100644 --- a/.ci/scripts/test_llama_torchao_lowbit.sh +++ b/.ci/scripts/test_llama_torchao_lowbit.sh @@ -31,6 +31,7 @@ cmake -DPYTHON_EXECUTABLE=python \ -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ + -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \ -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ -DEXECUTORCH_BUILD_XNNPACK=OFF \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ diff --git a/.ci/scripts/test_torchao_huggingface_checkpoints.sh b/.ci/scripts/test_torchao_huggingface_checkpoints.sh index f06c794f88d..da50d28800a 100644 --- a/.ci/scripts/test_torchao_huggingface_checkpoints.sh +++ b/.ci/scripts/test_torchao_huggingface_checkpoints.sh @@ -129,6 +129,7 @@ if [[ 
"$TEST_WITH_RUNNER" -eq 1 ]]; then -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ + -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \ -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ -DEXECUTORCH_BUILD_XNNPACK=ON \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ diff --git a/.ci/scripts/test_yolo12.sh b/.ci/scripts/test_yolo12.sh index e3f20d5f970..594ddbf86ed 100755 --- a/.ci/scripts/test_yolo12.sh +++ b/.ci/scripts/test_yolo12.sh @@ -119,6 +119,8 @@ cmake_install_executorch_libraries() { -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \ -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ + -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ + -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \ -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \ -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ -B"${build_dir}" @@ -131,6 +133,8 @@ cmake_install_executorch_libraries() { -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \ -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ + -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ + -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \ -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \ -DEXECUTORCH_ENABLE_LOGGING=ON \ -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index aabea88f517..2d25f469ae7 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -852,6 +852,7 @@ jobs: -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ + -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \ -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ -DEXECUTORCH_BUILD_XNNPACK=ON \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ diff --git a/CMakeLists.txt b/CMakeLists.txt index 6a36d7e563a..a9554bcf2ff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -630,6 +630,11 @@ if(EXECUTORCH_BUILD_EXTENSION_MODULE) list(APPEND 
_executorch_extensions extension_module_static) endif() +if(EXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/named_data_map) + list(APPEND _executorch_extensions extension_named_data_map) +endif() + if(EXECUTORCH_BUILD_EXTENSION_LLM) if(EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER) set(SUPPORT_REGEX_LOOKAHEAD ON) diff --git a/backends/mediatek/scripts/mtk_build.sh b/backends/mediatek/scripts/mtk_build.sh index 599f754d7bc..d42e5f7e10a 100755 --- a/backends/mediatek/scripts/mtk_build.sh +++ b/backends/mediatek/scripts/mtk_build.sh @@ -30,6 +30,7 @@ cmake -DCMAKE_INSTALL_PREFIX="${build_dir}" \ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ + -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \ -DEXECUTORCH_BUILD_NEURON=ON \ -B"${build_dir}" diff --git a/backends/qualcomm/scripts/build.sh b/backends/qualcomm/scripts/build.sh index c84911cf851..4cdd1efe6f4 100755 --- a/backends/qualcomm/scripts/build.sh +++ b/backends/qualcomm/scripts/build.sh @@ -86,6 +86,7 @@ if [ "$BUILD_AARCH64" = true ]; then -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ + -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \ -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \ -DEXECUTORCH_ENABLE_LOGGING=ON \ @@ -155,6 +156,7 @@ if [ "$BUILD_X86_64" = true ]; then -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ + -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \ diff --git a/backends/samsung/build.sh b/backends/samsung/build.sh index dfa6407ff50..4845c760f0c 100755 --- a/backends/samsung/build.sh +++ b/backends/samsung/build.sh @@ -45,6 +45,7 @@ function 
build_x86_64() { -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ + -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \ -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ -S ${PROJECT_DIR} \ -B ${X86_64_BUILD_DIR} @@ -77,6 +78,7 @@ function build_android() { -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ + -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \ -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ -DEXECUTORCH_ENABLE_LOGGING=1 \ -DEXECUTORCH_BUILD_DEVTOOLS=ON \ diff --git a/backends/vulkan/test/scripts/test_model.sh b/backends/vulkan/test/scripts/test_model.sh index 5f06d2c039b..40ec88bae70 100755 --- a/backends/vulkan/test/scripts/test_model.sh +++ b/backends/vulkan/test/scripts/test_model.sh @@ -111,6 +111,7 @@ build_core_libraries_and_devtools() { -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ + -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \ -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \ -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON \ diff --git a/backends/vulkan/test/scripts/test_op.sh b/backends/vulkan/test/scripts/test_op.sh index 1ec07b7f75f..797089e54dc 100755 --- a/backends/vulkan/test/scripts/test_op.sh +++ b/backends/vulkan/test/scripts/test_op.sh @@ -138,6 +138,7 @@ build_core_libraries() { -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ + -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \ -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \ -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON \ diff --git a/extension/named_data_map/CMakeLists.txt b/extension/named_data_map/CMakeLists.txt new file mode 100644 index 00000000000..a4ad208c7e2 --- /dev/null +++ 
b/extension/named_data_map/CMakeLists.txt @@ -0,0 +1,46 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# Please format this file by running: +# ~~~ +# cmake-format -i CMakeLists.txt +# ~~~ + +cmake_minimum_required(VERSION 3.19) + +# Source root directory for executorch. +if(NOT EXECUTORCH_ROOT) + set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..) +endif() + +list(TRANSFORM _extension_named_data_map__srcs PREPEND "${EXECUTORCH_ROOT}/") +# Create the library +add_library(extension_named_data_map ${_extension_named_data_map__srcs}) + +# Link dependencies +target_link_libraries(extension_named_data_map PUBLIC executorch_core) + +target_include_directories( + extension_named_data_map PUBLIC ${_common_include_directories} +) + +target_compile_options( + extension_named_data_map PUBLIC ${_common_compile_options} +) + +# Install libraries +install( + TARGETS extension_named_data_map + EXPORT ExecuTorchTargets + DESTINATION ${CMAKE_INSTALL_LIBDIR} + INCLUDES + DESTINATION ${_common_include_directories} +) + +# Add tests if testing is enabled +if(BUILD_TESTING) + add_subdirectory(test) +endif() diff --git a/extension/named_data_map/TARGETS b/extension/named_data_map/TARGETS new file mode 100644 index 00000000000..2341af9282f --- /dev/null +++ b/extension/named_data_map/TARGETS @@ -0,0 +1,8 @@ +# Any targets that should be shared between fbcode and xplat must be defined in +# targets.bzl. This file can contain fbcode-only targets. 
+ +load(":targets.bzl", "define_common_targets") + +oncall("executorch") + +define_common_targets() diff --git a/extension/named_data_map/merged_data_map.cpp b/extension/named_data_map/merged_data_map.cpp new file mode 100644 index 00000000000..b42701c7587 --- /dev/null +++ b/extension/named_data_map/merged_data_map.cpp @@ -0,0 +1,117 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include + +#include +#include + +using executorch::aten::string_view; +using executorch::ET_RUNTIME_NAMESPACE::NamedDataMap; +using executorch::ET_RUNTIME_NAMESPACE::TensorLayout; +using executorch::runtime::Error; +using executorch::runtime::FreeableBuffer; +using executorch::runtime::Result; +using executorch::runtime::Span; + +namespace executorch::extension { + +/*static*/ Result MergedDataMap::load( + Span named_data_maps) { + std::vector valid_data_maps; + for (auto i : c10::irange(named_data_maps.size())) { + if (named_data_maps[i] != nullptr && + named_data_maps[i]->get_num_keys().get() > 0) { + valid_data_maps.push_back(named_data_maps[i]); + } + } + ET_CHECK_OR_RETURN_ERROR( + !valid_data_maps.empty(), + InvalidArgument, + "No non-empty named data maps provided to merge"); + + // Check for duplicate keys. 
+ std::unordered_map key_to_map_index; + for (auto i : c10::irange(valid_data_maps.size())) { + const auto cur_map = valid_data_maps[i]; + uint32_t num_keys = cur_map->get_num_keys().get(); + for (auto j : c10::irange(num_keys)) { + const auto cur_key = cur_map->get_key(j).get(); + const auto [it, inserted] = key_to_map_index.emplace(cur_key, i); + ET_CHECK_OR_RETURN_ERROR( + inserted, + InvalidArgument, + "Duplicate key %s in named data maps at index %u and %lu", + cur_key, + it->second, + i); + } + } + return MergedDataMap(std::move(valid_data_maps), std::move(key_to_map_index)); +} + +ET_NODISCARD Result MergedDataMap::get_tensor_layout( + string_view key) const { + const auto it = key_to_map_index_.find(key.data()); + ET_CHECK_OR_RETURN_ERROR( + it != key_to_map_index_.end(), + NotFound, + "Key %s not found in named data maps", + key.data()); + + return named_data_maps_.at(it->second)->get_tensor_layout(key); +} + +ET_NODISCARD +Result MergedDataMap::get_data(string_view key) const { + const auto it = key_to_map_index_.find(key.data()); + ET_CHECK_OR_RETURN_ERROR( + it != key_to_map_index_.end(), + NotFound, + "Key %s not found in named data maps", + key.data()); + return named_data_maps_.at(it->second)->get_data(key); +} + +ET_NODISCARD Error MergedDataMap::load_data_into( + string_view key, + void* buffer, + size_t size) const { + const auto it = key_to_map_index_.find(key.data()); + ET_CHECK_OR_RETURN_ERROR( + it != key_to_map_index_.end(), + NotFound, + "Key %s not found in named data maps", + key.data()); + return named_data_maps_.at(it->second)->load_data_into(key, buffer, size); +} + +ET_NODISCARD Result MergedDataMap::get_num_keys() const { + return key_to_map_index_.size(); +} + +ET_NODISCARD Result MergedDataMap::get_key(uint32_t index) const { + uint32_t total_num_keys = get_num_keys().get(); + ET_CHECK_OR_RETURN_ERROR( + index < total_num_keys, + InvalidArgument, + "Index %u out of range of size %u", + index, + total_num_keys); + for (auto i : 
c10::irange(named_data_maps_.size())) { + auto num_keys = named_data_maps_[i]->get_num_keys().get(); + if (index < num_keys) { + return named_data_maps_[i]->get_key(index); + } + index -= num_keys; // Rebase index so it is relative to the next map. + } + // Shouldn't reach here. + return Error::Internal; +} +} // namespace executorch::extension diff --git a/extension/named_data_map/merged_data_map.h b/extension/named_data_map/merged_data_map.h new file mode 100644 index 00000000000..13415c0b59e --- /dev/null +++ b/extension/named_data_map/merged_data_map.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include + +#include +#include + +namespace executorch::extension { +/** + * A NamedDataMap implementation that wraps other NamedDataMaps. + */ +class MergedDataMap final + : public executorch::ET_RUNTIME_NAMESPACE::NamedDataMap { + public: + /** + * Creates a new NamedDataMap that takes in other data maps. + * + * @param[in] named_data_maps Span of NamedDataMap pointers to merge. + * Note: the data maps must outlive the MergedDataMap instance. + */ + static executorch::runtime::Result + load(executorch::runtime::Span< + const executorch::ET_RUNTIME_NAMESPACE::NamedDataMap*> named_data_maps); + + /** + * Retrieve the tensor_layout for the specified key. + * + * @param[in] key The name of the tensor to get metadata on. + * + * @return Error::NotFound if the key is not present. + */ + ET_NODISCARD + executorch::runtime::Result< + const executorch::ET_RUNTIME_NAMESPACE::TensorLayout> + get_tensor_layout(executorch::aten::string_view key) const override; + + /** + * Retrieve read-only data for the specified key. + * + * @param[in] key The name of the tensor to get data on. + * + * @return error if the key is not present or data cannot be loaded. 
+ */ + ET_NODISCARD + executorch::runtime::Result get_data( + executorch::aten::string_view key) const override; + + /** + * Loads the data of the specified tensor into the provided buffer. + * + * @param[in] key The name of the tensor to get the data of. + * @param[in] buffer The buffer to load data into. Must point to at least + * `size` bytes of memory. + * @param[in] size The number of bytes to load. + * + * @returns an Error indicating if the load was successful. + */ + ET_NODISCARD executorch::runtime::Error load_data_into( + executorch::aten::string_view key, + void* buffer, + size_t size) const override; + + /** + * @returns The number of keys in the map. + */ + ET_NODISCARD executorch::runtime::Result get_num_keys() + const override; + /** + * @returns The key at the specified index, error if index out of bounds. + */ + ET_NODISCARD executorch::runtime::Result get_key( + uint32_t index) const override; + + MergedDataMap(MergedDataMap&&) noexcept = default; + + ~MergedDataMap() override = default; + + private: + MergedDataMap( + std::vector + named_data_maps, + std::unordered_map key_to_map_index) + : named_data_maps_(std::move(named_data_maps)), + key_to_map_index_(std::move(key_to_map_index)) {} + + // Not copyable or assignable. + MergedDataMap(const MergedDataMap& rhs) = delete; + MergedDataMap& operator=(MergedDataMap&& rhs) noexcept = delete; + MergedDataMap& operator=(const MergedDataMap& rhs) = delete; + + std::vector + named_data_maps_; + + // Map from key to index in the named_data_maps_ vector. 
+ std::unordered_map key_to_map_index_; +}; + +} // namespace executorch::extension diff --git a/extension/named_data_map/targets.bzl b/extension/named_data_map/targets.bzl new file mode 100644 index 00000000000..0c2b2fa6d5c --- /dev/null +++ b/extension/named_data_map/targets.bzl @@ -0,0 +1,21 @@ +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") + +def define_common_targets(): + for aten_mode in get_aten_mode_options(): + aten_suffix = "_aten" if aten_mode else "" + runtime.cxx_library( + name = "merged_data_map" + aten_suffix, + srcs = [ + "merged_data_map.cpp", + ], + exported_headers = [ + "merged_data_map.h", + ], + visibility = [ + "@EXECUTORCH_CLIENTS", + ], + deps = [ + "//executorch/runtime/core:named_data_map" + aten_suffix, + "//executorch/runtime/core:core", + ], + ) diff --git a/extension/named_data_map/test/CMakeLists.txt b/extension/named_data_map/test/CMakeLists.txt new file mode 100644 index 00000000000..7fbcb7e5989 --- /dev/null +++ b/extension/named_data_map/test/CMakeLists.txt @@ -0,0 +1,60 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# Please format this file by running: +# ~~~ +# cmake-format -i CMakeLists.txt +# ~~~ + +cmake_minimum_required(VERSION 3.19) + +# Source root directory for executorch. +if(NOT EXECUTORCH_ROOT) + set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..) 
+endif() + +include(${EXECUTORCH_ROOT}/tools/cmake/Test.cmake) + +add_custom_command( + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/ModuleAddMulProgram.ptd" + "${CMAKE_CURRENT_BINARY_DIR}/ModuleLinearProgram.ptd" + COMMAND + ${PYTHON_EXECUTABLE} -m test.models.export_program --modules + "ModuleAddMul,ModuleLinear" --external-constants --outdir + "${CMAKE_CURRENT_BINARY_DIR}" + WORKING_DIRECTORY ${EXECUTORCH_ROOT} +) + +add_custom_target( + extension_named_data_map_test_resources + DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/ModuleAddMulProgram.ptd" + "${CMAKE_CURRENT_BINARY_DIR}/ModuleLinearProgram.ptd" +) + +set(test_env + "ET_MODULE_ADD_MUL_DATA_PATH=${CMAKE_CURRENT_BINARY_DIR}/ModuleAddMulProgram.ptd" + "ET_MODULE_LINEAR_DATA_PATH=${CMAKE_CURRENT_BINARY_DIR}/ModuleLinearProgram.ptd" +) + +set(_test_srcs merged_data_map_test.cpp) + +et_cxx_test( + extension_named_data_map_test + SOURCES + ${_test_srcs} + EXTRA_LIBS + extension_named_data_map + extension_flat_tensor + extension_data_loader +) + +add_dependencies( + extension_named_data_map_test extension_named_data_map + extension_named_data_map_test_resources +) +set_property( + TEST extension_named_data_map_test PROPERTY ENVIRONMENT ${test_env} +) diff --git a/extension/named_data_map/test/TARGETS b/extension/named_data_map/test/TARGETS new file mode 100644 index 00000000000..883ab644309 --- /dev/null +++ b/extension/named_data_map/test/TARGETS @@ -0,0 +1,8 @@ +# Any targets that should be shared between fbcode and xplat must be defined in +# targets.bzl. This file can contain fbcode-only targets. + +load(":targets.bzl", "define_common_targets") + +oncall("executorch") + +define_common_targets(is_fbcode=True) diff --git a/extension/named_data_map/test/merged_data_map_test.cpp b/extension/named_data_map/test/merged_data_map_test.cpp new file mode 100644 index 00000000000..4086855f439 --- /dev/null +++ b/extension/named_data_map/test/merged_data_map_test.cpp @@ -0,0 +1,174 @@ +/* + * Copyright (c) Meta Platforms, Inc. 
and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +using namespace ::testing; +using executorch::extension::FileDataLoader; +using executorch::extension::FlatTensorDataMap; +using executorch::extension::MergedDataMap; +using executorch::runtime::DataLoader; +using executorch::runtime::Error; +using executorch::runtime::NamedDataMap; +using executorch::runtime::Result; +using executorch::runtime::Span; +using executorch::runtime::TensorLayout; + +class MergedDataMapTest : public ::testing::Test { + protected: + void load_flat_tensor_data_map(const char* path, const char* module_name) { + Result loader = FileDataLoader::from(path); + ASSERT_EQ(loader.error(), Error::Ok); + loaders_.emplace( + module_name, std::make_unique(std::move(loader.get()))); + + Result data_map = + FlatTensorDataMap::load(loaders_[module_name].get()); + EXPECT_EQ(data_map.error(), Error::Ok); + + data_maps_.emplace( + module_name, + std::make_unique(std::move(data_map.get()))); + } + + void SetUp() override { + // Since these tests cause ET_LOG to be called, the PAL must be initialized + // first. + executorch::runtime::runtime_init(); + + // Load FlatTensor data maps. + // The eager addmul and linear models are defined at: + // //executorch/test/models/export_program.py + load_flat_tensor_data_map( + std::getenv("ET_MODULE_ADD_MUL_DATA_PATH"), "addmul"); + load_flat_tensor_data_map( + std::getenv("ET_MODULE_LINEAR_DATA_PATH"), "linear"); + } + + private: + // Must outlive data_maps_, but tests shouldn't need to touch it. + std::unordered_map> loaders_; + + protected: + std::unordered_map> data_maps_; +}; + +// Check that two tensor layouts are equivalent. 
+void check_tensor_layout(TensorLayout& layout1, TensorLayout& layout2) { + EXPECT_EQ(layout1.scalar_type(), layout2.scalar_type()); + EXPECT_EQ(layout1.nbytes(), layout2.nbytes()); + EXPECT_EQ(layout1.sizes().size(), layout2.sizes().size()); + for (auto i : c10::irange(layout1.sizes().size())) { + EXPECT_EQ(layout1.sizes()[i], layout2.sizes()[i]); + } + EXPECT_EQ(layout1.dim_order().size(), layout2.dim_order().size()); + for (auto i : c10::irange(layout1.dim_order().size())) { + EXPECT_EQ(layout1.dim_order()[i], layout2.dim_order()[i]); + } +} + +// Given that ndm is part of merged, check that all the API calls on ndm produce +// the same results as merged. +void compare_ndm_api_calls( + const NamedDataMap* ndm, + const NamedDataMap* merged) { + uint32_t num_keys = ndm->get_num_keys().get(); + for (auto i : c10::irange(num_keys)) { + auto key = ndm->get_key(i).get(); + + // Compare get_tensor_layout. + auto ndm_meta = ndm->get_tensor_layout(key).get(); + auto merged_meta = merged->get_tensor_layout(key).get(); + check_tensor_layout(ndm_meta, merged_meta); + + // Compare get_data. + auto ndm_data = ndm->get_data(key); + auto merged_data = merged->get_data(key); + EXPECT_EQ(ndm_data.get().size(), merged_data.get().size()); + for (auto j : c10::irange(ndm_meta.nbytes())) { + EXPECT_EQ( + ((uint8_t*)ndm_data.get().data())[j], + ((uint8_t*)merged_data.get().data())[j]); + } + ndm_data->Free(); + merged_data->Free(); + + // Compare load_into. 
+ auto nbytes = ndm_meta.nbytes(); + auto ndm_buffer = std::make_unique(nbytes); + auto ndm_load_into = ndm->load_data_into(key, ndm_buffer.get(), nbytes); + EXPECT_EQ(ndm_load_into, Error::Ok); + auto merged_buffer = std::make_unique(nbytes); + auto merged_load_into = + merged->load_data_into(key, merged_buffer.get(), nbytes); + EXPECT_EQ(merged_load_into, Error::Ok); + for (auto j : c10::irange(ndm_meta.nbytes())) { + EXPECT_EQ( + ((uint8_t*)merged_buffer.get())[j], + ((uint8_t*)merged_buffer.get())[j]); + } + } +} + +TEST_F(MergedDataMapTest, LoadNullDataMap) { + Result merged_map = MergedDataMap::load({nullptr, nullptr}); + EXPECT_EQ(merged_map.error(), Error::InvalidArgument); +} + +TEST_F(MergedDataMapTest, LoadSingleDataMap) { + std::vector ndms = {data_maps_["addmul"].get(), nullptr}; + Result merged_map = + MergedDataMap::load(Span(ndms.data(), ndms.size())); + EXPECT_EQ(merged_map.error(), Error::Ok); + + // Num keys. + EXPECT_EQ( + merged_map->get_num_keys().get(), + data_maps_["addmul"]->get_num_keys().get()); + + // API calls produce equivalent results. + compare_ndm_api_calls(data_maps_["addmul"].get(), &merged_map.get()); +} + +TEST_F(MergedDataMapTest, LoadDuplicateDataMapsFail) { + std::vector ndms = { + data_maps_["addmul"].get(), data_maps_["addmul"].get()}; + Result merged_map = + MergedDataMap::load(Span(ndms.data(), ndms.size())); + EXPECT_EQ(merged_map.error(), Error::InvalidArgument); +} + +TEST_F(MergedDataMapTest, CheckDataMapContents) { + std::vector ndms = { + data_maps_["addmul"].get(), data_maps_["linear"].get()}; + Result merged_map = + MergedDataMap::load(Span(ndms.data(), ndms.size())); + EXPECT_EQ(merged_map.error(), Error::Ok); + + // Num keys. + size_t addmul_num_keys = data_maps_["addmul"]->get_num_keys().get(); + size_t linear_num_keys = data_maps_["linear"]->get_num_keys().get(); + EXPECT_EQ( + merged_map->get_num_keys().get(), addmul_num_keys + linear_num_keys); + + // API calls produce equivalent results. 
+ compare_ndm_api_calls(data_maps_["addmul"].get(), &merged_map.get()); + compare_ndm_api_calls(data_maps_["linear"].get(), &merged_map.get()); +} diff --git a/extension/named_data_map/test/targets.bzl b/extension/named_data_map/test/targets.bzl new file mode 100644 index 00000000000..516abb8d45e --- /dev/null +++ b/extension/named_data_map/test/targets.bzl @@ -0,0 +1,26 @@ +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") + +def define_common_targets(is_fbcode=False): + if not runtime.is_oss and is_fbcode: + modules_env = { + # The tests use this var to find the program file to load. This uses + # an fbcode target path because the authoring/export tools + # intentionally don't work in xplat (since they're host-only tools). + "ET_MODULE_ADD_MUL_DATA_PATH": "$(location fbcode//executorch/test/models:exported_program_and_data[ModuleAddMul.ptd])", + "ET_MODULE_LINEAR_DATA_PATH": "$(location fbcode//executorch/test/models:exported_program_and_data[ModuleLinear.ptd])", + } + + runtime.cxx_test( + name = "merged_data_map_test", + srcs = [ + "merged_data_map_test.cpp", + ], + deps = [ + "//executorch/extension/data_loader:file_data_loader", + "//executorch/extension/flat_tensor:flat_tensor_data_map", + "//executorch/extension/named_data_map:merged_data_map", + "//executorch/runtime/core:named_data_map", + "//executorch/runtime/core/exec_aten:lib", + ], + env = modules_env, + ) diff --git a/scripts/build_wasm_tests.sh b/scripts/build_wasm_tests.sh index 9a09ddd2749..4dd7355e118 100644 --- a/scripts/build_wasm_tests.sh +++ b/scripts/build_wasm_tests.sh @@ -22,6 +22,7 @@ emcmake cmake . 
-DEXECUTORCH_BUILD_WASM=ON \ -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ + -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \ -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ -DEXECUTORCH_SELECT_OPS_LIST="aten::mm.out,aten::add.out" \ -DEXECUTORCH_BUILD_TESTS=ON \ diff --git a/shim_et/xplat/executorch/build/build_variables.bzl b/shim_et/xplat/executorch/build/build_variables.bzl index ea086886449..8d8893f7454 100644 --- a/shim_et/xplat/executorch/build/build_variables.bzl +++ b/shim_et/xplat/executorch/build/build_variables.bzl @@ -341,6 +341,10 @@ EXTENSION_MODULE_SRCS = [ "extension/module/module.cpp", ] +EXTENSION_NAMED_DATA_MAP_SRCS = [ + "extension/named_data_map/merged_data_map.cpp", +] + EXTENSION_RUNNER_UTIL_SRCS = [ "extension/runner_util/inputs.cpp", "extension/runner_util/inputs_portable.cpp", diff --git a/test/run_oss_cpp_tests.sh b/test/run_oss_cpp_tests.sh index 1648f2ba434..5166d454e60 100755 --- a/test/run_oss_cpp_tests.sh +++ b/test/run_oss_cpp_tests.sh @@ -41,6 +41,7 @@ build_executorch() { -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ + -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \ -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \ -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \ -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \ diff --git a/tools/cmake/Codegen.cmake b/tools/cmake/Codegen.cmake index 2a6bf42b48a..32d3d8b554f 100644 --- a/tools/cmake/Codegen.cmake +++ b/tools/cmake/Codegen.cmake @@ -399,6 +399,7 @@ function(executorch_load_build_variables) EXTENSION_EVALUE_UTIL_SRCS EXTENSION_FLAT_TENSOR_SRCS EXTENSION_MODULE_SRCS + EXTENSION_NAMED_DATA_MAP_SRCS EXTENSION_RUNNER_UTIL_SRCS EXTENSION_LLM_RUNNER_SRCS EXTENSION_TENSOR_SRCS @@ -431,6 +432,7 @@ function(executorch_load_build_variables) _extension_evalue_util__srcs _extension_flat_tensor__srcs _extension_module__srcs + 
_extension_named_data_map__srcs _extension_runner_util__srcs _extension_llm_runner__srcs _extension_tensor__srcs diff --git a/tools/cmake/preset/android.cmake b/tools/cmake/preset/android.cmake index d794e8fcef3..5c9bc97e3ef 100644 --- a/tools/cmake/preset/android.cmake +++ b/tools/cmake/preset/android.cmake @@ -23,6 +23,7 @@ set_overridable_option(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_LLM ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_MODULE ON) +set_overridable_option(EXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_TENSOR ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_TRAINING ON) diff --git a/tools/cmake/preset/apple_common.cmake b/tools/cmake/preset/apple_common.cmake index 7b4ec420996..27ec35aa43e 100644 --- a/tools/cmake/preset/apple_common.cmake +++ b/tools/cmake/preset/apple_common.cmake @@ -28,6 +28,7 @@ set_overridable_option(EXECUTORCH_BUILD_EXTENSION_LLM ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_MODULE ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON) +set_overridable_option(EXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_TENSOR ON) set_overridable_option(EXECUTORCH_BUILD_KERNELS_LLM ON) set_overridable_option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED ON) diff --git a/tools/cmake/preset/default.cmake b/tools/cmake/preset/default.cmake index 0039ab551fb..d47688034b9 100644 --- a/tools/cmake/preset/default.cmake +++ b/tools/cmake/preset/default.cmake @@ -86,6 +86,10 @@ define_overridable_option( define_overridable_option( EXECUTORCH_BUILD_EXTENSION_MODULE "Build the Module extension" BOOL OFF ) +define_overridable_option( + EXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP + "Build 
the Named Data Map extension" BOOL OFF +) define_overridable_option( EXECUTORCH_BUILD_EXTENSION_TENSOR "Build the Tensor extension" BOOL OFF ) @@ -277,6 +281,12 @@ check_required_options_on( check_required_options_on( IF_ON EXECUTORCH_BUILD_EXTENSION_MODULE REQUIRES EXECUTORCH_BUILD_EXTENSION_DATA_LOADER EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR + EXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP +) + +check_required_options_on( + IF_ON EXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP REQUIRES + EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ) check_required_options_on( diff --git a/tools/cmake/preset/llm.cmake b/tools/cmake/preset/llm.cmake index e29fc7c4287..6cd2482f717 100644 --- a/tools/cmake/preset/llm.cmake +++ b/tools/cmake/preset/llm.cmake @@ -10,6 +10,7 @@ set_overridable_option(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_LLM ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_MODULE ON) +set_overridable_option(EXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_TENSOR ON) set_overridable_option(EXECUTORCH_BUILD_KERNELS_LLM ON) set_overridable_option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED ON) diff --git a/tools/cmake/preset/profiling.cmake b/tools/cmake/preset/profiling.cmake index a73c340078c..640a84b261c 100644 --- a/tools/cmake/preset/profiling.cmake +++ b/tools/cmake/preset/profiling.cmake @@ -9,6 +9,7 @@ set_overridable_option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_MODULE ON) +set_overridable_option(EXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_TENSOR ON) set_overridable_option(EXECUTORCH_BUILD_KERNELS_LLM ON) set_overridable_option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED ON) diff --git a/tools/cmake/preset/pybind.cmake b/tools/cmake/preset/pybind.cmake index 
f98e68ef5ac..c71c10ad01f 100644 --- a/tools/cmake/preset/pybind.cmake +++ b/tools/cmake/preset/pybind.cmake @@ -17,10 +17,11 @@ set_overridable_option(EXECUTORCH_BUILD_EXTENSION_TENSOR ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL ON) set_overridable_option(EXECUTORCH_BUILD_KERNELS_LLM ON) set_overridable_option(EXECUTORCH_BUILD_KERNELS_LLM_AOT ON) +set_overridable_option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON) -set_overridable_option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_MODULE ON) +set_overridable_option(EXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP ON) # TODO(larryliu0820): Temporarily disable building llm_runner for Windows wheel # due to the issue of tokenizer file path length limitation. diff --git a/tools/cmake/preset/windows.cmake b/tools/cmake/preset/windows.cmake index b75a5af578e..5123dfc956d 100644 --- a/tools/cmake/preset/windows.cmake +++ b/tools/cmake/preset/windows.cmake @@ -10,6 +10,7 @@ set_overridable_option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_EVALUE_UTIL ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_MODULE ON) +set_overridable_option(EXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_TENSOR ON) set_overridable_option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED ON)