From 9cfd8787dac8f2eb428c1a27bbe6d837a80b4006 Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Fri, 13 Jun 2025 14:43:30 -0700 Subject: [PATCH 1/5] Implement load_into(); Add unit tests --- extension/data_loader/mmap_data_loader.cpp | 119 +++++++++++++++++- extension/data_loader/mmap_data_loader.h | 9 ++ .../test/mmap_data_loader_test.cpp | 52 ++++++++ 3 files changed, 175 insertions(+), 5 deletions(-) diff --git a/extension/data_loader/mmap_data_loader.cpp b/extension/data_loader/mmap_data_loader.cpp index 53fd7bdf624..0595a3dd89c 100644 --- a/extension/data_loader/mmap_data_loader.cpp +++ b/extension/data_loader/mmap_data_loader.cpp @@ -150,11 +150,11 @@ void MunmapSegment(void* context, void* data, size_t size) { } } // namespace -Result MmapDataLoader::load( - size_t offset, - size_t size, - ET_UNUSED const DataLoader::SegmentInfo& segment_info) const { - ET_CHECK_OR_RETURN_ERROR( +/** + * Helper for input validation. + */ +Error MmapDataLoader::validate_input(size_t offset, size_t size) const { + ET_CHECK_OR_RETURN_ERROR( // Probably had its value moved to another instance. fd_ >= 0, InvalidState, @@ -173,6 +173,19 @@ Result MmapDataLoader::load( InvalidArgument, "Offset %zu too large for off_t", offset); + return Error::Ok; +} + +Result MmapDataLoader::load( + size_t offset, + size_t size, + ET_UNUSED const DataLoader::SegmentInfo& segment_info) const { + + // Validate input. + auto err = validate_input(offset, size); + if (err != Error::Ok) { + return err; + } // mmap() will fail if the size is zero. if (size == 0) { @@ -268,5 +281,101 @@ Result MmapDataLoader::size() const { return file_size_; } +Error MmapDataLoader::load_into( + size_t offset, + size_t size, + ET_UNUSED const SegmentInfo& segment_info, + void* buffer) const { + + ET_CHECK_OR_RETURN_ERROR(buffer != nullptr, InvalidArgument, "Buffer is null"); + + // Validate input. + auto err = validate_input(offset, size); + if (err != Error::Ok) { + return err; + } + + // Nothing to copy + if (size == 0) { + return Error::Ok; + } + + // Find the range of pages that covers the requested region. + Range range = + get_overlapping_pages(static_cast(offset), size, page_size_); + + size_t map_size = range.size; + if (range.start + map_size > file_size_) { + // Clamp to the end of the file. + // + // The Windows implementation of mmap uses CreateFileMapping which returns + // error STATUS_SECTION_TOO_BIG (0xc0000040) if we try to map past the end + // of the last page of a file mapped in as read-only. + map_size = file_size_ - range.start; + } + + // Map the pages read-only. MAP_PRIVATE vs. MAP_SHARED doesn't matter since + // the data is read-only, but use PRIVATE just to further avoid accidentally + // modifying the file. + void* pages = ::mmap( + nullptr, + map_size, + PROT_READ, + MAP_PRIVATE, + fd_, + static_cast(range.start)); + ET_CHECK_OR_RETURN_ERROR( + pages != MAP_FAILED, + AccessFailed, + "Failed to map %s: mmap(..., size=%zd, ..., fd=%d, offset=0x%zx)", + file_name_, + range.size, + fd_, + range.start); + + if (mlock_config_ == MlockConfig::UseMlock || + mlock_config_ == MlockConfig::UseMlockIgnoreErrors) { + int err = ::mlock(pages, size); + if (err < 0) { + if (mlock_config_ == MlockConfig::UseMlockIgnoreErrors) { + ET_LOG( + Debug, + "Ignoring mlock error for file %s (off=0x%zd): " + "mlock(%p, %zu) failed: %s (%d)", + file_name_, + offset, + pages, + size, + ::strerror(errno), + errno); + } else { + ET_LOG( + Error, + "File %s (off=0x%zd): mlock(%p, %zu) failed: %s (%d)", + file_name_, + offset, + pages, + size, + ::strerror(errno), + errno); + ::munmap(pages, size); + return Error::NotSupported; + } + } + // No need to keep track of this. munmap() will unlock as a side effect. + } + + // Offset into mapped region. + const size_t map_delta = offset - range.start; + + // Copy data into caller's buffer. + std::memcpy(buffer, static_cast(pages) + map_delta, size); + + // Unmap mapped region. + ::munmap(pages, map_size); + + return Error::Ok; +} + } // namespace extension } // namespace executorch diff --git a/extension/data_loader/mmap_data_loader.h b/extension/data_loader/mmap_data_loader.h index c55f81a490b..9cdea284d3c 100644 --- a/extension/data_loader/mmap_data_loader.h +++ b/extension/data_loader/mmap_data_loader.h @@ -95,6 +95,15 @@ class MmapDataLoader final : public executorch::runtime::DataLoader { ET_NODISCARD executorch::runtime::Result size() const override; + ET_NODISCARD executorch::runtime::Error validate_input(size_t offset, size_t size) const; + + ET_NODISCARD + executorch::runtime::Error load_into( + size_t offset, + size_t size, + ET_UNUSED const SegmentInfo& segment_info, + void* buffer) const override; + private: MmapDataLoader( int fd, diff --git a/extension/data_loader/test/mmap_data_loader_test.cpp b/extension/data_loader/test/mmap_data_loader_test.cpp index c01b3454493..3f8b053fccf 100644 --- a/extension/data_loader/test/mmap_data_loader_test.cpp +++ b/extension/data_loader/test/mmap_data_loader_test.cpp @@ -376,3 +376,55 @@ TEST_F(MmapDataLoaderTest, DEPRECATEDFrom) { ASSERT_EQ(total_size.error(), Error::Ok); EXPECT_EQ(*total_size, contents_size); } + +// Tests that load_into copies bytes correctly. +TEST_F(MmapDataLoaderTest, LoadIntoCopiesCorrectly) { + // Create a test string. + const char* test_text = "FILE_CONTENTS"; + const size_t text_size = std::strlen(test_text); + TempFile tf(test_text); + + // Wrap it in a loader. + Result mdl = MmapDataLoader::from(tf.path().c_str()); + ASSERT_EQ(mdl.error(), Error::Ok); + + // Destination buffer. + std::vector dst(text_size); + + // Call load_into() + Error err = mdl->load_into( + /*offset=*/0, + /*size=*/text_size, + DataLoader::SegmentInfo(DataLoader::SegmentInfo::Type::Program), + dst.data()); + ASSERT_EQ(err, Error::Ok); + + // Verify memory copied correctly. + EXPECT_EQ(0, std::memcmp(dst.data(), test_text, text_size)); +} + +// Tests that load_into copies offset slice correctly. +TEST_F(MmapDataLoaderTest, LoadIntoCopiesOffsetCorrectly) { + // Create a test string. + const char* contents = "ABCDEFGH"; + TempFile tf(contents); + + // Wrap it in a loader. + Result mdl = MmapDataLoader::from(tf.path().c_str()); + ASSERT_EQ(mdl.error(), Error::Ok); + + // Copying 3 bytes starting at offset 2 = "CDE" + const size_t offset = 2; + const size_t size = 3; + uint8_t dst[size]; + + // Call load_into() + Error err = mdl->load_into( + offset, size, + DataLoader::SegmentInfo(DataLoader::SegmentInfo::Type::Program), + dst); + ASSERT_EQ(err, Error::Ok); + + // Verify memory copied correctly. + EXPECT_EQ(0, std::memcmp(dst, contents + offset, size)); +} \ No newline at end of file From 0b3e9aca115d6a98e6c1b7040275ee72e66f1f0b Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Fri, 13 Jun 2025 14:49:56 -0700 Subject: [PATCH 2/5] Apply lintrunner formatting --- extension/data_loader/mmap_data_loader.cpp | 7 +++---- extension/data_loader/mmap_data_loader.h | 4 +++- extension/data_loader/test/mmap_data_loader_test.cpp | 5 +++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/extension/data_loader/mmap_data_loader.cpp b/extension/data_loader/mmap_data_loader.cpp index 0595a3dd89c..3cb88aa3803 100644 --- a/extension/data_loader/mmap_data_loader.cpp +++ b/extension/data_loader/mmap_data_loader.cpp @@ -154,7 +154,7 @@ void MunmapSegment(void* context, void* data, size_t size) { * Helper for input validation. */ Error MmapDataLoader::validate_input(size_t offset, size_t size) const { - ET_CHECK_OR_RETURN_ERROR( + ET_CHECK_OR_RETURN_ERROR( // Probably had its value moved to another instance. fd_ >= 0, InvalidState, @@ -180,7 +180,6 @@ Result MmapDataLoader::load( size_t offset, size_t size, ET_UNUSED const DataLoader::SegmentInfo& segment_info) const { - // Validate input. auto err = validate_input(offset, size); if (err != Error::Ok) { @@ -286,8 +285,8 @@ Error MmapDataLoader::load_into( size_t size, ET_UNUSED const SegmentInfo& segment_info, void* buffer) const { - - ET_CHECK_OR_RETURN_ERROR(buffer != nullptr, InvalidArgument, "Buffer is null"); + ET_CHECK_OR_RETURN_ERROR( + buffer != nullptr, InvalidArgument, "Buffer is null"); // Validate input. auto err = validate_input(offset, size); diff --git a/extension/data_loader/mmap_data_loader.h b/extension/data_loader/mmap_data_loader.h index 9cdea284d3c..6a9da4c1c49 100644 --- a/extension/data_loader/mmap_data_loader.h +++ b/extension/data_loader/mmap_data_loader.h @@ -95,7 +95,9 @@ class MmapDataLoader final : public executorch::runtime::DataLoader { ET_NODISCARD executorch::runtime::Result size() const override; - ET_NODISCARD executorch::runtime::Error validate_input(size_t offset, size_t size) const; + ET_NODISCARD executorch::runtime::Error validate_input( + size_t offset, + size_t size) const; ET_NODISCARD executorch::runtime::Error load_into( diff --git a/extension/data_loader/test/mmap_data_loader_test.cpp b/extension/data_loader/test/mmap_data_loader_test.cpp index 3f8b053fccf..76b972c46d0 100644 --- a/extension/data_loader/test/mmap_data_loader_test.cpp +++ b/extension/data_loader/test/mmap_data_loader_test.cpp @@ -415,12 +415,13 @@ TEST_F(MmapDataLoaderTest, LoadIntoCopiesOffsetCorrectly) { // Copying 3 bytes starting at offset 2 = "CDE" const size_t offset = 2; - const size_t size = 3; + const size_t size = 3; uint8_t dst[size]; // Call load_into() Error err = mdl->load_into( - offset, size, + offset, + size, DataLoader::SegmentInfo(DataLoader::SegmentInfo::Type::Program), dst); ASSERT_EQ(err, Error::Ok); From 3779d455f9bcf04f2f97a6e23e6d82f9ae984d14 Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Mon, 16 Jun 2025 15:55:17 -0700 Subject: [PATCH 3/5] Remove mlock from load_into() --- extension/data_loader/mmap_data_loader.cpp | 32 ---------------------- 1 file changed, 32 deletions(-) diff --git a/extension/data_loader/mmap_data_loader.cpp b/extension/data_loader/mmap_data_loader.cpp index 3cb88aa3803..6e42b73f082 100644 --- a/extension/data_loader/mmap_data_loader.cpp +++ b/extension/data_loader/mmap_data_loader.cpp @@ -332,38 +332,6 @@ Error MmapDataLoader::load_into( fd_, range.start); - if (mlock_config_ == MlockConfig::UseMlock || - mlock_config_ == MlockConfig::UseMlockIgnoreErrors) { - int err = ::mlock(pages, size); - if (err < 0) { - if (mlock_config_ == MlockConfig::UseMlockIgnoreErrors) { - ET_LOG( - Debug, - "Ignoring mlock error for file %s (off=0x%zd): " - "mlock(%p, %zu) failed: %s (%d)", - file_name_, - offset, - pages, - size, - ::strerror(errno), - errno); - } else { - ET_LOG( - Error, - "File %s (off=0x%zd): mlock(%p, %zu) failed: %s (%d)", - file_name_, - offset, - pages, - size, - ::strerror(errno), - errno); - ::munmap(pages, size); - return Error::NotSupported; - } - } - // No need to keep track of this. munmap() will unlock as a side effect. - } - // Offset into mapped region. const size_t map_delta = offset - range.start; From 17b35999c3296f799cb95a8785fe215557a3f8bb Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Tue, 17 Jun 2025 08:23:48 -0700 Subject: [PATCH 4/5] Move validate_input to private section --- extension/data_loader/mmap_data_loader.cpp | 2 +- extension/data_loader/mmap_data_loader.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/extension/data_loader/mmap_data_loader.cpp b/extension/data_loader/mmap_data_loader.cpp index 6e42b73f082..b77717c9aca 100644 --- a/extension/data_loader/mmap_data_loader.cpp +++ b/extension/data_loader/mmap_data_loader.cpp @@ -151,7 +151,7 @@ void MunmapSegment(void* context, void* data, size_t size) { } // namespace /** - * Helper for input validation. + * Validates that file read range is within bounds. */ Error MmapDataLoader::validate_input(size_t offset, size_t size) const { ET_CHECK_OR_RETURN_ERROR( diff --git a/extension/data_loader/mmap_data_loader.h b/extension/data_loader/mmap_data_loader.h index 6a9da4c1c49..c0496a39d4b 100644 --- a/extension/data_loader/mmap_data_loader.h +++ b/extension/data_loader/mmap_data_loader.h @@ -95,10 +95,6 @@ class MmapDataLoader final : public executorch::runtime::DataLoader { ET_NODISCARD executorch::runtime::Result size() const override; - ET_NODISCARD executorch::runtime::Error validate_input( - size_t offset, - size_t size) const; - ET_NODISCARD executorch::runtime::Error load_into( size_t offset, @@ -124,6 +120,10 @@ class MmapDataLoader final : public executorch::runtime::DataLoader { MmapDataLoader& operator=(const MmapDataLoader&) = delete; MmapDataLoader& operator=(MmapDataLoader&&) = delete; + ET_NODISCARD executorch::runtime::Error validate_input( + size_t offset, + size_t size) const; + const char* const file_name_; // String data is owned by the instance. const size_t file_size_; const size_t page_size_; From a972fa8682a3f1345e23bfea724ab07b74c9e539 Mon Sep 17 00:00:00 2001 From: Zuby Afzal Date: Tue, 17 Jun 2025 08:27:59 -0700 Subject: [PATCH 5/5] Update comments --- extension/data_loader/mmap_data_loader.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/extension/data_loader/mmap_data_loader.cpp b/extension/data_loader/mmap_data_loader.cpp index b77717c9aca..76b45a04a21 100644 --- a/extension/data_loader/mmap_data_loader.cpp +++ b/extension/data_loader/mmap_data_loader.cpp @@ -180,7 +180,7 @@ Result MmapDataLoader::load( size_t offset, size_t size, ET_UNUSED const DataLoader::SegmentInfo& segment_info) const { - // Validate input. + // Ensure read range is valid. auto err = validate_input(offset, size); if (err != Error::Ok) { return err; @@ -288,13 +288,13 @@ Error MmapDataLoader::load_into( ET_CHECK_OR_RETURN_ERROR( buffer != nullptr, InvalidArgument, "Buffer is null"); - // Validate input. + // Ensure read range is valid. auto err = validate_input(offset, size); if (err != Error::Ok) { return err; } - // Nothing to copy + // Nothing to copy. if (size == 0) { return Error::Ok; }