From 4c1183cd636373296e58fa85baab55853c069077 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 21 Mar 2024 11:01:37 +0100 Subject: [PATCH 01/14] GH-40698: [C++] Create registry for Devices to map DeviceType to MemoryManager in C Device Data import --- cpp/src/arrow/c/bridge.cc | 7 ++-- cpp/src/arrow/device.cc | 68 +++++++++++++++++++++++++++++++++++++++ cpp/src/arrow/device.h | 7 ++++ 3 files changed, 78 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc index 4ec79a73029..feba878b7fc 100644 --- a/cpp/src/arrow/c/bridge.cc +++ b/cpp/src/arrow/c/bridge.cc @@ -1969,10 +1969,9 @@ Result> ImportRecordBatch(struct ArrowArray* array, Result> DefaultDeviceMapper(ArrowDeviceType device_type, int64_t device_id) { - if (device_type != ARROW_DEVICE_CPU) { - return Status::NotImplemented("Only importing data on CPU is supported"); - } - return default_cpu_memory_manager(); + ARROW_ASSIGN_OR_RAISE(auto mapper, + GetMemoryManager(static_cast(device_type))); + return mapper(device_id); } Result> ImportDeviceArray(struct ArrowDeviceArray* array, diff --git a/cpp/src/arrow/device.cc b/cpp/src/arrow/device.cc index 3736a4e018c..4c20b6296ea 100644 --- a/cpp/src/arrow/device.cc +++ b/cpp/src/arrow/device.cc @@ -18,6 +18,8 @@ #include "arrow/device.h" #include +#include +#include #include #include "arrow/array.h" @@ -268,4 +270,70 @@ std::shared_ptr CPUDevice::default_memory_manager() { return default_cpu_memory_manager(); } +namespace internal { + +using MemoryMapper = + std::function>(int64_t device_id)>; + +class DeviceMemoryManagerRegistryImpl { + public: + DeviceMemoryManagerRegistryImpl() {} + + Status RegisterDevice(DeviceAllocationType device_type, MemoryMapper memory_mapper) { + std::lock_guard lock(lock_); + auto it = registry_.find(device_type); + if (it != registry_.end()) { + return Status::KeyError("Device type is already registered"); + } + registry_[device_type] = std::move(memory_mapper); + return Status::OK(); + } + + Result GetMemoryManager(DeviceAllocationType device_type) { + std::lock_guard lock(lock_); + auto it = registry_.find(device_type); + if (it == registry_.end()) { + return Status::KeyError("Device type is not registered"); + } + return it->second; + } + + private: + std::mutex lock_; + std::unordered_map registry_; +}; + +Result> DefaultCPUMemoryMapper(int64_t device_id) { + return default_cpu_memory_manager(); +} + +static std::shared_ptr g_registry; +static std::once_flag registry_initialized; + +static void CreateGlobalDeviceRegistry() { + g_registry = std::make_shared(); + + // Always register the CPU device + + ARROW_CHECK_OK( + g_registry->RegisterDevice(DeviceAllocationType::kCPU, DefaultCPUMemoryMapper)); +} + +std::shared_ptr GetDeviceRegistry() { + std::call_once(registry_initialized, CreateGlobalDeviceRegistry); + return g_registry; +} + +} // namespace internal + +Status RegisterDevice(DeviceAllocationType device_type, MemoryMapper memory_mapper) { + auto registry = internal::GetDeviceRegistry(); + return registry->RegisterDevice(device_type, std::move(memory_mapper)); +} + +Result GetMemoryManager(DeviceAllocationType device_type) { + auto registry = internal::GetDeviceRegistry(); + return registry->GetMemoryManager(device_type); +} + } // namespace arrow diff --git a/cpp/src/arrow/device.h b/cpp/src/arrow/device.h index efb0a5ab400..630e4477040 100644 --- a/cpp/src/arrow/device.h +++ b/cpp/src/arrow/device.h @@ -363,4 +363,11 @@ class ARROW_EXPORT CPUMemoryManager : public MemoryManager { ARROW_EXPORT std::shared_ptr default_cpu_memory_manager(); +using MemoryMapper = + std::function>(int64_t device_id)>; + +Status RegisterDevice(DeviceAllocationType device_type, MemoryMapper memory_mapper); + +Result GetMemoryManager(DeviceAllocationType device_type); + } // namespace arrow From 9972eed75af261a9d42617f92a87dfc3ec617c32 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 21 Mar 2024 11:48:47 +0000 Subject: [PATCH 02/14] test with CUDA --- cpp/src/arrow/device.h | 2 ++ cpp/src/arrow/gpu/cuda_memory.cc | 22 ++++++++++++++++++++++ cpp/src/arrow/gpu/cuda_memory.h | 3 +++ cpp/src/arrow/gpu/cuda_test.cc | 15 ++++----------- 4 files changed, 31 insertions(+), 11 deletions(-) diff --git a/cpp/src/arrow/device.h b/cpp/src/arrow/device.h index 630e4477040..ae6862495b1 100644 --- a/cpp/src/arrow/device.h +++ b/cpp/src/arrow/device.h @@ -366,8 +366,10 @@ std::shared_ptr default_cpu_memory_manager(); using MemoryMapper = std::function>(int64_t device_id)>; +ARROW_EXPORT Status RegisterDevice(DeviceAllocationType device_type, MemoryMapper memory_mapper); +ARROW_EXPORT Result GetMemoryManager(DeviceAllocationType device_type); } // namespace arrow diff --git a/cpp/src/arrow/gpu/cuda_memory.cc b/cpp/src/arrow/gpu/cuda_memory.cc index 860c6311d7b..29e0d9cd8e9 100644 --- a/cpp/src/arrow/gpu/cuda_memory.cc +++ b/cpp/src/arrow/gpu/cuda_memory.cc @@ -27,6 +27,7 @@ #include #include "arrow/buffer.h" +#include "arrow/device.h" #include "arrow/io/memory.h" #include "arrow/memory_pool.h" #include "arrow/status.h" @@ -501,5 +502,26 @@ Result> DefaultMemoryMapper(ArrowDeviceType devic } } +Result> DefaultGPUMemoryMapper(int64_t device_id) { + ARROW_ASSIGN_OR_RAISE(auto device, arrow::cuda::CudaDevice::Make(device_id)); + return device->default_memory_manager(); +} + +Status RegisterCUDADeviceInternal() { + RETURN_NOT_OK(RegisterDevice(DeviceAllocationType::kCUDA, DefaultGPUMemoryMapper)); + RETURN_NOT_OK(RegisterDevice(DeviceAllocationType::kCUDA_HOST, DefaultGPUMemoryMapper)); + RETURN_NOT_OK(RegisterDevice(DeviceAllocationType::kCUDA_MANAGED, DefaultGPUMemoryMapper)); + return Status::OK(); +} + + +std::once_flag cuda_registered; + +Status RegisterCUDADevice() { + std::call_once(cuda_registered, RegisterCUDADeviceInternal); + return Status::OK(); +} + + } // namespace cuda } // namespace arrow diff --git a/cpp/src/arrow/gpu/cuda_memory.h b/cpp/src/arrow/gpu/cuda_memory.h index d323bef0349..22216d80d25 100644 --- a/cpp/src/arrow/gpu/cuda_memory.h +++ b/cpp/src/arrow/gpu/cuda_memory.h @@ -264,5 +264,8 @@ ARROW_EXPORT Result> DefaultMemoryMapper(ArrowDeviceType device_type, int64_t device_id); +ARROW_EXPORT +Status RegisterCUDADevice(); + } // namespace cuda } // namespace arrow diff --git a/cpp/src/arrow/gpu/cuda_test.cc b/cpp/src/arrow/gpu/cuda_test.cc index d2f01cb3bbc..5b9431200e9 100644 --- a/cpp/src/arrow/gpu/cuda_test.cc +++ b/cpp/src/arrow/gpu/cuda_test.cc @@ -716,15 +716,8 @@ class TestCudaDeviceArrayRoundtrip : public ::testing::Test { public: using ArrayFactory = std::function>()>; - static Result> DeviceMapper(ArrowDeviceType type, - int64_t id) { - if (type != ARROW_DEVICE_CUDA) { - return Status::NotImplemented("should only be CUDA device"); - } - - ARROW_ASSIGN_OR_RAISE(auto manager, cuda::CudaDeviceManager::Instance()); - ARROW_ASSIGN_OR_RAISE(auto device, manager->GetDevice(id)); - return device->default_memory_manager(); + void SetUp() { + ASSERT_OK(RegisterCUDADevice()); } static ArrayFactory JSONArrayFactory(std::shared_ptr type, const char* json) { @@ -759,7 +752,7 @@ class TestCudaDeviceArrayRoundtrip : public ::testing::Test { std::shared_ptr device_array_roundtripped; ASSERT_OK_AND_ASSIGN(device_array_roundtripped, - ImportDeviceArray(&c_array, &c_schema, DeviceMapper)); + ImportDeviceArray(&c_array, &c_schema)); ASSERT_TRUE(ArrowSchemaIsReleased(&c_schema)); ASSERT_TRUE(ArrowArrayIsReleased(&c_array.array)); @@ -779,7 +772,7 @@ class TestCudaDeviceArrayRoundtrip : public ::testing::Test { ASSERT_OK(ExportDeviceArray(*device_array, sync, &c_array, &c_schema)); device_array_roundtripped.reset(); ASSERT_OK_AND_ASSIGN(device_array_roundtripped, - ImportDeviceArray(&c_array, &c_schema, DeviceMapper)); + ImportDeviceArray(&c_array, &c_schema)); ASSERT_TRUE(ArrowSchemaIsReleased(&c_schema)); ASSERT_TRUE(ArrowArrayIsReleased(&c_array.array)); From e16e24d5cdac0ffefbdeb01b5b045be3f2b5b118 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 21 Mar 2024 13:53:05 +0100 Subject: [PATCH 03/14] some clean-up and docs --- cpp/src/arrow/c/bridge.cc | 4 ++-- cpp/src/arrow/device.cc | 14 +++++++------- cpp/src/arrow/device.h | 13 +++++++++++-- cpp/src/arrow/gpu/cuda_memory.cc | 11 ++++++----- cpp/src/arrow/gpu/cuda_test.cc | 4 +--- 5 files changed, 27 insertions(+), 19 deletions(-) diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc index feba878b7fc..e825f34e712 100644 --- a/cpp/src/arrow/c/bridge.cc +++ b/cpp/src/arrow/c/bridge.cc @@ -1969,8 +1969,8 @@ Result> ImportRecordBatch(struct ArrowArray* array, Result> DefaultDeviceMapper(ArrowDeviceType device_type, int64_t device_id) { - ARROW_ASSIGN_OR_RAISE(auto mapper, - GetMemoryManager(static_cast(device_type))); + ARROW_ASSIGN_OR_RAISE(auto mapper, GetDeviceMemoryManager( + static_cast(device_type))); return mapper(device_id); } diff --git a/cpp/src/arrow/device.cc b/cpp/src/arrow/device.cc index 4c20b6296ea..aa1a3c9a4a6 100644 --- a/cpp/src/arrow/device.cc +++ b/cpp/src/arrow/device.cc @@ -272,9 +272,6 @@ std::shared_ptr CPUDevice::default_memory_manager() { namespace internal { -using MemoryMapper = - std::function>(int64_t device_id)>; - class DeviceMemoryManagerRegistryImpl { public: DeviceMemoryManagerRegistryImpl() {} @@ -283,7 +280,8 @@ class DeviceMemoryManagerRegistryImpl { std::lock_guard lock(lock_); auto it = registry_.find(device_type); if (it != registry_.end()) { - return Status::KeyError("Device type is already registered"); + return Status::KeyError("Device type ", static_cast(device_type), + " is already registered"); } registry_[device_type] = std::move(memory_mapper); return Status::OK(); @@ -293,7 +291,8 @@ class DeviceMemoryManagerRegistryImpl { std::lock_guard lock(lock_); auto it = registry_.find(device_type); if (it == registry_.end()) { - return Status::KeyError("Device type is not registered"); + return Status::KeyError("Device type ", static_cast(device_type), + "is not registered"); } return it->second; } @@ -326,12 +325,13 @@ std::shared_ptr GetDeviceRegistry() { } // namespace internal -Status RegisterDevice(DeviceAllocationType device_type, MemoryMapper memory_mapper) { +Status RegisterDeviceMemoryManager(DeviceAllocationType device_type, + MemoryMapper memory_mapper) { auto registry = internal::GetDeviceRegistry(); return registry->RegisterDevice(device_type, std::move(memory_mapper)); } -Result GetMemoryManager(DeviceAllocationType device_type) { +Result GetDeviceMemoryManager(DeviceAllocationType device_type) { auto registry = internal::GetDeviceRegistry(); return registry->GetMemoryManager(device_type); } diff --git a/cpp/src/arrow/device.h b/cpp/src/arrow/device.h index ae6862495b1..128c4b6f8f6 100644 --- a/cpp/src/arrow/device.h +++ b/cpp/src/arrow/device.h @@ -366,10 +366,19 @@ std::shared_ptr default_cpu_memory_manager(); using MemoryMapper = std::function>(int64_t device_id)>; +/// \brief Register a function to retrieve a MemoryManager for a Device type +/// +/// This registers the device type globally. A specific device type can only +/// be registered once. This method is thread-safe +/// \param[in] device_type the device type for which to register a MemoryManager +/// \param[in] memory_mapper function that takes a device id and returns the appropriate +/// MemoryManager for the registered device type and given device id +/// \return Status ARROW_EXPORT -Status RegisterDevice(DeviceAllocationType device_type, MemoryMapper memory_mapper); +Status RegisterDeviceMemoryManager(DeviceAllocationType device_type, + MemoryMapper memory_mapper); ARROW_EXPORT -Result GetMemoryManager(DeviceAllocationType device_type); +Result GetDeviceMemoryManager(DeviceAllocationType device_type); } // namespace arrow diff --git a/cpp/src/arrow/gpu/cuda_memory.cc b/cpp/src/arrow/gpu/cuda_memory.cc index 29e0d9cd8e9..3051c25bd8f 100644 --- a/cpp/src/arrow/gpu/cuda_memory.cc +++ b/cpp/src/arrow/gpu/cuda_memory.cc @@ -508,13 +508,15 @@ Result> DefaultGPUMemoryMapper(int64_t device_id) } Status RegisterCUDADeviceInternal() { - RETURN_NOT_OK(RegisterDevice(DeviceAllocationType::kCUDA, DefaultGPUMemoryMapper)); - RETURN_NOT_OK(RegisterDevice(DeviceAllocationType::kCUDA_HOST, DefaultGPUMemoryMapper)); - RETURN_NOT_OK(RegisterDevice(DeviceAllocationType::kCUDA_MANAGED, DefaultGPUMemoryMapper)); + RETURN_NOT_OK( + RegisterDeviceMemoryManager(DeviceAllocationType::kCUDA, DefaultGPUMemoryMapper)); + RETURN_NOT_OK(RegisterDeviceMemoryManager(DeviceAllocationType::kCUDA_HOST, + DefaultGPUMemoryMapper)); + RETURN_NOT_OK(RegisterDeviceMemoryManager(DeviceAllocationType::kCUDA_MANAGED, + DefaultGPUMemoryMapper)); return Status::OK(); } - std::once_flag cuda_registered; Status RegisterCUDADevice() { @@ -522,6 +524,5 @@ Status RegisterCUDADevice() { return Status::OK(); } - } // namespace cuda } // namespace arrow diff --git a/cpp/src/arrow/gpu/cuda_test.cc b/cpp/src/arrow/gpu/cuda_test.cc index 5b9431200e9..c2d7c7ebbea 100644 --- a/cpp/src/arrow/gpu/cuda_test.cc +++ b/cpp/src/arrow/gpu/cuda_test.cc @@ -716,9 +716,7 @@ class TestCudaDeviceArrayRoundtrip : public ::testing::Test { public: using ArrayFactory = std::function>()>; - void SetUp() { - ASSERT_OK(RegisterCUDADevice()); - } + void SetUp() { ASSERT_OK(RegisterCUDADevice()); } static ArrayFactory JSONArrayFactory(std::shared_ptr type, const char* json) { return [=]() { return ArrayFromJSON(type, json); }; From 337e65f0b7a452e9a64a83e9a0a008dfc8e6d214 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 21 Mar 2024 21:58:20 +0100 Subject: [PATCH 04/14] use static local variable instead of call_once --- cpp/src/arrow/device.cc | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/cpp/src/arrow/device.cc b/cpp/src/arrow/device.cc index aa1a3c9a4a6..2386e75b20e 100644 --- a/cpp/src/arrow/device.cc +++ b/cpp/src/arrow/device.cc @@ -306,21 +306,18 @@ Result> DefaultCPUMemoryMapper(int64_t device_id) return default_cpu_memory_manager(); } -static std::shared_ptr g_registry; -static std::once_flag registry_initialized; - -static void CreateGlobalDeviceRegistry() { - g_registry = std::make_shared(); +static std::unique_ptr CreateDeviceRegistry() { + auto registry = std::make_unique(); // Always register the CPU device + DCHECK_OK(registry->RegisterDevice(DeviceAllocationType::kCPU, DefaultCPUMemoryMapper)); - ARROW_CHECK_OK( - g_registry->RegisterDevice(DeviceAllocationType::kCPU, DefaultCPUMemoryMapper)); + return registry; } -std::shared_ptr GetDeviceRegistry() { - std::call_once(registry_initialized, CreateGlobalDeviceRegistry); - return g_registry; +DeviceMemoryManagerRegistryImpl* GetDeviceRegistry() { + static auto g_registry = CreateDeviceRegistry(); + return g_registry.get(); } } // namespace internal From f33872d7ebb16eed3c8c7a258e32ab71afa6dc32 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 22 Mar 2024 09:48:27 +0100 Subject: [PATCH 05/14] some more docs + basic test for failure cases --- cpp/src/arrow/c/bridge_test.cc | 13 +++++++++++++ cpp/src/arrow/device.h | 14 +++++++++++++- cpp/src/arrow/gpu/cuda_memory.h | 7 +++++++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc index dba6e4736b6..6ca458249b0 100644 --- a/cpp/src/arrow/c/bridge_test.cc +++ b/cpp/src/arrow/c/bridge_test.cc @@ -4330,6 +4330,19 @@ TEST_F(TestDeviceArrayRoundtrip, Struct) { TestWithJSON(mm, type, R"([[4, null], null, [5, "foo"]])"); } +TEST(TestDeviceRegistry, Basics) { + // Test the error cases for the device registry + + // CPU is already registered + ASSERT_RAISES(KeyError, RegisterDeviceMemoryManager( + DeviceAllocationType::kCPU, [](int64_t device_id) { + return default_cpu_memory_manager(); + })); + + // VPI is not registered + ASSERT_RAISES(KeyError, GetDeviceMemoryManager(DeviceAllocationType::kVPI)); +} + //////////////////////////////////////////////////////////////////////////// // Array stream export tests diff --git a/cpp/src/arrow/device.h b/cpp/src/arrow/device.h index 128c4b6f8f6..ca056280703 100644 --- a/cpp/src/arrow/device.h +++ b/cpp/src/arrow/device.h @@ -369,7 +369,12 @@ using MemoryMapper = /// \brief Register a function to retrieve a MemoryManager for a Device type /// /// This registers the device type globally. A specific device type can only -/// be registered once. This method is thread-safe +/// be registered once. This method is thread-safe. +/// +/// Currently, this registry is only used for importing data through the C Device +/// Data Interface (for the default Device to MemoryManager mapper in +/// arrow::ImportDeviceArray/ImportDeviceRecordBatch). +/// /// \param[in] device_type the device type for which to register a MemoryManager /// \param[in] memory_mapper function that takes a device id and returns the appropriate /// MemoryManager for the registered device type and given device id @@ -378,6 +383,13 @@ ARROW_EXPORT Status RegisterDeviceMemoryManager(DeviceAllocationType device_type, MemoryMapper memory_mapper); +/// \brief Get the registered function to retrieve a MemoryManager for the +/// given Device type +/// +/// \param[in] device_type the device type +/// \return function that takes a device id and returns the appropriate +/// MemoryManager for the registered device type and given device id +ARROW_EXPORT ARROW_EXPORT Result GetDeviceMemoryManager(DeviceAllocationType device_type); diff --git a/cpp/src/arrow/gpu/cuda_memory.h b/cpp/src/arrow/gpu/cuda_memory.h index 22216d80d25..0cbb584c241 100644 --- a/cpp/src/arrow/gpu/cuda_memory.h +++ b/cpp/src/arrow/gpu/cuda_memory.h @@ -264,6 +264,13 @@ ARROW_EXPORT Result> DefaultMemoryMapper(ArrowDeviceType device_type, int64_t device_id); +/// \brief Register the CUDA devices (CUDA, CUDA_HOST, CUDA_MANAGED). +/// +/// Currently, this registry is only used for importing data through the C Device +/// Data Interface (for the default Device to MemoryManager mapper in +/// arrow::ImportDeviceArray/ImportDeviceRecordBatch). +/// Before importing CUDA data with the default mapper, this function should +/// be called to ensure the CUDA device types are recognised. ARROW_EXPORT Status RegisterCUDADevice(); From 8c6acabf58fa697e857115b6652f33083b8063b2 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 26 Mar 2024 11:31:46 +0100 Subject: [PATCH 06/14] Apply suggestions from code review Co-authored-by: Antoine Pitrou --- cpp/src/arrow/device.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/device.cc b/cpp/src/arrow/device.cc index 2386e75b20e..7d6e93ae084 100644 --- a/cpp/src/arrow/device.cc +++ b/cpp/src/arrow/device.cc @@ -278,12 +278,11 @@ class DeviceMemoryManagerRegistryImpl { Status RegisterDevice(DeviceAllocationType device_type, MemoryMapper memory_mapper) { std::lock_guard lock(lock_); - auto it = registry_.find(device_type); - if (it != registry_.end()) { + auto [_, inserted] = registry.try_emplace(device_type, std::move(memory_mapper)); + if (!inserted) { return Status::KeyError("Device type ", static_cast(device_type), " is already registered"); } - registry_[device_type] = std::move(memory_mapper); return Status::OK(); } From 1c20d68dc2c547f1d38476de42e43d0aa33de109 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 26 Mar 2024 11:38:07 +0100 Subject: [PATCH 07/14] fixup and move registry tests --- cpp/src/arrow/buffer_test.cc | 13 +++++++++++++ cpp/src/arrow/c/bridge_test.cc | 13 ------------- cpp/src/arrow/device.cc | 2 +- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cpp/src/arrow/buffer_test.cc b/cpp/src/arrow/buffer_test.cc index 13f6ea63b5e..9b71aea1add 100644 --- a/cpp/src/arrow/buffer_test.cc +++ b/cpp/src/arrow/buffer_test.cc @@ -1023,4 +1023,17 @@ TEST(TestBufferConcatenation, EmptyBuffer) { AssertMyBufferEqual(*result, contents); } +TEST(TestDeviceRegistry, Basics) { + // Test the error cases for the device registry + + // CPU is already registered + ASSERT_RAISES(KeyError, RegisterDeviceMemoryManager( + DeviceAllocationType::kCPU, [](int64_t device_id) { + return default_cpu_memory_manager(); + })); + + // VPI is not registered + ASSERT_RAISES(KeyError, GetDeviceMemoryManager(DeviceAllocationType::kVPI)); +} + } // namespace arrow diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc index 6ca458249b0..dba6e4736b6 100644 --- a/cpp/src/arrow/c/bridge_test.cc +++ b/cpp/src/arrow/c/bridge_test.cc @@ -4330,19 +4330,6 @@ TEST_F(TestDeviceArrayRoundtrip, Struct) { TestWithJSON(mm, type, R"([[4, null], null, [5, "foo"]])"); } -TEST(TestDeviceRegistry, Basics) { - // Test the error cases for the device registry - - // CPU is already registered - ASSERT_RAISES(KeyError, RegisterDeviceMemoryManager( - DeviceAllocationType::kCPU, [](int64_t device_id) { - return default_cpu_memory_manager(); - })); - - // VPI is not registered - ASSERT_RAISES(KeyError, GetDeviceMemoryManager(DeviceAllocationType::kVPI)); -} - //////////////////////////////////////////////////////////////////////////// // Array stream export tests diff --git a/cpp/src/arrow/device.cc b/cpp/src/arrow/device.cc index 7d6e93ae084..41fe5ba01c4 100644 --- a/cpp/src/arrow/device.cc +++ b/cpp/src/arrow/device.cc @@ -278,7 +278,7 @@ class DeviceMemoryManagerRegistryImpl { Status RegisterDevice(DeviceAllocationType device_type, MemoryMapper memory_mapper) { std::lock_guard lock(lock_); - auto [_, inserted] = registry.try_emplace(device_type, std::move(memory_mapper)); + auto [_, inserted] = registry_.try_emplace(device_type, std::move(memory_mapper)); if (!inserted) { return Status::KeyError("Device type ", static_cast(device_type), " is already registered"); From 76ee5b868d633a717bfba81cee582e273e7c0169 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 26 Mar 2024 11:43:36 +0000 Subject: [PATCH 08/14] remove support for CUDA_HOST and CUDA_MANAGED --- cpp/src/arrow/gpu/cuda_memory.cc | 6 ++---- cpp/src/arrow/gpu/cuda_memory.h | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/gpu/cuda_memory.cc b/cpp/src/arrow/gpu/cuda_memory.cc index 3051c25bd8f..b052b4ac9bb 100644 --- a/cpp/src/arrow/gpu/cuda_memory.cc +++ b/cpp/src/arrow/gpu/cuda_memory.cc @@ -510,10 +510,8 @@ Result> DefaultGPUMemoryMapper(int64_t device_id) Status RegisterCUDADeviceInternal() { RETURN_NOT_OK( RegisterDeviceMemoryManager(DeviceAllocationType::kCUDA, DefaultGPUMemoryMapper)); - RETURN_NOT_OK(RegisterDeviceMemoryManager(DeviceAllocationType::kCUDA_HOST, - DefaultGPUMemoryMapper)); - RETURN_NOT_OK(RegisterDeviceMemoryManager(DeviceAllocationType::kCUDA_MANAGED, - DefaultGPUMemoryMapper)); + // TODO add the CUDA_HOST and CUDA_MANAGED allocation types when they are supported in + // the CudaDevice return Status::OK(); } diff --git a/cpp/src/arrow/gpu/cuda_memory.h b/cpp/src/arrow/gpu/cuda_memory.h index 0cbb584c241..d039e9f4645 100644 --- a/cpp/src/arrow/gpu/cuda_memory.h +++ b/cpp/src/arrow/gpu/cuda_memory.h @@ -264,7 +264,7 @@ ARROW_EXPORT Result> DefaultMemoryMapper(ArrowDeviceType device_type, int64_t device_id); -/// \brief Register the CUDA devices (CUDA, CUDA_HOST, CUDA_MANAGED). +/// \brief Register the CUDA device (only CUDA, not CUDA_HOST or CUDA_MANAGED). /// /// Currently, this registry is only used for importing data through the C Device /// Data Interface (for the default Device to MemoryManager mapper in From 0176f58ad6c125b04505a47d5edfea272a556447 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 26 Mar 2024 11:59:58 +0000 Subject: [PATCH 09/14] register CUDA by default, remove public RegisterCUDADevice --- cpp/src/arrow/gpu/cuda_memory.cc | 15 +++++++-------- cpp/src/arrow/gpu/cuda_memory.h | 10 ---------- cpp/src/arrow/gpu/cuda_test.cc | 2 -- 3 files changed, 7 insertions(+), 20 deletions(-) diff --git a/cpp/src/arrow/gpu/cuda_memory.cc b/cpp/src/arrow/gpu/cuda_memory.cc index b052b4ac9bb..89eb8abc352 100644 --- a/cpp/src/arrow/gpu/cuda_memory.cc +++ b/cpp/src/arrow/gpu/cuda_memory.cc @@ -502,25 +502,24 @@ Result> DefaultMemoryMapper(ArrowDeviceType devic } } +namespace { + Result> DefaultGPUMemoryMapper(int64_t device_id) { ARROW_ASSIGN_OR_RAISE(auto device, arrow::cuda::CudaDevice::Make(device_id)); return device->default_memory_manager(); } -Status RegisterCUDADeviceInternal() { - RETURN_NOT_OK( +bool RegisterCUDADeviceInternal() { + DCHECK_OK( RegisterDeviceMemoryManager(DeviceAllocationType::kCUDA, DefaultGPUMemoryMapper)); // TODO add the CUDA_HOST and CUDA_MANAGED allocation types when they are supported in // the CudaDevice - return Status::OK(); + return true; } -std::once_flag cuda_registered; +static auto cuda_registered = RegisterCUDADeviceInternal(); -Status RegisterCUDADevice() { - std::call_once(cuda_registered, RegisterCUDADeviceInternal); - return Status::OK(); -} +} // namespace } // namespace cuda } // namespace arrow diff --git a/cpp/src/arrow/gpu/cuda_memory.h b/cpp/src/arrow/gpu/cuda_memory.h index d039e9f4645..d323bef0349 100644 --- a/cpp/src/arrow/gpu/cuda_memory.h +++ b/cpp/src/arrow/gpu/cuda_memory.h @@ -264,15 +264,5 @@ ARROW_EXPORT Result> DefaultMemoryMapper(ArrowDeviceType device_type, int64_t device_id); -/// \brief Register the CUDA device (only CUDA, not CUDA_HOST or CUDA_MANAGED). -/// -/// Currently, this registry is only used for importing data through the C Device -/// Data Interface (for the default Device to MemoryManager mapper in -/// arrow::ImportDeviceArray/ImportDeviceRecordBatch). -/// Before importing CUDA data with the default mapper, this function should -/// be called to ensure the CUDA device types are recognised. -ARROW_EXPORT -Status RegisterCUDADevice(); - } // namespace cuda } // namespace arrow diff --git a/cpp/src/arrow/gpu/cuda_test.cc b/cpp/src/arrow/gpu/cuda_test.cc index c2d7c7ebbea..4c450bf3899 100644 --- a/cpp/src/arrow/gpu/cuda_test.cc +++ b/cpp/src/arrow/gpu/cuda_test.cc @@ -716,8 +716,6 @@ class TestCudaDeviceArrayRoundtrip : public ::testing::Test { public: using ArrayFactory = std::function>()>; - void SetUp() { ASSERT_OK(RegisterCUDADevice()); } - static ArrayFactory JSONArrayFactory(std::shared_ptr type, const char* json) { return [=]() { return ArrayFromJSON(type, json); }; } From 92ece263ea694808f3e34da4164476cedbc89563 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 26 Mar 2024 12:04:49 +0000 Subject: [PATCH 10/14] address general feedback --- cpp/src/arrow/device.cc | 8 ++++---- cpp/src/arrow/device.h | 1 - cpp/src/arrow/gpu/cuda_memory.cc | 4 ++-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/device.cc b/cpp/src/arrow/device.cc index 41fe5ba01c4..0e8420099a6 100644 --- a/cpp/src/arrow/device.cc +++ b/cpp/src/arrow/device.cc @@ -270,7 +270,7 @@ std::shared_ptr CPUDevice::default_memory_manager() { return default_cpu_memory_manager(); } -namespace internal { +namespace { class DeviceMemoryManagerRegistryImpl { public: @@ -319,16 +319,16 @@ DeviceMemoryManagerRegistryImpl* GetDeviceRegistry() { return g_registry.get(); } -} // namespace internal +} // namespace Status RegisterDeviceMemoryManager(DeviceAllocationType device_type, MemoryMapper memory_mapper) { - auto registry = internal::GetDeviceRegistry(); + auto registry = GetDeviceRegistry(); return registry->RegisterDevice(device_type, std::move(memory_mapper)); } Result GetDeviceMemoryManager(DeviceAllocationType device_type) { - auto registry = internal::GetDeviceRegistry(); + auto registry = GetDeviceRegistry(); return registry->GetMemoryManager(device_type); } diff --git a/cpp/src/arrow/device.h b/cpp/src/arrow/device.h index ca056280703..f15e078c2af 100644 --- a/cpp/src/arrow/device.h +++ b/cpp/src/arrow/device.h @@ -390,7 +390,6 @@ Status RegisterDeviceMemoryManager(DeviceAllocationType device_type, /// \return function that takes a device id and returns the appropriate /// MemoryManager for the registered device type and given device id ARROW_EXPORT -ARROW_EXPORT Result GetDeviceMemoryManager(DeviceAllocationType device_type); } // namespace arrow diff --git a/cpp/src/arrow/gpu/cuda_memory.cc b/cpp/src/arrow/gpu/cuda_memory.cc index 89eb8abc352..7da7b2f5050 100644 --- a/cpp/src/arrow/gpu/cuda_memory.cc +++ b/cpp/src/arrow/gpu/cuda_memory.cc @@ -504,14 +504,14 @@ Result> DefaultMemoryMapper(ArrowDeviceType devic namespace { -Result> DefaultGPUMemoryMapper(int64_t device_id) { +Result> DefaultCUDAMemoryMapper(int64_t device_id) { ARROW_ASSIGN_OR_RAISE(auto device, arrow::cuda::CudaDevice::Make(device_id)); return device->default_memory_manager(); } bool RegisterCUDADeviceInternal() { DCHECK_OK( - RegisterDeviceMemoryManager(DeviceAllocationType::kCUDA, DefaultGPUMemoryMapper)); + RegisterDeviceMemoryManager(DeviceAllocationType::kCUDA, DefaultCUDAMemoryMapper)); // TODO add the CUDA_HOST and CUDA_MANAGED allocation types when they are supported in // the CudaDevice return true; From 7a9e30dec1fac289a28c1f358f326cbc943b0204 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 26 Mar 2024 13:34:26 +0100 Subject: [PATCH 11/14] deprecate arrow::cuda::DefaultMemoryMapper --- cpp/src/arrow/gpu/cuda_memory.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/gpu/cuda_memory.h b/cpp/src/arrow/gpu/cuda_memory.h index d323bef0349..488f4183730 100644 --- a/cpp/src/arrow/gpu/cuda_memory.h +++ b/cpp/src/arrow/gpu/cuda_memory.h @@ -260,7 +260,9 @@ Result GetDeviceAddress(const uint8_t* cpu_data, ARROW_EXPORT Result GetHostAddress(uintptr_t device_ptr); -ARROW_EXPORT +ARROW_DEPRECATED( + "Deprecated in 16.0.0. The CUDA device is registered by default, and you can use " + "arrow::DefaultDeviceMapper instead.") Result> DefaultMemoryMapper(ArrowDeviceType device_type, int64_t device_id); From a5a6f6cf00f78be380962647ad40a6be2a455479 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 27 Mar 2024 09:42:03 +0100 Subject: [PATCH 12/14] renaming --- cpp/src/arrow/buffer_test.cc | 10 +++++----- cpp/src/arrow/c/bridge.cc | 8 ++++---- cpp/src/arrow/c/bridge.h | 12 ++++++------ cpp/src/arrow/device.cc | 30 +++++++++++++++--------------- cpp/src/arrow/device.h | 7 +++---- cpp/src/arrow/gpu/cuda_memory.cc | 5 ++--- 6 files changed, 35 insertions(+), 37 deletions(-) diff --git a/cpp/src/arrow/buffer_test.cc b/cpp/src/arrow/buffer_test.cc index 9b71aea1add..06ed0bfba04 100644 --- a/cpp/src/arrow/buffer_test.cc +++ b/cpp/src/arrow/buffer_test.cc @@ -1027,13 +1027,13 @@ TEST(TestDeviceRegistry, Basics) { // Test the error cases for the device registry // CPU is already registered - ASSERT_RAISES(KeyError, RegisterDeviceMemoryManager( - DeviceAllocationType::kCPU, [](int64_t device_id) { - return default_cpu_memory_manager(); - })); + ASSERT_RAISES(KeyError, + RegisterDeviceMapper(DeviceAllocationType::kCPU, [](int64_t device_id) { + return default_cpu_memory_manager(); + })); // VPI is not registered - ASSERT_RAISES(KeyError, GetDeviceMemoryManager(DeviceAllocationType::kVPI)); + ASSERT_RAISES(KeyError, GetDeviceMapper(DeviceAllocationType::kVPI)); } } // namespace arrow diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc index e825f34e712..d004de7a2ea 100644 --- a/cpp/src/arrow/c/bridge.cc +++ b/cpp/src/arrow/c/bridge.cc @@ -1967,10 +1967,10 @@ Result> ImportRecordBatch(struct ArrowArray* array, return ImportRecordBatch(array, *maybe_schema); } -Result> DefaultDeviceMapper(ArrowDeviceType device_type, - int64_t device_id) { - ARROW_ASSIGN_OR_RAISE(auto mapper, GetDeviceMemoryManager( - static_cast(device_type))); +Result> DefaultDeviceMemoryMapper( + ArrowDeviceType device_type, int64_t device_id) { + ARROW_ASSIGN_OR_RAISE(auto mapper, + GetDeviceMapper(static_cast(device_type))); return mapper(device_id); } diff --git a/cpp/src/arrow/c/bridge.h b/cpp/src/arrow/c/bridge.h index 0ced3d38cd1..74a302be4c2 100644 --- a/cpp/src/arrow/c/bridge.h +++ b/cpp/src/arrow/c/bridge.h @@ -219,8 +219,8 @@ using DeviceMemoryMapper = std::function>(ArrowDeviceType, int64_t)>; ARROW_EXPORT -Result> DefaultDeviceMapper(ArrowDeviceType device_type, - int64_t device_id); +Result> DefaultDeviceMemoryMapper( + ArrowDeviceType device_type, int64_t device_id); /// \brief EXPERIMENTAL: Import C++ device array from the C data interface. /// @@ -236,7 +236,7 @@ Result> DefaultDeviceMapper(ArrowDeviceType devic ARROW_EXPORT Result> ImportDeviceArray( struct ArrowDeviceArray* array, std::shared_ptr type, - const DeviceMemoryMapper& mapper = DefaultDeviceMapper); + const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper); /// \brief EXPERIMENTAL: Import C++ device array and its type from the C data interface. /// @@ -253,7 +253,7 @@ Result> ImportDeviceArray( ARROW_EXPORT Result> ImportDeviceArray( struct ArrowDeviceArray* array, struct ArrowSchema* type, - const DeviceMemoryMapper& mapper = DefaultDeviceMapper); + const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper); /// \brief EXPERIMENTAL: Import C++ record batch with buffers on a device from the C data /// interface. @@ -271,7 +271,7 @@ Result> ImportDeviceArray( ARROW_EXPORT Result> ImportDeviceRecordBatch( struct ArrowDeviceArray* array, std::shared_ptr schema, - const DeviceMemoryMapper& mapper = DefaultDeviceMapper); + const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper); /// \brief EXPERIMENTAL: Import C++ record batch with buffers on a device and its schema /// from the C data interface. @@ -291,7 +291,7 @@ Result> ImportDeviceRecordBatch( ARROW_EXPORT Result> ImportDeviceRecordBatch( struct ArrowDeviceArray* array, struct ArrowSchema* schema, - const DeviceMemoryMapper& mapper = DefaultDeviceMapper); + const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper); /// @} diff --git a/cpp/src/arrow/device.cc b/cpp/src/arrow/device.cc index 0e8420099a6..172136df2c9 100644 --- a/cpp/src/arrow/device.cc +++ b/cpp/src/arrow/device.cc @@ -272,11 +272,11 @@ std::shared_ptr CPUDevice::default_memory_manager() { namespace { -class DeviceMemoryManagerRegistryImpl { +class DeviceMapperRegistryImpl { public: - DeviceMemoryManagerRegistryImpl() {} + DeviceMapperRegistryImpl() {} - Status RegisterDevice(DeviceAllocationType device_type, MemoryMapper memory_mapper) { + Status RegisterDevice(DeviceAllocationType device_type, DeviceMapper memory_mapper) { std::lock_guard lock(lock_); auto [_, inserted] = registry_.try_emplace(device_type, std::move(memory_mapper)); if (!inserted) { @@ -286,7 +286,7 @@ class DeviceMemoryManagerRegistryImpl { return Status::OK(); } - Result GetMemoryManager(DeviceAllocationType device_type) { + Result GetMapper(DeviceAllocationType device_type) { std::lock_guard lock(lock_); auto it = registry_.find(device_type); if (it == registry_.end()) { @@ -298,38 +298,38 @@ class DeviceMemoryManagerRegistryImpl { private: std::mutex lock_; - std::unordered_map registry_; + std::unordered_map registry_; }; -Result> DefaultCPUMemoryMapper(int64_t device_id) { +Result> DefaultCPUDeviceMapper(int64_t device_id) { return default_cpu_memory_manager(); } -static std::unique_ptr CreateDeviceRegistry() { - auto registry = std::make_unique(); +static std::unique_ptr CreateDeviceRegistry() { + auto registry = std::make_unique(); // Always register the CPU device - DCHECK_OK(registry->RegisterDevice(DeviceAllocationType::kCPU, DefaultCPUMemoryMapper)); + DCHECK_OK(registry->RegisterDevice(DeviceAllocationType::kCPU, DefaultCPUDeviceMapper)); return registry; } -DeviceMemoryManagerRegistryImpl* GetDeviceRegistry() { +DeviceMapperRegistryImpl* GetDeviceRegistry() { static auto g_registry = CreateDeviceRegistry(); return g_registry.get(); } } // namespace -Status RegisterDeviceMemoryManager(DeviceAllocationType device_type, - MemoryMapper memory_mapper) { +Status RegisterDeviceMapper(DeviceAllocationType device_type, + DeviceMapper mapper) { auto registry = GetDeviceRegistry(); - return registry->RegisterDevice(device_type, std::move(memory_mapper)); + return registry->RegisterDevice(device_type, std::move(mapper)); } -Result GetDeviceMemoryManager(DeviceAllocationType device_type) { +Result GetDeviceMapper(DeviceAllocationType device_type) { auto registry = GetDeviceRegistry(); - return registry->GetMemoryManager(device_type); + return registry->GetMapper(device_type); } } // namespace arrow diff --git a/cpp/src/arrow/device.h b/cpp/src/arrow/device.h index f15e078c2af..b4d19c95129 100644 --- a/cpp/src/arrow/device.h +++ b/cpp/src/arrow/device.h @@ -363,7 +363,7 @@ class ARROW_EXPORT CPUMemoryManager : public MemoryManager { ARROW_EXPORT std::shared_ptr default_cpu_memory_manager(); -using MemoryMapper = +using DeviceMapper = std::function>(int64_t device_id)>; /// \brief Register a function to retrieve a MemoryManager for a Device type @@ -380,8 +380,7 @@ using MemoryMapper = /// MemoryManager for the registered device type and given device id /// \return Status ARROW_EXPORT -Status RegisterDeviceMemoryManager(DeviceAllocationType device_type, - MemoryMapper memory_mapper); +Status RegisterDeviceMapper(DeviceAllocationType device_type, DeviceMapper mapper); /// \brief Get the registered function to retrieve a MemoryManager for the /// given Device type @@ -390,6 +389,6 @@ Status RegisterDeviceMemoryManager(DeviceAllocationType device_type, /// \return function that takes a device id and returns the appropriate /// MemoryManager for the registered device type and given device id ARROW_EXPORT -Result GetDeviceMemoryManager(DeviceAllocationType device_type); +Result GetDeviceMapper(DeviceAllocationType device_type); } // namespace arrow diff --git a/cpp/src/arrow/gpu/cuda_memory.cc b/cpp/src/arrow/gpu/cuda_memory.cc index 7da7b2f5050..6972321006a 100644 --- a/cpp/src/arrow/gpu/cuda_memory.cc +++ b/cpp/src/arrow/gpu/cuda_memory.cc @@ -504,14 +504,13 @@ Result> DefaultMemoryMapper(ArrowDeviceType devic namespace { -Result> DefaultCUDAMemoryMapper(int64_t device_id) { +Result> DefaultCUDADeviceMapper(int64_t device_id) { ARROW_ASSIGN_OR_RAISE(auto device, arrow::cuda::CudaDevice::Make(device_id)); return device->default_memory_manager(); } bool RegisterCUDADeviceInternal() { - DCHECK_OK( - RegisterDeviceMemoryManager(DeviceAllocationType::kCUDA, DefaultCUDAMemoryMapper)); + DCHECK_OK(RegisterDeviceMapper(DeviceAllocationType::kCUDA, DefaultCUDADeviceMapper)); // TODO add the CUDA_HOST and CUDA_MANAGED allocation types when they are supported in // the CudaDevice return true; From 6805f02defc85728bf79e1958cbac40e7eabb8f4 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 27 Mar 2024 09:55:30 +0100 Subject: [PATCH 13/14] fix doc param --- cpp/src/arrow/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/device.h b/cpp/src/arrow/device.h index b4d19c95129..622551c6bd0 100644 --- a/cpp/src/arrow/device.h +++ b/cpp/src/arrow/device.h @@ -376,7 +376,7 @@ using DeviceMapper = /// arrow::ImportDeviceArray/ImportDeviceRecordBatch). /// /// \param[in] device_type the device type for which to register a MemoryManager -/// \param[in] memory_mapper function that takes a device id and returns the appropriate +/// \param[in] mapper function that takes a device id and returns the appropriate /// MemoryManager for the registered device type and given device id /// \return Status ARROW_EXPORT From a73f4a542601ac89a910a3bf5153c260f92bc243 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 27 Mar 2024 10:05:42 +0100 Subject: [PATCH 14/14] lint --- cpp/src/arrow/device.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/src/arrow/device.cc b/cpp/src/arrow/device.cc index 172136df2c9..98b8f7b3039 100644 --- a/cpp/src/arrow/device.cc +++ b/cpp/src/arrow/device.cc @@ -321,8 +321,7 @@ DeviceMapperRegistryImpl* GetDeviceRegistry() { } // namespace -Status RegisterDeviceMapper(DeviceAllocationType device_type, - DeviceMapper mapper) { +Status RegisterDeviceMapper(DeviceAllocationType device_type, DeviceMapper mapper) { auto registry = GetDeviceRegistry(); return registry->RegisterDevice(device_type, std::move(mapper)); }