From 2970a07c90f448f6c28bb8df5fa11f321c5d8395 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 3 Apr 2026 13:51:52 -0500 Subject: [PATCH 1/2] Migrate RMM usage to CCCL MR design Remove dependency on rmm::mr::device_memory_resource base class. Resources now satisfy the cuda::mr::resource concept directly. - Replace shared_ptr with value types and cuda::mr::any_resource for type-erased storage - Replace set_current_device_resource(ptr) with set_current_device_resource_ref - Replace set_per_device_resource(id, ptr) with set_per_device_resource_ref - Remove make_owning_wrapper usage - Remove dynamic_cast on memory resources (no common base class) - Remove owning_wrapper.hpp and device_memory_resource.hpp includes - Add missing thrust/iterator/transform_output_iterator.h include (no longer transitively included via CCCL) --- cpp/cuopt_cli.cpp | 7 +++--- cpp/src/barrier/sparse_cholesky.cuh | 4 ++-- .../infeasibility_information.cu | 2 ++ cpp/src/routing/ges_solver.cu | 2 -- cpp/src/utilities/cuda_helpers.cuh | 20 +---------------- cpp/tests/mip/load_balancing_test.cu | 4 ++-- cpp/tests/mip/multi_probe_test.cu | 4 ++-- cpp/tests/utilities/base_fixture.hpp | 22 +++++++++---------- 8 files changed, 24 insertions(+), 41 deletions(-) diff --git a/cpp/cuopt_cli.cpp b/cpp/cuopt_cli.cpp index 899a3118b3..2cb18cc1c5 100644 --- a/cpp/cuopt_cli.cpp +++ b/cpp/cuopt_cli.cpp @@ -376,7 +376,7 @@ int main(int argc, char* argv[]) // Only initialize CUDA resources if using GPU memory backend (not remote execution) auto memory_backend = cuopt::linear_programming::get_memory_backend_type(); - std::vector> memory_resources; + std::vector memory_resources; if (memory_backend == cuopt::linear_programming::memory_backend_t::GPU) { // All arguments are parsed as string, default values are parsed as int if unused. @@ -384,10 +384,11 @@ int main(int argc, char* argv[]) ? std::stoi(program.get("--num-gpus")) : program.get("--num-gpus"); + memory_resources.reserve(std::min(raft::device_setter::get_device_count(), num_gpus)); for (int i = 0; i < std::min(raft::device_setter::get_device_count(), num_gpus); ++i) { RAFT_CUDA_TRY(cudaSetDevice(i)); - memory_resources.push_back(make_async()); - rmm::mr::set_per_device_resource(rmm::cuda_device_id{i}, memory_resources.back().get()); + memory_resources.emplace_back(); + rmm::mr::set_per_device_resource_ref(rmm::cuda_device_id{i}, memory_resources.back()); } RAFT_CUDA_TRY(cudaSetDevice(0)); } diff --git a/cpp/src/barrier/sparse_cholesky.cuh b/cpp/src/barrier/sparse_cholesky.cuh index f7938fb989..52fea89502 100644 --- a/cpp/src/barrier/sparse_cholesky.cuh +++ b/cpp/src/barrier/sparse_cholesky.cuh @@ -247,8 +247,8 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { CUDSS_CALL_AND_CHECK_EXIT(cudssSetStream(handle, stream), status, "cudaStreamCreate"); mem_handler.ctx = reinterpret_cast(handle_ptr_->get_workspace_resource()); - mem_handler.device_alloc = cudss_device_alloc; - mem_handler.device_free = cudss_device_dealloc; + mem_handler.device_alloc = cudss_device_alloc; + mem_handler.device_free = cudss_device_dealloc; CUDSS_CALL_AND_CHECK_EXIT( cudssSetDeviceMemHandler(handle, &mem_handler), status, "cudssSetDeviceMemHandler"); diff --git a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu index dbb35b732d..51b702eae1 100644 --- a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu +++ b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu @@ -15,6 +15,8 @@ #include +#include + #include #include #include diff --git a/cpp/src/routing/ges_solver.cu b/cpp/src/routing/ges_solver.cu index 194f73b99e..a660f84909 100644 --- a/cpp/src/routing/ges_solver.cu +++ b/cpp/src/routing/ges_solver.cu @@ -16,8 +16,6 @@ #include "adapters/assignment_adapter.cuh" #include "ges/guided_ejection_search.cuh" -#include - namespace cuopt { namespace routing { diff --git a/cpp/src/utilities/cuda_helpers.cuh b/cpp/src/utilities/cuda_helpers.cuh index 946099648d..80e7b730db 100644 --- a/cpp/src/utilities/cuda_helpers.cuh +++ b/cpp/src/utilities/cuda_helpers.cuh @@ -16,8 +16,6 @@ #include #include #include -#include -#include #include namespace cuopt { @@ -216,25 +214,9 @@ DI void sorted_insert(T* array, T item, int curr_size, int max_size) inline size_t get_device_memory_size() { - // Otherwise, we need to get the free memory from the device size_t free_mem, total_mem; cudaMemGetInfo(&free_mem, &total_mem); - - auto res = rmm::mr::get_current_device_resource(); - auto limiting_adaptor = - dynamic_cast*>(res); - // Did we specifiy an explicit memory limit? - if (limiting_adaptor) { - printf("limiting_adaptor->get_allocation_limit(): %fMiB\n", - limiting_adaptor->get_allocation_limit() / (double)1e6); - printf("used_mem: %fMiB\n", limiting_adaptor->get_allocated_bytes() / (double)1e6); - printf("free_mem: %fMiB\n", - (limiting_adaptor->get_allocation_limit() - limiting_adaptor->get_allocated_bytes()) / - (double)1e6); - return std::min(total_mem, limiting_adaptor->get_allocation_limit()); - } else { - return total_mem; - } + return total_mem; } } // namespace cuopt diff --git a/cpp/tests/mip/load_balancing_test.cu b/cpp/tests/mip/load_balancing_test.cu index 5e2f08007d..991a3072c3 100644 --- a/cpp/tests/mip/load_balancing_test.cu +++ b/cpp/tests/mip/load_balancing_test.cu @@ -32,7 +32,7 @@ namespace cuopt::linear_programming::test { -inline auto make_async() { return std::make_shared(); } +inline auto make_async() { return rmm::mr::cuda_async_memory_resource(); } void init_handler(const raft::handle_t* handle_ptr) { @@ -119,7 +119,7 @@ bounds_probe_results(detail::bound_presolve_t& bnd_prb_0, void test_multi_probe(std::string path) { auto memory_resource = make_async(); - rmm::mr::set_current_device_resource(memory_resource.get()); + rmm::mr::set_current_device_resource_ref(memory_resource); const raft::handle_t handle_{}; cuopt::mps_parser::mps_data_model_t mps_problem = cuopt::mps_parser::parse_mps(path, false); diff --git a/cpp/tests/mip/multi_probe_test.cu b/cpp/tests/mip/multi_probe_test.cu index 073c153486..2910cb4a44 100644 --- a/cpp/tests/mip/multi_probe_test.cu +++ b/cpp/tests/mip/multi_probe_test.cu @@ -31,7 +31,7 @@ namespace cuopt::linear_programming::test { -inline auto make_async() { return std::make_shared(); } +inline auto make_async() { return rmm::mr::cuda_async_memory_resource(); } void init_handler(const raft::handle_t* handle_ptr) { @@ -141,7 +141,7 @@ multi_probe_results( void test_multi_probe(std::string path) { auto memory_resource = make_async(); - rmm::mr::set_current_device_resource(memory_resource.get()); + rmm::mr::set_current_device_resource_ref(memory_resource); const raft::handle_t handle_{}; cuopt::mps_parser::mps_data_model_t mps_problem = cuopt::mps_parser::parse_mps(path, false); diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp index abc69627df..c9c15ae04d 100644 --- a/cpp/tests/utilities/base_fixture.hpp +++ b/cpp/tests/utilities/base_fixture.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -13,11 +13,12 @@ #include +#include + #include #include #include #include -#include #include #include @@ -25,18 +26,18 @@ namespace cuopt { namespace test { /// MR factory functions -inline auto make_cuda() { return std::make_shared(); } +inline auto make_cuda() { return rmm::mr::cuda_memory_resource(); } -inline auto make_async() { return std::make_shared(); } +inline auto make_async() { return rmm::mr::cuda_async_memory_resource(); } -inline auto make_managed() { return std::make_shared(); } +inline auto make_managed() { return rmm::mr::managed_memory_resource(); } inline auto make_pool() { // 1GB of initial pool size const size_t initial_pool_size = 1024 * 1024 * 1024; - return rmm::mr::make_owning_wrapper(make_async(), - initial_pool_size); + auto upstream = make_async(); + return rmm::mr::pool_memory_resource(upstream, initial_pool_size); } inline auto make_binning() @@ -44,8 +45,7 @@ inline auto make_binning() auto pool = make_pool(); // Add a fixed_size_memory_resource for bins of size 256, 512, 1024, 2048 and // 4096KiB Larger allocations will use the pool resource - auto mr = rmm::mr::make_owning_wrapper(pool, 18, 22); - return mr; + return rmm::mr::binning_memory_resource(pool, 18, 22); } /** @@ -62,7 +62,7 @@ inline auto make_binning() * Accepted types are "pool", "cuda", and "managed" only. * @return Memory resource instance */ -inline std::shared_ptr create_memory_resource( +inline cuda::mr::any_resource create_memory_resource( std::string const& allocation_mode) { if (allocation_mode == "binning") return make_binning(); @@ -120,6 +120,6 @@ inline auto parse_test_options(int argc, char** argv) auto const cmd_opts = parse_test_options(argc, argv); \ auto const rmm_mode = cmd_opts["rmm_mode"].as(); \ auto resource = cuopt::test::create_memory_resource(rmm_mode); \ - rmm::mr::set_current_device_resource(resource.get()); \ + rmm::mr::set_current_device_resource_ref(resource); \ return RUN_ALL_TESTS(); \ } From 31a6eabcdb1a11fd8fef36af50146af5606d2b5e Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 15 Apr 2026 11:42:09 -0500 Subject: [PATCH 2/2] Inline upstream memory resource variable in test fixture MR composition --- cpp/tests/utilities/base_fixture.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp index c9c15ae04d..c4cd0e2575 100644 --- a/cpp/tests/utilities/base_fixture.hpp +++ b/cpp/tests/utilities/base_fixture.hpp @@ -36,8 +36,7 @@ inline auto make_pool() { // 1GB of initial pool size const size_t initial_pool_size = 1024 * 1024 * 1024; - auto upstream = make_async(); - return rmm::mr::pool_memory_resource(upstream, initial_pool_size); + return rmm::mr::pool_memory_resource(make_async(), initial_pool_size); } inline auto make_binning()