diff --git a/cpp/cuopt_cli.cpp b/cpp/cuopt_cli.cpp index ac568e07cf..0847a2fd11 100644 --- a/cpp/cuopt_cli.cpp +++ b/cpp/cuopt_cli.cpp @@ -415,15 +415,16 @@ int main(int argc, char* argv[]) // Only initialize CUDA resources if using GPU memory backend (not remote execution) auto memory_backend = cuopt::linear_programming::get_memory_backend_type(); - std::vector> memory_resources; + std::vector memory_resources; if (memory_backend == cuopt::linear_programming::memory_backend_t::GPU) { const int num_gpus = settings.get_parameter(CUOPT_NUM_GPUS); + memory_resources.reserve(std::min(raft::device_setter::get_device_count(), num_gpus)); for (int i = 0; i < std::min(raft::device_setter::get_device_count(), num_gpus); ++i) { RAFT_CUDA_TRY(cudaSetDevice(i)); - memory_resources.push_back(make_async()); - rmm::mr::set_per_device_resource(rmm::cuda_device_id{i}, memory_resources.back().get()); + memory_resources.emplace_back(); + rmm::mr::set_per_device_resource_ref(rmm::cuda_device_id{i}, memory_resources.back()); } RAFT_CUDA_TRY(cudaSetDevice(0)); } diff --git a/cpp/src/barrier/sparse_cholesky.cuh b/cpp/src/barrier/sparse_cholesky.cuh index f7938fb989..52fea89502 100644 --- a/cpp/src/barrier/sparse_cholesky.cuh +++ b/cpp/src/barrier/sparse_cholesky.cuh @@ -247,8 +247,8 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { CUDSS_CALL_AND_CHECK_EXIT(cudssSetStream(handle, stream), status, "cudaStreamCreate"); mem_handler.ctx = reinterpret_cast(handle_ptr_->get_workspace_resource()); - mem_handler.device_alloc = cudss_device_alloc; - mem_handler.device_free = cudss_device_dealloc; + mem_handler.device_alloc = cudss_device_alloc; + mem_handler.device_free = cudss_device_dealloc; CUDSS_CALL_AND_CHECK_EXIT( cudssSetDeviceMemHandler(handle, &mem_handler), status, "cudssSetDeviceMemHandler"); diff --git a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu index 37972ba442..f795d2c4ca 100644 --- a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu +++ b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu @@ -15,6 +15,8 @@ #include +#include + #include #include #include diff --git a/cpp/src/routing/ges_solver.cu b/cpp/src/routing/ges_solver.cu index 194f73b99e..a660f84909 100644 --- a/cpp/src/routing/ges_solver.cu +++ b/cpp/src/routing/ges_solver.cu @@ -16,8 +16,6 @@ #include "adapters/assignment_adapter.cuh" #include "ges/guided_ejection_search.cuh" -#include - namespace cuopt { namespace routing { diff --git a/cpp/src/utilities/cuda_helpers.cuh b/cpp/src/utilities/cuda_helpers.cuh index 84c0a590be..2ed4c5a39b 100644 --- a/cpp/src/utilities/cuda_helpers.cuh +++ b/cpp/src/utilities/cuda_helpers.cuh @@ -16,8 +16,6 @@ #include #include #include -#include -#include #include #include @@ -242,25 +240,9 @@ DI void sorted_insert(T* array, T item, int curr_size, int max_size) inline size_t get_device_memory_size() { - // Otherwise, we need to get the free memory from the device size_t free_mem, total_mem; cudaMemGetInfo(&free_mem, &total_mem); - - auto res = rmm::mr::get_current_device_resource(); - auto limiting_adaptor = - dynamic_cast*>(res); - // Did we specifiy an explicit memory limit? - if (limiting_adaptor) { - printf("limiting_adaptor->get_allocation_limit(): %fMiB\n", - limiting_adaptor->get_allocation_limit() / (double)1e6); - printf("used_mem: %fMiB\n", limiting_adaptor->get_allocated_bytes() / (double)1e6); - printf("free_mem: %fMiB\n", - (limiting_adaptor->get_allocation_limit() - limiting_adaptor->get_allocated_bytes()) / - (double)1e6); - return std::min(total_mem, limiting_adaptor->get_allocation_limit()); - } else { - return total_mem; - } + return total_mem; } } // namespace cuopt diff --git a/cpp/tests/mip/load_balancing_test.cu b/cpp/tests/mip/load_balancing_test.cu index 1f825a26f7..909db285d8 100644 --- a/cpp/tests/mip/load_balancing_test.cu +++ b/cpp/tests/mip/load_balancing_test.cu @@ -32,7 +32,7 @@ namespace cuopt::linear_programming::test { -inline auto make_async() { return std::make_shared(); } +inline auto make_async() { return rmm::mr::cuda_async_memory_resource(); } void init_handler(const raft::handle_t* handle_ptr) { @@ -119,7 +119,7 @@ bounds_probe_results(detail::bound_presolve_t& bnd_prb_0, void test_multi_probe(std::string path) { auto memory_resource = make_async(); - rmm::mr::set_current_device_resource(memory_resource.get()); + rmm::mr::set_current_device_resource_ref(memory_resource); const raft::handle_t handle_{}; cuopt::mps_parser::mps_data_model_t mps_problem = cuopt::mps_parser::parse_mps(path, false); diff --git a/cpp/tests/mip/multi_probe_test.cu b/cpp/tests/mip/multi_probe_test.cu index 003220de9b..f32872b597 100644 --- a/cpp/tests/mip/multi_probe_test.cu +++ b/cpp/tests/mip/multi_probe_test.cu @@ -31,7 +31,7 @@ namespace cuopt::linear_programming::test { -inline auto make_async() { return std::make_shared(); } +inline auto make_async() { return rmm::mr::cuda_async_memory_resource(); } void init_handler(const raft::handle_t* handle_ptr) { @@ -141,7 +141,7 @@ multi_probe_results( void test_multi_probe(std::string path) { auto memory_resource = make_async(); - rmm::mr::set_current_device_resource(memory_resource.get()); + rmm::mr::set_current_device_resource_ref(memory_resource); const raft::handle_t handle_{}; cuopt::mps_parser::mps_data_model_t mps_problem = cuopt::mps_parser::parse_mps(path, false); diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp index abc69627df..c4cd0e2575 100644 --- a/cpp/tests/utilities/base_fixture.hpp +++ b/cpp/tests/utilities/base_fixture.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -13,11 +13,12 @@ #include +#include + #include #include #include #include -#include #include #include @@ -25,18 +26,17 @@ namespace cuopt { namespace test { /// MR factory functions -inline auto make_cuda() { return std::make_shared(); } +inline auto make_cuda() { return rmm::mr::cuda_memory_resource(); } -inline auto make_async() { return std::make_shared(); } +inline auto make_async() { return rmm::mr::cuda_async_memory_resource(); } -inline auto make_managed() { return std::make_shared(); } +inline auto make_managed() { return rmm::mr::managed_memory_resource(); } inline auto make_pool() { // 1GB of initial pool size const size_t initial_pool_size = 1024 * 1024 * 1024; - return rmm::mr::make_owning_wrapper(make_async(), - initial_pool_size); + return rmm::mr::pool_memory_resource(make_async(), initial_pool_size); } inline auto make_binning() @@ -44,8 +44,7 @@ inline auto make_binning() auto pool = make_pool(); // Add a fixed_size_memory_resource for bins of size 256, 512, 1024, 2048 and // 4096KiB Larger allocations will use the pool resource - auto mr = rmm::mr::make_owning_wrapper(pool, 18, 22); - return mr; + return rmm::mr::binning_memory_resource(pool, 18, 22); } /** @@ -62,7 +61,7 @@ inline auto make_binning() * Accepted types are "pool", "cuda", and "managed" only. * @return Memory resource instance */ -inline std::shared_ptr create_memory_resource( +inline cuda::mr::any_resource create_memory_resource( std::string const& allocation_mode) { if (allocation_mode == "binning") return make_binning(); @@ -120,6 +119,6 @@ inline auto parse_test_options(int argc, char** argv) auto const cmd_opts = parse_test_options(argc, argv); \ auto const rmm_mode = cmd_opts["rmm_mode"].as(); \ auto resource = cuopt::test::create_memory_resource(rmm_mode); \ - rmm::mr::set_current_device_resource(resource.get()); \ + rmm::mr::set_current_device_resource_ref(resource); \ return RUN_ALL_TESTS(); \ }