Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions cpp/cuopt_cli.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -415,15 +415,16 @@ int main(int argc, char* argv[])

// Only initialize CUDA resources if using GPU memory backend (not remote execution)
auto memory_backend = cuopt::linear_programming::get_memory_backend_type();
std::vector<std::shared_ptr<rmm::mr::device_memory_resource>> memory_resources;
std::vector<rmm::mr::cuda_async_memory_resource> memory_resources;

if (memory_backend == cuopt::linear_programming::memory_backend_t::GPU) {
const int num_gpus = settings.get_parameter<int>(CUOPT_NUM_GPUS);

memory_resources.reserve(std::min(raft::device_setter::get_device_count(), num_gpus));
for (int i = 0; i < std::min(raft::device_setter::get_device_count(), num_gpus); ++i) {
RAFT_CUDA_TRY(cudaSetDevice(i));
memory_resources.push_back(make_async());
rmm::mr::set_per_device_resource(rmm::cuda_device_id{i}, memory_resources.back().get());
memory_resources.emplace_back();
rmm::mr::set_per_device_resource_ref(rmm::cuda_device_id{i}, memory_resources.back());
}
RAFT_CUDA_TRY(cudaSetDevice(0));
}
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/barrier/sparse_cholesky.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,8 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
CUDSS_CALL_AND_CHECK_EXIT(cudssSetStream(handle, stream), status, "cudaStreamCreate");

mem_handler.ctx = reinterpret_cast<void*>(handle_ptr_->get_workspace_resource());
mem_handler.device_alloc = cudss_device_alloc<rmm::mr::device_memory_resource>;
mem_handler.device_free = cudss_device_dealloc<rmm::mr::device_memory_resource>;
mem_handler.device_alloc = cudss_device_alloc<void>;
mem_handler.device_free = cudss_device_dealloc<void>;

CUDSS_CALL_AND_CHECK_EXIT(
cudssSetDeviceMemHandler(handle, &mem_handler), status, "cudssSetDeviceMemHandler");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

#include <mip_heuristics/mip_constants.hpp>

#include <thrust/iterator/transform_output_iterator.h>

#include <raft/sparse/detail/cusparse_wrappers.h>
#include <raft/core/nvtx.hpp>
#include <raft/linalg/detail/cublas_wrappers.hpp>
Expand Down
2 changes: 0 additions & 2 deletions cpp/src/routing/ges_solver.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
#include "adapters/assignment_adapter.cuh"
#include "ges/guided_ejection_search.cuh"

#include <rmm/mr/device_memory_resource.hpp>

namespace cuopt {
namespace routing {

Expand Down
20 changes: 1 addition & 19 deletions cpp/src/utilities/cuda_helpers.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
#include <raft/util/cuda_utils.cuh>
#include <raft/util/cudart_utils.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/mr/cuda_async_memory_resource.hpp>
#include <rmm/mr/limiting_resource_adaptor.hpp>
#include <shared_mutex>
#include <unordered_map>

Expand Down Expand Up @@ -242,25 +240,9 @@ DI void sorted_insert(T* array, T item, int curr_size, int max_size)

inline size_t get_device_memory_size()
{
  // Returns the total physical memory of the current CUDA device, in bytes.
  // The previous limiting_resource_adaptor inspection was removed by this
  // change; the flattened diff left both the old branch and the new return in
  // place, making the added `return total_mem;` unreachable. Keep only the
  // post-change body.
  size_t free_mem, total_mem;
  cudaMemGetInfo(&free_mem, &total_mem);
  // NOTE(review): free_mem is queried but unused; cudaMemGetInfo requires both
  // out-params, so it cannot be dropped.
  return total_mem;
}

} // namespace cuopt
4 changes: 2 additions & 2 deletions cpp/tests/mip/load_balancing_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

namespace cuopt::linear_programming::test {

// Factory for a CUDA async (cudaMallocAsync-backed) memory resource.
// The flattened diff left both the old shared_ptr-returning definition and the
// new value-returning one, which is a redefinition; keep only the added
// value-semantics version (callers now pass it to set_current_device_resource_ref).
inline auto make_async() { return rmm::mr::cuda_async_memory_resource(); }

void init_handler(const raft::handle_t* handle_ptr)
{
Expand Down Expand Up @@ -119,7 +119,7 @@ bounds_probe_results(detail::bound_presolve_t<int, double>& bnd_prb_0,
void test_multi_probe(std::string path)
{
auto memory_resource = make_async();
rmm::mr::set_current_device_resource(memory_resource.get());
rmm::mr::set_current_device_resource_ref(memory_resource);
const raft::handle_t handle_{};
cuopt::mps_parser::mps_data_model_t<int, double> mps_problem =
cuopt::mps_parser::parse_mps<int, double>(path, false);
Expand Down
4 changes: 2 additions & 2 deletions cpp/tests/mip/multi_probe_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

namespace cuopt::linear_programming::test {

// Factory for a CUDA async (cudaMallocAsync-backed) memory resource.
// The flattened diff left both the old shared_ptr-returning definition and the
// new value-returning one, which is a redefinition; keep only the added
// value-semantics version (callers now pass it to set_current_device_resource_ref).
inline auto make_async() { return rmm::mr::cuda_async_memory_resource(); }

void init_handler(const raft::handle_t* handle_ptr)
{
Expand Down Expand Up @@ -141,7 +141,7 @@ multi_probe_results(
void test_multi_probe(std::string path)
{
auto memory_resource = make_async();
rmm::mr::set_current_device_resource(memory_resource.get());
rmm::mr::set_current_device_resource_ref(memory_resource);
const raft::handle_t handle_{};
cuopt::mps_parser::mps_data_model_t<int, double> mps_problem =
cuopt::mps_parser::parse_mps<int, double>(path, false);
Expand Down
21 changes: 10 additions & 11 deletions cpp/tests/utilities/base_fixture.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* clang-format off */
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/
/* clang-format on */
Expand All @@ -13,39 +13,38 @@

#include <gtest/gtest.h>

#include <cuda/memory_resource>

#include <rmm/mr/binning_memory_resource.hpp>
#include <rmm/mr/cuda_async_memory_resource.hpp>
#include <rmm/mr/cuda_memory_resource.hpp>
#include <rmm/mr/managed_memory_resource.hpp>
#include <rmm/mr/owning_wrapper.hpp>
#include <rmm/mr/per_device_resource.hpp>
#include <rmm/mr/pool_memory_resource.hpp>

namespace cuopt {
namespace test {

/// MR factory functions
// Factory for a plain cudaMalloc-backed memory resource.
// The flattened diff left the old shared_ptr-returning definition alongside
// the new value-returning one (a redefinition); keep only the added version.
inline auto make_cuda() { return rmm::mr::cuda_memory_resource(); }

// Factory for a CUDA async (cudaMallocAsync-backed) memory resource.
// The flattened diff left the old shared_ptr-returning definition alongside
// the new value-returning one (a redefinition); keep only the added version.
inline auto make_async() { return rmm::mr::cuda_async_memory_resource(); }

// Factory for a cudaMallocManaged-backed (unified memory) resource.
// The flattened diff left the old shared_ptr-returning definition alongside
// the new value-returning one (a redefinition); keep only the added version.
inline auto make_managed() { return rmm::mr::managed_memory_resource(); }

inline auto make_pool()
{
  // Pool resource with a 1 GiB initial pool, drawing from the async resource.
  // The flattened diff left both the old make_owning_wrapper return and the
  // new value-constructed return in the body (duplicate returns); keep only
  // the added version.
  const size_t initial_pool_size = 1024 * 1024 * 1024;
  return rmm::mr::pool_memory_resource(make_async(), initial_pool_size);
}

inline auto make_binning()
{
  auto pool = make_pool();
  // Binning resource with fixed-size bins for allocations of 2^18..2^22 bytes
  // (256KiB to 4MiB); larger allocations fall through to the pool resource.
  // The flattened diff left both the old make_owning_wrapper lines and the new
  // value-constructed return (duplicate returns); keep only the added version.
  return rmm::mr::binning_memory_resource(pool, 18, 22);
}

/**
Expand All @@ -62,7 +61,7 @@ inline auto make_binning()
* Accepted types are "pool", "cuda", and "managed" only.
* @return Memory resource instance
*/
inline std::shared_ptr<rmm::mr::device_memory_resource> create_memory_resource(
inline cuda::mr::any_resource<cuda::mr::device_accessible> create_memory_resource(
std::string const& allocation_mode)
{
if (allocation_mode == "binning") return make_binning();
Expand Down Expand Up @@ -120,6 +119,6 @@ inline auto parse_test_options(int argc, char** argv)
auto const cmd_opts = parse_test_options(argc, argv); \
auto const rmm_mode = cmd_opts["rmm_mode"].as<std::string>(); \
auto resource = cuopt::test::create_memory_resource(rmm_mode); \
rmm::mr::set_current_device_resource(resource.get()); \
rmm::mr::set_current_device_resource_ref(resource); \
return RUN_ALL_TESTS(); \
}