From 2970a07c90f448f6c28bb8df5fa11f321c5d8395 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Fri, 3 Apr 2026 13:51:52 -0500
Subject: [PATCH 1/2] Migrate RMM usage to CCCL MR design

Remove dependency on rmm::mr::device_memory_resource base class. Resources
now satisfy the cuda::mr::resource concept directly.

- Replace shared_ptr<device_memory_resource> with value types and
  cuda::mr::any_resource<cuda::mr::device_accessible> for type-erased storage
- Replace set_current_device_resource(ptr) with set_current_device_resource_ref
- Replace set_per_device_resource(id, ptr) with set_per_device_resource_ref
- Remove make_owning_wrapper usage
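- Remove dynamic_cast on memory resources (no common base class);
  get_device_memory_size() now always returns total device memory and no
  longer honors a limiting_resource_adaptor allocation limit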
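- Remove owning_wrapper.hpp and device_memory_resource.hpp includes, plus the
  cuda_async_memory_resource.hpp and limiting_resource_adaptor.hpp includes
  that cuda_helpers.cuh no longer needs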
- Add missing thrust/iterator/transform_output_iterator.h include
  (no longer transitively included via CCCL)
---
 cpp/cuopt_cli.cpp                             |  7 +++---
 cpp/src/barrier/sparse_cholesky.cuh           |  4 ++--
 .../infeasibility_information.cu              |  2 ++
 cpp/src/routing/ges_solver.cu                 |  2 --
 cpp/src/utilities/cuda_helpers.cuh            | 20 +----------------
 cpp/tests/mip/load_balancing_test.cu          |  4 ++--
 cpp/tests/mip/multi_probe_test.cu             |  4 ++--
 cpp/tests/utilities/base_fixture.hpp          | 22 +++++++++----------
 8 files changed, 24 insertions(+), 41 deletions(-)
diff --git a/cpp/cuopt_cli.cpp b/cpp/cuopt_cli.cpp
index 899a3118b3..2cb18cc1c5 100644
--- a/cpp/cuopt_cli.cpp
+++ b/cpp/cuopt_cli.cpp
@@ -376,7 +376,7 @@ int main(int argc, char* argv[])
 
   // Only initialize CUDA resources if using GPU memory backend (not remote execution)
   auto memory_backend = cuopt::linear_programming::get_memory_backend_type();
-  std::vector<std::shared_ptr<rmm::mr::device_memory_resource>> memory_resources;
+  std::vector<rmm::mr::cuda_async_memory_resource> memory_resources;
 
   if (memory_backend == cuopt::linear_programming::memory_backend_t::GPU) {
     // All arguments are parsed as string, default values are parsed as int if unused.
@@ -384,10 +384,11 @@ int main(int argc, char* argv[])
                             ? std::stoi(program.get<std::string>("--num-gpus"))
                             : program.get<int>("--num-gpus");
 
+    memory_resources.reserve(std::min(raft::device_setter::get_device_count(), num_gpus));
     for (int i = 0; i < std::min(raft::device_setter::get_device_count(), num_gpus); ++i) {
       RAFT_CUDA_TRY(cudaSetDevice(i));
-      memory_resources.push_back(make_async());
-      rmm::mr::set_per_device_resource(rmm::cuda_device_id{i}, memory_resources.back().get());
+      memory_resources.emplace_back();
+      rmm::mr::set_per_device_resource_ref(rmm::cuda_device_id{i}, memory_resources.back());
     }
     RAFT_CUDA_TRY(cudaSetDevice(0));
   }
diff --git a/cpp/src/barrier/sparse_cholesky.cuh b/cpp/src/barrier/sparse_cholesky.cuh
index f7938fb989..52fea89502 100644
--- a/cpp/src/barrier/sparse_cholesky.cuh
+++ b/cpp/src/barrier/sparse_cholesky.cuh
@@ -247,8 +247,8 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
     CUDSS_CALL_AND_CHECK_EXIT(cudssSetStream(handle, stream), status, "cudaStreamCreate");
 
     mem_handler.ctx          = reinterpret_cast<void*>(handle_ptr_->get_workspace_resource());
-    mem_handler.device_alloc = cudss_device_alloc<rmm::mr::device_memory_resource>;
-    mem_handler.device_free  = cudss_device_dealloc<rmm::mr::device_memory_resource>;
+    mem_handler.device_alloc = cudss_device_alloc<void>;
+    mem_handler.device_free  = cudss_device_dealloc<void>;
 
     CUDSS_CALL_AND_CHECK_EXIT(
       cudssSetDeviceMemHandler(handle, &mem_handler), status, "cudssSetDeviceMemHandler");
diff --git a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu
index dbb35b732d..51b702eae1 100644
--- a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu
+++ b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu
@@ -15,6 +15,8 @@
 
 #include <mip_heuristics/mip_constants.hpp>
 
+#include <thrust/iterator/transform_output_iterator.h>
+
 #include <raft/sparse/detail/cusparse_wrappers.h>
 #include <raft/core/nvtx.hpp>
 #include <raft/linalg/detail/cublas_wrappers.hpp>
diff --git a/cpp/src/routing/ges_solver.cu b/cpp/src/routing/ges_solver.cu
index 194f73b99e..a660f84909 100644
--- a/cpp/src/routing/ges_solver.cu
+++ b/cpp/src/routing/ges_solver.cu
@@ -16,8 +16,6 @@
 #include "adapters/assignment_adapter.cuh"
 #include "ges/guided_ejection_search.cuh"
 
-#include <rmm/mr/device_memory_resource.hpp>
-
 namespace cuopt {
 namespace routing {
 
diff --git a/cpp/src/utilities/cuda_helpers.cuh b/cpp/src/utilities/cuda_helpers.cuh
index 946099648d..80e7b730db 100644
--- a/cpp/src/utilities/cuda_helpers.cuh
+++ b/cpp/src/utilities/cuda_helpers.cuh
@@ -16,8 +16,6 @@
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
 #include <rmm/device_uvector.hpp>
-#include <rmm/mr/cuda_async_memory_resource.hpp>
-#include <rmm/mr/limiting_resource_adaptor.hpp>
 #include <unordered_map>
 
 namespace cuopt {
@@ -216,25 +214,9 @@ DI void sorted_insert(T* array, T item, int curr_size, int max_size)
 
 inline size_t get_device_memory_size()
 {
-  // Otherwise, we need to get the free memory from the device
   size_t free_mem, total_mem;
   cudaMemGetInfo(&free_mem, &total_mem);
-
-  auto res = rmm::mr::get_current_device_resource();
-  auto limiting_adaptor =
-    dynamic_cast<rmm::mr::limiting_resource_adaptor<rmm::mr::cuda_async_memory_resource>*>(res);
-  // Did we specifiy an explicit memory limit?
-  if (limiting_adaptor) {
-    printf("limiting_adaptor->get_allocation_limit(): %fMiB\n",
-           limiting_adaptor->get_allocation_limit() / (double)1e6);
-    printf("used_mem: %fMiB\n", limiting_adaptor->get_allocated_bytes() / (double)1e6);
-    printf("free_mem: %fMiB\n",
-           (limiting_adaptor->get_allocation_limit() - limiting_adaptor->get_allocated_bytes()) /
-             (double)1e6);
-    return std::min(total_mem, limiting_adaptor->get_allocation_limit());
-  } else {
-    return total_mem;
-  }
+  return total_mem;
 }
 
 }  // namespace cuopt
diff --git a/cpp/tests/mip/load_balancing_test.cu b/cpp/tests/mip/load_balancing_test.cu
index 5e2f08007d..991a3072c3 100644
--- a/cpp/tests/mip/load_balancing_test.cu
+++ b/cpp/tests/mip/load_balancing_test.cu
@@ -32,7 +32,7 @@
 
 namespace cuopt::linear_programming::test {
 
-inline auto make_async() { return std::make_shared<rmm::mr::cuda_async_memory_resource>(); }
+inline auto make_async() { return rmm::mr::cuda_async_memory_resource(); }
 
 void init_handler(const raft::handle_t* handle_ptr)
 {
@@ -119,7 +119,7 @@ bounds_probe_results(detail::bound_presolve_t<int, double>& bnd_prb_0,
 void test_multi_probe(std::string path)
 {
   auto memory_resource = make_async();
-  rmm::mr::set_current_device_resource(memory_resource.get());
+  rmm::mr::set_current_device_resource_ref(memory_resource);
   const raft::handle_t handle_{};
   cuopt::mps_parser::mps_data_model_t<int, double> mps_problem =
     cuopt::mps_parser::parse_mps<int, double>(path, false);
diff --git a/cpp/tests/mip/multi_probe_test.cu b/cpp/tests/mip/multi_probe_test.cu
index 073c153486..2910cb4a44 100644
--- a/cpp/tests/mip/multi_probe_test.cu
+++ b/cpp/tests/mip/multi_probe_test.cu
@@ -31,7 +31,7 @@
 
 namespace cuopt::linear_programming::test {
 
-inline auto make_async() { return std::make_shared<rmm::mr::cuda_async_memory_resource>(); }
+inline auto make_async() { return rmm::mr::cuda_async_memory_resource(); }
 
 void init_handler(const raft::handle_t* handle_ptr)
 {
@@ -141,7 +141,7 @@ multi_probe_results(
 void test_multi_probe(std::string path)
 {
   auto memory_resource = make_async();
-  rmm::mr::set_current_device_resource(memory_resource.get());
+  rmm::mr::set_current_device_resource_ref(memory_resource);
   const raft::handle_t handle_{};
   cuopt::mps_parser::mps_data_model_t<int, double> mps_problem =
     cuopt::mps_parser::parse_mps<int, double>(path, false);
diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp
index abc69627df..c9c15ae04d 100644
--- a/cpp/tests/utilities/base_fixture.hpp
+++ b/cpp/tests/utilities/base_fixture.hpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -13,11 +13,12 @@
 
 #include <gtest/gtest.h>
 
+#include <cuda/memory_resource>
+
 #include <rmm/mr/binning_memory_resource.hpp>
 #include <rmm/mr/cuda_async_memory_resource.hpp>
 #include <rmm/mr/cuda_memory_resource.hpp>
 #include <rmm/mr/managed_memory_resource.hpp>
-#include <rmm/mr/owning_wrapper.hpp>
 #include <rmm/mr/per_device_resource.hpp>
 #include <rmm/mr/pool_memory_resource.hpp>
 
@@ -25,18 +26,18 @@ namespace cuopt {
 namespace test {
 
 /// MR factory functions
-inline auto make_cuda() { return std::make_shared<rmm::mr::cuda_memory_resource>(); }
+inline auto make_cuda() { return rmm::mr::cuda_memory_resource(); }
 
-inline auto make_async() { return std::make_shared<rmm::mr::cuda_async_memory_resource>(); }
+inline auto make_async() { return rmm::mr::cuda_async_memory_resource(); }
 
-inline auto make_managed() { return std::make_shared<rmm::mr::managed_memory_resource>(); }
+inline auto make_managed() { return rmm::mr::managed_memory_resource(); }
 
 inline auto make_pool()
 {
   // 1GB of initial pool size
   const size_t initial_pool_size = 1024 * 1024 * 1024;
-  return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_async(),
-                                                                     initial_pool_size);
+  auto upstream                  = make_async();
+  return rmm::mr::pool_memory_resource(upstream, initial_pool_size);
 }
 
 inline auto make_binning()
@@ -44,8 +45,7 @@ inline auto make_binning()
   auto pool = make_pool();
   // Add a fixed_size_memory_resource for bins of size 256, 512, 1024, 2048 and
   // 4096KiB Larger allocations will use the pool resource
-  auto mr = rmm::mr::make_owning_wrapper<rmm::mr::binning_memory_resource>(pool, 18, 22);
-  return mr;
+  return rmm::mr::binning_memory_resource(pool, 18, 22);
 }
 
 /**
@@ -62,7 +62,7 @@ inline auto make_binning()
  *        Accepted types are "pool", "cuda", and "managed" only.
  * @return Memory resource instance
  */
-inline std::shared_ptr<rmm::mr::device_memory_resource> create_memory_resource(
+inline cuda::mr::any_resource<cuda::mr::device_accessible> create_memory_resource(
   std::string const& allocation_mode)
 {
   if (allocation_mode == "binning") return make_binning();
@@ -120,6 +120,6 @@ inline auto parse_test_options(int argc, char** argv)
     auto const cmd_opts = parse_test_options(argc, argv);                \
     auto const rmm_mode = cmd_opts["rmm_mode"].as<std::string>();        \
     auto resource       = cuopt::test::create_memory_resource(rmm_mode); \
-    rmm::mr::set_current_device_resource(resource.get());                \
+    rmm::mr::set_current_device_resource_ref(resource);                  \
     return RUN_ALL_TESTS();                                              \
   }

From 31a6eabcdb1a11fd8fef36af50146af5606d2b5e Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 15 Apr 2026 11:42:09 -0500
Subject: [PATCH 2/2] Inline upstream memory resource variable in test fixture
 MR composition

---
 cpp/tests/utilities/base_fixture.hpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp
index c9c15ae04d..c4cd0e2575 100644
--- a/cpp/tests/utilities/base_fixture.hpp
+++ b/cpp/tests/utilities/base_fixture.hpp
@@ -36,8 +36,7 @@ inline auto make_pool()
 {
   // 1GB of initial pool size
   const size_t initial_pool_size = 1024 * 1024 * 1024;
-  auto upstream                  = make_async();
-  return rmm::mr::pool_memory_resource(upstream, initial_pool_size);
+  return rmm::mr::pool_memory_resource(make_async(), initial_pool_size);
 }
 
 inline auto make_binning()