Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions cpp/cuopt_cli.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -415,15 +415,16 @@ int main(int argc, char* argv[])

// Only initialize CUDA resources if using GPU memory backend (not remote execution)
auto memory_backend = cuopt::linear_programming::get_memory_backend_type();
std::vector<std::shared_ptr<rmm::mr::device_memory_resource>> memory_resources;
std::vector<rmm::mr::cuda_async_memory_resource> memory_resources;

if (memory_backend == cuopt::linear_programming::memory_backend_t::GPU) {
const int num_gpus = settings.get_parameter<int>(CUOPT_NUM_GPUS);

memory_resources.reserve(std::min(raft::device_setter::get_device_count(), num_gpus));
for (int i = 0; i < std::min(raft::device_setter::get_device_count(), num_gpus); ++i) {
RAFT_CUDA_TRY(cudaSetDevice(i));
memory_resources.push_back(make_async());
rmm::mr::set_per_device_resource(rmm::cuda_device_id{i}, memory_resources.back().get());
memory_resources.emplace_back();
rmm::mr::set_per_device_resource_ref(rmm::cuda_device_id{i}, memory_resources.back());
}
RAFT_CUDA_TRY(cudaSetDevice(0));
}
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/barrier/sparse_cholesky.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,8 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
CUDSS_CALL_AND_CHECK_EXIT(cudssSetStream(handle, stream), status, "cudaStreamCreate");

mem_handler.ctx = reinterpret_cast<void*>(handle_ptr_->get_workspace_resource());
mem_handler.device_alloc = cudss_device_alloc<rmm::mr::device_memory_resource>;
mem_handler.device_free = cudss_device_dealloc<rmm::mr::device_memory_resource>;
mem_handler.device_alloc = cudss_device_alloc<void>;
mem_handler.device_free = cudss_device_dealloc<void>;

CUDSS_CALL_AND_CHECK_EXIT(
cudssSetDeviceMemHandler(handle, &mem_handler), status, "cudssSetDeviceMemHandler");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

#include <mip_heuristics/mip_constants.hpp>

#include <thrust/iterator/transform_output_iterator.h>

#include <raft/sparse/detail/cusparse_wrappers.h>
#include <raft/core/nvtx.hpp>
#include <raft/linalg/detail/cublas_wrappers.hpp>
Expand Down
2 changes: 0 additions & 2 deletions cpp/src/routing/ges_solver.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
#include "adapters/assignment_adapter.cuh"
#include "ges/guided_ejection_search.cuh"

#include <rmm/mr/device_memory_resource.hpp>

namespace cuopt {
namespace routing {

Expand Down
20 changes: 1 addition & 19 deletions cpp/src/utilities/cuda_helpers.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
#include <raft/util/cuda_utils.cuh>
#include <raft/util/cudart_utils.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/mr/cuda_async_memory_resource.hpp>
#include <rmm/mr/limiting_resource_adaptor.hpp>
#include <shared_mutex>
#include <unordered_map>

Expand Down Expand Up @@ -242,25 +240,9 @@ DI void sorted_insert(T* array, T item, int curr_size, int max_size)

inline size_t get_device_memory_size()
{
  // Returns the total physical memory of the current CUDA device, in bytes.
  // The previous limiting_resource_adaptor inspection was removed by this
  // change; the flattened diff left both the old branch and the new return in
  // place, making the added `return total_mem;` unreachable. Keep only the
  // post-change body.
  size_t free_mem, total_mem;
  cudaMemGetInfo(&free_mem, &total_mem);
  // NOTE(review): free_mem is queried but unused; cudaMemGetInfo requires both
  // out-params, so it cannot be dropped.
  return total_mem;
}

} // namespace cuopt
4 changes: 2 additions & 2 deletions cpp/tests/mip/load_balancing_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

namespace cuopt::linear_programming::test {

// Factory for a CUDA async (cudaMallocAsync-backed) memory resource.
// The flattened diff left both the old shared_ptr-returning definition and the
// new value-returning one, which is a redefinition; keep only the added
// value-semantics version (callers now pass it to set_current_device_resource_ref).
inline auto make_async() { return rmm::mr::cuda_async_memory_resource(); }

void init_handler(const raft::handle_t* handle_ptr)
{
Expand Down Expand Up @@ -119,7 +119,7 @@ bounds_probe_results(detail::bound_presolve_t<int, double>& bnd_prb_0,
void test_multi_probe(std::string path)
{
auto memory_resource = make_async();
rmm::mr::set_current_device_resource(memory_resource.get());
rmm::mr::set_current_device_resource_ref(memory_resource);
const raft::handle_t handle_{};
cuopt::mps_parser::mps_data_model_t<int, double> mps_problem =
cuopt::mps_parser::parse_mps<int, double>(path, false);
Expand Down
4 changes: 2 additions & 2 deletions cpp/tests/mip/multi_probe_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

namespace cuopt::linear_programming::test {

// Factory for a CUDA async (cudaMallocAsync-backed) memory resource.
// The flattened diff left both the old shared_ptr-returning definition and the
// new value-returning one, which is a redefinition; keep only the added
// value-semantics version (callers now pass it to set_current_device_resource_ref).
inline auto make_async() { return rmm::mr::cuda_async_memory_resource(); }

void init_handler(const raft::handle_t* handle_ptr)
{
Expand Down Expand Up @@ -141,7 +141,7 @@ multi_probe_results(
void test_multi_probe(std::string path)
{
auto memory_resource = make_async();
rmm::mr::set_current_device_resource(memory_resource.get());
rmm::mr::set_current_device_resource_ref(memory_resource);
const raft::handle_t handle_{};
cuopt::mps_parser::mps_data_model_t<int, double> mps_problem =
cuopt::mps_parser::parse_mps<int, double>(path, false);
Expand Down
21 changes: 10 additions & 11 deletions cpp/tests/utilities/base_fixture.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* clang-format off */
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/
/* clang-format on */
Expand All @@ -13,39 +13,38 @@

#include <gtest/gtest.h>

#include <cuda/memory_resource>

#include <rmm/mr/binning_memory_resource.hpp>
#include <rmm/mr/cuda_async_memory_resource.hpp>
#include <rmm/mr/cuda_memory_resource.hpp>
#include <rmm/mr/managed_memory_resource.hpp>
#include <rmm/mr/owning_wrapper.hpp>
#include <rmm/mr/per_device_resource.hpp>
#include <rmm/mr/pool_memory_resource.hpp>

namespace cuopt {
namespace test {

/// MR factory functions
// Factory for a plain cudaMalloc-backed memory resource.
// The flattened diff left the old shared_ptr-returning definition alongside
// the new value-returning one (a redefinition); keep only the added version.
inline auto make_cuda() { return rmm::mr::cuda_memory_resource(); }

// Factory for a CUDA async (cudaMallocAsync-backed) memory resource.
// The flattened diff left the old shared_ptr-returning definition alongside
// the new value-returning one (a redefinition); keep only the added version.
inline auto make_async() { return rmm::mr::cuda_async_memory_resource(); }

// Factory for a cudaMallocManaged-backed (unified memory) resource.
// The flattened diff left the old shared_ptr-returning definition alongside
// the new value-returning one (a redefinition); keep only the added version.
inline auto make_managed() { return rmm::mr::managed_memory_resource(); }

inline auto make_pool()
{
  // Pool resource with a 1 GiB initial pool, drawing from the async resource.
  // The flattened diff left both the old make_owning_wrapper return and the
  // new value-constructed return in the body (duplicate returns); keep only
  // the added version.
  const size_t initial_pool_size = 1024 * 1024 * 1024;
  return rmm::mr::pool_memory_resource(make_async(), initial_pool_size);
}

inline auto make_binning()
{
  auto pool = make_pool();
  // Binning resource with fixed-size bins for allocations of 2^18..2^22 bytes
  // (256KiB to 4MiB); larger allocations fall through to the pool resource.
  // The flattened diff left both the old make_owning_wrapper lines and the new
  // value-constructed return (duplicate returns); keep only the added version.
  return rmm::mr::binning_memory_resource(pool, 18, 22);
}

/**
Expand All @@ -62,7 +61,7 @@ inline auto make_binning()
* Accepted types are "pool", "cuda", and "managed" only.
* @return Memory resource instance
*/
inline std::shared_ptr<rmm::mr::device_memory_resource> create_memory_resource(
inline cuda::mr::any_resource<cuda::mr::device_accessible> create_memory_resource(
std::string const& allocation_mode)
{
if (allocation_mode == "binning") return make_binning();
Expand Down Expand Up @@ -120,6 +119,6 @@ inline auto parse_test_options(int argc, char** argv)
auto const cmd_opts = parse_test_options(argc, argv); \
auto const rmm_mode = cmd_opts["rmm_mode"].as<std::string>(); \
auto resource = cuopt::test::create_memory_resource(rmm_mode); \
rmm::mr::set_current_device_resource(resource.get()); \
rmm::mr::set_current_device_resource_ref(resource); \
return RUN_ALL_TESTS(); \
}