From ef84855b36f369137a4efac0b64fc8920b638bca Mon Sep 17 00:00:00 2001 From: Hugo Linsenmaier Date: Mon, 27 Oct 2025 14:31:10 -0700 Subject: [PATCH] Use one cusparse handle per thread to avoid race condition on cusparseSetStream --- cpp/src/dual_simplex/cusparse_view.cu | 4 ++++ cpp/src/linear_programming/solve.cu | 3 +-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/cpp/src/dual_simplex/cusparse_view.cu b/cpp/src/dual_simplex/cusparse_view.cu index 8d22604734..0ae9ea9bca 100644 --- a/cpp/src/dual_simplex/cusparse_view.cu +++ b/cpp/src/dual_simplex/cusparse_view.cu @@ -138,6 +138,10 @@ cusparse_view_t::cusparse_view_t(raft::handle_t const* handle_ptr, d_minus_one_(f_t(-1), handle_ptr->get_stream()), d_zero_(f_t(0), handle_ptr->get_stream()) { + RAFT_CUBLAS_TRY(raft::linalg::detail::cublassetpointermode( + handle_ptr->get_cublas_handle(), CUBLAS_POINTER_MODE_DEVICE, handle_ptr->get_stream())); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsesetpointermode( + handle_ptr->get_cusparse_handle(), CUSPARSE_POINTER_MODE_DEVICE, handle_ptr->get_stream())); // TMP matrix data should already be on the GPU constexpr bool debug = false; if (debug) { printf("A hash: %zu\n", A.hash()); } diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu index 2fb5d047eb..d452460a61 100644 --- a/cpp/src/linear_programming/solve.cu +++ b/cpp/src/linear_programming/solve.cu @@ -672,9 +672,8 @@ optimization_problem_solution_t run_concurrent( // Initialize the dual simplex structures before we run PDLP. // Otherwise, CUDA API calls to the problem stream may occur in both threads and throw graph // capture off - auto barrier_handle = raft::handle_t(*op_problem.get_handle_ptr()); rmm::cuda_stream_view barrier_stream = rmm::cuda_stream_per_thread; - raft::resource::set_cuda_stream(barrier_handle, barrier_stream); + auto barrier_handle = raft::handle_t(barrier_stream); // Make sure allocations are done on the original stream problem.handle_ptr->sync_stream();