From 0838dae53de24dbe1cf965e8de7e621f55b11c29 Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Wed, 23 Jul 2025 10:11:51 +0000 Subject: [PATCH 1/2] disable cuda graph when in batch pdlp mode --- .../cuopt/linear_programming/solve.hpp | 3 +- .../utilities/cython_solve.hpp | 3 +- cpp/src/linear_programming/pdhg.cu | 7 +- cpp/src/linear_programming/pdhg.hpp | 4 +- cpp/src/linear_programming/pdlp.cu | 10 +-- cpp/src/linear_programming/pdlp.cuh | 3 +- .../restart_strategy/pdlp_restart_strategy.cu | 5 +- .../pdlp_restart_strategy.cuh | 3 +- .../weighted_average_solution.cu | 5 +- .../weighted_average_solution.hpp | 5 +- cpp/src/linear_programming/solve.cu | 30 ++++---- .../adaptive_step_size_strategy.cu | 5 +- .../adaptive_step_size_strategy.hpp | 3 +- .../utilities/cython_solve.cu | 19 ++++-- .../utilities/ping_pong_graph.cuh | 68 ++++++++++++------- 15 files changed, 111 insertions(+), 62 deletions(-) diff --git a/cpp/include/cuopt/linear_programming/solve.hpp b/cpp/include/cuopt/linear_programming/solve.hpp index 880982ad9d..04ee5530c0 100644 --- a/cpp/include/cuopt/linear_programming/solve.hpp +++ b/cpp/include/cuopt/linear_programming/solve.hpp @@ -51,7 +51,8 @@ optimization_problem_solution_t solve_lp( optimization_problem_t& op_problem, pdlp_solver_settings_t const& settings = pdlp_solver_settings_t{}, bool problem_checking = true, - bool use_pdlp_solver_mode = true); + bool use_pdlp_solver_mode = true, + bool is_batch_mode = false); /** * @brief Linear programming solve function. diff --git a/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp b/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp index cc30ff7a02..eef185d0d0 100644 --- a/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp +++ b/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp @@ -104,7 +104,8 @@ struct solver_ret_t { std::unique_ptr call_solve(cuopt::mps_parser::data_model_view_t*, linear_programming::solver_settings_t*, - unsigned int flags = cudaStreamNonBlocking); + unsigned int flags = cudaStreamNonBlocking, + bool is_batch_mode = false); std::pair>, double> call_batch_solve( std::vector*>, diff --git a/cpp/src/linear_programming/pdhg.cu b/cpp/src/linear_programming/pdhg.cu index ad4b69e075..f932eeb8d8 100644 --- a/cpp/src/linear_programming/pdhg.cu +++ b/cpp/src/linear_programming/pdhg.cu @@ -35,7 +35,8 @@ namespace cuopt::linear_programming::detail { template pdhg_solver_t::pdhg_solver_t(raft::handle_t const* handle_ptr, - problem_t& op_problem_scaled) + problem_t& op_problem_scaled, + bool is_batch_mode) : handle_ptr_(handle_ptr), stream_view_(handle_ptr_->get_stream()), problem_ptr(&op_problem_scaled), @@ -57,8 +58,8 @@ pdhg_solver_t::pdhg_solver_t(raft::handle_t const* handle_ptr, reusable_device_scalar_value_0_{0.0, stream_view_}, reusable_device_scalar_value_neg_1_{f_t(-1.0), stream_view_}, reusable_device_scalar_1_{stream_view_}, - graph_all{stream_view_}, - graph_prim_proj_gradient_dual{stream_view_}, + graph_all{stream_view_, is_batch_mode}, + graph_prim_proj_gradient_dual{stream_view_, is_batch_mode}, d_total_pdhg_iterations_{0, stream_view_} { } diff --git a/cpp/src/linear_programming/pdhg.hpp b/cpp/src/linear_programming/pdhg.hpp index c44b48865a..3e4f7565e5 100644 --- a/cpp/src/linear_programming/pdhg.hpp +++ b/cpp/src/linear_programming/pdhg.hpp @@ -31,7 +31,9 @@ namespace cuopt::linear_programming::detail { template class pdhg_solver_t { public: - pdhg_solver_t(raft::handle_t const* handle_ptr, problem_t& op_problem); + pdhg_solver_t(raft::handle_t const* handle_ptr, + problem_t& op_problem, + bool is_batch_mode = false); saddle_point_state_t& get_saddle_point_state(); cusparse_view_t& get_cusparse_view(); diff --git a/cpp/src/linear_programming/pdlp.cu b/cpp/src/linear_programming/pdlp.cu index 7acadae50d..84b04d43f9 100644 --- a/cpp/src/linear_programming/pdlp.cu +++ b/cpp/src/linear_programming/pdlp.cu @@ -53,7 +53,8 @@ void set_pdlp_hyper_parameters(rmm::cuda_stream_view stream_view) template pdlp_solver_t::pdlp_solver_t(problem_t& op_problem, - pdlp_solver_settings_t const& settings) + pdlp_solver_settings_t const& settings, + bool is_batch_mode) : handle_ptr_(op_problem.handle_ptr), stream_view_(handle_ptr_->get_stream()), problem_ptr(&op_problem), @@ -67,8 +68,8 @@ pdlp_solver_t::pdlp_solver_t(problem_t& op_problem, dual_step_size_{stream_view_}, primal_weight_{stream_view_}, step_size_{(f_t)pdlp_hyper_params::initial_step_size_scaling, stream_view_}, - step_size_strategy_{handle_ptr_, &primal_weight_, &step_size_}, - pdhg_solver_{handle_ptr_, op_problem_scaled_}, + step_size_strategy_{handle_ptr_, &primal_weight_, &step_size_, is_batch_mode}, + pdhg_solver_{handle_ptr_, op_problem_scaled_, is_batch_mode}, settings_(settings, stream_view_), initial_scaling_strategy_{handle_ptr_, op_problem_scaled_, @@ -100,7 +101,8 @@ pdlp_solver_t::pdlp_solver_t(problem_t& op_problem, op_problem, average_op_problem_evaluation_cusparse_view_, primal_size_h_, - dual_size_h_}, + dual_size_h_, + is_batch_mode}, average_termination_strategy_{handle_ptr_, op_problem, average_op_problem_evaluation_cusparse_view_, diff --git a/cpp/src/linear_programming/pdlp.cuh b/cpp/src/linear_programming/pdlp.cuh index 10a028f268..24db449726 100644 --- a/cpp/src/linear_programming/pdlp.cuh +++ b/cpp/src/linear_programming/pdlp.cuh @@ -65,7 +65,8 @@ class pdlp_solver_t { */ pdlp_solver_t( problem_t& op_problem, - pdlp_solver_settings_t const& settings = pdlp_solver_settings_t{}); + pdlp_solver_settings_t const& settings = pdlp_solver_settings_t{}, + bool is_batch_mode = false); optimization_problem_solution_t run_solver( const std::chrono::high_resolution_clock::time_point& start_time); diff --git a/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu b/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu index 55b06aecf4..236ec373d8 100644 --- a/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu +++ b/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu @@ -108,10 +108,11 @@ pdlp_restart_strategy_t::pdlp_restart_strategy_t( problem_t& op_problem, const cusparse_view_t& cusparse_view, const i_t primal_size, - const i_t dual_size) + const i_t dual_size, + bool is_batch_mode) : handle_ptr_(handle_ptr), stream_view_(handle_ptr_->get_stream()), - weighted_average_solution_{handle_ptr_, primal_size, dual_size}, + weighted_average_solution_{handle_ptr_, primal_size, dual_size, is_batch_mode}, primal_size_h_(primal_size), dual_size_h_(dual_size), problem_ptr(&op_problem), diff --git a/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cuh b/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cuh index 403f772391..8400977c15 100644 --- a/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cuh +++ b/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cuh @@ -101,7 +101,8 @@ class pdlp_restart_strategy_t { problem_t& op_problem, const cusparse_view_t& cusparse_view, const i_t primal_size, - const i_t dual_size); + const i_t dual_size, + bool is_batch_mode = false); // Compute kkt score on passed argument using the container tmp_kkt score and stream view f_t compute_kkt_score(const rmm::device_scalar& l2_primal_residual, diff --git a/cpp/src/linear_programming/restart_strategy/weighted_average_solution.cu b/cpp/src/linear_programming/restart_strategy/weighted_average_solution.cu index 17b33606fc..27bc08b9b6 100644 --- a/cpp/src/linear_programming/restart_strategy/weighted_average_solution.cu +++ b/cpp/src/linear_programming/restart_strategy/weighted_average_solution.cu @@ -29,7 +29,8 @@ namespace cuopt::linear_programming::detail { template weighted_average_solution_t::weighted_average_solution_t(raft::handle_t const* handle_ptr, i_t primal_size, - i_t dual_size) + i_t dual_size, + bool is_batch_mode) : handle_ptr_(handle_ptr), stream_view_(handle_ptr_->get_stream()), primal_size_h_(primal_size), @@ -39,7 +40,7 @@ weighted_average_solution_t::weighted_average_solution_t(raft::handle_ sum_primal_solution_weights_{0.0, stream_view_}, sum_dual_solution_weights_{0.0, stream_view_}, iterations_since_last_restart_{0}, - graph(stream_view_) + graph(stream_view_, is_batch_mode) { RAFT_CUDA_TRY( cudaMemsetAsync(sum_primal_solutions_.data(), 0.0, sizeof(f_t) * primal_size_h_, stream_view_)); diff --git a/cpp/src/linear_programming/restart_strategy/weighted_average_solution.hpp b/cpp/src/linear_programming/restart_strategy/weighted_average_solution.hpp index bea96b52fc..00cc30e186 100644 --- a/cpp/src/linear_programming/restart_strategy/weighted_average_solution.hpp +++ b/cpp/src/linear_programming/restart_strategy/weighted_average_solution.hpp @@ -29,7 +29,10 @@ namespace cuopt::linear_programming::detail { template class weighted_average_solution_t { public: - weighted_average_solution_t(raft::handle_t const* handle_ptr, i_t primal_size, i_t dual_size); + weighted_average_solution_t(raft::handle_t const* handle_ptr, + i_t primal_size, + i_t dual_size, + bool is_batch_mode); void reset_weighted_average_solution(); void add_current_solution_to_weighted_average_solution(const f_t* primal_solution, diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu index c3ba4f2c7d..df3d3d1e1b 100644 --- a/cpp/src/linear_programming/solve.cu +++ b/cpp/src/linear_programming/solve.cu @@ -350,24 +350,26 @@ template static optimization_problem_solution_t run_pdlp_solver( detail::problem_t& problem, pdlp_solver_settings_t const& settings, - const std::chrono::high_resolution_clock::time_point& start_time) + const std::chrono::high_resolution_clock::time_point& start_time, + bool is_batch_mode) { if (problem.n_constraints == 0) { CUOPT_LOG_INFO("No constraints in the problem: PDLP can't be run, use Dual Simplex instead."); return optimization_problem_solution_t{pdlp_termination_status_t::NumericalError, problem.handle_ptr->get_stream()}; } - detail::pdlp_solver_t solver(problem, settings); + detail::pdlp_solver_t solver(problem, settings, is_batch_mode); return solver.run_solver(start_time); } template optimization_problem_solution_t run_pdlp(detail::problem_t& problem, - pdlp_solver_settings_t const& settings) + pdlp_solver_settings_t const& settings, + bool is_batch_mode) { auto start_solver = std::chrono::high_resolution_clock::now(); f_t start_time = dual_simplex::tic(); - auto sol = run_pdlp_solver(problem, settings, start_solver); + auto sol = run_pdlp_solver(problem, settings, start_solver, is_batch_mode); auto end = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(end - start_solver); sol.set_solve_time(duration.count() / 1000.0); @@ -467,7 +469,8 @@ template optimization_problem_solution_t run_concurrent( optimization_problem_t& op_problem, detail::problem_t& problem, - pdlp_solver_settings_t const& settings) + pdlp_solver_settings_t const& settings, + bool is_batch_mode) { CUOPT_LOG_INFO("Running concurrent\n"); f_t start_time = dual_simplex::tic(); @@ -495,7 +498,7 @@ optimization_problem_solution_t run_concurrent( std::ref(sol_dual_simplex_ptr)); // Run pdlp in the main thread - auto sol_pdlp = run_pdlp(problem, settings_pdlp); + auto sol_pdlp = run_pdlp(problem, settings_pdlp, is_batch_mode); // Wait for dual simplex thread to finish dual_simplex_thread.join(); @@ -539,14 +542,15 @@ template optimization_problem_solution_t solve_lp_with_method( optimization_problem_t& op_problem, detail::problem_t& problem, - pdlp_solver_settings_t const& settings) + pdlp_solver_settings_t const& settings, + bool is_batch_mode) { if (settings.method == method_t::DualSimplex) { return run_dual_simplex(problem, settings); } else if (settings.method == method_t::Concurrent) { - return run_concurrent(op_problem, problem, settings); + return run_concurrent(op_problem, problem, settings, is_batch_mode); } else { - return run_pdlp(problem, settings); + return run_pdlp(problem, settings, is_batch_mode); } } @@ -554,7 +558,8 @@ template optimization_problem_solution_t solve_lp(optimization_problem_t& op_problem, pdlp_solver_settings_t const& settings, bool problem_checking, - bool use_pdlp_solver_mode) + bool use_pdlp_solver_mode, + bool is_batch_mode) { try { // Create log stream for file logging and add it to default logger @@ -593,7 +598,7 @@ optimization_problem_solution_t solve_lp(optimization_problem_tget_stream()); - auto sol = solve_lp_with_method(op_problem, problem, settings); + auto sol = solve_lp_with_method(op_problem, problem, settings, is_batch_mode); if (settings.sol_file != "") { CUOPT_LOG_INFO("Writing solution to file %s", settings.sol_file.c_str()); @@ -699,7 +704,8 @@ optimization_problem_solution_t solve_lp( optimization_problem_t& op_problem, \ pdlp_solver_settings_t const& settings, \ bool problem_checking, \ - bool use_pdlp_solver_mode); \ + bool use_pdlp_solver_mode, \ + bool is_batch_mode); \ \ template optimization_problem_solution_t solve_lp( \ raft::handle_t const* handle_ptr, \ diff --git a/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu b/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu index e8355dfa1c..1569869182 100644 --- a/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu +++ b/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu @@ -40,7 +40,8 @@ template adaptive_step_size_strategy_t::adaptive_step_size_strategy_t( raft::handle_t const* handle_ptr, rmm::device_scalar* primal_weight, - rmm::device_scalar* step_size) + rmm::device_scalar* step_size, + bool is_batch_mode) : stream_pool_(parallel_stream_computation), dot_delta_X_(cudaEventDisableTiming), dot_delta_Y_(cudaEventDisableTiming), @@ -55,7 +56,7 @@ adaptive_step_size_strategy_t::adaptive_step_size_strategy_t( norm_squared_delta_dual_{stream_view_}, reusable_device_scalar_value_1_{f_t(1.0), stream_view_}, reusable_device_scalar_value_0_{f_t(0.0), stream_view_}, - graph(stream_view_) + graph(stream_view_, is_batch_mode) { valid_step_size_ = make_unique_cuda_host_pinned(); } diff --git a/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.hpp b/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.hpp index 225aa2de07..4649caaf1c 100644 --- a/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.hpp +++ b/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.hpp @@ -56,7 +56,8 @@ class adaptive_step_size_strategy_t { adaptive_step_size_strategy_t(raft::handle_t const* handle_ptr, rmm::device_scalar* primal_weight, - rmm::device_scalar* step_size); + rmm::device_scalar* step_size, + bool is_batch_mode = false); void compute_step_sizes(pdhg_solver_t& pdhg_solver, rmm::device_scalar& primal_step_size, diff --git a/cpp/src/linear_programming/utilities/cython_solve.cu b/cpp/src/linear_programming/utilities/cython_solve.cu index 8f3624850c..1f9034e204 100644 --- a/cpp/src/linear_programming/utilities/cython_solve.cu +++ b/cpp/src/linear_programming/utilities/cython_solve.cu @@ -118,14 +118,18 @@ data_model_to_optimization_problem( */ linear_programming_ret_t call_solve_lp( cuopt::linear_programming::optimization_problem_t& op_problem, - cuopt::linear_programming::pdlp_solver_settings_t& solver_settings) + cuopt::linear_programming::pdlp_solver_settings_t& solver_settings, + bool is_batch_mode) { raft::common::nvtx::range fun_scope("Call Solve"); cuopt_expects( op_problem.get_problem_category() == cuopt::linear_programming::problem_category_t::LP, error_type_t::ValidationError, "LP solve cannot be called on a MIP problem!"); - auto solution = cuopt::linear_programming::solve_lp(op_problem, solver_settings); + const bool problem_checking = true; + const bool use_pdlp_solver_mode = true; + auto solution = cuopt::linear_programming::solve_lp( + op_problem, solver_settings, problem_checking, use_pdlp_solver_mode, is_batch_mode); linear_programming_ret_t lp_ret{ std::make_unique(solution.get_primal_solution().release()), std::make_unique(solution.get_dual_solution().release()), @@ -209,7 +213,8 @@ mip_ret_t call_solve_mip( std::unique_ptr call_solve( cuopt::mps_parser::data_model_view_t* data_model, cuopt::linear_programming::solver_settings_t* solver_settings, - unsigned int flags) + unsigned int flags, + bool is_batch_mode) { raft::common::nvtx::range fun_scope("Call Solve"); @@ -220,7 +225,8 @@ std::unique_ptr call_solve( auto op_problem = data_model_to_optimization_problem(data_model, solver_settings, &handle_); solver_ret_t response; if (op_problem.get_problem_category() == linear_programming::problem_category_t::LP) { - response.lp_ret = call_solve_lp(op_problem, solver_settings->get_pdlp_settings()); + response.lp_ret = + call_solve_lp(op_problem, solver_settings->get_pdlp_settings(), is_batch_mode); response.problem_type = linear_programming::problem_category_t::LP; } else { response.mip_ret = call_solve_mip(op_problem, solver_settings->get_mip_settings()); @@ -284,9 +290,12 @@ std::pair>, double> call_batch_solve( solver_settings->set_parameter(CUOPT_METHOD, CUOPT_METHOD_PDLP); } + const bool is_batch_mode = true; + #pragma omp parallel for num_threads(max_thread) for (std::size_t i = 0; i < size; ++i) - list[i] = std::move(call_solve(data_models[i], solver_settings, cudaStreamNonBlocking)); + list[i] = + std::move(call_solve(data_models[i], solver_settings, cudaStreamNonBlocking, is_batch_mode)); auto end = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(end - start_solver); diff --git a/cpp/src/linear_programming/utilities/ping_pong_graph.cuh b/cpp/src/linear_programming/utilities/ping_pong_graph.cuh index ab813865e4..4939e0711c 100644 --- a/cpp/src/linear_programming/utilities/ping_pong_graph.cuh +++ b/cpp/src/linear_programming/utilities/ping_pong_graph.cuh @@ -28,53 +28,69 @@ namespace cuopt::linear_programming::detail { template class ping_pong_graph_t { public: - ping_pong_graph_t(rmm::cuda_stream_view stream_view) : stream_view_(stream_view) {} + ping_pong_graph_t(rmm::cuda_stream_view stream_view, bool is_batch_mode = false) + : stream_view_(stream_view), is_batch_mode_(is_batch_mode) + { + } ~ping_pong_graph_t() { - if (even_initialized) { RAFT_CUDA_TRY_NO_THROW(cudaGraphExecDestroy(even_instance)); } - if (odd_initialized) { RAFT_CUDA_TRY_NO_THROW(cudaGraphExecDestroy(odd_instance)); } + if (!is_batch_mode_) { + if (even_initialized) { RAFT_CUDA_TRY_NO_THROW(cudaGraphExecDestroy(even_instance)); } + if (odd_initialized) { RAFT_CUDA_TRY_NO_THROW(cudaGraphExecDestroy(odd_instance)); } + } } void start_capture(i_t total_pdlp_iterations) { - if (total_pdlp_iterations % 2 == 0 && !even_initialized) { - RAFT_CUDA_TRY(cudaStreamBeginCapture(stream_view_.value(), cudaStreamCaptureModeThreadLocal)); - } else if (total_pdlp_iterations % 2 == 1 && !odd_initialized) { - RAFT_CUDA_TRY(cudaStreamBeginCapture(stream_view_.value(), cudaStreamCaptureModeThreadLocal)); + if (!is_batch_mode_) { + if (total_pdlp_iterations % 2 == 0 && !even_initialized) { + RAFT_CUDA_TRY( + cudaStreamBeginCapture(stream_view_.value(), cudaStreamCaptureModeThreadLocal)); + } else if (total_pdlp_iterations % 2 == 1 && !odd_initialized) { + RAFT_CUDA_TRY( + cudaStreamBeginCapture(stream_view_.value(), cudaStreamCaptureModeThreadLocal)); + } } } void end_capture(i_t total_pdlp_iterations) { - if (total_pdlp_iterations % 2 == 0 && !even_initialized) { - RAFT_CUDA_TRY(cudaStreamEndCapture(stream_view_.value(), &even_graph)); - // Extra NULL NULL 0 mandatory for cuda 11.8 - RAFT_CUDA_TRY(cudaGraphInstantiate(&even_instance, even_graph, nullptr, nullptr, 0)); - even_initialized = true; - RAFT_CUDA_TRY_NO_THROW(cudaGraphDestroy(even_graph)); - } else if (total_pdlp_iterations % 2 == 1 && !odd_initialized) { - RAFT_CUDA_TRY(cudaStreamEndCapture(stream_view_.value(), &odd_graph)); - // Extra NULL NULL 0 mandatory for cuda 11.8 - RAFT_CUDA_TRY(cudaGraphInstantiate(&odd_instance, odd_graph, nullptr, nullptr, 0)); - odd_initialized = true; - RAFT_CUDA_TRY_NO_THROW(cudaGraphDestroy(odd_graph)); + if (!is_batch_mode_) { + if (total_pdlp_iterations % 2 == 0 && !even_initialized) { + RAFT_CUDA_TRY(cudaStreamEndCapture(stream_view_.value(), &even_graph)); + // Extra NULL NULL 0 mandatory for cuda 11.8 + RAFT_CUDA_TRY(cudaGraphInstantiate(&even_instance, even_graph, nullptr, nullptr, 0)); + even_initialized = true; + RAFT_CUDA_TRY_NO_THROW(cudaGraphDestroy(even_graph)); + } else if (total_pdlp_iterations % 2 == 1 && !odd_initialized) { + RAFT_CUDA_TRY(cudaStreamEndCapture(stream_view_.value(), &odd_graph)); + // Extra NULL NULL 0 mandatory for cuda 11.8 + RAFT_CUDA_TRY(cudaGraphInstantiate(&odd_instance, odd_graph, nullptr, nullptr, 0)); + odd_initialized = true; + RAFT_CUDA_TRY_NO_THROW(cudaGraphDestroy(odd_graph)); + } } } void launch(i_t total_pdlp_iterations) { - if (total_pdlp_iterations % 2 == 0 && even_initialized) { - RAFT_CUDA_TRY(cudaGraphLaunch(even_instance, stream_view_.value())); - } else if (total_pdlp_iterations % 2 == 1 && odd_initialized) { - RAFT_CUDA_TRY(cudaGraphLaunch(odd_instance, stream_view_.value())); + if (!is_batch_mode_) { + if (total_pdlp_iterations % 2 == 0 && even_initialized) { + RAFT_CUDA_TRY(cudaGraphLaunch(even_instance, stream_view_.value())); + } else if (total_pdlp_iterations % 2 == 1 && odd_initialized) { + RAFT_CUDA_TRY(cudaGraphLaunch(odd_instance, stream_view_.value())); + } } } bool is_initialized(i_t total_pdlp_iterations) { - return (total_pdlp_iterations % 2 == 0 && even_initialized) || - (total_pdlp_iterations % 2 == 1 && odd_initialized); + if (!is_batch_mode_) { + return (total_pdlp_iterations % 2 == 0 && even_initialized) || + (total_pdlp_iterations % 2 == 1 && odd_initialized); + } + return false; } private: @@ -85,5 +101,7 @@ class ping_pong_graph_t { rmm::cuda_stream_view stream_view_; bool even_initialized{false}; bool odd_initialized{false}; + // Temporary fix to disable cuda graph in batch mode + bool is_batch_mode_{false}; }; } // namespace cuopt::linear_programming::detail \ No newline at end of file From bf5e7cdb4f8863677e2f88ef289dba5535064809 Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Wed, 23 Jul 2025 12:19:25 +0000 Subject: [PATCH 2/2] re-enable tests --- .../cuopt/cuopt/tests/linear_programming/test_lp_solver.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py b/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py index f0944acdc2..ed0d04ec19 100644 --- a/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py +++ b/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py @@ -475,9 +475,6 @@ def test_parse_var_names(): ) -@pytest.mark.skip( - reason="Intermittent failure, new version is being worked on" -) def test_parser_and_batch_solver(): data_model_list = [] @@ -567,9 +564,6 @@ def test_warm_start_other_problem(): solver.Solve(data_model_obj2, settings) -@pytest.mark.skip( - reason="Intermittent failure, new version is being worked on" -) def test_batch_solver_warm_start(): data_model_list = []