From 0838dae53de24dbe1cf965e8de7e621f55b11c29 Mon Sep 17 00:00:00 2001
From: Nicolas Blin <nicolas.blin7@hotmail.fr>
Date: Wed, 23 Jul 2025 10:11:51 +0000
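Subject: [PATCH 1/2] Disable CUDA graphs when in batch PDLP mode

Thread an `is_batch_mode` flag from call_batch_solve() down to
ping_pong_graph_t so that CUDA graph capture, instantiation and launch
are skipped for solves started from the batch path. The flag defaults
to false. Marked in the code as a temporary fix.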

---
 .../cuopt/linear_programming/solve.hpp        |  3 +-
 .../utilities/cython_solve.hpp                |  3 +-
 cpp/src/linear_programming/pdhg.cu            |  7 +-
 cpp/src/linear_programming/pdhg.hpp           |  4 +-
 cpp/src/linear_programming/pdlp.cu            | 10 +--
 cpp/src/linear_programming/pdlp.cuh           |  3 +-
 .../restart_strategy/pdlp_restart_strategy.cu |  5 +-
 .../pdlp_restart_strategy.cuh                 |  3 +-
 .../weighted_average_solution.cu              |  5 +-
 .../weighted_average_solution.hpp             |  5 +-
 cpp/src/linear_programming/solve.cu           | 30 ++++----
 .../adaptive_step_size_strategy.cu            |  5 +-
 .../adaptive_step_size_strategy.hpp           |  3 +-
 .../utilities/cython_solve.cu                 | 19 ++++--
 .../utilities/ping_pong_graph.cuh             | 68 ++++++++++++-------
 15 files changed, 111 insertions(+), 62 deletions(-)
diff --git a/cpp/include/cuopt/linear_programming/solve.hpp b/cpp/include/cuopt/linear_programming/solve.hpp
index 880982ad9d..04ee5530c0 100644
--- a/cpp/include/cuopt/linear_programming/solve.hpp
+++ b/cpp/include/cuopt/linear_programming/solve.hpp
@@ -51,7 +51,8 @@ optimization_problem_solution_t<i_t, f_t> solve_lp(
   optimization_problem_t<i_t, f_t>& op_problem,
   pdlp_solver_settings_t<i_t, f_t> const& settings = pdlp_solver_settings_t<i_t, f_t>{},
   bool problem_checking                            = true,
-  bool use_pdlp_solver_mode                        = true);
+  bool use_pdlp_solver_mode                        = true,
+  bool is_batch_mode                               = false);
 
 /**
  * @brief Linear programming solve function.
diff --git a/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp b/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp
index cc30ff7a02..eef185d0d0 100644
--- a/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp
+++ b/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp
@@ -104,7 +104,8 @@ struct solver_ret_t {
 
 std::unique_ptr<solver_ret_t> call_solve(cuopt::mps_parser::data_model_view_t<int, double>*,
                                          linear_programming::solver_settings_t<int, double>*,
-                                         unsigned int flags = cudaStreamNonBlocking);
+                                         unsigned int flags = cudaStreamNonBlocking,
+                                         bool is_batch_mode = false);
 
 std::pair<std::vector<std::unique_ptr<solver_ret_t>>, double> call_batch_solve(
   std::vector<cuopt::mps_parser::data_model_view_t<int, double>*>,
diff --git a/cpp/src/linear_programming/pdhg.cu b/cpp/src/linear_programming/pdhg.cu
index ad4b69e075..f932eeb8d8 100644
--- a/cpp/src/linear_programming/pdhg.cu
+++ b/cpp/src/linear_programming/pdhg.cu
@@ -35,7 +35,8 @@ namespace cuopt::linear_programming::detail {
 
 template <typename i_t, typename f_t>
 pdhg_solver_t<i_t, f_t>::pdhg_solver_t(raft::handle_t const* handle_ptr,
-                                       problem_t<i_t, f_t>& op_problem_scaled)
+                                       problem_t<i_t, f_t>& op_problem_scaled,
+                                       bool is_batch_mode)
   : handle_ptr_(handle_ptr),
     stream_view_(handle_ptr_->get_stream()),
     problem_ptr(&op_problem_scaled),
@@ -57,8 +58,8 @@ pdhg_solver_t<i_t, f_t>::pdhg_solver_t(raft::handle_t const* handle_ptr,
     reusable_device_scalar_value_0_{0.0, stream_view_},
     reusable_device_scalar_value_neg_1_{f_t(-1.0), stream_view_},
     reusable_device_scalar_1_{stream_view_},
-    graph_all{stream_view_},
-    graph_prim_proj_gradient_dual{stream_view_},
+    graph_all{stream_view_, is_batch_mode},
+    graph_prim_proj_gradient_dual{stream_view_, is_batch_mode},
     d_total_pdhg_iterations_{0, stream_view_}
 {
 }
diff --git a/cpp/src/linear_programming/pdhg.hpp b/cpp/src/linear_programming/pdhg.hpp
index c44b48865a..3e4f7565e5 100644
--- a/cpp/src/linear_programming/pdhg.hpp
+++ b/cpp/src/linear_programming/pdhg.hpp
@@ -31,7 +31,9 @@ namespace cuopt::linear_programming::detail {
 template <typename i_t, typename f_t>
 class pdhg_solver_t {
  public:
-  pdhg_solver_t(raft::handle_t const* handle_ptr, problem_t<i_t, f_t>& op_problem);
+  pdhg_solver_t(raft::handle_t const* handle_ptr,
+                problem_t<i_t, f_t>& op_problem,
+                bool is_batch_mode = false);
 
   saddle_point_state_t<i_t, f_t>& get_saddle_point_state();
   cusparse_view_t<i_t, f_t>& get_cusparse_view();
diff --git a/cpp/src/linear_programming/pdlp.cu b/cpp/src/linear_programming/pdlp.cu
index 7acadae50d..84b04d43f9 100644
--- a/cpp/src/linear_programming/pdlp.cu
+++ b/cpp/src/linear_programming/pdlp.cu
@@ -53,7 +53,8 @@ void set_pdlp_hyper_parameters(rmm::cuda_stream_view stream_view)
 
 template <typename i_t, typename f_t>
 pdlp_solver_t<i_t, f_t>::pdlp_solver_t(problem_t<i_t, f_t>& op_problem,
-                                       pdlp_solver_settings_t<i_t, f_t> const& settings)
+                                       pdlp_solver_settings_t<i_t, f_t> const& settings,
+                                       bool is_batch_mode)
   : handle_ptr_(op_problem.handle_ptr),
     stream_view_(handle_ptr_->get_stream()),
     problem_ptr(&op_problem),
@@ -67,8 +68,8 @@ pdlp_solver_t<i_t, f_t>::pdlp_solver_t(problem_t<i_t, f_t>& op_problem,
     dual_step_size_{stream_view_},
     primal_weight_{stream_view_},
     step_size_{(f_t)pdlp_hyper_params::initial_step_size_scaling, stream_view_},
-    step_size_strategy_{handle_ptr_, &primal_weight_, &step_size_},
-    pdhg_solver_{handle_ptr_, op_problem_scaled_},
+    step_size_strategy_{handle_ptr_, &primal_weight_, &step_size_, is_batch_mode},
+    pdhg_solver_{handle_ptr_, op_problem_scaled_, is_batch_mode},
     settings_(settings, stream_view_),
     initial_scaling_strategy_{handle_ptr_,
                               op_problem_scaled_,
@@ -100,7 +101,8 @@ pdlp_solver_t<i_t, f_t>::pdlp_solver_t(problem_t<i_t, f_t>& op_problem,
                       op_problem,
                       average_op_problem_evaluation_cusparse_view_,
                       primal_size_h_,
-                      dual_size_h_},
+                      dual_size_h_,
+                      is_batch_mode},
     average_termination_strategy_{handle_ptr_,
                                   op_problem,
                                   average_op_problem_evaluation_cusparse_view_,
diff --git a/cpp/src/linear_programming/pdlp.cuh b/cpp/src/linear_programming/pdlp.cuh
index 10a028f268..24db449726 100644
--- a/cpp/src/linear_programming/pdlp.cuh
+++ b/cpp/src/linear_programming/pdlp.cuh
@@ -65,7 +65,8 @@ class pdlp_solver_t {
    */
   pdlp_solver_t(
     problem_t<i_t, f_t>& op_problem,
-    pdlp_solver_settings_t<i_t, f_t> const& settings = pdlp_solver_settings_t<i_t, f_t>{});
+    pdlp_solver_settings_t<i_t, f_t> const& settings = pdlp_solver_settings_t<i_t, f_t>{},
+    bool is_batch_mode                               = false);
 
   optimization_problem_solution_t<i_t, f_t> run_solver(
     const std::chrono::high_resolution_clock::time_point& start_time);
diff --git a/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu b/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu
index 55b06aecf4..236ec373d8 100644
--- a/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu
+++ b/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu
@@ -108,10 +108,11 @@ pdlp_restart_strategy_t<i_t, f_t>::pdlp_restart_strategy_t(
   problem_t<i_t, f_t>& op_problem,
   const cusparse_view_t<i_t, f_t>& cusparse_view,
   const i_t primal_size,
-  const i_t dual_size)
+  const i_t dual_size,
+  bool is_batch_mode)
   : handle_ptr_(handle_ptr),
     stream_view_(handle_ptr_->get_stream()),
-    weighted_average_solution_{handle_ptr_, primal_size, dual_size},
+    weighted_average_solution_{handle_ptr_, primal_size, dual_size, is_batch_mode},
     primal_size_h_(primal_size),
     dual_size_h_(dual_size),
     problem_ptr(&op_problem),
diff --git a/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cuh b/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cuh
index 403f772391..8400977c15 100644
--- a/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cuh
+++ b/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cuh
@@ -101,7 +101,8 @@ class pdlp_restart_strategy_t {
                           problem_t<i_t, f_t>& op_problem,
                           const cusparse_view_t<i_t, f_t>& cusparse_view,
                           const i_t primal_size,
-                          const i_t dual_size);
+                          const i_t dual_size,
+                          bool is_batch_mode = false);
 
   // Compute kkt score on passed argument using the container tmp_kkt score and stream view
   f_t compute_kkt_score(const rmm::device_scalar<f_t>& l2_primal_residual,
diff --git a/cpp/src/linear_programming/restart_strategy/weighted_average_solution.cu b/cpp/src/linear_programming/restart_strategy/weighted_average_solution.cu
index 17b33606fc..27bc08b9b6 100644
--- a/cpp/src/linear_programming/restart_strategy/weighted_average_solution.cu
+++ b/cpp/src/linear_programming/restart_strategy/weighted_average_solution.cu
@@ -29,7 +29,8 @@ namespace cuopt::linear_programming::detail {
 template <typename i_t, typename f_t>
 weighted_average_solution_t<i_t, f_t>::weighted_average_solution_t(raft::handle_t const* handle_ptr,
                                                                    i_t primal_size,
-                                                                   i_t dual_size)
+                                                                   i_t dual_size,
+                                                                   bool is_batch_mode)
   : handle_ptr_(handle_ptr),
     stream_view_(handle_ptr_->get_stream()),
     primal_size_h_(primal_size),
@@ -39,7 +40,7 @@ weighted_average_solution_t<i_t, f_t>::weighted_average_solution_t(raft::handle_
     sum_primal_solution_weights_{0.0, stream_view_},
     sum_dual_solution_weights_{0.0, stream_view_},
     iterations_since_last_restart_{0},
-    graph(stream_view_)
+    graph(stream_view_, is_batch_mode)
 {
   RAFT_CUDA_TRY(
     cudaMemsetAsync(sum_primal_solutions_.data(), 0.0, sizeof(f_t) * primal_size_h_, stream_view_));
diff --git a/cpp/src/linear_programming/restart_strategy/weighted_average_solution.hpp b/cpp/src/linear_programming/restart_strategy/weighted_average_solution.hpp
index bea96b52fc..00cc30e186 100644
--- a/cpp/src/linear_programming/restart_strategy/weighted_average_solution.hpp
+++ b/cpp/src/linear_programming/restart_strategy/weighted_average_solution.hpp
@@ -29,7 +29,10 @@ namespace cuopt::linear_programming::detail {
 template <typename i_t, typename f_t>
 class weighted_average_solution_t {
  public:
-  weighted_average_solution_t(raft::handle_t const* handle_ptr, i_t primal_size, i_t dual_size);
+  weighted_average_solution_t(raft::handle_t const* handle_ptr,
+                              i_t primal_size,
+                              i_t dual_size,
+                              bool is_batch_mode = false);
 
   void reset_weighted_average_solution();
   void add_current_solution_to_weighted_average_solution(const f_t* primal_solution,
diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu
index c3ba4f2c7d..df3d3d1e1b 100644
--- a/cpp/src/linear_programming/solve.cu
+++ b/cpp/src/linear_programming/solve.cu
@@ -350,24 +350,26 @@ template <typename i_t, typename f_t>
 static optimization_problem_solution_t<i_t, f_t> run_pdlp_solver(
   detail::problem_t<i_t, f_t>& problem,
   pdlp_solver_settings_t<i_t, f_t> const& settings,
-  const std::chrono::high_resolution_clock::time_point& start_time)
+  const std::chrono::high_resolution_clock::time_point& start_time,
+  bool is_batch_mode)
 {
   if (problem.n_constraints == 0) {
     CUOPT_LOG_INFO("No constraints in the problem: PDLP can't be run, use Dual Simplex instead.");
     return optimization_problem_solution_t<i_t, f_t>{pdlp_termination_status_t::NumericalError,
                                                      problem.handle_ptr->get_stream()};
   }
-  detail::pdlp_solver_t<i_t, f_t> solver(problem, settings);
+  detail::pdlp_solver_t<i_t, f_t> solver(problem, settings, is_batch_mode);
   return solver.run_solver(start_time);
 }
 
 template <typename i_t, typename f_t>
 optimization_problem_solution_t<i_t, f_t> run_pdlp(detail::problem_t<i_t, f_t>& problem,
-                                                   pdlp_solver_settings_t<i_t, f_t> const& settings)
+                                                   pdlp_solver_settings_t<i_t, f_t> const& settings,
+                                                   bool is_batch_mode)
 {
   auto start_solver = std::chrono::high_resolution_clock::now();
   f_t start_time    = dual_simplex::tic();
-  auto sol          = run_pdlp_solver(problem, settings, start_solver);
+  auto sol          = run_pdlp_solver(problem, settings, start_solver, is_batch_mode);
   auto end          = std::chrono::high_resolution_clock::now();
   auto duration     = std::chrono::duration_cast<std::chrono::milliseconds>(end - start_solver);
   sol.set_solve_time(duration.count() / 1000.0);
@@ -467,7 +469,8 @@ template <typename i_t, typename f_t>
 optimization_problem_solution_t<i_t, f_t> run_concurrent(
   optimization_problem_t<i_t, f_t>& op_problem,
   detail::problem_t<i_t, f_t>& problem,
-  pdlp_solver_settings_t<i_t, f_t> const& settings)
+  pdlp_solver_settings_t<i_t, f_t> const& settings,
+  bool is_batch_mode)
 {
   CUOPT_LOG_INFO("Running concurrent\n");
   f_t start_time = dual_simplex::tic();
@@ -495,7 +498,7 @@ optimization_problem_solution_t<i_t, f_t> run_concurrent(
                                   std::ref(sol_dual_simplex_ptr));
 
   // Run pdlp in the main thread
-  auto sol_pdlp = run_pdlp(problem, settings_pdlp);
+  auto sol_pdlp = run_pdlp(problem, settings_pdlp, is_batch_mode);
 
   // Wait for dual simplex thread to finish
   dual_simplex_thread.join();
@@ -539,14 +542,15 @@ template <typename i_t, typename f_t>
 optimization_problem_solution_t<i_t, f_t> solve_lp_with_method(
   optimization_problem_t<i_t, f_t>& op_problem,
   detail::problem_t<i_t, f_t>& problem,
-  pdlp_solver_settings_t<i_t, f_t> const& settings)
+  pdlp_solver_settings_t<i_t, f_t> const& settings,
+  bool is_batch_mode)
 {
   if (settings.method == method_t::DualSimplex) {
     return run_dual_simplex(problem, settings);
   } else if (settings.method == method_t::Concurrent) {
-    return run_concurrent(op_problem, problem, settings);
+    return run_concurrent(op_problem, problem, settings, is_batch_mode);
   } else {
-    return run_pdlp(problem, settings);
+    return run_pdlp(problem, settings, is_batch_mode);
   }
 }
 
@@ -554,7 +558,8 @@ template <typename i_t, typename f_t>
 optimization_problem_solution_t<i_t, f_t> solve_lp(optimization_problem_t<i_t, f_t>& op_problem,
                                                    pdlp_solver_settings_t<i_t, f_t> const& settings,
                                                    bool problem_checking,
-                                                   bool use_pdlp_solver_mode)
+                                                   bool use_pdlp_solver_mode,
+                                                   bool is_batch_mode)
 {
   try {
     // Create log stream for file logging and add it to default logger
@@ -593,7 +598,7 @@ optimization_problem_solution_t<i_t, f_t> solve_lp(optimization_problem_t<i_t, f
 
     setup_device_symbols(op_problem.get_handle_ptr()->get_stream());
 
-    auto sol = solve_lp_with_method(op_problem, problem, settings);
+    auto sol = solve_lp_with_method(op_problem, problem, settings, is_batch_mode);
 
     if (settings.sol_file != "") {
       CUOPT_LOG_INFO("Writing solution to file %s", settings.sol_file.c_str());
@@ -699,7 +704,8 @@ optimization_problem_solution_t<i_t, f_t> solve_lp(
     optimization_problem_t<int, F_TYPE>& op_problem,                                   \
     pdlp_solver_settings_t<int, F_TYPE> const& settings,                               \
     bool problem_checking,                                                             \
-    bool use_pdlp_solver_mode);                                                        \
+    bool use_pdlp_solver_mode,                                                         \
+    bool is_batch_mode);                                                               \
                                                                                        \
   template optimization_problem_solution_t<int, F_TYPE> solve_lp(                      \
     raft::handle_t const* handle_ptr,                                                  \
diff --git a/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu b/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu
index e8355dfa1c..1569869182 100644
--- a/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu
+++ b/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu
@@ -40,7 +40,8 @@ template <typename i_t, typename f_t>
 adaptive_step_size_strategy_t<i_t, f_t>::adaptive_step_size_strategy_t(
   raft::handle_t const* handle_ptr,
   rmm::device_scalar<f_t>* primal_weight,
-  rmm::device_scalar<f_t>* step_size)
+  rmm::device_scalar<f_t>* step_size,
+  bool is_batch_mode)
   : stream_pool_(parallel_stream_computation),
     dot_delta_X_(cudaEventDisableTiming),
     dot_delta_Y_(cudaEventDisableTiming),
@@ -55,7 +56,7 @@ adaptive_step_size_strategy_t<i_t, f_t>::adaptive_step_size_strategy_t(
     norm_squared_delta_dual_{stream_view_},
     reusable_device_scalar_value_1_{f_t(1.0), stream_view_},
     reusable_device_scalar_value_0_{f_t(0.0), stream_view_},
-    graph(stream_view_)
+    graph(stream_view_, is_batch_mode)
 {
   valid_step_size_ = make_unique_cuda_host_pinned<i_t>();
 }
diff --git a/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.hpp b/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.hpp
index 225aa2de07..4649caaf1c 100644
--- a/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.hpp
+++ b/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.hpp
@@ -56,7 +56,8 @@ class adaptive_step_size_strategy_t {
 
   adaptive_step_size_strategy_t(raft::handle_t const* handle_ptr,
                                 rmm::device_scalar<f_t>* primal_weight,
-                                rmm::device_scalar<f_t>* step_size);
+                                rmm::device_scalar<f_t>* step_size,
+                                bool is_batch_mode = false);
 
   void compute_step_sizes(pdhg_solver_t<i_t, f_t>& pdhg_solver,
                           rmm::device_scalar<f_t>& primal_step_size,
diff --git a/cpp/src/linear_programming/utilities/cython_solve.cu b/cpp/src/linear_programming/utilities/cython_solve.cu
index 8f3624850c..1f9034e204 100644
--- a/cpp/src/linear_programming/utilities/cython_solve.cu
+++ b/cpp/src/linear_programming/utilities/cython_solve.cu
@@ -118,14 +118,18 @@ data_model_to_optimization_problem(
  */
 linear_programming_ret_t call_solve_lp(
   cuopt::linear_programming::optimization_problem_t<int, double>& op_problem,
-  cuopt::linear_programming::pdlp_solver_settings_t<int, double>& solver_settings)
+  cuopt::linear_programming::pdlp_solver_settings_t<int, double>& solver_settings,
+  bool is_batch_mode)
 {
   raft::common::nvtx::range fun_scope("Call Solve");
   cuopt_expects(
     op_problem.get_problem_category() == cuopt::linear_programming::problem_category_t::LP,
     error_type_t::ValidationError,
     "LP solve cannot be called on a MIP problem!");
-  auto solution = cuopt::linear_programming::solve_lp(op_problem, solver_settings);
+  const bool problem_checking     = true;
+  const bool use_pdlp_solver_mode = true;
+  auto solution                   = cuopt::linear_programming::solve_lp(
+    op_problem, solver_settings, problem_checking, use_pdlp_solver_mode, is_batch_mode);
   linear_programming_ret_t lp_ret{
     std::make_unique<rmm::device_buffer>(solution.get_primal_solution().release()),
     std::make_unique<rmm::device_buffer>(solution.get_dual_solution().release()),
@@ -209,7 +213,8 @@ mip_ret_t call_solve_mip(
 std::unique_ptr<solver_ret_t> call_solve(
   cuopt::mps_parser::data_model_view_t<int, double>* data_model,
   cuopt::linear_programming::solver_settings_t<int, double>* solver_settings,
-  unsigned int flags)
+  unsigned int flags,
+  bool is_batch_mode)
 {
   raft::common::nvtx::range fun_scope("Call Solve");
 
@@ -220,7 +225,8 @@ std::unique_ptr<solver_ret_t> call_solve(
   auto op_problem = data_model_to_optimization_problem(data_model, solver_settings, &handle_);
   solver_ret_t response;
   if (op_problem.get_problem_category() == linear_programming::problem_category_t::LP) {
-    response.lp_ret       = call_solve_lp(op_problem, solver_settings->get_pdlp_settings());
+    response.lp_ret =
+      call_solve_lp(op_problem, solver_settings->get_pdlp_settings(), is_batch_mode);
     response.problem_type = linear_programming::problem_category_t::LP;
   } else {
     response.mip_ret      = call_solve_mip(op_problem, solver_settings->get_mip_settings());
@@ -284,9 +290,12 @@ std::pair<std::vector<std::unique_ptr<solver_ret_t>>, double> call_batch_solve(
     solver_settings->set_parameter(CUOPT_METHOD, CUOPT_METHOD_PDLP);
   }
 
+  const bool is_batch_mode = true;
+
 #pragma omp parallel for num_threads(max_thread)
   for (std::size_t i = 0; i < size; ++i)
-    list[i] = std::move(call_solve(data_models[i], solver_settings, cudaStreamNonBlocking));
+    list[i] =
+      std::move(call_solve(data_models[i], solver_settings, cudaStreamNonBlocking, is_batch_mode));
 
   auto end      = std::chrono::high_resolution_clock::now();
   auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start_solver);
diff --git a/cpp/src/linear_programming/utilities/ping_pong_graph.cuh b/cpp/src/linear_programming/utilities/ping_pong_graph.cuh
index ab813865e4..4939e0711c 100644
--- a/cpp/src/linear_programming/utilities/ping_pong_graph.cuh
+++ b/cpp/src/linear_programming/utilities/ping_pong_graph.cuh
@@ -28,53 +28,69 @@ namespace cuopt::linear_programming::detail {
 template <typename i_t>
 class ping_pong_graph_t {
  public:
-  ping_pong_graph_t(rmm::cuda_stream_view stream_view) : stream_view_(stream_view) {}
+  ping_pong_graph_t(rmm::cuda_stream_view stream_view, bool is_batch_mode = false)
+    : stream_view_(stream_view), is_batch_mode_(is_batch_mode)
+  {
+  }
 
   ~ping_pong_graph_t()
   {
-    if (even_initialized) { RAFT_CUDA_TRY_NO_THROW(cudaGraphExecDestroy(even_instance)); }
-    if (odd_initialized) { RAFT_CUDA_TRY_NO_THROW(cudaGraphExecDestroy(odd_instance)); }
+    if (!is_batch_mode_) {
+      if (even_initialized) { RAFT_CUDA_TRY_NO_THROW(cudaGraphExecDestroy(even_instance)); }
+      if (odd_initialized) { RAFT_CUDA_TRY_NO_THROW(cudaGraphExecDestroy(odd_instance)); }
+    }
   }
 
   void start_capture(i_t total_pdlp_iterations)
   {
-    if (total_pdlp_iterations % 2 == 0 && !even_initialized) {
-      RAFT_CUDA_TRY(cudaStreamBeginCapture(stream_view_.value(), cudaStreamCaptureModeThreadLocal));
-    } else if (total_pdlp_iterations % 2 == 1 && !odd_initialized) {
-      RAFT_CUDA_TRY(cudaStreamBeginCapture(stream_view_.value(), cudaStreamCaptureModeThreadLocal));
+    if (!is_batch_mode_) {
+      if (total_pdlp_iterations % 2 == 0 && !even_initialized) {
+        RAFT_CUDA_TRY(
+          cudaStreamBeginCapture(stream_view_.value(), cudaStreamCaptureModeThreadLocal));
+      } else if (total_pdlp_iterations % 2 == 1 && !odd_initialized) {
+        RAFT_CUDA_TRY(
+          cudaStreamBeginCapture(stream_view_.value(), cudaStreamCaptureModeThreadLocal));
+      }
     }
   }
 
   void end_capture(i_t total_pdlp_iterations)
   {
-    if (total_pdlp_iterations % 2 == 0 && !even_initialized) {
-      RAFT_CUDA_TRY(cudaStreamEndCapture(stream_view_.value(), &even_graph));
-      // Extra NULL NULL 0 mandatory for cuda 11.8
-      RAFT_CUDA_TRY(cudaGraphInstantiate(&even_instance, even_graph, nullptr, nullptr, 0));
-      even_initialized = true;
-      RAFT_CUDA_TRY_NO_THROW(cudaGraphDestroy(even_graph));
-    } else if (total_pdlp_iterations % 2 == 1 && !odd_initialized) {
-      RAFT_CUDA_TRY(cudaStreamEndCapture(stream_view_.value(), &odd_graph));
-      // Extra NULL NULL 0 mandatory for cuda 11.8
-      RAFT_CUDA_TRY(cudaGraphInstantiate(&odd_instance, odd_graph, nullptr, nullptr, 0));
-      odd_initialized = true;
-      RAFT_CUDA_TRY_NO_THROW(cudaGraphDestroy(odd_graph));
+    if (!is_batch_mode_) {
+      if (total_pdlp_iterations % 2 == 0 && !even_initialized) {
+        RAFT_CUDA_TRY(cudaStreamEndCapture(stream_view_.value(), &even_graph));
+        // Extra NULL NULL 0 mandatory for cuda 11.8
+        RAFT_CUDA_TRY(cudaGraphInstantiate(&even_instance, even_graph, nullptr, nullptr, 0));
+        even_initialized = true;
+        RAFT_CUDA_TRY_NO_THROW(cudaGraphDestroy(even_graph));
+      } else if (total_pdlp_iterations % 2 == 1 && !odd_initialized) {
+        RAFT_CUDA_TRY(cudaStreamEndCapture(stream_view_.value(), &odd_graph));
+        // Extra NULL NULL 0 mandatory for cuda 11.8
+        RAFT_CUDA_TRY(cudaGraphInstantiate(&odd_instance, odd_graph, nullptr, nullptr, 0));
+        odd_initialized = true;
+        RAFT_CUDA_TRY_NO_THROW(cudaGraphDestroy(odd_graph));
+      }
     }
   }
 
   void launch(i_t total_pdlp_iterations)
   {
-    if (total_pdlp_iterations % 2 == 0 && even_initialized) {
-      RAFT_CUDA_TRY(cudaGraphLaunch(even_instance, stream_view_.value()));
-    } else if (total_pdlp_iterations % 2 == 1 && odd_initialized) {
-      RAFT_CUDA_TRY(cudaGraphLaunch(odd_instance, stream_view_.value()));
+    if (!is_batch_mode_) {
+      if (total_pdlp_iterations % 2 == 0 && even_initialized) {
+        RAFT_CUDA_TRY(cudaGraphLaunch(even_instance, stream_view_.value()));
+      } else if (total_pdlp_iterations % 2 == 1 && odd_initialized) {
+        RAFT_CUDA_TRY(cudaGraphLaunch(odd_instance, stream_view_.value()));
+      }
     }
   }
 
   bool is_initialized(i_t total_pdlp_iterations)
   {
-    return (total_pdlp_iterations % 2 == 0 && even_initialized) ||
-           (total_pdlp_iterations % 2 == 1 && odd_initialized);
+    if (!is_batch_mode_) {
+      return (total_pdlp_iterations % 2 == 0 && even_initialized) ||
+             (total_pdlp_iterations % 2 == 1 && odd_initialized);
+    }
+    return false;
   }
 
  private:
@@ -85,5 +101,7 @@ class ping_pong_graph_t {
   rmm::cuda_stream_view stream_view_;
   bool even_initialized{false};
   bool odd_initialized{false};
+  // Temporary fix: in batch mode, capture/launch are no-ops and is_initialized() is false
+  bool is_batch_mode_{false};
 };
 }  // namespace cuopt::linear_programming::detail
\ No newline at end of file

From bf5e7cdb4f8863677e2f88ef289dba5535064809 Mon Sep 17 00:00:00 2001
From: Nicolas Blin <nicolas.blin7@hotmail.fr>
Date: Wed, 23 Jul 2025 12:19:25 +0000
Subject: [PATCH 2/2] Re-enable batch solver tests

---
 .../cuopt/cuopt/tests/linear_programming/test_lp_solver.py  | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py b/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py
index f0944acdc2..ed0d04ec19 100644
--- a/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py
+++ b/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py
@@ -475,9 +475,6 @@ def test_parse_var_names():
         )
 
 
-@pytest.mark.skip(
-    reason="Intermittent failure, new version is being worked on"
-)
 def test_parser_and_batch_solver():
 
     data_model_list = []
@@ -567,9 +564,6 @@ def test_warm_start_other_problem():
         solver.Solve(data_model_obj2, settings)
 
 
-@pytest.mark.skip(
-    reason="Intermittent failure, new version is being worked on"
-)
 def test_batch_solver_warm_start():
 
     data_model_list = []