Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions benchmarks/linear_programming/cuopt/run_pdlp.cu
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,23 @@ static void parse_arguments(argparse::ArgumentParser& program)
"Path to PDLP hyper-params file to configure PDLP solver. Has priority over PDLP solver "
"modes.");

program.add_argument("--presolve")
.help("enable/disable presolve (default: true for MIP problems, false for LP problems)")
.default_value(0)
.scan<'i', int>()
.choices(0, 1);
program.add_argument("--presolver")
.help("Presolver to use. Possible values: None, Papilo, PSLP, Default")
.default_value("Default")
.choices("None", "Papilo", "PSLP", "Default");

program.add_argument("--solution-path").help("Path where solution file will be generated");
}

// Convert a presolver name (as accepted by the --presolver CLI option) into the
// corresponding cuopt presolver enum value.
// Recognized names: "None", "Papilo", "PSLP", "Default".
// Any unrecognized value falls back to presolver_t::Default; the argparse
// .choices() restriction on --presolver makes that fallback unreachable in practice.
static cuopt::linear_programming::presolver_t string_to_presolver(const std::string& presolver)
{
  if (presolver == "None") return cuopt::linear_programming::presolver_t::None;
  if (presolver == "Papilo") return cuopt::linear_programming::presolver_t::Papilo;
  if (presolver == "PSLP") return cuopt::linear_programming::presolver_t::PSLP;
  // "Default" and any other value both map to Default, so no explicit check is needed.
  return cuopt::linear_programming::presolver_t::Default;
}

static cuopt::linear_programming::pdlp_solver_mode_t string_to_pdlp_solver_mode(
const std::string& mode)
{
Expand Down Expand Up @@ -107,7 +115,7 @@ static cuopt::linear_programming::pdlp_solver_settings_t<int, double> create_sol
string_to_pdlp_solver_mode(program.get<std::string>("--pdlp-solver-mode"));
settings.method = static_cast<cuopt::linear_programming::method_t>(program.get<int>("--method"));
settings.crossover = program.get<int>("--crossover");
settings.presolve = program.get<int>("--presolve");
settings.presolver = string_to_presolver(program.get<std::string>("--presolver"));

return settings;
}
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/branch_and_bound/pseudo_costs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,8 +335,9 @@ void strong_branching(const user_problem_t<i_t, f_t>& original_problem,
}

const auto mps_model = simplex_problem_to_mps_data_model(original_problem);
const raft::handle_t batch_pdlp_handle;
const auto solutions =
batch_pdlp_solve(original_problem.handle_ptr, mps_model, fractional, fraction_values);
batch_pdlp_solve(&batch_pdlp_handle, mps_model, fractional, fraction_values);
f_t batch_pdlp_strong_branching_time = toc(start_batch);

// Find max iteration on how many are done across the batch
Expand Down
1 change: 1 addition & 0 deletions cpp/src/pdlp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ set(LP_CORE_FILES
${CMAKE_CURRENT_SOURCE_DIR}/termination_strategy/infeasibility_information.cu
${CMAKE_CURRENT_SOURCE_DIR}/termination_strategy/convergence_information.cu
${CMAKE_CURRENT_SOURCE_DIR}/optimal_batch_size_handler/optimal_batch_size_handler.cu
${CMAKE_CURRENT_SOURCE_DIR}/utilities/ping_pong_graph.cu
)

# C and Python adapter files
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,7 @@ void pdlp_initial_scaling_strategy_t<i_t, f_t>::scale_problem()
#ifdef CUPDLP_DEBUG_MODE
print("constraint_lower_bound", op_problem_scaled_.constraint_lower_bounds);
print("constraint_upper_bound", op_problem_scaled_.constraint_upper_bounds);
std::vector<f_t2> variable_bounds = host_copy(op_problem_scaled_.variable_bounds);
std::vector<f_t2> variable_bounds = host_copy(op_problem_scaled_.variable_bounds, stream_view_);
std::vector<f_t> lower_bounds;
std::vector<f_t> upper_bounds;
for (const auto& variable_bound : variable_bounds) {
Expand Down
39 changes: 38 additions & 1 deletion cpp/src/pdlp/pdhg.cu
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,12 @@ struct primal_reflected_major_projection_bulk_op {
const f_t obj_coef = objective_coefficients[var_idx];
const f_t aty_val = current_AtY[idx];

cuopt_assert(!isnan(step_size), "primal_step_size is NaN in primal_reflected_major_projection");
cuopt_assert(!isinf(step_size), "primal_step_size is Inf in primal_reflected_major_projection");
cuopt_assert(step_size > f_t(0.0), "primal_step_size must be > 0");
cuopt_assert(!isnan(primal_val), "primal_solution is NaN in primal_reflected_major_projection");
cuopt_assert(!isnan(aty_val), "current_AtY is NaN in primal_reflected_major_projection");

const f_t next = primal_val - step_size * (obj_coef - aty_val);

const f_t2 bounds = variable_bounds[var_idx];
Expand All @@ -576,6 +582,9 @@ struct primal_reflected_major_projection_bulk_op {
potential_next_primal[idx] = next_clamped;
dual_slack[idx] = (next_clamped - next) / step_size;
reflected_primal[idx] = f_t(2.0) * next_clamped - primal_val;

cuopt_assert(!isnan(reflected_primal[idx]),
"reflected_primal is NaN after primal_reflected_major_projection");
}
};

Expand All @@ -599,6 +608,12 @@ struct dual_reflected_major_projection_bulk_op {
const f_t current_dual = dual_solution[idx];
const f_t Ax = dual_gradient[idx];

cuopt_assert(!isnan(step_size), "dual_step_size is NaN in dual_reflected_major_projection");
cuopt_assert(!isinf(step_size), "dual_step_size is Inf in dual_reflected_major_projection");
cuopt_assert(step_size > f_t(0.0), "dual_step_size must be > 0");
cuopt_assert(!isnan(current_dual), "dual_solution is NaN in dual_reflected_major_projection");
cuopt_assert(!isnan(Ax), "dual_gradient is NaN in dual_reflected_major_projection");

const f_t tmp = current_dual / step_size - Ax;
const f_t tmp_proj =
cuda::std::max<f_t>(-constraint_upper_bounds[constraint_idx],
Expand All @@ -607,6 +622,9 @@ struct dual_reflected_major_projection_bulk_op {

potential_next_dual[idx] = next_dual;
reflected_dual[idx] = f_t(2.0) * next_dual - current_dual;

cuopt_assert(!isnan(reflected_dual[idx]),
"reflected_dual is NaN after dual_reflected_major_projection");
}
};

Expand All @@ -631,12 +649,21 @@ struct primal_reflected_projection_bulk_op {
const f_t obj_coef = objective_coefficients[var_idx];
const f_t aty_val = current_AtY[idx];

cuopt_assert(!isnan(step_size), "primal_step_size is NaN in primal_reflected_projection");
cuopt_assert(!isnan(primal_val), "primal_solution is NaN in primal_reflected_projection");
cuopt_assert(!isnan(aty_val), "current_AtY is NaN in primal_reflected_projection");
cuopt_assert(!isinf(step_size), "primal_step_size is Inf in primal_reflected_projection");
cuopt_assert(step_size > f_t(0.0), "primal_step_size must be > 0");

f_t reflected = primal_val - step_size * (obj_coef - aty_val);

const f_t2 bounds = variable_bounds[var_idx];
reflected = cuda::std::max(cuda::std::min(reflected, get_upper(bounds)), get_lower(bounds));

reflected_primal[idx] = f_t(2.0) * reflected - primal_val;

cuopt_assert(!isnan(reflected_primal[idx]),
"reflected_primal is NaN after primal_reflected_projection");
}
};

Expand All @@ -659,13 +686,23 @@ struct dual_reflected_projection_bulk_op {

const f_t step_size = dual_step_size[batch_idx];
const f_t current_dual = dual_solution[idx];
const f_t tmp = current_dual / step_size - dual_gradient[idx];

cuopt_assert(!isnan(step_size), "dual_step_size is NaN in dual_reflected_projection");
cuopt_assert(!isnan(current_dual), "dual_solution is NaN in dual_reflected_projection");
cuopt_assert(!isnan(dual_gradient[idx]), "dual_gradient is NaN in dual_reflected_projection");
cuopt_assert(!isinf(step_size), "dual_step_size is Inf in dual_reflected_projection");
cuopt_assert(step_size > f_t(0.0), "dual_step_size must be > 0");

const f_t tmp = current_dual / step_size - dual_gradient[idx];
const f_t tmp_proj =
cuda::std::max<f_t>(-constraint_upper_bounds[constraint_idx],
cuda::std::min<f_t>(tmp, -constraint_lower_bounds[constraint_idx]));
const f_t next_dual = (tmp - tmp_proj) * step_size;

reflected_dual[idx] = f_t(2.0) * next_dual - current_dual;

cuopt_assert(!isnan(reflected_dual[idx]),
"reflected_dual is NaN after dual_reflected_projection");
}
};

Expand Down
105 changes: 81 additions & 24 deletions cpp/src/pdlp/pdlp.cu
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@

#include <thrust/count.h>
#include <thrust/extrema.h>
#include <thrust/logical.h>

#include <cmath>
#include <optional>
#include <unordered_set>

Expand Down Expand Up @@ -1186,24 +1188,55 @@ static void compute_stats(const rmm::device_uvector<f_t>& vec,
f_t& avg)
{
auto abs_op = [] __host__ __device__(f_t x) { return abs(x); };
auto min_nonzero = [] __host__ __device__(f_t x) {
return x == 0 ? std::numeric_limits<f_t>::max() : abs(x);
};

smallest = thrust::transform_reduce(rmm::exec_policy(vec.stream()),
vec.begin(),
vec.end(),
min_nonzero,
std::numeric_limits<f_t>::max(),
thrust::minimum<f_t>());

largest = thrust::transform_reduce(
rmm::exec_policy(vec.stream()), vec.begin(), vec.end(), abs_op, 0.0f, thrust::maximum<f_t>());

f_t sum = thrust::transform_reduce(
rmm::exec_policy(vec.stream()), vec.begin(), vec.end(), abs_op, 0.0f, thrust::plus<f_t>());

avg = sum / vec.size();
auto min_nonzero = [] __host__ __device__(f_t x)
-> f_t { return x == 0 ? std::numeric_limits<f_t>::max() : abs(x); };

cuopt_assert(vec.size() > 0, "Vector must not be empty");

auto stream = vec.stream();
size_t n = vec.size();

rmm::device_scalar<f_t> d_smallest(stream);
rmm::device_scalar<f_t> d_largest(stream);
rmm::device_scalar<f_t> d_sum(stream);

auto min_nz_iter = thrust::make_transform_iterator(vec.cbegin(), min_nonzero);
auto abs_iter = thrust::make_transform_iterator(vec.cbegin(), abs_op);

void* d_temp = nullptr;
size_t bytes_1 = 0, bytes_2 = 0, bytes_3 = 0;
RAFT_CUDA_TRY(cub::DeviceReduce::Reduce(d_temp,
bytes_1,
min_nz_iter,
d_smallest.data(),
n,
cuda::minimum<>{},
std::numeric_limits<f_t>::max(),
stream));
RAFT_CUDA_TRY(cub::DeviceReduce::Reduce(
d_temp, bytes_2, abs_iter, d_largest.data(), n, cuda::maximum<>{}, f_t(0), stream));
RAFT_CUDA_TRY(cub::DeviceReduce::Reduce(
d_temp, bytes_3, abs_iter, d_sum.data(), n, cuda::std::plus<>{}, f_t(0), stream));

size_t max_bytes = std::max({bytes_1, bytes_2, bytes_3});
rmm::device_buffer temp_buf(max_bytes, stream);

RAFT_CUDA_TRY(cub::DeviceReduce::Reduce(temp_buf.data(),
bytes_1,
min_nz_iter,
d_smallest.data(),
n,
cuda::minimum<>{},
std::numeric_limits<f_t>::max(),
stream));
RAFT_CUDA_TRY(cub::DeviceReduce::Reduce(
temp_buf.data(), bytes_2, abs_iter, d_largest.data(), n, cuda::maximum<>{}, f_t(0), stream));
RAFT_CUDA_TRY(cub::DeviceReduce::Reduce(
temp_buf.data(), bytes_3, abs_iter, d_sum.data(), n, cuda::std::plus<>{}, f_t(0), stream));

smallest = d_smallest.value(stream);
largest = d_largest.value(stream);
avg = d_sum.value(stream) / vec.size();
};

template <typename f_t>
Expand Down Expand Up @@ -1406,11 +1439,25 @@ HDI void fixed_error_computation(const f_t norm_squared_delta_primal,
const f_t interaction,
f_t* fixed_point_error)
{
cuopt_assert(!isnan(norm_squared_delta_primal), "norm_squared_delta_primal must not be NaN");
cuopt_assert(!isnan(norm_squared_delta_dual), "norm_squared_delta_dual must not be NaN");
cuopt_assert(!isnan(primal_weight), "primal_weight must not be NaN");
cuopt_assert(!isnan(step_size), "step_size must not be NaN");
cuopt_assert(!isnan(interaction), "interaction must not be NaN");
cuopt_assert(norm_squared_delta_primal >= f_t(0.0), "norm_squared_delta_primal must be >= 0");
cuopt_assert(norm_squared_delta_dual >= f_t(0.0), "norm_squared_delta_dual must be >= 0");
cuopt_assert(primal_weight > f_t(0.0), "primal_weight must be > 0");
cuopt_assert(step_size > f_t(0.0), "step_size must be > 0");

const f_t movement =
norm_squared_delta_primal * primal_weight + norm_squared_delta_dual / primal_weight;
const f_t computed_interaction = f_t(2.0) * interaction * step_size;

*fixed_point_error = cuda::std::sqrt(movement + computed_interaction);
cuopt_assert(movement + computed_interaction >= f_t(0.0),
"Movement + computed interaction must be >= 0");

// Clamp to 0 to avoid NaN
*fixed_point_error = cuda::std::sqrt(cuda::std::max(f_t(0.0), movement + computed_interaction));

#ifdef CUPDLP_DEBUG_MODE
printf("movement %lf\n", movement);
Expand Down Expand Up @@ -1790,6 +1837,7 @@ void pdlp_solver_t<i_t, f_t>::compute_fixed_error(std::vector<int>& has_restarte
// Sync to make sure all previous cuSparse operations are finished before setting the
// potential_next_dual_solution
RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_));

// Make potential_next_dual_solution point towards reflected dual solution to reuse the code
RAFT_CUSPARSE_TRY(cusparseDnVecSetValues(cusparse_view.potential_next_dual_solution,
(void*)pdhg_solver_.get_reflected_dual().data()));
Expand All @@ -1813,6 +1861,7 @@ void pdlp_solver_t<i_t, f_t>::compute_fixed_error(std::vector<int>& has_restarte
RAFT_CUDA_TRY(cudaStreamSynchronize(
stream_view_)); // To make sure all the data is written from device to host
RAFT_CUDA_TRY(cudaPeekAtLastError());

Comment on lines 1861 to +1864
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

cudaPeekAtLastError() is placed after cudaStreamSynchronize() — it is redundant and should precede the sync.

RAFT_CUDA_TRY(cudaStreamSynchronize(...)) already surfaces both launch-configuration errors (via the sticky error bit) and kernel execution errors. The cudaPeekAtLastError() call that follows can never observe a new error not already caught by the sync. To properly detect kernel launch errors (e.g., invalid grid dimensions) before blocking, cudaPeekAtLastError() must come immediately after the kernel launch.

🛠️ Proposed fix
     kernel_compute_fixed_error<f_t><<<grid_size, block_size, 0, stream_view_>>>(
       make_span(step_size_strategy_.get_norm_squared_delta_primal()),
       make_span(step_size_strategy_.get_norm_squared_delta_dual()),
       make_span(primal_weight_),
       make_span(step_size_),
       make_span(step_size_strategy_.get_interaction()),
       make_span(restart_strategy_.fixed_point_error_));
+    RAFT_CUDA_TRY(cudaPeekAtLastError());
     RAFT_CUDA_TRY(cudaStreamSynchronize(
       stream_view_));  // To make sure all the data is written from device to host
-    RAFT_CUDA_TRY(cudaPeekAtLastError());
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@cpp/src/pdlp/pdlp.cu` around lines 1861 - 1864, The cudaPeekAtLastError()
call is placed after RAFT_CUDA_TRY(cudaStreamSynchronize(...)) making it
redundant; move the RAFT_CUDA_TRY(cudaPeekAtLastError()) to immediately follow
the kernel launch (before RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_))) so
that launch/configuration errors are caught early, then keep the stream sync
(RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_))) to detect execution errors
and ensure device-to-host writes complete.

#ifdef CUPDLP_DEBUG_MODE
RAFT_CUDA_TRY(cudaDeviceSynchronize());
#endif
Expand Down Expand Up @@ -1847,9 +1896,15 @@ void pdlp_solver_t<i_t, f_t>::compute_fixed_error(std::vector<int>& has_restarte
#endif

for (size_t i = 0; i < climber_strategies_.size(); ++i) {
cuopt_assert(!std::isnan(restart_strategy_.fixed_point_error_[i]),
"fixed_point_error_ must not be NaN after compute_fixed_error");
cuopt_assert(restart_strategy_.fixed_point_error_[i] >= f_t(0.0),
"fixed_point_error_ must be >= 0 after compute_fixed_error");
if (has_restarted[i]) {
restart_strategy_.initial_fixed_point_error_[i] = restart_strategy_.fixed_point_error_[i];
has_restarted[i] = false;
cuopt_assert(!std::isnan(restart_strategy_.initial_fixed_point_error_[i]),
"initial_fixed_point_error_ must not be NaN after assignment");
has_restarted[i] = false;
}
}
}
Expand All @@ -1869,6 +1924,7 @@ void pdlp_solver_t<i_t, f_t>::transpose_primal_dual_to_row(
rmm::device_uvector<f_t> dual_slack_transposed(
is_dual_slack_empty ? 0 : primal_size_h_ * climber_strategies_.size(), stream_view_);

RAFT_CUBLAS_TRY(cublasSetStream(handle_ptr_->get_cublas_handle(), stream_view_));
CUBLAS_CHECK(cublasDgeam(handle_ptr_->get_cublas_handle(),
CUBLAS_OP_T,
CUBLAS_OP_N,
Expand Down Expand Up @@ -1945,6 +2001,7 @@ void pdlp_solver_t<i_t, f_t>::transpose_primal_dual_back_to_col(
rmm::device_uvector<f_t> dual_slack_transposed(
is_dual_slack_empty ? 0 : primal_size_h_ * climber_strategies_.size(), stream_view_);

RAFT_CUBLAS_TRY(cublasSetStream(handle_ptr_->get_cublas_handle(), stream_view_));
CUBLAS_CHECK(cublasDgeam(handle_ptr_->get_cublas_handle(),
CUBLAS_OP_T,
CUBLAS_OP_N,
Expand Down Expand Up @@ -2632,7 +2689,7 @@ void pdlp_solver_t<i_t, f_t>::compute_initial_step_size()
rmm::device_uvector<f_t> d_atq(n, stream_view_);

std::mt19937 gen(1);
std::normal_distribution<double> dist(0.0, 1.0);
std::normal_distribution<f_t> dist(f_t(0.0), f_t(1.0));

for (int i = 0; i < m; ++i)
z[i] = dist(gen);
Expand Down Expand Up @@ -2684,7 +2741,7 @@ void pdlp_solver_t<i_t, f_t>::compute_initial_step_size()
vecATQ,
CUSPARSE_SPMV_CSR_ALG2,
(f_t*)cusparse_view_.buffer_transpose.data(),
stream_view_));
stream_view_.value()));

// z = A @ A_t_q
RAFT_CUSPARSE_TRY(
Expand All @@ -2697,7 +2754,7 @@ void pdlp_solver_t<i_t, f_t>::compute_initial_step_size()
vecZ,
CUSPARSE_SPMV_CSR_ALG2,
(f_t*)cusparse_view_.buffer_non_transpose.data(),
stream_view_));
stream_view_.value()));
// sigma_max_sq = dot(q, z)
RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(handle_ptr_->get_cublas_handle(),
m,
Expand All @@ -2706,7 +2763,7 @@ void pdlp_solver_t<i_t, f_t>::compute_initial_step_size()
d_z.data(),
primal_stride,
sigma_max_sq.data(),
stream_view_));
stream_view_.value()));

cub::DeviceTransform::Transform(
cuda::std::make_tuple(d_q.data(), d_z.data()),
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@
#include <thrust/for_each.h>
#include <thrust/functional.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/reverse_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/logical.h>
#include <thrust/sort.h>
#include <thrust/transform_reduce.h>

#include <cub/cub.cuh>

Expand Down
Loading