From 412326387b5d7ed9d4f9132e81f28aa57a71d919 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Mon, 4 Aug 2025 15:18:37 -0700 Subject: [PATCH 01/31] First stab at barrier --- cpp/src/dual_simplex/CMakeLists.txt | 1 + cpp/src/dual_simplex/barrier.cpp | 666 +++++++++++++++++++++++++ cpp/src/dual_simplex/sparse_matrix.cpp | 14 + cpp/src/dual_simplex/sparse_matrix.hpp | 3 + 4 files changed, 684 insertions(+) create mode 100644 cpp/src/dual_simplex/barrier.cpp diff --git a/cpp/src/dual_simplex/CMakeLists.txt b/cpp/src/dual_simplex/CMakeLists.txt index d85471f9b9..1663f6e3ac 100644 --- a/cpp/src/dual_simplex/CMakeLists.txt +++ b/cpp/src/dual_simplex/CMakeLists.txt @@ -14,6 +14,7 @@ # limitations under the License. set(DUAL_SIMPLEX_SRC_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/barrier.cpp ${CMAKE_CURRENT_SOURCE_DIR}/basis_solves.cpp ${CMAKE_CURRENT_SOURCE_DIR}/basis_updates.cpp ${CMAKE_CURRENT_SOURCE_DIR}/bound_flipping_ratio_test.cpp diff --git a/cpp/src/dual_simplex/barrier.cpp b/cpp/src/dual_simplex/barrier.cpp new file mode 100644 index 0000000000..1d636e0e00 --- /dev/null +++ b/cpp/src/dual_simplex/barrier.cpp @@ -0,0 +1,666 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + + +#include +#include +#include + +namespace cuopt::linear_programming::dual_simplex { + +template +class iteration_data_t { + public: + iteration_data_t(const lp_problem_t& lp, + i_t num_upper_bounds, + const simplex_solver_settings_t& settings) + : upper_bounds(num_upper_bounds), + w(num_upper_bounds), + x(lp.num_cols), + y(lp.num_rows), + v(num_upper_bounds), + z(lp.num_cols), + primal_residual(lp.num_rows), + bound_residual(num_upper_bounds), + dual_residual(lp.num_cols), + complementarity_xz_residual(lp.num_cols), + complementarity_wv_residual(num_upper_bounds), + primal_rhs(lp.num_rows), + bound_rhs(num_upper_bounds), + dual_rhs(lp.num_cols), + complementarity_xz_rhs(lp.num_cols), + complementarity_wv_rhs(num_upper_bounds), + dw_aff(num_upper_bounds), + dx_aff(lp.num_cols), + dy_aff(lp.num_rows), + dv_aff(num_upper_bounds), + dz_aff(lp.num_cols), + dw(num_upper_bounds), + dx(lp.num_cols), + dy(lp.num_rows), + dv(num_upper_bounds), + dz(lp.num_cols), + diag(lp.num_cols), + AD(lp.num_rows, lp.num_cols, 0), + DAT(lp.num_cols, lp.num_rows, 0), + ADAT(lp.num_rows, lp.num_rows, 0), + chol(lp.num_rows) + has_factorization(false) + { + + // Create the upper bounds vector + i_t n_upper_bounds = 0; + for (i_t j = 0; j < lp.num_cols; j++) { + if (lp.upper[j] < inf) { + upper_bounds[n_upper_bounds++] = j; + } + } + // Form A*Dinv*A' + diag.set_scalar(1.0); + if (n_upper_bounds > 0) { + for (i_t k = 0; k < n_upper_bounds; k++) { + i_t j = upper_bounds[k]; + diag[j] = 2.0; + } + } + inv_diag.set_scalar(1.0); + if (n_upper_bounds > 0) { + diag.inverse(inv_diag); + } + inv_sqrt_diag.set_scalar(1.0); + if (n_upper_bounds > 0) { + inv_diag.sqrt(inv_sqrt_diag); + } + + // Copy A into AD + AD = lp.A; + // Perform column scaling on A to get AD^(1/2) + AD.scale_columns(inv_sqrt_diag); + + // Form A*D*A' + float64_t start_form_aat = tic(); + csc_matrix_t DAT(lp.num_cols, lp.num_rows, 0); + AD.transpose(DAT); + AD.multiply(DAT, ADAT); + float64_t aat_time = toc(start_form_aat); + settings.log.printf("AAT time %.2fs\n", aat_time); + settings.log.printf("AAT nonzeros %e\n", static_cast(ADAT.col_start[lp.num_rows])); + + // Perform symbolic analysis + chol.analyze(ADAT); + } + + void scatter_upper_bounds(const dense_vector_t& y, dense_vector_t& z) + { + if (n_upper_bounds > 0) { + for (i_t k = 0; k < n_upper_bounds; k++) { + i_t j = upper_bounds[k]; + z[j] = y[k]; + } + } + } + + void gather_upper_bounds(const dense_vector_t& z, dense_vector_t& y) + { + if (n_upper_bounds > 0) { + for (i_t k = 0; k < n_upper_bounds; k++) { + i_t j = upper_bounds[k]; + y[k] = z[j]; + } + } + } + + i_t n_upper_bounds; + dense_vector_t upper_bounds; + + dense_vector_t w; + dense_vector_t x; + dense_vector_t y; + dense_vector_t v; + dense_vector_t z; + + dense_vector_t diag; + dense_vector_t inv_diag; + dense_vector_t inv_sqrt_diag; + + csc_matrix_t AD; + csc_matrix_t DAT; + csc_matrix_t ADAT; + + sparse_cholesky_t chol; + + + bool has_factorization; + + dense_vector_t primal_residual; + dense_vector_t bound_residual; + dense_vector_t dual_residual; + dense_vector_t complementarity_xz_residual; + dense_vector_t complementarity_wv_residual; + + dense_vector_t primal_rhs; + dense_vector_t bound_rhs; + dense_vector_t dual_rhs; + dense_vector_t complementarity_xz_rhs; + dense_vector_t complementarity_wv_rhs; + + dense_vector_t dw_aff; + dense_vector_t dx_aff; + dense_vector_t dy_aff; + dense_vector_t dv_aff; + dense_vector_t dz_aff; + + dense_vector_t dw; + dense_vector_t dx; + dense_vector_t dy; + dense_vector_t dv; + dense_vector_t dz; +}; + +template +void cholesky_factor(const csc_matrix_t& A, csc_matrix_t& L) +{ + cholmod_sparse *A_cholmod = to_cholmod(A); + cholmod_factor *L_cholmod = (cholmod_factor *)cholmod_l_allocate_factor(A_cholmod->nrow, A_cholmod->ncol, 1, 1, 0, CHOLMOD_REAL, &common); + cholmod_l_factorize(A_cholmod, L_cholmod, &common); + L = L_cholmod; +} + +template +void barrier_solver_t::initial_point(iteration_data_t& data) +{ + // Perform a numerical factorization + float64_t fact_start = tic(); + cholesky_factor(data.ADAT, data.L); + float64_t elapsed_time = toc(fact_start); + settings.log.printf("Factor nonzeros %e\n", data.L.col_start[lp.num_rows]); + settings.log.printf("Factorization time: %.3fs\n", elapsed_time); + + // rhs_x <- b + dense_vector_t rhs_x(lp.rhs); + + dense_vector_t Fu(lp.num_cols); + data.gather_upper_bounds(lp.u, Fu); + + dense_vector_t DinvFu(lp.num_cols); // DinvFu = Dinv * Fu + data.inv_diag.pairwise_product(Fu, DinvFu); + + // rhs_x <- A * Dinv * F * u - b + matrix_vector_multiply(lp.A, 1.0, DinvFu, -1.0, rhs_x); + + + // Solve A*Dinv*A'*q = A*Dinv*F*u - b + dense_vector_t q(lp.num_rows); + cholesky_solve(data.L, rhs_x, q); + + // rhs_x <- A*Dinv*A'*q - rhs_x + matrix_vector_multiply(data.ADAT, 1.0, q, -1.0, rhs_x); + settings.log.printf("|| A*Dinv*A'*q - (A*Dinv*F*u - b) || = %e\n", vector_norm2(rhs_x)); + + // x = Dinv*(F*u - A'*q) + data.inv_diag.pairwise_product(Fu, data.x); + // Fu <- -1.0 * A' * q + 1.0 * Fu + matrix_transpose_vector_multiply(lp.A, -1.0, q, 1.0, Fu); + + // x <- Dinv * (F*u - A'*q) + data.inv_diag.pairwise_product(Fu, data.x); + + // w <- E'*u - E'*x + if (data.n_upper_bounds > 0) { + for (i_t k = 0; k < data.n_upper_bounds; k++) { + i_t j = data.upper_bounds[k]; + data.w[k] = lp.u[j] - data.x[j]; + } + } + + // Verify A*x = b + data.primal_residual = lp.rhs; + matrix_vector_multiply(lp.A, 1.0, data.x, -1.0, data.primal_residual); + settings.log.printf("||b - A * x||: %e\n", vector_norm2(data.primal_residual)); + + if (data.n_upper_bounds > 0) { + for (i_t k = 0; k < data.n_upper_bounds; k++) { + i_t j = data.upper_bounds[k]; + data.bound_residual[k] = lp.u[j] - data.w[k] - data.x[j]; + } + settings.log.printf("|| u - w - x||: %e\n", vector_norm2(data.bound_residual)); + } + + // First compute rhs = A*Dinv*c + dense_vector_t rhs(lp.num_rows); + dense_vector_t Dinvc(lp.num_cols); + data.inv_diag.pairwise_product(lp.c, Dinvc); + // rhs = 1.0 * A * Dinv * c + matrix_vector_multiply(lp.A, 1.0, Dinvc, 0.0, rhs); + + // Solve A*Dinv*A'*q = A*Dinv*c + cholesky_solve(data.L, rhs, data.y); + + // z = Dinv*(c - A'*y) + dense_vector_t cmATy = lp.c; + matrix_transpose_vector_multiply(lp.A, -1.0, data.y, 1.0, cmATy); + // z <- Dinv * (c - A'*y) + data.inv_diag.pairwise_product(cmATy, data.z); + + // v = -E'*z + data.gather_upper_bounds(data.z, data.v); + data.v.multiply_scalar(-1.0); + + // Verify A'*y + z - E*v = c + data.z.pairwise_subtract(lp.c, data.dual_residual); + matrix_transpose_vector_multiply(lp.A, 1.0, data.y, 1.0, data.dual_residual); + if (data.n_upper_bounds > 0) { + for (i_t k = 0; k < data.n_upper_bounds; k++) { + i_t j = data.upper_bounds[k]; + data.dual_residual[j] -= data.v[k]; + } + } + settings.log.printf("||A^T y + z - E*v - c ||: %e\n", vector_norm2(data.dual_residual)); + + // Make sure (w, x, v, z) > 0 + float64_t epsilon_adjust = 10.0; + data.w.ensure_positive(epsilon_adjust); + data.x.ensure_positive(epsilon_adjust); + data.v.ensure_positive(epsilon_adjust); + data.z.ensure_positive(epsilon_adjust); +} + +template +void barrier_solver_t::compute_residuals() +{ + // Compute primal_residual = b - A*x + data.primal_residual = lp.rhs; + matrix_vector_multiply(lp.A, -1.0, data.x, 1.0, data.primal_residual); + + // Compute bound_residual = E'*u - w - E'*x + if (data.n_upper_bounds > 0) { + for (i_t k = 0; k < data.n_upper_bounds; k++) { + i_t j = data.upper_bounds[k]; + data.bound_residual[k] = lp.u[j] - data.w[k] - data.x[j]; + } + } + + // Compute dual_residual = c - A'*y - z + E*v + data.z.pairwise_subtract(lp.c, data.dual_residual); + matrix_transpose_vector_multiply(lp.A, -1.0, data.y, 1.0, data.dual_residual); + if (data.n_upper_bounds > 0) { + for (i_t k = 0; k < data.n_upper_bounds; k++) { + i_t j = data.upper_bounds[k]; + data.dual_residual[j] += data.v[k]; + } + } + + // Compute complementarity_xz_residual = x.*z + data.x.pairwise_product(data.z, data.complementarity_xz_residual); + + // Compute complementarity_wv_residual = w.*v + data.w.pairwise_product(data.v, data.complementarity_wv_residual); +} + +template +f_t barrier_solver_t::max_step_to_boundary(const dense_vector_t& x, const dense_vector_t& dx) const +{ + float64_t max_step = 1.0; + for (i_t i = 0; i < x.size(); i++) { + if (dx[i] < 0.0) { + max_step = fmin(max_step, -x[i]/dx[i]); + } + } +} + +template +void barrier_solver_t::compute_search_direction(iteration_data_t& data, + dense_vector_t& dw, + dense_vector_t& dx, + dense_vector_t& dy, + dense_vector_t& dv, + dense_vector_t& dz) +{ + const bool debug = false; + // Solves the linear system + // + // dw dx dy dv dz + // [ 0 A 0 0 0 ] [ dw ] = [ rp ] + // [ I E' 0 0 0 ] [ dx ] [ rw ] + // [ 0 0 A' -E I ] [ dy ] [ rd ] + // [ 0 Z 0 0 X ] [ dv ] [ rxz ] + // [ V 0 0 W 0 ] [ dz ] [ rwv ] + + // diag = z ./ x + data.z.pairwise_divide(data.x, data.diag); + // diag = z ./ x + E * (v ./ w) * E' + if (data.n_upper_bounds > 0) { + for (i_t k = 0; k < data.n_upper_bounds; k++) { + i_t j = data.upper_bounds[k]; + data.diag[j] += data.v[k] / data.w[k]; + } + } + + // inv_diag = 1.0 ./ diag + data.diag.inverse(data.inv_diag); + + // inv_sqrt_diag <- sqrt(inv_diag) + data.inv_diag.sqrt(data.inv_sqrt_diag); + + // Form A*D*A' + if (!data.has_factorization) { + // copy A into AD + data.AD = lp.A; + // Perform column scaling on A to get AD^(1/2) + data.AD.scale_columns(data.inv_sqrt_diag); + data.AD.transpose(data.DAT); + // compute ADAT = AD^(1/2) * (AD^(1/2))^T + multiply(data.AD, data.DAT, data.ADAT); + const float64_t regularization = 1e-4; + for (i_t j = 0; j < lp.num_rows; j++) { + const i_t col_start = data.ADAT.col_start[j]; + const i_t col_end = data.ADAT.col_start[j + 1]; + for (i_t p = col_start; p < col_end; p++) { + if (data.ADAT.i[p] == j && data.ADAT.x[p] < regularization) { + data.ADAT.x[p] = regularization; + } + } + } + // factorize + cholesky_factor(data.ADAT, data.L); + data.has_factorization = true; + } + + // Compute h = primal_rhs + A*inv_diag*(dual_rhs - complementarity_xz_rhs ./ x + + // E*((complementarity_wv_rhs - v .* bound_rhs) ./ w) ) + dense_vector_t tmp1(data.n_upper_bounds); + dense_vector_t tmp2(data.n_upper_bounds); + dense_vector_t tmp3(lp.num_cols); + dense_vector_t tmp4(lp.num_cols); + + // tmp2 <- v .* bound_rhs + data.v.pairwise_product(data.bound_rhs, tmp2); + tmp2.axpy(1.0, data.complementarity_wv_rhs, -1.0); + tmp2.pairwise_divide(data.w, tmp1); + // tmp1 <- (complementarity_wv_rhs - v .* bound_rhs) ./ w + // tmp3 <- E * tmp1 + data.scatter_upper_bounds(tmp1, tmp3); + + // tmp4 <- complementarity_xz_rhs ./ x + data.complementarity_xz_rhs.pairwise_divide(data.x, tmp4); + // tmp3 <- -complementarity_xz_rhs ./ x + E * ((complementarity_wv_rhs - v .* bound_rhs) ./ w) + tmp3.axpy(-1.0, tmp4, 1.0); + // tmp3 <- dual_rhs -complementarity_xz_rhs ./ x + E * ((complementarity_wv_rhs - v .* bound_rhs) + // ./ w) + tmp3.axpy(1.0, data.dual_rhs, 1.0); + // r1 <- dual_rhs -complementarity_xz_rhs ./ x + E * ((complementarity_wv_rhs - v .* bound_rhs) + // ./ w) + dense_vector_t r1 = tmp3; + // tmp4 <- inv_diag * ( dual_rhs -complementarity_xz_rhs ./ x + E *((complementarity_wv_rhs - v .* + // bound_rhs) ./ w)) + data.inv_diag.pairwise_product(tmp3, tmp4); + + // h <- 1.0 * A * tmp4 + h + matrix_vector_multiply(lp.A, 1.0, tmp4, 1.0, h); + + // Solve A D^{-1} A^T dy = h + cholesky_solve(data.L, h, dy); + + // y_residual <- ADAT*dy - h + dense_vector_t y_residual = h; + matrix_vector_multiply(data.ADAT, 1.0, dy, -1.0, y_residual); + if (debug) { settings.log.printf("||ADAT*dy - h|| = %e\n", vector_norm2(y_residual)); } + + // dx = dinv .* (A'*dy - dual_rhs + complementarity_xz_rhs ./ x - E *((complementarity_wv_rhs - v + // .* bound_rhs) ./ w)) r1 <- A'*dy - r1 + matrix_transpose_vector_multiply(lp.A, 1.0, dy, -1.0, r1); + // dx <- dinv .* r1 + data.inv_diag.pairwise_product(r1, dx); + + // x_residual <- A * dx - primal_rhs + dense_vector_t x_residual = data.primal_rhs; + matrix_vector_multiply(lp.A, 1.0, dx, -1.0, x_residual); + if (debug) { + settings.log.printf("|| A * dx - rp || = %e\n", vector_norm2(x_residual)); + } + + // dz = (complementarity_xz_rhs - z.* dx) ./ x; + // tmp3 <- z .* dx + data.z.pairwise_product(dx, tmp3); + // tmp3 <- 1.0 * complementarity_xz_rhs - tmp3 + tmp3.axpy(1.0, data.complementarity_xz_rhs, -1.0); + // dz <- tmp3 ./ x + tmp3.pairwise_divide(data.x, dz); + + // xz_residual <- z .* dx + x .* dz - complementarity_xz_rhs + dense_vector_t xz_residual = data.complementarity_xz_rhs; + dense_vector_t zdx(lp.num_cols); + dense_vector_t xdz(lp.num_cols); + data.z.pairwise_product(dx, zdx); + data.x.pairwise_product(dz, xdz); + xz_residual.axpy(1.0, zdx, -1.0); + xz_residual.axpy(1.0, xdz, 1.0); + if (debug) { + settings.log.printf("|| Z dx + X dz - rxz || = %e\n", vector_norm2(xz_residual)); + } + + // dv = (v .* E' dx + complementarity_wv_rhs - v .* bound_rhs) ./ w + // tmp1 <- E' * dx + data.gather_upper_bounds(dx, tmp1); + // tmp2 <- v .* E' * dx + data.v.pairwise_product(tmp1, tmp2); + // tmp1 <- v .* bound_rhs + data.v.pairwise_product(data.bound_rhs, tmp1); + // tmp1 <- v .* E' * dx - v . * bound_rhs + tmp1.axpy(1.0, tmp2, -1.0); + // tmp1 <- v .* E' * dx + complementarity_wv_rhs - v.* bound_rhs + tmp1.axpy(1.0, data.complementarity_wv_rhs, 1.0); + // dv <- tmp1 ./ w + tmp1.pairwise_divide(data.w, dv); + + // dw = bound_rhs - E'*dx + data.gather_upper_bounds(dx, tmp1); + tmp1.pairwise_subtract(data.bound_rhs, dw); + + // wv_residual <- v .* dw + w .* dv - complementarity_wv_rhs + dense_vector_t wv_residual = data.complementarity_wv_rhs; + dense_vector_t vdw(data.n_upper_bounds); + dense_vector_t wdv(data.n_upper_bounds); + data.v.pairwise_product(dw, vdw); + data.w.pairwise_product(dv, wdv); + wv_residual.axpy(1.0, vdw, -1.0); + wv_residual.axpy(1.0, wdv, 1.0); + if (debug) { + settings.log.printf("|| V dw + W dv - rwv || = %e\n", vector_norm2(wv_residual)); + } +} + +template +void barrier_solver_t::solve(const barrier_solver_settings_t& options) +{ + + float64_t start_time = tic(); + + i_t n = lp.num_cols; + i_t m = lp.num_rows; + + settings.log.printf("Barrier solver\n"); + settings.log.printf("%d variables\n", n); + settings.log.printf("%d constraints\n", m); + settings.log.printf("%ld nnz in A\n", lp.A.col_start[n]); + + // Compute the number of free variables + i_t num_free_variables = 0; + for (i_t i = 0; i < n; i++) { + if (lp.u[i] == inf && lp.l[i] == -inf) { + num_free_variables++; + } + } + + // Compute the number of upper bounds + i_t num_upper_bounds = 0; + for (i_t i = 0; i < n; i++) { + if (lp.u[i] < inf) { + num_upper_bounds++; + } + } + iteration_data_t data(lp, num_upper_bounds, settings); + settings.log.printf("%d finite upper bounds\n", num_upper_bounds); + + initial_point(data); + compute_residuals(data); + + f_t primal_residual_norm = std::max(vector_norm_inf(data.primal_residual), + vector_norm_inf(data.bound_residual)); + f_t dual_residual_norm = vector_norm_inf(data.dual_residual); + f_t complementarity_residual_norm = std::max(vector_norm_inf(data.complementarity_xz_residual), + vector_norm_inf(data.complementarity_wv_residual)); + f_t mu = (data.complementarity_xz_residual.sum() + data.complementarity_wv_residual.sum()) / (static_cast(n) + static_cast(num_upper_bounds)); + + f_t primal_objective = data.c.inner_product(data.x); + + dense_vector_t restrict_u(num_upper_bounds); + data.gather_upper_bounds(lp.u, restrict_u); + f_t dual_objective = data.b.inner_product(data.y) - restrict_u.inner_product(data.v); + + i_t iter = 0; + settings.log.printf(" objective residual step \n"); + settings.log.printf("iter primal dual primal dual compl primal dual elapsed\n"); + float64_t elapsed_time = toc(start_time); + settings.log.printf("%2d %+.12e %+.12e %.2e %.2e %.2e %.1f\n", \ + iter, primal_objective, dual_objective, primal_residual_norm, dual_residual_norm, complementarity_residual_norm, elapsed_time); + + bool converged = primal_residual_norm < options.feasibility_tol && dual_residual_norm < options.optimality_tol && complementarity_residual_norm < options.complementarity_tol; + + while (iter < options.iteration_limit) { + // Compute the affine step + data.primal_residual = data.primal_rhs; + data.bound_residual = data.bound_rhs; + data.dual_residual = data.dual_rhs; + data.complementarity_xz_residual = data.complementarity_xz_rhs; + data.complementarity_wv_residual = data.complementarity_wv_rhs; + // x.*z -> -x .* z + data.complementarity_xz_rhs.multiply_scalar(-1.0); + // w.*v -> -w .* v + data.complementarity_wv_rhs.multiply_scalar(-1.0); + + compute_search_direction(data, data.dw_aff, data.dx_aff, data.dy_aff, data.dv_aff, data.dz_aff); + + f_t step_primal_aff = std::min(max_step_to_boundary(data.w, data.dw_aff), max_step_to_boundary(data.x, data.dx_aff)); + f_t step_dual_aff = std::min(max_step_to_boundary(data.v, data.dv_aff), max_step_to_boundary(data.z, data.dz_aff)); + + // w_aff = w + step_primalaff * dw_aff + // x_aff = x + step_primal_aff * dx_aff + // v_aff = v + step_dual_aff * dv_aff + // z_aff = z + step_dual_aff * dz_aff + dense_vector_t w_aff = data.w; + dense_vector_t x_aff = data.x; + dense_vector_t v_aff = data.v; + dense_vector_t z_aff = data.z; + w_aff.axpy(step_primal_aff, data.dw_aff, 1.0); + x_aff.axpy(step_primal_aff, data.dx_aff, 1.0); + v_aff.axpy(step_dual_aff, data.dv_aff, 1.0); + z_aff.axpy(step_dual_aff, data.dz_aff, 1.0); + + dense_vector_t complementarity_xz_aff(lp.num_cols); + dense_vector_t complementarity_wv_aff(num_upper_bounds); + x_aff.pairwise_product(z_aff, complementarity_xz_aff); + w_aff.pairwise_product(v_aff, complementarity_wv_aff); + f_t mu_aff = (complementarity_xz_aff.sum() + complementarity_wv_aff.sum()) / (static_cast(n) + static_cast(num_upper_bounds)); + if (mu_aff < 0.0 || mu_aff != mu_aff) { + settings.log.printf("mu aff bad %e\n", mu_aff); + exit(1); + } + + f_t sigma = std::max(0.0, std::min(1.0, std::pow(mu_aff / mu, 3.0))); + + // Compute the combined centering corrector step + data.primal_rhs.set_scalar(0.0); + data.bound_rhs.set_scalar(0.0); + data.dual_rhs.set_scalar(0.0); + // complementarity_xz_rhs = -dx_aff .* dz_aff + sigma * mu + data.dx_aff.pairwise_product(data.dz_aff, data.complementarity_xz_rhs); + data.complementarity_xz_rhs.multiply_scalar(-1.0); + data.complementarity_xz_rhs.add_scalar(sigma * mu); + + // complementarity_wv_rhs = -dw_aff .* dv_aff + sigma * mu + data.dw_aff.pairwise_product(data.dv_aff, data.complementarity_wv_rhs); + data.complementarity_wv_rhs.multiply_scalar(-1.0); + data.complementarity_wv_rhs.add_scalar(sigma * mu); + + compute_search_direction(data, data.dw, data.dx, data.dy, data.dv, data.dz); + data.has_factorization = false; + + // dw = dw_aff + dw_cc + // dx = dx_aff + dx_cc + // dy = dy_aff + dy_cc + // dv = dv_aff + dv_cc + // dz = dz_aff + dz_cc + // Note: dw_cc - dz_cc are stored in dw - dz + data.dw.axpy(1.0, data.dw_aff, 1.0); + data.dx.axpy(1.0, data.dx_aff, 1.0); + data.dy.axpy(1.0, data.dy_aff, 1.0); + data.dv.axpy(1.0, data.dv_aff, 1.0); + data.dz.axpy(1.0, data.dz_aff, 1.0); + + f_t step_primal = std::min(max_step_to_boundary(data.w, data.dw), max_step_to_boundary(data.x, data.dx)); + f_t step_dual = std::min(max_step_to_boundary(data.v, data.dv), max_step_to_boundary(data.z, data.dz)); + + data.w.axpy(options.step_scale * step_primal, data.dw, 1.0); + data.x.axpy(options.step_scale * step_primal, data.dx, 1.0); + data.y.axpy(options.step_scale * step_dual, data.dy, 1.0); + data.v.axpy(options.step_scale * step_dual, data.dv, 1.0); + data.z.axpy(options.step_scale * step_dual, data.dz, 1.0); + + // Handle free variables + if (num_free_variables > 0) + { + settings.log.printf("Free variables not supported yet\n"); + exit(1); + } + + compute_residuals(data); + + mu = (data.complementarity_xz_residual.sum() + data.complementarity_wv_residual.sum()) / (static_cast(n) + static_cast(num_upper_bounds)); + + primal_residual_norm = std::max(vector_norm_inf(data.primal_residual), + vector_norm_inf(data.bound_residual)); + dual_residual_norm = vector_norm_inf(data.dual_residual); + complementarity_residual_norm = std::max(vector_norm_inf(data.complementarity_xz_residual), + vector_norm_inf(data.complementarity_wv_residual)); + + primal_objective = data.c.inner_product(data.x); + dual_objective = data.b.inner_product(data.y) - restrict_u.inner_product(data.v); + iter++; + elapsed_time = toc(start_time); + + settings.log.printf("%2d %+.12e %+.12e %.2e %.2e %.2e %.2e %.2e %.1f\n", \ + iter, primal_objective, dual_objective, primal_residual_norm, dual_residual_norm, complementarity_residual_norm, step_primal, step_dual, elapsed_time); + + bool primal_feasible = primal_residual_norm < options.feasibility_tol; + bool dual_feasible = dual_residual_norm < options.optimality_tol; + bool small_gap = complementarity_residual_norm < options.complementarity_tol; + + converged = primal_feasible && dual_feasible && small_gap; + + if (converged) { + settings.log.printf("Optimal solution found\n"); + settings.log.printf("Optimal objective value %.12e with constant %.12e\n", primal_objective, primal_objective + lp.obj_constant); + settings.log.printf("|| x* ||_inf %e\n", vector_norm_inf(data.x)); + break; + } + } +} + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp index dc4df39904..5fc34062c6 100644 --- a/cpp/src/dual_simplex/sparse_matrix.cpp +++ b/cpp/src/dual_simplex/sparse_matrix.cpp @@ -378,6 +378,20 @@ void csc_matrix_t::print_matrix() const this->print_matrix(stdout); } +template +void csc_matrix_t::scale_columns(const std::vector& scale) +{ + const i_t n = this->n; + assert(scale.size() == n); + for (i_t j = 0; j < n; ++j) { + const i_t col_start = this->col_start[j]; + const i_t col_end = this->col_start[j + 1]; + for (i_t p = col_start; p < col_end; ++p) { + this->x[p] *= scale[j]; + } + } +} + template i_t scatter(const csc_matrix_t& A, i_t j, diff --git a/cpp/src/dual_simplex/sparse_matrix.hpp b/cpp/src/dual_simplex/sparse_matrix.hpp index 9cc3d6380c..b629991ed9 100644 --- a/cpp/src/dual_simplex/sparse_matrix.hpp +++ b/cpp/src/dual_simplex/sparse_matrix.hpp @@ -90,6 +90,9 @@ class csc_matrix_t { // Compute || A ||_1 = max_j (sum {i = 1 to m} | A(i, j) | ) f_t norm1() const; + // Perform column scaling of the matrix + void scale_columns(const std::vector& scale); + i_t nz_max; // maximum number of entries i_t m; // number of rows i_t n; // number of columns From da03c9d0eef659ef50876eec21e2deeb8b434e76 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Tue, 5 Aug 2025 14:25:19 -0500 Subject: [PATCH 02/31] add cudss --- conda/environments/all_cuda-128_arch-aarch64.yaml | 1 + conda/environments/all_cuda-128_arch-x86_64.yaml | 1 + conda/recipes/libcuopt/recipe.yaml | 6 ++++++ dependencies.yaml | 4 ++++ python/libcuopt/pyproject.toml | 1 + 5 files changed, 13 insertions(+) diff --git a/conda/environments/all_cuda-128_arch-aarch64.yaml b/conda/environments/all_cuda-128_arch-aarch64.yaml index b54a48f837..330886f785 100644 --- a/conda/environments/all_cuda-128_arch-aarch64.yaml +++ b/conda/environments/all_cuda-128_arch-aarch64.yaml @@ -34,6 +34,7 @@ dependencies: - httpx - ipython - jsonref==1.1.0 +- libcudss-dev - libcurand-dev - libcusolver-dev - libcusparse-dev diff --git a/conda/environments/all_cuda-128_arch-x86_64.yaml b/conda/environments/all_cuda-128_arch-x86_64.yaml index 7622ebfdec..f132176b84 100644 --- a/conda/environments/all_cuda-128_arch-x86_64.yaml +++ b/conda/environments/all_cuda-128_arch-x86_64.yaml @@ -34,6 +34,7 @@ dependencies: - httpx - ipython - jsonref==1.1.0 +- libcudss-dev - libcurand-dev - libcusolver-dev - libcusparse-dev diff --git a/conda/recipes/libcuopt/recipe.yaml b/conda/recipes/libcuopt/recipe.yaml index fbdcfd5e8c..78a06332e0 100644 --- a/conda/recipes/libcuopt/recipe.yaml +++ b/conda/recipes/libcuopt/recipe.yaml @@ -65,6 +65,7 @@ cache: - librmm =${{ dep_minor_version }} - rapids-logger =0.1 - cuda-nvtx-dev + - libcudss-dev - libcurand-dev - libcusparse-dev - cuda-cudart-dev @@ -94,6 +95,7 @@ outputs: - cuda-nvtx - cuda-version - gtest + - libcudss - libcurand - libcusparse - librmm @@ -130,6 +132,7 @@ outputs: - rapids-logger =0.1 - librmm =${{ dep_minor_version }} - cuda-cudart-dev + - libcudss-dev - libcublas - libcusparse-dev run: @@ -143,6 +146,7 @@ outputs: - cuda-nvtx - cuda-version - gtest + - libcudss - libcurand - libcusparse - librmm @@ -176,6 +180,7 @@ outputs: - gmock ${{ gtest_version }} - gtest ${{ gtest_version }} - cuda-cudart-dev + - libcudss-dev - libcublas - libcusparse-dev run: @@ -189,6 +194,7 @@ outputs: - cuda-nvtx - cuda-version - gtest + - libcudss - libcurand - libcusparse - librmm diff --git a/dependencies.yaml b/dependencies.yaml index 90e95fc8e1..6e64b9570c 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -691,6 +691,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev + - libcudss-dev - cuda-nvtx-dev - matrix: cuda: "12.*" @@ -698,6 +699,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev + - libcudss-dev - cuda-nvtx-dev - cuda-nvvm - cuda-crt @@ -714,6 +716,7 @@ dependencies: - nvidia-curand-cu12 - nvidia-cusparse-cu12 - nvidia-cusolver-cu12 + - nvidia-cudss-cu12 - nvidia-nvtx-cu12 # if use_cuda_wheels=false is provided, do not add dependencies on any CUDA wheels # (e.g. for DLFW and pip devcontainers) @@ -728,6 +731,7 @@ dependencies: - nvidia-curand - nvidia-cusparse - nvidia-cusolver + - nvidia-cudss - nvidia-nvtx develop: common: diff --git a/python/libcuopt/pyproject.toml b/python/libcuopt/pyproject.toml index 449ad72151..e9ad9b78c3 100644 --- a/python/libcuopt/pyproject.toml +++ b/python/libcuopt/pyproject.toml @@ -46,6 +46,7 @@ dependencies = [ "cuopt-mps-parser==25.8.*,>=0.0.0a0", "librmm==25.10.*,>=0.0.0a0", "nvidia-cublas", + "nvidia-cudss", "nvidia-curand", "nvidia-cusolver", "nvidia-cusparse", From 10dadaa9cc1f585b4cc6aa78f2e40102734ec8d0 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Tue, 5 Aug 2025 14:29:44 -0500 Subject: [PATCH 03/31] enable cudss in cmake --- cpp/CMakeLists.txt | 1 + python/libcuopt/CMakeLists.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 2461f85d92..391e9e39ea 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -233,6 +233,7 @@ set(CMAKE_LIBRARY_PATH ${CMAKE_CURRENT_BINARY_DIR}/libmps_parser/) target_link_libraries(cuopt PUBLIC CUDA::cublas + CUDA::cudss CUDA::cusparse rmm::rmm rapids_logger::rapids_logger diff --git a/python/libcuopt/CMakeLists.txt b/python/libcuopt/CMakeLists.txt index 2443086e1c..4922e92eed 100644 --- a/python/libcuopt/CMakeLists.txt +++ b/python/libcuopt/CMakeLists.txt @@ -61,6 +61,7 @@ set(rpaths "$ORIGIN/../../rapids_logger/lib64" "$ORIGIN/../../librmm/lib64" "$ORIGIN/../../nvidia/cublas/lib" + "$ORIGIN/../../nvidia/cudss/lib" "$ORIGIN/../../nvidia/curand/lib" "$ORIGIN/../../nvidia/cusolver/lib" "$ORIGIN/../../nvidia/cusparse/lib" From 69f2b0561b485c9b238f368ef14f4abe83a96e91 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Tue, 5 Aug 2025 17:47:09 -0700 Subject: [PATCH 04/31] First partially working version. You can switch between CHOLMOD and cuDSS --- cpp/CMakeLists.txt | 12 ++ .../cuopt/linear_programming/constants.h | 1 + .../pdlp/solver_settings.hpp | 1 + cpp/src/dual_simplex/CMakeLists.txt | 2 + cpp/src/dual_simplex/barrier.cpp | 186 +++++++++++------- cpp/src/dual_simplex/presolve.cpp | 130 ++++++++++-- cpp/src/dual_simplex/presolve.hpp | 2 + .../dual_simplex/simplex_solver_settings.hpp | 4 + cpp/src/dual_simplex/solve.cpp | 15 ++ cpp/src/linear_programming/solve.cu | 1 + cpp/src/math_optimization/solver_settings.cu | 3 +- 11 files changed, 270 insertions(+), 87 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 0266c4b335..5e919fabbb 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -27,6 +27,9 @@ include(rapids-find) rapids_cuda_init_architectures(CUOPT) +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../cmake") +message(STATUS "CMAKE_MODULE_PATH = ${CMAKE_MODULE_PATH}") + project( CUOPT VERSION 25.08.00 @@ -162,6 +165,9 @@ include(${rapids-cmake-dir}/cpm/rapids_logger.cmake) rapids_cpm_rapids_logger(BUILD_EXPORT_SET cuopt-exports INSTALL_EXPORT_SET cuopt-exports) create_logger_macros(CUOPT "cuopt::default_logger()" include/cuopt) +find_package(CHOLMOD REQUIRED) +find_package(CUDSS REQUIRED) + if(BUILD_TESTS) include(cmake/thirdparty/get_gtest.cmake) endif() @@ -204,6 +210,8 @@ target_link_options(cuopt PRIVATE "${CUOPT_BINARY_DIR}/fatbin.ld") add_library(cuopt::cuopt ALIAS cuopt) # ################################################################################################## # - include paths --------------------------------------------------------------------------------- +message(STATUS "target include directories CHOLMOD_INCLUDES = ${CHOLMOD_INCLUDES}") +message(STATUS "target include directories CUDSS_INCLUDES = ${CUDSS_INCLUDES}") target_include_directories(cuopt PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../thirdparty" @@ -213,6 +221,8 @@ target_include_directories(cuopt "$" "$" "$" + ${CHOLMOD_INCLUDES} + ${CUDSS_INCLUDE} ) # ################################################################################################## @@ -238,6 +248,8 @@ target_link_libraries(cuopt CCCL::CCCL raft::raft cuopt::mps_parser + ${CHOLMOD_LIBRARIES} + ${CUDSS_LIBRARIES} PRIVATE ${CUOPT_PRIVATE_CUDA_LIBS} ) diff --git a/cpp/include/cuopt/linear_programming/constants.h b/cpp/include/cuopt/linear_programming/constants.h index ca4377de97..86fca74292 100644 --- a/cpp/include/cuopt/linear_programming/constants.h +++ b/cpp/include/cuopt/linear_programming/constants.h @@ -50,6 +50,7 @@ #define CUOPT_LOG_FILE "log_file" #define CUOPT_LOG_TO_CONSOLE "log_to_console" #define CUOPT_CROSSOVER "crossover" +#define CUOPT_USE_CUDSS "use_cudss" #define CUOPT_MIP_ABSOLUTE_TOLERANCE "mip_absolute_tolerance" #define CUOPT_MIP_RELATIVE_TOLERANCE "mip_relative_tolerance" #define CUOPT_MIP_INTEGRALITY_TOLERANCE "mip_integrality_tolerance" diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp index 9dcccf7a7a..cf0d01151a 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp @@ -203,6 +203,7 @@ class pdlp_solver_settings_t { std::string user_problem_file{""}; bool per_constraint_residual{false}; bool crossover{false}; + bool use_cudss{false}; bool save_best_primal_so_far{false}; bool first_primal_feasible{false}; method_t method{method_t::Concurrent}; diff --git a/cpp/src/dual_simplex/CMakeLists.txt b/cpp/src/dual_simplex/CMakeLists.txt index 1663f6e3ac..e3586e5f8b 100644 --- a/cpp/src/dual_simplex/CMakeLists.txt +++ b/cpp/src/dual_simplex/CMakeLists.txt @@ -37,5 +37,7 @@ set(DUAL_SIMPLEX_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/triangle_solve.cpp ${CMAKE_CURRENT_SOURCE_DIR}/vector_math.cpp) +set_source_files_properties(${DUAL_SIMPLEX_SRC_FILES} DIRECTORY ${CMAKE_SOURCE_DIR} PROPERTIES COMPILE_OPTIONS "-g1") + set(CUOPT_SRC_FILES ${CUOPT_SRC_FILES} ${DUAL_SIMPLEX_SRC_FILES} PARENT_SCOPE) diff --git a/cpp/src/dual_simplex/barrier.cpp b/cpp/src/dual_simplex/barrier.cpp index 1d636e0e00..d5253963e4 100644 --- a/cpp/src/dual_simplex/barrier.cpp +++ b/cpp/src/dual_simplex/barrier.cpp @@ -20,8 +20,10 @@ #include #include +#include #include + namespace cuopt::linear_programming::dual_simplex { template @@ -31,11 +33,19 @@ class iteration_data_t { i_t num_upper_bounds, const simplex_solver_settings_t& settings) : upper_bounds(num_upper_bounds), + c(lp.objective), + b(lp.rhs), w(num_upper_bounds), x(lp.num_cols), y(lp.num_rows), v(num_upper_bounds), z(lp.num_cols), + diag(lp.num_cols), + inv_diag(lp.num_cols), + inv_sqrt_diag(lp.num_cols), + AD(lp.num_cols, lp.num_rows, 0), + DAT(lp.num_rows, lp.num_cols, 0), + ADAT(lp.num_rows, lp.num_rows, 0), primal_residual(lp.num_rows), bound_residual(num_upper_bounds), dual_residual(lp.num_cols), @@ -56,16 +66,11 @@ class iteration_data_t { dy(lp.num_rows), dv(num_upper_bounds), dz(lp.num_cols), - diag(lp.num_cols), - AD(lp.num_rows, lp.num_cols, 0), - DAT(lp.num_cols, lp.num_rows, 0), - ADAT(lp.num_rows, lp.num_rows, 0), - chol(lp.num_rows) has_factorization(false) { // Create the upper bounds vector - i_t n_upper_bounds = 0; + n_upper_bounds = 0; for (i_t j = 0; j < lp.num_cols; j++) { if (lp.upper[j] < inf) { upper_bounds[n_upper_bounds++] = j; @@ -90,20 +95,26 @@ class iteration_data_t { // Copy A into AD AD = lp.A; - // Perform column scaling on A to get AD^(1/2) + // Perform column scaling on A to get AD^(-1/2) AD.scale_columns(inv_sqrt_diag); - // Form A*D*A' + // Form A*Dinv*A' float64_t start_form_aat = tic(); csc_matrix_t DAT(lp.num_cols, lp.num_rows, 0); AD.transpose(DAT); - AD.multiply(DAT, ADAT); + multiply(AD, DAT, ADAT); float64_t aat_time = toc(start_form_aat); settings.log.printf("AAT time %.2fs\n", aat_time); settings.log.printf("AAT nonzeros %e\n", static_cast(ADAT.col_start[lp.num_rows])); + if (!settings.use_cudss) { + chol = std::make_unique>(lp.num_rows); + } else { + chol = std::make_unique>(lp.num_rows); + } + // Perform symbolic analysis - chol.analyze(ADAT); + chol->analyze(ADAT); } void scatter_upper_bounds(const dense_vector_t& y, dense_vector_t& z) @@ -128,6 +139,8 @@ class iteration_data_t { i_t n_upper_bounds; dense_vector_t upper_bounds; + dense_vector_t c; + dense_vector_t b; dense_vector_t w; dense_vector_t x; @@ -143,7 +156,7 @@ class iteration_data_t { csc_matrix_t DAT; csc_matrix_t ADAT; - sparse_cholesky_t chol; + std::unique_ptr> chol; bool has_factorization; @@ -173,30 +186,18 @@ class iteration_data_t { dense_vector_t dz; }; -template -void cholesky_factor(const csc_matrix_t& A, csc_matrix_t& L) -{ - cholmod_sparse *A_cholmod = to_cholmod(A); - cholmod_factor *L_cholmod = (cholmod_factor *)cholmod_l_allocate_factor(A_cholmod->nrow, A_cholmod->ncol, 1, 1, 0, CHOLMOD_REAL, &common); - cholmod_l_factorize(A_cholmod, L_cholmod, &common); - L = L_cholmod; -} template void barrier_solver_t::initial_point(iteration_data_t& data) { // Perform a numerical factorization - float64_t fact_start = tic(); - cholesky_factor(data.ADAT, data.L); - float64_t elapsed_time = toc(fact_start); - settings.log.printf("Factor nonzeros %e\n", data.L.col_start[lp.num_rows]); - settings.log.printf("Factorization time: %.3fs\n", elapsed_time); + data.chol->factorize(data.ADAT); // rhs_x <- b dense_vector_t rhs_x(lp.rhs); dense_vector_t Fu(lp.num_cols); - data.gather_upper_bounds(lp.u, Fu); + data.gather_upper_bounds(lp.upper, Fu); dense_vector_t DinvFu(lp.num_cols); // DinvFu = Dinv * Fu data.inv_diag.pairwise_product(Fu, DinvFu); @@ -207,7 +208,7 @@ void barrier_solver_t::initial_point(iteration_data_t& data) // Solve A*Dinv*A'*q = A*Dinv*F*u - b dense_vector_t q(lp.num_rows); - cholesky_solve(data.L, rhs_x, q); + data.chol->solve(rhs_x, q); // rhs_x <- A*Dinv*A'*q - rhs_x matrix_vector_multiply(data.ADAT, 1.0, q, -1.0, rhs_x); @@ -225,7 +226,7 @@ void barrier_solver_t::initial_point(iteration_data_t& data) if (data.n_upper_bounds > 0) { for (i_t k = 0; k < data.n_upper_bounds; k++) { i_t j = data.upper_bounds[k]; - data.w[k] = lp.u[j] - data.x[j]; + data.w[k] = lp.upper[j] - data.x[j]; } } @@ -237,7 +238,7 @@ void barrier_solver_t::initial_point(iteration_data_t& data) if (data.n_upper_bounds > 0) { for (i_t k = 0; k < data.n_upper_bounds; k++) { i_t j = data.upper_bounds[k]; - data.bound_residual[k] = lp.u[j] - data.w[k] - data.x[j]; + data.bound_residual[k] = lp.upper[j] - data.w[k] - data.x[j]; } settings.log.printf("|| u - w - x||: %e\n", vector_norm2(data.bound_residual)); } @@ -245,15 +246,15 @@ void barrier_solver_t::initial_point(iteration_data_t& data) // First compute rhs = A*Dinv*c dense_vector_t rhs(lp.num_rows); dense_vector_t Dinvc(lp.num_cols); - data.inv_diag.pairwise_product(lp.c, Dinvc); + data.inv_diag.pairwise_product(lp.objective, Dinvc); // rhs = 1.0 * A * Dinv * c matrix_vector_multiply(lp.A, 1.0, Dinvc, 0.0, rhs); // Solve A*Dinv*A'*q = A*Dinv*c - cholesky_solve(data.L, rhs, data.y); + data.chol->solve(rhs, data.y); // z = Dinv*(c - A'*y) - dense_vector_t cmATy = lp.c; + dense_vector_t cmATy = data.c; matrix_transpose_vector_multiply(lp.A, -1.0, data.y, 1.0, cmATy); // z <- Dinv * (c - A'*y) data.inv_diag.pairwise_product(cmATy, data.z); @@ -263,7 +264,7 @@ void barrier_solver_t::initial_point(iteration_data_t& data) data.v.multiply_scalar(-1.0); // Verify A'*y + z - E*v = c - data.z.pairwise_subtract(lp.c, data.dual_residual); + data.z.pairwise_subtract(data.c, data.dual_residual); matrix_transpose_vector_multiply(lp.A, 1.0, data.y, 1.0, data.dual_residual); if (data.n_upper_bounds > 0) { for (i_t k = 0; k < data.n_upper_bounds; k++) { @@ -282,7 +283,7 @@ void barrier_solver_t::initial_point(iteration_data_t& data) } template -void barrier_solver_t::compute_residuals() +void barrier_solver_t::compute_residuals(iteration_data_t& data) { // Compute primal_residual = b - A*x data.primal_residual = lp.rhs; @@ -292,12 +293,12 @@ void barrier_solver_t::compute_residuals() if (data.n_upper_bounds > 0) { for (i_t k = 0; k < data.n_upper_bounds; k++) { i_t j = data.upper_bounds[k]; - data.bound_residual[k] = lp.u[j] - data.w[k] - data.x[j]; + data.bound_residual[k] = lp.upper[j] - data.w[k] - data.x[j]; } } // Compute dual_residual = c - A'*y - z + E*v - data.z.pairwise_subtract(lp.c, data.dual_residual); + data.c.pairwise_subtract(data.z, data.dual_residual); matrix_transpose_vector_multiply(lp.A, -1.0, data.y, 1.0, data.dual_residual); if (data.n_upper_bounds > 0) { for (i_t k = 0; k < data.n_upper_bounds; k++) { @@ -319,9 +320,10 @@ f_t barrier_solver_t::max_step_to_boundary(const dense_vector_t @@ -332,7 +334,7 @@ void barrier_solver_t::compute_search_direction(iteration_data_t& dv, dense_vector_t& dz) { - const bool debug = false; + const bool debug = true; // Solves the linear system // // dw dx dy dv dz @@ -367,18 +369,23 @@ void barrier_solver_t::compute_search_direction(iteration_data_t 0.0) { + settings.log.printf("Applied regularization %.2e\n", applied_regularization); + } // factorize - cholesky_factor(data.ADAT, data.L); + data.chol->factorize(data.ADAT); data.has_factorization = true; } @@ -411,16 +418,22 @@ void barrier_solver_t::compute_search_direction(iteration_data_t h = data.primal_rhs; // h <- 1.0 * A * tmp4 + h matrix_vector_multiply(lp.A, 1.0, tmp4, 1.0, h); // Solve A D^{-1} A^T dy = h - cholesky_solve(data.L, h, dy); + data.chol->solve(h, dy); // y_residual <- ADAT*dy - h dense_vector_t y_residual = h; matrix_vector_multiply(data.ADAT, 1.0, dy, -1.0, y_residual); - if (debug) { settings.log.printf("||ADAT*dy - h|| = %e\n", vector_norm2(y_residual)); } + if (debug) { + const f_t y_residual_norm = vector_norm_inf(y_residual); + if (y_residual_norm > 1e-2) { + settings.log.printf("||ADAT*dy - h|| = %.2e\n", y_residual_norm); + } +} // dx = dinv .* (A'*dy - dual_rhs + complementarity_xz_rhs ./ x - E *((complementarity_wv_rhs - v // .* bound_rhs) ./ w)) r1 <- A'*dy - r1 @@ -432,7 +445,10 @@ void barrier_solver_t::compute_search_direction(iteration_data_t x_residual = data.primal_rhs; matrix_vector_multiply(lp.A, 1.0, dx, -1.0, x_residual); if (debug) { - settings.log.printf("|| A * dx - rp || = %e\n", vector_norm2(x_residual)); + const f_t x_residual_norm = vector_norm_inf(x_residual); + if (x_residual_norm > 1e-2) { + settings.log.printf("|| A * dx - rp || = %.2e\n", x_residual_norm); + } } // dz = (complementarity_xz_rhs - z.* dx) ./ x; @@ -452,7 +468,10 @@ void barrier_solver_t::compute_search_direction(iteration_data_t(xz_residual)); + const f_t xz_residual_norm = vector_norm_inf(xz_residual); + if (xz_residual_norm > 1e-2) { + settings.log.printf("|| Z dx + X dz - rxz || = %.2e\n", xz_residual_norm); + } } // dv = (v .* E' dx + complementarity_wv_rhs - v .* bound_rhs) ./ w @@ -471,7 +490,8 @@ void barrier_solver_t::compute_search_direction(iteration_data_t wv_residual = data.complementarity_wv_rhs; @@ -482,7 +502,10 @@ void barrier_solver_t::compute_search_direction(iteration_data_t(wv_residual)); + const f_t wv_residual_norm = vector_norm_inf(wv_residual); + if (wv_residual_norm > 1e-2) { + settings.log.printf("|| V dw + W dv - rwv || = %.2e\n", wv_residual_norm); + } } } @@ -495,23 +518,16 @@ void barrier_solver_t::solve(const barrier_solver_settings_t i_t n = lp.num_cols; i_t m = lp.num_rows; - settings.log.printf("Barrier solver\n"); - settings.log.printf("%d variables\n", n); - settings.log.printf("%d constraints\n", m); - settings.log.printf("%ld nnz in A\n", lp.A.col_start[n]); + settings.log.printf("Barrier solver: %d constraints, %d variables, %ld nonzeros\n", m, n, lp.A.col_start[n]); // Compute the number of free variables - i_t num_free_variables = 0; - for (i_t i = 0; i < n; i++) { - if (lp.u[i] == inf && lp.l[i] == -inf) { - num_free_variables++; - } - } + i_t num_free_variables = presolve_info.free_variable_pairs.size() / 2; + settings.log.printf("%d free variables\n", num_free_variables); // Compute the number of upper bounds i_t num_upper_bounds = 0; - for (i_t i = 0; i < n; i++) { - if (lp.u[i] < inf) { + for (i_t j = 0; j < n; j++) { + if (lp.upper[j] < inf) { num_upper_bounds++; } } @@ -527,16 +543,17 @@ void barrier_solver_t::solve(const barrier_solver_settings_t f_t complementarity_residual_norm = std::max(vector_norm_inf(data.complementarity_xz_residual), vector_norm_inf(data.complementarity_wv_residual)); f_t mu = (data.complementarity_xz_residual.sum() + data.complementarity_wv_residual.sum()) / (static_cast(n) + static_cast(num_upper_bounds)); - + settings.log.printf("Initial mu %e\n", mu); f_t primal_objective = data.c.inner_product(data.x); dense_vector_t restrict_u(num_upper_bounds); - data.gather_upper_bounds(lp.u, restrict_u); + dense_vector_t upper(lp.upper); + data.gather_upper_bounds(upper, restrict_u); f_t dual_objective = data.b.inner_product(data.y) - restrict_u.inner_product(data.v); i_t iter = 0; - settings.log.printf(" objective residual step \n"); - settings.log.printf("iter primal dual primal dual compl primal dual elapsed\n"); + settings.log.printf(" Objective Residual Step-Length Time\n"); + settings.log.printf("Iter Primal Dual Primal Dual Compl. Primal Dual Elapsed\n"); float64_t elapsed_time = toc(start_time); settings.log.printf("%2d %+.12e %+.12e %.2e %.2e %.2e %.1f\n", \ iter, primal_objective, dual_objective, primal_residual_norm, dual_residual_norm, complementarity_residual_norm, elapsed_time); @@ -545,11 +562,11 @@ void barrier_solver_t::solve(const barrier_solver_settings_t while (iter < options.iteration_limit) { // Compute the affine step - data.primal_residual = data.primal_rhs; - data.bound_residual = data.bound_rhs; - data.dual_residual = data.dual_rhs; - data.complementarity_xz_residual = data.complementarity_xz_rhs; - data.complementarity_wv_residual = data.complementarity_wv_rhs; + data.primal_rhs = data.primal_residual; + data.bound_rhs = data.bound_residual; + data.dual_rhs = data.dual_residual; + data.complementarity_xz_rhs = data.complementarity_xz_residual; + data.complementarity_wv_rhs = data.complementarity_wv_residual; // x.*z -> -x .* z data.complementarity_xz_rhs.multiply_scalar(-1.0); // w.*v -> -w .* v @@ -578,12 +595,14 @@ void barrier_solver_t::solve(const barrier_solver_settings_t x_aff.pairwise_product(z_aff, complementarity_xz_aff); w_aff.pairwise_product(v_aff, complementarity_wv_aff); f_t mu_aff = (complementarity_xz_aff.sum() + complementarity_wv_aff.sum()) / (static_cast(n) + static_cast(num_upper_bounds)); + //settings.log.printf("mu aff %e\n", mu_aff); if (mu_aff < 0.0 || mu_aff != mu_aff) { settings.log.printf("mu aff bad %e\n", mu_aff); exit(1); } f_t sigma = std::max(0.0, std::min(1.0, std::pow(mu_aff / mu, 3.0))); + //settings.log.printf("sigma %e\n", sigma); // Compute the combined centering corrector step data.primal_rhs.set_scalar(0.0); @@ -626,13 +645,31 @@ void barrier_solver_t::solve(const barrier_solver_settings_t // Handle free variables if (num_free_variables > 0) { - settings.log.printf("Free variables not supported yet\n"); - exit(1); + for (i_t k = 0; k < 2*num_free_variables; k += 2) + { + i_t u = presolve_info.free_variable_pairs[k]; + i_t v = presolve_info.free_variable_pairs[k+1]; + f_t u_val = data.x[u]; + f_t v_val = data.x[v]; + f_t min_val = std::min(u_val, v_val); + f_t eta = options.step_scale*min_val; + data.x[u] -= eta; + data.x[v] -= eta; + } + } + + f_t min_w = data.w.minimum(); + f_t min_x = data.x.minimum(); + f_t min_v = data.v.minimum(); + f_t min_z = data.z.minimum(); + if (min_w < 0.0 || min_x < 0.0 || min_v < 0.0 || min_z < 0.0) { + settings.log.printf("Violated bounds\n"); } compute_residuals(data); mu = (data.complementarity_xz_residual.sum() + data.complementarity_wv_residual.sum()) / (static_cast(n) + static_cast(num_upper_bounds)); + //settings.log.printf("mu %e\n", mu); primal_residual_norm = std::max(vector_norm_inf(data.primal_residual), vector_norm_inf(data.bound_residual)); @@ -645,8 +682,8 @@ void barrier_solver_t::solve(const barrier_solver_settings_t iter++; elapsed_time = toc(start_time); - settings.log.printf("%2d %+.12e %+.12e %.2e %.2e %.2e %.2e %.2e %.1f\n", \ - iter, primal_objective, dual_objective, primal_residual_norm, dual_residual_norm, complementarity_residual_norm, step_primal, step_dual, elapsed_time); + settings.log.printf("%2d %+.12e %+.12e %.2e %.2e %.2e %.2e %.2e %.2e %.1f\n", \ + iter, primal_objective, dual_objective, primal_residual_norm, dual_residual_norm, complementarity_residual_norm, step_primal, step_dual, mu, elapsed_time); bool primal_feasible = primal_residual_norm < options.feasibility_tol; bool dual_feasible = dual_residual_norm < options.optimality_tol; @@ -663,4 +700,13 @@ void barrier_solver_t::solve(const barrier_solver_settings_t } } +#ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE +template class barrier_solver_t; +template class sparse_cholesky_base_t; +template class sparse_cholesky_cholmod_t; +template class sparse_cholesky_cudss_t; +template class iteration_data_t; +template class barrier_solver_settings_t; +#endif + } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index 68043e06a6..c875342d52 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -18,6 +18,7 @@ #include #include +#include #include @@ -690,20 +691,6 @@ void convert_user_problem(const user_problem_t& user_problem, bound_strengthening(row_sense, settings, problem); } - // The original problem may have a variable without a lower bound - // but a finite upper bound - // -inf < x_j <= u_j - i_t no_lower_bound = 0; - for (i_t j = 0; j < problem.num_cols; j++) { - if (problem.lower[j] == -INFINITY && problem.upper[j] < INFINITY) { no_lower_bound++; } - } - - // The original problem may have nonzero lower bounds - // 0 != l_j <= x_j <= u_j - i_t nonzero_lower_bounds = 0; - for (i_t j = 0; j < problem.num_cols; j++) { - if (problem.lower[j] != 0.0 && problem.lower[j] > -INFINITY) { nonzero_lower_bounds++; } - } if (less_rows > 0) { convert_less_than_to_equal(user_problem, row_sense, problem, less_rows, new_slacks); @@ -722,11 +709,120 @@ i_t presolve(const lp_problem_t& original, problem = original; std::vector row_sense(problem.num_rows, '='); + // The original problem may have a variable without a lower bound + // but a finite upper bound + // -inf < x_j <= u_j + i_t no_lower_bound = 0; + for (i_t j = 0; j < problem.num_cols; j++) { + if (problem.lower[j] == -inf && problem.upper[j] < inf) { no_lower_bound++; } + } + settings.log.printf("%d variables with no lower bound\n", no_lower_bound); + + // The original problem may have nonzero lower bounds + // 0 != l_j <= x_j <= u_j + i_t nonzero_lower_bounds = 0; + for (i_t j = 0; j < problem.num_cols; j++) { + if (problem.lower[j] != 0.0 && problem.lower[j] > -inf) { nonzero_lower_bounds++; } + } + if (settings.barrier_presolve && nonzero_lower_bounds > 0) { + settings.log.printf("Transforming %ld nonzero lower bound\n", nonzero_lower_bounds); + // We can construct a new variable: x'_j = x_j - l_j or x_j = x'_j + l_j + // than we have 0 <= x'_j <= u_j - l_j + // Constraints in the form: + // sum_{k != j} a_ik x_k + a_ij * x_j {=, <=} beta_i + // become + // sum_{k != j} a_ik x_k + a_ij * (x'_j + l_j) {=, <=} beta_i + // or + // sum_{k != j} a_ik x_k + a_ij * x'_j {=, <=} beta_i - a_{ij} l_j + // + // the cost function + // sum_{k != j} c_k x_k + c_j * x_j + // becomes + // sum_{k != j} c_k x_k + c_j (x'_j + l_j) + // + // so we get the constant term c_j * l_j + for (i_t j = 0; j < problem.num_cols; j++) + { + if (problem.lower[j] != 0.0 && problem.lower[j] > -inf) + { + for (i_t p = problem.A.col_start[j]; p < problem.A.col_start[j+1]; p++) + { + i_t i = problem.A.i[p]; + f_t aij = problem.A.x[p]; + problem.rhs[i] -= aij * problem.lower[j]; + } + problem.obj_constant += problem.objective[j] * problem.lower[j]; + problem.upper[j] -= problem.lower[j]; + problem.lower[j] = 0.0; + } + } + } + + // Check for free variables i_t free_variables = 0; for (i_t j = 0; j < problem.num_cols; j++) { - if (problem.lower[j] == -INFINITY && problem.upper[j] == INFINITY) { free_variables++; } + if (problem.lower[j] == -inf && problem.upper[j] == inf) { free_variables++; } + } + if (free_variables > 0) { + settings.log.printf("%d free variables\n", free_variables); + + // We have a variable x_j: with -inf < x_j < inf + // we create new variables v and w with 0 <= v, w and x_j = v - w + // Constraints + // sum_{k != j} a_ik x_k + a_ij x_j {=, <=} beta + // become + // sum_{k != j} a_ik x_k + aij v - a_ij w {=, <=} beta + // + // The cost function + // sum_{k != j} c_k x_k + c_j x_j + // becomes + // sum_{k != j} c_k x_k + c_j v - c_j w + + i_t num_cols = problem.num_cols + free_variables; + i_t nnz = problem.A.col_start[problem.num_cols]; + for (i_t j = 0; j 0) { settings.log.printf("%d free variables\n", free_variables); } // Check for empty rows i_t num_empty_rows = 0; @@ -761,6 +857,7 @@ i_t presolve(const lp_problem_t& original, std::vector dependent_rows; constexpr i_t kOk = -1; i_t infeasible; + f_t dependent_row_start = tic(); const i_t independent_rows = find_dependent_rows(problem, settings, dependent_rows, infeasible); if (infeasible != kOk) { settings.log.printf("Found problem infeasible in presolve\n"); @@ -773,6 +870,7 @@ i_t presolve(const lp_problem_t& original, problem.A.to_compressed_row(Arow); remove_rows(problem, row_sense, Arow, dependent_rows); } + settings.log.printf("Dependent row check in %.2fs\n", toc(dependent_row_start)); } assert(problem.num_rows == problem.A.m); assert(problem.num_cols == problem.A.n); diff --git a/cpp/src/dual_simplex/presolve.hpp b/cpp/src/dual_simplex/presolve.hpp index 7a307e6f73..5b2b0348d4 100644 --- a/cpp/src/dual_simplex/presolve.hpp +++ b/cpp/src/dual_simplex/presolve.hpp @@ -59,6 +59,8 @@ struct presolve_info_t { std::vector removed_values; // values of the removed reduced costs std::vector removed_reduced_costs; + // Free variable pairs + std::vector free_variable_pairs; }; template diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index a51ed19bcf..e5cf2ce99d 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -56,6 +56,8 @@ struct simplex_solver_settings_t { use_left_looking_lu(false), eliminate_singletons(true), print_presolve_stats(true), + barrier_presolve(false), + use_cudss(false), refactor_frequency(100), iteration_log_frequency(1000), first_iteration_log(2), @@ -101,6 +103,8 @@ struct simplex_solver_settings_t { use_left_looking_lu; // true to use left looking LU factorization, false to use right looking bool eliminate_singletons; // true to eliminate singletons from the basis bool print_presolve_stats; // true to print presolve stats + bool barrier_presolve; // true to use barrier presolve + bool use_cudss; // true to use cuDSS for sparse Cholesky factorization, false to use cholmod i_t refactor_frequency; // number of basis updates before refactorization i_t iteration_log_frequency; // number of iterations between log updates i_t first_iteration_log; // number of iterations to log at beginning of solve diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index e665bae973..67acab2039 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -17,6 +17,7 @@ #include +#include #include #include #include @@ -242,6 +243,20 @@ lp_status_t solve_linear_program(const user_problem_t& user_problem, lp_solution_t& solution) { f_t start_time = tic(); + { + lp_problem_t original_lp(1, 1, 1); + std::vector new_slacks; + simplex_solver_settings_t barrier_settings = settings; + barrier_settings.barrier_presolve = true; + convert_user_problem(user_problem, barrier_settings, original_lp, new_slacks); + presolve_info_t presolve_info; + lp_problem_t barrier_lp(1, 1, 1); + const i_t ok = presolve(original_lp, barrier_settings, barrier_lp, presolve_info); + if (ok == -1) { return lp_status_t::INFEASIBLE; } + barrier_solver_t barrier_solver(barrier_lp, presolve_info, barrier_settings); + barrier_solver_settings_t barrier_solver_settings; + barrier_solver.solve(barrier_solver_settings); + } lp_problem_t original_lp(1, 1, 1); std::vector new_slacks; convert_user_problem(user_problem, settings, original_lp, new_slacks); diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu index c069859978..30a117bacc 100644 --- a/cpp/src/linear_programming/solve.cu +++ b/cpp/src/linear_programming/solve.cu @@ -306,6 +306,7 @@ run_dual_simplex(dual_simplex::user_problem_t& user_problem, dual_simplex_settings.time_limit = settings.time_limit; dual_simplex_settings.iteration_limit = settings.iteration_limit; dual_simplex_settings.concurrent_halt = settings.concurrent_halt; + dual_simplex_settings.use_cudss = settings.use_cudss; if (dual_simplex_settings.concurrent_halt != nullptr) { // Don't show the dual simplex log in concurrent mode. Show the PDLP log instead dual_simplex_settings.log.log = false; diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu index 0e148610cd..a740a390c5 100644 --- a/cpp/src/math_optimization/solver_settings.cu +++ b/cpp/src/math_optimization/solver_settings.cu @@ -104,7 +104,8 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings {CUOPT_MIP_HEURISTICS_ONLY, &mip_settings.heuristics_only, false}, {CUOPT_LOG_TO_CONSOLE, &pdlp_settings.log_to_console, true}, {CUOPT_LOG_TO_CONSOLE, &mip_settings.log_to_console, true}, - {CUOPT_CROSSOVER, &pdlp_settings.crossover, false} + {CUOPT_CROSSOVER, &pdlp_settings.crossover, false}, + {CUOPT_USE_CUDSS, &pdlp_settings.use_cudss, false} }; // String parameters string_parameters = { From c50e99c0d6dcaa1d612379dfab4c35256533a7f7 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Tue, 5 Aug 2025 21:48:07 -0500 Subject: [PATCH 05/31] Update cmake --- cpp/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 4932be6b9a..a929805de3 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -228,17 +228,18 @@ list(PREPEND CUOPT_PRIVATE_CUDA_LIBS CUDA::cublasLt) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/libmps_parser) set(CMAKE_LIBRARY_PATH ${CMAKE_CURRENT_BINARY_DIR}/libmps_parser/) +find_package(cudss REQUIRED) target_link_libraries(cuopt PUBLIC CUDA::cublas - CUDA::cudss CUDA::cusparse rmm::rmm rapids_logger::rapids_logger CCCL::CCCL raft::raft cuopt::mps_parser + cudss PRIVATE ${CUOPT_PRIVATE_CUDA_LIBS} ) From 4ebf2f9da6bb20b690f3da8aee7f3be7b2ded9cd Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Thu, 7 Aug 2025 10:50:07 -0500 Subject: [PATCH 06/31] install cudss --- ci/build_wheel_libcuopt.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ci/build_wheel_libcuopt.sh b/ci/build_wheel_libcuopt.sh index be78e8963a..e69cbe78c4 100755 --- a/ci/build_wheel_libcuopt.sh +++ b/ci/build_wheel_libcuopt.sh @@ -31,6 +31,15 @@ else echo "Building in release mode" fi +# Install cudss +if command -v dnf &> /dev/null; then + dnf install libcudss-dev -y +else + echo "dnf not found, skipping cudss installation" + exit 1 +fi + + rapids-logger "Generating build requirements" CUOPT_MPS_PARSER_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="cuopt_mps_parser" rapids-download-wheels-from-github python) From d1f9c14029d8b1f673b323996db64c4377a68dc6 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Fri, 8 Aug 2025 16:22:52 -0700 Subject: [PATCH 07/31] Now optimal/suboptimal on 89/93 NETLIB LPs with cuDSS Note that cuDSS is nondeterministic so the number can vary between 87-89. --- cmake/FindCHOLMOD.cmake | 111 +++++ cmake/FindCUDSS.cmake | 29 ++ .../cuopt/linear_programming/constants.h | 1 + .../pdlp/solver_settings.hpp | 3 +- cpp/src/dual_simplex/barrier.cpp | 432 +++++++++++++---- cpp/src/dual_simplex/barrier.hpp | 87 ++++ cpp/src/dual_simplex/dense_vector.hpp | 182 +++++++ cpp/src/dual_simplex/presolve.cpp | 35 +- .../dual_simplex/simplex_solver_settings.hpp | 2 + cpp/src/dual_simplex/solve.cpp | 52 +- cpp/src/dual_simplex/solve.hpp | 5 + cpp/src/dual_simplex/sparse_cholesky.hpp | 458 ++++++++++++++++++ cpp/src/linear_programming/solve.cu | 59 +++ cpp/src/math_optimization/solver_settings.cu | 2 +- 14 files changed, 1331 insertions(+), 127 deletions(-) create mode 100644 cmake/FindCHOLMOD.cmake create mode 100644 cmake/FindCUDSS.cmake create mode 100644 cpp/src/dual_simplex/barrier.hpp create mode 100644 cpp/src/dual_simplex/dense_vector.hpp create mode 100644 cpp/src/dual_simplex/sparse_cholesky.hpp diff --git a/cmake/FindCHOLMOD.cmake b/cmake/FindCHOLMOD.cmake new file mode 100644 index 0000000000..6470e67ebb --- /dev/null +++ b/cmake/FindCHOLMOD.cmake @@ -0,0 +1,111 @@ +# Cholmod lib usually requires linking to a blas and lapack library. +# It is up to the user of this module to find a BLAS and link to it. + +if (CHOLMOD_INCLUDES AND CHOLMOD_LIBRARIES) + set(CHOLMOD_FIND_QUIETLY TRUE) +endif (CHOLMOD_INCLUDES AND CHOLMOD_LIBRARIES) + +message(STATUS "CHOLMOD DIR = $ENV{CHOLMODDIR}") + + +find_path(CHOLMOD_INCLUDES + NAMES + cholmod.h + PATHS + $ENV{CHOLMODDIR} + ${INCLUDE_INSTALL_DIR} + PATH_SUFFIXES + suitesparse + ufsparse +) + +find_path(SUITESPARSE_INCLUDES + NAMES + SuiteSparse_config.h + PATHS + $ENV{SUITESPARSE_DIR} + ${INCLUDE_INSTALL_DIR} +) +message(STATUS "SUITESPARSE_INCLUDES = ${SUITESPARSE_INCLUDES}") +list(APPEND CHOLMOD_INCLUDES ${SUITESPARSE_INCLUDES}) + +message(STATUS "CHOLMOD_LIB_DIR = $ENV{CHOLMOD_LIB_DIR}") + +find_library(CHOLMOD_LIBRARIES cholmod PATHS $ENV{CHOLMOD_LIB_DIR} ${LIB_INSTALL_DIR}) +message(STATUS " find_library CHOLMOD_LIBRARIES = ${CHOLMOD_LIBRARIES}") + +if(CHOLMOD_LIBRARIES) + + get_filename_component(CHOLMOD_LIBDIR ${CHOLMOD_LIBRARIES} PATH) + find_library(AMD_LIBRARY amd PATHS ${CHOLMOD_LIBDIR} $ENV{CHOLMOD_LIB_DIR}/../AMD ${LIB_INSTALL_DIR}) + if (AMD_LIBRARY) + set(CHOLMOD_LIBRARIES ${CHOLMOD_LIBRARIES} ${AMD_LIBRARY}) + else () + set(CHOLMOD_LIBRARIES FALSE) + endif () + +endif(CHOLMOD_LIBRARIES) +message(STATUS "CHOLMOD_LIBRARIES = ${CHOLMOD_LIBRARIES}") + +if(CHOLMOD_LIBRARIES) + + find_library(COLAMD_LIBRARY colamd PATHS ${CHOLMOD_LIBDIR} $ENV{CHOLMOD_LIB_DIR}/../COLAMD ${LIB_INSTALL_DIR}) + if (COLAMD_LIBRARY) + set(CHOLMOD_LIBRARIES ${CHOLMOD_LIBRARIES} ${COLAMD_LIBRARY}) + else () + set(CHOLMOD_LIBRARIES FALSE) + endif () + +endif(CHOLMOD_LIBRARIES) +message(STATUS "CHOLMOD_LIBRARIES = ${CHOLMOD_LIBRARIES}") + +if(CHOLMOD_LIBRARIES) + + find_library(CAMD_LIBRARY camd PATHS ${CHOLMOD_LIBDIR} $ENV{CHOLMOD_LIB_DIR}/../CAMD ${LIB_INSTALL_DIR}) + if (CAMD_LIBRARY) + set(CHOLMOD_LIBRARIES ${CHOLMOD_LIBRARIES} ${CAMD_LIBRARY}) + else () + set(CHOLMOD_LIBRARIES FALSE) + endif () + +endif(CHOLMOD_LIBRARIES) +message(STATUS "CHOLMOD_LIBRARIES = ${CHOLMOD_LIBRARIES}") + +if(CHOLMOD_LIBRARIES) + + find_library(CCOLAMD_LIBRARY ccolamd PATHS ${CHOLMOD_LIBDIR} $ENV{CHOLMOD_LIB_DIR}/../CCOLAMD ${LIB_INSTALL_DIR}) + if (CCOLAMD_LIBRARY) + set(CHOLMOD_LIBRARIES ${CHOLMOD_LIBRARIES} ${CCOLAMD_LIBRARY}) + else () + set(CHOLMOD_LIBRARIES FALSE) + endif () + +endif(CHOLMOD_LIBRARIES) +message(STATUS "CHOLMOD_LIBRARIES = ${CHOLMOD_LIBRARIES}") + +if(CHOLMOD_LIBRARIES) + + find_library(CHOLMOD_METIS_LIBRARY metis PATHS ${CHOLMOD_LIBDIR} $ENV{CHOLMOD_LIB_DIR} ${LIB_INSTALL_DIR}) + if (CHOLMOD_METIS_LIBRARY) + set(CHOLMOD_LIBRARIES ${CHOLMOD_LIBRARIES} ${CHOLMOD_METIS_LIBRARY}) + endif () + +endif(CHOLMOD_LIBRARIES) + +if(CHOLMOD_LIBRARIES) + + find_library(SUITESPARSE_LIBRARY SuiteSparse PATHS ${CHOLMOD_LIBDIR} $ENV{CHOLMOD_LIB_DIR} ${LIB_INSTALL_DIR}) + if (SUITESPARSE_LIBRARY) + set(CHOLMOD_LIBRARIES ${CHOLMOD_LIBRARIES} ${SUITESPARSE_LIBRARY}) + endif (SUITESPARSE_LIBRARY) + +endif(CHOLMOD_LIBRARIES) + +message(STATUS "CHOLMOD_INCLUDES = ${CHOLMOD_INCLUDES}") +message(STATUS "CHOLMOD_LIBRARIES = ${CHOLMOD_LIBRARIES}") + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(CHOLMOD DEFAULT_MSG + CHOLMOD_INCLUDES CHOLMOD_LIBRARIES) + +mark_as_advanced(CHOLMOD_INCLUDES CHOLMOD_LIBRARIES AMD_LIBRARY COLAMD_LIBRARY SUITESPARSE_LIBRARY CAMD_LIBRARY CCOLAMD_LIBRARY CHOLMOD_METIS_LIBRARY) diff --git a/cmake/FindCUDSS.cmake b/cmake/FindCUDSS.cmake new file mode 100644 index 0000000000..6f26e6bd2a --- /dev/null +++ b/cmake/FindCUDSS.cmake @@ -0,0 +1,29 @@ + +if (CUDSS_INCLUDE AND CUDSS_LIBRARIES) + set(CUDSS_FIND_QUIETLY TRUE) +endif (CUDSS_INCLUDE AND CUDSS_LIBRARIES) + +message(STATUS "CUDSS DIR = $ENV{CUDSS_DIR}") + + +find_path(CUDSS_INCLUDE + NAMES + cudss.h + PATHS + $ENV{CUDSS_DIR}/include + ${INCLUDE_INSTALL_DIR} + PATH_SUFFIXES + cudss +) + + +find_library(CUDSS_LIBRARIES cudss PATHS $ENV{CUDSS_DIR}/lib ${LIB_INSTALL_DIR}) + +message(STATUS "CUDSS_INCLUDE = ${CUDSS_INCLUDE}") +message(STATUS "CUDSS_LIBRARIES = ${CUDSS_LIBRARIES}") + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(CHOLMOD DEFAULT_MSG + CUDSS_INCLUDE CUDSS_LIBRARIES) + +mark_as_advanced(CUDSS_INCLUDE CUDSS_LIBRARIES) diff --git a/cpp/include/cuopt/linear_programming/constants.h b/cpp/include/cuopt/linear_programming/constants.h index 86fca74292..55fe544758 100644 --- a/cpp/include/cuopt/linear_programming/constants.h +++ b/cpp/include/cuopt/linear_programming/constants.h @@ -104,6 +104,7 @@ #define CUOPT_METHOD_CONCURRENT 0 #define CUOPT_METHOD_PDLP 1 #define CUOPT_METHOD_DUAL_SIMPLEX 2 +#define CUOPT_METHOD_BARRIER 3 /* @brief Status codes constants */ #define CUOPT_SUCCESS 0 diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp index cf0d01151a..28a3439ced 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp @@ -63,7 +63,8 @@ enum pdlp_solver_mode_t : int { enum method_t : int { Concurrent = CUOPT_METHOD_CONCURRENT, PDLP = CUOPT_METHOD_PDLP, - DualSimplex = CUOPT_METHOD_DUAL_SIMPLEX + DualSimplex = CUOPT_METHOD_DUAL_SIMPLEX, + Barrier = CUOPT_METHOD_BARRIER }; template diff --git a/cpp/src/dual_simplex/barrier.cpp b/cpp/src/dual_simplex/barrier.cpp index d5253963e4..9a03883784 100644 --- a/cpp/src/dual_simplex/barrier.cpp +++ b/cpp/src/dual_simplex/barrier.cpp @@ -44,7 +44,7 @@ class iteration_data_t { inv_diag(lp.num_cols), inv_sqrt_diag(lp.num_cols), AD(lp.num_cols, lp.num_rows, 0), - DAT(lp.num_rows, lp.num_cols, 0), + AT(lp.num_rows, lp.num_cols, 0), ADAT(lp.num_rows, lp.num_rows, 0), primal_residual(lp.num_rows), bound_residual(num_upper_bounds), @@ -76,6 +76,7 @@ class iteration_data_t { upper_bounds[n_upper_bounds++] = j; } } + settings.log.printf("n_upper_bounds %d\n", n_upper_bounds); // Form A*Dinv*A' diag.set_scalar(1.0); if (n_upper_bounds > 0) { @@ -95,23 +96,24 @@ class iteration_data_t { // Copy A into AD AD = lp.A; - // Perform column scaling on A to get AD^(-1/2) - AD.scale_columns(inv_sqrt_diag); + // Perform column scaling on A to get AD^(-1) + AD.scale_columns(inv_diag); // Form A*Dinv*A' float64_t start_form_aat = tic(); - csc_matrix_t DAT(lp.num_cols, lp.num_rows, 0); - AD.transpose(DAT); - multiply(AD, DAT, ADAT); + lp.A.transpose(AT); + multiply(AD, AT, ADAT); + float64_t aat_time = toc(start_form_aat); settings.log.printf("AAT time %.2fs\n", aat_time); settings.log.printf("AAT nonzeros %e\n", static_cast(ADAT.col_start[lp.num_rows])); if (!settings.use_cudss) { - chol = std::make_unique>(lp.num_rows); + chol = std::make_unique>(settings, lp.num_rows); } else { - chol = std::make_unique>(lp.num_rows); + chol = std::make_unique>(settings, lp.num_rows); } + chol->set_positive_definite(false); // Perform symbolic analysis chol->analyze(ADAT); @@ -153,7 +155,7 @@ class iteration_data_t { dense_vector_t inv_sqrt_diag; csc_matrix_t AD; - csc_matrix_t DAT; + csc_matrix_t AT; csc_matrix_t ADAT; std::unique_ptr> chol; @@ -191,7 +193,11 @@ template void barrier_solver_t::initial_point(iteration_data_t& data) { // Perform a numerical factorization - data.chol->factorize(data.ADAT); + i_t status = data.chol->factorize(data.ADAT); + if (status != 0) { + settings.log.printf("Initial factorization failed\n"); + return; + } // rhs_x <- b dense_vector_t rhs_x(lp.rhs); @@ -208,7 +214,10 @@ void barrier_solver_t::initial_point(iteration_data_t& data) // Solve A*Dinv*A'*q = A*Dinv*F*u - b dense_vector_t q(lp.num_rows); - data.chol->solve(rhs_x, q); + settings.log.printf("||rhs_x|| = %e\n", vector_norm2(rhs_x)); + i_t solve_status = data.chol->solve(rhs_x, q); + settings.log.printf("Initial solve status %d\n", solve_status); + settings.log.printf("||q|| = %e\n", vector_norm2(q)); // rhs_x <- A*Dinv*A'*q - rhs_x matrix_vector_multiply(data.ADAT, 1.0, q, -1.0, rhs_x); @@ -283,56 +292,96 @@ void barrier_solver_t::initial_point(iteration_data_t& data) } template -void barrier_solver_t::compute_residuals(iteration_data_t& data) +void barrier_solver_t::compute_residuals(const dense_vector_t& w, + const dense_vector_t& x, + const dense_vector_t& y, + const dense_vector_t& v, + const dense_vector_t& z, + iteration_data_t& data) { // Compute primal_residual = b - A*x data.primal_residual = lp.rhs; - matrix_vector_multiply(lp.A, -1.0, data.x, 1.0, data.primal_residual); + matrix_vector_multiply(lp.A, -1.0, x, 1.0, data.primal_residual); // Compute bound_residual = E'*u - w - E'*x if (data.n_upper_bounds > 0) { for (i_t k = 0; k < data.n_upper_bounds; k++) { i_t j = data.upper_bounds[k]; - data.bound_residual[k] = lp.upper[j] - data.w[k] - data.x[j]; + data.bound_residual[k] = lp.upper[j] - w[k] - x[j]; } } // Compute dual_residual = c - A'*y - z + E*v - data.c.pairwise_subtract(data.z, data.dual_residual); - matrix_transpose_vector_multiply(lp.A, -1.0, data.y, 1.0, data.dual_residual); + data.c.pairwise_subtract(z, data.dual_residual); + matrix_transpose_vector_multiply(lp.A, -1.0, y, 1.0, data.dual_residual); if (data.n_upper_bounds > 0) { for (i_t k = 0; k < data.n_upper_bounds; k++) { i_t j = data.upper_bounds[k]; - data.dual_residual[j] += data.v[k]; + data.dual_residual[j] += v[k]; } } // Compute complementarity_xz_residual = x.*z - data.x.pairwise_product(data.z, data.complementarity_xz_residual); + x.pairwise_product(z, data.complementarity_xz_residual); // Compute complementarity_wv_residual = w.*v - data.w.pairwise_product(data.v, data.complementarity_wv_residual); + w.pairwise_product(v, data.complementarity_wv_residual); } template -f_t barrier_solver_t::max_step_to_boundary(const dense_vector_t& x, const dense_vector_t& dx) const +void barrier_solver_t::compute_residual_norms(const dense_vector_t& w, + const dense_vector_t& x, + const dense_vector_t& y, + const dense_vector_t& v, + const dense_vector_t& z, + iteration_data_t& data, + f_t& primal_residual_norm, + f_t& dual_residual_norm, + f_t& complementarity_residual_norm) +{ + compute_residuals(w, x, y, v, z, data); + primal_residual_norm = std::max(vector_norm_inf(data.primal_residual), + vector_norm_inf(data.bound_residual)); + dual_residual_norm = vector_norm_inf(data.dual_residual); + complementarity_residual_norm = std::max(vector_norm_inf(data.complementarity_xz_residual), + vector_norm_inf(data.complementarity_wv_residual)); +} + +template +f_t barrier_solver_t::max_step_to_boundary(const dense_vector_t& x, const dense_vector_t& dx, i_t& index) const { float64_t max_step = 1.0; + index = -1; for (i_t i = 0; i < x.size(); i++) { + // x_i + alpha * dx_i >= 0, x_i >= 0, alpha >= 0 + // We only need to worry about the case where dx_i < 0 + // alpha * dx_i >= -x_i => alpha <= -x_i / dx_i if (dx[i] < 0.0) { - max_step = std::min(max_step, -x[i]/dx[i]); + const f_t ratio = -x[i]/dx[i]; + if (ratio < max_step) { + max_step = ratio; + index = i; + } } } return max_step; } template -void barrier_solver_t::compute_search_direction(iteration_data_t& data, +f_t barrier_solver_t::max_step_to_boundary(const dense_vector_t& x, const dense_vector_t& dx) const +{ + i_t index; + return max_step_to_boundary(x, dx, index); +} + +template +i_t barrier_solver_t::compute_search_direction(iteration_data_t& data, dense_vector_t& dw, dense_vector_t& dx, dense_vector_t& dy, dense_vector_t& dv, - dense_vector_t& dz) + dense_vector_t& dz, + f_t& max_residual) { const bool debug = true; // Solves the linear system @@ -344,6 +393,7 @@ void barrier_solver_t::compute_search_direction(iteration_data_t::compute_search_direction(iteration_data_t 0.0) { - settings.log.printf("Applied regularization %.2e\n", applied_regularization); - } + data.AD.scale_columns(data.inv_diag); + // compute ADAT = A Dinv * A^T + multiply(data.AD, data.AT, data.ADAT); + // factorize - data.chol->factorize(data.ADAT); + i_t status = data.chol->factorize(data.ADAT); + if (status < 0) { + settings.log.printf("Factorization failed.\n"); + return -1; + } data.has_factorization = true; } @@ -398,9 +435,11 @@ void barrier_solver_t::compute_search_direction(iteration_data_t::compute_search_direction(iteration_data_t r1 = tmp3; + dense_vector_t r1_prime = r1; // tmp4 <- inv_diag * ( dual_rhs -complementarity_xz_rhs ./ x + E *((complementarity_wv_rhs - v .* // bound_rhs) ./ w)) data.inv_diag.pairwise_product(tmp3, tmp4); dense_vector_t h = data.primal_rhs; - // h <- 1.0 * A * tmp4 + h + // h <- 1.0 * A * tmp4 + primal_rhs matrix_vector_multiply(lp.A, 1.0, tmp4, 1.0, h); // Solve A D^{-1} A^T dy = h - data.chol->solve(h, dy); + i_t solve_status = data.chol->solve(h, dy); + if (solve_status < 0) { + settings.log.printf("Linear solve failed\n"); + return -1; + } // y_residual <- ADAT*dy - h dense_vector_t y_residual = h; matrix_vector_multiply(data.ADAT, 1.0, dy, -1.0, y_residual); - if (debug) { - const f_t y_residual_norm = vector_norm_inf(y_residual); - if (y_residual_norm > 1e-2) { - settings.log.printf("||ADAT*dy - h|| = %.2e\n", y_residual_norm); - } -} + const f_t y_residual_norm = vector_norm_inf(y_residual); + max_residual = std::max(max_residual, y_residual_norm); + if (y_residual_norm > 1e-2) { + settings.log.printf("|| h || = %.2e\n", vector_norm_inf(h)); + settings.log.printf("||ADAT*dy - h|| = %.2e\n", y_residual_norm); + } + if (y_residual_norm > 10) + { + return -1; + } // dx = dinv .* (A'*dy - dual_rhs + complementarity_xz_rhs ./ x - E *((complementarity_wv_rhs - v - // .* bound_rhs) ./ w)) r1 <- A'*dy - r1 + // .* bound_rhs) ./ w)) + //r1 <- A'*dy - r1 matrix_transpose_vector_multiply(lp.A, 1.0, dy, -1.0, r1); // dx <- dinv .* r1 data.inv_diag.pairwise_product(r1, dx); + // dx_residual <- D * dx - A'*dy - r1 + dense_vector_t dx_residual(lp.num_cols); + // dx_residual <- D*dx + data.diag.pairwise_product(dx, dx_residual); + // dx_residual <- -A'*dy + D*dx + matrix_transpose_vector_multiply(lp.A, -1.0, dy, 1.0, dx_residual); + // dx_residual <- D*dx - A'*dy + r1 + dx_residual.axpy(1.0, r1_prime, 1.0); + if (debug) { + const f_t dx_residual_norm = vector_norm_inf(dx_residual); + max_residual = std::max(max_residual, dx_residual_norm); + if (dx_residual_norm > 1e-2) { + settings.log.printf("|| D * dx - A'*y + r1 || = %.2e\n", dx_residual_norm); + } + } + + dense_vector_t dx_residual_2(lp.num_cols); + // dx_residual_2 <- D^-1 * (A'*dy - r1) + data.inv_diag.pairwise_product(r1, dx_residual_2); + // dx_residual_2 <- D^-1 * (A'*dy - r1) - dx + dx_residual_2.axpy(-1.0, dx, 1.0); + // dx_residual_2 <- D^-1 * (A'*dy - r1) - dx + const f_t dx_residual_2_norm = vector_norm_inf(dx_residual_2); + max_residual = std::max(max_residual, dx_residual_2_norm); + if (dx_residual_2_norm > 1e-2) { + settings.log.printf("|| D^-1 (A'*dy - r1) - dx || = %.2e\n", dx_residual_2_norm); + } + + dense_vector_t dx_residual_5(lp.num_cols); + dense_vector_t dx_residual_6(lp.num_rows); + // dx_residual_5 <- D^-1 * (A'*dy - r1) + data.inv_diag.pairwise_product(r1, dx_residual_5); + // dx_residual_6 <- A * D^-1 (A'*dy - r1) + matrix_vector_multiply(lp.A, 1.0, dx_residual_5, 0.0, dx_residual_6); + // dx_residual_6 <- A * D^-1 (A'*dy - r1) - A * dx + matrix_vector_multiply(lp.A, -1.0, dx, 1.0, dx_residual_6); + const f_t dx_residual_6_norm = vector_norm_inf(dx_residual_6); + max_residual = std::max(max_residual, dx_residual_6_norm); + if (dx_residual_6_norm > 1e-2) { + settings.log.printf("|| A * D^-1 (A'*dy - r1) - A * dx || = %.2e\n", dx_residual_6_norm); + } + + dense_vector_t dx_residual_3(lp.num_cols); + // dx_residual_3 <- D^-1 * r1 + data.inv_diag.pairwise_product(r1_prime, dx_residual_3); + dense_vector_t dx_residual_4(lp.num_rows); + // dx_residual_4 <- A * D^-1 * r1 + matrix_vector_multiply(lp.A, 1.0, dx_residual_3, 0.0, dx_residual_4); + // dx_residual_4 <- A * D^-1 * r1 + A * dx + matrix_vector_multiply(lp.A, 1.0, dx, 1.0, dx_residual_4); + // dx_residual_4 <- - A * D^-1 * r1 - A * dx + ADAT * dy + +#if CHECK_FORM_ADAT + csc_matrix_t ADinv = lp.A; + ADinv.scale_columns(data.inv_diag); + csc_matrix_t ADinvAT(lp.num_rows, lp.num_rows, 1); + csc_matrix_t Atranspose(1, 1, 0); + lp.A.transpose(Atranspose); + multiply(ADinv, Atranspose, ADinvAT); + matrix_vector_multiply(ADinvAT, 1.0, dy, -1.0, dx_residual_4); + const f_t dx_residual_4_norm = vector_norm_inf(dx_residual_4); + max_residual = std::max(max_residual, dx_residual_4_norm); + if (dx_residual_4_norm > 1e-2) { + settings.log.printf("|| ADAT * dy - A * D^-1 * r1 - A * dx || = %.2e\n", dx_residual_4_norm); + } + + csc_matrix_t C(lp.num_rows, lp.num_rows, 1); + add(ADinvAT, data.ADAT, 1.0, -1.0, C); + const f_t matrix_residual = C.norm1(); + max_residual = std::max(max_residual, matrix_residual); + if (matrix_residual > 1e-2) { + settings.log.printf("|| AD^{-1/2} D^{-1} A^T + E - A D^{-1} A^T|| = %.2e\n", matrix_residual); + } +#endif + + dense_vector_t dx_residual_7 = h; + matrix_vector_multiply(data.ADAT, 1.0, dy, -1.0, dx_residual_7); + const f_t dx_residual_7_norm = vector_norm_inf(dx_residual_7); + max_residual = std::max(max_residual, dx_residual_7_norm); + if (dx_residual_7_norm > 1e-2) { + settings.log.printf("|| A D^{-1} A^T * dy - h || = %.2e\n", dx_residual_7_norm); + } + // x_residual <- A * dx - primal_rhs dense_vector_t x_residual = data.primal_rhs; matrix_vector_multiply(lp.A, 1.0, dx, -1.0, x_residual); if (debug) { const f_t x_residual_norm = vector_norm_inf(x_residual); + max_residual = std::max(max_residual, x_residual_norm); if (x_residual_norm > 1e-2) { settings.log.printf("|| A * dx - rp || = %.2e\n", x_residual_norm); } @@ -469,6 +602,7 @@ void barrier_solver_t::compute_search_direction(iteration_data_t(xz_residual); + max_residual = std::max(max_residual, xz_residual_norm); if (xz_residual_norm > 1e-2) { settings.log.printf("|| Z dx + X dz - rxz || = %.2e\n", xz_residual_norm); } @@ -488,10 +622,67 @@ void barrier_solver_t::compute_search_direction(iteration_data_t dv_residual(data.n_upper_bounds); + dense_vector_t dv_residual_2(data.n_upper_bounds); + // dv_residual <- E'*dx + data.gather_upper_bounds(dx, dv_residual); + // dv_residual_2 <- v .* E' * dx + data.v.pairwise_product(dv_residual, dv_residual_2); + // dv_residual_2 <- -v .* E' * dx + dv_residual_2.multiply_scalar(-1.0); + // dv_residual_ <- W .* dv + data.w.pairwise_product(dv, dv_residual); + // dv_residual <- -v .* E' * dx + w .* dv + dv_residual.axpy(1.0, dv_residual_2, 1.0); + // dv_residual <- -v .* E' * dx + w .* dv - complementarity_wv_rhs + dv_residual.axpy(-1.0, data.complementarity_wv_rhs, 1.0); + // dv_residual_2 <- V * bound_rhs + data.v.pairwise_product(data.bound_rhs, dv_residual_2); + // dv_residual <- -v .* E' * dx + w .* dv - complementarity_wv_rhs + v .* bound_rhs + dv_residual.axpy(1.0, dv_residual_2, 1.0); + if (debug) { + const f_t dv_residual_norm = vector_norm_inf(dv_residual); + max_residual = std::max(max_residual, dv_residual_norm); + if (dv_residual_norm > 1e-2) { + settings.log.printf("|| -v .* E' * dx + w .* dv - complementarity_wv_rhs - v .* bound_rhs || = %.2e\n", dv_residual_norm); + } + } + + + // dual_residual <- A' * dy - E * dv + dz - dual_rhs + dense_vector_t dual_residual(lp.num_cols); + // dual_residual <- E * dv + data.scatter_upper_bounds(dv, dual_residual); + // dual_residual <- A' * dy - E * dv + matrix_transpose_vector_multiply(lp.A, 1.0, dy, -1.0, dual_residual); + // dual_residual <- A' * dy - E * dv + dz + dual_residual.axpy(1.0, dz, 1.0); + // dual_residual <- A' * dy - E * dv + dz - dual_rhs + dual_residual.axpy(-1.0, data.dual_rhs, 1.0); + if (debug) { + const f_t dual_residual_norm = vector_norm_inf(dual_residual); + max_residual = std::max(max_residual, dual_residual_norm); + if (dual_residual_norm > 1e-2) { + settings.log.printf("|| A' * dy - E * dv + dz - dual_rhs || = %.2e\n", dual_residual_norm); + } + } + + // dw = bound_rhs - E'*dx data.gather_upper_bounds(dx, tmp1); dw = data.bound_rhs; dw.axpy(-1.0, tmp1, 1.0); + // dw_residual <- dw + E'*dx - bound_rhs + dense_vector_t dw_residual(data.n_upper_bounds); + data.gather_upper_bounds(dx, dw_residual); + dw_residual.axpy(1.0, dw, 1.0); + dw_residual.axpy(-1.0, data.bound_rhs, 1.0); + if (debug) { + const f_t dw_residual_norm = vector_norm_inf(dw_residual); + max_residual = std::max(max_residual, dw_residual_norm); + if (dw_residual_norm > 1e-2) { + settings.log.printf("|| dw + E'*dx - bound_rhs || = %.2e\n", dw_residual_norm); + } + } // wv_residual <- v .* dw + w .* dv - complementarity_wv_rhs dense_vector_t wv_residual = data.complementarity_wv_rhs; @@ -503,14 +694,17 @@ void barrier_solver_t::compute_search_direction(iteration_data_t(wv_residual); + max_residual = std::max(max_residual, wv_residual_norm); if (wv_residual_norm > 1e-2) { settings.log.printf("|| V dw + W dv - rwv || = %.2e\n", wv_residual_norm); } } + + return 0; } template -void barrier_solver_t::solve(const barrier_solver_settings_t& options) +i_t barrier_solver_t::solve(const barrier_solver_settings_t& options) { float64_t start_time = tic(); @@ -535,7 +729,7 @@ void barrier_solver_t::solve(const barrier_solver_settings_t settings.log.printf("%d finite upper bounds\n", num_upper_bounds); initial_point(data); - compute_residuals(data); + compute_residuals(data.w, data.x, data.y, data.v, data.z, data); f_t primal_residual_norm = std::max(vector_norm_inf(data.primal_residual), vector_norm_inf(data.bound_residual)); @@ -560,6 +754,12 @@ void barrier_solver_t::solve(const barrier_solver_settings_t bool converged = primal_residual_norm < options.feasibility_tol && dual_residual_norm < options.optimality_tol && complementarity_residual_norm < options.complementarity_tol; + dense_vector_t w_save = data.w; + dense_vector_t x_save = data.x; + dense_vector_t y_save = data.y; + dense_vector_t v_save = data.v; + dense_vector_t z_save = data.z; + while (iter < options.iteration_limit) { // Compute the affine step data.primal_rhs = data.primal_residual; @@ -572,12 +772,36 @@ void barrier_solver_t::solve(const barrier_solver_settings_t // w.*v -> -w .* v data.complementarity_wv_rhs.multiply_scalar(-1.0); - compute_search_direction(data, data.dw_aff, data.dx_aff, data.dy_aff, data.dv_aff, data.dz_aff); + f_t max_affine_residual = 0.0; + i_t status = compute_search_direction(data, data.dw_aff, data.dx_aff, data.dy_aff, data.dv_aff, data.dz_aff, max_affine_residual); + if (status < 0) { + if (primal_residual_norm < 100 * options.feasibility_tol && + dual_residual_norm < 100 * options.optimality_tol && + complementarity_residual_norm < 100 * options.complementarity_tol) { + settings.log.printf("Suboptimal solution found\n"); + return 1; + } + compute_residual_norms(w_save, x_save, y_save, v_save, z_save, data, primal_residual_norm, dual_residual_norm, complementarity_residual_norm); + if (primal_residual_norm < 100 * options.feasibility_tol && + dual_residual_norm < 100 * options.optimality_tol && + complementarity_residual_norm < 100 * options.complementarity_tol) { + settings.log.printf("Restoring previous solution: primal %.2e dual %.2e complementarity %.2e\n", primal_residual_norm, dual_residual_norm, complementarity_residual_norm); + data.w = w_save; + data.x = x_save; + data.y = y_save; + data.v = v_save; + data.z = z_save; + settings.log.printf("Suboptimal solution found\n"); + return 1; + } + settings.log.printf("Search direction computation failed\n"); + return -1; + } f_t step_primal_aff = std::min(max_step_to_boundary(data.w, data.dw_aff), max_step_to_boundary(data.x, data.dx_aff)); f_t step_dual_aff = std::min(max_step_to_boundary(data.v, data.dv_aff), max_step_to_boundary(data.z, data.dz_aff)); - // w_aff = w + step_primalaff * dw_aff + // w_aff = w + step_primal_aff * dw_aff // x_aff = x + step_primal_aff * dx_aff // v_aff = v + step_dual_aff * dv_aff // z_aff = z + step_dual_aff * dz_aff @@ -595,14 +819,7 @@ void barrier_solver_t::solve(const barrier_solver_settings_t x_aff.pairwise_product(z_aff, complementarity_xz_aff); w_aff.pairwise_product(v_aff, complementarity_wv_aff); f_t mu_aff = (complementarity_xz_aff.sum() + complementarity_wv_aff.sum()) / (static_cast(n) + static_cast(num_upper_bounds)); - //settings.log.printf("mu aff %e\n", mu_aff); - if (mu_aff < 0.0 || mu_aff != mu_aff) { - settings.log.printf("mu aff bad %e\n", mu_aff); - exit(1); - } - f_t sigma = std::max(0.0, std::min(1.0, std::pow(mu_aff / mu, 3.0))); - //settings.log.printf("sigma %e\n", sigma); // Compute the combined centering corrector step data.primal_rhs.set_scalar(0.0); @@ -618,7 +835,18 @@ void barrier_solver_t::solve(const barrier_solver_settings_t data.complementarity_wv_rhs.multiply_scalar(-1.0); data.complementarity_wv_rhs.add_scalar(sigma * mu); - compute_search_direction(data, data.dw, data.dx, data.dy, data.dv, data.dz); + f_t max_corrector_residual = 0.0; + status = compute_search_direction(data, data.dw, data.dx, data.dy, data.dv, data.dz, max_corrector_residual); + if (status < 0) { + if (primal_residual_norm < 100 * options.feasibility_tol && + dual_residual_norm < 100 * options.optimality_tol && + complementarity_residual_norm < 100 * options.complementarity_tol) { + settings.log.printf("Suboptimal solution found\n"); + return 1; + } + settings.log.printf("Search direction computation failed\n"); + return -1; + } data.has_factorization = false; // dw = dw_aff + dw_cc @@ -633,14 +861,17 @@ void barrier_solver_t::solve(const barrier_solver_settings_t data.dv.axpy(1.0, data.dv_aff, 1.0); data.dz.axpy(1.0, data.dz_aff, 1.0); - f_t step_primal = std::min(max_step_to_boundary(data.w, data.dw), max_step_to_boundary(data.x, data.dx)); - f_t step_dual = std::min(max_step_to_boundary(data.v, data.dv), max_step_to_boundary(data.z, data.dz)); + f_t max_step_primal = std::min(max_step_to_boundary(data.w, data.dw), max_step_to_boundary(data.x, data.dx)); + f_t max_step_dual = std::min(max_step_to_boundary(data.v, data.dv), max_step_to_boundary(data.z, data.dz)); - data.w.axpy(options.step_scale * step_primal, data.dw, 1.0); - data.x.axpy(options.step_scale * step_primal, data.dx, 1.0); - data.y.axpy(options.step_scale * step_dual, data.dy, 1.0); - data.v.axpy(options.step_scale * step_dual, data.dv, 1.0); - data.z.axpy(options.step_scale * step_dual, data.dz, 1.0); + f_t step_primal = options.step_scale * max_step_primal; + f_t step_dual = options.step_scale * max_step_dual; + + data.w.axpy(step_primal, data.dw, 1.0); + data.x.axpy(step_primal, data.dx, 1.0); + data.y.axpy(step_dual, data.dy, 1.0); + data.v.axpy(step_dual, data.dv, 1.0); + data.z.axpy(step_dual, data.dz, 1.0); // Handle free variables if (num_free_variables > 0) @@ -658,32 +889,43 @@ void barrier_solver_t::solve(const barrier_solver_settings_t } } - f_t min_w = data.w.minimum(); - f_t min_x = data.x.minimum(); - f_t min_v = data.v.minimum(); - f_t min_z = data.z.minimum(); - if (min_w < 0.0 || min_x < 0.0 || min_v < 0.0 || min_z < 0.0) { - settings.log.printf("Violated bounds\n"); - } + compute_residual_norms(data.w, data.x, data.y, data.v, data.z, data, primal_residual_norm, dual_residual_norm, complementarity_residual_norm); - compute_residuals(data); + if (primal_residual_norm < 100 * options.feasibility_tol && + dual_residual_norm < 100 * options.optimality_tol && + complementarity_residual_norm < 100 * options.complementarity_tol) { + w_save = data.w; + x_save = data.x; + y_save = data.y; + v_save = data.v; + z_save = data.z; + } mu = (data.complementarity_xz_residual.sum() + data.complementarity_wv_residual.sum()) / (static_cast(n) + static_cast(num_upper_bounds)); - //settings.log.printf("mu %e\n", mu); - - primal_residual_norm = std::max(vector_norm_inf(data.primal_residual), - vector_norm_inf(data.bound_residual)); - dual_residual_norm = vector_norm_inf(data.dual_residual); - complementarity_residual_norm = std::max(vector_norm_inf(data.complementarity_xz_residual), - vector_norm_inf(data.complementarity_wv_residual)); primal_objective = data.c.inner_product(data.x); dual_objective = data.b.inner_product(data.y) - restrict_u.inner_product(data.v); iter++; elapsed_time = toc(start_time); - settings.log.printf("%2d %+.12e %+.12e %.2e %.2e %.2e %.2e %.2e %.2e %.1f\n", \ - iter, primal_objective, dual_objective, primal_residual_norm, dual_residual_norm, complementarity_residual_norm, step_primal, step_dual, mu, elapsed_time); + if (primal_objective != primal_objective || dual_objective != dual_objective) { + settings.log.printf("Numerical error in objective\n"); + return -1; + } + + settings.log.printf("%2d %+.12e %+.12e %.2e %.2e %.2e %.2e %.2e %.2e %.2e %.2e %.1f\n", + iter, + primal_objective + lp.obj_constant, + dual_objective + lp.obj_constant, + primal_residual_norm, + dual_residual_norm, + complementarity_residual_norm, + step_primal, + step_dual, + std::min(data.complementarity_xz_residual.minimum(), data.complementarity_wv_residual.minimum()), + mu, + std::max(max_affine_residual, max_corrector_residual), + elapsed_time); bool primal_feasible = primal_residual_norm < options.feasibility_tol; bool dual_feasible = dual_residual_norm < options.optimality_tol; @@ -693,11 +935,11 @@ void barrier_solver_t::solve(const barrier_solver_settings_t if (converged) { settings.log.printf("Optimal solution found\n"); - settings.log.printf("Optimal objective value %.12e with constant %.12e\n", primal_objective, primal_objective + lp.obj_constant); - settings.log.printf("|| x* ||_inf %e\n", vector_norm_inf(data.x)); - break; + settings.log.printf("Optimal objective value %.12e\n", primal_objective + lp.obj_constant); + return 0; } } + return 1; } #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE diff --git a/cpp/src/dual_simplex/barrier.hpp b/cpp/src/dual_simplex/barrier.hpp new file mode 100644 index 0000000000..d3d88c0fb0 --- /dev/null +++ b/cpp/src/dual_simplex/barrier.hpp @@ -0,0 +1,87 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "dual_simplex/dense_vector.hpp" +#include "dual_simplex/sparse_matrix.hpp" +#include "dual_simplex/sparse_cholesky.hpp" +#include "dual_simplex/presolve.hpp" +#include "dual_simplex/simplex_solver_settings.hpp" +#include "dual_simplex/tic_toc.hpp" + +namespace cuopt::linear_programming::dual_simplex { + +template +struct barrier_solver_settings_t { + f_t feasibility_tol = 1e-6; + f_t optimality_tol = 1e-6; + f_t complementarity_tol = 1e-6; + i_t iteration_limit = 1000; + f_t step_scale = 0.9; +}; + + +template +class iteration_data_t; // Forward declare + +template +class barrier_solver_t { + public: + barrier_solver_t(const lp_problem_t& lp, + const presolve_info_t& presolve, + const simplex_solver_settings_t& settings) + : lp(lp), settings(settings), presolve_info(presolve) + { + } + i_t solve(const barrier_solver_settings_t& options); + + private: + void initial_point(iteration_data_t& data); + void compute_residual_norms(const dense_vector_t& w, + const dense_vector_t& x, + const dense_vector_t& y, + const dense_vector_t& v, + const dense_vector_t& z, + iteration_data_t& data, + f_t& primal_residual_norm, + f_t& dual_residual_norm, + f_t& complementarity_residual_norm); + void compute_residuals(const dense_vector_t& w, + const dense_vector_t& x, + const dense_vector_t& y, + const dense_vector_t& v, + const dense_vector_t& z, + iteration_data_t& data); + f_t max_step_to_boundary(const dense_vector_t& x, + const dense_vector_t& dx, + i_t& index) const; + f_t max_step_to_boundary(const dense_vector_t& x, + const dense_vector_t& dx) const; + i_t compute_search_direction(iteration_data_t& data, + dense_vector_t& dw, + dense_vector_t& dx, + dense_vector_t& dy, + dense_vector_t& dv, + dense_vector_t& dz, + f_t& max_residual); + + const lp_problem_t& lp; + const simplex_solver_settings_t& settings; + const presolve_info_t& presolve_info; +}; + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/dense_vector.hpp b/cpp/src/dual_simplex/dense_vector.hpp new file mode 100644 index 0000000000..3192c8d7b5 --- /dev/null +++ b/cpp/src/dual_simplex/dense_vector.hpp @@ -0,0 +1,182 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "dual_simplex/types.hpp" + +#include +#include +#include + +namespace cuopt::linear_programming::dual_simplex { + +template +class dense_vector_t : public std::vector { + public: + dense_vector_t(i_t n) { this->resize(n, 0.0); } + dense_vector_t(const std::vector& in) { + this->resize(in.size()); + for (i_t i = 0; i < in.size(); i++) { + (*this)[i] = in[i]; + } + } + + f_t minimum() const + { + const i_t n = this->size(); + f_t min_x = inf; + for (i_t i = 0; i < n; i++) { + min_x = std::min(min_x, (*this)[i]); + } + return min_x; + } + + f_t maximum() const + { + const i_t n = this->size(); + f_t max_x = -inf; + for (i_t i = 0; i < n; i++) { + max_x = std::max(max_x, (*this)[i]); + } + return max_x; + } + + // b <- sqrt(a) + void sqrt(dense_vector_t& b) const + { + const i_t n = this->size(); + for (i_t i = 0; i < n; i++) { + b[i] = std::sqrt((*this)[i]); + } + } + // a <- a + alpha + void add_scalar(f_t alpha) + { + const i_t n = this->size(); + for (i_t i = 0; i < n; i++) { + (*this)[i] += alpha; + } + } + // a <- alpha * e, e = (1, 1, ..., 1) + void set_scalar(f_t alpha) + { + const i_t n = this->size(); + for (i_t i = 0; i < n; i++) { + (*this)[i] = alpha; + } + } + // a <- alpha * a + void multiply_scalar(f_t alpha) { + const i_t n = this->size(); + for (i_t i = 0; i < n; i++) { + (*this)[i] *= alpha; + } + } + f_t sum() const + { + f_t sum = 0.0; + const i_t n = this->size(); + for (i_t i = 0; i < n; ++i) { + sum += (*this)[i]; + } + return sum; + } + + f_t inner_product(dense_vector_t& b) const + { + f_t dot = 0.0; + const i_t n = this->size(); + for (i_t i = 0; i < n; i++) { + dot += (*this)[i] * b[i]; + } + return dot; + } + + // c <- a .* b + void pairwise_product(const dense_vector_t& b, dense_vector_t& c) const + { + const i_t n = this->size(); + if (b.size() != n) { + printf("Error: b.size() %d != n %d\n", b.size(), n); + exit(1); + } + if (c.size() != n) { + printf("Error: c.size() %d != n %d\n", c.size(), n); + exit(1); + } + for (i_t i = 0; i < n; i++) { + c[i] = (*this)[i] * b[i]; + } + } + // c <- a ./ b + void pairwise_divide(const dense_vector_t& b, dense_vector_t& c) const + { + const i_t n = this->size(); + for (i_t i = 0; i < n; i++) { + c[i] = (*this)[i] / b[i]; + } + } + + // c <- a - b + void pairwise_subtract(const dense_vector_t& b, dense_vector_t& c) const + { + const i_t n = this->size(); + for (i_t i = 0; i < n; i++) { + c[i] = (*this)[i] - b[i]; + } + } + + // y <- alpha * x + beta * y + void axpy(f_t alpha, const dense_vector_t& x, f_t beta) + { + const i_t n = this->size(); + for (i_t i = 0; i < n; i++) { + (*this)[i] = alpha * x[i] + beta * (*this)[i]; + } + } + + + void ensure_positive(f_t epsilon_adjust) + { + const f_t mix_x = minimum(); + if (mix_x <= 0.0) { + const f_t delta_x = -mix_x + epsilon_adjust; + add_scalar(delta_x); + } + } + + void bound_away_from_zero(f_t epsilon_adjust) + { + const i_t n = this->size(); + for (i_t i = 0; i < n; i++) { + if ((*this)[i] < epsilon_adjust) { + (*this)[i] = epsilon_adjust; + } + } + } + + // b <- 1.0 /a + void inverse(dense_vector_t& b) const + { + const i_t n = this->size(); + for (i_t i = 0; i < n; i++) { + b[i] = 1.0 / (*this)[i]; + } + } +}; + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index c875342d52..e2537e5a11 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -216,9 +216,10 @@ template i_t remove_rows(lp_problem_t& problem, const std::vector& row_sense, csr_matrix_t& Arow, - std::vector& row_marker) + std::vector& row_marker, + bool error_on_nonzero_rhs) { - constexpr bool verbose = false; + constexpr bool verbose = true; if (verbose) { printf("Removing rows %d %ld\n", Arow.m, row_marker.size()); } csr_matrix_t Aout; Arow.remove_rows(row_marker, Aout); @@ -233,7 +234,7 @@ i_t remove_rows(lp_problem_t& problem, new_rhs[row_count] = problem.rhs[i]; row_count++; } else { - if (problem.rhs[i] != 0.0) { + if (error_on_nonzero_rhs && problem.rhs[i] != 0.0) { if (verbose) { printf( "Error nonzero rhs %e for zero row %d sense %c\n", problem.rhs[i], i, row_sense[i]); @@ -270,7 +271,7 @@ i_t remove_empty_rows(lp_problem_t& problem, row_marker[i] = 0; } } - const i_t retval = remove_rows(problem, row_sense, Arow, row_marker); + const i_t retval = remove_rows(problem, row_sense, Arow, row_marker, true); return retval; } @@ -519,14 +520,11 @@ i_t find_dependent_rows(lp_problem_t& problem, csc_matrix_t U(m, m, nz); std::vector pinv(n); std::vector q(m); - for (i_t i = 0; i < m; ++i) { - q[i] = i; - } - std::optional> optional_q = q; - // TODO: Replace with right looking LU in crossover PR - // i_t pivots = left_looking_lu(C, settings, 1e-13, optional_q, L, U, pinv); - i_t pivots = 0; + + i_t pivots = right_looking_lu_row_permutation_only(C, settings, 1e-13, tic(), q, pinv); + if (pivots < m) { + settings.log.printf("Found %d dependent rows\n", m - pivots); const i_t num_dependent = m - pivots; std::vector independent_rhs(pivots); std::vector dependent_rhs(num_dependent); @@ -534,7 +532,7 @@ i_t find_dependent_rows(lp_problem_t& problem, i_t ind_count = 0; i_t dep_count = 0; for (i_t i = 0; i < m; ++i) { - i_t row = (*optional_q)[i]; + i_t row = q[i]; if (i < pivots) { dependent_rows[row] = 0; independent_rhs[ind_count++] = problem.rhs[row]; @@ -545,6 +543,7 @@ i_t find_dependent_rows(lp_problem_t& problem, } } +#if 0 std::vector z = independent_rhs; // Solve U1^T z = independent_rhs for (i_t k = 0; k < pivots; ++k) { @@ -576,6 +575,9 @@ i_t find_dependent_rows(lp_problem_t& problem, problem.rhs[dependent_row_list[k]] = 0.0; } } +#endif + } else { + settings.log.printf("No dependent rows found\n"); } return pivots; } @@ -697,7 +699,9 @@ void convert_user_problem(const user_problem_t& user_problem, } // Add artifical variables - add_artifical_variables(problem, equality_rows, new_slacks); + if (!settings.barrier_presolve) { + add_artifical_variables(problem, equality_rows, new_slacks); + } } template @@ -821,6 +825,7 @@ i_t presolve(const lp_problem_t& original, } } assert(problem.A.p[num_cols] == nnz); + problem.A.n = num_cols; problem.num_cols = num_cols; } @@ -852,7 +857,7 @@ i_t presolve(const lp_problem_t& original, } // Check for dependent rows - constexpr bool check_dependent_rows = false; + bool check_dependent_rows = settings.barrier; if (check_dependent_rows) { std::vector dependent_rows; constexpr i_t kOk = -1; @@ -868,7 +873,7 @@ i_t presolve(const lp_problem_t& original, settings.log.printf("%d dependent rows\n", num_dependent_rows); csr_matrix_t Arow; problem.A.to_compressed_row(Arow); - remove_rows(problem, row_sense, Arow, dependent_rows); + remove_rows(problem, row_sense, Arow, dependent_rows, false); } settings.log.printf("Dependent row check in %.2fs\n", toc(dependent_row_start)); } diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index e5cf2ce99d..7958269b0b 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -58,6 +58,7 @@ struct simplex_solver_settings_t { print_presolve_stats(true), barrier_presolve(false), use_cudss(false), + barrier(false), refactor_frequency(100), iteration_log_frequency(1000), first_iteration_log(2), @@ -105,6 +106,7 @@ struct simplex_solver_settings_t { bool print_presolve_stats; // true to print presolve stats bool barrier_presolve; // true to use barrier presolve bool use_cudss; // true to use cuDSS for sparse Cholesky factorization, false to use cholmod + bool barrier; // true to use barrier method, false to use dual simplex method i_t refactor_frequency; // number of basis updates before refactorization i_t iteration_log_frequency; // number of iterations between log updates i_t first_iteration_log; // number of iterations to log at beginning of solve diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index 67acab2039..76cd6d378c 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -237,26 +237,43 @@ lp_status_t solve_linear_program_advanced(const lp_problem_t& original return lp_status; } +template +lp_status_t solve_linear_program_with_barrier(const user_problem_t& user_problem, + const simplex_solver_settings_t& settings, + lp_solution_t& solution) +{ + f_t start_time = tic(); + lp_status_t status = lp_status_t::UNSET; + lp_problem_t original_lp(1, 1, 1); + std::vector new_slacks; + simplex_solver_settings_t barrier_settings = settings; + barrier_settings.barrier_presolve = true; + convert_user_problem(user_problem, barrier_settings, original_lp, new_slacks); + presolve_info_t presolve_info; + lp_problem_t presolved_lp(1, 1, 1); + const i_t ok = presolve(original_lp, barrier_settings, presolved_lp, presolve_info); + lp_problem_t barrier_lp( + presolved_lp.num_rows, presolved_lp.num_cols, presolved_lp.A.col_start[presolved_lp.num_cols]); + std::vector column_scales; + column_scaling(presolved_lp, barrier_settings, barrier_lp, column_scales); + if (ok == -1) { return lp_status_t::INFEASIBLE; } + barrier_solver_t barrier_solver(barrier_lp, presolve_info, barrier_settings); + barrier_solver_settings_t barrier_solver_settings; + i_t barrier_status = barrier_solver.solve(barrier_solver_settings); + if (barrier_status == 0) { + status = lp_status_t::OPTIMAL; + } else { + status = lp_status_t::NUMERICAL_ISSUES; + } + return status; +} + template lp_status_t solve_linear_program(const user_problem_t& user_problem, const simplex_solver_settings_t& settings, lp_solution_t& solution) { - f_t start_time = tic(); - { - lp_problem_t original_lp(1, 1, 1); - std::vector new_slacks; - simplex_solver_settings_t barrier_settings = settings; - barrier_settings.barrier_presolve = true; - convert_user_problem(user_problem, barrier_settings, original_lp, new_slacks); - presolve_info_t presolve_info; - lp_problem_t barrier_lp(1, 1, 1); - const i_t ok = presolve(original_lp, barrier_settings, barrier_lp, presolve_info); - if (ok == -1) { return lp_status_t::INFEASIBLE; } - barrier_solver_t barrier_solver(barrier_lp, presolve_info, barrier_settings); - barrier_solver_settings_t barrier_solver_settings; - barrier_solver.solve(barrier_solver_settings); - } + f_t start_time = tic(); lp_problem_t original_lp(1, 1, 1); std::vector new_slacks; convert_user_problem(user_problem, settings, original_lp, new_slacks); @@ -357,6 +374,11 @@ template lp_status_t solve_linear_program_advanced( std::vector& vstatus, std::vector& edge_norms); + +template lp_status_t solve_linear_program_with_barrier(const user_problem_t& user_problem, + const simplex_solver_settings_t& settings, + lp_solution_t& solution); + template lp_status_t solve_linear_program(const user_problem_t& user_problem, const simplex_solver_settings_t& settings, lp_solution_t& solution); diff --git a/cpp/src/dual_simplex/solve.hpp b/cpp/src/dual_simplex/solve.hpp index f20844d6e6..a54acf6975 100644 --- a/cpp/src/dual_simplex/solve.hpp +++ b/cpp/src/dual_simplex/solve.hpp @@ -56,6 +56,11 @@ lp_status_t solve_linear_program_advanced(const lp_problem_t& original std::vector& vstatus, std::vector& edge_norms); +template +lp_status_t solve_linear_program_with_barrier(const user_problem_t& user_problem, + const simplex_solver_settings_t& settings, + lp_solution_t& solution); + template lp_status_t solve_linear_program(const user_problem_t& user_problem, const simplex_solver_settings_t& settings, diff --git a/cpp/src/dual_simplex/sparse_cholesky.hpp b/cpp/src/dual_simplex/sparse_cholesky.hpp new file mode 100644 index 0000000000..8b0a51492c --- /dev/null +++ b/cpp/src/dual_simplex/sparse_cholesky.hpp @@ -0,0 +1,458 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "dual_simplex/dense_vector.hpp" +#include "dual_simplex/sparse_matrix.hpp" +#include "dual_simplex/simplex_solver_settings.hpp" +#include "dual_simplex/tic_toc.hpp" + + +#include + +#include "cholmod.h" +#include "cudss.h" + +namespace cuopt::linear_programming::dual_simplex { + + +template +class sparse_cholesky_base_t { + public: + virtual ~sparse_cholesky_base_t() = default; + virtual i_t analyze(const csc_matrix_t& A_in) = 0; + virtual i_t factorize(const csc_matrix_t& A_in) = 0; + virtual i_t solve(const dense_vector_t& b, dense_vector_t& x) = 0; + virtual void set_positive_definite(bool positive_definite) = 0; +}; + + +template +class sparse_cholesky_cholmod_t : public sparse_cholesky_base_t { + public: + sparse_cholesky_cholmod_t(const simplex_solver_settings_t& settings, i_t size) : n(size), first_factor(true) { + cholmod_start(&common); + int version[3]; + cholmod_version(version); + settings.log.printf("Using CHOLMOD %d.%d.%d\n", version[0], version[1], version[2]); + A = nullptr; + L = nullptr; + } + ~sparse_cholesky_cholmod_t() override{ + cholmod_free_factor(&L, &common); + cholmod_free_sparse(&A, &common); + cholmod_finish(&common); + } + i_t analyze(const csc_matrix_t& A_in) override + { + A = to_cholmod(A_in); + // Perform symbolic analysis + f_t start_symbolic = tic(); + common.nmethods = 1; + common.method[0].ordering = CHOLMOD_AMD; + L = cholmod_analyze(A, &common); + f_t symbolic_time = toc(start_symbolic); + printf("Symbolic method used %d\n", L->ordering); + printf("Symbolic time %.2fs\n", symbolic_time); + printf("Symbolic nonzeros in factor %e\n", common.method[common.selected].lnz); + printf("Symbolic flops %e\n", common.method[common.selected].fl); + return 0; + } + i_t factorize(const csc_matrix_t& A_in) override + { + cholmod_free_sparse(&A, &common); + A = to_cholmod(A_in); + f_t start_numeric = tic(); + cholmod_factorize(A, L, &common); + f_t numeric_time = toc(start_numeric); + if (first_factor) { + printf("Factor nonzeros %e\n", common.method[common.selected].lnz); + printf("Factor time %.2fs\n", numeric_time); + first_factor = false; + } + if (common.status < CHOLMOD_OK) { + printf("Factorization failed\n"); + exit(1); + } + if (((int32_t) L->minor) != A_in.m) { + printf("AA' not positive definite %ld minors versus %d\n", L->minor, A_in.m); + return -1; + } + return 0; + } + i_t solve(const dense_vector_t& b, dense_vector_t& x) override + { + cholmod_dense *b_cholmod = cholmod_zeros(n, 1, CHOLMOD_REAL, &common); + for (i_t i = 0; i < n; i++) { + ((float64_t *)b_cholmod->x)[i] = b[i]; + } + cholmod_dense *x_cholmod = cholmod_solve(CHOLMOD_A, L, b_cholmod, &common); + for (i_t i = 0; i < n; i++) { + x[i] = ((float64_t *)x_cholmod->x)[i]; + } + +#ifdef CHECK_SOLVE + int32_t no_transpose = 0; + float64_t alpha[2] = {1.0, 0.0}; + float64_t beta[2] = {0.0, 0.0}; + cholmod_dense *residual = cholmod_zeros(n, 1, CHOLMOD_REAL, &common); + + cholmod_sdmult(A, no_transpose, alpha, beta, x_cholmod, residual, &common); + for (i_t i = 0; i < n; i++) { + f_t err = std::abs(((float64_t *)residual->x)[i] - b[i]); + if (err > 1e-6) { + printf("Error: L*L'*x[%d] - b[%d] = %e, x[%d] = %e, b[%d] = %e cholmod_b[%d] = %e\n", i, i, err, i, x[i], i, b[i], i, ((float64_t *)b_cholmod->x)[i]); + } + } + beta[0] = -1.0; + cholmod_sdmult(A, no_transpose, alpha, beta, x_cholmod, b_cholmod, &common); + + + printf("|| L*L'*x - b || = %e\n", cholmod_norm_dense(b_cholmod, 0, &common)); +#endif + + cholmod_free_dense(&x_cholmod, &common); + cholmod_free_dense(&b_cholmod, &common); + return 0; + } + + void set_positive_definite(bool positive_definite) override + { + // Do nothing + } + + private: + + cholmod_sparse *to_cholmod(const csc_matrix_t& A_in) + { + i_t nnz = A_in.col_start[A_in.n]; + cholmod_sparse *A = cholmod_allocate_sparse(A_in.m, A_in.n, nnz, 0, 0, 0, CHOLMOD_REAL, &common); + for (i_t j = 0; j <= A_in.n; j++) { + ((int32_t *)A->p)[j] = A_in.col_start[j]; + } + for (i_t j = 0; j < A_in.n; j++) { + ((int32_t *)A->nz)[j] = A_in.col_start[j + 1] - A_in.col_start[j]; + } + for (i_t p = 0; p < nnz; p++) { + ((int32_t *)A->i)[p] = A_in.i[p]; + } + for (i_t p = 0; p < nnz; p++) { + ((float64_t *)A->x)[p] = A_in.x[p]; + } + A->nzmax = nnz; + A->stype = 1; + cholmod_check_sparse(A, &common); + return A; + } + i_t n; + bool first_factor; + cholmod_sparse *A; + cholmod_factor *L; + cholmod_common common; +}; + +#define CUDSS_EXAMPLE_FREE do {} while(0) + +#define CUDA_CALL_AND_CHECK(call, msg) \ + do { \ + cuda_error = call; \ + if (cuda_error != cudaSuccess) { \ + printf("FAILED: CUDA API returned error = %d, details: " #msg "\n", cuda_error); \ + CUDSS_EXAMPLE_FREE; \ + return -1; \ + } \ + } while(0); + +#define CUDA_CALL_AND_CHECK_EXIT(call, msg) \ + do { \ + cuda_error = call; \ + if (cuda_error != cudaSuccess) { \ + printf("FAILED: CUDA API returned error = %d, details: " #msg "\n", cuda_error); \ + CUDSS_EXAMPLE_FREE; \ + exit(-1); \ + } \ + } while (0); + +#define CUDSS_CALL_AND_CHECK(call, status, msg) \ + do { \ + status = call; \ + if (status != CUDSS_STATUS_SUCCESS) { \ + printf("FAILED: CUDSS call ended unsuccessfully with status = %d, details: " #msg "\n", status); \ + CUDSS_EXAMPLE_FREE; \ + return -2; \ + } \ + } while(0); + +#define CUDSS_CALL_AND_CHECK_EXIT(call, status, msg) \ + do { \ + status = call; \ + if (status != CUDSS_STATUS_SUCCESS) { \ + printf("FAILED: CUDSS call ended unsuccessfully with status = %d, details: " #msg "\n", \ + status); \ + CUDSS_EXAMPLE_FREE; \ + exit(-2); \ + } \ + } while (0); + +template +class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { + public: + sparse_cholesky_cudss_t(const simplex_solver_settings_t& settings, i_t size) : n(size), nnz(-1), first_factor(true), positive_definite(true) { + + int major, minor, patch; + cudssGetProperty(MAJOR_VERSION, &major); + cudssGetProperty(MINOR_VERSION, &minor); + cudssGetProperty(PATCH_LEVEL, &patch); + settings.log.printf("CUDSS Version %d.%d.%d\n", major, minor, patch); + + cuda_error = cudaSuccess; + status = CUDSS_STATUS_SUCCESS; + CUDA_CALL_AND_CHECK_EXIT(cudaStreamCreate(&stream), "cudaStreamCreate"); + CUDSS_CALL_AND_CHECK_EXIT(cudssCreate(&handle), status, "cudssCreate"); + CUDSS_CALL_AND_CHECK_EXIT(cudssConfigCreate(&solverConfig), status, "cudssConfigCreate"); + CUDSS_CALL_AND_CHECK_EXIT(cudssDataCreate(handle, &solverData), status, "cudssDataCreate"); + +#ifdef USE_AMD + // Tell cuDSS to use AMD + cudssAlgType_t reorder_alg = CUDSS_ALG_3; + CUDSS_CALL_AND_CHECK_EXIT(cudssConfigSet(solverConfig, CUDSS_CONFIG_REORDERING_ALG, + &reorder_alg, sizeof(cudssAlgType_t)), status, "cudssConfigSet for reordering alg"); +#endif + + int32_t ir_n_steps = 2; + CUDSS_CALL_AND_CHECK_EXIT(cudssConfigSet(solverConfig, CUDSS_CONFIG_IR_N_STEPS, + &ir_n_steps, sizeof(int32_t)), status, "cudssConfigSet for ir n steps"); + + // Device pointers + csr_offset_d = nullptr; + csr_columns_d = nullptr; + csr_values_d = nullptr; + x_values_d = nullptr; + b_values_d = nullptr; + CUDA_CALL_AND_CHECK_EXIT(cudaMalloc(&x_values_d, n * sizeof(f_t)), "cudaMalloc for x_values"); + CUDA_CALL_AND_CHECK_EXIT(cudaMalloc(&b_values_d, n * sizeof(f_t)), "cudaMalloc for b_values"); + + i_t ldb = n; + i_t ldx = n; + CUDSS_CALL_AND_CHECK_EXIT( + cudssMatrixCreateDn(&cudss_b, n, 1, ldb, b_values_d, CUDA_R_64F, CUDSS_LAYOUT_COL_MAJOR), + status, + "cudssMatrixCreateDn for b"); + CUDSS_CALL_AND_CHECK_EXIT( + cudssMatrixCreateDn(&cudss_x, n, 1, ldx, x_values_d, CUDA_R_64F, CUDSS_LAYOUT_COL_MAJOR), + status, + "cudssMatrixCreateDn for x"); + } + ~sparse_cholesky_cudss_t() override { + cudaFree(csr_values_d); + cudaFree(csr_columns_d); + cudaFree(csr_offset_d); + + cudaFree(x_values_d); + cudaFree(b_values_d); + + CUDSS_CALL_AND_CHECK_EXIT(cudssMatrixDestroy(A), status, "cudssMatrixDestroy for A"); + + + CUDSS_CALL_AND_CHECK_EXIT(cudssMatrixDestroy(cudss_x), status, "cudssMatrixDestroy for cudss_x"); + CUDSS_CALL_AND_CHECK_EXIT(cudssMatrixDestroy(cudss_b), status, "cudssMatrixDestroy for cudss_b"); + CUDSS_CALL_AND_CHECK_EXIT(cudssDataDestroy(handle, solverData), status, "cudssDataDestroy"); + CUDSS_CALL_AND_CHECK_EXIT(cudssConfigDestroy(solverConfig), status, "cudssConfigDestroy"); + CUDSS_CALL_AND_CHECK_EXIT(cudssDestroy(handle), status, "cudssDestroy"); + CUDA_CALL_AND_CHECK_EXIT(cudaStreamSynchronize(stream), "cudaStreamSynchronize"); + CUDA_CALL_AND_CHECK_EXIT(cudaStreamDestroy(stream), "cudaStreamDestroy"); + } + i_t analyze(const csc_matrix_t& A_in) override + { + csr_matrix_t Arow; + A_in.to_compressed_row(Arow); + + nnz = A_in.col_start[A_in.n]; + + CUDA_CALL_AND_CHECK(cudaMalloc(&csr_offset_d, (n + 1) * sizeof(i_t)), "cudaMalloc for csr_offset"); + CUDA_CALL_AND_CHECK(cudaMalloc(&csr_columns_d, nnz * sizeof(i_t)), "cudaMalloc for csr_columns"); + CUDA_CALL_AND_CHECK(cudaMalloc(&csr_values_d, nnz * sizeof(f_t)), "cudaMalloc for csr_values"); + + CUDA_CALL_AND_CHECK(cudaMemcpy(csr_offset_d, Arow.row_start.data(), (n + 1) * sizeof(i_t), cudaMemcpyHostToDevice), "cudaMemcpy for csr_offset"); + CUDA_CALL_AND_CHECK(cudaMemcpy(csr_columns_d, Arow.j.data(), nnz * sizeof(i_t), cudaMemcpyHostToDevice), "cudaMemcpy for csr_columns"); + CUDA_CALL_AND_CHECK(cudaMemcpy(csr_values_d, Arow.x.data(), nnz * sizeof(f_t), cudaMemcpyHostToDevice), "cudaMemcpy for csr_values"); + + if (!first_factor) { + CUDSS_CALL_AND_CHECK(cudssMatrixDestroy(A), status, "cudssMatrixDestroy for A"); + } + + CUDSS_CALL_AND_CHECK(cudssMatrixCreateCsr(&A, + n, + n, + nnz, + csr_offset_d, + nullptr, + csr_columns_d, + csr_values_d, + CUDA_R_32I, + CUDA_R_64F, + positive_definite ? CUDSS_MTYPE_SPD : CUDSS_MTYPE_SYMMETRIC, + CUDSS_MVIEW_FULL, + CUDSS_BASE_ZERO), + status, + "cudssMatrixCreateCsr"); + + // Perform symbolic analysis + f_t start_symbolic = tic(); + + CUDSS_CALL_AND_CHECK( + cudssExecute(handle, CUDSS_PHASE_ANALYSIS, solverConfig, solverData, A, cudss_x, cudss_b), + status, + "cudssExecute for analysis"); + + f_t symbolic_time = toc(start_symbolic); + printf("Symbolic time %.2fs\n", symbolic_time); + int64_t lu_nz = 0; + size_t size_written = 0; + CUDSS_CALL_AND_CHECK( + cudssDataGet( + handle, solverData, CUDSS_DATA_LU_NNZ, &lu_nz, sizeof(int64_t), &size_written), + status, + "cudssDataGet for LU_NNZ"); + printf("Symbolic nonzeros in factor %e\n", static_cast(lu_nz) / 2.0); + // TODO: Is there any way to get nonzeros in the factors? + // TODO: Is there any way to get flops for the factorization? + + + return 0; + } + i_t factorize(const csc_matrix_t& A_in) override + { + + csr_matrix_t Arow; + A_in.to_compressed_row(Arow); + + if (nnz != A_in.col_start[A_in.n]) { + printf("Error: nnz %d != A_in.col_start[A_in.n] %d\n", nnz, A_in.col_start[A_in.n]); + exit(1); + } + + CUDA_CALL_AND_CHECK(cudaMemcpy(csr_values_d, Arow.x.data(), nnz * sizeof(f_t), cudaMemcpyHostToDevice), "cudaMemcpy for csr_values"); + + CUDSS_CALL_AND_CHECK(cudssMatrixSetValues(A, csr_values_d), status, "cudssMatrixSetValues for A"); + + f_t start_numeric = tic(); + CUDSS_CALL_AND_CHECK( + cudssExecute( + handle, CUDSS_PHASE_FACTORIZATION, solverConfig, solverData, A, cudss_x, cudss_b), + status, + "cudssExecute for factorization"); + + f_t numeric_time = toc(start_numeric); + + int info; + size_t sizeWritten = 0; + CUDSS_CALL_AND_CHECK(cudssDataGet(handle, solverData, CUDSS_DATA_INFO, &info, + sizeof(info), &sizeWritten), status, "cudssDataGet for info"); + if (info != 0) { + printf("Factorization failed info %d\n", info); + return -1; + } + + if (first_factor) { + printf("Factor time %.2fs\n", numeric_time); + first_factor = false; + } + if (status != CUDSS_STATUS_SUCCESS) { + printf("cuDSS Factorization failed\n"); + return -1; + } + return 0; + } + + i_t solve(const dense_vector_t& b, dense_vector_t& x) override + { + if (b.size() != n) { + printf("Error: b.size() %d != n %d\n", b.size(), n); + exit(1); + } + if (x.size() != n) { + printf("Error: x.size() %d != n %d\n", x.size(), n); + exit(1); + } + CUDA_CALL_AND_CHECK(cudaMemcpy(b_values_d, b.data(), n * sizeof(f_t), cudaMemcpyHostToDevice), "cudaMemcpy for b_values"); + CUDA_CALL_AND_CHECK(cudaMemcpy(x_values_d, x.data(), n * sizeof(f_t), cudaMemcpyHostToDevice), "cudaMemcpy for x_values"); + + CUDSS_CALL_AND_CHECK(cudssMatrixSetValues(cudss_b, b_values_d), status, "cudssMatrixSetValues for b"); + CUDSS_CALL_AND_CHECK(cudssMatrixSetValues(cudss_x, x_values_d), status, "cudssMatrixSetValues for x"); + + + i_t ldb = n; + i_t ldx = n; + CUDSS_CALL_AND_CHECK_EXIT( + cudssMatrixCreateDn(&cudss_b, n, 1, ldb, b_values_d, CUDA_R_64F, CUDSS_LAYOUT_COL_MAJOR), + status, + "cudssMatrixCreateDn for b"); + CUDSS_CALL_AND_CHECK_EXIT( + cudssMatrixCreateDn(&cudss_x, n, 1, ldx, x_values_d, CUDA_R_64F, CUDSS_LAYOUT_COL_MAJOR), + status, + "cudssMatrixCreateDn for x"); + + CUDSS_CALL_AND_CHECK( + cudssExecute( + handle, CUDSS_PHASE_SOLVE, solverConfig, solverData, A, cudss_x, cudss_b), + status, + "cudssExecute for solve"); + + CUDA_CALL_AND_CHECK(cudaStreamSynchronize(stream), "cudaStreamSynchronize"); + + CUDA_CALL_AND_CHECK(cudaMemcpy(x.data(), x_values_d, n * sizeof(f_t), cudaMemcpyDeviceToHost), "cudaMemcpy for x"); + + for (i_t i = 0; i < n; i++) { + if (x[i] != x[i]) { + return -1; + } + } + + return 0; + } + + void set_positive_definite(bool positive_definite) override + { + this->positive_definite = positive_definite; + } + + private: + + + i_t n; + i_t nnz; + bool first_factor; + bool positive_definite; + cudaError_t cuda_error; + cudssStatus_t status; + cudaStream_t stream; + cudssHandle_t handle; + cudssConfig_t solverConfig; + cudssData_t solverData; + cudssMatrix_t A; + cudssMatrix_t cudss_x; + cudssMatrix_t cudss_b; + i_t *csr_offset_d; + i_t *csr_columns_d; + f_t *csr_values_d; + f_t *x_values_d; + f_t *b_values_d; +}; + + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu index 30a117bacc..923d170a5b 100644 --- a/cpp/src/linear_programming/solve.cu +++ b/cpp/src/linear_programming/solve.cu @@ -292,6 +292,63 @@ optimization_problem_solution_t convert_dual_simplex_sol( return sol; } +template +std::tuple, dual_simplex::lp_status_t, f_t, f_t, f_t> +run_barrier(dual_simplex::user_problem_t& user_problem, + pdlp_solver_settings_t const& settings) +{ + auto start_solver = std::chrono::high_resolution_clock::now(); + + f_t norm_user_objective = dual_simplex::vector_norm2(user_problem.objective); + f_t norm_rhs = dual_simplex::vector_norm2(user_problem.rhs); + + dual_simplex::simplex_solver_settings_t dual_simplex_settings; + dual_simplex_settings.time_limit = settings.time_limit; + dual_simplex_settings.iteration_limit = settings.iteration_limit; + dual_simplex_settings.concurrent_halt = settings.concurrent_halt; + dual_simplex_settings.use_cudss = settings.use_cudss; + dual_simplex_settings.barrier = true; + if (dual_simplex_settings.concurrent_halt != nullptr) { + // Don't show the dual simplex log in concurrent mode. Show the PDLP log instead + dual_simplex_settings.log.log = false; + } + + dual_simplex::lp_solution_t solution(user_problem.num_rows, user_problem.num_cols); + auto status = + dual_simplex::solve_linear_program_with_barrier(user_problem, dual_simplex_settings, solution); + + auto end = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(end - start_solver); + + CUOPT_LOG_INFO("Barrier finished in %.2f seconds", duration.count() / 1000.0); + + if (settings.concurrent_halt != nullptr && (status == dual_simplex::lp_status_t::OPTIMAL || + status == dual_simplex::lp_status_t::UNBOUNDED || + status == dual_simplex::lp_status_t::INFEASIBLE)) { + // We finished. Tell PDLP to stop if it is still running. + settings.concurrent_halt->store(1, std::memory_order_release); + } + + return {std::move(solution), status, duration.count() / 1000.0, norm_user_objective, norm_rhs}; +} + +template +optimization_problem_solution_t run_barrier( + detail::problem_t& problem, pdlp_solver_settings_t const& settings) +{ + // Convert data structures to dual simplex format and back + dual_simplex::user_problem_t dual_simplex_problem = + cuopt_problem_to_simplex_problem(problem); + auto sol_dual_simplex = run_barrier(dual_simplex_problem, settings); + return convert_dual_simplex_sol(problem, + std::get<0>(sol_dual_simplex), + std::get<1>(sol_dual_simplex), + std::get<2>(sol_dual_simplex), + std::get<3>(sol_dual_simplex), + std::get<4>(sol_dual_simplex)); +} + + template std::tuple, dual_simplex::lp_status_t, f_t, f_t, f_t> run_dual_simplex(dual_simplex::user_problem_t& user_problem, @@ -548,6 +605,8 @@ optimization_problem_solution_t solve_lp_with_method( { if (settings.method == method_t::DualSimplex) { return run_dual_simplex(problem, settings); + } else if (settings.method == method_t::Barrier) { + return run_barrier(problem, settings); } else if (settings.method == method_t::Concurrent) { return run_concurrent(op_problem, problem, settings, is_batch_mode); } else { diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu index a740a390c5..9a704c682a 100644 --- a/cpp/src/math_optimization/solver_settings.cu +++ b/cpp/src/math_optimization/solver_settings.cu @@ -89,7 +89,7 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings int_parameters = { {CUOPT_ITERATION_LIMIT, &pdlp_settings.iteration_limit, 0, std::numeric_limits::max(), std::numeric_limits::max()}, {CUOPT_PDLP_SOLVER_MODE, reinterpret_cast(&pdlp_settings.pdlp_solver_mode), CUOPT_PDLP_SOLVER_MODE_STABLE1, CUOPT_PDLP_SOLVER_MODE_FAST1, CUOPT_PDLP_SOLVER_MODE_STABLE2}, - {CUOPT_METHOD, reinterpret_cast(&pdlp_settings.method), CUOPT_METHOD_CONCURRENT, CUOPT_METHOD_DUAL_SIMPLEX, CUOPT_METHOD_CONCURRENT}, + {CUOPT_METHOD, reinterpret_cast(&pdlp_settings.method), CUOPT_METHOD_CONCURRENT, CUOPT_METHOD_BARRIER, CUOPT_METHOD_CONCURRENT}, {CUOPT_NUM_CPU_THREADS, &mip_settings.num_cpu_threads, -1, std::numeric_limits::max(), -1} }; From 999b69098db8a1330d157f9636f7a0399da323d7 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Fri, 8 Aug 2025 16:51:31 -0700 Subject: [PATCH 08/31] Remove CHOLMOD experiment --- cmake/FindCHOLMOD.cmake | 111 -------------------- cpp/CMakeLists.txt | 4 - cpp/src/dual_simplex/barrier.cpp | 8 +- cpp/src/dual_simplex/sparse_cholesky.hpp | 125 ----------------------- 4 files changed, 2 insertions(+), 246 deletions(-) delete mode 100644 cmake/FindCHOLMOD.cmake diff --git a/cmake/FindCHOLMOD.cmake b/cmake/FindCHOLMOD.cmake deleted file mode 100644 index 6470e67ebb..0000000000 --- a/cmake/FindCHOLMOD.cmake +++ /dev/null @@ -1,111 +0,0 @@ -# Cholmod lib usually requires linking to a blas and lapack library. -# It is up to the user of this module to find a BLAS and link to it. - -if (CHOLMOD_INCLUDES AND CHOLMOD_LIBRARIES) - set(CHOLMOD_FIND_QUIETLY TRUE) -endif (CHOLMOD_INCLUDES AND CHOLMOD_LIBRARIES) - -message(STATUS "CHOLMOD DIR = $ENV{CHOLMODDIR}") - - -find_path(CHOLMOD_INCLUDES - NAMES - cholmod.h - PATHS - $ENV{CHOLMODDIR} - ${INCLUDE_INSTALL_DIR} - PATH_SUFFIXES - suitesparse - ufsparse -) - -find_path(SUITESPARSE_INCLUDES - NAMES - SuiteSparse_config.h - PATHS - $ENV{SUITESPARSE_DIR} - ${INCLUDE_INSTALL_DIR} -) -message(STATUS "SUITESPARSE_INCLUDES = ${SUITESPARSE_INCLUDES}") -list(APPEND CHOLMOD_INCLUDES ${SUITESPARSE_INCLUDES}) - -message(STATUS "CHOLMOD_LIB_DIR = $ENV{CHOLMOD_LIB_DIR}") - -find_library(CHOLMOD_LIBRARIES cholmod PATHS $ENV{CHOLMOD_LIB_DIR} ${LIB_INSTALL_DIR}) -message(STATUS " find_library CHOLMOD_LIBRARIES = ${CHOLMOD_LIBRARIES}") - -if(CHOLMOD_LIBRARIES) - - get_filename_component(CHOLMOD_LIBDIR ${CHOLMOD_LIBRARIES} PATH) - find_library(AMD_LIBRARY amd PATHS ${CHOLMOD_LIBDIR} $ENV{CHOLMOD_LIB_DIR}/../AMD ${LIB_INSTALL_DIR}) - if (AMD_LIBRARY) - set(CHOLMOD_LIBRARIES ${CHOLMOD_LIBRARIES} ${AMD_LIBRARY}) - else () - set(CHOLMOD_LIBRARIES FALSE) - endif () - -endif(CHOLMOD_LIBRARIES) -message(STATUS "CHOLMOD_LIBRARIES = ${CHOLMOD_LIBRARIES}") - -if(CHOLMOD_LIBRARIES) - - find_library(COLAMD_LIBRARY colamd PATHS ${CHOLMOD_LIBDIR} $ENV{CHOLMOD_LIB_DIR}/../COLAMD ${LIB_INSTALL_DIR}) - if (COLAMD_LIBRARY) - set(CHOLMOD_LIBRARIES ${CHOLMOD_LIBRARIES} ${COLAMD_LIBRARY}) - else () - set(CHOLMOD_LIBRARIES FALSE) - endif () - -endif(CHOLMOD_LIBRARIES) -message(STATUS "CHOLMOD_LIBRARIES = ${CHOLMOD_LIBRARIES}") - -if(CHOLMOD_LIBRARIES) - - find_library(CAMD_LIBRARY camd PATHS ${CHOLMOD_LIBDIR} $ENV{CHOLMOD_LIB_DIR}/../CAMD ${LIB_INSTALL_DIR}) - if (CAMD_LIBRARY) - set(CHOLMOD_LIBRARIES ${CHOLMOD_LIBRARIES} ${CAMD_LIBRARY}) - else () - set(CHOLMOD_LIBRARIES FALSE) - endif () - -endif(CHOLMOD_LIBRARIES) -message(STATUS "CHOLMOD_LIBRARIES = ${CHOLMOD_LIBRARIES}") - -if(CHOLMOD_LIBRARIES) - - find_library(CCOLAMD_LIBRARY ccolamd PATHS ${CHOLMOD_LIBDIR} $ENV{CHOLMOD_LIB_DIR}/../CCOLAMD ${LIB_INSTALL_DIR}) - if (CCOLAMD_LIBRARY) - set(CHOLMOD_LIBRARIES ${CHOLMOD_LIBRARIES} ${CCOLAMD_LIBRARY}) - else () - set(CHOLMOD_LIBRARIES FALSE) - endif () - -endif(CHOLMOD_LIBRARIES) -message(STATUS "CHOLMOD_LIBRARIES = ${CHOLMOD_LIBRARIES}") - -if(CHOLMOD_LIBRARIES) - - find_library(CHOLMOD_METIS_LIBRARY metis PATHS ${CHOLMOD_LIBDIR} $ENV{CHOLMOD_LIB_DIR} ${LIB_INSTALL_DIR}) - if (CHOLMOD_METIS_LIBRARY) - set(CHOLMOD_LIBRARIES ${CHOLMOD_LIBRARIES} ${CHOLMOD_METIS_LIBRARY}) - endif () - -endif(CHOLMOD_LIBRARIES) - -if(CHOLMOD_LIBRARIES) - - find_library(SUITESPARSE_LIBRARY SuiteSparse PATHS ${CHOLMOD_LIBDIR} $ENV{CHOLMOD_LIB_DIR} ${LIB_INSTALL_DIR}) - if (SUITESPARSE_LIBRARY) - set(CHOLMOD_LIBRARIES ${CHOLMOD_LIBRARIES} ${SUITESPARSE_LIBRARY}) - endif (SUITESPARSE_LIBRARY) - -endif(CHOLMOD_LIBRARIES) - -message(STATUS "CHOLMOD_INCLUDES = ${CHOLMOD_INCLUDES}") -message(STATUS "CHOLMOD_LIBRARIES = ${CHOLMOD_LIBRARIES}") - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(CHOLMOD DEFAULT_MSG - CHOLMOD_INCLUDES CHOLMOD_LIBRARIES) - -mark_as_advanced(CHOLMOD_INCLUDES CHOLMOD_LIBRARIES AMD_LIBRARY COLAMD_LIBRARY SUITESPARSE_LIBRARY CAMD_LIBRARY CCOLAMD_LIBRARY CHOLMOD_METIS_LIBRARY) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a5c590b970..264ce1f6a6 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -165,7 +165,6 @@ include(${rapids-cmake-dir}/cpm/rapids_logger.cmake) rapids_cpm_rapids_logger(BUILD_EXPORT_SET cuopt-exports INSTALL_EXPORT_SET cuopt-exports) create_logger_macros(CUOPT "cuopt::default_logger()" include/cuopt) -find_package(CHOLMOD REQUIRED) find_package(CUDSS REQUIRED) if(BUILD_TESTS) @@ -210,7 +209,6 @@ target_link_options(cuopt PRIVATE "${CUOPT_BINARY_DIR}/fatbin.ld") add_library(cuopt::cuopt ALIAS cuopt) # ################################################################################################## # - include paths --------------------------------------------------------------------------------- -message(STATUS "target include directories CHOLMOD_INCLUDES = ${CHOLMOD_INCLUDES}") message(STATUS "target include directories CUDSS_INCLUDES = ${CUDSS_INCLUDES}") target_include_directories(cuopt PRIVATE @@ -221,7 +219,6 @@ target_include_directories(cuopt "$" "$" "$" - ${CHOLMOD_INCLUDES} ${CUDSS_INCLUDE} ) @@ -248,7 +245,6 @@ target_link_libraries(cuopt CCCL::CCCL raft::raft cuopt::mps_parser - ${CHOLMOD_LIBRARIES} ${CUDSS_LIBRARIES} PRIVATE ${CUOPT_PRIVATE_CUDA_LIBS} diff --git a/cpp/src/dual_simplex/barrier.cpp b/cpp/src/dual_simplex/barrier.cpp index 9a03883784..233c1cb29d 100644 --- a/cpp/src/dual_simplex/barrier.cpp +++ b/cpp/src/dual_simplex/barrier.cpp @@ -108,11 +108,8 @@ class iteration_data_t { settings.log.printf("AAT time %.2fs\n", aat_time); settings.log.printf("AAT nonzeros %e\n", static_cast(ADAT.col_start[lp.num_rows])); - if (!settings.use_cudss) { - chol = std::make_unique>(settings, lp.num_rows); - } else { - chol = std::make_unique>(settings, lp.num_rows); - } + + chol = std::make_unique>(settings, lp.num_rows); chol->set_positive_definite(false); // Perform symbolic analysis @@ -945,7 +942,6 @@ i_t barrier_solver_t::solve(const barrier_solver_settings_t& #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE template class barrier_solver_t; template class sparse_cholesky_base_t; -template class sparse_cholesky_cholmod_t; template class sparse_cholesky_cudss_t; template class iteration_data_t; template class barrier_solver_settings_t; diff --git a/cpp/src/dual_simplex/sparse_cholesky.hpp b/cpp/src/dual_simplex/sparse_cholesky.hpp index 8b0a51492c..9b53b74f07 100644 --- a/cpp/src/dual_simplex/sparse_cholesky.hpp +++ b/cpp/src/dual_simplex/sparse_cholesky.hpp @@ -24,7 +24,6 @@ #include -#include "cholmod.h" #include "cudss.h" namespace cuopt::linear_programming::dual_simplex { @@ -41,130 +40,6 @@ class sparse_cholesky_base_t { }; -template -class sparse_cholesky_cholmod_t : public sparse_cholesky_base_t { - public: - sparse_cholesky_cholmod_t(const simplex_solver_settings_t& settings, i_t size) : n(size), first_factor(true) { - cholmod_start(&common); - int version[3]; - cholmod_version(version); - settings.log.printf("Using CHOLMOD %d.%d.%d\n", version[0], version[1], version[2]); - A = nullptr; - L = nullptr; - } - ~sparse_cholesky_cholmod_t() override{ - cholmod_free_factor(&L, &common); - cholmod_free_sparse(&A, &common); - cholmod_finish(&common); - } - i_t analyze(const csc_matrix_t& A_in) override - { - A = to_cholmod(A_in); - // Perform symbolic analysis - f_t start_symbolic = tic(); - common.nmethods = 1; - common.method[0].ordering = CHOLMOD_AMD; - L = cholmod_analyze(A, &common); - f_t symbolic_time = toc(start_symbolic); - printf("Symbolic method used %d\n", L->ordering); - printf("Symbolic time %.2fs\n", symbolic_time); - printf("Symbolic nonzeros in factor %e\n", common.method[common.selected].lnz); - printf("Symbolic flops %e\n", common.method[common.selected].fl); - return 0; - } - i_t factorize(const csc_matrix_t& A_in) override - { - cholmod_free_sparse(&A, &common); - A = to_cholmod(A_in); - f_t start_numeric = tic(); - cholmod_factorize(A, L, &common); - f_t numeric_time = toc(start_numeric); - if (first_factor) { - printf("Factor nonzeros %e\n", common.method[common.selected].lnz); - printf("Factor time %.2fs\n", numeric_time); - first_factor = false; - } - if (common.status < CHOLMOD_OK) { - printf("Factorization failed\n"); - exit(1); - } - if (((int32_t) L->minor) != A_in.m) { - printf("AA' not positive definite %ld minors versus %d\n", L->minor, A_in.m); - return -1; - } - return 0; - } - i_t solve(const dense_vector_t& b, dense_vector_t& x) override - { - cholmod_dense *b_cholmod = cholmod_zeros(n, 1, CHOLMOD_REAL, &common); - for (i_t i = 0; i < n; i++) { - ((float64_t *)b_cholmod->x)[i] = b[i]; - } - cholmod_dense *x_cholmod = cholmod_solve(CHOLMOD_A, L, b_cholmod, &common); - for (i_t i = 0; i < n; i++) { - x[i] = ((float64_t *)x_cholmod->x)[i]; - } - -#ifdef CHECK_SOLVE - int32_t no_transpose = 0; - float64_t alpha[2] = {1.0, 0.0}; - float64_t beta[2] = {0.0, 0.0}; - cholmod_dense *residual = cholmod_zeros(n, 1, CHOLMOD_REAL, &common); - - cholmod_sdmult(A, no_transpose, alpha, beta, x_cholmod, residual, &common); - for (i_t i = 0; i < n; i++) { - f_t err = std::abs(((float64_t *)residual->x)[i] - b[i]); - if (err > 1e-6) { - printf("Error: L*L'*x[%d] - b[%d] = %e, x[%d] = %e, b[%d] = %e cholmod_b[%d] = %e\n", i, i, err, i, x[i], i, b[i], i, ((float64_t *)b_cholmod->x)[i]); - } - } - beta[0] = -1.0; - cholmod_sdmult(A, no_transpose, alpha, beta, x_cholmod, b_cholmod, &common); - - - printf("|| L*L'*x - b || = %e\n", cholmod_norm_dense(b_cholmod, 0, &common)); -#endif - - cholmod_free_dense(&x_cholmod, &common); - cholmod_free_dense(&b_cholmod, &common); - return 0; - } - - void set_positive_definite(bool positive_definite) override - { - // Do nothing - } - - private: - - cholmod_sparse *to_cholmod(const csc_matrix_t& A_in) - { - i_t nnz = A_in.col_start[A_in.n]; - cholmod_sparse *A = cholmod_allocate_sparse(A_in.m, A_in.n, nnz, 0, 0, 0, CHOLMOD_REAL, &common); - for (i_t j = 0; j <= A_in.n; j++) { - ((int32_t *)A->p)[j] = A_in.col_start[j]; - } - for (i_t j = 0; j < A_in.n; j++) { - ((int32_t *)A->nz)[j] = A_in.col_start[j + 1] - A_in.col_start[j]; - } - for (i_t p = 0; p < nnz; p++) { - ((int32_t *)A->i)[p] = A_in.i[p]; - } - for (i_t p = 0; p < nnz; p++) { - ((float64_t *)A->x)[p] = A_in.x[p]; - } - A->nzmax = nnz; - A->stype = 1; - cholmod_check_sparse(A, &common); - return A; - } - i_t n; - bool first_factor; - cholmod_sparse *A; - cholmod_factor *L; - cholmod_common common; -}; - #define CUDSS_EXAMPLE_FREE do {} while(0) #define CUDA_CALL_AND_CHECK(call, msg) \ From 8e1f510904e0efc5c36003aa3d319bcbd1bbd7ef Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Tue, 12 Aug 2025 13:24:59 -0500 Subject: [PATCH 09/31] fix wheel --- ci/build_wheel_libcuopt.sh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/ci/build_wheel_libcuopt.sh b/ci/build_wheel_libcuopt.sh index e69cbe78c4..70f3dd6421 100755 --- a/ci/build_wheel_libcuopt.sh +++ b/ci/build_wheel_libcuopt.sh @@ -32,12 +32,13 @@ else fi # Install cudss -if command -v dnf &> /dev/null; then - dnf install libcudss-dev -y -else - echo "dnf not found, skipping cudss installation" - exit 1 -fi + +# Clean metadata & install cudss +dnf clean all +# Addding static library just to please CMAKE requirements +dnf -y install libcudss0-static-cuda-12 libcudss0-devel-cuda-12 libcudss0-cuda-12 + +# dnf -y install libcudss0-devel-cuda-12 rapids-logger "Generating build requirements" From 2fdb89981ad5a37b56b251009c2d1fb2191610eb Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Tue, 12 Aug 2025 17:07:01 -0500 Subject: [PATCH 10/31] remove run export --- ci/build_wheel_libcuopt.sh | 15 +++++++++++---- conda/recipes/libcuopt/recipe.yaml | 3 --- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/ci/build_wheel_libcuopt.sh b/ci/build_wheel_libcuopt.sh index 70f3dd6421..e1c920968d 100755 --- a/ci/build_wheel_libcuopt.sh +++ b/ci/build_wheel_libcuopt.sh @@ -34,10 +34,17 @@ fi # Install cudss # Clean metadata & install cudss -dnf clean all -# Addding static library just to please CMAKE requirements -dnf -y install libcudss0-static-cuda-12 libcudss0-devel-cuda-12 libcudss0-cuda-12 - +if command -v dnf &> /dev/null; then + dnf clean all + # Adding static library just to please CMAKE requirements + dnf -y install libcudss0-static-cuda-12 libcudss0-devel-cuda-12 libcudss0-cuda-12 +elif command -v apt-get &> /dev/null; then + apt-get update + apt-get install -y libcudss-devel +else + echo "Neither dnf nor apt-get found. Cannot install cudss dependencies." + exit 1 +fi # dnf -y install libcudss0-devel-cuda-12 diff --git a/conda/recipes/libcuopt/recipe.yaml b/conda/recipes/libcuopt/recipe.yaml index daf065c3b5..79d2539773 100644 --- a/conda/recipes/libcuopt/recipe.yaml +++ b/conda/recipes/libcuopt/recipe.yaml @@ -95,7 +95,6 @@ outputs: - cuda-nvtx - cuda-version - gtest - - libcudss - libcurand - libcusparse - librmm @@ -146,7 +145,6 @@ outputs: - cuda-nvtx - cuda-version - gtest - - libcudss - libcurand - libcusparse - librmm @@ -194,7 +192,6 @@ outputs: - cuda-nvtx - cuda-version - gtest - - libcudss - libcurand - libcusparse - librmm From 17a6a2b3144cc4bbb7bd622f8cce4bf69c7f70bf Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Tue, 12 Aug 2025 19:17:54 -0500 Subject: [PATCH 11/31] add config --- cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a929805de3..f828513770 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -228,7 +228,7 @@ list(PREPEND CUOPT_PRIVATE_CUDA_LIBS CUDA::cublasLt) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/libmps_parser) set(CMAKE_LIBRARY_PATH ${CMAKE_CURRENT_BINARY_DIR}/libmps_parser/) -find_package(cudss REQUIRED) +find_package(cudss REQUIRED CONFIG) target_link_libraries(cuopt PUBLIC From 793199c514400be93e77d8f10595e4a5bb9e80df Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Tue, 12 Aug 2025 17:36:07 -0700 Subject: [PATCH 12/31] Relative tolerance and new mu in corrector step Now solving 90/93 NETLIB LPs Multiple changes: 1) Switch to termination criteria based on relative (rather than absolute) tolerances. Set the default relative tolerance to 1e-8. 2) Use mu affine, rather than mu, in the rhs of the corrector step 3) Disable dependent row checks in presolve 4) Add iteration limits and time limit checks 5) Add method to print out dense columns 6) Add PCG (currently disabled) --- cpp/src/dual_simplex/barrier.cpp | 228 ++++++++++++++++++++--- cpp/src/dual_simplex/barrier.hpp | 6 +- cpp/src/dual_simplex/presolve.cpp | 2 +- cpp/src/dual_simplex/sparse_cholesky.hpp | 2 + 4 files changed, 208 insertions(+), 30 deletions(-) diff --git a/cpp/src/dual_simplex/barrier.cpp b/cpp/src/dual_simplex/barrier.cpp index 233c1cb29d..9efc8f866a 100644 --- a/cpp/src/dual_simplex/barrier.cpp +++ b/cpp/src/dual_simplex/barrier.cpp @@ -23,6 +23,8 @@ #include #include +#include + namespace cuopt::linear_programming::dual_simplex { @@ -94,6 +96,8 @@ class iteration_data_t { inv_diag.sqrt(inv_sqrt_diag); } + column_density(lp.A, settings); + // Copy A into AD AD = lp.A; // Perform column scaling on A to get AD^(-1) @@ -116,6 +120,43 @@ class iteration_data_t { chol->analyze(ADAT); } + void column_density(const csc_matrix_t& A, const simplex_solver_settings_t& settings) + { + const i_t m = A.m; + const i_t n = A.n; + if (n < 2) { + return; + } + std::vector density(n); + for (i_t j = 0; j < n; j++) { + const i_t nnz = A.col_start[j+1] - A.col_start[j]; + density[j] = static_cast(nnz); + } + + std::vector permutation(n); + std::iota(permutation.begin(), permutation.end(), 0); + std::sort(permutation.begin(), permutation.end(), [&density](i_t i, i_t j) { + return density[i] < density[j]; + }); + + std::vector columns_to_keep; + columns_to_keep.reserve(n); + + f_t nnz = density[permutation[0]]; + columns_to_keep.push_back(permutation[0]); + for (i_t i = 1; i < n; i++) { + const i_t j = permutation[i]; + const f_t density_j = density[j]; + + if (i > n - 5 && density_j > 10.0) { + settings.log.printf("Dense column %6d nz %6d density %.2e nnz %.2e\n", j, static_cast(density_j), density_j / m, nnz + density_j * density_j); + } + nnz += density_j * density_j; + + columns_to_keep.push_back(j); + } + } + void scatter_upper_bounds(const dense_vector_t& y, dense_vector_t& z) { if (n_upper_bounds > 0) { @@ -136,6 +177,86 @@ class iteration_data_t { } } + void preconditioned_conjugate_gradient(const simplex_solver_settings_t& settings, const dense_vector_t& b, f_t tolerance, dense_vector_t& xinout) const + { + dense_vector_t residual = b; + dense_vector_t y(ADAT.n); + + dense_vector_t x = xinout; + + const csc_matrix_t& A = ADAT; + + // r = A*x - b + matrix_vector_multiply(A, 1.0, x, -1.0, residual); + + // Solve M y = r for y + chol->solve(residual, y); + + dense_vector_t p = y; + p.multiply_scalar(-1.0); + + dense_vector_t Ap(A.n); + i_t iter = 0; + f_t norm_residual = vector_norm2(residual); + f_t initial_norm_residual = norm_residual; + settings.log.printf("PCG initial residual 2-norm %e inf-norm %e\n", norm_residual, vector_norm_inf(residual)); + + f_t rTy = residual.inner_product(y); + + while (norm_residual > tolerance && iter < 100) { + // Compute Ap = A * p + matrix_vector_multiply(A, 1.0, p, 0.0, Ap); + + // Compute alpha = (r^T * y) / (p^T * Ap) + f_t alpha = rTy / p.inner_product(Ap); + + // Update residual = residual + alpha * Ap + residual.axpy(alpha, Ap, 1.0); + + f_t new_residual = vector_norm2(residual); + if (new_residual > 1.1 * norm_residual || new_residual > 1.1 * initial_norm_residual) { + settings.log.printf("PCG residual increased by more than 10%%. New %e > %e\n", new_residual, norm_residual); + break; + } + norm_residual = new_residual; + + // Update x = x + alpha * p + x.axpy(alpha, p, 1.0); + + + // residual = A*x - b + residual = b; + matrix_vector_multiply(A, 1.0, x, -1.0, residual); + norm_residual = vector_norm2(residual); + + // Solve M y = r for y + chol->solve(residual, y); + + // Compute beta = (r_+^T y_+) / (r^T y) + f_t r1Ty1 = residual.inner_product(y); + f_t beta = r1Ty1 / rTy; + + rTy = r1Ty1; + + // Update p = -y + beta * p + p.axpy(-1.0, y, beta); + + iter++; + settings.log.printf("PCG iter %3d 2-norm_residual %.2e inf-norm_residual %.2e\n", iter, norm_residual, vector_norm_inf(residual)); + } + + residual = b; + matrix_vector_multiply(A, 1.0, x, -1.0, residual); + norm_residual = vector_norm2(residual); + if (norm_residual < initial_norm_residual) { + settings.log.printf("PCG improved residual 2-norm %.2e/%.2e in %d iterations\n", norm_residual, initial_norm_residual, iter); + xinout = x; + } else + { + settings.log.printf("Rejecting PCG solution\n"); + } + } + i_t n_upper_bounds; dense_vector_t upper_bounds; dense_vector_t c; @@ -186,6 +307,7 @@ class iteration_data_t { }; + template void barrier_solver_t::initial_point(iteration_data_t& data) { @@ -480,6 +602,15 @@ i_t barrier_solver_t::compute_search_direction(iteration_data_t 1e-10) { + settings.log.printf("Running PCG to improve residual 2-norm %e inf-norm %e\n", vector_norm2(y_residual), y_residual_norm); + data.preconditioned_conjugate_gradient(settings, h, std::min(y_residual_norm / 100.0, 1e-6), dy); + y_residual = h; + matrix_vector_multiply(data.ADAT, 1.0, dy, -1.0, y_residual); + const f_t y_residual_norm_pcg = vector_norm_inf(y_residual); + settings.log.printf("PCG improved residual || ADAT * dy - h || = %.2e\n", y_residual_norm_pcg); + } + // dx = dinv .* (A'*dy - dual_rhs + complementarity_xz_rhs ./ x - E *((complementarity_wv_rhs - v // .* bound_rhs) ./ w)) //r1 <- A'*dy - r1 @@ -723,9 +854,17 @@ i_t barrier_solver_t::solve(const barrier_solver_settings_t& } } iteration_data_t data(lp, num_upper_bounds, settings); + if (toc(start_time) > settings.time_limit) { + settings.log.printf("Barrier time limit exceeded\n"); + return -1; + } settings.log.printf("%d finite upper bounds\n", num_upper_bounds); initial_point(data); + if (toc(start_time) > settings.time_limit) { + settings.log.printf("Barrier time limit exceeded\n"); + return -1; + } compute_residuals(data.w, data.x, data.y, data.v, data.z, data); f_t primal_residual_norm = std::max(vector_norm_inf(data.primal_residual), @@ -734,9 +873,16 @@ i_t barrier_solver_t::solve(const barrier_solver_settings_t& f_t complementarity_residual_norm = std::max(vector_norm_inf(data.complementarity_xz_residual), vector_norm_inf(data.complementarity_wv_residual)); f_t mu = (data.complementarity_xz_residual.sum() + data.complementarity_wv_residual.sum()) / (static_cast(n) + static_cast(num_upper_bounds)); - settings.log.printf("Initial mu %e\n", mu); + + f_t norm_b = vector_norm_inf(data.b); + f_t norm_c = vector_norm_inf(data.c); + f_t primal_objective = data.c.inner_product(data.x); + f_t relative_primal_residual = primal_residual_norm / (1.0 + norm_b); + f_t relative_dual_residual = dual_residual_norm / (1.0 + norm_c); + f_t relative_complementarity_residual = complementarity_residual_norm / (1.0 + std::abs(primal_objective)); + dense_vector_t restrict_u(num_upper_bounds); dense_vector_t upper(lp.upper); data.gather_upper_bounds(upper, restrict_u); @@ -757,7 +903,13 @@ i_t barrier_solver_t::solve(const barrier_solver_settings_t& dense_vector_t v_save = data.v; dense_vector_t z_save = data.z; - while (iter < options.iteration_limit) { + const i_t iteration_limit = std::min(options.iteration_limit, settings.iteration_limit); + + while (iter < iteration_limit) { + if (toc(start_time) > settings.time_limit) { + settings.log.printf("Barrier time limit exceeded\n"); + return -1; + } // Compute the affine step data.primal_rhs = data.primal_residual; data.bound_rhs = data.bound_residual; @@ -772,16 +924,21 @@ i_t barrier_solver_t::solve(const barrier_solver_settings_t& f_t max_affine_residual = 0.0; i_t status = compute_search_direction(data, data.dw_aff, data.dx_aff, data.dy_aff, data.dv_aff, data.dz_aff, max_affine_residual); if (status < 0) { - if (primal_residual_norm < 100 * options.feasibility_tol && - dual_residual_norm < 100 * options.optimality_tol && - complementarity_residual_norm < 100 * options.complementarity_tol) { + if (relative_primal_residual < 100 * options.feasibility_tol && + relative_dual_residual < 100 * options.optimality_tol && + relative_complementarity_residual < 100 * options.complementarity_tol) { settings.log.printf("Suboptimal solution found\n"); return 1; } compute_residual_norms(w_save, x_save, y_save, v_save, z_save, data, primal_residual_norm, dual_residual_norm, complementarity_residual_norm); - if (primal_residual_norm < 100 * options.feasibility_tol && - dual_residual_norm < 100 * options.optimality_tol && - complementarity_residual_norm < 100 * options.complementarity_tol) { + primal_objective = data.c.inner_product(x_save); + relative_primal_residual = primal_residual_norm / (1.0 + norm_b); + relative_dual_residual = dual_residual_norm / (1.0 + norm_c); + relative_complementarity_residual = complementarity_residual_norm / (1.0 + std::abs(primal_objective)); + + if (relative_primal_residual < 100 * options.feasibility_tol && + relative_dual_residual < 100 * options.optimality_tol && + relative_complementarity_residual < 100 * options.complementarity_tol) { settings.log.printf("Restoring previous solution: primal %.2e dual %.2e complementarity %.2e\n", primal_residual_norm, dual_residual_norm, complementarity_residual_norm); data.w = w_save; data.x = x_save; @@ -794,6 +951,10 @@ i_t barrier_solver_t::solve(const barrier_solver_settings_t& settings.log.printf("Search direction computation failed\n"); return -1; } + if (toc(start_time) > settings.time_limit) { + settings.log.printf("Barrier time limit exceeded\n"); + return -1; + } f_t step_primal_aff = std::min(max_step_to_boundary(data.w, data.dw_aff), max_step_to_boundary(data.x, data.dx_aff)); f_t step_dual_aff = std::min(max_step_to_boundary(data.v, data.dv_aff), max_step_to_boundary(data.z, data.dz_aff)); @@ -815,8 +976,11 @@ i_t barrier_solver_t::solve(const barrier_solver_settings_t& dense_vector_t complementarity_wv_aff(num_upper_bounds); x_aff.pairwise_product(z_aff, complementarity_xz_aff); w_aff.pairwise_product(v_aff, complementarity_wv_aff); - f_t mu_aff = (complementarity_xz_aff.sum() + complementarity_wv_aff.sum()) / (static_cast(n) + static_cast(num_upper_bounds)); + + f_t complementarity_aff_sum = complementarity_xz_aff.sum() + complementarity_wv_aff.sum(); + f_t mu_aff = (complementarity_aff_sum) / (static_cast(n) + static_cast(num_upper_bounds)); f_t sigma = std::max(0.0, std::min(1.0, std::pow(mu_aff / mu, 3.0))); + f_t new_mu = sigma * mu_aff; // Compute the combined centering corrector step data.primal_rhs.set_scalar(0.0); @@ -825,12 +989,12 @@ i_t barrier_solver_t::solve(const barrier_solver_settings_t& // complementarity_xz_rhs = -dx_aff .* dz_aff + sigma * mu data.dx_aff.pairwise_product(data.dz_aff, data.complementarity_xz_rhs); data.complementarity_xz_rhs.multiply_scalar(-1.0); - data.complementarity_xz_rhs.add_scalar(sigma * mu); + data.complementarity_xz_rhs.add_scalar(new_mu); // complementarity_wv_rhs = -dw_aff .* dv_aff + sigma * mu data.dw_aff.pairwise_product(data.dv_aff, data.complementarity_wv_rhs); data.complementarity_wv_rhs.multiply_scalar(-1.0); - data.complementarity_wv_rhs.add_scalar(sigma * mu); + data.complementarity_wv_rhs.add_scalar(new_mu); f_t max_corrector_residual = 0.0; status = compute_search_direction(data, data.dw, data.dx, data.dy, data.dv, data.dz, max_corrector_residual); @@ -845,6 +1009,10 @@ i_t barrier_solver_t::solve(const barrier_solver_settings_t& return -1; } data.has_factorization = false; + if (toc(start_time) > settings.time_limit) { + settings.log.printf("Barrier time limit exceeded\n"); + return -1; + } // dw = dw_aff + dw_cc // dx = dx_aff + dx_cc @@ -888,9 +1056,18 @@ i_t barrier_solver_t::solve(const barrier_solver_settings_t& compute_residual_norms(data.w, data.x, data.y, data.v, data.z, data, primal_residual_norm, dual_residual_norm, complementarity_residual_norm); - if (primal_residual_norm < 100 * options.feasibility_tol && - dual_residual_norm < 100 * options.optimality_tol && - complementarity_residual_norm < 100 * options.complementarity_tol) { + mu = (data.complementarity_xz_residual.sum() + data.complementarity_wv_residual.sum()) / (static_cast(n) + static_cast(num_upper_bounds)); + + primal_objective = data.c.inner_product(data.x); + dual_objective = data.b.inner_product(data.y) - restrict_u.inner_product(data.v); + + relative_primal_residual = primal_residual_norm / (1.0 + norm_b); + relative_dual_residual = dual_residual_norm / (1.0 + norm_c); + relative_complementarity_residual = complementarity_residual_norm / (1.0 + std::abs(primal_objective)); + + if (relative_primal_residual < 100 * options.feasibility_tol && + relative_dual_residual < 100 * options.optimality_tol && + relative_complementarity_residual < 100 * options.complementarity_tol) { w_save = data.w; x_save = data.x; y_save = data.y; @@ -898,10 +1075,6 @@ i_t barrier_solver_t::solve(const barrier_solver_settings_t& z_save = data.z; } - mu = (data.complementarity_xz_residual.sum() + data.complementarity_wv_residual.sum()) / (static_cast(n) + static_cast(num_upper_bounds)); - - primal_objective = data.c.inner_product(data.x); - dual_objective = data.b.inner_product(data.y) - restrict_u.inner_product(data.v); iter++; elapsed_time = toc(start_time); @@ -914,9 +1087,9 @@ i_t barrier_solver_t::solve(const barrier_solver_settings_t& iter, primal_objective + lp.obj_constant, dual_objective + lp.obj_constant, - primal_residual_norm, - dual_residual_norm, - complementarity_residual_norm, + relative_primal_residual, + relative_dual_residual, + relative_complementarity_residual, step_primal, step_dual, std::min(data.complementarity_xz_residual.minimum(), data.complementarity_wv_residual.minimum()), @@ -924,15 +1097,18 @@ i_t barrier_solver_t::solve(const barrier_solver_settings_t& std::max(max_affine_residual, max_corrector_residual), elapsed_time); - bool primal_feasible = primal_residual_norm < options.feasibility_tol; - bool dual_feasible = dual_residual_norm < options.optimality_tol; - bool small_gap = complementarity_residual_norm < options.complementarity_tol; + bool primal_feasible = relative_primal_residual < options.feasibility_tol; + bool dual_feasible = relative_dual_residual < options.optimality_tol; + bool small_gap = relative_complementarity_residual < options.complementarity_tol; converged = primal_feasible && dual_feasible && small_gap; if (converged) { - settings.log.printf("Optimal solution found\n"); - settings.log.printf("Optimal objective value %.12e\n", primal_objective + lp.obj_constant); + settings.log.printf("Optimal solution found in %d iterations and %.2fs\n", iter, toc(start_time)); + settings.log.printf("Objective %+.8e\n", primal_objective + lp.obj_constant); + settings.log.printf("Primal infeasibility (abs/rel): %8.2e/%8.2e\n", primal_residual_norm, relative_primal_residual); + settings.log.printf("Dual infeasibility (abs/rel): %8.2e/%8.2e\n", dual_residual_norm, relative_dual_residual); + settings.log.printf("Complementarity gap (abs/rel): %8.2e/%8.2e\n", complementarity_residual_norm, relative_complementarity_residual); return 0; } } diff --git a/cpp/src/dual_simplex/barrier.hpp b/cpp/src/dual_simplex/barrier.hpp index d3d88c0fb0..fdb7d803ef 100644 --- a/cpp/src/dual_simplex/barrier.hpp +++ b/cpp/src/dual_simplex/barrier.hpp @@ -27,9 +27,9 @@ namespace cuopt::linear_programming::dual_simplex { template struct barrier_solver_settings_t { - f_t feasibility_tol = 1e-6; - f_t optimality_tol = 1e-6; - f_t complementarity_tol = 1e-6; + f_t feasibility_tol = 1e-8; + f_t optimality_tol = 1e-8; + f_t complementarity_tol = 1e-8; i_t iteration_limit = 1000; f_t step_scale = 0.9; }; diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index e2537e5a11..9c366d9830 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -857,7 +857,7 @@ i_t presolve(const lp_problem_t& original, } // Check for dependent rows - bool check_dependent_rows = settings.barrier; + bool check_dependent_rows = false; //settings.barrier; if (check_dependent_rows) { std::vector dependent_rows; constexpr i_t kOk = -1; diff --git a/cpp/src/dual_simplex/sparse_cholesky.hpp b/cpp/src/dual_simplex/sparse_cholesky.hpp index 9b53b74f07..73e50658d5 100644 --- a/cpp/src/dual_simplex/sparse_cholesky.hpp +++ b/cpp/src/dual_simplex/sparse_cholesky.hpp @@ -108,9 +108,11 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { &reorder_alg, sizeof(cudssAlgType_t)), status, "cudssConfigSet for reordering alg"); #endif +#if 0 int32_t ir_n_steps = 2; CUDSS_CALL_AND_CHECK_EXIT(cudssConfigSet(solverConfig, CUDSS_CONFIG_IR_N_STEPS, &ir_n_steps, sizeof(int32_t)), status, "cudssConfigSet for ir n steps"); +#endif // Device pointers csr_offset_d = nullptr; From b765e3ba8115159d12e7a3ad492367db0b5915d9 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Tue, 12 Aug 2025 17:53:58 -0700 Subject: [PATCH 13/31] Return suboptimal if corrector step fails --- cpp/src/dual_simplex/barrier.cpp | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/cpp/src/dual_simplex/barrier.cpp b/cpp/src/dual_simplex/barrier.cpp index 9efc8f866a..db60c2bac6 100644 --- a/cpp/src/dual_simplex/barrier.cpp +++ b/cpp/src/dual_simplex/barrier.cpp @@ -947,6 +947,8 @@ i_t barrier_solver_t::solve(const barrier_solver_settings_t& data.z = z_save; settings.log.printf("Suboptimal solution found\n"); return 1; + } else { + settings.log.printf("Primal residual %.2e dual residual %.2e complementarity residual %.2e\n", relative_primal_residual, relative_dual_residual, relative_complementarity_residual); } settings.log.printf("Search direction computation failed\n"); return -1; @@ -999,12 +1001,32 @@ i_t barrier_solver_t::solve(const barrier_solver_settings_t& f_t max_corrector_residual = 0.0; status = compute_search_direction(data, data.dw, data.dx, data.dy, data.dv, data.dz, max_corrector_residual); if (status < 0) { - if (primal_residual_norm < 100 * options.feasibility_tol && - dual_residual_norm < 100 * options.optimality_tol && - complementarity_residual_norm < 100 * options.complementarity_tol) { + if (relative_primal_residual < 100 * options.feasibility_tol && + relative_dual_residual < 100 * options.optimality_tol && + relative_complementarity_residual < 100 * options.complementarity_tol) { settings.log.printf("Suboptimal solution found\n"); return 1; } + compute_residual_norms(w_save, x_save, y_save, v_save, z_save, data, primal_residual_norm, dual_residual_norm, complementarity_residual_norm); + primal_objective = data.c.inner_product(x_save); + relative_primal_residual = primal_residual_norm / (1.0 + norm_b); + relative_dual_residual = dual_residual_norm / (1.0 + norm_c); + relative_complementarity_residual = complementarity_residual_norm / (1.0 + std::abs(primal_objective)); + + if (relative_primal_residual < 100 * options.feasibility_tol && + relative_dual_residual < 100 * options.optimality_tol && + relative_complementarity_residual < 100 * options.complementarity_tol) { + settings.log.printf("Restoring previous solution: primal %.2e dual %.2e complementarity %.2e\n", primal_residual_norm, dual_residual_norm, complementarity_residual_norm); + data.w = w_save; + data.x = x_save; + data.y = y_save; + data.v = v_save; + data.z = z_save; + settings.log.printf("Suboptimal solution found\n"); + return 1; + } else { + settings.log.printf("Primal residual %.2e dual residual %.2e complementarity residual %.2e\n", relative_primal_residual, relative_dual_residual, relative_complementarity_residual); + } settings.log.printf("Search direction computation failed\n"); return -1; } From 67c81a61dcfea3213c59cca6c6dae96591d10fb0 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Tue, 12 Aug 2025 17:55:28 -0700 Subject: [PATCH 14/31] Style fixes --- cmake/FindCUDSS.cmake | 13 + .../cuopt/linear_programming/constants.h | 2 +- cpp/src/dual_simplex/barrier.cpp | 1198 +++++++++-------- cpp/src/dual_simplex/barrier.hpp | 15 +- cpp/src/dual_simplex/dense_vector.hpp | 23 +- cpp/src/dual_simplex/presolve.cpp | 87 +- .../dual_simplex/simplex_solver_settings.hpp | 10 +- cpp/src/dual_simplex/solve.cpp | 10 +- cpp/src/dual_simplex/sparse_cholesky.hpp | 528 ++++---- cpp/src/linear_programming/solve.cu | 5 +- 10 files changed, 990 insertions(+), 901 deletions(-) diff --git a/cmake/FindCUDSS.cmake b/cmake/FindCUDSS.cmake index 6f26e6bd2a..9e1e110cb0 100644 --- a/cmake/FindCUDSS.cmake +++ b/cmake/FindCUDSS.cmake @@ -1,3 +1,16 @@ +# ============================================================================= +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= if (CUDSS_INCLUDE AND CUDSS_LIBRARIES) set(CUDSS_FIND_QUIETLY TRUE) diff --git a/cpp/include/cuopt/linear_programming/constants.h b/cpp/include/cuopt/linear_programming/constants.h index 55fe544758..250d9f8219 100644 --- a/cpp/include/cuopt/linear_programming/constants.h +++ b/cpp/include/cuopt/linear_programming/constants.h @@ -104,7 +104,7 @@ #define CUOPT_METHOD_CONCURRENT 0 #define CUOPT_METHOD_PDLP 1 #define CUOPT_METHOD_DUAL_SIMPLEX 2 -#define CUOPT_METHOD_BARRIER 3 +#define CUOPT_METHOD_BARRIER 3 /* @brief Status codes constants */ #define CUOPT_SUCCESS 0 diff --git a/cpp/src/dual_simplex/barrier.cpp b/cpp/src/dual_simplex/barrier.cpp index db60c2bac6..cb9d6fa7da 100644 --- a/cpp/src/dual_simplex/barrier.cpp +++ b/cpp/src/dual_simplex/barrier.cpp @@ -17,15 +17,13 @@ #include - -#include #include +#include #include #include #include - namespace cuopt::linear_programming::dual_simplex { template @@ -33,7 +31,7 @@ class iteration_data_t { public: iteration_data_t(const lp_problem_t& lp, i_t num_upper_bounds, - const simplex_solver_settings_t& settings) + const simplex_solver_settings_t& settings) : upper_bounds(num_upper_bounds), c(lp.objective), b(lp.rhs), @@ -70,31 +68,24 @@ class iteration_data_t { dz(lp.num_cols), has_factorization(false) { - // Create the upper bounds vector n_upper_bounds = 0; for (i_t j = 0; j < lp.num_cols; j++) { - if (lp.upper[j] < inf) { - upper_bounds[n_upper_bounds++] = j; - } + if (lp.upper[j] < inf) { upper_bounds[n_upper_bounds++] = j; } } settings.log.printf("n_upper_bounds %d\n", n_upper_bounds); // Form A*Dinv*A' diag.set_scalar(1.0); if (n_upper_bounds > 0) { - for (i_t k = 0; k < n_upper_bounds; k++) { - i_t j = upper_bounds[k]; - diag[j] = 2.0; - } + for (i_t k = 0; k < n_upper_bounds; k++) { + i_t j = upper_bounds[k]; + diag[j] = 2.0; + } } inv_diag.set_scalar(1.0); - if (n_upper_bounds > 0) { - diag.inverse(inv_diag); - } + if (n_upper_bounds > 0) { diag.inverse(inv_diag); } inv_sqrt_diag.set_scalar(1.0); - if (n_upper_bounds > 0) { - inv_diag.sqrt(inv_sqrt_diag); - } + if (n_upper_bounds > 0) { inv_diag.sqrt(inv_sqrt_diag); } column_density(lp.A, settings); @@ -112,7 +103,6 @@ class iteration_data_t { settings.log.printf("AAT time %.2fs\n", aat_time); settings.log.printf("AAT nonzeros %e\n", static_cast(ADAT.col_start[lp.num_rows])); - chol = std::make_unique>(settings, lp.num_rows); chol->set_positive_definite(false); @@ -120,17 +110,16 @@ class iteration_data_t { chol->analyze(ADAT); } - void column_density(const csc_matrix_t& A, const simplex_solver_settings_t& settings) + void column_density(const csc_matrix_t& A, + const simplex_solver_settings_t& settings) { const i_t m = A.m; const i_t n = A.n; - if (n < 2) { - return; - } + if (n < 2) { return; } std::vector density(n); for (i_t j = 0; j < n; j++) { - const i_t nnz = A.col_start[j+1] - A.col_start[j]; - density[j] = static_cast(nnz); + const i_t nnz = A.col_start[j + 1] - A.col_start[j]; + density[j] = static_cast(nnz); } std::vector permutation(n); @@ -145,17 +134,21 @@ class iteration_data_t { f_t nnz = density[permutation[0]]; columns_to_keep.push_back(permutation[0]); for (i_t i = 1; i < n; i++) { - const i_t j = permutation[i]; + const i_t j = permutation[i]; const f_t density_j = density[j]; if (i > n - 5 && density_j > 10.0) { - settings.log.printf("Dense column %6d nz %6d density %.2e nnz %.2e\n", j, static_cast(density_j), density_j / m, nnz + density_j * density_j); + settings.log.printf("Dense column %6d nz %6d density %.2e nnz %.2e\n", + j, + static_cast(density_j), + density_j / m, + nnz + density_j * density_j); } nnz += density_j * density_j; columns_to_keep.push_back(j); } - } + } void scatter_upper_bounds(const dense_vector_t& y, dense_vector_t& z) { @@ -177,7 +170,10 @@ class iteration_data_t { } } - void preconditioned_conjugate_gradient(const simplex_solver_settings_t& settings, const dense_vector_t& b, f_t tolerance, dense_vector_t& xinout) const + void preconditioned_conjugate_gradient(const simplex_solver_settings_t& settings, + const dense_vector_t& b, + f_t tolerance, + dense_vector_t& xinout) const { dense_vector_t residual = b; dense_vector_t y(ADAT.n); @@ -196,218 +192,221 @@ class iteration_data_t { p.multiply_scalar(-1.0); dense_vector_t Ap(A.n); - i_t iter = 0; - f_t norm_residual = vector_norm2(residual); + i_t iter = 0; + f_t norm_residual = vector_norm2(residual); f_t initial_norm_residual = norm_residual; - settings.log.printf("PCG initial residual 2-norm %e inf-norm %e\n", norm_residual, vector_norm_inf(residual)); + settings.log.printf("PCG initial residual 2-norm %e inf-norm %e\n", + norm_residual, + vector_norm_inf(residual)); f_t rTy = residual.inner_product(y); while (norm_residual > tolerance && iter < 100) { - // Compute Ap = A * p - matrix_vector_multiply(A, 1.0, p, 0.0, Ap); - - // Compute alpha = (r^T * y) / (p^T * Ap) - f_t alpha = rTy / p.inner_product(Ap); + // Compute Ap = A * p + matrix_vector_multiply(A, 1.0, p, 0.0, Ap); - // Update residual = residual + alpha * Ap - residual.axpy(alpha, Ap, 1.0); + // Compute alpha = (r^T * y) / (p^T * Ap) + f_t alpha = rTy / p.inner_product(Ap); - f_t new_residual = vector_norm2(residual); - if (new_residual > 1.1 * norm_residual || new_residual > 1.1 * initial_norm_residual) { - settings.log.printf("PCG residual increased by more than 10%%. New %e > %e\n", new_residual, norm_residual); - break; - } - norm_residual = new_residual; + // Update residual = residual + alpha * Ap + residual.axpy(alpha, Ap, 1.0); - // Update x = x + alpha * p - x.axpy(alpha, p, 1.0); + f_t new_residual = vector_norm2(residual); + if (new_residual > 1.1 * norm_residual || new_residual > 1.1 * initial_norm_residual) { + settings.log.printf( + "PCG residual increased by more than 10%%. New %e > %e\n", new_residual, norm_residual); + break; + } + norm_residual = new_residual; + // Update x = x + alpha * p + x.axpy(alpha, p, 1.0); - // residual = A*x - b - residual = b; - matrix_vector_multiply(A, 1.0, x, -1.0, residual); - norm_residual = vector_norm2(residual); + // residual = A*x - b + residual = b; + matrix_vector_multiply(A, 1.0, x, -1.0, residual); + norm_residual = vector_norm2(residual); - // Solve M y = r for y - chol->solve(residual, y); + // Solve M y = r for y + chol->solve(residual, y); - // Compute beta = (r_+^T y_+) / (r^T y) - f_t r1Ty1 = residual.inner_product(y); - f_t beta = r1Ty1 / rTy; + // Compute beta = (r_+^T y_+) / (r^T y) + f_t r1Ty1 = residual.inner_product(y); + f_t beta = r1Ty1 / rTy; - rTy = r1Ty1; + rTy = r1Ty1; - // Update p = -y + beta * p - p.axpy(-1.0, y, beta); + // Update p = -y + beta * p + p.axpy(-1.0, y, beta); - iter++; - settings.log.printf("PCG iter %3d 2-norm_residual %.2e inf-norm_residual %.2e\n", iter, norm_residual, vector_norm_inf(residual)); + iter++; + settings.log.printf("PCG iter %3d 2-norm_residual %.2e inf-norm_residual %.2e\n", + iter, + norm_residual, + vector_norm_inf(residual)); } residual = b; matrix_vector_multiply(A, 1.0, x, -1.0, residual); norm_residual = vector_norm2(residual); if (norm_residual < initial_norm_residual) { - settings.log.printf("PCG improved residual 2-norm %.2e/%.2e in %d iterations\n", norm_residual, initial_norm_residual, iter); - xinout = x; - } else - { - settings.log.printf("Rejecting PCG solution\n"); + settings.log.printf("PCG improved residual 2-norm %.2e/%.2e in %d iterations\n", + norm_residual, + initial_norm_residual, + iter); + xinout = x; + } else { + settings.log.printf("Rejecting PCG solution\n"); } } - i_t n_upper_bounds; - dense_vector_t upper_bounds; - dense_vector_t c; - dense_vector_t b; - - dense_vector_t w; - dense_vector_t x; - dense_vector_t y; - dense_vector_t v; - dense_vector_t z; - - dense_vector_t diag; - dense_vector_t inv_diag; - dense_vector_t inv_sqrt_diag; - - csc_matrix_t AD; - csc_matrix_t AT; - csc_matrix_t ADAT; - - std::unique_ptr> chol; - - - bool has_factorization; - - dense_vector_t primal_residual; - dense_vector_t bound_residual; - dense_vector_t dual_residual; - dense_vector_t complementarity_xz_residual; - dense_vector_t complementarity_wv_residual; - - dense_vector_t primal_rhs; - dense_vector_t bound_rhs; - dense_vector_t dual_rhs; - dense_vector_t complementarity_xz_rhs; - dense_vector_t complementarity_wv_rhs; - - dense_vector_t dw_aff; - dense_vector_t dx_aff; - dense_vector_t dy_aff; - dense_vector_t dv_aff; - dense_vector_t dz_aff; - - dense_vector_t dw; - dense_vector_t dx; - dense_vector_t dy; - dense_vector_t dv; - dense_vector_t dz; + i_t n_upper_bounds; + dense_vector_t upper_bounds; + dense_vector_t c; + dense_vector_t b; + + dense_vector_t w; + dense_vector_t x; + dense_vector_t y; + dense_vector_t v; + dense_vector_t z; + + dense_vector_t diag; + dense_vector_t inv_diag; + dense_vector_t inv_sqrt_diag; + + csc_matrix_t AD; + csc_matrix_t AT; + csc_matrix_t ADAT; + + std::unique_ptr> chol; + + bool has_factorization; + + dense_vector_t primal_residual; + dense_vector_t bound_residual; + dense_vector_t dual_residual; + dense_vector_t complementarity_xz_residual; + dense_vector_t complementarity_wv_residual; + + dense_vector_t primal_rhs; + dense_vector_t bound_rhs; + dense_vector_t dual_rhs; + dense_vector_t complementarity_xz_rhs; + dense_vector_t complementarity_wv_rhs; + + dense_vector_t dw_aff; + dense_vector_t dx_aff; + dense_vector_t dy_aff; + dense_vector_t dv_aff; + dense_vector_t dz_aff; + + dense_vector_t dw; + dense_vector_t dx; + dense_vector_t dy; + dense_vector_t dv; + dense_vector_t dz; }; - - template void barrier_solver_t::initial_point(iteration_data_t& data) { - // Perform a numerical factorization - i_t status = data.chol->factorize(data.ADAT); - if (status != 0) { - settings.log.printf("Initial factorization failed\n"); - return; - } - - // rhs_x <- b - dense_vector_t rhs_x(lp.rhs); + // Perform a numerical factorization + i_t status = data.chol->factorize(data.ADAT); + if (status != 0) { + settings.log.printf("Initial factorization failed\n"); + return; + } - dense_vector_t Fu(lp.num_cols); - data.gather_upper_bounds(lp.upper, Fu); + // rhs_x <- b + dense_vector_t rhs_x(lp.rhs); - dense_vector_t DinvFu(lp.num_cols); // DinvFu = Dinv * Fu - data.inv_diag.pairwise_product(Fu, DinvFu); + dense_vector_t Fu(lp.num_cols); + data.gather_upper_bounds(lp.upper, Fu); - // rhs_x <- A * Dinv * F * u - b - matrix_vector_multiply(lp.A, 1.0, DinvFu, -1.0, rhs_x); + dense_vector_t DinvFu(lp.num_cols); // DinvFu = Dinv * Fu + data.inv_diag.pairwise_product(Fu, DinvFu); + // rhs_x <- A * Dinv * F * u - b + matrix_vector_multiply(lp.A, 1.0, DinvFu, -1.0, rhs_x); - // Solve A*Dinv*A'*q = A*Dinv*F*u - b - dense_vector_t q(lp.num_rows); - settings.log.printf("||rhs_x|| = %e\n", vector_norm2(rhs_x)); - i_t solve_status = data.chol->solve(rhs_x, q); - settings.log.printf("Initial solve status %d\n", solve_status); - settings.log.printf("||q|| = %e\n", vector_norm2(q)); + // Solve A*Dinv*A'*q = A*Dinv*F*u - b + dense_vector_t q(lp.num_rows); + settings.log.printf("||rhs_x|| = %e\n", vector_norm2(rhs_x)); + i_t solve_status = data.chol->solve(rhs_x, q); + settings.log.printf("Initial solve status %d\n", solve_status); + settings.log.printf("||q|| = %e\n", vector_norm2(q)); - // rhs_x <- A*Dinv*A'*q - rhs_x - matrix_vector_multiply(data.ADAT, 1.0, q, -1.0, rhs_x); - settings.log.printf("|| A*Dinv*A'*q - (A*Dinv*F*u - b) || = %e\n", vector_norm2(rhs_x)); + // rhs_x <- A*Dinv*A'*q - rhs_x + matrix_vector_multiply(data.ADAT, 1.0, q, -1.0, rhs_x); + settings.log.printf("|| A*Dinv*A'*q - (A*Dinv*F*u - b) || = %e\n", vector_norm2(rhs_x)); - // x = Dinv*(F*u - A'*q) - data.inv_diag.pairwise_product(Fu, data.x); - // Fu <- -1.0 * A' * q + 1.0 * Fu - matrix_transpose_vector_multiply(lp.A, -1.0, q, 1.0, Fu); + // x = Dinv*(F*u - A'*q) + data.inv_diag.pairwise_product(Fu, data.x); + // Fu <- -1.0 * A' * q + 1.0 * Fu + matrix_transpose_vector_multiply(lp.A, -1.0, q, 1.0, Fu); - // x <- Dinv * (F*u - A'*q) - data.inv_diag.pairwise_product(Fu, data.x); + // x <- Dinv * (F*u - A'*q) + data.inv_diag.pairwise_product(Fu, data.x); - // w <- E'*u - E'*x - if (data.n_upper_bounds > 0) { - for (i_t k = 0; k < data.n_upper_bounds; k++) { - i_t j = data.upper_bounds[k]; - data.w[k] = lp.upper[j] - data.x[j]; - } + // w <- E'*u - E'*x + if (data.n_upper_bounds > 0) { + for (i_t k = 0; k < data.n_upper_bounds; k++) { + i_t j = data.upper_bounds[k]; + data.w[k] = lp.upper[j] - data.x[j]; } + } - // Verify A*x = b - data.primal_residual = lp.rhs; - matrix_vector_multiply(lp.A, 1.0, data.x, -1.0, data.primal_residual); - settings.log.printf("||b - A * x||: %e\n", vector_norm2(data.primal_residual)); - - if (data.n_upper_bounds > 0) { - for (i_t k = 0; k < data.n_upper_bounds; k++) { - i_t j = data.upper_bounds[k]; - data.bound_residual[k] = lp.upper[j] - data.w[k] - data.x[j]; - } - settings.log.printf("|| u - w - x||: %e\n", vector_norm2(data.bound_residual)); + // Verify A*x = b + data.primal_residual = lp.rhs; + matrix_vector_multiply(lp.A, 1.0, data.x, -1.0, data.primal_residual); + settings.log.printf("||b - A * x||: %e\n", vector_norm2(data.primal_residual)); + + if (data.n_upper_bounds > 0) { + for (i_t k = 0; k < data.n_upper_bounds; k++) { + i_t j = data.upper_bounds[k]; + data.bound_residual[k] = lp.upper[j] - data.w[k] - data.x[j]; } + settings.log.printf("|| u - w - x||: %e\n", vector_norm2(data.bound_residual)); + } - // First compute rhs = A*Dinv*c - dense_vector_t rhs(lp.num_rows); - dense_vector_t Dinvc(lp.num_cols); - data.inv_diag.pairwise_product(lp.objective, Dinvc); - // rhs = 1.0 * A * Dinv * c - matrix_vector_multiply(lp.A, 1.0, Dinvc, 0.0, rhs); - - // Solve A*Dinv*A'*q = A*Dinv*c - data.chol->solve(rhs, data.y); - - // z = Dinv*(c - A'*y) - dense_vector_t cmATy = data.c; - matrix_transpose_vector_multiply(lp.A, -1.0, data.y, 1.0, cmATy); - // z <- Dinv * (c - A'*y) - data.inv_diag.pairwise_product(cmATy, data.z); - - // v = -E'*z - data.gather_upper_bounds(data.z, data.v); - data.v.multiply_scalar(-1.0); - - // Verify A'*y + z - E*v = c - data.z.pairwise_subtract(data.c, data.dual_residual); - matrix_transpose_vector_multiply(lp.A, 1.0, data.y, 1.0, data.dual_residual); - if (data.n_upper_bounds > 0) { - for (i_t k = 0; k < data.n_upper_bounds; k++) { - i_t j = data.upper_bounds[k]; - data.dual_residual[j] -= data.v[k]; - } + // First compute rhs = A*Dinv*c + dense_vector_t rhs(lp.num_rows); + dense_vector_t Dinvc(lp.num_cols); + data.inv_diag.pairwise_product(lp.objective, Dinvc); + // rhs = 1.0 * A * Dinv * c + matrix_vector_multiply(lp.A, 1.0, Dinvc, 0.0, rhs); + + // Solve A*Dinv*A'*q = A*Dinv*c + data.chol->solve(rhs, data.y); + + // z = Dinv*(c - A'*y) + dense_vector_t cmATy = data.c; + matrix_transpose_vector_multiply(lp.A, -1.0, data.y, 1.0, cmATy); + // z <- Dinv * (c - A'*y) + data.inv_diag.pairwise_product(cmATy, data.z); + + // v = -E'*z + data.gather_upper_bounds(data.z, data.v); + data.v.multiply_scalar(-1.0); + + // Verify A'*y + z - E*v = c + data.z.pairwise_subtract(data.c, data.dual_residual); + matrix_transpose_vector_multiply(lp.A, 1.0, data.y, 1.0, data.dual_residual); + if (data.n_upper_bounds > 0) { + for (i_t k = 0; k < data.n_upper_bounds; k++) { + i_t j = data.upper_bounds[k]; + data.dual_residual[j] -= data.v[k]; } - settings.log.printf("||A^T y + z - E*v - c ||: %e\n", vector_norm2(data.dual_residual)); - - // Make sure (w, x, v, z) > 0 - float64_t epsilon_adjust = 10.0; - data.w.ensure_positive(epsilon_adjust); - data.x.ensure_positive(epsilon_adjust); - data.v.ensure_positive(epsilon_adjust); - data.z.ensure_positive(epsilon_adjust); + } + settings.log.printf("||A^T y + z - E*v - c ||: %e\n", vector_norm2(data.dual_residual)); + + // Make sure (w, x, v, z) > 0 + float64_t epsilon_adjust = 10.0; + data.w.ensure_positive(epsilon_adjust); + data.x.ensure_positive(epsilon_adjust); + data.v.ensure_positive(epsilon_adjust); + data.z.ensure_positive(epsilon_adjust); } template @@ -418,33 +417,33 @@ void barrier_solver_t::compute_residuals(const dense_vector_t& z, iteration_data_t& data) { - // Compute primal_residual = b - A*x - data.primal_residual = lp.rhs; - matrix_vector_multiply(lp.A, -1.0, x, 1.0, data.primal_residual); - - // Compute bound_residual = E'*u - w - E'*x - if (data.n_upper_bounds > 0) { - for (i_t k = 0; k < data.n_upper_bounds; k++) { - i_t j = data.upper_bounds[k]; - data.bound_residual[k] = lp.upper[j] - w[k] - x[j]; - } + // Compute primal_residual = b - A*x + data.primal_residual = lp.rhs; + matrix_vector_multiply(lp.A, -1.0, x, 1.0, data.primal_residual); + + // Compute bound_residual = E'*u - w - E'*x + if (data.n_upper_bounds > 0) { + for (i_t k = 0; k < data.n_upper_bounds; k++) { + i_t j = data.upper_bounds[k]; + data.bound_residual[k] = lp.upper[j] - w[k] - x[j]; } + } - // Compute dual_residual = c - A'*y - z + E*v - data.c.pairwise_subtract(z, data.dual_residual); - matrix_transpose_vector_multiply(lp.A, -1.0, y, 1.0, data.dual_residual); - if (data.n_upper_bounds > 0) { - for (i_t k = 0; k < data.n_upper_bounds; k++) { - i_t j = data.upper_bounds[k]; - data.dual_residual[j] += v[k]; - } + // Compute dual_residual = c - A'*y - z + E*v + data.c.pairwise_subtract(z, data.dual_residual); + matrix_transpose_vector_multiply(lp.A, -1.0, y, 1.0, data.dual_residual); + if (data.n_upper_bounds > 0) { + for (i_t k = 0; k < data.n_upper_bounds; k++) { + i_t j = data.upper_bounds[k]; + data.dual_residual[j] += v[k]; } + } - // Compute complementarity_xz_residual = x.*z - x.pairwise_product(z, data.complementarity_xz_residual); + // Compute complementarity_xz_residual = x.*z + x.pairwise_product(z, data.complementarity_xz_residual); - // Compute complementarity_wv_residual = w.*v - w.pairwise_product(v, data.complementarity_wv_residual); + // Compute complementarity_wv_residual = w.*v + w.pairwise_product(v, data.complementarity_wv_residual); } template @@ -458,49 +457,53 @@ void barrier_solver_t::compute_residual_norms(const dense_vector_t(data.primal_residual), - vector_norm_inf(data.bound_residual)); - dual_residual_norm = vector_norm_inf(data.dual_residual); - complementarity_residual_norm = std::max(vector_norm_inf(data.complementarity_xz_residual), - vector_norm_inf(data.complementarity_wv_residual)); + compute_residuals(w, x, y, v, z, data); + primal_residual_norm = std::max(vector_norm_inf(data.primal_residual), + vector_norm_inf(data.bound_residual)); + dual_residual_norm = vector_norm_inf(data.dual_residual); + complementarity_residual_norm = + std::max(vector_norm_inf(data.complementarity_xz_residual), + vector_norm_inf(data.complementarity_wv_residual)); } template -f_t barrier_solver_t::max_step_to_boundary(const dense_vector_t& x, const dense_vector_t& dx, i_t& index) const +f_t barrier_solver_t::max_step_to_boundary(const dense_vector_t& x, + const dense_vector_t& dx, + i_t& index) const { - float64_t max_step = 1.0; - index = -1; - for (i_t i = 0; i < x.size(); i++) { - // x_i + alpha * dx_i >= 0, x_i >= 0, alpha >= 0 - // We only need to worry about the case where dx_i < 0 - // alpha * dx_i >= -x_i => alpha <= -x_i / dx_i - if (dx[i] < 0.0) { - const f_t ratio = -x[i]/dx[i]; - if (ratio < max_step) { - max_step = ratio; - index = i; - } - } + float64_t max_step = 1.0; + index = -1; + for (i_t i = 0; i < x.size(); i++) { + // x_i + alpha * dx_i >= 0, x_i >= 0, alpha >= 0 + // We only need to worry about the case where dx_i < 0 + // alpha * dx_i >= -x_i => alpha <= -x_i / dx_i + if (dx[i] < 0.0) { + const f_t ratio = -x[i] / dx[i]; + if (ratio < max_step) { + max_step = ratio; + index = i; + } } - return max_step; + } + return max_step; } template -f_t barrier_solver_t::max_step_to_boundary(const dense_vector_t& x, const dense_vector_t& dx) const +f_t barrier_solver_t::max_step_to_boundary(const dense_vector_t& x, + const dense_vector_t& dx) const { - i_t index; - return max_step_to_boundary(x, dx, index); + i_t index; + return max_step_to_boundary(x, dx, index); } template i_t barrier_solver_t::compute_search_direction(iteration_data_t& data, - dense_vector_t& dw, - dense_vector_t& dx, - dense_vector_t& dy, - dense_vector_t& dv, - dense_vector_t& dz, - f_t& max_residual) + dense_vector_t& dw, + dense_vector_t& dx, + dense_vector_t& dy, + dense_vector_t& dv, + dense_vector_t& dz, + f_t& max_residual) { const bool debug = true; // Solves the linear system @@ -539,8 +542,8 @@ i_t barrier_solver_t::compute_search_direction(iteration_data_tfactorize(data.ADAT); if (status < 0) { - settings.log.printf("Factorization failed.\n"); - return -1; + settings.log.printf("Factorization failed.\n"); + return -1; } data.has_factorization = true; } @@ -571,7 +574,7 @@ i_t barrier_solver_t::compute_search_direction(iteration_data_t r1 = tmp3; + dense_vector_t r1 = tmp3; dense_vector_t r1_prime = r1; // tmp4 <- inv_diag * ( dual_rhs -complementarity_xz_rhs ./ x + E *((complementarity_wv_rhs - v .* // bound_rhs) ./ w)) @@ -592,19 +595,19 @@ i_t barrier_solver_t::compute_search_direction(iteration_data_t y_residual = h; matrix_vector_multiply(data.ADAT, 1.0, dy, -1.0, y_residual); const f_t y_residual_norm = vector_norm_inf(y_residual); - max_residual = std::max(max_residual, y_residual_norm); + max_residual = std::max(max_residual, y_residual_norm); if (y_residual_norm > 1e-2) { settings.log.printf("|| h || = %.2e\n", vector_norm_inf(h)); settings.log.printf("||ADAT*dy - h|| = %.2e\n", y_residual_norm); } - if (y_residual_norm > 10) - { - return -1; - } + if (y_residual_norm > 10) { return -1; } if (0 && y_residual_norm > 1e-10) { - settings.log.printf("Running PCG to improve residual 2-norm %e inf-norm %e\n", vector_norm2(y_residual), y_residual_norm); - data.preconditioned_conjugate_gradient(settings, h, std::min(y_residual_norm / 100.0, 1e-6), dy); + settings.log.printf("Running PCG to improve residual 2-norm %e inf-norm %e\n", + vector_norm2(y_residual), + y_residual_norm); + data.preconditioned_conjugate_gradient( + settings, h, std::min(y_residual_norm / 100.0, 1e-6), dy); y_residual = h; matrix_vector_multiply(data.ADAT, 1.0, dy, -1.0, y_residual); const f_t y_residual_norm_pcg = vector_norm_inf(y_residual); @@ -613,7 +616,7 @@ i_t barrier_solver_t::compute_search_direction(iteration_data_t::compute_search_direction(iteration_data_t(dx_residual); - max_residual = std::max(max_residual, dx_residual_norm); + max_residual = std::max(max_residual, dx_residual_norm); if (dx_residual_norm > 1e-2) { settings.log.printf("|| D * dx - A'*y + r1 || = %.2e\n", dx_residual_norm); } @@ -641,7 +644,7 @@ i_t barrier_solver_t::compute_search_direction(iteration_data_t(dx_residual_2); - max_residual = std::max(max_residual, dx_residual_2_norm); + max_residual = std::max(max_residual, dx_residual_2_norm); if (dx_residual_2_norm > 1e-2) { settings.log.printf("|| D^-1 (A'*dy - r1) - dx || = %.2e\n", dx_residual_2_norm); } @@ -655,7 +658,7 @@ i_t barrier_solver_t::compute_search_direction(iteration_data_t(dx_residual_6); - max_residual = std::max(max_residual, dx_residual_6_norm); + max_residual = std::max(max_residual, dx_residual_6_norm); if (dx_residual_6_norm > 1e-2) { settings.log.printf("|| A * D^-1 (A'*dy - r1) - A * dx || = %.2e\n", dx_residual_6_norm); } @@ -679,7 +682,7 @@ i_t barrier_solver_t::compute_search_direction(iteration_data_t(dx_residual_4); - max_residual = std::max(max_residual, dx_residual_4_norm); + max_residual = std::max(max_residual, dx_residual_4_norm); if (dx_residual_4_norm > 1e-2) { settings.log.printf("|| ADAT * dy - A * D^-1 * r1 - A * dx || = %.2e\n", dx_residual_4_norm); } @@ -687,7 +690,7 @@ i_t barrier_solver_t::compute_search_direction(iteration_data_t C(lp.num_rows, lp.num_rows, 1); add(ADinvAT, data.ADAT, 1.0, -1.0, C); const f_t matrix_residual = C.norm1(); - max_residual = std::max(max_residual, matrix_residual); + max_residual = std::max(max_residual, matrix_residual); if (matrix_residual > 1e-2) { settings.log.printf("|| AD^{-1/2} D^{-1} A^T + E - A D^{-1} A^T|| = %.2e\n", matrix_residual); } @@ -696,7 +699,7 @@ i_t barrier_solver_t::compute_search_direction(iteration_data_t dx_residual_7 = h; matrix_vector_multiply(data.ADAT, 1.0, dy, -1.0, dx_residual_7); const f_t dx_residual_7_norm = vector_norm_inf(dx_residual_7); - max_residual = std::max(max_residual, dx_residual_7_norm); + max_residual = std::max(max_residual, dx_residual_7_norm); if (dx_residual_7_norm > 1e-2) { settings.log.printf("|| A D^{-1} A^T * dy - h || = %.2e\n", dx_residual_7_norm); } @@ -706,7 +709,7 @@ i_t barrier_solver_t::compute_search_direction(iteration_data_t(x_residual); - max_residual = std::max(max_residual, x_residual_norm); + max_residual = std::max(max_residual, x_residual_norm); if (x_residual_norm > 1e-2) { settings.log.printf("|| A * dx - rp || = %.2e\n", x_residual_norm); } @@ -730,7 +733,7 @@ i_t barrier_solver_t::compute_search_direction(iteration_data_t(xz_residual); - max_residual = std::max(max_residual, xz_residual_norm); + max_residual = std::max(max_residual, xz_residual_norm); if (xz_residual_norm > 1e-2) { settings.log.printf("|| Z dx + X dz - rxz || = %.2e\n", xz_residual_norm); } @@ -770,13 +773,14 @@ i_t barrier_solver_t::compute_search_direction(iteration_data_t(dv_residual); - max_residual = std::max(max_residual, dv_residual_norm); + max_residual = std::max(max_residual, dv_residual_norm); if (dv_residual_norm > 1e-2) { - settings.log.printf("|| -v .* E' * dx + w .* dv - complementarity_wv_rhs - v .* bound_rhs || = %.2e\n", dv_residual_norm); + settings.log.printf( + "|| -v .* E' * dx + w .* dv - complementarity_wv_rhs - v .* bound_rhs || = %.2e\n", + dv_residual_norm); } } - // dual_residual <- A' * dy - E * dv + dz - dual_rhs dense_vector_t dual_residual(lp.num_cols); // dual_residual <- E * dv @@ -789,13 +793,13 @@ i_t barrier_solver_t::compute_search_direction(iteration_data_t(dual_residual); - max_residual = std::max(max_residual, dual_residual_norm); + max_residual = std::max(max_residual, dual_residual_norm); if (dual_residual_norm > 1e-2) { settings.log.printf("|| A' * dy - E * dv + dz - dual_rhs || = %.2e\n", dual_residual_norm); } } - // dw = bound_rhs - E'*dx + // dw = bound_rhs - E'*dx data.gather_upper_bounds(dx, tmp1); dw = data.bound_rhs; dw.axpy(-1.0, tmp1, 1.0); @@ -806,7 +810,7 @@ i_t barrier_solver_t::compute_search_direction(iteration_data_t(dw_residual); - max_residual = std::max(max_residual, dw_residual_norm); + max_residual = std::max(max_residual, dw_residual_norm); if (dw_residual_norm > 1e-2) { settings.log.printf("|| dw + E'*dx - bound_rhs || = %.2e\n", dw_residual_norm); } @@ -822,7 +826,7 @@ i_t barrier_solver_t::compute_search_direction(iteration_data_t(wv_residual); - max_residual = std::max(max_residual, wv_residual_norm); + max_residual = std::max(max_residual, wv_residual_norm); if (wv_residual_norm > 1e-2) { settings.log.printf("|| V dw + W dv - rwv || = %.2e\n", wv_residual_norm); } @@ -834,307 +838,377 @@ i_t barrier_solver_t::compute_search_direction(iteration_data_t i_t barrier_solver_t::solve(const barrier_solver_settings_t& options) { + float64_t start_time = tic(); - float64_t start_time = tic(); + i_t n = lp.num_cols; + i_t m = lp.num_rows; - i_t n = lp.num_cols; - i_t m = lp.num_rows; + settings.log.printf( + "Barrier solver: %d constraints, %d variables, %ld nonzeros\n", m, n, lp.A.col_start[n]); - settings.log.printf("Barrier solver: %d constraints, %d variables, %ld nonzeros\n", m, n, lp.A.col_start[n]); + // Compute the number of free variables + i_t num_free_variables = presolve_info.free_variable_pairs.size() / 2; + settings.log.printf("%d free variables\n", num_free_variables); - // Compute the number of free variables - i_t num_free_variables = presolve_info.free_variable_pairs.size() / 2; - settings.log.printf("%d free variables\n", num_free_variables); + // Compute the number of upper bounds + i_t num_upper_bounds = 0; + for (i_t j = 0; j < n; j++) { + if (lp.upper[j] < inf) { num_upper_bounds++; } + } + iteration_data_t data(lp, num_upper_bounds, settings); + if (toc(start_time) > settings.time_limit) { + settings.log.printf("Barrier time limit exceeded\n"); + return -1; + } + settings.log.printf("%d finite upper bounds\n", num_upper_bounds); - // Compute the number of upper bounds - i_t num_upper_bounds = 0; - for (i_t j = 0; j < n; j++) { - if (lp.upper[j] < inf) { - num_upper_bounds++; - } + initial_point(data); + if (toc(start_time) > settings.time_limit) { + settings.log.printf("Barrier time limit exceeded\n"); + return -1; + } + compute_residuals(data.w, data.x, data.y, data.v, data.z, data); + + f_t primal_residual_norm = std::max(vector_norm_inf(data.primal_residual), + vector_norm_inf(data.bound_residual)); + f_t dual_residual_norm = vector_norm_inf(data.dual_residual); + f_t complementarity_residual_norm = + std::max(vector_norm_inf(data.complementarity_xz_residual), + vector_norm_inf(data.complementarity_wv_residual)); + f_t mu = (data.complementarity_xz_residual.sum() + data.complementarity_wv_residual.sum()) / + (static_cast(n) + static_cast(num_upper_bounds)); + + f_t norm_b = vector_norm_inf(data.b); + f_t norm_c = vector_norm_inf(data.c); + + f_t primal_objective = data.c.inner_product(data.x); + + f_t relative_primal_residual = primal_residual_norm / (1.0 + norm_b); + f_t relative_dual_residual = dual_residual_norm / (1.0 + norm_c); + f_t relative_complementarity_residual = + complementarity_residual_norm / (1.0 + std::abs(primal_objective)); + + dense_vector_t restrict_u(num_upper_bounds); + dense_vector_t upper(lp.upper); + data.gather_upper_bounds(upper, restrict_u); + f_t dual_objective = data.b.inner_product(data.y) - restrict_u.inner_product(data.v); + + i_t iter = 0; + settings.log.printf( + " Objective Residual Step-Length " + "Time\n"); + settings.log.printf( + "Iter Primal Dual Primal Dual Compl. Primal Dual " + "Elapsed\n"); + float64_t elapsed_time = toc(start_time); + settings.log.printf("%2d %+.12e %+.12e %.2e %.2e %.2e %.1f\n", + iter, + primal_objective, + dual_objective, + primal_residual_norm, + dual_residual_norm, + complementarity_residual_norm, + elapsed_time); + + bool converged = primal_residual_norm < options.feasibility_tol && + dual_residual_norm < options.optimality_tol && + complementarity_residual_norm < options.complementarity_tol; + + dense_vector_t w_save = data.w; + dense_vector_t x_save = data.x; + dense_vector_t y_save = data.y; + dense_vector_t v_save = data.v; + dense_vector_t z_save = data.z; + + const i_t iteration_limit = std::min(options.iteration_limit, settings.iteration_limit); + + while (iter < iteration_limit) { + if (toc(start_time) > settings.time_limit) { + settings.log.printf("Barrier time limit exceeded\n"); + return -1; + } + // Compute the affine step + data.primal_rhs = data.primal_residual; + data.bound_rhs = data.bound_residual; + data.dual_rhs = data.dual_residual; + data.complementarity_xz_rhs = data.complementarity_xz_residual; + data.complementarity_wv_rhs = data.complementarity_wv_residual; + // x.*z -> -x .* z + data.complementarity_xz_rhs.multiply_scalar(-1.0); + // w.*v -> -w .* v + data.complementarity_wv_rhs.multiply_scalar(-1.0); + + f_t max_affine_residual = 0.0; + i_t status = compute_search_direction( + data, data.dw_aff, data.dx_aff, data.dy_aff, data.dv_aff, data.dz_aff, max_affine_residual); + if (status < 0) { + if (relative_primal_residual < 100 * options.feasibility_tol && + relative_dual_residual < 100 * options.optimality_tol && + relative_complementarity_residual < 100 * options.complementarity_tol) { + settings.log.printf("Suboptimal solution found\n"); + return 1; + } + compute_residual_norms(w_save, + x_save, + y_save, + v_save, + z_save, + data, + primal_residual_norm, + dual_residual_norm, + complementarity_residual_norm); + primal_objective = data.c.inner_product(x_save); + relative_primal_residual = primal_residual_norm / (1.0 + norm_b); + relative_dual_residual = dual_residual_norm / (1.0 + norm_c); + relative_complementarity_residual = + complementarity_residual_norm / (1.0 + std::abs(primal_objective)); + + if (relative_primal_residual < 100 * options.feasibility_tol && + relative_dual_residual < 100 * options.optimality_tol && + relative_complementarity_residual < 100 * options.complementarity_tol) { + settings.log.printf( + "Restoring previous solution: primal %.2e dual %.2e complementarity %.2e\n", + primal_residual_norm, + dual_residual_norm, + complementarity_residual_norm); + data.w = w_save; + data.x = x_save; + data.y = y_save; + data.v = v_save; + data.z = z_save; + settings.log.printf("Suboptimal solution found\n"); + return 1; + } else { + settings.log.printf( + "Primal residual %.2e dual residual %.2e complementarity residual %.2e\n", + relative_primal_residual, + relative_dual_residual, + relative_complementarity_residual); + } + settings.log.printf("Search direction computation failed\n"); + return -1; } - iteration_data_t data(lp, num_upper_bounds, settings); if (toc(start_time) > settings.time_limit) { settings.log.printf("Barrier time limit exceeded\n"); return -1; } - settings.log.printf("%d finite upper bounds\n", num_upper_bounds); - initial_point(data); + f_t step_primal_aff = std::min(max_step_to_boundary(data.w, data.dw_aff), + max_step_to_boundary(data.x, data.dx_aff)); + f_t step_dual_aff = std::min(max_step_to_boundary(data.v, data.dv_aff), + max_step_to_boundary(data.z, data.dz_aff)); + + // w_aff = w + step_primal_aff * dw_aff + // x_aff = x + step_primal_aff * dx_aff + // v_aff = v + step_dual_aff * dv_aff + // z_aff = z + step_dual_aff * dz_aff + dense_vector_t w_aff = data.w; + dense_vector_t x_aff = data.x; + dense_vector_t v_aff = data.v; + dense_vector_t z_aff = data.z; + w_aff.axpy(step_primal_aff, data.dw_aff, 1.0); + x_aff.axpy(step_primal_aff, data.dx_aff, 1.0); + v_aff.axpy(step_dual_aff, data.dv_aff, 1.0); + z_aff.axpy(step_dual_aff, data.dz_aff, 1.0); + + dense_vector_t complementarity_xz_aff(lp.num_cols); + dense_vector_t complementarity_wv_aff(num_upper_bounds); + x_aff.pairwise_product(z_aff, complementarity_xz_aff); + w_aff.pairwise_product(v_aff, complementarity_wv_aff); + + f_t complementarity_aff_sum = complementarity_xz_aff.sum() + complementarity_wv_aff.sum(); + f_t mu_aff = + (complementarity_aff_sum) / (static_cast(n) + static_cast(num_upper_bounds)); + f_t sigma = std::max(0.0, std::min(1.0, std::pow(mu_aff / mu, 3.0))); + f_t new_mu = sigma * mu_aff; + + // Compute the combined centering corrector step + data.primal_rhs.set_scalar(0.0); + data.bound_rhs.set_scalar(0.0); + data.dual_rhs.set_scalar(0.0); + // complementarity_xz_rhs = -dx_aff .* dz_aff + sigma * mu + data.dx_aff.pairwise_product(data.dz_aff, data.complementarity_xz_rhs); + data.complementarity_xz_rhs.multiply_scalar(-1.0); + data.complementarity_xz_rhs.add_scalar(new_mu); + + // complementarity_wv_rhs = -dw_aff .* dv_aff + sigma * mu + data.dw_aff.pairwise_product(data.dv_aff, data.complementarity_wv_rhs); + data.complementarity_wv_rhs.multiply_scalar(-1.0); + data.complementarity_wv_rhs.add_scalar(new_mu); + + f_t max_corrector_residual = 0.0; + status = compute_search_direction( + data, data.dw, data.dx, data.dy, data.dv, data.dz, max_corrector_residual); + if (status < 0) { + if (relative_primal_residual < 100 * options.feasibility_tol && + relative_dual_residual < 100 * options.optimality_tol && + relative_complementarity_residual < 100 * options.complementarity_tol) { + settings.log.printf("Suboptimal solution found\n"); + return 1; + } + compute_residual_norms(w_save, + x_save, + y_save, + v_save, + z_save, + data, + primal_residual_norm, + dual_residual_norm, + complementarity_residual_norm); + primal_objective = data.c.inner_product(x_save); + relative_primal_residual = primal_residual_norm / (1.0 + norm_b); + relative_dual_residual = dual_residual_norm / (1.0 + norm_c); + relative_complementarity_residual = + complementarity_residual_norm / (1.0 + std::abs(primal_objective)); + + if (relative_primal_residual < 100 * options.feasibility_tol && + relative_dual_residual < 100 * options.optimality_tol && + relative_complementarity_residual < 100 * options.complementarity_tol) { + settings.log.printf( + "Restoring previous solution: primal %.2e dual %.2e complementarity %.2e\n", + primal_residual_norm, + dual_residual_norm, + complementarity_residual_norm); + data.w = w_save; + data.x = x_save; + data.y = y_save; + data.v = v_save; + data.z = z_save; + settings.log.printf("Suboptimal solution found\n"); + return 1; + } else { + settings.log.printf( + "Primal residual %.2e dual residual %.2e complementarity residual %.2e\n", + relative_primal_residual, + relative_dual_residual, + relative_complementarity_residual); + } + settings.log.printf("Search direction computation failed\n"); + return -1; + } + data.has_factorization = false; if (toc(start_time) > settings.time_limit) { settings.log.printf("Barrier time limit exceeded\n"); return -1; } - compute_residuals(data.w, data.x, data.y, data.v, data.z, data); - - f_t primal_residual_norm = std::max(vector_norm_inf(data.primal_residual), - vector_norm_inf(data.bound_residual)); - f_t dual_residual_norm = vector_norm_inf(data.dual_residual); - f_t complementarity_residual_norm = std::max(vector_norm_inf(data.complementarity_xz_residual), - vector_norm_inf(data.complementarity_wv_residual)); - f_t mu = (data.complementarity_xz_residual.sum() + data.complementarity_wv_residual.sum()) / (static_cast(n) + static_cast(num_upper_bounds)); - - f_t norm_b = vector_norm_inf(data.b); - f_t norm_c = vector_norm_inf(data.c); - - f_t primal_objective = data.c.inner_product(data.x); - - f_t relative_primal_residual = primal_residual_norm / (1.0 + norm_b); - f_t relative_dual_residual = dual_residual_norm / (1.0 + norm_c); - f_t relative_complementarity_residual = complementarity_residual_norm / (1.0 + std::abs(primal_objective)); - - dense_vector_t restrict_u(num_upper_bounds); - dense_vector_t upper(lp.upper); - data.gather_upper_bounds(upper, restrict_u); - f_t dual_objective = data.b.inner_product(data.y) - restrict_u.inner_product(data.v); - - i_t iter = 0; - settings.log.printf(" Objective Residual Step-Length Time\n"); - settings.log.printf("Iter Primal Dual Primal Dual Compl. Primal Dual Elapsed\n"); - float64_t elapsed_time = toc(start_time); - settings.log.printf("%2d %+.12e %+.12e %.2e %.2e %.2e %.1f\n", \ - iter, primal_objective, dual_objective, primal_residual_norm, dual_residual_norm, complementarity_residual_norm, elapsed_time); - - bool converged = primal_residual_norm < options.feasibility_tol && dual_residual_norm < options.optimality_tol && complementarity_residual_norm < options.complementarity_tol; - - dense_vector_t w_save = data.w; - dense_vector_t x_save = data.x; - dense_vector_t y_save = data.y; - dense_vector_t v_save = data.v; - dense_vector_t z_save = data.z; - - const i_t iteration_limit = std::min(options.iteration_limit, settings.iteration_limit); - - while (iter < iteration_limit) { - if (toc(start_time) > settings.time_limit) { - settings.log.printf("Barrier time limit exceeded\n"); - return -1; - } - // Compute the affine step - data.primal_rhs = data.primal_residual; - data.bound_rhs = data.bound_residual; - data.dual_rhs = data.dual_residual; - data.complementarity_xz_rhs = data.complementarity_xz_residual; - data.complementarity_wv_rhs = data.complementarity_wv_residual; - // x.*z -> -x .* z - data.complementarity_xz_rhs.multiply_scalar(-1.0); - // w.*v -> -w .* v - data.complementarity_wv_rhs.multiply_scalar(-1.0); - - f_t max_affine_residual = 0.0; - i_t status = compute_search_direction(data, data.dw_aff, data.dx_aff, data.dy_aff, data.dv_aff, data.dz_aff, max_affine_residual); - if (status < 0) { - if (relative_primal_residual < 100 * options.feasibility_tol && - relative_dual_residual < 100 * options.optimality_tol && - relative_complementarity_residual < 100 * options.complementarity_tol) { - settings.log.printf("Suboptimal solution found\n"); - return 1; - } - compute_residual_norms(w_save, x_save, y_save, v_save, z_save, data, primal_residual_norm, dual_residual_norm, complementarity_residual_norm); - primal_objective = data.c.inner_product(x_save); - relative_primal_residual = primal_residual_norm / (1.0 + norm_b); - relative_dual_residual = dual_residual_norm / (1.0 + norm_c); - relative_complementarity_residual = complementarity_residual_norm / (1.0 + std::abs(primal_objective)); - - if (relative_primal_residual < 100 * options.feasibility_tol && - relative_dual_residual < 100 * options.optimality_tol && - relative_complementarity_residual < 100 * options.complementarity_tol) { - settings.log.printf("Restoring previous solution: primal %.2e dual %.2e complementarity %.2e\n", primal_residual_norm, dual_residual_norm, complementarity_residual_norm); - data.w = w_save; - data.x = x_save; - data.y = y_save; - data.v = v_save; - data.z = z_save; - settings.log.printf("Suboptimal solution found\n"); - return 1; - } else { - settings.log.printf("Primal residual %.2e dual residual %.2e complementarity residual %.2e\n", relative_primal_residual, relative_dual_residual, relative_complementarity_residual); - } - settings.log.printf("Search direction computation failed\n"); - return -1; - } - if (toc(start_time) > settings.time_limit) { - settings.log.printf("Barrier time limit exceeded\n"); - return -1; - } - - f_t step_primal_aff = std::min(max_step_to_boundary(data.w, data.dw_aff), max_step_to_boundary(data.x, data.dx_aff)); - f_t step_dual_aff = std::min(max_step_to_boundary(data.v, data.dv_aff), max_step_to_boundary(data.z, data.dz_aff)); - - // w_aff = w + step_primal_aff * dw_aff - // x_aff = x + step_primal_aff * dx_aff - // v_aff = v + step_dual_aff * dv_aff - // z_aff = z + step_dual_aff * dz_aff - dense_vector_t w_aff = data.w; - dense_vector_t x_aff = data.x; - dense_vector_t v_aff = data.v; - dense_vector_t z_aff = data.z; - w_aff.axpy(step_primal_aff, data.dw_aff, 1.0); - x_aff.axpy(step_primal_aff, data.dx_aff, 1.0); - v_aff.axpy(step_dual_aff, data.dv_aff, 1.0); - z_aff.axpy(step_dual_aff, data.dz_aff, 1.0); - - dense_vector_t complementarity_xz_aff(lp.num_cols); - dense_vector_t complementarity_wv_aff(num_upper_bounds); - x_aff.pairwise_product(z_aff, complementarity_xz_aff); - w_aff.pairwise_product(v_aff, complementarity_wv_aff); - - f_t complementarity_aff_sum = complementarity_xz_aff.sum() + complementarity_wv_aff.sum(); - f_t mu_aff = (complementarity_aff_sum) / (static_cast(n) + static_cast(num_upper_bounds)); - f_t sigma = std::max(0.0, std::min(1.0, std::pow(mu_aff / mu, 3.0))); - f_t new_mu = sigma * mu_aff; - - // Compute the combined centering corrector step - data.primal_rhs.set_scalar(0.0); - data.bound_rhs.set_scalar(0.0); - data.dual_rhs.set_scalar(0.0); - // complementarity_xz_rhs = -dx_aff .* dz_aff + sigma * mu - data.dx_aff.pairwise_product(data.dz_aff, data.complementarity_xz_rhs); - data.complementarity_xz_rhs.multiply_scalar(-1.0); - data.complementarity_xz_rhs.add_scalar(new_mu); - - // complementarity_wv_rhs = -dw_aff .* dv_aff + sigma * mu - data.dw_aff.pairwise_product(data.dv_aff, data.complementarity_wv_rhs); - data.complementarity_wv_rhs.multiply_scalar(-1.0); - data.complementarity_wv_rhs.add_scalar(new_mu); - - f_t max_corrector_residual = 0.0; - status = compute_search_direction(data, data.dw, data.dx, data.dy, data.dv, data.dz, max_corrector_residual); - if (status < 0) { - if (relative_primal_residual < 100 * options.feasibility_tol && - relative_dual_residual < 100 * options.optimality_tol && - relative_complementarity_residual < 100 * options.complementarity_tol) { - settings.log.printf("Suboptimal solution found\n"); - return 1; - } - compute_residual_norms(w_save, x_save, y_save, v_save, z_save, data, primal_residual_norm, dual_residual_norm, complementarity_residual_norm); - primal_objective = data.c.inner_product(x_save); - relative_primal_residual = primal_residual_norm / (1.0 + norm_b); - relative_dual_residual = dual_residual_norm / (1.0 + norm_c); - relative_complementarity_residual = complementarity_residual_norm / (1.0 + std::abs(primal_objective)); - - if (relative_primal_residual < 100 * options.feasibility_tol && - relative_dual_residual < 100 * options.optimality_tol && - relative_complementarity_residual < 100 * options.complementarity_tol) { - settings.log.printf("Restoring previous solution: primal %.2e dual %.2e complementarity %.2e\n", primal_residual_norm, dual_residual_norm, complementarity_residual_norm); - data.w = w_save; - data.x = x_save; - data.y = y_save; - data.v = v_save; - data.z = z_save; - settings.log.printf("Suboptimal solution found\n"); - return 1; - } else { - settings.log.printf("Primal residual %.2e dual residual %.2e complementarity residual %.2e\n", relative_primal_residual, relative_dual_residual, relative_complementarity_residual); - } - settings.log.printf("Search direction computation failed\n"); - return -1; - } - data.has_factorization = false; - if (toc(start_time) > settings.time_limit) { - settings.log.printf("Barrier time limit exceeded\n"); - return -1; - } - - // dw = dw_aff + dw_cc - // dx = dx_aff + dx_cc - // dy = dy_aff + dy_cc - // dv = dv_aff + dv_cc - // dz = dz_aff + dz_cc - // Note: dw_cc - dz_cc are stored in dw - dz - data.dw.axpy(1.0, data.dw_aff, 1.0); - data.dx.axpy(1.0, data.dx_aff, 1.0); - data.dy.axpy(1.0, data.dy_aff, 1.0); - data.dv.axpy(1.0, data.dv_aff, 1.0); - data.dz.axpy(1.0, data.dz_aff, 1.0); - - f_t max_step_primal = std::min(max_step_to_boundary(data.w, data.dw), max_step_to_boundary(data.x, data.dx)); - f_t max_step_dual = std::min(max_step_to_boundary(data.v, data.dv), max_step_to_boundary(data.z, data.dz)); - - f_t step_primal = options.step_scale * max_step_primal; - f_t step_dual = options.step_scale * max_step_dual; - - data.w.axpy(step_primal, data.dw, 1.0); - data.x.axpy(step_primal, data.dx, 1.0); - data.y.axpy(step_dual, data.dy, 1.0); - data.v.axpy(step_dual, data.dv, 1.0); - data.z.axpy(step_dual, data.dz, 1.0); - - // Handle free variables - if (num_free_variables > 0) - { - for (i_t k = 0; k < 2*num_free_variables; k += 2) - { - i_t u = presolve_info.free_variable_pairs[k]; - i_t v = presolve_info.free_variable_pairs[k+1]; - f_t u_val = data.x[u]; - f_t v_val = data.x[v]; - f_t min_val = std::min(u_val, v_val); - f_t eta = options.step_scale*min_val; - data.x[u] -= eta; - data.x[v] -= eta; - } - } - - compute_residual_norms(data.w, data.x, data.y, data.v, data.z, data, primal_residual_norm, dual_residual_norm, complementarity_residual_norm); - - mu = (data.complementarity_xz_residual.sum() + data.complementarity_wv_residual.sum()) / (static_cast(n) + static_cast(num_upper_bounds)); - - primal_objective = data.c.inner_product(data.x); - dual_objective = data.b.inner_product(data.y) - restrict_u.inner_product(data.v); - - relative_primal_residual = primal_residual_norm / (1.0 + norm_b); - relative_dual_residual = dual_residual_norm / (1.0 + norm_c); - relative_complementarity_residual = complementarity_residual_norm / (1.0 + std::abs(primal_objective)); - - if (relative_primal_residual < 100 * options.feasibility_tol && - relative_dual_residual < 100 * options.optimality_tol && - relative_complementarity_residual < 100 * options.complementarity_tol) { - w_save = data.w; - x_save = data.x; - y_save = data.y; - v_save = data.v; - z_save = data.z; - } - - iter++; - elapsed_time = toc(start_time); - - if (primal_objective != primal_objective || dual_objective != dual_objective) { - settings.log.printf("Numerical error in objective\n"); - return -1; - } - - settings.log.printf("%2d %+.12e %+.12e %.2e %.2e %.2e %.2e %.2e %.2e %.2e %.2e %.1f\n", - iter, - primal_objective + lp.obj_constant, - dual_objective + lp.obj_constant, - relative_primal_residual, - relative_dual_residual, - relative_complementarity_residual, - step_primal, - step_dual, - std::min(data.complementarity_xz_residual.minimum(), data.complementarity_wv_residual.minimum()), - mu, - std::max(max_affine_residual, max_corrector_residual), - elapsed_time); - - bool primal_feasible = relative_primal_residual < options.feasibility_tol; - bool dual_feasible = relative_dual_residual < options.optimality_tol; - bool small_gap = relative_complementarity_residual < options.complementarity_tol; - - converged = primal_feasible && dual_feasible && small_gap; - - if (converged) { - settings.log.printf("Optimal solution found in %d iterations and %.2fs\n", iter, toc(start_time)); - settings.log.printf("Objective %+.8e\n", primal_objective + lp.obj_constant); - settings.log.printf("Primal infeasibility (abs/rel): %8.2e/%8.2e\n", primal_residual_norm, relative_primal_residual); - settings.log.printf("Dual infeasibility (abs/rel): %8.2e/%8.2e\n", dual_residual_norm, relative_dual_residual); - settings.log.printf("Complementarity gap (abs/rel): %8.2e/%8.2e\n", complementarity_residual_norm, relative_complementarity_residual); - return 0; - } + + // dw = dw_aff + dw_cc + // dx = dx_aff + dx_cc + // dy = dy_aff + dy_cc + // dv = dv_aff + dv_cc + // dz = dz_aff + dz_cc + // Note: dw_cc - dz_cc are stored in dw - dz + data.dw.axpy(1.0, data.dw_aff, 1.0); + data.dx.axpy(1.0, data.dx_aff, 1.0); + data.dy.axpy(1.0, data.dy_aff, 1.0); + data.dv.axpy(1.0, data.dv_aff, 1.0); + data.dz.axpy(1.0, data.dz_aff, 1.0); + + f_t max_step_primal = + std::min(max_step_to_boundary(data.w, data.dw), max_step_to_boundary(data.x, data.dx)); + f_t max_step_dual = + std::min(max_step_to_boundary(data.v, data.dv), max_step_to_boundary(data.z, data.dz)); + + f_t step_primal = options.step_scale * max_step_primal; + f_t step_dual = options.step_scale * max_step_dual; + + data.w.axpy(step_primal, data.dw, 1.0); + data.x.axpy(step_primal, data.dx, 1.0); + data.y.axpy(step_dual, data.dy, 1.0); + data.v.axpy(step_dual, data.dv, 1.0); + data.z.axpy(step_dual, data.dz, 1.0); + + // Handle free variables + if (num_free_variables > 0) { + for (i_t k = 0; k < 2 * num_free_variables; k += 2) { + i_t u = presolve_info.free_variable_pairs[k]; + i_t v = presolve_info.free_variable_pairs[k + 1]; + f_t u_val = data.x[u]; + f_t v_val = data.x[v]; + f_t min_val = std::min(u_val, v_val); + f_t eta = options.step_scale * min_val; + data.x[u] -= eta; + data.x[v] -= eta; + } } - return 1; + + compute_residual_norms(data.w, + data.x, + data.y, + data.v, + data.z, + data, + primal_residual_norm, + dual_residual_norm, + complementarity_residual_norm); + + mu = (data.complementarity_xz_residual.sum() + data.complementarity_wv_residual.sum()) / + (static_cast(n) + static_cast(num_upper_bounds)); + + primal_objective = data.c.inner_product(data.x); + dual_objective = data.b.inner_product(data.y) - restrict_u.inner_product(data.v); + + relative_primal_residual = primal_residual_norm / (1.0 + norm_b); + relative_dual_residual = dual_residual_norm / (1.0 + norm_c); + relative_complementarity_residual = + complementarity_residual_norm / (1.0 + std::abs(primal_objective)); + + if (relative_primal_residual < 100 * options.feasibility_tol && + relative_dual_residual < 100 * options.optimality_tol && + relative_complementarity_residual < 100 * options.complementarity_tol) { + w_save = data.w; + x_save = data.x; + y_save = data.y; + v_save = data.v; + z_save = data.z; + } + + iter++; + elapsed_time = toc(start_time); + + if (primal_objective != primal_objective || dual_objective != dual_objective) { + settings.log.printf("Numerical error in objective\n"); + return -1; + } + + settings.log.printf("%2d %+.12e %+.12e %.2e %.2e %.2e %.2e %.2e %.2e %.2e %.2e %.1f\n", + iter, + primal_objective + lp.obj_constant, + dual_objective + lp.obj_constant, + relative_primal_residual, + relative_dual_residual, + relative_complementarity_residual, + step_primal, + step_dual, + std::min(data.complementarity_xz_residual.minimum(), + data.complementarity_wv_residual.minimum()), + mu, + std::max(max_affine_residual, max_corrector_residual), + elapsed_time); + + bool primal_feasible = relative_primal_residual < options.feasibility_tol; + bool dual_feasible = relative_dual_residual < options.optimality_tol; + bool small_gap = relative_complementarity_residual < options.complementarity_tol; + + converged = primal_feasible && dual_feasible && small_gap; + + if (converged) { + settings.log.printf( + "Optimal solution found in %d iterations and %.2fs\n", iter, toc(start_time)); + settings.log.printf("Objective %+.8e\n", primal_objective + lp.obj_constant); + settings.log.printf("Primal infeasibility (abs/rel): %8.2e/%8.2e\n", + primal_residual_norm, + relative_primal_residual); + settings.log.printf("Dual infeasibility (abs/rel): %8.2e/%8.2e\n", + dual_residual_norm, + relative_dual_residual); + settings.log.printf("Complementarity gap (abs/rel): %8.2e/%8.2e\n", + complementarity_residual_norm, + relative_complementarity_residual); + return 0; + } + } + return 1; } #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE @@ -1145,4 +1219,4 @@ template class iteration_data_t; template class barrier_solver_settings_t; #endif -} // namespace cuopt::linear_programming::dual_simplex +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/barrier.hpp b/cpp/src/dual_simplex/barrier.hpp index fdb7d803ef..b2ad7d2c21 100644 --- a/cpp/src/dual_simplex/barrier.hpp +++ b/cpp/src/dual_simplex/barrier.hpp @@ -17,26 +17,25 @@ #pragma once #include "dual_simplex/dense_vector.hpp" -#include "dual_simplex/sparse_matrix.hpp" -#include "dual_simplex/sparse_cholesky.hpp" #include "dual_simplex/presolve.hpp" #include "dual_simplex/simplex_solver_settings.hpp" +#include "dual_simplex/sparse_cholesky.hpp" +#include "dual_simplex/sparse_matrix.hpp" #include "dual_simplex/tic_toc.hpp" namespace cuopt::linear_programming::dual_simplex { template struct barrier_solver_settings_t { - f_t feasibility_tol = 1e-8; - f_t optimality_tol = 1e-8; + f_t feasibility_tol = 1e-8; + f_t optimality_tol = 1e-8; f_t complementarity_tol = 1e-8; - i_t iteration_limit = 1000; - f_t step_scale = 0.9; + i_t iteration_limit = 1000; + f_t step_scale = 0.9; }; - template -class iteration_data_t; // Forward declare +class iteration_data_t; // Forward declare template class barrier_solver_t { diff --git a/cpp/src/dual_simplex/dense_vector.hpp b/cpp/src/dual_simplex/dense_vector.hpp index 3192c8d7b5..b8e5cf1143 100644 --- a/cpp/src/dual_simplex/dense_vector.hpp +++ b/cpp/src/dual_simplex/dense_vector.hpp @@ -18,9 +18,9 @@ #include "dual_simplex/types.hpp" -#include #include #include +#include namespace cuopt::linear_programming::dual_simplex { @@ -28,7 +28,8 @@ template class dense_vector_t : public std::vector { public: dense_vector_t(i_t n) { this->resize(n, 0.0); } - dense_vector_t(const std::vector& in) { + dense_vector_t(const std::vector& in) + { this->resize(in.size()); for (i_t i = 0; i < in.size(); i++) { (*this)[i] = in[i]; @@ -38,7 +39,7 @@ class dense_vector_t : public std::vector { f_t minimum() const { const i_t n = this->size(); - f_t min_x = inf; + f_t min_x = inf; for (i_t i = 0; i < n; i++) { min_x = std::min(min_x, (*this)[i]); } @@ -48,7 +49,7 @@ class dense_vector_t : public std::vector { f_t maximum() const { const i_t n = this->size(); - f_t max_x = -inf; + f_t max_x = -inf; for (i_t i = 0; i < n; i++) { max_x = std::max(max_x, (*this)[i]); } @@ -80,7 +81,8 @@ class dense_vector_t : public std::vector { } } // a <- alpha * a - void multiply_scalar(f_t alpha) { + void multiply_scalar(f_t alpha) + { const i_t n = this->size(); for (i_t i = 0; i < n; i++) { (*this)[i] *= alpha; @@ -88,7 +90,7 @@ class dense_vector_t : public std::vector { } f_t sum() const { - f_t sum = 0.0; + f_t sum = 0.0; const i_t n = this->size(); for (i_t i = 0; i < n; ++i) { sum += (*this)[i]; @@ -98,7 +100,7 @@ class dense_vector_t : public std::vector { f_t inner_product(dense_vector_t& b) const { - f_t dot = 0.0; + f_t dot = 0.0; const i_t n = this->size(); for (i_t i = 0; i < n; i++) { dot += (*this)[i] * b[i]; @@ -149,7 +151,6 @@ class dense_vector_t : public std::vector { } } - void ensure_positive(f_t epsilon_adjust) { const f_t mix_x = minimum(); @@ -163,9 +164,7 @@ class dense_vector_t : public std::vector { { const i_t n = this->size(); for (i_t i = 0; i < n; i++) { - if ((*this)[i] < epsilon_adjust) { - (*this)[i] = epsilon_adjust; - } + if ((*this)[i] < epsilon_adjust) { (*this)[i] = epsilon_adjust; } } } @@ -179,4 +178,4 @@ class dense_vector_t : public std::vector { } }; -} // namespace cuopt::linear_programming::dual_simplex +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index 9c366d9830..6f4b8017f3 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -693,15 +693,12 @@ void convert_user_problem(const user_problem_t& user_problem, bound_strengthening(row_sense, settings, problem); } - if (less_rows > 0) { convert_less_than_to_equal(user_problem, row_sense, problem, less_rows, new_slacks); } // Add artifical variables - if (!settings.barrier_presolve) { - add_artifical_variables(problem, equality_rows, new_slacks); - } + if (!settings.barrier_presolve) { add_artifical_variables(problem, equality_rows, new_slacks); } } template @@ -713,7 +710,7 @@ i_t presolve(const lp_problem_t& original, problem = original; std::vector row_sense(problem.num_rows, '='); - // The original problem may have a variable without a lower bound + // The original problem may have a variable without a lower bound // but a finite upper bound // -inf < x_j <= u_j i_t no_lower_bound = 0; @@ -745,20 +742,17 @@ i_t presolve(const lp_problem_t& original, // sum_{k != j} c_k x_k + c_j (x'_j + l_j) // // so we get the constant term c_j * l_j - for (i_t j = 0; j < problem.num_cols; j++) - { - if (problem.lower[j] != 0.0 && problem.lower[j] > -inf) - { - for (i_t p = problem.A.col_start[j]; p < problem.A.col_start[j+1]; p++) - { - i_t i = problem.A.i[p]; - f_t aij = problem.A.x[p]; - problem.rhs[i] -= aij * problem.lower[j]; - } - problem.obj_constant += problem.objective[j] * problem.lower[j]; - problem.upper[j] -= problem.lower[j]; - problem.lower[j] = 0.0; + for (i_t j = 0; j < problem.num_cols; j++) { + if (problem.lower[j] != 0.0 && problem.lower[j] > -inf) { + for (i_t p = problem.A.col_start[j]; p < problem.A.col_start[j + 1]; p++) { + i_t i = problem.A.i[p]; + f_t aij = problem.A.x[p]; + problem.rhs[i] -= aij * problem.lower[j]; } + problem.obj_constant += problem.objective[j] * problem.lower[j]; + problem.upper[j] -= problem.lower[j]; + problem.lower[j] = 0.0; + } } } @@ -783,13 +777,11 @@ i_t presolve(const lp_problem_t& original, // sum_{k != j} c_k x_k + c_j v - c_j w i_t num_cols = problem.num_cols + free_variables; - i_t nnz = problem.A.col_start[problem.num_cols]; - for (i_t j = 0; j& original, presolve_info.free_variable_pairs.resize(free_variables * 2); i_t pair_count = 0; - i_t q = problem.A.col_start[problem.num_cols]; - i_t col = problem.num_cols; - for (i_t j = 0; j < problem.num_cols; j++) - { - if (problem.lower[j] == -inf && problem.upper[j] == inf) - { - for (i_t p = problem.A.col_start[j]; p < problem.A.col_start[j+1]; p++) - { - i_t i = problem.A.i[p]; - f_t aij = problem.A.x[p]; - problem.A.i[q] = i; - problem.A.x[q] = -aij; - q++; - } - problem.lower[col] = 0.0; - problem.upper[col] = inf; - problem.objective[col] = -problem.objective[j]; - presolve_info.free_variable_pairs[pair_count++] = j; - presolve_info.free_variable_pairs[pair_count++] = col; - problem.A.col_start[++col] = q; - problem.lower[j] = 0.0; + i_t q = problem.A.col_start[problem.num_cols]; + i_t col = problem.num_cols; + for (i_t j = 0; j < problem.num_cols; j++) { + if (problem.lower[j] == -inf && problem.upper[j] == inf) { + for (i_t p = problem.A.col_start[j]; p < problem.A.col_start[j + 1]; p++) { + i_t i = problem.A.i[p]; + f_t aij = problem.A.x[p]; + problem.A.i[q] = i; + problem.A.x[q] = -aij; + q++; } + problem.lower[col] = 0.0; + problem.upper[col] = inf; + problem.objective[col] = -problem.objective[j]; + presolve_info.free_variable_pairs[pair_count++] = j; + presolve_info.free_variable_pairs[pair_count++] = col; + problem.A.col_start[++col] = q; + problem.lower[j] = 0.0; + } } assert(problem.A.p[num_cols] == nnz); - problem.A.n = num_cols; + problem.A.n = num_cols; problem.num_cols = num_cols; } @@ -857,12 +846,12 @@ i_t presolve(const lp_problem_t& original, } // Check for dependent rows - bool check_dependent_rows = false; //settings.barrier; + bool check_dependent_rows = false; // settings.barrier; if (check_dependent_rows) { std::vector dependent_rows; constexpr i_t kOk = -1; i_t infeasible; - f_t dependent_row_start = tic(); + f_t dependent_row_start = tic(); const i_t independent_rows = find_dependent_rows(problem, settings, dependent_rows, infeasible); if (infeasible != kOk) { settings.log.printf("Found problem infeasible in presolve\n"); diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index 7958269b0b..2eec975958 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -102,11 +102,11 @@ struct simplex_solver_settings_t { bool relaxation; // true to only solve the LP relaxation of a MIP bool use_left_looking_lu; // true to use left looking LU factorization, false to use right looking - bool eliminate_singletons; // true to eliminate singletons from the basis - bool print_presolve_stats; // true to print presolve stats - bool barrier_presolve; // true to use barrier presolve - bool use_cudss; // true to use cuDSS for sparse Cholesky factorization, false to use cholmod - bool barrier; // true to use barrier method, false to use dual simplex method + bool eliminate_singletons; // true to eliminate singletons from the basis + bool print_presolve_stats; // true to print presolve stats + bool barrier_presolve; // true to use barrier presolve + bool use_cudss; // true to use cuDSS for sparse Cholesky factorization, false to use cholmod + bool barrier; // true to use barrier method, false to use dual simplex method i_t refactor_frequency; // number of basis updates before refactorization i_t iteration_log_frequency; // number of iterations between log updates i_t first_iteration_log; // number of iterations to log at beginning of solve diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index 76cd6d378c..f60a757015 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -273,7 +273,7 @@ lp_status_t solve_linear_program(const user_problem_t& user_problem, const simplex_solver_settings_t& settings, lp_solution_t& solution) { - f_t start_time = tic(); + f_t start_time = tic(); lp_problem_t original_lp(1, 1, 1); std::vector new_slacks; convert_user_problem(user_problem, settings, original_lp, new_slacks); @@ -374,10 +374,10 @@ template lp_status_t solve_linear_program_advanced( std::vector& vstatus, std::vector& edge_norms); - -template lp_status_t solve_linear_program_with_barrier(const user_problem_t& user_problem, - const simplex_solver_settings_t& settings, - lp_solution_t& solution); +template lp_status_t solve_linear_program_with_barrier( + const user_problem_t& user_problem, + const simplex_solver_settings_t& settings, + lp_solution_t& solution); template lp_status_t solve_linear_program(const user_problem_t& user_problem, const simplex_solver_settings_t& settings, diff --git a/cpp/src/dual_simplex/sparse_cholesky.hpp b/cpp/src/dual_simplex/sparse_cholesky.hpp index 73e50658d5..ead03cf721 100644 --- a/cpp/src/dual_simplex/sparse_cholesky.hpp +++ b/cpp/src/dual_simplex/sparse_cholesky.hpp @@ -17,60 +17,60 @@ #pragma once #include "dual_simplex/dense_vector.hpp" -#include "dual_simplex/sparse_matrix.hpp" #include "dual_simplex/simplex_solver_settings.hpp" +#include "dual_simplex/sparse_matrix.hpp" #include "dual_simplex/tic_toc.hpp" - #include #include "cudss.h" namespace cuopt::linear_programming::dual_simplex { - template class sparse_cholesky_base_t { public: - virtual ~sparse_cholesky_base_t() = default; - virtual i_t analyze(const csc_matrix_t& A_in) = 0; - virtual i_t factorize(const csc_matrix_t& A_in) = 0; - virtual i_t solve(const dense_vector_t& b, dense_vector_t& x) = 0; - virtual void set_positive_definite(bool positive_definite) = 0; + virtual ~sparse_cholesky_base_t() = default; + virtual i_t analyze(const csc_matrix_t& A_in) = 0; + virtual i_t factorize(const csc_matrix_t& A_in) = 0; + virtual i_t solve(const dense_vector_t& b, dense_vector_t& x) = 0; + virtual void set_positive_definite(bool positive_definite) = 0; }; +#define CUDSS_EXAMPLE_FREE \ + do { \ + } while (0) -#define CUDSS_EXAMPLE_FREE do {} while(0) - -#define CUDA_CALL_AND_CHECK(call, msg) \ - do { \ - cuda_error = call; \ - if (cuda_error != cudaSuccess) { \ - printf("FAILED: CUDA API returned error = %d, details: " #msg "\n", cuda_error); \ - CUDSS_EXAMPLE_FREE; \ - return -1; \ - } \ - } while(0); +#define CUDA_CALL_AND_CHECK(call, msg) \ + do { \ + cuda_error = call; \ + if (cuda_error != cudaSuccess) { \ + printf("FAILED: CUDA API returned error = %d, details: " #msg "\n", cuda_error); \ + CUDSS_EXAMPLE_FREE; \ + return -1; \ + } \ + } while (0); -#define CUDA_CALL_AND_CHECK_EXIT(call, msg) \ +#define CUDA_CALL_AND_CHECK_EXIT(call, msg) \ do { \ cuda_error = call; \ if (cuda_error != cudaSuccess) { \ printf("FAILED: CUDA API returned error = %d, details: " #msg "\n", cuda_error); \ CUDSS_EXAMPLE_FREE; \ - exit(-1); \ + exit(-1); \ } \ } while (0); -#define CUDSS_CALL_AND_CHECK(call, status, msg) \ - do { \ - status = call; \ - if (status != CUDSS_STATUS_SUCCESS) { \ - printf("FAILED: CUDSS call ended unsuccessfully with status = %d, details: " #msg "\n", status); \ - CUDSS_EXAMPLE_FREE; \ - return -2; \ - } \ - } while(0); +#define CUDSS_CALL_AND_CHECK(call, status, msg) \ + do { \ + status = call; \ + if (status != CUDSS_STATUS_SUCCESS) { \ + printf("FAILED: CUDSS call ended unsuccessfully with status = %d, details: " #msg "\n", \ + status); \ + CUDSS_EXAMPLE_FREE; \ + return -2; \ + } \ + } while (0); #define CUDSS_CALL_AND_CHECK_EXIT(call, status, msg) \ do { \ @@ -79,33 +79,37 @@ class sparse_cholesky_base_t { printf("FAILED: CUDSS call ended unsuccessfully with status = %d, details: " #msg "\n", \ status); \ CUDSS_EXAMPLE_FREE; \ - exit(-2); \ + exit(-2); \ } \ } while (0); template class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { public: - sparse_cholesky_cudss_t(const simplex_solver_settings_t& settings, i_t size) : n(size), nnz(-1), first_factor(true), positive_definite(true) { - - int major, minor, patch; - cudssGetProperty(MAJOR_VERSION, &major); - cudssGetProperty(MINOR_VERSION, &minor); - cudssGetProperty(PATCH_LEVEL, &patch); - settings.log.printf("CUDSS Version %d.%d.%d\n", major, minor, patch); - - cuda_error = cudaSuccess; - status = CUDSS_STATUS_SUCCESS; - CUDA_CALL_AND_CHECK_EXIT(cudaStreamCreate(&stream), "cudaStreamCreate"); - CUDSS_CALL_AND_CHECK_EXIT(cudssCreate(&handle), status, "cudssCreate"); - CUDSS_CALL_AND_CHECK_EXIT(cudssConfigCreate(&solverConfig), status, "cudssConfigCreate"); - CUDSS_CALL_AND_CHECK_EXIT(cudssDataCreate(handle, &solverData), status, "cudssDataCreate"); + sparse_cholesky_cudss_t(const simplex_solver_settings_t& settings, i_t size) + : n(size), nnz(-1), first_factor(true), positive_definite(true) + { + int major, minor, patch; + cudssGetProperty(MAJOR_VERSION, &major); + cudssGetProperty(MINOR_VERSION, &minor); + cudssGetProperty(PATCH_LEVEL, &patch); + settings.log.printf("CUDSS Version %d.%d.%d\n", major, minor, patch); + + cuda_error = cudaSuccess; + status = CUDSS_STATUS_SUCCESS; + CUDA_CALL_AND_CHECK_EXIT(cudaStreamCreate(&stream), "cudaStreamCreate"); + CUDSS_CALL_AND_CHECK_EXIT(cudssCreate(&handle), status, "cudssCreate"); + CUDSS_CALL_AND_CHECK_EXIT(cudssConfigCreate(&solverConfig), status, "cudssConfigCreate"); + CUDSS_CALL_AND_CHECK_EXIT(cudssDataCreate(handle, &solverData), status, "cudssDataCreate"); #ifdef USE_AMD - // Tell cuDSS to use AMD - cudssAlgType_t reorder_alg = CUDSS_ALG_3; - CUDSS_CALL_AND_CHECK_EXIT(cudssConfigSet(solverConfig, CUDSS_CONFIG_REORDERING_ALG, - &reorder_alg, sizeof(cudssAlgType_t)), status, "cudssConfigSet for reordering alg"); + // Tell cuDSS to use AMD + cudssAlgType_t reorder_alg = CUDSS_ALG_3; + CUDSS_CALL_AND_CHECK_EXIT( + cudssConfigSet( + solverConfig, CUDSS_CONFIG_REORDERING_ALG, &reorder_alg, sizeof(cudssAlgType_t)), + status, + "cudssConfigSet for reordering alg"); #endif #if 0 @@ -114,222 +118,234 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { &ir_n_steps, sizeof(int32_t)), status, "cudssConfigSet for ir n steps"); #endif - // Device pointers - csr_offset_d = nullptr; - csr_columns_d = nullptr; - csr_values_d = nullptr; - x_values_d = nullptr; - b_values_d = nullptr; - CUDA_CALL_AND_CHECK_EXIT(cudaMalloc(&x_values_d, n * sizeof(f_t)), "cudaMalloc for x_values"); - CUDA_CALL_AND_CHECK_EXIT(cudaMalloc(&b_values_d, n * sizeof(f_t)), "cudaMalloc for b_values"); - - i_t ldb = n; - i_t ldx = n; - CUDSS_CALL_AND_CHECK_EXIT( - cudssMatrixCreateDn(&cudss_b, n, 1, ldb, b_values_d, CUDA_R_64F, CUDSS_LAYOUT_COL_MAJOR), - status, - "cudssMatrixCreateDn for b"); - CUDSS_CALL_AND_CHECK_EXIT( - cudssMatrixCreateDn(&cudss_x, n, 1, ldx, x_values_d, CUDA_R_64F, CUDSS_LAYOUT_COL_MAJOR), - status, - "cudssMatrixCreateDn for x"); + // Device pointers + csr_offset_d = nullptr; + csr_columns_d = nullptr; + csr_values_d = nullptr; + x_values_d = nullptr; + b_values_d = nullptr; + CUDA_CALL_AND_CHECK_EXIT(cudaMalloc(&x_values_d, n * sizeof(f_t)), "cudaMalloc for x_values"); + CUDA_CALL_AND_CHECK_EXIT(cudaMalloc(&b_values_d, n * sizeof(f_t)), "cudaMalloc for b_values"); + + i_t ldb = n; + i_t ldx = n; + CUDSS_CALL_AND_CHECK_EXIT( + cudssMatrixCreateDn(&cudss_b, n, 1, ldb, b_values_d, CUDA_R_64F, CUDSS_LAYOUT_COL_MAJOR), + status, + "cudssMatrixCreateDn for b"); + CUDSS_CALL_AND_CHECK_EXIT( + cudssMatrixCreateDn(&cudss_x, n, 1, ldx, x_values_d, CUDA_R_64F, CUDSS_LAYOUT_COL_MAJOR), + status, + "cudssMatrixCreateDn for x"); + } + ~sparse_cholesky_cudss_t() override + { + cudaFree(csr_values_d); + cudaFree(csr_columns_d); + cudaFree(csr_offset_d); + + cudaFree(x_values_d); + cudaFree(b_values_d); + + CUDSS_CALL_AND_CHECK_EXIT(cudssMatrixDestroy(A), status, "cudssMatrixDestroy for A"); + + CUDSS_CALL_AND_CHECK_EXIT( + cudssMatrixDestroy(cudss_x), status, "cudssMatrixDestroy for cudss_x"); + CUDSS_CALL_AND_CHECK_EXIT( + cudssMatrixDestroy(cudss_b), status, "cudssMatrixDestroy for cudss_b"); + CUDSS_CALL_AND_CHECK_EXIT(cudssDataDestroy(handle, solverData), status, "cudssDataDestroy"); + CUDSS_CALL_AND_CHECK_EXIT(cudssConfigDestroy(solverConfig), status, "cudssConfigDestroy"); + CUDSS_CALL_AND_CHECK_EXIT(cudssDestroy(handle), status, "cudssDestroy"); + CUDA_CALL_AND_CHECK_EXIT(cudaStreamSynchronize(stream), "cudaStreamSynchronize"); + CUDA_CALL_AND_CHECK_EXIT(cudaStreamDestroy(stream), "cudaStreamDestroy"); + } + i_t analyze(const csc_matrix_t& A_in) override + { + csr_matrix_t Arow; + A_in.to_compressed_row(Arow); + + nnz = A_in.col_start[A_in.n]; + + CUDA_CALL_AND_CHECK(cudaMalloc(&csr_offset_d, (n + 1) * sizeof(i_t)), + "cudaMalloc for csr_offset"); + CUDA_CALL_AND_CHECK(cudaMalloc(&csr_columns_d, nnz * sizeof(i_t)), + "cudaMalloc for csr_columns"); + CUDA_CALL_AND_CHECK(cudaMalloc(&csr_values_d, nnz * sizeof(f_t)), "cudaMalloc for csr_values"); + + CUDA_CALL_AND_CHECK( + cudaMemcpy( + csr_offset_d, Arow.row_start.data(), (n + 1) * sizeof(i_t), cudaMemcpyHostToDevice), + "cudaMemcpy for csr_offset"); + CUDA_CALL_AND_CHECK( + cudaMemcpy(csr_columns_d, Arow.j.data(), nnz * sizeof(i_t), cudaMemcpyHostToDevice), + "cudaMemcpy for csr_columns"); + CUDA_CALL_AND_CHECK( + cudaMemcpy(csr_values_d, Arow.x.data(), nnz * sizeof(f_t), cudaMemcpyHostToDevice), + "cudaMemcpy for csr_values"); + + if (!first_factor) { + CUDSS_CALL_AND_CHECK(cudssMatrixDestroy(A), status, "cudssMatrixDestroy for A"); } - ~sparse_cholesky_cudss_t() override { - cudaFree(csr_values_d); - cudaFree(csr_columns_d); - cudaFree(csr_offset_d); - - cudaFree(x_values_d); - cudaFree(b_values_d); - CUDSS_CALL_AND_CHECK_EXIT(cudssMatrixDestroy(A), status, "cudssMatrixDestroy for A"); + CUDSS_CALL_AND_CHECK( + cudssMatrixCreateCsr(&A, + n, + n, + nnz, + csr_offset_d, + nullptr, + csr_columns_d, + csr_values_d, + CUDA_R_32I, + CUDA_R_64F, + positive_definite ? CUDSS_MTYPE_SPD : CUDSS_MTYPE_SYMMETRIC, + CUDSS_MVIEW_FULL, + CUDSS_BASE_ZERO), + status, + "cudssMatrixCreateCsr"); + + // Perform symbolic analysis + f_t start_symbolic = tic(); + + CUDSS_CALL_AND_CHECK( + cudssExecute(handle, CUDSS_PHASE_ANALYSIS, solverConfig, solverData, A, cudss_x, cudss_b), + status, + "cudssExecute for analysis"); + + f_t symbolic_time = toc(start_symbolic); + printf("Symbolic time %.2fs\n", symbolic_time); + int64_t lu_nz = 0; + size_t size_written = 0; + CUDSS_CALL_AND_CHECK( + cudssDataGet(handle, solverData, CUDSS_DATA_LU_NNZ, &lu_nz, sizeof(int64_t), &size_written), + status, + "cudssDataGet for LU_NNZ"); + printf("Symbolic nonzeros in factor %e\n", static_cast(lu_nz) / 2.0); + // TODO: Is there any way to get nonzeros in the factors? + // TODO: Is there any way to get flops for the factorization? + + return 0; + } + i_t factorize(const csc_matrix_t& A_in) override + { + csr_matrix_t Arow; + A_in.to_compressed_row(Arow); + + if (nnz != A_in.col_start[A_in.n]) { + printf("Error: nnz %d != A_in.col_start[A_in.n] %d\n", nnz, A_in.col_start[A_in.n]); + exit(1); + } + CUDA_CALL_AND_CHECK( + cudaMemcpy(csr_values_d, Arow.x.data(), nnz * sizeof(f_t), cudaMemcpyHostToDevice), + "cudaMemcpy for csr_values"); + + CUDSS_CALL_AND_CHECK( + cudssMatrixSetValues(A, csr_values_d), status, "cudssMatrixSetValues for A"); + + f_t start_numeric = tic(); + CUDSS_CALL_AND_CHECK( + cudssExecute( + handle, CUDSS_PHASE_FACTORIZATION, solverConfig, solverData, A, cudss_x, cudss_b), + status, + "cudssExecute for factorization"); + + f_t numeric_time = toc(start_numeric); + + int info; + size_t sizeWritten = 0; + CUDSS_CALL_AND_CHECK( + cudssDataGet(handle, solverData, CUDSS_DATA_INFO, &info, sizeof(info), &sizeWritten), + status, + "cudssDataGet for info"); + if (info != 0) { + printf("Factorization failed info %d\n", info); + return -1; + } - CUDSS_CALL_AND_CHECK_EXIT(cudssMatrixDestroy(cudss_x), status, "cudssMatrixDestroy for cudss_x"); - CUDSS_CALL_AND_CHECK_EXIT(cudssMatrixDestroy(cudss_b), status, "cudssMatrixDestroy for cudss_b"); - CUDSS_CALL_AND_CHECK_EXIT(cudssDataDestroy(handle, solverData), status, "cudssDataDestroy"); - CUDSS_CALL_AND_CHECK_EXIT(cudssConfigDestroy(solverConfig), status, "cudssConfigDestroy"); - CUDSS_CALL_AND_CHECK_EXIT(cudssDestroy(handle), status, "cudssDestroy"); - CUDA_CALL_AND_CHECK_EXIT(cudaStreamSynchronize(stream), "cudaStreamSynchronize"); - CUDA_CALL_AND_CHECK_EXIT(cudaStreamDestroy(stream), "cudaStreamDestroy"); + if (first_factor) { + printf("Factor time %.2fs\n", numeric_time); + first_factor = false; } - i_t analyze(const csc_matrix_t& A_in) override - { - csr_matrix_t Arow; - A_in.to_compressed_row(Arow); - - nnz = A_in.col_start[A_in.n]; - - CUDA_CALL_AND_CHECK(cudaMalloc(&csr_offset_d, (n + 1) * sizeof(i_t)), "cudaMalloc for csr_offset"); - CUDA_CALL_AND_CHECK(cudaMalloc(&csr_columns_d, nnz * sizeof(i_t)), "cudaMalloc for csr_columns"); - CUDA_CALL_AND_CHECK(cudaMalloc(&csr_values_d, nnz * sizeof(f_t)), "cudaMalloc for csr_values"); - - CUDA_CALL_AND_CHECK(cudaMemcpy(csr_offset_d, Arow.row_start.data(), (n + 1) * sizeof(i_t), cudaMemcpyHostToDevice), "cudaMemcpy for csr_offset"); - CUDA_CALL_AND_CHECK(cudaMemcpy(csr_columns_d, Arow.j.data(), nnz * sizeof(i_t), cudaMemcpyHostToDevice), "cudaMemcpy for csr_columns"); - CUDA_CALL_AND_CHECK(cudaMemcpy(csr_values_d, Arow.x.data(), nnz * sizeof(f_t), cudaMemcpyHostToDevice), "cudaMemcpy for csr_values"); - - if (!first_factor) { - CUDSS_CALL_AND_CHECK(cudssMatrixDestroy(A), status, "cudssMatrixDestroy for A"); - } - - CUDSS_CALL_AND_CHECK(cudssMatrixCreateCsr(&A, - n, - n, - nnz, - csr_offset_d, - nullptr, - csr_columns_d, - csr_values_d, - CUDA_R_32I, - CUDA_R_64F, - positive_definite ? CUDSS_MTYPE_SPD : CUDSS_MTYPE_SYMMETRIC, - CUDSS_MVIEW_FULL, - CUDSS_BASE_ZERO), - status, - "cudssMatrixCreateCsr"); - - // Perform symbolic analysis - f_t start_symbolic = tic(); - - CUDSS_CALL_AND_CHECK( - cudssExecute(handle, CUDSS_PHASE_ANALYSIS, solverConfig, solverData, A, cudss_x, cudss_b), - status, - "cudssExecute for analysis"); - - f_t symbolic_time = toc(start_symbolic); - printf("Symbolic time %.2fs\n", symbolic_time); - int64_t lu_nz = 0; - size_t size_written = 0; - CUDSS_CALL_AND_CHECK( - cudssDataGet( - handle, solverData, CUDSS_DATA_LU_NNZ, &lu_nz, sizeof(int64_t), &size_written), - status, - "cudssDataGet for LU_NNZ"); - printf("Symbolic nonzeros in factor %e\n", static_cast(lu_nz) / 2.0); - // TODO: Is there any way to get nonzeros in the factors? - // TODO: Is there any way to get flops for the factorization? - - - return 0; + if (status != CUDSS_STATUS_SUCCESS) { + printf("cuDSS Factorization failed\n"); + return -1; } - i_t factorize(const csc_matrix_t& A_in) override - { - - csr_matrix_t Arow; - A_in.to_compressed_row(Arow); - - if (nnz != A_in.col_start[A_in.n]) { - printf("Error: nnz %d != A_in.col_start[A_in.n] %d\n", nnz, A_in.col_start[A_in.n]); - exit(1); - } - - CUDA_CALL_AND_CHECK(cudaMemcpy(csr_values_d, Arow.x.data(), nnz * sizeof(f_t), cudaMemcpyHostToDevice), "cudaMemcpy for csr_values"); - - CUDSS_CALL_AND_CHECK(cudssMatrixSetValues(A, csr_values_d), status, "cudssMatrixSetValues for A"); - - f_t start_numeric = tic(); - CUDSS_CALL_AND_CHECK( - cudssExecute( - handle, CUDSS_PHASE_FACTORIZATION, solverConfig, solverData, A, cudss_x, cudss_b), - status, - "cudssExecute for factorization"); - - f_t numeric_time = toc(start_numeric); - - int info; - size_t sizeWritten = 0; - CUDSS_CALL_AND_CHECK(cudssDataGet(handle, solverData, CUDSS_DATA_INFO, &info, - sizeof(info), &sizeWritten), status, "cudssDataGet for info"); - if (info != 0) { - printf("Factorization failed info %d\n", info); - return -1; - } - - if (first_factor) { - printf("Factor time %.2fs\n", numeric_time); - first_factor = false; - } - if (status != CUDSS_STATUS_SUCCESS) { - printf("cuDSS Factorization failed\n"); - return -1; - } - return 0; + return 0; + } + + i_t solve(const dense_vector_t& b, dense_vector_t& x) override + { + if (b.size() != n) { + printf("Error: b.size() %d != n %d\n", b.size(), n); + exit(1); } - - i_t solve(const dense_vector_t& b, dense_vector_t& x) override - { - if (b.size() != n) { - printf("Error: b.size() %d != n %d\n", b.size(), n); - exit(1); - } - if (x.size() != n) { - printf("Error: x.size() %d != n %d\n", x.size(), n); - exit(1); - } - CUDA_CALL_AND_CHECK(cudaMemcpy(b_values_d, b.data(), n * sizeof(f_t), cudaMemcpyHostToDevice), "cudaMemcpy for b_values"); - CUDA_CALL_AND_CHECK(cudaMemcpy(x_values_d, x.data(), n * sizeof(f_t), cudaMemcpyHostToDevice), "cudaMemcpy for x_values"); - - CUDSS_CALL_AND_CHECK(cudssMatrixSetValues(cudss_b, b_values_d), status, "cudssMatrixSetValues for b"); - CUDSS_CALL_AND_CHECK(cudssMatrixSetValues(cudss_x, x_values_d), status, "cudssMatrixSetValues for x"); - - - i_t ldb = n; - i_t ldx = n; - CUDSS_CALL_AND_CHECK_EXIT( - cudssMatrixCreateDn(&cudss_b, n, 1, ldb, b_values_d, CUDA_R_64F, CUDSS_LAYOUT_COL_MAJOR), - status, - "cudssMatrixCreateDn for b"); - CUDSS_CALL_AND_CHECK_EXIT( - cudssMatrixCreateDn(&cudss_x, n, 1, ldx, x_values_d, CUDA_R_64F, CUDSS_LAYOUT_COL_MAJOR), - status, - "cudssMatrixCreateDn for x"); - - CUDSS_CALL_AND_CHECK( - cudssExecute( - handle, CUDSS_PHASE_SOLVE, solverConfig, solverData, A, cudss_x, cudss_b), - status, - "cudssExecute for solve"); - - CUDA_CALL_AND_CHECK(cudaStreamSynchronize(stream), "cudaStreamSynchronize"); - - CUDA_CALL_AND_CHECK(cudaMemcpy(x.data(), x_values_d, n * sizeof(f_t), cudaMemcpyDeviceToHost), "cudaMemcpy for x"); - - for (i_t i = 0; i < n; i++) { - if (x[i] != x[i]) { - return -1; - } - } - - return 0; + if (x.size() != n) { + printf("Error: x.size() %d != n %d\n", x.size(), n); + exit(1); } - - void set_positive_definite(bool positive_definite) override - { - this->positive_definite = positive_definite; + CUDA_CALL_AND_CHECK(cudaMemcpy(b_values_d, b.data(), n * sizeof(f_t), cudaMemcpyHostToDevice), + "cudaMemcpy for b_values"); + CUDA_CALL_AND_CHECK(cudaMemcpy(x_values_d, x.data(), n * sizeof(f_t), cudaMemcpyHostToDevice), + "cudaMemcpy for x_values"); + + CUDSS_CALL_AND_CHECK( + cudssMatrixSetValues(cudss_b, b_values_d), status, "cudssMatrixSetValues for b"); + CUDSS_CALL_AND_CHECK( + cudssMatrixSetValues(cudss_x, x_values_d), status, "cudssMatrixSetValues for x"); + + i_t ldb = n; + i_t ldx = n; + CUDSS_CALL_AND_CHECK_EXIT( + cudssMatrixCreateDn(&cudss_b, n, 1, ldb, b_values_d, CUDA_R_64F, CUDSS_LAYOUT_COL_MAJOR), + status, + "cudssMatrixCreateDn for b"); + CUDSS_CALL_AND_CHECK_EXIT( + cudssMatrixCreateDn(&cudss_x, n, 1, ldx, x_values_d, CUDA_R_64F, CUDSS_LAYOUT_COL_MAJOR), + status, + "cudssMatrixCreateDn for x"); + + CUDSS_CALL_AND_CHECK( + cudssExecute(handle, CUDSS_PHASE_SOLVE, solverConfig, solverData, A, cudss_x, cudss_b), + status, + "cudssExecute for solve"); + + CUDA_CALL_AND_CHECK(cudaStreamSynchronize(stream), "cudaStreamSynchronize"); + + CUDA_CALL_AND_CHECK(cudaMemcpy(x.data(), x_values_d, n * sizeof(f_t), cudaMemcpyDeviceToHost), + "cudaMemcpy for x"); + + for (i_t i = 0; i < n; i++) { + if (x[i] != x[i]) { return -1; } } - private: - - - i_t n; - i_t nnz; - bool first_factor; - bool positive_definite; - cudaError_t cuda_error; - cudssStatus_t status; - cudaStream_t stream; - cudssHandle_t handle; - cudssConfig_t solverConfig; - cudssData_t solverData; - cudssMatrix_t A; - cudssMatrix_t cudss_x; - cudssMatrix_t cudss_b; - i_t *csr_offset_d; - i_t *csr_columns_d; - f_t *csr_values_d; - f_t *x_values_d; - f_t *b_values_d; + return 0; + } + + void set_positive_definite(bool positive_definite) override + { + this->positive_definite = positive_definite; + } + + private: + i_t n; + i_t nnz; + bool first_factor; + bool positive_definite; + cudaError_t cuda_error; + cudssStatus_t status; + cudaStream_t stream; + cudssHandle_t handle; + cudssConfig_t solverConfig; + cudssData_t solverData; + cudssMatrix_t A; + cudssMatrix_t cudss_x; + cudssMatrix_t cudss_b; + i_t* csr_offset_d; + i_t* csr_columns_d; + f_t* csr_values_d; + f_t* x_values_d; + f_t* b_values_d; }; - -} // namespace cuopt::linear_programming::dual_simplex +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu index 923d170a5b..f0a49f3dd1 100644 --- a/cpp/src/linear_programming/solve.cu +++ b/cpp/src/linear_programming/solve.cu @@ -314,8 +314,8 @@ run_barrier(dual_simplex::user_problem_t& user_problem, } dual_simplex::lp_solution_t solution(user_problem.num_rows, user_problem.num_cols); - auto status = - dual_simplex::solve_linear_program_with_barrier(user_problem, dual_simplex_settings, solution); + auto status = dual_simplex::solve_linear_program_with_barrier( + user_problem, dual_simplex_settings, solution); auto end = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(end - start_solver); @@ -348,7 +348,6 @@ optimization_problem_solution_t run_barrier( std::get<4>(sol_dual_simplex)); } - template std::tuple, dual_simplex::lp_status_t, f_t, f_t, f_t> run_dual_simplex(dual_simplex::user_problem_t& user_problem, From fe25ee8938e457576acac3d88b35abdfd7faf79e Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 13 Aug 2025 11:02:52 -0500 Subject: [PATCH 15/31] update installation --- ci/build_wheel_cuopt.sh | 3 +++ ci/build_wheel_libcuopt.sh | 2 ++ 2 files changed, 5 insertions(+) diff --git a/ci/build_wheel_cuopt.sh b/ci/build_wheel_cuopt.sh index cd49b6ffb4..0a9a8edffd 100755 --- a/ci/build_wheel_cuopt.sh +++ b/ci/build_wheel_cuopt.sh @@ -20,6 +20,9 @@ set -euo pipefail source rapids-init-pip +# Install cudss +bash ci/utils/install_cudss.sh + package_dir="python/cuopt" export SKBUILD_CMAKE_ARGS="-DCUOPT_BUILD_WHEELS=ON;-DDISABLE_DEPRECATION_WARNINGS=ON"; diff --git a/ci/build_wheel_libcuopt.sh b/ci/build_wheel_libcuopt.sh index e1c920968d..9a40c7f17e 100755 --- a/ci/build_wheel_libcuopt.sh +++ b/ci/build_wheel_libcuopt.sh @@ -32,6 +32,7 @@ else fi # Install cudss +bash ci/utils/install_cudss.sh # Clean metadata & install cudss if command -v dnf &> /dev/null; then @@ -76,6 +77,7 @@ EXCLUDE_ARGS=( --exclude "libraft.so" --exclude "libcublas.so.*" --exclude "libcublasLt.so.*" + --exclude "libcudss*" --exclude "libcurand.so.*" --exclude "libcusolver.so.*" --exclude "libcusparse.so.*" From dd699a764baca3b64307989a06a855d7e975e24a Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 13 Aug 2025 11:03:33 -0500 Subject: [PATCH 16/31] add install cudss script --- ci/utils/install_cudss.sh | 42 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100755 ci/utils/install_cudss.sh diff --git a/ci/utils/install_cudss.sh b/ci/utils/install_cudss.sh new file mode 100755 index 0000000000..2ac4a1b20e --- /dev/null +++ b/ci/utils/install_cudss.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -euo pipefail + +ARCH=$(uname -m) + +# Clean metadata & install cudss +if command -v dnf &> /dev/null; then + dnf clean all + # Adding static library just to please CMAKE requirements + dnf -y install libcudss0-static-cuda-12 libcudss0-devel-cuda-12 libcudss0-cuda-12 +elif command -v apt-get &> /dev/null; then + apt-get update + apt-get install -y libcudss-devel +else + echo "Neither dnf nor apt-get found. Cannot install cudss dependencies." + exit 1 +fi + + + + + + + + + From bd62c3b925a886761ab710adc780f6ec7775bb3a Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Wed, 13 Aug 2025 09:53:52 -0700 Subject: [PATCH 17/31] Use settings.log to capture output to logfile --- cpp/src/dual_simplex/sparse_cholesky.hpp | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/cpp/src/dual_simplex/sparse_cholesky.hpp b/cpp/src/dual_simplex/sparse_cholesky.hpp index ead03cf721..83548df0ad 100644 --- a/cpp/src/dual_simplex/sparse_cholesky.hpp +++ b/cpp/src/dual_simplex/sparse_cholesky.hpp @@ -87,7 +87,7 @@ template class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { public: sparse_cholesky_cudss_t(const simplex_solver_settings_t& settings, i_t size) - : n(size), nnz(-1), first_factor(true), positive_definite(true) + : n(size), nnz(-1), first_factor(true), positive_definite(true), settings_(settings) { int major, minor, patch; cudssGetProperty(MAJOR_VERSION, &major); @@ -213,14 +213,14 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { "cudssExecute for analysis"); f_t symbolic_time = toc(start_symbolic); - printf("Symbolic time %.2fs\n", symbolic_time); + settings_.log.printf("Symbolic time %.2fs\n", symbolic_time); int64_t lu_nz = 0; size_t size_written = 0; CUDSS_CALL_AND_CHECK( cudssDataGet(handle, solverData, CUDSS_DATA_LU_NNZ, &lu_nz, sizeof(int64_t), &size_written), status, "cudssDataGet for LU_NNZ"); - printf("Symbolic nonzeros in factor %e\n", static_cast(lu_nz) / 2.0); + settings_.log.printf("Symbolic nonzeros in factor %e\n", static_cast(lu_nz) / 2.0); // TODO: Is there any way to get nonzeros in the factors? // TODO: Is there any way to get flops for the factorization? @@ -232,7 +232,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { A_in.to_compressed_row(Arow); if (nnz != A_in.col_start[A_in.n]) { - printf("Error: nnz %d != A_in.col_start[A_in.n] %d\n", nnz, A_in.col_start[A_in.n]); + settings_.log.printf("Error: nnz %d != A_in.col_start[A_in.n] %d\n", nnz, A_in.col_start[A_in.n]); exit(1); } @@ -259,16 +259,16 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { status, "cudssDataGet for info"); if (info != 0) { - printf("Factorization failed info %d\n", info); + settings_.log.printf("Factorization failed info %d\n", info); return -1; } if (first_factor) { - printf("Factor time %.2fs\n", numeric_time); + settings_.log.printf("Factor time %.2fs\n", numeric_time); first_factor = false; } if (status != CUDSS_STATUS_SUCCESS) { - printf("cuDSS Factorization failed\n"); + settings_.log.printf("cuDSS Factorization failed\n"); return -1; } return 0; @@ -277,11 +277,11 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { i_t solve(const dense_vector_t& b, dense_vector_t& x) override { if (b.size() != n) { - printf("Error: b.size() %d != n %d\n", b.size(), n); + settings_.log.printf("Error: b.size() %d != n %d\n", b.size(), n); exit(1); } if (x.size() != n) { - printf("Error: x.size() %d != n %d\n", x.size(), n); + settings_.log.printf("Error: x.size() %d != n %d\n", x.size(), n); exit(1); } CUDA_CALL_AND_CHECK(cudaMemcpy(b_values_d, b.data(), n * sizeof(f_t), cudaMemcpyHostToDevice), @@ -346,6 +346,8 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { f_t* csr_values_d; f_t* x_values_d; f_t* b_values_d; + + const simplex_solver_settings_t& settings_; }; } // namespace cuopt::linear_programming::dual_simplex From 6fca5ad2fd7007162a7ffcf012e2b44c3d766a65 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 13 Aug 2025 12:08:11 -0500 Subject: [PATCH 18/31] fix style --- ci/utils/install_cudss.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/ci/utils/install_cudss.sh b/ci/utils/install_cudss.sh index 2ac4a1b20e..5d37396de0 100755 --- a/ci/utils/install_cudss.sh +++ b/ci/utils/install_cudss.sh @@ -17,8 +17,6 @@ set -euo pipefail -ARCH=$(uname -m) - # Clean metadata & install cudss if command -v dnf &> /dev/null; then dnf clean all From b887b9610f4143013a394c7ef93eccb0f7fb5009 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Wed, 13 Aug 2025 10:21:33 -0700 Subject: [PATCH 19/31] Hook up status codes --- cpp/src/dual_simplex/barrier.cpp | 31 +++++++++++++++-------------- cpp/src/dual_simplex/barrier.hpp | 3 ++- cpp/src/dual_simplex/solve.cpp | 10 ++-------- cpp/src/linear_programming/solve.cu | 2 +- 4 files changed, 21 insertions(+), 25 deletions(-) diff --git a/cpp/src/dual_simplex/barrier.cpp b/cpp/src/dual_simplex/barrier.cpp index cb9d6fa7da..3c95667b90 100644 --- a/cpp/src/dual_simplex/barrier.cpp +++ b/cpp/src/dual_simplex/barrier.cpp @@ -836,9 +836,10 @@ i_t barrier_solver_t::compute_search_direction(iteration_data_t -i_t barrier_solver_t::solve(const barrier_solver_settings_t& options) +lp_status_t barrier_solver_t::solve(const barrier_solver_settings_t& options) { float64_t start_time = tic(); + lp_status_t status = lp_status_t::UNSET; i_t n = lp.num_cols; i_t m = lp.num_rows; @@ -858,14 +859,14 @@ i_t barrier_solver_t::solve(const barrier_solver_settings_t& iteration_data_t data(lp, num_upper_bounds, settings); if (toc(start_time) > settings.time_limit) { settings.log.printf("Barrier time limit exceeded\n"); - return -1; + return lp_status_t::TIME_LIMIT; } settings.log.printf("%d finite upper bounds\n", num_upper_bounds); initial_point(data); if (toc(start_time) > settings.time_limit) { settings.log.printf("Barrier time limit exceeded\n"); - return -1; + return lp_status_t::TIME_LIMIT; } compute_residuals(data.w, data.x, data.y, data.v, data.z, data); @@ -925,7 +926,7 @@ i_t barrier_solver_t::solve(const barrier_solver_settings_t& while (iter < iteration_limit) { if (toc(start_time) > settings.time_limit) { settings.log.printf("Barrier time limit exceeded\n"); - return -1; + return lp_status_t::TIME_LIMIT; } // Compute the affine step data.primal_rhs = data.primal_residual; @@ -946,7 +947,7 @@ i_t barrier_solver_t::solve(const barrier_solver_settings_t& relative_dual_residual < 100 * options.optimality_tol && relative_complementarity_residual < 100 * options.complementarity_tol) { settings.log.printf("Suboptimal solution found\n"); - return 1; + return lp_status_t::OPTIMAL; // TODO: Barrier should probably have a separate suboptimal status } compute_residual_norms(w_save, x_save, @@ -977,7 +978,7 @@ i_t barrier_solver_t::solve(const barrier_solver_settings_t& data.v = v_save; data.z = z_save; settings.log.printf("Suboptimal solution found\n"); - return 1; + return lp_status_t::OPTIMAL; // TODO: Barrier should probably have a separate suboptimal status } else { settings.log.printf( "Primal residual %.2e dual residual %.2e complementarity residual %.2e\n", @@ -986,11 +987,11 @@ i_t barrier_solver_t::solve(const barrier_solver_settings_t& relative_complementarity_residual); } settings.log.printf("Search direction computation failed\n"); - return -1; + return lp_status_t::NUMERICAL_ISSUES; } if (toc(start_time) > settings.time_limit) { settings.log.printf("Barrier time limit exceeded\n"); - return -1; + return lp_status_t::TIME_LIMIT; } f_t step_primal_aff = std::min(max_step_to_boundary(data.w, data.dw_aff), @@ -1044,7 +1045,7 @@ i_t barrier_solver_t::solve(const barrier_solver_settings_t& relative_dual_residual < 100 * options.optimality_tol && relative_complementarity_residual < 100 * options.complementarity_tol) { settings.log.printf("Suboptimal solution found\n"); - return 1; + return lp_status_t::OPTIMAL; // TODO: Barrier should probably have a separate suboptimal status } compute_residual_norms(w_save, x_save, @@ -1075,7 +1076,7 @@ i_t barrier_solver_t::solve(const barrier_solver_settings_t& data.v = v_save; data.z = z_save; settings.log.printf("Suboptimal solution found\n"); - return 1; + return lp_status_t::OPTIMAL; // TODO: Barrier should probably have a separate suboptimal status } else { settings.log.printf( "Primal residual %.2e dual residual %.2e complementarity residual %.2e\n", @@ -1084,12 +1085,12 @@ i_t barrier_solver_t::solve(const barrier_solver_settings_t& relative_complementarity_residual); } settings.log.printf("Search direction computation failed\n"); - return -1; + return lp_status_t::NUMERICAL_ISSUES; } data.has_factorization = false; if (toc(start_time) > settings.time_limit) { settings.log.printf("Barrier time limit exceeded\n"); - return -1; + return lp_status_t::TIME_LIMIT; } // dw = dw_aff + dw_cc @@ -1168,7 +1169,7 @@ i_t barrier_solver_t::solve(const barrier_solver_settings_t& if (primal_objective != primal_objective || dual_objective != dual_objective) { settings.log.printf("Numerical error in objective\n"); - return -1; + return lp_status_t::NUMERICAL_ISSUES; } settings.log.printf("%2d %+.12e %+.12e %.2e %.2e %.2e %.2e %.2e %.2e %.2e %.2e %.1f\n", @@ -1205,10 +1206,10 @@ i_t barrier_solver_t::solve(const barrier_solver_settings_t& settings.log.printf("Complementarity gap (abs/rel): %8.2e/%8.2e\n", complementarity_residual_norm, relative_complementarity_residual); - return 0; + return lp_status_t::OPTIMAL; } } - return 1; + return lp_status_t::ITERATION_LIMIT; } #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE diff --git a/cpp/src/dual_simplex/barrier.hpp b/cpp/src/dual_simplex/barrier.hpp index b2ad7d2c21..b422a09c64 100644 --- a/cpp/src/dual_simplex/barrier.hpp +++ b/cpp/src/dual_simplex/barrier.hpp @@ -18,6 +18,7 @@ #include "dual_simplex/dense_vector.hpp" #include "dual_simplex/presolve.hpp" +#include "dual_simplex/solve.hpp" #include "dual_simplex/simplex_solver_settings.hpp" #include "dual_simplex/sparse_cholesky.hpp" #include "dual_simplex/sparse_matrix.hpp" @@ -46,7 +47,7 @@ class barrier_solver_t { : lp(lp), settings(settings), presolve_info(presolve) { } - i_t solve(const barrier_solver_settings_t& options); + lp_status_t solve(const barrier_solver_settings_t& options); private: void initial_point(iteration_data_t& data); diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index f60a757015..9078811bf0 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -243,7 +243,6 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us lp_solution_t& solution) { f_t start_time = tic(); - lp_status_t status = lp_status_t::UNSET; lp_problem_t original_lp(1, 1, 1); std::vector new_slacks; simplex_solver_settings_t barrier_settings = settings; @@ -259,13 +258,8 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us if (ok == -1) { return lp_status_t::INFEASIBLE; } barrier_solver_t barrier_solver(barrier_lp, presolve_info, barrier_settings); barrier_solver_settings_t barrier_solver_settings; - i_t barrier_status = barrier_solver.solve(barrier_solver_settings); - if (barrier_status == 0) { - status = lp_status_t::OPTIMAL; - } else { - status = lp_status_t::NUMERICAL_ISSUES; - } - return status; + lp_status_t barrier_status = barrier_solver.solve(barrier_solver_settings); + return barrier_status; } template diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu index f0a49f3dd1..3766b8ae59 100644 --- a/cpp/src/linear_programming/solve.cu +++ b/cpp/src/linear_programming/solve.cu @@ -285,7 +285,7 @@ optimization_problem_solution_t convert_dual_simplex_sol( if (termination_status != pdlp_termination_status_t::Optimal && termination_status != pdlp_termination_status_t::TimeLimit && termination_status != pdlp_termination_status_t::ConcurrentLimit) { - CUOPT_LOG_INFO("Dual simplex status %s", sol.get_termination_status_string().c_str()); + CUOPT_LOG_INFO("Solve status %s", sol.get_termination_status_string().c_str()); } problem.handle_ptr->sync_stream(); From cfb637b0e4f17b4b90fa3712aecff16eff782182 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 13 Aug 2025 15:10:12 -0500 Subject: [PATCH 20/31] fix Cmake --- cpp/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index f828513770..529cd5cf0c 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -230,6 +230,10 @@ set(CMAKE_LIBRARY_PATH ${CMAKE_CURRENT_BINARY_DIR}/libmps_parser/) find_package(cudss REQUIRED CONFIG) +set_target_properties(cudss PROPERTIES + INTERFACE_LINK_LIBRARIES "${cudss_LIBRARY_DIR}/libcudss.so.0" +) + target_link_libraries(cuopt PUBLIC CUDA::cublas From f75bed20a9115f22207c86ec046e74e2f01dcc81 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 13 Aug 2025 15:54:50 -0500 Subject: [PATCH 21/31] fix issues --- cpp/CMakeLists.txt | 6 +++--- cpp/src/mip/solver.cu | 10 ++++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 529cd5cf0c..832cae5da1 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -230,9 +230,9 @@ set(CMAKE_LIBRARY_PATH ${CMAKE_CURRENT_BINARY_DIR}/libmps_parser/) find_package(cudss REQUIRED CONFIG) -set_target_properties(cudss PROPERTIES - INTERFACE_LINK_LIBRARIES "${cudss_LIBRARY_DIR}/libcudss.so.0" -) +#set_target_properties(cudss PROPERTIES +# INTERFACE_LINK_LIBRARIES "${cudss_LIBRARY_DIR}/libcudss.so.0" +#) target_link_libraries(cuopt PUBLIC diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu index 0f2117991f..360b6b1795 100644 --- a/cpp/src/mip/solver.cu +++ b/cpp/src/mip/solver.cu @@ -33,15 +33,25 @@ #include #include +#include + #include #include #include namespace cuopt::linear_programming::detail { +void force_link_cudss() { + cudssHandle_t handle; + cudssCreate(&handle); + cudssDestroy(handle); +} + + // This serves as both a warm up but also a mandatory initial call to setup cuSparse and cuBLAS static void init_handler(const raft::handle_t* handle_ptr) { + force_link_cudss(); // Init cuBlas / cuSparse context here to avoid having it during solving time RAFT_CUBLAS_TRY(raft::linalg::detail::cublassetpointermode( handle_ptr->get_cublas_handle(), CUBLAS_POINTER_MODE_DEVICE, handle_ptr->get_stream())); From b00dcc3d441a762ff54ebd039e5d9fff34739e6d Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 13 Aug 2025 15:55:13 -0500 Subject: [PATCH 22/31] fix issues --- cpp/src/mip/solver.cu | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu index 360b6b1795..44f6e8083e 100644 --- a/cpp/src/mip/solver.cu +++ b/cpp/src/mip/solver.cu @@ -41,13 +41,13 @@ namespace cuopt::linear_programming::detail { -void force_link_cudss() { - cudssHandle_t handle; - cudssCreate(&handle); - cudssDestroy(handle); +void force_link_cudss() +{ + cudssHandle_t handle; + cudssCreate(&handle); + cudssDestroy(handle); } - // This serves as both a warm up but also a mandatory initial call to setup cuSparse and cuBLAS static void init_handler(const raft::handle_t* handle_ptr) { From 63ca5295143cec39740044b3f7721401e6ec494c Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Wed, 13 Aug 2025 16:17:52 -0700 Subject: [PATCH 23/31] Add support for crossover after barrier --- cpp/src/dual_simplex/barrier.cpp | 251 +++++++++++------- cpp/src/dual_simplex/barrier.hpp | 15 +- cpp/src/dual_simplex/presolve.cpp | 56 ++-- cpp/src/dual_simplex/presolve.hpp | 2 + .../dual_simplex/simplex_solver_settings.hpp | 2 + cpp/src/dual_simplex/solve.cpp | 144 +++++++++- cpp/src/linear_programming/solve.cu | 1 + 7 files changed, 352 insertions(+), 119 deletions(-) diff --git a/cpp/src/dual_simplex/barrier.cpp b/cpp/src/dual_simplex/barrier.cpp index 3c95667b90..3ffc92c0be 100644 --- a/cpp/src/dual_simplex/barrier.cpp +++ b/cpp/src/dual_simplex/barrier.cpp @@ -40,6 +40,11 @@ class iteration_data_t { y(lp.num_rows), v(num_upper_bounds), z(lp.num_cols), + w_save(num_upper_bounds), + x_save(lp.num_cols), + y_save(lp.num_rows), + v_save(num_upper_bounds), + z_save(lp.num_cols), diag(lp.num_cols), inv_diag(lp.num_cols), inv_sqrt_diag(lp.num_cols), @@ -110,6 +115,28 @@ class iteration_data_t { chol->analyze(ADAT); } + void to_solution(const lp_problem_t& lp, i_t iterations, f_t objective, f_t user_objective, f_t primal_residual, f_t dual_residual, lp_solution_t& solution) + { + solution.x = x; + solution.y = y; + dense_vector_t z_tilde(z.size()); + scatter_upper_bounds(v, z_tilde); + z_tilde.axpy(1.0, z, -1.0); + solution.z = z_tilde; + + dense_vector_t dual_res = z_tilde; + dual_res.axpy(-1.0, lp.objective, 1.0); + matrix_transpose_vector_multiply(lp.A, 1.0, solution.y, 1.0, dual_res); + f_t dual_residual_norm = vector_norm_inf(dual_res); + printf("Solution Dual residual: %e\n", dual_residual_norm); + + solution.iterations = iterations; + solution.objective = objective; + solution.user_objective = user_objective; + solution.l2_primal_residual = primal_residual; + solution.l2_dual_residual = dual_residual; + } + void column_density(const csc_matrix_t& A, const simplex_solver_settings_t& settings) { @@ -271,6 +298,12 @@ class iteration_data_t { dense_vector_t v; dense_vector_t z; + dense_vector_t w_save; + dense_vector_t x_save; + dense_vector_t y_save; + dense_vector_t v_save; + dense_vector_t z_save; + dense_vector_t diag; dense_vector_t inv_diag; dense_vector_t inv_sqrt_diag; @@ -836,7 +869,79 @@ i_t barrier_solver_t::compute_search_direction(iteration_data_t -lp_status_t barrier_solver_t::solve(const barrier_solver_settings_t& options) +lp_status_t barrier_solver_t::check_for_suboptimal_solution( + const barrier_solver_settings_t& options, + iteration_data_t& data, + i_t iter, + f_t norm_b, + f_t norm_c, + f_t& primal_objective, + f_t& relative_primal_residual, + f_t& relative_dual_residual, + f_t& relative_complementarity_residual, + lp_solution_t& solution) +{ + if (relative_primal_residual < 100 * options.feasibility_tol && + relative_dual_residual < 100 * options.optimality_tol && + relative_complementarity_residual < 100 * options.complementarity_tol) { + settings.log.printf("Suboptimal solution found\n"); + data.to_solution(lp, + iter, + primal_objective, + primal_objective + lp.obj_constant, + vector_norm2(data.primal_residual), + vector_norm2(data.dual_residual), + solution); + return lp_status_t::OPTIMAL; // TODO: Barrier should probably have a separate suboptimal status + } + f_t primal_residual_norm; + f_t dual_residual_norm; + f_t complementarity_residual_norm; + compute_residual_norms(data.w_save, + data.x_save, + data.y_save, + data.v_save, + data.z_save, + data, + primal_residual_norm, + dual_residual_norm, + complementarity_residual_norm); + primal_objective = data.c.inner_product(data.x_save); + relative_primal_residual = primal_residual_norm / (1.0 + norm_b); + relative_dual_residual = dual_residual_norm / (1.0 + norm_c); + relative_complementarity_residual = + complementarity_residual_norm / (1.0 + std::abs(primal_objective)); + + if (relative_primal_residual < 100 * options.feasibility_tol && + relative_dual_residual < 100 * options.optimality_tol && + relative_complementarity_residual < 100 * options.complementarity_tol) { + settings.log.printf( + "Restoring previous solution: primal %.2e dual %.2e complementarity %.2e\n", + primal_residual_norm, + dual_residual_norm, + complementarity_residual_norm); + data.to_solution(lp, iter, + primal_objective, + primal_objective + lp.obj_constant, + vector_norm2(data.primal_residual), + vector_norm2(data.dual_residual), + solution); + settings.log.printf("Suboptimal solution found\n"); + return lp_status_t::OPTIMAL; // TODO: Barrier should probably have a separate suboptimal status + } else { + settings.log.printf( + "Primal residual %.2e dual residual %.2e complementarity residual %.2e\n", + relative_primal_residual, + relative_dual_residual, + relative_complementarity_residual); + } + settings.log.printf("Search direction computation failed\n"); + return lp_status_t::NUMERICAL_ISSUES; +} + +template +lp_status_t barrier_solver_t::solve(const barrier_solver_settings_t& options, + lp_solution_t& solution) { float64_t start_time = tic(); lp_status_t status = lp_status_t::UNSET; @@ -844,6 +949,7 @@ lp_status_t barrier_solver_t::solve(const barrier_solver_settings_t::solve(const barrier_solver_settings_t w_save = data.w; - dense_vector_t x_save = data.x; - dense_vector_t y_save = data.y; - dense_vector_t v_save = data.v; - dense_vector_t z_save = data.z; + data.w_save = data.w; + data.x_save = data.x; + data.y_save = data.y; + data.v_save = data.v; + data.z_save = data.z; const i_t iteration_limit = std::min(options.iteration_limit, settings.iteration_limit); @@ -943,51 +1049,16 @@ lp_status_t barrier_solver_t::solve(const barrier_solver_settings_t settings.time_limit) { settings.log.printf("Barrier time limit exceeded\n"); @@ -1041,51 +1112,16 @@ lp_status_t barrier_solver_t::solve(const barrier_solver_settings_t settings.time_limit) { @@ -1157,11 +1193,11 @@ lp_status_t barrier_solver_t::solve(const barrier_solver_settings_t::solve(const barrier_solver_settings_t(data.primal_residual), + vector_norm2(data.dual_residual), + solution); return lp_status_t::ITERATION_LIMIT; } diff --git a/cpp/src/dual_simplex/barrier.hpp b/cpp/src/dual_simplex/barrier.hpp index b422a09c64..e529618aa8 100644 --- a/cpp/src/dual_simplex/barrier.hpp +++ b/cpp/src/dual_simplex/barrier.hpp @@ -19,6 +19,7 @@ #include "dual_simplex/dense_vector.hpp" #include "dual_simplex/presolve.hpp" #include "dual_simplex/solve.hpp" +#include "dual_simplex/solution.hpp" #include "dual_simplex/simplex_solver_settings.hpp" #include "dual_simplex/sparse_cholesky.hpp" #include "dual_simplex/sparse_matrix.hpp" @@ -47,7 +48,8 @@ class barrier_solver_t { : lp(lp), settings(settings), presolve_info(presolve) { } - lp_status_t solve(const barrier_solver_settings_t& options); + lp_status_t solve(const barrier_solver_settings_t& options, + lp_solution_t& solution); private: void initial_point(iteration_data_t& data); @@ -79,6 +81,17 @@ class barrier_solver_t { dense_vector_t& dz, f_t& max_residual); + lp_status_t check_for_suboptimal_solution(const barrier_solver_settings_t& options, + iteration_data_t& data, + i_t iter, + f_t norm_b, + f_t norm_c, + f_t& primal_objective, + f_t& relative_primal_residual, + f_t& relative_dual_residual, + f_t& relative_complementarity_residual, + lp_solution_t& solution); + const lp_problem_t& lp; const simplex_solver_settings_t& settings; const presolve_info_t& presolve_info; diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index 6f4b8017f3..e059f8203d 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -727,6 +727,7 @@ i_t presolve(const lp_problem_t& original, } if (settings.barrier_presolve && nonzero_lower_bounds > 0) { settings.log.printf("Transforming %ld nonzero lower bound\n", nonzero_lower_bounds); + presolve_info.removed_lower_bounds.resize(problem.num_cols); // We can construct a new variable: x'_j = x_j - l_j or x_j = x'_j + l_j // than we have 0 <= x'_j <= u_j - l_j // Constraints in the form: @@ -751,6 +752,7 @@ i_t presolve(const lp_problem_t& original, } problem.obj_constant += problem.objective[j] * problem.lower[j]; problem.upper[j] -= problem.lower[j]; + presolve_info.removed_lower_bounds[j] = problem.lower[j]; problem.lower[j] = 0.0; } } @@ -1100,25 +1102,49 @@ void uncrush_solution(const presolve_info_t& presolve_info, if (presolve_info.removed_variables.size() == 0) { uncrushed_x = crushed_x; uncrushed_z = crushed_z; - return; - } + } else { + printf("Presolve info removed variables %d\n", presolve_info.removed_variables.size()); + // We removed some variables, so we need to map the crushed solution back to the original variables + const i_t n = presolve_info.removed_variables.size() + presolve_info.remaining_variables.size(); + uncrushed_x.resize(n); + uncrushed_z.resize(n); + + i_t k = 0; + for (const i_t j : presolve_info.remaining_variables) { + uncrushed_x[j] = crushed_x[k]; + uncrushed_z[j] = crushed_z[k]; + k++; + } - const i_t n = presolve_info.removed_variables.size() + presolve_info.remaining_variables.size(); - uncrushed_x.resize(n); - uncrushed_z.resize(n); + k = 0; + for (const i_t j : presolve_info.removed_variables) { + uncrushed_x[j] = presolve_info.removed_values[k]; + uncrushed_z[j] = presolve_info.removed_reduced_costs[k]; + k++; + } + } - i_t k = 0; - for (const i_t j : presolve_info.remaining_variables) { - uncrushed_x[j] = crushed_x[k]; - uncrushed_z[j] = crushed_z[k]; - k++; + const i_t num_free_variables = presolve_info.free_variable_pairs.size() / 2; + if (num_free_variables > 0) { + printf("Presolve info free variables %d\n", num_free_variables); + // We added free variables so we need to map the crushed solution back to the original variables + for (i_t k = 0; k < 2 * num_free_variables; k+= 2) + { + const i_t u = presolve_info.free_variable_pairs[k]; + const i_t v = presolve_info.free_variable_pairs[k + 1]; + uncrushed_x[u] -= uncrushed_x[v]; + } + const i_t n = uncrushed_x.size(); + uncrushed_x.resize(n - num_free_variables); + uncrushed_z.resize(n - num_free_variables); } - k = 0; - for (const i_t j : presolve_info.removed_variables) { - uncrushed_x[j] = presolve_info.removed_values[k]; - uncrushed_z[j] = presolve_info.removed_reduced_costs[k]; - k++; + if (presolve_info.removed_lower_bounds.size() > 0) { + printf("Presolve info removed lower bounds %d\n", presolve_info.removed_lower_bounds.size()); + // We removed some lower bounds so we need to map the crushed solution back to the original variables + for (i_t j = 0; j < uncrushed_x.size(); j++) { + uncrushed_x[j] += presolve_info.removed_lower_bounds[j]; + } } } diff --git a/cpp/src/dual_simplex/presolve.hpp b/cpp/src/dual_simplex/presolve.hpp index 5b2b0348d4..b528578de6 100644 --- a/cpp/src/dual_simplex/presolve.hpp +++ b/cpp/src/dual_simplex/presolve.hpp @@ -61,6 +61,8 @@ struct presolve_info_t { std::vector removed_reduced_costs; // Free variable pairs std::vector free_variable_pairs; + // Removed lower bounds + std::vector removed_lower_bounds; }; template diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index 2eec975958..c14d09ab1a 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -59,6 +59,7 @@ struct simplex_solver_settings_t { barrier_presolve(false), use_cudss(false), barrier(false), + crossover(false), refactor_frequency(100), iteration_log_frequency(1000), first_iteration_log(2), @@ -107,6 +108,7 @@ struct simplex_solver_settings_t { bool barrier_presolve; // true to use barrier presolve bool use_cudss; // true to use cuDSS for sparse Cholesky factorization, false to use cholmod bool barrier; // true to use barrier method, false to use dual simplex method + bool crossover; // true to do crossover, false to not i_t refactor_frequency; // number of basis updates before refactorization i_t iteration_log_frequency; // number of iterations between log updates i_t first_iteration_log; // number of iterations to log at beginning of solve diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index 9078811bf0..6941de7a6f 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -244,21 +245,160 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us { f_t start_time = tic(); lp_problem_t original_lp(1, 1, 1); + + // Convert the user problem to a linear program with only equality constraints std::vector new_slacks; simplex_solver_settings_t barrier_settings = settings; barrier_settings.barrier_presolve = true; convert_user_problem(user_problem, barrier_settings, original_lp, new_slacks); + lp_solution_t lp_solution(original_lp.num_rows, original_lp.num_cols); + + // Presolve the linear program presolve_info_t presolve_info; lp_problem_t presolved_lp(1, 1, 1); const i_t ok = presolve(original_lp, barrier_settings, presolved_lp, presolve_info); + if (ok == -1) { return lp_status_t::INFEASIBLE; } + + // Apply columns scaling to the presolve LP lp_problem_t barrier_lp( presolved_lp.num_rows, presolved_lp.num_cols, presolved_lp.A.col_start[presolved_lp.num_cols]); std::vector column_scales; column_scaling(presolved_lp, barrier_settings, barrier_lp, column_scales); - if (ok == -1) { return lp_status_t::INFEASIBLE; } + + // Solve using barrier + lp_solution_t barrier_solution(barrier_lp.num_rows, barrier_lp.num_cols); barrier_solver_t barrier_solver(barrier_lp, presolve_info, barrier_settings); barrier_solver_settings_t barrier_solver_settings; - lp_status_t barrier_status = barrier_solver.solve(barrier_solver_settings); + lp_status_t barrier_status = barrier_solver.solve(barrier_solver_settings, barrier_solution); + if (barrier_status == lp_status_t::OPTIMAL) { + + std::vector scaled_residual = barrier_lp.rhs; + matrix_vector_multiply(barrier_lp.A, 1.0, barrier_solution.x, -1.0, scaled_residual); + f_t scaled_primal_residual = vector_norm_inf(scaled_residual); + settings.log.printf("Scaled Primal residual: %e\n", scaled_primal_residual); + + std::vector scaled_dual_residual = barrier_solution.z; + for (i_t j = 0; j < scaled_dual_residual.size(); ++j) { + scaled_dual_residual[j] -= barrier_lp.objective[j]; + } + matrix_transpose_vector_multiply(barrier_lp.A, 1.0, barrier_solution.y, 1.0, scaled_dual_residual); + f_t scaled_dual_residual_norm = vector_norm_inf(scaled_dual_residual); + settings.log.printf("Scaled Dual residual: %e\n", scaled_dual_residual_norm); + + // Unscale the solution + std::vector unscaled_x(barrier_lp.num_cols); + std::vector unscaled_z(barrier_lp.num_cols); + unscale_solution(column_scales, barrier_solution.x, barrier_solution.z, unscaled_x, unscaled_z); + + std::vector residual = presolved_lp.rhs; + matrix_vector_multiply(presolved_lp.A, 1.0, unscaled_x, -1.0, residual); + f_t primal_residual = vector_norm_inf(residual); + settings.log.printf("Unscaled Primal residual: %e\n", primal_residual); + + std::vector unscaled_dual_residual = unscaled_z; + for (i_t j = 0; j < unscaled_dual_residual.size(); ++j) { + unscaled_dual_residual[j] -= presolved_lp.objective[j]; + } + matrix_transpose_vector_multiply(presolved_lp.A, 1.0, barrier_solution.y, 1.0, unscaled_dual_residual); + f_t unscaled_dual_residual_norm = vector_norm_inf(unscaled_dual_residual); + settings.log.printf("Unscaled Dual residual: %e\n", unscaled_dual_residual_norm); + + // Undo presolve + lp_solution.y = barrier_solution.y; + uncrush_solution(presolve_info, unscaled_x, unscaled_z, lp_solution.x, lp_solution.z); + + std::vector post_solve_residual = original_lp.rhs; + matrix_vector_multiply(original_lp.A, 1.0, lp_solution.x, -1.0, post_solve_residual); + f_t post_solve_primal_residual = vector_norm_inf(post_solve_residual); + settings.log.printf("Post-solve Primal residual: %e\n", post_solve_primal_residual); + + std::vector post_solve_dual_residual = lp_solution.z; + for (i_t j = 0; j < post_solve_dual_residual.size(); ++j) { + post_solve_dual_residual[j] -= original_lp.objective[j]; + } + matrix_transpose_vector_multiply(original_lp.A, 1.0, lp_solution.y, 1.0, post_solve_dual_residual); + f_t post_solve_dual_residual_norm = vector_norm_inf(post_solve_dual_residual); + settings.log.printf("Post-solve Dual residual: %e\n", post_solve_dual_residual_norm); + + uncrush_primal_solution(user_problem, original_lp, lp_solution.x, solution.x); + uncrush_dual_solution(user_problem, original_lp, lp_solution.y, lp_solution.z, solution.y, solution.z); + solution.objective = barrier_solution.objective; + solution.user_objective = barrier_solution.user_objective; + solution.l2_primal_residual = barrier_solution.l2_primal_residual; + solution.l2_dual_residual = barrier_solution.l2_dual_residual; + solution.iterations = barrier_solution.iterations; + } + + // If we aren't doing crossover, we're done + if (!settings.crossover) { + return barrier_status; + } + + if (settings.crossover && barrier_status == lp_status_t::OPTIMAL) { + + // Check to see if we need to add artifical variables + csc_matrix_t Atranspose(1,1,1); + original_lp.A.transpose(Atranspose); + std::vector artificial_variables; + artificial_variables.reserve(original_lp.num_rows); + for (i_t i = 0; i < original_lp.num_rows; ++i) { + const i_t row_start = Atranspose.col_start[i]; + const i_t row_end = Atranspose.col_start[i + 1]; + bool found_slack = false; + for (i_t p = row_start; p < row_end; ++p) { + const i_t j = Atranspose.i[p]; + const i_t nz_col = original_lp.A.col_start[j + 1] - original_lp.A.col_start[j]; + if (nz_col == 1) { + found_slack = true; + break; + } + } + if (!found_slack) { + //settings.log.printf("No slack found for row %d\n", i); + artificial_variables.push_back(i); + } + } + if (artificial_variables.size() > 0) { + settings.log.printf("Adding %ld artificial variables\n", artificial_variables.size()); + const i_t additional_vars = artificial_variables.size(); + const i_t new_cols = original_lp.num_cols + additional_vars; + i_t col = original_lp.num_cols; + i_t nz = original_lp.A.col_start[col]; + const i_t new_nz = nz + additional_vars; + original_lp.A.col_start.resize(new_cols + 1); + original_lp.A.x.resize(new_nz); + original_lp.A.i.resize(new_nz); + original_lp.objective.resize(new_cols); + original_lp.lower.resize(new_cols); + original_lp.upper.resize(new_cols); + lp_solution.x.resize(new_cols); + lp_solution.z.resize(new_cols); + for (i_t i : artificial_variables) { + original_lp.A.x[nz] = 1.0; + original_lp.A.i[nz] = i; + original_lp.objective[col] = 0.0; + original_lp.lower[col] = 0.0; + original_lp.upper[col] = 0.0; + lp_solution.x[col] = 0.0; + lp_solution.z[col] = 0.0; + nz++; + col++; + original_lp.A.col_start[col] = nz; + } + original_lp.A.n = new_cols; + original_lp.num_cols = new_cols; + printf("nz %d =? new_nz %d =? Acol %d, num_cols %d =? new_cols %d x size %ld z size %ld\n", nz, new_nz, original_lp.A.col_start[original_lp.num_cols], original_lp.num_cols, new_cols, lp_solution.x.size(), lp_solution.z.size()); + } + + // Run crossover + lp_solution_t crossover_solution(original_lp.num_rows, original_lp.num_cols); + std::vector vstatus(original_lp.num_cols); + crossover_status_t crossover_status = crossover(original_lp, barrier_settings, lp_solution, start_time, crossover_solution, vstatus); + settings.log.printf("Crossover status: %d\n", crossover_status); + if (crossover_status == crossover_status_t::OPTIMAL) { + barrier_status = lp_status_t::OPTIMAL; + } + } return barrier_status; } diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu index 3766b8ae59..154e0e7b90 100644 --- a/cpp/src/linear_programming/solve.cu +++ b/cpp/src/linear_programming/solve.cu @@ -308,6 +308,7 @@ run_barrier(dual_simplex::user_problem_t& user_problem, dual_simplex_settings.concurrent_halt = settings.concurrent_halt; dual_simplex_settings.use_cudss = settings.use_cudss; dual_simplex_settings.barrier = true; + dual_simplex_settings.crossover = settings.crossover; if (dual_simplex_settings.concurrent_halt != nullptr) { // Don't show the dual simplex log in concurrent mode. Show the PDLP log instead dual_simplex_settings.log.log = false; From b63de80d312ed5f895a37f4882d56de57207ae55 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 13 Aug 2025 18:47:48 -0500 Subject: [PATCH 24/31] fix cmake --- cpp/CMakeLists.txt | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 832cae5da1..3bb5999d12 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -230,9 +230,21 @@ set(CMAKE_LIBRARY_PATH ${CMAKE_CURRENT_BINARY_DIR}/libmps_parser/) find_package(cudss REQUIRED CONFIG) -#set_target_properties(cudss PROPERTIES -# INTERFACE_LINK_LIBRARIES "${cudss_LIBRARY_DIR}/libcudss.so.0" -#) +# Print all details of the cudss package +message(STATUS "cudss package details:") +message(STATUS " cudss_FOUND: ${cudss_FOUND}") +message(STATUS " cudss_VERSION: ${cudss_VERSION}") +message(STATUS " cudss_INCLUDE_DIRS: ${cudss_INCLUDE_DIRS}") +message(STATUS " cudss_LIBRARY_DIR: ${cudss_LIBRARY_DIR}") +message(STATUS " cudss_LIBRARIES: ${cudss_LIBRARIES}") +message(STATUS " cudss_DEFINITIONS: ${cudss_DEFINITIONS}") +message(STATUS " cudss_COMPILE_OPTIONS: ${cudss_COMPILE_OPTIONS}") +message(STATUS " cudss_LINK_OPTIONS: ${cudss_LINK_OPTIONS}") + +# Use the specific cudss library version to avoid symlink issues +set(CUDSS_LIB_FILE "${cudss_LIBRARY_DIR}/libcudss.so.0") +message(STATUS "Using cudss library: ${CUDSS_LIB_FILE}") + target_link_libraries(cuopt PUBLIC @@ -243,7 +255,7 @@ target_link_libraries(cuopt CCCL::CCCL raft::raft cuopt::mps_parser - cudss + ${CUDSS_LIB_FILE} PRIVATE ${CUOPT_PRIVATE_CUDA_LIBS} ) From 36b2cbf4a9fa16e322afa7ab1f618ac947453b5b Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Thu, 14 Aug 2025 11:13:57 -0500 Subject: [PATCH 25/31] fix path to libcudss --- python/libcuopt/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/libcuopt/CMakeLists.txt b/python/libcuopt/CMakeLists.txt index 5aa1090bd5..282c423aaf 100644 --- a/python/libcuopt/CMakeLists.txt +++ b/python/libcuopt/CMakeLists.txt @@ -61,7 +61,7 @@ set(rpaths "$ORIGIN/../../rapids_logger/lib64" "$ORIGIN/../../librmm/lib64" "$ORIGIN/../../nvidia/cublas/lib" - "$ORIGIN/../../nvidia/cudss/lib" + "$ORIGIN/../../nvidia/cu12/lib" "$ORIGIN/../../nvidia/curand/lib" "$ORIGIN/../../nvidia/cusolver/lib" "$ORIGIN/../../nvidia/cusparse/lib" From bb7e38c1aa36666b1a0ba1c36ae7c4406279c496 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Thu, 14 Aug 2025 11:14:30 -0500 Subject: [PATCH 26/31] update --- ci/build_wheel_libcuopt.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/build_wheel_libcuopt.sh b/ci/build_wheel_libcuopt.sh index 9a40c7f17e..261c5087ed 100755 --- a/ci/build_wheel_libcuopt.sh +++ b/ci/build_wheel_libcuopt.sh @@ -77,7 +77,7 @@ EXCLUDE_ARGS=( --exclude "libraft.so" --exclude "libcublas.so.*" --exclude "libcublasLt.so.*" - --exclude "libcudss*" + --exclude "libcudss.so.*" --exclude "libcurand.so.*" --exclude "libcusolver.so.*" --exclude "libcusparse.so.*" From 859f15f4beb0caf1bbb14ebf1453e5f01f79bc6b Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Thu, 14 Aug 2025 10:43:30 -0700 Subject: [PATCH 27/31] Add support for concurrent barrier --- cpp/src/dual_simplex/barrier.cpp | 5 +++ cpp/src/dual_simplex/sparse_cholesky.hpp | 23 ++++++++++-- cpp/src/dual_simplex/sparse_matrix.cpp | 14 ++++++++ cpp/src/dual_simplex/sparse_matrix.hpp | 3 ++ cpp/src/linear_programming/solve.cu | 46 +++++++++++++++++++++++- 5 files changed, 87 insertions(+), 4 deletions(-) diff --git a/cpp/src/dual_simplex/barrier.cpp b/cpp/src/dual_simplex/barrier.cpp index 3ffc92c0be..5b55a73290 100644 --- a/cpp/src/dual_simplex/barrier.cpp +++ b/cpp/src/dual_simplex/barrier.cpp @@ -974,6 +974,11 @@ lp_status_t barrier_solver_t::solve(const barrier_solver_settings_tload(std::memory_order_acquire) == 1) { + settings.log.printf("Barrier solver halted\n"); + return lp_status_t::CONCURRENT_LIMIT; + } compute_residuals(data.w, data.x, data.y, data.v, data.z, data); f_t primal_residual_norm = std::max(vector_norm_inf(data.primal_residual), diff --git a/cpp/src/dual_simplex/sparse_cholesky.hpp b/cpp/src/dual_simplex/sparse_cholesky.hpp index 83548df0ad..6af5062386 100644 --- a/cpp/src/dual_simplex/sparse_cholesky.hpp +++ b/cpp/src/dual_simplex/sparse_cholesky.hpp @@ -162,6 +162,11 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { i_t analyze(const csc_matrix_t& A_in) override { csr_matrix_t Arow; +#ifdef WRITE_MATRIX_MARKET + FILE* fid = fopen("A.mtx", "w"); + A_in.write_matrix_market(fid); + fclose(fid); +#endif A_in.to_compressed_row(Arow); nnz = A_in.col_start[A_in.n]; @@ -205,15 +210,27 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { "cudssMatrixCreateCsr"); // Perform symbolic analysis + f_t start_analysis = tic(); + + CUDSS_CALL_AND_CHECK( + cudssExecute(handle, CUDSS_PHASE_REORDERING, solverConfig, solverData, A, cudss_x, cudss_b), + status, + "cudssExecute for reordering"); + + f_t reorder_time = toc(start_analysis); + settings_.log.printf("Reordering time %.2fs\n", reorder_time); + f_t start_symbolic = tic(); CUDSS_CALL_AND_CHECK( - cudssExecute(handle, CUDSS_PHASE_ANALYSIS, solverConfig, solverData, A, cudss_x, cudss_b), + cudssExecute(handle, CUDSS_PHASE_SYMBOLIC_FACTORIZATION, solverConfig, solverData, A, cudss_x, cudss_b), status, - "cudssExecute for analysis"); + "cudssExecute for symbolic factorization"); f_t symbolic_time = toc(start_symbolic); - settings_.log.printf("Symbolic time %.2fs\n", symbolic_time); + f_t analysis_time = toc(start_analysis); + settings_.log.printf("Symbolic factorization time %.2fs\n", symbolic_time); + settings_.log.printf("Symbolic time %.2fs\n", analysis_time); int64_t lu_nz = 0; size_t size_written = 0; CUDSS_CALL_AND_CHECK( diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp index 5fc34062c6..22d286cda3 100644 --- a/cpp/src/dual_simplex/sparse_matrix.cpp +++ b/cpp/src/dual_simplex/sparse_matrix.cpp @@ -372,6 +372,20 @@ void csc_matrix_t::print_matrix(FILE* fid) const fprintf(fid, "A = sparse(ijx(:, 1), ijx(:, 2), ijx(:, 3), %d, %d);\n", this->m, this->n); } +template +void csc_matrix_t::write_matrix_market(FILE* fid) const +{ + fprintf(fid, "%%%%MatrixMarket matrix coordinate real general\n"); + fprintf(fid, "%d %d %d\n", this->m, this->n, this->col_start[this->n]); + for (i_t j = 0; j < this->n; ++j) { + const i_t col_beg = this->col_start[j]; + const i_t col_end = this->col_start[j + 1]; + for (i_t p = col_beg; p < col_end; ++p) { + fprintf(fid, "%d %d %.16e\n", this->i[p] + 1, j + 1, this->x[p]); + } + } +} + template void csc_matrix_t::print_matrix() const { diff --git a/cpp/src/dual_simplex/sparse_matrix.hpp b/cpp/src/dual_simplex/sparse_matrix.hpp index b629991ed9..ba3637c007 100644 --- a/cpp/src/dual_simplex/sparse_matrix.hpp +++ b/cpp/src/dual_simplex/sparse_matrix.hpp @@ -87,6 +87,9 @@ class csc_matrix_t { // Prints the matrix to a file void print_matrix(FILE* fid) const; + // Writes the matrix to a file in Matrix Market format + void write_matrix_market(FILE* fid) const; + // Compute || A ||_1 = max_j (sum {i = 1 to m} | A(i, j) | ) f_t norm1() const; diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu index 154e0e7b90..2ff0949d67 100644 --- a/cpp/src/linear_programming/solve.cu +++ b/cpp/src/linear_programming/solve.cu @@ -349,6 +349,20 @@ optimization_problem_solution_t run_barrier( std::get<4>(sol_dual_simplex)); } +template +void run_barrier_thread( + dual_simplex::user_problem_t& problem, + pdlp_solver_settings_t const& settings, + std::unique_ptr< + std::tuple, dual_simplex::lp_status_t, f_t, f_t, f_t>>& + sol_ptr) +{ + // We will return the solution from the thread as a unique_ptr + sol_ptr = std::make_unique< + std::tuple, dual_simplex::lp_status_t, f_t, f_t, f_t>>( + run_barrier(problem, settings)); +} + template std::tuple, dual_simplex::lp_status_t, f_t, f_t, f_t> run_dual_simplex(dual_simplex::user_problem_t& user_problem, @@ -363,7 +377,6 @@ run_dual_simplex(dual_simplex::user_problem_t& user_problem, dual_simplex_settings.time_limit = settings.time_limit; dual_simplex_settings.iteration_limit = settings.iteration_limit; dual_simplex_settings.concurrent_halt = settings.concurrent_halt; - dual_simplex_settings.use_cudss = settings.use_cudss; if (dual_simplex_settings.concurrent_halt != nullptr) { // Don't show the dual simplex log in concurrent mode. Show the PDLP log instead dual_simplex_settings.log.log = false; @@ -555,12 +568,25 @@ optimization_problem_solution_t run_concurrent( std::ref(settings_pdlp), std::ref(sol_dual_simplex_ptr)); + dual_simplex::user_problem_t barrier_problem = dual_simplex_problem; + // Create a thread for barrier + std::unique_ptr< + std::tuple, dual_simplex::lp_status_t, f_t, f_t, f_t>> + sol_barrier_ptr; + std::thread barrier_thread(run_barrier_thread, + std::ref(barrier_problem), + std::ref(settings_pdlp), + std::ref(sol_barrier_ptr)); + // Run pdlp in the main thread auto sol_pdlp = run_pdlp(problem, settings_pdlp, is_batch_mode); // Wait for dual simplex thread to finish dual_simplex_thread.join(); + // Wait for barrier thread to finish + barrier_thread.join(); + // copy the dual simplex solution to the device auto sol_dual_simplex = convert_dual_simplex_sol(problem, std::get<0>(*sol_dual_simplex_ptr), @@ -569,6 +595,14 @@ optimization_problem_solution_t run_concurrent( std::get<3>(*sol_dual_simplex_ptr), std::get<4>(*sol_dual_simplex_ptr)); + // copy the barrier solution to the device + auto sol_barrier = convert_dual_simplex_sol(problem, + std::get<0>(*sol_barrier_ptr), + std::get<1>(*sol_barrier_ptr), + std::get<2>(*sol_barrier_ptr), + std::get<3>(*sol_barrier_ptr), + std::get<4>(*sol_barrier_ptr)); + f_t end_time = dual_simplex::toc(start_time); CUOPT_LOG_INFO("Concurrent time: %.3fs", end_time); // Check status to see if we should return the pdlp solution or the dual simplex solution @@ -584,6 +618,16 @@ optimization_problem_solution_t run_concurrent( sol_pdlp.get_additional_termination_information().number_of_steps_taken, end_time); return sol_pdlp; + } else if (sol_barrier.get_termination_status() == pdlp_termination_status_t::Optimal) { + CUOPT_LOG_INFO("Solved with barrier"); + sol_pdlp.copy_from(op_problem.get_handle_ptr(), sol_barrier); + sol_pdlp.set_solve_time(end_time); + CUOPT_LOG_INFO("Status: %s Objective: %.8e Iterations: %d Time: %.3fs", + sol_pdlp.get_termination_status_string().c_str(), + sol_pdlp.get_objective_value(), + sol_pdlp.get_additional_termination_information().number_of_steps_taken, + end_time); + return sol_pdlp; } else if (sol_pdlp.get_termination_status() == pdlp_termination_status_t::Optimal) { CUOPT_LOG_INFO("Solved with PDLP"); return sol_pdlp; From 7deeac87a6ea342d35736e758dcf72fce4c440ef Mon Sep 17 00:00:00 2001 From: Ramakrishnap <42624703+rgsl888prabhu@users.noreply.github.com> Date: Thu, 14 Aug 2025 15:44:49 -0500 Subject: [PATCH 28/31] Update pyproject.toml --- python/libcuopt/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/libcuopt/pyproject.toml b/python/libcuopt/pyproject.toml index 2b5b6f7544..8434c248d5 100644 --- a/python/libcuopt/pyproject.toml +++ b/python/libcuopt/pyproject.toml @@ -67,7 +67,7 @@ select = [ ] # PyPI limit is 700 MiB, fail CI before we get too close to that # 11.X size is 300M compressed and 12.x size is 600M compressed -max_allowed_size_compressed = '700M' +max_allowed_size_compressed = '775M' [project.scripts] cuopt_cli = "libcuopt._cli_wrapper:main" From 3410e86585cbfff3f5d76a9f3aa3332f958922ee Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Thu, 14 Aug 2025 17:10:34 -0500 Subject: [PATCH 29/31] style --- cpp/src/dual_simplex/barrier.cpp | 166 ++++++++++++----------- cpp/src/dual_simplex/barrier.hpp | 4 +- cpp/src/dual_simplex/presolve.cpp | 11 +- cpp/src/dual_simplex/solve.cpp | 69 +++++----- cpp/src/dual_simplex/sparse_cholesky.hpp | 6 +- 5 files changed, 136 insertions(+), 120 deletions(-) diff --git a/cpp/src/dual_simplex/barrier.cpp b/cpp/src/dual_simplex/barrier.cpp index 5b55a73290..141e489858 100644 --- a/cpp/src/dual_simplex/barrier.cpp +++ b/cpp/src/dual_simplex/barrier.cpp @@ -115,7 +115,13 @@ class iteration_data_t { chol->analyze(ADAT); } - void to_solution(const lp_problem_t& lp, i_t iterations, f_t objective, f_t user_objective, f_t primal_residual, f_t dual_residual, lp_solution_t& solution) + void to_solution(const lp_problem_t& lp, + i_t iterations, + f_t objective, + f_t user_objective, + f_t primal_residual, + f_t dual_residual, + lp_solution_t& solution) { solution.x = x; solution.y = y; @@ -130,11 +136,11 @@ class iteration_data_t { f_t dual_residual_norm = vector_norm_inf(dual_res); printf("Solution Dual residual: %e\n", dual_residual_norm); - solution.iterations = iterations; - solution.objective = objective; - solution.user_objective = user_objective; + solution.iterations = iterations; + solution.objective = objective; + solution.user_objective = user_objective; solution.l2_primal_residual = primal_residual; - solution.l2_dual_residual = dual_residual; + solution.l2_dual_residual = dual_residual; } void column_density(const csc_matrix_t& A, @@ -881,70 +887,69 @@ lp_status_t barrier_solver_t::check_for_suboptimal_solution( f_t& relative_complementarity_residual, lp_solution_t& solution) { - if (relative_primal_residual < 100 * options.feasibility_tol && - relative_dual_residual < 100 * options.optimality_tol && - relative_complementarity_residual < 100 * options.complementarity_tol) { - settings.log.printf("Suboptimal solution found\n"); - data.to_solution(lp, - iter, - primal_objective, - primal_objective + lp.obj_constant, - vector_norm2(data.primal_residual), - vector_norm2(data.dual_residual), - solution); - return lp_status_t::OPTIMAL; // TODO: Barrier should probably have a separate suboptimal status - } - f_t primal_residual_norm; - f_t dual_residual_norm; - f_t complementarity_residual_norm; - compute_residual_norms(data.w_save, - data.x_save, - data.y_save, - data.v_save, - data.z_save, - data, - primal_residual_norm, - dual_residual_norm, - complementarity_residual_norm); - primal_objective = data.c.inner_product(data.x_save); - relative_primal_residual = primal_residual_norm / (1.0 + norm_b); - relative_dual_residual = dual_residual_norm / (1.0 + norm_c); - relative_complementarity_residual = - complementarity_residual_norm / (1.0 + std::abs(primal_objective)); + if (relative_primal_residual < 100 * options.feasibility_tol && + relative_dual_residual < 100 * options.optimality_tol && + relative_complementarity_residual < 100 * options.complementarity_tol) { + settings.log.printf("Suboptimal solution found\n"); + data.to_solution(lp, + iter, + primal_objective, + primal_objective + lp.obj_constant, + vector_norm2(data.primal_residual), + vector_norm2(data.dual_residual), + solution); + return lp_status_t::OPTIMAL; // TODO: Barrier should probably have a separate suboptimal status + } + f_t primal_residual_norm; + f_t dual_residual_norm; + f_t complementarity_residual_norm; + compute_residual_norms(data.w_save, + data.x_save, + data.y_save, + data.v_save, + data.z_save, + data, + primal_residual_norm, + dual_residual_norm, + complementarity_residual_norm); + primal_objective = data.c.inner_product(data.x_save); + relative_primal_residual = primal_residual_norm / (1.0 + norm_b); + relative_dual_residual = dual_residual_norm / (1.0 + norm_c); + relative_complementarity_residual = + complementarity_residual_norm / (1.0 + std::abs(primal_objective)); - if (relative_primal_residual < 100 * options.feasibility_tol && - relative_dual_residual < 100 * options.optimality_tol && - relative_complementarity_residual < 100 * options.complementarity_tol) { - settings.log.printf( - "Restoring previous solution: primal %.2e dual %.2e complementarity %.2e\n", - primal_residual_norm, - dual_residual_norm, - complementarity_residual_norm); - data.to_solution(lp, iter, - primal_objective, - primal_objective + lp.obj_constant, - vector_norm2(data.primal_residual), - vector_norm2(data.dual_residual), - solution); - settings.log.printf("Suboptimal solution found\n"); - return lp_status_t::OPTIMAL; // TODO: Barrier should probably have a separate suboptimal status - } else { - settings.log.printf( - "Primal residual %.2e dual residual %.2e complementarity residual %.2e\n", - relative_primal_residual, - relative_dual_residual, - relative_complementarity_residual); - } - settings.log.printf("Search direction computation failed\n"); - return lp_status_t::NUMERICAL_ISSUES; + if (relative_primal_residual < 100 * options.feasibility_tol && + relative_dual_residual < 100 * options.optimality_tol && + relative_complementarity_residual < 100 * options.complementarity_tol) { + settings.log.printf("Restoring previous solution: primal %.2e dual %.2e complementarity %.2e\n", + primal_residual_norm, + dual_residual_norm, + complementarity_residual_norm); + data.to_solution(lp, + iter, + primal_objective, + primal_objective + lp.obj_constant, + vector_norm2(data.primal_residual), + vector_norm2(data.dual_residual), + solution); + settings.log.printf("Suboptimal solution found\n"); + return lp_status_t::OPTIMAL; // TODO: Barrier should probably have a separate suboptimal status + } else { + settings.log.printf("Primal residual %.2e dual residual %.2e complementarity residual %.2e\n", + relative_primal_residual, + relative_dual_residual, + relative_complementarity_residual); + } + settings.log.printf("Search direction computation failed\n"); + return lp_status_t::NUMERICAL_ISSUES; } template lp_status_t barrier_solver_t::solve(const barrier_solver_settings_t& options, - lp_solution_t& solution) + lp_solution_t& solution) { float64_t start_time = tic(); - lp_status_t status = lp_status_t::UNSET; + lp_status_t status = lp_status_t::UNSET; i_t n = lp.num_cols; i_t m = lp.num_rows; @@ -975,7 +980,7 @@ lp_status_t barrier_solver_t::solve(const barrier_solver_settings_tload(std::memory_order_acquire) == 1) { + settings.concurrent_halt->load(std::memory_order_acquire) == 1) { settings.log.printf("Barrier solver halted\n"); return lp_status_t::CONCURRENT_LIMIT; } @@ -1054,16 +1059,16 @@ lp_status_t barrier_solver_t::solve(const barrier_solver_settings_t settings.time_limit) { settings.log.printf("Barrier time limit exceeded\n"); @@ -1248,7 +1253,7 @@ lp_status_t barrier_solver_t::solve(const barrier_solver_settings_t::solve(const barrier_solver_settings_t(data.primal_residual), - vector_norm2(data.dual_residual), - solution); + data.to_solution(lp, + iter, + primal_objective, + primal_objective + lp.obj_constant, + vector_norm2(data.primal_residual), + vector_norm2(data.dual_residual), + solution); return lp_status_t::ITERATION_LIMIT; } diff --git a/cpp/src/dual_simplex/barrier.hpp b/cpp/src/dual_simplex/barrier.hpp index e529618aa8..b8ec5fed63 100644 --- a/cpp/src/dual_simplex/barrier.hpp +++ b/cpp/src/dual_simplex/barrier.hpp @@ -18,9 +18,9 @@ #include "dual_simplex/dense_vector.hpp" #include "dual_simplex/presolve.hpp" -#include "dual_simplex/solve.hpp" -#include "dual_simplex/solution.hpp" #include "dual_simplex/simplex_solver_settings.hpp" +#include "dual_simplex/solution.hpp" +#include "dual_simplex/solve.hpp" #include "dual_simplex/sparse_cholesky.hpp" #include "dual_simplex/sparse_matrix.hpp" #include "dual_simplex/tic_toc.hpp" diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index 4b51f1c00e..bd56f3232b 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -757,7 +757,7 @@ i_t presolve(const lp_problem_t& original, problem.obj_constant += problem.objective[j] * problem.lower[j]; problem.upper[j] -= problem.lower[j]; presolve_info.removed_lower_bounds[j] = problem.lower[j]; - problem.lower[j] = 0.0; + problem.lower[j] = 0.0; } } } @@ -1108,7 +1108,8 @@ void uncrush_solution(const presolve_info_t& presolve_info, uncrushed_z = crushed_z; } else { printf("Presolve info removed variables %d\n", presolve_info.removed_variables.size()); - // We removed some variables, so we need to map the crushed solution back to the original variables + // We removed some variables, so we need to map the crushed solution back to the original + // variables const i_t n = presolve_info.removed_variables.size() + presolve_info.remaining_variables.size(); uncrushed_x.resize(n); uncrushed_z.resize(n); @@ -1132,8 +1133,7 @@ void uncrush_solution(const presolve_info_t& presolve_info, if (num_free_variables > 0) { printf("Presolve info free variables %d\n", num_free_variables); // We added free variables so we need to map the crushed solution back to the original variables - for (i_t k = 0; k < 2 * num_free_variables; k+= 2) - { + for (i_t k = 0; k < 2 * num_free_variables; k += 2) { const i_t u = presolve_info.free_variable_pairs[k]; const i_t v = presolve_info.free_variable_pairs[k + 1]; uncrushed_x[u] -= uncrushed_x[v]; @@ -1145,7 +1145,8 @@ void uncrush_solution(const presolve_info_t& presolve_info, if (presolve_info.removed_lower_bounds.size() > 0) { printf("Presolve info removed lower bounds %d\n", presolve_info.removed_lower_bounds.size()); - // We removed some lower bounds so we need to map the crushed solution back to the original variables + // We removed some lower bounds so we need to map the crushed solution back to the original + // variables for (i_t j = 0; j < uncrushed_x.size(); j++) { uncrushed_x[j] += presolve_info.removed_lower_bounds[j]; } diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index 6941de7a6f..e3a43307c2 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -243,7 +243,7 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us const simplex_solver_settings_t& settings, lp_solution_t& solution) { - f_t start_time = tic(); + f_t start_time = tic(); lp_problem_t original_lp(1, 1, 1); // Convert the user problem to a linear program with only equality constraints @@ -271,7 +271,6 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us barrier_solver_settings_t barrier_solver_settings; lp_status_t barrier_status = barrier_solver.solve(barrier_solver_settings, barrier_solution); if (barrier_status == lp_status_t::OPTIMAL) { - std::vector scaled_residual = barrier_lp.rhs; matrix_vector_multiply(barrier_lp.A, 1.0, barrier_solution.x, -1.0, scaled_residual); f_t scaled_primal_residual = vector_norm_inf(scaled_residual); @@ -281,14 +280,16 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us for (i_t j = 0; j < scaled_dual_residual.size(); ++j) { scaled_dual_residual[j] -= barrier_lp.objective[j]; } - matrix_transpose_vector_multiply(barrier_lp.A, 1.0, barrier_solution.y, 1.0, scaled_dual_residual); + matrix_transpose_vector_multiply( + barrier_lp.A, 1.0, barrier_solution.y, 1.0, scaled_dual_residual); f_t scaled_dual_residual_norm = vector_norm_inf(scaled_dual_residual); settings.log.printf("Scaled Dual residual: %e\n", scaled_dual_residual_norm); // Unscale the solution std::vector unscaled_x(barrier_lp.num_cols); std::vector unscaled_z(barrier_lp.num_cols); - unscale_solution(column_scales, barrier_solution.x, barrier_solution.z, unscaled_x, unscaled_z); + unscale_solution( + column_scales, barrier_solution.x, barrier_solution.z, unscaled_x, unscaled_z); std::vector residual = presolved_lp.rhs; matrix_vector_multiply(presolved_lp.A, 1.0, unscaled_x, -1.0, residual); @@ -299,7 +300,8 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us for (i_t j = 0; j < unscaled_dual_residual.size(); ++j) { unscaled_dual_residual[j] -= presolved_lp.objective[j]; } - matrix_transpose_vector_multiply(presolved_lp.A, 1.0, barrier_solution.y, 1.0, unscaled_dual_residual); + matrix_transpose_vector_multiply( + presolved_lp.A, 1.0, barrier_solution.y, 1.0, unscaled_dual_residual); f_t unscaled_dual_residual_norm = vector_norm_inf(unscaled_dual_residual); settings.log.printf("Unscaled Dual residual: %e\n", unscaled_dual_residual_norm); @@ -316,12 +318,14 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us for (i_t j = 0; j < post_solve_dual_residual.size(); ++j) { post_solve_dual_residual[j] -= original_lp.objective[j]; } - matrix_transpose_vector_multiply(original_lp.A, 1.0, lp_solution.y, 1.0, post_solve_dual_residual); + matrix_transpose_vector_multiply( + original_lp.A, 1.0, lp_solution.y, 1.0, post_solve_dual_residual); f_t post_solve_dual_residual_norm = vector_norm_inf(post_solve_dual_residual); settings.log.printf("Post-solve Dual residual: %e\n", post_solve_dual_residual_norm); uncrush_primal_solution(user_problem, original_lp, lp_solution.x, solution.x); - uncrush_dual_solution(user_problem, original_lp, lp_solution.y, lp_solution.z, solution.y, solution.z); + uncrush_dual_solution( + user_problem, original_lp, lp_solution.y, lp_solution.z, solution.y, solution.z); solution.objective = barrier_solution.objective; solution.user_objective = barrier_solution.user_objective; solution.l2_primal_residual = barrier_solution.l2_primal_residual; @@ -330,23 +334,20 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us } // If we aren't doing crossover, we're done - if (!settings.crossover) { - return barrier_status; - } + if (!settings.crossover) { return barrier_status; } if (settings.crossover && barrier_status == lp_status_t::OPTIMAL) { - // Check to see if we need to add artifical variables - csc_matrix_t Atranspose(1,1,1); + csc_matrix_t Atranspose(1, 1, 1); original_lp.A.transpose(Atranspose); std::vector artificial_variables; artificial_variables.reserve(original_lp.num_rows); for (i_t i = 0; i < original_lp.num_rows; ++i) { const i_t row_start = Atranspose.col_start[i]; const i_t row_end = Atranspose.col_start[i + 1]; - bool found_slack = false; + bool found_slack = false; for (i_t p = row_start; p < row_end; ++p) { - const i_t j = Atranspose.i[p]; + const i_t j = Atranspose.i[p]; const i_t nz_col = original_lp.A.col_start[j + 1] - original_lp.A.col_start[j]; if (nz_col == 1) { found_slack = true; @@ -354,17 +355,17 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us } } if (!found_slack) { - //settings.log.printf("No slack found for row %d\n", i); + // settings.log.printf("No slack found for row %d\n", i); artificial_variables.push_back(i); } } if (artificial_variables.size() > 0) { settings.log.printf("Adding %ld artificial variables\n", artificial_variables.size()); const i_t additional_vars = artificial_variables.size(); - const i_t new_cols = original_lp.num_cols + additional_vars; - i_t col = original_lp.num_cols; - i_t nz = original_lp.A.col_start[col]; - const i_t new_nz = nz + additional_vars; + const i_t new_cols = original_lp.num_cols + additional_vars; + i_t col = original_lp.num_cols; + i_t nz = original_lp.A.col_start[col]; + const i_t new_nz = nz + additional_vars; original_lp.A.col_start.resize(new_cols + 1); original_lp.A.x.resize(new_nz); original_lp.A.i.resize(new_nz); @@ -374,30 +375,36 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us lp_solution.x.resize(new_cols); lp_solution.z.resize(new_cols); for (i_t i : artificial_variables) { - original_lp.A.x[nz] = 1.0; - original_lp.A.i[nz] = i; + original_lp.A.x[nz] = 1.0; + original_lp.A.i[nz] = i; original_lp.objective[col] = 0.0; - original_lp.lower[col] = 0.0; - original_lp.upper[col] = 0.0; - lp_solution.x[col] = 0.0; - lp_solution.z[col] = 0.0; + original_lp.lower[col] = 0.0; + original_lp.upper[col] = 0.0; + lp_solution.x[col] = 0.0; + lp_solution.z[col] = 0.0; nz++; col++; original_lp.A.col_start[col] = nz; } - original_lp.A.n = new_cols; + original_lp.A.n = new_cols; original_lp.num_cols = new_cols; - printf("nz %d =? new_nz %d =? Acol %d, num_cols %d =? new_cols %d x size %ld z size %ld\n", nz, new_nz, original_lp.A.col_start[original_lp.num_cols], original_lp.num_cols, new_cols, lp_solution.x.size(), lp_solution.z.size()); + printf("nz %d =? new_nz %d =? Acol %d, num_cols %d =? new_cols %d x size %ld z size %ld\n", + nz, + new_nz, + original_lp.A.col_start[original_lp.num_cols], + original_lp.num_cols, + new_cols, + lp_solution.x.size(), + lp_solution.z.size()); } // Run crossover lp_solution_t crossover_solution(original_lp.num_rows, original_lp.num_cols); std::vector vstatus(original_lp.num_cols); - crossover_status_t crossover_status = crossover(original_lp, barrier_settings, lp_solution, start_time, crossover_solution, vstatus); + crossover_status_t crossover_status = crossover( + original_lp, barrier_settings, lp_solution, start_time, crossover_solution, vstatus); settings.log.printf("Crossover status: %d\n", crossover_status); - if (crossover_status == crossover_status_t::OPTIMAL) { - barrier_status = lp_status_t::OPTIMAL; - } + if (crossover_status == crossover_status_t::OPTIMAL) { barrier_status = lp_status_t::OPTIMAL; } } return barrier_status; } diff --git a/cpp/src/dual_simplex/sparse_cholesky.hpp b/cpp/src/dual_simplex/sparse_cholesky.hpp index 6af5062386..4b777584f8 100644 --- a/cpp/src/dual_simplex/sparse_cholesky.hpp +++ b/cpp/src/dual_simplex/sparse_cholesky.hpp @@ -223,7 +223,8 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { f_t start_symbolic = tic(); CUDSS_CALL_AND_CHECK( - cudssExecute(handle, CUDSS_PHASE_SYMBOLIC_FACTORIZATION, solverConfig, solverData, A, cudss_x, cudss_b), + cudssExecute( + handle, CUDSS_PHASE_SYMBOLIC_FACTORIZATION, solverConfig, solverData, A, cudss_x, cudss_b), status, "cudssExecute for symbolic factorization"); @@ -249,7 +250,8 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { A_in.to_compressed_row(Arow); if (nnz != A_in.col_start[A_in.n]) { - settings_.log.printf("Error: nnz %d != A_in.col_start[A_in.n] %d\n", nnz, A_in.col_start[A_in.n]); + settings_.log.printf( + "Error: nnz %d != A_in.col_start[A_in.n] %d\n", nnz, A_in.col_start[A_in.n]); exit(1); } From 5599ceb0aa0d4c4822dd1f4c7d1964286514ac30 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Fri, 15 Aug 2025 10:37:41 -0700 Subject: [PATCH 30/31] Add in concurrent checks. Fix broken assert --- cpp/src/dual_simplex/barrier.cpp | 15 +++++++++++++++ cpp/src/dual_simplex/presolve.cpp | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/cpp/src/dual_simplex/barrier.cpp b/cpp/src/dual_simplex/barrier.cpp index 5b55a73290..4ba619e820 100644 --- a/cpp/src/dual_simplex/barrier.cpp +++ b/cpp/src/dual_simplex/barrier.cpp @@ -1039,6 +1039,11 @@ lp_status_t barrier_solver_t::solve(const barrier_solver_settings_tload(std::memory_order_acquire) == 1) { + settings.log.printf("Barrier solver halted\n"); + return lp_status_t::CONCURRENT_LIMIT; + } // Compute the affine step data.primal_rhs = data.primal_residual; data.bound_rhs = data.bound_residual; @@ -1069,6 +1074,11 @@ lp_status_t barrier_solver_t::solve(const barrier_solver_settings_tload(std::memory_order_acquire) == 1) { + settings.log.printf("Barrier solver halted\n"); + return lp_status_t::CONCURRENT_LIMIT; + } f_t step_primal_aff = std::min(max_step_to_boundary(data.w, data.dw_aff), max_step_to_boundary(data.x, data.dx_aff)); @@ -1133,6 +1143,11 @@ lp_status_t barrier_solver_t::solve(const barrier_solver_settings_tload(std::memory_order_acquire) == 1) { + settings.log.printf("Barrier solver halted\n"); + return lp_status_t::CONCURRENT_LIMIT; + } // dw = dw_aff + dw_cc // dx = dx_aff + dx_cc diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index 4b51f1c00e..55168d9a98 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -819,7 +819,7 @@ i_t presolve(const lp_problem_t& original, problem.lower[j] = 0.0; } } - assert(problem.A.p[num_cols] == nnz); + assert(problem.A.col_start[num_cols] == nnz); problem.A.n = num_cols; problem.num_cols = num_cols; } From 89cbe2a54df02304c425d1057a25303059862f38 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Mon, 18 Aug 2025 09:19:56 -0500 Subject: [PATCH 31/31] fix issue --- ci/utils/install_cudss.sh | 9 --------- 1 file changed, 9 deletions(-) diff --git a/ci/utils/install_cudss.sh b/ci/utils/install_cudss.sh index 5d37396de0..d885ee385c 100755 --- a/ci/utils/install_cudss.sh +++ b/ci/utils/install_cudss.sh @@ -29,12 +29,3 @@ else echo "Neither dnf nor apt-get found. Cannot install cudss dependencies." exit 1 fi - - - - - - - - -