From 7d9b9cf49be9deab20b0469f77e04672f8069b09 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Thu, 12 Feb 2026 20:33:09 -0800 Subject: [PATCH 1/4] Do manual work estimates in dual simplex --- cpp/src/barrier/barrier.cu | 2 +- cpp/src/barrier/cusparse_view.cu | 6 +- cpp/src/dual_simplex/basis_solves.cpp | 106 ++- cpp/src/dual_simplex/basis_solves.hpp | 9 +- cpp/src/dual_simplex/basis_updates.cpp | 142 +++- cpp/src/dual_simplex/basis_updates.hpp | 9 + .../bound_flipping_ratio_test.cpp | 23 +- .../bound_flipping_ratio_test.hpp | 3 + cpp/src/dual_simplex/bounds_strengthening.cpp | 15 +- cpp/src/dual_simplex/crossover.cpp | 31 +- cpp/src/dual_simplex/folding.cpp | 20 +- cpp/src/dual_simplex/initial_basis.cpp | 3 +- cpp/src/dual_simplex/phase2.cpp | 698 +++++++++--------- cpp/src/dual_simplex/primal.cpp | 8 +- cpp/src/dual_simplex/right_looking_lu.cpp | 146 +++- cpp/src/dual_simplex/right_looking_lu.hpp | 9 +- cpp/src/dual_simplex/singletons.cpp | 89 ++- cpp/src/dual_simplex/singletons.hpp | 19 +- cpp/src/dual_simplex/sparse_matrix.cpp | 51 +- cpp/src/dual_simplex/sparse_matrix.hpp | 34 +- cpp/src/dual_simplex/sparse_vector.cpp | 51 +- cpp/src/dual_simplex/sparse_vector.hpp | 15 +- cpp/src/dual_simplex/triangle_solve.cpp | 37 +- cpp/src/dual_simplex/triangle_solve.hpp | 110 +-- cpp/src/dual_simplex/vector_math.cpp | 54 +- cpp/src/dual_simplex/vector_math.hpp | 39 +- cpp/src/mip_heuristics/problem/problem.cu | 6 +- cpp/src/pdlp/translate.hpp | 12 +- 28 files changed, 973 insertions(+), 774 deletions(-) diff --git a/cpp/src/barrier/barrier.cu b/cpp/src/barrier/barrier.cu index 386e654488..ad20034f46 100644 --- a/cpp/src/barrier/barrier.cu +++ b/cpp/src/barrier/barrier.cu @@ -1390,7 +1390,7 @@ class iteration_data_t { // v = alpha * A * w + beta * v = alpha * A * Dinv * A^T * y + beta * v matrix_vector_multiply(A, alpha, w, beta, v); if (debug) { - printf("||A|| = %.16e\n", vector_norm2(A.x.underlying())); + printf("||A|| = %.16e\n", vector_norm2(A.x)); printf("||w|| = %.16e\n", vector_norm2(w)); printf("||v|| = %.16e\n", vector_norm2(v)); } diff --git a/cpp/src/barrier/cusparse_view.cu b/cpp/src/barrier/cusparse_view.cu index 05488e77e2..11d036504f 100644 --- a/cpp/src/barrier/cusparse_view.cu +++ b/cpp/src/barrier/cusparse_view.cu @@ -159,9 +159,9 @@ cusparse_view_t::cusparse_view_t(raft::handle_t const* handle_ptr, A_indices_ = device_copy(indices, handle_ptr->get_stream()); A_data_ = device_copy(data, handle_ptr->get_stream()); - A_T_offsets_ = device_copy(A.col_start.underlying(), handle_ptr->get_stream()); - A_T_indices_ = device_copy(A.i.underlying(), handle_ptr->get_stream()); - A_T_data_ = device_copy(A.x.underlying(), handle_ptr->get_stream()); + A_T_offsets_ = device_copy(A.col_start, handle_ptr->get_stream()); + A_T_indices_ = device_copy(A.i, handle_ptr->get_stream()); + A_T_data_ = device_copy(A.x, handle_ptr->get_stream()); cusparseCreateCsr(&A_, rows, diff --git a/cpp/src/dual_simplex/basis_solves.cpp b/cpp/src/dual_simplex/basis_solves.cpp index 17f997f4a9..8d9bd71484 100644 --- a/cpp/src/dual_simplex/basis_solves.cpp +++ b/cpp/src/dual_simplex/basis_solves.cpp @@ -17,6 +17,7 @@ namespace cuopt::linear_programming::dual_simplex { +// Work = 3 * m template i_t reorder_basic_list(const std::vector& q, std::vector& basic_list) { @@ -165,7 +166,8 @@ i_t factorize_basis(const csc_matrix_t& A, std::vector& pinv, std::vector& q, std::vector& deficient, - std::vector& slacks_needed) + std::vector& slacks_needed, + f_t& work_estimate) { raft::common::nvtx::range scope("LU::factorize_basis"); const i_t m = basic_list.size(); @@ -177,14 +179,18 @@ i_t factorize_basis(const csc_matrix_t& A, // TODO: We should see if we can find the singletons without explictly forming the matrix B f_t fact_start = tic(); csc_matrix_t B(A.m, A.m, 1); - form_b(A, basic_list, B); + work_estimate += A.m; + form_b(A, basic_list, B, work_estimate); std::vector row_perm(m); std::vector col_perm(m); + work_estimate += 2*m; i_t row_singletons; i_t col_singletons; - find_singletons(B, row_singletons, row_perm, col_singletons, col_perm); + find_singletons(B, row_singletons, row_perm, col_singletons, col_perm, work_estimate); std::vector row_perm_inv(m); + work_estimate += m; inverse_permutation(row_perm, row_perm_inv); + work_estimate += 3*m; #ifdef PRINT_SINGLETONS printf("Singletons row %d col %d num %d\n", @@ -219,6 +225,7 @@ i_t factorize_basis(const csc_matrix_t& A, const i_t Bnz = B.col_start[m]; L.reallocate(Bnz); U.reallocate(Bnz); + work_estimate += 2*Bnz; i_t Lnz = 0; // Fill in L(:, 0:col_singletons-1) with I for (i_t k = 0; k < col_singletons; ++k) { @@ -228,6 +235,7 @@ i_t factorize_basis(const csc_matrix_t& A, Lnz++; assert(Lnz <= Bnz); } + work_estimate += 3*col_singletons; i_t Unz = 0; // Fill in U(:, 0:col_singletons-1) with U_11 @@ -242,8 +250,10 @@ i_t factorize_basis(const csc_matrix_t& A, Unz++; assert(Unz <= Bnz); } + work_estimate += 4*(col_end - col_start); } if (col_singletons > 0) { U.col_start[col_singletons] = Unz; } + work_estimate += 3*col_singletons; // Ensure U(i, i) is at the end of column i for U_11 for (i_t k = 0; k < col_singletons; ++k) { @@ -259,7 +269,9 @@ i_t factorize_basis(const csc_matrix_t& A, break; } } + work_estimate += (col_before_end - col_start); } + work_estimate += 2*col_singletons; // Fill in L(:, col_singletons:col_singletons+row_singletons-1) with L_22 and L_32 // and U(:, col_singletons:col_singletons+row_singletons-1) with U_12 and I @@ -284,6 +296,7 @@ i_t factorize_basis(const csc_matrix_t& A, assert(Unz <= Bnz); } } + work_estimate += 5*(col_end - col_start); // add in the identity in U U.i[Unz] = k; U.x[Unz] = 1.0; @@ -291,6 +304,8 @@ i_t factorize_basis(const csc_matrix_t& A, assert(Unz <= Bnz); } L.col_start[num_singletons] = Lnz; + work_estimate += 7*(num_singletons - col_singletons); + // Ensure L(i, i) is at the beginning of column i for L_22 and L32 for (i_t k = col_singletons; k < num_singletons; ++k) { @@ -310,8 +325,10 @@ i_t factorize_basis(const csc_matrix_t& A, break; } } + work_estimate += (col_end - col_start); assert(found_diag); } + work_estimate += 4*(num_singletons - col_singletons); // Compute how many nonzeros in B we have used so far so we know // how many nonzeros are in S @@ -322,7 +339,7 @@ i_t factorize_basis(const csc_matrix_t& A, f_t actual_factor = 0; if (Sdim > 0) { csc_matrix_t S(Sdim, Sdim, Snz_max); - + work_estimate += Sdim + Snz_max; // Build S i_t Snz = 0; for (i_t k = num_singletons; k < m; ++k) { @@ -341,13 +358,18 @@ i_t factorize_basis(const csc_matrix_t& A, assert(Snz <= Snz_max); } } + work_estimate += 3*(col_end - col_start); } S.col_start[Sdim] = Snz; // Finalize S + work_estimate += 4*(m - num_singletons); csc_matrix_t SL(Sdim, Sdim, Snz); csc_matrix_t SU(Sdim, Sdim, Snz); + work_estimate += 2*Sdim + 2*Snz; + // Factorize S std::vector S_perm_inv(Sdim); + work_estimate += Sdim; std::optional> empty = std::nullopt; f_t actual_factor_start = tic(); @@ -356,6 +378,8 @@ i_t factorize_basis(const csc_matrix_t& A, for (i_t h = 0; h < Sdim; ++h) { identity[h] = h; } + work_estimate += 3*Sdim; + Srank = right_looking_lu(S, settings, settings.threshold_partial_pivoting_tol, @@ -363,7 +387,8 @@ i_t factorize_basis(const csc_matrix_t& A, S_col_perm, SL, SU, - S_perm_inv); + S_perm_inv, + work_estimate); if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { settings.log.printf("Concurrent halt\n"); return CONCURRENT_HALT_RETURN; @@ -375,33 +400,40 @@ i_t factorize_basis(const csc_matrix_t& A, for (i_t h = Srank; h < Sdim; ++h) { deficient[h - Srank] = col_perm[num_singletons + S_col_perm[h]]; } + work_estimate += 3*(Sdim - Srank); // Get S_perm std::vector S_perm(Sdim); inverse_permutation(S_perm_inv, S_perm); + work_estimate += 4*Sdim; // Get the slacks needed slacks_needed.resize(Sdim - Srank); for (i_t h = Srank; h < Sdim; ++h) { slacks_needed[h - Srank] = row_perm[num_singletons + S_perm[h]]; } + work_estimate += 3*(Sdim - Srank); return -1; } // Need to permute col_perm[k] according to q std::vector col_perm_sav(m - num_singletons); + work_estimate += (m - num_singletons); i_t q_j = 0; for (i_t h = num_singletons; h < m; ++h) { col_perm_sav[q_j] = col_perm[h]; q_j++; } + work_estimate += 2*(m - num_singletons); q_j = 0; for (i_t h = num_singletons; h < m; ++h) { col_perm[h] = col_perm_sav[S_col_perm[q_j]]; q_j++; } + work_estimate += 2*(m - num_singletons); std::vector S_perm(m); inverse_permutation(S_perm_inv, S_perm); + work_estimate += 4*m; actual_factor = toc(actual_factor_start); // Permute the rows of L_32 according to S_perm_inv @@ -415,11 +447,13 @@ i_t factorize_basis(const csc_matrix_t& A, L.i[p] = new_i; } } + work_estimate += (col_end - col_start); } + work_estimate += 3*(num_singletons - col_singletons); const i_t SLnz = SL.col_start[Sdim]; const i_t Lnz_max = Lnz + SLnz; - if (Lnz_max > Bnz) { L.reallocate(Lnz_max); } + if (Lnz_max > Bnz) { L.reallocate(Lnz_max); work_estimate += Lnz_max;} // Fill in L(:, num_singletons:m-1) with L_33 for (i_t k = num_singletons; k < m; ++k) { @@ -434,13 +468,16 @@ i_t factorize_basis(const csc_matrix_t& A, Lnz++; assert(Lnz <= Lnz_max); } + work_estimate += 4*(col_end - col_start); } + work_estimate += 3*(m - num_singletons); + assert(Lnz == Lnz_max); L.col_start[m] = Lnz; // Finalize L const i_t SUnz = SU.col_start[Sdim]; const i_t Unz_max = Unz + SUnz + (Bnz - Bnz_used); - if (Unz_max > Bnz) { U.reallocate(Unz_max); } + if (Unz_max > Bnz) { U.reallocate(Unz_max); work_estimate += Unz_max;} // Fill in U(:, num_singletons:m-1) with U_13 and U_33 for (i_t k = num_singletons; k < m; ++k) { @@ -459,6 +496,7 @@ i_t factorize_basis(const csc_matrix_t& A, assert(Unz <= Unz_max); } } + work_estimate += 3*(B_col_end - B_col_start); // U_33 const i_t l = k - num_singletons; @@ -471,7 +509,10 @@ i_t factorize_basis(const csc_matrix_t& A, Unz++; assert(Unz <= Unz_max); } + work_estimate += 4*(U_col_end - U_col_start); } + work_estimate += 5*(m - num_singletons); + assert(Unz <= Unz_max); U.col_start[m] = Unz; // Finalize U @@ -479,12 +520,15 @@ i_t factorize_basis(const csc_matrix_t& A, for (i_t k = 0; k < Sdim; ++k) { last_perm[k] = row_perm[num_singletons + k]; } + work_estimate += 3*Sdim; // Fix up row permutations for (i_t k = 0; k < Sdim; ++k) { row_perm[num_singletons + k] = last_perm[S_perm[k]]; } + work_estimate += 3*Sdim; inverse_permutation(row_perm, row_perm_inv); + work_estimate += 3*row_perm.size(); } else { L.col_start[m] = Lnz; // Finalize L U.col_start[m] = Unz; // Finalize U @@ -563,13 +607,16 @@ i_t factorize_basis(const csc_matrix_t& A, if (write_basis) { csc_matrix_t B(m, m, 1); - form_b(A, basic_list, B); + form_b(A, basic_list, B, work_estimate); write_basis_info(B); } q.resize(m); + work_estimate += m; f_t fact_start = tic(); - rank = right_looking_lu(A, settings, medium_tol, basic_list, q, L, U, pinv); + rank = right_looking_lu(A, settings, medium_tol, basic_list, q, L, U, pinv, work_estimate); inverse_permutation(pinv, p); + work_estimate += 3*pinv.size(); + if (rank != m) { // Get the rank deficient columns deficient.clear(); @@ -577,11 +624,13 @@ i_t factorize_basis(const csc_matrix_t& A, for (i_t h = rank; h < m; ++h) { deficient[h - rank] = q[h]; } + work_estimate += 3*(m - rank); // Get the slacks needed slacks_needed.resize(m - rank); for (i_t h = rank; h < m; ++h) { slacks_needed[h - rank] = p[h]; } + work_estimate += 3*(m - rank); } if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { settings.log.printf("Concurrent halt\n"); @@ -596,7 +645,7 @@ i_t factorize_basis(const csc_matrix_t& A, multiply(L, U, C); csc_matrix_t B(m, m, 1); - form_b(A, basic_list, B); + form_b(A, basic_list, B, work_estimate); csc_matrix_t D(m, m, 1); B.permute_rows_and_cols(pinv, q, D); @@ -623,7 +672,8 @@ i_t basis_repair(const csc_matrix_t& A, std::vector& basis_list, std::vector& nonbasic_list, std::vector& superbasic_list, - std::vector& vstatus) + std::vector& vstatus, + f_t& work_estimate) { const i_t m = A.m; const i_t n = A.n; @@ -632,6 +682,7 @@ i_t basis_repair(const csc_matrix_t& A, // Create slack_map std::vector slack_map(m); // slack_map[i] = j if column j is e_i + work_estimate += m; i_t slacks_found = 0; for (i_t j = n - 1; j >= n - m; j--) { const i_t col_start = A.col_start[j]; @@ -643,23 +694,29 @@ i_t basis_repair(const csc_matrix_t& A, slacks_found++; } } + work_estimate += 3*m + 2*slacks_found; assert(slacks_found == m); // Create nonbasic_map std::vector nonbasic_map( n, -1); // nonbasic_map[j] = p if nonbasic[p] = j, -1 if j is basic/superbasic + work_estimate += n; const i_t num_nonbasic = nonbasic_list.size(); for (i_t k = 0; k < num_nonbasic; ++k) { nonbasic_map[nonbasic_list[k]] = k; } + work_estimate += 2*num_nonbasic; + // Create a superbasic_map std::vector superbasic_map( n, -1); // superbasic_map[j] = p if superbasic[p] = j, -1 if j is basic/nonbasic + work_estimate += n; const i_t num_superbasic = superbasic_list.size(); for (i_t k = 0; k < num_superbasic; ++k) { superbasic_map[superbasic_list[k]] = k; } + work_estimate += 2*num_superbasic; const i_t columns_to_replace = deficient.size(); for (i_t k = 0; k < columns_to_replace; ++k) { @@ -687,6 +744,7 @@ i_t basis_repair(const csc_matrix_t& A, } vstatus[replace_j] = variable_status_t::BASIC; } + work_estimate += 11*columns_to_replace; return 0; } @@ -694,7 +752,8 @@ i_t basis_repair(const csc_matrix_t& A, template i_t form_b(const csc_matrix_t& A, const std::vector& basic_list, - csc_matrix_t& B) + csc_matrix_t& B, + f_t& work_estimate) { const i_t m = A.m; i_t Bnz = 0; @@ -704,7 +763,9 @@ i_t form_b(const csc_matrix_t& A, const i_t col_end = A.col_start[j + 1]; Bnz += (col_end - col_start); } + work_estimate += 3*m; B.reallocate(Bnz); + work_estimate += 2*Bnz; const i_t Bnz_check = Bnz; Bnz = 0; for (i_t k = 0; k < m; ++k) { @@ -718,6 +779,8 @@ i_t form_b(const csc_matrix_t& A, Bnz++; } } + work_estimate += 4*m; + work_estimate += 4*Bnz; B.col_start[m] = Bnz; assert(Bnz_check == Bnz); return 0; @@ -784,9 +847,10 @@ i_t b_transpose_solve(const csc_matrix_t& L, raft::common::nvtx::range scope("LU::b_transpose_solve"); + f_t work_estimate = 0; // Solve for r such that U'*r = c std::vector r = rhs; - upper_triangular_transpose_solve(U, r); + upper_triangular_transpose_solve(U, r, work_estimate); #ifdef BASIS_DEBUG // err = norm(U'*r - c, inf) std::vector residual = rhs; @@ -798,7 +862,7 @@ i_t b_transpose_solve(const csc_matrix_t& L, #endif // Solve for w such that L'*w = r - lower_triangular_transpose_solve(L, r); + lower_triangular_transpose_solve(L, r, work_estimate); #ifdef BASIS_DEBUG // err2 = norm(L'*w -r, inf) matrix_transpose_vector_multiply(L, 1.0, r, -1.0, residual2); @@ -825,13 +889,14 @@ i_t b_solve(const csc_matrix_t& L, assert(p.size() == m); assert(rhs.size() == m); assert(solution.size() == m); + f_t work_estimate = 0; // P*B = L*U // B*x = b // P*B*x = P*b = b' permute_vector(p, rhs, solution); // Solve for v such that L*v = b' - lower_triangular_solve(L, solution); + lower_triangular_solve(L, solution, work_estimate); #ifdef BASIS_DEBUG std::vector residual1(m); @@ -844,7 +909,7 @@ i_t b_solve(const csc_matrix_t& L, #endif // Solve for x such that U*x = v - upper_triangular_solve(U, solution); + upper_triangular_solve(U, solution, work_estimate); #ifdef BASIS_DEBUG matrix_vector_multiply(U, 1.0, solution, -1.0, residual2); const f_t err2 = vector_norm_inf(residual2); @@ -874,7 +939,8 @@ template int factorize_basis(const csc_matrix_t& A, std::vector& pinv, std::vector& q, std::vector& deficient, - std::vector& slacks_needed); + std::vector& slacks_needed, + double& work_estimate); template int basis_repair(const csc_matrix_t& A, const simplex_solver_settings_t& settings, @@ -885,11 +951,13 @@ template int basis_repair(const csc_matrix_t& A, std::vector& basis_list, std::vector& nonbasic_list, std::vector& superbasic_list, - std::vector& vstatus); + std::vector& vstatus, + double& work_estimate); template int form_b(const csc_matrix_t& A, const std::vector& basic_list, - csc_matrix_t& B); + csc_matrix_t& B, + double& work_estimate); template int b_multiply(const lp_problem_t& lp, const std::vector& basic_list, diff --git a/cpp/src/dual_simplex/basis_solves.hpp b/cpp/src/dual_simplex/basis_solves.hpp index 59b4725e42..fbeba565f1 100644 --- a/cpp/src/dual_simplex/basis_solves.hpp +++ b/cpp/src/dual_simplex/basis_solves.hpp @@ -36,7 +36,8 @@ i_t factorize_basis(const csc_matrix_t& A, std::vector& pinv, std::vector& q, std::vector& deficient, - std::vector& slacks_need); + std::vector& slacks_need, + f_t& work_estimate); // Repair the basis by bringing in slacks template @@ -49,13 +50,15 @@ i_t basis_repair(const csc_matrix_t& A, std::vector& basis_list, std::vector& nonbasic_list, std::vector& superbasic_list, - std::vector& vstatus); + std::vector& vstatus, + f_t& work_estimate); // Form the basis matrix B = A(:, basic_list) template i_t form_b(const csc_matrix_t& A, const std::vector& basic_list, - csc_matrix_t& B); + csc_matrix_t& B, + f_t& work_estimate); // y = B*x = sum_{j in basis} A(:, j) * x(k) template diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index 71dce2e39c..ba43251ed5 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -237,7 +237,8 @@ i_t basis_update_t::l_solve(std::vector& rhs) const #endif // First solve // L0*x0 = b - dual_simplex::lower_triangular_solve(L0_, rhs); + f_t work_estimate = 0; + dual_simplex::lower_triangular_solve(L0_, rhs, work_estimate); #ifdef CHECK_LOWER_SOLVE { matrix_vector_multiply(L0_, 1.0, rhs, -1.0, residual); @@ -275,8 +276,9 @@ i_t basis_update_t::l_solve(sparse_vector_t& rhs) const // L0*x0 = b const i_t m = L0_.m; + f_t work_estimate = 0; i_t top = sparse_triangle_solve( - rhs, std::nullopt, xi_workspace_, L0_, x_workspace_.data()); + rhs, std::nullopt, xi_workspace_, L0_, x_workspace_.data(), work_estimate); solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs #ifdef CHECK_L_SOLVE @@ -370,7 +372,8 @@ i_t basis_update_t::l_transpose_solve(std::vector& rhs) const } // L0'*y = c // TODO: handle a sparse rhs - dual_simplex::lower_triangular_transpose_solve(L0_, rhs); + f_t work_estimate = 0; + dual_simplex::lower_triangular_transpose_solve(L0_, rhs, work_estimate); return 0; } @@ -500,8 +503,9 @@ i_t basis_update_t::l_transpose_solve(sparse_vector_t& rhs) rhs.to_dense(cprime_dense); #endif + f_t work_estimate = 0; i_t top = sparse_triangle_solve( - rhs, std::nullopt, xi_workspace_, L0_transpose_, x_workspace_.data()); + rhs, std::nullopt, xi_workspace_, L0_transpose_, x_workspace_.data(), work_estimate); solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs #ifdef CHECK_LOWER_TRANSPOSE_SOLVE @@ -558,7 +562,8 @@ i_t basis_update_t::u_solve(std::vector& x) const std::vector residual = bprime; #endif - dual_simplex::upper_triangular_solve(U_, bprime); + f_t work_estimate = 0; + dual_simplex::upper_triangular_solve(U_, bprime, work_estimate); #ifdef CHECK_UPPER_SOLVE matrix_vector_multiply(U_, 1.0, bprime, -1.0, residual); @@ -582,8 +587,9 @@ i_t basis_update_t::u_solve(sparse_vector_t& rhs) const sparse_vector_t bprime(m, 0); rhs.inverse_permute_vector(col_permutation_, bprime); + f_t work_estimate = 0; i_t top = sparse_triangle_solve( - bprime, std::nullopt, xi_workspace_, U_, x_workspace_.data()); + bprime, std::nullopt, xi_workspace_, U_, x_workspace_.data(), work_estimate); solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs rhs.inverse_permute_vector(inverse_col_permutation_); @@ -604,7 +610,8 @@ i_t basis_update_t::u_transpose_solve(std::vector& x) const const i_t m = U_.m; std::vector bprime(m); inverse_permute_vector(col_permutation_, x, bprime); - dual_simplex::upper_triangular_transpose_solve(U_, bprime); + f_t work_estimate = 0; + dual_simplex::upper_triangular_transpose_solve(U_, bprime, work_estimate); permute_vector(col_permutation_, bprime, x); return 0; } @@ -649,8 +656,9 @@ i_t basis_update_t::u_transpose_solve(sparse_vector_t& rhs) #endif // U'*y = bprime + f_t work_estimate = 0; i_t top = sparse_triangle_solve( - bprime, std::nullopt, xi_workspace_, U_transpose_, x_workspace_.data()); + bprime, std::nullopt, xi_workspace_, U_transpose_, x_workspace_.data(), work_estimate); solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs #ifdef CHECK_WORKSPACE @@ -872,7 +880,8 @@ i_t basis_update_t::update(std::vector& utilde, i_t leaving_index // Solve U'*w = delta*et std::vector w(m); w[t] = delta; - dual_simplex::upper_triangular_transpose_solve(U_, w); + f_t work_estimate = 0; + dual_simplex::upper_triangular_transpose_solve(U_, w, work_estimate); #ifdef PARANOID { // Compute the residual of the solve @@ -1111,7 +1120,9 @@ i_t basis_update_t::lower_triangular_multiply(const csc_matrix_t i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts_basic) @@ -1121,10 +1132,13 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts // Solve for U^T W^T = C_B^T // We do this one row at a time of C_B csc_matrix_t WT(m, cuts_basic.m, 0); + work_estimate_ += cuts_basic.m; + i_t WT_nz = 0; for (i_t k = 0; k < cuts_basic.m; k++) { sparse_vector_t rhs(cuts_basic, k); + work_estimate_ += rhs.i.size(); u_transpose_solve(rhs); WT.col_start[k] = WT_nz; for (i_t q = 0; q < rhs.i.size(); q++) { @@ -1132,6 +1146,7 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts WT.x.push_back(rhs.x[q]); WT_nz++; } + work_estimate_ += 3 * rhs.i.size(); } WT.col_start[cuts_basic.m] = WT_nz; @@ -1161,6 +1176,8 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts #endif csc_matrix_t V(cuts_basic.m, m, 0); + work_estimate_ += m; + i_t V_nz = 0; if (num_updates_ > 0) { // W = V T_0 ... T_{num_updates_ - 1} // or V = W T_{num_updates_ - 1}^{-1} ... T_0^{-1} @@ -1173,11 +1190,12 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts // for appending to L0 csr_matrix_t V_row(cuts_basic.m, m, 0); - i_t V_nz = 0; + work_estimate_ += m; const f_t zero_tol = 1e-13; for (i_t h = 0; h < cuts_basic.m; h++) { sparse_vector_t rhs(WT, h); scatter_into_workspace(rhs); + work_estimate_ += 2 * rhs.i.size(); i_t nz = rhs.i.size(); for (i_t k = num_updates_ - 1; k >= 0; --k) { // T_k^{-T} = ( I - v u^T/(1 + u^T v)) @@ -1201,10 +1219,12 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts V_row.x.push_back(rhs.x[q]); V_nz++; } + work_estimate_ += 2 * rhs.i.size(); } V_row.row_start[cuts_basic.m] = V_nz; V_row.to_compressed_col(V); + work_estimate_ += 3*V_nz; #ifdef CHECK_V csc_matrix_t CB_col(cuts_basic.m, m, 0); @@ -1245,6 +1265,7 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts } else { // W = V WT.transpose(V); + work_estimate_ += 3*V_nz; } // Extend u_i, v_i for i = 0, ..., num_updates_ - 1 @@ -1254,9 +1275,10 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts // L = [ L0 0 ] // [ V I ] - i_t V_nz = V.col_start[m]; + V_nz = V.col_start[m]; i_t L_nz = L0_.col_start[m]; csc_matrix_t new_L(m + cuts_basic.m, m + cuts_basic.m, L_nz + V_nz + cuts_basic.m); + work_estimate_ += (L_nz + V_nz + cuts_basic.m) + (m + cuts_basic.m); i_t predicted_nz = L_nz + V_nz + cuts_basic.m; L_nz = 0; for (i_t j = 0; j < m; ++j) { @@ -1276,16 +1298,19 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts L_nz++; } } + work_estimate_ += 4*L_nz; for (i_t j = m; j < m + cuts_basic.m; ++j) { new_L.col_start[j] = L_nz; new_L.i[L_nz] = j; new_L.x[L_nz] = 1.0; L_nz++; } + work_estimate_ += 3*cuts_basic.m; new_L.col_start[m + cuts_basic.m] = L_nz; assert(L_nz == predicted_nz); L0_ = new_L; + work_estimate_ += 2*L_nz; // Adjust U // U = [ U0 0 ] @@ -1295,12 +1320,14 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts U0_.col_start.resize(m + cuts_basic.m + 1); U0_.i.resize(U_nz + cuts_basic.m); U0_.x.resize(U_nz + cuts_basic.m); + work_estimate_ += 2*(U_nz + cuts_basic.m) + (m + cuts_basic.m); for (i_t k = m; k < m + cuts_basic.m; ++k) { U0_.col_start[k] = U_nz; U0_.i[U_nz] = k; U0_.x[U_nz] = 1.0; U_nz++; } + work_estimate_ += 3*cuts_basic.m; U0_.col_start[m + cuts_basic.m] = U_nz; U0_.n = m + cuts_basic.m; U0_.m = m + cuts_basic.m; @@ -1310,14 +1337,17 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts // Adjust row_permutation_ and inverse_row_permutation_ row_permutation_.resize(m + cuts_basic.m); inverse_row_permutation_.resize(m + cuts_basic.m); + work_estimate_ += 2*(m + cuts_basic.m); for (i_t k = m; k < m + cuts_basic.m; ++k) { row_permutation_[k] = k; } + work_estimate_ += cuts_basic.m; inverse_permutation(row_permutation_, inverse_row_permutation_); // Adjust workspace sizes xi_workspace_.resize(2 * (m + cuts_basic.m), 0); x_workspace_.resize(m + cuts_basic.m, 0.0); + work_estimate_ += 3*(m + cuts_basic.m); return 0; } @@ -1331,6 +1361,7 @@ void basis_update_mpf_t::gather_into_sparse_vector(i_t nz, out.x.clear(); out.i.reserve(nz); out.x.reserve(nz); + work_estimate_ += 2*nz; const f_t zero_tol = 1e-13; for (i_t k = 0; k < nz; ++k) { const i_t i = xi_workspace_[m + k]; @@ -1342,6 +1373,8 @@ void basis_update_mpf_t::gather_into_sparse_vector(i_t nz, xi_workspace_[i] = 0; x_workspace_[i] = 0.0; } + work_estimate_ += 5*nz; + work_estimate_ += 3*out.i.size(); } template @@ -1355,10 +1388,12 @@ void basis_update_mpf_t::solve_to_workspace(i_t top) const xi_workspace_[p] = 0; nz++; } + work_estimate_ += 3*(m - top); for (i_t k = 0; k < nz; ++k) { const i_t i = xi_workspace_[m + k]; xi_workspace_[i] = 1; } + work_estimate_ += 2*nz; } template @@ -1372,6 +1407,7 @@ void basis_update_mpf_t::solve_to_sparse_vector(i_t top, out.i.clear(); out.x.reserve(nz); out.i.reserve(nz); + work_estimate_ += 2*nz; i_t k = 0; const f_t zero_tol = 1e-13; for (i_t p = top; p < m; ++p) { @@ -1384,6 +1420,7 @@ void basis_update_mpf_t::solve_to_sparse_vector(i_t top, xi_workspace_[p] = 0; k++; } + work_estimate_ += 4*k + 3*out.i.size(); } template @@ -1397,10 +1434,12 @@ i_t basis_update_mpf_t::scatter_into_workspace(const sparse_vector_t::grow_storage(i_t nz, i_t& S_start, i_t& S_nz) const i_t new_last_S_col = last_S_col + 2; if (new_last_S_col >= S_.col_start.size()) { S_.col_start.resize(new_last_S_col + refactor_frequency_); + work_estimate_ += new_last_S_col + refactor_frequency_; } S_nz = S_.col_start[last_S_col]; if (S_nz + nz > S_.i.size()) { S_.i.resize(std::max(2 * S_nz, S_nz + nz)); S_.x.resize(std::max(2 * S_nz, S_nz + nz)); + work_estimate_ += 2*std::max(2 * S_nz, S_nz + nz); } S_start = last_S_col; assert(S_nz + nz <= S_.i.size()); @@ -1431,6 +1472,7 @@ i_t basis_update_mpf_t::nonzeros(const std::vector& x) const for (i_t i = 0; i < xsz; ++i) { if (x[i] != 0.0) { nz++; } } + work_estimate_ += xsz; return nz; } @@ -1445,6 +1487,7 @@ f_t basis_update_mpf_t::dot_product(i_t col, const std::vector& x const i_t i = S_.i[p]; dot += S_.x[p] * x[i]; } + work_estimate_ += 3 * (col_end - col_start); return dot; } @@ -1457,10 +1500,15 @@ f_t basis_update_mpf_t::dot_product(i_t col, f_t dot = 0.0; const i_t col_start = S_.col_start[col]; const i_t col_end = S_.col_start[col + 1]; + i_t nz_mark = 0; for (i_t p = col_start; p < col_end; ++p) { const i_t i = S_.i[p]; - if (mark[i]) { dot += S_.x[p] * x[i]; } + if (mark[i]) { + dot += S_.x[p] * x[i]; + nz_mark++; + } } + work_estimate_ += 2*nz_mark + (col_end - col_start); return dot; } @@ -1477,6 +1525,7 @@ void basis_update_mpf_t::add_sparse_column(const csc_matrix_t @@ -1490,6 +1539,7 @@ void basis_update_mpf_t::add_sparse_column(const csc_matrix_t::add_sparse_column(const csc_matrix_t @@ -1528,14 +1579,17 @@ i_t basis_update_mpf_t::b_transpose_solve(const std::vector& rhs, // Solve for r such that U'*r = c std::vector r = rhs; + work_estimate_ += 2*r.size(); u_transpose_solve(r); UTsol = r; + work_estimate_ += 2*r.size(); // Solve for w such that L'*w = r l_transpose_solve(r); // Compute y = P'*w inverse_permute_vector(row_permutation_, r, solution); + work_estimate_ += 3*r.size(); return 0; } @@ -1630,7 +1684,7 @@ template i_t basis_update_mpf_t::u_transpose_solve(std::vector& rhs) const { total_dense_U_transpose_++; - dual_simplex::upper_triangular_transpose_solve(U0_, rhs); + dual_simplex::upper_triangular_transpose_solve(U0_, rhs, work_estimate_); return 0; } @@ -1641,7 +1695,7 @@ i_t basis_update_mpf_t::u_transpose_solve(sparse_vector_t& r // U0'*x = y // Solve U0'*x0 = y i_t top = dual_simplex::sparse_triangle_solve( - rhs, std::nullopt, xi_workspace_, U0_transpose_, x_workspace_.data()); + rhs, std::nullopt, xi_workspace_, U0_transpose_, x_workspace_.data(), work_estimate_); solve_to_sparse_vector(top, rhs); return 0; } @@ -1671,9 +1725,10 @@ i_t basis_update_mpf_t::l_transpose_solve(std::vector& rhs) const if (std::abs(theta) > zero_tol) { add_sparse_column(S_, v_col, -theta, rhs); } } + work_estimate_ += 2*num_updates_; // Solve for x such that L0^T * x = b' - dual_simplex::lower_triangular_transpose_solve(L0_, rhs); + dual_simplex::lower_triangular_transpose_solve(L0_, rhs, work_estimate_); return 0; } @@ -1729,8 +1784,9 @@ i_t basis_update_mpf_t::l_transpose_solve(sparse_vector_t& r } #endif } + work_estimate_ += 2*num_updates_; -#ifdef CHECK_MULTIPLY + #ifdef CHECK_MULTIPLY for (i_t i = 0; i < m; ++i) { if (std::abs(rhs_dense_0[i] - x_workspace_[i]) > 1e-9) { printf("L transpose solve multiply error %e index %d sparse %e dense %e\n", @@ -1743,9 +1799,10 @@ i_t basis_update_mpf_t::l_transpose_solve(sparse_vector_t& r #endif sparse_vector_t b(m, nz); + work_estimate_ += nz; gather_into_sparse_vector(nz, b); i_t top = dual_simplex::sparse_triangle_solve( - b, std::nullopt, xi_workspace_, L0_transpose_, x_workspace_.data()); + b, std::nullopt, xi_workspace_, L0_transpose_, x_workspace_.data(), work_estimate_); solve_to_sparse_vector(top, rhs); #ifdef CHECK_SPARSE_SOLVE @@ -1772,6 +1829,7 @@ i_t basis_update_mpf_t::b_solve(const std::vector& rhs, { const i_t m = L0_.m; std::vector Lsol(m); + work_estimate_ += m; return b_solve(rhs, solution, Lsol); } @@ -1788,6 +1846,7 @@ i_t basis_update_mpf_t::b_solve(const std::vector& rhs, // P*B*x = P*b permute_vector(row_permutation_, rhs, solution); + work_estimate_ += 3*rhs.size(); // L*U*x = b' // Solve for v such that L*v = b' @@ -1795,7 +1854,10 @@ i_t basis_update_mpf_t::b_solve(const std::vector& rhs, std::vector rhs_permuted = solution; #endif l_solve(solution); - if (need_Lsol) { Lsol = solution; } + if (need_Lsol) { + Lsol = solution; + work_estimate_ += 2*solution.size(); + } #ifdef CHECK_L_SOLVE std::vector Lsol_check = Lsol; @@ -1836,7 +1898,9 @@ i_t basis_update_mpf_t::b_solve(const sparse_vector_t& rhs, { const i_t m = L0_.m; solution = rhs; + work_estimate_ += 2*rhs.i.size(); solution.inverse_permute_vector(inverse_row_permutation_); + work_estimate_ += 3*rhs.i.size(); #ifdef CHECK_PERMUTATION std::vector permuation_rhs; @@ -1866,10 +1930,15 @@ i_t basis_update_mpf_t::b_solve(const sparse_vector_t& rhs, } else { std::vector solution_dense; solution.to_dense(solution_dense); + work_estimate_ += solution_dense.size(); l_solve(solution_dense); solution.from_dense(solution_dense); + work_estimate_ += solution_dense.size(); + } + if (need_Lsol) { + Lsol = solution; + work_estimate_ += 2*solution.i.size(); } - if (need_Lsol) { Lsol = solution; } sum_L_ += static_cast(solution.i.size()) / input_size; #ifdef CHECK_L_SOLVE @@ -1897,8 +1966,10 @@ i_t basis_update_mpf_t::b_solve(const sparse_vector_t& rhs, } else { std::vector solution_dense; solution.to_dense(solution_dense); + work_estimate_ += solution_dense.size(); u_solve(solution_dense); solution.from_dense(solution_dense); + work_estimate_ += solution_dense.size(); } sum_U_ += static_cast(solution.i.size()) / rhs_size; @@ -1921,7 +1992,7 @@ i_t basis_update_mpf_t::u_solve(std::vector& rhs) const total_dense_U_++; const i_t m = L0_.m; // U*x = y - dual_simplex::upper_triangular_solve(U0_, rhs); + dual_simplex::upper_triangular_solve(U0_, rhs, work_estimate_); return 0; } @@ -1934,7 +2005,7 @@ i_t basis_update_mpf_t::u_solve(sparse_vector_t& rhs) const // Solve U0*x = y i_t top = dual_simplex::sparse_triangle_solve( - rhs, std::nullopt, xi_workspace_, U0_, x_workspace_.data()); + rhs, std::nullopt, xi_workspace_, U0_, x_workspace_.data(), work_estimate_); solve_to_sparse_vector(top, rhs); return 0; @@ -1955,7 +2026,7 @@ i_t basis_update_mpf_t::l_solve(std::vector& rhs) const #ifdef CHECK_L_SOLVE std::vector rhs_check = rhs; #endif - dual_simplex::lower_triangular_solve(L0_, rhs); + dual_simplex::lower_triangular_solve(L0_, rhs, work_estimate_); #ifdef CHECK_L0_SOLVE matrix_vector_multiply(L0_, 1.0, rhs, -1.0, residual); @@ -1979,6 +2050,7 @@ i_t basis_update_mpf_t::l_solve(std::vector& rhs) const if (std::abs(theta) > zero_tol) { add_sparse_column(S_, u_col, -theta, rhs); } } + work_estimate_ += 2*num_updates_; #ifdef CHECK_L_SOLVE std::vector inout = rhs; @@ -2004,7 +2076,7 @@ i_t basis_update_mpf_t::l_solve(sparse_vector_t& rhs) const // First solve L0*x0 = y i_t top = dual_simplex::sparse_triangle_solve( - rhs, std::nullopt, xi_workspace_, L0_, x_workspace_.data()); + rhs, std::nullopt, xi_workspace_, L0_, x_workspace_.data(), work_estimate_); solve_to_workspace(top); // Uses xi_workspace_ and x_workspace_ to fill rhs i_t nz = m - top; // Then T0 * T1 * ... * T_{num_updates_ - 1} * x = x0 @@ -2027,6 +2099,7 @@ i_t basis_update_mpf_t::l_solve(sparse_vector_t& rhs) const add_sparse_column(S_, u_col, -theta, xi_workspace_, nz, x_workspace_); } } + work_estimate_ += 2*num_updates_; gather_into_sparse_vector(nz, rhs); @@ -2049,6 +2122,7 @@ i_t basis_update_mpf_t::update(const std::vector& utilde, const i_t col_start = U0_.col_start[leaving_index]; const i_t col_end = U0_.col_start[leaving_index + 1]; std::vector u = utilde; + work_estimate_ += 2*utilde.size(); // u = utilde - U0(:, leaving_index) add_sparse_column(U0_, leaving_index, -1.0, u); @@ -2069,9 +2143,11 @@ i_t basis_update_mpf_t::update(const std::vector& utilde, // Scatter u into S S_.append_column(u); + work_estimate_ += u.size() + 3*u_nz; // Scatter v into S S_.append_column(etilde); + work_estimate_ += etilde.size() + 3*v_nz; // Compute mu = 1 + v^T * u const f_t mu = 1.0 + sparse_dot(S_.i.data() + S_.col_start[S_start], @@ -2080,6 +2156,7 @@ i_t basis_update_mpf_t::update(const std::vector& utilde, S_.i.data() + S_.col_start[S_start + 1], S_.x.data() + S_.col_start[S_start + 1], v_nz); + work_estimate_ += 3*std::min(u_nz, v_nz); if (std::abs(mu) < 1E-8 || std::abs(mu) > 1E+8) { // Force a refactor. Otherwise we will get numerical issues when dividing by mu. @@ -2126,6 +2203,7 @@ i_t basis_update_mpf_t::update(const sparse_vector_t& utilde // Ensure the workspace is sorted. Otherwise, the sparse dot will be incorrect. std::sort(xi_workspace_.begin() + m, xi_workspace_.begin() + m + nz, std::less()); + work_estimate_ += (m + nz) * std::log2(m + nz); // Gather the workspace into a column of S i_t S_start; @@ -2133,10 +2211,13 @@ i_t basis_update_mpf_t::update(const sparse_vector_t& utilde grow_storage(nz + etilde.i.size(), S_start, S_nz); S_.append_column(nz, xi_workspace_.data() + m, x_workspace_.data()); + work_estimate_ += 5*nz; // Gather etilde into a column of S etilde.sort(); // Needs to be sorted for the sparse dot. TODO(CMM): Is etilde sorted on input? + work_estimate_ += etilde.i.size() * std::log2(etilde.i.size()); S_.append_column(etilde); + work_estimate_ += 4*etilde.i.size(); // Compute mu = 1 + v^T * u const f_t mu = 1.0 + sparse_dot(S_.i.data() + S_.col_start[S_start], @@ -2145,6 +2226,7 @@ i_t basis_update_mpf_t::update(const sparse_vector_t& utilde S_.i.data() + S_.col_start[S_start + 1], S_.x.data() + S_.col_start[S_start + 1], S_.col_start[S_start + 2] - S_.col_start[S_start + 1]); + work_estimate_ += 3*std::min(nz, static_cast(etilde.i.size())); if (std::abs(mu) < 1E-8 || std::abs(mu) > 1E+8) { // Force a refactor. Otherwise we will get numerical issues when dividing by mu. @@ -2158,6 +2240,7 @@ i_t basis_update_mpf_t::update(const sparse_vector_t& utilde x_workspace_[i] = 0.0; xi_workspace_[m + k] = 0; } + work_estimate_ += 4*nz; #ifdef PRINT_MU_INFO printf("Update mu %e u nz %d v nz %d\n", @@ -2190,7 +2273,6 @@ void basis_update_mpf_t::l_multiply(std::vector& inout) const const f_t theta = dot; add_sparse_column(S_, u_col, theta, inout); } - std::vector out(m, 0.0); matrix_vector_multiply(L0_, 1.0, inout, 0.0, out); inout = out; @@ -2271,7 +2353,7 @@ int basis_update_mpf_t::refactor_basis( std::vector slacks_needed; std::vector superbasic_list; // Empty superbasic list - if (L0_.m != A.m) { resize(A.m); } + if (L0_.m != A.m) { resize(A.m); work_estimate_ += A.m;} std::vector q; i_t status = factorize_basis(A, settings, @@ -2282,7 +2364,8 @@ int basis_update_mpf_t::refactor_basis( inverse_row_permutation_, q, deficient, - slacks_needed); + slacks_needed, + work_estimate_); if (status == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; } if (status == -1) { settings.log.debug("Initial factorization failed\n"); @@ -2295,7 +2378,8 @@ int basis_update_mpf_t::refactor_basis( basic_list, nonbasic_list, superbasic_list, - vstatus); + vstatus, + work_estimate_); #ifdef CHECK_BASIS_REPAIR const i_t m = A.m; @@ -2323,7 +2407,8 @@ int basis_update_mpf_t::refactor_basis( inverse_row_permutation_, q, deficient, - slacks_needed); + slacks_needed, + work_estimate_); if (status == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; } if (status == -1) { #ifdef CHECK_L_FACTOR @@ -2338,6 +2423,7 @@ int basis_update_mpf_t::refactor_basis( assert(q.size() == A.m); reorder_basic_list(q, basic_list); // We no longer need q after reordering the basic list + work_estimate_ += 3*q.size(); reset(); return 0; } diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp index d9783053f7..4f60d3bae1 100644 --- a/cpp/src/dual_simplex/basis_updates.hpp +++ b/cpp/src/dual_simplex/basis_updates.hpp @@ -265,6 +265,7 @@ class basis_update_mpf_t { assert(p.size() == Linit.m); row_permutation_ = p; inverse_permutation(row_permutation_, inverse_row_permutation_); + work_estimate_ += 4*p.size(); clear(); compute_transposes(); reset_stats(); @@ -371,6 +372,7 @@ class basis_update_mpf_t { { L0_.transpose(L0_transpose_); U0_.transpose(U0_transpose_); + work_estimate_ += 6*L0_.col_start[L0_.n] + 6*U0_.col_start[U0_.n]; } void multiply_lu(csc_matrix_t& out) const; @@ -386,6 +388,9 @@ class basis_update_mpf_t { void set_refactor_frequency(i_t new_frequency) { refactor_frequency_ = new_frequency; } + f_t work_estimate() const { return work_estimate_; } + void clear_work_estimate() { work_estimate_ = 0.0; } + private: void clear() { @@ -400,9 +405,11 @@ class basis_update_mpf_t { mu_values_.clear(); mu_values_.reserve(refactor_frequency_); num_updates_ = 0; + work_estimate_ += 2*refactor_frequency_; std::fill(xi_workspace_.begin(), xi_workspace_.end(), 0); std::fill(x_workspace_.begin(), x_workspace_.end(), 0.0); + work_estimate_ += xi_workspace_.size() + x_workspace_.size(); } void grow_storage(i_t nz, i_t& S_start, i_t& S_nz); @@ -472,6 +479,8 @@ class basis_update_mpf_t { mutable f_t sum_U_transpose_; f_t hypersparse_threshold_; + + mutable f_t work_estimate_; }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp index fac65b8140..ea41b56ced 100644 --- a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp +++ b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp @@ -26,9 +26,9 @@ i_t bound_flipping_ratio_test_t::compute_breakpoints(std::vector& i_t idx = 0; while (idx == 0 && pivot_tol >= 1e-12) { - // for (i_t k = 0; k < n - m; ++k) { - // const i_t j = nonbasic_list_[k]; - for (i_t h = 0; h < delta_z_indices_.size(); ++h) { + // Loop over the nonbasic variables j with non-zero delta_z + const i_t nz = delta_z_indices_.size(); + for (i_t h = 0; h < nz; ++h) { const i_t j = delta_z_indices_[h]; const i_t k = nonbasic_mark_[j]; if (vstatus_[j] == variable_status_t::NONBASIC_FIXED) { continue; } @@ -45,6 +45,8 @@ i_t bound_flipping_ratio_test_t::compute_breakpoints(std::vector& idx++; } } + work_estimate_ += 4*nz; + work_estimate_ += 4*idx; pivot_tol /= 10; } return idx; @@ -66,11 +68,15 @@ i_t bound_flipping_ratio_test_t::single_pass(i_t start, i_t candidate = -1; f_t zero_tol = settings_.zero_tol; i_t k_idx = -1; + + i_t min_found = 0; + i_t harris_found = 0; for (i_t k = start; k < end; ++k) { if (ratios[k] < min_val) { min_val = ratios[k]; candidate = indicies[k]; k_idx = k; + min_found++; } else if (ratios[k] < min_val + zero_tol) { // Use Harris to select variables with larger pivots const i_t j = nonbasic_list_[indicies[k]]; @@ -79,8 +85,12 @@ i_t bound_flipping_ratio_test_t::single_pass(i_t start, candidate = indicies[k]; k_idx = k; } + harris_found++; } } + work_estimate_ += (end - start) + 2*min_found + 6*harris_found; + + step_length = min_val; nonbasic_entering = candidate; // this should be temporary, find root causes where the candidate is not filled @@ -116,6 +126,7 @@ i_t bound_flipping_ratio_test_t::compute_step_length(f_t& step_length, // Compute the initial set of breakpoints std::vector indicies(nz); std::vector ratios(nz); + work_estimate_ += 2*nz; i_t num_breakpoints = compute_breakpoints(indicies, ratios); if constexpr (verbose) { settings_.log.printf("Initial breakpoints %d\n", num_breakpoints); } if (num_breakpoints == 0) { @@ -132,7 +143,7 @@ i_t bound_flipping_ratio_test_t::compute_step_length(f_t& step_length, if (k_idx == RATIO_TEST_NUMERICAL_ISSUES) { return RATIO_TEST_NUMERICAL_ISSUES; } bool continue_search = k_idx >= 0 && num_breakpoints > 1 && slope > 0.0; if (!continue_search) { - if constexpr (0) { + if constexpr (verbose) { settings_.log.printf( "BFRT stopping. No bound flips. Step length %e Nonbasic entering %d Entering %d pivot %e\n", step_length, @@ -163,6 +174,7 @@ i_t bound_flipping_ratio_test_t::compute_step_length(f_t& step_length, for (i_t k = 0; k < num_breakpoints - 1; ++k) { if (ratios[k] > max_ratio) { max_ratio = ratios[k]; } } + work_estimate_ += 2*num_breakpoints; settings_.log.printf( "Starting heap passes. %d breakpoints max ratio %e\n", num_breakpoints - 1, max_ratio); bucket_pass( @@ -207,6 +219,7 @@ void bound_flipping_ratio_test_t::heap_passes(const std::vector& std::abs(delta_z[nonbasic_list[current_indicies[k]]])); } } + work_estimate_ += N; auto compare = [zero_tol, ¤t_ratios, ¤t_indicies, &delta_z, &nonbasic_list]( const i_t& a, const i_t& b) { @@ -217,12 +230,14 @@ void bound_flipping_ratio_test_t::heap_passes(const std::vector& }; std::make_heap(bare_idx.begin(), bare_idx.end(), compare); + work_estimate_ += 3*bare_idx.size(); while (bare_idx.size() > 0 && slope > 0) { // Remove minimum ratio from the heap and rebalance i_t heap_index = bare_idx.front(); std::pop_heap(bare_idx.begin(), bare_idx.end(), compare); bare_idx.pop_back(); + work_estimate_ += 2 * std::log2(bare_idx.size()); nonbasic_entering = current_indicies[heap_index]; const i_t j = entering_index = nonbasic_list_[nonbasic_entering]; diff --git a/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp b/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp index 51b00b1097..244ff334df 100644 --- a/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp +++ b/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp @@ -54,6 +54,7 @@ class bound_flipping_ratio_test_t { } i_t compute_step_length(f_t& step_length, i_t& nonbasic_entering); + f_t work_estimate() const { return work_estimate_; } private: i_t compute_breakpoints(std::vector& indices, std::vector& ratios); @@ -98,6 +99,8 @@ class bound_flipping_ratio_test_t { i_t n_; i_t m_; + + f_t work_estimate_; }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/bounds_strengthening.cpp b/cpp/src/dual_simplex/bounds_strengthening.cpp index 37ab114a7e..6506ce4873 100644 --- a/cpp/src/dual_simplex/bounds_strengthening.cpp +++ b/cpp/src/dual_simplex/bounds_strengthening.cpp @@ -102,11 +102,6 @@ bool bounds_strengthening_t::bounds_strengthening( std::vector variable_changed(n, false); std::vector constraint_changed_next(m, false); - auto& A_i = A.i.underlying(); - auto& A_x = A.x.underlying(); - auto& Arow_j = Arow.j.underlying(); - auto& Arow_x = Arow.x.underlying(); - size_t nnz_processed = 0; if (!bounds_changed.empty()) { @@ -116,7 +111,7 @@ bool bounds_strengthening_t::bounds_strengthening( const i_t col_start = A.col_start[j]; const i_t col_end = A.col_start[j + 1]; for (i_t p = col_start; p < col_end; ++p) { - const i_t i = A_i[p]; + const i_t i = A.i[p]; constraint_changed[i] = true; } } @@ -139,8 +134,8 @@ bool bounds_strengthening_t::bounds_strengthening( f_t min_a = 0.0; f_t max_a = 0.0; for (i_t p = row_start; p < row_end; ++p) { - const i_t j = Arow_j[p]; - const f_t a_ij = Arow_x[p]; + const i_t j = Arow.j[p]; + const f_t a_ij = Arow.x[p]; variable_changed[j] = true; if (a_ij > 0) { @@ -192,10 +187,10 @@ bool bounds_strengthening_t::bounds_strengthening( const i_t col_end = A.col_start[k + 1]; nnz_processed += (col_end - col_start); for (i_t p = col_start; p < col_end; ++p) { - const i_t i = A_i[p]; + const i_t i = A.i[p]; if (!constraint_changed[i]) { continue; } - const f_t a_ik = A_x[p]; + const f_t a_ik = A.x[p]; f_t delta_min_act = delta_min_activity[i]; f_t delta_max_act = delta_max_activity[i]; diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp index 597628e735..7ebdc47cba 100644 --- a/cpp/src/dual_simplex/crossover.cpp +++ b/cpp/src/dual_simplex/crossover.cpp @@ -505,8 +505,9 @@ i_t dual_push(const lp_problem_t& lp, std::vector q(m); std::vector deficient; std::vector slacks_needed; + f_t work_estimate = 0; i_t rank = - factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); + factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); if (rank == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; } else if (rank != m) { @@ -520,9 +521,10 @@ i_t dual_push(const lp_problem_t& lp, basic_list, nonbasic_list, superbasic_list, - vstatus); + vstatus, + work_estimate); rank = - factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); + factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); if (rank == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; } else if (rank == -1) { @@ -860,8 +862,9 @@ i_t primal_push(const lp_problem_t& lp, std::vector q(m); std::vector deficient; std::vector slacks_needed; + f_t work_estimate = 0; i_t rank = - factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); + factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); if (rank == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; } else if (rank != m) { @@ -875,12 +878,13 @@ i_t primal_push(const lp_problem_t& lp, basic_list, nonbasic_list, superbasic_list, - vstatus); + vstatus, + work_estimate); // We need to be careful. As basis_repair may have changed the superbasic list find_primal_superbasic_variables( lp, settings, solution, solution, vstatus, nonbasic_list, superbasic_list); rank = - factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); + factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); if (rank == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; } else if (rank == -1) { @@ -1142,6 +1146,7 @@ crossover_status_t crossover(const lp_problem_t& lp, const i_t m = lp.num_rows; const i_t n = lp.num_cols; f_t crossover_start = tic(); + f_t work_estimate = 0; settings.log.printf("\n"); settings.log.printf("Starting crossover\n"); @@ -1223,7 +1228,7 @@ crossover_status_t crossover(const lp_problem_t& lp, std::vector deficient; std::vector slacks_needed; - rank = factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); + rank = factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); if (rank == CONCURRENT_HALT_RETURN) { return crossover_status_t::CONCURRENT_LIMIT; } if (rank != m) { settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m); @@ -1236,8 +1241,9 @@ crossover_status_t crossover(const lp_problem_t& lp, basic_list, nonbasic_list, superbasic_list, - vstatus); - rank = factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); + vstatus, + work_estimate); + rank = factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); if (rank == CONCURRENT_HALT_RETURN) { return crossover_status_t::CONCURRENT_LIMIT; } else if (rank == -1) { @@ -1393,7 +1399,7 @@ crossover_status_t crossover(const lp_problem_t& lp, superbasic_list.clear(); get_basis_from_vstatus(m, vstatus, basic_list, nonbasic_list, superbasic_list); rank = - factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); + factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); if (rank == CONCURRENT_HALT_RETURN) { return crossover_status_t::CONCURRENT_LIMIT; } else if (rank != m) { @@ -1407,9 +1413,10 @@ crossover_status_t crossover(const lp_problem_t& lp, basic_list, nonbasic_list, superbasic_list, - vstatus); + vstatus, + work_estimate); rank = - factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); + factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); if (rank == CONCURRENT_HALT_RETURN) { return crossover_status_t::CONCURRENT_LIMIT; } else if (rank == -1) { diff --git a/cpp/src/dual_simplex/folding.cpp b/cpp/src/dual_simplex/folding.cpp index 5e42afb01f..f851e51d79 100644 --- a/cpp/src/dual_simplex/folding.cpp +++ b/cpp/src/dual_simplex/folding.cpp @@ -128,8 +128,8 @@ void compute_sums(const csc_matrix_t& A, // Find all vertices (columns) that have a neighbor in the refining color colors_to_update.reserve(num_col_colors); find_vertices_to_refine(refining_color.vertices, - Arow.row_start.underlying(), - Arow.j.underlying(), + Arow.row_start, + Arow.j, col_color_map, marked_vertices, vertices_to_refine, @@ -145,9 +145,9 @@ void compute_sums(const csc_matrix_t& A, compute_sums_of_refined_vertices(refining_color.color, refining_color.vertices, vertices_to_refine, - Arow.row_start.underlying(), - Arow.j.underlying(), - Arow.x.underlying(), + Arow.row_start, + Arow.j, + Arow.x, col_color_map, vertex_to_sum, max_sum_by_color); @@ -156,8 +156,8 @@ void compute_sums(const csc_matrix_t& A, // Find all vertices (rows) that have a neighbor in the refining color colors_to_update.reserve(num_row_colors); find_vertices_to_refine(refining_color.vertices, - A.col_start.underlying(), - A.i.underlying(), + A.col_start, + A.i, row_color_map, marked_vertices, vertices_to_refine, @@ -173,9 +173,9 @@ void compute_sums(const csc_matrix_t& A, compute_sums_of_refined_vertices(refining_color.color, refining_color.vertices, vertices_to_refine, - A.col_start.underlying(), - A.i.underlying(), - A.x.underlying(), + A.col_start, + A.i, + A.x, row_color_map, vertex_to_sum, max_sum_by_color); diff --git a/cpp/src/dual_simplex/initial_basis.cpp b/cpp/src/dual_simplex/initial_basis.cpp index 5da8449040..9343769d00 100644 --- a/cpp/src/dual_simplex/initial_basis.cpp +++ b/cpp/src/dual_simplex/initial_basis.cpp @@ -79,7 +79,8 @@ i_t initial_basis_selection(const lp_problem_t& problem, i_t row_singletons; i_t col_singletons; std::vector row_perm(N); - find_singletons(C, row_singletons, row_perm, col_singletons, q); + f_t work_estimate = 0; + find_singletons(C, row_singletons, row_perm, col_singletons, q, work_estimate); std::vector row_perm_inv(N); inverse_permutation(row_perm, row_perm_inv); diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 1e057a16b8..bb88659ca5 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -326,55 +326,47 @@ template void compute_reduced_cost_update(const lp_problem_t& lp, const std::vector& basic_list, const std::vector& nonbasic_list, - const ins_vector& delta_y, + const std::vector& delta_y, i_t leaving_index, i_t direction, - ins_vector& delta_z_mark, - ins_vector& delta_z_indices, - ins_vector& delta_z) + std::vector& delta_z_mark, + std::vector& delta_z_indices, + std::vector& delta_z, + f_t& work_estimate) { const i_t m = lp.num_rows; const i_t n = lp.num_cols; - const f_t* __restrict__ delta_y_ptr = delta_y.data(); - const f_t* __restrict__ Ax = lp.A.x.data(); - const i_t* __restrict__ Ai = lp.A.i.data(); - const i_t* __restrict__ ptr_col_start = lp.A.col_start.data(); - f_t* __restrict__ delta_z_ptr = delta_z.data(); - size_t nnzs_processed = 0; - // delta_zB = sigma*ei for (i_t k = 0; k < m; k++) { const i_t j = basic_list[k]; - delta_z_ptr[j] = 0; + delta_z[j] = 0; } - delta_z_ptr[leaving_index] = direction; + work_estimate += 2*m; + delta_z[leaving_index] = direction; // delta_zN = -N'*delta_y const i_t num_nonbasic = n - m; for (i_t k = 0; k < num_nonbasic; k++) { const i_t j = nonbasic_list[k]; // z_j <- -A(:, j)'*delta_y - const i_t col_start = ptr_col_start[j]; - const i_t col_end = ptr_col_start[j + 1]; + const i_t col_start = lp.A.col_start[j]; + const i_t col_end = lp.A.col_start[j + 1]; f_t dot = 0.0; for (i_t p = col_start; p < col_end; ++p) { - dot += Ax[p] * delta_y_ptr[Ai[p]]; + dot += lp.A.x[p] * delta_y[lp.A.i[p]]; } nnzs_processed += col_end - col_start; - delta_z_ptr[j] = -dot; + delta_z[j] = -dot; if (dot != 0.0) { delta_z_indices.push_back(j); // Note delta_z_indices has n elements reserved delta_z_mark[j] = 1; } } - - lp.A.x.byte_loads += nnzs_processed * sizeof(f_t); - lp.A.i.byte_loads += nnzs_processed * sizeof(i_t); - delta_y.byte_loads += nnzs_processed * sizeof(f_t); - lp.A.col_start.byte_loads += 2 * num_nonbasic * sizeof(i_t); - delta_z.byte_stores += (m + 1 + num_nonbasic) * sizeof(f_t); + work_estimate += 3*num_nonbasic; + work_estimate += 3*nnzs_processed; + work_estimate += 2*delta_z_indices.size(); } template @@ -382,67 +374,42 @@ void compute_delta_z(const csc_matrix_t& A_transpose, const sparse_vector_t& delta_y, i_t leaving_index, i_t direction, - ins_vector& nonbasic_mark, - ins_vector& delta_z_mark, - ins_vector& delta_z_indices, - ins_vector& delta_z) + std::vector& nonbasic_mark, + std::vector& delta_z_mark, + std::vector& delta_z_indices, + std::vector& delta_z, + f_t& work_estimate) { - auto& At_cs = A_transpose.col_start.underlying(); - auto& At_i = A_transpose.i.underlying(); - auto& At_x = A_transpose.x.underlying(); - auto& dy_i = delta_y.i.underlying(); - auto& dy_x = delta_y.x.underlying(); - auto& nb_mk = nonbasic_mark.underlying(); - auto& dz_mk = delta_z_mark.underlying(); - auto& dz_idx = delta_z_indices.underlying(); - auto& dz = delta_z.underlying(); - // delta_zN = - N'*delta_y - const i_t nz_delta_y = dy_i.size(); + const i_t nz_delta_y = delta_y.i.size(); size_t nnz_processed = 0; - size_t nb_reads = 0; - size_t dz_rmws = 0; - size_t dz_mk_reads = 0; - size_t dz_mk_writes = 0; - size_t dz_idx_writes = 0; + size_t nonbasic_marked = 0; for (i_t k = 0; k < nz_delta_y; k++) { - const i_t i = dy_i[k]; - const f_t delta_y_i = dy_x[k]; + const i_t i = delta_y.i[k]; + const f_t delta_y_i = delta_y.x[k]; if (std::abs(delta_y_i) < 1e-12) { continue; } - const i_t row_start = At_cs[i]; - const i_t row_end = At_cs[i + 1]; + const i_t row_start = A_transpose.col_start[i]; + const i_t row_end = A_transpose.col_start[i + 1]; nnz_processed += row_end - row_start; for (i_t p = row_start; p < row_end; ++p) { - const i_t j = At_i[p]; - nb_reads++; - if (nb_mk[j] >= 0) { - dz[j] -= delta_y_i * At_x[p]; - dz_rmws++; - dz_mk_reads++; - if (!dz_mk[j]) { - dz_mk[j] = 1; - dz_idx.push_back(j); - dz_mk_writes++; - dz_idx_writes++; + const i_t j = A_transpose.i[p]; + if (nonbasic_mark[j] >= 0) { + delta_z[j] -= delta_y_i * A_transpose.x[p]; + nonbasic_marked++; + if (!delta_z_mark[j]) { + delta_z_mark[j] = 1; + delta_z_indices.push_back(j); } } } } + work_estimate += 4*nz_delta_y; + work_estimate += 2*nnz_processed; + work_estimate += 3*nonbasic_marked; + work_estimate += 2*delta_z_indices.size(); // delta_zB = sigma*ei - dz[leaving_index] = direction; - - delta_y.i.byte_loads += nz_delta_y * sizeof(i_t); - delta_y.x.byte_loads += nz_delta_y * sizeof(f_t); - A_transpose.col_start.byte_loads += 2 * nz_delta_y * sizeof(i_t); - A_transpose.i.byte_loads += nnz_processed * sizeof(i_t); - A_transpose.x.byte_loads += dz_rmws * sizeof(f_t); - nonbasic_mark.byte_loads += nb_reads * sizeof(i_t); - delta_z.byte_loads += dz_rmws * sizeof(f_t); - delta_z.byte_stores += (dz_rmws + 1) * sizeof(f_t); - delta_z_mark.byte_loads += dz_mk_reads * sizeof(i_t); - delta_z_mark.byte_stores += dz_mk_writes * sizeof(i_t); - delta_z_indices.byte_stores += dz_idx_writes * sizeof(i_t); + delta_z[leaving_index] = direction; #ifdef CHECK_CHANGE_IN_REDUCED_COST delta_y_sparse.to_dense(delta_y); @@ -480,7 +447,8 @@ void compute_reduced_costs(const std::vector& objective, const std::vector& y, const std::vector& basic_list, const std::vector& nonbasic_list, - std::vector& z) + std::vector& z, + f_t& work_estimate) { PHASE2_NVTX_RANGE("DualSimplex::compute_reduced_costs"); @@ -499,12 +467,15 @@ void compute_reduced_costs(const std::vector& objective, for (i_t p = col_start; p < col_end; ++p) { dot += A.x[p] * y[A.i[p]]; } + work_estimate += 3*(col_end - col_start); z[j] -= dot; } + work_estimate += 5*(n - m); // zB = 0 for (i_t k = 0; k < m; ++k) { z[basic_list[k]] = 0.0; } + work_estimate += 2*m; } template @@ -515,12 +486,14 @@ void compute_primal_variables(const basis_update_mpf_t& ft, const std::vector& nonbasic_list, f_t tight_tol, std::vector& x, - ins_vector& xB_workspace) + std::vector& xB_workspace, + f_t& work_estimate) { PHASE2_NVTX_RANGE("DualSimplex::compute_primal_variables"); const i_t m = A.m; const i_t n = A.n; std::vector rhs = lp_rhs; + work_estimate += 2*m; // rhs = b - sum_{j : x_j = l_j} A(:, j) * l(j) // - sum_{j : x_j = u_j} A(:, j) * u(j) for (i_t k = 0; k < n - m; ++k) { @@ -532,25 +505,31 @@ void compute_primal_variables(const basis_update_mpf_t& ft, for (i_t p = col_start; p < col_end; ++p) { rhs[A.i[p]] -= xj * A.x[p]; } + work_estimate += 3*(col_end - col_start); } + work_estimate += 5*(n - m); xB_workspace.resize(m); + work_estimate += m; ft.b_solve(rhs, xB_workspace); for (i_t k = 0; k < m; ++k) { const i_t j = basic_list[k]; x[j] = xB_workspace[k]; } + work_estimate += 2*m; } +// Work is 3*delta_z_indices.size() template void clear_delta_z(i_t entering_index, i_t leaving_index, - ins_vector& delta_z_mark, - ins_vector& delta_z_indices, - ins_vector& delta_z) + std::vector& delta_z_mark, + std::vector& delta_z_indices, + std::vector& delta_z) { - for (i_t k = 0; k < delta_z_indices.size(); k++) { + const i_t nz = delta_z_indices.size(); + for (i_t k = 0; k < nz; k++) { const i_t j = delta_z_indices[k]; delta_z[j] = 0.0; delta_z_mark[j] = 0; @@ -564,13 +543,15 @@ template void clear_delta_x(const std::vector& basic_list, i_t entering_index, sparse_vector_t& scaled_delta_xB_sparse, - ins_vector& delta_x) + std::vector& delta_x, + f_t& work_estimate) { const i_t scaled_delta_xB_nz = scaled_delta_xB_sparse.i.size(); for (i_t k = 0; k < scaled_delta_xB_nz; ++k) { const i_t j = basic_list[scaled_delta_xB_sparse.i[k]]; delta_x[j] = 0.0; } + work_estimate += 3*scaled_delta_xB_nz; // Leaving index already included above delta_x[entering_index] = 0.0; scaled_delta_xB_sparse.i.clear(); @@ -631,7 +612,7 @@ void vstatus_changes(const std::vector& vstatus, template void compute_bounded_info(const std::vector& lower, const std::vector& upper, - ins_vector& bounded_variables) + std::vector& bounded_variables) { const size_t n = lower.size(); for (size_t j = 0; j < n; j++) { @@ -646,17 +627,20 @@ void compute_dual_solution_from_basis(const lp_problem_t& lp, const std::vector& basic_list, const std::vector& nonbasic_list, std::vector& y, - std::vector& z) + std::vector& z, + f_t& work_estimate) { const i_t m = lp.num_rows; const i_t n = lp.num_cols; y.resize(m); - ins_vector cB(m); + std::vector cB(m); + work_estimate += 2*m; for (i_t k = 0; k < m; ++k) { const i_t j = basic_list[k]; cB[k] = lp.objective[j]; } + work_estimate += 3*m; ft.b_transpose_solve(cB, y); // We want A'y + z = c @@ -664,6 +648,7 @@ void compute_dual_solution_from_basis(const lp_problem_t& lp, // B' y = c_B, z_B = 0 // N' y + z_N = c_N z.resize(n); + work_estimate += n; // zN = cN - N'*y for (i_t k = 0; k < n - m; k++) { const i_t j = nonbasic_list[k]; @@ -677,12 +662,15 @@ void compute_dual_solution_from_basis(const lp_problem_t& lp, for (i_t p = col_start; p < col_end; ++p) { dot += lp.A.x[p] * y[lp.A.i[p]]; } + work_estimate += 3*(col_end - col_start); z[j] -= dot; } + work_estimate += 5*(n - m); // zB = 0 for (i_t k = 0; k < m; ++k) { z[basic_list[k]] = 0.0; } + work_estimate += 2*m; } template @@ -692,11 +680,14 @@ i_t compute_primal_solution_from_basis(const lp_problem_t& lp, const std::vector& nonbasic_list, const std::vector& vstatus, std::vector& x, - ins_vector& xB_workspace) + std::vector& xB_workspace, + f_t& work_estimate) { const i_t m = lp.num_rows; const i_t n = lp.num_cols; std::vector rhs = lp.rhs; + work_estimate += 2*m; + for (i_t k = 0; k < n - m; ++k) { const i_t j = nonbasic_list[k]; @@ -709,6 +700,7 @@ i_t compute_primal_solution_from_basis(const lp_problem_t& lp, x[j] = 0.0; } } + work_estimate += 4*(n - m); // rhs = b - sum_{j : x_j = l_j} A(:, j) l(j) - sum_{j : x_j = u_j} A(:, j) * // u(j) @@ -720,25 +712,30 @@ i_t compute_primal_solution_from_basis(const lp_problem_t& lp, for (i_t p = col_start; p < col_end; ++p) { rhs[lp.A.i[p]] -= xj * lp.A.x[p]; } + work_estimate += 3*(col_end - col_start); } + work_estimate += 4*(n - m); xB_workspace.resize(m); + work_estimate += m; ft.b_solve(rhs, xB_workspace); for (i_t k = 0; k < m; ++k) { const i_t j = basic_list[k]; x[j] = xB_workspace[k]; } + work_estimate += 3*m; return 0; } +// Work is 4*m + 2*n template f_t compute_initial_primal_infeasibilities(const lp_problem_t& lp, const simplex_solver_settings_t& settings, const std::vector& basic_list, const std::vector& x, - ins_vector& squared_infeasibilities, - ins_vector& infeasibility_indices, + std::vector& squared_infeasibilities, + std::vector& infeasibility_indices, f_t& primal_inf) { PHASE2_NVTX_RANGE("DualSimplex::compute_initial_primal_infeasibilities"); @@ -816,7 +813,8 @@ void update_primal_infeasibilities(const lp_problem_t& lp, std::vector& basic_change_list, std::vector& squared_infeasibilities, std::vector& infeasibility_indices, - f_t& primal_inf) + f_t& primal_inf, + f_t& work_estimate) { const f_t primal_tol = settings.primal_tol; const i_t nz = basic_change_list.size(); @@ -841,45 +839,39 @@ void update_primal_infeasibilities(const lp_problem_t& lp, j, primal_inf); } + work_estimate += 8*nz; } template -void clean_up_infeasibilities(ins_vector& squared_infeasibilities, - ins_vector& infeasibility_indices) +void clean_up_infeasibilities(std::vector& squared_infeasibilities, + std::vector& infeasibility_indices, + f_t& work_estimate) { - auto& sq_inf = squared_infeasibilities.underlying(); - auto& idx = infeasibility_indices.underlying(); - bool needs_clean_up = false; - const i_t initial_nz = idx.size(); + const i_t initial_nz = infeasibility_indices.size(); for (i_t k = 0; k < initial_nz; ++k) { - const i_t j = idx[k]; - const f_t squared_infeas = sq_inf[j]; + const i_t j = infeasibility_indices[k]; + const f_t squared_infeas = squared_infeasibilities[j]; if (squared_infeas == 0.0) { needs_clean_up = true; } } - - size_t idx_accesses = initial_nz; - size_t sq_accesses = initial_nz; + work_estimate += 2*initial_nz; if (needs_clean_up) { - for (size_t k = 0; k < idx.size(); ++k) { - const i_t j = idx[k]; - const f_t squared_infeas = sq_inf[j]; - idx_accesses++; - sq_accesses++; + i_t num_cleans = 0; + work_estimate += 2*infeasibility_indices.size(); + for (size_t k = 0; k < infeasibility_indices.size(); ++k) { + const i_t j = infeasibility_indices[k]; + const f_t squared_infeas = squared_infeasibilities[j]; if (squared_infeas == 0.0) { - const i_t new_j = idx.back(); - idx[k] = new_j; - idx.pop_back(); - idx_accesses += 2; - sq_accesses++; - if (sq_inf[new_j] == 0.0) { k--; } + const i_t new_j = infeasibility_indices.back(); + infeasibility_indices[k] = new_j; + infeasibility_indices.pop_back(); + if (squared_infeasibilities[new_j] == 0.0) { k--; } // Decrement k so that we process the same index again + num_cleans++; } } + work_estimate += 4*num_cleans; } - - infeasibility_indices.byte_loads += idx_accesses * sizeof(i_t); - squared_infeasibilities.byte_loads += sq_accesses * sizeof(f_t); } template @@ -887,23 +879,22 @@ i_t steepest_edge_pricing_with_infeasibilities(const lp_problem_t& lp, const simplex_solver_settings_t& settings, const std::vector& x, const std::vector& dy_steepest_edge, - const ins_vector& basic_mark, - ins_vector& squared_infeasibilities, - ins_vector& infeasibility_indices, + const std::vector& basic_mark, + std::vector& squared_infeasibilities, + std::vector& infeasibility_indices, i_t& direction, i_t& basic_leaving, - f_t& max_val) + f_t& max_val, + f_t& work_estimate) { - auto& sq_inf = squared_infeasibilities.underlying(); - auto& idx = infeasibility_indices.underlying(); - auto& bmark = basic_mark.underlying(); max_val = 0.0; i_t leaving_index = -1; - const i_t nz = idx.size(); + const i_t nz = infeasibility_indices.size(); + i_t max_count = 0; for (i_t k = 0; k < nz; ++k) { - const i_t j = idx[k]; - const f_t squared_infeas = sq_inf[j]; + const i_t j = infeasibility_indices[k]; + const f_t squared_infeas = squared_infeasibilities[j]; const f_t val = squared_infeas / dy_steepest_edge[j]; if (val > max_val || (val == max_val && j > leaving_index)) { max_val = val; @@ -911,14 +902,12 @@ i_t steepest_edge_pricing_with_infeasibilities(const lp_problem_t& lp, const f_t lower_infeas = lp.lower[j] - x[j]; const f_t upper_infeas = x[j] - lp.upper[j]; direction = lower_infeas >= upper_infeas ? 1 : -1; + max_count++; } } + work_estimate += 3*nz + 3*max_count; - basic_leaving = leaving_index >= 0 ? bmark[leaving_index] : -1; - - infeasibility_indices.byte_loads += nz * sizeof(i_t); - squared_infeasibilities.byte_loads += nz * sizeof(f_t); - if (leaving_index >= 0) { basic_mark.byte_loads += sizeof(i_t); } + basic_leaving = leaving_index >= 0 ? basic_mark[leaving_index] : -1; return leaving_index; } @@ -1042,7 +1031,7 @@ f_t first_stage_harris(const lp_problem_t& lp, const std::vector& vstatus, const std::vector& nonbasic_list, std::vector& z, - ins_vector& delta_z) + std::vector& delta_z) { const i_t n = lp.num_cols; const i_t m = lp.num_rows; @@ -1076,7 +1065,7 @@ i_t second_stage_harris(const lp_problem_t& lp, const std::vector& vstatus, const std::vector& nonbasic_list, const std::vector& z, - const ins_vector& delta_z, + const std::vector& delta_z, f_t max_step_length, f_t& step_length, i_t& nonbasic_entering) @@ -1119,7 +1108,7 @@ i_t phase2_ratio_test(const lp_problem_t& lp, std::vector& vstatus, std::vector& nonbasic_list, std::vector& z, - ins_vector& delta_z, + std::vector& delta_z, f_t& step_length, i_t& nonbasic_entering) { @@ -1169,17 +1158,18 @@ i_t phase2_ratio_test(const lp_problem_t& lp, template i_t flip_bounds(const lp_problem_t& lp, const simplex_solver_settings_t& settings, - const ins_vector& bounded_variables, - const ins_vector& objective, + const std::vector& bounded_variables, + const std::vector& objective, const std::vector& z, - const ins_vector& delta_z_indices, + const std::vector& delta_z_indices, const std::vector& nonbasic_list, i_t entering_index, std::vector& vstatus, - ins_vector& delta_x, - ins_vector& mark, - ins_vector& atilde, - ins_vector& atilde_index) + std::vector& delta_x, + std::vector& mark, + std::vector& atilde, + std::vector& atilde_index, + f_t& work_estimate) { i_t num_flipped = 0; for (i_t k = 0; k < delta_z_indices.size(); ++k) { @@ -1192,7 +1182,9 @@ i_t flip_bounds(const lp_problem_t& lp, settings.dual_tol; // lower to 1e-7 or less will cause 25fv47 and d2q06c to cycle if (vstatus[j] == variable_status_t::NONBASIC_LOWER && z[j] < -dual_tol) { const f_t delta = lp.upper[j] - lp.lower[j]; + const f_t atilde_start_size = atilde_index.size(); scatter_dense(lp.A, j, -delta, atilde, mark, atilde_index); + work_estimate += 2*(atilde_index.size() - atilde_start_size) + 4 * (lp.A.col_start[j + 1] - lp.A.col_start[j]) + 10; delta_x[j] += delta; vstatus[j] = variable_status_t::NONBASIC_UPPER; #ifdef BOUND_FLIP_DEBUG @@ -1202,7 +1194,9 @@ i_t flip_bounds(const lp_problem_t& lp, num_flipped++; } else if (vstatus[j] == variable_status_t::NONBASIC_UPPER && z[j] > dual_tol) { const f_t delta = lp.lower[j] - lp.upper[j]; + const f_t atilde_start_size = atilde_index.size(); scatter_dense(lp.A, j, -delta, atilde, mark, atilde_index); + work_estimate += 2*(atilde_index.size() - atilde_start_size) + 4 * (lp.A.col_start[j + 1] - lp.A.col_start[j]) + 10; delta_x[j] += delta; vstatus[j] = variable_status_t::NONBASIC_LOWER; #ifdef BOUND_FLIP_DEBUG @@ -1239,7 +1233,8 @@ i_t initialize_steepest_edge_norms(const lp_problem_t& lp, const f_t start_time, const std::vector& basic_list, basis_update_mpf_t& ft, - std::vector& delta_y_steepest_edge) + std::vector& delta_y_steepest_edge, + f_t& work_estimate) { const i_t m = basic_list.size(); @@ -1251,6 +1246,7 @@ i_t initialize_steepest_edge_norms(const lp_problem_t& lp, std::vector row_degree(m, 0); std::vector mapping(m, -1); std::vector coeff(m, 0.0); + work_estimate += 3*m; for (i_t k = 0; k < m; ++k) { const i_t j = basic_list[k]; @@ -1263,7 +1259,9 @@ i_t initialize_steepest_edge_norms(const lp_problem_t& lp, mapping[k] = i; coeff[k] = lp.A.x[p]; } + work_estimate += 5*(col_end - col_start); } + work_estimate += 3*m; #ifdef CHECK_SINGLETON_ROWS csc_matrix_t B(m, m, 0); @@ -1285,6 +1283,7 @@ i_t initialize_steepest_edge_norms(const lp_problem_t& lp, #endif } } + work_estimate += m; if (num_singleton_rows > 0) { settings.log.printf("Found %d singleton rows for steepest edge norms in %.2fs\n", @@ -1347,6 +1346,7 @@ i_t initialize_steepest_edge_norms(const lp_problem_t& lp, for (i_t p = 0; p < sparse_dy.x.size(); ++p) { my_init += sparse_dy.x[p] * sparse_dy.x[p]; } + work_estimate += 2*sparse_dy.x.size(); #endif #if COMPARE_WITH_DENSE if (std::abs(init - my_init) > 1e-12) { @@ -1375,6 +1375,7 @@ i_t initialize_steepest_edge_norms(const lp_problem_t& lp, return CONCURRENT_HALT_RETURN; } } + work_estimate += 7*m; return 0; } @@ -1388,17 +1389,22 @@ i_t update_steepest_edge_norms(const simplex_solver_settings_t& settin const sparse_vector_t& scaled_delta_xB, i_t basic_leaving_index, i_t entering_index, - ins_vector& v, + std::vector& v, sparse_vector_t& v_sparse, - std::vector& delta_y_steepest_edge) + std::vector& delta_y_steepest_edge, + f_t& work_estimate) { const i_t delta_y_nz = delta_y_sparse.i.size(); v_sparse.clear(); // B^T delta_y = - direction * e_basic_leaving_index // We want B v = - B^{-T} e_basic_leaving_index ft.b_solve(delta_y_sparse, v_sparse); - if (direction == -1) { v_sparse.negate(); } + if (direction == -1) { + v_sparse.negate(); + work_estimate += 2 * v_sparse.i.size(); + } v_sparse.scatter(v); + work_estimate += 2*v.size(); const i_t leaving_index = basic_list[basic_leaving_index]; const f_t prev_dy_norm_squared = delta_y_steepest_edge[leaving_index]; @@ -1417,23 +1423,21 @@ i_t update_steepest_edge_norms(const simplex_solver_settings_t& settin // B*w = A(:, leaving_index) // B*scaled_delta_xB = -A(:, leaving_index) so w = -scaled_delta_xB const f_t wr = -scaled_delta_xB.find_coefficient(basic_leaving_index); + work_estimate += scaled_delta_xB.i.size(); if (wr == 0) { return -1; } const f_t omegar = dy_norm_squared / (wr * wr); const i_t scaled_delta_xB_nz = scaled_delta_xB.i.size(); - auto& dxB_i = scaled_delta_xB.i.underlying(); - auto& dxB_x = scaled_delta_xB.x.underlying(); - auto& v_raw = v.underlying(); - for (i_t h = 0; h < scaled_delta_xB_nz; ++h) { - const i_t k = dxB_i[h]; + const i_t k = scaled_delta_xB.i[h]; const i_t j = basic_list[k]; if (k == basic_leaving_index) { - const f_t w_squared = dxB_x[h] * dxB_x[h]; + const f_t w = scaled_delta_xB.x[h]; + const f_t w_squared = w * w; delta_y_steepest_edge[j] = (1.0 / w_squared) * dy_norm_squared; } else { - const f_t wk = -dxB_x[h]; - f_t new_val = delta_y_steepest_edge[j] + wk * (2.0 * v_raw[k] / wr + wk * omegar); + const f_t wk = -scaled_delta_xB.x[h]; + f_t new_val = delta_y_steepest_edge[j] + wk * (2.0 * v[k] / wr + wk * omegar); new_val = std::max(new_val, 1e-4); #ifdef STEEPEST_EDGE_DEBUG if (!(new_val >= 0)) { @@ -1452,18 +1456,13 @@ i_t update_steepest_edge_norms(const simplex_solver_settings_t& settin delta_y_steepest_edge[j] = new_val; } } + work_estimate += 5*scaled_delta_xB_nz; - auto& vs_i = v_sparse.i.underlying(); - const i_t v_nz = vs_i.size(); + const i_t v_nz = v_sparse.i.size(); for (i_t k = 0; k < v_nz; ++k) { - v_raw[vs_i[k]] = 0.0; + v[v_sparse.i[k]] = 0.0; } - - scaled_delta_xB.i.byte_loads += scaled_delta_xB_nz * sizeof(i_t); - scaled_delta_xB.x.byte_loads += scaled_delta_xB_nz * sizeof(f_t); - v.byte_loads += scaled_delta_xB_nz * sizeof(f_t); - v.byte_stores += v_nz * sizeof(f_t); - v_sparse.i.byte_loads += v_nz * sizeof(i_t); + work_estimate += 2*v_nz; return 0; } @@ -1520,10 +1519,11 @@ i_t check_steepest_edge_norms(const simplex_solver_settings_t& setting template i_t compute_perturbation(const lp_problem_t& lp, const simplex_solver_settings_t& settings, - const ins_vector& delta_z_indices, + const std::vector& delta_z_indices, std::vector& z, - ins_vector& objective, - f_t& sum_perturb) + std::vector& objective, + f_t& sum_perturb, + f_t& work_estimate) { const i_t n = lp.num_cols; const i_t m = lp.num_rows; @@ -1558,6 +1558,7 @@ i_t compute_perturbation(const lp_problem_t& lp, #endif } } + work_estimate += 7*delta_z_indices.size(); #ifdef PERTURBATION_DEBUG if (num_perturb > 0) { settings.log.printf("Perturbed %d dual variables by %e\n", num_perturb, sum_perturb); @@ -1566,11 +1567,12 @@ i_t compute_perturbation(const lp_problem_t& lp, return 0; } -template +template void reset_basis_mark(const std::vector& basic_list, const std::vector& nonbasic_list, - ins_vector& basic_mark, - ins_vector& nonbasic_mark) + std::vector& basic_mark, + std::vector& nonbasic_mark, + f_t& work_estimate) { const i_t m = basic_list.size(); const i_t n = nonbasic_mark.size(); @@ -1579,18 +1581,22 @@ void reset_basis_mark(const std::vector& basic_list, for (i_t k = 0; k < n; k++) { basic_mark[k] = -1; } + work_estimate += n; for (i_t k = 0; k < n; k++) { nonbasic_mark[k] = -1; } + work_estimate += n; for (i_t k = 0; k < n_minus_m; k++) { nonbasic_mark[nonbasic_list[k]] = k; } + work_estimate +=2*n_minus_m; for (i_t k = 0; k < m; k++) { basic_mark[basic_list[k]] = k; } + work_estimate += 2*m; } template @@ -1617,20 +1623,14 @@ void compute_delta_y(const basis_update_mpf_t& ft, #ifdef CHECK_B_TRANSPOSE_SOLVE std::vector delta_y_sparse_vector_check(m); delta_y_sparse.to_dense(delta_y_sparse_vector_check); - f_t error_check = 0.0; - for (i_t k = 0; k < m; ++k) { - if (std::abs(delta_y[k] - delta_y_sparse_vector_check[k]) > 1e-6) { - settings.log.printf( - "\tBTranspose error %d %e %e\n", k, delta_y[k], delta_y_sparse_vector_check[k]); - } - error_check += std::abs(delta_y[k] - delta_y_sparse_vector_check[k]); - } - if (error_check > 1e-6) { settings.log.printf("BTranspose error %e\n", error_check); } + // Pass in basic_list and lp for this code to work std::vector residual(m); + std::vector ei(m, 0); + ei[basic_leaving_index] = -direction; b_transpose_multiply(lp, basic_list, delta_y_sparse_vector_check, residual); for (i_t k = 0; k < m; ++k) { if (std::abs(residual[k] - ei[k]) > 1e-6) { - settings.log.printf("\tBTranspose multiply error %d %e %e\n", k, residual[k], ei[k]); + printf("\tBTranspose multiply error %d %e %e\n", k, residual[k], ei[k]); } } #endif @@ -1638,12 +1638,13 @@ void compute_delta_y(const basis_update_mpf_t& ft, template i_t update_dual_variables(const sparse_vector_t& delta_y_sparse, - const ins_vector& delta_z_indices, - const ins_vector& delta_z, + const std::vector& delta_z_indices, + const std::vector& delta_z, f_t step_length, i_t leaving_index, std::vector& y, - std::vector& z) + std::vector& z, + f_t& work_estimate) { // Update dual variables // y <- y + steplength * delta_y @@ -1652,12 +1653,14 @@ i_t update_dual_variables(const sparse_vector_t& delta_y_sparse, const i_t i = delta_y_sparse.i[k]; y[i] += step_length * delta_y_sparse.x[k]; } + work_estimate += 3*delta_y_nz; // z <- z + steplength * delta_z const i_t delta_z_nz = delta_z_indices.size(); for (i_t k = 0; k < delta_z_nz; ++k) { const i_t j = delta_z_indices[k]; z[j] += step_length * delta_z[j]; } + work_estimate += 3*delta_z_nz; z[leaving_index] += step_length * delta_z[leaving_index]; return 0; } @@ -1665,14 +1668,15 @@ i_t update_dual_variables(const sparse_vector_t& delta_y_sparse, template void adjust_for_flips(const basis_update_mpf_t& ft, const std::vector& basic_list, - const ins_vector& delta_z_indices, - ins_vector& atilde_index, - ins_vector& atilde, - ins_vector& atilde_mark, + const std::vector& delta_z_indices, + std::vector& atilde_index, + std::vector& atilde, + std::vector& atilde_mark, sparse_vector_t& atilde_sparse, sparse_vector_t& delta_xB_0_sparse, - ins_vector& delta_x_flip, - std::vector& x) + std::vector& delta_x_flip, + std::vector& x, + f_t& work_estimate) { const i_t atilde_nz = atilde_index.size(); // B*delta_xB_0 = atilde @@ -1683,27 +1687,30 @@ void adjust_for_flips(const basis_update_mpf_t& ft, atilde_sparse.i.push_back(atilde_index[k]); atilde_sparse.x.push_back(atilde[atilde_index[k]]); } + work_estimate += 5*atilde_nz; ft.b_solve(atilde_sparse, delta_xB_0_sparse); const i_t delta_xB_0_nz = delta_xB_0_sparse.i.size(); for (i_t k = 0; k < delta_xB_0_nz; ++k) { const i_t j = basic_list[delta_xB_0_sparse.i[k]]; x[j] += delta_xB_0_sparse.x[k]; } - + work_estimate += 4*delta_xB_0_nz; for (i_t k = 0; k < delta_z_indices.size(); ++k) { const i_t j = delta_z_indices[k]; x[j] += delta_x_flip[j]; delta_x_flip[j] = 0.0; } - + work_estimate += 4*delta_z_indices.size(); // Clear atilde for (i_t k = 0; k < atilde_index.size(); ++k) { atilde[atilde_index[k]] = 0.0; } + work_estimate += 2*atilde_index.size(); // Clear atilde_mark for (i_t k = 0; k < atilde_mark.size(); ++k) { atilde_mark[k] = 0; } + work_estimate += atilde_mark.size(); atilde_index.clear(); } @@ -1715,21 +1722,27 @@ i_t compute_delta_x(const lp_problem_t& lp, i_t basic_leaving_index, i_t direction, const std::vector& basic_list, - const ins_vector& delta_x_flip, + const std::vector& delta_x_flip, const sparse_vector_t& rhs_sparse, const std::vector& x, sparse_vector_t& utilde_sparse, sparse_vector_t& scaled_delta_xB_sparse, - ins_vector& delta_x) + std::vector& delta_x, + f_t& work_estimate) { f_t delta_x_leaving = direction == 1 ? lp.lower[leaving_index] - x[leaving_index] : lp.upper[leaving_index] - x[leaving_index]; // B*w = -A(:, entering) ft.b_solve(rhs_sparse, scaled_delta_xB_sparse, utilde_sparse); scaled_delta_xB_sparse.negate(); + work_estimate += 2 * scaled_delta_xB_sparse.i.size(); #ifdef CHECK_B_SOLVE + const i_t m = basic_list.size(); std::vector scaled_delta_xB(m); + scaled_delta_xB_sparse.to_dense(scaled_delta_xB); + std::vector rhs(m); + rhs_sparse.to_dense(rhs); { std::vector residual_B(m); b_multiply(lp, basic_list, scaled_delta_xB, residual_B); @@ -1737,28 +1750,31 @@ i_t compute_delta_x(const lp_problem_t& lp, for (i_t k = 0; k < m; ++k) { const f_t err = std::abs(rhs[k] + residual_B[k]); if (err >= 1e-6) { - settings.log.printf( - "Bsolve diff %d %e rhs %e residual %e\n", k, err, rhs[k], residual_B[k]); + printf("Bsolve diff %d %e rhs %e residual %e\n", k, err, rhs[k], residual_B[k]); } err_max = std::max(err_max, err); } - if (err_max > 1e-6) { settings.log.printf("B multiply error %e\n", err_max); } + if (err_max > 1e-6) { printf("B multiply error %e\n", err_max); } else { printf("B multiply error %e\n", err_max); } } #endif f_t scale = scaled_delta_xB_sparse.find_coefficient(basic_leaving_index); + work_estimate += 2 * scaled_delta_xB_sparse.i.size(); if (scale != scale) { // We couldn't find a coefficient for the basic leaving index. // The coefficient might be very small. Switch to a regular solve and try to recover. std::vector rhs; rhs_sparse.to_dense(rhs); + work_estimate += 2 * rhs.size(); const i_t m = basic_list.size(); std::vector scaled_delta_xB(m); + work_estimate += m; ft.b_solve(rhs, scaled_delta_xB); if (scaled_delta_xB[basic_leaving_index] != 0.0 && !std::isnan(scaled_delta_xB[basic_leaving_index])) { scaled_delta_xB_sparse.from_dense(scaled_delta_xB); scaled_delta_xB_sparse.negate(); + work_estimate += 2 * scaled_delta_xB_sparse.i.size() + scaled_delta_xB.size(); scale = -scaled_delta_xB[basic_leaving_index]; } else { return -1; @@ -1770,6 +1786,7 @@ i_t compute_delta_x(const lp_problem_t& lp, const i_t j = basic_list[scaled_delta_xB_sparse.i[k]]; delta_x[j] = primal_step_length * scaled_delta_xB_sparse.x[k]; } + work_estimate += 4*scaled_delta_xB_nz; delta_x[leaving_index] = delta_x_leaving; delta_x[entering_index] = primal_step_length; return 0; @@ -1778,9 +1795,10 @@ i_t compute_delta_x(const lp_problem_t& lp, template void update_primal_variables(const sparse_vector_t& scaled_delta_xB_sparse, const std::vector& basic_list, - const ins_vector& delta_x, + const std::vector& delta_x, i_t entering_index, - std::vector& x) + std::vector& x, + f_t& work_estimate) { // x <- x + delta_x const i_t scaled_delta_xB_nz = scaled_delta_xB_sparse.i.size(); @@ -1788,23 +1806,26 @@ void update_primal_variables(const sparse_vector_t& scaled_delta_xB_sp const i_t j = basic_list[scaled_delta_xB_sparse.i[k]]; x[j] += delta_x[j]; } + work_estimate += 4*scaled_delta_xB_nz; // Leaving index already included above x[entering_index] += delta_x[entering_index]; } template void update_objective(const std::vector& basic_list, - const ins_vector& changed_basic_indices, + const std::vector& changed_basic_indices, const std::vector& objective, - const ins_vector& delta_x, + const std::vector& delta_x, i_t entering_index, - f_t& obj) + f_t& obj, + f_t& work_estimate) { const i_t changed_basic_nz = changed_basic_indices.size(); for (i_t k = 0; k < changed_basic_nz; ++k) { const i_t j = basic_list[changed_basic_indices[k]]; obj += delta_x[j] * objective[j]; } + work_estimate += 4*changed_basic_nz; // Leaving index already included above obj += delta_x[entering_index] * objective[entering_index]; } @@ -2219,7 +2240,7 @@ void set_primal_variables_on_bounds(const lp_problem_t& lp, } template -f_t compute_perturbed_objective(const ins_vector& objective, const std::vector& x) +f_t compute_perturbed_objective(const std::vector& objective, const std::vector& x) { const size_t n = objective.size(); f_t obj_val = 0.0; @@ -2230,7 +2251,7 @@ f_t compute_perturbed_objective(const ins_vector& objective, const std::vec } template -f_t amount_of_perturbation(const lp_problem_t& lp, const ins_vector& objective) +f_t amount_of_perturbation(const lp_problem_t& lp, const std::vector& objective) { f_t perturbation = 0.0; const i_t n = lp.num_cols; @@ -2246,7 +2267,7 @@ void prepare_optimality(i_t info, const lp_problem_t& lp, const simplex_solver_settings_t& settings, basis_update_mpf_t& ft, - const ins_vector& objective, + const std::vector& objective, const std::vector& basic_list, const std::vector& nonbasic_list, const std::vector& vstatus, @@ -2261,6 +2282,7 @@ void prepare_optimality(i_t info, { const i_t m = lp.num_rows; const i_t n = lp.num_cols; + f_t work_estimate = 0; // Work in this function is not captured sol.objective = compute_objective(lp, sol.x); sol.user_objective = compute_user_objective(lp, sol.objective); @@ -2271,7 +2293,7 @@ void prepare_optimality(i_t info, std::vector unperturbed_y(m); std::vector unperturbed_z(n); phase2::compute_dual_solution_from_basis( - lp, ft, basic_list, nonbasic_list, unperturbed_y, unperturbed_z); + lp, ft, basic_list, nonbasic_list, unperturbed_y, unperturbed_z, work_estimate); { const f_t dual_infeas = phase2::dual_infeasibility( lp, settings, vstatus, unperturbed_z, settings.tight_tol, settings.dual_tol); @@ -2468,6 +2490,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, assert(lp.lower.size() == n); assert(lp.upper.size() == n); assert(lp.rhs.size() == m); + f_t phase2_work_estimate = 0.0; + ft.clear_work_estimate(); std::vector& x = sol.x; std::vector& y = sol.y; @@ -2475,16 +2499,11 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, // Declare instrumented vectors used during initialization (before aggregator setup) // Perturbed objective - ins_vector objective(lp.objective); - ins_vector c_basic(m); - ins_vector xB_workspace(m); - - // Create instrumentation aggregator early to capture init section memory operations - instrumentation_aggregator_t aggregator; + std::vector objective(lp.objective); + std::vector c_basic(m); + std::vector xB_workspace(m); - aggregator.add("objective", objective); - aggregator.add("c_basic", c_basic); - aggregator.add("xB_workspace", xB_workspace); + phase2_work_estimate += n * 2*m; dual::status_t status = dual::status_t::UNSET; @@ -2494,13 +2513,20 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, std::vector vstatus_old = vstatus; std::vector z_old = z; + phase2_work_estimate += m; + phase2::bound_info(lp, settings); + phase2_work_estimate += 2 * n; + if (initialize_basis) { PHASE2_NVTX_RANGE("DualSimplex::init_basis"); std::vector superbasic_list; nonbasic_list.clear(); nonbasic_list.reserve(n - m); + phase2_work_estimate += (n - m); + get_basis_from_vstatus(m, vstatus, basic_list, nonbasic_list, superbasic_list); + phase2_work_estimate += 2*n; assert(superbasic_list.size() == 0); assert(nonbasic_list.size() == n - m); @@ -2517,6 +2543,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, const i_t j = basic_list[k]; c_basic[k] = objective[j]; } + phase2_work_estimate += 2*m; // Solve B'*y = cB ft.b_transpose_solve(c_basic, y); @@ -2527,7 +2554,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, "|| y || %e || cB || %e\n", vector_norm_inf(y), vector_norm_inf(c_basic)); } - phase2::compute_reduced_costs(objective.underlying(), lp.A, y, basic_list, nonbasic_list, z); + phase2::compute_reduced_costs(objective, lp.A, y, basic_list, nonbasic_list, z, phase2_work_estimate); if constexpr (print_norms) { settings.log.printf("|| z || %e\n", vector_norm_inf(z)); } #ifdef COMPUTE_DUAL_RESIDUAL @@ -2541,6 +2568,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, #endif phase2::set_primal_variables_on_bounds(lp, settings, z, vstatus, x); + phase2_work_estimate += 5 * (n - m); #ifdef PRINT_VSTATUS_CHANGES i_t num_vstatus_changes; @@ -2552,6 +2580,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, const f_t init_dual_inf = phase2::dual_infeasibility(lp, settings, vstatus, z, settings.tight_tol, settings.dual_tol); + phase2_work_estimate += 3*n; if (init_dual_inf > settings.dual_tol) { settings.log.printf("Initial dual infeasibility %e\n", init_dual_inf); } @@ -2561,9 +2590,10 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, settings.log.printf("Free variable %d vstatus %d\n", j, vstatus[j]); } } + phase2_work_estimate += 3*n; phase2::compute_primal_variables( - ft, lp.rhs, lp.A, basic_list, nonbasic_list, settings.tight_tol, x, xB_workspace); + ft, lp.rhs, lp.A, basic_list, nonbasic_list, settings.tight_tol, x, xB_workspace, phase2_work_estimate); if (toc(start_time) > settings.time_limit) { return dual::status_t::TIME_LIMIT; } if (print_norms) { settings.log.printf("|| x || %e\n", vector_norm2(x)); } @@ -2580,14 +2610,17 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, if (delta_y_steepest_edge.size() == 0) { PHASE2_NVTX_RANGE("DualSimplex::initialize_steepest_edge_norms"); delta_y_steepest_edge.resize(n); + phase2_work_estimate += n; if (slack_basis) { phase2::initialize_steepest_edge_norms_from_slack_basis( basic_list, nonbasic_list, delta_y_steepest_edge); + phase2_work_estimate += 2*n; } else { std::fill(delta_y_steepest_edge.begin(), delta_y_steepest_edge.end(), -1); + phase2_work_estimate += n; f_t steepest_edge_start = tic(); i_t status = phase2::initialize_steepest_edge_norms( - lp, settings, start_time, basic_list, ft, delta_y_steepest_edge); + lp, settings, start_time, basic_list, ft, delta_y_steepest_edge, phase2_work_estimate); f_t steepest_edge_time = toc(steepest_edge_start); if (status == CONCURRENT_HALT_RETURN) { return dual::status_t::CONCURRENT_LIMIT; } if (status == -1) { return dual::status_t::TIME_LIMIT; } @@ -2598,6 +2631,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, const i_t j = basic_list[k]; if (delta_y_steepest_edge[j] <= 0.0) { delta_y_steepest_edge[j] = 1e-4; } } + phase2_work_estimate += 2*m; settings.log.printf("using exisiting steepest edge %e\n", vector_norm2(delta_y_steepest_edge)); } @@ -2608,28 +2642,30 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, const i_t iter_limit = settings.iteration_limit; - // Instrumented vectors for memory access tracking - ins_vector delta_y(m, 0.0); - ins_vector delta_z(n, 0.0); - ins_vector delta_x(n, 0.0); - ins_vector delta_x_flip(n, 0.0); - ins_vector atilde(m, 0.0); - ins_vector atilde_mark(m, 0); - ins_vector atilde_index; - ins_vector nonbasic_mark(n); - ins_vector basic_mark(n); - ins_vector delta_z_mark(n, 0); - ins_vector delta_z_indices; - ins_vector v(m, 0.0); - ins_vector squared_infeasibilities; - ins_vector infeasibility_indices; + std::vector delta_y(m, 0.0); + std::vector delta_z(n, 0.0); + std::vector delta_x(n, 0.0); + std::vector delta_x_flip(n, 0.0); + std::vector atilde(m, 0.0); + std::vector atilde_mark(m, 0); + std::vector atilde_index; + std::vector nonbasic_mark(n); + std::vector basic_mark(n); + std::vector delta_z_mark(n, 0); + std::vector delta_z_indices; + std::vector v(m, 0.0); + std::vector squared_infeasibilities; + std::vector infeasibility_indices; + phase2_work_estimate += 6 * n + 4 * m; delta_z_indices.reserve(n); + phase2_work_estimate += n; - phase2::reset_basis_mark(basic_list, nonbasic_list, basic_mark, nonbasic_mark); + phase2::reset_basis_mark(basic_list, nonbasic_list, basic_mark, nonbasic_mark, phase2_work_estimate); - ins_vector bounded_variables(n, 0); + std::vector bounded_variables(n, 0); phase2::compute_bounded_info(lp.lower, lp.upper, bounded_variables); + phase2_work_estimate += 4*n; f_t primal_infeasibility; f_t primal_infeasibility_squared = @@ -2640,21 +2676,18 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, squared_infeasibilities, infeasibility_indices, primal_infeasibility); + phase2_work_estimate += 4*m + 2*n; #ifdef CHECK_BASIC_INFEASIBILITIES phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 0); #endif csc_matrix_t A_transpose(1, 1, 0); - aggregator.add("A_transpose.col_start", A_transpose.col_start); - aggregator.add("A_transpose.i", A_transpose.i); - aggregator.add("A_transpose.x", A_transpose.x); - { - PHASE2_NVTX_RANGE("DualSimplex::transpose_A"); - lp.A.transpose(A_transpose); - } + lp.A.transpose(A_transpose); + phase2_work_estimate += 2*lp.A.col_start[lp.A.n]; + f_t obj = compute_objective(lp, x); - init_scope.pop(); // End phase2_advanced_init range + phase2_work_estimate += 2*n; const i_t start_iter = iter; @@ -2665,11 +2698,6 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, f_t delta_y_nz_percentage = 0.0; phase2::phase2_timers_t timers(false); - // // Feature collection for regression training - // dual_simplex_features_t features; - // features.init_from_problem(lp, settings, phase, slack_basis != 0, initialize_basis); - // features.start_iteration = iter; - // Sparse vectors for main loop (declared outside loop for instrumentation) sparse_vector_t delta_y_sparse(m, 0); sparse_vector_t UTsol_sparse(m, 0); @@ -2680,82 +2708,21 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, sparse_vector_t v_sparse(m, 0); // For steepest edge norms sparse_vector_t atilde_sparse(m, 0); // For flip adjustments - // Add remaining instrumented vectors to aggregator (x, y, z, objective, c_basic, xB_workspace - // added earlier) Delta vectors - aggregator.add("delta_y", delta_y); - aggregator.add("delta_z", delta_z); - aggregator.add("delta_x", delta_x); - aggregator.add("delta_x_flip", delta_x_flip); - aggregator.add("atilde", atilde); - aggregator.add("atilde_mark", atilde_mark); - aggregator.add("atilde_index", atilde_index); - aggregator.add("nonbasic_mark", nonbasic_mark); - aggregator.add("basic_mark", basic_mark); - aggregator.add("delta_z_mark", delta_z_mark); - aggregator.add("delta_z_indices", delta_z_indices); - aggregator.add("v", v); - aggregator.add("squared_infeasibilities", squared_infeasibilities); - aggregator.add("infeasibility_indices", infeasibility_indices); - aggregator.add("bounded_variables", bounded_variables); - - // Add sparse vector internal arrays to aggregator - aggregator.add("delta_y_sparse.i", delta_y_sparse.i); - aggregator.add("delta_y_sparse.x", delta_y_sparse.x); - aggregator.add("UTsol_sparse.i", UTsol_sparse.i); - aggregator.add("UTsol_sparse.x", UTsol_sparse.x); - aggregator.add("delta_xB_0_sparse.i", delta_xB_0_sparse.i); - aggregator.add("delta_xB_0_sparse.x", delta_xB_0_sparse.x); - aggregator.add("utilde_sparse.i", utilde_sparse.i); - aggregator.add("utilde_sparse.x", utilde_sparse.x); - aggregator.add("scaled_delta_xB_sparse.i", scaled_delta_xB_sparse.i); - aggregator.add("scaled_delta_xB_sparse.x", scaled_delta_xB_sparse.x); - aggregator.add("rhs_sparse.i", rhs_sparse.i); - aggregator.add("rhs_sparse.x", rhs_sparse.x); - aggregator.add("v_sparse.i", v_sparse.i); - aggregator.add("v_sparse.x", v_sparse.x); - aggregator.add("atilde_sparse.i", atilde_sparse.i); - aggregator.add("atilde_sparse.x", atilde_sparse.x); - - // Add A matrix for entering column access during basis update - aggregator.add("A.col_start", lp.A.col_start); - aggregator.add("A.i", lp.A.i); - aggregator.add("A.x", lp.A.x); - // Track iteration interval start time for runtime measurement [[maybe_unused]] f_t interval_start_time = toc(start_time); i_t last_feature_log_iter = iter; - cuopt::scope_guard work_unit_guard([&]() { - i_t remaining_iters = iter - last_feature_log_iter; - if (remaining_iters <= 0) return; - - auto [total_loads, total_stores] = aggregator.collect_and_flush(); - // features.byte_loads = total_loads; - // features.byte_stores = total_stores; - - // f_t now = toc(start_time); - // features.interval_runtime = now - interval_start_time; - // interval_start_time = now; - - // features.iteration = iter; - // features.num_refactors = num_refactors; - // features.num_basis_updates = ft.num_updates(); - // features.sparse_delta_z_count = sparse_delta_z; - // features.dense_delta_z_count = dense_delta_z; - // features.total_bound_flips = total_bound_flips; - // features.num_infeasibilities = infeasibility_indices.size(); - // features.delta_y_nz_percentage = delta_y_nz_percentage; - // features.log_features(settings); + phase2_work_estimate += ft.work_estimate(); + ft.clear_work_estimate(); + if (work_unit_context) { + work_unit_context->record_work_sync_on_horizon((phase2_work_estimate) / 1e8); + } + phase2_work_estimate = 0.0; - if (work_unit_context) { - // TEMP; - work_unit_context->record_work_sync_on_horizon((total_loads + total_stores) / 1e8); - } - }); if (phase == 2) { settings.log.printf("%5d %+.16e %7d %.8e %.2e %.2f\n", - 0, + iter, compute_user_objective(lp, obj), infeasibility_indices.size(), primal_infeasibility_squared, @@ -2783,7 +2750,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, infeasibility_indices, direction, basic_leaving_index, - max_val); + max_val, + phase2_work_estimate); } else { // Max infeasibility pricing leaving_index = phase2::phase2_pricing( @@ -2883,6 +2851,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, timers.btran_time += timers.stop_timer(); const f_t steepest_edge_norm_check = delta_y_sparse.norm2_squared(); + phase2_work_estimate += 2*delta_y_sparse.i.size(); if (delta_y_steepest_edge[leaving_index] < settings.steepest_edge_ratio * steepest_edge_norm_check) { constexpr bool verbose = false; @@ -2904,6 +2873,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, for (i_t k = 0; k < nz_delta_y; k++) { if (std::abs(delta_y_sparse.x[k]) > 1e-12) { delta_y_nz0++; } } + phase2_work_estimate += nz_delta_y; delta_y_nz_percentage = delta_y_nz0 / static_cast(m) * 100.0; const bool use_transpose = delta_y_nz_percentage <= 30.0; { @@ -2917,11 +2887,13 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, nonbasic_mark, delta_z_mark, delta_z_indices, - delta_z); + delta_z, + phase2_work_estimate); } else { dense_delta_z++; // delta_zB = sigma*ei delta_y_sparse.to_dense(delta_y); + phase2_work_estimate += delta_y.size(); phase2::compute_reduced_cost_update(lp, basic_list, nonbasic_list, @@ -2930,7 +2902,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, direction, delta_z_mark, delta_z_indices, - delta_z); + delta_z, + phase2_work_estimate); } } timers.delta_z_time += timers.stop_timer(); @@ -2974,14 +2947,15 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, slope, lp.lower, lp.upper, - bounded_variables.underlying(), + bounded_variables, vstatus, nonbasic_list, z, - delta_z.underlying(), - delta_z_indices.underlying(), + delta_z, + delta_z_indices, nonbasic_mark); entering_index = bfrt.compute_step_length(step_length, nonbasic_entering_index); + phase2_work_estimate += bfrt.work_estimate(); if (entering_index == RATIO_TEST_NUMERICAL_ISSUES) { settings.log.printf("Numerical issues encountered in ratio test.\n"); return dual::status_t::NUMERICAL; @@ -2998,26 +2972,31 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, settings.log.printf("No entering variable found. Iter %d\n", iter); settings.log.printf("Scaled infeasibility %e\n", max_val); f_t perturbation = phase2::amount_of_perturbation(lp, objective); + phase2_work_estimate += 2*n; if (perturbation > 0.0 && phase == 2) { // Try to remove perturbation std::vector unperturbed_y(m); std::vector unperturbed_z(n); + phase2_work_estimate += m + n; phase2::compute_dual_solution_from_basis( - lp, ft, basic_list, nonbasic_list, unperturbed_y, unperturbed_z); + lp, ft, basic_list, nonbasic_list, unperturbed_y, unperturbed_z, phase2_work_estimate); { const f_t dual_infeas = phase2::dual_infeasibility( lp, settings, vstatus, unperturbed_z, settings.tight_tol, settings.dual_tol); + phase2_work_estimate += 3*n; settings.log.printf("Dual infeasibility after removing perturbation %e\n", dual_infeas); if (dual_infeas <= settings.dual_tol) { settings.log.printf("Removed perturbation of %.2e.\n", perturbation); z = unperturbed_z; y = unperturbed_y; + phase2_work_estimate += 2*n + 2*m; perturbation = 0.0; std::vector unperturbed_x(n); + phase2_work_estimate += n; phase2::compute_primal_solution_from_basis( - lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x, xB_workspace); + lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x, xB_workspace, phase2_work_estimate); x = unperturbed_x; primal_infeasibility_squared = phase2::compute_initial_primal_infeasibilities(lp, @@ -3027,11 +3006,14 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, squared_infeasibilities, infeasibility_indices, primal_infeasibility); + phase2_work_estimate += 4*m + 2*n; settings.log.printf("Updated primal infeasibility: %e\n", primal_infeasibility); objective = lp.objective; + phase2_work_estimate += 2*n; // Need to reset the objective value, since we have recomputed x obj = phase2::compute_perturbed_objective(objective, x); + phase2_work_estimate += 2*n; if (dual_infeas <= settings.dual_tol && primal_infeasibility <= settings.primal_tol) { phase2::prepare_optimality(1, primal_infeasibility, @@ -3056,14 +3038,17 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, settings.log.printf( "Continuing with perturbation removed and steepest edge norms reset\n"); // Clear delta_z before restarting the iteration + phase2_work_estimate += 3*delta_z_indices.size(); phase2::clear_delta_z( entering_index, leaving_index, delta_z_mark, delta_z_indices, delta_z); continue; } else { std::vector unperturbed_x(n); + phase2_work_estimate += n; phase2::compute_primal_solution_from_basis( - lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x, xB_workspace); + lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x, xB_workspace, phase2_work_estimate); x = unperturbed_x; + phase2_work_estimate += 2*n; primal_infeasibility_squared = phase2::compute_initial_primal_infeasibilities(lp, settings, @@ -3072,9 +3057,11 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, squared_infeasibilities, infeasibility_indices, primal_infeasibility); + phase2_work_estimate += 4*m + 2*n; const f_t orig_dual_infeas = phase2::dual_infeasibility( lp, settings, vstatus, z, settings.tight_tol, settings.dual_tol); + phase2_work_estimate += 3*n; if (primal_infeasibility <= settings.primal_tol && orig_dual_infeas <= settings.dual_tol) { @@ -3135,8 +3122,10 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, const f_t dual_infeas = phase2::dual_infeasibility(lp, settings, vstatus, z, settings.tight_tol, settings.dual_tol); + phase2_work_estimate += 3*n; settings.log.printf("Dual infeasibility %e\n", dual_infeas); const f_t primal_inf = phase2::primal_infeasibility(lp, settings, vstatus, x); + phase2_work_estimate += 3*n; settings.log.printf("Primal infeasibility %e\n", primal_inf); settings.log.printf("Updates %d\n", ft.num_updates()); settings.log.printf("Steepest edge %e\n", max_val); @@ -3153,7 +3142,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, // y <- y + steplength * delta_y // z <- z + steplength * delta_z i_t update_dual_variables_status = phase2::update_dual_variables( - delta_y_sparse, delta_z_indices, delta_z, step_length, leaving_index, y, z); + delta_y_sparse, delta_z_indices, delta_z, step_length, leaving_index, y, z, phase2_work_estimate); if (update_dual_variables_status == -1) { settings.log.printf("Numerical issues encountered in update_dual_variables.\n"); return dual::status_t::NUMERICAL; @@ -3182,7 +3171,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, delta_x_flip, atilde_mark, atilde, - atilde_index); + atilde_index, + phase2_work_estimate); timers.flip_time += timers.stop_timer(); total_bound_flips += num_flipped; @@ -3199,7 +3189,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, atilde_sparse, delta_xB_0_sparse, delta_x_flip, - x); + x, + phase2_work_estimate); timers.ftran_time += timers.stop_timer(); } @@ -3221,7 +3212,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, x, utilde_sparse, scaled_delta_xB_sparse, - delta_x) == -1) { + delta_x, + phase2_work_estimate) == -1) { settings.log.printf("Failed to compute delta_x. Iter %d\n", iter); return dual::status_t::NUMERICAL; } @@ -3248,7 +3240,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, entering_index, v, v_sparse, - delta_y_steepest_edge); + delta_y_steepest_edge, + phase2_work_estimate); #ifdef STEEPEST_EDGE_DEBUG if (steepest_edge_status == -1) { settings.log.printf("Num updates %d\n", ft.num_updates()); @@ -3260,7 +3253,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, timers.start_timer(); // x <- x + delta_x - phase2::update_primal_variables(scaled_delta_xB_sparse, basic_list, delta_x, entering_index, x); + phase2::update_primal_variables(scaled_delta_xB_sparse, basic_list, delta_x, entering_index, x, phase2_work_estimate); timers.vector_time += timers.stop_timer(); #ifdef COMPUTE_PRIMAL_RESIDUAL @@ -3276,7 +3269,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, // TODO(CMM): Do I also need to update the objective due to the bound flips? // TODO(CMM): I'm using the unperturbed objective here, should this be the perturbed objective? phase2::update_objective( - basic_list, scaled_delta_xB_sparse.i, lp.objective, delta_x, entering_index, obj); + basic_list, scaled_delta_xB_sparse.i, lp.objective, delta_x, entering_index, obj, phase2_work_estimate); timers.objective_time += timers.stop_timer(); timers.start_timer(); @@ -3291,10 +3284,11 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, x, entering_index, leaving_index, - delta_xB_0_sparse.i.underlying(), - squared_infeasibilities.underlying(), - infeasibility_indices.underlying(), - primal_infeasibility_squared); + delta_xB_0_sparse.i, + squared_infeasibilities, + infeasibility_indices, + primal_infeasibility_squared, + phase2_work_estimate); // Update primal infeasibilities due to changes in basic variables // from the leaving and entering variables phase2::update_primal_infeasibilities(lp, @@ -3303,21 +3297,22 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, x, entering_index, leaving_index, - scaled_delta_xB_sparse.i.underlying(), - squared_infeasibilities.underlying(), - infeasibility_indices.underlying(), - primal_infeasibility_squared); + scaled_delta_xB_sparse.i, + squared_infeasibilities, + infeasibility_indices, + primal_infeasibility_squared, + phase2_work_estimate); // Update the entering variable phase2::update_single_primal_infeasibility(lp.lower, lp.upper, x, settings.primal_tol, - squared_infeasibilities.underlying(), - infeasibility_indices.underlying(), + squared_infeasibilities, + infeasibility_indices, entering_index, primal_infeasibility_squared); - phase2::clean_up_infeasibilities(squared_infeasibilities, infeasibility_indices); + phase2::clean_up_infeasibilities(squared_infeasibilities, infeasibility_indices, phase2_work_estimate); #if CHECK_PRIMAL_INFEASIBILITIES phase2::check_primal_infeasibilities( @@ -3326,11 +3321,11 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, timers.update_infeasibility_time += timers.stop_timer(); // Clear delta_x - phase2::clear_delta_x(basic_list, entering_index, scaled_delta_xB_sparse, delta_x); + phase2::clear_delta_x(basic_list, entering_index, scaled_delta_xB_sparse, delta_x, phase2_work_estimate); timers.start_timer(); f_t sum_perturb = 0.0; - phase2::compute_perturbation(lp, settings, delta_z_indices, z, objective, sum_perturb); + phase2::compute_perturbation(lp, settings, delta_z_indices, z, objective, sum_perturb, phase2_work_estimate); timers.perturb_time += timers.stop_timer(); // Update basis information @@ -3394,12 +3389,14 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, settings.log.printf("Successfully repaired basis. Iteration %d\n", iter); } - phase2::reset_basis_mark(basic_list, nonbasic_list, basic_mark, nonbasic_mark); + phase2::reset_basis_mark(basic_list, nonbasic_list, basic_mark, nonbasic_mark, phase2_work_estimate); if (should_recompute_x) { std::vector unperturbed_x(n); + phase2_work_estimate += n; phase2::compute_primal_solution_from_basis( - lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x, xB_workspace); + lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x, xB_workspace, phase2_work_estimate); x = unperturbed_x; + phase2_work_estimate += 2*n; } primal_infeasibility_squared = phase2::compute_initial_primal_infeasibilities(lp, @@ -3409,6 +3406,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, squared_infeasibilities, infeasibility_indices, primal_infeasibility); + phase2_work_estimate += 4*m + 2*n; } #ifdef CHECK_BASIC_INFEASIBILITIES phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 7); @@ -3436,6 +3434,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, // Clear delta_z phase2::clear_delta_z(entering_index, leaving_index, delta_z_mark, delta_z_indices, delta_z); + phase2_work_estimate += 3*delta_z_indices.size(); f_t now = toc(start_time); @@ -3443,27 +3442,12 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, if ((iter % FEATURE_LOG_INTERVAL) == 0 && work_unit_context) { [[maybe_unused]] i_t iters_elapsed = iter - last_feature_log_iter; - auto [total_loads, total_stores] = aggregator.collect_and_flush(); - // features.byte_loads = total_loads; - // features.byte_stores = total_stores; - - // features.interval_runtime = now - interval_start_time; - // interval_start_time = now; - - // features.iteration = iter; - // features.num_refactors = num_refactors; - // features.num_basis_updates = ft.num_updates(); - // features.sparse_delta_z_count = sparse_delta_z; - // features.dense_delta_z_count = dense_delta_z; - // features.total_bound_flips = total_bound_flips; - // features.num_infeasibilities = infeasibility_indices.size(); - // features.delta_y_nz_percentage = delta_y_nz_percentage; - // features.log_features(settings); - + phase2_work_estimate += ft.work_estimate(); + ft.clear_work_estimate(); if (work_unit_context) { - // TEMP; - work_unit_context->record_work_sync_on_horizon((total_loads + total_stores) / 1e8); + work_unit_context->record_work_sync_on_horizon(phase2_work_estimate / 1e8); } + phase2_work_estimate = 0.0; last_feature_log_iter = iter; } diff --git a/cpp/src/dual_simplex/primal.cpp b/cpp/src/dual_simplex/primal.cpp index 98f5f4193b..3ec44df8bf 100644 --- a/cpp/src/dual_simplex/primal.cpp +++ b/cpp/src/dual_simplex/primal.cpp @@ -264,6 +264,7 @@ primal::status_t primal_phase2(i_t phase, assert(lp.lower.size() == n); assert(lp.upper.size() == n); assert(lp.rhs.size() == m); + f_t work_estimate = 0; std::vector basic_list(m); std::vector nonbasic_list; std::vector superbasic_list; @@ -295,7 +296,7 @@ primal::status_t primal_phase2(i_t phase, std::vector deficient; std::vector slacks_needed; i_t rank = - factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); + factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); if (rank == CONCURRENT_HALT_RETURN) { return primal::status_t::CONCURRENT_LIMIT; } else if (rank != m) { @@ -309,8 +310,9 @@ primal::status_t primal_phase2(i_t phase, basic_list, nonbasic_list, superbasic_list, - vstatus); - rank = factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); + vstatus, + work_estimate); + rank = factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); if (rank == CONCURRENT_HALT_RETURN) { return primal::status_t::CONCURRENT_LIMIT; } else if (rank == -1) { diff --git a/cpp/src/dual_simplex/right_looking_lu.cpp b/cpp/src/dual_simplex/right_looking_lu.cpp index cb9834705c..2ac0dda6ae 100644 --- a/cpp/src/dual_simplex/right_looking_lu.cpp +++ b/cpp/src/dual_simplex/right_looking_lu.cpp @@ -39,11 +39,14 @@ i_t initialize_degree_data(const csc_matrix_t& A, std::vector& Cdegree, std::vector& Rdegree, std::vector>& col_count, - std::vector>& row_count) + std::vector>& row_count, + f_t& work_estimate) { const i_t n = column_list.size(); const i_t m = A.m; std::fill(Rdegree.begin(), Rdegree.end(), 0); + work_estimate += Rdegree.size(); + i_t Bnz = 0; for (i_t k = 0; k < n; ++k) { const i_t j = column_list[k]; @@ -55,11 +58,13 @@ i_t initialize_degree_data(const csc_matrix_t& A, Bnz++; } } + work_estimate += 3*n + 2*Bnz; for (i_t k = 0; k < n; ++k) { assert(Cdegree[k] <= m && Cdegree[k] >= 0); col_count[Cdegree[k]].push_back(k); } + work_estimate += 3*n; for (i_t k = 0; k < m; ++k) { assert(Rdegree[k] <= n && Rdegree[k] >= 0); @@ -69,6 +74,8 @@ i_t initialize_degree_data(const csc_matrix_t& A, if (verbose) { printf("Zero degree row %d\n", k); } } } + work_estimate += 4*m; + return Bnz; } @@ -78,11 +85,13 @@ i_t load_elements(const csc_matrix_t& A, i_t Bnz, std::vector>& elements, std::vector& first_in_row, - std::vector& first_in_col) + std::vector& first_in_col, + f_t& work_estimate) { const i_t m = A.m; const i_t n = column_list.size(); std::vector last_element_in_row(m, kNone); + work_estimate += m; i_t nz = 0; for (i_t k = 0; k < n; ++k) { @@ -110,6 +119,7 @@ i_t load_elements(const csc_matrix_t& A, nz++; } } + work_estimate += 3*n + 15*nz; assert(nz == Bnz); for (i_t j = 0; j < n; j++) { @@ -120,6 +130,7 @@ i_t load_elements(const csc_matrix_t& A, assert(entry->i < m); } } + work_estimate += 2*n + nz; for (i_t i = 0; i < m; i++) { for (i_t p = first_in_row[i]; p != kNone; p = elements[p].next_in_row) { @@ -129,6 +140,7 @@ i_t load_elements(const csc_matrix_t& A, assert(entry->j >= 0); } } + work_estimate += 2*m + nz; return 0; } @@ -200,7 +212,8 @@ i_t markowitz_search(const std::vector& Cdegree, f_t threshold_tol, i_t& pivot_i, i_t& pivot_j, - i_t& pivot_p) + i_t& pivot_p, + f_t& work_estimate) { i_t nz = 1; const i_t m = Rdegree.size(); @@ -217,6 +230,7 @@ i_t markowitz_search(const std::vector& Cdegree, assert(Cdegree[j] == nz); const f_t max_in_col = max_in_column[j]; + work_estimate += 3 * nz; for (i_t p = first_in_col[j]; p != kNone; p = elements[p].next_in_column) { element_t* entry = &elements[p]; const i_t i = entry->i; @@ -260,6 +274,7 @@ i_t markowitz_search(const std::vector& Cdegree, assert(row_count[nz].size() >= 0); for (const i_t i : row_count[nz]) { assert(Rdegree[i] == nz); + work_estimate += 5 * nz; #ifdef THRESHOLD_ROOK_PIVOTING const f_t max_in_row_i = max_in_row[i]; #endif @@ -301,9 +316,11 @@ void update_Cdegree_and_col_count(i_t pivot_i, const std::vector& first_in_row, std::vector& Cdegree, std::vector>& col_count, - std::vector>& elements) + std::vector>& elements, + f_t& work_estimate) { // Update Cdegree and col_count + i_t loop_count = 0; for (i_t p = first_in_row[pivot_i]; p != kNone; p = elements[p].next_in_row) { element_t* entry = &elements[p]; const i_t j = entry->j; @@ -317,13 +334,16 @@ void update_Cdegree_and_col_count(i_t pivot_i, // Remove col j from col_count[cdeg] std::swap(*it, col_count[cdeg].back()); col_count[cdeg].pop_back(); + work_estimate += (it - col_count[cdeg].begin()); break; } } cdeg = --Cdegree[j]; assert(cdeg >= 0); if (j != pivot_j && cdeg >= 0) { col_count[cdeg].push_back(j); } + loop_count++; } + work_estimate += 7*loop_count; Cdegree[pivot_j] = -1; } @@ -333,9 +353,11 @@ void update_Rdegree_and_row_count(i_t pivot_i, const std::vector& first_in_col, std::vector& Rdegree, std::vector>& row_count, - std::vector>& elements) + std::vector>& elements, + f_t& work_estimate) { // Update Rdegree and row_count + i_t loop_count = 0; for (i_t p = first_in_col[pivot_j]; p != kNone; p = elements[p].next_in_column) { element_t* entry = &elements[p]; const i_t i = entry->i; @@ -348,13 +370,16 @@ void update_Rdegree_and_row_count(i_t pivot_i, // Remove row i from row_count[rdeg] std::swap(*it, row_count[rdeg].back()); row_count[rdeg].pop_back(); + work_estimate += (it - row_count[rdeg].begin()); break; } } rdeg = --Rdegree[i]; assert(rdeg >= 0); if (i != pivot_i && rdeg >= 0) { row_count[rdeg].push_back(i); } + loop_count++; } + work_estimate += 7*loop_count; Rdegree[pivot_i] = -1; } @@ -375,7 +400,8 @@ void schur_complement(i_t pivot_i, std::vector& Cdegree, std::vector>& row_count, std::vector>& col_count, - std::vector>& elements) + std::vector>& elements, + f_t& work_estimate) { for (i_t p1 = first_in_col[pivot_j]; p1 != kNone; p1 = elements[p1].next_in_column) { element_t* e = &elements[p1]; @@ -384,8 +410,10 @@ void schur_complement(i_t pivot_i, for (i_t p3 = first_in_row[i]; p3 != kNone; p3 = elements[p3].next_in_row) { row_last = p3; } + work_estimate += 2*Rdegree[i]; row_last_workspace[i] = row_last; } + work_estimate += 4*Cdegree[pivot_j]; for (i_t p0 = first_in_row[pivot_i]; p0 != kNone; p0 = elements[p0].next_in_row) { element_t* entry = &elements[p0]; @@ -402,6 +430,7 @@ void schur_complement(i_t pivot_i, column_j_workspace[i] = p1; col_last = p1; } + work_estimate += 5*Cdegree[j]; for (i_t p1 = first_in_col[pivot_j]; p1 != kNone; p1 = elements[p1].next_in_column) { element_t* e = &elements[p1]; @@ -457,6 +486,7 @@ void schur_complement(i_t pivot_i, // Remove row i from row_count[rdeg] std::swap(*it, row_count[rdeg].back()); row_count[rdeg].pop_back(); + work_estimate += 2*(it - row_count[rdeg].begin()); break; } } @@ -471,6 +501,7 @@ void schur_complement(i_t pivot_i, // Remove col j from col_count[cdeg] std::swap(*it, col_count[cdeg].back()); col_count[cdeg].pop_back(); + work_estimate += 2*(it - col_count[cdeg].begin()); break; } } @@ -478,6 +509,7 @@ void schur_complement(i_t pivot_i, col_count[cdeg].push_back(j); // Add column j to col_count[cdeg] } } + work_estimate += 10*Cdegree[pivot_j]; for (i_t p1 = first_in_col[j]; p1 != kNone; p1 = elements[p1].next_in_column) { element_t* e = &elements[p1]; @@ -485,7 +517,9 @@ void schur_complement(i_t pivot_i, assert(e->j == j); column_j_workspace[i] = kNone; } + work_estimate += 5*Cdegree[j]; } + work_estimate += 10*Rdegree[pivot_i]; } template @@ -494,16 +528,19 @@ void remove_pivot_row(i_t pivot_i, std::vector& first_in_col, std::vector& first_in_row, std::vector& max_in_column, - std::vector>& elements) + std::vector>& elements, + f_t& work_estimate) { // Remove the pivot row + i_t row_loop_count = 0; for (i_t p0 = first_in_row[pivot_i]; p0 != kNone; p0 = elements[p0].next_in_row) { element_t* e = &elements[p0]; const i_t j = e->j; if (j == pivot_j) { continue; } i_t last = kNone; f_t max_in_col_j = 0; + i_t col_loop_count = 0; for (i_t p = first_in_col[j]; p != kNone; p = elements[p].next_in_column) { element_t* entry = &elements[p]; if (entry->i == pivot_i) { @@ -520,9 +557,13 @@ void remove_pivot_row(i_t pivot_i, if (abs_entryx > max_in_col_j) { max_in_col_j = abs_entryx; } } last = p; + col_loop_count++; } + work_estimate += 3*col_loop_count; max_in_column[j] = max_in_col_j; + row_loop_count++; } + work_estimate += 5*row_loop_count; first_in_row[pivot_i] = kNone; } @@ -533,9 +574,11 @@ void remove_pivot_col(i_t pivot_i, std::vector& first_in_col, std::vector& first_in_row, std::vector& max_in_row, - std::vector>& elements) + std::vector>& elements, + f_t& work_estimate) { // Remove the pivot col + i_t col_loop_count = 0; for (i_t p1 = first_in_col[pivot_j]; p1 != kNone; p1 = elements[p1].next_in_column) { element_t* e = &elements[p1]; const i_t i = e->i; @@ -543,6 +586,7 @@ void remove_pivot_col(i_t pivot_i, #ifdef THRESHOLD_ROOK_PIVOTING f_t max_in_row_i = 0.0; #endif + i_t row_loop_count = 0; for (i_t p = first_in_row[i]; p != kNone; p = elements[p].next_in_row) { element_t* entry = &elements[p]; if (entry->j == pivot_j) { @@ -562,25 +606,30 @@ void remove_pivot_col(i_t pivot_i, } #endif last = p; + row_loop_count++; } + work_estimate += 3*row_loop_count; #ifdef THRESHOLD_ROOK_PIVOTING max_in_row[i] = max_in_row_i; #endif + col_loop_count++; } first_in_col[pivot_j] = kNone; + work_estimate += 3*col_loop_count; } } // namespace -template +template i_t right_looking_lu(const csc_matrix_t& A, const simplex_solver_settings_t& settings, f_t tol, - const VectorI& column_list, - VectorI& q, + const std::vector& column_list, + std::vector& q, csc_matrix_t& L, csc_matrix_t& U, - VectorI& pinv) + std::vector& pinv, + f_t& work_estimate) { raft::common::nvtx::range scope("LU::right_looking_lu"); const i_t n = column_list.size(); @@ -596,23 +645,30 @@ i_t right_looking_lu(const csc_matrix_t& A, std::vector Rdegree(n); // Rdegree[i] is the degree of row i std::vector Cdegree(n); // Cdegree[j] is the degree of column j + work_estimate += 2*n; std::vector> col_count( n + 1); // col_count[nz] is a list of columns with nz nonzeros in the active submatrix std::vector> row_count( n + 1); // row_count[nz] is a list of rows with nz nonzeros in the active submatrix + work_estimate += 2*n; - const i_t Bnz = initialize_degree_data(A, column_list, Cdegree, Rdegree, col_count, row_count); + const i_t Bnz = initialize_degree_data(A, column_list, Cdegree, Rdegree, col_count, row_count, work_estimate); std::vector> elements(Bnz); std::vector first_in_row(n, kNone); std::vector first_in_col(n, kNone); - load_elements(A, column_list, Bnz, elements, first_in_row, first_in_col); + work_estimate += 2*n + Bnz; + load_elements(A, column_list, Bnz, elements, first_in_row, first_in_col, work_estimate); std::vector column_j_workspace(n, kNone); std::vector row_last_workspace(n); std::vector max_in_column(n); std::vector max_in_row(m); + work_estimate += 3*n + m; + initialize_max_in_column(first_in_col, elements, max_in_column); + work_estimate += Bnz; + #ifdef THRESHOLD_ROOK_PIVOTING initialize_max_in_row(first_in_row, elements, max_in_row); #endif @@ -622,6 +678,7 @@ i_t right_looking_lu(const csc_matrix_t& A, Urow.n = Urow.m = n; Urow.row_start.resize(n + 1, -1); i_t Unz = 0; + work_estimate += 2*n; i_t Lnz = 0; L.x.clear(); @@ -631,6 +688,7 @@ i_t right_looking_lu(const csc_matrix_t& A, std::fill(pinv.begin(), pinv.end(), -1); std::vector qinv(n); std::fill(qinv.begin(), qinv.end(), -1); + work_estimate += 4*n; i_t pivots = 0; for (i_t k = 0; k < n; ++k) { @@ -657,7 +715,8 @@ i_t right_looking_lu(const csc_matrix_t& A, threshold_tol, pivot_i, pivot_j, - pivot_p); + pivot_p, + work_estimate); if (pivot_i == -1 || pivot_j == -1) { break; } element_t* pivot_entry = &elements[pivot_p]; assert(pivot_i != -1 && pivot_j != -1); @@ -688,6 +747,7 @@ i_t right_looking_lu(const csc_matrix_t& A, Unz++; } } + work_estimate += 4*(Unz - Urow.row_start[k]); // L <- [L l] L.col_start[k] = Lnz; @@ -708,10 +768,11 @@ i_t right_looking_lu(const csc_matrix_t& A, Lnz++; } } + work_estimate += 4*(Lnz - L.col_start[k]); // Update Cdegree and col_count - update_Cdegree_and_col_count(pivot_i, pivot_j, first_in_row, Cdegree, col_count, elements); - update_Rdegree_and_row_count(pivot_i, pivot_j, first_in_col, Rdegree, row_count, elements); + update_Cdegree_and_col_count(pivot_i, pivot_j, first_in_row, Cdegree, col_count, elements, work_estimate); + update_Rdegree_and_row_count(pivot_i, pivot_j, first_in_col, Rdegree, row_count, elements, work_estimate); // A22 <- A22 - l u^T schur_complement(pivot_i, @@ -730,11 +791,12 @@ i_t right_looking_lu(const csc_matrix_t& A, Cdegree, row_count, col_count, - elements); + elements, + work_estimate); // Remove the pivot row - remove_pivot_row(pivot_i, pivot_j, first_in_col, first_in_row, max_in_column, elements); - remove_pivot_col(pivot_i, pivot_j, first_in_col, first_in_row, max_in_row, elements); + remove_pivot_row(pivot_i, pivot_j, first_in_col, first_in_row, max_in_column, elements, work_estimate); + remove_pivot_col(pivot_i, pivot_j, first_in_col, first_in_row, max_in_row, elements, work_estimate); // Set pivot entry to sentinel value pivot_entry->i = -1; @@ -855,6 +917,7 @@ i_t right_looking_lu(const csc_matrix_t& A, for (i_t i = 0; i < m; ++i) { if (pinv[i] == -1) { pinv[i] = start++; } } + work_estimate += m; // Finalize the permutation q. Do this by first completing the inverse permutation qinv. // Then invert qinv to get the final permutation q. @@ -862,7 +925,9 @@ i_t right_looking_lu(const csc_matrix_t& A, for (i_t j = 0; j < n; ++j) { if (qinv[j] == -1) { qinv[j] = start++; } } + work_estimate += n; inverse_permutation(qinv, q); + work_estimate += 2*n; return pivots; } @@ -875,6 +940,7 @@ i_t right_looking_lu(const csc_matrix_t& A, for (i_t p = 0; p < Lnz; ++p) { L.i[p] = pinv[L.i[p]]; } + work_estimate += 3*Lnz; #ifdef CHECK_LOWER_TRIANGULAR for (i_t j = 0; j < n; ++j) { @@ -889,17 +955,23 @@ i_t right_looking_lu(const csc_matrix_t& A, #endif csc_matrix_t U_unpermuted(n, n, 1); + work_estimate += n; Urow.to_compressed_col( U_unpermuted); // Convert Urow to U stored in compressed sparse column format + work_estimate += n + Unz; std::vector row_perm(n); + work_estimate += n; inverse_permutation(pinv, row_perm); + work_estimate += 2*n; std::vector identity(n); for (i_t k = 0; k < n; k++) { identity[k] = k; } + work_estimate += 2*n; U_unpermuted.permute_rows_and_cols(identity, q, U); + work_estimate += 3*U.n + 5*Unz; #ifdef CHECK_UPPER_TRIANGULAR for (i_t k = 0; k < n; ++k) { @@ -929,6 +1001,7 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, // We return the inverser row permutation vector pinv and the column permutation vector q f_t factorization_start_time = tic(); + f_t work_estimate = 0; const i_t n = A.n; const i_t m = A.m; assert(pinv.size() == m); @@ -946,11 +1019,11 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, column_list[k] = k; } - const i_t Bnz = initialize_degree_data(A, column_list, Cdegree, Rdegree, col_count, row_count); + const i_t Bnz = initialize_degree_data(A, column_list, Cdegree, Rdegree, col_count, row_count, work_estimate); std::vector> elements(Bnz); std::vector first_in_row(m, kNone); std::vector first_in_col(n, kNone); - load_elements(A, column_list, Bnz, elements, first_in_row, first_in_col); + load_elements(A, column_list, Bnz, elements, first_in_row, first_in_col, work_estimate); std::vector column_j_workspace(m, kNone); std::vector row_last_workspace(m); @@ -997,7 +1070,8 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, threshold_tol, pivot_i, pivot_j, - pivot_p); + pivot_p, + work_estimate); if (pivot_i == -1 || pivot_j == -1) { settings.log.debug("Breaking can't find a pivot %d\n", k); break; @@ -1016,9 +1090,9 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, // Update Cdegree and col_count update_Cdegree_and_col_count( - pivot_i, pivot_j, first_in_row, Cdegree, col_count, elements); + pivot_i, pivot_j, first_in_row, Cdegree, col_count, elements, work_estimate); update_Rdegree_and_row_count( - pivot_i, pivot_j, first_in_col, Rdegree, row_count, elements); + pivot_i, pivot_j, first_in_col, Rdegree, row_count, elements, work_estimate); // A22 <- A22 - l u^T schur_complement(pivot_i, @@ -1037,12 +1111,13 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, Cdegree, row_count, col_count, - elements); + elements, + work_estimate); // Remove the pivot row remove_pivot_row( - pivot_i, pivot_j, first_in_col, first_in_row, max_in_column, elements); - remove_pivot_col(pivot_i, pivot_j, first_in_col, first_in_row, max_in_row, elements); + pivot_i, pivot_j, first_in_col, first_in_row, max_in_column, elements, work_estimate); + remove_pivot_col(pivot_i, pivot_j, first_in_col, first_in_row, max_in_row, elements, work_estimate); // Set pivot entry to sentinel value pivot_entry->i = -1; @@ -1149,7 +1224,7 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE -template int right_looking_lu>( +template int right_looking_lu( const csc_matrix_t& A, const simplex_solver_settings_t& settings, double tol, @@ -1157,17 +1232,8 @@ template int right_looking_lu>( std::vector& q, csc_matrix_t& L, csc_matrix_t& U, - std::vector& pinv); - -template int right_looking_lu>( - const csc_matrix_t& A, - const simplex_solver_settings_t& settings, - double tol, - const ins_vector& column_list, - ins_vector& q, - csc_matrix_t& L, - csc_matrix_t& U, - ins_vector& pinv); + std::vector& pinv, + double& work_estimate); template int right_looking_lu_row_permutation_only( const csc_matrix_t& A, diff --git a/cpp/src/dual_simplex/right_looking_lu.hpp b/cpp/src/dual_simplex/right_looking_lu.hpp index 179fff01b2..39182eb3f4 100644 --- a/cpp/src/dual_simplex/right_looking_lu.hpp +++ b/cpp/src/dual_simplex/right_looking_lu.hpp @@ -14,15 +14,16 @@ namespace cuopt::linear_programming::dual_simplex { -template +template i_t right_looking_lu(const csc_matrix_t& A, const simplex_solver_settings_t& settings, f_t tol, - const VectorI& column_list, - VectorI& q, + const std::vector& column_list, + std::vector& q, csc_matrix_t& L, csc_matrix_t& U, - VectorI& pinv); + std::vector& pinv, + f_t& work_estimate); template i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, diff --git a/cpp/src/dual_simplex/singletons.cpp b/cpp/src/dual_simplex/singletons.cpp index 347604dcea..df535c5843 100644 --- a/cpp/src/dual_simplex/singletons.cpp +++ b/cpp/src/dual_simplex/singletons.cpp @@ -7,12 +7,10 @@ #include #include -#include +#include #include -using cuopt::ins_vector; - namespace cuopt::linear_programming::dual_simplex { // Destroys the queue but prints it @@ -27,10 +25,11 @@ void print_queue(std::queue& q) printf("\n"); } -template +template i_t order_singletons(std::queue& singleton_queue, i_t& singletons_found, - row_col_graph_t& G) + row_col_graph_t& G, + f_t& work_estimate) { constexpr i_t kEmpty = -1; while (!singleton_queue.empty()) { @@ -68,6 +67,7 @@ i_t order_singletons(std::queue& singleton_queue, break; } } + work_estimate += 2*(xend - G.Xp[xpivot]); assert(ypivot != kEmpty); assert(G.Ydeg[ypivot] >= 0); @@ -85,6 +85,7 @@ i_t order_singletons(std::queue& singleton_queue, singleton_queue.push(x); } } + work_estimate += 2*(yend - G.Yp[ypivot]); // Mark the pivot by flipping the degrees G.Xdeg[xpivot] = FLIP(1); G.Ydeg[ypivot] = FLIP(G.Ydeg[ypivot]); @@ -102,7 +103,8 @@ template void create_row_representation(const csc_matrix_t& A, std::vector& row_start, std::vector& col_index, - std::vector& workspace) + std::vector& workspace, + f_t& work_estimate) { // row counts i_t n = A.n; @@ -114,25 +116,32 @@ void create_row_representation(const csc_matrix_t& A, for (i_t i = 0; i < m; ++i) { workspace[i] = 0; } + work_estimate += m; // Compute row degrees for (i_t p = 0; p < nz; ++p) { workspace[A.i[p]]++; } + work_estimate += 3*nz; + // Compute rowstart and overwrite workspace cumulative_sum(workspace, row_start); + work_estimate += 4*workspace.size(); for (i_t j = 0; j < n; ++j) { - for (i_t p = A.col_start[j]; p < A.col_start[j + 1]; ++p) { + const i_t col_start = A.col_start[j]; + const i_t col_end = A.col_start[j + 1]; + for (i_t p = col_start; p < col_end; ++p) { i_t q = workspace[A.i[p]]++; col_index[q] = j; } } + work_estimate += 2*n + 4*nz; } // Complete the permuation -template -i_t complete_permutation(i_t singletons, std::vector& Xdeg, VectorI& Xperm) +template +i_t complete_permutation(i_t singletons, std::vector& Xdeg, std::vector& Xperm) { i_t n = Xdeg.size(); assert(Xperm.size() == n); @@ -157,12 +166,13 @@ i_t complete_permutation(i_t singletons, std::vector& Xdeg, VectorI& Xperm) return num_empty; } -template +template i_t find_singletons(const csc_matrix_t& A, i_t& row_singletons, - VectorI& row_perm, + std::vector& row_perm, i_t& col_singletons, - VectorI& col_perm) + std::vector& col_perm, + f_t& work_estimate) { i_t n = A.n; i_t m = A.m; @@ -173,6 +183,7 @@ i_t find_singletons(const csc_matrix_t& A, std::vector Cdeg(n); std::vector Rp(m + 1); std::vector Rj(nz); + work_estimate += 3*m + n + nz; i_t max_queue_len = std::max(m, n); std::queue singleton_queue; @@ -186,18 +197,20 @@ i_t find_singletons(const csc_matrix_t& A, Rdeg[A.i[p]]++; } } + work_estimate += 2*n + 2*nz; // Add all columns of degree 1 to the queue for (i_t j = n - 1; j >= 0; --j) { if (Cdeg[j] == 1) { singleton_queue.push(j); } } + work_estimate += n + singleton_queue.size(); bool row_form = false; i_t singletons_found = 0; col_singletons = 0; if (!singleton_queue.empty()) { // Don't create the row representation unless we found a singleton - create_row_representation(A, Rp, Rj, workspace); + create_row_representation(A, Rp, Rj, workspace, work_estimate); row_form = true; // Find column singletons @@ -205,8 +218,8 @@ i_t find_singletons(const csc_matrix_t& A, auto& row_perm_vec = static_cast&>(row_perm); row_col_graph_t graph{Cdeg.begin(), col_perm_vec.begin(), - A.col_start.underlying().cbegin(), - A.i.underlying().cbegin(), + A.col_start.cbegin(), + A.i.cbegin(), Rdeg.begin(), row_perm_vec.begin(), Rp.cbegin(), @@ -215,7 +228,9 @@ i_t find_singletons(const csc_matrix_t& A, #ifdef SINGLETON_DEBUG printf("Searching for column singletons. Initial size %ld\n", singleton_queue.size()); #endif - col_singletons = order_singletons(singleton_queue, singletons_found, graph); + + col_singletons = order_singletons(singleton_queue, singletons_found, graph, work_estimate); + #ifdef SINGLETON_DEBUG printf("Found %d column singletons\n", col_singletons); #endif @@ -225,12 +240,13 @@ i_t find_singletons(const csc_matrix_t& A, for (i_t i = m - 1; i >= 0; --i) { if (Rdeg[i] == 1) { singleton_queue.push(i); } } + work_estimate += m + singleton_queue.size(); row_singletons = 0; if (!singleton_queue.empty()) { if (!row_form) { // If we haven't created the row representation yet, we need to - create_row_representation(A, Rp, Rj, workspace); // use Rdeg as workspace + create_row_representation(A, Rp, Rj, workspace, work_estimate); // use Rdeg as workspace } // Find row singletons @@ -242,13 +258,13 @@ i_t find_singletons(const csc_matrix_t& A, Rj.cbegin(), Cdeg.begin(), col_perm_vec2.begin(), - A.col_start.underlying().cbegin(), - A.i.underlying().cbegin()}; + A.col_start.cbegin(), + A.i.cbegin()}; #ifdef SINGLETON_DEBUG printf("Searching for row singletons %ld\n", singleton_queue.size()); #endif i_t last = singletons_found; - row_singletons = order_singletons(singleton_queue, singletons_found, graph); + row_singletons = order_singletons(singleton_queue, singletons_found, graph, work_estimate); row_singletons = row_singletons - last; #ifdef SINGLETON_DEBUG printf("Found %d row singletons. %d\n", row_singletons, singletons_found); @@ -263,10 +279,12 @@ i_t find_singletons(const csc_matrix_t& A, printf("Col singletons %d\n", col_singletons); #endif i_t num_empty_cols = complete_permutation(singletons_found, Cdeg, col_perm); + work_estimate += 2*Cdeg.size(); #ifdef SINGLETON_DEBUG printf("Completed col perm. %d empty cols. Starting row perm\n", num_empty_cols); #endif i_t num_empty_rows = complete_permutation(singletons_found, Rdeg, row_perm); + work_estimate += 2*Rdeg.size(); #ifdef SINGLETON_DEBUG printf("Empty rows %d Empty columns %d\n", num_empty_rows, num_empty_cols); #endif @@ -277,35 +295,28 @@ i_t find_singletons(const csc_matrix_t& A, template struct row_col_graph_t; -template int order_singletons(std::queue& singleton_queue, +template int order_singletons(std::queue& singleton_queue, int& singletons_found, - row_col_graph_t& G); + row_col_graph_t& G, + double& work_estimate); // \param [in,out] workspace - size m template void create_row_representation(const csc_matrix_t& A, std::vector& row_start, std::vector& col_index, - std::vector& workspace); + std::vector& workspace, + double& work_estimate); // Complete the permuation -template int complete_permutation>(int singletons, - std::vector& Xdeg, - std::vector& Xperm); -template int complete_permutation>(int singletons, - std::vector& Xdeg, - ins_vector& Xperm); - -template int find_singletons>(const csc_matrix_t& A, +template int complete_permutation(int singletons, + std::vector& Xdeg, + std::vector& Xperm); + +template int find_singletons(const csc_matrix_t& A, int& row_singletons, std::vector& row_perm, int& col_singleton, - std::vector& col_perm); - -template int find_singletons>(const csc_matrix_t& A, - int& row_singletons, - ins_vector& row_perm, - int& col_singleton, - ins_vector& col_perm); - + std::vector& col_perm, + double& work_estimate); #endif } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/singletons.hpp b/cpp/src/dual_simplex/singletons.hpp index 2a1ac3e55f..9bbe9270f7 100644 --- a/cpp/src/dual_simplex/singletons.hpp +++ b/cpp/src/dual_simplex/singletons.hpp @@ -27,26 +27,29 @@ struct row_col_graph_t { typename std::vector::const_iterator Yi; }; -template +template i_t order_singletons(std::queue& singleton_queue, i_t& singletons_found, - row_col_graph_t& G); + row_col_graph_t& G, + f_t& work_estimate); // \param [in,out] workspace - size m template void create_row_representationon(const csc_matrix_t& A, std::vector& row_start, std::vector& col_index, - std::vector& workspace); + std::vector& workspace, + f_t& work_estimate); // Complete the permuation -template -i_t complete_permutationn(i_t singletons, std::vector& Xdeg, std::vector& Xperm); +template +i_t complete_permutation(i_t singletons, std::vector& Xdeg, std::vector& Xperm); -template +template i_t find_singletons(const csc_matrix_t& A, i_t& row_singletons, - VectorI& row_perm, + std::vector& row_perm, i_t& col_singleton, - VectorI& col_perm); + std::vector& col_perm, + f_t& work_estimate); } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp index 6d9cf2b809..c54d7e7354 100644 --- a/cpp/src/dual_simplex/sparse_matrix.cpp +++ b/cpp/src/dual_simplex/sparse_matrix.cpp @@ -8,11 +8,9 @@ // #include #include #include -#include #include -using cuopt::ins_vector; // #include // #include @@ -37,8 +35,8 @@ void csc_matrix_t::reallocate(i_t new_nz) this->nz_max = new_nz; } -template -void cumulative_sum(std::vector& inout, OutputVector& output) +template +void cumulative_sum(std::vector& inout, std::vector& output) { i_t n = inout.size(); assert(output.size() == n + 1); @@ -171,6 +169,7 @@ void csc_matrix_t::append_column(const std::vector& x) this->n++; } +// Work = 4*x.i.size() template void csc_matrix_t::append_column(const sparse_vector_t& x) { @@ -193,6 +192,7 @@ void csc_matrix_t::append_column(const sparse_vector_t& x) this->n++; } +// Work is 5*x_nz template void csc_matrix_t::append_column(i_t x_nz, i_t* i, f_t* x) { @@ -212,6 +212,7 @@ void csc_matrix_t::append_column(i_t x_nz, i_t* i, f_t* x) this->n++; } +// Work = 6*nz + 2*n template i_t csc_matrix_t::transpose(csc_matrix_t& AT) const { @@ -658,8 +659,8 @@ i_t csr_matrix_t::check_matrix(std::string matrix_name) const } // x <- x + alpha * A(:, j) -template -void scatter_dense(const csc_matrix_t& A, i_t j, f_t alpha, VectorF& x) +template +void scatter_dense(const csc_matrix_t& A, i_t j, f_t alpha, std::vector& x) { const i_t col_start = A.col_start[j]; const i_t col_end = A.col_start[j + 1]; @@ -671,9 +672,13 @@ void scatter_dense(const csc_matrix_t& A, i_t j, f_t alpha, VectorF& x } // x <- x + alpha * A(:, j) -template -void scatter_dense( - const csc_matrix_t& A, i_t j, f_t alpha, VectorF& x, VectorI& mark, VectorI& indices) +template +void scatter_dense(const csc_matrix_t& A, + i_t j, + f_t alpha, + std::vector& x, + std::vector& mark, + std::vector& indices) { const i_t col_start = A.col_start[j]; const i_t col_end = A.col_start[j + 1]; @@ -933,10 +938,7 @@ template class csc_matrix_t; template class csr_matrix_t; -template void cumulative_sum>(std::vector& inout, - std::vector& output); -template void cumulative_sum>(std::vector& inout, - ins_vector& output); +template void cumulative_sum(std::vector& inout, std::vector& output); template int coo_to_csc(const std::vector& Ai, const std::vector& Aj, @@ -952,12 +954,12 @@ template int scatter(const csc_matrix_t& A, csc_matrix_t& C, int nz); -template void scatter_dense>(const csc_matrix_t& A, - int j, - double alpha, - std::vector& x); +template void scatter_dense(const csc_matrix_t& A, + int j, + double alpha, + std::vector& x); -template void scatter_dense, std::vector>( +template void scatter_dense( const csc_matrix_t& A, int j, double alpha, @@ -965,19 +967,6 @@ template void scatter_dense, std::vector>( std::vector& mark, std::vector& indices); -template void scatter_dense>(const csc_matrix_t& A, - int j, - double alpha, - ins_vector& x); - -template void scatter_dense, ins_vector>( - const csc_matrix_t& A, - int j, - double alpha, - ins_vector& x, - ins_vector& mark, - ins_vector& indices); - template int multiply(const csc_matrix_t& A, const csc_matrix_t& B, csc_matrix_t& C); diff --git a/cpp/src/dual_simplex/sparse_matrix.hpp b/cpp/src/dual_simplex/sparse_matrix.hpp index 8f8f622518..3011a4c713 100644 --- a/cpp/src/dual_simplex/sparse_matrix.hpp +++ b/cpp/src/dual_simplex/sparse_matrix.hpp @@ -9,7 +9,6 @@ #include #include -#include #include #include @@ -17,9 +16,6 @@ #include #include -// Import instrumented vector -using cuopt::ins_vector; - namespace cuopt::linear_programming::dual_simplex { template @@ -132,9 +128,9 @@ class csc_matrix_t { i_t m; // number of rows i_t n; // number of columns i_t nz_max; // maximum number of entries - ins_vector col_start; // column pointers (size n + 1) - ins_vector i; // row indices, size nz_max - ins_vector x; // numerical values, size nz_max + std::vector col_start; // column pointers (size n + 1) + std::vector i; // row indices, size nz_max + std::vector x; // numerical values, size nz_max static_assert(std::is_signed_v); // Require signed integers (we make use of this // to avoid extra space / computation) @@ -180,15 +176,15 @@ class csr_matrix_t { i_t nz_max; // maximum number of nonzero entries i_t m; // number of rows i_t n; // number of cols - ins_vector row_start; // row pointers (size m + 1) - ins_vector j; // column indices, size nz_max - ins_vector x; // numerical values, size nz_max + std::vector row_start; // row pointers (size m + 1) + std::vector j; // column indices, size nz_max + std::vector x; // numerical values, size nz_max static_assert(std::is_signed_v); }; -template -void cumulative_sum(std::vector& inout, OutputVector& output); +template +void cumulative_sum(std::vector& inout, std::vector& output); template i_t coo_to_csc(const std::vector& Ai, @@ -207,12 +203,16 @@ i_t scatter(const csc_matrix_t& A, i_t nz); // x <- x + alpha * A(:, j) -template -void scatter_dense(const csc_matrix_t& A, i_t j, f_t alpha, VectorF& x); +template +void scatter_dense(const csc_matrix_t& A, i_t j, f_t alpha, std::vector& x); -template -void scatter_dense( - const csc_matrix_t& A, i_t j, f_t alpha, VectorF& x, VectorI& mark, VectorI& indices); +template +void scatter_dense(const csc_matrix_t& A, + i_t j, + f_t alpha, + std::vector& x, + std::vector& mark, + std::vector& indices); // Compute C = A*B where C is m x n, A is m x k, and B = k x n // Do this by computing C(:, j) = A*B(:, j) = sum (i=1 to k) A(:, k)*B(i, j) diff --git a/cpp/src/dual_simplex/sparse_vector.cpp b/cpp/src/dual_simplex/sparse_vector.cpp index 1c6a15eb21..0d34d4c390 100644 --- a/cpp/src/dual_simplex/sparse_vector.cpp +++ b/cpp/src/dual_simplex/sparse_vector.cpp @@ -13,8 +13,6 @@ namespace cuopt::linear_programming::dual_simplex { -using cuopt::ins_vector; - template sparse_vector_t::sparse_vector_t(const csc_matrix_t& A, i_t col) { @@ -70,17 +68,12 @@ void sparse_vector_t::from_dense(const std::vector& in) n = in.size(); i.reserve(n); x.reserve(n); - auto& i_vec = i.underlying(); - auto& x_vec = x.underlying(); for (i_t k = 0; k < n; ++k) { if (in[k] != 0) { - i_vec.push_back(k); - x_vec.push_back(in[k]); + i.push_back(k); + x.push_back(in[k]); } } - const size_t nz = i_vec.size(); - i.byte_stores += nz * sizeof(i_t); - x.byte_stores += nz * sizeof(f_t); } template @@ -92,8 +85,8 @@ void sparse_vector_t::to_csc(csc_matrix_t& A) const A.col_start.resize(2); A.col_start[0] = 0; A.col_start[1] = i.size(); - A.i = i.underlying(); - A.x = x.underlying(); + A.i = i; + A.x = x; } template @@ -107,17 +100,6 @@ void sparse_vector_t::to_dense(std::vector& x_dense) const } } -template -void sparse_vector_t::to_dense(ins_vector& x_dense) const -{ - x_dense.clear(); - x_dense.resize(n, 0.0); - const i_t nz = i.size(); - for (i_t k = 0; k < nz; ++k) { - x_dense[i[k]] = x[k]; - } -} - template void sparse_vector_t::scatter(std::vector& x_dense) const { @@ -128,29 +110,12 @@ void sparse_vector_t::scatter(std::vector& x_dense) const } } -template -void sparse_vector_t::scatter(ins_vector& x_dense) const -{ - // Assumes x_dense is already cleared - auto& sv_i = i.underlying(); - auto& sv_x = x.underlying(); - auto& dense = x_dense.underlying(); - const i_t nz = sv_i.size(); - for (i_t k = 0; k < nz; ++k) { - dense[sv_i[k]] += sv_x[k]; - } - i.byte_loads += nz * sizeof(i_t); - x.byte_loads += nz * sizeof(f_t); - x_dense.byte_loads += nz * sizeof(f_t); - x_dense.byte_stores += nz * sizeof(f_t); -} - template void sparse_vector_t::inverse_permute_vector(const std::vector& p) { assert(p.size() == n); i_t nz = i.size(); - ins_vector i_perm(nz); + std::vector i_perm(nz); for (i_t k = 0; k < nz; ++k) { i_perm[k] = p[i[k]]; } @@ -166,7 +131,7 @@ void sparse_vector_t::inverse_permute_vector(const std::vector& p i_t nz = i.size(); y.n = n; y.x = x; - ins_vector i_perm(nz); + std::vector i_perm(nz); for (i_t k = 0; k < nz; ++k) { i_perm[k] = p[i[k]]; } @@ -232,8 +197,8 @@ void sparse_vector_t::sort() } else { // Use a n log n sort const i_t nz = i.size(); - ins_vector i_sorted(nz); - ins_vector x_sorted(nz); + std::vector i_sorted(nz); + std::vector x_sorted(nz); std::vector perm(nz); for (i_t k = 0; k < nz; ++k) { perm[k] = k; diff --git a/cpp/src/dual_simplex/sparse_vector.hpp b/cpp/src/dual_simplex/sparse_vector.hpp index fadd2df338..42fffa6e46 100644 --- a/cpp/src/dual_simplex/sparse_vector.hpp +++ b/cpp/src/dual_simplex/sparse_vector.hpp @@ -9,15 +9,11 @@ #include #include -#include #include namespace cuopt::linear_programming::dual_simplex { -// Import instrumented vector type -using cuopt::ins_vector; - // A sparse vector stored as a list of nonzero coefficients and their indices template class sparse_vector_t { @@ -37,13 +33,8 @@ class sparse_vector_t { void to_csc(csc_matrix_t& A) const; // convert a sparse vector into a dense vector. Dense vector is cleared and resized. void to_dense(std::vector& x_dense) const; - // convert a sparse vector into an instrumented dense vector. - void to_dense(ins_vector& x_dense) const; - // scatter a sparse vector into a dense vector. Assumes x_dense is already cleared or - // preinitialized + // scatter a sparse vector into a dense vector. Assumes x_dense is already cleared or preinitialized void scatter(std::vector& x_dense) const; - // scatter into instrumented vector - void scatter(ins_vector& x_dense) const; // inverse permute the current sparse vector void inverse_permute_vector(const std::vector& p); // inverse permute a sparse vector into another sparse vector @@ -71,8 +62,8 @@ class sparse_vector_t { void squeeze(sparse_vector_t& y) const; i_t n; - ins_vector i; - ins_vector x; + std::vector i; + std::vector x; }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/triangle_solve.cpp b/cpp/src/dual_simplex/triangle_solve.cpp index 2f280295bf..89f96ff0ec 100644 --- a/cpp/src/dual_simplex/triangle_solve.cpp +++ b/cpp/src/dual_simplex/triangle_solve.cpp @@ -26,19 +26,22 @@ template i_t reach(const sparse_vector_t& b, const std::optional>& pinv, csc_matrix_t& G, - std::vector& xi) + std::vector& xi, + f_t& work_estimate) { const i_t m = G.m; i_t top = m; const i_t bnz = b.i.size(); for (i_t p = 0; p < bnz; ++p) { if (!MARKED(G.col_start, b.i[p])) { // start a DFS at unmarked node i - top = depth_first_search(b.i[p], pinv, G, top, xi, xi.begin() + m); + top = depth_first_search(b.i[p], pinv, G, top, xi, xi.begin() + m, work_estimate); } } + work_estimate += 4*bnz; for (i_t p = top; p < m; ++p) { // restore G MARK(G.col_start, xi[p]); } + work_estimate += 3*(m - top); return top; } @@ -61,7 +64,8 @@ i_t depth_first_search(i_t j, csc_matrix_t& G, i_t top, std::vector& xi, - typename std::vector::iterator pstack) + typename std::vector::iterator pstack, + f_t& work_estimate) { i_t head = 0; xi[0] = j; // Initialize the recursion stack @@ -77,7 +81,9 @@ i_t depth_first_search(i_t j, } done = 1; // Node j is done if no unvisited neighbors i_t p2 = (jnew < 0) ? 0 : UNFLIP(G.col_start[jnew + 1]); - for (i_t p = pstack[head]; p < p2; ++p) { // Examine all neighbors of j + const i_t psav = pstack[head]; + i_t p; + for (p = psav; p < p2; ++p) { // Examine all neighbors of j i_t i = G.i[p]; // Consider neighbor i if (MARKED(G.col_start, i)) { continue; // skip visited node i @@ -87,6 +93,7 @@ i_t depth_first_search(i_t j, done = 0; // node j is not done break; // break to start dfs at node i } + work_estimate += 3*(p- psav) + 10; if (done) { pstack[head] = 0; // restore pstack so it can be used again in other routines xi[head] = 0; // restore xi so it can be used again in other routines @@ -102,18 +109,23 @@ i_t sparse_triangle_solve(const sparse_vector_t& b, const std::optional>& pinv, std::vector& xi, csc_matrix_t& G, - f_t* x) + f_t* x, + f_t& work_estimate) { i_t m = G.m; assert(b.n == m); - i_t top = reach(b, pinv, G, xi); + i_t top = reach(b, pinv, G, xi, work_estimate); for (i_t p = top; p < m; ++p) { x[xi[p]] = 0; // Clear x vector } + work_estimate += 2*(m - top); + const i_t bnz = b.i.size(); for (i_t p = 0; p < bnz; ++p) { x[b.i[p]] = b.x[p]; // Scatter b } + work_estimate += 3*bnz; + for (i_t px = top; px < m; ++px) { i_t j = xi[px]; // x(j) is nonzero i_t J = pinv ? ((*pinv)[j]) : j; // j maps to column J of G @@ -131,6 +143,7 @@ i_t sparse_triangle_solve(const sparse_vector_t& b, end = G.col_start[J + 1] - 1; } x[j] /= Gjj; + work_estimate += 4*(end - p) + 7; for (; p < end; ++p) { x[G.i[p]] -= G.x[p] * x[j]; // x(i) -= G(i,j) * x(j) } @@ -147,26 +160,30 @@ i_t sparse_triangle_solve(const sparse_vector_t& b, template int reach(const sparse_vector_t& b, const std::optional>& pinv, csc_matrix_t& G, - std::vector& xi); + std::vector& xi, + double& work_estimate); template int depth_first_search(int j, const std::optional>& pinv, csc_matrix_t& G, int top, std::vector& xi, - std::vector::iterator pstack); + std::vector::iterator pstack, + double& work_estimate); template int sparse_triangle_solve(const sparse_vector_t& b, const std::optional>& pinv, std::vector& xi, csc_matrix_t& G, - double* x); + double* x, + double& work_estimate); template int sparse_triangle_solve(const sparse_vector_t& b, const std::optional>& pinv, std::vector& xi, csc_matrix_t& G, - double* x); + double* x, + double& work_estimate); #endif } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/triangle_solve.hpp b/cpp/src/dual_simplex/triangle_solve.hpp index 20af92cc93..7a99ee400e 100644 --- a/cpp/src/dual_simplex/triangle_solve.hpp +++ b/cpp/src/dual_simplex/triangle_solve.hpp @@ -25,126 +25,87 @@ namespace cuopt::linear_programming::dual_simplex { // Solve L*x = b. On input x contains the right-hand side b, on output the // solution -template -i_t lower_triangular_solve(const csc_matrix_t& L, VectorF& x) +template +i_t lower_triangular_solve(const csc_matrix_t& L, std::vector& x, f_t& work_estimate) { i_t n = L.n; assert(x.size() == n); - auto& L_cs = L.col_start.underlying(); - auto& L_i = L.i.underlying(); - auto& L_x = L.x.underlying(); - - size_t nnz_processed = 0; for (i_t j = 0; j < n; ++j) { - i_t col_start = L_cs[j]; - i_t col_end = L_cs[j + 1]; + i_t col_start = L.col_start[j]; + i_t col_end = L.col_start[j + 1]; if (x[j] != 0.0) { - x[j] /= L_x[col_start]; - auto x_j = x[j]; // hoist this load out of the loop + x[j] /= L.x[col_start]; + const f_t x_j = x[j]; // hoist this load out of the loop // as the compiler cannot guess that x[j] never aliases to x[L.i[p]] for (i_t p = col_start + 1; p < col_end; ++p) { - x[L_i[p]] -= L_x[p] * x_j; + x[L.i[p]] -= L.x[p] * x_j; } - nnz_processed += col_end - col_start; } } - - L.col_start.byte_loads += (n + 1) * sizeof(i_t); - L.i.byte_loads += nnz_processed * sizeof(i_t); - L.x.byte_loads += nnz_processed * sizeof(f_t); - + work_estimate += 3*n + 3*L.col_start[n]; return 0; } // Solve L'*x = b. On input x contains the right-hand side b, on output the // solution -template -i_t lower_triangular_transpose_solve(const csc_matrix_t& L, VectorF& x) +template +i_t lower_triangular_transpose_solve(const csc_matrix_t& L, std::vector& x, f_t& work_estimate) { const i_t n = L.n; assert(x.size() == n); - auto& L_cs = L.col_start.underlying(); - auto& L_i = L.i.underlying(); - auto& L_x = L.x.underlying(); - for (i_t j = n - 1; j >= 0; --j) { - const i_t col_start = L_cs[j] + 1; - const i_t col_end = L_cs[j + 1]; + const i_t col_start = L.col_start[j] + 1; + const i_t col_end = L.col_start[j + 1]; for (i_t p = col_start; p < col_end; ++p) { - x[j] -= L_x[p] * x[L_i[p]]; + x[j] -= L.x[p] * x[L.i[p]]; } - x[j] /= L_x[L_cs[j]]; + x[j] /= L.x[L.col_start[j]]; } - - const size_t total_nnz = L_cs[n]; - L.col_start.byte_loads += (n + 1) * sizeof(i_t); - L.i.byte_loads += total_nnz * sizeof(i_t); - L.x.byte_loads += total_nnz * sizeof(f_t); - + work_estimate += 6*n + 4*L.col_start[n]; return 0; } // Solve U*x = b. On input x contains the right-hand side b, on output the // solution -template -i_t upper_triangular_solve(const csc_matrix_t& U, VectorF& x) +template +i_t upper_triangular_solve(const csc_matrix_t& U, std::vector& x, f_t& work_estimate) { const i_t n = U.n; assert(x.size() == n); - - auto& U_cs = U.col_start.underlying(); - auto& U_i = U.i.underlying(); - auto& U_x = U.x.underlying(); - - size_t nnz_processed = 0; for (i_t j = n - 1; j >= 0; --j) { - const i_t col_start = U_cs[j]; - const i_t col_end = U_cs[j + 1] - 1; + const i_t col_start = U.col_start[j]; + const i_t col_end = U.col_start[j + 1] - 1; if (x[j] != 0.0) { - x[j] /= U_x[col_end]; - auto x_j = x[j]; // same x_j load hoisting + x[j] /= U.x[col_end]; + const f_t x_j = x[j]; // same x_j load hoisting for (i_t p = col_start; p < col_end; ++p) { - x[U_i[p]] -= U_x[p] * x_j; + x[U.i[p]] -= U.x[p] * x_j; } - nnz_processed += col_end - col_start; + work_estimate += 3*(col_end - col_start) + 3; } } - - U.col_start.byte_loads += (n + 1) * sizeof(i_t); - U.i.byte_loads += nnz_processed * sizeof(i_t); - U.x.byte_loads += nnz_processed * sizeof(f_t); - + work_estimate += 3*n; return 0; } // Solve U'*x = b. On input x contains the right-hand side b, on output the // solution -template -i_t upper_triangular_transpose_solve(const csc_matrix_t& U, VectorF& x) +template +i_t upper_triangular_transpose_solve(const csc_matrix_t& U, std::vector& x, f_t& work_estimate) { const i_t n = U.n; assert(x.size() == n); - - auto& U_cs = U.col_start.underlying(); - auto& U_i = U.i.underlying(); - auto& U_x = U.x.underlying(); - for (i_t j = 0; j < n; ++j) { - const i_t col_start = U_cs[j]; - const i_t col_end = U_cs[j + 1] - 1; + const i_t col_start = U.col_start[j]; + const i_t col_end = U.col_start[j + 1] - 1; for (i_t p = col_start; p < col_end; ++p) { - x[j] -= U_x[p] * x[U_i[p]]; + x[j] -= U.x[p] * x[U.i[p]]; } - x[j] /= U_x[col_end]; + x[j] /= U.x[col_end]; } - - const size_t total_nnz = U_cs[n]; - U.col_start.byte_loads += (n + 1) * sizeof(i_t); - U.i.byte_loads += total_nnz * sizeof(i_t); - U.x.byte_loads += total_nnz * sizeof(f_t); - + work_estimate += 4*n + 4*U.col_start[n]; return 0; } @@ -159,7 +120,8 @@ template i_t reach(const sparse_vector_t& b, const std::optional>& pinv, csc_matrix_t& G, - std::vector& xi); + std::vector& xi, + f_t& work_estimate); // \brief Performs a depth-first search starting from node j in the graph // defined by G @@ -182,7 +144,8 @@ i_t depth_first_search(i_t j, csc_matrix_t& G, i_t top, std::vector& xi, - typename std::vector::iterator pstack); + typename std::vector::iterator pstack, + f_t& work_estimate); // \brief sparse_triangle_solve solve L*x = b or U*x = b where L is a sparse lower // triangular matrix @@ -201,6 +164,7 @@ i_t sparse_triangle_solve(const sparse_vector_t& b, const std::optional>& pinv, std::vector& xi, csc_matrix_t& G, - f_t* x); + f_t* x, + f_t& work_estimate); } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/vector_math.cpp b/cpp/src/dual_simplex/vector_math.cpp index 8ea31d3e39..2acf4078f4 100644 --- a/cpp/src/dual_simplex/vector_math.cpp +++ b/cpp/src/dual_simplex/vector_math.cpp @@ -57,6 +57,7 @@ f_t dot(const std::vector& x, const std::vector& y) return dot; } +// Work = 3*min(nz_x, nz_y) template f_t sparse_dot( i_t const* xind, f_t const* xval, i_t nx, i_t const* yind, i_t ny, f_t const* y_scatter_val) @@ -123,8 +124,47 @@ f_t sparse_dot(const std::vector& xind, return dot; } -// NOTE: permute_vector, inverse_permute_vector, and inverse_permutation are now -// templated on vector types and defined in the header file. +// Computes x = P*b or x=b(p) in MATLAB. +// Work is 3*n +template +i_t permute_vector(const std::vector& p, const std::vector& b, std::vector& x) +{ + auto n = p.size(); + assert(x.size() == n); + assert(b.size() == n); + for (i_t k = 0; k < n; ++k) { + x[k] = b[p[k]]; + } + return 0; +} + +// Computes x = P'*b or x(p) = b in MATLAB. +// Work is 3 * n +template +i_t inverse_permute_vector(const std::vector& p, const std::vector& b, std::vector& x) +{ + auto n = p.size(); + assert(x.size() == n); + assert(b.size() == n); + for (i_t k = 0; k < n; ++k) { + x[p[k]] = b[k]; + } + return 0; +} + +// Computes pinv from p. Or pinv(p) = 1:n in MATLAB +// Work is 2*n +template +i_t inverse_permutation(const std::vector& p, std::vector& pinv) +{ + auto n = p.size(); + if (pinv.size() != n) { pinv.resize(n); } + for (i_t k = 0; k < n; ++k) { + pinv[p[k]] = k; + } + return 0; +} + #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE @@ -160,8 +200,14 @@ template double sparse_dot(int const* xind, template double sparse_dot( int* xind, double* xval, int nx, int* yind, double* yval, int ny); -// NOTE: permute_vector, inverse_permute_vector, and inverse_permutation are now -// templated on vector types and defined in the header file, so no explicit instantiation needed. +template int permute_vector(const std::vector& p, + const std::vector& b, + std::vector& x); +template int inverse_permute_vector(const std::vector& p, + const std::vector& b, + std::vector& x); +template int inverse_permutation(const std::vector& p, std::vector& pinv); + #endif } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/vector_math.hpp b/cpp/src/dual_simplex/vector_math.hpp index 6bf573e892..7ac697eef3 100644 --- a/cpp/src/dual_simplex/vector_math.hpp +++ b/cpp/src/dual_simplex/vector_math.hpp @@ -57,41 +57,14 @@ template f_t sparse_dot(i_t* xind, f_t* xval, i_t nx, i_t* yind, f_t* yval, i_t ny); // Computes x = P*b or x=b(p) in MATLAB. -template -int permute_vector(const VectorI& p, const VectorF_in& b, VectorF_out& x) -{ - auto n = p.size(); - assert(x.size() == n); - assert(b.size() == n); - for (decltype(n) k = 0; k < n; ++k) { - x[k] = b[p[k]]; - } - return 0; -} - +template +i_t permute_vector(const std::vector& p, const std::vector& b, std::vector& x); // Computes x = P'*b or x(p) = b in MATLAB. -template -int inverse_permute_vector(const VectorI& p, const VectorF_in& b, VectorF_out& x) -{ - auto n = p.size(); - assert(x.size() == n); - assert(b.size() == n); - for (decltype(n) k = 0; k < n; ++k) { - x[p[k]] = b[k]; - } - return 0; -} +template +i_t inverse_permute_vector(const std::vector& p, const std::vector& b, std::vector& x); // Computes pinv from p. Or pinv(p) = 1:n in MATLAB -template -int inverse_permutation(const VectorI_in& p, VectorI_out& pinv) -{ - auto n = p.size(); - if (pinv.size() != n) { pinv.resize(n); } - for (decltype(n) k = 0; k < n; ++k) { - pinv[p[k]] = k; - } - return 0; -} +template +i_t inverse_permutation(const std::vector& p, std::vector& pinv); } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/mip_heuristics/problem/problem.cu b/cpp/src/mip_heuristics/problem/problem.cu index b2e221df0c..a95398a390 100644 --- a/cpp/src/mip_heuristics/problem/problem.cu +++ b/cpp/src/mip_heuristics/problem/problem.cu @@ -1935,9 +1935,9 @@ void problem_t::get_host_user_problem( user_problem.objective = cuopt::host_copy(objective_coefficients, stream); dual_simplex::csr_matrix_t csr_A(m, n, nz); - csr_A.x = ins_vector(cuopt::host_copy(coefficients, stream)); - csr_A.j = ins_vector(cuopt::host_copy(variables, stream)); - csr_A.row_start = ins_vector(cuopt::host_copy(offsets, stream)); + csr_A.x = std::vector(cuopt::host_copy(coefficients, stream)); + csr_A.j = std::vector(cuopt::host_copy(variables, stream)); + csr_A.row_start = std::vector(cuopt::host_copy(offsets, stream)); csr_A.to_compressed_col(user_problem.A); diff --git a/cpp/src/pdlp/translate.hpp b/cpp/src/pdlp/translate.hpp index 6ecb37c6e6..8cc974da9c 100644 --- a/cpp/src/pdlp/translate.hpp +++ b/cpp/src/pdlp/translate.hpp @@ -30,9 +30,9 @@ static dual_simplex::user_problem_t cuopt_problem_to_simplex_problem( user_problem.objective = cuopt::host_copy(model.objective_coefficients, handle_ptr->get_stream()); dual_simplex::csr_matrix_t csr_A(m, n, nz); - csr_A.x = ins_vector(cuopt::host_copy(model.coefficients, handle_ptr->get_stream())); - csr_A.j = ins_vector(cuopt::host_copy(model.variables, handle_ptr->get_stream())); - csr_A.row_start = ins_vector(cuopt::host_copy(model.offsets, handle_ptr->get_stream())); + csr_A.x = std::vector(cuopt::host_copy(model.coefficients, handle_ptr->get_stream())); + csr_A.j = std::vector(cuopt::host_copy(model.variables, handle_ptr->get_stream())); + csr_A.row_start = std::vector(cuopt::host_copy(model.offsets, handle_ptr->get_stream())); csr_A.to_compressed_col(user_problem.A); @@ -121,9 +121,9 @@ void translate_to_crossover_problem(const detail::problem_t& problem, dual_simplex::csr_matrix_t csr_A( problem.n_constraints, problem.n_variables, problem.nnz); - csr_A.x = ins_vector(cuopt::host_copy(problem.coefficients, stream)); - csr_A.j = ins_vector(cuopt::host_copy(problem.variables, stream)); - csr_A.row_start = ins_vector(cuopt::host_copy(problem.offsets, stream)); + csr_A.x = std::vector(cuopt::host_copy(problem.coefficients, stream)); + csr_A.j = std::vector(cuopt::host_copy(problem.variables, stream)); + csr_A.row_start = std::vector(cuopt::host_copy(problem.offsets, stream)); stream.synchronize(); CUOPT_LOG_DEBUG("Converting to compressed column"); From e4985d05dbedc221a9ca461bd70db129ae0bd219 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Thu, 12 Feb 2026 20:33:29 -0800 Subject: [PATCH 2/4] Style fixes --- cpp/src/dual_simplex/basis_solves.cpp | 93 +++--- cpp/src/dual_simplex/basis_updates.cpp | 108 +++---- cpp/src/dual_simplex/basis_updates.hpp | 6 +- .../bound_flipping_ratio_test.cpp | 13 +- cpp/src/dual_simplex/crossover.cpp | 32 +- cpp/src/dual_simplex/phase2.cpp | 282 ++++++++++-------- cpp/src/dual_simplex/primal.cpp | 7 +- cpp/src/dual_simplex/right_looking_lu.cpp | 112 +++---- cpp/src/dual_simplex/singletons.cpp | 34 +-- cpp/src/dual_simplex/sparse_matrix.cpp | 14 +- cpp/src/dual_simplex/sparse_matrix.hpp | 24 +- cpp/src/dual_simplex/sparse_vector.hpp | 3 +- cpp/src/dual_simplex/triangle_solve.cpp | 18 +- cpp/src/dual_simplex/triangle_solve.hpp | 18 +- cpp/src/dual_simplex/vector_math.cpp | 5 +- cpp/src/dual_simplex/vector_math.hpp | 4 +- cpp/src/pdlp/translate.hpp | 4 +- 17 files changed, 423 insertions(+), 354 deletions(-) diff --git a/cpp/src/dual_simplex/basis_solves.cpp b/cpp/src/dual_simplex/basis_solves.cpp index 8d9bd71484..777687d49b 100644 --- a/cpp/src/dual_simplex/basis_solves.cpp +++ b/cpp/src/dual_simplex/basis_solves.cpp @@ -183,14 +183,14 @@ i_t factorize_basis(const csc_matrix_t& A, form_b(A, basic_list, B, work_estimate); std::vector row_perm(m); std::vector col_perm(m); - work_estimate += 2*m; + work_estimate += 2 * m; i_t row_singletons; i_t col_singletons; find_singletons(B, row_singletons, row_perm, col_singletons, col_perm, work_estimate); std::vector row_perm_inv(m); work_estimate += m; inverse_permutation(row_perm, row_perm_inv); - work_estimate += 3*m; + work_estimate += 3 * m; #ifdef PRINT_SINGLETONS printf("Singletons row %d col %d num %d\n", @@ -225,7 +225,7 @@ i_t factorize_basis(const csc_matrix_t& A, const i_t Bnz = B.col_start[m]; L.reallocate(Bnz); U.reallocate(Bnz); - work_estimate += 2*Bnz; + work_estimate += 2 * Bnz; i_t Lnz = 0; // Fill in L(:, 0:col_singletons-1) with I for (i_t k = 0; k < col_singletons; ++k) { @@ -235,7 +235,7 @@ i_t factorize_basis(const csc_matrix_t& A, Lnz++; assert(Lnz <= Bnz); } - work_estimate += 3*col_singletons; + work_estimate += 3 * col_singletons; i_t Unz = 0; // Fill in U(:, 0:col_singletons-1) with U_11 @@ -250,10 +250,10 @@ i_t factorize_basis(const csc_matrix_t& A, Unz++; assert(Unz <= Bnz); } - work_estimate += 4*(col_end - col_start); + work_estimate += 4 * (col_end - col_start); } if (col_singletons > 0) { U.col_start[col_singletons] = Unz; } - work_estimate += 3*col_singletons; + work_estimate += 3 * col_singletons; // Ensure U(i, i) is at the end of column i for U_11 for (i_t k = 0; k < col_singletons; ++k) { @@ -271,7 +271,7 @@ i_t factorize_basis(const csc_matrix_t& A, } work_estimate += (col_before_end - col_start); } - work_estimate += 2*col_singletons; + work_estimate += 2 * col_singletons; // Fill in L(:, col_singletons:col_singletons+row_singletons-1) with L_22 and L_32 // and U(:, col_singletons:col_singletons+row_singletons-1) with U_12 and I @@ -296,7 +296,7 @@ i_t factorize_basis(const csc_matrix_t& A, assert(Unz <= Bnz); } } - work_estimate += 5*(col_end - col_start); + work_estimate += 5 * (col_end - col_start); // add in the identity in U U.i[Unz] = k; U.x[Unz] = 1.0; @@ -304,8 +304,7 @@ i_t factorize_basis(const csc_matrix_t& A, assert(Unz <= Bnz); } L.col_start[num_singletons] = Lnz; - work_estimate += 7*(num_singletons - col_singletons); - + work_estimate += 7 * (num_singletons - col_singletons); // Ensure L(i, i) is at the beginning of column i for L_22 and L32 for (i_t k = col_singletons; k < num_singletons; ++k) { @@ -328,7 +327,7 @@ i_t factorize_basis(const csc_matrix_t& A, work_estimate += (col_end - col_start); assert(found_diag); } - work_estimate += 4*(num_singletons - col_singletons); + work_estimate += 4 * (num_singletons - col_singletons); // Compute how many nonzeros in B we have used so far so we know // how many nonzeros are in S @@ -358,14 +357,14 @@ i_t factorize_basis(const csc_matrix_t& A, assert(Snz <= Snz_max); } } - work_estimate += 3*(col_end - col_start); + work_estimate += 3 * (col_end - col_start); } S.col_start[Sdim] = Snz; // Finalize S - work_estimate += 4*(m - num_singletons); + work_estimate += 4 * (m - num_singletons); csc_matrix_t SL(Sdim, Sdim, Snz); csc_matrix_t SU(Sdim, Sdim, Snz); - work_estimate += 2*Sdim + 2*Snz; + work_estimate += 2 * Sdim + 2 * Snz; // Factorize S std::vector S_perm_inv(Sdim); @@ -378,7 +377,7 @@ i_t factorize_basis(const csc_matrix_t& A, for (i_t h = 0; h < Sdim; ++h) { identity[h] = h; } - work_estimate += 3*Sdim; + work_estimate += 3 * Sdim; Srank = right_looking_lu(S, settings, @@ -400,17 +399,17 @@ i_t factorize_basis(const csc_matrix_t& A, for (i_t h = Srank; h < Sdim; ++h) { deficient[h - Srank] = col_perm[num_singletons + S_col_perm[h]]; } - work_estimate += 3*(Sdim - Srank); + work_estimate += 3 * (Sdim - Srank); // Get S_perm std::vector S_perm(Sdim); inverse_permutation(S_perm_inv, S_perm); - work_estimate += 4*Sdim; + work_estimate += 4 * Sdim; // Get the slacks needed slacks_needed.resize(Sdim - Srank); for (i_t h = Srank; h < Sdim; ++h) { slacks_needed[h - Srank] = row_perm[num_singletons + S_perm[h]]; } - work_estimate += 3*(Sdim - Srank); + work_estimate += 3 * (Sdim - Srank); return -1; } @@ -423,17 +422,17 @@ i_t factorize_basis(const csc_matrix_t& A, col_perm_sav[q_j] = col_perm[h]; q_j++; } - work_estimate += 2*(m - num_singletons); + work_estimate += 2 * (m - num_singletons); q_j = 0; for (i_t h = num_singletons; h < m; ++h) { col_perm[h] = col_perm_sav[S_col_perm[q_j]]; q_j++; } - work_estimate += 2*(m - num_singletons); + work_estimate += 2 * (m - num_singletons); std::vector S_perm(m); inverse_permutation(S_perm_inv, S_perm); - work_estimate += 4*m; + work_estimate += 4 * m; actual_factor = toc(actual_factor_start); // Permute the rows of L_32 according to S_perm_inv @@ -449,11 +448,14 @@ i_t factorize_basis(const csc_matrix_t& A, } work_estimate += (col_end - col_start); } - work_estimate += 3*(num_singletons - col_singletons); + work_estimate += 3 * (num_singletons - col_singletons); const i_t SLnz = SL.col_start[Sdim]; const i_t Lnz_max = Lnz + SLnz; - if (Lnz_max > Bnz) { L.reallocate(Lnz_max); work_estimate += Lnz_max;} + if (Lnz_max > Bnz) { + L.reallocate(Lnz_max); + work_estimate += Lnz_max; + } // Fill in L(:, num_singletons:m-1) with L_33 for (i_t k = num_singletons; k < m; ++k) { @@ -468,16 +470,19 @@ i_t factorize_basis(const csc_matrix_t& A, Lnz++; assert(Lnz <= Lnz_max); } - work_estimate += 4*(col_end - col_start); + work_estimate += 4 * (col_end - col_start); } - work_estimate += 3*(m - num_singletons); + work_estimate += 3 * (m - num_singletons); assert(Lnz == Lnz_max); L.col_start[m] = Lnz; // Finalize L const i_t SUnz = SU.col_start[Sdim]; const i_t Unz_max = Unz + SUnz + (Bnz - Bnz_used); - if (Unz_max > Bnz) { U.reallocate(Unz_max); work_estimate += Unz_max;} + if (Unz_max > Bnz) { + U.reallocate(Unz_max); + work_estimate += Unz_max; + } // Fill in U(:, num_singletons:m-1) with U_13 and U_33 for (i_t k = num_singletons; k < m; ++k) { @@ -496,7 +501,7 @@ i_t factorize_basis(const csc_matrix_t& A, assert(Unz <= Unz_max); } } - work_estimate += 3*(B_col_end - B_col_start); + work_estimate += 3 * (B_col_end - B_col_start); // U_33 const i_t l = k - num_singletons; @@ -509,9 +514,9 @@ i_t factorize_basis(const csc_matrix_t& A, Unz++; assert(Unz <= Unz_max); } - work_estimate += 4*(U_col_end - U_col_start); + work_estimate += 4 * (U_col_end - U_col_start); } - work_estimate += 5*(m - num_singletons); + work_estimate += 5 * (m - num_singletons); assert(Unz <= Unz_max); U.col_start[m] = Unz; // Finalize U @@ -520,15 +525,15 @@ i_t factorize_basis(const csc_matrix_t& A, for (i_t k = 0; k < Sdim; ++k) { last_perm[k] = row_perm[num_singletons + k]; } - work_estimate += 3*Sdim; + work_estimate += 3 * Sdim; // Fix up row permutations for (i_t k = 0; k < Sdim; ++k) { row_perm[num_singletons + k] = last_perm[S_perm[k]]; } - work_estimate += 3*Sdim; + work_estimate += 3 * Sdim; inverse_permutation(row_perm, row_perm_inv); - work_estimate += 3*row_perm.size(); + work_estimate += 3 * row_perm.size(); } else { L.col_start[m] = Lnz; // Finalize L U.col_start[m] = Unz; // Finalize U @@ -613,9 +618,9 @@ i_t factorize_basis(const csc_matrix_t& A, q.resize(m); work_estimate += m; f_t fact_start = tic(); - rank = right_looking_lu(A, settings, medium_tol, basic_list, q, L, U, pinv, work_estimate); + rank = right_looking_lu(A, settings, medium_tol, basic_list, q, L, U, pinv, work_estimate); inverse_permutation(pinv, p); - work_estimate += 3*pinv.size(); + work_estimate += 3 * pinv.size(); if (rank != m) { // Get the rank deficient columns @@ -624,13 +629,13 @@ i_t factorize_basis(const csc_matrix_t& A, for (i_t h = rank; h < m; ++h) { deficient[h - rank] = q[h]; } - work_estimate += 3*(m - rank); + work_estimate += 3 * (m - rank); // Get the slacks needed slacks_needed.resize(m - rank); for (i_t h = rank; h < m; ++h) { slacks_needed[h - rank] = p[h]; } - work_estimate += 3*(m - rank); + work_estimate += 3 * (m - rank); } if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { settings.log.printf("Concurrent halt\n"); @@ -694,7 +699,7 @@ i_t basis_repair(const csc_matrix_t& A, slacks_found++; } } - work_estimate += 3*m + 2*slacks_found; + work_estimate += 3 * m + 2 * slacks_found; assert(slacks_found == m); @@ -706,7 +711,7 @@ i_t basis_repair(const csc_matrix_t& A, for (i_t k = 0; k < num_nonbasic; ++k) { nonbasic_map[nonbasic_list[k]] = k; } - work_estimate += 2*num_nonbasic; + work_estimate += 2 * num_nonbasic; // Create a superbasic_map std::vector superbasic_map( @@ -716,7 +721,7 @@ i_t basis_repair(const csc_matrix_t& A, for (i_t k = 0; k < num_superbasic; ++k) { superbasic_map[superbasic_list[k]] = k; } - work_estimate += 2*num_superbasic; + work_estimate += 2 * num_superbasic; const i_t columns_to_replace = deficient.size(); for (i_t k = 0; k < columns_to_replace; ++k) { @@ -744,7 +749,7 @@ i_t basis_repair(const csc_matrix_t& A, } vstatus[replace_j] = variable_status_t::BASIC; } - work_estimate += 11*columns_to_replace; + work_estimate += 11 * columns_to_replace; return 0; } @@ -763,9 +768,9 @@ i_t form_b(const csc_matrix_t& A, const i_t col_end = A.col_start[j + 1]; Bnz += (col_end - col_start); } - work_estimate += 3*m; + work_estimate += 3 * m; B.reallocate(Bnz); - work_estimate += 2*Bnz; + work_estimate += 2 * Bnz; const i_t Bnz_check = Bnz; Bnz = 0; for (i_t k = 0; k < m; ++k) { @@ -779,8 +784,8 @@ i_t form_b(const csc_matrix_t& A, Bnz++; } } - work_estimate += 4*m; - work_estimate += 4*Bnz; + work_estimate += 4 * m; + work_estimate += 4 * Bnz; B.col_start[m] = Bnz; assert(Bnz_check == Bnz); return 0; diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index ba43251ed5..777f7f6cdf 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -277,7 +277,7 @@ i_t basis_update_t::l_solve(sparse_vector_t& rhs) const const i_t m = L0_.m; f_t work_estimate = 0; - i_t top = sparse_triangle_solve( + i_t top = sparse_triangle_solve( rhs, std::nullopt, xi_workspace_, L0_, x_workspace_.data(), work_estimate); solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs @@ -504,7 +504,7 @@ i_t basis_update_t::l_transpose_solve(sparse_vector_t& rhs) #endif f_t work_estimate = 0; - i_t top = sparse_triangle_solve( + i_t top = sparse_triangle_solve( rhs, std::nullopt, xi_workspace_, L0_transpose_, x_workspace_.data(), work_estimate); solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs @@ -588,7 +588,7 @@ i_t basis_update_t::u_solve(sparse_vector_t& rhs) const rhs.inverse_permute_vector(col_permutation_, bprime); f_t work_estimate = 0; - i_t top = sparse_triangle_solve( + i_t top = sparse_triangle_solve( bprime, std::nullopt, xi_workspace_, U_, x_workspace_.data(), work_estimate); solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs @@ -657,7 +657,7 @@ i_t basis_update_t::u_transpose_solve(sparse_vector_t& rhs) // U'*y = bprime f_t work_estimate = 0; - i_t top = sparse_triangle_solve( + i_t top = sparse_triangle_solve( bprime, std::nullopt, xi_workspace_, U_transpose_, x_workspace_.data(), work_estimate); solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs @@ -879,7 +879,7 @@ i_t basis_update_t::update(std::vector& utilde, i_t leaving_index // Solve U'*w = delta*et std::vector w(m); - w[t] = delta; + w[t] = delta; f_t work_estimate = 0; dual_simplex::upper_triangular_transpose_solve(U_, w, work_estimate); #ifdef PARANOID @@ -1134,7 +1134,6 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts csc_matrix_t WT(m, cuts_basic.m, 0); work_estimate_ += cuts_basic.m; - i_t WT_nz = 0; for (i_t k = 0; k < cuts_basic.m; k++) { sparse_vector_t rhs(cuts_basic, k); @@ -1224,7 +1223,7 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts V_row.row_start[cuts_basic.m] = V_nz; V_row.to_compressed_col(V); - work_estimate_ += 3*V_nz; + work_estimate_ += 3 * V_nz; #ifdef CHECK_V csc_matrix_t CB_col(cuts_basic.m, m, 0); @@ -1265,7 +1264,7 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts } else { // W = V WT.transpose(V); - work_estimate_ += 3*V_nz; + work_estimate_ += 3 * V_nz; } // Extend u_i, v_i for i = 0, ..., num_updates_ - 1 @@ -1275,7 +1274,7 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts // L = [ L0 0 ] // [ V I ] - V_nz = V.col_start[m]; + V_nz = V.col_start[m]; i_t L_nz = L0_.col_start[m]; csc_matrix_t new_L(m + cuts_basic.m, m + cuts_basic.m, L_nz + V_nz + cuts_basic.m); work_estimate_ += (L_nz + V_nz + cuts_basic.m) + (m + cuts_basic.m); @@ -1298,19 +1297,19 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts L_nz++; } } - work_estimate_ += 4*L_nz; + work_estimate_ += 4 * L_nz; for (i_t j = m; j < m + cuts_basic.m; ++j) { new_L.col_start[j] = L_nz; new_L.i[L_nz] = j; new_L.x[L_nz] = 1.0; L_nz++; } - work_estimate_ += 3*cuts_basic.m; + work_estimate_ += 3 * cuts_basic.m; new_L.col_start[m + cuts_basic.m] = L_nz; assert(L_nz == predicted_nz); L0_ = new_L; - work_estimate_ += 2*L_nz; + work_estimate_ += 2 * L_nz; // Adjust U // U = [ U0 0 ] @@ -1320,14 +1319,14 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts U0_.col_start.resize(m + cuts_basic.m + 1); U0_.i.resize(U_nz + cuts_basic.m); U0_.x.resize(U_nz + cuts_basic.m); - work_estimate_ += 2*(U_nz + cuts_basic.m) + (m + cuts_basic.m); + work_estimate_ += 2 * (U_nz + cuts_basic.m) + (m + cuts_basic.m); for (i_t k = m; k < m + cuts_basic.m; ++k) { U0_.col_start[k] = U_nz; U0_.i[U_nz] = k; U0_.x[U_nz] = 1.0; U_nz++; } - work_estimate_ += 3*cuts_basic.m; + work_estimate_ += 3 * cuts_basic.m; U0_.col_start[m + cuts_basic.m] = U_nz; U0_.n = m + cuts_basic.m; U0_.m = m + cuts_basic.m; @@ -1337,7 +1336,7 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts // Adjust row_permutation_ and inverse_row_permutation_ row_permutation_.resize(m + cuts_basic.m); inverse_row_permutation_.resize(m + cuts_basic.m); - work_estimate_ += 2*(m + cuts_basic.m); + work_estimate_ += 2 * (m + cuts_basic.m); for (i_t k = m; k < m + cuts_basic.m; ++k) { row_permutation_[k] = k; } @@ -1347,7 +1346,7 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts // Adjust workspace sizes xi_workspace_.resize(2 * (m + cuts_basic.m), 0); x_workspace_.resize(m + cuts_basic.m, 0.0); - work_estimate_ += 3*(m + cuts_basic.m); + work_estimate_ += 3 * (m + cuts_basic.m); return 0; } @@ -1361,7 +1360,7 @@ void basis_update_mpf_t::gather_into_sparse_vector(i_t nz, out.x.clear(); out.i.reserve(nz); out.x.reserve(nz); - work_estimate_ += 2*nz; + work_estimate_ += 2 * nz; const f_t zero_tol = 1e-13; for (i_t k = 0; k < nz; ++k) { const i_t i = xi_workspace_[m + k]; @@ -1373,8 +1372,8 @@ void basis_update_mpf_t::gather_into_sparse_vector(i_t nz, xi_workspace_[i] = 0; x_workspace_[i] = 0.0; } - work_estimate_ += 5*nz; - work_estimate_ += 3*out.i.size(); + work_estimate_ += 5 * nz; + work_estimate_ += 3 * out.i.size(); } template @@ -1388,12 +1387,12 @@ void basis_update_mpf_t::solve_to_workspace(i_t top) const xi_workspace_[p] = 0; nz++; } - work_estimate_ += 3*(m - top); + work_estimate_ += 3 * (m - top); for (i_t k = 0; k < nz; ++k) { const i_t i = xi_workspace_[m + k]; xi_workspace_[i] = 1; } - work_estimate_ += 2*nz; + work_estimate_ += 2 * nz; } template @@ -1407,7 +1406,7 @@ void basis_update_mpf_t::solve_to_sparse_vector(i_t top, out.i.clear(); out.x.reserve(nz); out.i.reserve(nz); - work_estimate_ += 2*nz; + work_estimate_ += 2 * nz; i_t k = 0; const f_t zero_tol = 1e-13; for (i_t p = top; p < m; ++p) { @@ -1420,7 +1419,7 @@ void basis_update_mpf_t::solve_to_sparse_vector(i_t top, xi_workspace_[p] = 0; k++; } - work_estimate_ += 4*k + 3*out.i.size(); + work_estimate_ += 4 * k + 3 * out.i.size(); } template @@ -1434,12 +1433,12 @@ i_t basis_update_mpf_t::scatter_into_workspace(const sparse_vector_t::grow_storage(i_t nz, i_t& S_start, i_t& S_nz) if (S_nz + nz > S_.i.size()) { S_.i.resize(std::max(2 * S_nz, S_nz + nz)); S_.x.resize(std::max(2 * S_nz, S_nz + nz)); - work_estimate_ += 2*std::max(2 * S_nz, S_nz + nz); + work_estimate_ += 2 * std::max(2 * S_nz, S_nz + nz); } S_start = last_S_col; assert(S_nz + nz <= S_.i.size()); @@ -1500,7 +1499,7 @@ f_t basis_update_mpf_t::dot_product(i_t col, f_t dot = 0.0; const i_t col_start = S_.col_start[col]; const i_t col_end = S_.col_start[col + 1]; - i_t nz_mark = 0; + i_t nz_mark = 0; for (i_t p = col_start; p < col_end; ++p) { const i_t i = S_.i[p]; if (mark[i]) { @@ -1508,7 +1507,7 @@ f_t basis_update_mpf_t::dot_product(i_t col, nz_mark++; } } - work_estimate_ += 2*nz_mark + (col_end - col_start); + work_estimate_ += 2 * nz_mark + (col_end - col_start); return dot; } @@ -1539,7 +1538,7 @@ void basis_update_mpf_t::add_sparse_column(const csc_matrix_t::add_sparse_column(const csc_matrix_t @@ -1579,17 +1578,17 @@ i_t basis_update_mpf_t::b_transpose_solve(const std::vector& rhs, // Solve for r such that U'*r = c std::vector r = rhs; - work_estimate_ += 2*r.size(); + work_estimate_ += 2 * r.size(); u_transpose_solve(r); UTsol = r; - work_estimate_ += 2*r.size(); + work_estimate_ += 2 * r.size(); // Solve for w such that L'*w = r l_transpose_solve(r); // Compute y = P'*w inverse_permute_vector(row_permutation_, r, solution); - work_estimate_ += 3*r.size(); + work_estimate_ += 3 * r.size(); return 0; } @@ -1725,7 +1724,7 @@ i_t basis_update_mpf_t::l_transpose_solve(std::vector& rhs) const if (std::abs(theta) > zero_tol) { add_sparse_column(S_, v_col, -theta, rhs); } } - work_estimate_ += 2*num_updates_; + work_estimate_ += 2 * num_updates_; // Solve for x such that L0^T * x = b' dual_simplex::lower_triangular_transpose_solve(L0_, rhs, work_estimate_); @@ -1784,9 +1783,9 @@ i_t basis_update_mpf_t::l_transpose_solve(sparse_vector_t& r } #endif } - work_estimate_ += 2*num_updates_; + work_estimate_ += 2 * num_updates_; - #ifdef CHECK_MULTIPLY +#ifdef CHECK_MULTIPLY for (i_t i = 0; i < m; ++i) { if (std::abs(rhs_dense_0[i] - x_workspace_[i]) > 1e-9) { printf("L transpose solve multiply error %e index %d sparse %e dense %e\n", @@ -1846,7 +1845,7 @@ i_t basis_update_mpf_t::b_solve(const std::vector& rhs, // P*B*x = P*b permute_vector(row_permutation_, rhs, solution); - work_estimate_ += 3*rhs.size(); + work_estimate_ += 3 * rhs.size(); // L*U*x = b' // Solve for v such that L*v = b' @@ -1856,7 +1855,7 @@ i_t basis_update_mpf_t::b_solve(const std::vector& rhs, l_solve(solution); if (need_Lsol) { Lsol = solution; - work_estimate_ += 2*solution.size(); + work_estimate_ += 2 * solution.size(); } #ifdef CHECK_L_SOLVE @@ -1898,9 +1897,9 @@ i_t basis_update_mpf_t::b_solve(const sparse_vector_t& rhs, { const i_t m = L0_.m; solution = rhs; - work_estimate_ += 2*rhs.i.size(); + work_estimate_ += 2 * rhs.i.size(); solution.inverse_permute_vector(inverse_row_permutation_); - work_estimate_ += 3*rhs.i.size(); + work_estimate_ += 3 * rhs.i.size(); #ifdef CHECK_PERMUTATION std::vector permuation_rhs; @@ -1937,7 +1936,7 @@ i_t basis_update_mpf_t::b_solve(const sparse_vector_t& rhs, } if (need_Lsol) { Lsol = solution; - work_estimate_ += 2*solution.i.size(); + work_estimate_ += 2 * solution.i.size(); } sum_L_ += static_cast(solution.i.size()) / input_size; @@ -2050,7 +2049,7 @@ i_t basis_update_mpf_t::l_solve(std::vector& rhs) const if (std::abs(theta) > zero_tol) { add_sparse_column(S_, u_col, -theta, rhs); } } - work_estimate_ += 2*num_updates_; + work_estimate_ += 2 * num_updates_; #ifdef CHECK_L_SOLVE std::vector inout = rhs; @@ -2099,7 +2098,7 @@ i_t basis_update_mpf_t::l_solve(sparse_vector_t& rhs) const add_sparse_column(S_, u_col, -theta, xi_workspace_, nz, x_workspace_); } } - work_estimate_ += 2*num_updates_; + work_estimate_ += 2 * num_updates_; gather_into_sparse_vector(nz, rhs); @@ -2122,7 +2121,7 @@ i_t basis_update_mpf_t::update(const std::vector& utilde, const i_t col_start = U0_.col_start[leaving_index]; const i_t col_end = U0_.col_start[leaving_index + 1]; std::vector u = utilde; - work_estimate_ += 2*utilde.size(); + work_estimate_ += 2 * utilde.size(); // u = utilde - U0(:, leaving_index) add_sparse_column(U0_, leaving_index, -1.0, u); @@ -2143,11 +2142,11 @@ i_t basis_update_mpf_t::update(const std::vector& utilde, // Scatter u into S S_.append_column(u); - work_estimate_ += u.size() + 3*u_nz; + work_estimate_ += u.size() + 3 * u_nz; // Scatter v into S S_.append_column(etilde); - work_estimate_ += etilde.size() + 3*v_nz; + work_estimate_ += etilde.size() + 3 * v_nz; // Compute mu = 1 + v^T * u const f_t mu = 1.0 + sparse_dot(S_.i.data() + S_.col_start[S_start], @@ -2156,7 +2155,7 @@ i_t basis_update_mpf_t::update(const std::vector& utilde, S_.i.data() + S_.col_start[S_start + 1], S_.x.data() + S_.col_start[S_start + 1], v_nz); - work_estimate_ += 3*std::min(u_nz, v_nz); + work_estimate_ += 3 * std::min(u_nz, v_nz); if (std::abs(mu) < 1E-8 || std::abs(mu) > 1E+8) { // Force a refactor. Otherwise we will get numerical issues when dividing by mu. @@ -2211,13 +2210,13 @@ i_t basis_update_mpf_t::update(const sparse_vector_t& utilde grow_storage(nz + etilde.i.size(), S_start, S_nz); S_.append_column(nz, xi_workspace_.data() + m, x_workspace_.data()); - work_estimate_ += 5*nz; + work_estimate_ += 5 * nz; // Gather etilde into a column of S etilde.sort(); // Needs to be sorted for the sparse dot. TODO(CMM): Is etilde sorted on input? work_estimate_ += etilde.i.size() * std::log2(etilde.i.size()); S_.append_column(etilde); - work_estimate_ += 4*etilde.i.size(); + work_estimate_ += 4 * etilde.i.size(); // Compute mu = 1 + v^T * u const f_t mu = 1.0 + sparse_dot(S_.i.data() + S_.col_start[S_start], @@ -2226,7 +2225,7 @@ i_t basis_update_mpf_t::update(const sparse_vector_t& utilde S_.i.data() + S_.col_start[S_start + 1], S_.x.data() + S_.col_start[S_start + 1], S_.col_start[S_start + 2] - S_.col_start[S_start + 1]); - work_estimate_ += 3*std::min(nz, static_cast(etilde.i.size())); + work_estimate_ += 3 * std::min(nz, static_cast(etilde.i.size())); if (std::abs(mu) < 1E-8 || std::abs(mu) > 1E+8) { // Force a refactor. Otherwise we will get numerical issues when dividing by mu. @@ -2240,7 +2239,7 @@ i_t basis_update_mpf_t::update(const sparse_vector_t& utilde x_workspace_[i] = 0.0; xi_workspace_[m + k] = 0; } - work_estimate_ += 4*nz; + work_estimate_ += 4 * nz; #ifdef PRINT_MU_INFO printf("Update mu %e u nz %d v nz %d\n", @@ -2353,7 +2352,10 @@ int basis_update_mpf_t::refactor_basis( std::vector slacks_needed; std::vector superbasic_list; // Empty superbasic list - if (L0_.m != A.m) { resize(A.m); work_estimate_ += A.m;} + if (L0_.m != A.m) { + resize(A.m); + work_estimate_ += A.m; + } std::vector q; i_t status = factorize_basis(A, settings, @@ -2423,7 +2425,7 @@ int basis_update_mpf_t::refactor_basis( assert(q.size() == A.m); reorder_basic_list(q, basic_list); // We no longer need q after reordering the basic list - work_estimate_ += 3*q.size(); + work_estimate_ += 3 * q.size(); reset(); return 0; } diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp index 4f60d3bae1..c6ea2791ab 100644 --- a/cpp/src/dual_simplex/basis_updates.hpp +++ b/cpp/src/dual_simplex/basis_updates.hpp @@ -265,7 +265,7 @@ class basis_update_mpf_t { assert(p.size() == Linit.m); row_permutation_ = p; inverse_permutation(row_permutation_, inverse_row_permutation_); - work_estimate_ += 4*p.size(); + work_estimate_ += 4 * p.size(); clear(); compute_transposes(); reset_stats(); @@ -372,7 +372,7 @@ class basis_update_mpf_t { { L0_.transpose(L0_transpose_); U0_.transpose(U0_transpose_); - work_estimate_ += 6*L0_.col_start[L0_.n] + 6*U0_.col_start[U0_.n]; + work_estimate_ += 6 * L0_.col_start[L0_.n] + 6 * U0_.col_start[U0_.n]; } void multiply_lu(csc_matrix_t& out) const; @@ -405,7 +405,7 @@ class basis_update_mpf_t { mu_values_.clear(); mu_values_.reserve(refactor_frequency_); num_updates_ = 0; - work_estimate_ += 2*refactor_frequency_; + work_estimate_ += 2 * refactor_frequency_; std::fill(xi_workspace_.begin(), xi_workspace_.end(), 0); std::fill(x_workspace_.begin(), x_workspace_.end(), 0.0); diff --git a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp index ea41b56ced..14931da7e2 100644 --- a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp +++ b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp @@ -45,8 +45,8 @@ i_t bound_flipping_ratio_test_t::compute_breakpoints(std::vector& idx++; } } - work_estimate_ += 4*nz; - work_estimate_ += 4*idx; + work_estimate_ += 4 * nz; + work_estimate_ += 4 * idx; pivot_tol /= 10; } return idx; @@ -88,8 +88,7 @@ i_t bound_flipping_ratio_test_t::single_pass(i_t start, harris_found++; } } - work_estimate_ += (end - start) + 2*min_found + 6*harris_found; - + work_estimate_ += (end - start) + 2 * min_found + 6 * harris_found; step_length = min_val; nonbasic_entering = candidate; @@ -126,7 +125,7 @@ i_t bound_flipping_ratio_test_t::compute_step_length(f_t& step_length, // Compute the initial set of breakpoints std::vector indicies(nz); std::vector ratios(nz); - work_estimate_ += 2*nz; + work_estimate_ += 2 * nz; i_t num_breakpoints = compute_breakpoints(indicies, ratios); if constexpr (verbose) { settings_.log.printf("Initial breakpoints %d\n", num_breakpoints); } if (num_breakpoints == 0) { @@ -174,7 +173,7 @@ i_t bound_flipping_ratio_test_t::compute_step_length(f_t& step_length, for (i_t k = 0; k < num_breakpoints - 1; ++k) { if (ratios[k] > max_ratio) { max_ratio = ratios[k]; } } - work_estimate_ += 2*num_breakpoints; + work_estimate_ += 2 * num_breakpoints; settings_.log.printf( "Starting heap passes. %d breakpoints max ratio %e\n", num_breakpoints - 1, max_ratio); bucket_pass( @@ -230,7 +229,7 @@ void bound_flipping_ratio_test_t::heap_passes(const std::vector& }; std::make_heap(bare_idx.begin(), bare_idx.end(), compare); - work_estimate_ += 3*bare_idx.size(); + work_estimate_ += 3 * bare_idx.size(); while (bare_idx.size() > 0 && slope > 0) { // Remove minimum ratio from the heap and rebalance diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp index 7ebdc47cba..aa2baa6ea2 100644 --- a/cpp/src/dual_simplex/crossover.cpp +++ b/cpp/src/dual_simplex/crossover.cpp @@ -506,8 +506,8 @@ i_t dual_push(const lp_problem_t& lp, std::vector deficient; std::vector slacks_needed; f_t work_estimate = 0; - i_t rank = - factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); + i_t rank = factorize_basis( + lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); if (rank == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; } else if (rank != m) { @@ -523,8 +523,8 @@ i_t dual_push(const lp_problem_t& lp, superbasic_list, vstatus, work_estimate); - rank = - factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); + rank = factorize_basis( + lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); if (rank == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; } else if (rank == -1) { @@ -863,8 +863,8 @@ i_t primal_push(const lp_problem_t& lp, std::vector deficient; std::vector slacks_needed; f_t work_estimate = 0; - i_t rank = - factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); + i_t rank = factorize_basis( + lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); if (rank == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; } else if (rank != m) { @@ -883,8 +883,8 @@ i_t primal_push(const lp_problem_t& lp, // We need to be careful. As basis_repair may have changed the superbasic list find_primal_superbasic_variables( lp, settings, solution, solution, vstatus, nonbasic_list, superbasic_list); - rank = - factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); + rank = factorize_basis( + lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); if (rank == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; } else if (rank == -1) { @@ -1146,7 +1146,7 @@ crossover_status_t crossover(const lp_problem_t& lp, const i_t m = lp.num_rows; const i_t n = lp.num_cols; f_t crossover_start = tic(); - f_t work_estimate = 0; + f_t work_estimate = 0; settings.log.printf("\n"); settings.log.printf("Starting crossover\n"); @@ -1228,7 +1228,8 @@ crossover_status_t crossover(const lp_problem_t& lp, std::vector deficient; std::vector slacks_needed; - rank = factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); + rank = factorize_basis( + lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); if (rank == CONCURRENT_HALT_RETURN) { return crossover_status_t::CONCURRENT_LIMIT; } if (rank != m) { settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m); @@ -1243,7 +1244,8 @@ crossover_status_t crossover(const lp_problem_t& lp, superbasic_list, vstatus, work_estimate); - rank = factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); + rank = factorize_basis( + lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); if (rank == CONCURRENT_HALT_RETURN) { return crossover_status_t::CONCURRENT_LIMIT; } else if (rank == -1) { @@ -1398,8 +1400,8 @@ crossover_status_t crossover(const lp_problem_t& lp, nonbasic_list.clear(); superbasic_list.clear(); get_basis_from_vstatus(m, vstatus, basic_list, nonbasic_list, superbasic_list); - rank = - factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); + rank = factorize_basis( + lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); if (rank == CONCURRENT_HALT_RETURN) { return crossover_status_t::CONCURRENT_LIMIT; } else if (rank != m) { @@ -1415,8 +1417,8 @@ crossover_status_t crossover(const lp_problem_t& lp, superbasic_list, vstatus, work_estimate); - rank = - factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); + rank = factorize_basis( + lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); if (rank == CONCURRENT_HALT_RETURN) { return crossover_status_t::CONCURRENT_LIMIT; } else if (rank == -1) { diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index bb88659ca5..869ceae4d7 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -340,10 +340,10 @@ void compute_reduced_cost_update(const lp_problem_t& lp, size_t nnzs_processed = 0; // delta_zB = sigma*ei for (i_t k = 0; k < m; k++) { - const i_t j = basic_list[k]; - delta_z[j] = 0; + const i_t j = basic_list[k]; + delta_z[j] = 0; } - work_estimate += 2*m; + work_estimate += 2 * m; delta_z[leaving_index] = direction; // delta_zN = -N'*delta_y const i_t num_nonbasic = n - m; @@ -364,9 +364,9 @@ void compute_reduced_cost_update(const lp_problem_t& lp, delta_z_mark[j] = 1; } } - work_estimate += 3*num_nonbasic; - work_estimate += 3*nnzs_processed; - work_estimate += 2*delta_z_indices.size(); + work_estimate += 3 * num_nonbasic; + work_estimate += 3 * nnzs_processed; + work_estimate += 2 * delta_z_indices.size(); } template @@ -381,8 +381,8 @@ void compute_delta_z(const csc_matrix_t& A_transpose, f_t& work_estimate) { // delta_zN = - N'*delta_y - const i_t nz_delta_y = delta_y.i.size(); - size_t nnz_processed = 0; + const i_t nz_delta_y = delta_y.i.size(); + size_t nnz_processed = 0; size_t nonbasic_marked = 0; for (i_t k = 0; k < nz_delta_y; k++) { const i_t i = delta_y.i[k]; @@ -403,10 +403,10 @@ void compute_delta_z(const csc_matrix_t& A_transpose, } } } - work_estimate += 4*nz_delta_y; - work_estimate += 2*nnz_processed; - work_estimate += 3*nonbasic_marked; - work_estimate += 2*delta_z_indices.size(); + work_estimate += 4 * nz_delta_y; + work_estimate += 2 * nnz_processed; + work_estimate += 3 * nonbasic_marked; + work_estimate += 2 * delta_z_indices.size(); // delta_zB = sigma*ei delta_z[leaving_index] = direction; @@ -467,15 +467,15 @@ void compute_reduced_costs(const std::vector& objective, for (i_t p = col_start; p < col_end; ++p) { dot += A.x[p] * y[A.i[p]]; } - work_estimate += 3*(col_end - col_start); + work_estimate += 3 * (col_end - col_start); z[j] -= dot; } - work_estimate += 5*(n - m); + work_estimate += 5 * (n - m); // zB = 0 for (i_t k = 0; k < m; ++k) { z[basic_list[k]] = 0.0; } - work_estimate += 2*m; + work_estimate += 2 * m; } template @@ -493,7 +493,7 @@ void compute_primal_variables(const basis_update_mpf_t& ft, const i_t m = A.m; const i_t n = A.n; std::vector rhs = lp_rhs; - work_estimate += 2*m; + work_estimate += 2 * m; // rhs = b - sum_{j : x_j = l_j} A(:, j) * l(j) // - sum_{j : x_j = u_j} A(:, j) * u(j) for (i_t k = 0; k < n - m; ++k) { @@ -505,9 +505,9 @@ void compute_primal_variables(const basis_update_mpf_t& ft, for (i_t p = col_start; p < col_end; ++p) { rhs[A.i[p]] -= xj * A.x[p]; } - work_estimate += 3*(col_end - col_start); + work_estimate += 3 * (col_end - col_start); } - work_estimate += 5*(n - m); + work_estimate += 5 * (n - m); xB_workspace.resize(m); work_estimate += m; @@ -517,7 +517,7 @@ void compute_primal_variables(const basis_update_mpf_t& ft, const i_t j = basic_list[k]; x[j] = xB_workspace[k]; } - work_estimate += 2*m; + work_estimate += 2 * m; } // Work is 3*delta_z_indices.size() @@ -551,7 +551,7 @@ void clear_delta_x(const std::vector& basic_list, const i_t j = basic_list[scaled_delta_xB_sparse.i[k]]; delta_x[j] = 0.0; } - work_estimate += 3*scaled_delta_xB_nz; + work_estimate += 3 * scaled_delta_xB_nz; // Leaving index already included above delta_x[entering_index] = 0.0; scaled_delta_xB_sparse.i.clear(); @@ -635,12 +635,12 @@ void compute_dual_solution_from_basis(const lp_problem_t& lp, y.resize(m); std::vector cB(m); - work_estimate += 2*m; + work_estimate += 2 * m; for (i_t k = 0; k < m; ++k) { const i_t j = basic_list[k]; cB[k] = lp.objective[j]; } - work_estimate += 3*m; + work_estimate += 3 * m; ft.b_transpose_solve(cB, y); // We want A'y + z = c @@ -662,15 +662,15 @@ void compute_dual_solution_from_basis(const lp_problem_t& lp, for (i_t p = col_start; p < col_end; ++p) { dot += lp.A.x[p] * y[lp.A.i[p]]; } - work_estimate += 3*(col_end - col_start); + work_estimate += 3 * (col_end - col_start); z[j] -= dot; } - work_estimate += 5*(n - m); + work_estimate += 5 * (n - m); // zB = 0 for (i_t k = 0; k < m; ++k) { z[basic_list[k]] = 0.0; } - work_estimate += 2*m; + work_estimate += 2 * m; } template @@ -686,8 +686,7 @@ i_t compute_primal_solution_from_basis(const lp_problem_t& lp, const i_t m = lp.num_rows; const i_t n = lp.num_cols; std::vector rhs = lp.rhs; - work_estimate += 2*m; - + work_estimate += 2 * m; for (i_t k = 0; k < n - m; ++k) { const i_t j = nonbasic_list[k]; @@ -700,7 +699,7 @@ i_t compute_primal_solution_from_basis(const lp_problem_t& lp, x[j] = 0.0; } } - work_estimate += 4*(n - m); + work_estimate += 4 * (n - m); // rhs = b - sum_{j : x_j = l_j} A(:, j) l(j) - sum_{j : x_j = u_j} A(:, j) * // u(j) @@ -712,9 +711,9 @@ i_t compute_primal_solution_from_basis(const lp_problem_t& lp, for (i_t p = col_start; p < col_end; ++p) { rhs[lp.A.i[p]] -= xj * lp.A.x[p]; } - work_estimate += 3*(col_end - col_start); + work_estimate += 3 * (col_end - col_start); } - work_estimate += 4*(n - m); + work_estimate += 4 * (n - m); xB_workspace.resize(m); work_estimate += m; @@ -724,7 +723,7 @@ i_t compute_primal_solution_from_basis(const lp_problem_t& lp, const i_t j = basic_list[k]; x[j] = xB_workspace[k]; } - work_estimate += 3*m; + work_estimate += 3 * m; return 0; } @@ -839,7 +838,7 @@ void update_primal_infeasibilities(const lp_problem_t& lp, j, primal_inf); } - work_estimate += 8*nz; + work_estimate += 8 * nz; } template @@ -854,23 +853,25 @@ void clean_up_infeasibilities(std::vector& squared_infeasibilities, const f_t squared_infeas = squared_infeasibilities[j]; if (squared_infeas == 0.0) { needs_clean_up = true; } } - work_estimate += 2*initial_nz; + work_estimate += 2 * initial_nz; if (needs_clean_up) { i_t num_cleans = 0; - work_estimate += 2*infeasibility_indices.size(); + work_estimate += 2 * infeasibility_indices.size(); for (size_t k = 0; k < infeasibility_indices.size(); ++k) { const i_t j = infeasibility_indices[k]; const f_t squared_infeas = squared_infeasibilities[j]; if (squared_infeas == 0.0) { - const i_t new_j = infeasibility_indices.back(); + const i_t new_j = infeasibility_indices.back(); infeasibility_indices[k] = new_j; infeasibility_indices.pop_back(); - if (squared_infeasibilities[new_j] == 0.0) { k--; } // Decrement k so that we process the same index again + if (squared_infeasibilities[new_j] == 0.0) { + k--; + } // Decrement k so that we process the same index again num_cleans++; } } - work_estimate += 4*num_cleans; + work_estimate += 4 * num_cleans; } } @@ -887,11 +888,10 @@ i_t steepest_edge_pricing_with_infeasibilities(const lp_problem_t& lp, f_t& max_val, f_t& work_estimate) { - max_val = 0.0; i_t leaving_index = -1; const i_t nz = infeasibility_indices.size(); - i_t max_count = 0; + i_t max_count = 0; for (i_t k = 0; k < nz; ++k) { const i_t j = infeasibility_indices[k]; const f_t squared_infeas = squared_infeasibilities[j]; @@ -905,7 +905,7 @@ i_t steepest_edge_pricing_with_infeasibilities(const lp_problem_t& lp, max_count++; } } - work_estimate += 3*nz + 3*max_count; + work_estimate += 3 * nz + 3 * max_count; basic_leaving = leaving_index >= 0 ? basic_mark[leaving_index] : -1; @@ -1181,10 +1181,11 @@ i_t flip_bounds(const lp_problem_t& lp, const f_t dual_tol = settings.dual_tol; // lower to 1e-7 or less will cause 25fv47 and d2q06c to cycle if (vstatus[j] == variable_status_t::NONBASIC_LOWER && z[j] < -dual_tol) { - const f_t delta = lp.upper[j] - lp.lower[j]; + const f_t delta = lp.upper[j] - lp.lower[j]; const f_t atilde_start_size = atilde_index.size(); scatter_dense(lp.A, j, -delta, atilde, mark, atilde_index); - work_estimate += 2*(atilde_index.size() - atilde_start_size) + 4 * (lp.A.col_start[j + 1] - lp.A.col_start[j]) + 10; + work_estimate += 2 * (atilde_index.size() - atilde_start_size) + + 4 * (lp.A.col_start[j + 1] - lp.A.col_start[j]) + 10; delta_x[j] += delta; vstatus[j] = variable_status_t::NONBASIC_UPPER; #ifdef BOUND_FLIP_DEBUG @@ -1193,10 +1194,11 @@ i_t flip_bounds(const lp_problem_t& lp, #endif num_flipped++; } else if (vstatus[j] == variable_status_t::NONBASIC_UPPER && z[j] > dual_tol) { - const f_t delta = lp.lower[j] - lp.upper[j]; + const f_t delta = lp.lower[j] - lp.upper[j]; const f_t atilde_start_size = atilde_index.size(); scatter_dense(lp.A, j, -delta, atilde, mark, atilde_index); - work_estimate += 2*(atilde_index.size() - atilde_start_size) + 4 * (lp.A.col_start[j + 1] - lp.A.col_start[j]) + 10; + work_estimate += 2 * (atilde_index.size() - atilde_start_size) + + 4 * (lp.A.col_start[j + 1] - lp.A.col_start[j]) + 10; delta_x[j] += delta; vstatus[j] = variable_status_t::NONBASIC_LOWER; #ifdef BOUND_FLIP_DEBUG @@ -1246,7 +1248,7 @@ i_t initialize_steepest_edge_norms(const lp_problem_t& lp, std::vector row_degree(m, 0); std::vector mapping(m, -1); std::vector coeff(m, 0.0); - work_estimate += 3*m; + work_estimate += 3 * m; for (i_t k = 0; k < m; ++k) { const i_t j = basic_list[k]; @@ -1259,9 +1261,9 @@ i_t initialize_steepest_edge_norms(const lp_problem_t& lp, mapping[k] = i; coeff[k] = lp.A.x[p]; } - work_estimate += 5*(col_end - col_start); + work_estimate += 5 * (col_end - col_start); } - work_estimate += 3*m; + work_estimate += 3 * m; #ifdef CHECK_SINGLETON_ROWS csc_matrix_t B(m, m, 0); @@ -1346,7 +1348,7 @@ i_t initialize_steepest_edge_norms(const lp_problem_t& lp, for (i_t p = 0; p < sparse_dy.x.size(); ++p) { my_init += sparse_dy.x[p] * sparse_dy.x[p]; } - work_estimate += 2*sparse_dy.x.size(); + work_estimate += 2 * sparse_dy.x.size(); #endif #if COMPARE_WITH_DENSE if (std::abs(init - my_init) > 1e-12) { @@ -1375,7 +1377,7 @@ i_t initialize_steepest_edge_norms(const lp_problem_t& lp, return CONCURRENT_HALT_RETURN; } } - work_estimate += 7*m; + work_estimate += 7 * m; return 0; } @@ -1404,7 +1406,7 @@ i_t update_steepest_edge_norms(const simplex_solver_settings_t& settin work_estimate += 2 * v_sparse.i.size(); } v_sparse.scatter(v); - work_estimate += 2*v.size(); + work_estimate += 2 * v.size(); const i_t leaving_index = basic_list[basic_leaving_index]; const f_t prev_dy_norm_squared = delta_y_steepest_edge[leaving_index]; @@ -1432,7 +1434,7 @@ i_t update_steepest_edge_norms(const simplex_solver_settings_t& settin const i_t k = scaled_delta_xB.i[h]; const i_t j = basic_list[k]; if (k == basic_leaving_index) { - const f_t w = scaled_delta_xB.x[h]; + const f_t w = scaled_delta_xB.x[h]; const f_t w_squared = w * w; delta_y_steepest_edge[j] = (1.0 / w_squared) * dy_norm_squared; } else { @@ -1456,13 +1458,13 @@ i_t update_steepest_edge_norms(const simplex_solver_settings_t& settin delta_y_steepest_edge[j] = new_val; } } - work_estimate += 5*scaled_delta_xB_nz; + work_estimate += 5 * scaled_delta_xB_nz; const i_t v_nz = v_sparse.i.size(); for (i_t k = 0; k < v_nz; ++k) { v[v_sparse.i[k]] = 0.0; } - work_estimate += 2*v_nz; + work_estimate += 2 * v_nz; return 0; } @@ -1558,7 +1560,7 @@ i_t compute_perturbation(const lp_problem_t& lp, #endif } } - work_estimate += 7*delta_z_indices.size(); + work_estimate += 7 * delta_z_indices.size(); #ifdef PERTURBATION_DEBUG if (num_perturb > 0) { settings.log.printf("Perturbed %d dual variables by %e\n", num_perturb, sum_perturb); @@ -1591,12 +1593,12 @@ void reset_basis_mark(const std::vector& basic_list, for (i_t k = 0; k < n_minus_m; k++) { nonbasic_mark[nonbasic_list[k]] = k; } - work_estimate +=2*n_minus_m; + work_estimate += 2 * n_minus_m; for (i_t k = 0; k < m; k++) { basic_mark[basic_list[k]] = k; } - work_estimate += 2*m; + work_estimate += 2 * m; } template @@ -1653,14 +1655,14 @@ i_t update_dual_variables(const sparse_vector_t& delta_y_sparse, const i_t i = delta_y_sparse.i[k]; y[i] += step_length * delta_y_sparse.x[k]; } - work_estimate += 3*delta_y_nz; + work_estimate += 3 * delta_y_nz; // z <- z + steplength * delta_z const i_t delta_z_nz = delta_z_indices.size(); for (i_t k = 0; k < delta_z_nz; ++k) { const i_t j = delta_z_indices[k]; z[j] += step_length * delta_z[j]; } - work_estimate += 3*delta_z_nz; + work_estimate += 3 * delta_z_nz; z[leaving_index] += step_length * delta_z[leaving_index]; return 0; } @@ -1687,25 +1689,25 @@ void adjust_for_flips(const basis_update_mpf_t& ft, atilde_sparse.i.push_back(atilde_index[k]); atilde_sparse.x.push_back(atilde[atilde_index[k]]); } - work_estimate += 5*atilde_nz; + work_estimate += 5 * atilde_nz; ft.b_solve(atilde_sparse, delta_xB_0_sparse); const i_t delta_xB_0_nz = delta_xB_0_sparse.i.size(); for (i_t k = 0; k < delta_xB_0_nz; ++k) { const i_t j = basic_list[delta_xB_0_sparse.i[k]]; x[j] += delta_xB_0_sparse.x[k]; } - work_estimate += 4*delta_xB_0_nz; + work_estimate += 4 * delta_xB_0_nz; for (i_t k = 0; k < delta_z_indices.size(); ++k) { const i_t j = delta_z_indices[k]; x[j] += delta_x_flip[j]; delta_x_flip[j] = 0.0; } - work_estimate += 4*delta_z_indices.size(); + work_estimate += 4 * delta_z_indices.size(); // Clear atilde for (i_t k = 0; k < atilde_index.size(); ++k) { atilde[atilde_index[k]] = 0.0; } - work_estimate += 2*atilde_index.size(); + work_estimate += 2 * atilde_index.size(); // Clear atilde_mark for (i_t k = 0; k < atilde_mark.size(); ++k) { atilde_mark[k] = 0; @@ -1754,7 +1756,11 @@ i_t compute_delta_x(const lp_problem_t& lp, } err_max = std::max(err_max, err); } - if (err_max > 1e-6) { printf("B multiply error %e\n", err_max); } else { printf("B multiply error %e\n", err_max); } + if (err_max > 1e-6) { + printf("B multiply error %e\n", err_max); + } else { + printf("B multiply error %e\n", err_max); + } } #endif @@ -1786,7 +1792,7 @@ i_t compute_delta_x(const lp_problem_t& lp, const i_t j = basic_list[scaled_delta_xB_sparse.i[k]]; delta_x[j] = primal_step_length * scaled_delta_xB_sparse.x[k]; } - work_estimate += 4*scaled_delta_xB_nz; + work_estimate += 4 * scaled_delta_xB_nz; delta_x[leaving_index] = delta_x_leaving; delta_x[entering_index] = primal_step_length; return 0; @@ -1806,7 +1812,7 @@ void update_primal_variables(const sparse_vector_t& scaled_delta_xB_sp const i_t j = basic_list[scaled_delta_xB_sparse.i[k]]; x[j] += delta_x[j]; } - work_estimate += 4*scaled_delta_xB_nz; + work_estimate += 4 * scaled_delta_xB_nz; // Leaving index already included above x[entering_index] += delta_x[entering_index]; } @@ -1825,7 +1831,7 @@ void update_objective(const std::vector& basic_list, const i_t j = basic_list[changed_basic_indices[k]]; obj += delta_x[j] * objective[j]; } - work_estimate += 4*changed_basic_nz; + work_estimate += 4 * changed_basic_nz; // Leaving index already included above obj += delta_x[entering_index] * objective[entering_index]; } @@ -2280,8 +2286,8 @@ void prepare_optimality(i_t info, std::vector& z, lp_solution_t& sol) { - const i_t m = lp.num_rows; - const i_t n = lp.num_cols; + const i_t m = lp.num_rows; + const i_t n = lp.num_cols; f_t work_estimate = 0; // Work in this function is not captured sol.objective = compute_objective(lp, sol.x); @@ -2503,7 +2509,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, std::vector c_basic(m); std::vector xB_workspace(m); - phase2_work_estimate += n * 2*m; + phase2_work_estimate += n * 2 * m; dual::status_t status = dual::status_t::UNSET; @@ -2526,7 +2532,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, phase2_work_estimate += (n - m); get_basis_from_vstatus(m, vstatus, basic_list, nonbasic_list, superbasic_list); - phase2_work_estimate += 2*n; + phase2_work_estimate += 2 * n; assert(superbasic_list.size() == 0); assert(nonbasic_list.size() == n - m); @@ -2543,7 +2549,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, const i_t j = basic_list[k]; c_basic[k] = objective[j]; } - phase2_work_estimate += 2*m; + phase2_work_estimate += 2 * m; // Solve B'*y = cB ft.b_transpose_solve(c_basic, y); @@ -2554,7 +2560,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, "|| y || %e || cB || %e\n", vector_norm_inf(y), vector_norm_inf(c_basic)); } - phase2::compute_reduced_costs(objective, lp.A, y, basic_list, nonbasic_list, z, phase2_work_estimate); + phase2::compute_reduced_costs( + objective, lp.A, y, basic_list, nonbasic_list, z, phase2_work_estimate); if constexpr (print_norms) { settings.log.printf("|| z || %e\n", vector_norm_inf(z)); } #ifdef COMPUTE_DUAL_RESIDUAL @@ -2580,7 +2587,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, const f_t init_dual_inf = phase2::dual_infeasibility(lp, settings, vstatus, z, settings.tight_tol, settings.dual_tol); - phase2_work_estimate += 3*n; + phase2_work_estimate += 3 * n; if (init_dual_inf > settings.dual_tol) { settings.log.printf("Initial dual infeasibility %e\n", init_dual_inf); } @@ -2590,10 +2597,17 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, settings.log.printf("Free variable %d vstatus %d\n", j, vstatus[j]); } } - phase2_work_estimate += 3*n; + phase2_work_estimate += 3 * n; - phase2::compute_primal_variables( - ft, lp.rhs, lp.A, basic_list, nonbasic_list, settings.tight_tol, x, xB_workspace, phase2_work_estimate); + phase2::compute_primal_variables(ft, + lp.rhs, + lp.A, + basic_list, + nonbasic_list, + settings.tight_tol, + x, + xB_workspace, + phase2_work_estimate); if (toc(start_time) > settings.time_limit) { return dual::status_t::TIME_LIMIT; } if (print_norms) { settings.log.printf("|| x || %e\n", vector_norm2(x)); } @@ -2614,7 +2628,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, if (slack_basis) { phase2::initialize_steepest_edge_norms_from_slack_basis( basic_list, nonbasic_list, delta_y_steepest_edge); - phase2_work_estimate += 2*n; + phase2_work_estimate += 2 * n; } else { std::fill(delta_y_steepest_edge.begin(), delta_y_steepest_edge.end(), -1); phase2_work_estimate += n; @@ -2631,7 +2645,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, const i_t j = basic_list[k]; if (delta_y_steepest_edge[j] <= 0.0) { delta_y_steepest_edge[j] = 1e-4; } } - phase2_work_estimate += 2*m; + phase2_work_estimate += 2 * m; settings.log.printf("using exisiting steepest edge %e\n", vector_norm2(delta_y_steepest_edge)); } @@ -2661,11 +2675,12 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, delta_z_indices.reserve(n); phase2_work_estimate += n; - phase2::reset_basis_mark(basic_list, nonbasic_list, basic_mark, nonbasic_mark, phase2_work_estimate); + phase2::reset_basis_mark( + basic_list, nonbasic_list, basic_mark, nonbasic_mark, phase2_work_estimate); std::vector bounded_variables(n, 0); phase2::compute_bounded_info(lp.lower, lp.upper, bounded_variables); - phase2_work_estimate += 4*n; + phase2_work_estimate += 4 * n; f_t primal_infeasibility; f_t primal_infeasibility_squared = @@ -2676,7 +2691,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, squared_infeasibilities, infeasibility_indices, primal_infeasibility); - phase2_work_estimate += 4*m + 2*n; + phase2_work_estimate += 4 * m + 2 * n; #ifdef CHECK_BASIC_INFEASIBILITIES phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 0); @@ -2684,10 +2699,10 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, csc_matrix_t A_transpose(1, 1, 0); lp.A.transpose(A_transpose); - phase2_work_estimate += 2*lp.A.col_start[lp.A.n]; + phase2_work_estimate += 2 * lp.A.col_start[lp.A.n]; f_t obj = compute_objective(lp, x); - phase2_work_estimate += 2*n; + phase2_work_estimate += 2 * n; const i_t start_iter = iter; @@ -2719,7 +2734,6 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, } phase2_work_estimate = 0.0; - if (phase == 2) { settings.log.printf("%5d %+.16e %7d %.8e %.2e %.2f\n", iter, @@ -2851,7 +2865,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, timers.btran_time += timers.stop_timer(); const f_t steepest_edge_norm_check = delta_y_sparse.norm2_squared(); - phase2_work_estimate += 2*delta_y_sparse.i.size(); + phase2_work_estimate += 2 * delta_y_sparse.i.size(); if (delta_y_steepest_edge[leaving_index] < settings.steepest_edge_ratio * steepest_edge_norm_check) { constexpr bool verbose = false; @@ -2972,7 +2986,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, settings.log.printf("No entering variable found. Iter %d\n", iter); settings.log.printf("Scaled infeasibility %e\n", max_val); f_t perturbation = phase2::amount_of_perturbation(lp, objective); - phase2_work_estimate += 2*n; + phase2_work_estimate += 2 * n; if (perturbation > 0.0 && phase == 2) { // Try to remove perturbation @@ -2984,19 +2998,25 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, { const f_t dual_infeas = phase2::dual_infeasibility( lp, settings, vstatus, unperturbed_z, settings.tight_tol, settings.dual_tol); - phase2_work_estimate += 3*n; + phase2_work_estimate += 3 * n; settings.log.printf("Dual infeasibility after removing perturbation %e\n", dual_infeas); if (dual_infeas <= settings.dual_tol) { settings.log.printf("Removed perturbation of %.2e.\n", perturbation); - z = unperturbed_z; - y = unperturbed_y; - phase2_work_estimate += 2*n + 2*m; + z = unperturbed_z; + y = unperturbed_y; + phase2_work_estimate += 2 * n + 2 * m; perturbation = 0.0; std::vector unperturbed_x(n); phase2_work_estimate += n; - phase2::compute_primal_solution_from_basis( - lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x, xB_workspace, phase2_work_estimate); + phase2::compute_primal_solution_from_basis(lp, + ft, + basic_list, + nonbasic_list, + vstatus, + unperturbed_x, + xB_workspace, + phase2_work_estimate); x = unperturbed_x; primal_infeasibility_squared = phase2::compute_initial_primal_infeasibilities(lp, @@ -3006,14 +3026,14 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, squared_infeasibilities, infeasibility_indices, primal_infeasibility); - phase2_work_estimate += 4*m + 2*n; + phase2_work_estimate += 4 * m + 2 * n; settings.log.printf("Updated primal infeasibility: %e\n", primal_infeasibility); objective = lp.objective; - phase2_work_estimate += 2*n; + phase2_work_estimate += 2 * n; // Need to reset the objective value, since we have recomputed x obj = phase2::compute_perturbed_objective(objective, x); - phase2_work_estimate += 2*n; + phase2_work_estimate += 2 * n; if (dual_infeas <= settings.dual_tol && primal_infeasibility <= settings.primal_tol) { phase2::prepare_optimality(1, primal_infeasibility, @@ -3038,17 +3058,23 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, settings.log.printf( "Continuing with perturbation removed and steepest edge norms reset\n"); // Clear delta_z before restarting the iteration - phase2_work_estimate += 3*delta_z_indices.size(); + phase2_work_estimate += 3 * delta_z_indices.size(); phase2::clear_delta_z( entering_index, leaving_index, delta_z_mark, delta_z_indices, delta_z); continue; } else { std::vector unperturbed_x(n); phase2_work_estimate += n; - phase2::compute_primal_solution_from_basis( - lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x, xB_workspace, phase2_work_estimate); + phase2::compute_primal_solution_from_basis(lp, + ft, + basic_list, + nonbasic_list, + vstatus, + unperturbed_x, + xB_workspace, + phase2_work_estimate); x = unperturbed_x; - phase2_work_estimate += 2*n; + phase2_work_estimate += 2 * n; primal_infeasibility_squared = phase2::compute_initial_primal_infeasibilities(lp, settings, @@ -3057,11 +3083,11 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, squared_infeasibilities, infeasibility_indices, primal_infeasibility); - phase2_work_estimate += 4*m + 2*n; + phase2_work_estimate += 4 * m + 2 * n; const f_t orig_dual_infeas = phase2::dual_infeasibility( lp, settings, vstatus, z, settings.tight_tol, settings.dual_tol); - phase2_work_estimate += 3*n; + phase2_work_estimate += 3 * n; if (primal_infeasibility <= settings.primal_tol && orig_dual_infeas <= settings.dual_tol) { @@ -3122,10 +3148,10 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, const f_t dual_infeas = phase2::dual_infeasibility(lp, settings, vstatus, z, settings.tight_tol, settings.dual_tol); - phase2_work_estimate += 3*n; + phase2_work_estimate += 3 * n; settings.log.printf("Dual infeasibility %e\n", dual_infeas); const f_t primal_inf = phase2::primal_infeasibility(lp, settings, vstatus, x); - phase2_work_estimate += 3*n; + phase2_work_estimate += 3 * n; settings.log.printf("Primal infeasibility %e\n", primal_inf); settings.log.printf("Updates %d\n", ft.num_updates()); settings.log.printf("Steepest edge %e\n", max_val); @@ -3141,8 +3167,14 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, // Update dual variables // y <- y + steplength * delta_y // z <- z + steplength * delta_z - i_t update_dual_variables_status = phase2::update_dual_variables( - delta_y_sparse, delta_z_indices, delta_z, step_length, leaving_index, y, z, phase2_work_estimate); + i_t update_dual_variables_status = phase2::update_dual_variables(delta_y_sparse, + delta_z_indices, + delta_z, + step_length, + leaving_index, + y, + z, + phase2_work_estimate); if (update_dual_variables_status == -1) { settings.log.printf("Numerical issues encountered in update_dual_variables.\n"); return dual::status_t::NUMERICAL; @@ -3253,7 +3285,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, timers.start_timer(); // x <- x + delta_x - phase2::update_primal_variables(scaled_delta_xB_sparse, basic_list, delta_x, entering_index, x, phase2_work_estimate); + phase2::update_primal_variables( + scaled_delta_xB_sparse, basic_list, delta_x, entering_index, x, phase2_work_estimate); timers.vector_time += timers.stop_timer(); #ifdef COMPUTE_PRIMAL_RESIDUAL @@ -3268,8 +3301,13 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, timers.start_timer(); // TODO(CMM): Do I also need to update the objective due to the bound flips? // TODO(CMM): I'm using the unperturbed objective here, should this be the perturbed objective? - phase2::update_objective( - basic_list, scaled_delta_xB_sparse.i, lp.objective, delta_x, entering_index, obj, phase2_work_estimate); + phase2::update_objective(basic_list, + scaled_delta_xB_sparse.i, + lp.objective, + delta_x, + entering_index, + obj, + phase2_work_estimate); timers.objective_time += timers.stop_timer(); timers.start_timer(); @@ -3312,7 +3350,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, entering_index, primal_infeasibility_squared); - phase2::clean_up_infeasibilities(squared_infeasibilities, infeasibility_indices, phase2_work_estimate); + phase2::clean_up_infeasibilities( + squared_infeasibilities, infeasibility_indices, phase2_work_estimate); #if CHECK_PRIMAL_INFEASIBILITIES phase2::check_primal_infeasibilities( @@ -3321,11 +3360,13 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, timers.update_infeasibility_time += timers.stop_timer(); // Clear delta_x - phase2::clear_delta_x(basic_list, entering_index, scaled_delta_xB_sparse, delta_x, phase2_work_estimate); + phase2::clear_delta_x( + basic_list, entering_index, scaled_delta_xB_sparse, delta_x, phase2_work_estimate); timers.start_timer(); f_t sum_perturb = 0.0; - phase2::compute_perturbation(lp, settings, delta_z_indices, z, objective, sum_perturb, phase2_work_estimate); + phase2::compute_perturbation( + lp, settings, delta_z_indices, z, objective, sum_perturb, phase2_work_estimate); timers.perturb_time += timers.stop_timer(); // Update basis information @@ -3389,14 +3430,21 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, settings.log.printf("Successfully repaired basis. Iteration %d\n", iter); } - phase2::reset_basis_mark(basic_list, nonbasic_list, basic_mark, nonbasic_mark, phase2_work_estimate); + phase2::reset_basis_mark( + basic_list, nonbasic_list, basic_mark, nonbasic_mark, phase2_work_estimate); if (should_recompute_x) { std::vector unperturbed_x(n); phase2_work_estimate += n; - phase2::compute_primal_solution_from_basis( - lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x, xB_workspace, phase2_work_estimate); + phase2::compute_primal_solution_from_basis(lp, + ft, + basic_list, + nonbasic_list, + vstatus, + unperturbed_x, + xB_workspace, + phase2_work_estimate); x = unperturbed_x; - phase2_work_estimate += 2*n; + phase2_work_estimate += 2 * n; } primal_infeasibility_squared = phase2::compute_initial_primal_infeasibilities(lp, @@ -3406,7 +3454,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, squared_infeasibilities, infeasibility_indices, primal_infeasibility); - phase2_work_estimate += 4*m + 2*n; + phase2_work_estimate += 4 * m + 2 * n; } #ifdef CHECK_BASIC_INFEASIBILITIES phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 7); @@ -3434,7 +3482,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, // Clear delta_z phase2::clear_delta_z(entering_index, leaving_index, delta_z_mark, delta_z_indices, delta_z); - phase2_work_estimate += 3*delta_z_indices.size(); + phase2_work_estimate += 3 * delta_z_indices.size(); f_t now = toc(start_time); diff --git a/cpp/src/dual_simplex/primal.cpp b/cpp/src/dual_simplex/primal.cpp index 3ec44df8bf..b1b6be0b75 100644 --- a/cpp/src/dual_simplex/primal.cpp +++ b/cpp/src/dual_simplex/primal.cpp @@ -295,8 +295,8 @@ primal::status_t primal_phase2(i_t phase, std::vector q(m); std::vector deficient; std::vector slacks_needed; - i_t rank = - factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); + i_t rank = factorize_basis( + lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); if (rank == CONCURRENT_HALT_RETURN) { return primal::status_t::CONCURRENT_LIMIT; } else if (rank != m) { @@ -312,7 +312,8 @@ primal::status_t primal_phase2(i_t phase, superbasic_list, vstatus, work_estimate); - rank = factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); + rank = factorize_basis( + lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, work_estimate); if (rank == CONCURRENT_HALT_RETURN) { return primal::status_t::CONCURRENT_LIMIT; } else if (rank == -1) { diff --git a/cpp/src/dual_simplex/right_looking_lu.cpp b/cpp/src/dual_simplex/right_looking_lu.cpp index 2ac0dda6ae..6651bb9051 100644 --- a/cpp/src/dual_simplex/right_looking_lu.cpp +++ b/cpp/src/dual_simplex/right_looking_lu.cpp @@ -58,13 +58,13 @@ i_t initialize_degree_data(const csc_matrix_t& A, Bnz++; } } - work_estimate += 3*n + 2*Bnz; + work_estimate += 3 * n + 2 * Bnz; for (i_t k = 0; k < n; ++k) { assert(Cdegree[k] <= m && Cdegree[k] >= 0); col_count[Cdegree[k]].push_back(k); } - work_estimate += 3*n; + work_estimate += 3 * n; for (i_t k = 0; k < m; ++k) { assert(Rdegree[k] <= n && Rdegree[k] >= 0); @@ -74,7 +74,7 @@ i_t initialize_degree_data(const csc_matrix_t& A, if (verbose) { printf("Zero degree row %d\n", k); } } } - work_estimate += 4*m; + work_estimate += 4 * m; return Bnz; } @@ -119,7 +119,7 @@ i_t load_elements(const csc_matrix_t& A, nz++; } } - work_estimate += 3*n + 15*nz; + work_estimate += 3 * n + 15 * nz; assert(nz == Bnz); for (i_t j = 0; j < n; j++) { @@ -130,7 +130,7 @@ i_t load_elements(const csc_matrix_t& A, assert(entry->i < m); } } - work_estimate += 2*n + nz; + work_estimate += 2 * n + nz; for (i_t i = 0; i < m; i++) { for (i_t p = first_in_row[i]; p != kNone; p = elements[p].next_in_row) { @@ -140,7 +140,7 @@ i_t load_elements(const csc_matrix_t& A, assert(entry->j >= 0); } } - work_estimate += 2*m + nz; + work_estimate += 2 * m + nz; return 0; } @@ -343,7 +343,7 @@ void update_Cdegree_and_col_count(i_t pivot_i, if (j != pivot_j && cdeg >= 0) { col_count[cdeg].push_back(j); } loop_count++; } - work_estimate += 7*loop_count; + work_estimate += 7 * loop_count; Cdegree[pivot_j] = -1; } @@ -379,7 +379,7 @@ void update_Rdegree_and_row_count(i_t pivot_i, if (i != pivot_i && rdeg >= 0) { row_count[rdeg].push_back(i); } loop_count++; } - work_estimate += 7*loop_count; + work_estimate += 7 * loop_count; Rdegree[pivot_i] = -1; } @@ -410,10 +410,10 @@ void schur_complement(i_t pivot_i, for (i_t p3 = first_in_row[i]; p3 != kNone; p3 = elements[p3].next_in_row) { row_last = p3; } - work_estimate += 2*Rdegree[i]; + work_estimate += 2 * Rdegree[i]; row_last_workspace[i] = row_last; } - work_estimate += 4*Cdegree[pivot_j]; + work_estimate += 4 * Cdegree[pivot_j]; for (i_t p0 = first_in_row[pivot_i]; p0 != kNone; p0 = elements[p0].next_in_row) { element_t* entry = &elements[p0]; @@ -430,7 +430,7 @@ void schur_complement(i_t pivot_i, column_j_workspace[i] = p1; col_last = p1; } - work_estimate += 5*Cdegree[j]; + work_estimate += 5 * Cdegree[j]; for (i_t p1 = first_in_col[pivot_j]; p1 != kNone; p1 = elements[p1].next_in_column) { element_t* e = &elements[p1]; @@ -486,7 +486,7 @@ void schur_complement(i_t pivot_i, // Remove row i from row_count[rdeg] std::swap(*it, row_count[rdeg].back()); row_count[rdeg].pop_back(); - work_estimate += 2*(it - row_count[rdeg].begin()); + work_estimate += 2 * (it - row_count[rdeg].begin()); break; } } @@ -501,7 +501,7 @@ void schur_complement(i_t pivot_i, // Remove col j from col_count[cdeg] std::swap(*it, col_count[cdeg].back()); col_count[cdeg].pop_back(); - work_estimate += 2*(it - col_count[cdeg].begin()); + work_estimate += 2 * (it - col_count[cdeg].begin()); break; } } @@ -509,7 +509,7 @@ void schur_complement(i_t pivot_i, col_count[cdeg].push_back(j); // Add column j to col_count[cdeg] } } - work_estimate += 10*Cdegree[pivot_j]; + work_estimate += 10 * Cdegree[pivot_j]; for (i_t p1 = first_in_col[j]; p1 != kNone; p1 = elements[p1].next_in_column) { element_t* e = &elements[p1]; @@ -517,9 +517,9 @@ void schur_complement(i_t pivot_i, assert(e->j == j); column_j_workspace[i] = kNone; } - work_estimate += 5*Cdegree[j]; + work_estimate += 5 * Cdegree[j]; } - work_estimate += 10*Rdegree[pivot_i]; + work_estimate += 10 * Rdegree[pivot_i]; } template @@ -538,8 +538,8 @@ void remove_pivot_row(i_t pivot_i, element_t* e = &elements[p0]; const i_t j = e->j; if (j == pivot_j) { continue; } - i_t last = kNone; - f_t max_in_col_j = 0; + i_t last = kNone; + f_t max_in_col_j = 0; i_t col_loop_count = 0; for (i_t p = first_in_col[j]; p != kNone; p = elements[p].next_in_column) { element_t* entry = &elements[p]; @@ -559,11 +559,11 @@ void remove_pivot_row(i_t pivot_i, last = p; col_loop_count++; } - work_estimate += 3*col_loop_count; + work_estimate += 3 * col_loop_count; max_in_column[j] = max_in_col_j; row_loop_count++; } - work_estimate += 5*row_loop_count; + work_estimate += 5 * row_loop_count; first_in_row[pivot_i] = kNone; } @@ -608,14 +608,14 @@ void remove_pivot_col(i_t pivot_i, last = p; row_loop_count++; } - work_estimate += 3*row_loop_count; + work_estimate += 3 * row_loop_count; #ifdef THRESHOLD_ROOK_PIVOTING max_in_row[i] = max_in_row_i; #endif col_loop_count++; } first_in_col[pivot_j] = kNone; - work_estimate += 3*col_loop_count; + work_estimate += 3 * col_loop_count; } } // namespace @@ -645,26 +645,27 @@ i_t right_looking_lu(const csc_matrix_t& A, std::vector Rdegree(n); // Rdegree[i] is the degree of row i std::vector Cdegree(n); // Cdegree[j] is the degree of column j - work_estimate += 2*n; + work_estimate += 2 * n; std::vector> col_count( n + 1); // col_count[nz] is a list of columns with nz nonzeros in the active submatrix std::vector> row_count( n + 1); // row_count[nz] is a list of rows with nz nonzeros in the active submatrix - work_estimate += 2*n; + work_estimate += 2 * n; - const i_t Bnz = initialize_degree_data(A, column_list, Cdegree, Rdegree, col_count, row_count, work_estimate); + const i_t Bnz = + initialize_degree_data(A, column_list, Cdegree, Rdegree, col_count, row_count, work_estimate); std::vector> elements(Bnz); std::vector first_in_row(n, kNone); std::vector first_in_col(n, kNone); - work_estimate += 2*n + Bnz; + work_estimate += 2 * n + Bnz; load_elements(A, column_list, Bnz, elements, first_in_row, first_in_col, work_estimate); std::vector column_j_workspace(n, kNone); std::vector row_last_workspace(n); std::vector max_in_column(n); std::vector max_in_row(m); - work_estimate += 3*n + m; + work_estimate += 3 * n + m; initialize_max_in_column(first_in_col, elements, max_in_column); work_estimate += Bnz; @@ -678,7 +679,7 @@ i_t right_looking_lu(const csc_matrix_t& A, Urow.n = Urow.m = n; Urow.row_start.resize(n + 1, -1); i_t Unz = 0; - work_estimate += 2*n; + work_estimate += 2 * n; i_t Lnz = 0; L.x.clear(); @@ -688,7 +689,7 @@ i_t right_looking_lu(const csc_matrix_t& A, std::fill(pinv.begin(), pinv.end(), -1); std::vector qinv(n); std::fill(qinv.begin(), qinv.end(), -1); - work_estimate += 4*n; + work_estimate += 4 * n; i_t pivots = 0; for (i_t k = 0; k < n; ++k) { @@ -747,7 +748,7 @@ i_t right_looking_lu(const csc_matrix_t& A, Unz++; } } - work_estimate += 4*(Unz - Urow.row_start[k]); + work_estimate += 4 * (Unz - Urow.row_start[k]); // L <- [L l] L.col_start[k] = Lnz; @@ -768,11 +769,13 @@ i_t right_looking_lu(const csc_matrix_t& A, Lnz++; } } - work_estimate += 4*(Lnz - L.col_start[k]); + work_estimate += 4 * (Lnz - L.col_start[k]); // Update Cdegree and col_count - update_Cdegree_and_col_count(pivot_i, pivot_j, first_in_row, Cdegree, col_count, elements, work_estimate); - update_Rdegree_and_row_count(pivot_i, pivot_j, first_in_col, Rdegree, row_count, elements, work_estimate); + update_Cdegree_and_col_count( + pivot_i, pivot_j, first_in_row, Cdegree, col_count, elements, work_estimate); + update_Rdegree_and_row_count( + pivot_i, pivot_j, first_in_col, Rdegree, row_count, elements, work_estimate); // A22 <- A22 - l u^T schur_complement(pivot_i, @@ -795,8 +798,10 @@ i_t right_looking_lu(const csc_matrix_t& A, work_estimate); // Remove the pivot row - remove_pivot_row(pivot_i, pivot_j, first_in_col, first_in_row, max_in_column, elements, work_estimate); - remove_pivot_col(pivot_i, pivot_j, first_in_col, first_in_row, max_in_row, elements, work_estimate); + remove_pivot_row( + pivot_i, pivot_j, first_in_col, first_in_row, max_in_column, elements, work_estimate); + remove_pivot_col( + pivot_i, pivot_j, first_in_col, first_in_row, max_in_row, elements, work_estimate); // Set pivot entry to sentinel value pivot_entry->i = -1; @@ -927,7 +932,7 @@ i_t right_looking_lu(const csc_matrix_t& A, } work_estimate += n; inverse_permutation(qinv, q); - work_estimate += 2*n; + work_estimate += 2 * n; return pivots; } @@ -940,7 +945,7 @@ i_t right_looking_lu(const csc_matrix_t& A, for (i_t p = 0; p < Lnz; ++p) { L.i[p] = pinv[L.i[p]]; } - work_estimate += 3*Lnz; + work_estimate += 3 * Lnz; #ifdef CHECK_LOWER_TRIANGULAR for (i_t j = 0; j < n; ++j) { @@ -962,16 +967,16 @@ i_t right_looking_lu(const csc_matrix_t& A, std::vector row_perm(n); work_estimate += n; inverse_permutation(pinv, row_perm); - work_estimate += 2*n; + work_estimate += 2 * n; std::vector identity(n); for (i_t k = 0; k < n; k++) { identity[k] = k; } - work_estimate += 2*n; + work_estimate += 2 * n; U_unpermuted.permute_rows_and_cols(identity, q, U); - work_estimate += 3*U.n + 5*Unz; + work_estimate += 3 * U.n + 5 * Unz; #ifdef CHECK_UPPER_TRIANGULAR for (i_t k = 0; k < n; ++k) { @@ -1001,7 +1006,7 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, // We return the inverser row permutation vector pinv and the column permutation vector q f_t factorization_start_time = tic(); - f_t work_estimate = 0; + f_t work_estimate = 0; const i_t n = A.n; const i_t m = A.m; assert(pinv.size() == m); @@ -1019,7 +1024,8 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, column_list[k] = k; } - const i_t Bnz = initialize_degree_data(A, column_list, Cdegree, Rdegree, col_count, row_count, work_estimate); + const i_t Bnz = + initialize_degree_data(A, column_list, Cdegree, Rdegree, col_count, row_count, work_estimate); std::vector> elements(Bnz); std::vector first_in_row(m, kNone); std::vector first_in_col(n, kNone); @@ -1117,7 +1123,8 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, // Remove the pivot row remove_pivot_row( pivot_i, pivot_j, first_in_col, first_in_row, max_in_column, elements, work_estimate); - remove_pivot_col(pivot_i, pivot_j, first_in_col, first_in_row, max_in_row, elements, work_estimate); + remove_pivot_col( + pivot_i, pivot_j, first_in_col, first_in_row, max_in_row, elements, work_estimate); // Set pivot entry to sentinel value pivot_entry->i = -1; @@ -1224,16 +1231,15 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE -template int right_looking_lu( - const csc_matrix_t& A, - const simplex_solver_settings_t& settings, - double tol, - const std::vector& column_list, - std::vector& q, - csc_matrix_t& L, - csc_matrix_t& U, - std::vector& pinv, - double& work_estimate); +template int right_looking_lu(const csc_matrix_t& A, + const simplex_solver_settings_t& settings, + double tol, + const std::vector& column_list, + std::vector& q, + csc_matrix_t& L, + csc_matrix_t& U, + std::vector& pinv, + double& work_estimate); template int right_looking_lu_row_permutation_only( const csc_matrix_t& A, diff --git a/cpp/src/dual_simplex/singletons.cpp b/cpp/src/dual_simplex/singletons.cpp index df535c5843..ee8dfe7d2e 100644 --- a/cpp/src/dual_simplex/singletons.cpp +++ b/cpp/src/dual_simplex/singletons.cpp @@ -67,7 +67,7 @@ i_t order_singletons(std::queue& singleton_queue, break; } } - work_estimate += 2*(xend - G.Xp[xpivot]); + work_estimate += 2 * (xend - G.Xp[xpivot]); assert(ypivot != kEmpty); assert(G.Ydeg[ypivot] >= 0); @@ -85,7 +85,7 @@ i_t order_singletons(std::queue& singleton_queue, singleton_queue.push(x); } } - work_estimate += 2*(yend - G.Yp[ypivot]); + work_estimate += 2 * (yend - G.Yp[ypivot]); // Mark the pivot by flipping the degrees G.Xdeg[xpivot] = FLIP(1); G.Ydeg[ypivot] = FLIP(G.Ydeg[ypivot]); @@ -122,11 +122,11 @@ void create_row_representation(const csc_matrix_t& A, for (i_t p = 0; p < nz; ++p) { workspace[A.i[p]]++; } - work_estimate += 3*nz; + work_estimate += 3 * nz; // Compute rowstart and overwrite workspace cumulative_sum(workspace, row_start); - work_estimate += 4*workspace.size(); + work_estimate += 4 * workspace.size(); for (i_t j = 0; j < n; ++j) { const i_t col_start = A.col_start[j]; @@ -136,7 +136,7 @@ void create_row_representation(const csc_matrix_t& A, col_index[q] = j; } } - work_estimate += 2*n + 4*nz; + work_estimate += 2 * n + 4 * nz; } // Complete the permuation @@ -183,7 +183,7 @@ i_t find_singletons(const csc_matrix_t& A, std::vector Cdeg(n); std::vector Rp(m + 1); std::vector Rj(nz); - work_estimate += 3*m + n + nz; + work_estimate += 3 * m + n + nz; i_t max_queue_len = std::max(m, n); std::queue singleton_queue; @@ -197,7 +197,7 @@ i_t find_singletons(const csc_matrix_t& A, Rdeg[A.i[p]]++; } } - work_estimate += 2*n + 2*nz; + work_estimate += 2 * n + 2 * nz; // Add all columns of degree 1 to the queue for (i_t j = n - 1; j >= 0; --j) { @@ -279,12 +279,12 @@ i_t find_singletons(const csc_matrix_t& A, printf("Col singletons %d\n", col_singletons); #endif i_t num_empty_cols = complete_permutation(singletons_found, Cdeg, col_perm); - work_estimate += 2*Cdeg.size(); + work_estimate += 2 * Cdeg.size(); #ifdef SINGLETON_DEBUG printf("Completed col perm. %d empty cols. Starting row perm\n", num_empty_cols); #endif i_t num_empty_rows = complete_permutation(singletons_found, Rdeg, row_perm); - work_estimate += 2*Rdeg.size(); + work_estimate += 2 * Rdeg.size(); #ifdef SINGLETON_DEBUG printf("Empty rows %d Empty columns %d\n", num_empty_rows, num_empty_cols); #endif @@ -296,9 +296,9 @@ i_t find_singletons(const csc_matrix_t& A, template struct row_col_graph_t; template int order_singletons(std::queue& singleton_queue, - int& singletons_found, - row_col_graph_t& G, - double& work_estimate); + int& singletons_found, + row_col_graph_t& G, + double& work_estimate); // \param [in,out] workspace - size m template void create_row_representation(const csc_matrix_t& A, @@ -312,11 +312,11 @@ template int complete_permutation(int singletons, std::vector& Xperm); template int find_singletons(const csc_matrix_t& A, - int& row_singletons, - std::vector& row_perm, - int& col_singleton, - std::vector& col_perm, - double& work_estimate); + int& row_singletons, + std::vector& row_perm, + int& col_singleton, + std::vector& col_perm, + double& work_estimate); #endif } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp index c54d7e7354..ec8aee09c4 100644 --- a/cpp/src/dual_simplex/sparse_matrix.cpp +++ b/cpp/src/dual_simplex/sparse_matrix.cpp @@ -11,7 +11,6 @@ #include - // #include // #include @@ -959,13 +958,12 @@ template void scatter_dense(const csc_matrix_t& A, double alpha, std::vector& x); -template void scatter_dense( - const csc_matrix_t& A, - int j, - double alpha, - std::vector& x, - std::vector& mark, - std::vector& indices); +template void scatter_dense(const csc_matrix_t& A, + int j, + double alpha, + std::vector& x, + std::vector& mark, + std::vector& indices); template int multiply(const csc_matrix_t& A, const csc_matrix_t& B, diff --git a/cpp/src/dual_simplex/sparse_matrix.hpp b/cpp/src/dual_simplex/sparse_matrix.hpp index 3011a4c713..56e3ca82c6 100644 --- a/cpp/src/dual_simplex/sparse_matrix.hpp +++ b/cpp/src/dual_simplex/sparse_matrix.hpp @@ -125,12 +125,12 @@ class csc_matrix_t { return true; } - i_t m; // number of rows - i_t n; // number of columns - i_t nz_max; // maximum number of entries - std::vector col_start; // column pointers (size n + 1) - std::vector i; // row indices, size nz_max - std::vector x; // numerical values, size nz_max + i_t m; // number of rows + i_t n; // number of columns + i_t nz_max; // maximum number of entries + std::vector col_start; // column pointers (size n + 1) + std::vector i; // row indices, size nz_max + std::vector x; // numerical values, size nz_max static_assert(std::is_signed_v); // Require signed integers (we make use of this // to avoid extra space / computation) @@ -173,12 +173,12 @@ class csr_matrix_t { return true; } - i_t nz_max; // maximum number of nonzero entries - i_t m; // number of rows - i_t n; // number of cols - std::vector row_start; // row pointers (size m + 1) - std::vector j; // column indices, size nz_max - std::vector x; // numerical values, size nz_max + i_t nz_max; // maximum number of nonzero entries + i_t m; // number of rows + i_t n; // number of cols + std::vector row_start; // row pointers (size m + 1) + std::vector j; // column indices, size nz_max + std::vector x; // numerical values, size nz_max static_assert(std::is_signed_v); }; diff --git a/cpp/src/dual_simplex/sparse_vector.hpp b/cpp/src/dual_simplex/sparse_vector.hpp index 42fffa6e46..6ea642ee07 100644 --- a/cpp/src/dual_simplex/sparse_vector.hpp +++ b/cpp/src/dual_simplex/sparse_vector.hpp @@ -33,7 +33,8 @@ class sparse_vector_t { void to_csc(csc_matrix_t& A) const; // convert a sparse vector into a dense vector. Dense vector is cleared and resized. void to_dense(std::vector& x_dense) const; - // scatter a sparse vector into a dense vector. Assumes x_dense is already cleared or preinitialized + // scatter a sparse vector into a dense vector. Assumes x_dense is already cleared or + // preinitialized void scatter(std::vector& x_dense) const; // inverse permute the current sparse vector void inverse_permute_vector(const std::vector& p); diff --git a/cpp/src/dual_simplex/triangle_solve.cpp b/cpp/src/dual_simplex/triangle_solve.cpp index 89f96ff0ec..ad09ae30cf 100644 --- a/cpp/src/dual_simplex/triangle_solve.cpp +++ b/cpp/src/dual_simplex/triangle_solve.cpp @@ -37,11 +37,11 @@ i_t reach(const sparse_vector_t& b, top = depth_first_search(b.i[p], pinv, G, top, xi, xi.begin() + m, work_estimate); } } - work_estimate += 4*bnz; + work_estimate += 4 * bnz; for (i_t p = top; p < m; ++p) { // restore G MARK(G.col_start, xi[p]); } - work_estimate += 3*(m - top); + work_estimate += 3 * (m - top); return top; } @@ -79,12 +79,12 @@ i_t depth_first_search(i_t j, // Point to the first outgoing edge of node j pstack[head] = (jnew < 0) ? 0 : UNFLIP(G.col_start[jnew]); } - done = 1; // Node j is done if no unvisited neighbors - i_t p2 = (jnew < 0) ? 0 : UNFLIP(G.col_start[jnew + 1]); + done = 1; // Node j is done if no unvisited neighbors + i_t p2 = (jnew < 0) ? 0 : UNFLIP(G.col_start[jnew + 1]); const i_t psav = pstack[head]; i_t p; for (p = psav; p < p2; ++p) { // Examine all neighbors of j - i_t i = G.i[p]; // Consider neighbor i + i_t i = G.i[p]; // Consider neighbor i if (MARKED(G.col_start, i)) { continue; // skip visited node i } @@ -93,7 +93,7 @@ i_t depth_first_search(i_t j, done = 0; // node j is not done break; // break to start dfs at node i } - work_estimate += 3*(p- psav) + 10; + work_estimate += 3 * (p - psav) + 10; if (done) { pstack[head] = 0; // restore pstack so it can be used again in other routines xi[head] = 0; // restore xi so it can be used again in other routines @@ -118,13 +118,13 @@ i_t sparse_triangle_solve(const sparse_vector_t& b, for (i_t p = top; p < m; ++p) { x[xi[p]] = 0; // Clear x vector } - work_estimate += 2*(m - top); + work_estimate += 2 * (m - top); const i_t bnz = b.i.size(); for (i_t p = 0; p < bnz; ++p) { x[b.i[p]] = b.x[p]; // Scatter b } - work_estimate += 3*bnz; + work_estimate += 3 * bnz; for (i_t px = top; px < m; ++px) { i_t j = xi[px]; // x(j) is nonzero @@ -143,7 +143,7 @@ i_t sparse_triangle_solve(const sparse_vector_t& b, end = G.col_start[J + 1] - 1; } x[j] /= Gjj; - work_estimate += 4*(end - p) + 7; + work_estimate += 4 * (end - p) + 7; for (; p < end; ++p) { x[G.i[p]] -= G.x[p] * x[j]; // x(i) -= G(i,j) * x(j) } diff --git a/cpp/src/dual_simplex/triangle_solve.hpp b/cpp/src/dual_simplex/triangle_solve.hpp index 7a99ee400e..811ed8b927 100644 --- a/cpp/src/dual_simplex/triangle_solve.hpp +++ b/cpp/src/dual_simplex/triangle_solve.hpp @@ -43,14 +43,16 @@ i_t lower_triangular_solve(const csc_matrix_t& L, std::vector& x, } } } - work_estimate += 3*n + 3*L.col_start[n]; + work_estimate += 3 * n + 3 * L.col_start[n]; return 0; } // Solve L'*x = b. On input x contains the right-hand side b, on output the // solution template -i_t lower_triangular_transpose_solve(const csc_matrix_t& L, std::vector& x, f_t& work_estimate) +i_t lower_triangular_transpose_solve(const csc_matrix_t& L, + std::vector& x, + f_t& work_estimate) { const i_t n = L.n; assert(x.size() == n); @@ -63,7 +65,7 @@ i_t lower_triangular_transpose_solve(const csc_matrix_t& L, std::vecto } x[j] /= L.x[L.col_start[j]]; } - work_estimate += 6*n + 4*L.col_start[n]; + work_estimate += 6 * n + 4 * L.col_start[n]; return 0; } @@ -83,17 +85,19 @@ i_t upper_triangular_solve(const csc_matrix_t& U, std::vector& x, for (i_t p = col_start; p < col_end; ++p) { x[U.i[p]] -= U.x[p] * x_j; } - work_estimate += 3*(col_end - col_start) + 3; + work_estimate += 3 * (col_end - col_start) + 3; } } - work_estimate += 3*n; + work_estimate += 3 * n; return 0; } // Solve U'*x = b. On input x contains the right-hand side b, on output the // solution template -i_t upper_triangular_transpose_solve(const csc_matrix_t& U, std::vector& x, f_t& work_estimate) +i_t upper_triangular_transpose_solve(const csc_matrix_t& U, + std::vector& x, + f_t& work_estimate) { const i_t n = U.n; assert(x.size() == n); @@ -105,7 +109,7 @@ i_t upper_triangular_transpose_solve(const csc_matrix_t& U, std::vecto } x[j] /= U.x[col_end]; } - work_estimate += 4*n + 4*U.col_start[n]; + work_estimate += 4 * n + 4 * U.col_start[n]; return 0; } diff --git a/cpp/src/dual_simplex/vector_math.cpp b/cpp/src/dual_simplex/vector_math.cpp index 2acf4078f4..f427f252bf 100644 --- a/cpp/src/dual_simplex/vector_math.cpp +++ b/cpp/src/dual_simplex/vector_math.cpp @@ -141,7 +141,9 @@ i_t permute_vector(const std::vector& p, const std::vector& b, std::ve // Computes x = P'*b or x(p) = b in MATLAB. // Work is 3 * n template -i_t inverse_permute_vector(const std::vector& p, const std::vector& b, std::vector& x) +i_t inverse_permute_vector(const std::vector& p, + const std::vector& b, + std::vector& x) { auto n = p.size(); assert(x.size() == n); @@ -165,7 +167,6 @@ i_t inverse_permutation(const std::vector& p, std::vector& pinv) return 0; } - #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE template double vector_norm_inf>(const std::vector& x); diff --git a/cpp/src/dual_simplex/vector_math.hpp b/cpp/src/dual_simplex/vector_math.hpp index 7ac697eef3..478e024e51 100644 --- a/cpp/src/dual_simplex/vector_math.hpp +++ b/cpp/src/dual_simplex/vector_math.hpp @@ -61,7 +61,9 @@ template i_t permute_vector(const std::vector& p, const std::vector& b, std::vector& x); // Computes x = P'*b or x(p) = b in MATLAB. template -i_t inverse_permute_vector(const std::vector& p, const std::vector& b, std::vector& x); +i_t inverse_permute_vector(const std::vector& p, + const std::vector& b, + std::vector& x); // Computes pinv from p. Or pinv(p) = 1:n in MATLAB template diff --git a/cpp/src/pdlp/translate.hpp b/cpp/src/pdlp/translate.hpp index 8cc974da9c..aebe87b140 100644 --- a/cpp/src/pdlp/translate.hpp +++ b/cpp/src/pdlp/translate.hpp @@ -30,8 +30,8 @@ static dual_simplex::user_problem_t cuopt_problem_to_simplex_problem( user_problem.objective = cuopt::host_copy(model.objective_coefficients, handle_ptr->get_stream()); dual_simplex::csr_matrix_t csr_A(m, n, nz); - csr_A.x = std::vector(cuopt::host_copy(model.coefficients, handle_ptr->get_stream())); - csr_A.j = std::vector(cuopt::host_copy(model.variables, handle_ptr->get_stream())); + csr_A.x = std::vector(cuopt::host_copy(model.coefficients, handle_ptr->get_stream())); + csr_A.j = std::vector(cuopt::host_copy(model.variables, handle_ptr->get_stream())); csr_A.row_start = std::vector(cuopt::host_copy(model.offsets, handle_ptr->get_stream())); csr_A.to_compressed_col(user_problem.A); From 8766092b4158ee251484ee4df5967bda8ea9a773 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Thu, 12 Feb 2026 20:59:37 -0800 Subject: [PATCH 3/4] Address CodeRabbit review comments --- cpp/src/dual_simplex/basis_updates.cpp | 2 +- cpp/src/dual_simplex/basis_updates.hpp | 2 +- cpp/src/dual_simplex/bound_flipping_ratio_test.cpp | 2 +- cpp/src/dual_simplex/phase2.cpp | 8 +++----- cpp/src/dual_simplex/right_looking_lu.cpp | 8 ++++---- cpp/src/dual_simplex/singletons.cpp | 12 ++++-------- cpp/src/dual_simplex/vector_math.cpp | 6 +++--- 7 files changed, 17 insertions(+), 23 deletions(-) diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index 777f7f6cdf..a5f59ef579 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -1264,7 +1264,7 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts } else { // W = V WT.transpose(V); - work_estimate_ += 3 * V_nz; + work_estimate_ += 3 * WT.col_start[WT.n]; } // Extend u_i, v_i for i = 0, ..., num_updates_ - 1 diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp index c6ea2791ab..bb2b42d8ae 100644 --- a/cpp/src/dual_simplex/basis_updates.hpp +++ b/cpp/src/dual_simplex/basis_updates.hpp @@ -480,7 +480,7 @@ class basis_update_mpf_t { f_t hypersparse_threshold_; - mutable f_t work_estimate_; + mutable f_t work_estimate_{0.0}; }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp index 14931da7e2..e30b067398 100644 --- a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp +++ b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp @@ -235,8 +235,8 @@ void bound_flipping_ratio_test_t::heap_passes(const std::vector& // Remove minimum ratio from the heap and rebalance i_t heap_index = bare_idx.front(); std::pop_heap(bare_idx.begin(), bare_idx.end(), compare); - bare_idx.pop_back(); work_estimate_ += 2 * std::log2(bare_idx.size()); + bare_idx.pop_back(); nonbasic_entering = current_indicies[heap_index]; const i_t j = entering_index = nonbasic_list_[nonbasic_entering]; diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 869ceae4d7..936083634d 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -2509,7 +2509,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, std::vector c_basic(m); std::vector xB_workspace(m); - phase2_work_estimate += n * 2 * m; + phase2_work_estimate += 2 * (n + m); dual::status_t status = dual::status_t::UNSET; @@ -3481,8 +3481,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, iter++; // Clear delta_z - phase2::clear_delta_z(entering_index, leaving_index, delta_z_mark, delta_z_indices, delta_z); phase2_work_estimate += 3 * delta_z_indices.size(); + phase2::clear_delta_z(entering_index, leaving_index, delta_z_mark, delta_z_indices, delta_z); f_t now = toc(start_time); @@ -3492,9 +3492,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, phase2_work_estimate += ft.work_estimate(); ft.clear_work_estimate(); - if (work_unit_context) { - work_unit_context->record_work_sync_on_horizon(phase2_work_estimate / 1e8); - } + work_unit_context->record_work_sync_on_horizon(phase2_work_estimate / 1e8); phase2_work_estimate = 0.0; last_feature_log_iter = iter; diff --git a/cpp/src/dual_simplex/right_looking_lu.cpp b/cpp/src/dual_simplex/right_looking_lu.cpp index 6651bb9051..93a726d065 100644 --- a/cpp/src/dual_simplex/right_looking_lu.cpp +++ b/cpp/src/dual_simplex/right_looking_lu.cpp @@ -33,9 +33,9 @@ struct element_t { }; constexpr int kNone = -1; -template +template i_t initialize_degree_data(const csc_matrix_t& A, - const VectorI& column_list, + const std::vector& column_list, std::vector& Cdegree, std::vector& Rdegree, std::vector>& col_count, @@ -79,9 +79,9 @@ i_t initialize_degree_data(const csc_matrix_t& A, return Bnz; } -template +template i_t load_elements(const csc_matrix_t& A, - const VectorI& column_list, + const std::vector& column_list, i_t Bnz, std::vector>& elements, std::vector& first_in_row, diff --git a/cpp/src/dual_simplex/singletons.cpp b/cpp/src/dual_simplex/singletons.cpp index ee8dfe7d2e..dbba46f6a5 100644 --- a/cpp/src/dual_simplex/singletons.cpp +++ b/cpp/src/dual_simplex/singletons.cpp @@ -214,14 +214,12 @@ i_t find_singletons(const csc_matrix_t& A, row_form = true; // Find column singletons - auto& col_perm_vec = static_cast&>(col_perm); - auto& row_perm_vec = static_cast&>(row_perm); row_col_graph_t graph{Cdeg.begin(), - col_perm_vec.begin(), + col_perm.begin(), A.col_start.cbegin(), A.i.cbegin(), Rdeg.begin(), - row_perm_vec.begin(), + row_perm.begin(), Rp.cbegin(), Rj.cbegin()}; @@ -250,14 +248,12 @@ i_t find_singletons(const csc_matrix_t& A, } // Find row singletons - auto& row_perm_vec2 = static_cast&>(row_perm); - auto& col_perm_vec2 = static_cast&>(col_perm); row_col_graph_t graph{Rdeg.begin(), - row_perm_vec2.begin(), + row_perm.begin(), Rp.cbegin(), Rj.cbegin(), Cdeg.begin(), - col_perm_vec2.begin(), + col_perm.begin(), A.col_start.cbegin(), A.i.cbegin()}; #ifdef SINGLETON_DEBUG diff --git a/cpp/src/dual_simplex/vector_math.cpp b/cpp/src/dual_simplex/vector_math.cpp index f427f252bf..0db09b00b6 100644 --- a/cpp/src/dual_simplex/vector_math.cpp +++ b/cpp/src/dual_simplex/vector_math.cpp @@ -129,7 +129,7 @@ f_t sparse_dot(const std::vector& xind, template i_t permute_vector(const std::vector& p, const std::vector& b, std::vector& x) { - auto n = p.size(); + i_t n = p.size(); assert(x.size() == n); assert(b.size() == n); for (i_t k = 0; k < n; ++k) { @@ -145,7 +145,7 @@ i_t inverse_permute_vector(const std::vector& p, const std::vector& b, std::vector& x) { - auto n = p.size(); + i_t n = p.size(); assert(x.size() == n); assert(b.size() == n); for (i_t k = 0; k < n; ++k) { @@ -159,7 +159,7 @@ i_t inverse_permute_vector(const std::vector& p, template i_t inverse_permutation(const std::vector& p, std::vector& pinv) { - auto n = p.size(); + i_t n = p.size(); if (pinv.size() != n) { pinv.resize(n); } for (i_t k = 0; k < n; ++k) { pinv[p[k]] = k; From 9effcff6afc61dd000fc5ebf3c34180af0649400 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Fri, 13 Feb 2026 09:54:33 -0800 Subject: [PATCH 4/4] Fix CodeRabbit review comments --- cpp/src/dual_simplex/phase2.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 936083634d..50af6971b9 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -1181,8 +1181,8 @@ i_t flip_bounds(const lp_problem_t& lp, const f_t dual_tol = settings.dual_tol; // lower to 1e-7 or less will cause 25fv47 and d2q06c to cycle if (vstatus[j] == variable_status_t::NONBASIC_LOWER && z[j] < -dual_tol) { - const f_t delta = lp.upper[j] - lp.lower[j]; - const f_t atilde_start_size = atilde_index.size(); + const f_t delta = lp.upper[j] - lp.lower[j]; + const size_t atilde_start_size = atilde_index.size(); scatter_dense(lp.A, j, -delta, atilde, mark, atilde_index); work_estimate += 2 * (atilde_index.size() - atilde_start_size) + 4 * (lp.A.col_start[j + 1] - lp.A.col_start[j]) + 10; @@ -1194,8 +1194,8 @@ i_t flip_bounds(const lp_problem_t& lp, #endif num_flipped++; } else if (vstatus[j] == variable_status_t::NONBASIC_UPPER && z[j] > dual_tol) { - const f_t delta = lp.lower[j] - lp.upper[j]; - const f_t atilde_start_size = atilde_index.size(); + const f_t delta = lp.lower[j] - lp.upper[j]; + const size_t atilde_start_size = atilde_index.size(); scatter_dense(lp.A, j, -delta, atilde, mark, atilde_index); work_estimate += 2 * (atilde_index.size() - atilde_start_size) + 4 * (lp.A.col_start[j + 1] - lp.A.col_start[j]) + 10; @@ -1406,7 +1406,7 @@ i_t update_steepest_edge_norms(const simplex_solver_settings_t& settin work_estimate += 2 * v_sparse.i.size(); } v_sparse.scatter(v); - work_estimate += 2 * v.size(); + work_estimate += 2 * v_sparse.i.size(); const i_t leaving_index = basic_list[basic_leaving_index]; const f_t prev_dy_norm_squared = delta_y_steepest_edge[leaving_index]; @@ -2518,8 +2518,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, settings.log.printf("Dual Simplex Phase %d\n", phase); std::vector vstatus_old = vstatus; std::vector z_old = z; - - phase2_work_estimate += m; + phase2_work_estimate += 4 * n; phase2::bound_info(lp, settings); phase2_work_estimate += 2 * n;