From 96179e7539065cd62d1436cb6ece008a0049f958 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Wed, 11 Jun 2025 13:43:06 -0700 Subject: [PATCH 01/28] Fix accidental O(N^2) BFRT with O(N log N). 13% better on NETLIB --- cpp/src/dual_simplex/CMakeLists.txt | 1 + .../bound_flipping_ratio_test.cpp | 259 ++++++++++++++++++ .../bound_flipping_ratio_test.hpp | 90 ++++++ cpp/src/dual_simplex/phase2.cpp | 45 ++- 4 files changed, 393 insertions(+), 2 deletions(-) create mode 100644 cpp/src/dual_simplex/bound_flipping_ratio_test.cpp create mode 100644 cpp/src/dual_simplex/bound_flipping_ratio_test.hpp diff --git a/cpp/src/dual_simplex/CMakeLists.txt b/cpp/src/dual_simplex/CMakeLists.txt index 16ee502f83..baa5b7213b 100644 --- a/cpp/src/dual_simplex/CMakeLists.txt +++ b/cpp/src/dual_simplex/CMakeLists.txt @@ -16,6 +16,7 @@ set(DUAL_SIMPLEX_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/basis_solves.cpp ${CMAKE_CURRENT_SOURCE_DIR}/basis_updates.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/bound_flipping_ratio_test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/branch_and_bound.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crossover.cpp ${CMAKE_CURRENT_SOURCE_DIR}/initial_basis.cpp diff --git a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp new file mode 100644 index 0000000000..5cfa35c748 --- /dev/null +++ b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp @@ -0,0 +1,259 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include + +namespace cuopt::linear_programming::dual_simplex { + +template +i_t bound_flipping_ratio_test_t::compute_breakpoints(std::vector& indicies, + std::vector& ratios) +{ + i_t n = n_; + i_t m = m_; + constexpr bool verbose = false; + f_t pivot_tol = settings_.pivot_tol; + const f_t dual_tol = settings_.dual_tol / 10; + + i_t idx = 0; + while (idx == 0 && pivot_tol > 1e-12) { + for (i_t k = 0; k < n - m; ++k) { + const i_t j = nonbasic_list_[k]; + if (vstatus_[j] == variable_status_t::NONBASIC_FIXED) { continue; } + if (vstatus_[j] == variable_status_t::NONBASIC_LOWER && delta_z_[j] < -pivot_tol) { + indicies[idx] = k; + ratios[idx] = std::max((-dual_tol - z_[j]) / delta_z_[j], 0.0); + if constexpr (verbose) { settings_.log.printf("ratios[%d] = %e\n", idx, ratios[idx]); } + idx++; + } + if (vstatus_[j] == variable_status_t::NONBASIC_UPPER && delta_z_[j] > pivot_tol) { + indicies[idx] = k; + ratios[idx] = std::max((dual_tol - z_[j]) / delta_z_[j], 0.0); + if constexpr (verbose) { settings_.log.printf("ratios[%d] = %e\n", idx, ratios[idx]); } + idx++; + } + } + pivot_tol /= 10; + } + return idx; +} + +template +i_t bound_flipping_ratio_test_t::single_pass(i_t start, + i_t end, + const std::vector& indicies, + const std::vector& ratios, + f_t& slope, + f_t& step_length, + i_t& nonbasic_entering, + i_t& entering_index) +{ + // Find the minimum ratio + f_t min_val = inf; + entering_index = -1; + i_t candidate = -1; + f_t zero_tol = settings_.zero_tol; + i_t k_idx = -1; + for (i_t k = start; k < end; ++k) { + if 
(ratios[k] < min_val) { + min_val = ratios[k]; + candidate = indicies[k]; + k_idx = k; + } else if (ratios[k] < min_val + zero_tol) { + // Use Harris to select variables with larger pivots + const i_t j = nonbasic_list_[indicies[k]]; + if (std::abs(delta_z_[j]) > std::abs(delta_z_[candidate])) { + min_val = ratios[k]; + candidate = indicies[k]; + k_idx = k; + } + } + } + step_length = min_val; + nonbasic_entering = candidate; + const i_t j = entering_index = nonbasic_list_[nonbasic_entering]; + + constexpr bool verbose = false; + if (lower_[j] > -inf && upper_[j] < inf && lower_[j] != upper_[j]) { + const f_t interval = upper_[j] - lower_[j]; + const f_t delta_slope = std::abs(delta_z_[j]) * interval; + if constexpr (verbose) { + settings_.log.printf("single pass delta slope %e slope %e after slope %e step length %e\n", + delta_slope, + slope, + slope - delta_slope, + step_length); + } + slope -= delta_slope; + return k_idx; // we should see if we can continue to increase the step-length + } + return -1; // we are done. do not increase the step-length further +} + +template +i_t bound_flipping_ratio_test_t::compute_step_length(f_t& step_length, + i_t& nonbasic_entering) +{ + i_t m = m_; + i_t n = n_; + constexpr bool verbose = false; + + // Compute the initial set of breakpoints + std::vector indicies(n - m); + std::vector ratios(n - m); + i_t num_breakpoints = compute_breakpoints(indicies, ratios); + if constexpr (verbose) { settings_.log.printf("Initial breakpoints %d\n", num_breakpoints); } + if (num_breakpoints == 0) { + nonbasic_entering = -1; + return -1; + } + + f_t slope = slope_; + nonbasic_entering = -1; + i_t entering_index = -1; + + i_t k_idx = single_pass( + 0, num_breakpoints, indicies, ratios, slope, step_length, nonbasic_entering, entering_index); + bool continue_search = k_idx >= 0 && num_breakpoints > 1 && slope > 0.0; + if (!continue_search) { + if constexpr (verbose) { + settings_.log.printf( + "BFRT stopping. No bound flips. 
Step length %e Nonbasic entering %d Entering %d.\n", + step_length, + nonbasic_entering, + entering_index); + } + return entering_index; + } + + if constexpr (verbose) { + settings_.log.printf( + "Continuing past initial step length %e entering index %d nonbasic entering %d slope %e\n", + step_length, + entering_index, + nonbasic_entering, + slope); + } + + // Continue the search using a heap to order the breakpoints + ratios[k_idx] = ratios[num_breakpoints - 1]; + indicies[k_idx] = indicies[num_breakpoints - 1]; + + heap_passes( + indicies, ratios, num_breakpoints - 1, slope, step_length, nonbasic_entering, entering_index); + + if constexpr (verbose) { + settings_.log.printf("BFRT step length %e entering index %d non basic entering %d\n", + step_length, + entering_index, + nonbasic_entering); + } + return entering_index; +} + +template +void bound_flipping_ratio_test_t::heap_passes(const std::vector& current_indicies, + const std::vector& current_ratios, + i_t num_breakpoints, + f_t& slope, + f_t& step_length, + i_t& nonbasic_entering, + i_t& entering_index) +{ + std::vector bare_idx(num_breakpoints); + constexpr bool verbose = false; + const f_t dual_tol = settings_.dual_tol; + const f_t zero_tol = settings_.zero_tol; + const std::vector& delta_z = delta_z_; + const std::vector& nonbasic_list = nonbasic_list_; + const i_t N = num_breakpoints; + for (i_t k = 0; k < N; ++k) { + bare_idx[k] = k; + if constexpr (verbose) { + settings_.log.printf("Adding index %d ratio %e pivot %e to heap\n", + current_indicies[k], + current_ratios[k], + std::abs(delta_z[nonbasic_list[current_indicies[k]]])); + } + } + + auto compare = [zero_tol, ¤t_ratios, ¤t_indicies, &delta_z, &nonbasic_list]( + const i_t& a, const i_t& b) { + return (current_ratios[a] > current_ratios[b]) || + (std::abs(current_ratios[a] - current_ratios[b]) < zero_tol && + std::abs(delta_z[nonbasic_list[current_indicies[a]]]) > + std::abs(delta_z[nonbasic_list[current_indicies[b]]])); + }; + + 
std::make_heap(bare_idx.begin(), bare_idx.end(), compare); + + while (bare_idx.size() > 0 && slope > 0) { + // Remove minimum ratio from the heap and rebalance + i_t heap_index = bare_idx.front(); + std::pop_heap(bare_idx.begin(), bare_idx.end(), compare); + bare_idx.pop_back(); + + nonbasic_entering = current_indicies[heap_index]; + const i_t j = entering_index = nonbasic_list_[nonbasic_entering]; + step_length = current_ratios[heap_index]; + + if (lower_[j] > -inf && upper_[j] < inf && lower_[j] != upper_[j]) { + // We have a bounded variable + const f_t interval = upper_[j] - lower_[j]; + const f_t delta_slope = std::abs(delta_z_[j]) * interval; + const f_t pivot = std::abs(delta_z[j]); + if constexpr (verbose) { + settings_.log.printf( + "heap %d step-length %.12e pivot %e nonbasic entering %d slope %e delta_slope %e new " + "slope %e\n", + bare_idx.size(), + current_ratios[heap_index], + pivot, + nonbasic_entering, + slope, + delta_slope, + slope - delta_slope); + } + slope -= delta_slope; + } else { + // The variable is not bounded. Stop the search. + break; + } + + if (toc(start_time_) > settings_.time_limit) { + entering_index = -2; + return; + } + if (settings_.concurrent_halt != nullptr && + settings_.concurrent_halt->load(std::memory_order_acquire) == 1) { + entering_index = -3; + return; + } + } +} + +#ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE + +template class bound_flipping_ratio_test_t; + +#endif + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp b/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp new file mode 100644 index 0000000000..c949302562 --- /dev/null +++ b/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp @@ -0,0 +1,90 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include + +namespace cuopt::linear_programming::dual_simplex { + +template +class bound_flipping_ratio_test_t { + public: + bound_flipping_ratio_test_t(const simplex_solver_settings_t& settings, + f_t start_time, + i_t m, + i_t n, + f_t initial_slope, + const std::vector& lower, + const std::vector& upper, + const std::vector& vstatus, + const std::vector& nonbasic_list, + std::vector& z, + std::vector& delta_z) + : settings_(settings), + start_time_(start_time), + m_(m), + n_(n), + slope_(initial_slope), + lower_(lower), + upper_(upper), + vstatus_(vstatus), + nonbasic_list_(nonbasic_list), + z_(z), + delta_z_(delta_z) + { + } + + i_t compute_step_length(f_t& step_length, i_t& nonbasic_entering); + + private: + i_t compute_breakpoints(std::vector& indices, std::vector& ratios); + i_t single_pass(i_t start, + i_t end, + const std::vector& indices, + const std::vector& ratios, + f_t& slope, + f_t& step_length, + i_t& nonbasic_entering, + i_t& enetering_index); + void heap_passes(const std::vector& current_indicies, + const std::vector& current_ratios, + i_t num_breakpoints, + f_t& slope, + f_t& step_lenght, + i_t& nonbasic_entering, + i_t& entering_index); + + const std::vector& lower_; + const std::vector& upper_; + const std::vector& nonbasic_list_; + const std::vector& vstatus_; + const std::vector& z_; + const std::vector& 
delta_z_; + + const simplex_solver_settings_t& settings_; + + f_t start_time_; + f_t slope_; + + i_t n_; + i_t m_; +}; + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index cbfc66a222..7b12f8d10a 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -428,7 +429,7 @@ i_t bound_flipping_ratio_test(const lp_problem_t& lp, const f_t delta_slope = std::abs(delta_z[j]) * interval; #ifdef BOUND_FLIP_DEBUG if (slope - delta_slope > 0) { - log.printf( + settings.log.printf( "Bound flip %d slope change %e prev slope %e slope %e. curr step " "length %e\n", j, @@ -1234,6 +1235,7 @@ dual::status_t dual_phase2(i_t phase, std::vector delta_z(n); std::vector delta_x(n); const i_t start_iter = iter; + f_t bfrt_time = 0; while (iter < iter_limit) { // Pricing i_t direction; @@ -1356,6 +1358,41 @@ dual::status_t dual_phase2(i_t phase, step_length, nonbasic_entering_index); } else if (bound_flip_ratio) { + f_t bfrt_start = tic(); +#if 0 + f_t slope = direction == 1 ? 
(lp.lower[leaving_index] - x[leaving_index]) + : (x[leaving_index] - lp.upper[leaving_index]); + bound_flipping_ratio_test_t bfrt(settings, start_time, m, n, slope, lp.lower, lp.upper, vstatus, nonbasic_list, z, delta_z); + entering_index = bfrt.compute_step_length(step_length, nonbasic_entering_index); + if constexpr (0) + { + f_t shadow_step_length; + i_t shadow_nonbasic_entering_index; + i_t shadow_entering_index = phase2::bound_flipping_ratio_test(lp, + settings, + start_time, + vstatus, + nonbasic_list, + x, + z, + delta_z, + direction, + leaving_index, + shadow_step_length, + shadow_nonbasic_entering_index); + if (shadow_nonbasic_entering_index != nonbasic_entering_index) + { + settings.log.printf( + "step diff %e shadow step length %e step length %e shadow nonbasic entering %d " + "nonbasic entering %d\n", + step_length - shadow_step_length, + shadow_step_length, + step_length, + shadow_nonbasic_entering_index, + nonbasic_entering_index); + } + } +#else entering_index = phase2::bound_flipping_ratio_test(lp, settings, start_time, @@ -1368,6 +1405,8 @@ dual::status_t dual_phase2(i_t phase, leaving_index, step_length, nonbasic_entering_index); +#endif + bfrt_time += toc(bfrt_start); } else { entering_index = phase2::phase2_ratio_test( lp, settings, vstatus, nonbasic_list, z, delta_z, step_length, nonbasic_entering_index); @@ -1603,11 +1642,12 @@ dual::status_t dual_phase2(i_t phase, if (phase == 1 && iter == 1) { settings.log.printf(" Iter Objective Primal Infeas Perturb Time\n"); } - settings.log.printf("%5d %+.8e %.8e %.2e %.2f\n", + settings.log.printf("%5d %+.16e %.8e %.2e %.2e %.2f\n", iter, compute_user_objective(lp, obj), primal_infeasibility, sum_perturb, + step_length, now); } @@ -1624,6 +1664,7 @@ dual::status_t dual_phase2(i_t phase, } } if (iter >= iter_limit) { status = dual::status_t::ITERATION_LIMIT; } + settings.log.printf("BFRT time %e\n", bfrt_time); return status; } From e5ae74524bdf73db9616bf56401d12435bdb62c3 Mon Sep 17 00:00:00 2001 From: 
Christopher Maes Date: Thu, 12 Jun 2025 13:22:51 -0700 Subject: [PATCH 02/28] Don't keep looking for small pivots. Can turn infeasible into numerical error --- cpp/src/dual_simplex/bound_flipping_ratio_test.cpp | 5 +---- cpp/src/dual_simplex/phase2.cpp | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp index 5cfa35c748..96f266ab25 100644 --- a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp +++ b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp @@ -34,7 +34,6 @@ i_t bound_flipping_ratio_test_t::compute_breakpoints(std::vector& const f_t dual_tol = settings_.dual_tol / 10; i_t idx = 0; - while (idx == 0 && pivot_tol > 1e-12) { for (i_t k = 0; k < n - m; ++k) { const i_t j = nonbasic_list_[k]; if (vstatus_[j] == variable_status_t::NONBASIC_FIXED) { continue; } @@ -51,8 +50,6 @@ i_t bound_flipping_ratio_test_t::compute_breakpoints(std::vector& idx++; } } - pivot_tol /= 10; - } return idx; } @@ -198,7 +195,7 @@ void bound_flipping_ratio_test_t::heap_passes(const std::vector& auto compare = [zero_tol, ¤t_ratios, ¤t_indicies, &delta_z, &nonbasic_list]( const i_t& a, const i_t& b) { return (current_ratios[a] > current_ratios[b]) || - (std::abs(current_ratios[a] - current_ratios[b]) < zero_tol && + (current_ratios[b] - current_ratios[a] < zero_tol && std::abs(delta_z[nonbasic_list[current_indicies[a]]]) > std::abs(delta_z[nonbasic_list[current_indicies[b]]])); }; diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 7b12f8d10a..e48a376590 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -1359,7 +1359,7 @@ dual::status_t dual_phase2(i_t phase, nonbasic_entering_index); } else if (bound_flip_ratio) { f_t bfrt_start = tic(); -#if 0 +#if 1 f_t slope = direction == 1 ? 
(lp.lower[leaving_index] - x[leaving_index]) : (x[leaving_index] - lp.upper[leaving_index]); bound_flipping_ratio_test_t bfrt(settings, start_time, m, n, slope, lp.lower, lp.upper, vstatus, nonbasic_list, z, delta_z); From 80594aef36442665d2e7e9e567feabecd5624e39 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Wed, 18 Jun 2025 13:37:20 -0700 Subject: [PATCH 03/28] First stab at hypersparse B and B^T solve --- cpp/src/dual_simplex/basis_updates.cpp | 515 +++++++++++++++++- cpp/src/dual_simplex/basis_updates.hpp | 53 +- .../bound_flipping_ratio_test.cpp | 17 +- cpp/src/dual_simplex/phase2.cpp | 357 ++++++++++-- cpp/src/dual_simplex/sparse_matrix.hpp | 185 +++++++ cpp/src/dual_simplex/triangle_solve.cpp | 15 +- 6 files changed, 1088 insertions(+), 54 deletions(-) diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index 78737c1675..1412061808 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -30,6 +30,14 @@ i_t basis_update_t::b_solve(const std::vector& rhs, std::vector +i_t basis_update_t::b_solve(const sparse_vector_t& rhs, + sparse_vector_t& solution) const +{ + sparse_vector_t Lsol(rhs.n, 0); + return b_solve(rhs, solution, Lsol); +} + template i_t basis_update_t::b_solve(const std::vector& rhs, std::vector& solution, @@ -55,6 +63,33 @@ i_t basis_update_t::b_solve(const std::vector& rhs, return 0; } +template +i_t basis_update_t::b_solve(const sparse_vector_t& rhs, + sparse_vector_t& solution, + sparse_vector_t& Lsol) const +{ + const i_t m = L0_.m; + assert(row_permutation_.size() == m); + assert(rhs.n == m); + assert(solution.n == m); + assert(Lsol.n == m); + + // P*B = L*U + // B*x = b + // P*B*x = P*b = b' + solution = rhs; + solution.inverse_permute_vector(inverse_row_permutation_); + + // L*U*x = b' + // Solve for v such that L*v = b' + l_solve(solution); + Lsol = solution; + + // Solve for x such that U*x = v + u_solve(solution); + return 0; +} + template i_t 
basis_update_t::b_transpose_solve(const std::vector& rhs, std::vector& solution) const @@ -87,6 +122,114 @@ i_t basis_update_t::b_transpose_solve(const std::vector& rhs, return 0; } +template +i_t basis_update_t::b_transpose_solve(const sparse_vector_t& rhs, + sparse_vector_t& solution) const +{ + // Observe that + // P*B = L*U + // B'*P' = U'*L' + // We want to solve + // B'*y = c + // Let y = P'*w + // B'*y = B'*P'*w = U'*L'*w = c + // 1. Solve U'*r = c for r + // 2. Solve L'*w = r for w + // 3. Compute y = P'*w + + const i_t m = L0_.m; + assert(rhs.n == m); + assert(solution.n == m); + + // Solve for r such that U'*r = c + // Actually Q*U'*Q'*r = c + sparse_vector_t r = rhs; + u_transpose_solve(r); + +#ifdef CHECK_U_TRANSPOSE_SOLVE + std::vector residual; + rhs.to_dense(residual); + std::vector r_dense; + r.to_dense(r_dense); + std::vector product(m); + // Q * U' * Q' * r_dense - c + + std::vector r_dense_permuted(m); + inverse_permute_vector(col_permutation_, r_dense, r_dense_permuted); + + // product = U' * Q' * r_dense + matrix_transpose_vector_multiply(U_, 1.0, r_dense_permuted, 0.0, product); + std::vector product_permuted(m); + permute_vector(col_permutation_, product, product_permuted); + // residual = product_permuted - c + for (i_t k = 0; k < m; ++k) { + residual[k] -= product_permuted[k]; + } + + const f_t Ut_error = vector_norm_inf(residual); + if (Ut_error > 1e-6) { + printf("|| U' * r - c || %e\n", Ut_error); + for (i_t k = 0; k < m; ++k) { + if (std::abs(residual[k]) > 1e-6) { + printf("%d residual %e\n", k, residual[k]); + } + } + printf("rhs nz %d\n", rhs.i.size()); + } +#endif + + // Solve for w such that L'*w = r + l_transpose_solve(r); + + + // y = P'*w + r.inverse_permute_vector(row_permutation_, solution); + +#ifdef CHECK_PERMUTATION + std::vector r_dense2; + r.to_dense(r_dense2); + std::vector solution_dense_permuted(m); + permute_vector(inverse_row_permutation_, r_dense2, solution_dense_permuted); + std::vector solution_dense; + 
solution.to_dense(solution_dense); + bool found_error = false; + for (i_t k = 0; k < m; ++k) { + if (std::abs(solution_dense[k] - solution_dense_permuted[k]) > 1e-6) { + printf("B transpose inverse permutation error %d %e %e\n", k, solution_dense[k], solution_dense_permuted[k]); + found_error = true; + } + } + if (found_error) { + for (i_t k = 0; k < m; ++k) { + printf("%d (sparse -> permuted -> dense) %e (sparse -> dense -> permuted)%e\n", k, solution_dense[k], solution_dense_permuted[k]); + } + for (i_t k = 0; k < solution.i.size(); ++k) + { + printf("%d solution sparse %d %e\n", k, solution.i[k], solution.x[k]); + } + for (i_t k = 0; k < m; ++k) { + if (solution_dense[k] != 0.0) { + printf("%d solution dense %e\n", k, solution_dense[k]); + } + } + for (i_t k = 0; k < m; ++k) { + printf("inv permutation %d %d\n", k, inverse_row_permutation_[k]); + } + for (i_t k = 0; k < m; ++k) { + if (r_dense2[k] != 0.0) { + printf("%d r dense %e\n", k, r_dense2[k]); + } + } + for (i_t k = 0; k < m; ++k) { + if (solution_dense_permuted[k] != 0.0) { + printf("%d solution dense permuted %e\n", k, solution_dense_permuted[k]); + } + } + } +#endif + return 0; +} + // Solve for x such that L*x = b template i_t basis_update_t::l_solve(std::vector& rhs) const @@ -101,7 +244,6 @@ i_t basis_update_t::l_solve(std::vector& rhs) const #endif // First solve // L0*x0 = b - // TODO: Handle a sparse rhs dual_simplex::lower_triangular_solve(L0_, rhs); #ifdef CHECK_LOWER_SOLVE { @@ -129,6 +271,100 @@ i_t basis_update_t::l_solve(std::vector& rhs) const return 0; } +template +i_t basis_update_t::l_solve(sparse_vector_t& rhs) const +{ + // L = L0 * R1^{-1} * R2^{-1} * ... 
* Rk^{-1} + // + // where Ri = I + e_r d^T + + // First solve + // L0*x0 = b + csc_matrix_t B(1, 1, 1); + rhs.to_csc(B); + const i_t m = L0_.m; + i_t top = sparse_triangle_solve(B, 0, std::nullopt, xi_workspace_, L0_, x_workspace_.data()); + solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs + +#ifdef CHECK_L_SOLVE + std::vector residual(m, 0.0); + const i_t col_start = B.col_start[0]; + const i_t col_end = B.col_start[1]; + for (i_t p = col_start; p < col_end; ++p) { + residual[B.i[p]] = B.x[p]; + } + + std::vector x0; + rhs.to_dense(x0); + matrix_vector_multiply(L0_, 1.0, x0, -1.0, residual); + const f_t L0_solve_error = vector_norm_inf(residual); + if (L0_solve_error > 1e-10) { + printf("L0 solve error %e\n", L0_solve_error); + } +#endif + + + + + // then solve R1^{-1}*x1 = x0 -> x1 = R1*x0 + // then solve R2^{-1}*x2 = x1 -> x2 = R2*x1 + // until we get to + // Rk^{-1}*x = xk-1 -> x = Rk*xk-1 + // Rk = (I + e_rk dk^T) + // x = Rk*xk-1 = xk-1 + erk (dk^T xk-1) + +#ifdef CHECK_MULTIPLY + std::vector multiply; + rhs.to_dense(multiply); +#endif + + i_t nz = scatter_into_workspace(rhs); + + for (i_t k = 0; k < num_updates_; ++k) { + const i_t r = pivot_indices_[k]; + f_t dot = 0.0; + const i_t col_start = S_.col_start[k]; + const i_t col_end = S_.col_start[k + 1]; + for (i_t p = col_start; p < col_end; ++p) { + if (xi_workspace_[S_.i[p]]) { + dot += S_.x[p] * x_workspace_[S_.i[p]]; + } + } + if (!xi_workspace_[r]) { + xi_workspace_[r] = 1; + xi_workspace_[m + nz] = r; + nz++; + } + x_workspace_[r] += dot; + +#ifdef CHECK_MULTIPLY + f_t dot2 = 0.0; + for (i_t p = col_start; p < col_end; ++p) { + dot2 += S_.x[p] * multiply[S_.i[p]]; + } + multiply[r] += dot2; +#endif + } + + // Gather the solution into rhs + gather_into_sparse_vector(nz, rhs); + + rhs.sort(); + +#ifdef CHECK_MULTIPLY + std::vector rhs_dense; + rhs.to_dense(rhs_dense); + for (i_t k = 0; k < m; ++k) { + if (std::abs(rhs_dense[k] - multiply[k]) > 1e-10) { + 
printf("l_solve rhs dense/multiply error %d %e %e\n", k, rhs_dense[k], multiply[k]); + } + } +#endif + + return 0; +} + + // Solve for y such that L'*y = c template i_t basis_update_t::l_transpose_solve(std::vector& rhs) const @@ -153,6 +389,157 @@ i_t basis_update_t::l_transpose_solve(std::vector& rhs) const return 0; } +template +i_t basis_update_t::scatter_into_workspace(const sparse_vector_t& in) const +{ + const i_t m = L0_.m; + // scatter pattern into xi_workspace_ + i_t nz = in.i.size(); + for (i_t k = 0; k < nz; ++k) { + const i_t i = in.i[k]; + xi_workspace_[i] = 1; + xi_workspace_[m + k] = i; + } + // scatter values into x_workspace_ + for (i_t k = 0; k < nz; ++k) { + x_workspace_[in.i[k]] = in.x[k]; + } + return nz; +} + +template +void basis_update_t::gather_into_sparse_vector(i_t nz, sparse_vector_t& out) const +{ + const i_t m = L0_.m; + out.i.clear(); + out.x.clear(); + out.i.resize(nz); + out.x.resize(nz); + for (i_t k = 0; k < nz; ++k) { + const i_t i = xi_workspace_[m + k]; + out.i[k] = i; + out.x[k] = x_workspace_[i]; + xi_workspace_[m + k] = 0; + xi_workspace_[i] = 0; + x_workspace_[i] = 0.0; + } +} + +template +void basis_update_t::solve_to_sparse_vector(i_t top, sparse_vector_t& out) const +{ + const i_t m = L0_.m; + out.n = m; + out.i.clear(); + out.x.clear(); + const i_t nz = m - top; + out.x.resize(nz); + out.i.resize(nz); + i_t k = 0; + for (i_t p = top; p < m; ++p) { + const i_t i = xi_workspace_[p]; + out.i[k] = i; + out.x[k] = x_workspace_[i]; + x_workspace_[i] = 0.0; + xi_workspace_[p] = 0; + k++; + } +} + +template +i_t basis_update_t::l_transpose_solve(sparse_vector_t& rhs) const +{ + // L = L0*R1^{-1}* ... * Rk^{-1} + // L' = Rk^{-T} * Rk-1^{-T} * ... * R2^{-T} * R1^{-T} * L0^T + // L'*y = c + // Rk^{-T} * Rk-1^{-T} * ... * R2^{-T} * R1^{-T} * L0^T * y = c + // L0^T * y = cprime = R1^1 * ... 
* Rk^T * c + const i_t m = L0_.m; + + i_t nz = 0; + +#ifdef CHECK_UPDATES + std::vector multiply; + rhs.to_dense(multiply); + for (i_t k = 0; k < 2*m; ++k) { + if (xi_workspace_[k]) { + printf("xi workspace %d %d\n", k, xi_workspace_[k]); + } + } +#endif + + if (num_updates_ > 0) { + nz = scatter_into_workspace(rhs); + } + + for (i_t k = num_updates_ - 1; k >= 0; --k) { + const i_t r = pivot_indices_[k]; + assert(r < m); + const i_t col_start = S_.col_start[k]; + const i_t col_end = S_.col_start[k + 1]; + if (xi_workspace_[r]) { + for (i_t p = col_start; p < col_end; ++p) { + // rhs.x[S_.i[p]] += rhs.x[r] * S_.x[p]; + if (!xi_workspace_[S_.i[p]]) { + xi_workspace_[S_.i[p]] = 1; + xi_workspace_[m + nz] = S_.i[p]; + nz++; + } + x_workspace_[S_.i[p]] += x_workspace_[r] * S_.x[p]; + } + } +#ifdef CHECK_UPDATES + for (i_t p = col_start; p < col_end; ++p) { + multiply[S_.i[p]] += multiply[r] * S_.x[p]; + } +#endif + } + + // Gather into rhs + if (num_updates_ > 0) { + gather_into_sparse_vector(nz, rhs); + + rhs.sort(); + +#ifdef CHECK_UPDATES + std::vector rhs_dense; + rhs.to_dense(rhs_dense); + for (i_t k = 0; k < m; ++k) + { + if (std::abs(rhs_dense[k] - multiply[k]) > 1e-6) { + printf("rhs dense/multiply error %d %e %e\n", k, rhs_dense[k], multiply[k]); + } + } +#endif + } + + // L0^T * y = cprime + csc_matrix_t Cprime(1, 1, 1); + rhs.to_csc(Cprime); + +#ifdef CHECK_LOWER_TRANSPOSE_SOLVE + std::vector cprime_dense; + rhs.to_dense(cprime_dense); +#endif + + i_t top = sparse_triangle_solve(Cprime, 0, std::nullopt, xi_workspace_, L0_transpose_, x_workspace_.data()); + solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs + +#ifdef CHECK_LOWER_TRANSPOSE_SOLVE + std::vector y_dense; + rhs.to_dense(y_dense); + + std::vector residual = cprime_dense; + matrix_transpose_vector_multiply(L0_, 1.0, y_dense, -1.0, residual); + const f_t L0_solve_error = vector_norm_inf(residual); + if (L0_solve_error > 1e-6) { + printf("L0 solve error %e\n", 
L0_solve_error); + } + +#endif + return 0; +} + template f_t basis_update_t::update_lower(const std::vector& sind, const std::vector& sval, @@ -205,6 +592,29 @@ i_t basis_update_t::u_solve(std::vector& x) const return 0; } +template +i_t basis_update_t::u_solve(sparse_vector_t& rhs) const +{ + // Solve Q*U*Q'*x = b + // Multiplying by Q' we have U*Q'*x = Q'*b = bprime + // Let y = Q'*x so U*y = bprime + // 1. Compute bprime = Q'*b + // 2. Solve for y such that U*y = bprime + // 3. Compute Q*y = x + const i_t m = U_.m; + sparse_vector_t bprime(m, 0); + rhs.inverse_permute_vector(col_permutation_, bprime); + + csc_matrix_t Bprime(1, 1, 1); + bprime.to_csc(Bprime); + i_t top = sparse_triangle_solve(Bprime, 0, std::nullopt, xi_workspace_, U_, x_workspace_.data()); + solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs + + rhs.inverse_permute_vector(inverse_col_permutation_); + + return 0; +} + // x = U'(q,q)\b template i_t basis_update_t::u_transpose_solve(std::vector& x) const @@ -223,6 +633,104 @@ i_t basis_update_t::u_transpose_solve(std::vector& x) const return 0; } +template +i_t basis_update_t::u_transpose_solve(sparse_vector_t& rhs) const +{ + // Solve Q*U'*Q'*x = b + // Multiplying by Q' we have U'*Q'*x = Q'*b = bprime + // Let y = Q'*x so U'*y = bprime + // 1. Compute bprime = Q'*b + // 2. Solve for y such that U'*y = bprime + // 3. 
Compute Q*y = x + const i_t m = U_.m; + sparse_vector_t bprime(1, 0); +#ifdef CHECK_PERMUTATION + std::vector rhs_dense(m); + rhs.to_dense(rhs_dense); +#endif + rhs.inverse_permute_vector(col_permutation_, bprime); +#ifdef CHECK_PERMUTATION + std::vector bprime_dense; + bprime.to_dense(bprime_dense); + std::vector rhs_dense_permuted(m); + inverse_permute_vector(col_permutation_, rhs_dense, rhs_dense_permuted); + for (i_t k = 0; k < m; ++k) { + if (std::abs(bprime_dense[k] - rhs_dense_permuted[k]) > 1e-6) { + printf("u_transpose inverse permutation error %d %e %e\n", k, bprime_dense[k], rhs_dense_permuted[k]); + } + } +#endif + +#ifdef CHECK_WORKSPACE + for (i_t k = 0; k < 2*m; ++k) { + if (xi_workspace_[k]) { + printf("before Utranspose m %d solve xi workspace %d %d\n", m, k, xi_workspace_[k]); + } + } +#endif + + + // U'*y = bprime + csc_matrix_t Bprime(1, 1, 1); + bprime.to_csc(Bprime); + i_t top = sparse_triangle_solve(Bprime, 0, std::nullopt, xi_workspace_, U_transpose_, x_workspace_.data()); + solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs + +#ifdef CHECK_WORKSPACE + for (i_t k = 0; k < 2*m; ++k) { + if (xi_workspace_[k]) { + printf("after Utranspose m %d top %d solve xi workspace %d %d\n", m, top, k, xi_workspace_[k]); + } + } +#endif + +#ifdef CHECK_PERMUTATION + std::vector rhs_dense2; + rhs.to_dense(rhs_dense2); +#endif + + // Q*y = x + rhs.inverse_permute_vector(inverse_col_permutation_); +#ifdef CHECK_PERMUTATION + rhs.to_dense(rhs_dense_permuted); + std::vector rhs_dense_permuted2(m); + permute_vector(col_permutation_, rhs_dense2, rhs_dense_permuted2); + bool found_error = false; + for (i_t k = 0; k < m; ++k) { + if (std::abs(rhs_dense_permuted[k] - rhs_dense_permuted2[k]) > 1e-6) { + printf("u_transpose2 permutation error %d %e %e\n", k, rhs_dense_permuted[k], rhs_dense_permuted2[k]); + found_error = true; + } + } + if (found_error) { + for (i_t k = 0; k < m; ++k) { + printf("%d (sparse -> permuted -> dense) %e 
(sparse -> dense -> permuted)%e\n", k, rhs_dense_permuted[k], rhs_dense_permuted2[k]); + } + for (i_t k = 0; k < rhs.i.size(); ++k) { + printf("%d rhs sparse %d %e\n", k, rhs.i[k], rhs.x[k]); + } + for (i_t k = 0; k < m; ++k) { + if (rhs_dense_permuted[k] != 0.0) { + printf("%d rhs dense permuted %e\n", k, rhs_dense_permuted[k]); + } + } + for (i_t k = 0; k < m; ++k) { + if (rhs_dense2[k] != 0.0) { + printf("%d rhs dense2 %e\n", k, rhs_dense2[k]); + } + } + printf("col permutation %d rhs dense 2 %d rhs dense permuted %d\n", col_permutation_.size(), rhs_dense2.size(), rhs_dense_permuted.size()); + for (i_t k = 0; k < col_permutation_.size(); ++k) { + printf("%d col permutation %d\n", k, col_permutation_[k]); + } + for (i_t k = 0; k < m; ++k) { + printf("%d col permutation inverse %d\n", k, inverse_col_permutation_[k]); + } + } +#endif + return 0; +} + template i_t basis_update_t::index_map(i_t r) const { @@ -334,6 +842,7 @@ i_t basis_update_t::update_upper(const std::vector& ind, U_.col_start[n] = new_nz; // Check to ensure that U remains upper triangular +#ifdef CHECK_UPPER_TRIANGULAR for (i_t k = 0; k < n; ++k) { const i_t col_start = U_.col_start[k]; const i_t col_end = U_.col_start[k + 1]; @@ -341,6 +850,10 @@ i_t basis_update_t::update_upper(const std::vector& ind, assert(U_.i[p] <= k); } } +#endif + + // Update U transpose + U_.transpose(U_transpose_); return 0; } diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp index 73bec6a5d2..783edc38e2 100644 --- a/cpp/src/dual_simplex/basis_updates.hpp +++ b/cpp/src/dual_simplex/basis_updates.hpp @@ -32,11 +32,18 @@ class basis_update_t { : L0_(Linit), U_(Uinit), row_permutation_(p), + inverse_row_permutation_(p.size()), S_(Linit.m, 1, 0), col_permutation_(Linit.m), - inverse_col_permutation_(Linit.m) + inverse_col_permutation_(Linit.m), + xi_workspace_(2*Linit.m, 0), + x_workspace_(Linit.m, 0.0), + U_transpose_(1, 1, 1), + L0_transpose_(1, 1, 1) { + 
inverse_permutation(row_permutation_, inverse_row_permutation_); clear(); + compute_transposes(); } i_t reset(const csc_matrix_t& Linit, @@ -47,34 +54,59 @@ class basis_update_t { U_ = Uinit; assert(p.size() == Linit.m); row_permutation_ = p; + inverse_permutation(row_permutation_, inverse_row_permutation_); clear(); + compute_transposes(); return 0; } // Solves for x such that B*x = b, where B is the basis matrix i_t b_solve(const std::vector& rhs, std::vector& solution) const; + // Solves for x such that B*x = b, where B is the basis matrix + i_t b_solve(const sparse_vector_t& rhs, sparse_vector_t& solution) const; + // Solves for x such that B*x = b, where B is the basis matrix, also returns L*v = P*b // This is useful for avoiding an extra solve with the update i_t b_solve(const std::vector& rhs, std::vector& solution, std::vector& Lsol) const; + // Solves for x such that B*x = b, where B is the basis matrix, also returns L*v = P*b + // This is useful for avoiding an extra solve with the update + i_t b_solve(const sparse_vector_t& rhs, + sparse_vector_t& solution, + sparse_vector_t& Lsol) const; + // Solves for y such that B'*y = c, where B is the basis matrix i_t b_transpose_solve(const std::vector& rhs, std::vector& solution) const; + i_t b_transpose_solve(const sparse_vector_t& rhs, sparse_vector_t& solution) const; + // Solve for x such that L*x = y i_t l_solve(std::vector& rhs) const; + // Solve for x such that L*x = y + i_t l_solve(sparse_vector_t& rhs) const; + // Solve for x such that L'*x = y i_t l_transpose_solve(std::vector& rhs) const; + // Solve for x such that L'*x = y + i_t l_transpose_solve(sparse_vector_t& rhs) const; + // Solve for x such that U*x = y i_t u_solve(std::vector& rhs) const; + // Solve for x such that U*x = y + i_t u_solve(sparse_vector_t& rhs) const; + // Solve for x such that U'*x = y i_t u_transpose_solve(std::vector& rhs) const; + // Solve for x such that U'*x = y + i_t u_transpose_solve(sparse_vector_t& rhs) const; + // 
Replace the column B(:, leaving_index) with the vector abar. Pass in utilde such that L*utilde // = abar i_t update(std::vector& utilde, i_t leaving_index); @@ -85,6 +117,12 @@ class basis_update_t { const std::vector& row_permutation() const { return row_permutation_; } + void compute_transposes() + { + L0_.transpose(L0_transpose_); + U_.transpose(U_transpose_); + } + private: void clear() { @@ -110,14 +148,23 @@ class basis_update_t { csc_matrix_t& out, i_t out_col) const; + void solve_to_sparse_vector(i_t top, sparse_vector_t& out) const; + i_t scatter_into_workspace(const sparse_vector_t& in) const; + void gather_into_sparse_vector(i_t nz, sparse_vector_t& out) const; + i_t num_updates_; // Number of rank-1 updates to L0 - csc_matrix_t L0_; // Sparse lower triangular matrix from initial factorization - csc_matrix_t U_; // Sparse upper triangular matrix. Is modified by updates + mutable csc_matrix_t L0_; // Sparse lower triangular matrix from initial factorization + mutable csc_matrix_t U_; // Sparse upper triangular matrix. 
Is modified by updates std::vector row_permutation_; // Row permutation from initial factorization L*U = P*B + std::vector inverse_row_permutation_; // Inverse row permutation from initial factorization L*U = P*B std::vector pivot_indices_; // indicies for rank-1 updates to L csc_matrix_t S_; // stores the pivot elements for rank-1 updates to L std::vector col_permutation_; // symmetric permuation q used in U(q, q) represents Q std::vector inverse_col_permutation_; // inverse permutation represents Q' + mutable std::vector xi_workspace_; + mutable std::vector x_workspace_; + mutable csc_matrix_t U_transpose_; // Needed for sparse solves + mutable csc_matrix_t L0_transpose_; // Needed for sparse solves }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp index 96f266ab25..5f0ccf3b27 100644 --- a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp +++ b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp @@ -34,6 +34,7 @@ i_t bound_flipping_ratio_test_t::compute_breakpoints(std::vector& const f_t dual_tol = settings_.dual_tol / 10; i_t idx = 0; + while (idx == 0 && pivot_tol >= 1e-12) { for (i_t k = 0; k < n - m; ++k) { const i_t j = nonbasic_list_[k]; if (vstatus_[j] == variable_status_t::NONBASIC_FIXED) { continue; } @@ -50,6 +51,8 @@ i_t bound_flipping_ratio_test_t::compute_breakpoints(std::vector& idx++; } } + pivot_tol /= 10; + } return idx; } @@ -131,12 +134,13 @@ i_t bound_flipping_ratio_test_t::compute_step_length(f_t& step_length, 0, num_breakpoints, indicies, ratios, slope, step_length, nonbasic_entering, entering_index); bool continue_search = k_idx >= 0 && num_breakpoints > 1 && slope > 0.0; if (!continue_search) { - if constexpr (verbose) { + if constexpr (0) { settings_.log.printf( - "BFRT stopping. No bound flips. Step length %e Nonbasic entering %d Entering %d.\n", + "BFRT stopping. No bound flips. 
Step length %e Nonbasic entering %d Entering %d pivot %e\n", step_length, nonbasic_entering, - entering_index); + entering_index, + std::abs(delta_z_[entering_index])); } return entering_index; } @@ -157,11 +161,12 @@ i_t bound_flipping_ratio_test_t::compute_step_length(f_t& step_length, heap_passes( indicies, ratios, num_breakpoints - 1, slope, step_length, nonbasic_entering, entering_index); - if constexpr (verbose) { - settings_.log.printf("BFRT step length %e entering index %d non basic entering %d\n", + if constexpr (0) { + settings_.log.printf("BFRT step length %e entering index %d non basic entering %d pivot %e\n", step_length, entering_index, - nonbasic_entering); + nonbasic_entering, + std::abs(delta_z_[entering_index])); } return entering_index; } diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index e48a376590..1d1398b98b 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -57,6 +57,84 @@ f_t l2_primal_residual(const lp_problem_t& lp, const lp_solution_t(primal_residual); } +template +void compute_reduced_cost_update(const lp_problem_t& lp, + const std::vector& basic_list, + const std::vector& nonbasic_list, + const std::vector& delta_y, + i_t leaving_index, + i_t direction, + std::vector& delta_z) +{ + const i_t m = lp.num_rows; + const i_t n = lp.num_cols; + + // delta_zB = sigma*ei + for (i_t k = 0; k < m; k++) { + const i_t j = basic_list[k]; + delta_z[j] = 0; + } + delta_z[leaving_index] = direction; + // delta_zN = -N'*delta_y + for (i_t k = 0; k < n - m; k++) { + const i_t j = nonbasic_list[k]; + // z_j <- -A(:, j)'*delta_y + const i_t col_start = lp.A.col_start[j]; + const i_t col_end = lp.A.col_start[j + 1]; + f_t dot = 0.0; + for (i_t p = col_start; p < col_end; ++p) { + dot += lp.A.x[p] * delta_y[lp.A.i[p]]; + } + delta_z[j] = -dot; + } +} + +template +void compute_reduced_cost_update(const lp_problem_t& lp, + const csc_matrix_t& Atranspose, + const std::vector& basic_list, + 
const std::vector& nonbasic_list, + const sparse_vector_t& delta_y, + i_t leaving_index, + i_t direction, + std::vector& nonbasic, + std::vector& delta_z) +{ + const i_t m = lp.num_rows; + const i_t n = lp.num_cols; + + // delta_zB = sigma*ei, O(m) + for (i_t k = 0; k < m; k++) { + const i_t j = basic_list[k]; + delta_z[j] = 0; + } + delta_z[leaving_index] = direction; + + + // Mark the nonbasic variables O(n - m) + for (i_t k = 0; k < n - m; k++) { + const i_t j = nonbasic_list[k]; + nonbasic[j] = 1; + } + + // delta_zN = - N'*delta_y + // = - sum_{k : delta_y[k] != 0} delta_y[k] * Atranspose(:, nonbasic_list[k]) + const i_t nz_dy = delta_y.i.size(); + for (i_t k = 0; k < nz_dy; k++) { + const i_t i = delta_y.i[k]; + const i_t row_start = Atranspose.col_start[i]; + const i_t row_end = Atranspose.col_start[i + 1]; + for (i_t p = row_start; p < row_end; ++p) { + const i_t j = Atranspose.i[p]; + if (nonbasic[j]) { + delta_z[j] -= delta_y.x[i] * Atranspose.x[p]; + } + } + } + +} + + template void compute_dual_solution_from_basis(const lp_problem_t& lp, basis_update_t& ft, @@ -471,6 +549,7 @@ i_t flip_bounds(const lp_problem_t& lp, std::vector& atilde) { f_t delta_obj = 0; + i_t num_flipped = 0; for (i_t j : nonbasic_list) { if (j == entering_index) { continue; } const bool bounded = @@ -490,6 +569,7 @@ i_t flip_bounds(const lp_problem_t& lp, settings.log.printf( "Flipping nonbasic %d from lo %e to up %e. z %e\n", j, lp.lower[j], lp.upper[j], z[j]); #endif + num_flipped++; } else if (vstatus[j] == variable_status_t::NONBASIC_UPPER && z[j] > dual_tol) { const f_t delta = lp.lower[j] - lp.upper[j]; scatter_dense(lp.A, j, -delta, atilde); @@ -500,30 +580,131 @@ i_t flip_bounds(const lp_problem_t& lp, settings.log.printf( "Flipping nonbasic %d from up %e to lo %e. 
z %e\n", j, lp.upper[j], lp.lower[j], z[j]); #endif + num_flipped++; } } - return 0; + return num_flipped; } template -i_t initialize_steepest_edge_norms(const simplex_solver_settings_t& settings, +i_t initialize_steepest_edge_norms(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, const f_t start_time, const std::vector& basic_list, - const basis_update_t& ft, + basis_update_t& ft, std::vector& delta_y_steepest_edge) { // TODO: Skip this initialization when starting from a slack basis // Or skip individual columns corresponding to slack variables + const i_t m = basic_list.size(); + + // We want to compute B^T delta_y_i = -e_i + // If there is a column u of B^T such that B^T(:, u) = alpha * e_i than the + // solve delta_y_i = -1/alpha * e_u + // So we need to find columns of B^T (or rows of B) with only a single non-zero entry + f_t start_singleton_rows = tic(); + std::vector row_degree(m, 0); + std::vector mapping(m, -1); + std::vector coeff(m, 0.0); +#if 1 + for (i_t k = 0; k < m; ++k) { + const i_t j = basic_list[k]; + const i_t col_start = lp.A.col_start[j]; + const i_t col_end = lp.A.col_start[j + 1]; + for (i_t p = col_start; p < col_end; ++p) { + const i_t i = lp.A.i[p]; + row_degree[i]++; + // column j of A is column k of B + mapping[k] = i; + coeff[k] = lp.A.x[p]; + } + } + + csc_matrix_t B(m, m, 0); + form_b(lp.A, basic_list, B); + csc_matrix_t B_transpose(m, m, 0); + B.transpose(B_transpose); + + i_t num_singleton_rows = 0; + for (i_t i = 0; i < m; ++i) { + if (row_degree[i] == 1) { + num_singleton_rows++; + const i_t col_start = B_transpose.col_start[i]; + const i_t col_end = B_transpose.col_start[i + 1]; + if (col_end - col_start != 1) { + settings.log.printf("Singleton row %d has %d non-zero entries\n", i, col_end - col_start); + } + } + } + + + + settings.log.printf("Found %d singleton rows in %.2fs\n", num_singleton_rows, toc(start_singleton_rows)); + + ft.compute_transposes(); +#endif + f_t last_log = tic(); for (i_t k = 0; k < 
m; ++k) { - std::vector ei(m); - std::vector dy(m); + //std::vector ei(m); + sparse_vector_t sparse_ei(m, 1); + sparse_ei.x[0] = -1.0; + sparse_ei.i[0] = k; + //std::vector dy(m, 0.0); const i_t j = basic_list[k]; - ei[k] = -1.0; - ft.b_transpose_solve(ei, dy); - ei[k] = 0.0; - const f_t init = vector_norm2_squared(dy); + //ei[k] = -1.0; + f_t init = -1.0; + if (1 && row_degree[mapping[k]] == 1) { + const i_t u = mapping[k]; + //settings.log.printf("Singleton row %d u %d\n", k, u); + const f_t alpha = coeff[k]; + //dy[u] = -1.0 / alpha; + f_t my_init = 1.0 / (alpha * alpha); + init = my_init; +#ifdef CHECK_HYPERSPARSE + std::vector residual(m); + b_transpose_multiply(lp, basic_list, dy, residual); + float error = 0; + for (i_t h = 0; h < m; ++h) { + const f_t error_component = std::abs(residual[h] - ei[h]); + error += error_component; + if (error_component > 1e-12) { + settings.log.printf("Singleton row %d component %d error %e residual %e ei %e\n", k, h, error_component, residual[h], ei[h]); + } + } + if (error > 1e-12) { + settings.log.printf("Singleton row %d error %e\n", k, error); + } +#endif + +#ifdef CHECK_HYPERSPARSE + dy[u] = 0.0; + ft.b_transpose_solve(ei, dy); + init = vector_norm2_squared(dy); + if (init != my_init) { + settings.log.printf("Singleton row %d error %.16e init %.16e my_init %.16e\n", k, std::abs(init - my_init), init, my_init); + } +#endif + } else { + //ft.b_transpose_solve(ei, dy); + //init = vector_norm2_squared(dy); + sparse_vector_t sparse_dy(m, 0); + ft.b_transpose_solve(sparse_ei, sparse_dy); + //settings.log.printf("Steepest edge norm %d nz %d\n", k, sparse_dy.x.size()); + f_t my_init = 0.0; + for (i_t p = 0; p < sparse_dy.x.size(); ++p) { + my_init += sparse_dy.x[p] * sparse_dy.x[p]; + } +#if COMPARE_WITH_DENSE + if (std::abs(init - my_init) > 1e-12) { + settings.log.printf("Singleton row %d error %.16e init %.16e my_init %.16e\n", k, std::abs(init - my_init), init, my_init); + } +#endif + init = my_init; + } + //ei[k] = 0.0; + 
//init = vector_norm2_squared(dy); assert(init > 0); delta_y_steepest_edge[j] = init; @@ -554,16 +735,35 @@ i_t update_steepest_edge_norms(const simplex_solver_settings_t& settin std::vector& delta_y_steepest_edge) { i_t m = delta_y.size(); - std::vector v(m); - // B^T delta_y = - direction * e_basic_leaving_index - // We want B v = - B^{-T} e_basic_leaving_index - ft.b_solve(delta_y, v); - // if direction = -1 we need to scale v - if (direction == -1) { - for (i_t k = 0; k < m; ++k) { - v[k] *= -1; + + sparse_vector_t delta_y_sparse(delta_y); + const i_t delta_y_nz = delta_y_sparse.i.size(); + + std::vector v(m, 0.0); + if (delta_y_nz > 0.25 * m) + { + // B^T delta_y = - direction * e_basic_leaving_index + // We want B v = - B^{-T} e_basic_leaving_index + ft.b_solve(delta_y, v); + // if direction = -1 we need to scale v + if (direction == -1) { + for (i_t k = 0; k < m; ++k) { + v[k] *= -1; + } } } + else + { + sparse_vector_t v_sparse(m, 0); + ft.b_solve(delta_y_sparse, v_sparse); + if (direction == -1) { + for (i_t k = 0; k < v_sparse.i.size(); ++k) { + v_sparse.x[k] *= -1; + } + } + v_sparse.scatter(v); + } + const f_t dy_norm_squared = vector_norm2_squared(delta_y); const i_t leaving_index = basic_list[basic_leaving_index]; const f_t prev_dy_norm_squared = delta_y_steepest_edge[leaving_index]; @@ -624,11 +824,21 @@ i_t compute_steepest_edge_norm_entering(const simplex_solver_settings_t& steepest_edge_norms) { +#ifdef CHECK_HYPERSPARSE std::vector es(m); es[basic_leaving_index] = -1.0; std::vector delta_ys(m); ft.b_transpose_solve(es, delta_ys); steepest_edge_norms[entering_index] = vector_norm2_squared(delta_ys); +#endif + + sparse_vector_t es_sparse(m, 1); + es_sparse.i[0] = basic_leaving_index; + es_sparse.x[0] = -1.0; + sparse_vector_t delta_ys_sparse(m, 0); + ft.b_transpose_solve(es_sparse, delta_ys_sparse); + steepest_edge_norms[entering_index] = delta_ys_sparse.norm2_squared(); + #ifdef STEEPEST_EDGE_DEBUG settings.log.printf("Steepest edge norm %e for 
entering j %d at i %d\n", steepest_edge_norms[entering_index], @@ -1218,7 +1428,7 @@ dual::status_t dual_phase2(i_t phase, } } else { std::fill(delta_y_steepest_edge.begin(), delta_y_steepest_edge.end(), -1); - if (phase2::initialize_steepest_edge_norms( + if (phase2::initialize_steepest_edge_norms(lp, settings, start_time, basic_list, ft, delta_y_steepest_edge) == -1) { return dual::status_t::TIME_LIMIT; } @@ -1231,7 +1441,7 @@ dual::status_t dual_phase2(i_t phase, if (phase == 2) { settings.log.printf(" Iter Objective Primal Infeas Perturb Time\n"); } const i_t iter_limit = settings.iteration_limit; - std::vector delta_y(m); + std::vector delta_y(m, 0.0); std::vector delta_z(n); std::vector delta_x(n); const i_t start_iter = iter; @@ -1280,13 +1490,49 @@ dual::status_t dual_phase2(i_t phase, // BTran // TODO: replace with sparse solve. - std::vector ei(m, 0.0); std::vector delta_y(m); +#ifdef CHECK_HYPERSPARSE + std::vector ei(m, 0.0); ei[basic_leaving_index] = -direction; // BT*delta_y = -delta_zB = -sigma*ei ft.b_transpose_solve(ei, delta_y); +#endif + sparse_vector_t ei_sparse(m, 1); + ei_sparse.i[0] = basic_leaving_index; + ei_sparse.x[0] = -direction; + sparse_vector_t delta_y_sparse(m, 0); + ft.b_transpose_solve(ei_sparse, delta_y_sparse); +#ifdef CHECK_HYPERSPARSE + std::vector delta_y_sparse_vector_check(m); + delta_y_sparse.to_dense(delta_y_sparse_vector_check); + f_t error_check = 0.0; + for (i_t k = 0; k < m; ++k) + { + if (std::abs(delta_y[k] - delta_y_sparse_vector_check[k]) > 1e-6) + { + settings.log.printf("\tBTranspose error %d %e %e\n", k, delta_y[k], delta_y_sparse_vector_check[k]); + } + error_check += std::abs(delta_y[k] - delta_y_sparse_vector_check[k]); + } + if (error_check > 1e-6) { + settings.log.printf("BTranspose error %e\n", error_check); + } + std::vector residual(m); + b_transpose_multiply(lp, basic_list, delta_y_sparse_vector_check, residual); + for (i_t k = 0; k < m; ++k) + { + if (std::abs(residual[k] - ei[k]) > 1e-6) + { + 
settings.log.printf("\tBTranspose multiply error %d %e %e\n", k, residual[k], ei[k]); + } + } +#endif + delta_y_sparse.scatter(delta_y); - const f_t steepest_edge_norm_check = vector_norm2_squared(delta_y); + const f_t steepest_edge_norm_check = delta_y_sparse.norm2_squared(); +#ifdef CHECK_HYPERSPARSE + f_t steepest_edge_norm_check = vector_norm2_squared(delta_y); +#endif if (delta_y_steepest_edge[leaving_index] < settings.steepest_edge_ratio * steepest_edge_norm_check) { constexpr bool verbose = false; @@ -1475,19 +1721,21 @@ dual::status_t dual_phase2(i_t phase, // Update primal variable std::vector atilde(m); std::vector delta_x_flip(n); - phase2::flip_bounds( + const i_t num_flipped = phase2::flip_bounds( lp, settings, objective, z, nonbasic_list, entering_index, vstatus, delta_x_flip, atilde); - // B*delta_xB_0 = atilde - std::vector delta_xB_0(m); - ft.b_solve(atilde, delta_xB_0); - for (i_t k = 0; k < m; ++k) { - const i_t j = basic_list[k]; - x[j] += delta_xB_0[k]; - } - for (i_t k = 0; k < n - m; ++k) { - const i_t j = nonbasic_list[k]; - x[j] += delta_x_flip[j]; + if (num_flipped > 0) { + // B*delta_xB_0 = atilde + std::vector delta_xB_0(m); + ft.b_solve(atilde, delta_xB_0); + for (i_t k = 0; k < m; ++k) { + const i_t j = basic_list[k]; + x[j] += delta_xB_0[k]; + } + for (i_t k = 0; k < n - m; ++k) { + const i_t j = nonbasic_list[k]; + x[j] += delta_x_flip[j]; + } } f_t delta_x_leaving; @@ -1498,22 +1746,43 @@ dual::status_t dual_phase2(i_t phase, } // B*w = -A(:, entering) std::vector scaled_delta_xB(m); - std::fill(rhs.begin(), rhs.end(), 0.0); - lp.A.load_a_column(entering_index, rhs); + const i_t col_nz = lp.A.col_start[entering_index + 1] - lp.A.col_start[entering_index]; std::vector utilde(m); - ft.b_solve(rhs, scaled_delta_xB, utilde); - for (i_t i = 0; i < m; ++i) { - scaled_delta_xB[i] *= -1.0; + if (col_nz > 0.25 * m) + { + std::fill(rhs.begin(), rhs.end(), 0.0); + lp.A.load_a_column(entering_index, rhs); + ft.b_solve(rhs, scaled_delta_xB, 
utilde); + for (i_t i = 0; i < m; ++i) { + scaled_delta_xB[i] *= -1.0; + } + } + else + { + sparse_vector_t rhs_sparse(lp.A, entering_index); + sparse_vector_t scaled_delta_xB_sparse(m, 0); + sparse_vector_t utilde_sparse(m, 0); + ft.b_solve(rhs_sparse, scaled_delta_xB_sparse, utilde_sparse); + const i_t xB_nz = scaled_delta_xB_sparse.i.size(); + for (i_t k = 0; k < xB_nz; ++k) + { + scaled_delta_xB_sparse.x[k] *= -1.0; + } + scaled_delta_xB_sparse.to_dense(scaled_delta_xB); + utilde_sparse.to_dense(utilde); +#ifdef CHECK_B_SOLVE + rhs_sparse.to_dense(rhs); +#endif } -#ifdef COMPUTE_BSOLVE_RESIDUAL +#ifdef CHECK_B_SOLVE { std::vector residual_B(m); b_multiply(lp, basic_list, scaled_delta_xB, residual_B); f_t err_max = 0; - for (Int k = 0; k < m; ++k) { - const f_t err = std::abs(rhs[k] - residual_B[k]); - if (err >= 1e-5) { + for (i_t k = 0; k < m; ++k) { + const f_t err = std::abs(rhs[k] + residual_B[k]); + if (err >= 1e-6) { settings.log.printf( "Bsolve diff %d %e rhs %e residual %e\n", k, err, rhs[k], residual_B[k]); } @@ -1635,6 +1904,12 @@ dual::status_t dual_phase2(i_t phase, iter++; + // Clear delta_y + const i_t nz_dy = delta_y_sparse.i.size(); + for (i_t k = 0; k < nz_dy; ++k) { + delta_y[delta_y_sparse.i[k]] = 0.0; + } + const f_t obj = compute_objective(lp, x); f_t now = toc(start_time); if ((iter - start_iter) < settings.first_iteration_log || diff --git a/cpp/src/dual_simplex/sparse_matrix.hpp b/cpp/src/dual_simplex/sparse_matrix.hpp index 29e6a0cf42..8c75448197 100644 --- a/cpp/src/dual_simplex/sparse_matrix.hpp +++ b/cpp/src/dual_simplex/sparse_matrix.hpp @@ -23,9 +23,11 @@ #include #include #include +#include namespace cuopt::linear_programming::dual_simplex { + template class csr_matrix_t; // Forward declaration of CSR matrix needed to define CSC matrix @@ -108,6 +110,189 @@ class csr_matrix_t { static_assert(std::is_signed_v); }; + +template +class sparse_vector_t { + public: + sparse_vector_t(i_t n, i_t nz) : n(n), i(nz), x(nz) {} + 
sparse_vector_t(const std::vector& in) : n(in.size()) + { + i_t nz = 0; + for (i_t k = 0; k < n; ++k) { + if (in[k] != 0) { + i.push_back(k); + x.push_back(in[k]); + } + } + } + sparse_vector_t(const csc_matrix_t& A, i_t col) + { + const i_t col_start = A.col_start[col]; + const i_t col_end = A.col_start[col + 1]; + n = A.n; + const i_t nz = col_end - col_start; + i.reserve(nz); + x.reserve(nz); + for (i_t k = col_start; k < col_end; ++k) { + i.push_back(A.i[k]); + x.push_back(A.x[k]); + } + } + + void to_csc(csc_matrix_t& A) const + { + A.m = n; + A.n = 1; + A.nz_max = i.size(); + A.col_start.clear(); + A.col_start.resize(2); + A.col_start[0] = 0; + A.col_start[1] = i.size(); + A.i = i; + A.x = x; + } + + void to_dense(std::vector& x_dense) const + { + x_dense.clear(); + x_dense.resize(n, 0.0); + const i_t nz = i.size(); + for (i_t k = 0; k < nz; ++k) { + x_dense[i[k]] = x[k]; + } + } + + void scatter(std::vector& x_dense) const + { + // Assumes x_dense is already cleared + const i_t nz = i.size(); + for (i_t k = 0; k < nz; ++k) { + x_dense[i[k]] += x[k]; + } + } + + void inverse_permute_vector(const std::vector& p) + { + assert(p.size() == n); + i_t nz = i.size(); + std::vector i_perm(nz); + for (i_t k = 0; k < nz; ++k) { + i_perm[k] = p[i[k]]; + } + i = i_perm; + } + + void inverse_permute_vector(const std::vector& p, sparse_vector_t& y) const + { + i_t m = p.size(); + assert(n == m); + i_t nz = i.size(); + y.n = n; + y.x = x; + std::vector i_perm(nz); + for (i_t k = 0; k < nz; ++k) { + i_perm[k] = p[i[k]]; + } + y.i = i_perm; + } + + f_t sparse_dot(const csc_matrix_t& Y, i_t y_col) const + { + const i_t col_start = Y.col_start[y_col]; + const i_t col_end = Y.col_start[y_col + 1]; + const i_t ny = col_end - col_start; + const i_t nx = i.size(); + f_t dot = 0.0; + for (i_t h = 0, k = col_start; h < nx && k < col_end; ) { + const i_t p = i[h]; + const i_t q = Y.i[k]; + if (p == q) { + dot += Y.x[k] * x[h]; + h++; + k++; + } else if (p < q) { + h++; + } else if (q 
< p) { + k++; + } + } + return dot; + } + + void sort() + { + if (i.size() < 2) { + return; + } + // If the number of nonzeros is large, use a O(n) bucket sort + if (i.size() > 0.3 *n) + { + std::vector bucket(n, 0.0); + const i_t nz = i.size(); + for (i_t k = 0; k < nz; ++k) + { + bucket[i[k]] = x[k]; + } + i.clear(); + i.reserve(nz); + x.clear(); + x.reserve(nz); + for (i_t k = 0; k < n; ++k) + { + if (bucket[k] != 0.0) + { + i.push_back(k); + x.push_back(bucket[k]); + } + } + } + else + { + // Use a n log n sort + const i_t nz = i.size(); + std::vector i_sorted(nz); + std::vector x_sorted(nz); + std::vector perm(nz); + for (i_t k = 0; k < nz; ++k) + { + perm[k] = k; + } + std::vector& iunsorted = i; + std::sort(perm.begin(), perm.end(), [&iunsorted](i_t a, i_t b) { return iunsorted[a] < iunsorted[b]; }); + for (i_t k = 0; k < nz; ++k) + { + i_sorted[k] = i[perm[k]]; + x_sorted[k] = x[perm[k]]; + } + i = i_sorted; + x = x_sorted; + } + + // Check +#ifdef CHECK_SORT + for (i_t k = 0; k < i.size() - 1; ++k) { + if (i[k] > i[k + 1]) { + printf("Sort error %d %d\n", i[k], i[k + 1]); + } + } +#endif + } + + f_t norm2_squared() const + { + f_t dot = 0.0; + const i_t nz = i.size(); + for (i_t k = 0; k < nz; ++k) { + dot += x[k] * x[k]; + } + return dot; + } + + i_t n; + std::vector i; + std::vector x; +}; + template void cumulative_sum(std::vector& inout, std::vector& output); diff --git a/cpp/src/dual_simplex/triangle_solve.cpp b/cpp/src/dual_simplex/triangle_solve.cpp index eddf04843d..0332b2b5a8 100644 --- a/cpp/src/dual_simplex/triangle_solve.cpp +++ b/cpp/src/dual_simplex/triangle_solve.cpp @@ -152,7 +152,7 @@ i_t depth_first_search(i_t j, } done = 1; // Node j is done if no unvisited neighbors i_t p2 = (jnew < 0) ? 
0 : UNFLIP(G.col_start[jnew + 1]); - for (i_t p = pstack[head]; p < p2; ++p) { // Examin all neighbors of j + for (i_t p = pstack[head]; p < p2; ++p) { // Examine all neighbors of j i_t i = G.i[p]; // Consider neighbor i if (MARKED(G.col_start, i)) { continue; // skip visited node i @@ -163,8 +163,10 @@ i_t depth_first_search(i_t j, break; // break to start dfs at node i } if (done) { - head--; // remove j from the recursion stack - xi[--top] = j; // and place it the output stack + pstack[head] = 0; // restore pstack so it can be used again in other routines + xi[head] = 0; // restore xi so it can be used again in other routines + head--; // remove j from the recursion stack + xi[--top] = j; // and place it the output stack } } return top; @@ -244,6 +246,13 @@ template int sparse_triangle_solve(const csc_matrix_t& xi, csc_matrix_t& G, double* x); + +template int sparse_triangle_solve(const csc_matrix_t& B, + int col, + const std::optional>& pinv, + std::vector& xi, + csc_matrix_t& G, + double* x); #endif } // namespace cuopt::linear_programming::dual_simplex From 7da8a988e3289ad551bd5aaa87adac304863694e Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Thu, 26 Jun 2025 11:53:05 -0700 Subject: [PATCH 04/28] Hypersparsity with MPF update --- cpp/src/dual_simplex/basis_updates.cpp | 961 ++++++++++++++++++ cpp/src/dual_simplex/basis_updates.hpp | 159 +++ .../bound_flipping_ratio_test.cpp | 20 +- .../bound_flipping_ratio_test.hpp | 15 +- cpp/src/dual_simplex/phase2.cpp | 656 +++++++++--- cpp/src/dual_simplex/sparse_matrix.cpp | 25 + cpp/src/dual_simplex/sparse_matrix.hpp | 4 +- cpp/src/dual_simplex/vector_math.cpp | 65 ++ cpp/src/dual_simplex/vector_math.hpp | 16 + 9 files changed, 1757 insertions(+), 164 deletions(-) diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index 1412061808..6e9098f0c2 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -1130,8 +1130,969 @@ i_t 
basis_update_t::lower_triangular_multiply(const csc_matrix_t +void basis_update_mpf_t::gather_into_sparse_vector(i_t nz, sparse_vector_t& out) const +{ + const i_t m = L0_.m; + //out.i.clear(); + //out.x.clear(); + out.i.resize(nz); + out.x.resize(nz); + for (i_t k = 0; k < nz; ++k) { + const i_t i = xi_workspace_[m + k]; + out.i[k] = i; + out.x[k] = x_workspace_[i]; + xi_workspace_[m + k] = 0; + xi_workspace_[i] = 0; + x_workspace_[i] = 0.0; + } +} + +template +void basis_update_mpf_t::solve_to_workspace(i_t top) const +{ + const i_t m = L0_.m; + i_t nz = 0; + for (i_t p = top; p < m; ++p) + { + const i_t i = xi_workspace_[p]; + xi_workspace_[m + nz] = i; + xi_workspace_[p] = 0; + nz++; + } + for (i_t k = 0; k < nz; ++k) + { + const i_t i = xi_workspace_[m + k]; + xi_workspace_[i] = 1; + } +} + + +template +void basis_update_mpf_t::solve_to_sparse_vector(i_t top, sparse_vector_t& out) const +{ + const i_t m = L0_.m; + out.n = m; + //out.i.clear(); + //out.x.clear(); + const i_t nz = m - top; + out.x.resize(nz); + out.i.resize(nz); + i_t k = 0; + for (i_t p = top; p < m; ++p) { + const i_t i = xi_workspace_[p]; + out.i[k] = i; + out.x[k] = x_workspace_[i]; + x_workspace_[i] = 0.0; + xi_workspace_[p] = 0; + k++; + } +} + +template +i_t basis_update_mpf_t::scatter_into_workspace(const sparse_vector_t& in) const +{ + const i_t m = L0_.m; + // scatter pattern into xi_workspace_ + i_t nz = in.i.size(); + for (i_t k = 0; k < nz; ++k) { + const i_t i = in.i[k]; + xi_workspace_[i] = 1; + xi_workspace_[m + k] = i; + } + // scatter values into x_workspace_ + for (i_t k = 0; k < nz; ++k) { + x_workspace_[in.i[k]] = in.x[k]; + } + return nz; +} + +template +void basis_update_mpf_t::grow_storage(i_t nz, i_t& S_start, i_t& S_nz) +{ + const i_t last_S_col = num_updates_ * 2; + const i_t new_last_S_col = last_S_col + 2; + if (new_last_S_col >= S_.col_start.size()) + { + S_.col_start.resize(new_last_S_col + refactor_frequency_); + } + S_nz = S_.col_start[last_S_col]; + if (S_nz + 
nz > S_.i.size()) + { + S_.i.resize(std::max(2 * S_nz, S_nz + nz)); + S_.x.resize(std::max(2 * S_nz, S_nz + nz)); + } + S_start = last_S_col; +} + + +template +i_t basis_update_mpf_t::b_transpose_solve(const std::vector& rhs, std::vector& solution) const +{ + std::vector UTsol; + return b_transpose_solve(rhs, solution, UTsol); +} + +template +i_t basis_update_mpf_t::b_transpose_solve(const std::vector& rhs, std::vector& solution, std::vector& UTsol) const +{ + const i_t m = L0_.m; + // P*B = L*U + // B'*P' = U'*L' + // We want to solve + // B'*y = c + // Let y = P'*w + // B'*y = B'*P'*w = U'*L'*w = c + // 1. Solve U'*r = c for r + // 2. Solve L'*w = r for w + // 3. Compute y = P'*w + + // Solve for r such that U'*r = c + std::vector r = rhs; + u_transpose_solve(r); + UTsol = r; + + // Solve for w such that L'*w = r + l_transpose_solve(r); + + // Compute y = P'*w + inverse_permute_vector(row_permutation_, r, solution); + + return 0; +} + +template +i_t basis_update_mpf_t::b_transpose_solve(const sparse_vector_t& rhs, sparse_vector_t& solution) const +{ + sparse_vector_t UTsol(1, 0); + return b_transpose_solve(rhs, solution, UTsol); +} + +template +i_t basis_update_mpf_t::b_transpose_solve(const sparse_vector_t& rhs, sparse_vector_t& solution, sparse_vector_t& UTsol) const +{ + // Solve for r such that U'*r = c + solution = rhs; + u_transpose_solve(solution); + UTsol = solution; + +#ifdef CHECK_U_TRANSPOSE_SOLVE + std::vector UTsol_dense; + UTsol.to_dense(UTsol_dense); + std::vector rhs_dense; + rhs.to_dense(rhs_dense); + + matrix_transpose_vector_multiply(U0_, 1.0, UTsol_dense, -1.0, rhs_dense); + if (vector_norm_inf(rhs_dense) > 1e-10) + { + printf("B transpose solve U transpose residual %e\n", vector_norm_inf(rhs_dense)); + } +#endif + + // Solve for w such that L'*w = r +#ifdef CHECK_L_TRANSPOSE_SOLVE + std::vector r_dense; + solution.to_dense(r_dense); +#endif + l_transpose_solve(solution); + +#ifdef CHECK_L_TRANSPOSE_SOLVE + std::vector solution_dense; + 
solution.to_dense(solution_dense); + l_transpose_multiply(solution_dense); + f_t max_error = 0.0; + for (i_t k = 0; k < L0_.m; ++k) + { + if (std::abs(solution_dense[k] - r_dense[k]) > 1e-4) + { + printf("B transpose solve L transpose solve error %e: index %d multiply %e rhs %e. update %d\n", std::abs(solution_dense[k] - r_dense[k]), k, solution_dense[k], r_dense[k], num_updates_); + } + + max_error = std::max(max_error, std::abs(solution_dense[k] - r_dense[k])); + } + if (max_error > 1e-4) + { + printf("B transpose solve L transpose solve residual %e\n", max_error); + } +#endif + // Compute y = P'*w + solution.inverse_permute_vector(row_permutation_); + return 0; +} + + + +template +i_t basis_update_mpf_t::u_transpose_solve(std::vector& rhs) const +{ + dual_simplex::upper_triangular_transpose_solve(U0_, rhs); + return 0; +} + +template +i_t basis_update_mpf_t::u_transpose_solve(sparse_vector_t& rhs) const +{ + const i_t m = L0_.m; + // U0'*x = y + // Solve U0'*x0 = y + //csc_matrix_t B(m, 1, 0); + rhs.to_csc(B_); + i_t top = sparse_triangle_solve(B_, 0, std::nullopt, xi_workspace_, U0_transpose_, x_workspace_.data()); + solve_to_sparse_vector(top, rhs); + return 0; +} + +template +i_t basis_update_mpf_t::l_transpose_solve(std::vector& rhs) const +{ + // L = L0 * T0 * T1 * ... * T_{num_updates_ - 1} + // L' = T_{num_updates_ - 1}^T * T_{num_updates_ - 2}^T * ... * T0^T * L0^T + // L'*x = b + // L0^T *x = T_0^-T * T_1^-T * ... 
* T_{num_updates_ - 1}^-T * b = b' + + // Compute b' + for (i_t k = num_updates_ - 1; k >= 0; --k) + { + // T_k^{-T} = ( I - v u^T/(1 + u^T v)) + // T_k^{-T} * b = b - v * (u^T * b) / (1 + u^T * v) = b - theta * v, theta = u^T b / mu + + const i_t u_col = 2 * k; + const i_t v_col = 2 * k + 1; + const f_t mu = mu_values_[k]; + + // dot = u^T * b + f_t dot = 0.0; + for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) + { + const i_t i = S_.i[p]; + dot += S_.x[p] * rhs[i]; + } + const f_t theta = dot / mu; + + for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) + { + const i_t i = S_.i[p]; + rhs[i] -= theta * S_.x[p]; + } + } + + // Solve for x such that L0^T * x = b' + dual_simplex::lower_triangular_transpose_solve(L0_, rhs); + + return 0; +} + +template +i_t basis_update_mpf_t::l_transpose_solve(sparse_vector_t& rhs) const +{ + const i_t m = L0_.m; + // L'*x = b + // L0^T * x = T_0^-T * T_1^-T * ... * T_{num_updates_ - 1}^-T * b = b' + + scatter_into_workspace(rhs); + i_t nz = rhs.i.size(); + +#ifdef CHECK_MULTIPLY + std::vector rhs_dense_0; + rhs.to_dense(rhs_dense_0); +#endif + // Compute b' + for (i_t k = num_updates_ - 1; k >= 0; --k) + { + // T_k^{-T} = ( I - v u^T/(1 + u^T v)) + // T_k^{-T} * b = b - v * (u^T * b) / (1 + u^T * v) = b - theta * v, theta = u^T b / mu + + const i_t u_col = 2 * k; + const i_t v_col = 2 * k + 1; + const f_t mu = mu_values_[k]; + + // dot = u^T * b + f_t dot = 0.0; + for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) + { + const i_t i = S_.i[p]; + if (xi_workspace_[i]) { + dot += S_.x[p] * x_workspace_[i]; + } + } + +#ifdef CHECK_MULTIPLY + f_t dot_check = 0.0; + for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) + { + const i_t i = S_.i[p]; + dot_check += S_.x[p] * rhs_dense_0[i]; + } + if (std::abs(dot - dot_check) > 1e-10) + { + printf("L transpose solve dot erorr: index %d dot %e dot check %e\n", k, dot, dot_check); + } +#endif + + const f_t theta = dot / mu; + + 
for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) + { + const i_t i = S_.i[p]; + if (!xi_workspace_[i]) + { + // Fill occured + xi_workspace_[i] = 1; + xi_workspace_[m + nz] = i; + nz++; + } + x_workspace_[i] -= theta * S_.x[p]; + } + +#ifdef CHECK_MULTIPLY + for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) + { + const i_t i = S_.i[p]; + rhs_dense_0[i] -= theta * S_.x[p]; + } +#endif + } + +#ifdef CHECK_MULTIPLY + for (i_t i = 0; i < m; ++i) + { + if (std::abs(rhs_dense_0[i] - x_workspace_[i]) > 1e-9) + { + printf("L transpose solve multiply error %e index %d sparse %e dense %e\n", std::abs(rhs_dense_0[i] - x_workspace_[i]), i, x_workspace_[i], rhs_dense_0[i]); + } + } +#endif + + + + // sort the indices and place into a sparse column + std::sort(xi_workspace_.begin() + m, xi_workspace_.begin() + m + nz, std::less()); + + //csc_matrix_t B(m, 1, nz); + B_.m = m; + B_.n = 1; + B_.col_start.resize(2); + B_.i.resize(nz); + B_.x.resize(nz); + i_t b_nz = 0; + for (i_t k = 0; k < nz; ++k) + { + const i_t i = xi_workspace_[m + k]; + const f_t b_val = x_workspace_[i]; + x_workspace_[i] = 0.0; + xi_workspace_[m + k] = 0; + xi_workspace_[i] = 0; + B_.i[b_nz] = i; + B_.x[b_nz] = b_val; + b_nz++; + } + B_.col_start[0] = 0; + B_.col_start[1] = b_nz; + + i_t top = sparse_triangle_solve(B_, 0, std::nullopt, xi_workspace_, L0_transpose_, x_workspace_.data()); + solve_to_sparse_vector(top, rhs); + +#ifdef CHECK_SPARSE_SOLVE + std::vector rhs_dense; + rhs.to_dense(rhs_dense); + + std::vector b_dense(m, 0.0); + for (i_t p = 0; p < nz; ++p) + { + const i_t i = B.i[p]; + b_dense[i] = B.x[p]; + } + matrix_vector_multiply(L0_transpose_, 1.0, rhs_dense, -1.0, b_dense); + if (vector_norm_inf(b_dense) > 1e-9) + { + printf("L0 transpose solve residual %e\n", vector_norm_inf(b_dense)); + } +#endif + + return 0; +} + + +template +i_t basis_update_mpf_t::b_solve(const std::vector& rhs, std::vector& solution) const +{ + const i_t m = L0_.m; + std::vector 
Lsol(m); + return b_solve(rhs, solution, Lsol); +} + +// Solve for x such that B*x = y +template +i_t basis_update_mpf_t::b_solve(const std::vector& rhs, std::vector& solution, std::vector& Lsol) const +{ + const i_t m = L0_.m; + // P*B = L*U + // B*x = b + // P*B*x = P*b + + permute_vector(row_permutation_, rhs, solution); + + // L*U*x = b' + // Solve for v such that L*v = b' +#ifdef CHECK_L_SOLVE + std::vector rhs_permuted = solution; +#endif + l_solve(solution); + Lsol = solution; + +#ifdef CHECK_L_SOLVE + std::vector Lsol_check = Lsol; + l_multiply(Lsol_check); + f_t max_lsol_err = 0.0; + for (i_t k = 0; k < m; ++k) + { + const f_t err = std::abs(Lsol_check[k] - rhs_permuted[k]); + max_lsol_err = std::max(max_lsol_err, err); + } + printf("B solve L multiply error %e\n", max_lsol_err); +#endif + + // Solve for x such that U*x = v + u_solve(solution); + +#ifdef CHECK_U_SOLVE + std::vector residual = Lsol; + matrix_vector_multiply(U0_, 1.0, solution, -1.0, residual); + f_t max_err = vector_norm_inf(residual); + printf("B solve U solve residual %e\n", max_err); +#endif + return 0; +} + +template +i_t basis_update_mpf_t::b_solve(const sparse_vector_t& rhs, sparse_vector_t& solution) const +{ + sparse_vector_t Lsol(1, 0); + return b_solve(rhs, solution, Lsol); +} + +template +i_t basis_update_mpf_t::b_solve(const sparse_vector_t& rhs, sparse_vector_t& solution, sparse_vector_t& Lsol) const +{ + const i_t m = L0_.m; + solution = rhs; + solution.inverse_permute_vector(inverse_row_permutation_); + +#ifdef CHECK_PERMUTATION + std::vector permuation_rhs; + rhs.to_dense(permuation_rhs); + std::vector finish_perm(m); + permute_vector(row_permutation_, permuation_rhs, finish_perm); + + std::vector solution_dense2; + solution.to_dense(solution_dense2); + for (i_t k = 0; k < m; ++k) + { + if (finish_perm[k] != solution_dense2[k]) + { + printf("B solve permutation error %e %e %d\n", finish_perm[k], solution_dense2[k], k); + } + } +#endif + + +#ifdef CHECK_L_SOLVE + std::vector 
l_solve_rhs; + solution.to_dense(l_solve_rhs); +#endif + l_solve(solution); + Lsol = solution; + +#ifdef CHECK_L_SOLVE + std::vector l_solve_dense; + Lsol.to_dense(l_solve_dense); + + l_multiply(l_solve_dense); + f_t max_err_l_solve = 0.0; + for (i_t k = 0; k < m; ++k) + { + const f_t err = std::abs(l_solve_dense[k] - l_solve_rhs[k]); + max_err_l_solve = std::max(max_err_l_solve, err); + } + if (max_err_l_solve > 1e-9) + { + printf("B solve L solve residual %e\n", max_err_l_solve); + } +#endif + +#ifdef CHECK_U_SOLVE + std::vector rhs_dense; + solution.to_dense(rhs_dense); +#endif + u_solve(solution); + +#ifdef CHECK_U_SOLVE + std::vector solution_dense; + solution.to_dense(solution_dense); + + matrix_vector_multiply(U0_, 1.0, solution_dense, -1.0, rhs_dense); + + const f_t max_err = vector_norm_inf(rhs_dense); + if (max_err > 1e-9) + { + printf("B solve U0 solve residual %e\n", max_err); + } +#endif + return 0; +} + +// Solve for x such that U*x = y +template +i_t basis_update_mpf_t::u_solve(std::vector& rhs) const +{ + const i_t m = L0_.m; + // U*x = y + dual_simplex::upper_triangular_solve(U0_, rhs); + return 0; +} + +template +i_t basis_update_mpf_t::u_solve(sparse_vector_t& rhs) const +{ + const i_t m = L0_.m; + // U*x = y + + // Solve U0*x = y + //csc_matrix_t B(m, 1, 0); + rhs.to_csc(B_); + i_t top = sparse_triangle_solve(B_, 0, std::nullopt, xi_workspace_, U0_, x_workspace_.data()); + solve_to_sparse_vector(top, rhs); + + return 0; +} +// Solve for x such that L*x = y +template +i_t basis_update_mpf_t::l_solve(std::vector& rhs) const +{ + const i_t m = L0_.m; + // L*x = y + // L0 * T0 * T1 * ... 
* T_{num_updates_ - 1} * x = y + + // First solve L0*x0 = y +#ifdef CHECK_L0_SOLVE + std::vector residual = rhs; +#endif +#ifdef CHECK_L_SOLVE + std::vector rhs_check = rhs; +#endif + dual_simplex::lower_triangular_solve(L0_, rhs); + +#ifdef CHECK_L0_SOLVE + matrix_vector_multiply(L0_, 1.0, rhs, -1.0, residual); + f_t max_err = vector_norm_inf(residual); + printf("L solve: L0 solve residual %e\n", max_err); +#endif + + // Then T0 * T1 * ... * T_{num_updates_ - 1} * x = x0 + // Or x = T_{num_updates}^{-1} * T_1^{-1} * T_0^{-1} x0 + for (i_t k = 0; k < num_updates_; ++k) + { + // T = I + u*v^T + // T^{-1} = I - u*v^T / (1 + v^T*u) + // T^{-1} * x = x - u*v^T * x / (1 + v^T*u) = x - theta * u, theta = v^T * x / (1 + v^T*u) = v^T x / mu + const f_t mu = mu_values_[k]; + const i_t u_col = 2 * k; + const i_t v_col = 2 * k + 1; + f_t dot = 0.0; + for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) + { + const i_t i = S_.i[p]; + dot += S_.x[p] * rhs[i]; + } + const f_t theta = dot / mu; + + for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) + { + const i_t i = S_.i[p]; + rhs[i] -= theta * S_.x[p]; + } + } + +#ifdef CHECK_L_SOLVE + std::vector inout = rhs; + l_multiply(inout); + f_t err_max = 0.0; + for (i_t k = 0; k < m; ++k) + { + const f_t err = std::abs(inout[k] - rhs_check[k]); + err_max = std::max(err_max, err); + } + printf("L solve residual %e\n", err_max); +#endif + + return 0; +} + +template +i_t basis_update_mpf_t::l_solve(sparse_vector_t& rhs) const +{ + const i_t m = L0_.m; + // L*x = y + // L0 * T0 * T1 * ... * T_{num_updates_ - 1} * x = y + + // First solve L0*x0 = y + //csc_matrix_t B(m, 1, 0); + rhs.to_csc(B_); + i_t top = sparse_triangle_solve(B_, 0, std::nullopt, xi_workspace_, L0_, x_workspace_.data()); + solve_to_workspace(top); // Uses xi_workspace_ and x_workspace_ to fill rhs + i_t nz = m - top; + // Then T0 * T1 * ... 
* T_{num_updates_ - 1} * x = x0 + // Or x = T_{num_updates}^{-1} * T_1^{-1} * T_0^{-1} x0 + for (i_t k = 0; k < num_updates_; ++k) + { + // T = I + u*v^T + // T^{-1} = I - u*v^T / (1 + v^T*u) + // T^{-1} * x = x - u*v^T * x / (1 + v^T*u) = x - theta * u, theta = v^T * x / (1 + v^T*u) = v^T x / mu + const f_t mu = mu_values_[k]; + const i_t u_col = 2 * k; + const i_t v_col = 2 * k + 1; + + f_t dot = 0.0; + for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) + { + const i_t i = S_.i[p]; + if (xi_workspace_[i]) + { + dot += S_.x[p] * x_workspace_[i]; + } + } + + const f_t theta = dot / mu; + + for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) + { + const i_t i = S_.i[p]; + if (!xi_workspace_[i]) + { + // Fill occured + xi_workspace_[i] = 1; + xi_workspace_[m + nz] = i; + nz++; + } + x_workspace_[i] -= theta * S_.x[p]; + } + } + + gather_into_sparse_vector(nz, rhs); + + return 0; +} + + + + +// Takes in utilde such that L*utilde = abar, where abar is the column to add to the basis +// and etilde such that U'*etilde = e_leaving +template +i_t basis_update_mpf_t::update(const std::vector& utilde, + const std::vector& etilde, + i_t leaving_index) +{ + const i_t m = L0_.m; +#if 0 + printf("Update: num_updates_ %d\n", num_updates_); +#endif + + // We are going to create a new matrix T = I + u*v^T + const i_t col_start = U0_.col_start[leaving_index]; + const i_t col_end = U0_.col_start[leaving_index + 1]; + std::vector u = utilde; + // u = utilde - U0(:, leaving_index) + for (i_t p = col_start; p < col_end; ++p) + { + const i_t i = U0_.i[p]; + u[i] -= U0_.x[p]; + } + + i_t u_nz = 0; + for (i_t i = 0; i < m; ++i) + { + if (u[i] != 0.0) + { + u_nz++; + } + } + + // v = etilde + i_t v_nz = 0; + for (i_t i = 0; i < m; ++i) + { + if (etilde[i] != 0.0) + { + v_nz++; + } + } + + i_t nz = u_nz + v_nz; + i_t S_start; + i_t S_nz; + grow_storage(nz, S_start, S_nz); +#if 0 + printf("Update: S_start %d S_nz %d\n", S_start, S_nz); +#endif + + i_t 
S_nz_start = S_nz; + + // Scatter u into S + for (i_t i = 0; i < m; ++i) + { + if (u[i] != 0.0) + { + S_.i[S_nz] = i; + S_.x[S_nz] = u[i]; + S_nz++; + } + } + S_.col_start[S_start + 1] = S_nz; + + // Scatter v into S + for (i_t i = 0; i < m; ++i) + { + if (etilde[i] != 0.0) + { + S_.i[S_nz] = i; + S_.x[S_nz] = etilde[i]; + S_nz++; + } + } + S_.col_start[S_start + 2] = S_nz; + + + // Compute mu = 1 + v^T * u + const f_t mu = 1.0 + sparse_dot(S_.i.data() + S_.col_start[S_start], + S_.x.data() + S_.col_start[S_start], + S_.col_start[S_start + 1] - S_.col_start[S_start], + S_.i.data() + S_.col_start[S_start + 1], + S_.x.data() + S_.col_start[S_start + 1], + v_nz); + +#ifdef CHECK_MU + const f_t mu_check = 1.0 + dot(etilde, u); + printf("Update: mu %e mu_check %e diff %e\n", mu, mu_check, std::abs(mu - mu_check)); +#endif + mu_values_.push_back(mu); + +#if 0 + printf("Update mu %e u nz %d v nz %d\n", mu_values_.back(), S_.col_start[S_start + 1] - S_.col_start[S_start], S_.col_start[S_start + 2] - S_.col_start[S_start + 1]); +#endif + num_updates_++; + + return 0; +} + +// Takes in utilde such that L*utilde = abar, where abar is the column to add to the basis +template +i_t basis_update_mpf_t::update(const sparse_vector_t& utilde, + sparse_vector_t& etilde, + i_t leaving_index) +{ + const i_t m = L0_.m; +#if 0 + printf("Update: num_updates_ %d\n", num_updates_); +#endif + + // We are going to create a new matrix T = I + u*v^T + // where u = utilde - U0(:, p) and v = etilde + + // Scatter utilde into the workspace + i_t nz = scatter_into_workspace(utilde); + + // Subtract the column of U0 corresponding to the leaving index + const i_t col_start = U0_.col_start[leaving_index]; + const i_t col_end = U0_.col_start[leaving_index + 1]; + for (i_t p = col_start; p < col_end; ++p) { + const i_t i = U0_.i[p]; + if (!xi_workspace_[i]) { + // Fill occured + xi_workspace_[i] = 1; + xi_workspace_[m + nz] = i; + nz++; + } + x_workspace_[i] -= U0_.x[p]; + } + + // Ensure the workspace 
is sorted + std::sort(xi_workspace_.begin() + m, xi_workspace_.begin() + m + nz, std::less()); + + // Gather the workspace into a column of S + i_t S_start; + i_t S_nz; + grow_storage(nz + etilde.i.size(), S_start, S_nz); + + i_t small_count = 0; + + for (i_t k = 0; k < nz; ++k) { + const i_t i = xi_workspace_[m + k]; + const f_t x_val = x_workspace_[i]; + xi_workspace_[i] = 0; + x_workspace_[i] = 0.0; + xi_workspace_[m + k] = 0; + + if (x_val == 0.0) { + continue; + } + S_.i[S_nz] = i; + S_.x[S_nz] = x_val; + S_nz++; + } + S_.col_start[S_start + 1] = S_nz; + + //printf("small count u percentage %.2f\n", small_count / (static_cast(nz)) * 100.0); + small_count = 0; + // Gather etilde into a column of S + etilde.sort(); + const i_t etilde_nz = etilde.i.size(); + for (i_t k = 0; k < etilde_nz; ++k) { + if (etilde.x[k] == 0.0) { + continue; + } + S_.i[S_nz] = etilde.i[k]; + S_.x[S_nz] = etilde.x[k]; + S_nz++; + } + S_.col_start[S_start + 2] = S_nz; + + //printf("small count etilde percentage %.2f\n", small_count / (static_cast(etilde_nz)) * 100.0); + + + // Compute mu = 1 + v^T * u + mu_values_.push_back(1.0 + sparse_dot(S_.i.data() + S_.col_start[S_start], + S_.x.data() + S_.col_start[S_start], + S_.col_start[S_start + 1] - S_.col_start[S_start], + S_.i.data() + S_.col_start[S_start + 1], + S_.x.data() + S_.col_start[S_start + 1], + S_.col_start[S_start + 2] - S_.col_start[S_start + 1])); +#if 0 + printf("Update mu %e u nz %d v nz %d\n", mu_values_.back(), S_.col_start[S_start + 1] - S_.col_start[S_start], S_.col_start[S_start + 2] - S_.col_start[S_start + 1]);printf("Update mu %e\n", mu_values_.back()); +#endif + + num_updates_++; + + return 0; +} + +template +void basis_update_mpf_t::l_multiply(std::vector& inout) const +{ + const i_t m = L0_.m; + // L*x = y + // L0 * T0 * T1 * ... 
* T_{num_updates_ - 1} * x = y + + for (i_t k = num_updates_ - 1; k >= 0; --k) + { + // T_k = ( I + u v^T) + // T_k * b = b + u * (v^T * b) = b + theta * u, theta = v^T b + const i_t u_col = 2 * k; + const i_t v_col = 2 * k + 1; + const f_t mu = mu_values_[k]; + + // dot = v^T b + f_t dot = 0.0; + for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) + { + const i_t i = S_.i[p]; + dot += S_.x[p] * inout[i]; + } + const f_t theta = dot; + for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) + { + const i_t i = S_.i[p]; + inout[i] += theta * S_.x[p]; + } + } + + std::vector out(m, 0.0); + matrix_vector_multiply(L0_, 1.0, inout, 0.0, out); + inout = out; +} + +template +void basis_update_mpf_t::l_transpose_multiply(std::vector& inout) const +{ + const i_t m = L0_.m; + std::vector out(m, 0.0); + matrix_vector_multiply(L0_transpose_, 1.0, inout, 0.0, out); + + inout = out; + + for (i_t k = 0; k < num_updates_; ++k) + { + const i_t u_col = 2 * k; + const i_t v_col = 2 * k + 1; + const f_t mu = mu_values_[k]; + + // T_k = ( I + u v^T) + // T_k^T = ( I + v u^T) + // T_k^T * b = b + v * (u^T * b) = b + theta * v, theta = u^T * b + f_t dot = 0.0; + for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) + { + const i_t i = S_.i[p]; + dot += S_.x[p] * inout[i]; + } + const f_t theta = dot; + for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) + { + const i_t i = S_.i[p]; + inout[i] += theta * S_.x[p]; + } + } +} + +template +void basis_update_mpf_t::multiply_lu(csc_matrix_t& out) const +{ + + // P*B = L*U + // B = P'*L*U + const i_t m = L0_.m; + + out.col_start.resize(m + 1); + out.col_start[0] = 0; + out.i.clear(); + out.x.clear(); + + i_t B_nz = 0; + + for (i_t j = 0; j < m; ++j) + { + // B(:, j) = L*U(:, j) + out.col_start[j] = B_nz; + + std::vector Uj(m, 0.0); + for (i_t p = U0_.col_start[j]; p < U0_.col_start[j + 1]; ++p) + { + const i_t i = U0_.i[p]; + Uj[i] = U0_.x[p]; + } + l_multiply(Uj); + for (i_t i = 0; i < 
m; ++i) + { + if (Uj[i] != 0.0) + { + out.i.push_back(row_permutation_[i]); + out.x.push_back(Uj[i]); + B_nz++; + } + } + } + out.col_start[m] = B_nz; + + out.m = m; + out.n = m; + out.nz_max = B_nz; + +} + #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE template class basis_update_t; +template class basis_update_mpf_t; #endif } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp index 783edc38e2..4807c43da2 100644 --- a/cpp/src/dual_simplex/basis_updates.hpp +++ b/cpp/src/dual_simplex/basis_updates.hpp @@ -167,4 +167,163 @@ class basis_update_t { mutable csc_matrix_t L0_transpose_; // Needed for sparse solves }; + +template +class basis_update_mpf_t { + public: + basis_update_mpf_t(const csc_matrix_t& Linit, + const csc_matrix_t& Uinit, + const std::vector& p, + const i_t refactor_frequency) + : L0_(Linit), + U0_(Uinit), + row_permutation_(p), + inverse_row_permutation_(p.size()), + S_(Linit.m, 1, 0), + col_permutation_(Linit.m), + inverse_col_permutation_(Linit.m), + xi_workspace_(2*Linit.m, 0), + x_workspace_(Linit.m, 0.0), + U0_transpose_(1, 1, 1), + L0_transpose_(1, 1, 1), + refactor_frequency_(refactor_frequency), + B_(Linit.m, 1, 0) + { + inverse_permutation(row_permutation_, inverse_row_permutation_); + clear(); + compute_transposes(); + } + + i_t reset(const csc_matrix_t& Linit, + const csc_matrix_t& Uinit, + const std::vector& p) + { + L0_ = Linit; + U0_ = Uinit; + assert(p.size() == Linit.m); + row_permutation_ = p; + inverse_permutation(row_permutation_, inverse_row_permutation_); + clear(); + compute_transposes(); + return 0; + } + + // Solves for x such that B*x = b, where B is the basis matrix + i_t b_solve(const std::vector& rhs, std::vector& solution) const; + i_t b_solve(const sparse_vector_t& rhs, sparse_vector_t& solution) const; + i_t b_solve(const std::vector& rhs, + std::vector& solution, + std::vector& Lsol) const; + i_t b_solve(const sparse_vector_t& rhs, + 
sparse_vector_t& solution, + sparse_vector_t& Lsol) const; + + // Solves for y such that B'*y = c, where B is the basis matrix + i_t b_transpose_solve(const std::vector& rhs, std::vector& solution) const; + i_t b_transpose_solve(const sparse_vector_t& rhs, sparse_vector_t& solution) const; + i_t b_transpose_solve(const std::vector& rhs, std::vector& solution, std::vector& UTsol) const; + i_t b_transpose_solve(const sparse_vector_t& rhs, sparse_vector_t& solution, sparse_vector_t& UTsol) const; + // Solve for x such that L*x = y + i_t l_solve(std::vector& rhs) const; + + // Solve for x such that L*x = y + i_t l_solve(sparse_vector_t& rhs) const; + + // Solve for x such that L'*x = y + i_t l_transpose_solve(std::vector& rhs) const; + + // Solve for x such that L'*x = y + i_t l_transpose_solve(sparse_vector_t& rhs) const; + + // Solve for x such that U*x = y + i_t u_solve(std::vector& rhs) const; + + // Solve for x such that U*x = y + i_t u_solve(sparse_vector_t& rhs) const; + + // Solve for x such that U'*x = y + i_t u_transpose_solve(std::vector& rhs) const; + + // Solve for x such that U'*x = y + i_t u_transpose_solve(sparse_vector_t& rhs) const; + + // Replace the column B(:, leaving_index) with the vector abar. Pass in utilde such that L*utilde + // = abar + i_t update(const std::vector& utilde, const std::vector& etilde, i_t leaving_index); + + // Replace the column B(:, leaving_index) with the vector abar. 
Pass in utilde such that L*utilde + // = abar + i_t update(const sparse_vector_t& utilde, sparse_vector_t& etilde, i_t leaving_index); + + + i_t num_updates() const { return num_updates_; } + + const std::vector& row_permutation() const { return row_permutation_; } + + void compute_transposes() + { + L0_.transpose(L0_transpose_); + U0_.transpose(U0_transpose_); + } + + void multiply_lu(csc_matrix_t& out) const; + + private: + void clear() + { + pivot_indices_.clear(); + pivot_indices_.reserve(L0_.m); + for (i_t k = 0; k < L0_.m; ++k) { + col_permutation_[k] = k; + inverse_col_permutation_[k] = k; + } + S_.col_start.resize(refactor_frequency_ + 1); + S_.col_start[0] = 0; + S_.col_start[1] = 0; + S_.i.clear(); + S_.x.clear(); + mu_values_.clear(); + mu_values_.reserve(refactor_frequency_); + num_updates_ = 0; + } + void grow_storage(i_t nz, i_t& S_start, i_t& S_nz); + i_t index_map(i_t leaving) const; + f_t u_diagonal(i_t j) const; + i_t place_diagonals(); + f_t update_lower(const std::vector& sind, const std::vector& sval, i_t leaving); + i_t update_upper(const std::vector& ind, const std::vector& baru, i_t t); + i_t lower_triangular_multiply(const csc_matrix_t& in, + i_t in_col, + csc_matrix_t& out, + i_t out_col) const; + + void solve_to_workspace(i_t top) const; + void solve_to_sparse_vector(i_t top, sparse_vector_t& out) const; + i_t scatter_into_workspace(const sparse_vector_t& in) const; + void gather_into_sparse_vector(i_t nz, sparse_vector_t& out) const; + + void l_multiply(std::vector& inout) const; + void l_transpose_multiply(std::vector& inout) const; + + i_t num_updates_; // Number of rank-1 updates to L0 + i_t refactor_frequency_; // Average updates before refactoring + mutable csc_matrix_t L0_; // Sparse lower triangular matrix from initial factorization + mutable csc_matrix_t U0_; // Sparse upper triangular matrix from initial factorization + std::vector row_permutation_; // Row permutation from initial factorization L*U = P*B + std::vector 
inverse_row_permutation_; // Inverse row permutation from initial factorization L*U = P*B + std::vector pivot_indices_; // indicies for rank-1 updates to L + csc_matrix_t S_; // stores information about the rank-1 updates to L + std::vector mu_values_; // stores information about the rank-1 updates to L + std::vector col_permutation_; // symmetric permuation q used in U(q, q) represents Q + std::vector inverse_col_permutation_; // inverse permutation represents Q' + mutable std::vector xi_workspace_; + mutable std::vector x_workspace_; + mutable csc_matrix_t U0_transpose_; // Needed for sparse solves + mutable csc_matrix_t L0_transpose_; // Needed for sparse solves + mutable csc_matrix_t B_; // Needed for sparse solves +}; + + + + } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp index 5f0ccf3b27..28b646161c 100644 --- a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp +++ b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp @@ -35,8 +35,11 @@ i_t bound_flipping_ratio_test_t::compute_breakpoints(std::vector& i_t idx = 0; while (idx == 0 && pivot_tol >= 1e-12) { - for (i_t k = 0; k < n - m; ++k) { - const i_t j = nonbasic_list_[k]; + //for (i_t k = 0; k < n - m; ++k) { + // const i_t j = nonbasic_list_[k]; + for (i_t h = 0; h < delta_z_indices_.size(); ++h) { + const i_t j = delta_z_indices_[h]; + const i_t k = nonbasic_mark_[j]; if (vstatus_[j] == variable_status_t::NONBASIC_FIXED) { continue; } if (vstatus_[j] == variable_status_t::NONBASIC_LOWER && delta_z_[j] < -pivot_tol) { indicies[idx] = k; @@ -92,7 +95,7 @@ i_t bound_flipping_ratio_test_t::single_pass(i_t start, const i_t j = entering_index = nonbasic_list_[nonbasic_entering]; constexpr bool verbose = false; - if (lower_[j] > -inf && upper_[j] < inf && lower_[j] != upper_[j]) { + if (bounded_variables_[j]) { const f_t interval = upper_[j] - lower_[j]; const f_t delta_slope = 
std::abs(delta_z_[j]) * interval; if constexpr (verbose) { @@ -112,13 +115,14 @@ template i_t bound_flipping_ratio_test_t::compute_step_length(f_t& step_length, i_t& nonbasic_entering) { - i_t m = m_; - i_t n = n_; + const i_t m = m_; + const i_t n = n_; + const i_t nz = delta_z_indices_.size(); constexpr bool verbose = false; // Compute the initial set of breakpoints - std::vector indicies(n - m); - std::vector ratios(n - m); + std::vector indicies(nz); + std::vector ratios(nz); i_t num_breakpoints = compute_breakpoints(indicies, ratios); if constexpr (verbose) { settings_.log.printf("Initial breakpoints %d\n", num_breakpoints); } if (num_breakpoints == 0) { @@ -217,7 +221,7 @@ void bound_flipping_ratio_test_t::heap_passes(const std::vector& const i_t j = entering_index = nonbasic_list_[nonbasic_entering]; step_length = current_ratios[heap_index]; - if (lower_[j] > -inf && upper_[j] < inf && lower_[j] != upper_[j]) { + if (bounded_variables_[j]) { // We have a bounded variable const f_t interval = upper_[j] - lower_[j]; const f_t delta_slope = std::abs(delta_z_[j]) * interval; diff --git a/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp b/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp index c949302562..f581bd4d49 100644 --- a/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp +++ b/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp @@ -33,10 +33,13 @@ class bound_flipping_ratio_test_t { f_t initial_slope, const std::vector& lower, const std::vector& upper, + const std::vector& bounded_variables, const std::vector& vstatus, const std::vector& nonbasic_list, - std::vector& z, - std::vector& delta_z) + const std::vector& z, + const std::vector& delta_z, + const std::vector& delta_z_indices, + const std::vector& nonbasic_mark) : settings_(settings), start_time_(start_time), m_(m), @@ -44,10 +47,13 @@ class bound_flipping_ratio_test_t { slope_(initial_slope), lower_(lower), upper_(upper), + bounded_variables_(bounded_variables), vstatus_(vstatus), 
nonbasic_list_(nonbasic_list), z_(z), - delta_z_(delta_z) + delta_z_(delta_z), + delta_z_indices_(delta_z_indices), + nonbasic_mark_(nonbasic_mark) { } @@ -73,10 +79,13 @@ class bound_flipping_ratio_test_t { const std::vector& lower_; const std::vector& upper_; + const std::vector& bounded_variables_; const std::vector& nonbasic_list_; const std::vector& vstatus_; const std::vector& z_; const std::vector& delta_z_; + const std::vector& delta_z_indices_; + const std::vector& nonbasic_mark_; const simplex_solver_settings_t& settings_; diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 1d1398b98b..ec5cf9add7 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -36,25 +36,43 @@ namespace cuopt::linear_programming::dual_simplex { namespace phase2 { template -f_t l2_dual_residual(const lp_problem_t& lp, const lp_solution_t& solution) +void compute_delta_z(const csc_matrix_t& A_transpose, + const sparse_vector_t& delta_y, + i_t leaving_index, + i_t direction, + std::vector& nonbasic_mark, + std::vector& delta_z_mark, + std::vector& delta_z_indices, + std::vector& delta_z) { - std::vector dual_residual = solution.z; - const i_t n = lp.num_cols; - // dual_residual <- z - c - for (i_t j = 0; j < n; j++) { - dual_residual[j] -= lp.objective[j]; + // delta_zN = - N'*delta_y + const i_t nz_delta_y = delta_y.i.size(); + for (i_t k = 0; k < nz_delta_y; k++) + { + const i_t i = delta_y.i[k]; + const f_t delta_y_i = delta_y.x[k]; + if (std::abs(delta_y_i) < 1e-13) { + continue; + } + const i_t row_start = A_transpose.col_start[i]; + const i_t row_end = A_transpose.col_start[i + 1]; + for (i_t p = row_start; p < row_end; ++p) + { + const i_t j = A_transpose.i[p]; + if (nonbasic_mark[j] >= 0) + { + delta_z[j] -= delta_y_i * A_transpose.x[p]; + if (!delta_z_mark[j]) + { + delta_z_mark[j] = 1; + delta_z_indices.push_back(j); + } + } + } } - // dual_residual <- 1.0*A'*y + 1.0*(z - c) - 
matrix_transpose_vector_multiply(lp.A, 1.0, solution.y, 1.0, dual_residual); - return vector_norm2(dual_residual); -} -template -f_t l2_primal_residual(const lp_problem_t& lp, const lp_solution_t& solution) -{ - std::vector primal_residual = lp.rhs; - matrix_vector_multiply(lp.A, 1.0, solution.x, -1.0, primal_residual); - return vector_norm2(primal_residual); + // delta_zB = sigma*ei + delta_z[leaving_index] = direction; } template @@ -89,55 +107,55 @@ void compute_reduced_cost_update(const lp_problem_t& lp, } } + template -void compute_reduced_cost_update(const lp_problem_t& lp, - const csc_matrix_t& Atranspose, - const std::vector& basic_list, - const std::vector& nonbasic_list, - const sparse_vector_t& delta_y, - i_t leaving_index, - i_t direction, - std::vector& nonbasic, - std::vector& delta_z) +void clear_delta_z(i_t entering_index, + i_t leaving_index, + std::vector& delta_z_mark, + std::vector& delta_z_indices, + std::vector& delta_z) { - const i_t m = lp.num_rows; - const i_t n = lp.num_cols; - - // delta_zB = sigma*ei, O(m) - for (i_t k = 0; k < m; k++) { - const i_t j = basic_list[k]; - delta_z[j] = 0; + for (i_t k = 0; k < delta_z_indices.size(); k++) + { + const i_t j = delta_z_indices[k]; + delta_z[j] = 0.0; + delta_z_mark[j] = 0; } - delta_z[leaving_index] = direction; + delta_z[entering_index] = 0.0; + delta_z[leaving_index] = 0.0; + delta_z_indices.clear(); +} - // Mark the nonbasic variables O(n - m) - for (i_t k = 0; k < n - m; k++) { - const i_t j = nonbasic_list[k]; - nonbasic[j] = 1; - } - // delta_zN = - N'*delta_y - // = - sum_{k : delta_y[k] != 0} delta_y[k] * Atranspose(:, nonbasic_list[k]) - const i_t nz_dy = delta_y.i.size(); - for (i_t k = 0; k < nz_dy; k++) { - const i_t i = delta_y.i[k]; - const i_t row_start = Atranspose.col_start[i]; - const i_t row_end = Atranspose.col_start[i + 1]; - for (i_t p = row_start; p < row_end; ++p) { - const i_t j = Atranspose.i[p]; - if (nonbasic[j]) { - delta_z[j] -= delta_y.x[i] * Atranspose.x[p]; - } 
- } +template +f_t l2_dual_residual(const lp_problem_t& lp, const lp_solution_t& solution) +{ + std::vector dual_residual = solution.z; + const i_t n = lp.num_cols; + // dual_residual <- z - c + for (i_t j = 0; j < n; j++) { + dual_residual[j] -= lp.objective[j]; } + // dual_residual <- 1.0*A'*y + 1.0*(z - c) + matrix_transpose_vector_multiply(lp.A, 1.0, solution.y, 1.0, dual_residual); + return vector_norm2(dual_residual); +} +template +f_t l2_primal_residual(const lp_problem_t& lp, const lp_solution_t& solution) +{ + std::vector primal_residual = lp.rhs; + matrix_vector_multiply(lp.A, 1.0, solution.x, -1.0, primal_residual); + return vector_norm2(primal_residual); } + + template void compute_dual_solution_from_basis(const lp_problem_t& lp, - basis_update_t& ft, + basis_update_mpf_t& ft, const std::vector& basic_list, const std::vector& nonbasic_list, std::vector& y, @@ -540,29 +558,31 @@ i_t bound_flipping_ratio_test(const lp_problem_t& lp, template i_t flip_bounds(const lp_problem_t& lp, const simplex_solver_settings_t& settings, + const std::vector& bounded_variables, const std::vector& objective, const std::vector& z, + const std::vector& delta_z_indices, const std::vector& nonbasic_list, i_t entering_index, std::vector& vstatus, std::vector& delta_x, - std::vector& atilde) + std::vector& mark, + std::vector& atilde, + std::vector& atilde_index) { - f_t delta_obj = 0; + //f_t delta_obj = 0; i_t num_flipped = 0; - for (i_t j : nonbasic_list) { + for (i_t j : delta_z_indices) { if (j == entering_index) { continue; } - const bool bounded = - (lp.lower[j] > -inf) && (lp.upper[j] < inf) && (lp.lower[j] != lp.upper[j]); - if (!bounded) { continue; } + if (!bounded_variables[j]) { continue; } // x_j is now a nonbasic bounded variable that will not enter the basis this // iteration const f_t dual_tol = settings.dual_tol; // lower to 1e-7 or less will cause 25fv47 and d2q06c to cycle if (vstatus[j] == variable_status_t::NONBASIC_LOWER && z[j] < -dual_tol) { const f_t 
delta = lp.upper[j] - lp.lower[j]; - scatter_dense(lp.A, j, -delta, atilde); - delta_obj += delta * objective[j]; + scatter_dense(lp.A, j, -delta, atilde, mark, atilde_index); + //delta_obj += delta * objective[j]; delta_x[j] += delta; vstatus[j] = variable_status_t::NONBASIC_UPPER; #ifdef BOUND_FLIP_DEBUG @@ -572,8 +592,8 @@ i_t flip_bounds(const lp_problem_t& lp, num_flipped++; } else if (vstatus[j] == variable_status_t::NONBASIC_UPPER && z[j] > dual_tol) { const f_t delta = lp.lower[j] - lp.upper[j]; - scatter_dense(lp.A, j, -delta, atilde); - delta_obj += delta * objective[j]; + scatter_dense(lp.A, j, -delta, atilde, mark, atilde_index); + //delta_obj += delta * objective[j]; delta_x[j] += delta; vstatus[j] = variable_status_t::NONBASIC_LOWER; #ifdef BOUND_FLIP_DEBUG @@ -591,7 +611,7 @@ i_t initialize_steepest_edge_norms(const lp_problem_t& lp, const simplex_solver_settings_t& settings, const f_t start_time, const std::vector& basic_list, - basis_update_t& ft, + basis_update_mpf_t& ft, std::vector& delta_y_steepest_edge) { // TODO: Skip this initialization when starting from a slack basis @@ -642,20 +662,23 @@ i_t initialize_steepest_edge_norms(const lp_problem_t& lp, settings.log.printf("Found %d singleton rows in %.2fs\n", num_singleton_rows, toc(start_singleton_rows)); - ft.compute_transposes(); + //ft.compute_transposes(); #endif f_t last_log = tic(); for (i_t k = 0; k < m; ++k) { - //std::vector ei(m); +#if 0 + std::vector ei(m); + ei[k] = -1.0; + std::vector dy(m, 0.0); +#else sparse_vector_t sparse_ei(m, 1); sparse_ei.x[0] = -1.0; sparse_ei.i[0] = k; - //std::vector dy(m, 0.0); +#endif const i_t j = basic_list[k]; - //ei[k] = -1.0; f_t init = -1.0; - if (1 && row_degree[mapping[k]] == 1) { + if (row_degree[mapping[k]] == 1) { const i_t u = mapping[k]; //settings.log.printf("Singleton row %d u %d\n", k, u); const f_t alpha = coeff[k]; @@ -687,15 +710,17 @@ i_t initialize_steepest_edge_norms(const lp_problem_t& lp, } #endif } else { - 
//ft.b_transpose_solve(ei, dy); - //init = vector_norm2_squared(dy); +#if COMPARE_WITH_DENSE + ft.b_transpose_solve(ei, dy); + init = vector_norm2_squared(dy); +#else sparse_vector_t sparse_dy(m, 0); ft.b_transpose_solve(sparse_ei, sparse_dy); - //settings.log.printf("Steepest edge norm %d nz %d\n", k, sparse_dy.x.size()); f_t my_init = 0.0; for (i_t p = 0; p < sparse_dy.x.size(); ++p) { my_init += sparse_dy.x[p] * sparse_dy.x[p]; } +#endif #if COMPARE_WITH_DENSE if (std::abs(init - my_init) > 1e-12) { settings.log.printf("Singleton row %d error %.16e init %.16e my_init %.16e\n", k, std::abs(init - my_init), init, my_init); @@ -726,24 +751,28 @@ i_t initialize_steepest_edge_norms(const lp_problem_t& lp, template i_t update_steepest_edge_norms(const simplex_solver_settings_t& settings, const std::vector& basic_list, - const basis_update_t& ft, + const basis_update_mpf_t& ft, i_t direction, - const std::vector& delta_y, - const std::vector& scaled_delta_xB, + const sparse_vector_t& delta_y_sparse, + f_t dy_norm_squared, + const sparse_vector_t& scaled_delta_xB, i_t basic_leaving_index, i_t entering_index, + std::vector& v, std::vector& delta_y_steepest_edge) { - i_t m = delta_y.size(); + i_t m = basic_list.size(); - sparse_vector_t delta_y_sparse(delta_y); + //sparse_vector_t delta_y_sparse(delta_y); const i_t delta_y_nz = delta_y_sparse.i.size(); + sparse_vector_t v_sparse(m, 0); - std::vector v(m, 0.0); if (delta_y_nz > 0.25 * m) { // B^T delta_y = - direction * e_basic_leaving_index // We want B v = - B^{-T} e_basic_leaving_index + std::vector delta_y; + delta_y_sparse.to_dense(delta_y); ft.b_solve(delta_y, v); // if direction = -1 we need to scale v if (direction == -1) { @@ -754,7 +783,6 @@ i_t update_steepest_edge_norms(const simplex_solver_settings_t& settin } else { - sparse_vector_t v_sparse(m, 0); ft.b_solve(delta_y_sparse, v_sparse); if (direction == -1) { for (i_t k = 0; k < v_sparse.i.size(); ++k) { @@ -764,7 +792,7 @@ i_t 
update_steepest_edge_norms(const simplex_solver_settings_t& settin v_sparse.scatter(v); } - const f_t dy_norm_squared = vector_norm2_squared(delta_y); + //const f_t dy_norm_squared = delta_y_sparse.norm2_squared(); const i_t leaving_index = basic_list[basic_leaving_index]; const f_t prev_dy_norm_squared = delta_y_steepest_edge[leaving_index]; #ifdef STEEPEST_EDGE_DEBUG @@ -781,17 +809,27 @@ i_t update_steepest_edge_norms(const simplex_solver_settings_t& settin // B*w = A(:, leaving_index) // B*scaled_delta_xB = -A(:, leaving_index) so w = -scaled_delta_xB - const f_t wr = -scaled_delta_xB[basic_leaving_index]; + f_t scale; + const i_t scaled_delta_xB_nz = scaled_delta_xB.i.size(); + for (i_t h = 0; h < scaled_delta_xB_nz; ++h) { + const i_t k = scaled_delta_xB.i[h]; + if (k == basic_leaving_index) { + scale = scaled_delta_xB.x[h]; + break; + } + } + const f_t wr = -scale; + //const f_t wr = -scaled_delta_xB.x[basic_leaving_index]; if (wr == 0) { return -1; } const f_t omegar = dy_norm_squared / (wr * wr); - - for (i_t k = 0; k < m; ++k) { + for (i_t h = 0; h < scaled_delta_xB_nz; ++h) { + const i_t k = scaled_delta_xB.i[h]; const i_t j = basic_list[k]; if (k == basic_leaving_index) { - const f_t w_squared = scaled_delta_xB[k] * scaled_delta_xB[k]; + const f_t w_squared = scaled_delta_xB.x[h] * scaled_delta_xB.x[h]; delta_y_steepest_edge[j] = (1.0 / w_squared) * dy_norm_squared; } else { - const f_t wk = -scaled_delta_xB[k]; + const f_t wk = -scaled_delta_xB.x[h]; f_t new_val = delta_y_steepest_edge[j] + wk * (2.0 * v[k] / wr + wk * omegar); new_val = std::max(new_val, 1e-4); #ifdef STEEPEST_EDGE_DEBUG @@ -812,6 +850,11 @@ i_t update_steepest_edge_norms(const simplex_solver_settings_t& settin } } + const i_t v_nz = v_sparse.i.size(); + for (i_t k = 0; k < v_nz; ++k) { + v[v_sparse.i[k]] = 0.0; + } + return 0; } @@ -819,18 +862,18 @@ i_t update_steepest_edge_norms(const simplex_solver_settings_t& settin template i_t compute_steepest_edge_norm_entering(const 
simplex_solver_settings_t& setttings, i_t m, - const basis_update_t& ft, + const basis_update_mpf_t& ft, i_t basic_leaving_index, i_t entering_index, std::vector& steepest_edge_norms) { -#ifdef CHECK_HYPERSPARSE +#if 0 std::vector es(m); es[basic_leaving_index] = -1.0; std::vector delta_ys(m); ft.b_transpose_solve(es, delta_ys); steepest_edge_norms[entering_index] = vector_norm2_squared(delta_ys); -#endif +#else sparse_vector_t es_sparse(m, 1); es_sparse.i[0] = basic_leaving_index; @@ -838,6 +881,7 @@ i_t compute_steepest_edge_norm_entering(const simplex_solver_settings_t delta_ys_sparse(m, 0); ft.b_transpose_solve(es_sparse, delta_ys_sparse); steepest_edge_norms[entering_index] = delta_ys_sparse.norm2_squared(); +#endif #ifdef STEEPEST_EDGE_DEBUG settings.log.printf("Steepest edge norm %e for entering j %d at i %d\n", @@ -851,7 +895,7 @@ i_t compute_steepest_edge_norm_entering(const simplex_solver_settings_t i_t check_steepest_edge_norms(const simplex_solver_settings_t& settings, const std::vector& basic_list, - const basis_update_t& ft, + const basis_update_mpf_t& ft, const std::vector& delta_y_steepest_edge) { const i_t m = basic_list.size(); @@ -875,6 +919,7 @@ i_t check_steepest_edge_norms(const simplex_solver_settings_t& setting template i_t compute_perturbation(const lp_problem_t& lp, const simplex_solver_settings_t& settings, + const std::vector& delta_z_indices, std::vector& z, std::vector& objective, f_t& sum_perturb) @@ -884,7 +929,9 @@ i_t compute_perturbation(const lp_problem_t& lp, const f_t tight_tol = settings.tight_tol; i_t num_perturb = 0; sum_perturb = 0.0; - for (i_t j = 0; j < n; ++j) { + //for (i_t j = 0; j < n; ++j) { + for (i_t k = 0; k < delta_z_indices.size(); ++k) { + const i_t j = delta_z_indices[k]; if (lp.upper[j] == inf && lp.lower[j] > -inf && z[j] < -tight_tol) { const f_t violation = -z[j]; z[j] += violation; // z[j] <- 0 @@ -1167,7 +1214,7 @@ void set_primal_variables_on_bounds(const lp_problem_t& lp, template void 
prepare_optimality(const lp_problem_t& lp, const simplex_solver_settings_t& settings, - basis_update_t& ft, + basis_update_mpf_t& ft, const std::vector& objective, const std::vector& basic_list, const std::vector& nonbasic_list, @@ -1206,6 +1253,9 @@ void prepare_optimality(const lp_problem_t& lp, y = unperturbed_y; perturbation = 0.0; } + else { + settings.log.printf("Failed to remove perturbation of %.2e.\n", perturbation); + } } } @@ -1304,7 +1354,7 @@ dual::status_t dual_phase2(i_t phase, if (toc(start_time) > settings.time_limit) { return dual::status_t::TIME_LIMIT; } assert(q.size() == m); reorder_basic_list(q, basic_list); - basis_update_t ft(L, U, p); + basis_update_mpf_t ft(L, U, p, settings.refactor_frequency); std::vector c_basic(m); for (i_t k = 0; k < m; ++k) { @@ -1442,10 +1492,51 @@ dual::status_t dual_phase2(i_t phase, const i_t iter_limit = settings.iteration_limit; std::vector delta_y(m, 0.0); - std::vector delta_z(n); - std::vector delta_x(n); + std::vector delta_z(n, 0.0); + std::vector delta_x(n, 0.0); + std::vector delta_x_flip(n, 0.0); + std::vector atilde(m, 0.0); + std::vector atilde_mark(m, 0); + std::vector atilde_index; + std::vector nonbasic_mark(n, -1); + std::vector delta_z_mark(n, 0); + std::vector delta_z_indices; + std::vector v(m, 0.0); + + + for (i_t k = 0; k < n - m; k++) { + nonbasic_mark[nonbasic_list[k]] = k; + } + + std::vector bounded_variables(n, false); + for (i_t j = 0; j < n; j++) { + const bool bounded = + (lp.lower[j] > -inf) && (lp.upper[j] < inf) && (lp.lower[j] != lp.upper[j]); + bounded_variables[j] = bounded; + } + + + csc_matrix_t A_transpose(1, 1, 0); + lp.A.transpose(A_transpose); + + f_t obj = compute_objective(lp, x); + settings.log.printf("Initial objective %e\n", obj); + const i_t start_iter = iter; + f_t bfrt_time = 0; + f_t pricing_time = 0; + f_t btran_time = 0; + f_t ftran_time = 0; + f_t flip_time = 0; + f_t delta_z_time = 0; + f_t se_norms_time = 0; + f_t se_entering_time = 0; + f_t lu_update_time = 
0; + f_t perturb_time = 0; + f_t vector_time = 0; + f_t objective_time = 0; + while (iter < iter_limit) { // Pricing i_t direction; @@ -1453,6 +1544,7 @@ dual::status_t dual_phase2(i_t phase, f_t primal_infeasibility; i_t leaving_index = -1; f_t max_val; + f_t price_start_time = tic(); if (settings.use_steepest_edge_pricing) { leaving_index = phase2::steepest_edge_pricing(lp, settings, @@ -1468,6 +1560,7 @@ dual::status_t dual_phase2(i_t phase, leaving_index = phase2::phase2_pricing( lp, settings, x, basic_list, direction, basic_leaving_index, primal_infeasibility); } + pricing_time += toc(price_start_time); if (leaving_index == -1) { phase2::prepare_optimality(lp, settings, @@ -1490,19 +1583,43 @@ dual::status_t dual_phase2(i_t phase, // BTran // TODO: replace with sparse solve. - std::vector delta_y(m); -#ifdef CHECK_HYPERSPARSE + //std::vector delta_y(m); + f_t btran_start_time = tic(); +#if 0 std::vector ei(m, 0.0); ei[basic_leaving_index] = -direction; // BT*delta_y = -delta_zB = -sigma*ei - ft.b_transpose_solve(ei, delta_y); -#endif + std::vector UTsol; + ft.b_transpose_solve(ei, delta_y, UTsol); + + if (ei[basic_leaving_index] != 1.0) + { + // Need to flip the sign of UTsol + for (i_t k = 0; k < m; ++k) + { + UTsol[k] *= -1.0; + } + } +#else sparse_vector_t ei_sparse(m, 1); ei_sparse.i[0] = basic_leaving_index; ei_sparse.x[0] = -direction; sparse_vector_t delta_y_sparse(m, 0); - ft.b_transpose_solve(ei_sparse, delta_y_sparse); -#ifdef CHECK_HYPERSPARSE + sparse_vector_t UTsol_sparse(m, 0); + ft.b_transpose_solve(ei_sparse, delta_y_sparse, UTsol_sparse); + f_t b_transpose_solve_density = delta_y_sparse.i.size() / static_cast(m); + + if (direction != -1) + { + for (i_t k = 0; k < UTsol_sparse.x.size(); ++k) + { + UTsol_sparse.x[k] *= -1.0; + } + } + //std::vector UTsol; + //UTsol_sparse.to_dense(UTsol); +#endif +#if 0 std::vector delta_y_sparse_vector_check(m); delta_y_sparse.to_dense(delta_y_sparse_vector_check); f_t error_check = 0.0; @@ -1527,10 +1644,9 @@ 
dual::status_t dual_phase2(i_t phase, } } #endif - delta_y_sparse.scatter(delta_y); - +#if 1 const f_t steepest_edge_norm_check = delta_y_sparse.norm2_squared(); -#ifdef CHECK_HYPERSPARSE +#else f_t steepest_edge_norm_check = vector_norm2_squared(delta_y); #endif if (delta_y_steepest_edge[leaving_index] < @@ -1548,36 +1664,80 @@ dual::status_t dual_phase2(i_t phase, continue; } +#if 0 + // Only scatter after possible continue + delta_y_sparse.scatter(delta_y); +#endif + + btran_time += toc(btran_start_time); + #ifdef COMPUTE_BTRANSPOSE_RESIDUAL { std::vector res(m); b_transpose_multiply(lp, basic_list, delta_y, res); - for (Int k = 0; k < m; k++) { + f_t max_err = 0.0; + for (i_t k = 0; k < m; k++) { const f_t err = std::abs(res[k] - ei[k]); if (err > 1e-4) { settings.log.printf("BT err %d %e\n", k, err); } - assert(err < 1e-4); + max_err = std::max(max_err, err); } + printf("BTranspose multiply error %e\n", max_err); } #endif - // delta_zB = sigma*ei - for (i_t k = 0; k < m; k++) { - const i_t j = basic_list[k]; - delta_z[j] = 0; + f_t delta_z_start_time = tic(); + + const f_t delta_y_nz_percentage = delta_y_sparse.i.size() / static_cast(m) * 100.0; + //printf("delta y nz percentage %.2f\n", delta_y_nz_percentage); + + if (delta_y_nz_percentage <= 30.0) { + phase2::compute_delta_z(A_transpose, + delta_y_sparse, + leaving_index, + direction, + nonbasic_mark, + delta_z_mark, + delta_z_indices, + delta_z); + } else { + // delta_zB = sigma*ei + delta_y_sparse.to_dense(delta_y); + for (i_t k = 0; k < m; k++) { + const i_t j = basic_list[k]; + delta_z[j] = 0; + } + delta_z[leaving_index] = direction; + // delta_zN = -N'*delta_y + for (i_t k = 0; k < n - m; k++) { + const i_t j = nonbasic_list[k]; + // z_j <- -A(:, j)'*delta_y + const i_t col_start = lp.A.col_start[j]; + const i_t col_end = lp.A.col_start[j + 1]; + f_t dot = 0.0; + for (i_t p = col_start; p < col_end; ++p) { + dot += lp.A.x[p] * delta_y[lp.A.i[p]]; + } + delta_z[j] = -dot; + if (delta_z[j] != 0.0) { + 
delta_z_indices.push_back(j); + delta_z_mark[j] = 1; + } + } } - delta_z[leaving_index] = direction; - // delta_zN = -N'*delta_y - for (i_t k = 0; k < n - m; k++) { - const i_t j = nonbasic_list[k]; - // z_j <- -A(:, j)'*delta_y - const i_t col_start = lp.A.col_start[j]; - const i_t col_end = lp.A.col_start[j + 1]; - f_t dot = 0.0; - for (i_t p = col_start; p < col_end; ++p) { - dot += lp.A.x[p] * delta_y[lp.A.i[p]]; + +#if 0 + f_t error_check = 0.0; + for (i_t k = 0; k < n; ++k) { + if (std::abs(delta_z[k] - delta_z_check[k]) > 1e-6) { + settings.log.printf("delta_z error %d %e %e\n", k, delta_z[k], delta_z_check[k]); } - delta_z[j] = -dot; + error_check = std::max(error_check, std::abs(delta_z[k] - delta_z_check[k])); + } + if (error_check > 1e-6) { + settings.log.printf("delta_z error %e\n", error_check); } +#endif + delta_z_time += toc(delta_z_start_time); #ifdef COMPUTE_DUAL_RESIDUAL std::vector dual_residual = delta_z; @@ -1608,7 +1768,7 @@ dual::status_t dual_phase2(i_t phase, #if 1 f_t slope = direction == 1 ? (lp.lower[leaving_index] - x[leaving_index]) : (x[leaving_index] - lp.upper[leaving_index]); - bound_flipping_ratio_test_t bfrt(settings, start_time, m, n, slope, lp.lower, lp.upper, vstatus, nonbasic_list, z, delta_z); + bound_flipping_ratio_test_t bfrt(settings, start_time, m, n, slope, lp.lower, lp.upper, bounded_variables, vstatus, nonbasic_list, z, delta_z, delta_z_indices, nonbasic_mark); entering_index = bfrt.compute_step_length(step_length, nonbasic_entering_index); if constexpr (0) { @@ -1687,6 +1847,8 @@ dual::status_t dual_phase2(i_t phase, settings.log.printf("Dual infeasibility %e\n", dual_infeas); const f_t primal_inf = phase2::primal_infeasibility(lp, settings, vstatus, x); settings.log.printf("Primal infeasibility %e\n", primal_inf); + settings.log.printf("Updates %d\n", ft.num_updates()); + settings.log.printf("Steepest edge %e\n", max_val); if (dual_infeas > settings.dual_tol) { settings.log.printf( "Numerical issues encountered. 
No entering variable found with large infeasibility.\n"); @@ -1695,16 +1857,35 @@ dual::status_t dual_phase2(i_t phase, return dual::status_t::DUAL_UNBOUNDED; } + + f_t vector_y_z_start_time = tic(); // Update dual variables + + + #if 1 + const i_t delta_y_nz = delta_y_sparse.i.size(); + for (i_t k = 0; k < delta_y_nz; ++k) { + const i_t i = delta_y_sparse.i[k]; + y[i] += step_length * delta_y_sparse.x[k]; + } + const i_t delta_z_nz = delta_z_indices.size(); + for (i_t k = 0; k < delta_z_nz; ++k) { + const i_t j = delta_z_indices[k]; + z[j] += step_length * delta_z[j]; + } + z[leaving_index] += step_length * delta_z[leaving_index]; + #else + // y <- y + steplength * delta_y for (i_t i = 0; i < m; ++i) { y[i] += step_length * delta_y[i]; } - // z <- z + steplength * delta_z for (i_t j = 0; j < n; ++j) { z[j] += step_length * delta_z[j]; } +#endif + vector_time += toc(vector_y_z_start_time); #ifdef COMPUTE_DUAL_RESIDUAL dual_res1 = z; @@ -1718,13 +1899,36 @@ dual::status_t dual_phase2(i_t phase, } #endif + f_t flip_start_time = tic(); // Update primal variable - std::vector atilde(m); - std::vector delta_x_flip(n); + const i_t num_flipped = phase2::flip_bounds( - lp, settings, objective, z, nonbasic_list, entering_index, vstatus, delta_x_flip, atilde); + lp, settings, bounded_variables, objective, z, delta_z_indices, nonbasic_list, entering_index, vstatus, delta_x_flip, atilde_mark, atilde, atilde_index); + + flip_time += toc(flip_start_time); + + + f_t ftran_start_time = tic(); if (num_flipped > 0) { + //settings.log.printf("Flipped %6d bounds. 
Dz nz %.2f Atilde nz %6d %.2f %\n", num_flipped, static_cast(delta_z_indices.size()) / static_cast(n -m) * 100.0, atilde_index.size(), static_cast(atilde_index.size()) / static_cast(m) * 100.0); +#if 1 + // B*delta_xB_0 = atilde + sparse_vector_t atilde_sparse(m, atilde_index.size()); + for (i_t k = 0; k < atilde_index.size(); ++k) + { + atilde_sparse.i[k] = atilde_index[k]; + atilde_sparse.x[k] = atilde[atilde_index[k]]; + } + sparse_vector_t delta_xB_0_sparse(m, 0); + ft.b_solve(atilde_sparse, delta_xB_0_sparse); + const i_t delta_xB_0_nz = delta_xB_0_sparse.i.size(); + for (i_t k = 0; k < delta_xB_0_nz; ++k) + { + const i_t j = basic_list[delta_xB_0_sparse.i[k]]; + x[j] += delta_xB_0_sparse.x[k]; + } +#else // B*delta_xB_0 = atilde std::vector delta_xB_0(m); ft.b_solve(atilde, delta_xB_0); @@ -1732,10 +1936,33 @@ dual::status_t dual_phase2(i_t phase, const i_t j = basic_list[k]; x[j] += delta_xB_0[k]; } +#endif + + + +#if 1 + for (i_t j : delta_z_indices) { + x[j] += delta_x_flip[j]; + delta_x_flip[j] = 0.0; + } +#else for (i_t k = 0; k < n - m; ++k) { const i_t j = nonbasic_list[k]; x[j] += delta_x_flip[j]; } +#endif + + // Clear atilde + for (i_t k = 0; k < atilde_index.size(); ++k) + { + atilde[atilde_index[k]] = 0.0; + } + // Clear atilde_mark + for (i_t k = 0; k < atilde_mark.size(); ++k) + { + atilde_mark[k] = 0; + } + atilde_index.clear(); } f_t delta_x_leaving; @@ -1748,7 +1975,10 @@ dual::status_t dual_phase2(i_t phase, std::vector scaled_delta_xB(m); const i_t col_nz = lp.A.col_start[entering_index + 1] - lp.A.col_start[entering_index]; std::vector utilde(m); - if (col_nz > 0.25 * m) + sparse_vector_t utilde_sparse(m, 0); + f_t b_solve_density = 1.0; + sparse_vector_t scaled_delta_xB_sparse(m, 0); + if (0 && col_nz > 0.30 * m) { std::fill(rhs.begin(), rhs.end(), 0.0); lp.A.load_a_column(entering_index, rhs); @@ -1760,8 +1990,6 @@ dual::status_t dual_phase2(i_t phase, else { sparse_vector_t rhs_sparse(lp.A, entering_index); - sparse_vector_t 
scaled_delta_xB_sparse(m, 0); - sparse_vector_t utilde_sparse(m, 0); ft.b_solve(rhs_sparse, scaled_delta_xB_sparse, utilde_sparse); const i_t xB_nz = scaled_delta_xB_sparse.i.size(); for (i_t k = 0; k < xB_nz; ++k) @@ -1770,12 +1998,13 @@ dual::status_t dual_phase2(i_t phase, } scaled_delta_xB_sparse.to_dense(scaled_delta_xB); utilde_sparse.to_dense(utilde); -#ifdef CHECK_B_SOLVE + b_solve_density = static_cast(xB_nz) / static_cast(m); +#if 0 rhs_sparse.to_dense(rhs); #endif } -#ifdef CHECK_B_SOLVE +#if 0 { std::vector residual_B(m); b_multiply(lp, basic_list, scaled_delta_xB, residual_B); @@ -1788,11 +2017,36 @@ dual::status_t dual_phase2(i_t phase, } err_max = std::max(err_max, err); } - assert(err_max < 1e-4); + if (err_max > 1e-6) + { + printf("B multiply error %e\n", err_max); + } } #endif - f_t primal_step_length = delta_x_leaving / scaled_delta_xB[basic_leaving_index]; + ftran_time += toc(ftran_start_time); + + f_t delta_x_change_start_time = tic(); + +#if 1 + f_t scale; + const i_t scaled_delta_xB_nz = scaled_delta_xB_sparse.i.size(); + for (i_t k = 0; k < scaled_delta_xB_nz; ++k) { + if (scaled_delta_xB_sparse.i[k] == basic_leaving_index) { + scale = scaled_delta_xB_sparse.x[k]; + break; + } + } + f_t primal_step_length = delta_x_leaving / scale; + for (i_t k = 0; k < scaled_delta_xB_nz; ++k) { + const i_t j = basic_list[scaled_delta_xB_sparse.i[k]]; + delta_x[j] = primal_step_length * scaled_delta_xB_sparse.x[k]; + } + delta_x[leaving_index] = delta_x_leaving; + delta_x[entering_index] = primal_step_length; +#else + f_t primal_step_length = delta_x_leaving / scaled_delta_xB[basic_leaving_index]; + std::vector delta_x(n, 0.0); for (i_t k = 0; k < m; ++k) { const i_t j = basic_list[k]; delta_x[j] = primal_step_length * scaled_delta_xB[k]; @@ -1803,21 +2057,28 @@ dual::status_t dual_phase2(i_t phase, delta_x[j] = 0.0; } delta_x[entering_index] = primal_step_length; +#endif + vector_time += toc(delta_x_change_start_time); -#ifdef COMPUTE_PRIMAL_STEP_RESIDUAL 
+#if 0 + std::vector residual(m); matrix_vector_multiply(lp.A, 1.0, delta_x, 1.0, residual); - f_t primal_step_err = vector_norm_inf(residual); + f_t primal_step_err = vector_norm_inf(residual); if (primal_step_err > 1e-4) { settings.log.printf("|| A * dx || %e\n", primal_step_err); } #endif + + f_t steepest_edge_norms_start_time = tic(); const i_t steepest_edge_status = phase2::update_steepest_edge_norms(settings, basic_list, ft, direction, - delta_y, - scaled_delta_xB, + delta_y_sparse, + steepest_edge_norm_check, + scaled_delta_xB_sparse, basic_leaving_index, entering_index, + v, delta_y_steepest_edge); #ifdef STEEPEST_EDGE_DEBUG if (steepest_edge_status == -1) { @@ -1829,10 +2090,28 @@ dual::status_t dual_phase2(i_t phase, #endif assert(steepest_edge_status == 0); + se_norms_time += toc(steepest_edge_norms_start_time); + + f_t vector_x_start_time = tic(); // x <- x + delta_x - for (i_t j = 0; j < n; ++j) { - x[j] += delta_x[j]; - } +#if 1 + + //std::vector x_check = x; + for (i_t k = 0; k < scaled_delta_xB_nz; ++k) { + const i_t j = basic_list[scaled_delta_xB_sparse.i[k]]; + x[j] += delta_x[j]; + } + // Leaving index already included above + x[entering_index] += delta_x[entering_index]; +#else + + for (i_t j = 0; j < n; ++j) { + x[j] += delta_x[j]; + } + +#endif + vector_time += toc(vector_x_start_time); + #ifdef COMPUTE_PRIMAL_RESIDUAL residual = lp.rhs; matrix_vector_multiply(lp.A, 1.0, x, -1.0, residual); @@ -1842,8 +2121,38 @@ dual::status_t dual_phase2(i_t phase, } #endif + + f_t objective_start_time = tic(); +#if 1 + for (i_t k = 0; k < scaled_delta_xB_nz; ++k) { + const i_t j = basic_list[scaled_delta_xB_sparse.i[k]]; + obj += delta_x[j] * lp.objective[j]; + } + // Leaving index already included above + obj += delta_x[entering_index] * lp.objective[entering_index]; + + //const f_t obj_check = compute_objective(lp, x); + //if (std::abs(obj - obj_check) > 1e-5) { + // settings.log.printf("Objective error %e: %e %e\n", std::abs(obj - obj_check), obj, 
obj_check); + //} +#endif + objective_time += toc(objective_start_time); + + // Clear delta_x + for (i_t k = 0; k < scaled_delta_xB_nz; ++k) { + const i_t j = basic_list[scaled_delta_xB_sparse.i[k]]; + delta_x[j] = 0.0; + } + // Leaving index already included above + delta_x[entering_index] = 0.0; + scaled_delta_xB_sparse.i.clear(); + scaled_delta_xB_sparse.x.clear(); + + + f_t perturb_start_time = tic(); f_t sum_perturb = 0.0; - phase2::compute_perturbation(lp, settings, z, objective, sum_perturb); + phase2::compute_perturbation(lp, settings, delta_z_indices, z, objective, sum_perturb); + perturb_time += toc(perturb_start_time); // Update basis vstatus[entering_index] = variable_status_t::BASIC; @@ -1854,24 +2163,43 @@ dual::status_t dual_phase2(i_t phase, } basic_list[basic_leaving_index] = entering_index; nonbasic_list[nonbasic_entering_index] = leaving_index; + nonbasic_mark[entering_index] = -1; + nonbasic_mark[leaving_index] = nonbasic_entering_index; + f_t lu_update_start_time = tic(); // Refactor or Update bool should_refactor = ft.num_updates() > settings.refactor_frequency; if (!should_refactor) { - i_t recommend_refactor = ft.update(utilde, basic_leaving_index); -#ifdef CHECK_FT + i_t recommend_refactor = ft.update(utilde_sparse, UTsol_sparse, basic_leaving_index); + //i_t recommend_refactor = ft.update(utilde, UTsol, basic_leaving_index); +#ifdef CHECK_UPDATE { - csc_matrix_t Btest(m, m, 1); + csc_matrix_t Btest(m, m, 1); ft.multiply_lu(Btest); { - csc_matrix_t B(m, m, 1); - form_b(lp, basic_list, B); - csc_matrix_t Diff(m, m, 1); + csc_matrix_t B(m, m, 1); + form_b(lp.A, basic_list, B); + csc_matrix_t Diff(m, m, 1); add(Btest, B, 1.0, -1.0, Diff); const f_t err = Diff.norm1(); if (err > settings.primal_tol) { settings.log.printf("|| B - L*U || %e\n", Diff.norm1()); } + if (err > settings.primal_tol) + { + for (i_t j = 0; j < m; ++j) + { + for (i_t p = Diff.col_start[j]; p < Diff.col_start[j + 1]; ++p) + { + const i_t i = Diff.i[p]; + if (Diff.x[p] != 
0.0) + { + settings.log.printf("Diff %d %d %e\n", j, i, Diff.x[p]); + } + } + } + } + settings.log.printf("basic leaving index %d\n", basic_leaving_index); assert(err < settings.primal_tol); } } @@ -1892,8 +2220,12 @@ dual::status_t dual_phase2(i_t phase, ft.reset(L, U, p); } + lu_update_time += toc(lu_update_start_time); + + f_t steepest_edge_entering_start_time = tic(); phase2::compute_steepest_edge_norm_entering( settings, m, ft, basic_leaving_index, entering_index, delta_y_steepest_edge); + se_entering_time += toc(steepest_edge_entering_start_time); #ifdef STEEPEST_EDGE_DEBUG if (iter < 100 || iter % 100 == 0)) @@ -1904,25 +2236,33 @@ dual::status_t dual_phase2(i_t phase, iter++; +#if 1 // Clear delta_y - const i_t nz_dy = delta_y_sparse.i.size(); - for (i_t k = 0; k < nz_dy; ++k) { - delta_y[delta_y_sparse.i[k]] = 0.0; - } + //const i_t nz_dy = delta_y_sparse.i.size(); + //for (i_t k = 0; k < nz_dy; ++k) { + // delta_y[delta_y_sparse.i[k]] = 0.0; + //} + + // Clear delta_z + phase2::clear_delta_z(entering_index, leaving_index, delta_z_mark, delta_z_indices, delta_z); + + +#endif - const f_t obj = compute_objective(lp, x); f_t now = toc(start_time); if ((iter - start_iter) < settings.first_iteration_log || (iter % settings.iteration_log_frequency) == 0) { if (phase == 1 && iter == 1) { settings.log.printf(" Iter Objective Primal Infeas Perturb Time\n"); } - settings.log.printf("%5d %+.16e %.8e %.2e %.2e %.2f\n", + settings.log.printf("%5d %+.16e %.8e %.2e %.2e %.2f %.2f %.2f\n", iter, compute_user_objective(lp, obj), primal_infeasibility, sum_perturb, step_length, + b_solve_density * 100.0, + b_transpose_solve_density * 100.0, now); } @@ -1939,7 +2279,19 @@ dual::status_t dual_phase2(i_t phase, } } if (iter >= iter_limit) { status = dual::status_t::ITERATION_LIMIT; } - settings.log.printf("BFRT time %e\n", bfrt_time); + settings.log.printf("BFRT time %.2f\n", bfrt_time); + settings.log.printf("Pricing time %.2f\n", pricing_time); + settings.log.printf("BTran 
time %.2f\n", btran_time); + settings.log.printf("FTran time %.2f\n", ftran_time); + settings.log.printf("Flip time %.2f\n", flip_time); + settings.log.printf("Delta_z time %.2f\n", delta_z_time); + settings.log.printf("LU update time %.2f\n", lu_update_time); + settings.log.printf("SE norms time %.2f\n", se_norms_time); + settings.log.printf("SE enter time %.2f\n", se_entering_time); + settings.log.printf("Perturb time %.2f\n", perturb_time); + settings.log.printf("Vector time %.2f\n", vector_time); + settings.log.printf("Objective time %.2f\n", objective_time); + settings.log.printf("Sum %.2f\n", bfrt_time + pricing_time + btran_time + ftran_time + flip_time + delta_z_time + lu_update_time + se_norms_time + se_entering_time + perturb_time + vector_time + objective_time); return status; } diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp index 830838bf58..ed652a05e8 100644 --- a/cpp/src/dual_simplex/sparse_matrix.cpp +++ b/cpp/src/dual_simplex/sparse_matrix.cpp @@ -360,6 +360,24 @@ void scatter_dense(const csc_matrix_t& A, i_t j, f_t alpha, std::vecto } } +// x <- x + alpha * A(:, j) +template +void scatter_dense(const csc_matrix_t& A, i_t j, f_t alpha, std::vector& x, std::vector& mark, std::vector& indices) +{ + const i_t col_start = A.col_start[j]; + const i_t col_end = A.col_start[j + 1]; + for (i_t p = col_start; p < col_end; ++p) { + const i_t i = A.i[p]; + const f_t ax = A.x[p]; + x[i] += alpha * ax; + if (!mark[i]) + { + mark[i] = 1; + indices.push_back(i); + } + } +} + // Compute C = A*B where C is m x n, A is m x k, and B = k x n // Do this by computing C(:, j) = A*B(:, j) = sum (i=1 to k) A(:, k)*B(i, j) template @@ -695,6 +713,13 @@ template void scatter_dense(const csc_matrix_t& A, double alpha, std::vector& x); +template void scatter_dense(const csc_matrix_t& A, + int j, + double alpha, + std::vector& x, + std::vector& mark, + std::vector& indices); + template int multiply(const csc_matrix_t& A, const 
csc_matrix_t& B, csc_matrix_t& C); diff --git a/cpp/src/dual_simplex/sparse_matrix.hpp b/cpp/src/dual_simplex/sparse_matrix.hpp index 8c75448197..cc6955e828 100644 --- a/cpp/src/dual_simplex/sparse_matrix.hpp +++ b/cpp/src/dual_simplex/sparse_matrix.hpp @@ -144,7 +144,6 @@ class sparse_vector_t { A.m = n; A.n = 1; A.nz_max = i.size(); - A.col_start.clear(); A.col_start.resize(2); A.col_start[0] = 0; A.col_start[1] = i.size(); @@ -316,6 +315,9 @@ i_t scatter(const csc_matrix_t& A, template void scatter_dense(const csc_matrix_t& A, i_t j, f_t alpha, std::vector& x); +template +void scatter_dense(const csc_matrix_t& A, i_t j, f_t alpha, std::vector& x, std::vector& mark, std::vector& indices); + // Compute C = A*B where C is m x n, A is m x k, and B = k x n // Do this by computing C(:, j) = A*B(:, j) = sum (i=1 to k) A(:, k)*B(i, j) template diff --git a/cpp/src/dual_simplex/vector_math.cpp b/cpp/src/dual_simplex/vector_math.cpp index aa05d57435..b8f8c7cf2b 100644 --- a/cpp/src/dual_simplex/vector_math.cpp +++ b/cpp/src/dual_simplex/vector_math.cpp @@ -67,6 +67,57 @@ f_t dot(const std::vector& x, const std::vector& y) return dot; } +template +f_t sparse_dot(i_t const *xind, + f_t const *xval, + i_t nx, + i_t const *yind, + i_t ny, + f_t const *y_scatter_val) +{ + f_t dot = 0.0; + for (i_t i = 0, j = 0; i < nx && j < ny;) { + const i_t p = xind[i]; + const i_t q = yind[j]; + if (p == q) { + dot += xval[i] * y_scatter_val[q]; + i++; + j++; + } else if (p < q) { + i++; + } else if (q < p) { + j++; + } + } + return dot; +} + + +template +f_t sparse_dot(i_t *xind, + f_t *xval, + i_t nx, + i_t *yind, + f_t *yval, + i_t ny) +{ + f_t dot = 0.0; + for (i_t i = 0, j = 0; i < nx && j < ny;) { + const i_t p = xind[i]; + const i_t q = yind[j]; + if (p == q) { + dot += xval[i] * yval[j]; + i++; + j++; + } else if (p < q) { + i++; + } else if (q < p) { + j++; + } + } + return dot; +} + template f_t sparse_dot(const std::vector& xind, const std::vector& xval, @@ -146,6 +197,20 @@ 
template double sparse_dot(const std::vector& xind, const std::vector& yind, const std::vector& yval); +template double sparse_dot(int const *xind, + double const *xval, + int nx, + int const *yind, + int ny, + double const *y_scatter_val); + +template double sparse_dot(int *xind, + double *xval, + int nx, + int *yind, + double *yval, + int ny); + template int permute_vector(const std::vector& p, const std::vector& b, std::vector& x); diff --git a/cpp/src/dual_simplex/vector_math.hpp b/cpp/src/dual_simplex/vector_math.hpp index 962b217439..1c7c5470a7 100644 --- a/cpp/src/dual_simplex/vector_math.hpp +++ b/cpp/src/dual_simplex/vector_math.hpp @@ -44,6 +44,22 @@ f_t sparse_dot(const std::vector& xind, const std::vector& yind, const std::vector& yval); +template +f_t sparse_dot(i_t const *xind, + f_t const *xval, + i_t nx, + i_t const *yind, + i_t ny, + f_t const *y_scatter_val); + +template +f_t sparse_dot(i_t *xind, + f_t *xval, + i_t nx, + i_t *yind, + f_t *yval, + i_t ny); + // Computes x = P*b or x=b(p) in MATLAB. template i_t permute_vector(const std::vector& p, const std::vector& b, std::vector& x); From fbae0b654f21c15eee187e41650ef58d1678bd23 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Fri, 27 Jun 2025 09:03:02 -0700 Subject: [PATCH 05/28] Dynamically switch between sparse and hypersparse solves. 
1.42X improvement on NETLIB --- cpp/src/dual_simplex/basis_updates.cpp | 71 +++++- cpp/src/dual_simplex/basis_updates.hpp | 76 ++++++- cpp/src/dual_simplex/phase2.cpp | 209 +++++++++--------- .../dual_simplex/simplex_solver_settings.hpp | 2 + cpp/src/dual_simplex/sparse_matrix.hpp | 25 ++- 5 files changed, 263 insertions(+), 120 deletions(-) diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index 6e9098f0c2..9e9ea847da 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -1273,9 +1273,22 @@ template i_t basis_update_mpf_t::b_transpose_solve(const sparse_vector_t& rhs, sparse_vector_t& solution, sparse_vector_t& UTsol) const { // Solve for r such that U'*r = c - solution = rhs; - u_transpose_solve(solution); + + bool use_hypersparse = false; + const f_t input_size = static_cast(rhs.i.size()); + estimate_solution_density(input_size, sum_U_transpose_, num_calls_U_transpose_, use_hypersparse); + if (use_hypersparse) { + solution = rhs; + u_transpose_solve(solution); + } + else { + std::vector solution_dense; + rhs.to_dense(solution_dense); + u_transpose_solve(solution_dense); + solution.from_dense(solution_dense); + } UTsol = solution; + sum_U_transpose_ += static_cast(solution.i.size()) / input_size; #ifdef CHECK_U_TRANSPOSE_SOLVE std::vector UTsol_dense; @@ -1295,7 +1308,18 @@ i_t basis_update_mpf_t::b_transpose_solve(const sparse_vector_t r_dense; solution.to_dense(r_dense); #endif - l_transpose_solve(solution); + const f_t rhs_size = static_cast(solution.i.size()); + estimate_solution_density(rhs_size, sum_L_transpose_, num_calls_L_transpose_, use_hypersparse); + if (use_hypersparse) { + l_transpose_solve(solution); + } + else { + std::vector solution_dense; + solution.to_dense(solution_dense); + l_transpose_solve(solution_dense); + solution.from_dense(solution_dense); + } + sum_L_transpose_ += static_cast(solution.i.size()) / rhs_size; #ifdef CHECK_L_TRANSPOSE_SOLVE 
std::vector solution_dense; @@ -1326,6 +1350,7 @@ i_t basis_update_mpf_t::b_transpose_solve(const sparse_vector_t i_t basis_update_mpf_t::u_transpose_solve(std::vector& rhs) const { + total_dense_U_transpose_++; dual_simplex::upper_triangular_transpose_solve(U0_, rhs); return 0; } @@ -1333,6 +1358,7 @@ i_t basis_update_mpf_t::u_transpose_solve(std::vector& rhs) const template i_t basis_update_mpf_t::u_transpose_solve(sparse_vector_t& rhs) const { + total_sparse_U_transpose_++; const i_t m = L0_.m; // U0'*x = y // Solve U0'*x0 = y @@ -1346,6 +1372,7 @@ i_t basis_update_mpf_t::u_transpose_solve(sparse_vector_t& r template i_t basis_update_mpf_t::l_transpose_solve(std::vector& rhs) const { + total_dense_L_transpose_++; // L = L0 * T0 * T1 * ... * T_{num_updates_ - 1} // L' = T_{num_updates_ - 1}^T * T_{num_updates_ - 2}^T * ... * T0^T * L0^T // L'*x = b @@ -1386,6 +1413,7 @@ i_t basis_update_mpf_t::l_transpose_solve(std::vector& rhs) const template i_t basis_update_mpf_t::l_transpose_solve(sparse_vector_t& rhs) const { + total_sparse_L_transpose_++; const i_t m = L0_.m; // L'*x = b // L0^T * x = T_0^-T * T_1^-T * ... 
* T_{num_updates_ - 1}^-T * b = b' @@ -1469,7 +1497,6 @@ i_t basis_update_mpf_t::l_transpose_solve(sparse_vector_t& r // sort the indices and place into a sparse column std::sort(xi_workspace_.begin() + m, xi_workspace_.begin() + m + nz, std::less()); - //csc_matrix_t B(m, 1, nz); B_.m = m; B_.n = 1; B_.col_start.resize(2); @@ -1601,8 +1628,21 @@ i_t basis_update_mpf_t::b_solve(const sparse_vector_t& rhs, std::vector l_solve_rhs; solution.to_dense(l_solve_rhs); #endif - l_solve(solution); + + bool use_hypersparse; + const f_t input_size = static_cast(rhs.i.size()); + estimate_solution_density(input_size, sum_L_, num_calls_L_, use_hypersparse); + if (use_hypersparse) { + l_solve(solution); + } + else { + std::vector solution_dense; + solution.to_dense(solution_dense); + l_solve(solution_dense); + solution.from_dense(solution_dense); + } Lsol = solution; + sum_L_ += static_cast(solution.i.size()) / input_size; #ifdef CHECK_L_SOLVE std::vector l_solve_dense; @@ -1625,7 +1665,19 @@ i_t basis_update_mpf_t::b_solve(const sparse_vector_t& rhs, std::vector rhs_dense; solution.to_dense(rhs_dense); #endif - u_solve(solution); + + const f_t rhs_size = static_cast(solution.i.size()); + estimate_solution_density(rhs_size, sum_U_, num_calls_U_, use_hypersparse); + if (use_hypersparse) { + u_solve(solution); + } + else { + std::vector solution_dense; + solution.to_dense(solution_dense); + u_solve(solution_dense); + solution.from_dense(solution_dense); + } + sum_U_ += static_cast(solution.i.size()) / rhs_size; #ifdef CHECK_U_SOLVE std::vector solution_dense; @@ -1646,6 +1698,7 @@ i_t basis_update_mpf_t::b_solve(const sparse_vector_t& rhs, template i_t basis_update_mpf_t::u_solve(std::vector& rhs) const { + total_dense_U_++; const i_t m = L0_.m; // U*x = y dual_simplex::upper_triangular_solve(U0_, rhs); @@ -1655,6 +1708,7 @@ i_t basis_update_mpf_t::u_solve(std::vector& rhs) const template i_t basis_update_mpf_t::u_solve(sparse_vector_t& rhs) const { + total_sparse_U_++; const i_t m 
= L0_.m; // U*x = y @@ -1670,6 +1724,7 @@ i_t basis_update_mpf_t::u_solve(sparse_vector_t& rhs) const template i_t basis_update_mpf_t::l_solve(std::vector& rhs) const { + total_dense_L_++; const i_t m = L0_.m; // L*x = y // L0 * T0 * T1 * ... * T_{num_updates_ - 1} * x = y @@ -1732,6 +1787,7 @@ i_t basis_update_mpf_t::l_solve(std::vector& rhs) const template i_t basis_update_mpf_t::l_solve(sparse_vector_t& rhs) const { + total_sparse_L_++; const i_t m = L0_.m; // L*x = y // L0 * T0 * T1 * ... * T_{num_updates_ - 1} * x = y @@ -1784,9 +1840,6 @@ i_t basis_update_mpf_t::l_solve(sparse_vector_t& rhs) const return 0; } - - - // Takes in utilde such that L*utilde = abar, where abar is the column to add to the basis // and etilde such that U'*etilde = e_leaving template diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp index 4807c43da2..23c93ae043 100644 --- a/cpp/src/dual_simplex/basis_updates.hpp +++ b/cpp/src/dual_simplex/basis_updates.hpp @@ -187,13 +187,52 @@ class basis_update_mpf_t { U0_transpose_(1, 1, 1), L0_transpose_(1, 1, 1), refactor_frequency_(refactor_frequency), - B_(Linit.m, 1, 0) + B_(Linit.m, 1, 0), + total_sparse_L_transpose_(0), + total_dense_L_transpose_(0), + total_sparse_L_(0), + total_dense_L_(0), + total_sparse_U_transpose_(0), + total_dense_U_transpose_(0), + total_sparse_U_(0), + total_dense_U_(0), + hypersparse_threshold_(0.05) { inverse_permutation(row_permutation_, inverse_row_permutation_); clear(); compute_transposes(); + reset_stas(); } + void print_stats() const + { + i_t total_L_transpose_calls = total_sparse_L_transpose_ + total_dense_L_transpose_; + i_t total_U_transpose_calls = total_sparse_U_transpose_ + total_dense_U_transpose_; + i_t total_L_calls = total_sparse_L_ + total_dense_L_; + i_t total_U_calls = total_sparse_U_ + total_dense_U_; + printf("sparse L transpose %8d %8.2f\n", total_sparse_L_transpose_, 100.0 * total_sparse_L_transpose_ / total_L_transpose_calls); + printf("dense L 
transpose %8d %8.2f\n", total_dense_L_transpose_, 100.0 * total_dense_L_transpose_ / total_L_transpose_calls); + printf("sparse U transpose %8d %8.2f\n", total_sparse_U_transpose_, 100.0 * total_sparse_U_transpose_ / total_U_transpose_calls); + printf("dense U transpose %8d %8.2f\n", total_dense_U_transpose_, 100.0 * total_dense_U_transpose_ / total_U_transpose_calls); + printf("sparse L %8d %8.2f\n", total_sparse_L_, 100.0 * total_sparse_L_ / total_L_calls); + printf("dense L %8d %8.2f\n", total_dense_L_, 100.0 * total_dense_L_ / total_L_calls); + printf("sparse U %8d %8.2f\n", total_sparse_U_, 100.0 * total_sparse_U_ / total_U_calls); + printf("dense U %8d %8.2f\n", total_dense_U_, 100.0 * total_dense_U_ / total_U_calls); + } + + void reset_stas() + { + num_calls_L_ = 0; + num_calls_U_ = 0; + num_calls_L_transpose_ = 0; + num_calls_U_transpose_ = 0; + sum_L_ = 0.0; + sum_U_ = 0.0; + sum_L_transpose_ = 0.0; + sum_U_transpose_ = 0.0; + } + + i_t reset(const csc_matrix_t& Linit, const csc_matrix_t& Uinit, const std::vector& p) @@ -205,9 +244,20 @@ class basis_update_mpf_t { inverse_permutation(row_permutation_, inverse_row_permutation_); clear(); compute_transposes(); + reset_stas(); return 0; } + f_t estimate_solution_density(f_t rhs_nz, f_t sum, i_t& num_calls, bool &use_hypersparse) const + { + num_calls++; + const f_t average_growth = std::max(1.0, sum / static_cast(num_calls)); + const f_t predicted_nz = rhs_nz * average_growth; + const f_t predicted_density = predicted_nz / static_cast(L0_.m); + use_hypersparse = predicted_density < hypersparse_threshold_; + return predicted_nz; + } + // Solves for x such that B*x = b, where B is the basis matrix i_t b_solve(const std::vector& rhs, std::vector& solution) const; i_t b_solve(const sparse_vector_t& rhs, sparse_vector_t& solution) const; @@ -321,9 +371,27 @@ class basis_update_mpf_t { mutable csc_matrix_t U0_transpose_; // Needed for sparse solves mutable csc_matrix_t L0_transpose_; // Needed for sparse solves 
mutable csc_matrix_t B_; // Needed for sparse solves -}; - - + mutable i_t total_sparse_L_transpose_; + mutable i_t total_dense_L_transpose_; + mutable i_t total_sparse_L_; + mutable i_t total_dense_L_; + mutable i_t total_sparse_U_transpose_; + mutable i_t total_dense_U_transpose_; + mutable i_t total_sparse_U_; + mutable i_t total_dense_U_; + + mutable i_t num_calls_L_; + mutable i_t num_calls_U_; + mutable i_t num_calls_L_transpose_; + mutable i_t num_calls_U_transpose_; + + mutable f_t sum_L_; + mutable f_t sum_U_; + mutable f_t sum_L_transpose_; + mutable f_t sum_U_transpose_; + + f_t hypersparse_threshold_; +}; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index ec5cf9add7..c881bb5d46 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -51,7 +51,7 @@ void compute_delta_z(const csc_matrix_t& A_transpose, { const i_t i = delta_y.i[k]; const f_t delta_y_i = delta_y.x[k]; - if (std::abs(delta_y_i) < 1e-13) { + if (std::abs(delta_y_i) < 1e-12) { continue; } const i_t row_start = A_transpose.col_start[i]; @@ -767,7 +767,7 @@ i_t update_steepest_edge_norms(const simplex_solver_settings_t& settin const i_t delta_y_nz = delta_y_sparse.i.size(); sparse_vector_t v_sparse(m, 0); - if (delta_y_nz > 0.25 * m) + if (0) { // B^T delta_y = - direction * e_basic_leaving_index // We want B v = - B^{-T} e_basic_leaving_index @@ -860,28 +860,28 @@ i_t update_steepest_edge_norms(const simplex_solver_settings_t& settin // Compute steepest edge info for entering variable template -i_t compute_steepest_edge_norm_entering(const simplex_solver_settings_t& setttings, +i_t compute_steepest_edge_norm_entering(const simplex_solver_settings_t& settings, i_t m, const basis_update_mpf_t& ft, i_t basic_leaving_index, i_t entering_index, + f_t b_transpose_density, std::vector& steepest_edge_norms) { -#if 0 - std::vector es(m); - es[basic_leaving_index] = -1.0; - std::vector 
delta_ys(m); - ft.b_transpose_solve(es, delta_ys); - steepest_edge_norms[entering_index] = vector_norm2_squared(delta_ys); -#else - - sparse_vector_t es_sparse(m, 1); - es_sparse.i[0] = basic_leaving_index; - es_sparse.x[0] = -1.0; - sparse_vector_t delta_ys_sparse(m, 0); - ft.b_transpose_solve(es_sparse, delta_ys_sparse); - steepest_edge_norms[entering_index] = delta_ys_sparse.norm2_squared(); -#endif + if (0) { + std::vector es(m); + es[basic_leaving_index] = -1.0; + std::vector delta_ys(m); + ft.b_transpose_solve(es, delta_ys); + steepest_edge_norms[entering_index] = vector_norm2_squared(delta_ys); + } else { + sparse_vector_t es_sparse(m, 1); + es_sparse.i[0] = basic_leaving_index; + es_sparse.x[0] = -1.0; + sparse_vector_t delta_ys_sparse(m, 0); + ft.b_transpose_solve(es_sparse, delta_ys_sparse); + steepest_edge_norms[entering_index] = delta_ys_sparse.norm2_squared(); + } #ifdef STEEPEST_EDGE_DEBUG settings.log.printf("Steepest edge norm %e for entering j %d at i %d\n", @@ -1524,6 +1524,12 @@ dual::status_t dual_phase2(i_t phase, const i_t start_iter = iter; + f_t b_transpose_solve_density = 0.0; + f_t b_solve_density = 0.0; + + i_t sparse_delta_z = 0; + i_t dense_delta_z = 0; + f_t bfrt_time = 0; f_t pricing_time = 0; f_t btran_time = 0; @@ -1582,43 +1588,42 @@ dual::status_t dual_phase2(i_t phase, } // BTran - // TODO: replace with sparse solve. 
//std::vector delta_y(m); f_t btran_start_time = tic(); -#if 0 - std::vector ei(m, 0.0); - ei[basic_leaving_index] = -direction; - // BT*delta_y = -delta_zB = -sigma*ei - std::vector UTsol; - ft.b_transpose_solve(ei, delta_y, UTsol); - - if (ei[basic_leaving_index] != 1.0) - { - // Need to flip the sign of UTsol - for (i_t k = 0; k < m; ++k) - { - UTsol[k] *= -1.0; - } - } -#else - sparse_vector_t ei_sparse(m, 1); - ei_sparse.i[0] = basic_leaving_index; - ei_sparse.x[0] = -direction; sparse_vector_t delta_y_sparse(m, 0); sparse_vector_t UTsol_sparse(m, 0); - ft.b_transpose_solve(ei_sparse, delta_y_sparse, UTsol_sparse); - f_t b_transpose_solve_density = delta_y_sparse.i.size() / static_cast(m); - - if (direction != -1) - { - for (i_t k = 0; k < UTsol_sparse.x.size(); ++k) - { - UTsol_sparse.x[k] *= -1.0; + if (0) { + std::vector ei(m, 0.0); + ei[basic_leaving_index] = -direction; + // BT*delta_y = -delta_zB = -sigma*ei + std::vector UTsol; + ft.b_transpose_solve(ei, delta_y, UTsol); + + if (ei[basic_leaving_index] != 1.0) { + // Need to flip the sign of UTsol + for (i_t k = 0; k < m; ++k) { + UTsol[k] *= -1.0; + } + } + sparse_vector_t dy_sparse(delta_y); + sparse_vector_t UT_sparse(UTsol); + delta_y_sparse = dy_sparse; + UTsol_sparse = UT_sparse; + b_transpose_solve_density = delta_y_sparse.i.size() / static_cast(m); + } else { + sparse_vector_t ei_sparse(m, 1); + ei_sparse.i[0] = basic_leaving_index; + ei_sparse.x[0] = -direction; + ft.b_transpose_solve(ei_sparse, delta_y_sparse, UTsol_sparse); + b_transpose_solve_density = delta_y_sparse.i.size() / static_cast(m); + + if (direction != -1) { + for (i_t k = 0; k < UTsol_sparse.x.size(); ++k) { + UTsol_sparse.x[k] *= -1.0; + } } } - //std::vector UTsol; - //UTsol_sparse.to_dense(UTsol); -#endif + #if 0 std::vector delta_y_sparse_vector_check(m); delta_y_sparse.to_dense(delta_y_sparse_vector_check); @@ -1664,11 +1669,6 @@ dual::status_t dual_phase2(i_t phase, continue; } -#if 0 - // Only scatter after possible 
continue - delta_y_sparse.scatter(delta_y); -#endif - btran_time += toc(btran_start_time); #ifdef COMPUTE_BTRANSPOSE_RESIDUAL @@ -1688,9 +1688,8 @@ dual::status_t dual_phase2(i_t phase, f_t delta_z_start_time = tic(); const f_t delta_y_nz_percentage = delta_y_sparse.i.size() / static_cast(m) * 100.0; - //printf("delta y nz percentage %.2f\n", delta_y_nz_percentage); - if (delta_y_nz_percentage <= 30.0) { + sparse_delta_z++; phase2::compute_delta_z(A_transpose, delta_y_sparse, leaving_index, @@ -1700,6 +1699,7 @@ dual::status_t dual_phase2(i_t phase, delta_z_indices, delta_z); } else { + dense_delta_z++; // delta_zB = sigma*ei delta_y_sparse.to_dense(delta_y); for (i_t k = 0; k < m; k++) { @@ -1821,7 +1821,7 @@ dual::status_t dual_phase2(i_t phase, if (entering_index == -3) { return dual::status_t::CONCURRENT_LIMIT; } if (entering_index == -1) { if (primal_infeasibility > settings.primal_tol && - max_val < settings.steepest_edge_primal_tol) { + max_val < 2e-8) { // We could be done settings.log.printf("Exiting due to small primal infeasibility se %e\n", max_val); phase2::prepare_optimality(lp, @@ -1912,33 +1912,30 @@ dual::status_t dual_phase2(i_t phase, if (num_flipped > 0) { //settings.log.printf("Flipped %6d bounds. 
Dz nz %.2f Atilde nz %6d %.2f %\n", num_flipped, static_cast(delta_z_indices.size()) / static_cast(n -m) * 100.0, atilde_index.size(), static_cast(atilde_index.size()) / static_cast(m) * 100.0); -#if 1 - // B*delta_xB_0 = atilde - sparse_vector_t atilde_sparse(m, atilde_index.size()); - for (i_t k = 0; k < atilde_index.size(); ++k) - { - atilde_sparse.i[k] = atilde_index[k]; - atilde_sparse.x[k] = atilde[atilde_index[k]]; - } - sparse_vector_t delta_xB_0_sparse(m, 0); - ft.b_solve(atilde_sparse, delta_xB_0_sparse); - const i_t delta_xB_0_nz = delta_xB_0_sparse.i.size(); - for (i_t k = 0; k < delta_xB_0_nz; ++k) - { - const i_t j = basic_list[delta_xB_0_sparse.i[k]]; - x[j] += delta_xB_0_sparse.x[k]; - } -#else - // B*delta_xB_0 = atilde - std::vector delta_xB_0(m); - ft.b_solve(atilde, delta_xB_0); - for (i_t k = 0; k < m; ++k) { - const i_t j = basic_list[k]; - x[j] += delta_xB_0[k]; + const i_t atilde_nz = atilde_index.size(); + if (1) { + // B*delta_xB_0 = atilde + sparse_vector_t atilde_sparse(m, atilde_nz); + for (i_t k = 0; k < atilde_nz; ++k) { + atilde_sparse.i[k] = atilde_index[k]; + atilde_sparse.x[k] = atilde[atilde_index[k]]; + } + sparse_vector_t delta_xB_0_sparse(m, 0); + ft.b_solve(atilde_sparse, delta_xB_0_sparse); + const i_t delta_xB_0_nz = delta_xB_0_sparse.i.size(); + for (i_t k = 0; k < delta_xB_0_nz; ++k) { + const i_t j = basic_list[delta_xB_0_sparse.i[k]]; + x[j] += delta_xB_0_sparse.x[k]; + } + } else { + // B*delta_xB_0 = atilde + std::vector delta_xB_0(m); + ft.b_solve(atilde, delta_xB_0); + for (i_t k = 0; k < m; ++k) { + const i_t j = basic_list[k]; + x[j] += delta_xB_0[k]; + } } -#endif - - #if 1 for (i_t j : delta_z_indices) { @@ -1976,9 +1973,8 @@ dual::status_t dual_phase2(i_t phase, const i_t col_nz = lp.A.col_start[entering_index + 1] - lp.A.col_start[entering_index]; std::vector utilde(m); sparse_vector_t utilde_sparse(m, 0); - f_t b_solve_density = 1.0; sparse_vector_t scaled_delta_xB_sparse(m, 0); - if (0 && col_nz > 0.30 * m) 
+ if (0) { std::fill(rhs.begin(), rhs.end(), 0.0); lp.A.load_a_column(entering_index, rhs); @@ -1986,6 +1982,11 @@ dual::status_t dual_phase2(i_t phase, for (i_t i = 0; i < m; ++i) { scaled_delta_xB[i] *= -1.0; } + sparse_vector_t dxB_sparse(scaled_delta_xB); + sparse_vector_t ut_sparse(utilde); + scaled_delta_xB_sparse = dxB_sparse; + utilde_sparse = ut_sparse; + b_solve_density = scaled_delta_xB_sparse.i.size() / static_cast(m); } else { @@ -2224,7 +2225,7 @@ dual::status_t dual_phase2(i_t phase, f_t steepest_edge_entering_start_time = tic(); phase2::compute_steepest_edge_norm_entering( - settings, m, ft, basic_leaving_index, entering_index, delta_y_steepest_edge); + settings, m, ft, basic_leaving_index, entering_index, b_transpose_solve_density, delta_y_steepest_edge); se_entering_time += toc(steepest_edge_entering_start_time); #ifdef STEEPEST_EDGE_DEBUG @@ -2279,19 +2280,29 @@ dual::status_t dual_phase2(i_t phase, } } if (iter >= iter_limit) { status = dual::status_t::ITERATION_LIMIT; } - settings.log.printf("BFRT time %.2f\n", bfrt_time); - settings.log.printf("Pricing time %.2f\n", pricing_time); - settings.log.printf("BTran time %.2f\n", btran_time); - settings.log.printf("FTran time %.2f\n", ftran_time); - settings.log.printf("Flip time %.2f\n", flip_time); - settings.log.printf("Delta_z time %.2f\n", delta_z_time); - settings.log.printf("LU update time %.2f\n", lu_update_time); - settings.log.printf("SE norms time %.2f\n", se_norms_time); - settings.log.printf("SE enter time %.2f\n", se_entering_time); - settings.log.printf("Perturb time %.2f\n", perturb_time); - settings.log.printf("Vector time %.2f\n", vector_time); - settings.log.printf("Objective time %.2f\n", objective_time); - settings.log.printf("Sum %.2f\n", bfrt_time + pricing_time + btran_time + ftran_time + flip_time + delta_z_time + lu_update_time + se_norms_time + se_entering_time + perturb_time + vector_time + objective_time); + + if (phase == 2) { + settings.log.printf("BFRT time %.2f\n", 
bfrt_time); + settings.log.printf("Pricing time %.2f\n", pricing_time); + settings.log.printf("BTran time %.2f\n", btran_time); + settings.log.printf("FTran time %.2f\n", ftran_time); + settings.log.printf("Flip time %.2f\n", flip_time); + settings.log.printf("Delta_z time %.2f\n", delta_z_time); + settings.log.printf("LU update time %.2f\n", lu_update_time); + settings.log.printf("SE norms time %.2f\n", se_norms_time); + settings.log.printf("SE enter time %.2f\n", se_entering_time); + settings.log.printf("Perturb time %.2f\n", perturb_time); + settings.log.printf("Vector time %.2f\n", vector_time); + settings.log.printf("Objective time %.2f\n", objective_time); + settings.log.printf("Sum %.2f\n", + bfrt_time + pricing_time + btran_time + ftran_time + flip_time + + delta_z_time + lu_update_time + se_norms_time + se_entering_time + + perturb_time + vector_time + objective_time); + + settings.log.printf("Sparse delta_z %8d %8.2f\n", sparse_delta_z, 100.0 * sparse_delta_z / (sparse_delta_z + dense_delta_z)); + settings.log.printf("Dense delta_z %8d %8.2f\n", dense_delta_z, 100.0 * dense_delta_z / (sparse_delta_z + dense_delta_z)); + ft.print_stats(); + } return status; } diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index 8328f23813..eee7c213ad 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -46,6 +46,7 @@ struct simplex_solver_settings_t { cut_off(std::numeric_limits::infinity()), steepest_edge_ratio(0.5), steepest_edge_primal_tol(1e-9), + hypersparse_threshold(0.05), use_steepest_edge_pricing(true), use_harris_ratio(false), use_bound_flip_ratio(true), @@ -87,6 +88,7 @@ struct simplex_solver_settings_t { f_t steepest_edge_ratio; // the ratio of computed steepest edge mismatch from updated steepest edge f_t steepest_edge_primal_tol; // Primal tolerance divided by steepest edge norm + f_t hypersparse_threshold; bool 
use_steepest_edge_pricing; // true if using steepest edge pricing, false if using max // infeasibility pricing bool use_harris_ratio; // true if using the harris ratio test diff --git a/cpp/src/dual_simplex/sparse_matrix.hpp b/cpp/src/dual_simplex/sparse_matrix.hpp index cc6955e828..dd80b27792 100644 --- a/cpp/src/dual_simplex/sparse_matrix.hpp +++ b/cpp/src/dual_simplex/sparse_matrix.hpp @@ -115,15 +115,9 @@ template class sparse_vector_t { public: sparse_vector_t(i_t n, i_t nz) : n(n), i(nz), x(nz) {} - sparse_vector_t(const std::vector& in) : n(in.size()) + sparse_vector_t(const std::vector& in) { - i_t nz = 0; - for (i_t k = 0; k < n; ++k) { - if (in[k] != 0) { - i.push_back(k); - x.push_back(in[k]); - } - } + from_dense(in); } sparse_vector_t(const csc_matrix_t& A, i_t col) { @@ -139,6 +133,21 @@ class sparse_vector_t { } } + void from_dense(const std::vector& in) + { + i.clear(); + x.clear(); + n = in.size(); + i.reserve(n); + x.reserve(n); + for (i_t k = 0; k < n; ++k) { + if (in[k] != 0) { + i.push_back(k); + x.push_back(in[k]); + } + } + } + void to_csc(csc_matrix_t& A) const { A.m = n; From f123cb282c58c6198dbdac7056e58e39fac36206 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Mon, 7 Jul 2025 16:55:16 -0700 Subject: [PATCH 06/28] Maintain and update a list of primal infeasibilities and more Other experiments included in this PR: 1) Bound strengthening on CPU for dual simplex. Note that this did not lead to an improvement on the NETLIB LP test set. 2) Attempt at O(N) bound-flipping ratio test using bucket sort. This is still a work in progress. 3) Attempt to compute Farkas certificate when no entering variable is found in dual simplex ratio test. This has not been verified yet. Note that the maros NETLIB problem is classified as infeasible with the list of primal infeasibilities and the updated pricing. As far as I can tell this is due to different choices for leaving variables in the pricing---that have the same score.
To try to handle no entering variables better, I tried to remove the dual perturbation and reset the steepest edge. This allows me to continue on maros. But ultimately the problem is still classified as infeasible. --- cpp/src/dual_simplex/basis_updates.hpp | 16 +- .../bound_flipping_ratio_test.cpp | 75 +- .../bound_flipping_ratio_test.hpp | 8 + cpp/src/dual_simplex/branch_and_bound.cpp | 2 +- cpp/src/dual_simplex/phase2.cpp | 799 +++++++++++++++++- cpp/src/dual_simplex/presolve.cpp | 137 ++- cpp/src/dual_simplex/presolve.hpp | 1 + cpp/src/dual_simplex/solve.cpp | 4 +- cpp/src/dual_simplex/sparse_matrix.hpp | 2 +- 9 files changed, 991 insertions(+), 53 deletions(-) diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp index 23c93ae043..c0abc3f426 100644 --- a/cpp/src/dual_simplex/basis_updates.hpp +++ b/cpp/src/dual_simplex/basis_updates.hpp @@ -210,14 +210,14 @@ class basis_update_mpf_t { i_t total_U_transpose_calls = total_sparse_U_transpose_ + total_dense_U_transpose_; i_t total_L_calls = total_sparse_L_ + total_dense_L_; i_t total_U_calls = total_sparse_U_ + total_dense_U_; - printf("sparse L transpose %8d %8.2f\n", total_sparse_L_transpose_, 100.0 * total_sparse_L_transpose_ / total_L_transpose_calls); - printf("dense L transpose %8d %8.2f\n", total_dense_L_transpose_, 100.0 * total_dense_L_transpose_ / total_L_transpose_calls); - printf("sparse U transpose %8d %8.2f\n", total_sparse_U_transpose_, 100.0 * total_sparse_U_transpose_ / total_U_transpose_calls); - printf("dense U transpose %8d %8.2f\n", total_dense_U_transpose_, 100.0 * total_dense_U_transpose_ / total_U_transpose_calls); - printf("sparse L %8d %8.2f\n", total_sparse_L_, 100.0 * total_sparse_L_ / total_L_calls); - printf("dense L %8d %8.2f\n", total_dense_L_, 100.0 * total_dense_L_ / total_L_calls); - printf("sparse U %8d %8.2f\n", total_sparse_U_, 100.0 * total_sparse_U_ / total_U_calls); - printf("dense U %8d %8.2f\n", total_dense_U_, 100.0 * total_dense_U_ 
/ total_U_calls); + printf("sparse L transpose %8d %8.2f%\n", total_sparse_L_transpose_, 100.0 * total_sparse_L_transpose_ / total_L_transpose_calls); + printf("dense L transpose %8d %8.2f%\n", total_dense_L_transpose_, 100.0 * total_dense_L_transpose_ / total_L_transpose_calls); + printf("sparse U transpose %8d %8.2f%\n", total_sparse_U_transpose_, 100.0 * total_sparse_U_transpose_ / total_U_transpose_calls); + printf("dense U transpose %8d %8.2f%\n", total_dense_U_transpose_, 100.0 * total_dense_U_transpose_ / total_U_transpose_calls); + printf("sparse L %8d %8.2f%\n", total_sparse_L_, 100.0 * total_sparse_L_ / total_L_calls); + printf("dense L %8d %8.2f%\n", total_dense_L_, 100.0 * total_dense_L_ / total_L_calls); + printf("sparse U %8d %8.2f%\n", total_sparse_U_, 100.0 * total_sparse_U_ / total_U_calls); + printf("dense U %8d %8.2f%\n", total_dense_U_, 100.0 * total_dense_U_ / total_U_calls); } void reset_stas() diff --git a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp index 28b646161c..6b626263d5 100644 --- a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp +++ b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp @@ -20,6 +20,7 @@ #include #include +#include namespace cuopt::linear_programming::dual_simplex { @@ -158,14 +159,30 @@ i_t bound_flipping_ratio_test_t::compute_step_length(f_t& step_length, slope); } + + // Continue the search using a heap to order the breakpoints ratios[k_idx] = ratios[num_breakpoints - 1]; indicies[k_idx] = indicies[num_breakpoints - 1]; + constexpr bool use_bucket_pass = false; + + if (use_bucket_pass) + { + f_t max_ratio = 0.0; + for (i_t k = 0; k < num_breakpoints - 1; ++k) { + if (ratios[k] > max_ratio) { max_ratio = ratios[k]; } + } + settings_.log.printf( + "Starting heap passes. 
%d breakpoints max ratio %e\n", num_breakpoints - 1, max_ratio); + bucket_pass( + indicies, ratios, num_breakpoints - 1, slope, step_length, nonbasic_entering, entering_index); + } + heap_passes( indicies, ratios, num_breakpoints - 1, slope, step_length, nonbasic_entering, entering_index); - if constexpr (0) { + if constexpr (verbose) { settings_.log.printf("BFRT step length %e entering index %d non basic entering %d pivot %e\n", step_length, entering_index, @@ -256,6 +273,62 @@ void bound_flipping_ratio_test_t::heap_passes(const std::vector& } } +template +void bound_flipping_ratio_test_t::bucket_pass(const std::vector& current_indicies, + const std::vector& current_ratios, + i_t num_breakpoints, + f_t& slope, + f_t& step_length, + i_t& nonbasic_entering, + i_t& entering_index) +{ + const f_t dual_tol = settings_.dual_tol; + const f_t zero_tol = settings_.zero_tol; + const std::vector& delta_z = delta_z_; + const std::vector& nonbasic_list = nonbasic_list_; + const i_t N = num_breakpoints; + + const i_t K = 400; // 0, -16, -15, ...., 0, 1, ...., 400 - 18 = 382 + std::vector buckets(K, 0.0); + std::vector bucket_count(K, 0); + for (i_t k = 0; k < N; ++k) { + const i_t idx = current_indicies[k]; + const f_t ratio = current_ratios[k]; + const f_t min_exponent = -16.0; + const f_t max_exponent = 382.0; + const f_t exponent = std::max(min_exponent, std::min(max_exponent, std::log10(ratio))); + const i_t bucket_idx = ratio == 0.0 ? 0 : static_cast(exponent - min_exponent + 1); + //settings_.log.printf("Ratio %e exponent %e bucket_idx %d\n", ratio, exponent, bucket_idx); + const i_t j = nonbasic_list[idx]; + const f_t interval = upper_[j] - lower_[j]; + const f_t delta_slope = std::abs(delta_z_[j]) * interval; + buckets[bucket_idx] += delta_slope; + bucket_count[bucket_idx]++; + } + + std::vector cumulative_sum(K, 0.0); + cumulative_sum[0] = buckets[0]; + if (cumulative_sum[0] > slope) { + settings_.log.printf("Bucket 0. Count in bucket %d. Slope %e. Cumulative sum %e. 
Bucket value %e\n", bucket_count[0], slope, cumulative_sum[0], buckets[0]); + return; + } + i_t k; + bool exceeded = false; + for (k = 1; k < K; ++k) { + cumulative_sum[k] = cumulative_sum[k - 1] + buckets[k]; + if (cumulative_sum[k] > slope) { + exceeded = true; + break; + } + } + + if (exceeded) { + settings_.log.printf("Value in bucket %d. Count in buckets %d. Slope %e. Cumulative sum %e. Next sum %e Bucket value %e\n", k, bucket_count[k], slope, cumulative_sum[k-1], cumulative_sum[k], buckets[k-1]); + } + +} + + #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE template class bound_flipping_ratio_test_t; diff --git a/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp b/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp index f581bd4d49..edf62915f7 100644 --- a/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp +++ b/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp @@ -77,6 +77,14 @@ class bound_flipping_ratio_test_t { i_t& nonbasic_entering, i_t& entering_index); + void bucket_pass(const std::vector& current_indicies, + const std::vector& current_ratios, + i_t num_breakpoints, + f_t& slope, + f_t& step_length, + i_t& nonbasic_entering, + i_t& entering_index); + const std::vector& lower_; const std::vector& upper_; const std::vector& bounded_variables_; diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index f1075ed557..04f08546fb 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -380,7 +380,7 @@ branch_and_bound_t::branch_and_bound_t( : original_problem(user_problem), settings(solver_settings), original_lp(1, 1, 1) { start_time = tic(); - convert_user_problem(original_problem, original_lp, new_slacks); + convert_user_problem(original_problem, settings, original_lp, new_slacks); full_variable_types(original_problem, original_lp, var_types); global_variables::mutex_upper.lock(); diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 
c881bb5d46..a49106012a 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -35,6 +36,278 @@ namespace cuopt::linear_programming::dual_simplex { namespace phase2 { + +// Computes vectors farkas_y, farkas_zl, farkas_zu that satisfy +// +// A'*farkas_y + farkas_zl - farkas_zu ~= 0 +// farkas_zl, farkas_zu >= 0, +// b'*farkas_y + l'*farkas_zl - u'*farkas_zu = farkas_constant > 0 +// +// This is a Farkas certificate for the infeasibility of the primal problem +// +// A*x = b, l <= x <= u +template +void compute_farkas_certificate(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& vstatus, + const std::vector& x, + const std::vector& y, + const std::vector& z, + const std::vector& delta_y, + const std::vector& delta_z, + i_t direction, + i_t leaving_index, + f_t obj_val, + std::vector& farkas_y, + std::vector& farkas_zl, + std::vector& farkas_zu, + f_t& farkas_constant) +{ + const i_t m = lp.num_rows; + const i_t n = lp.num_cols; + + std::vector original_residual = z; + matrix_transpose_vector_multiply(lp.A, 1.0, y, 1.0, original_residual); + for (i_t j = 0; j < n; ++j) + { + original_residual[j] -= lp.objective[j]; + } + const f_t original_residual_norm = vector_norm2(original_residual); + printf("|| A'*y + z - c || = %e\n", original_residual_norm); + + + std::vector zl(n); + std::vector zu(n); + for (i_t j = 0; j < n; ++j) + { + zl[j] = std::max(0.0, z[j]); + zu[j] = -std::min(0.0, z[j]); + } + + original_residual = zl; + matrix_transpose_vector_multiply(lp.A, 1.0, y, 1.0, original_residual); + for (i_t j = 0; j < n; ++j) + { + original_residual[j] -= (zu[j] + lp.objective[j]); + } + const f_t original_residual_2 = vector_norm2(original_residual); + printf("|| A'*y + zl - zu - c || = %e\n", original_residual_2); + + + std::vector y_bar = y; + for (i_t i = 0; i < m; ++i) + { + y_bar[i] = y[i] + delta_y[i]; + } + 
original_residual = z; + matrix_transpose_vector_multiply(lp.A, 1.0, y_bar, 1.0, original_residual); + for (i_t j = 0; j < n; ++j) + { + original_residual[j] += (delta_z[j] - lp.objective[j]); + } + const f_t original_residual_3 = vector_norm2(original_residual); + printf("|| A'*(y + delta_y) + (z + delta_z) - c || = %e\n", original_residual_3); + + + + farkas_y.resize(m); + farkas_zl.resize(n); + farkas_zu.resize(n); + + f_t gamma = 0.0; + for (i_t j = 0; j < n; ++j) + { + const f_t cj = lp.objective[j]; + const f_t lower = lp.lower[j]; + const f_t upper = lp.upper[j]; + if (lower > -inf) + { + gamma -= lower * std::min(0.0, cj); + } + if (upper < inf) + { + gamma -= upper * std::max(0.0, cj); + } + } + printf("gamma = %e\n", gamma); + + const f_t threshold = 1.0; + const f_t positive_threshold = std::max(-gamma, 0.0) + threshold; + printf("positive_threshold = %e\n", positive_threshold); + + // We need to increase the dual objective to positive threshold + f_t alpha = threshold; + const f_t infeas = (direction == 1) ? 
(lp.lower[leaving_index] - x[leaving_index]) : (x[leaving_index] - lp.upper[leaving_index]); + // We need the new objective to be at least positive_threshold + // positive_threshold = obj_val+ alpha * infeas + // infeas > 0, alpha > 0, positive_threshold > 0 + printf("infeas = %e\n", infeas); + printf("obj_val = %e\n", obj_val); + alpha = std::max(threshold,(positive_threshold - obj_val) / infeas); + printf("alpha = %e\n", alpha); + + // farkas_y = y + alpha * delta_y + for (i_t i = 0; i < m; ++i) + { + farkas_y[i] = y[i] + alpha * delta_y[i]; + } + // farkas_zl = z + alpha * delta_z - c- + for (i_t j = 0; j < n; ++j) + { + const f_t cj = lp.objective[j]; + const f_t z_j = z[j]; + const f_t delta_z_j = delta_z[j]; + farkas_zl[j] = std::max(0.0, z_j) + alpha * std::max(0.0, delta_z_j) + -std::min(0.0, cj); + } + + // farkas_zu = z + alpha * delta_z + c+ + for (i_t j = 0; j < n; ++j) + { + const f_t cj = lp.objective[j]; + const f_t z_j = z[j]; + const f_t delta_z_j = delta_z[j]; + farkas_zu[j] = -std::min(0.0, z_j) - alpha * std::min(0.0, delta_z_j) + std::max(0.0, cj); + } + + // farkas_constant = b'*farkas_y + l'*farkas_zl - u'*farkas_zu + farkas_constant = 0.0; + for (i_t i = 0; i < m; ++i) + { + farkas_constant += lp.rhs[i] * farkas_y[i]; + } + for (i_t j = 0; j < n; ++j) + { + const f_t lower = lp.lower[j]; + const f_t upper = lp.upper[j]; + if (lower > -inf) + { + farkas_constant += lower * farkas_zl[j]; + } + if (upper < inf) + { + farkas_constant -= upper * farkas_zu[j]; + } + } + + + // Verify that the Farkas certificate is valid + std::vector residual = farkas_zl; + matrix_transpose_vector_multiply(lp.A, 1.0, farkas_y, 1.0, residual); + for (i_t j = 0; j < n; ++j) + { + residual[j] -= farkas_zu[j]; + } + const f_t residual_norm = vector_norm2(residual); + + f_t zl_min = 0.0; + for (i_t j = 0; j < n; ++j) + { + zl_min = std::min(zl_min, farkas_zl[j]); + } + printf("farkas_zl_min = %e\n", zl_min); + f_t zu_min = 0.0; + for (i_t j = 0; j < n; ++j) + { + 
zu_min = std::min(zu_min, farkas_zu[j]); + } + printf("farkas_zu_min = %e\n", zu_min); + + printf("|| A'*farkas_y + farkas_zl - farkas_zu || = %e\n", residual_norm); + printf("b'*farkas_y + l'*farkas_zl - u'*farkas_zu = %e\n", farkas_constant); +} + + + + +template +void initial_perturbation(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& vstatus, + std::vector& objective) +{ + + const i_t m = lp.num_rows; + const i_t n = lp.num_cols; + f_t max_abs_obj_coeff = 0.0; + for (i_t j = 0; j < n; ++j) + { + max_abs_obj_coeff = std::max(max_abs_obj_coeff, std::abs(lp.objective[j])); + } + + const f_t dual_tol = settings.dual_tol; + + std::srand(static_cast(std::time(nullptr))); + + objective.resize(n); + f_t sum_perturb = 0.0; + i_t num_perturb = 0; + for (i_t j = 0; j < n; ++j) + { + f_t obj = objective[j] = lp.objective[j]; + + const f_t lower = lp.lower[j]; + const f_t upper = lp.upper[j]; + if (vstatus[j] == variable_status_t::NONBASIC_FIXED || + vstatus[j] == variable_status_t::NONBASIC_FREE || lower == upper || + lower == -inf && upper == inf) { + continue; + } + + const f_t rand_val = static_cast(std::rand() / (RAND_MAX + 1.0)); + const f_t perturb = (1e-5 * std::abs(obj) + 1e-7 * max_abs_obj_coeff + 10 * dual_tol) * (1.0 + rand_val); + + if (vstatus[j] == variable_status_t::NONBASIC_LOWER || lower > -inf && upper < inf && obj > 0) + { + objective[j] = obj + perturb; + sum_perturb += perturb; + num_perturb++; + } else if (vstatus[j] == variable_status_t::NONBASIC_UPPER || lower > -inf && upper < inf && obj < 0) + { + objective[j] = obj - perturb; + sum_perturb += perturb; + num_perturb++; + } + } + + settings.log.printf("Applied initial perturbation of %e to %d/%d objective coefficients\n", sum_perturb, num_perturb, n); +} + + + +template +bool use_transpose_for_delta_z(const lp_problem_t& lp, + const csc_matrix_t& A_transpose, + const sparse_vector_t& delta_y, + const std::vector& nonbasic_list) +{ + const i_t m = 
lp.num_rows; + const i_t n = lp.num_cols; + const i_t nz_delta_y = delta_y.i.size(); + f_t transpose_ops = 0; + for (i_t k = 0; k < nz_delta_y; k++) + { + const i_t i = delta_y.i[k]; + const f_t delta_y_i = delta_y.x[k]; + if (std::abs(delta_y_i) < 1e-12) { + continue; + } + const i_t row_start = A_transpose.col_start[i]; + const i_t row_end = A_transpose.col_start[i + 1]; + transpose_ops += row_end - row_start; + } + + f_t regular_ops = 0; + for (i_t k = 0; k < n - m; k++) { + const i_t j = nonbasic_list[k]; + const i_t col_start = lp.A.col_start[j]; + const i_t col_end = lp.A.col_start[j + 1]; + regular_ops += col_end - col_start; + } + + const bool use_transpose = transpose_ops < regular_ops; + return use_transpose; +} + template void compute_delta_z(const csc_matrix_t& A_transpose, const sparse_vector_t& delta_y, @@ -198,6 +471,217 @@ void compute_dual_solution_from_basis(const lp_problem_t& lp, } } +template +f_t compute_initial_primal_infeasibilities(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& basic_list, + const std::vector& x, + std::vector& squared_infeasibilities, + std::vector& infeasibility_indices) +{ + const i_t m = lp.num_rows; + const i_t n = lp.num_cols; + squared_infeasibilities.resize(n, 0.0); + infeasibility_indices.reserve(n); + infeasibility_indices.clear(); + f_t primal_inf = 0.0; + for (i_t k = 0; k < m; ++k) { + const i_t j = basic_list[k]; + const f_t lower_infeas = lp.lower[j] - x[j]; + const f_t upper_infeas = x[j] - lp.upper[j]; + const f_t infeas = std::max(lower_infeas, upper_infeas); + if (infeas > settings.primal_tol) { + const f_t square_infeas = infeas * infeas; + squared_infeasibilities[j] = square_infeas; + infeasibility_indices.push_back(j); + primal_inf += square_infeas; + } + } + return primal_inf; +} + +template +void update_single_primal_infeasibility(const std::vector& lower, + const std::vector& upper, + const std::vector& x, + f_t primal_tol, + std::vector& 
squared_infeasibilities, + std::vector& infeasibility_indices, + i_t j, + f_t& primal_inf) +{ + const f_t now_feasible = std::numeric_limits::denorm_min(); + const f_t old_val = squared_infeasibilities[j]; + // x_j < l_j - epsilon => -x_j + l_j > epsilon + const f_t lower_infeas = lower[j] - x[j]; + // x_j > u_j + epsilon => x_j - u_j > epsilon + const f_t upper_infeas = x[j] - upper[j]; + const f_t infeas = std::max(lower_infeas, upper_infeas); + const f_t new_val = infeas * infeas; + if (infeas > primal_tol) { + primal_inf = std::max(0.0, primal_inf + (new_val - old_val)); + // We are infeasible w.r.t the tolerance + if (old_val == 0.0) { + //printf("New infeasibility %d %e\n", j, infeas); + // This is a new infeasibility + // We need to add it to the list + infeasibility_indices.push_back(j); + } else { + //printf("Already infeasible %d %e\n", j, infeas); + } + squared_infeasibilities[j] = new_val; + } else { + // We are feasible w.r.t the tolerance + if (old_val != 0.0) { + // We were previously infeasible, + primal_inf = std::max(0.0, primal_inf - old_val); + //printf("Now feasible %d %e\n", j, infeas); + squared_infeasibilities[j] = now_feasible; + } else { + //printf("Still feasible %d %e\n", j, infeas); + } + } +} + +template +void update_primal_infeasibilities(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& basic_list, + const std::vector& x, + i_t entering_index, + i_t leaving_index, + std::vector& basic_change_list, + std::vector& squared_infeasibilities, + std::vector& infeasibility_indices, + f_t& primal_inf) +{ + const f_t now_feasible = std::numeric_limits::denorm_min(); + const f_t primal_tol = settings.primal_tol; + const i_t nz = basic_change_list.size(); + for (i_t k = 0; k < nz; ++k) { + const i_t j = basic_list[basic_change_list[k]]; + // The change list will contain the leaving variable, + // But not the entering variable. 
+ + if (j == leaving_index) { + // Force the leaving variable to be feasible + const f_t old_val = squared_infeasibilities[j]; + squared_infeasibilities[j] = now_feasible; + primal_inf = std::max(0.0, primal_inf - old_val); + continue; + } + update_single_primal_infeasibility(lp.lower, + lp.upper, + x, + primal_tol, + squared_infeasibilities, + infeasibility_indices, + j, + primal_inf); + } + + // Update the entering variable + update_single_primal_infeasibility(lp.lower, + lp.upper, + x, + primal_tol, + squared_infeasibilities, + infeasibility_indices, + entering_index, + primal_inf); +} + +template +void clean_up_infeasibilities(std::vector& squared_infeasibilities, + std::vector& infeasibility_indices) +{ + const f_t now_feasible = std::numeric_limits::denorm_min(); + bool needs_clean_up = false; + for (i_t k = 0; k < infeasibility_indices.size(); ++k) { + const i_t j = infeasibility_indices[k]; + const f_t squared_infeas = squared_infeasibilities[j]; + if (squared_infeas == now_feasible) { + needs_clean_up = true; + } + } + + if (needs_clean_up) { + for (i_t k = 0; k < infeasibility_indices.size(); ++k) { + const i_t j = infeasibility_indices[k]; + const f_t squared_infeas = squared_infeasibilities[j]; + if (squared_infeas == now_feasible) { + // Set to the last element + const i_t sz = infeasibility_indices.size(); + infeasibility_indices[k] = infeasibility_indices[sz - 1]; + infeasibility_indices.pop_back(); + squared_infeasibilities[j] = 0.0; + i_t new_j = infeasibility_indices[k]; + if (squared_infeasibilities[new_j] == now_feasible) { + k--; + } + } + } + } +} + +template +i_t steepest_edge_pricing_with_infeasibilities(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& x, + const std::vector& dy_steepest_edge, + const std::vector& basic_mark, + std::vector& squared_infeasibilities, + std::vector& infeasibility_indices, + i_t& direction, + i_t& basic_leaving, + f_t& max_val) +{ + const f_t now_feasible = 
std::numeric_limits::denorm_min(); + max_val = 0.0; + i_t leaving_index = -1; + bool needs_clean_up = false; + const i_t nz = infeasibility_indices.size(); + for (i_t k = 0; k < nz; ++k) { + const i_t j = infeasibility_indices[k]; + const f_t squared_infeas = squared_infeasibilities[j]; +#if 0 + if (squared_infeas == now_feasible) + { + needs_clean_up = true; + continue; + } +#endif + const f_t val = squared_infeas / dy_steepest_edge[j]; + if (val > max_val || val == max_val && j > leaving_index) { + max_val = val; + leaving_index = j; + const f_t lower_infeas = lp.lower[j] - x[j]; + const f_t upper_infeas = x[j] - lp.upper[j]; + direction = lower_infeas >= upper_infeas ? 1 : -1; + } + } +#if 0 + if (needs_clean_up) { + for (i_t k = 0; k < infeasibility_indices.size(); ++k) { + const i_t j = infeasibility_indices[k]; + const f_t squared_infeas = squared_infeasibilities[j]; + if (squared_infeas == now_feasible) { + // Set to the last element + const i_t sz = infeasibility_indices.size(); + infeasibility_indices[k] = infeasibility_indices[sz - 1]; + infeasibility_indices.pop_back(); + squared_infeasibilities[j] = 0.0; + } + } + } +#endif + + basic_leaving = leaving_index >= 0 ? 
basic_mark[leaving_index] : -1; + return leaving_index; +} + + + template i_t steepest_edge_pricing(const lp_problem_t& lp, const simplex_solver_settings_t& settings, @@ -1499,15 +1983,21 @@ dual::status_t dual_phase2(i_t phase, std::vector atilde_mark(m, 0); std::vector atilde_index; std::vector nonbasic_mark(n, -1); + std::vector basic_mark(n, -1); std::vector delta_z_mark(n, 0); std::vector delta_z_indices; std::vector v(m, 0.0); - + std::vector squared_infeasibilities; + std::vector infeasibility_indices; for (i_t k = 0; k < n - m; k++) { nonbasic_mark[nonbasic_list[k]] = k; } + for (i_t k = 0; k < m; k++) { + basic_mark[basic_list[k]] = k; + } + std::vector bounded_variables(n, false); for (i_t j = 0; j < n; j++) { const bool bounded = @@ -1515,10 +2005,14 @@ dual::status_t dual_phase2(i_t phase, bounded_variables[j] = bounded; } + f_t primal_infeasibility = phase2::compute_initial_primal_infeasibilities( + lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); + csc_matrix_t A_transpose(1, 1, 0); lp.A.transpose(A_transpose); + f_t obj = compute_objective(lp, x); settings.log.printf("Initial objective %e\n", obj); @@ -1542,25 +2036,59 @@ dual::status_t dual_phase2(i_t phase, f_t perturb_time = 0; f_t vector_time = 0; f_t objective_time = 0; + f_t update_infeasibility_time = 0; + bool restart_steepest_edge = true; while (iter < iter_limit) { // Pricing - i_t direction; - i_t basic_leaving_index; - f_t primal_infeasibility; + i_t direction = 0; + i_t basic_leaving_index = -1; i_t leaving_index = -1; f_t max_val; f_t price_start_time = tic(); if (settings.use_steepest_edge_pricing) { - leaving_index = phase2::steepest_edge_pricing(lp, +#if 0 + i_t direction_junk = 0; + i_t leaving_index_junk = -1; + f_t max_val_junk = 0.0; + f_t primal_inf_junk = 0; + i_t basic_leaving_index_junk = -1; + leaving_index_junk = phase2::steepest_edge_pricing(lp, settings, x, delta_y_steepest_edge, basic_list, - direction, - basic_leaving_index, - 
primal_infeasibility, - max_val); + direction_junk, + basic_leaving_index_junk, + primal_inf_junk, + max_val_junk); +#else + leaving_index = phase2::steepest_edge_pricing_with_infeasibilities(lp, + settings, + x, + delta_y_steepest_edge, + basic_mark, + squared_infeasibilities, + infeasibility_indices, + direction, + basic_leaving_index, + max_val); +#endif +#if 0 + if (leaving_index != leaving_index_junk || basic_leaving_index != basic_leaving_index_junk || max_val != max_val_junk || direction != direction_junk) { + printf("Leaving index %d %d Basic leaving index %d %d max_val %e %e\n", leaving_index, leaving_index_junk, basic_leaving_index, basic_leaving_index_junk, max_val, max_val_junk); + printf("Direction %d %d\n", direction, direction_junk); + + if (leaving_index >= 0 && leaving_index_junk >= 0) { + printf("Squared infeasibilities %d %e %d %e\n", leaving_index, squared_infeasibilities[leaving_index] / delta_y_steepest_edge[leaving_index], leaving_index_junk, squared_infeasibilities[leaving_index_junk] / delta_y_steepest_edge[leaving_index_junk]); + } + else + { + printf("Trying to print bad stuff\n"); + } + } + // printf("Leaving index %d\n", leaving_index); +#endif } else { // Max infeasibility pricing leaving_index = phase2::phase2_pricing( @@ -1588,14 +2116,14 @@ dual::status_t dual_phase2(i_t phase, } // BTran - //std::vector delta_y(m); + // BT*delta_y = -delta_zB = -sigma*ei f_t btran_start_time = tic(); sparse_vector_t delta_y_sparse(m, 0); sparse_vector_t UTsol_sparse(m, 0); if (0) { std::vector ei(m, 0.0); ei[basic_leaving_index] = -direction; - // BT*delta_y = -delta_zB = -sigma*ei + std::vector UTsol; ft.b_transpose_solve(ei, delta_y, UTsol); @@ -1615,9 +2143,11 @@ dual::status_t dual_phase2(i_t phase, ei_sparse.i[0] = basic_leaving_index; ei_sparse.x[0] = -direction; ft.b_transpose_solve(ei_sparse, delta_y_sparse, UTsol_sparse); - b_transpose_solve_density = delta_y_sparse.i.size() / static_cast(m); if (direction != -1) { + // We solved 
BT*delta_y = -sigma*ei, but for the update we need + // UT*etilde = ei. So we need to flip the sign of the solution + // in the case that sigma == 1. for (i_t k = 0; k < UTsol_sparse.x.size(); ++k) { UTsol_sparse.x[k] *= -1.0; } @@ -1654,7 +2184,7 @@ dual::status_t dual_phase2(i_t phase, #else f_t steepest_edge_norm_check = vector_norm2_squared(delta_y); #endif - if (delta_y_steepest_edge[leaving_index] < + if (restart_steepest_edge && delta_y_steepest_edge[leaving_index] < settings.steepest_edge_ratio * steepest_edge_norm_check) { constexpr bool verbose = false; if (verbose) { @@ -1687,8 +2217,21 @@ dual::status_t dual_phase2(i_t phase, f_t delta_z_start_time = tic(); - const f_t delta_y_nz_percentage = delta_y_sparse.i.size() / static_cast(m) * 100.0; - if (delta_y_nz_percentage <= 30.0) { + i_t delta_y_nz0 = 0; + const i_t nz_delta_y = delta_y_sparse.i.size(); + for (i_t k = 0; k < nz_delta_y; k++) { + if (std::abs(delta_y_sparse.x[k]) > 1e-12) { + delta_y_nz0++; + } + } + const f_t dy_percent = static_cast(delta_y_nz0) / static_cast(nz_delta_y) * 100.0; + if (dy_percent < 10.0) { + //settings.log.printf("delta_y_nz0 %d nz_delta_y %d percentage %.1f\n", delta_y_nz0, nz_delta_y, dy_percent); + } + const f_t delta_y_nz_percentage = delta_y_nz0 / static_cast(m) * 100.0; + //const bool use_transpose = phase2::use_transpose_for_delta_z(lp, A_transpose, delta_y_sparse, nonbasic_list); + const bool use_transpose = delta_y_nz_percentage <= 30.0; + if (use_transpose) { sparse_delta_z++; phase2::compute_delta_z(A_transpose, delta_y_sparse, @@ -1820,8 +2363,97 @@ dual::status_t dual_phase2(i_t phase, if (entering_index == -2) { return dual::status_t::TIME_LIMIT; } if (entering_index == -3) { return dual::status_t::CONCURRENT_LIMIT; } if (entering_index == -1) { - if (primal_infeasibility > settings.primal_tol && - max_val < 2e-8) { + settings.log.printf("No entering variable found.\n"); + settings.log.printf("Scaled steepest edge %e\n", max_val); + + + f_t primal_inf_check 
= 0.0; + i_t num_infeasible = 0; + primal_infeasibility = 0.0; + for (i_t k = 0; k < m; ++k) { + const i_t j = basic_list[k]; + const f_t lower_infeas = lp.lower[j] - x[j]; + const f_t upper_infeas = x[j] - lp.upper[j]; + const f_t infeas = std::max(lower_infeas, upper_infeas); + if (infeas > settings.primal_tol) { + //printf("%5d Basic infeasibility %5d %e. x %e lo %e up %e\n", num_infeasible, j, infeas, x[j], lp.lower[j], lp.upper[j]); + primal_inf_check += infeas; + num_infeasible++; + primal_infeasibility += infeas * infeas; + squared_infeasibilities[j] = infeas * infeas; + } + } + + for (i_t k = 0; k < m; ++k) + { + const i_t j = basic_list[k]; + delta_y_steepest_edge[j] = 1.0; + } + + restart_steepest_edge = false; + + settings.log.printf("Primal infeasibility %e Num infeasible %d\n", primal_inf_check, num_infeasible); + f_t perturbation = 0.0; + for (i_t j = 0; j < n; ++j) { + perturbation += std::abs(lp.objective[j] - objective[j]); + } + + if (perturbation > 1e-6 && phase == 2) { + // Try to remove perturbation + std::vector unperturbed_y(m); + std::vector unperturbed_z(n); + phase2::compute_dual_solution_from_basis( + lp, ft, basic_list, nonbasic_list, unperturbed_y, unperturbed_z); + { + const f_t dual_infeas = phase2::dual_infeasibility( + lp, settings, vstatus, unperturbed_z, settings.tight_tol, settings.dual_tol); + if (dual_infeas <= settings.dual_tol) { + settings.log.printf("Removed perturbation of %.2e.\n", perturbation); + z = unperturbed_z; + y = unperturbed_y; + perturbation = 0.0; + objective = lp.objective; + + f_t obj_val = 0.0; + for (i_t j = 0; j < n; ++j) + { + obj_val += objective[j] * x[j]; + } + + constexpr bool use_farkas = false; + + if constexpr (use_farkas) { + std::vector farkas_y; + std::vector farkas_zl; + std::vector farkas_zu; + f_t farkas_constant; + std::vector my_delta_y; + delta_y_sparse.to_dense(my_delta_y); + phase2::compute_farkas_certificate(lp, + settings, + vstatus, + x, + y, + z, + my_delta_y, + delta_z, + 
direction, + leaving_index, + obj_val, + farkas_y, + farkas_zl, + farkas_zu, + farkas_constant); + } + settings.log.printf("Continuing with perturbation removed and steepest edge norms reset\n"); + continue; + } else { + settings.log.printf("Failed to remove perturbation of %.2e.\n", perturbation); + } + } + } + + if (max_val < 2e-8) { // We could be done settings.log.printf("Exiting due to small primal infeasibility se %e\n", max_val); phase2::prepare_optimality(lp, @@ -1907,6 +2539,7 @@ dual::status_t dual_phase2(i_t phase, flip_time += toc(flip_start_time); + sparse_vector_t delta_xB_0_sparse(m, 0); f_t ftran_start_time = tic(); @@ -1920,7 +2553,6 @@ dual::status_t dual_phase2(i_t phase, atilde_sparse.i[k] = atilde_index[k]; atilde_sparse.x[k] = atilde[atilde_index[k]]; } - sparse_vector_t delta_xB_0_sparse(m, 0); ft.b_solve(atilde_sparse, delta_xB_0_sparse); const i_t delta_xB_0_nz = delta_xB_0_sparse.i.size(); for (i_t k = 0; k < delta_xB_0_nz; ++k) { @@ -2139,6 +2771,70 @@ dual::status_t dual_phase2(i_t phase, #endif objective_time += toc(objective_start_time); +#if 1 + f_t update_infeasibility_start_time = tic(); + // Update primal infeasibilities + phase2::update_primal_infeasibilities(lp, + settings, + basic_list, + x, + entering_index, + leaving_index, + delta_xB_0_sparse.i, + squared_infeasibilities, + infeasibility_indices, + primal_infeasibility); + phase2::update_primal_infeasibilities(lp, + settings, + basic_list, + x, + entering_index, + leaving_index, + scaled_delta_xB_sparse.i, + squared_infeasibilities, + infeasibility_indices, + primal_infeasibility); + + if (primal_infeasibility < 0.0) { + settings.log.printf("!!!!! 
Negative primal infeasibility %e\n", primal_infeasibility); + } + + phase2::clean_up_infeasibilities(squared_infeasibilities, infeasibility_indices); +#endif + +#if CHECK_PRIMAL_INFEASIBILITIES + // Check primal infeasibilities + { + for (i_t k = 0; k < m; ++k) + { + const i_t j = basic_list[k]; + const f_t lower_infeas = lp.lower[j] - x[j]; + const f_t upper_infeas = x[j] - lp.upper[j]; + const f_t infeas = std::max(lower_infeas, upper_infeas); + if (infeas > settings.primal_tol) { + const f_t square_infeas = infeas * infeas; + if (square_infeas != squared_infeasibilities[j]) { + settings.log.printf("Primal infeasibility mismatch %d %e != %e\n", j, square_infeas, squared_infeasibilities[j]); + } + bool found = false; + for (i_t h = 0; h < infeasibility_indices.size(); ++h) { + if (infeasibility_indices[h] == j) { + found = true; + break; + } + } + if (!found) { + settings.log.printf("Infeasibility index not found %d\n", j); + } + } + } + } +#endif + +#if 1 + update_infeasibility_time += toc(update_infeasibility_start_time); +#endif + // Clear delta_x for (i_t k = 0; k < scaled_delta_xB_nz; ++k) { const i_t j = basic_list[scaled_delta_xB_sparse.i[k]]; @@ -2166,6 +2862,8 @@ dual::status_t dual_phase2(i_t phase, nonbasic_list[nonbasic_entering_index] = leaving_index; nonbasic_mark[entering_index] = -1; nonbasic_mark[leaving_index] = nonbasic_entering_index; + basic_mark[leaving_index] = -1; + basic_mark[entering_index] = basic_leaving_index; f_t lu_update_start_time = tic(); // Refactor or Update @@ -2219,6 +2917,16 @@ dual::status_t dual_phase2(i_t phase, } reorder_basic_list(q, basic_list); ft.reset(L, U, p); + for (i_t k = 0; k < n; k++) { + basic_mark[k] = -1; + nonbasic_mark[k] = -1; + } + for (i_t k = 0; k < m; k++) { + basic_mark[basic_list[k]] = k; + } + for (i_t k = 0; k < n - m; k++) { + nonbasic_mark[nonbasic_list[k]] = k; + } } lu_update_time += toc(lu_update_start_time); @@ -2235,6 +2943,19 @@ dual::status_t dual_phase2(i_t phase, } #endif +#if 0 + for 
(i_t k = 0; k < m; k++) { + if (basic_mark[basic_list[k]] != k) { + printf("Basic mark %d %d\n", basic_list[k], k); + } + } + for (i_t k = 0; k < n - m; k++) { + if (nonbasic_mark[nonbasic_list[k]] != k) { + printf("Nonbasic mark %d %d\n", nonbasic_list[k], k); + } + } +#endif + iter++; #if 1 @@ -2256,14 +2977,13 @@ dual::status_t dual_phase2(i_t phase, if (phase == 1 && iter == 1) { settings.log.printf(" Iter Objective Primal Infeas Perturb Time\n"); } - settings.log.printf("%5d %+.16e %.8e %.2e %.2e %.2f %.2f %.2f\n", + settings.log.printf("%5d %+.16e %8d %.8e %.2e %.2e %.2f\n", iter, compute_user_objective(lp, obj), + infeasibility_indices.size(), primal_infeasibility, sum_perturb, step_length, - b_solve_density * 100.0, - b_transpose_solve_density * 100.0, now); } @@ -2282,25 +3002,26 @@ dual::status_t dual_phase2(i_t phase, if (iter >= iter_limit) { status = dual::status_t::ITERATION_LIMIT; } if (phase == 2) { - settings.log.printf("BFRT time %.2f\n", bfrt_time); - settings.log.printf("Pricing time %.2f\n", pricing_time); - settings.log.printf("BTran time %.2f\n", btran_time); - settings.log.printf("FTran time %.2f\n", ftran_time); - settings.log.printf("Flip time %.2f\n", flip_time); - settings.log.printf("Delta_z time %.2f\n", delta_z_time); - settings.log.printf("LU update time %.2f\n", lu_update_time); - settings.log.printf("SE norms time %.2f\n", se_norms_time); - settings.log.printf("SE enter time %.2f\n", se_entering_time); - settings.log.printf("Perturb time %.2f\n", perturb_time); - settings.log.printf("Vector time %.2f\n", vector_time); - settings.log.printf("Objective time %.2f\n", objective_time); - settings.log.printf("Sum %.2f\n", - bfrt_time + pricing_time + btran_time + ftran_time + flip_time + + const f_t total_time = bfrt_time + pricing_time + btran_time + ftran_time + flip_time + delta_z_time + lu_update_time + se_norms_time + se_entering_time + - perturb_time + vector_time + objective_time); - - settings.log.printf("Sparse delta_z %8d 
%8.2f\n", sparse_delta_z, 100.0 * sparse_delta_z / (sparse_delta_z + dense_delta_z)); - settings.log.printf("Dense delta_z %8d %8.2f\n", dense_delta_z, 100.0 * dense_delta_z / (sparse_delta_z + dense_delta_z)); + perturb_time + vector_time + objective_time + update_infeasibility_time; + settings.log.printf("BFRT time %.2f %4.1f%\n", bfrt_time, 100.0 * bfrt_time / total_time); + settings.log.printf("Pricing time %.2f %4.1f%\n", pricing_time, 100.0 * pricing_time / total_time); + settings.log.printf("BTran time %.2f %4.1f%\n", btran_time, 100.0 * btran_time / total_time); + settings.log.printf("FTran time %.2f %4.1f%\n", ftran_time, 100.0 * ftran_time / total_time); + settings.log.printf("Flip time %.2f %4.1f%\n", flip_time, 100.0 * flip_time / total_time); + settings.log.printf("Delta_z time %.2f %4.1f%\n", delta_z_time, 100.0 * delta_z_time / total_time); + settings.log.printf("LU update time %.2f %4.1f%\n", lu_update_time, 100.0 * lu_update_time / total_time); + settings.log.printf("SE norms time %.2f %4.1f%\n", se_norms_time, 100.0 * se_norms_time / total_time); + settings.log.printf("SE enter time %.2f %4.1f%\n", se_entering_time, 100.0 * se_entering_time / total_time); + settings.log.printf("Perturb time %.2f %4.1f%\n", perturb_time, 100.0 * perturb_time / total_time); + settings.log.printf("Vector time %.2f %4.1f%\n", vector_time, 100.0 * vector_time / total_time); + settings.log.printf("Objective time %.2f %4.1f%\n", objective_time, 100.0 * objective_time / total_time); + settings.log.printf("Inf update time %.2f %4.1f%\n", update_infeasibility_time, 100.0 * update_infeasibility_time / total_time); + settings.log.printf("Sum %.2f\n", total_time); + + settings.log.printf("Sparse delta_z %8d %8.2f%\n", sparse_delta_z, 100.0 * sparse_delta_z / (sparse_delta_z + dense_delta_z)); + settings.log.printf("Dense delta_z %8d %8.2f%\n", dense_delta_z, 100.0 * dense_delta_z / (sparse_delta_z + dense_delta_z)); ft.print_stats(); } return status; diff --git 
a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index e87c88b409..5a68e0a6e0 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -23,6 +23,130 @@ namespace cuopt::linear_programming::dual_simplex { +template +void bound_strengthening(const std::vector& row_sense, + const simplex_solver_settings_t& settings, + lp_problem_t& problem) +{ + const i_t m = problem.num_rows; + const i_t n = problem.num_cols; + + std::vector constraint_lower(m); + //std::vector constraint_upper(m); + std::vector num_lower_infinity(m); + std::vector num_upper_infinity(m); + + csc_matrix_t Arow(1, 1, 1); + problem.A.transpose(Arow); + + std::vector less_rows; + less_rows.reserve(m); + + for (i_t i = 0; i < m; ++i) { + if (row_sense[i] == 'L') { + less_rows.push_back(i); + } + } + + std::vector lower = problem.lower; + std::vector upper = problem.upper; + + std::vector updated_variables_list; + updated_variables_list.reserve(n); + std::vector updated_variables_mark(n, 0); + + i_t iter = 0; + const i_t iter_limit = 10; + i_t total_strengthened_variables = 0; + settings.log.printf("Less equal rows %d\n", less_rows.size()); + while (iter < iter_limit && less_rows.size() > 0) + { + // Derive bounds on the constraints + settings.log.printf("Running bound strengthening on %d rows\n", static_cast(less_rows.size())); + for (i_t i : less_rows) { + const i_t row_start = Arow.col_start[i]; + const i_t row_end = Arow.col_start[i + 1]; + num_lower_infinity[i] = 0; + num_upper_infinity[i] = 0; + + f_t lower_limit = 0.0; + //f_t upper_limit = 0.0; + for (i_t p = row_start; p < row_end; ++p) { + const i_t j = Arow.i[p]; + const f_t a_ij = Arow.x[p]; + if (a_ij > 0) { + lower_limit += a_ij * lower[j]; + //upper_limit += a_ij * upper[j]; + } else if (a_ij < 0) { + lower_limit += a_ij * upper[j]; + //upper_limit += a_ij * lower[j]; + } + if (lower[j] == -inf && a_ij > 0) { num_lower_infinity[i]++; lower_limit = -inf;} + if (upper[j] == inf && a_ij < 
0) { num_lower_infinity[i]++; lower_limit = -inf; } + } + //printf("Constraint %d: lo %e\n", i, lower_limit); + constraint_lower[i] = lower_limit; + //constraint_upper[i] = upper_limit; + } + + // Use the constraint bounds to derive new bounds on the variables + for (i_t i : less_rows) { + if (std::isfinite(constraint_lower[i]) && num_lower_infinity[i] == 0) { + const i_t row_start = Arow.col_start[i]; + const i_t row_end = Arow.col_start[i + 1]; + for (i_t p = row_start; p < row_end; ++p) { + const i_t k = Arow.i[p]; + const f_t a_ik = Arow.x[p]; + if (a_ik > 0) { + const f_t new_upper = lower[k] + (problem.rhs[i] - constraint_lower[i]) / a_ik; + if (new_upper < upper[k]) { + //printf("Strengthed bound on variable %d: lo %e up %e -> %e\n", k, lower[k], upper[k], new_upper); + upper[k] = new_upper; + if (lower[k] > upper[k]) + { + settings.log.printf("\t INFEASIBLE!!!!!!!!!!!!!!!!! constraint_lower %e lower %e rhs %e\n", constraint_lower[i], lower[k], problem.rhs[i]); + } + if (!updated_variables_mark[k]) { updated_variables_list.push_back(k); } + } + } else if (a_ik < 0) { + const f_t new_lower = upper[k] + (problem.rhs[i] - constraint_lower[i]) / a_ik; + if (new_lower > lower[k]) { + //printf("Strengthend bound on variable %d: lo %e -> %e up %e\n", k, lower[k], new_lower, upper[k]); + lower[k] = new_lower; + if (lower[k] > upper[k]) + { + settings.log.printf("\t INFEASIBLE !!!!!!!!!!!!!!!!!!1\n"); + } + if (!updated_variables_mark[k]) { updated_variables_list.push_back(k); } + } + } + } + } + } + less_rows.clear(); + + // Update the bounds on the constraints + settings.log.printf("Round %d: Strengthend %d variables\n", iter, static_cast(updated_variables_list.size())); + total_strengthened_variables += updated_variables_list.size(); + for (i_t j : updated_variables_list) + { + updated_variables_mark[j] = 0; + const i_t col_start = problem.A.col_start[j]; + const i_t col_end = problem.A.col_start[j+1]; + for (i_t p = col_start; p < col_end; ++p) + { + const i_t i 
= problem.A.i[p]; + less_rows.push_back(i); + } + } + updated_variables_list.clear(); + iter++; + } + settings.log.printf("Total strengthened variables %d\n", total_strengthened_variables); + problem.lower = lower; + problem.upper = upper; +} + template i_t remove_empty_cols(lp_problem_t& problem, i_t& num_empty_cols, @@ -500,6 +624,7 @@ i_t add_artifical_variables(lp_problem_t& problem, template void convert_user_problem(const user_problem_t& user_problem, + const simplex_solver_settings_t& settings, lp_problem_t& problem, std::vector& new_slacks) { @@ -559,6 +684,14 @@ void convert_user_problem(const user_problem_t& user_problem, convert_greater_to_less(user_problem, row_sense, problem, greater_rows, less_rows); } + // At this point the problem representation is in the form: A*x {<=, =} b + // This is the time to run bound strengthening + constexpr bool run_bound_strengthening = false; + if (run_bound_strengthening) { + settings.log.printf("Running bound strengthening\n"); + bound_strengthening(row_sense, settings, problem); + } + // The original problem may have a variable without a lower bound // but a finite upper bound // -inf < x_j <= u_j @@ -669,7 +802,8 @@ void convert_user_lp_with_guess(const user_problem_t& user_problem, lp_solution_t& converted_solution) { std::vector new_slacks; - convert_user_problem(user_problem, problem, new_slacks); + simplex_solver_settings_t settings; + convert_user_problem(user_problem, settings, problem, new_slacks); crush_primal_solution_with_slack( user_problem, problem, initial_solution.x, initial_slack, new_slacks, converted_solution.x); crush_dual_solution(user_problem, @@ -877,6 +1011,7 @@ void uncrush_solution(const presolve_info_t& presolve_info, #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE template void convert_user_problem(const user_problem_t& user_problem, + const simplex_solver_settings_t& settings, lp_problem_t& problem, std::vector& new_slacks); diff --git a/cpp/src/dual_simplex/presolve.hpp 
b/cpp/src/dual_simplex/presolve.hpp index e6eb542acf..abf7c6356a 100644 --- a/cpp/src/dual_simplex/presolve.hpp +++ b/cpp/src/dual_simplex/presolve.hpp @@ -63,6 +63,7 @@ struct presolve_info_t { template void convert_user_problem(const user_problem_t& user_problem, + const simplex_solver_settings_t& settings, lp_problem_t& problem, std::vector& new_slacks); diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index 29aecb21f5..b93d80d62e 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -244,7 +244,7 @@ lp_status_t solve_linear_program(const user_problem_t& user_problem, f_t start_time = tic(); lp_problem_t original_lp(1, 1, 1); std::vector new_slacks; - convert_user_problem(user_problem, original_lp, new_slacks); + convert_user_problem(user_problem, settings, original_lp, new_slacks); solution.resize(user_problem.num_rows, user_problem.num_cols); lp_solution_t lp_solution(original_lp.num_rows, original_lp.num_cols); std::vector vstatus; @@ -283,7 +283,7 @@ i_t solve(const user_problem_t& problem, lp_problem_t original_lp( problem.num_rows, problem.num_cols, problem.A.col_start[problem.A.n]); std::vector new_slacks; - convert_user_problem(problem, original_lp, new_slacks); + convert_user_problem(problem, settings, original_lp, new_slacks); lp_solution_t solution(original_lp.num_rows, original_lp.num_cols); std::vector vstatus; std::vector edge_norms; diff --git a/cpp/src/dual_simplex/sparse_matrix.hpp b/cpp/src/dual_simplex/sparse_matrix.hpp index dd80b27792..2fdbe5647a 100644 --- a/cpp/src/dual_simplex/sparse_matrix.hpp +++ b/cpp/src/dual_simplex/sparse_matrix.hpp @@ -123,7 +123,7 @@ class sparse_vector_t { { const i_t col_start = A.col_start[col]; const i_t col_end = A.col_start[col + 1]; - n = A.n; + n = A.m; const i_t nz = col_end - col_start; i.reserve(nz); x.reserve(nz); From 35c0f46062b8121cc419f96fa76b9104e352be00 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Mon, 21 Jul 2025 18:50:16 -0700 
Subject: [PATCH 07/28] Fix incorrect infeasibility classification of maros. Helps greenbea and greenbeb as well. Farkas works on 17/24 NETLIB infeasible problems --- cpp/src/dual_simplex/phase2.cpp | 250 ++++++++++++++++++++++++-------- 1 file changed, 193 insertions(+), 57 deletions(-) diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index a49106012a..e5236b78e9 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -73,7 +73,7 @@ void compute_farkas_certificate(const lp_problem_t& lp, original_residual[j] -= lp.objective[j]; } const f_t original_residual_norm = vector_norm2(original_residual); - printf("|| A'*y + z - c || = %e\n", original_residual_norm); + settings.log.printf("|| A'*y + z - c || = %e\n", original_residual_norm); std::vector zl(n); @@ -91,10 +91,14 @@ void compute_farkas_certificate(const lp_problem_t& lp, original_residual[j] -= (zu[j] + lp.objective[j]); } const f_t original_residual_2 = vector_norm2(original_residual); - printf("|| A'*y + zl - zu - c || = %e\n", original_residual_2); + settings.log.printf("|| A'*y + zl - zu - c || = %e\n", original_residual_2); - std::vector y_bar = y; + std::vector search_dir_residual = delta_z; + matrix_transpose_vector_multiply(lp.A, 1.0, delta_y, 1.0, search_dir_residual); + settings.log.printf("|| A'*delta_y + delta_z || = %e\n", vector_norm2(search_dir_residual)); + + std::vector y_bar(m); for (i_t i = 0; i < m; ++i) { y_bar[i] = y[i] + delta_y[i]; @@ -106,7 +110,7 @@ void compute_farkas_certificate(const lp_problem_t& lp, original_residual[j] += (delta_z[j] - lp.objective[j]); } const f_t original_residual_3 = vector_norm2(original_residual); - printf("|| A'*(y + delta_y) + (z + delta_z) - c || = %e\n", original_residual_3); + settings.log.printf("|| A'*(y + delta_y) + (z + delta_z) - c || = %e\n", original_residual_3); @@ -141,15 +145,22 @@ void compute_farkas_certificate(const lp_problem_t& lp, // We need the new objective to be at least 
positive_threshold // positive_threshold = obj_val+ alpha * infeas // infeas > 0, alpha > 0, positive_threshold > 0 + printf("direction = %d\n", direction); + printf("lower %e x %e upper %d\n", lp.lower[leaving_index], x[leaving_index], lp.upper[leaving_index]); printf("infeas = %e\n", infeas); printf("obj_val = %e\n", obj_val); alpha = std::max(threshold,(positive_threshold - obj_val) / infeas); printf("alpha = %e\n", alpha); + std::vector y_prime(m); + std::vector zl_prime(n); + std::vector zu_prime(n); + // farkas_y = y + alpha * delta_y for (i_t i = 0; i < m; ++i) { farkas_y[i] = y[i] + alpha * delta_y[i]; + y_prime[i] = y[i] + alpha * delta_y[i]; } // farkas_zl = z + alpha * delta_z - c- for (i_t j = 0; j < n; ++j) @@ -158,6 +169,7 @@ void compute_farkas_certificate(const lp_problem_t& lp, const f_t z_j = z[j]; const f_t delta_z_j = delta_z[j]; farkas_zl[j] = std::max(0.0, z_j) + alpha * std::max(0.0, delta_z_j) + -std::min(0.0, cj); + zl_prime[j] = zl[j] + alpha * std::max(0.0, delta_z_j); } // farkas_zu = z + alpha * delta_z + c+ @@ -167,14 +179,22 @@ void compute_farkas_certificate(const lp_problem_t& lp, const f_t z_j = z[j]; const f_t delta_z_j = delta_z[j]; farkas_zu[j] = -std::min(0.0, z_j) - alpha * std::min(0.0, delta_z_j) + std::max(0.0, cj); + zu_prime[j] = zu[j] + alpha * (-std::min(0.0, delta_z_j)); } // farkas_constant = b'*farkas_y + l'*farkas_zl - u'*farkas_zu farkas_constant = 0.0; + f_t test_constant = 0.0; + f_t test_3 = 0.0; for (i_t i = 0; i < m; ++i) { farkas_constant += lp.rhs[i] * farkas_y[i]; + test_constant += lp.rhs[i] * y_prime[i]; + test_3 += lp.rhs[i] * delta_y[i]; } + printf("b'*delta_y = %e\n", test_3); + printf("|| b || %e\n", vector_norm_inf(lp.rhs)); + printf("|| delta y || %e\n", vector_norm_inf(delta_y)); for (i_t j = 0; j < n; ++j) { const f_t lower = lp.lower[j]; @@ -182,10 +202,16 @@ void compute_farkas_certificate(const lp_problem_t& lp, if (lower > -inf) { farkas_constant += lower * farkas_zl[j]; + test_constant += 
lower * zl_prime[j]; + const f_t delta_z_l_j = std::max(delta_z[j], 0.0); + test_3 += lower * delta_z_l_j; } if (upper < inf) { farkas_constant -= upper * farkas_zu[j]; + test_constant -= upper * zu_prime[j]; + const f_t delta_z_u_j = -std::min(delta_z[j], 0.0); + test_3 -= upper * delta_z_u_j; } } @@ -204,16 +230,21 @@ void compute_farkas_certificate(const lp_problem_t& lp, { zl_min = std::min(zl_min, farkas_zl[j]); } - printf("farkas_zl_min = %e\n", zl_min); + settings.log.printf("farkas_zl_min = %e\n", zl_min); f_t zu_min = 0.0; for (i_t j = 0; j < n; ++j) { zu_min = std::min(zu_min, farkas_zu[j]); } - printf("farkas_zu_min = %e\n", zu_min); + settings.log.printf("farkas_zu_min = %e\n", zu_min); - printf("|| A'*farkas_y + farkas_zl - farkas_zu || = %e\n", residual_norm); - printf("b'*farkas_y + l'*farkas_zl - u'*farkas_zu = %e\n", farkas_constant); + settings.log.printf("|| A'*farkas_y + farkas_zl - farkas_zu || = %e\n", residual_norm); + settings.log.printf("b'*farkas_y + l'*farkas_zl - u'*farkas_zu = %e\n", farkas_constant); + + if (residual_norm < 1e-6 && farkas_constant > 0.0 && zl_min >= 0.0 && zu_min >= 0.0) + { + settings.log.printf("Farkas certificate of infeasibility constructed\n"); + } } @@ -324,7 +355,7 @@ void compute_delta_z(const csc_matrix_t& A_transpose, { const i_t i = delta_y.i[k]; const f_t delta_y_i = delta_y.x[k]; - if (std::abs(delta_y_i) < 1e-12) { + if (0 && std::abs(delta_y_i) < 1e-12) { continue; } const i_t row_start = A_transpose.col_start[i]; @@ -355,6 +386,8 @@ void compute_reduced_cost_update(const lp_problem_t& lp, const std::vector& delta_y, i_t leaving_index, i_t direction, + std::vector& delta_z_mark, + std::vector& delta_z_indices, std::vector& delta_z) { const i_t m = lp.num_rows; @@ -377,6 +410,11 @@ void compute_reduced_cost_update(const lp_problem_t& lp, dot += lp.A.x[p] * delta_y[lp.A.i[p]]; } delta_z[j] = -dot; + if (dot != 0.0) + { + delta_z_indices.push_back(j); + delta_z_mark[j] = 1; + } } } @@ -394,7 +432,7 @@ void 
clear_delta_z(i_t entering_index, delta_z[j] = 0.0; delta_z_mark[j] = 0; } - delta_z[entering_index] = 0.0; + if (entering_index != -1) { delta_z[entering_index] = 0.0; } delta_z[leaving_index] = 0.0; delta_z_indices.clear(); } @@ -471,6 +509,57 @@ void compute_dual_solution_from_basis(const lp_problem_t& lp, } } +template +i_t compute_primal_solution_from_basis(const lp_problem_t& lp, + basis_update_mpf_t& ft, + const std::vector& basic_list, + const std::vector& nonbasic_list, + const std::vector& vstatus, + std::vector& x) +{ + const i_t m = lp.num_rows; + const i_t n = lp.num_cols; + std::vector rhs = lp.rhs; + + for (i_t k = 0; k < n - m; ++k) + { + const i_t j = nonbasic_list[k]; + if (vstatus[j] == variable_status_t::NONBASIC_LOWER || vstatus[j] == variable_status_t::NONBASIC_FIXED) + { + x[j] = lp.lower[j]; + } + else if (vstatus[j] == variable_status_t::NONBASIC_UPPER) + { + x[j] = lp.upper[j]; + } + else if (vstatus[j] == variable_status_t::NONBASIC_FREE) + { + x[j] = 0.0; + } + } + + // rhs = b - sum_{j : x_j = l_j} A(:, j) l(j) - sum_{j : x_j = u_j} A(:, j) * + // u(j) + for (i_t k = 0; k < n - m; ++k) { + const i_t j = nonbasic_list[k]; + const i_t col_start = lp.A.col_start[j]; + const i_t col_end = lp.A.col_start[j + 1]; + const f_t xj = x[j]; + for (i_t p = col_start; p < col_end; ++p) { + rhs[lp.A.i[p]] -= xj * lp.A.x[p]; + } + } + + std::vector xB(m); + ft.b_solve(rhs, xB); + + for (i_t k = 0; k < m; ++k) { + const i_t j = basic_list[k]; + x[j] = xB[k]; + } + return 0; +} + template f_t compute_initial_primal_infeasibilities(const lp_problem_t& lp, const simplex_solver_settings_t& settings, @@ -2245,39 +2334,42 @@ dual::status_t dual_phase2(i_t phase, dense_delta_z++; // delta_zB = sigma*ei delta_y_sparse.to_dense(delta_y); - for (i_t k = 0; k < m; k++) { - const i_t j = basic_list[k]; - delta_z[j] = 0; - } - delta_z[leaving_index] = direction; - // delta_zN = -N'*delta_y - for (i_t k = 0; k < n - m; k++) { - const i_t j = nonbasic_list[k]; - // 
z_j <- -A(:, j)'*delta_y - const i_t col_start = lp.A.col_start[j]; - const i_t col_end = lp.A.col_start[j + 1]; - f_t dot = 0.0; - for (i_t p = col_start; p < col_end; ++p) { - dot += lp.A.x[p] * delta_y[lp.A.i[p]]; - } - delta_z[j] = -dot; - if (delta_z[j] != 0.0) { - delta_z_indices.push_back(j); - delta_z_mark[j] = 1; - } - } + phase2::compute_reduced_cost_update(lp, + basic_list, + nonbasic_list, + delta_y, + leaving_index, + direction, + delta_z_mark, + delta_z_indices, + delta_z); } #if 0 - f_t error_check = 0.0; - for (i_t k = 0; k < n; ++k) { - if (std::abs(delta_z[k] - delta_z_check[k]) > 1e-6) { - settings.log.printf("delta_z error %d %e %e\n", k, delta_z[k], delta_z_check[k]); + if (use_transpose) + { + delta_y_sparse.to_dense(delta_y); + std::vector delta_z_check(n); + std::vector delta_z_mark_check(n, 0); + std::vector delta_z_indices_check ; + phase2::compute_reduced_cost_update(lp, + basic_list, + nonbasic_list, + delta_y, + leaving_index, + direction, + delta_z_mark_check, + delta_z_indices_check, + delta_z_check); + f_t error_check = 0.0; + for (i_t k = 0; k < n; ++k) { + const f_t diff = std::abs(delta_z[k] - delta_z_check[k]); + if (diff > 1e-6) { + settings.log.printf("delta_z error %d transpose %e no transpose %e diff %e\n", k, delta_z[k], delta_z_check[k], diff); + } + error_check = std::max(error_check, diff); } - error_check = std::max(error_check, std::abs(delta_z[k] - delta_z_check[k])); - } - if (error_check > 1e-6) { - settings.log.printf("delta_z error %e\n", error_check); + if (error_check > 1e-6) { settings.log.printf("delta_z error %e\n", error_check); } } #endif delta_z_time += toc(delta_z_start_time); @@ -2287,7 +2379,7 @@ dual::status_t dual_phase2(i_t phase, // || A'*delta_y + delta_z ||_inf matrix_transpose_vector_multiply(lp.A, 1.0, delta_y, 1.0, dual_residual); f_t dual_residual_norm = vector_norm_inf(dual_residual); - settings.log.printf("|| A'*dy - dz || %e\n", dual_residual_norm); + settings.log.printf("|| A'*dy - dz || %e 
use transpose %d\n", dual_residual_norm, use_transpose); #endif // Ratio test @@ -2363,12 +2455,13 @@ dual::status_t dual_phase2(i_t phase, if (entering_index == -2) { return dual::status_t::TIME_LIMIT; } if (entering_index == -3) { return dual::status_t::CONCURRENT_LIMIT; } if (entering_index == -1) { - settings.log.printf("No entering variable found.\n"); - settings.log.printf("Scaled steepest edge %e\n", max_val); + settings.log.printf("No entering variable found. Iter %d\n", iter); + settings.log.printf("Scaled infeasibility %e\n", max_val); f_t primal_inf_check = 0.0; i_t num_infeasible = 0; + f_t max_primal_infeas = 0.0; primal_infeasibility = 0.0; for (i_t k = 0; k < m; ++k) { const i_t j = basic_list[k]; @@ -2376,29 +2469,28 @@ dual::status_t dual_phase2(i_t phase, const f_t upper_infeas = x[j] - lp.upper[j]; const f_t infeas = std::max(lower_infeas, upper_infeas); if (infeas > settings.primal_tol) { - //printf("%5d Basic infeasibility %5d %e. x %e lo %e up %e\n", num_infeasible, j, infeas, x[j], lp.lower[j], lp.upper[j]); primal_inf_check += infeas; num_infeasible++; primal_infeasibility += infeas * infeas; squared_infeasibilities[j] = infeas * infeas; + max_primal_infeas = std::max(max_primal_infeas, infeas); } } - for (i_t k = 0; k < m; ++k) + for (i_t j = 0; j < n; ++j) { - const i_t j = basic_list[k]; delta_y_steepest_edge[j] = 1.0; } restart_steepest_edge = false; - - settings.log.printf("Primal infeasibility %e Num infeasible %d\n", primal_inf_check, num_infeasible); + settings.log.printf("Max Primal infeasibility %e Sum Primal infeasibility %e Num infeasible %d\n", max_primal_infeas, primal_inf_check, num_infeasible); f_t perturbation = 0.0; for (i_t j = 0; j < n; ++j) { perturbation += std::abs(lp.objective[j] - objective[j]); } + settings.log.printf("Perturbation %e\n", perturbation); - if (perturbation > 1e-6 && phase == 2) { + if (perturbation > 0.0 && phase == 2) { // Try to remove perturbation std::vector unperturbed_y(m); std::vector 
unperturbed_z(n); @@ -2412,15 +2504,58 @@ dual::status_t dual_phase2(i_t phase, z = unperturbed_z; y = unperturbed_y; perturbation = 0.0; + + std::vector unperturbed_x(n); + phase2::compute_primal_solution_from_basis(lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x); + x = unperturbed_x; + primal_infeasibility = phase2::compute_initial_primal_infeasibilities( + lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); + settings.log.printf("Updated primal infeasibility: %e\n", primal_infeasibility); + objective = lp.objective; - f_t obj_val = 0.0; + obj = 0.0; for (i_t j = 0; j < n; ++j) { - obj_val += objective[j] * x[j]; + obj += objective[j] * x[j]; } - constexpr bool use_farkas = false; + if (dual_infeas <= settings.dual_tol && primal_infeasibility <= settings.primal_tol) + { + phase2::prepare_optimality(lp, + settings, + ft, + objective, + basic_list, + nonbasic_list, + vstatus, + phase, + start_time, + max_val, + iter, + x, + y, + z, + sol); + status = dual::status_t::OPTIMAL; + break; + } + + + settings.log.printf("Continuing with perturbation removed and steepest edge norms reset\n"); + // Clear delta_z + phase2::clear_delta_z(entering_index, leaving_index, delta_z_mark, delta_z_indices, delta_z); + continue; + } else { + settings.log.printf("Failed to remove perturbation of %.2e.\n", perturbation); + } + } + } + + if (perturbation == 0.0 && phase == 2) + { + + constexpr bool use_farkas = true; if constexpr (use_farkas) { std::vector farkas_y; @@ -2429,6 +2564,13 @@ dual::status_t dual_phase2(i_t phase, f_t farkas_constant; std::vector my_delta_y; delta_y_sparse.to_dense(my_delta_y); + + + f_t obj_val = 0.0; + for (i_t j = 0; j < n; ++j) + { + obj_val += objective[j] * x[j]; + } phase2::compute_farkas_certificate(lp, settings, vstatus, @@ -2445,12 +2587,6 @@ dual::status_t dual_phase2(i_t phase, farkas_zu, farkas_constant); } - settings.log.printf("Continuing with perturbation removed and steepest edge norms reset\n"); - 
continue; - } else { - settings.log.printf("Failed to remove perturbation of %.2e.\n", perturbation); - } - } } if (max_val < 2e-8) { From 3f0ca9fbb047c826f7e1e9d5601e79199efd6b78 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Wed, 23 Jul 2025 12:24:00 -0700 Subject: [PATCH 08/28] Clean up code --- cpp/src/dual_simplex/phase2.cpp | 1873 +++++++++++------------- cpp/src/dual_simplex/sparse_matrix.hpp | 19 + 2 files changed, 836 insertions(+), 1056 deletions(-) diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index e5236b78e9..071ba085be 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -247,9 +247,6 @@ void compute_farkas_certificate(const lp_problem_t& lp, } } - - - template void initial_perturbation(const lp_problem_t& lp, const simplex_solver_settings_t& settings, @@ -304,39 +301,43 @@ void initial_perturbation(const lp_problem_t& lp, } - template -bool use_transpose_for_delta_z(const lp_problem_t& lp, - const csc_matrix_t& A_transpose, - const sparse_vector_t& delta_y, - const std::vector& nonbasic_list) +void compute_reduced_cost_update(const lp_problem_t& lp, + const std::vector& basic_list, + const std::vector& nonbasic_list, + const std::vector& delta_y, + i_t leaving_index, + i_t direction, + std::vector& delta_z_mark, + std::vector& delta_z_indices, + std::vector& delta_z) { const i_t m = lp.num_rows; const i_t n = lp.num_cols; - const i_t nz_delta_y = delta_y.i.size(); - f_t transpose_ops = 0; - for (i_t k = 0; k < nz_delta_y; k++) - { - const i_t i = delta_y.i[k]; - const f_t delta_y_i = delta_y.x[k]; - if (std::abs(delta_y_i) < 1e-12) { - continue; - } - const i_t row_start = A_transpose.col_start[i]; - const i_t row_end = A_transpose.col_start[i + 1]; - transpose_ops += row_end - row_start; - } - f_t regular_ops = 0; - for (i_t k = 0; k < n - m; k++) { + // delta_zB = sigma*ei + for (i_t k = 0; k < m; k++) { + const i_t j = basic_list[k]; + delta_z[j] = 0; + } + 
delta_z[leaving_index] = direction; + // delta_zN = -N'*delta_y + for (i_t k = 0; k < n - m; k++) { const i_t j = nonbasic_list[k]; + // z_j <- -A(:, j)'*delta_y const i_t col_start = lp.A.col_start[j]; const i_t col_end = lp.A.col_start[j + 1]; - regular_ops += col_end - col_start; - } - - const bool use_transpose = transpose_ops < regular_ops; - return use_transpose; + f_t dot = 0.0; + for (i_t p = col_start; p < col_end; ++p) { + dot += lp.A.x[p] * delta_y[lp.A.i[p]]; + } + delta_z[j] = -dot; + if (dot != 0.0) + { + delta_z_indices.push_back(j); + delta_z_mark[j] = 1; + } + } } template @@ -377,47 +378,93 @@ void compute_delta_z(const csc_matrix_t& A_transpose, // delta_zB = sigma*ei delta_z[leaving_index] = direction; + +#ifdef CHECK_CHANGE_IN_REDUCED_COST + delta_y_sparse.to_dense(delta_y); + std::vector delta_z_check(n); + std::vector delta_z_mark_check(n, 0); + std::vector delta_z_indices_check; + phase2::compute_reduced_cost_update(lp, basic_list, nonbasic_list, delta_y, leaving_index, direction, delta_z_mark_check, delta_z_indices_check, delta_z_check); + f_t error_check = 0.0; + for (i_t k = 0; k < n; ++k) { + const f_t diff = std::abs(delta_z[k] - delta_z_check[k]); + if (diff > 1e-6) { + printf("delta_z error %d transpose %e no transpose %e diff %e\n", + k, + delta_z[k], + delta_z_check[k], + diff); + } + error_check = std::max(error_check, diff); + } + if (error_check > 1e-6) { printf("delta_z error %e\n", error_check); } +#endif } template -void compute_reduced_cost_update(const lp_problem_t& lp, - const std::vector& basic_list, - const std::vector& nonbasic_list, - const std::vector& delta_y, - i_t leaving_index, - i_t direction, - std::vector& delta_z_mark, - std::vector& delta_z_indices, - std::vector& delta_z) +void compute_reduced_costs(const std::vector& objective, + const csc_matrix_t& A, + const std::vector& y, + const std::vector& basic_list, + const std::vector& nonbasic_list, + std::vector& z) { - const i_t m = lp.num_rows; - const i_t n = 
lp.num_cols; - - // delta_zB = sigma*ei - for (i_t k = 0; k < m; k++) { - const i_t j = basic_list[k]; - delta_z[j] = 0; - } - delta_z[leaving_index] = direction; - // delta_zN = -N'*delta_y + const i_t m = A.m; + const i_t n = A.n; + // zN = cN - N'*y for (i_t k = 0; k < n - m; k++) { const i_t j = nonbasic_list[k]; - // z_j <- -A(:, j)'*delta_y - const i_t col_start = lp.A.col_start[j]; - const i_t col_end = lp.A.col_start[j + 1]; + // z_j <- c_j + z[j] = objective[j]; + + // z_j <- z_j - A(:, j)'*y + const i_t col_start = A.col_start[j]; + const i_t col_end = A.col_start[j + 1]; f_t dot = 0.0; for (i_t p = col_start; p < col_end; ++p) { - dot += lp.A.x[p] * delta_y[lp.A.i[p]]; - } - delta_z[j] = -dot; - if (dot != 0.0) - { - delta_z_indices.push_back(j); - delta_z_mark[j] = 1; + dot += A.x[p] * y[A.i[p]]; } + z[j] -= dot; + } + // zB = 0 + for (i_t k = 0; k < m; ++k) { + z[basic_list[k]] = 0.0; } } +template +void compute_primal_variables(const basis_update_mpf_t& ft, + const std::vector& lp_rhs, + const csc_matrix_t& A, + const std::vector& basic_list, + const std::vector& nonbasic_list, + f_t tight_tol, + std::vector& x) +{ + const i_t m = A.m; + const i_t n = A.n; + std::vector rhs = lp_rhs; + // rhs = b - sum_{j : x_j = l_j} A(:, j) * l(j) + // - sum_{j : x_j = u_j} A(:, j) * u(j) + for (i_t k = 0; k < n - m; ++k) { + const i_t j = nonbasic_list[k]; + const i_t col_start = A.col_start[j]; + const i_t col_end = A.col_start[j + 1]; + const f_t xj = x[j]; + if (std::abs(xj) < tight_tol * 10) continue; + for (i_t p = col_start; p < col_end; ++p) { + rhs[A.i[p]] -= xj * A.x[p]; + } + } + + std::vector xB(m); + ft.b_solve(rhs, xB); + + for (i_t k = 0; k < m; ++k) { + const i_t j = basic_list[k]; + x[j] = xB[k]; + } +} template void clear_delta_z(i_t entering_index, @@ -437,19 +484,45 @@ void clear_delta_z(i_t entering_index, delta_z_indices.clear(); } +template +void clear_delta_x(const std::vector& basic_list, + i_t entering_index, + sparse_vector_t& 
scaled_delta_xB_sparse, + std::vector& delta_x) +{ + const i_t scaled_delta_xB_nz = scaled_delta_xB_sparse.i.size(); + for (i_t k = 0; k < scaled_delta_xB_nz; ++k) { + const i_t j = basic_list[scaled_delta_xB_sparse.i[k]]; + delta_x[j] = 0.0; + } + // Leaving index already included above + delta_x[entering_index] = 0.0; + scaled_delta_xB_sparse.i.clear(); + scaled_delta_xB_sparse.x.clear(); +} + +template +void compute_dual_residual(const csc_matrix_t& A, + const std::vector& objective, + const std::vector& y, + const std::vector& z, + std::vector& dual_residual) +{ + dual_residual = z; + const i_t n = A.n; + // r = A'*y + z - c + for (i_t j = 0; j < n; ++j) { + dual_residual[j] -= objective[j]; + } + matrix_transpose_vector_multiply(A, 1.0, y, 1.0, dual_residual); +} template f_t l2_dual_residual(const lp_problem_t& lp, const lp_solution_t& solution) { - std::vector dual_residual = solution.z; - const i_t n = lp.num_cols; - // dual_residual <- z - c - for (i_t j = 0; j < n; j++) { - dual_residual[j] -= lp.objective[j]; - } - // dual_residual <- 1.0*A'*y + 1.0*(z - c) - matrix_transpose_vector_multiply(lp.A, 1.0, solution.y, 1.0, dual_residual); + std::vector dual_residual; + compute_dual_residual(lp.A, lp.objective, solution.y, solution.z, dual_residual); return vector_norm2(dual_residual); } @@ -461,7 +534,35 @@ f_t l2_primal_residual(const lp_problem_t& lp, const lp_solution_t(primal_residual); } +template +void vstatus_changes(const std::vector& vstatus, + const std::vector& vstatus_old, + const std::vector& z, + const std::vector& z_old, + i_t& num_vstatus_changes, + i_t& num_z_changes) +{ + num_vstatus_changes = 0; + num_z_changes = 0; + const i_t n = vstatus.size(); + for (i_t j = 0; j < n; ++j) { + if (vstatus[j] != vstatus_old[j]) { num_vstatus_changes++; } + if (std::abs(z[j] - z_old[j]) > 1e-6) { num_z_changes++; } + } +} +template +void compute_bounded_info(const std::vector& lower, + const std::vector& upper, + std::vector& bounded_variables) +{ + 
const size_t n = lower.size(); + for (size_t j = 0; j < n; j++) { + const bool bounded = + (lower[j] > -inf) && (upper[j] < inf) && (lower[j] != upper[j]); + bounded_variables[j] = bounded; + } +} template @@ -611,12 +712,11 @@ void update_single_primal_infeasibility(const std::vector& lower, primal_inf = std::max(0.0, primal_inf + (new_val - old_val)); // We are infeasible w.r.t the tolerance if (old_val == 0.0) { - //printf("New infeasibility %d %e\n", j, infeas); // This is a new infeasibility // We need to add it to the list infeasibility_indices.push_back(j); } else { - //printf("Already infeasible %d %e\n", j, infeas); + // Already infeasible } squared_infeasibilities[j] = new_val; } else { @@ -624,10 +724,9 @@ void update_single_primal_infeasibility(const std::vector& lower, if (old_val != 0.0) { // We were previously infeasible, primal_inf = std::max(0.0, primal_inf - old_val); - //printf("Now feasible %d %e\n", j, infeas); squared_infeasibilities[j] = now_feasible; } else { - //printf("Still feasible %d %e\n", j, infeas); + // Still feasible } } } @@ -668,16 +767,6 @@ void update_primal_infeasibilities(const lp_problem_t& lp, j, primal_inf); } - - // Update the entering variable - update_single_primal_infeasibility(lp.lower, - lp.upper, - x, - primal_tol, - squared_infeasibilities, - infeasibility_indices, - entering_index, - primal_inf); } template @@ -733,13 +822,6 @@ i_t steepest_edge_pricing_with_infeasibilities(const lp_problem_t& lp, for (i_t k = 0; k < nz; ++k) { const i_t j = infeasibility_indices[k]; const f_t squared_infeas = squared_infeasibilities[j]; -#if 0 - if (squared_infeas == now_feasible) - { - needs_clean_up = true; - continue; - } -#endif const f_t val = squared_infeas / dy_steepest_edge[j]; if (val > max_val || val == max_val && j > leaving_index) { max_val = val; @@ -749,21 +831,6 @@ i_t steepest_edge_pricing_with_infeasibilities(const lp_problem_t& lp, direction = lower_infeas >= upper_infeas ? 
1 : -1; } } -#if 0 - if (needs_clean_up) { - for (i_t k = 0; k < infeasibility_indices.size(); ++k) { - const i_t j = infeasibility_indices[k]; - const f_t squared_infeas = squared_infeasibilities[j]; - if (squared_infeas == now_feasible) { - // Set to the last element - const i_t sz = infeasibility_indices.size(); - infeasibility_indices[k] = infeasibility_indices[sz - 1]; - infeasibility_indices.pop_back(); - squared_infeasibilities[j] = 0.0; - } - } - } -#endif basic_leaving = leaving_index >= 0 ? basic_mark[leaving_index] : -1; return leaving_index; @@ -1014,120 +1081,6 @@ i_t phase2_ratio_test(const lp_problem_t& lp, return entering_index; } -template -i_t bound_flipping_ratio_test(const lp_problem_t& lp, - const simplex_solver_settings_t& settings, - f_t start_time, - const std::vector& vstatus, - const std::vector& nonbasic_list, - const std::vector& x, - std::vector& z, - std::vector& delta_z, - i_t direction, - i_t leaving_index, - f_t& step_length, - i_t& nonbasic_entering) -{ - const i_t n = lp.num_cols; - const i_t m = lp.num_rows; - - f_t slope = direction == 1 ? 
(lp.lower[leaving_index] - x[leaving_index]) - : (x[leaving_index] - lp.upper[leaving_index]); - assert(slope > 0); - - const f_t pivot_tol = settings.pivot_tol; - const f_t relaxed_pivot_tol = settings.pivot_tol; - const f_t zero_tol = settings.zero_tol; - std::list q_pos; - assert(nonbasic_list.size() == n - m); - for (i_t k = 0; k < n - m; ++k) { - const i_t j = nonbasic_list[k]; - if (vstatus[j] == variable_status_t::NONBASIC_FIXED) { continue; } - if (vstatus[j] == variable_status_t::NONBASIC_LOWER && delta_z[j] < -pivot_tol) { - q_pos.push_back(k); - } else if (vstatus[j] == variable_status_t::NONBASIC_UPPER && delta_z[j] > pivot_tol) { - q_pos.push_back(k); - } - } - i_t entering_index = -1; - step_length = inf; - const f_t dual_tol = settings.dual_tol / 10; - while (q_pos.size() > 0 && slope > 0) { - // Find the minimum ratio for nonbasic variables in q_pos - f_t min_val = inf; - typename std::list::iterator q_index; - i_t candidate = -1; - for (typename std::list::iterator it = q_pos.begin(); it != q_pos.end(); ++it) { - const i_t k = *it; - const i_t j = nonbasic_list[k]; - f_t ratio = inf; - if (vstatus[j] == variable_status_t::NONBASIC_LOWER && delta_z[j] < -pivot_tol) { - ratio = (-dual_tol - z[j]) / delta_z[j]; - } else if (vstatus[j] == variable_status_t::NONBASIC_UPPER && delta_z[j] > pivot_tol) { - ratio = (dual_tol - z[j]) / delta_z[j]; - } else if (min_val != inf) { - // We've already found something just continue; - } else if (vstatus[j] == variable_status_t::NONBASIC_LOWER) { - ratio = (-dual_tol - z[j]) / delta_z[j]; - } else if (vstatus[j] == variable_status_t::NONBASIC_UPPER) { - ratio = (dual_tol - z[j]) / delta_z[j]; - } else { - assert(1 == 0); - } - - ratio = std::max(ratio, 0.0); - - if (ratio < min_val) { - min_val = ratio; - q_index = it; // Save the iterator so we can remove the element it - // points to from the q_pos list later (if it corresponds - // to a bounded variable) - candidate = j; - } else if (ratio < min_val + zero_tol 
&& - std::abs(delta_z[j]) > std::abs(delta_z[candidate])) { - min_val = ratio; - q_index = it; - candidate = j; - } - } - step_length = min_val; // Save the step length - nonbasic_entering = *q_index; - const i_t j = entering_index = nonbasic_list[nonbasic_entering]; - if (lp.lower[j] > -inf && lp.upper[j] < inf && lp.lower[j] != lp.upper[j]) { - const f_t interval = lp.upper[j] - lp.lower[j]; - const f_t delta_slope = std::abs(delta_z[j]) * interval; -#ifdef BOUND_FLIP_DEBUG - if (slope - delta_slope > 0) { - settings.log.printf( - "Bound flip %d slope change %e prev slope %e slope %e. curr step " - "length %e\n", - j, - delta_slope, - slope, - slope - delta_slope, - step_length); - } -#endif - slope -= delta_slope; - q_pos.erase(q_index); - } else { - // we hit a variable that is not bounded. Exit - break; - } - - if (toc(start_time) > settings.time_limit) { return -2; } - if (settings.concurrent_halt != nullptr && - settings.concurrent_halt->load(std::memory_order_acquire) == 1) { - return -3; - } - } - // step_length, nonbasic_entering, and entering_index are defined after the - // while loop - assert(step_length >= 0); - - return entering_index; -} - template i_t flip_bounds(const lp_problem_t& lp, const simplex_solver_settings_t& settings, @@ -1143,7 +1096,6 @@ i_t flip_bounds(const lp_problem_t& lp, std::vector& atilde, std::vector& atilde_index) { - //f_t delta_obj = 0; i_t num_flipped = 0; for (i_t j : delta_z_indices) { if (j == entering_index) { continue; } @@ -1155,7 +1107,6 @@ i_t flip_bounds(const lp_problem_t& lp, if (vstatus[j] == variable_status_t::NONBASIC_LOWER && z[j] < -dual_tol) { const f_t delta = lp.upper[j] - lp.lower[j]; scatter_dense(lp.A, j, -delta, atilde, mark, atilde_index); - //delta_obj += delta * objective[j]; delta_x[j] += delta; vstatus[j] = variable_status_t::NONBASIC_UPPER; #ifdef BOUND_FLIP_DEBUG @@ -1166,7 +1117,6 @@ i_t flip_bounds(const lp_problem_t& lp, } else if (vstatus[j] == variable_status_t::NONBASIC_UPPER && z[j] > 
dual_tol) { const f_t delta = lp.lower[j] - lp.upper[j]; scatter_dense(lp.A, j, -delta, atilde, mark, atilde_index); - //delta_obj += delta * objective[j]; delta_x[j] += delta; vstatus[j] = variable_status_t::NONBASIC_LOWER; #ifdef BOUND_FLIP_DEBUG @@ -1179,6 +1129,24 @@ i_t flip_bounds(const lp_problem_t& lp, return num_flipped; } +template +void initialize_steepest_edge_norms_from_slack_basis(const std::vector& basic_list, + const std::vector& nonbasic_list, + std::vector& delta_y_steepest_edge) +{ + const i_t m = basic_list.size(); + const i_t n = delta_y_steepest_edge.size(); + for (i_t k = 0; k < m; ++k) { + const i_t j = basic_list[k]; + delta_y_steepest_edge[j] = 1.0; + } + const i_t n_minus_m = n - m; + for (i_t k = 0; k < n_minus_m; ++k) { + const i_t j = nonbasic_list[k]; + delta_y_steepest_edge[j] = 1e-4; + } +} + template i_t initialize_steepest_edge_norms(const lp_problem_t& lp, const simplex_solver_settings_t& settings, @@ -1187,9 +1155,6 @@ i_t initialize_steepest_edge_norms(const lp_problem_t& lp, basis_update_mpf_t& ft, std::vector& delta_y_steepest_edge) { - // TODO: Skip this initialization when starting from a slack basis - // Or skip individual columns corresponding to slack variables - const i_t m = basic_list.size(); // We want to compute B^T delta_y_i = -e_i @@ -1200,7 +1165,7 @@ i_t initialize_steepest_edge_norms(const lp_problem_t& lp, std::vector row_degree(m, 0); std::vector mapping(m, -1); std::vector coeff(m, 0.0); -#if 1 + for (i_t k = 0; k < m; ++k) { const i_t j = basic_list[k]; const i_t col_start = lp.A.col_start[j]; @@ -1214,46 +1179,40 @@ i_t initialize_steepest_edge_norms(const lp_problem_t& lp, } } +#ifdef CHECK_SINGLETON_ROWS csc_matrix_t B(m, m, 0); form_b(lp.A, basic_list, B); csc_matrix_t B_transpose(m, m, 0); B.transpose(B_transpose); +#endif i_t num_singleton_rows = 0; for (i_t i = 0; i < m; ++i) { if (row_degree[i] == 1) { num_singleton_rows++; +#ifdef CHECK_SINGLETON_ROWS const i_t col_start = B_transpose.col_start[i]; 
const i_t col_end = B_transpose.col_start[i + 1]; if (col_end - col_start != 1) { settings.log.printf("Singleton row %d has %d non-zero entries\n", i, col_end - col_start); } +#endif } } - - - settings.log.printf("Found %d singleton rows in %.2fs\n", num_singleton_rows, toc(start_singleton_rows)); - - //ft.compute_transposes(); -#endif + if (num_singleton_rows > 0) { + settings.log.printf("Found %d singleton rows for steepest edge norms in %.2fs\n", num_singleton_rows, toc(start_singleton_rows)); + } f_t last_log = tic(); for (i_t k = 0; k < m; ++k) { -#if 0 - std::vector ei(m); - ei[k] = -1.0; - std::vector dy(m, 0.0); -#else sparse_vector_t sparse_ei(m, 1); sparse_ei.x[0] = -1.0; sparse_ei.i[0] = k; -#endif const i_t j = basic_list[k]; f_t init = -1.0; if (row_degree[mapping[k]] == 1) { const i_t u = mapping[k]; - //settings.log.printf("Singleton row %d u %d\n", k, u); const f_t alpha = coeff[k]; //dy[u] = -1.0 / alpha; f_t my_init = 1.0 / (alpha * alpha); @@ -1335,37 +1294,18 @@ i_t update_steepest_edge_norms(const simplex_solver_settings_t& settin std::vector& delta_y_steepest_edge) { i_t m = basic_list.size(); - - //sparse_vector_t delta_y_sparse(delta_y); const i_t delta_y_nz = delta_y_sparse.i.size(); sparse_vector_t v_sparse(m, 0); - - if (0) - { - // B^T delta_y = - direction * e_basic_leaving_index - // We want B v = - B^{-T} e_basic_leaving_index - std::vector delta_y; - delta_y_sparse.to_dense(delta_y); - ft.b_solve(delta_y, v); - // if direction = -1 we need to scale v - if (direction == -1) { - for (i_t k = 0; k < m; ++k) { - v[k] *= -1; - } - } - } - else - { - ft.b_solve(delta_y_sparse, v_sparse); - if (direction == -1) { - for (i_t k = 0; k < v_sparse.i.size(); ++k) { - v_sparse.x[k] *= -1; - } + // B^T delta_y = - direction * e_basic_leaving_index + // We want B v = - B^{-T} e_basic_leaving_index + ft.b_solve(delta_y_sparse, v_sparse); + if (direction == -1) { + for (i_t k = 0; k < v_sparse.i.size(); ++k) { + v_sparse.x[k] *= -1; } - 
v_sparse.scatter(v); } + v_sparse.scatter(v); - //const f_t dy_norm_squared = delta_y_sparse.norm2_squared(); const i_t leaving_index = basic_list[basic_leaving_index]; const f_t prev_dy_norm_squared = delta_y_steepest_edge[leaving_index]; #ifdef STEEPEST_EDGE_DEBUG @@ -1382,19 +1322,10 @@ i_t update_steepest_edge_norms(const simplex_solver_settings_t& settin // B*w = A(:, leaving_index) // B*scaled_delta_xB = -A(:, leaving_index) so w = -scaled_delta_xB - f_t scale; - const i_t scaled_delta_xB_nz = scaled_delta_xB.i.size(); - for (i_t h = 0; h < scaled_delta_xB_nz; ++h) { - const i_t k = scaled_delta_xB.i[h]; - if (k == basic_leaving_index) { - scale = scaled_delta_xB.x[h]; - break; - } - } - const f_t wr = -scale; - //const f_t wr = -scaled_delta_xB.x[basic_leaving_index]; + const f_t wr = -scaled_delta_xB.find_coefficient(basic_leaving_index); if (wr == 0) { return -1; } const f_t omegar = dy_norm_squared / (wr * wr); + const i_t scaled_delta_xB_nz = scaled_delta_xB.i.size(); for (i_t h = 0; h < scaled_delta_xB_nz; ++h) { const i_t k = scaled_delta_xB.i[h]; const i_t j = basic_list[k]; @@ -1438,23 +1369,14 @@ i_t compute_steepest_edge_norm_entering(const simplex_solver_settings_t& ft, i_t basic_leaving_index, i_t entering_index, - f_t b_transpose_density, std::vector& steepest_edge_norms) { - if (0) { - std::vector es(m); - es[basic_leaving_index] = -1.0; - std::vector delta_ys(m); - ft.b_transpose_solve(es, delta_ys); - steepest_edge_norms[entering_index] = vector_norm2_squared(delta_ys); - } else { - sparse_vector_t es_sparse(m, 1); - es_sparse.i[0] = basic_leaving_index; - es_sparse.x[0] = -1.0; - sparse_vector_t delta_ys_sparse(m, 0); - ft.b_transpose_solve(es_sparse, delta_ys_sparse); - steepest_edge_norms[entering_index] = delta_ys_sparse.norm2_squared(); - } + sparse_vector_t es_sparse(m, 1); + es_sparse.i[0] = basic_leaving_index; + es_sparse.x[0] = -1.0; + sparse_vector_t delta_ys_sparse(m, 0); + ft.b_transpose_solve(es_sparse, delta_ys_sparse); + 
steepest_edge_norms[entering_index] = delta_ys_sparse.norm2_squared(); #ifdef STEEPEST_EDGE_DEBUG settings.log.printf("Steepest edge norm %e for entering j %d at i %d\n", @@ -1502,7 +1424,6 @@ i_t compute_perturbation(const lp_problem_t& lp, const f_t tight_tol = settings.tight_tol; i_t num_perturb = 0; sum_perturb = 0.0; - //for (i_t j = 0; j < n; ++j) { for (i_t k = 0; k < delta_z_indices.size(); ++k) { const i_t j = delta_z_indices[k]; if (lp.upper[j] == inf && lp.lower[j] > -inf && z[j] < -tight_tol) { @@ -1539,37 +1460,265 @@ i_t compute_perturbation(const lp_problem_t& lp, return 0; } -template -f_t dual_infeasibility(const lp_problem_t& lp, - const simplex_solver_settings_t& settings, - const std::vector& vstatus, - const std::vector& z, - f_t tight_tol, - f_t dual_tol) +template +void reset_basis_mark(const std::vector& basic_list, + const std::vector& nonbasic_list, + std::vector& basic_mark, + std::vector& nonbasic_mark) { - const i_t n = lp.num_cols; - const i_t m = lp.num_rows; - i_t num_infeasible = 0; - f_t sum_infeasible = 0.0; - i_t lower_bound_inf = 0; - i_t upper_bound_inf = 0; - i_t free_inf = 0; - i_t non_basic_lower_inf = 0; - i_t non_basic_upper_inf = 0; + const i_t m = basic_list.size(); + const i_t n = nonbasic_mark.size(); + const i_t n_minus_m = n - m; - for (i_t j = 0; j < n; ++j) { - if (vstatus[j] == variable_status_t::NONBASIC_FIXED) { continue; } - if (lp.upper[j] == inf && lp.lower[j] > -inf && z[j] < -tight_tol) { - // -inf < l_j <= x_j < inf, so need z_j > 0 to be feasible - num_infeasible++; - sum_infeasible += std::abs(z[j]); - lower_bound_inf++; - settings.log.debug("lower_bound_inf %d lower %e upper %e z %e vstatus %d\n", - j, - lp.lower[j], - lp.upper[j], - z[j], - static_cast(vstatus[j])); + for (i_t k = 0; k < n; k++) { + basic_mark[k] = -1; + } + + for (i_t k = 0; k < n; k++) { + nonbasic_mark[k] = -1; + } + + for (i_t k = 0; k < n_minus_m; k++) { + nonbasic_mark[nonbasic_list[k]] = k; + } + + for (i_t k = 0; k < m; k++) { 
+ basic_mark[basic_list[k]] = k; + } +} + +template +void compute_delta_y(const basis_update_mpf_t& ft, + i_t basic_leaving_index, + i_t direction, + sparse_vector_t& delta_y_sparse, + sparse_vector_t& UTsol_sparse) +{ + const i_t m = delta_y_sparse.n; + // BT*delta_y = -delta_zB = -sigma*ei + sparse_vector_t ei_sparse(m, 1); + ei_sparse.i[0] = basic_leaving_index; + ei_sparse.x[0] = -direction; + ft.b_transpose_solve(ei_sparse, delta_y_sparse, UTsol_sparse); + + if (direction != -1) { + // We solved BT*delta_y = -sigma*ei, but for the update we need + // UT*etilde = ei. So we need to flip the sign of the solution + // in the case that sigma == 1. + UTsol_sparse.negate(); + } + +#ifdef CHECK_B_TRANSPOSE_SOLVE + std::vector delta_y_sparse_vector_check(m); + delta_y_sparse.to_dense(delta_y_sparse_vector_check); + f_t error_check = 0.0; + for (i_t k = 0; k < m; ++k) + { + if (std::abs(delta_y[k] - delta_y_sparse_vector_check[k]) > 1e-6) + { + settings.log.printf("\tBTranspose error %d %e %e\n", k, delta_y[k], delta_y_sparse_vector_check[k]); + } + error_check += std::abs(delta_y[k] - delta_y_sparse_vector_check[k]); + } + if (error_check > 1e-6) { + settings.log.printf("BTranspose error %e\n", error_check); + } + std::vector residual(m); + b_transpose_multiply(lp, basic_list, delta_y_sparse_vector_check, residual); + for (i_t k = 0; k < m; ++k) + { + if (std::abs(residual[k] - ei[k]) > 1e-6) + { + settings.log.printf("\tBTranspose multiply error %d %e %e\n", k, residual[k], ei[k]); + } + } +#endif +} + +template +void update_dual_variables(const sparse_vector_t& delta_y_sparse, + const std::vector& delta_z_indices, + const std::vector& delta_z, + f_t step_length, + i_t leaving_index, + std::vector& y, + std::vector& z) +{ + // Update dual variables + // y <- y + steplength * delta_y + const i_t delta_y_nz = delta_y_sparse.i.size(); + for (i_t k = 0; k < delta_y_nz; ++k) { + const i_t i = delta_y_sparse.i[k]; + y[i] += step_length * delta_y_sparse.x[k]; + } + // z <- z 
+ steplength * delta_z + const i_t delta_z_nz = delta_z_indices.size(); + for (i_t k = 0; k < delta_z_nz; ++k) { + const i_t j = delta_z_indices[k]; + z[j] += step_length * delta_z[j]; + } + z[leaving_index] += step_length * delta_z[leaving_index]; +} + +template +void adjust_for_flips(const basis_update_mpf_t& ft, + const std::vector& basic_list, + const std::vector& delta_z_indices, + std::vector& atilde_index, + std::vector& atilde, + std::vector& atilde_mark, + sparse_vector_t& delta_xB_0_sparse, + std::vector& delta_x_flip, + std::vector& x) +{ + const i_t m = basic_list.size(); + const i_t atilde_nz = atilde_index.size(); + // B*delta_xB_0 = atilde + sparse_vector_t atilde_sparse(m, atilde_nz); + for (i_t k = 0; k < atilde_nz; ++k) { + atilde_sparse.i[k] = atilde_index[k]; + atilde_sparse.x[k] = atilde[atilde_index[k]]; + } + ft.b_solve(atilde_sparse, delta_xB_0_sparse); + const i_t delta_xB_0_nz = delta_xB_0_sparse.i.size(); + for (i_t k = 0; k < delta_xB_0_nz; ++k) { + const i_t j = basic_list[delta_xB_0_sparse.i[k]]; + x[j] += delta_xB_0_sparse.x[k]; + } + + for (i_t j : delta_z_indices) { + x[j] += delta_x_flip[j]; + delta_x_flip[j] = 0.0; + } + + // Clear atilde + for (i_t k = 0; k < atilde_index.size(); ++k) { + atilde[atilde_index[k]] = 0.0; + } + // Clear atilde_mark + for (i_t k = 0; k < atilde_mark.size(); ++k) { + atilde_mark[k] = 0; + } + atilde_index.clear(); +} + +template +void compute_delta_x(const lp_problem_t& lp, + const basis_update_mpf_t& ft, + i_t entering_index, + i_t leaving_index, + i_t basic_leaving_index, + i_t direction, + const std::vector& basic_list, + const std::vector& delta_x_flip, + const sparse_vector_t& rhs_sparse, + const std::vector& x, + sparse_vector_t& utilde_sparse, + sparse_vector_t& scaled_delta_xB_sparse, + std::vector& delta_x) +{ + f_t delta_x_leaving = direction == 1 ? 
lp.lower[leaving_index] - x[leaving_index] + : lp.upper[leaving_index] - x[leaving_index]; + // B*w = -A(:, entering) + ft.b_solve(rhs_sparse, scaled_delta_xB_sparse, utilde_sparse); + scaled_delta_xB_sparse.negate(); + + +#ifdef CHECK_B_SOLVE + std::vector scaled_delta_xB(m); + { + std::vector residual_B(m); + b_multiply(lp, basic_list, scaled_delta_xB, residual_B); + f_t err_max = 0; + for (i_t k = 0; k < m; ++k) { + const f_t err = std::abs(rhs[k] + residual_B[k]); + if (err >= 1e-6) { + settings.log.printf( + "Bsolve diff %d %e rhs %e residual %e\n", k, err, rhs[k], residual_B[k]); + } + err_max = std::max(err_max, err); + } + if (err_max > 1e-6) { settings.log.printf("B multiply error %e\n", err_max); } + } +#endif + + f_t scale = scaled_delta_xB_sparse.find_coefficient(basic_leaving_index); + assert(!std::isnan(scale)); + const f_t primal_step_length = delta_x_leaving / scale; + const i_t scaled_delta_xB_nz = scaled_delta_xB_sparse.i.size(); + for (i_t k = 0; k < scaled_delta_xB_nz; ++k) { + const i_t j = basic_list[scaled_delta_xB_sparse.i[k]]; + delta_x[j] = primal_step_length * scaled_delta_xB_sparse.x[k]; + } + delta_x[leaving_index] = delta_x_leaving; + delta_x[entering_index] = primal_step_length; +} + +template +void update_primal_variables(const sparse_vector_t& scaled_delta_xB_sparse, + const std::vector& basic_list, + const std::vector& delta_x, + i_t entering_index, + std::vector& x) +{ + // x <- x + delta_x + const i_t scaled_delta_xB_nz = scaled_delta_xB_sparse.i.size(); + for (i_t k = 0; k < scaled_delta_xB_nz; ++k) { + const i_t j = basic_list[scaled_delta_xB_sparse.i[k]]; + x[j] += delta_x[j]; + } + // Leaving index already included above + x[entering_index] += delta_x[entering_index]; +} + +template +void update_objective(const std::vector& basic_list, + const std::vector& changed_basic_indices, + const std::vector& objective, + const std::vector& delta_x, + i_t entering_index, + f_t& obj) +{ + const i_t changed_basic_nz = 
changed_basic_indices.size(); + for (i_t k = 0; k < changed_basic_nz; ++k) { + const i_t j = basic_list[changed_basic_indices[k]]; + obj += delta_x[j] * objective[j]; + } + // Leaving index already included above + obj += delta_x[entering_index] * objective[entering_index]; +} + +template +f_t dual_infeasibility(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& vstatus, + const std::vector& z, + f_t tight_tol, + f_t dual_tol) +{ + const i_t n = lp.num_cols; + const i_t m = lp.num_rows; + i_t num_infeasible = 0; + f_t sum_infeasible = 0.0; + i_t lower_bound_inf = 0; + i_t upper_bound_inf = 0; + i_t free_inf = 0; + i_t non_basic_lower_inf = 0; + i_t non_basic_upper_inf = 0; + + for (i_t j = 0; j < n; ++j) { + if (vstatus[j] == variable_status_t::NONBASIC_FIXED) { continue; } + if (lp.upper[j] == inf && lp.lower[j] > -inf && z[j] < -tight_tol) { + // -inf < l_j <= x_j < inf, so need z_j > 0 to be feasible + num_infeasible++; + sum_infeasible += std::abs(z[j]); + lower_bound_inf++; + settings.log.debug("lower_bound_inf %d lower %e upper %e z %e vstatus %d\n", + j, + lp.lower[j], + lp.upper[j], + z[j], + static_cast(vstatus[j])); } else if (lp.lower[j] == -inf && lp.upper[j] < inf && z[j] > tight_tol) { // -inf < x_j <= u_j < inf, so need z_j < 0 to be feasible num_infeasible++; @@ -1664,6 +1813,91 @@ f_t primal_infeasibility(const lp_problem_t& lp, return primal_inf; } +template +void check_primal_infeasibilities(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& basic_list, + const std::vector& x, + const std::vector& squared_infeasibilities, + const std::vector& infeasibility_indices) +{ + const i_t m = basic_list.size(); + for (i_t k = 0; k < m; ++k) { + const i_t j = basic_list[k]; + const f_t lower_infeas = lp.lower[j] - x[j]; + const f_t upper_infeas = x[j] - lp.upper[j]; + const f_t infeas = std::max(lower_infeas, upper_infeas); + if (infeas > settings.primal_tol) { + const f_t 
square_infeas = infeas * infeas; + if (square_infeas != squared_infeasibilities[j]) { + settings.log.printf("Primal infeasibility mismatch %d %e != %e\n", + j, + square_infeas, + squared_infeasibilities[j]); + } + bool found = false; + for (i_t h = 0; h < infeasibility_indices.size(); ++h) { + if (infeasibility_indices[h] == j) { + found = true; + break; + } + } + if (!found) { settings.log.printf("Infeasibility index not found %d\n", j); } + } + } +} + +template +void check_update(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const basis_update_t& ft, + const std::vector& basic_list, + const std::vector& basic_leaving_index) +{ + const i_t m = basic_list.size(); + csc_matrix_t Btest(m, m, 1); + ft.multiply_lu(Btest); + { + csc_matrix_t B(m, m, 1); + form_b(lp.A, basic_list, B); + csc_matrix_t Diff(m, m, 1); + add(Btest, B, 1.0, -1.0, Diff); + const f_t err = Diff.norm1(); + if (err > settings.primal_tol) { settings.log.printf("|| B - L*U || %e\n", Diff.norm1()); } + if (err > settings.primal_tol) { + for (i_t j = 0; j < m; ++j) { + for (i_t p = Diff.col_start[j]; p < Diff.col_start[j + 1]; ++p) { + const i_t i = Diff.i[p]; + if (Diff.x[p] != 0.0) { settings.log.printf("Diff %d %d %e\n", j, i, Diff.x[p]); } + } + } + } + settings.log.printf("basic leaving index %d\n", basic_leaving_index); + assert(err < settings.primal_tol); + } +} + +template +void check_basis_mark(const simplex_solver_settings_t& settings, + const std::vector& basic_list, + const std::vector& nonbasic_list, + const std::vector& basic_mark, + const std::vector& nonbasic_mark) +{ + const i_t m = basic_list.size(); + const i_t n = basic_mark.size(); + for (i_t k = 0; k < m; k++) { + if (basic_mark[basic_list[k]] != k) { + settings.log.printf("Basic mark %d %d\n", basic_list[k], k); + } + } + for (i_t k = 0; k < n - m; k++) { + if (nonbasic_mark[nonbasic_list[k]] != k) { + settings.log.printf("Nonbasic mark %d %d\n", nonbasic_list[k], k); + } + } +} + template void 
bound_info(const lp_problem_t& lp, const simplex_solver_settings_t& settings) @@ -1784,6 +2018,30 @@ void set_primal_variables_on_bounds(const lp_problem_t& lp, } } +template +f_t compute_perturbed_objective(const std::vector& objective, + const std::vector& x) +{ + const size_t n = objective.size(); + f_t obj_val = 0.0; + for (size_t j = 0; j < n; ++j) { + obj_val += objective[j] * x[j]; + } + return obj_val; +} + +template +f_t amount_of_perturbation(const lp_problem_t& lp, + const std::vector& objective) +{ + f_t perturbation = 0.0; + const i_t n = lp.num_cols; + for (i_t j = 0; j < n; ++j) { + perturbation += std::abs(lp.objective[j] - objective[j]); + } + return perturbation; +} + template void prepare_optimality(const lp_problem_t& lp, const simplex_solver_settings_t& settings, @@ -1806,11 +2064,7 @@ void prepare_optimality(const lp_problem_t& lp, sol.objective = compute_objective(lp, sol.x); sol.user_objective = compute_user_objective(lp, sol.objective); - f_t perturbation = 0.0; - for (i_t j = 0; j < n; ++j) { - perturbation += std::abs(lp.objective[j] - objective[j]); - } - + f_t perturbation = phase2::amount_of_perturbation(lp, objective); if (perturbation > 1e-6 && phase == 2) { // Try to remove perturbation std::vector unperturbed_y(m); @@ -1860,6 +2114,83 @@ void prepare_optimality(const lp_problem_t& lp, } } +template +class phase2_timers_t { + public: + phase2_timers_t(bool should_time) + : record_time(should_time), + bfrt_time(0), + pricing_time(0), + btran_time(0), + ftran_time(0), + flip_time(0), + delta_z_time(0), + se_norms_time(0), + se_entering_time(0), + lu_update_time(0), + perturb_time(0), + vector_time(0), + objective_time(0), + update_infeasibility_time(0) + { + } + + void start_timer() + { + if (!record_time) { return; } + start_time = tic(); + } + + f_t stop_timer() + { + if (!record_time) { return 0.0; } + return toc(start_time); + } + + + void print_timers(const simplex_solver_settings_t& settings) const + { + if (!record_time) { 
return; } + const f_t total_time = bfrt_time + pricing_time + btran_time + ftran_time + flip_time + + delta_z_time + lu_update_time + se_norms_time + se_entering_time + + perturb_time + vector_time + objective_time + update_infeasibility_time; + // clang-format off + settings.log.printf("BFRT time %.2fs %4.1f%\n", bfrt_time, 100.0 * bfrt_time / total_time); + settings.log.printf("Pricing time %.2fs %4.1f%\n", pricing_time, 100.0 * pricing_time / total_time); + settings.log.printf("BTran time %.2fs %4.1f%\n", btran_time, 100.0 * btran_time / total_time); + settings.log.printf("FTran time %.2fs %4.1f%\n", ftran_time, 100.0 * ftran_time / total_time); + settings.log.printf("Flip time %.2fs %4.1f%\n", flip_time, 100.0 * flip_time / total_time); + settings.log.printf("Delta_z time %.2fs %4.1f%\n", delta_z_time, 100.0 * delta_z_time / total_time); + settings.log.printf("LU update time %.2fs %4.1f%\n", lu_update_time, 100.0 * lu_update_time / total_time); + settings.log.printf("SE norms time %.2fs %4.1f%\n", se_norms_time, 100.0 * se_norms_time / total_time); + settings.log.printf("SE enter time %.2fs %4.1f%\n", se_entering_time, 100.0 * se_entering_time / total_time); + settings.log.printf("Perturb time %.2fs %4.1f%\n", perturb_time, 100.0 * perturb_time / total_time); + settings.log.printf("Vector time %.2fs %4.1f%\n", vector_time, 100.0 * vector_time / total_time); + settings.log.printf("Objective time %.2fs %4.1f%\n", objective_time, 100.0 * objective_time / total_time); + settings.log.printf("Inf update time %.2fs %4.1f%\n", update_infeasibility_time, 100.0 * update_infeasibility_time / total_time); + settings.log.printf("Sum %.2fs\n", total_time); + // clang-format on + } + f_t bfrt_time; + f_t pricing_time; + f_t btran_time; + f_t ftran_time; + f_t flip_time; + f_t delta_z_time; + f_t se_norms_time; + f_t se_entering_time; + f_t lu_update_time; + f_t perturb_time; + f_t vector_time; + f_t objective_time; + f_t update_infeasibility_time; + + + private: + f_t 
start_time; + bool record_time; +}; + } // namespace phase2 template @@ -1939,41 +2270,19 @@ dual::status_t dual_phase2(i_t phase, ft.b_transpose_solve(c_basic, y); if (toc(start_time) > settings.time_limit) { return dual::status_t::TIME_LIMIT; } constexpr bool print_norms = false; - if (print_norms) { + if constexpr (print_norms) { settings.log.printf( "|| y || %e || cB || %e\n", vector_norm_inf(y), vector_norm_inf(c_basic)); } - // zN = cN - N'*y - for (i_t k = 0; k < n - m; k++) { - const i_t j = nonbasic_list[k]; - // z_j <- c_j - z[j] = objective[j]; - - // z_j <- z_j - A(:, j)'*y - const i_t col_start = lp.A.col_start[j]; - const i_t col_end = lp.A.col_start[j + 1]; - f_t dot = 0.0; - for (i_t p = col_start; p < col_end; ++p) { - dot += lp.A.x[p] * y[lp.A.i[p]]; - } - z[j] -= dot; - } - // zB = 0 - for (i_t k = 0; k < m; ++k) { - z[basic_list[k]] = 0.0; - } - if (print_norms) { settings.log.printf("|| z || %e\n", vector_norm_inf(z)); } + phase2::compute_reduced_costs(objective, lp.A, y, basic_list, nonbasic_list, z); + if constexpr (print_norms) { settings.log.printf("|| z || %e\n", vector_norm_inf(z)); } #ifdef COMPUTE_DUAL_RESIDUAL - // || A'*y + z - c||_inf - std::vector dual_res1 = z; - for (i_t j = 0; j < n; ++j) { - dual_res1[j] -= objective[j]; - } - matrix_transpose_vector_multiply(lp.A, 1.0, y, 1.0, dual_res1); + std::vector dual_res1; + compute_dual_residual(lp.A, objective, y, z, dual_res1); f_t dual_res_norm = vector_norm_inf(dual_res1); - if (1 || dual_res_norm > settings.tight_tol) { + if (dual_res_norm > settings.tight_tol) { settings.log.printf("|| A'*y + z - c || %e\n", dual_res_norm); } assert(dual_res_norm < 1e-3); @@ -1982,15 +2291,11 @@ dual::status_t dual_phase2(i_t phase, phase2::set_primal_variables_on_bounds(lp, settings, z, vstatus, x); #ifdef PRINT_VSTATUS_CHANGES - i_t num_vstatus_changes = 0; - i_t num_z_changes = 0; - for (i_t j = 0; j < n; ++j) { - if (vstatus[j] != vstatus_old[j]) { num_vstatus_changes++; } - if (std::abs(z[j] 
- z_old[j]) > 1e-6) { num_z_changes++; } - } - - printf("Number of vstatus changes %d\n", num_vstatus_changes); - printf("Number of z changes %d\n", num_z_changes); + i_t num_vstatus_changes; + i_t num_z_changes; + phase2::vstatus_changes(vstatus, vstatus_old, z, z_old, num_vstatus_changes, num_z_changes); + settings.log.printf("Number of vstatus changes %d\n", num_vstatus_changes); + settings.log.printf("Number of z changes %d\n", num_z_changes); #endif const f_t init_dual_inf = @@ -2005,28 +2310,10 @@ dual::status_t dual_phase2(i_t phase, } } - std::vector rhs = lp.rhs; - // rhs = b - sum_{j : x_j = l_j} A(:, j) l(j) - sum_{j : x_j = u_j} A(:, j) * - // u(j) - for (i_t k = 0; k < n - m; ++k) { - const i_t j = nonbasic_list[k]; - const i_t col_start = lp.A.col_start[j]; - const i_t col_end = lp.A.col_start[j + 1]; - const f_t xj = x[j]; - if (std::abs(xj) < settings.tight_tol * 10) continue; - for (i_t p = col_start; p < col_end; ++p) { - rhs[lp.A.i[p]] -= xj * lp.A.x[p]; - } - } + phase2::compute_primal_variables( + ft, lp.rhs, lp.A, basic_list, nonbasic_list, settings.tight_tol, x); - std::vector xB(m); - ft.b_solve(rhs, xB); if (toc(start_time) > settings.time_limit) { return dual::status_t::TIME_LIMIT; } - - for (i_t k = 0; k < m; ++k) { - const i_t j = basic_list[k]; - x[j] = xB[k]; - } if (print_norms) { settings.log.printf("|| x || %e\n", vector_norm2(x)); } #ifdef COMPUTE_PRIMAL_RESIDUAL @@ -2041,14 +2328,7 @@ dual::status_t dual_phase2(i_t phase, if (delta_y_steepest_edge.size() == 0) { delta_y_steepest_edge.resize(n); if (slack_basis) { - for (i_t k = 0; k < m; ++k) { - const i_t j = basic_list[k]; - delta_y_steepest_edge[j] = 1.0; - } - for (i_t k = 0; k < n - m; ++k) { - const i_t j = nonbasic_list[k]; - delta_y_steepest_edge[j] = 1e-4; - } + phase2::initialize_steepest_edge_norms_from_slack_basis(basic_list, nonbasic_list, delta_y_steepest_edge); } else { std::fill(delta_y_steepest_edge.begin(), delta_y_steepest_edge.end(), -1); if 
(phase2::initialize_steepest_edge_norms(lp, @@ -2071,28 +2351,18 @@ dual::status_t dual_phase2(i_t phase, std::vector atilde(m, 0.0); std::vector atilde_mark(m, 0); std::vector atilde_index; - std::vector nonbasic_mark(n, -1); - std::vector basic_mark(n, -1); + std::vector nonbasic_mark(n); + std::vector basic_mark(n); std::vector delta_z_mark(n, 0); std::vector delta_z_indices; std::vector v(m, 0.0); std::vector squared_infeasibilities; std::vector infeasibility_indices; - for (i_t k = 0; k < n - m; k++) { - nonbasic_mark[nonbasic_list[k]] = k; - } - - for (i_t k = 0; k < m; k++) { - basic_mark[basic_list[k]] = k; - } + phase2::reset_basis_mark(basic_list, nonbasic_list, basic_mark, nonbasic_mark); std::vector bounded_variables(n, false); - for (i_t j = 0; j < n; j++) { - const bool bounded = - (lp.lower[j] > -inf) && (lp.upper[j] < inf) && (lp.lower[j] != lp.upper[j]); - bounded_variables[j] = bounded; - } + phase2::compute_bounded_info(lp.lower, lp.upper, bounded_variables); f_t primal_infeasibility = phase2::compute_initial_primal_infeasibilities( lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); @@ -2101,32 +2371,14 @@ dual::status_t dual_phase2(i_t phase, csc_matrix_t A_transpose(1, 1, 0); lp.A.transpose(A_transpose); - f_t obj = compute_objective(lp, x); settings.log.printf("Initial objective %e\n", obj); const i_t start_iter = iter; - f_t b_transpose_solve_density = 0.0; - f_t b_solve_density = 0.0; - i_t sparse_delta_z = 0; i_t dense_delta_z = 0; - - f_t bfrt_time = 0; - f_t pricing_time = 0; - f_t btran_time = 0; - f_t ftran_time = 0; - f_t flip_time = 0; - f_t delta_z_time = 0; - f_t se_norms_time = 0; - f_t se_entering_time = 0; - f_t lu_update_time = 0; - f_t perturb_time = 0; - f_t vector_time = 0; - f_t objective_time = 0; - f_t update_infeasibility_time = 0; - bool restart_steepest_edge = true; + phase2::phase2_timers_t timers(false); while (iter < iter_limit) { // Pricing @@ -2134,24 +2386,8 @@ dual::status_t 
dual_phase2(i_t phase, i_t basic_leaving_index = -1; i_t leaving_index = -1; f_t max_val; - f_t price_start_time = tic(); + timers.start_timer(); if (settings.use_steepest_edge_pricing) { -#if 0 - i_t direction_junk = 0; - i_t leaving_index_junk = -1; - f_t max_val_junk = 0.0; - f_t primal_inf_junk = 0; - i_t basic_leaving_index_junk = -1; - leaving_index_junk = phase2::steepest_edge_pricing(lp, - settings, - x, - delta_y_steepest_edge, - basic_list, - direction_junk, - basic_leaving_index_junk, - primal_inf_junk, - max_val_junk); -#else leaving_index = phase2::steepest_edge_pricing_with_infeasibilities(lp, settings, x, @@ -2162,28 +2398,12 @@ dual::status_t dual_phase2(i_t phase, direction, basic_leaving_index, max_val); -#endif -#if 0 - if (leaving_index != leaving_index_junk || basic_leaving_index != basic_leaving_index_junk || max_val != max_val_junk || direction != direction_junk) { - printf("Leaving index %d %d Basic leaving index %d %d max_val %e %e\n", leaving_index, leaving_index_junk, basic_leaving_index, basic_leaving_index_junk, max_val, max_val_junk); - printf("Direction %d %d\n", direction, direction_junk); - - if (leaving_index >= 0 && leaving_index_junk >= 0) { - printf("Squared infeasibilities %d %e %d %e\n", leaving_index, squared_infeasibilities[leaving_index] / delta_y_steepest_edge[leaving_index], leaving_index_junk, squared_infeasibilities[leaving_index_junk] / delta_y_steepest_edge[leaving_index_junk]); - } - else - { - printf("Trying to print bad stuff\n"); - } - } - // printf("Leaving index %d\n", leaving_index); -#endif } else { // Max infeasibility pricing leaving_index = phase2::phase2_pricing( lp, settings, x, basic_list, direction, basic_leaving_index, primal_infeasibility); } - pricing_time += toc(price_start_time); + timers.pricing_time += timers.stop_timer(); if (leaving_index == -1) { phase2::prepare_optimality(lp, settings, @@ -2206,77 +2426,17 @@ dual::status_t dual_phase2(i_t phase, // BTran // BT*delta_y = -delta_zB = -sigma*ei 
- f_t btran_start_time = tic(); + timers.start_timer(); sparse_vector_t delta_y_sparse(m, 0); sparse_vector_t UTsol_sparse(m, 0); - if (0) { - std::vector ei(m, 0.0); - ei[basic_leaving_index] = -direction; - - std::vector UTsol; - ft.b_transpose_solve(ei, delta_y, UTsol); + phase2::compute_delta_y(ft, basic_leaving_index, direction, delta_y_sparse, UTsol_sparse); + timers.btran_time += timers.stop_timer(); - if (ei[basic_leaving_index] != 1.0) { - // Need to flip the sign of UTsol - for (i_t k = 0; k < m; ++k) { - UTsol[k] *= -1.0; - } - } - sparse_vector_t dy_sparse(delta_y); - sparse_vector_t UT_sparse(UTsol); - delta_y_sparse = dy_sparse; - UTsol_sparse = UT_sparse; - b_transpose_solve_density = delta_y_sparse.i.size() / static_cast(m); - } else { - sparse_vector_t ei_sparse(m, 1); - ei_sparse.i[0] = basic_leaving_index; - ei_sparse.x[0] = -direction; - ft.b_transpose_solve(ei_sparse, delta_y_sparse, UTsol_sparse); - - if (direction != -1) { - // We solved BT*delta_y = -sigma*ei, but for the update we need - // UT*etilde = ei. So we need to flip the sign of the solution - // in the case that sigma == 1. 
- for (i_t k = 0; k < UTsol_sparse.x.size(); ++k) { - UTsol_sparse.x[k] *= -1.0; - } - } - } - -#if 0 - std::vector delta_y_sparse_vector_check(m); - delta_y_sparse.to_dense(delta_y_sparse_vector_check); - f_t error_check = 0.0; - for (i_t k = 0; k < m; ++k) - { - if (std::abs(delta_y[k] - delta_y_sparse_vector_check[k]) > 1e-6) - { - settings.log.printf("\tBTranspose error %d %e %e\n", k, delta_y[k], delta_y_sparse_vector_check[k]); - } - error_check += std::abs(delta_y[k] - delta_y_sparse_vector_check[k]); - } - if (error_check > 1e-6) { - settings.log.printf("BTranspose error %e\n", error_check); - } - std::vector residual(m); - b_transpose_multiply(lp, basic_list, delta_y_sparse_vector_check, residual); - for (i_t k = 0; k < m; ++k) - { - if (std::abs(residual[k] - ei[k]) > 1e-6) - { - settings.log.printf("\tBTranspose multiply error %d %e %e\n", k, residual[k], ei[k]); - } - } -#endif -#if 1 const f_t steepest_edge_norm_check = delta_y_sparse.norm2_squared(); -#else - f_t steepest_edge_norm_check = vector_norm2_squared(delta_y); -#endif - if (restart_steepest_edge && delta_y_steepest_edge[leaving_index] < + if (delta_y_steepest_edge[leaving_index] < settings.steepest_edge_ratio * steepest_edge_norm_check) { constexpr bool verbose = false; - if (verbose) { + if constexpr (verbose) { settings.log.printf( "iteration restart due to steepest edge. Leaving %d. 
Actual %.2e " "from update %.2e\n", @@ -2288,8 +2448,6 @@ dual::status_t dual_phase2(i_t phase, continue; } - btran_time += toc(btran_start_time); - #ifdef COMPUTE_BTRANSPOSE_RESIDUAL { std::vector res(m); @@ -2304,8 +2462,7 @@ dual::status_t dual_phase2(i_t phase, } #endif - f_t delta_z_start_time = tic(); - + timers.start_timer(); i_t delta_y_nz0 = 0; const i_t nz_delta_y = delta_y_sparse.i.size(); for (i_t k = 0; k < nz_delta_y; k++) { @@ -2313,12 +2470,7 @@ dual::status_t dual_phase2(i_t phase, delta_y_nz0++; } } - const f_t dy_percent = static_cast(delta_y_nz0) / static_cast(nz_delta_y) * 100.0; - if (dy_percent < 10.0) { - //settings.log.printf("delta_y_nz0 %d nz_delta_y %d percentage %.1f\n", delta_y_nz0, nz_delta_y, dy_percent); - } const f_t delta_y_nz_percentage = delta_y_nz0 / static_cast(m) * 100.0; - //const bool use_transpose = phase2::use_transpose_for_delta_z(lp, A_transpose, delta_y_sparse, nonbasic_list); const bool use_transpose = delta_y_nz_percentage <= 30.0; if (use_transpose) { sparse_delta_z++; @@ -2344,40 +2496,13 @@ dual::status_t dual_phase2(i_t phase, delta_z_indices, delta_z); } - -#if 0 - if (use_transpose) - { - delta_y_sparse.to_dense(delta_y); - std::vector delta_z_check(n); - std::vector delta_z_mark_check(n, 0); - std::vector delta_z_indices_check ; - phase2::compute_reduced_cost_update(lp, - basic_list, - nonbasic_list, - delta_y, - leaving_index, - direction, - delta_z_mark_check, - delta_z_indices_check, - delta_z_check); - f_t error_check = 0.0; - for (i_t k = 0; k < n; ++k) { - const f_t diff = std::abs(delta_z[k] - delta_z_check[k]); - if (diff > 1e-6) { - settings.log.printf("delta_z error %d transpose %e no transpose %e diff %e\n", k, delta_z[k], delta_z_check[k], diff); - } - error_check = std::max(error_check, diff); - } - if (error_check > 1e-6) { settings.log.printf("delta_z error %e\n", error_check); } - } -#endif - delta_z_time += toc(delta_z_start_time); + timers.delta_z_time += timers.stop_timer(); #ifdef 
COMPUTE_DUAL_RESIDUAL - std::vector dual_residual = delta_z; + std::vector dual_residual; + std::vector zeros(n, 0.0); + phase2::compute_dual_residual(lp.A, zeros, delta_y, delta_z, dual_residual); // || A'*delta_y + delta_z ||_inf - matrix_transpose_vector_multiply(lp.A, 1.0, delta_y, 1.0, dual_residual); f_t dual_residual_norm = vector_norm_inf(dual_residual); settings.log.printf("|| A'*dy - dz || %e use transpose %d\n", dual_residual_norm, use_transpose); #endif @@ -2399,55 +2524,25 @@ dual::status_t dual_phase2(i_t phase, step_length, nonbasic_entering_index); } else if (bound_flip_ratio) { - f_t bfrt_start = tic(); -#if 1 + timers.start_timer(); f_t slope = direction == 1 ? (lp.lower[leaving_index] - x[leaving_index]) : (x[leaving_index] - lp.upper[leaving_index]); - bound_flipping_ratio_test_t bfrt(settings, start_time, m, n, slope, lp.lower, lp.upper, bounded_variables, vstatus, nonbasic_list, z, delta_z, delta_z_indices, nonbasic_mark); + bound_flipping_ratio_test_t bfrt(settings, + start_time, + m, + n, + slope, + lp.lower, + lp.upper, + bounded_variables, + vstatus, + nonbasic_list, + z, + delta_z, + delta_z_indices, + nonbasic_mark); entering_index = bfrt.compute_step_length(step_length, nonbasic_entering_index); - if constexpr (0) - { - f_t shadow_step_length; - i_t shadow_nonbasic_entering_index; - i_t shadow_entering_index = phase2::bound_flipping_ratio_test(lp, - settings, - start_time, - vstatus, - nonbasic_list, - x, - z, - delta_z, - direction, - leaving_index, - shadow_step_length, - shadow_nonbasic_entering_index); - if (shadow_nonbasic_entering_index != nonbasic_entering_index) - { - settings.log.printf( - "step diff %e shadow step length %e step length %e shadow nonbasic entering %d " - "nonbasic entering %d\n", - step_length - shadow_step_length, - shadow_step_length, - step_length, - shadow_nonbasic_entering_index, - nonbasic_entering_index); - } - } -#else - entering_index = phase2::bound_flipping_ratio_test(lp, - settings, - start_time, - 
vstatus, - nonbasic_list, - x, - z, - delta_z, - direction, - leaving_index, - step_length, - nonbasic_entering_index); -#endif - bfrt_time += toc(bfrt_start); + timers.bfrt_time += timers.stop_timer(); } else { entering_index = phase2::phase2_ratio_test( lp, settings, vstatus, nonbasic_list, z, delta_z, step_length, nonbasic_entering_index); @@ -2457,38 +2552,7 @@ dual::status_t dual_phase2(i_t phase, if (entering_index == -1) { settings.log.printf("No entering variable found. Iter %d\n", iter); settings.log.printf("Scaled infeasibility %e\n", max_val); - - - f_t primal_inf_check = 0.0; - i_t num_infeasible = 0; - f_t max_primal_infeas = 0.0; - primal_infeasibility = 0.0; - for (i_t k = 0; k < m; ++k) { - const i_t j = basic_list[k]; - const f_t lower_infeas = lp.lower[j] - x[j]; - const f_t upper_infeas = x[j] - lp.upper[j]; - const f_t infeas = std::max(lower_infeas, upper_infeas); - if (infeas > settings.primal_tol) { - primal_inf_check += infeas; - num_infeasible++; - primal_infeasibility += infeas * infeas; - squared_infeasibilities[j] = infeas * infeas; - max_primal_infeas = std::max(max_primal_infeas, infeas); - } - } - - for (i_t j = 0; j < n; ++j) - { - delta_y_steepest_edge[j] = 1.0; - } - - restart_steepest_edge = false; - settings.log.printf("Max Primal infeasibility %e Sum Primal infeasibility %e Num infeasible %d\n", max_primal_infeas, primal_inf_check, num_infeasible); - f_t perturbation = 0.0; - for (i_t j = 0; j < n; ++j) { - perturbation += std::abs(lp.objective[j] - objective[j]); - } - settings.log.printf("Perturbation %e\n", perturbation); + f_t perturbation = phase2::amount_of_perturbation(lp, objective); if (perturbation > 0.0 && phase == 2) { // Try to remove perturbation @@ -2513,13 +2577,8 @@ dual::status_t dual_phase2(i_t phase, settings.log.printf("Updated primal infeasibility: %e\n", primal_infeasibility); objective = lp.objective; - - obj = 0.0; - for (i_t j = 0; j < n; ++j) - { - obj += objective[j] * x[j]; - } - + // Need to reset 
the objective value, since we have recomputed x + obj = phase2::compute_perturbed_objective(objective, x); if (dual_infeas <= settings.dual_tol && primal_infeasibility <= settings.primal_tol) { phase2::prepare_optimality(lp, @@ -2540,10 +2599,8 @@ dual::status_t dual_phase2(i_t phase, status = dual::status_t::OPTIMAL; break; } - - settings.log.printf("Continuing with perturbation removed and steepest edge norms reset\n"); - // Clear delta_z + // Clear delta_z before restarting the iteration phase2::clear_delta_z(entering_index, leaving_index, delta_z_mark, delta_z_indices, delta_z); continue; } else { @@ -2552,64 +2609,36 @@ dual::status_t dual_phase2(i_t phase, } } - if (perturbation == 0.0 && phase == 2) - { - - constexpr bool use_farkas = true; - - if constexpr (use_farkas) { - std::vector farkas_y; - std::vector farkas_zl; - std::vector farkas_zu; - f_t farkas_constant; - std::vector my_delta_y; - delta_y_sparse.to_dense(my_delta_y); - - - f_t obj_val = 0.0; - for (i_t j = 0; j < n; ++j) - { - obj_val += objective[j] * x[j]; - } - phase2::compute_farkas_certificate(lp, - settings, - vstatus, - x, - y, - z, - my_delta_y, - delta_z, - direction, - leaving_index, - obj_val, - farkas_y, - farkas_zl, - farkas_zu, - farkas_constant); - } + if (perturbation == 0.0 && phase == 2) { + constexpr bool use_farkas = true; + if constexpr (use_farkas) { + std::vector farkas_y; + std::vector farkas_zl; + std::vector farkas_zu; + f_t farkas_constant; + std::vector my_delta_y; + delta_y_sparse.to_dense(my_delta_y); + + // TODO(CMM): Do I use the perturbed or unperturbed objective? 
+ const f_t obj_val = phase2::compute_perturbed_objective(objective, x); + phase2::compute_farkas_certificate(lp, + settings, + vstatus, + x, + y, + z, + my_delta_y, + delta_z, + direction, + leaving_index, + obj_val, + farkas_y, + farkas_zl, + farkas_zu, + farkas_constant); + } } - if (max_val < 2e-8) { - // We could be done - settings.log.printf("Exiting due to small primal infeasibility se %e\n", max_val); - phase2::prepare_optimality(lp, - settings, - ft, - objective, - basic_list, - nonbasic_list, - vstatus, - phase, - start_time, - max_val, - iter, - x, - y, - z, - sol); - status = dual::status_t::OPTIMAL; - break; - } const f_t dual_infeas = phase2::dual_infeasibility(lp, settings, vstatus, z, settings.tight_tol, settings.dual_tol); settings.log.printf("Dual infeasibility %e\n", dual_infeas); @@ -2626,210 +2655,68 @@ dual::status_t dual_phase2(i_t phase, } - f_t vector_y_z_start_time = tic(); + timers.start_timer(); // Update dual variables - - - #if 1 - const i_t delta_y_nz = delta_y_sparse.i.size(); - for (i_t k = 0; k < delta_y_nz; ++k) { - const i_t i = delta_y_sparse.i[k]; - y[i] += step_length * delta_y_sparse.x[k]; - } - const i_t delta_z_nz = delta_z_indices.size(); - for (i_t k = 0; k < delta_z_nz; ++k) { - const i_t j = delta_z_indices[k]; - z[j] += step_length * delta_z[j]; - } - z[leaving_index] += step_length * delta_z[leaving_index]; - #else - // y <- y + steplength * delta_y - for (i_t i = 0; i < m; ++i) { - y[i] += step_length * delta_y[i]; - } // z <- z + steplength * delta_z - for (i_t j = 0; j < n; ++j) { - z[j] += step_length * delta_z[j]; - } -#endif - vector_time += toc(vector_y_z_start_time); + phase2::update_dual_variables(delta_y_sparse, delta_z_indices, delta_z, step_length, leaving_index, y, z); + timers.vector_time += timers.stop_timer(); #ifdef COMPUTE_DUAL_RESIDUAL - dual_res1 = z; - for (i_t j = 0; j < n; ++j) { - dual_res1[j] -= objective[j]; - } - matrix_transpose_vector_multiply(lp.A, 1.0, y, 1.0, dual_res1); + 
phase2::compute_dual_residual(lp.A, objective, y, z, dual_res1); f_t dual_res_norm = vector_norm_inf(dual_res1); if (dual_res_norm > settings.dual_tol) { settings.log.printf("|| A'*y + z - c || %e steplength %e\n", dual_res_norm, step_length); } #endif - f_t flip_start_time = tic(); + timers.start_timer(); // Update primal variable - - const i_t num_flipped = phase2::flip_bounds( - lp, settings, bounded_variables, objective, z, delta_z_indices, nonbasic_list, entering_index, vstatus, delta_x_flip, atilde_mark, atilde, atilde_index); - - flip_time += toc(flip_start_time); + const i_t num_flipped = phase2::flip_bounds(lp, + settings, + bounded_variables, + objective, + z, + delta_z_indices, + nonbasic_list, + entering_index, + vstatus, + delta_x_flip, + atilde_mark, + atilde, + atilde_index); + + timers.flip_time += timers.stop_timer(); sparse_vector_t delta_xB_0_sparse(m, 0); - - f_t ftran_start_time = tic(); - if (num_flipped > 0) { - //settings.log.printf("Flipped %6d bounds. Dz nz %.2f Atilde nz %6d %.2f %\n", num_flipped, static_cast(delta_z_indices.size()) / static_cast(n -m) * 100.0, atilde_index.size(), static_cast(atilde_index.size()) / static_cast(m) * 100.0); - const i_t atilde_nz = atilde_index.size(); - if (1) { - // B*delta_xB_0 = atilde - sparse_vector_t atilde_sparse(m, atilde_nz); - for (i_t k = 0; k < atilde_nz; ++k) { - atilde_sparse.i[k] = atilde_index[k]; - atilde_sparse.x[k] = atilde[atilde_index[k]]; - } - ft.b_solve(atilde_sparse, delta_xB_0_sparse); - const i_t delta_xB_0_nz = delta_xB_0_sparse.i.size(); - for (i_t k = 0; k < delta_xB_0_nz; ++k) { - const i_t j = basic_list[delta_xB_0_sparse.i[k]]; - x[j] += delta_xB_0_sparse.x[k]; - } - } else { - // B*delta_xB_0 = atilde - std::vector delta_xB_0(m); - ft.b_solve(atilde, delta_xB_0); - for (i_t k = 0; k < m; ++k) { - const i_t j = basic_list[k]; - x[j] += delta_xB_0[k]; - } - } - -#if 1 - for (i_t j : delta_z_indices) { - x[j] += delta_x_flip[j]; - delta_x_flip[j] = 0.0; - } -#else - for 
(i_t k = 0; k < n - m; ++k) { - const i_t j = nonbasic_list[k]; - x[j] += delta_x_flip[j]; - } -#endif - - // Clear atilde - for (i_t k = 0; k < atilde_index.size(); ++k) - { - atilde[atilde_index[k]] = 0.0; - } - // Clear atilde_mark - for (i_t k = 0; k < atilde_mark.size(); ++k) - { - atilde_mark[k] = 0; - } - atilde_index.clear(); + timers.start_timer(); + phase2::adjust_for_flips( + ft, basic_list, delta_z_indices, atilde_index, atilde, atilde_mark, delta_xB_0_sparse, delta_x_flip, x); + timers.ftran_time += timers.stop_timer(); } - f_t delta_x_leaving; - if (direction == 1) { - delta_x_leaving = lp.lower[leaving_index] - x[leaving_index]; - } else { - delta_x_leaving = lp.upper[leaving_index] - x[leaving_index]; - } - // B*w = -A(:, entering) - std::vector scaled_delta_xB(m); - const i_t col_nz = lp.A.col_start[entering_index + 1] - lp.A.col_start[entering_index]; - std::vector utilde(m); + timers.start_timer(); sparse_vector_t utilde_sparse(m, 0); sparse_vector_t scaled_delta_xB_sparse(m, 0); - if (0) - { - std::fill(rhs.begin(), rhs.end(), 0.0); - lp.A.load_a_column(entering_index, rhs); - ft.b_solve(rhs, scaled_delta_xB, utilde); - for (i_t i = 0; i < m; ++i) { - scaled_delta_xB[i] *= -1.0; - } - sparse_vector_t dxB_sparse(scaled_delta_xB); - sparse_vector_t ut_sparse(utilde); - scaled_delta_xB_sparse = dxB_sparse; - utilde_sparse = ut_sparse; - b_solve_density = scaled_delta_xB_sparse.i.size() / static_cast(m); - } - else - { - sparse_vector_t rhs_sparse(lp.A, entering_index); - ft.b_solve(rhs_sparse, scaled_delta_xB_sparse, utilde_sparse); - const i_t xB_nz = scaled_delta_xB_sparse.i.size(); - for (i_t k = 0; k < xB_nz; ++k) - { - scaled_delta_xB_sparse.x[k] *= -1.0; - } - scaled_delta_xB_sparse.to_dense(scaled_delta_xB); - utilde_sparse.to_dense(utilde); - b_solve_density = static_cast(xB_nz) / static_cast(m); -#if 0 - rhs_sparse.to_dense(rhs); -#endif - } - -#if 0 - { - std::vector residual_B(m); - b_multiply(lp, basic_list, scaled_delta_xB, 
residual_B); - f_t err_max = 0; - for (i_t k = 0; k < m; ++k) { - const f_t err = std::abs(rhs[k] + residual_B[k]); - if (err >= 1e-6) { - settings.log.printf( - "Bsolve diff %d %e rhs %e residual %e\n", k, err, rhs[k], residual_B[k]); - } - err_max = std::max(err_max, err); - } - if (err_max > 1e-6) - { - printf("B multiply error %e\n", err_max); - } - } -#endif - - ftran_time += toc(ftran_start_time); - - f_t delta_x_change_start_time = tic(); - -#if 1 - f_t scale; - const i_t scaled_delta_xB_nz = scaled_delta_xB_sparse.i.size(); - for (i_t k = 0; k < scaled_delta_xB_nz; ++k) { - if (scaled_delta_xB_sparse.i[k] == basic_leaving_index) { - scale = scaled_delta_xB_sparse.x[k]; - break; - } - } - f_t primal_step_length = delta_x_leaving / scale; - for (i_t k = 0; k < scaled_delta_xB_nz; ++k) { - const i_t j = basic_list[scaled_delta_xB_sparse.i[k]]; - delta_x[j] = primal_step_length * scaled_delta_xB_sparse.x[k]; - } - delta_x[leaving_index] = delta_x_leaving; - delta_x[entering_index] = primal_step_length; -#else - f_t primal_step_length = delta_x_leaving / scaled_delta_xB[basic_leaving_index]; - std::vector delta_x(n, 0.0); - for (i_t k = 0; k < m; ++k) { - const i_t j = basic_list[k]; - delta_x[j] = primal_step_length * scaled_delta_xB[k]; - } - delta_x[leaving_index] = delta_x_leaving; - for (i_t k = 0; k < n - m; k++) { - const i_t j = nonbasic_list[k]; - delta_x[j] = 0.0; - } - delta_x[entering_index] = primal_step_length; -#endif - vector_time += toc(delta_x_change_start_time); - -#if 0 + sparse_vector_t rhs_sparse(lp.A, entering_index); + phase2::compute_delta_x(lp, + ft, + entering_index, + leaving_index, + basic_leaving_index, + direction, + basic_list, + delta_x_flip, + rhs_sparse, + x, + utilde_sparse, + scaled_delta_xB_sparse, + delta_x); + + timers.vector_time += timers.stop_timer(); + +#ifdef CHECK_PRIMAL_STEP std::vector residual(m); matrix_vector_multiply(lp.A, 1.0, delta_x, 1.0, residual); f_t primal_step_err = vector_norm_inf(residual); @@ -2837,7 
+2724,7 @@ dual::status_t dual_phase2(i_t phase, #endif - f_t steepest_edge_norms_start_time = tic(); + timers.start_timer(); const i_t steepest_edge_status = phase2::update_steepest_edge_norms(settings, basic_list, ft, @@ -2852,34 +2739,16 @@ dual::status_t dual_phase2(i_t phase, #ifdef STEEPEST_EDGE_DEBUG if (steepest_edge_status == -1) { settings.log.printf("Num updates %d\n", ft.num_updates()); - settings.log.printf(" Primal step length %e\n", primal_step_length); - settings.log.printf("|| delta_xB || %e\n", vector_norm_inf(scaled_delta_xB)); settings.log.printf("|| rhs || %e\n", vector_norm_inf(rhs)); } #endif assert(steepest_edge_status == 0); + timers.se_norms_time += timers.stop_timer(); - se_norms_time += toc(steepest_edge_norms_start_time); - - f_t vector_x_start_time = tic(); + timers.start_timer(); // x <- x + delta_x -#if 1 - - //std::vector x_check = x; - for (i_t k = 0; k < scaled_delta_xB_nz; ++k) { - const i_t j = basic_list[scaled_delta_xB_sparse.i[k]]; - x[j] += delta_x[j]; - } - // Leaving index already included above - x[entering_index] += delta_x[entering_index]; -#else - - for (i_t j = 0; j < n; ++j) { - x[j] += delta_x[j]; - } - -#endif - vector_time += toc(vector_x_start_time); + phase2::update_primal_variables(scaled_delta_xB_sparse, basic_list, delta_x, entering_index, x); + timers.vector_time += timers.stop_timer(); #ifdef COMPUTE_PRIMAL_RESIDUAL residual = lp.rhs; @@ -2891,25 +2760,15 @@ dual::status_t dual_phase2(i_t phase, #endif - f_t objective_start_time = tic(); -#if 1 - for (i_t k = 0; k < scaled_delta_xB_nz; ++k) { - const i_t j = basic_list[scaled_delta_xB_sparse.i[k]]; - obj += delta_x[j] * lp.objective[j]; - } - // Leaving index already included above - obj += delta_x[entering_index] * lp.objective[entering_index]; - - //const f_t obj_check = compute_objective(lp, x); - //if (std::abs(obj - obj_check) > 1e-5) { - // settings.log.printf("Objective error %e: %e %e\n", std::abs(obj - obj_check), obj, obj_check); - //} -#endif - 
objective_time += toc(objective_start_time); + timers.start_timer(); + // TODO(CMM): Do I also need to update the objective due to the bound flips? + // TODO(CMM): I'm using the unperturbed objective here, should this be the perturbed objective? + phase2::update_objective(basic_list, scaled_delta_xB_sparse.i, lp.objective, delta_x, entering_index, obj); + timers.objective_time += timers.stop_timer(); -#if 1 - f_t update_infeasibility_start_time = tic(); - // Update primal infeasibilities + timers.start_timer(); + // Update primal infeasibilities due to changes in basic variables + // from flipping bounds phase2::update_primal_infeasibilities(lp, settings, basic_list, @@ -2920,6 +2779,8 @@ dual::status_t dual_phase2(i_t phase, squared_infeasibilities, infeasibility_indices, primal_infeasibility); + // Update primal infeasibilities due to changes in basic variables + // from the leaving and entering variables phase2::update_primal_infeasibilities(lp, settings, basic_list, @@ -2930,62 +2791,31 @@ dual::status_t dual_phase2(i_t phase, squared_infeasibilities, infeasibility_indices, primal_infeasibility); + // Update the entering variable + phase2::update_single_primal_infeasibility(lp.lower, + lp.upper, + x, + settings.primal_tol, + squared_infeasibilities, + infeasibility_indices, + entering_index, + primal_infeasibility); - if (primal_infeasibility < 0.0) { - settings.log.printf("!!!!! 
Negative primal infeasibility %e\n", primal_infeasibility); - } phase2::clean_up_infeasibilities(squared_infeasibilities, infeasibility_indices); -#endif #if CHECK_PRIMAL_INFEASIBILITIES - // Check primal infeasibilities - { - for (i_t k = 0; k < m; ++k) - { - const i_t j = basic_list[k]; - const f_t lower_infeas = lp.lower[j] - x[j]; - const f_t upper_infeas = x[j] - lp.upper[j]; - const f_t infeas = std::max(lower_infeas, upper_infeas); - if (infeas > settings.primal_tol) { - const f_t square_infeas = infeas * infeas; - if (square_infeas != squared_infeasibilities[j]) { - settings.log.printf("Primal infeasibility mismatch %d %e != %e\n", j, square_infeas, squared_infeasibilities[j]); - } - bool found = false; - for (i_t h = 0; h < infeasibility_indices.size(); ++h) { - if (infeasibility_indices[h] == j) { - found = true; - break; - } - } - if (!found) { - settings.log.printf("Infeasibility index not found %d\n", j); - } - } - } - } -#endif - -#if 1 - update_infeasibility_time += toc(update_infeasibility_start_time); + phase2::check_primal_infeasibilities(lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); #endif + timers.update_infeasibility_time += timers.stop_timer(); // Clear delta_x - for (i_t k = 0; k < scaled_delta_xB_nz; ++k) { - const i_t j = basic_list[scaled_delta_xB_sparse.i[k]]; - delta_x[j] = 0.0; - } - // Leaving index already included above - delta_x[entering_index] = 0.0; - scaled_delta_xB_sparse.i.clear(); - scaled_delta_xB_sparse.x.clear(); - + phase2::clear_delta_x(basic_list, entering_index, scaled_delta_xB_sparse, delta_x); - f_t perturb_start_time = tic(); + timers.start_timer(); f_t sum_perturb = 0.0; phase2::compute_perturbation(lp, settings, delta_z_indices, z, objective, sum_perturb); - perturb_time += toc(perturb_start_time); + timers.perturb_time += timers.stop_timer(); // Update basis vstatus[entering_index] = variable_status_t::BASIC; @@ -3001,43 +2831,13 @@ dual::status_t dual_phase2(i_t phase, 
basic_mark[leaving_index] = -1; basic_mark[entering_index] = basic_leaving_index; - f_t lu_update_start_time = tic(); + timers.start_timer(); // Refactor or Update bool should_refactor = ft.num_updates() > settings.refactor_frequency; if (!should_refactor) { i_t recommend_refactor = ft.update(utilde_sparse, UTsol_sparse, basic_leaving_index); - //i_t recommend_refactor = ft.update(utilde, UTsol, basic_leaving_index); #ifdef CHECK_UPDATE - { - csc_matrix_t Btest(m, m, 1); - ft.multiply_lu(Btest); - { - csc_matrix_t B(m, m, 1); - form_b(lp.A, basic_list, B); - csc_matrix_t Diff(m, m, 1); - add(Btest, B, 1.0, -1.0, Diff); - const f_t err = Diff.norm1(); - if (err > settings.primal_tol) { - settings.log.printf("|| B - L*U || %e\n", Diff.norm1()); - } - if (err > settings.primal_tol) - { - for (i_t j = 0; j < m; ++j) - { - for (i_t p = Diff.col_start[j]; p < Diff.col_start[j + 1]; ++p) - { - const i_t i = Diff.i[p]; - if (Diff.x[p] != 0.0) - { - settings.log.printf("Diff %d %d %e\n", j, i, Diff.x[p]); - } - } - } - } - settings.log.printf("basic leaving index %d\n", basic_leaving_index); - assert(err < settings.primal_tol); - } - } + phase2::check_update(lp, settings, ft, basic_list, basic_leaving_index); #endif should_refactor = recommend_refactor == 1; } @@ -3053,48 +2853,28 @@ dual::status_t dual_phase2(i_t phase, } reorder_basic_list(q, basic_list); ft.reset(L, U, p); - for (i_t k = 0; k < n; k++) { - basic_mark[k] = -1; - nonbasic_mark[k] = -1; - } - for (i_t k = 0; k < m; k++) { - basic_mark[basic_list[k]] = k; - } - for (i_t k = 0; k < n - m; k++) { - nonbasic_mark[nonbasic_list[k]] = k; - } + phase2::reset_basis_mark(basic_list, nonbasic_list, basic_mark, nonbasic_mark); } + timers.lu_update_time += timers.stop_timer(); - lu_update_time += toc(lu_update_start_time); - - f_t steepest_edge_entering_start_time = tic(); + timers.start_timer(); phase2::compute_steepest_edge_norm_entering( - settings, m, ft, basic_leaving_index, entering_index, 
b_transpose_solve_density, delta_y_steepest_edge); - se_entering_time += toc(steepest_edge_entering_start_time); + settings, m, ft, basic_leaving_index, entering_index, delta_y_steepest_edge); + timers.se_entering_time += timers.stop_timer(); #ifdef STEEPEST_EDGE_DEBUG if (iter < 100 || iter % 100 == 0)) - { - phase2::check_steepest_edge_norms(settings, basic_list, ft, delta_y_steepest_edge); - } + { + phase2::check_steepest_edge_norms(settings, basic_list, ft, delta_y_steepest_edge); + } #endif -#if 0 - for (i_t k = 0; k < m; k++) { - if (basic_mark[basic_list[k]] != k) { - printf("Basic mark %d %d\n", basic_list[k], k); - } - } - for (i_t k = 0; k < n - m; k++) { - if (nonbasic_mark[nonbasic_list[k]] != k) { - printf("Nonbasic mark %d %d\n", nonbasic_list[k], k); - } - } +#ifdef CHECK_BASIS_MARK + phase2::check_basis_mark(settings, basic_list, nonbasic_list, basic_mark, nonbasic_mark); #endif iter++; -#if 1 // Clear delta_y //const i_t nz_dy = delta_y_sparse.i.size(); //for (i_t k = 0; k < nz_dy; ++k) { @@ -3105,8 +2885,6 @@ dual::status_t dual_phase2(i_t phase, phase2::clear_delta_z(entering_index, leaving_index, delta_z_mark, delta_z_indices, delta_z); -#endif - f_t now = toc(start_time); if ((iter - start_iter) < settings.first_iteration_log || (iter % settings.iteration_log_frequency) == 0) { @@ -3138,24 +2916,7 @@ dual::status_t dual_phase2(i_t phase, if (iter >= iter_limit) { status = dual::status_t::ITERATION_LIMIT; } if (phase == 2) { - const f_t total_time = bfrt_time + pricing_time + btran_time + ftran_time + flip_time + - delta_z_time + lu_update_time + se_norms_time + se_entering_time + - perturb_time + vector_time + objective_time + update_infeasibility_time; - settings.log.printf("BFRT time %.2f %4.1f%\n", bfrt_time, 100.0 * bfrt_time / total_time); - settings.log.printf("Pricing time %.2f %4.1f%\n", pricing_time, 100.0 * pricing_time / total_time); - settings.log.printf("BTran time %.2f %4.1f%\n", btran_time, 100.0 * btran_time / total_time); - 
settings.log.printf("FTran time %.2f %4.1f%\n", ftran_time, 100.0 * ftran_time / total_time); - settings.log.printf("Flip time %.2f %4.1f%\n", flip_time, 100.0 * flip_time / total_time); - settings.log.printf("Delta_z time %.2f %4.1f%\n", delta_z_time, 100.0 * delta_z_time / total_time); - settings.log.printf("LU update time %.2f %4.1f%\n", lu_update_time, 100.0 * lu_update_time / total_time); - settings.log.printf("SE norms time %.2f %4.1f%\n", se_norms_time, 100.0 * se_norms_time / total_time); - settings.log.printf("SE enter time %.2f %4.1f%\n", se_entering_time, 100.0 * se_entering_time / total_time); - settings.log.printf("Perturb time %.2f %4.1f%\n", perturb_time, 100.0 * perturb_time / total_time); - settings.log.printf("Vector time %.2f %4.1f%\n", vector_time, 100.0 * vector_time / total_time); - settings.log.printf("Objective time %.2f %4.1f%\n", objective_time, 100.0 * objective_time / total_time); - settings.log.printf("Inf update time %.2f %4.1f%\n", update_infeasibility_time, 100.0 * update_infeasibility_time / total_time); - settings.log.printf("Sum %.2f\n", total_time); - + timers.print_timers(settings); settings.log.printf("Sparse delta_z %8d %8.2f%\n", sparse_delta_z, 100.0 * sparse_delta_z / (sparse_delta_z + dense_delta_z)); settings.log.printf("Dense delta_z %8d %8.2f%\n", dense_delta_z, 100.0 * dense_delta_z / (sparse_delta_z + dense_delta_z)); ft.print_stats(); diff --git a/cpp/src/dual_simplex/sparse_matrix.hpp b/cpp/src/dual_simplex/sparse_matrix.hpp index 2fdbe5647a..95ef49ecdb 100644 --- a/cpp/src/dual_simplex/sparse_matrix.hpp +++ b/cpp/src/dual_simplex/sparse_matrix.hpp @@ -296,6 +296,25 @@ class sparse_vector_t { return dot; } + void negate() + { + const i_t nz = x.size(); + for (i_t k = 0; k < nz; ++k) { + x[k] *= -1.0; + } + } + + f_t find_coefficient(i_t index) const + { + const i_t nz = i.size(); + for (i_t k = 0; k < nz; ++k) { + if (i[k] == index) { + return x[k]; + } + } + return std::numeric_limits::quiet_NaN(); + } + i_t n; 
std::vector i; std::vector x; From c672416673146e859b7acfcdc4de777579fcff54 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Wed, 23 Jul 2025 12:46:33 -0700 Subject: [PATCH 09/28] Move sparse_vector_t into separate files --- cpp/src/dual_simplex/CMakeLists.txt | 1 + cpp/src/dual_simplex/basis_updates.hpp | 1 + cpp/src/dual_simplex/sparse_matrix.hpp | 209 ----------------------- cpp/src/dual_simplex/sparse_vector.cpp | 228 +++++++++++++++++++++++++ cpp/src/dual_simplex/sparse_vector.hpp | 66 +++++++ 5 files changed, 296 insertions(+), 209 deletions(-) create mode 100644 cpp/src/dual_simplex/sparse_vector.cpp create mode 100644 cpp/src/dual_simplex/sparse_vector.hpp diff --git a/cpp/src/dual_simplex/CMakeLists.txt b/cpp/src/dual_simplex/CMakeLists.txt index baa5b7213b..d85471f9b9 100644 --- a/cpp/src/dual_simplex/CMakeLists.txt +++ b/cpp/src/dual_simplex/CMakeLists.txt @@ -31,6 +31,7 @@ set(DUAL_SIMPLEX_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/singletons.cpp ${CMAKE_CURRENT_SOURCE_DIR}/solve.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sparse_matrix.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/sparse_vector.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tic_toc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/triangle_solve.cpp ${CMAKE_CURRENT_SOURCE_DIR}/vector_math.cpp) diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp index c0abc3f426..a19abf380a 100644 --- a/cpp/src/dual_simplex/basis_updates.hpp +++ b/cpp/src/dual_simplex/basis_updates.hpp @@ -18,6 +18,7 @@ #pragma once #include +#include #include namespace cuopt::linear_programming::dual_simplex { diff --git a/cpp/src/dual_simplex/sparse_matrix.hpp b/cpp/src/dual_simplex/sparse_matrix.hpp index 95ef49ecdb..21f97c01e0 100644 --- a/cpp/src/dual_simplex/sparse_matrix.hpp +++ b/cpp/src/dual_simplex/sparse_matrix.hpp @@ -111,215 +111,6 @@ class csr_matrix_t { }; -template -class sparse_vector_t { - public: - sparse_vector_t(i_t n, i_t nz) : n(n), i(nz), x(nz) {} - sparse_vector_t(const std::vector& in) - { - from_dense(in); - 
} - sparse_vector_t(const csc_matrix_t& A, i_t col) - { - const i_t col_start = A.col_start[col]; - const i_t col_end = A.col_start[col + 1]; - n = A.m; - const i_t nz = col_end - col_start; - i.reserve(nz); - x.reserve(nz); - for (i_t k = col_start; k < col_end; ++k) { - i.push_back(A.i[k]); - x.push_back(A.x[k]); - } - } - - void from_dense(const std::vector& in) - { - i.clear(); - x.clear(); - n = in.size(); - i.reserve(n); - x.reserve(n); - for (i_t k = 0; k < n; ++k) { - if (in[k] != 0) { - i.push_back(k); - x.push_back(in[k]); - } - } - } - - void to_csc(csc_matrix_t& A) const - { - A.m = n; - A.n = 1; - A.nz_max = i.size(); - A.col_start.resize(2); - A.col_start[0] = 0; - A.col_start[1] = i.size(); - A.i = i; - A.x = x; - } - - void to_dense(std::vector& x_dense) const - { - x_dense.clear(); - x_dense.resize(n, 0.0); - const i_t nz = i.size(); - for (i_t k = 0; k < nz; ++k) { - x_dense[i[k]] = x[k]; - } - } - - void scatter(std::vector& x_dense) const - { - // Assumes x_dense is already cleared - const i_t nz = i.size(); - for (i_t k = 0; k < nz; ++k) { - x_dense[i[k]] += x[k]; - } - } - - void inverse_permute_vector(const std::vector& p) - { - assert(p.size() == n); - i_t nz = i.size(); - std::vector i_perm(nz); - for (i_t k = 0; k < nz; ++k) { - i_perm[k] = p[i[k]]; - } - i = i_perm; - } - - void inverse_permute_vector(const std::vector& p, sparse_vector_t& y) const - { - i_t m = p.size(); - assert(n == m); - i_t nz = i.size(); - y.n = n; - y.x = x; - std::vector i_perm(nz); - for (i_t k = 0; k < nz; ++k) { - i_perm[k] = p[i[k]]; - } - y.i = i_perm; - } - - f_t sparse_dot(const csc_matrix_t& Y, i_t y_col) const - { - const i_t col_start = Y.col_start[y_col]; - const i_t col_end = Y.col_start[y_col + 1]; - const i_t ny = col_end - col_start; - const i_t nx = i.size(); - f_t dot = 0.0; - for (i_t h = 0, k = col_start; h < nx && k < col_end; ) { - const i_t p = i[h]; - const i_t q = Y.i[k]; - if (p == q) { - dot += Y.x[k] * x[h]; - h++; - k++; - } else if (p 
< q) { - h++; - } else if (q < p) { - k++; - } - } - return dot; - } - - void sort() - { - if (i.size() < 2) { - return; - } - // If the number of nonzeros is large, use a O(n) bucket sort - if (i.size() > 0.3 *n) - { - std::vector bucket(n, 0.0); - const i_t nz = i.size(); - for (i_t k = 0; k < nz; ++k) - { - bucket[i[k]] = x[k]; - } - i.clear(); - i.reserve(nz); - x.clear(); - x.reserve(nz); - for (i_t k = 0; k < n; ++k) - { - if (bucket[k] != 0.0) - { - i.push_back(k); - x.push_back(bucket[k]); - } - } - } - else - { - // Use a n log n sort - const i_t nz = i.size(); - std::vector i_sorted(nz); - std::vector x_sorted(nz); - std::vector perm(nz); - for (i_t k = 0; k < nz; ++k) - { - perm[k] = k; - } - std::vector& iunsorted = i; - std::sort(perm.begin(), perm.end(), [&iunsorted](i_t a, i_t b) { return iunsorted[a] < iunsorted[b]; }); - for (i_t k = 0; k < nz; ++k) - { - i_sorted[k] = i[perm[k]]; - x_sorted[k] = x[perm[k]]; - } - i = i_sorted; - x = x_sorted; - } - - // Check -#ifdef CHECK_SORT - for (i_t k = 0; k < i.size() - 1; ++k) { - if (i[k] > i[k + 1]) { - printf("Sort error %d %d\n", i[k], i[k + 1]); - } - } -#endif - } - - f_t norm2_squared() const - { - f_t dot = 0.0; - const i_t nz = i.size(); - for (i_t k = 0; k < nz; ++k) { - dot += x[k] * x[k]; - } - return dot; - } - - void negate() - { - const i_t nz = x.size(); - for (i_t k = 0; k < nz; ++k) { - x[k] *= -1.0; - } - } - - f_t find_coefficient(i_t index) const - { - const i_t nz = i.size(); - for (i_t k = 0; k < nz; ++k) { - if (i[k] == index) { - return x[k]; - } - } - return std::numeric_limits::quiet_NaN(); - } - - i_t n; - std::vector i; - std::vector x; -}; - template void cumulative_sum(std::vector& inout, std::vector& output); diff --git a/cpp/src/dual_simplex/sparse_vector.cpp b/cpp/src/dual_simplex/sparse_vector.cpp new file mode 100644 index 0000000000..d3c655d3e0 --- /dev/null +++ b/cpp/src/dual_simplex/sparse_vector.cpp @@ -0,0 +1,228 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 
NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include + +#include +#include +#include + +namespace cuopt::linear_programming::dual_simplex { + +template +sparse_vector_t::sparse_vector_t(const csc_matrix_t& A, i_t col) +{ + const i_t col_start = A.col_start[col]; + const i_t col_end = A.col_start[col + 1]; + n = A.m; + const i_t nz = col_end - col_start; + i.reserve(nz); + x.reserve(nz); + for (i_t k = col_start; k < col_end; ++k) { + i.push_back(A.i[k]); + x.push_back(A.x[k]); + } +} + +template +void sparse_vector_t::from_dense(const std::vector& in) +{ + i.clear(); + x.clear(); + n = in.size(); + i.reserve(n); + x.reserve(n); + for (i_t k = 0; k < n; ++k) { + if (in[k] != 0) { + i.push_back(k); + x.push_back(in[k]); + } + } +} + +template +void sparse_vector_t::to_csc(csc_matrix_t& A) const +{ + A.m = n; + A.n = 1; + A.nz_max = i.size(); + A.col_start.resize(2); + A.col_start[0] = 0; + A.col_start[1] = i.size(); + A.i = i; + A.x = x; +} + +template +void sparse_vector_t::to_dense(std::vector& x_dense) const +{ + x_dense.clear(); + x_dense.resize(n, 0.0); + const i_t nz = i.size(); + for (i_t k = 0; k < nz; ++k) { + x_dense[i[k]] = x[k]; + } +} + +template +void sparse_vector_t::scatter(std::vector& x_dense) const +{ + // Assumes x_dense is already cleared + const i_t nz = i.size(); + for (i_t k = 0; k < nz; ++k) { + x_dense[i[k]] += x[k]; + } +} + 
+template +void sparse_vector_t::inverse_permute_vector(const std::vector& p) +{ + assert(p.size() == n); + i_t nz = i.size(); + std::vector i_perm(nz); + for (i_t k = 0; k < nz; ++k) { + i_perm[k] = p[i[k]]; + } + i = i_perm; +} + +template +void sparse_vector_t::inverse_permute_vector(const std::vector& p, + sparse_vector_t& y) const +{ + i_t m = p.size(); + assert(n == m); + i_t nz = i.size(); + y.n = n; + y.x = x; + std::vector i_perm(nz); + for (i_t k = 0; k < nz; ++k) { + i_perm[k] = p[i[k]]; + } + y.i = i_perm; +} + +template +f_t sparse_vector_t::sparse_dot(const csc_matrix_t& Y, i_t y_col) const +{ + const i_t col_start = Y.col_start[y_col]; + const i_t col_end = Y.col_start[y_col + 1]; + const i_t ny = col_end - col_start; + const i_t nx = i.size(); + f_t dot = 0.0; + for (i_t h = 0, k = col_start; h < nx && k < col_end;) { + const i_t p = i[h]; + const i_t q = Y.i[k]; + if (p == q) { + dot += Y.x[k] * x[h]; + h++; + k++; + } else if (p < q) { + h++; + } else if (q < p) { + k++; + } + } + return dot; +} + +template +void sparse_vector_t::sort() +{ + if (i.size() == 1) { return; } + // If the number of nonzeros is large, use a O(n) bucket sort + if (i.size() > 0.3 * n) { + std::vector bucket(n, 0.0); + const i_t nz = i.size(); + for (i_t k = 0; k < nz; ++k) { + bucket[i[k]] = x[k]; + } + i.clear(); + i.reserve(nz); + x.clear(); + x.reserve(nz); + for (i_t k = 0; k < n; ++k) { + if (bucket[k] != 0.0) { + i.push_back(k); + x.push_back(bucket[k]); + } + } + } else { + // Use a n log n sort + const i_t nz = i.size(); + std::vector i_sorted(nz); + std::vector x_sorted(nz); + std::vector perm(nz); + for (i_t k = 0; k < nz; ++k) { + perm[k] = k; + } + std::vector& iunsorted = i; + std::sort( + perm.begin(), perm.end(), [&iunsorted](i_t a, i_t b) { return iunsorted[a] < iunsorted[b]; }); + for (i_t k = 0; k < nz; ++k) { + i_sorted[k] = i[perm[k]]; + x_sorted[k] = x[perm[k]]; + } + i = i_sorted; + x = x_sorted; + } + + // Check +#ifdef CHECK_SORT + for (i_t k = 0; 
k < i.size() - 1; ++k) { + if (i[k] > i[k + 1]) { printf("Sort error %d %d\n", i[k], i[k + 1]); } + } +#endif +} + +template +f_t sparse_vector_t::norm2_squared() const +{ + f_t dot = 0.0; + const i_t nz = i.size(); + for (i_t k = 0; k < nz; ++k) { + dot += x[k] * x[k]; + } + return dot; +} + +template +void sparse_vector_t::negate() +{ + const i_t nz = x.size(); + for (i_t k = 0; k < nz; ++k) { + x[k] *= -1.0; + } +} + +template +f_t sparse_vector_t::find_coefficient(i_t index) const +{ + const i_t nz = i.size(); + for (i_t k = 0; k < nz; ++k) { + if (i[k] == index) { return x[k]; } + } + return std::numeric_limits::quiet_NaN(); +} + + +#ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE +template class sparse_vector_t; +#endif + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/sparse_vector.hpp b/cpp/src/dual_simplex/sparse_vector.hpp new file mode 100644 index 0000000000..ed8f39a63b --- /dev/null +++ b/cpp/src/dual_simplex/sparse_vector.hpp @@ -0,0 +1,66 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +#include + +namespace cuopt::linear_programming::dual_simplex { + +// A sparse vector stored as a list of nonzero coefficients and their indices +template +class sparse_vector_t { + public: + // Construct a sparse vector of dimension n with nz nonzero coefficients + sparse_vector_t(i_t n, i_t nz) : n(n), i(nz), x(nz) {} + // Construct a sparse vector from a dense vector. + sparse_vector_t(const std::vector& in) + { + from_dense(in); + } + // Construct a sparse vector from a column of a CSC matrix + sparse_vector_t(const csc_matrix_t& A, i_t col); + // gather a dense vector into a sparse vector + void from_dense(const std::vector& in); + // convert a sparse vector into a CSC matrix with a single column + void to_csc(csc_matrix_t& A) const; + // convert a sparse vector into a dense vector. Dense vector is cleared and resized. + void to_dense(std::vector& x_dense) const; + // scatter a sparse vector into a dense vector. Assumes x_dense is already cleared or preinitialized + void scatter(std::vector& x_dense) const; + // inverse permute the current sparse vector + void inverse_permute_vector(const std::vector& p); + // inverse permute a sparse vector into another sparse vector + void inverse_permute_vector(const std::vector& p, sparse_vector_t& y) const; + // compute the dot product of a sparse vector with a column of a CSC matrix + f_t sparse_dot(const csc_matrix_t& Y, i_t y_col) const; + // ensure the coefficients in the sparse vectory are sorted in terms of increasing index + void sort(); + // compute the squared 2-norm of the sparse vector + f_t norm2_squared() const; + void negate(); + f_t find_coefficient(i_t index) const; + + i_t n; + std::vector i; + std::vector x; +}; + +} // namespace cuopt::linear_programming::dual_simplex From efb768c5e9791f235104fe3f8a9fede2a8c9da2a Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Wed, 23 Jul 2025 13:27:16 -0700 Subject: [PATCH 10/28] More cleanup --- 
cpp/src/dual_simplex/phase2.cpp | 46 ++++++++++----------------------- 1 file changed, 13 insertions(+), 33 deletions(-) diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 071ba085be..2adcb54cb1 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -2103,7 +2103,6 @@ void prepare_optimality(const lp_problem_t& lp, settings.log.printf("Primal infeasibility (abs): %.2e\n", primal_infeas); settings.log.printf("Dual infeasibility (abs): %.2e\n", dual_infeas); settings.log.printf("Perturbation: %.2e\n", perturbation); - settings.log.printf("Max steepest edge norm: %.2e\n", max_val); } else { settings.log.printf("\n"); settings.log.printf( @@ -2341,7 +2340,7 @@ dual::status_t dual_phase2(i_t phase, vector_norm2(delta_y_steepest_edge)); } - if (phase == 2) { settings.log.printf(" Iter Objective Primal Infeas Perturb Time\n"); } + if (phase == 2) { settings.log.printf(" Iter Objective Num Inf. Sum Inf. Perturb Time\n"); } const i_t iter_limit = settings.iteration_limit; std::vector delta_y(m, 0.0); @@ -2372,8 +2371,6 @@ dual::status_t dual_phase2(i_t phase, lp.A.transpose(A_transpose); f_t obj = compute_objective(lp, x); - settings.log.printf("Initial objective %e\n", obj); - const i_t start_iter = iter; i_t sparse_delta_z = 0; @@ -2448,20 +2445,6 @@ dual::status_t dual_phase2(i_t phase, continue; } -#ifdef COMPUTE_BTRANSPOSE_RESIDUAL - { - std::vector res(m); - b_transpose_multiply(lp, basic_list, delta_y, res); - f_t max_err = 0.0; - for (i_t k = 0; k < m; k++) { - const f_t err = std::abs(res[k] - ei[k]); - if (err > 1e-4) { settings.log.printf("BT err %d %e\n", k, err); } - max_err = std::max(max_err, err); - } - printf("BTranspose multiply error %e\n", max_err); - } -#endif - timers.start_timer(); i_t delta_y_nz0 = 0; const i_t nz_delta_y = delta_y_sparse.i.size(); @@ -2817,7 +2800,7 @@ dual::status_t dual_phase2(i_t phase, phase2::compute_perturbation(lp, settings, delta_z_indices, z, objective, 
sum_perturb); timers.perturb_time += timers.stop_timer(); - // Update basis + // Update basis information vstatus[entering_index] = variable_status_t::BASIC; if (lp.lower[leaving_index] != lp.upper[leaving_index]) { vstatus[leaving_index] = static_cast(-direction); @@ -2832,7 +2815,7 @@ dual::status_t dual_phase2(i_t phase, basic_mark[entering_index] = basic_leaving_index; timers.start_timer(); - // Refactor or Update + // Refactor or update the basis factorization bool should_refactor = ft.num_updates() > settings.refactor_frequency; if (!should_refactor) { i_t recommend_refactor = ft.update(utilde_sparse, UTsol_sparse, basic_leaving_index); @@ -2875,29 +2858,23 @@ dual::status_t dual_phase2(i_t phase, iter++; - // Clear delta_y - //const i_t nz_dy = delta_y_sparse.i.size(); - //for (i_t k = 0; k < nz_dy; ++k) { - // delta_y[delta_y_sparse.i[k]] = 0.0; - //} - + // TODO(CMM): Do we also need to clear delta_y? // Clear delta_z phase2::clear_delta_z(entering_index, leaving_index, delta_z_mark, delta_z_indices, delta_z); - f_t now = toc(start_time); + f_t now = toc(start_time); if ((iter - start_iter) < settings.first_iteration_log || (iter % settings.iteration_log_frequency) == 0) { if (phase == 1 && iter == 1) { - settings.log.printf(" Iter Objective Primal Infeas Perturb Time\n"); + settings.log.printf(" Iter Objective Num Inf. Sum Inf. 
Perturb Time\n"); } - settings.log.printf("%5d %+.16e %8d %.8e %.2e %.2e %.2f\n", + settings.log.printf("%5d %+.16e %7d %.8e %.2e %.2f\n", iter, compute_user_objective(lp, obj), infeasibility_indices.size(), primal_infeasibility, sum_perturb, - step_length, now); } @@ -2917,9 +2894,12 @@ dual::status_t dual_phase2(i_t phase, if (phase == 2) { timers.print_timers(settings); - settings.log.printf("Sparse delta_z %8d %8.2f%\n", sparse_delta_z, 100.0 * sparse_delta_z / (sparse_delta_z + dense_delta_z)); - settings.log.printf("Dense delta_z %8d %8.2f%\n", dense_delta_z, 100.0 * dense_delta_z / (sparse_delta_z + dense_delta_z)); - ft.print_stats(); + constexpr bool print_stats = false; + if constexpr (print_stats) { + settings.log.printf("Sparse delta_z %8d %8.2f%\n", sparse_delta_z, 100.0 * sparse_delta_z / (sparse_delta_z + dense_delta_z)); + settings.log.printf("Dense delta_z %8d %8.2f%\n", dense_delta_z, 100.0 * dense_delta_z / (sparse_delta_z + dense_delta_z)); + ft.print_stats(); + } } return status; } From 39056651897db53a695838779c5f4df82f9a83b2 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Wed, 23 Jul 2025 15:32:51 -0700 Subject: [PATCH 11/28] Formatting --- cpp/src/dual_simplex/basis_updates.cpp | 637 ++++++++---------- cpp/src/dual_simplex/basis_updates.hpp | 105 +-- .../bound_flipping_ratio_test.cpp | 50 +- cpp/src/dual_simplex/phase2.cpp | 533 +++++++-------- cpp/src/dual_simplex/presolve.cpp | 81 ++- .../dual_simplex/simplex_solver_settings.hpp | 2 +- cpp/src/dual_simplex/sparse_matrix.cpp | 10 +- cpp/src/dual_simplex/sparse_matrix.hpp | 11 +- cpp/src/dual_simplex/sparse_vector.cpp | 4 +- cpp/src/dual_simplex/sparse_vector.hpp | 10 +- cpp/src/dual_simplex/triangle_solve.cpp | 12 +- cpp/src/dual_simplex/vector_math.cpp | 36 +- cpp/src/dual_simplex/vector_math.hpp | 15 +- 13 files changed, 707 insertions(+), 799 deletions(-) diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index 9e9ea847da..b594f47374 100644 
--- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -170,9 +170,7 @@ i_t basis_update_t::b_transpose_solve(const sparse_vector_t& if (Ut_error > 1e-6) { printf("|| U' * r - c || %e\n", Ut_error); for (i_t k = 0; k < m; ++k) { - if (std::abs(residual[k]) > 1e-6) { - printf("%d residual %e\n", k, residual[k]); - } + if (std::abs(residual[k]) > 1e-6) { printf("%d residual %e\n", k, residual[k]); } } printf("rhs nz %d\n", rhs.i.size()); } @@ -181,7 +179,6 @@ i_t basis_update_t::b_transpose_solve(const sparse_vector_t& // Solve for w such that L'*w = r l_transpose_solve(r); - // y = P'*w r.inverse_permute_vector(row_permutation_, solution); @@ -195,30 +192,31 @@ i_t basis_update_t::b_transpose_solve(const sparse_vector_t& bool found_error = false; for (i_t k = 0; k < m; ++k) { if (std::abs(solution_dense[k] - solution_dense_permuted[k]) > 1e-6) { - printf("B transpose inverse permutation error %d %e %e\n", k, solution_dense[k], solution_dense_permuted[k]); + printf("B transpose inverse permutation error %d %e %e\n", + k, + solution_dense[k], + solution_dense_permuted[k]); found_error = true; } } if (found_error) { for (i_t k = 0; k < m; ++k) { - printf("%d (sparse -> permuted -> dense) %e (sparse -> dense -> permuted)%e\n", k, solution_dense[k], solution_dense_permuted[k]); + printf("%d (sparse -> permuted -> dense) %e (sparse -> dense -> permuted)%e\n", + k, + solution_dense[k], + solution_dense_permuted[k]); } - for (i_t k = 0; k < solution.i.size(); ++k) - { + for (i_t k = 0; k < solution.i.size(); ++k) { printf("%d solution sparse %d %e\n", k, solution.i[k], solution.x[k]); } for (i_t k = 0; k < m; ++k) { - if (solution_dense[k] != 0.0) { - printf("%d solution dense %e\n", k, solution_dense[k]); - } + if (solution_dense[k] != 0.0) { printf("%d solution dense %e\n", k, solution_dense[k]); } } for (i_t k = 0; k < m; ++k) { printf("inv permutation %d %d\n", k, inverse_row_permutation_[k]); } for (i_t k = 0; k < m; ++k) { - if 
(r_dense2[k] != 0.0) { - printf("%d r dense %e\n", k, r_dense2[k]); - } + if (r_dense2[k] != 0.0) { printf("%d r dense %e\n", k, r_dense2[k]); } } for (i_t k = 0; k < m; ++k) { if (solution_dense_permuted[k] != 0.0) { @@ -283,8 +281,10 @@ i_t basis_update_t::l_solve(sparse_vector_t& rhs) const csc_matrix_t B(1, 1, 1); rhs.to_csc(B); const i_t m = L0_.m; - i_t top = sparse_triangle_solve(B, 0, std::nullopt, xi_workspace_, L0_, x_workspace_.data()); - solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs + + i_t top = sparse_triangle_solve( + B, 0, std::nullopt, xi_workspace_, L0_, x_workspace_.data()); + solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs #ifdef CHECK_L_SOLVE std::vector residual(m, 0.0); @@ -298,14 +298,9 @@ i_t basis_update_t::l_solve(sparse_vector_t& rhs) const rhs.to_dense(x0); matrix_vector_multiply(L0_, 1.0, x0, -1.0, residual); const f_t L0_solve_error = vector_norm_inf(residual); - if (L0_solve_error > 1e-10) { - printf("L0 solve error %e\n", L0_solve_error); - } + if (L0_solve_error > 1e-10) { printf("L0 solve error %e\n", L0_solve_error); } #endif - - - // then solve R1^{-1}*x1 = x0 -> x1 = R1*x0 // then solve R2^{-1}*x2 = x1 -> x2 = R2*x1 // until we get to @@ -321,17 +316,15 @@ i_t basis_update_t::l_solve(sparse_vector_t& rhs) const i_t nz = scatter_into_workspace(rhs); for (i_t k = 0; k < num_updates_; ++k) { - const i_t r = pivot_indices_[k]; + const i_t r = pivot_indices_[k]; f_t dot = 0.0; const i_t col_start = S_.col_start[k]; const i_t col_end = S_.col_start[k + 1]; for (i_t p = col_start; p < col_end; ++p) { - if (xi_workspace_[S_.i[p]]) { - dot += S_.x[p] * x_workspace_[S_.i[p]]; - } + if (xi_workspace_[S_.i[p]]) { dot += S_.x[p] * x_workspace_[S_.i[p]]; } } if (!xi_workspace_[r]) { - xi_workspace_[r] = 1; + xi_workspace_[r] = 1; xi_workspace_[m + nz] = r; nz++; } @@ -364,7 +357,6 @@ i_t basis_update_t::l_solve(sparse_vector_t& rhs) const return 0; } - // Solve for y 
such that L'*y = c template i_t basis_update_t::l_transpose_solve(std::vector& rhs) const @@ -396,9 +388,9 @@ i_t basis_update_t::scatter_into_workspace(const sparse_vector_t::scatter_into_workspace(const sparse_vector_t -void basis_update_t::gather_into_sparse_vector(i_t nz, sparse_vector_t& out) const +void basis_update_t::gather_into_sparse_vector(i_t nz, + sparse_vector_t& out) const { const i_t m = L0_.m; out.i.clear(); @@ -416,12 +409,12 @@ void basis_update_t::gather_into_sparse_vector(i_t nz, sparse_vector_t out.i.resize(nz); out.x.resize(nz); for (i_t k = 0; k < nz; ++k) { - const i_t i = xi_workspace_[m + k]; - out.i[k] = i; - out.x[k] = x_workspace_[i]; + const i_t i = xi_workspace_[m + k]; + out.i[k] = i; + out.x[k] = x_workspace_[i]; xi_workspace_[m + k] = 0; - xi_workspace_[i] = 0; - x_workspace_[i] = 0.0; + xi_workspace_[i] = 0; + x_workspace_[i] = 0.0; } } @@ -429,7 +422,7 @@ template void basis_update_t::solve_to_sparse_vector(i_t top, sparse_vector_t& out) const { const i_t m = L0_.m; - out.n = m; + out.n = m; out.i.clear(); out.x.clear(); const i_t nz = m - top; @@ -437,10 +430,10 @@ void basis_update_t::solve_to_sparse_vector(i_t top, sparse_vector_t::l_transpose_solve(sparse_vector_t& rhs) #ifdef CHECK_UPDATES std::vector multiply; rhs.to_dense(multiply); - for (i_t k = 0; k < 2*m; ++k) { - if (xi_workspace_[k]) { - printf("xi workspace %d %d\n", k, xi_workspace_[k]); - } + for (i_t k = 0; k < 2 * m; ++k) { + if (xi_workspace_[k]) { printf("xi workspace %d %d\n", k, xi_workspace_[k]); } } #endif - if (num_updates_ > 0) { - nz = scatter_into_workspace(rhs); - } + if (num_updates_ > 0) { nz = scatter_into_workspace(rhs); } for (i_t k = num_updates_ - 1; k >= 0; --k) { const i_t r = pivot_indices_[k]; @@ -482,7 +471,7 @@ i_t basis_update_t::l_transpose_solve(sparse_vector_t& rhs) // rhs.x[S_.i[p]] += rhs.x[r] * S_.x[p]; if (!xi_workspace_[S_.i[p]]) { xi_workspace_[S_.i[p]] = 1; - xi_workspace_[m + nz] = S_.i[p]; + xi_workspace_[m + nz] = S_.i[p]; 
nz++; } x_workspace_[S_.i[p]] += x_workspace_[r] * S_.x[p]; @@ -502,14 +491,13 @@ i_t basis_update_t::l_transpose_solve(sparse_vector_t& rhs) rhs.sort(); #ifdef CHECK_UPDATES - std::vector rhs_dense; - rhs.to_dense(rhs_dense); - for (i_t k = 0; k < m; ++k) - { - if (std::abs(rhs_dense[k] - multiply[k]) > 1e-6) { - printf("rhs dense/multiply error %d %e %e\n", k, rhs_dense[k], multiply[k]); + std::vector rhs_dense; + rhs.to_dense(rhs_dense); + for (i_t k = 0; k < m; ++k) { + if (std::abs(rhs_dense[k] - multiply[k]) > 1e-6) { + printf("rhs dense/multiply error %d %e %e\n", k, rhs_dense[k], multiply[k]); + } } - } #endif } @@ -522,8 +510,9 @@ i_t basis_update_t::l_transpose_solve(sparse_vector_t& rhs) rhs.to_dense(cprime_dense); #endif - i_t top = sparse_triangle_solve(Cprime, 0, std::nullopt, xi_workspace_, L0_transpose_, x_workspace_.data()); - solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs + i_t top = sparse_triangle_solve( + Cprime, 0, std::nullopt, xi_workspace_, L0_transpose_, x_workspace_.data()); + solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs #ifdef CHECK_LOWER_TRANSPOSE_SOLVE std::vector y_dense; @@ -532,9 +521,7 @@ i_t basis_update_t::l_transpose_solve(sparse_vector_t& rhs) std::vector residual = cprime_dense; matrix_transpose_vector_multiply(L0_, 1.0, y_dense, -1.0, residual); const f_t L0_solve_error = vector_norm_inf(residual); - if (L0_solve_error > 1e-6) { - printf("L0 solve error %e\n", L0_solve_error); - } + if (L0_solve_error > 1e-6) { printf("L0 solve error %e\n", L0_solve_error); } #endif return 0; @@ -607,8 +594,9 @@ i_t basis_update_t::u_solve(sparse_vector_t& rhs) const csc_matrix_t Bprime(1, 1, 1); bprime.to_csc(Bprime); - i_t top = sparse_triangle_solve(Bprime, 0, std::nullopt, xi_workspace_, U_, x_workspace_.data()); - solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs + i_t top = sparse_triangle_solve( + Bprime, 0, std::nullopt, 
xi_workspace_, U_, x_workspace_.data()); + solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs rhs.inverse_permute_vector(inverse_col_permutation_); @@ -656,30 +644,34 @@ i_t basis_update_t::u_transpose_solve(sparse_vector_t& rhs) inverse_permute_vector(col_permutation_, rhs_dense, rhs_dense_permuted); for (i_t k = 0; k < m; ++k) { if (std::abs(bprime_dense[k] - rhs_dense_permuted[k]) > 1e-6) { - printf("u_transpose inverse permutation error %d %e %e\n", k, bprime_dense[k], rhs_dense_permuted[k]); + printf("u_transpose inverse permutation error %d %e %e\n", + k, + bprime_dense[k], + rhs_dense_permuted[k]); } } #endif #ifdef CHECK_WORKSPACE - for (i_t k = 0; k < 2*m; ++k) { + for (i_t k = 0; k < 2 * m; ++k) { if (xi_workspace_[k]) { printf("before Utranspose m %d solve xi workspace %d %d\n", m, k, xi_workspace_[k]); } } #endif - // U'*y = bprime csc_matrix_t Bprime(1, 1, 1); bprime.to_csc(Bprime); - i_t top = sparse_triangle_solve(Bprime, 0, std::nullopt, xi_workspace_, U_transpose_, x_workspace_.data()); - solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs + i_t top = sparse_triangle_solve( + Bprime, 0, std::nullopt, xi_workspace_, U_transpose_, x_workspace_.data()); + solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs #ifdef CHECK_WORKSPACE - for (i_t k = 0; k < 2*m; ++k) { + for (i_t k = 0; k < 2 * m; ++k) { if (xi_workspace_[k]) { - printf("after Utranspose m %d top %d solve xi workspace %d %d\n", m, top, k, xi_workspace_[k]); + printf( + "after Utranspose m %d top %d solve xi workspace %d %d\n", m, top, k, xi_workspace_[k]); } } #endif @@ -689,7 +681,7 @@ i_t basis_update_t::u_transpose_solve(sparse_vector_t& rhs) rhs.to_dense(rhs_dense2); #endif - // Q*y = x + // Q*y = x rhs.inverse_permute_vector(inverse_col_permutation_); #ifdef CHECK_PERMUTATION rhs.to_dense(rhs_dense_permuted); @@ -698,13 +690,19 @@ i_t basis_update_t::u_transpose_solve(sparse_vector_t& 
rhs) bool found_error = false; for (i_t k = 0; k < m; ++k) { if (std::abs(rhs_dense_permuted[k] - rhs_dense_permuted2[k]) > 1e-6) { - printf("u_transpose2 permutation error %d %e %e\n", k, rhs_dense_permuted[k], rhs_dense_permuted2[k]); + printf("u_transpose2 permutation error %d %e %e\n", + k, + rhs_dense_permuted[k], + rhs_dense_permuted2[k]); found_error = true; } } if (found_error) { for (i_t k = 0; k < m; ++k) { - printf("%d (sparse -> permuted -> dense) %e (sparse -> dense -> permuted)%e\n", k, rhs_dense_permuted[k], rhs_dense_permuted2[k]); + printf("%d (sparse -> permuted -> dense) %e (sparse -> dense -> permuted)%e\n", + k, + rhs_dense_permuted[k], + rhs_dense_permuted2[k]); } for (i_t k = 0; k < rhs.i.size(); ++k) { printf("%d rhs sparse %d %e\n", k, rhs.i[k], rhs.x[k]); @@ -715,11 +713,12 @@ i_t basis_update_t::u_transpose_solve(sparse_vector_t& rhs) } } for (i_t k = 0; k < m; ++k) { - if (rhs_dense2[k] != 0.0) { - printf("%d rhs dense2 %e\n", k, rhs_dense2[k]); - } + if (rhs_dense2[k] != 0.0) { printf("%d rhs dense2 %e\n", k, rhs_dense2[k]); } } - printf("col permutation %d rhs dense 2 %d rhs dense permuted %d\n", col_permutation_.size(), rhs_dense2.size(), rhs_dense_permuted.size()); + printf("col permutation %d rhs dense 2 %d rhs dense permuted %d\n", + col_permutation_.size(), + rhs_dense2.size(), + rhs_dense_permuted.size()); for (i_t k = 0; k < col_permutation_.size(); ++k) { printf("%d col permutation %d\n", k, col_permutation_[k]); } @@ -949,7 +948,7 @@ i_t basis_update_t::update(std::vector& utilde, i_t leaving_index norm_s = update_lower(sind, sval, leaving_index); } -#ifdef PARANOID +#ifdef CHECK_ABAR { sparse_matrix_t abar_test(m, 1, 1); const Int nz = lower_triangular_multiply(U_, m - 1, abar_test, 1); @@ -986,21 +985,19 @@ i_t basis_update_t::multiply_lu(csc_matrix_t& out) out.col_start.resize(m + 1); assert(out.m == m); const i_t nz_estimate = L0_.col_start[m] + U_.col_start[m]; -#if 0 - printf("Nz estimate %d m %d num updates %d\n", 
nz_estimate, m, num_updates_); - printf("q = ["); - for (Int k = 0; k < m; ++k) - { - printf("%d ", col_permutation_[k]); - } - printf("];\n"); - //PrintMatrix(L0_); - printf("p = ["); - for (Int k = 0; k < m; ++k) - { - printf("%d ", row_permutation_[k]); - } - printf("];\n"); +#ifdef PRINT_PERMUTATIONS + printf("Nz estimate %d m %d num updates %d\n", nz_estimate, m, num_updates_); + printf("q = ["); + for (i_t k = 0; k < m; ++k) { + printf("%d ", col_permutation_[k]); + } + printf("];\n"); + // PrintMatrix(L0_); + printf("p = ["); + for (i_t k = 0; k < m; ++k) { + printf("%d ", row_permutation_[k]); + } + printf("];\n"); #endif out.reallocate(nz_estimate); @@ -1060,16 +1057,14 @@ i_t basis_update_t::lower_triangular_multiply(const csc_matrix_t::lower_triangular_multiply(const csc_matrix_t -void basis_update_mpf_t::gather_into_sparse_vector(i_t nz, sparse_vector_t& out) const +void basis_update_mpf_t::gather_into_sparse_vector(i_t nz, + sparse_vector_t& out) const { const i_t m = L0_.m; - //out.i.clear(); - //out.x.clear(); out.i.resize(nz); out.x.resize(nz); for (i_t k = 0; k < nz; ++k) { - const i_t i = xi_workspace_[m + k]; - out.i[k] = i; - out.x[k] = x_workspace_[i]; + const i_t i = xi_workspace_[m + k]; + out.i[k] = i; + out.x[k] = x_workspace_[i]; xi_workspace_[m + k] = 0; - xi_workspace_[i] = 0; - x_workspace_[i] = 0.0; + xi_workspace_[i] = 0; + x_workspace_[i] = 0.0; } } @@ -1153,38 +1146,34 @@ template void basis_update_mpf_t::solve_to_workspace(i_t top) const { const i_t m = L0_.m; - i_t nz = 0; - for (i_t p = top; p < m; ++p) - { - const i_t i = xi_workspace_[p]; + i_t nz = 0; + for (i_t p = top; p < m; ++p) { + const i_t i = xi_workspace_[p]; xi_workspace_[m + nz] = i; - xi_workspace_[p] = 0; + xi_workspace_[p] = 0; nz++; } - for (i_t k = 0; k < nz; ++k) - { - const i_t i = xi_workspace_[m + k]; + for (i_t k = 0; k < nz; ++k) { + const i_t i = xi_workspace_[m + k]; xi_workspace_[i] = 1; } } - template -void 
basis_update_mpf_t::solve_to_sparse_vector(i_t top, sparse_vector_t& out) const +void basis_update_mpf_t::solve_to_sparse_vector(i_t top, + sparse_vector_t& out) const { - const i_t m = L0_.m; - out.n = m; - //out.i.clear(); - //out.x.clear(); + const i_t m = L0_.m; + out.n = m; const i_t nz = m - top; out.x.resize(nz); out.i.resize(nz); i_t k = 0; for (i_t p = top; p < m; ++p) { - const i_t i = xi_workspace_[p]; - out.i[k] = i; - out.x[k] = x_workspace_[i]; - x_workspace_[i] = 0.0; + const i_t i = xi_workspace_[p]; + out.i[k] = i; + out.x[k] = x_workspace_[i]; + x_workspace_[i] = 0.0; xi_workspace_[p] = 0; k++; } @@ -1197,9 +1186,9 @@ i_t basis_update_mpf_t::scatter_into_workspace(const sparse_vector_t::scatter_into_workspace(const sparse_vector_t void basis_update_mpf_t::grow_storage(i_t nz, i_t& S_start, i_t& S_nz) { - const i_t last_S_col = num_updates_ * 2; + const i_t last_S_col = num_updates_ * 2; const i_t new_last_S_col = last_S_col + 2; - if (new_last_S_col >= S_.col_start.size()) - { + if (new_last_S_col >= S_.col_start.size()) { S_.col_start.resize(new_last_S_col + refactor_frequency_); } S_nz = S_.col_start[last_S_col]; - if (S_nz + nz > S_.i.size()) - { + if (S_nz + nz > S_.i.size()) { S_.i.resize(std::max(2 * S_nz, S_nz + nz)); S_.x.resize(std::max(2 * S_nz, S_nz + nz)); } S_start = last_S_col; } - template -i_t basis_update_mpf_t::b_transpose_solve(const std::vector& rhs, std::vector& solution) const +i_t basis_update_mpf_t::b_transpose_solve(const std::vector& rhs, + std::vector& solution) const { std::vector UTsol; return b_transpose_solve(rhs, solution, UTsol); } template -i_t basis_update_mpf_t::b_transpose_solve(const std::vector& rhs, std::vector& solution, std::vector& UTsol) const +i_t basis_update_mpf_t::b_transpose_solve(const std::vector& rhs, + std::vector& solution, + std::vector& UTsol) const { const i_t m = L0_.m; // P*B = L*U @@ -1263,14 +1252,17 @@ i_t basis_update_mpf_t::b_transpose_solve(const std::vector& rhs, } template -i_t 
basis_update_mpf_t::b_transpose_solve(const sparse_vector_t& rhs, sparse_vector_t& solution) const +i_t basis_update_mpf_t::b_transpose_solve(const sparse_vector_t& rhs, + sparse_vector_t& solution) const { sparse_vector_t UTsol(1, 0); return b_transpose_solve(rhs, solution, UTsol); } template -i_t basis_update_mpf_t::b_transpose_solve(const sparse_vector_t& rhs, sparse_vector_t& solution, sparse_vector_t& UTsol) const +i_t basis_update_mpf_t::b_transpose_solve(const sparse_vector_t& rhs, + sparse_vector_t& solution, + sparse_vector_t& UTsol) const { // Solve for r such that U'*r = c @@ -1280,8 +1272,7 @@ i_t basis_update_mpf_t::b_transpose_solve(const sparse_vector_t solution_dense; rhs.to_dense(solution_dense); u_transpose_solve(solution_dense); @@ -1297,8 +1288,7 @@ i_t basis_update_mpf_t::b_transpose_solve(const sparse_vector_t(rhs_dense) > 1e-10) - { + if (vector_norm_inf(rhs_dense) > 1e-10) { printf("B transpose solve U transpose residual %e\n", vector_norm_inf(rhs_dense)); } #endif @@ -1312,8 +1302,7 @@ i_t basis_update_mpf_t::b_transpose_solve(const sparse_vector_t solution_dense; solution.to_dense(solution_dense); l_transpose_solve(solution_dense); @@ -1326,27 +1315,26 @@ i_t basis_update_mpf_t::b_transpose_solve(const sparse_vector_t 1e-4) - { - printf("B transpose solve L transpose solve error %e: index %d multiply %e rhs %e. update %d\n", std::abs(solution_dense[k] - r_dense[k]), k, solution_dense[k], r_dense[k], num_updates_); + for (i_t k = 0; k < L0_.m; ++k) { + if (std::abs(solution_dense[k] - r_dense[k]) > 1e-4) { + printf( + "B transpose solve L transpose solve error %e: index %d multiply %e rhs %e. 
update %d\n", + std::abs(solution_dense[k] - r_dense[k]), + k, + solution_dense[k], + r_dense[k], + num_updates_); } max_error = std::max(max_error, std::abs(solution_dense[k] - r_dense[k])); } - if (max_error > 1e-4) - { - printf("B transpose solve L transpose solve residual %e\n", max_error); - } + if (max_error > 1e-4) { printf("B transpose solve L transpose solve residual %e\n", max_error); } #endif // Compute y = P'*w solution.inverse_permute_vector(row_permutation_); return 0; } - - template i_t basis_update_mpf_t::u_transpose_solve(std::vector& rhs) const { @@ -1362,9 +1350,10 @@ i_t basis_update_mpf_t::u_transpose_solve(sparse_vector_t& r const i_t m = L0_.m; // U0'*x = y // Solve U0'*x0 = y - //csc_matrix_t B(m, 1, 0); + // csc_matrix_t B(m, 1, 0); rhs.to_csc(B_); - i_t top = sparse_triangle_solve(B_, 0, std::nullopt, xi_workspace_, U0_transpose_, x_workspace_.data()); + i_t top = sparse_triangle_solve( + B_, 0, std::nullopt, xi_workspace_, U0_transpose_, x_workspace_.data()); solve_to_sparse_vector(top, rhs); return 0; } @@ -1379,26 +1368,23 @@ i_t basis_update_mpf_t::l_transpose_solve(std::vector& rhs) const // L0^T *x = T_0^-T * T_1^-T * ... 
* T_{num_updates_ - 1}^-T * b = b' // Compute b' - for (i_t k = num_updates_ - 1; k >= 0; --k) - { + for (i_t k = num_updates_ - 1; k >= 0; --k) { // T_k^{-T} = ( I - v u^T/(1 + u^T v)) // T_k^{-T} * b = b - v * (u^T * b) / (1 + u^T * v) = b - theta * v, theta = u^T b / mu const i_t u_col = 2 * k; const i_t v_col = 2 * k + 1; - const f_t mu = mu_values_[k]; + const f_t mu = mu_values_[k]; // dot = u^T * b f_t dot = 0.0; - for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) - { + for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) { const i_t i = S_.i[p]; dot += S_.x[p] * rhs[i]; } const f_t theta = dot / mu; - for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) - { + for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) { const i_t i = S_.i[p]; rhs[i] -= theta * S_.x[p]; } @@ -1425,48 +1411,40 @@ i_t basis_update_mpf_t::l_transpose_solve(sparse_vector_t& r std::vector rhs_dense_0; rhs.to_dense(rhs_dense_0); #endif - // Compute b' - for (i_t k = num_updates_ - 1; k >= 0; --k) - { + // Compute b' + for (i_t k = num_updates_ - 1; k >= 0; --k) { // T_k^{-T} = ( I - v u^T/(1 + u^T v)) // T_k^{-T} * b = b - v * (u^T * b) / (1 + u^T * v) = b - theta * v, theta = u^T b / mu const i_t u_col = 2 * k; const i_t v_col = 2 * k + 1; - const f_t mu = mu_values_[k]; + const f_t mu = mu_values_[k]; // dot = u^T * b f_t dot = 0.0; - for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) - { + for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) { const i_t i = S_.i[p]; - if (xi_workspace_[i]) { - dot += S_.x[p] * x_workspace_[i]; - } + if (xi_workspace_[i]) { dot += S_.x[p] * x_workspace_[i]; } } #ifdef CHECK_MULTIPLY f_t dot_check = 0.0; - for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) - { + for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) { const i_t i = S_.i[p]; dot_check += S_.x[p] * rhs_dense_0[i]; } - if (std::abs(dot - dot_check) > 1e-10) - { + if 
(std::abs(dot - dot_check) > 1e-10) { printf("L transpose solve dot erorr: index %d dot %e dot check %e\n", k, dot, dot_check); } #endif const f_t theta = dot / mu; - for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) - { + for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) { const i_t i = S_.i[p]; - if (!xi_workspace_[i]) - { + if (!xi_workspace_[i]) { // Fill occured - xi_workspace_[i] = 1; + xi_workspace_[i] = 1; xi_workspace_[m + nz] = i; nz++; } @@ -1474,8 +1452,7 @@ i_t basis_update_mpf_t::l_transpose_solve(sparse_vector_t& r } #ifdef CHECK_MULTIPLY - for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) - { + for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) { const i_t i = S_.i[p]; rhs_dense_0[i] -= theta * S_.x[p]; } @@ -1483,17 +1460,17 @@ i_t basis_update_mpf_t::l_transpose_solve(sparse_vector_t& r } #ifdef CHECK_MULTIPLY - for (i_t i = 0; i < m; ++i) - { - if (std::abs(rhs_dense_0[i] - x_workspace_[i]) > 1e-9) - { - printf("L transpose solve multiply error %e index %d sparse %e dense %e\n", std::abs(rhs_dense_0[i] - x_workspace_[i]), i, x_workspace_[i], rhs_dense_0[i]); + for (i_t i = 0; i < m; ++i) { + if (std::abs(rhs_dense_0[i] - x_workspace_[i]) > 1e-9) { + printf("L transpose solve multiply error %e index %d sparse %e dense %e\n", + std::abs(rhs_dense_0[i] - x_workspace_[i]), + i, + x_workspace_[i], + rhs_dense_0[i]); } } #endif - - // sort the indices and place into a sparse column std::sort(xi_workspace_.begin() + m, xi_workspace_.begin() + m + nz, std::less()); @@ -1503,21 +1480,21 @@ i_t basis_update_mpf_t::l_transpose_solve(sparse_vector_t& r B_.i.resize(nz); B_.x.resize(nz); i_t b_nz = 0; - for (i_t k = 0; k < nz; ++k) - { - const i_t i = xi_workspace_[m + k]; - const f_t b_val = x_workspace_[i]; - x_workspace_[i] = 0.0; + for (i_t k = 0; k < nz; ++k) { + const i_t i = xi_workspace_[m + k]; + const f_t b_val = x_workspace_[i]; + x_workspace_[i] = 0.0; xi_workspace_[m + k] = 0; 
- xi_workspace_[i] = 0; - B_.i[b_nz] = i; - B_.x[b_nz] = b_val; + xi_workspace_[i] = 0; + B_.i[b_nz] = i; + B_.x[b_nz] = b_val; b_nz++; } B_.col_start[0] = 0; B_.col_start[1] = b_nz; - i_t top = sparse_triangle_solve(B_, 0, std::nullopt, xi_workspace_, L0_transpose_, x_workspace_.data()); + i_t top = sparse_triangle_solve( + B_, 0, std::nullopt, xi_workspace_, L0_transpose_, x_workspace_.data()); solve_to_sparse_vector(top, rhs); #ifdef CHECK_SPARSE_SOLVE @@ -1525,14 +1502,12 @@ i_t basis_update_mpf_t::l_transpose_solve(sparse_vector_t& r rhs.to_dense(rhs_dense); std::vector b_dense(m, 0.0); - for (i_t p = 0; p < nz; ++p) - { + for (i_t p = 0; p < nz; ++p) { const i_t i = B.i[p]; - b_dense[i] = B.x[p]; + b_dense[i] = B.x[p]; } matrix_vector_multiply(L0_transpose_, 1.0, rhs_dense, -1.0, b_dense); - if (vector_norm_inf(b_dense) > 1e-9) - { + if (vector_norm_inf(b_dense) > 1e-9) { printf("L0 transpose solve residual %e\n", vector_norm_inf(b_dense)); } #endif @@ -1540,9 +1515,9 @@ i_t basis_update_mpf_t::l_transpose_solve(sparse_vector_t& r return 0; } - template -i_t basis_update_mpf_t::b_solve(const std::vector& rhs, std::vector& solution) const +i_t basis_update_mpf_t::b_solve(const std::vector& rhs, + std::vector& solution) const { const i_t m = L0_.m; std::vector Lsol(m); @@ -1551,7 +1526,9 @@ i_t basis_update_mpf_t::b_solve(const std::vector& rhs, std::vect // Solve for x such that B*x = y template -i_t basis_update_mpf_t::b_solve(const std::vector& rhs, std::vector& solution, std::vector& Lsol) const +i_t basis_update_mpf_t::b_solve(const std::vector& rhs, + std::vector& solution, + std::vector& Lsol) const { const i_t m = L0_.m; // P*B = L*U @@ -1572,10 +1549,9 @@ i_t basis_update_mpf_t::b_solve(const std::vector& rhs, std::vect std::vector Lsol_check = Lsol; l_multiply(Lsol_check); f_t max_lsol_err = 0.0; - for (i_t k = 0; k < m; ++k) - { + for (i_t k = 0; k < m; ++k) { const f_t err = std::abs(Lsol_check[k] - rhs_permuted[k]); - max_lsol_err = 
std::max(max_lsol_err, err); + max_lsol_err = std::max(max_lsol_err, err); } printf("B solve L multiply error %e\n", max_lsol_err); #endif @@ -1593,17 +1569,20 @@ i_t basis_update_mpf_t::b_solve(const std::vector& rhs, std::vect } template -i_t basis_update_mpf_t::b_solve(const sparse_vector_t& rhs, sparse_vector_t& solution) const +i_t basis_update_mpf_t::b_solve(const sparse_vector_t& rhs, + sparse_vector_t& solution) const { sparse_vector_t Lsol(1, 0); return b_solve(rhs, solution, Lsol); } template -i_t basis_update_mpf_t::b_solve(const sparse_vector_t& rhs, sparse_vector_t& solution, sparse_vector_t& Lsol) const +i_t basis_update_mpf_t::b_solve(const sparse_vector_t& rhs, + sparse_vector_t& solution, + sparse_vector_t& Lsol) const { const i_t m = L0_.m; - solution = rhs; + solution = rhs; solution.inverse_permute_vector(inverse_row_permutation_); #ifdef CHECK_PERMUTATION @@ -1614,16 +1593,13 @@ i_t basis_update_mpf_t::b_solve(const sparse_vector_t& rhs, std::vector solution_dense2; solution.to_dense(solution_dense2); - for (i_t k = 0; k < m; ++k) - { - if (finish_perm[k] != solution_dense2[k]) - { + for (i_t k = 0; k < m; ++k) { + if (finish_perm[k] != solution_dense2[k]) { printf("B solve permutation error %e %e %d\n", finish_perm[k], solution_dense2[k], k); } } #endif - #ifdef CHECK_L_SOLVE std::vector l_solve_rhs; solution.to_dense(l_solve_rhs); @@ -1634,8 +1610,7 @@ i_t basis_update_mpf_t::b_solve(const sparse_vector_t& rhs, estimate_solution_density(input_size, sum_L_, num_calls_L_, use_hypersparse); if (use_hypersparse) { l_solve(solution); - } - else { + } else { std::vector solution_dense; solution.to_dense(solution_dense); l_solve(solution_dense); @@ -1650,15 +1625,11 @@ i_t basis_update_mpf_t::b_solve(const sparse_vector_t& rhs, l_multiply(l_solve_dense); f_t max_err_l_solve = 0.0; - for (i_t k = 0; k < m; ++k) - { - const f_t err = std::abs(l_solve_dense[k] - l_solve_rhs[k]); + for (i_t k = 0; k < m; ++k) { + const f_t err = 
std::abs(l_solve_dense[k] - l_solve_rhs[k]); max_err_l_solve = std::max(max_err_l_solve, err); } - if (max_err_l_solve > 1e-9) - { - printf("B solve L solve residual %e\n", max_err_l_solve); - } + if (max_err_l_solve > 1e-9) { printf("B solve L solve residual %e\n", max_err_l_solve); } #endif #ifdef CHECK_U_SOLVE @@ -1670,8 +1641,7 @@ i_t basis_update_mpf_t::b_solve(const sparse_vector_t& rhs, estimate_solution_density(rhs_size, sum_U_, num_calls_U_, use_hypersparse); if (use_hypersparse) { u_solve(solution); - } - else { + } else { std::vector solution_dense; solution.to_dense(solution_dense); u_solve(solution_dense); @@ -1686,10 +1656,7 @@ i_t basis_update_mpf_t::b_solve(const sparse_vector_t& rhs, matrix_vector_multiply(U0_, 1.0, solution_dense, -1.0, rhs_dense); const f_t max_err = vector_norm_inf(rhs_dense); - if (max_err > 1e-9) - { - printf("B solve U0 solve residual %e\n", max_err); - } + if (max_err > 1e-9) { printf("B solve U0 solve residual %e\n", max_err); } #endif return 0; } @@ -1713,9 +1680,10 @@ i_t basis_update_mpf_t::u_solve(sparse_vector_t& rhs) const // U*x = y // Solve U0*x = y - //csc_matrix_t B(m, 1, 0); + // csc_matrix_t B(m, 1, 0); rhs.to_csc(B_); - i_t top = sparse_triangle_solve(B_, 0, std::nullopt, xi_workspace_, U0_, x_workspace_.data()); + i_t top = sparse_triangle_solve( + B_, 0, std::nullopt, xi_workspace_, U0_, x_workspace_.data()); solve_to_sparse_vector(top, rhs); return 0; @@ -1746,24 +1714,22 @@ i_t basis_update_mpf_t::l_solve(std::vector& rhs) const // Then T0 * T1 * ... 
* T_{num_updates_ - 1} * x = x0 // Or x = T_{num_updates}^{-1} * T_1^{-1} * T_0^{-1} x0 - for (i_t k = 0; k < num_updates_; ++k) - { + for (i_t k = 0; k < num_updates_; ++k) { // T = I + u*v^T // T^{-1} = I - u*v^T / (1 + v^T*u) - // T^{-1} * x = x - u*v^T * x / (1 + v^T*u) = x - theta * u, theta = v^T * x / (1 + v^T*u) = v^T x / mu - const f_t mu = mu_values_[k]; + // T^{-1} * x = x - u*v^T * x / (1 + v^T*u) = x - theta * u, theta = v^T * x / (1 + v^T*u) = v^T + // x / mu + const f_t mu = mu_values_[k]; const i_t u_col = 2 * k; const i_t v_col = 2 * k + 1; - f_t dot = 0.0; - for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) - { + f_t dot = 0.0; + for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) { const i_t i = S_.i[p]; dot += S_.x[p] * rhs[i]; } const f_t theta = dot / mu; - for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) - { + for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) { const i_t i = S_.i[p]; rhs[i] -= theta * S_.x[p]; } @@ -1773,10 +1739,9 @@ i_t basis_update_mpf_t::l_solve(std::vector& rhs) const std::vector inout = rhs; l_multiply(inout); f_t err_max = 0.0; - for (i_t k = 0; k < m; ++k) - { + for (i_t k = 0; k < m; ++k) { const f_t err = std::abs(inout[k] - rhs_check[k]); - err_max = std::max(err_max, err); + err_max = std::max(err_max, err); } printf("L solve residual %e\n", err_max); #endif @@ -1793,45 +1758,40 @@ i_t basis_update_mpf_t::l_solve(sparse_vector_t& rhs) const // L0 * T0 * T1 * ... 
* T_{num_updates_ - 1} * x = y // First solve L0*x0 = y - //csc_matrix_t B(m, 1, 0); + // csc_matrix_t B(m, 1, 0); rhs.to_csc(B_); - i_t top = sparse_triangle_solve(B_, 0, std::nullopt, xi_workspace_, L0_, x_workspace_.data()); - solve_to_workspace(top); // Uses xi_workspace_ and x_workspace_ to fill rhs + i_t top = sparse_triangle_solve( + B_, 0, std::nullopt, xi_workspace_, L0_, x_workspace_.data()); + solve_to_workspace(top); // Uses xi_workspace_ and x_workspace_ to fill rhs i_t nz = m - top; // Then T0 * T1 * ... * T_{num_updates_ - 1} * x = x0 // Or x = T_{num_updates}^{-1} * T_1^{-1} * T_0^{-1} x0 - for (i_t k = 0; k < num_updates_; ++k) - { + for (i_t k = 0; k < num_updates_; ++k) { // T = I + u*v^T // T^{-1} = I - u*v^T / (1 + v^T*u) - // T^{-1} * x = x - u*v^T * x / (1 + v^T*u) = x - theta * u, theta = v^T * x / (1 + v^T*u) = v^T x / mu - const f_t mu = mu_values_[k]; + // T^{-1} * x = x - u*v^T * x / (1 + v^T*u) = x - theta * u, theta = v^T * x / (1 + v^T*u) = v^T + // x / mu + const f_t mu = mu_values_[k]; const i_t u_col = 2 * k; const i_t v_col = 2 * k + 1; f_t dot = 0.0; - for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) - { + for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) { const i_t i = S_.i[p]; - if (xi_workspace_[i]) - { - dot += S_.x[p] * x_workspace_[i]; - } + if (xi_workspace_[i]) { dot += S_.x[p] * x_workspace_[i]; } } const f_t theta = dot / mu; - for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) - { + for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) { const i_t i = S_.i[p]; - if (!xi_workspace_[i]) - { + if (!xi_workspace_[i]) { // Fill occured - xi_workspace_[i] = 1; + xi_workspace_[i] = 1; xi_workspace_[m + nz] = i; nz++; } - x_workspace_[i] -= theta * S_.x[p]; + x_workspace_[i] -= theta * S_.x[p]; } } @@ -1848,55 +1808,44 @@ i_t basis_update_mpf_t::update(const std::vector& utilde, i_t leaving_index) { const i_t m = L0_.m; -#if 0 +#ifdef PRINT_NUM_UPDATES 
printf("Update: num_updates_ %d\n", num_updates_); #endif // We are going to create a new matrix T = I + u*v^T const i_t col_start = U0_.col_start[leaving_index]; const i_t col_end = U0_.col_start[leaving_index + 1]; - std::vector u = utilde; + std::vector u = utilde; // u = utilde - U0(:, leaving_index) - for (i_t p = col_start; p < col_end; ++p) - { + for (i_t p = col_start; p < col_end; ++p) { const i_t i = U0_.i[p]; u[i] -= U0_.x[p]; } i_t u_nz = 0; - for (i_t i = 0; i < m; ++i) - { - if (u[i] != 0.0) - { - u_nz++; - } + for (i_t i = 0; i < m; ++i) { + if (u[i] != 0.0) { u_nz++; } } // v = etilde i_t v_nz = 0; - for (i_t i = 0; i < m; ++i) - { - if (etilde[i] != 0.0) - { - v_nz++; - } + for (i_t i = 0; i < m; ++i) { + if (etilde[i] != 0.0) { v_nz++; } } i_t nz = u_nz + v_nz; i_t S_start; i_t S_nz; grow_storage(nz, S_start, S_nz); -#if 0 +#ifdef PRINT_S_INFO printf("Update: S_start %d S_nz %d\n", S_start, S_nz); #endif i_t S_nz_start = S_nz; // Scatter u into S - for (i_t i = 0; i < m; ++i) - { - if (u[i] != 0.0) - { + for (i_t i = 0; i < m; ++i) { + if (u[i] != 0.0) { S_.i[S_nz] = i; S_.x[S_nz] = u[i]; S_nz++; @@ -1905,10 +1854,8 @@ i_t basis_update_mpf_t::update(const std::vector& utilde, S_.col_start[S_start + 1] = S_nz; // Scatter v into S - for (i_t i = 0; i < m; ++i) - { - if (etilde[i] != 0.0) - { + for (i_t i = 0; i < m; ++i) { + if (etilde[i] != 0.0) { S_.i[S_nz] = i; S_.x[S_nz] = etilde[i]; S_nz++; @@ -1916,14 +1863,13 @@ i_t basis_update_mpf_t::update(const std::vector& utilde, } S_.col_start[S_start + 2] = S_nz; - // Compute mu = 1 + v^T * u const f_t mu = 1.0 + sparse_dot(S_.i.data() + S_.col_start[S_start], - S_.x.data() + S_.col_start[S_start], - S_.col_start[S_start + 1] - S_.col_start[S_start], - S_.i.data() + S_.col_start[S_start + 1], - S_.x.data() + S_.col_start[S_start + 1], - v_nz); + S_.x.data() + S_.col_start[S_start], + S_.col_start[S_start + 1] - S_.col_start[S_start], + S_.i.data() + S_.col_start[S_start + 1], + S_.x.data() + 
S_.col_start[S_start + 1], + v_nz); #ifdef CHECK_MU const f_t mu_check = 1.0 + dot(etilde, u); @@ -1931,8 +1877,11 @@ i_t basis_update_mpf_t::update(const std::vector& utilde, #endif mu_values_.push_back(mu); -#if 0 - printf("Update mu %e u nz %d v nz %d\n", mu_values_.back(), S_.col_start[S_start + 1] - S_.col_start[S_start], S_.col_start[S_start + 2] - S_.col_start[S_start + 1]); +#ifdef PRINT_MU_INFO + printf("Update mu %e u nz %d v nz %d\n", + mu_values_.back(), + S_.col_start[S_start + 1] - S_.col_start[S_start], + S_.col_start[S_start + 2] - S_.col_start[S_start + 1]); #endif num_updates_++; @@ -1946,7 +1895,7 @@ i_t basis_update_mpf_t::update(const sparse_vector_t& utilde i_t leaving_index) { const i_t m = L0_.m; -#if 0 +#ifdef PRINT_NUM_UPDATES printf("Update: num_updates_ %d\n", num_updates_); #endif @@ -1978,42 +1927,31 @@ i_t basis_update_mpf_t::update(const sparse_vector_t& utilde i_t S_nz; grow_storage(nz + etilde.i.size(), S_start, S_nz); - i_t small_count = 0; - for (i_t k = 0; k < nz; ++k) { - const i_t i = xi_workspace_[m + k]; - const f_t x_val = x_workspace_[i]; + const i_t i = xi_workspace_[m + k]; + const f_t x_val = x_workspace_[i]; xi_workspace_[i] = 0; x_workspace_[i] = 0.0; xi_workspace_[m + k] = 0; - if (x_val == 0.0) { - continue; - } - S_.i[S_nz] = i; - S_.x[S_nz] = x_val; + if (x_val == 0.0) { continue; } + S_.i[S_nz] = i; + S_.x[S_nz] = x_val; S_nz++; } S_.col_start[S_start + 1] = S_nz; - //printf("small count u percentage %.2f\n", small_count / (static_cast(nz)) * 100.0); - small_count = 0; - // Gather etilde into a column of S + // Gather etilde into a column of S etilde.sort(); const i_t etilde_nz = etilde.i.size(); for (i_t k = 0; k < etilde_nz; ++k) { - if (etilde.x[k] == 0.0) { - continue; - } + if (etilde.x[k] == 0.0) { continue; } S_.i[S_nz] = etilde.i[k]; S_.x[S_nz] = etilde.x[k]; S_nz++; } S_.col_start[S_start + 2] = S_nz; - //printf("small count etilde percentage %.2f\n", small_count / (static_cast(etilde_nz)) * 100.0); - - 
// Compute mu = 1 + v^T * u mu_values_.push_back(1.0 + sparse_dot(S_.i.data() + S_.col_start[S_start], S_.x.data() + S_.col_start[S_start], @@ -2021,8 +1959,11 @@ i_t basis_update_mpf_t::update(const sparse_vector_t& utilde S_.i.data() + S_.col_start[S_start + 1], S_.x.data() + S_.col_start[S_start + 1], S_.col_start[S_start + 2] - S_.col_start[S_start + 1])); -#if 0 - printf("Update mu %e u nz %d v nz %d\n", mu_values_.back(), S_.col_start[S_start + 1] - S_.col_start[S_start], S_.col_start[S_start + 2] - S_.col_start[S_start + 1]);printf("Update mu %e\n", mu_values_.back()); +#ifdef PRINT_MU_INFO + printf("Update mu %e u nz %d v nz %d\n", + mu_values_.back(), + S_.col_start[S_start + 1] - S_.col_start[S_start], + S_.col_start[S_start + 2] - S_.col_start[S_start + 1]); #endif num_updates_++; @@ -2037,24 +1978,21 @@ void basis_update_mpf_t::l_multiply(std::vector& inout) const // L*x = y // L0 * T0 * T1 * ... * T_{num_updates_ - 1} * x = y - for (i_t k = num_updates_ - 1; k >= 0; --k) - { + for (i_t k = num_updates_ - 1; k >= 0; --k) { // T_k = ( I + u v^T) // T_k * b = b + u * (v^T * b) = b + theta * u, theta = v^T b const i_t u_col = 2 * k; const i_t v_col = 2 * k + 1; - const f_t mu = mu_values_[k]; + const f_t mu = mu_values_[k]; // dot = v^T b f_t dot = 0.0; - for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) - { + for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) { const i_t i = S_.i[p]; dot += S_.x[p] * inout[i]; } const f_t theta = dot; - for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) - { + for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) { const i_t i = S_.i[p]; inout[i] += theta * S_.x[p]; } @@ -2074,24 +2012,21 @@ void basis_update_mpf_t::l_transpose_multiply(std::vector& inout) inout = out; - for (i_t k = 0; k < num_updates_; ++k) - { + for (i_t k = 0; k < num_updates_; ++k) { const i_t u_col = 2 * k; const i_t v_col = 2 * k + 1; - const f_t mu = mu_values_[k]; + const f_t mu = 
mu_values_[k]; // T_k = ( I + u v^T) // T_k^T = ( I + v u^T) // T_k^T * b = b + v * (u^T * b) = b + theta * v, theta = u^T * b f_t dot = 0.0; - for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) - { + for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) { const i_t i = S_.i[p]; dot += S_.x[p] * inout[i]; } const f_t theta = dot; - for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) - { + for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) { const i_t i = S_.i[p]; inout[i] += theta * S_.x[p]; } @@ -2101,7 +2036,6 @@ void basis_update_mpf_t::l_transpose_multiply(std::vector& inout) template void basis_update_mpf_t::multiply_lu(csc_matrix_t& out) const { - // P*B = L*U // B = P'*L*U const i_t m = L0_.m; @@ -2113,22 +2047,18 @@ void basis_update_mpf_t::multiply_lu(csc_matrix_t& out) cons i_t B_nz = 0; - for (i_t j = 0; j < m; ++j) - { + for (i_t j = 0; j < m; ++j) { // B(:, j) = L*U(:, j) out.col_start[j] = B_nz; std::vector Uj(m, 0.0); - for (i_t p = U0_.col_start[j]; p < U0_.col_start[j + 1]; ++p) - { + for (i_t p = U0_.col_start[j]; p < U0_.col_start[j + 1]; ++p) { const i_t i = U0_.i[p]; - Uj[i] = U0_.x[p]; + Uj[i] = U0_.x[p]; } l_multiply(Uj); - for (i_t i = 0; i < m; ++i) - { - if (Uj[i] != 0.0) - { + for (i_t i = 0; i < m; ++i) { + if (Uj[i] != 0.0) { out.i.push_back(row_permutation_[i]); out.x.push_back(Uj[i]); B_nz++; @@ -2137,10 +2067,9 @@ void basis_update_mpf_t::multiply_lu(csc_matrix_t& out) cons } out.col_start[m] = B_nz; - out.m = m; - out.n = m; + out.m = m; + out.n = m; out.nz_max = B_nz; - } #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp index a19abf380a..551cfdb627 100644 --- a/cpp/src/dual_simplex/basis_updates.hpp +++ b/cpp/src/dual_simplex/basis_updates.hpp @@ -37,7 +37,7 @@ class basis_update_t { S_(Linit.m, 1, 0), col_permutation_(Linit.m), inverse_col_permutation_(Linit.m), - 
xi_workspace_(2*Linit.m, 0), + xi_workspace_(2 * Linit.m, 0), x_workspace_(Linit.m, 0.0), U_transpose_(1, 1, 1), L0_transpose_(1, 1, 1) @@ -82,7 +82,8 @@ class basis_update_t { // Solves for y such that B'*y = c, where B is the basis matrix i_t b_transpose_solve(const std::vector& rhs, std::vector& solution) const; - i_t b_transpose_solve(const sparse_vector_t& rhs, sparse_vector_t& solution) const; + i_t b_transpose_solve(const sparse_vector_t& rhs, + sparse_vector_t& solution) const; // Solve for x such that L*x = y i_t l_solve(std::vector& rhs) const; @@ -153,29 +154,29 @@ class basis_update_t { i_t scatter_into_workspace(const sparse_vector_t& in) const; void gather_into_sparse_vector(i_t nz, sparse_vector_t& out) const; - i_t num_updates_; // Number of rank-1 updates to L0 - mutable csc_matrix_t L0_; // Sparse lower triangular matrix from initial factorization - mutable csc_matrix_t U_; // Sparse upper triangular matrix. Is modified by updates - std::vector row_permutation_; // Row permutation from initial factorization L*U = P*B - std::vector inverse_row_permutation_; // Inverse row permutation from initial factorization L*U = P*B - std::vector pivot_indices_; // indicies for rank-1 updates to L - csc_matrix_t S_; // stores the pivot elements for rank-1 updates to L - std::vector col_permutation_; // symmetric permuation q used in U(q, q) represents Q + i_t num_updates_; // Number of rank-1 updates to L0 + mutable csc_matrix_t L0_; // Sparse lower triangular matrix from initial factorization + mutable csc_matrix_t U_; // Sparse upper triangular matrix. 
Is modified by updates + std::vector row_permutation_; // Row permutation from initial factorization L*U = P*B + std::vector + inverse_row_permutation_; // Inverse row permutation from initial factorization L*U = P*B + std::vector pivot_indices_; // indicies for rank-1 updates to L + csc_matrix_t S_; // stores the pivot elements for rank-1 updates to L + std::vector col_permutation_; // symmetric permuation q used in U(q, q) represents Q std::vector inverse_col_permutation_; // inverse permutation represents Q' mutable std::vector xi_workspace_; mutable std::vector x_workspace_; - mutable csc_matrix_t U_transpose_; // Needed for sparse solves - mutable csc_matrix_t L0_transpose_; // Needed for sparse solves + mutable csc_matrix_t U_transpose_; // Needed for sparse solves + mutable csc_matrix_t L0_transpose_; // Needed for sparse solves }; - template class basis_update_mpf_t { public: basis_update_mpf_t(const csc_matrix_t& Linit, - const csc_matrix_t& Uinit, - const std::vector& p, - const i_t refactor_frequency) + const csc_matrix_t& Uinit, + const std::vector& p, + const i_t refactor_frequency) : L0_(Linit), U0_(Uinit), row_permutation_(p), @@ -183,7 +184,7 @@ class basis_update_mpf_t { S_(Linit.m, 1, 0), col_permutation_(Linit.m), inverse_col_permutation_(Linit.m), - xi_workspace_(2*Linit.m, 0), + xi_workspace_(2 * Linit.m, 0), x_workspace_(Linit.m, 0.0), U0_transpose_(1, 1, 1), L0_transpose_(1, 1, 1), @@ -209,8 +210,9 @@ class basis_update_mpf_t { { i_t total_L_transpose_calls = total_sparse_L_transpose_ + total_dense_L_transpose_; i_t total_U_transpose_calls = total_sparse_U_transpose_ + total_dense_U_transpose_; - i_t total_L_calls = total_sparse_L_ + total_dense_L_; - i_t total_U_calls = total_sparse_U_ + total_dense_U_; + i_t total_L_calls = total_sparse_L_ + total_dense_L_; + i_t total_U_calls = total_sparse_U_ + total_dense_U_; + // clang-format off printf("sparse L transpose %8d %8.2f%\n", total_sparse_L_transpose_, 100.0 * total_sparse_L_transpose_ / 
total_L_transpose_calls); printf("dense L transpose %8d %8.2f%\n", total_dense_L_transpose_, 100.0 * total_dense_L_transpose_ / total_L_transpose_calls); printf("sparse U transpose %8d %8.2f%\n", total_sparse_U_transpose_, 100.0 * total_sparse_U_transpose_ / total_U_transpose_calls); @@ -219,27 +221,27 @@ class basis_update_mpf_t { printf("dense L %8d %8.2f%\n", total_dense_L_, 100.0 * total_dense_L_ / total_L_calls); printf("sparse U %8d %8.2f%\n", total_sparse_U_, 100.0 * total_sparse_U_ / total_U_calls); printf("dense U %8d %8.2f%\n", total_dense_U_, 100.0 * total_dense_U_ / total_U_calls); + // clang-format on } void reset_stas() { - num_calls_L_ = 0; - num_calls_U_ = 0; + num_calls_L_ = 0; + num_calls_U_ = 0; num_calls_L_transpose_ = 0; num_calls_U_transpose_ = 0; - sum_L_ = 0.0; - sum_U_ = 0.0; - sum_L_transpose_ = 0.0; - sum_U_transpose_ = 0.0; + sum_L_ = 0.0; + sum_U_ = 0.0; + sum_L_transpose_ = 0.0; + sum_U_transpose_ = 0.0; } - i_t reset(const csc_matrix_t& Linit, const csc_matrix_t& Uinit, const std::vector& p) { L0_ = Linit; - U0_ = Uinit; + U0_ = Uinit; assert(p.size() == Linit.m); row_permutation_ = p; inverse_permutation(row_permutation_, inverse_row_permutation_); @@ -249,13 +251,13 @@ class basis_update_mpf_t { return 0; } - f_t estimate_solution_density(f_t rhs_nz, f_t sum, i_t& num_calls, bool &use_hypersparse) const + f_t estimate_solution_density(f_t rhs_nz, f_t sum, i_t& num_calls, bool& use_hypersparse) const { num_calls++; - const f_t average_growth = std::max(1.0, sum / static_cast(num_calls)); - const f_t predicted_nz = rhs_nz * average_growth; + const f_t average_growth = std::max(1.0, sum / static_cast(num_calls)); + const f_t predicted_nz = rhs_nz * average_growth; const f_t predicted_density = predicted_nz / static_cast(L0_.m); - use_hypersparse = predicted_density < hypersparse_threshold_; + use_hypersparse = predicted_density < hypersparse_threshold_; return predicted_nz; } @@ -271,9 +273,14 @@ class basis_update_mpf_t { // Solves 
for y such that B'*y = c, where B is the basis matrix i_t b_transpose_solve(const std::vector& rhs, std::vector& solution) const; - i_t b_transpose_solve(const sparse_vector_t& rhs, sparse_vector_t& solution) const; - i_t b_transpose_solve(const std::vector& rhs, std::vector& solution, std::vector& UTsol) const; - i_t b_transpose_solve(const sparse_vector_t& rhs, sparse_vector_t& solution, sparse_vector_t& UTsol) const; + i_t b_transpose_solve(const sparse_vector_t& rhs, + sparse_vector_t& solution) const; + i_t b_transpose_solve(const std::vector& rhs, + std::vector& solution, + std::vector& UTsol) const; + i_t b_transpose_solve(const sparse_vector_t& rhs, + sparse_vector_t& solution, + sparse_vector_t& UTsol) const; // Solve for x such that L*x = y i_t l_solve(std::vector& rhs) const; @@ -304,8 +311,9 @@ class basis_update_mpf_t { // Replace the column B(:, leaving_index) with the vector abar. Pass in utilde such that L*utilde // = abar - i_t update(const sparse_vector_t& utilde, sparse_vector_t& etilde, i_t leaving_index); - + i_t update(const sparse_vector_t& utilde, + sparse_vector_t& etilde, + i_t leaving_index); i_t num_updates() const { return num_updates_; } @@ -356,22 +364,23 @@ class basis_update_mpf_t { void l_multiply(std::vector& inout) const; void l_transpose_multiply(std::vector& inout) const; - i_t num_updates_; // Number of rank-1 updates to L0 + i_t num_updates_; // Number of rank-1 updates to L0 i_t refactor_frequency_; // Average updates before refactoring - mutable csc_matrix_t L0_; // Sparse lower triangular matrix from initial factorization - mutable csc_matrix_t U0_; // Sparse upper triangular matrix from initial factorization - std::vector row_permutation_; // Row permutation from initial factorization L*U = P*B - std::vector inverse_row_permutation_; // Inverse row permutation from initial factorization L*U = P*B - std::vector pivot_indices_; // indicies for rank-1 updates to L - csc_matrix_t S_; // stores information about the rank-1 
updates to L - std::vector mu_values_; // stores information about the rank-1 updates to L - std::vector col_permutation_; // symmetric permuation q used in U(q, q) represents Q + mutable csc_matrix_t L0_; // Sparse lower triangular matrix from initial factorization + mutable csc_matrix_t U0_; // Sparse upper triangular matrix from initial factorization + std::vector row_permutation_; // Row permutation from initial factorization L*U = P*B + std::vector + inverse_row_permutation_; // Inverse row permutation from initial factorization L*U = P*B + std::vector pivot_indices_; // indicies for rank-1 updates to L + csc_matrix_t S_; // stores information about the rank-1 updates to L + std::vector mu_values_; // stores information about the rank-1 updates to L + std::vector col_permutation_; // symmetric permuation q used in U(q, q) represents Q std::vector inverse_col_permutation_; // inverse permutation represents Q' mutable std::vector xi_workspace_; mutable std::vector x_workspace_; - mutable csc_matrix_t U0_transpose_; // Needed for sparse solves - mutable csc_matrix_t L0_transpose_; // Needed for sparse solves - mutable csc_matrix_t B_; // Needed for sparse solves + mutable csc_matrix_t U0_transpose_; // Needed for sparse solves + mutable csc_matrix_t L0_transpose_; // Needed for sparse solves + mutable csc_matrix_t B_; // Needed for sparse solves mutable i_t total_sparse_L_transpose_; mutable i_t total_dense_L_transpose_; diff --git a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp index 6b626263d5..11753cbcb7 100644 --- a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp +++ b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp @@ -36,8 +36,8 @@ i_t bound_flipping_ratio_test_t::compute_breakpoints(std::vector& i_t idx = 0; while (idx == 0 && pivot_tol >= 1e-12) { - //for (i_t k = 0; k < n - m; ++k) { - // const i_t j = nonbasic_list_[k]; + // for (i_t k = 0; k < n - m; ++k) { + // const i_t j = 
nonbasic_list_[k]; for (i_t h = 0; h < delta_z_indices_.size(); ++h) { const i_t j = delta_z_indices_[h]; const i_t k = nonbasic_mark_[j]; @@ -116,9 +116,9 @@ template i_t bound_flipping_ratio_test_t::compute_step_length(f_t& step_length, i_t& nonbasic_entering) { - const i_t m = m_; - const i_t n = n_; - const i_t nz = delta_z_indices_.size(); + const i_t m = m_; + const i_t n = n_; + const i_t nz = delta_z_indices_.size(); constexpr bool verbose = false; // Compute the initial set of breakpoints @@ -159,16 +159,13 @@ i_t bound_flipping_ratio_test_t::compute_step_length(f_t& step_length, slope); } - - // Continue the search using a heap to order the breakpoints ratios[k_idx] = ratios[num_breakpoints - 1]; indicies[k_idx] = indicies[num_breakpoints - 1]; constexpr bool use_bucket_pass = false; - if (use_bucket_pass) - { + if (use_bucket_pass) { f_t max_ratio = 0.0; for (i_t k = 0; k < num_breakpoints - 1; ++k) { if (ratios[k] > max_ratio) { max_ratio = ratios[k]; } @@ -288,19 +285,19 @@ void bound_flipping_ratio_test_t::bucket_pass(const std::vector& const std::vector& nonbasic_list = nonbasic_list_; const i_t N = num_breakpoints; - const i_t K = 400; // 0, -16, -15, ...., 0, 1, ...., 400 - 18 = 382 + const i_t K = 400; // 0, -16, -15, ...., 0, 1, ...., 400 - 18 = 382 std::vector buckets(K, 0.0); std::vector bucket_count(K, 0); for (i_t k = 0; k < N; ++k) { - const i_t idx = current_indicies[k]; - const f_t ratio = current_ratios[k]; + const i_t idx = current_indicies[k]; + const f_t ratio = current_ratios[k]; const f_t min_exponent = -16.0; const f_t max_exponent = 382.0; - const f_t exponent = std::max(min_exponent, std::min(max_exponent, std::log10(ratio))); - const i_t bucket_idx = ratio == 0.0 ? 
0 : static_cast(exponent - min_exponent + 1); - //settings_.log.printf("Ratio %e exponent %e bucket_idx %d\n", ratio, exponent, bucket_idx); - const i_t j = nonbasic_list[idx]; - const f_t interval = upper_[j] - lower_[j]; + const f_t exponent = std::max(min_exponent, std::min(max_exponent, std::log10(ratio))); + const i_t bucket_idx = ratio == 0.0 ? 0 : static_cast(exponent - min_exponent + 1); + // settings_.log.printf("Ratio %e exponent %e bucket_idx %d\n", ratio, exponent, bucket_idx); + const i_t j = nonbasic_list[idx]; + const f_t interval = upper_[j] - lower_[j]; const f_t delta_slope = std::abs(delta_z_[j]) * interval; buckets[bucket_idx] += delta_slope; bucket_count[bucket_idx]++; @@ -309,7 +306,12 @@ void bound_flipping_ratio_test_t::bucket_pass(const std::vector& std::vector cumulative_sum(K, 0.0); cumulative_sum[0] = buckets[0]; if (cumulative_sum[0] > slope) { - settings_.log.printf("Bucket 0. Count in bucket %d. Slope %e. Cumulative sum %e. Bucket value %e\n", bucket_count[0], slope, cumulative_sum[0], buckets[0]); + settings_.log.printf( + "Bucket 0. Count in bucket %d. Slope %e. Cumulative sum %e. Bucket value %e\n", + bucket_count[0], + slope, + cumulative_sum[0], + buckets[0]); return; } i_t k; @@ -323,12 +325,18 @@ void bound_flipping_ratio_test_t::bucket_pass(const std::vector& } if (exceeded) { - settings_.log.printf("Value in bucket %d. Count in buckets %d. Slope %e. Cumulative sum %e. Next sum %e Bucket value %e\n", k, bucket_count[k], slope, cumulative_sum[k-1], cumulative_sum[k], buckets[k-1]); + settings_.log.printf( + "Value in bucket %d. Count in buckets %d. Slope %e. Cumulative sum %e. 
Next sum %e Bucket " + "value %e\n", + k, + bucket_count[k], + slope, + cumulative_sum[k - 1], + cumulative_sum[k], + buckets[k - 1]); } - } - #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE template class bound_flipping_ratio_test_t; diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 2adcb54cb1..b652d04da8 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -36,7 +36,6 @@ namespace cuopt::linear_programming::dual_simplex { namespace phase2 { - // Computes vectors farkas_y, farkas_zl, farkas_zu that satisfy // // A'*farkas_y + farkas_zl - farkas_zu ~= 0 @@ -68,88 +67,75 @@ void compute_farkas_certificate(const lp_problem_t& lp, std::vector original_residual = z; matrix_transpose_vector_multiply(lp.A, 1.0, y, 1.0, original_residual); - for (i_t j = 0; j < n; ++j) - { + for (i_t j = 0; j < n; ++j) { original_residual[j] -= lp.objective[j]; } const f_t original_residual_norm = vector_norm2(original_residual); settings.log.printf("|| A'*y + z - c || = %e\n", original_residual_norm); - std::vector zl(n); std::vector zu(n); - for (i_t j = 0; j < n; ++j) - { + for (i_t j = 0; j < n; ++j) { zl[j] = std::max(0.0, z[j]); zu[j] = -std::min(0.0, z[j]); } original_residual = zl; matrix_transpose_vector_multiply(lp.A, 1.0, y, 1.0, original_residual); - for (i_t j = 0; j < n; ++j) - { + for (i_t j = 0; j < n; ++j) { original_residual[j] -= (zu[j] + lp.objective[j]); } const f_t original_residual_2 = vector_norm2(original_residual); settings.log.printf("|| A'*y + zl - zu - c || = %e\n", original_residual_2); - std::vector search_dir_residual = delta_z; matrix_transpose_vector_multiply(lp.A, 1.0, delta_y, 1.0, search_dir_residual); - settings.log.printf("|| A'*delta_y + delta_z || = %e\n", vector_norm2(search_dir_residual)); + settings.log.printf("|| A'*delta_y + delta_z || = %e\n", + vector_norm2(search_dir_residual)); std::vector y_bar(m); - for (i_t i = 0; i < m; ++i) - { + for (i_t i = 0; i < m; ++i) { y_bar[i] = y[i] + 
delta_y[i]; } original_residual = z; matrix_transpose_vector_multiply(lp.A, 1.0, y_bar, 1.0, original_residual); - for (i_t j = 0; j < n; ++j) - { + for (i_t j = 0; j < n; ++j) { original_residual[j] += (delta_z[j] - lp.objective[j]); } const f_t original_residual_3 = vector_norm2(original_residual); settings.log.printf("|| A'*(y + delta_y) + (z + delta_z) - c || = %e\n", original_residual_3); - - farkas_y.resize(m); farkas_zl.resize(n); farkas_zu.resize(n); f_t gamma = 0.0; - for (i_t j = 0; j < n; ++j) - { - const f_t cj = lp.objective[j]; + for (i_t j = 0; j < n; ++j) { + const f_t cj = lp.objective[j]; const f_t lower = lp.lower[j]; const f_t upper = lp.upper[j]; - if (lower > -inf) - { - gamma -= lower * std::min(0.0, cj); - } - if (upper < inf) - { - gamma -= upper * std::max(0.0, cj); - } + if (lower > -inf) { gamma -= lower * std::min(0.0, cj); } + if (upper < inf) { gamma -= upper * std::max(0.0, cj); } } printf("gamma = %e\n", gamma); - const f_t threshold = 1.0; + const f_t threshold = 1.0; const f_t positive_threshold = std::max(-gamma, 0.0) + threshold; printf("positive_threshold = %e\n", positive_threshold); // We need to increase the dual objective to positive threshold - f_t alpha = threshold; - const f_t infeas = (direction == 1) ? (lp.lower[leaving_index] - x[leaving_index]) : (x[leaving_index] - lp.upper[leaving_index]); + f_t alpha = threshold; + const f_t infeas = (direction == 1) ? 
(lp.lower[leaving_index] - x[leaving_index]) + : (x[leaving_index] - lp.upper[leaving_index]); // We need the new objective to be at least positive_threshold // positive_threshold = obj_val+ alpha * infeas // infeas > 0, alpha > 0, positive_threshold > 0 printf("direction = %d\n", direction); - printf("lower %e x %e upper %d\n", lp.lower[leaving_index], x[leaving_index], lp.upper[leaving_index]); + printf( + "lower %e x %e upper %d\n", lp.lower[leaving_index], x[leaving_index], lp.upper[leaving_index]); printf("infeas = %e\n", infeas); printf("obj_val = %e\n", obj_val); - alpha = std::max(threshold,(positive_threshold - obj_val) / infeas); + alpha = std::max(threshold, (positive_threshold - obj_val) / infeas); printf("alpha = %e\n", alpha); std::vector y_prime(m); @@ -157,37 +143,33 @@ void compute_farkas_certificate(const lp_problem_t& lp, std::vector zu_prime(n); // farkas_y = y + alpha * delta_y - for (i_t i = 0; i < m; ++i) - { + for (i_t i = 0; i < m; ++i) { farkas_y[i] = y[i] + alpha * delta_y[i]; - y_prime[i] = y[i] + alpha * delta_y[i]; + y_prime[i] = y[i] + alpha * delta_y[i]; } // farkas_zl = z + alpha * delta_z - c- - for (i_t j = 0; j < n; ++j) - { - const f_t cj = lp.objective[j]; - const f_t z_j = z[j]; + for (i_t j = 0; j < n; ++j) { + const f_t cj = lp.objective[j]; + const f_t z_j = z[j]; const f_t delta_z_j = delta_z[j]; farkas_zl[j] = std::max(0.0, z_j) + alpha * std::max(0.0, delta_z_j) + -std::min(0.0, cj); - zl_prime[j] = zl[j] + alpha * std::max(0.0, delta_z_j); + zl_prime[j] = zl[j] + alpha * std::max(0.0, delta_z_j); } // farkas_zu = z + alpha * delta_z + c+ - for (i_t j = 0; j < n; ++j) - { - const f_t cj = lp.objective[j]; - const f_t z_j = z[j]; + for (i_t j = 0; j < n; ++j) { + const f_t cj = lp.objective[j]; + const f_t z_j = z[j]; const f_t delta_z_j = delta_z[j]; farkas_zu[j] = -std::min(0.0, z_j) - alpha * std::min(0.0, delta_z_j) + std::max(0.0, cj); - zu_prime[j] = zu[j] + alpha * (-std::min(0.0, delta_z_j)); + zu_prime[j] = zu[j] 
+ alpha * (-std::min(0.0, delta_z_j)); } // farkas_constant = b'*farkas_y + l'*farkas_zl - u'*farkas_zu - farkas_constant = 0.0; + farkas_constant = 0.0; f_t test_constant = 0.0; - f_t test_3 = 0.0; - for (i_t i = 0; i < m; ++i) - { + f_t test_3 = 0.0; + for (i_t i = 0; i < m; ++i) { farkas_constant += lp.rhs[i] * farkas_y[i]; test_constant += lp.rhs[i] * y_prime[i]; test_3 += lp.rhs[i] * delta_y[i]; @@ -195,19 +177,16 @@ void compute_farkas_certificate(const lp_problem_t& lp, printf("b'*delta_y = %e\n", test_3); printf("|| b || %e\n", vector_norm_inf(lp.rhs)); printf("|| delta y || %e\n", vector_norm_inf(delta_y)); - for (i_t j = 0; j < n; ++j) - { + for (i_t j = 0; j < n; ++j) { const f_t lower = lp.lower[j]; const f_t upper = lp.upper[j]; - if (lower > -inf) - { + if (lower > -inf) { farkas_constant += lower * farkas_zl[j]; test_constant += lower * zl_prime[j]; const f_t delta_z_l_j = std::max(delta_z[j], 0.0); test_3 += lower * delta_z_l_j; } - if (upper < inf) - { + if (upper < inf) { farkas_constant -= upper * farkas_zu[j]; test_constant -= upper * zu_prime[j]; const f_t delta_z_u_j = -std::min(delta_z[j], 0.0); @@ -215,25 +194,21 @@ void compute_farkas_certificate(const lp_problem_t& lp, } } - // Verify that the Farkas certificate is valid std::vector residual = farkas_zl; matrix_transpose_vector_multiply(lp.A, 1.0, farkas_y, 1.0, residual); - for (i_t j = 0; j < n; ++j) - { + for (i_t j = 0; j < n; ++j) { residual[j] -= farkas_zu[j]; } const f_t residual_norm = vector_norm2(residual); f_t zl_min = 0.0; - for (i_t j = 0; j < n; ++j) - { + for (i_t j = 0; j < n; ++j) { zl_min = std::min(zl_min, farkas_zl[j]); } settings.log.printf("farkas_zl_min = %e\n", zl_min); f_t zu_min = 0.0; - for (i_t j = 0; j < n; ++j) - { + for (i_t j = 0; j < n; ++j) { zu_min = std::min(zu_min, farkas_zu[j]); } settings.log.printf("farkas_zu_min = %e\n", zu_min); @@ -241,8 +216,7 @@ void compute_farkas_certificate(const lp_problem_t& lp, settings.log.printf("|| A'*farkas_y + 
farkas_zl - farkas_zu || = %e\n", residual_norm); settings.log.printf("b'*farkas_y + l'*farkas_zl - u'*farkas_zu = %e\n", farkas_constant); - if (residual_norm < 1e-6 && farkas_constant > 0.0 && zl_min >= 0.0 && zu_min >= 0.0) - { + if (residual_norm < 1e-6 && farkas_constant > 0.0 && zl_min >= 0.0 && zu_min >= 0.0) { settings.log.printf("Farkas certificate of infeasibility constructed\n"); } } @@ -253,12 +227,10 @@ void initial_perturbation(const lp_problem_t& lp, const std::vector& vstatus, std::vector& objective) { - - const i_t m = lp.num_rows; - const i_t n = lp.num_cols; + const i_t m = lp.num_rows; + const i_t n = lp.num_cols; f_t max_abs_obj_coeff = 0.0; - for (i_t j = 0; j < n; ++j) - { + for (i_t j = 0; j < n; ++j) { max_abs_obj_coeff = std::max(max_abs_obj_coeff, std::abs(lp.objective[j])); } @@ -269,8 +241,7 @@ void initial_perturbation(const lp_problem_t& lp, objective.resize(n); f_t sum_perturb = 0.0; i_t num_perturb = 0; - for (i_t j = 0; j < n; ++j) - { + for (i_t j = 0; j < n; ++j) { f_t obj = objective[j] = lp.objective[j]; const f_t lower = lp.lower[j]; @@ -282,25 +253,27 @@ void initial_perturbation(const lp_problem_t& lp, } const f_t rand_val = static_cast(std::rand() / (RAND_MAX + 1.0)); - const f_t perturb = (1e-5 * std::abs(obj) + 1e-7 * max_abs_obj_coeff + 10 * dual_tol) * (1.0 + rand_val); + const f_t perturb = + (1e-5 * std::abs(obj) + 1e-7 * max_abs_obj_coeff + 10 * dual_tol) * (1.0 + rand_val); - if (vstatus[j] == variable_status_t::NONBASIC_LOWER || lower > -inf && upper < inf && obj > 0) - { + if (vstatus[j] == variable_status_t::NONBASIC_LOWER || lower > -inf && upper < inf && obj > 0) { objective[j] = obj + perturb; sum_perturb += perturb; num_perturb++; - } else if (vstatus[j] == variable_status_t::NONBASIC_UPPER || lower > -inf && upper < inf && obj < 0) - { + } else if (vstatus[j] == variable_status_t::NONBASIC_UPPER || + lower > -inf && upper < inf && obj < 0) { objective[j] = obj - perturb; sum_perturb += perturb; 
num_perturb++; } } - settings.log.printf("Applied initial perturbation of %e to %d/%d objective coefficients\n", sum_perturb, num_perturb, n); + settings.log.printf("Applied initial perturbation of %e to %d/%d objective coefficients\n", + sum_perturb, + num_perturb, + n); } - template void compute_reduced_cost_update(const lp_problem_t& lp, const std::vector& basic_list, @@ -332,8 +305,7 @@ void compute_reduced_cost_update(const lp_problem_t& lp, dot += lp.A.x[p] * delta_y[lp.A.i[p]]; } delta_z[j] = -dot; - if (dot != 0.0) - { + if (dot != 0.0) { delta_z_indices.push_back(j); delta_z_mark[j] = 1; } @@ -352,23 +324,17 @@ void compute_delta_z(const csc_matrix_t& A_transpose, { // delta_zN = - N'*delta_y const i_t nz_delta_y = delta_y.i.size(); - for (i_t k = 0; k < nz_delta_y; k++) - { - const i_t i = delta_y.i[k]; + for (i_t k = 0; k < nz_delta_y; k++) { + const i_t i = delta_y.i[k]; const f_t delta_y_i = delta_y.x[k]; - if (0 && std::abs(delta_y_i) < 1e-12) { - continue; - } + if (0 && std::abs(delta_y_i) < 1e-12) { continue; } const i_t row_start = A_transpose.col_start[i]; - const i_t row_end = A_transpose.col_start[i + 1]; - for (i_t p = row_start; p < row_end; ++p) - { + const i_t row_end = A_transpose.col_start[i + 1]; + for (i_t p = row_start; p < row_end; ++p) { const i_t j = A_transpose.i[p]; - if (nonbasic_mark[j] >= 0) - { + if (nonbasic_mark[j] >= 0) { delta_z[j] -= delta_y_i * A_transpose.x[p]; - if (!delta_z_mark[j]) - { + if (!delta_z_mark[j]) { delta_z_mark[j] = 1; delta_z_indices.push_back(j); } @@ -384,7 +350,15 @@ void compute_delta_z(const csc_matrix_t& A_transpose, std::vector delta_z_check(n); std::vector delta_z_mark_check(n, 0); std::vector delta_z_indices_check; - phase2::compute_reduced_cost_update(lp, basic_list, nonbasic_list, delta_y, leaving_index, direction, delta_z_mark_check, delta_z_indices_check, delta_z_check); + phase2::compute_reduced_cost_update(lp, + basic_list, + nonbasic_list, + delta_y, + leaving_index, + direction, + 
delta_z_mark_check, + delta_z_indices_check, + delta_z_check); f_t error_check = 0.0; for (i_t k = 0; k < n; ++k) { const f_t diff = std::abs(delta_z[k] - delta_z_check[k]); @@ -441,8 +415,8 @@ void compute_primal_variables(const basis_update_mpf_t& ft, f_t tight_tol, std::vector& x) { - const i_t m = A.m; - const i_t n = A.n; + const i_t m = A.m; + const i_t n = A.n; std::vector rhs = lp_rhs; // rhs = b - sum_{j : x_j = l_j} A(:, j) * l(j) // - sum_{j : x_j = u_j} A(:, j) * u(j) @@ -473,10 +447,9 @@ void clear_delta_z(i_t entering_index, std::vector& delta_z_indices, std::vector& delta_z) { - for (i_t k = 0; k < delta_z_indices.size(); k++) - { - const i_t j = delta_z_indices[k]; - delta_z[j] = 0.0; + for (i_t k = 0; k < delta_z_indices.size(); k++) { + const i_t j = delta_z_indices[k]; + delta_z[j] = 0.0; delta_z_mark[j] = 0; } if (entering_index != -1) { delta_z[entering_index] = 0.0; } @@ -501,7 +474,6 @@ void clear_delta_x(const std::vector& basic_list, scaled_delta_xB_sparse.x.clear(); } - template void compute_dual_residual(const csc_matrix_t& A, const std::vector& objective, @@ -510,7 +482,7 @@ void compute_dual_residual(const csc_matrix_t& A, std::vector& dual_residual) { dual_residual = z; - const i_t n = A.n; + const i_t n = A.n; // r = A'*y + z - c for (i_t j = 0; j < n; ++j) { dual_residual[j] -= objective[j]; @@ -544,7 +516,7 @@ void vstatus_changes(const std::vector& vstatus, { num_vstatus_changes = 0; num_z_changes = 0; - const i_t n = vstatus.size(); + const i_t n = vstatus.size(); for (i_t j = 0; j < n; ++j) { if (vstatus[j] != vstatus_old[j]) { num_vstatus_changes++; } if (std::abs(z[j] - z_old[j]) > 1e-6) { num_z_changes++; } @@ -558,13 +530,11 @@ void compute_bounded_info(const std::vector& lower, { const size_t n = lower.size(); for (size_t j = 0; j < n; j++) { - const bool bounded = - (lower[j] > -inf) && (upper[j] < inf) && (lower[j] != upper[j]); + const bool bounded = (lower[j] > -inf) && (upper[j] < inf) && (lower[j] != upper[j]); 
bounded_variables[j] = bounded; } } - template void compute_dual_solution_from_basis(const lp_problem_t& lp, basis_update_mpf_t& ft, @@ -612,29 +582,24 @@ void compute_dual_solution_from_basis(const lp_problem_t& lp, template i_t compute_primal_solution_from_basis(const lp_problem_t& lp, - basis_update_mpf_t& ft, - const std::vector& basic_list, - const std::vector& nonbasic_list, - const std::vector& vstatus, - std::vector& x) + basis_update_mpf_t& ft, + const std::vector& basic_list, + const std::vector& nonbasic_list, + const std::vector& vstatus, + std::vector& x) { - const i_t m = lp.num_rows; - const i_t n = lp.num_cols; + const i_t m = lp.num_rows; + const i_t n = lp.num_cols; std::vector rhs = lp.rhs; - for (i_t k = 0; k < n - m; ++k) - { + for (i_t k = 0; k < n - m; ++k) { const i_t j = nonbasic_list[k]; - if (vstatus[j] == variable_status_t::NONBASIC_LOWER || vstatus[j] == variable_status_t::NONBASIC_FIXED) - { + if (vstatus[j] == variable_status_t::NONBASIC_LOWER || + vstatus[j] == variable_status_t::NONBASIC_FIXED) { x[j] = lp.lower[j]; - } - else if (vstatus[j] == variable_status_t::NONBASIC_UPPER) - { + } else if (vstatus[j] == variable_status_t::NONBASIC_UPPER) { x[j] = lp.upper[j]; - } - else if (vstatus[j] == variable_status_t::NONBASIC_FREE) - { + } else if (vstatus[j] == variable_status_t::NONBASIC_FREE) { x[j] = 0.0; } } @@ -676,12 +641,12 @@ f_t compute_initial_primal_infeasibilities(const lp_problem_t& lp, infeasibility_indices.clear(); f_t primal_inf = 0.0; for (i_t k = 0; k < m; ++k) { - const i_t j = basic_list[k]; + const i_t j = basic_list[k]; const f_t lower_infeas = lp.lower[j] - x[j]; const f_t upper_infeas = x[j] - lp.upper[j]; - const f_t infeas = std::max(lower_infeas, upper_infeas); + const f_t infeas = std::max(lower_infeas, upper_infeas); if (infeas > settings.primal_tol) { - const f_t square_infeas = infeas * infeas; + const f_t square_infeas = infeas * infeas; squared_infeasibilities[j] = square_infeas; 
infeasibility_indices.push_back(j); primal_inf += square_infeas; @@ -701,13 +666,13 @@ void update_single_primal_infeasibility(const std::vector& lower, f_t& primal_inf) { const f_t now_feasible = std::numeric_limits::denorm_min(); - const f_t old_val = squared_infeasibilities[j]; + const f_t old_val = squared_infeasibilities[j]; // x_j < l_j - epsilon => -x_j + l_j > epsilon const f_t lower_infeas = lower[j] - x[j]; // x_j > u_j + epsilon => x_j - u_j > epsilon const f_t upper_infeas = x[j] - upper[j]; - const f_t infeas = std::max(lower_infeas, upper_infeas); - const f_t new_val = infeas * infeas; + const f_t infeas = std::max(lower_infeas, upper_infeas); + const f_t new_val = infeas * infeas; if (infeas > primal_tol) { primal_inf = std::max(0.0, primal_inf + (new_val - old_val)); // We are infeasible w.r.t the tolerance @@ -723,7 +688,7 @@ void update_single_primal_infeasibility(const std::vector& lower, // We are feasible w.r.t the tolerance if (old_val != 0.0) { // We were previously infeasible, - primal_inf = std::max(0.0, primal_inf - old_val); + primal_inf = std::max(0.0, primal_inf - old_val); squared_infeasibilities[j] = now_feasible; } else { // Still feasible @@ -744,8 +709,8 @@ void update_primal_infeasibilities(const lp_problem_t& lp, f_t& primal_inf) { const f_t now_feasible = std::numeric_limits::denorm_min(); - const f_t primal_tol = settings.primal_tol; - const i_t nz = basic_change_list.size(); + const f_t primal_tol = settings.primal_tol; + const i_t nz = basic_change_list.size(); for (i_t k = 0; k < nz; ++k) { const i_t j = basic_list[basic_change_list[k]]; // The change list will contain the leaving variable, @@ -753,9 +718,9 @@ void update_primal_infeasibilities(const lp_problem_t& lp, if (j == leaving_index) { // Force the leaving variable to be feasible - const f_t old_val = squared_infeasibilities[j]; + const f_t old_val = squared_infeasibilities[j]; squared_infeasibilities[j] = now_feasible; - primal_inf = std::max(0.0, primal_inf - 
old_val); + primal_inf = std::max(0.0, primal_inf - old_val); continue; } update_single_primal_infeasibility(lp.lower, @@ -774,29 +739,25 @@ void clean_up_infeasibilities(std::vector& squared_infeasibilities, std::vector& infeasibility_indices) { const f_t now_feasible = std::numeric_limits::denorm_min(); - bool needs_clean_up = false; + bool needs_clean_up = false; for (i_t k = 0; k < infeasibility_indices.size(); ++k) { - const i_t j = infeasibility_indices[k]; + const i_t j = infeasibility_indices[k]; const f_t squared_infeas = squared_infeasibilities[j]; - if (squared_infeas == now_feasible) { - needs_clean_up = true; - } + if (squared_infeas == now_feasible) { needs_clean_up = true; } } - if (needs_clean_up) { + if (needs_clean_up) { for (i_t k = 0; k < infeasibility_indices.size(); ++k) { - const i_t j = infeasibility_indices[k]; + const i_t j = infeasibility_indices[k]; const f_t squared_infeas = squared_infeasibilities[j]; if (squared_infeas == now_feasible) { // Set to the last element - const i_t sz = infeasibility_indices.size(); + const i_t sz = infeasibility_indices.size(); infeasibility_indices[k] = infeasibility_indices[sz - 1]; infeasibility_indices.pop_back(); squared_infeasibilities[j] = 0.0; - i_t new_j = infeasibility_indices[k]; - if (squared_infeasibilities[new_j] == now_feasible) { - k--; - } + i_t new_j = infeasibility_indices[k]; + if (squared_infeasibilities[new_j] == now_feasible) { k--; } } } } @@ -815,20 +776,20 @@ i_t steepest_edge_pricing_with_infeasibilities(const lp_problem_t& lp, f_t& max_val) { const f_t now_feasible = std::numeric_limits::denorm_min(); - max_val = 0.0; - i_t leaving_index = -1; - bool needs_clean_up = false; - const i_t nz = infeasibility_indices.size(); + max_val = 0.0; + i_t leaving_index = -1; + bool needs_clean_up = false; + const i_t nz = infeasibility_indices.size(); for (i_t k = 0; k < nz; ++k) { - const i_t j = infeasibility_indices[k]; + const i_t j = infeasibility_indices[k]; const f_t squared_infeas = 
squared_infeasibilities[j]; - const f_t val = squared_infeas / dy_steepest_edge[j]; + const f_t val = squared_infeas / dy_steepest_edge[j]; if (val > max_val || val == max_val && j > leaving_index) { - max_val = val; - leaving_index = j; + max_val = val; + leaving_index = j; const f_t lower_infeas = lp.lower[j] - x[j]; const f_t upper_infeas = x[j] - lp.upper[j]; - direction = lower_infeas >= upper_infeas ? 1 : -1; + direction = lower_infeas >= upper_infeas ? 1 : -1; } } @@ -836,8 +797,6 @@ i_t steepest_edge_pricing_with_infeasibilities(const lp_problem_t& lp, return leaving_index; } - - template i_t steepest_edge_pricing(const lp_problem_t& lp, const simplex_solver_settings_t& settings, @@ -1131,8 +1090,8 @@ i_t flip_bounds(const lp_problem_t& lp, template void initialize_steepest_edge_norms_from_slack_basis(const std::vector& basic_list, - const std::vector& nonbasic_list, - std::vector& delta_y_steepest_edge) + const std::vector& nonbasic_list, + std::vector& delta_y_steepest_edge) { const i_t m = basic_list.size(); const i_t n = delta_y_steepest_edge.size(); @@ -1155,7 +1114,7 @@ i_t initialize_steepest_edge_norms(const lp_problem_t& lp, basis_update_mpf_t& ft, std::vector& delta_y_steepest_edge) { - const i_t m = basic_list.size(); + const i_t m = basic_list.size(); // We want to compute B^T delta_y_i = -e_i // If there is a column u of B^T such that B^T(:, u) = alpha * e_i than the @@ -1167,15 +1126,15 @@ i_t initialize_steepest_edge_norms(const lp_problem_t& lp, std::vector coeff(m, 0.0); for (i_t k = 0; k < m; ++k) { - const i_t j = basic_list[k]; + const i_t j = basic_list[k]; const i_t col_start = lp.A.col_start[j]; - const i_t col_end = lp.A.col_start[j + 1]; + const i_t col_end = lp.A.col_start[j + 1]; for (i_t p = col_start; p < col_end; ++p) { const i_t i = lp.A.i[p]; row_degree[i]++; // column j of A is column k of B mapping[k] = i; - coeff[k] = lp.A.x[p]; + coeff[k] = lp.A.x[p]; } } @@ -1192,7 +1151,7 @@ i_t initialize_steepest_edge_norms(const 
lp_problem_t& lp, num_singleton_rows++; #ifdef CHECK_SINGLETON_ROWS const i_t col_start = B_transpose.col_start[i]; - const i_t col_end = B_transpose.col_start[i + 1]; + const i_t col_end = B_transpose.col_start[i + 1]; if (col_end - col_start != 1) { settings.log.printf("Singleton row %d has %d non-zero entries\n", i, col_end - col_start); } @@ -1201,7 +1160,9 @@ i_t initialize_steepest_edge_norms(const lp_problem_t& lp, } if (num_singleton_rows > 0) { - settings.log.printf("Found %d singleton rows for steepest edge norms in %.2fs\n", num_singleton_rows, toc(start_singleton_rows)); + settings.log.printf("Found %d singleton rows for steepest edge norms in %.2fs\n", + num_singleton_rows, + toc(start_singleton_rows)); } f_t last_log = tic(); @@ -1209,14 +1170,14 @@ i_t initialize_steepest_edge_norms(const lp_problem_t& lp, sparse_vector_t sparse_ei(m, 1); sparse_ei.x[0] = -1.0; sparse_ei.i[0] = k; - const i_t j = basic_list[k]; - f_t init = -1.0; + const i_t j = basic_list[k]; + f_t init = -1.0; if (row_degree[mapping[k]] == 1) { - const i_t u = mapping[k]; + const i_t u = mapping[k]; const f_t alpha = coeff[k]; - //dy[u] = -1.0 / alpha; + // dy[u] = -1.0 / alpha; f_t my_init = 1.0 / (alpha * alpha); - init = my_init; + init = my_init; #ifdef CHECK_HYPERSPARSE std::vector residual(m); b_transpose_multiply(lp, basic_list, dy, residual); @@ -1225,12 +1186,15 @@ i_t initialize_steepest_edge_norms(const lp_problem_t& lp, const f_t error_component = std::abs(residual[h] - ei[h]); error += error_component; if (error_component > 1e-12) { - settings.log.printf("Singleton row %d component %d error %e residual %e ei %e\n", k, h, error_component, residual[h], ei[h]); + settings.log.printf("Singleton row %d component %d error %e residual %e ei %e\n", + k, + h, + error_component, + residual[h], + ei[h]); } } - if (error > 1e-12) { - settings.log.printf("Singleton row %d error %e\n", k, error); - } + if (error > 1e-12) { settings.log.printf("Singleton row %d error %e\n", k, 
error); } #endif #ifdef CHECK_HYPERSPARSE @@ -1238,7 +1202,11 @@ i_t initialize_steepest_edge_norms(const lp_problem_t& lp, ft.b_transpose_solve(ei, dy); init = vector_norm2_squared(dy); if (init != my_init) { - settings.log.printf("Singleton row %d error %.16e init %.16e my_init %.16e\n", k, std::abs(init - my_init), init, my_init); + settings.log.printf("Singleton row %d error %.16e init %.16e my_init %.16e\n", + k, + std::abs(init - my_init), + init, + my_init); } #endif } else { @@ -1255,13 +1223,17 @@ i_t initialize_steepest_edge_norms(const lp_problem_t& lp, #endif #if COMPARE_WITH_DENSE if (std::abs(init - my_init) > 1e-12) { - settings.log.printf("Singleton row %d error %.16e init %.16e my_init %.16e\n", k, std::abs(init - my_init), init, my_init); + settings.log.printf("Singleton row %d error %.16e init %.16e my_init %.16e\n", + k, + std::abs(init - my_init), + init, + my_init); } #endif init = my_init; } - //ei[k] = 0.0; - //init = vector_norm2_squared(dy); + // ei[k] = 0.0; + // init = vector_norm2_squared(dy); assert(init > 0); delta_y_steepest_edge[j] = init; @@ -1293,7 +1265,7 @@ i_t update_steepest_edge_norms(const simplex_solver_settings_t& settin std::vector& v, std::vector& delta_y_steepest_edge) { - i_t m = basic_list.size(); + i_t m = basic_list.size(); const i_t delta_y_nz = delta_y_sparse.i.size(); sparse_vector_t v_sparse(m, 0); // B^T delta_y = - direction * e_basic_leaving_index @@ -1324,7 +1296,7 @@ i_t update_steepest_edge_norms(const simplex_solver_settings_t& settin // B*scaled_delta_xB = -A(:, leaving_index) so w = -scaled_delta_xB const f_t wr = -scaled_delta_xB.find_coefficient(basic_leaving_index); if (wr == 0) { return -1; } - const f_t omegar = dy_norm_squared / (wr * wr); + const f_t omegar = dy_norm_squared / (wr * wr); const i_t scaled_delta_xB_nz = scaled_delta_xB.i.size(); for (i_t h = 0; h < scaled_delta_xB_nz; ++h) { const i_t k = scaled_delta_xB.i[h]; @@ -1495,7 +1467,7 @@ void compute_delta_y(const basis_update_mpf_t& ft, 
sparse_vector_t& UTsol_sparse) { const i_t m = delta_y_sparse.n; - // BT*delta_y = -delta_zB = -sigma*ei + // BT*delta_y = -delta_zB = -sigma*ei sparse_vector_t ei_sparse(m, 1); ei_sparse.i[0] = basic_leaving_index; ei_sparse.x[0] = -direction; @@ -1509,29 +1481,24 @@ void compute_delta_y(const basis_update_mpf_t& ft, } #ifdef CHECK_B_TRANSPOSE_SOLVE - std::vector delta_y_sparse_vector_check(m); - delta_y_sparse.to_dense(delta_y_sparse_vector_check); - f_t error_check = 0.0; - for (i_t k = 0; k < m; ++k) - { - if (std::abs(delta_y[k] - delta_y_sparse_vector_check[k]) > 1e-6) - { - settings.log.printf("\tBTranspose error %d %e %e\n", k, delta_y[k], delta_y_sparse_vector_check[k]); - } - error_check += std::abs(delta_y[k] - delta_y_sparse_vector_check[k]); - } - if (error_check > 1e-6) { - settings.log.printf("BTranspose error %e\n", error_check); + std::vector delta_y_sparse_vector_check(m); + delta_y_sparse.to_dense(delta_y_sparse_vector_check); + f_t error_check = 0.0; + for (i_t k = 0; k < m; ++k) { + if (std::abs(delta_y[k] - delta_y_sparse_vector_check[k]) > 1e-6) { + settings.log.printf( + "\tBTranspose error %d %e %e\n", k, delta_y[k], delta_y_sparse_vector_check[k]); } - std::vector residual(m); - b_transpose_multiply(lp, basic_list, delta_y_sparse_vector_check, residual); - for (i_t k = 0; k < m; ++k) - { - if (std::abs(residual[k] - ei[k]) > 1e-6) - { - settings.log.printf("\tBTranspose multiply error %d %e %e\n", k, residual[k], ei[k]); - } + error_check += std::abs(delta_y[k] - delta_y_sparse_vector_check[k]); + } + if (error_check > 1e-6) { settings.log.printf("BTranspose error %e\n", error_check); } + std::vector residual(m); + b_transpose_multiply(lp, basic_list, delta_y_sparse_vector_check, residual); + for (i_t k = 0; k < m; ++k) { + if (std::abs(residual[k] - ei[k]) > 1e-6) { + settings.log.printf("\tBTranspose multiply error %d %e %e\n", k, residual[k], ei[k]); } + } #endif } @@ -1544,20 +1511,20 @@ void update_dual_variables(const 
sparse_vector_t& delta_y_sparse, std::vector& y, std::vector& z) { - // Update dual variables - // y <- y + steplength * delta_y - const i_t delta_y_nz = delta_y_sparse.i.size(); - for (i_t k = 0; k < delta_y_nz; ++k) { - const i_t i = delta_y_sparse.i[k]; - y[i] += step_length * delta_y_sparse.x[k]; - } - // z <- z + steplength * delta_z - const i_t delta_z_nz = delta_z_indices.size(); - for (i_t k = 0; k < delta_z_nz; ++k) { - const i_t j = delta_z_indices[k]; - z[j] += step_length * delta_z[j]; - } - z[leaving_index] += step_length * delta_z[leaving_index]; + // Update dual variables + // y <- y + steplength * delta_y + const i_t delta_y_nz = delta_y_sparse.i.size(); + for (i_t k = 0; k < delta_y_nz; ++k) { + const i_t i = delta_y_sparse.i[k]; + y[i] += step_length * delta_y_sparse.x[k]; + } + // z <- z + steplength * delta_z + const i_t delta_z_nz = delta_z_indices.size(); + for (i_t k = 0; k < delta_z_nz; ++k) { + const i_t j = delta_z_indices[k]; + z[j] += step_length * delta_z[j]; + } + z[leaving_index] += step_length * delta_z[leaving_index]; } template @@ -1571,7 +1538,7 @@ void adjust_for_flips(const basis_update_mpf_t& ft, std::vector& delta_x_flip, std::vector& x) { - const i_t m = basic_list.size(); + const i_t m = basic_list.size(); const i_t atilde_nz = atilde_index.size(); // B*delta_xB_0 = atilde sparse_vector_t atilde_sparse(m, atilde_nz); @@ -1623,7 +1590,6 @@ void compute_delta_x(const lp_problem_t& lp, ft.b_solve(rhs_sparse, scaled_delta_xB_sparse, utilde_sparse); scaled_delta_xB_sparse.negate(); - #ifdef CHECK_B_SOLVE std::vector scaled_delta_xB(m); { @@ -1680,12 +1646,12 @@ void update_objective(const std::vector& basic_list, f_t& obj) { const i_t changed_basic_nz = changed_basic_indices.size(); - for (i_t k = 0; k < changed_basic_nz; ++k) { - const i_t j = basic_list[changed_basic_indices[k]]; - obj += delta_x[j] * objective[j]; - } - // Leaving index already included above - obj += delta_x[entering_index] * objective[entering_index]; + for 
(i_t k = 0; k < changed_basic_nz; ++k) { + const i_t j = basic_list[changed_basic_indices[k]]; + obj += delta_x[j] * objective[j]; + } + // Leaving index already included above + obj += delta_x[entering_index] * objective[entering_index]; } template @@ -2019,11 +1985,10 @@ void set_primal_variables_on_bounds(const lp_problem_t& lp, } template -f_t compute_perturbed_objective(const std::vector& objective, - const std::vector& x) +f_t compute_perturbed_objective(const std::vector& objective, const std::vector& x) { const size_t n = objective.size(); - f_t obj_val = 0.0; + f_t obj_val = 0.0; for (size_t j = 0; j < n; ++j) { obj_val += objective[j] * x[j]; } @@ -2031,11 +1996,10 @@ f_t compute_perturbed_objective(const std::vector& objective, } template -f_t amount_of_perturbation(const lp_problem_t& lp, - const std::vector& objective) +f_t amount_of_perturbation(const lp_problem_t& lp, const std::vector& objective) { f_t perturbation = 0.0; - const i_t n = lp.num_cols; + const i_t n = lp.num_cols; for (i_t j = 0; j < n; ++j) { perturbation += std::abs(lp.objective[j] - objective[j]); } @@ -2064,7 +2028,7 @@ void prepare_optimality(const lp_problem_t& lp, sol.objective = compute_objective(lp, sol.x); sol.user_objective = compute_user_objective(lp, sol.objective); - f_t perturbation = phase2::amount_of_perturbation(lp, objective); + f_t perturbation = phase2::amount_of_perturbation(lp, objective); if (perturbation > 1e-6 && phase == 2) { // Try to remove perturbation std::vector unperturbed_y(m); @@ -2079,8 +2043,7 @@ void prepare_optimality(const lp_problem_t& lp, z = unperturbed_z; y = unperturbed_y; perturbation = 0.0; - } - else { + } else { settings.log.printf("Failed to remove perturbation of %.2e.\n", perturbation); } } @@ -2146,7 +2109,6 @@ class phase2_timers_t { return toc(start_time); } - void print_timers(const simplex_solver_settings_t& settings) const { if (!record_time) { return; } @@ -2184,10 +2146,9 @@ class phase2_timers_t { f_t objective_time; f_t 
update_infeasibility_time; - - private: - f_t start_time; - bool record_time; + private: + f_t start_time; + bool record_time; }; } // namespace phase2 @@ -2327,11 +2288,12 @@ dual::status_t dual_phase2(i_t phase, if (delta_y_steepest_edge.size() == 0) { delta_y_steepest_edge.resize(n); if (slack_basis) { - phase2::initialize_steepest_edge_norms_from_slack_basis(basic_list, nonbasic_list, delta_y_steepest_edge); + phase2::initialize_steepest_edge_norms_from_slack_basis( + basic_list, nonbasic_list, delta_y_steepest_edge); } else { std::fill(delta_y_steepest_edge.begin(), delta_y_steepest_edge.end(), -1); - if (phase2::initialize_steepest_edge_norms(lp, - settings, start_time, basic_list, ft, delta_y_steepest_edge) == -1) { + if (phase2::initialize_steepest_edge_norms( + lp, settings, start_time, basic_list, ft, delta_y_steepest_edge) == -1) { return dual::status_t::TIME_LIMIT; } } @@ -2340,7 +2302,9 @@ dual::status_t dual_phase2(i_t phase, vector_norm2(delta_y_steepest_edge)); } - if (phase == 2) { settings.log.printf(" Iter Objective Num Inf. Sum Inf. Perturb Time\n"); } + if (phase == 2) { + settings.log.printf(" Iter Objective Num Inf. Sum Inf. 
Perturb Time\n"); + } const i_t iter_limit = settings.iteration_limit; std::vector delta_y(m, 0.0); @@ -2366,22 +2330,21 @@ dual::status_t dual_phase2(i_t phase, f_t primal_infeasibility = phase2::compute_initial_primal_infeasibilities( lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); - csc_matrix_t A_transpose(1, 1, 0); lp.A.transpose(A_transpose); - f_t obj = compute_objective(lp, x); + f_t obj = compute_objective(lp, x); const i_t start_iter = iter; i_t sparse_delta_z = 0; - i_t dense_delta_z = 0; + i_t dense_delta_z = 0; phase2::phase2_timers_t timers(false); while (iter < iter_limit) { // Pricing - i_t direction = 0; + i_t direction = 0; i_t basic_leaving_index = -1; - i_t leaving_index = -1; + i_t leaving_index = -1; f_t max_val; timers.start_timer(); if (settings.use_steepest_edge_pricing) { @@ -2446,15 +2409,13 @@ dual::status_t dual_phase2(i_t phase, } timers.start_timer(); - i_t delta_y_nz0 = 0; + i_t delta_y_nz0 = 0; const i_t nz_delta_y = delta_y_sparse.i.size(); for (i_t k = 0; k < nz_delta_y; k++) { - if (std::abs(delta_y_sparse.x[k]) > 1e-12) { - delta_y_nz0++; - } + if (std::abs(delta_y_sparse.x[k]) > 1e-12) { delta_y_nz0++; } } const f_t delta_y_nz_percentage = delta_y_nz0 / static_cast(m) * 100.0; - const bool use_transpose = delta_y_nz_percentage <= 30.0; + const bool use_transpose = delta_y_nz_percentage <= 30.0; if (use_transpose) { sparse_delta_z++; phase2::compute_delta_z(A_transpose, @@ -2487,7 +2448,8 @@ dual::status_t dual_phase2(i_t phase, phase2::compute_dual_residual(lp.A, zeros, delta_y, delta_z, dual_residual); // || A'*delta_y + delta_z ||_inf f_t dual_residual_norm = vector_norm_inf(dual_residual); - settings.log.printf("|| A'*dy - dz || %e use transpose %d\n", dual_residual_norm, use_transpose); + settings.log.printf( + "|| A'*dy - dz || %e use transpose %d\n", dual_residual_norm, use_transpose); #endif // Ratio test @@ -2509,7 +2471,7 @@ dual::status_t dual_phase2(i_t phase, } else if 
(bound_flip_ratio) { timers.start_timer(); f_t slope = direction == 1 ? (lp.lower[leaving_index] - x[leaving_index]) - : (x[leaving_index] - lp.upper[leaving_index]); + : (x[leaving_index] - lp.upper[leaving_index]); bound_flipping_ratio_test_t bfrt(settings, start_time, m, @@ -2553,8 +2515,9 @@ dual::status_t dual_phase2(i_t phase, perturbation = 0.0; std::vector unperturbed_x(n); - phase2::compute_primal_solution_from_basis(lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x); - x = unperturbed_x; + phase2::compute_primal_solution_from_basis( + lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x); + x = unperturbed_x; primal_infeasibility = phase2::compute_initial_primal_infeasibilities( lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); settings.log.printf("Updated primal infeasibility: %e\n", primal_infeasibility); @@ -2562,8 +2525,7 @@ dual::status_t dual_phase2(i_t phase, objective = lp.objective; // Need to reset the objective value, since we have recomputed x obj = phase2::compute_perturbed_objective(objective, x); - if (dual_infeas <= settings.dual_tol && primal_infeasibility <= settings.primal_tol) - { + if (dual_infeas <= settings.dual_tol && primal_infeasibility <= settings.primal_tol) { phase2::prepare_optimality(lp, settings, ft, @@ -2582,9 +2544,11 @@ dual::status_t dual_phase2(i_t phase, status = dual::status_t::OPTIMAL; break; } - settings.log.printf("Continuing with perturbation removed and steepest edge norms reset\n"); - // Clear delta_z before restarting the iteration - phase2::clear_delta_z(entering_index, leaving_index, delta_z_mark, delta_z_indices, delta_z); + settings.log.printf( + "Continuing with perturbation removed and steepest edge norms reset\n"); + // Clear delta_z before restarting the iteration + phase2::clear_delta_z( + entering_index, leaving_index, delta_z_mark, delta_z_indices, delta_z); continue; } else { settings.log.printf("Failed to remove perturbation of %.2e.\n", perturbation); @@ 
-2637,12 +2601,12 @@ dual::status_t dual_phase2(i_t phase, return dual::status_t::DUAL_UNBOUNDED; } - timers.start_timer(); // Update dual variables // y <- y + steplength * delta_y // z <- z + steplength * delta_z - phase2::update_dual_variables(delta_y_sparse, delta_z_indices, delta_z, step_length, leaving_index, y, z); + phase2::update_dual_variables( + delta_y_sparse, delta_z_indices, delta_z, step_length, leaving_index, y, z); timers.vector_time += timers.stop_timer(); #ifdef COMPUTE_DUAL_RESIDUAL @@ -2674,8 +2638,15 @@ dual::status_t dual_phase2(i_t phase, sparse_vector_t delta_xB_0_sparse(m, 0); if (num_flipped > 0) { timers.start_timer(); - phase2::adjust_for_flips( - ft, basic_list, delta_z_indices, atilde_index, atilde, atilde_mark, delta_xB_0_sparse, delta_x_flip, x); + phase2::adjust_for_flips(ft, + basic_list, + delta_z_indices, + atilde_index, + atilde, + atilde_mark, + delta_xB_0_sparse, + delta_x_flip, + x); timers.ftran_time += timers.stop_timer(); } @@ -2706,7 +2677,6 @@ dual::status_t dual_phase2(i_t phase, if (primal_step_err > 1e-4) { settings.log.printf("|| A * dx || %e\n", primal_step_err); } #endif - timers.start_timer(); const i_t steepest_edge_status = phase2::update_steepest_edge_norms(settings, basic_list, @@ -2742,11 +2712,11 @@ dual::status_t dual_phase2(i_t phase, } #endif - timers.start_timer(); // TODO(CMM): Do I also need to update the objective due to the bound flips? // TODO(CMM): I'm using the unperturbed objective here, should this be the perturbed objective? 
- phase2::update_objective(basic_list, scaled_delta_xB_sparse.i, lp.objective, delta_x, entering_index, obj); + phase2::update_objective( + basic_list, scaled_delta_xB_sparse.i, lp.objective, delta_x, entering_index, obj); timers.objective_time += timers.stop_timer(); timers.start_timer(); @@ -2774,7 +2744,7 @@ dual::status_t dual_phase2(i_t phase, squared_infeasibilities, infeasibility_indices, primal_infeasibility); - // Update the entering variable + // Update the entering variable phase2::update_single_primal_infeasibility(lp.lower, lp.upper, x, @@ -2784,11 +2754,11 @@ dual::status_t dual_phase2(i_t phase, entering_index, primal_infeasibility); - phase2::clean_up_infeasibilities(squared_infeasibilities, infeasibility_indices); #if CHECK_PRIMAL_INFEASIBILITIES - phase2::check_primal_infeasibilities(lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); + phase2::check_primal_infeasibilities( + lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); #endif timers.update_infeasibility_time += timers.stop_timer(); @@ -2809,10 +2779,10 @@ dual::status_t dual_phase2(i_t phase, } basic_list[basic_leaving_index] = entering_index; nonbasic_list[nonbasic_entering_index] = leaving_index; - nonbasic_mark[entering_index] = -1; - nonbasic_mark[leaving_index] = nonbasic_entering_index; - basic_mark[leaving_index] = -1; - basic_mark[entering_index] = basic_leaving_index; + nonbasic_mark[entering_index] = -1; + nonbasic_mark[leaving_index] = nonbasic_entering_index; + basic_mark[leaving_index] = -1; + basic_mark[entering_index] = basic_leaving_index; timers.start_timer(); // Refactor or update the basis factorization @@ -2820,7 +2790,7 @@ dual::status_t dual_phase2(i_t phase, if (!should_refactor) { i_t recommend_refactor = ft.update(utilde_sparse, UTsol_sparse, basic_leaving_index); #ifdef CHECK_UPDATE - phase2::check_update(lp, settings, ft, basic_list, basic_leaving_index); + phase2::check_update(lp, settings, ft, basic_list, 
basic_leaving_index); #endif should_refactor = recommend_refactor == 1; } @@ -2853,7 +2823,7 @@ dual::status_t dual_phase2(i_t phase, #endif #ifdef CHECK_BASIS_MARK - phase2::check_basis_mark(settings, basic_list, nonbasic_list, basic_mark, nonbasic_mark); + phase2::check_basis_mark(settings, basic_list, nonbasic_list, basic_mark, nonbasic_mark); #endif iter++; @@ -2862,7 +2832,6 @@ dual::status_t dual_phase2(i_t phase, // Clear delta_z phase2::clear_delta_z(entering_index, leaving_index, delta_z_mark, delta_z_indices, delta_z); - f_t now = toc(start_time); if ((iter - start_iter) < settings.first_iteration_log || (iter % settings.iteration_log_frequency) == 0) { @@ -2896,8 +2865,12 @@ dual::status_t dual_phase2(i_t phase, timers.print_timers(settings); constexpr bool print_stats = false; if constexpr (print_stats) { - settings.log.printf("Sparse delta_z %8d %8.2f%\n", sparse_delta_z, 100.0 * sparse_delta_z / (sparse_delta_z + dense_delta_z)); - settings.log.printf("Dense delta_z %8d %8.2f%\n", dense_delta_z, 100.0 * dense_delta_z / (sparse_delta_z + dense_delta_z)); + settings.log.printf("Sparse delta_z %8d %8.2f%\n", + sparse_delta_z, + 100.0 * sparse_delta_z / (sparse_delta_z + dense_delta_z)); + settings.log.printf("Dense delta_z %8d %8.2f%\n", + dense_delta_z, + 100.0 * dense_delta_z / (sparse_delta_z + dense_delta_z)); ft.print_stats(); } } diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index 7504d7c1d9..41555a44e0 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -32,7 +32,7 @@ void bound_strengthening(const std::vector& row_sense, const i_t n = problem.num_cols; std::vector constraint_lower(m); - //std::vector constraint_upper(m); + // std::vector constraint_upper(m); std::vector num_lower_infinity(m); std::vector num_upper_infinity(m); @@ -43,9 +43,7 @@ void bound_strengthening(const std::vector& row_sense, less_rows.reserve(m); for (i_t i = 0; i < m; ++i) { - if (row_sense[i] == 
'L') { - less_rows.push_back(i); - } + if (row_sense[i] == 'L') { less_rows.push_back(i); } } std::vector lower = problem.lower; @@ -55,38 +53,44 @@ void bound_strengthening(const std::vector& row_sense, updated_variables_list.reserve(n); std::vector updated_variables_mark(n, 0); - i_t iter = 0; - const i_t iter_limit = 10; + i_t iter = 0; + const i_t iter_limit = 10; i_t total_strengthened_variables = 0; settings.log.printf("Less equal rows %d\n", less_rows.size()); - while (iter < iter_limit && less_rows.size() > 0) - { + while (iter < iter_limit && less_rows.size() > 0) { // Derive bounds on the constraints - settings.log.printf("Running bound strengthening on %d rows\n", static_cast(less_rows.size())); + settings.log.printf("Running bound strengthening on %d rows\n", + static_cast(less_rows.size())); for (i_t i : less_rows) { - const i_t row_start = Arow.col_start[i]; - const i_t row_end = Arow.col_start[i + 1]; + const i_t row_start = Arow.col_start[i]; + const i_t row_end = Arow.col_start[i + 1]; num_lower_infinity[i] = 0; num_upper_infinity[i] = 0; f_t lower_limit = 0.0; - //f_t upper_limit = 0.0; + // f_t upper_limit = 0.0; for (i_t p = row_start; p < row_end; ++p) { const i_t j = Arow.i[p]; const f_t a_ij = Arow.x[p]; if (a_ij > 0) { lower_limit += a_ij * lower[j]; - //upper_limit += a_ij * upper[j]; + // upper_limit += a_ij * upper[j]; } else if (a_ij < 0) { lower_limit += a_ij * upper[j]; - //upper_limit += a_ij * lower[j]; + // upper_limit += a_ij * lower[j]; + } + if (lower[j] == -inf && a_ij > 0) { + num_lower_infinity[i]++; + lower_limit = -inf; + } + if (upper[j] == inf && a_ij < 0) { + num_lower_infinity[i]++; + lower_limit = -inf; } - if (lower[j] == -inf && a_ij > 0) { num_lower_infinity[i]++; lower_limit = -inf;} - if (upper[j] == inf && a_ij < 0) { num_lower_infinity[i]++; lower_limit = -inf; } } - //printf("Constraint %d: lo %e\n", i, lower_limit); + // printf("Constraint %d: lo %e\n", i, lower_limit); constraint_lower[i] = lower_limit; - 
//constraint_upper[i] = upper_limit; + // constraint_upper[i] = upper_limit; } // Use the constraint bounds to derive new bounds on the variables @@ -100,21 +104,25 @@ void bound_strengthening(const std::vector& row_sense, if (a_ik > 0) { const f_t new_upper = lower[k] + (problem.rhs[i] - constraint_lower[i]) / a_ik; if (new_upper < upper[k]) { - //printf("Strengthed bound on variable %d: lo %e up %e -> %e\n", k, lower[k], upper[k], new_upper); + // printf("Strengthed bound on variable %d: lo %e up %e -> %e\n", k, lower[k], + // upper[k], new_upper); upper[k] = new_upper; - if (lower[k] > upper[k]) - { - settings.log.printf("\t INFEASIBLE!!!!!!!!!!!!!!!!! constraint_lower %e lower %e rhs %e\n", constraint_lower[i], lower[k], problem.rhs[i]); + if (lower[k] > upper[k]) { + settings.log.printf( + "\t INFEASIBLE!!!!!!!!!!!!!!!!! constraint_lower %e lower %e rhs %e\n", + constraint_lower[i], + lower[k], + problem.rhs[i]); } if (!updated_variables_mark[k]) { updated_variables_list.push_back(k); } } } else if (a_ik < 0) { const f_t new_lower = upper[k] + (problem.rhs[i] - constraint_lower[i]) / a_ik; if (new_lower > lower[k]) { - //printf("Strengthend bound on variable %d: lo %e -> %e up %e\n", k, lower[k], new_lower, upper[k]); + // printf("Strengthend bound on variable %d: lo %e -> %e up %e\n", k, lower[k], + // new_lower, upper[k]); lower[k] = new_lower; - if (lower[k] > upper[k]) - { + if (lower[k] > upper[k]) { settings.log.printf("\t INFEASIBLE !!!!!!!!!!!!!!!!!!1\n"); } if (!updated_variables_mark[k]) { updated_variables_list.push_back(k); } @@ -126,15 +134,15 @@ void bound_strengthening(const std::vector& row_sense, less_rows.clear(); // Update the bounds on the constraints - settings.log.printf("Round %d: Strengthend %d variables\n", iter, static_cast(updated_variables_list.size())); + settings.log.printf("Round %d: Strengthend %d variables\n", + iter, + static_cast(updated_variables_list.size())); total_strengthened_variables += updated_variables_list.size(); - 
for (i_t j : updated_variables_list) - { + for (i_t j : updated_variables_list) { updated_variables_mark[j] = 0; - const i_t col_start = problem.A.col_start[j]; - const i_t col_end = problem.A.col_start[j+1]; - for (i_t p = col_start; p < col_end; ++p) - { + const i_t col_start = problem.A.col_start[j]; + const i_t col_end = problem.A.col_start[j + 1]; + for (i_t p = col_start; p < col_end; ++p) { const i_t i = problem.A.i[p]; less_rows.push_back(i); } @@ -1034,10 +1042,11 @@ void uncrush_solution(const presolve_info_t& presolve_info, #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE -template void convert_user_problem(const user_problem_t& user_problem, - const simplex_solver_settings_t& settings, - lp_problem_t& problem, - std::vector& new_slacks); +template void convert_user_problem( + const user_problem_t& user_problem, + const simplex_solver_settings_t& settings, + lp_problem_t& problem, + std::vector& new_slacks); template void convert_user_lp_with_guess( const user_problem_t& user_problem, diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index eee7c213ad..99a7c1f1ca 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -87,7 +87,7 @@ struct simplex_solver_settings_t { f_t cut_off; // If the dual objective is greater than the cutoff we stop f_t steepest_edge_ratio; // the ratio of computed steepest edge mismatch from updated steepest edge - f_t steepest_edge_primal_tol; // Primal tolerance divided by steepest edge norm + f_t steepest_edge_primal_tol; // Primal tolerance divided by steepest edge norm f_t hypersparse_threshold; bool use_steepest_edge_pricing; // true if using steepest edge pricing, false if using max // infeasibility pricing diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp index ed652a05e8..ca5f31c5bf 100644 --- a/cpp/src/dual_simplex/sparse_matrix.cpp +++ 
b/cpp/src/dual_simplex/sparse_matrix.cpp @@ -362,7 +362,12 @@ void scatter_dense(const csc_matrix_t& A, i_t j, f_t alpha, std::vecto // x <- x + alpha * A(:, j) template -void scatter_dense(const csc_matrix_t& A, i_t j, f_t alpha, std::vector& x, std::vector& mark, std::vector& indices) +void scatter_dense(const csc_matrix_t& A, + i_t j, + f_t alpha, + std::vector& x, + std::vector& mark, + std::vector& indices) { const i_t col_start = A.col_start[j]; const i_t col_end = A.col_start[j + 1]; @@ -370,8 +375,7 @@ void scatter_dense(const csc_matrix_t& A, i_t j, f_t alpha, std::vecto const i_t i = A.i[p]; const f_t ax = A.x[p]; x[i] += alpha * ax; - if (!mark[i]) - { + if (!mark[i]) { mark[i] = 1; indices.push_back(i); } diff --git a/cpp/src/dual_simplex/sparse_matrix.hpp b/cpp/src/dual_simplex/sparse_matrix.hpp index 21f97c01e0..5e12f6a5da 100644 --- a/cpp/src/dual_simplex/sparse_matrix.hpp +++ b/cpp/src/dual_simplex/sparse_matrix.hpp @@ -20,14 +20,13 @@ #include #include +#include #include #include #include -#include namespace cuopt::linear_programming::dual_simplex { - template class csr_matrix_t; // Forward declaration of CSR matrix needed to define CSC matrix @@ -110,7 +109,6 @@ class csr_matrix_t { static_assert(std::is_signed_v); }; - template void cumulative_sum(std::vector& inout, std::vector& output); @@ -135,7 +133,12 @@ template void scatter_dense(const csc_matrix_t& A, i_t j, f_t alpha, std::vector& x); template -void scatter_dense(const csc_matrix_t& A, i_t j, f_t alpha, std::vector& x, std::vector& mark, std::vector& indices); +void scatter_dense(const csc_matrix_t& A, + i_t j, + f_t alpha, + std::vector& x, + std::vector& mark, + std::vector& indices); // Compute C = A*B where C is m x n, A is m x k, and B = k x n // Do this by computing C(:, j) = A*B(:, j) = sum (i=1 to k) A(:, k)*B(i, j) diff --git a/cpp/src/dual_simplex/sparse_vector.cpp b/cpp/src/dual_simplex/sparse_vector.cpp index d3c655d3e0..8a617ae28d 100644 --- 
a/cpp/src/dual_simplex/sparse_vector.cpp +++ b/cpp/src/dual_simplex/sparse_vector.cpp @@ -15,7 +15,6 @@ * limitations under the License. */ - #include #include @@ -103,7 +102,7 @@ void sparse_vector_t::inverse_permute_vector(const std::vector& p template void sparse_vector_t::inverse_permute_vector(const std::vector& p, - sparse_vector_t& y) const + sparse_vector_t& y) const { i_t m = p.size(); assert(n == m); @@ -220,7 +219,6 @@ f_t sparse_vector_t::find_coefficient(i_t index) const return std::numeric_limits::quiet_NaN(); } - #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE template class sparse_vector_t; #endif diff --git a/cpp/src/dual_simplex/sparse_vector.hpp b/cpp/src/dual_simplex/sparse_vector.hpp index ed8f39a63b..cf970acda2 100644 --- a/cpp/src/dual_simplex/sparse_vector.hpp +++ b/cpp/src/dual_simplex/sparse_vector.hpp @@ -17,8 +17,8 @@ #pragma once -#include #include +#include #include @@ -31,10 +31,7 @@ class sparse_vector_t { // Construct a sparse vector of dimension n with nz nonzero coefficients sparse_vector_t(i_t n, i_t nz) : n(n), i(nz), x(nz) {} // Construct a sparse vector from a dense vector. - sparse_vector_t(const std::vector& in) - { - from_dense(in); - } + sparse_vector_t(const std::vector& in) { from_dense(in); } // Construct a sparse vector from a column of a CSC matrix sparse_vector_t(const csc_matrix_t& A, i_t col); // gather a dense vector into a sparse vector @@ -43,7 +40,8 @@ class sparse_vector_t { void to_csc(csc_matrix_t& A) const; // convert a sparse vector into a dense vector. Dense vector is cleared and resized. void to_dense(std::vector& x_dense) const; - // scatter a sparse vector into a dense vector. Assumes x_dense is already cleared or preinitialized + // scatter a sparse vector into a dense vector. 
Assumes x_dense is already cleared or + // preinitialized void scatter(std::vector& x_dense) const; // inverse permute the current sparse vector void inverse_permute_vector(const std::vector& p); diff --git a/cpp/src/dual_simplex/triangle_solve.cpp b/cpp/src/dual_simplex/triangle_solve.cpp index 0332b2b5a8..b8183f68e3 100644 --- a/cpp/src/dual_simplex/triangle_solve.cpp +++ b/cpp/src/dual_simplex/triangle_solve.cpp @@ -164,7 +164,7 @@ i_t depth_first_search(i_t j, } if (done) { pstack[head] = 0; // restore pstack so it can be used again in other routines - xi[head] = 0; // restore xi so it can be used again in other routines + xi[head] = 0; // restore xi so it can be used again in other routines head--; // remove j from the recursion stack xi[--top] = j; // and place it the output stack } @@ -248,11 +248,11 @@ template int sparse_triangle_solve(const csc_matrix_t(const csc_matrix_t& B, - int col, - const std::optional>& pinv, - std::vector& xi, - csc_matrix_t& G, - double* x); + int col, + const std::optional>& pinv, + std::vector& xi, + csc_matrix_t& G, + double* x); #endif } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/vector_math.cpp b/cpp/src/dual_simplex/vector_math.cpp index b8f8c7cf2b..239848ac3f 100644 --- a/cpp/src/dual_simplex/vector_math.cpp +++ b/cpp/src/dual_simplex/vector_math.cpp @@ -68,14 +68,10 @@ f_t dot(const std::vector& x, const std::vector& y) } template -f_t sparse_dot(i_t const *xind, - f_t const *xval, - i_t nx, - i_t const *yind, - i_t ny, - f_t const *y_scatter_val) +f_t sparse_dot( + i_t const* xind, f_t const* xval, i_t nx, i_t const* yind, i_t ny, f_t const* y_scatter_val) { - f_t dot = 0.0; + f_t dot = 0.0; for (i_t i = 0, j = 0; i < nx && j < ny;) { const i_t p = xind[i]; const i_t q = yind[j]; @@ -92,16 +88,10 @@ f_t sparse_dot(i_t const *xind, return dot; } - template -f_t sparse_dot(i_t *xind, - f_t *xval, - i_t nx, - i_t *yind, - f_t *yval, - i_t ny) +f_t sparse_dot(i_t* xind, f_t* xval, 
i_t nx, i_t* yind, f_t* yval, i_t ny) { - f_t dot = 0.0; + f_t dot = 0.0; for (i_t i = 0, j = 0; i < nx && j < ny;) { const i_t p = xind[i]; const i_t q = yind[j]; @@ -197,19 +187,15 @@ template double sparse_dot(const std::vector& xind, const std::vector& yind, const std::vector& yval); -template double sparse_dot(int const *xind, - double const *xval, +template double sparse_dot(int const* xind, + double const* xval, int nx, - int const *yind, + int const* yind, int ny, - double const *y_scatter_val); + double const* y_scatter_val); -template double sparse_dot(int *xind, - double *xval, - int nx, - int *yind, - double *yval, - int ny); +template double sparse_dot( + int* xind, double* xval, int nx, int* yind, double* yval, int ny); template int permute_vector(const std::vector& p, const std::vector& b, diff --git a/cpp/src/dual_simplex/vector_math.hpp b/cpp/src/dual_simplex/vector_math.hpp index 1c7c5470a7..c5bd128632 100644 --- a/cpp/src/dual_simplex/vector_math.hpp +++ b/cpp/src/dual_simplex/vector_math.hpp @@ -45,20 +45,11 @@ f_t sparse_dot(const std::vector& xind, const std::vector& yval); template -f_t sparse_dot(i_t const *xind, - f_t const *xval, - i_t nx, - i_t const *yind, - i_t ny, - f_t const *y_scatter_val); +f_t sparse_dot( + i_t const* xind, f_t const* xval, i_t nx, i_t const* yind, i_t ny, f_t const* y_scatter_val); template -f_t sparse_dot(i_t *xind, - f_t *xval, - i_t nx, - i_t *yind, - f_t *yval, - i_t ny); +f_t sparse_dot(i_t* xind, f_t* xval, i_t nx, i_t* yind, f_t* yval, i_t ny); // Computes x = P*b or x=b(p) in MATLAB. 
template From 7e7d0de13a0d51f0e2d9a46b81f738371ef688d9 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Thu, 24 Jul 2025 09:46:53 -0700 Subject: [PATCH 12/28] Add support for sparse vector rhs input to sparse triangular solve --- cpp/src/dual_simplex/basis_updates.cpp | 44 ++++---------- cpp/src/dual_simplex/basis_updates.hpp | 2 - cpp/src/dual_simplex/presolve.cpp | 12 +--- cpp/src/dual_simplex/triangle_solve.cpp | 78 +++++++++++++++++++++++++ cpp/src/dual_simplex/triangle_solve.hpp | 36 ++++++++++++ 5 files changed, 125 insertions(+), 47 deletions(-) diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index b594f47374..c17af69c86 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -1347,13 +1347,10 @@ template i_t basis_update_mpf_t::u_transpose_solve(sparse_vector_t& rhs) const { total_sparse_U_transpose_++; - const i_t m = L0_.m; // U0'*x = y // Solve U0'*x0 = y - // csc_matrix_t B(m, 1, 0); - rhs.to_csc(B_); - i_t top = sparse_triangle_solve( - B_, 0, std::nullopt, xi_workspace_, U0_transpose_, x_workspace_.data()); + i_t top = dual_simplex::sparse_triangle_solve( + rhs, std::nullopt, xi_workspace_, U0_transpose_, x_workspace_.data()); solve_to_sparse_vector(top, rhs); return 0; } @@ -1474,27 +1471,10 @@ i_t basis_update_mpf_t::l_transpose_solve(sparse_vector_t& r // sort the indices and place into a sparse column std::sort(xi_workspace_.begin() + m, xi_workspace_.begin() + m + nz, std::less()); - B_.m = m; - B_.n = 1; - B_.col_start.resize(2); - B_.i.resize(nz); - B_.x.resize(nz); - i_t b_nz = 0; - for (i_t k = 0; k < nz; ++k) { - const i_t i = xi_workspace_[m + k]; - const f_t b_val = x_workspace_[i]; - x_workspace_[i] = 0.0; - xi_workspace_[m + k] = 0; - xi_workspace_[i] = 0; - B_.i[b_nz] = i; - B_.x[b_nz] = b_val; - b_nz++; - } - B_.col_start[0] = 0; - B_.col_start[1] = b_nz; - - i_t top = sparse_triangle_solve( - B_, 0, std::nullopt, xi_workspace_, L0_transpose_, 
x_workspace_.data()); + sparse_vector_t b(m, nz); + gather_into_sparse_vector(nz, b); + i_t top = dual_simplex::sparse_triangle_solve( + b, std::nullopt, xi_workspace_, L0_transpose_, x_workspace_.data()); solve_to_sparse_vector(top, rhs); #ifdef CHECK_SPARSE_SOLVE @@ -1680,10 +1660,8 @@ i_t basis_update_mpf_t::u_solve(sparse_vector_t& rhs) const // U*x = y // Solve U0*x = y - // csc_matrix_t B(m, 1, 0); - rhs.to_csc(B_); - i_t top = sparse_triangle_solve( - B_, 0, std::nullopt, xi_workspace_, U0_, x_workspace_.data()); + i_t top = dual_simplex::sparse_triangle_solve( + rhs, std::nullopt, xi_workspace_, U0_, x_workspace_.data()); solve_to_sparse_vector(top, rhs); return 0; @@ -1758,10 +1736,8 @@ i_t basis_update_mpf_t::l_solve(sparse_vector_t& rhs) const // L0 * T0 * T1 * ... * T_{num_updates_ - 1} * x = y // First solve L0*x0 = y - // csc_matrix_t B(m, 1, 0); - rhs.to_csc(B_); - i_t top = sparse_triangle_solve( - B_, 0, std::nullopt, xi_workspace_, L0_, x_workspace_.data()); + i_t top = dual_simplex::sparse_triangle_solve( + rhs, std::nullopt, xi_workspace_, L0_, x_workspace_.data()); solve_to_workspace(top); // Uses xi_workspace_ and x_workspace_ to fill rhs i_t nz = m - top; // Then T0 * T1 * ... 
* T_{num_updates_ - 1} * x = x0 diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp index 551cfdb627..7af81ed70f 100644 --- a/cpp/src/dual_simplex/basis_updates.hpp +++ b/cpp/src/dual_simplex/basis_updates.hpp @@ -189,7 +189,6 @@ class basis_update_mpf_t { U0_transpose_(1, 1, 1), L0_transpose_(1, 1, 1), refactor_frequency_(refactor_frequency), - B_(Linit.m, 1, 0), total_sparse_L_transpose_(0), total_dense_L_transpose_(0), total_sparse_L_(0), @@ -380,7 +379,6 @@ class basis_update_mpf_t { mutable std::vector x_workspace_; mutable csc_matrix_t U0_transpose_; // Needed for sparse solves mutable csc_matrix_t L0_transpose_; // Needed for sparse solves - mutable csc_matrix_t B_; // Needed for sparse solves mutable i_t total_sparse_L_transpose_; mutable i_t total_dense_L_transpose_; diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index 41555a44e0..68043e06a6 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -32,7 +32,6 @@ void bound_strengthening(const std::vector& row_sense, const i_t n = problem.num_cols; std::vector constraint_lower(m); - // std::vector constraint_upper(m); std::vector num_lower_infinity(m); std::vector num_upper_infinity(m); @@ -68,16 +67,13 @@ void bound_strengthening(const std::vector& row_sense, num_upper_infinity[i] = 0; f_t lower_limit = 0.0; - // f_t upper_limit = 0.0; for (i_t p = row_start; p < row_end; ++p) { const i_t j = Arow.i[p]; const f_t a_ij = Arow.x[p]; if (a_ij > 0) { lower_limit += a_ij * lower[j]; - // upper_limit += a_ij * upper[j]; } else if (a_ij < 0) { lower_limit += a_ij * upper[j]; - // upper_limit += a_ij * lower[j]; } if (lower[j] == -inf && a_ij > 0) { num_lower_infinity[i]++; @@ -88,9 +84,7 @@ void bound_strengthening(const std::vector& row_sense, lower_limit = -inf; } } - // printf("Constraint %d: lo %e\n", i, lower_limit); constraint_lower[i] = lower_limit; - // constraint_upper[i] = upper_limit; } // Use 
the constraint bounds to derive new bounds on the variables @@ -104,8 +98,6 @@ void bound_strengthening(const std::vector& row_sense, if (a_ik > 0) { const f_t new_upper = lower[k] + (problem.rhs[i] - constraint_lower[i]) / a_ik; if (new_upper < upper[k]) { - // printf("Strengthed bound on variable %d: lo %e up %e -> %e\n", k, lower[k], - // upper[k], new_upper); upper[k] = new_upper; if (lower[k] > upper[k]) { settings.log.printf( @@ -119,8 +111,6 @@ void bound_strengthening(const std::vector& row_sense, } else if (a_ik < 0) { const f_t new_lower = upper[k] + (problem.rhs[i] - constraint_lower[i]) / a_ik; if (new_lower > lower[k]) { - // printf("Strengthend bound on variable %d: lo %e -> %e up %e\n", k, lower[k], - // new_lower, upper[k]); lower[k] = new_lower; if (lower[k] > upper[k]) { settings.log.printf("\t INFEASIBLE !!!!!!!!!!!!!!!!!!1\n"); @@ -695,7 +685,7 @@ void convert_user_problem(const user_problem_t& user_problem, // At this point the problem representation is in the form: A*x {<=, =} b // This is the time to run bound strengthening constexpr bool run_bound_strengthening = false; - if (run_bound_strengthening) { + if constexpr (run_bound_strengthening) { settings.log.printf("Running bound strengthening\n"); bound_strengthening(row_sense, settings, problem); } diff --git a/cpp/src/dual_simplex/triangle_solve.cpp b/cpp/src/dual_simplex/triangle_solve.cpp index b8183f68e3..54c0faeca2 100644 --- a/cpp/src/dual_simplex/triangle_solve.cpp +++ b/cpp/src/dual_simplex/triangle_solve.cpp @@ -117,6 +117,26 @@ i_t reach(const csc_matrix_t& B, return top; } +template +i_t reach(const sparse_vector_t& b, + const std::optional>& pinv, + csc_matrix_t& G, + std::vector& xi) +{ + const i_t m = G.m; + i_t top = m; + const i_t bnz = b.i.size(); + for (i_t p = 0; p < bnz; ++p) { + if (!MARKED(G.col_start, b.i[p])) { // start a DFS at unmarked node i + top = depth_first_search(b.i[p], pinv, G, top, xi, xi.begin() + m); + } + } + for (i_t p = top; p < m; ++p) { // restore G 
+ MARK(G.col_start, xi[p]); + } + return top; +} + // \brief Performs a depth-first search starting from node j in the graph // defined by G \param[in] j - root node \param[in] pinv - inverse permutation // \param[in, out] G - graph defined by sparse CSC matrix @@ -213,6 +233,47 @@ i_t sparse_triangle_solve(const csc_matrix_t& B, return top; } +template +i_t sparse_triangle_solve(const sparse_vector_t& b, + const std::optional>& pinv, + std::vector& xi, + csc_matrix_t& G, + f_t* x) +{ + i_t m = G.m; + assert(b.n == m); + i_t top = reach(b, pinv, G, xi); + for (i_t p = top; p < m; ++p) { + x[xi[p]] = 0; // Clear x vector + } + const i_t bnz = b.i.size(); + for (i_t p = 0; p < bnz; ++p) { + x[b.i[p]] = b.x[p]; // Scatter b + } + for (i_t px = top; px < m; ++px) { + i_t j = xi[px]; // x(j) is nonzero + i_t J = pinv ? ((*pinv)[j]) : j; // j maps to column J of G + if (J < 0) continue; // column j is empty + f_t Gjj; + i_t p; + i_t end; + if constexpr (lo) { + Gjj = G.x[G.col_start[J]]; // lo: L(j, j) is the first entry + p = G.col_start[J] + 1; + end = G.col_start[J + 1]; + } else { + Gjj = G.x[G.col_start[J + 1] - 1]; // up: U(j,j) is the last entry + p = G.col_start[J]; + end = G.col_start[J + 1] - 1; + } + x[j] /= Gjj; + for (; p < end; ++p) { + x[G.i[p]] -= G.x[p] * x[j]; // x(i) -= G(i,j) * x(j) + } + } + return top; +} + #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE template int lower_triangular_solve(const csc_matrix_t& L, @@ -233,6 +294,11 @@ template int reach(const csc_matrix_t& B, csc_matrix_t& G, std::vector& xi); +template int reach(const sparse_vector_t& b, + const std::optional>& pinv, + csc_matrix_t& G, + std::vector& xi); + template int depth_first_search(int j, const std::optional>& pinv, csc_matrix_t& G, @@ -253,6 +319,18 @@ template int sparse_triangle_solve(const csc_matrix_t& xi, csc_matrix_t& G, double* x); + +template int sparse_triangle_solve(const sparse_vector_t& b, + const std::optional>& pinv, + std::vector& xi, + csc_matrix_t& G, + double* x); + 
+template int sparse_triangle_solve(const sparse_vector_t& b, + const std::optional>& pinv, + std::vector& xi, + csc_matrix_t& G, + double* x); #endif } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/triangle_solve.hpp b/cpp/src/dual_simplex/triangle_solve.hpp index fc01613c78..fd9bf8b901 100644 --- a/cpp/src/dual_simplex/triangle_solve.hpp +++ b/cpp/src/dual_simplex/triangle_solve.hpp @@ -18,6 +18,7 @@ #pragma once #include +#include #include #include @@ -67,6 +68,21 @@ i_t reach(const csc_matrix_t& B, csc_matrix_t& G, std::vector& xi); + +// \brief Reach computes the reach of b in the graph of G +// \param[in] b - sparse vector containing the rhs +// \param[in] pinv - inverse permuation vector +// \param[in, out] G - Sparse CSC matrix G. The column pointers of G are +// modified (but restored) during this call \param[out] xi - stack of size 2*n. +// xi[top] .. xi[n-1] contains the reachable indicies \returns top - the size of +// the stack +template +i_t reach(const sparse_vector_t& b, + const std::optional>& pinv, + csc_matrix_t& G, + std::vector& xi); + + // \brief Performs a depth-first search starting from node j in the graph // defined by G // \param[in] j - root node @@ -111,4 +127,24 @@ i_t sparse_triangle_solve(const csc_matrix_t& B, csc_matrix_t& G, f_t* x); +// \brief sparse_triangle_solve solve L*x = b or U*x = b where L is a sparse lower +// triangular matrix +// and U is a sparse upper triangular matrix, and b is a sparse +// right-hand side. The vector b is obtained from the column of a sparse +// matrix. 
+// \param[in] b - Sparse vector contain the rhs +// \param[in] pinv - optional inverse permutation vector +// \param[in, out] xi - An array of size 2*m, on output it contains the non-zero +// pattern of x in xi[top] through xi[m-1] +// \param[in, out] G - The lower triangular matrix L or the upper triangular matrix U +// G.col_start is marked and restored during the algorithm +// \param[out] - The solution vector xi_t +template +i_t sparse_triangle_solve(const sparse_vector_t& b, + const std::optional>& pinv, + std::vector& xi, + csc_matrix_t& G, + f_t* x); + + } // namespace cuopt::linear_programming::dual_simplex From 7cfe1d5eee435fb93d02469faa8ea79e2c53e592 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Thu, 24 Jul 2025 09:53:09 -0700 Subject: [PATCH 13/28] Remove unused sparse triangle solve accepting column of CSC as rhs --- cpp/src/dual_simplex/basis_updates.cpp | 17 ++--- cpp/src/dual_simplex/triangle_solve.cpp | 85 +------------------------ cpp/src/dual_simplex/triangle_solve.hpp | 36 ----------- 3 files changed, 6 insertions(+), 132 deletions(-) diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index c17af69c86..d7c18988a3 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -278,12 +278,10 @@ i_t basis_update_t::l_solve(sparse_vector_t& rhs) const // First solve // L0*x0 = b - csc_matrix_t B(1, 1, 1); - rhs.to_csc(B); const i_t m = L0_.m; i_t top = sparse_triangle_solve( - B, 0, std::nullopt, xi_workspace_, L0_, x_workspace_.data()); + rhs, std::nullopt, xi_workspace_, L0_, x_workspace_.data()); solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs #ifdef CHECK_L_SOLVE @@ -502,16 +500,13 @@ i_t basis_update_t::l_transpose_solve(sparse_vector_t& rhs) } // L0^T * y = cprime - csc_matrix_t Cprime(1, 1, 1); - rhs.to_csc(Cprime); - #ifdef CHECK_LOWER_TRANSPOSE_SOLVE std::vector cprime_dense; rhs.to_dense(cprime_dense); #endif i_t top 
= sparse_triangle_solve( - Cprime, 0, std::nullopt, xi_workspace_, L0_transpose_, x_workspace_.data()); + rhs, std::nullopt, xi_workspace_, L0_transpose_, x_workspace_.data()); solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs #ifdef CHECK_LOWER_TRANSPOSE_SOLVE @@ -592,10 +587,8 @@ i_t basis_update_t::u_solve(sparse_vector_t& rhs) const sparse_vector_t bprime(m, 0); rhs.inverse_permute_vector(col_permutation_, bprime); - csc_matrix_t Bprime(1, 1, 1); - bprime.to_csc(Bprime); i_t top = sparse_triangle_solve( - Bprime, 0, std::nullopt, xi_workspace_, U_, x_workspace_.data()); + bprime, std::nullopt, xi_workspace_, U_, x_workspace_.data()); solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs rhs.inverse_permute_vector(inverse_col_permutation_); @@ -661,10 +654,8 @@ i_t basis_update_t::u_transpose_solve(sparse_vector_t& rhs) #endif // U'*y = bprime - csc_matrix_t Bprime(1, 1, 1); - bprime.to_csc(Bprime); i_t top = sparse_triangle_solve( - Bprime, 0, std::nullopt, xi_workspace_, U_transpose_, x_workspace_.data()); + bprime, std::nullopt, xi_workspace_, U_transpose_, x_workspace_.data()); solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs #ifdef CHECK_WORKSPACE diff --git a/cpp/src/dual_simplex/triangle_solve.cpp b/cpp/src/dual_simplex/triangle_solve.cpp index 54c0faeca2..e2988619e7 100644 --- a/cpp/src/dual_simplex/triangle_solve.cpp +++ b/cpp/src/dual_simplex/triangle_solve.cpp @@ -89,34 +89,13 @@ i_t upper_triangular_transpose_solve(const csc_matrix_t& U, std::vecto return 0; } -// \brief Reach computes the reach of b=B(:, col) in the graph of G -// \param[in] B - Sparse CSC matrix containing rhs -// \param[in] col - column of B +// \brief Reach computes the reach of b in the graph of G +// \param[in] b - Sparse vector containing the rhs // \param[in] pinv - inverse permuation vector // \param[in, out] G - Sparse CSC matrix G. 
The column pointers of G are // modified (but restored) during this call \param[out] xi - stack of size 2*n. // xi[top] .. xi[n-1] contains the reachable indicies \returns top - the size of // the stack -template -i_t reach(const csc_matrix_t& B, - i_t col, - const std::optional>& pinv, - csc_matrix_t& G, - std::vector& xi) -{ - const i_t m = G.m; - i_t top = m; - for (i_t p = B.col_start[col]; p < B.col_start[col + 1]; ++p) { - if (!MARKED(G.col_start, B.i[p])) { // start a DFS at unmarked node i - top = depth_first_search(B.i[p], pinv, G, top, xi, xi.begin() + m); - } - } - for (i_t p = top; p < m; ++p) { // restore G - MARK(G.col_start, xi[p]); - } - return top; -} - template i_t reach(const sparse_vector_t& b, const std::optional>& pinv, @@ -192,47 +171,6 @@ i_t depth_first_search(i_t j, return top; } -template -i_t sparse_triangle_solve(const csc_matrix_t& B, - i_t col, - const std::optional>& pinv, - std::vector& xi, - csc_matrix_t& G, - f_t* x) -{ - i_t m = G.m; - assert(B.m == m); - i_t top = reach(B, col, pinv, G, xi); - for (i_t p = top; p < m; ++p) { - x[xi[p]] = 0; // Clear x vector - } - for (i_t p = B.col_start[col]; p < B.col_start[col + 1]; ++p) { - x[B.i[p]] = B.x[p]; // Scatter b - } - for (i_t px = top; px < m; ++px) { - i_t j = xi[px]; // x(j) is nonzero - i_t J = pinv ? 
((*pinv)[j]) : j; // j maps to column J of G - if (J < 0) continue; // column j is empty - f_t Gjj; - i_t p; - i_t end; - if constexpr (lo) { - Gjj = G.x[G.col_start[J]]; // lo: L(j, j) is the first entry - p = G.col_start[J] + 1; - end = G.col_start[J + 1]; - } else { - Gjj = G.x[G.col_start[J + 1] - 1]; // up: U(j,j) is the last entry - p = G.col_start[J]; - end = G.col_start[J + 1] - 1; - } - x[j] /= Gjj; - for (; p < end; ++p) { - x[G.i[p]] -= G.x[p] * x[j]; // x(i) -= G(i,j) * x(j) - } - } - return top; -} - template i_t sparse_triangle_solve(const sparse_vector_t& b, const std::optional>& pinv, @@ -288,11 +226,6 @@ template int upper_triangular_solve(const csc_matrix_t template int upper_triangular_transpose_solve(const csc_matrix_t& U, std::vector& x); -template int reach(const csc_matrix_t& B, - int col, - const std::optional>& pinv, - csc_matrix_t& G, - std::vector& xi); template int reach(const sparse_vector_t& b, const std::optional>& pinv, @@ -306,20 +239,6 @@ template int depth_first_search(int j, std::vector& xi, std::vector::iterator pstack); -template int sparse_triangle_solve(const csc_matrix_t& B, - int col, - const std::optional>& pinv, - std::vector& xi, - csc_matrix_t& G, - double* x); - -template int sparse_triangle_solve(const csc_matrix_t& B, - int col, - const std::optional>& pinv, - std::vector& xi, - csc_matrix_t& G, - double* x); - template int sparse_triangle_solve(const sparse_vector_t& b, const std::optional>& pinv, std::vector& xi, diff --git a/cpp/src/dual_simplex/triangle_solve.hpp b/cpp/src/dual_simplex/triangle_solve.hpp index fd9bf8b901..282f02ac91 100644 --- a/cpp/src/dual_simplex/triangle_solve.hpp +++ b/cpp/src/dual_simplex/triangle_solve.hpp @@ -53,21 +53,6 @@ i_t upper_triangular_solve(const csc_matrix_t& U, std::vector& x) template i_t upper_triangular_transpose_solve(const csc_matrix_t& U, std::vector& x); -// \brief Reach computes the reach of b=B(:, col) in the graph of G -// \param[in] B - Sparse CSC matrix containing 
rhs -// \param[in] col - column of B -// \param[in] pinv - inverse permuation vector -// \param[in, out] G - Sparse CSC matrix G. The column pointers of G are -// modified (but restored) during this call \param[out] xi - stack of size 2*n. -// xi[top] .. xi[n-1] contains the reachable indicies \returns top - the size of -// the stack -template -i_t reach(const csc_matrix_t& B, - i_t col, - const std::optional>& pinv, - csc_matrix_t& G, - std::vector& xi); - // \brief Reach computes the reach of b in the graph of G // \param[in] b - sparse vector containing the rhs @@ -106,27 +91,6 @@ i_t depth_first_search(i_t j, std::vector& xi, typename std::vector::iterator pstack); -// \brief sparse_triangle_solve solve L*x = b or U*x = b where L is a sparse lower -// triangular matrix -// and U is a sparse upper triangular matrix, and b is a sparse -// right-hand side. The vector b is obtained from the column of a sparse -// matrix. -// \param[in] B - Sparse CSC matrix contain the rhs -// \param[in] col - the column of B to use as b. 
b = B(:, col) -// \param[in] pinv - optional inverse permutation vector -// \param[in, out] xi - An array of size 2*m, on output it contains the non-zero -// pattern of x in xi[top] through xi[m-1] -// \param[in, out] G - The lower triangular matrix L or the upper triangular matrix U -// G.col_start is marked and restored during the algorithm -// \param[out] - The solution vector xi_t -template -i_t sparse_triangle_solve(const csc_matrix_t& B, - i_t col, - const std::optional>& pinv, - std::vector& xi, - csc_matrix_t& G, - f_t* x); - // \brief sparse_triangle_solve solve L*x = b or U*x = b where L is a sparse lower // triangular matrix // and U is a sparse upper triangular matrix, and b is a sparse From 1991fd3e6e0f05fca0844a4e78c55db1da3cd789 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Thu, 24 Jul 2025 09:53:50 -0700 Subject: [PATCH 14/28] Formatting --- cpp/src/dual_simplex/triangle_solve.cpp | 7 +++---- cpp/src/dual_simplex/triangle_solve.hpp | 3 --- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/cpp/src/dual_simplex/triangle_solve.cpp b/cpp/src/dual_simplex/triangle_solve.cpp index e2988619e7..13a42d2f98 100644 --- a/cpp/src/dual_simplex/triangle_solve.cpp +++ b/cpp/src/dual_simplex/triangle_solve.cpp @@ -102,11 +102,11 @@ i_t reach(const sparse_vector_t& b, csc_matrix_t& G, std::vector& xi) { - const i_t m = G.m; - i_t top = m; + const i_t m = G.m; + i_t top = m; const i_t bnz = b.i.size(); for (i_t p = 0; p < bnz; ++p) { - if (!MARKED(G.col_start, b.i[p])) { // start a DFS at unmarked node i + if (!MARKED(G.col_start, b.i[p])) { // start a DFS at unmarked node i top = depth_first_search(b.i[p], pinv, G, top, xi, xi.begin() + m); } } @@ -226,7 +226,6 @@ template int upper_triangular_solve(const csc_matrix_t template int upper_triangular_transpose_solve(const csc_matrix_t& U, std::vector& x); - template int reach(const sparse_vector_t& b, const std::optional>& pinv, csc_matrix_t& G, diff --git a/cpp/src/dual_simplex/triangle_solve.hpp 
b/cpp/src/dual_simplex/triangle_solve.hpp index 282f02ac91..5016332dae 100644 --- a/cpp/src/dual_simplex/triangle_solve.hpp +++ b/cpp/src/dual_simplex/triangle_solve.hpp @@ -53,7 +53,6 @@ i_t upper_triangular_solve(const csc_matrix_t& U, std::vector& x) template i_t upper_triangular_transpose_solve(const csc_matrix_t& U, std::vector& x); - // \brief Reach computes the reach of b in the graph of G // \param[in] b - sparse vector containing the rhs // \param[in] pinv - inverse permuation vector @@ -67,7 +66,6 @@ i_t reach(const sparse_vector_t& b, csc_matrix_t& G, std::vector& xi); - // \brief Performs a depth-first search starting from node j in the graph // defined by G // \param[in] j - root node @@ -110,5 +108,4 @@ i_t sparse_triangle_solve(const sparse_vector_t& b, csc_matrix_t& G, f_t* x); - } // namespace cuopt::linear_programming::dual_simplex From 53be9bc0413cecc6f1ca886245976e9d68d3991c Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Thu, 24 Jul 2025 09:55:10 -0700 Subject: [PATCH 15/28] Disable computing Farkas certificate for now --- cpp/src/dual_simplex/phase2.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index b652d04da8..786981b974 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -2557,7 +2557,7 @@ dual::status_t dual_phase2(i_t phase, } if (perturbation == 0.0 && phase == 2) { - constexpr bool use_farkas = true; + constexpr bool use_farkas = false; if constexpr (use_farkas) { std::vector farkas_y; std::vector farkas_zl; From 0ca65eaf88921620fe1955f34793eeeebd7a14f6 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Thu, 24 Jul 2025 15:35:48 -0700 Subject: [PATCH 16/28] Drop small delta_y. Always recompute primal variables if no entering variable. 
Allocate timer to ftran --- cpp/src/dual_simplex/phase2.cpp | 40 ++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 786981b974..0d0dc7fc5b 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -327,7 +327,7 @@ void compute_delta_z(const csc_matrix_t& A_transpose, for (i_t k = 0; k < nz_delta_y; k++) { const i_t i = delta_y.i[k]; const f_t delta_y_i = delta_y.x[k]; - if (0 && std::abs(delta_y_i) < 1e-12) { continue; } + if (std::abs(delta_y_i) < 1e-12) { continue; } const i_t row_start = A_transpose.col_start[i]; const i_t row_end = A_transpose.col_start[i + 1]; for (i_t p = row_start; p < row_end; ++p) { @@ -1272,9 +1272,7 @@ i_t update_steepest_edge_norms(const simplex_solver_settings_t& settin // We want B v = - B^{-T} e_basic_leaving_index ft.b_solve(delta_y_sparse, v_sparse); if (direction == -1) { - for (i_t k = 0; k < v_sparse.i.size(); ++k) { - v_sparse.x[k] *= -1; - } + v_sparse.negate(); } v_sparse.scatter(v); @@ -2508,6 +2506,7 @@ dual::status_t dual_phase2(i_t phase, { const f_t dual_infeas = phase2::dual_infeasibility( lp, settings, vstatus, unperturbed_z, settings.tight_tol, settings.dual_tol); + settings.log.printf("Dual infeasibility after removing perturbation %e\n", dual_infeas); if (dual_infeas <= settings.dual_tol) { settings.log.printf("Removed perturbation of %.2e.\n", perturbation); z = unperturbed_z; @@ -2551,13 +2550,42 @@ dual::status_t dual_phase2(i_t phase, entering_index, leaving_index, delta_z_mark, delta_z_indices, delta_z); continue; } else { + std::vector unperturbed_x(n); + phase2::compute_primal_solution_from_basis( + lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x); + x = unperturbed_x; + primal_infeasibility = phase2::compute_initial_primal_infeasibilities( + lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); + + const f_t orig_dual_infeas = 
phase2::dual_infeasibility( + lp, settings, vstatus, z, settings.tight_tol, settings.dual_tol); + + if (primal_infeasibility <= settings.primal_tol && orig_dual_infeas <= settings.dual_tol) { + phase2::prepare_optimality(lp, + settings, + ft, + objective, + basic_list, + nonbasic_list, + vstatus, + phase, + start_time, + max_val, + iter, + x, + y, + z, + sol); + status = dual::status_t::OPTIMAL; + break; + } settings.log.printf("Failed to remove perturbation of %.2e.\n", perturbation); } } } if (perturbation == 0.0 && phase == 2) { - constexpr bool use_farkas = false; + constexpr bool use_farkas = true; if constexpr (use_farkas) { std::vector farkas_y; std::vector farkas_zl; @@ -2668,7 +2696,7 @@ dual::status_t dual_phase2(i_t phase, scaled_delta_xB_sparse, delta_x); - timers.vector_time += timers.stop_timer(); + timers.ftran_time += timers.stop_timer(); #ifdef CHECK_PRIMAL_STEP std::vector residual(m); From 1ac7e1f4d144485dc98cde3f07b91be5b3e7249d Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Thu, 24 Jul 2025 15:37:34 -0700 Subject: [PATCH 17/28] Keep farkas off --- cpp/src/dual_simplex/phase2.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 0d0dc7fc5b..6496d3e765 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -2585,7 +2585,7 @@ dual::status_t dual_phase2(i_t phase, } if (perturbation == 0.0 && phase == 2) { - constexpr bool use_farkas = true; + constexpr bool use_farkas = false; if constexpr (use_farkas) { std::vector farkas_y; std::vector farkas_zl; From 2230fbc47af00478aa452d860ecc885d720cf57a Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Thu, 24 Jul 2025 15:38:04 -0700 Subject: [PATCH 18/28] Formatting --- cpp/src/dual_simplex/phase2.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 6496d3e765..606542da52 100644 --- 
a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -1271,9 +1271,7 @@ i_t update_steepest_edge_norms(const simplex_solver_settings_t& settin // B^T delta_y = - direction * e_basic_leaving_index // We want B v = - B^{-T} e_basic_leaving_index ft.b_solve(delta_y_sparse, v_sparse); - if (direction == -1) { - v_sparse.negate(); - } + if (direction == -1) { v_sparse.negate(); } v_sparse.scatter(v); const i_t leaving_index = basic_list[basic_leaving_index]; @@ -2560,7 +2558,8 @@ dual::status_t dual_phase2(i_t phase, const f_t orig_dual_infeas = phase2::dual_infeasibility( lp, settings, vstatus, z, settings.tight_tol, settings.dual_tol); - if (primal_infeasibility <= settings.primal_tol && orig_dual_infeas <= settings.dual_tol) { + if (primal_infeasibility <= settings.primal_tol && + orig_dual_infeas <= settings.dual_tol) { phase2::prepare_optimality(lp, settings, ft, From 42265d662a942dee2c8df7752dd3341a101e8f35 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Mon, 28 Jul 2025 10:19:49 -0700 Subject: [PATCH 19/28] Drop small elements. 
Dont copy intermediate solutions if not needed --- cpp/src/dual_simplex/basis_updates.cpp | 104 +++++++++++++++---------- cpp/src/dual_simplex/basis_updates.hpp | 6 +- 2 files changed, 66 insertions(+), 44 deletions(-) diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index d7c18988a3..c61704c58f 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -1121,12 +1121,16 @@ void basis_update_mpf_t::gather_into_sparse_vector(i_t nz, sparse_vector_t& out) const { const i_t m = L0_.m; - out.i.resize(nz); - out.x.resize(nz); + out.i.clear(); + out.x.clear(); + out.i.reserve(nz); + out.x.reserve(nz); for (i_t k = 0; k < nz; ++k) { const i_t i = xi_workspace_[m + k]; - out.i[k] = i; - out.x[k] = x_workspace_[i]; + if (std::abs(x_workspace_[i]) > 1e-13) { + out.i.push_back(i); + out.x.push_back(x_workspace_[i]); + } xi_workspace_[m + k] = 0; xi_workspace_[i] = 0; x_workspace_[i] = 0.0; @@ -1157,13 +1161,17 @@ void basis_update_mpf_t::solve_to_sparse_vector(i_t top, const i_t m = L0_.m; out.n = m; const i_t nz = m - top; - out.x.resize(nz); - out.i.resize(nz); + out.x.clear(); + out.i.clear(); + out.x.reserve(nz); + out.i.reserve(nz); i_t k = 0; for (i_t p = top; p < m; ++p) { const i_t i = xi_workspace_[p]; - out.i[k] = i; - out.x[k] = x_workspace_[i]; + if (std::abs(x_workspace_[i]) > 1e-13) { + out.i.push_back(i); + out.x.push_back(x_workspace_[i]); + } x_workspace_[i] = 0.0; xi_workspace_[p] = 0; k++; @@ -1372,9 +1380,11 @@ i_t basis_update_mpf_t::l_transpose_solve(std::vector& rhs) const } const f_t theta = dot / mu; - for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) { - const i_t i = S_.i[p]; - rhs[i] -= theta * S_.x[p]; + if (std::abs(theta) > 1e-13) { + for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) { + const i_t i = S_.i[p]; + rhs[i] -= theta * S_.x[p]; + } } } @@ -1427,16 +1437,17 @@ i_t 
basis_update_mpf_t::l_transpose_solve(sparse_vector_t& r #endif const f_t theta = dot / mu; - - for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) { - const i_t i = S_.i[p]; - if (!xi_workspace_[i]) { - // Fill occured - xi_workspace_[i] = 1; - xi_workspace_[m + nz] = i; - nz++; + if (std::abs(theta) > 1e-13) { + for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) { + const i_t i = S_.i[p]; + if (!xi_workspace_[i]) { + // Fill occured + xi_workspace_[i] = 1; + xi_workspace_[m + nz] = i; + nz++; + } + x_workspace_[i] -= theta * S_.x[p]; } - x_workspace_[i] -= theta * S_.x[p]; } #ifdef CHECK_MULTIPLY @@ -1459,8 +1470,6 @@ i_t basis_update_mpf_t::l_transpose_solve(sparse_vector_t& r } #endif - // sort the indices and place into a sparse column - std::sort(xi_workspace_.begin() + m, xi_workspace_.begin() + m + nz, std::less()); sparse_vector_t b(m, nz); gather_into_sparse_vector(nz, b); @@ -1499,7 +1508,8 @@ i_t basis_update_mpf_t::b_solve(const std::vector& rhs, template i_t basis_update_mpf_t::b_solve(const std::vector& rhs, std::vector& solution, - std::vector& Lsol) const + std::vector& Lsol, + bool need_Lsol) const { const i_t m = L0_.m; // P*B = L*U @@ -1514,7 +1524,9 @@ i_t basis_update_mpf_t::b_solve(const std::vector& rhs, std::vector rhs_permuted = solution; #endif l_solve(solution); - Lsol = solution; + if (need_Lsol) { + Lsol = solution; + } #ifdef CHECK_L_SOLVE std::vector Lsol_check = Lsol; @@ -1544,13 +1556,14 @@ i_t basis_update_mpf_t::b_solve(const sparse_vector_t& rhs, sparse_vector_t& solution) const { sparse_vector_t Lsol(1, 0); - return b_solve(rhs, solution, Lsol); + return b_solve(rhs, solution, Lsol, false); } template i_t basis_update_mpf_t::b_solve(const sparse_vector_t& rhs, sparse_vector_t& solution, - sparse_vector_t& Lsol) const + sparse_vector_t& Lsol, + bool need_Lsol) const { const i_t m = L0_.m; solution = rhs; @@ -1587,7 +1600,9 @@ i_t basis_update_mpf_t::b_solve(const sparse_vector_t& rhs, 
l_solve(solution_dense); solution.from_dense(solution_dense); } - Lsol = solution; + if (need_Lsol) { + Lsol = solution; + } sum_L_ += static_cast(solution.i.size()) / input_size; #ifdef CHECK_L_SOLVE @@ -1698,9 +1713,11 @@ i_t basis_update_mpf_t::l_solve(std::vector& rhs) const } const f_t theta = dot / mu; - for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) { - const i_t i = S_.i[p]; - rhs[i] -= theta * S_.x[p]; + if (std::abs(theta) > 1e-13) { + for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) { + const i_t i = S_.i[p]; + rhs[i] -= theta * S_.x[p]; + } } } @@ -1749,16 +1766,17 @@ i_t basis_update_mpf_t::l_solve(sparse_vector_t& rhs) const } const f_t theta = dot / mu; - - for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) { - const i_t i = S_.i[p]; - if (!xi_workspace_[i]) { - // Fill occured - xi_workspace_[i] = 1; - xi_workspace_[m + nz] = i; - nz++; + if (std::abs(theta) > 1e-13) { + for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) { + const i_t i = S_.i[p]; + if (!xi_workspace_[i]) { + // Fill occured + xi_workspace_[i] = 1; + xi_workspace_[m + nz] = i; + nz++; + } + x_workspace_[i] -= theta * S_.x[p]; } - x_workspace_[i] -= theta * S_.x[p]; } } @@ -1993,9 +2011,11 @@ void basis_update_mpf_t::l_transpose_multiply(std::vector& inout) dot += S_.x[p] * inout[i]; } const f_t theta = dot; - for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) { - const i_t i = S_.i[p]; - inout[i] += theta * S_.x[p]; + if (std::abs(theta) > 1e-13) { + for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) { + const i_t i = S_.i[p]; + inout[i] += theta * S_.x[p]; + } } } } diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp index 7af81ed70f..bd830a1edf 100644 --- a/cpp/src/dual_simplex/basis_updates.hpp +++ b/cpp/src/dual_simplex/basis_updates.hpp @@ -265,10 +265,12 @@ class basis_update_mpf_t { i_t b_solve(const sparse_vector_t& rhs, 
sparse_vector_t& solution) const; i_t b_solve(const std::vector& rhs, std::vector& solution, - std::vector& Lsol) const; + std::vector& Lsol, + bool need_Lsol = true) const; i_t b_solve(const sparse_vector_t& rhs, sparse_vector_t& solution, - sparse_vector_t& Lsol) const; + sparse_vector_t& Lsol, + bool need_Lsol = true) const; // Solves for y such that B'*y = c, where B is the basis matrix i_t b_transpose_solve(const std::vector& rhs, std::vector& solution) const; From 8eb349a7f5d08bdce67146c5e1e842e5f6971439 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Mon, 28 Jul 2025 19:32:37 -0700 Subject: [PATCH 20/28] Address comments/suggestions in code review Thanks Alice! --- cpp/src/dual_simplex/basis_updates.cpp | 270 ++++++++---------- cpp/src/dual_simplex/basis_updates.hpp | 18 +- .../bound_flipping_ratio_test.hpp | 4 +- cpp/src/dual_simplex/phase2.cpp | 37 ++- cpp/src/dual_simplex/random.hpp | 8 +- cpp/src/dual_simplex/sparse_matrix.cpp | 56 ++++ cpp/src/dual_simplex/sparse_matrix.hpp | 13 + 7 files changed, 233 insertions(+), 173 deletions(-) diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index c61704c58f..7ee674ef8d 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -1125,9 +1125,10 @@ void basis_update_mpf_t::gather_into_sparse_vector(i_t nz, out.x.clear(); out.i.reserve(nz); out.x.reserve(nz); + const f_t zero_tol = 1e-13; for (i_t k = 0; k < nz; ++k) { const i_t i = xi_workspace_[m + k]; - if (std::abs(x_workspace_[i]) > 1e-13) { + if (std::abs(x_workspace_[i]) > zero_tol) { out.i.push_back(i); out.x.push_back(x_workspace_[i]); } @@ -1166,9 +1167,10 @@ void basis_update_mpf_t::solve_to_sparse_vector(i_t top, out.x.reserve(nz); out.i.reserve(nz); i_t k = 0; + const f_t zero_tol = 1e-13; for (i_t p = top; p < m; ++p) { const i_t i = xi_workspace_[p]; - if (std::abs(x_workspace_[i]) > 1e-13) { + if (std::abs(x_workspace_[i]) > zero_tol) { out.i.push_back(i); 
out.x.push_back(x_workspace_[i]); } @@ -1212,6 +1214,77 @@ void basis_update_mpf_t::grow_storage(i_t nz, i_t& S_start, i_t& S_nz) S_start = last_S_col; } +template +i_t basis_update_mpf_t::nonzeros(const std::vector& x) const +{ + i_t nz = 0; + const i_t xsz = x.size(); + for (i_t i = 0; i < xsz; ++i) { + if (x[i] != 0.0) { nz++; } + } + return nz; +} + +// dot = S(:, col)' * x +template +f_t basis_update_mpf_t::dot_product(i_t col, const std::vector& x) const +{ + f_t dot = 0.0; + const i_t col_start = S_.col_start[col]; + const i_t col_end = S_.col_start[col + 1]; + for (i_t p = col_start; p < col_end; ++p) { + const i_t i = S_.i[p]; + dot += S_.x[p] * x[i]; + } + return dot; +} + +// dot = S(:, col)' * x +template +f_t basis_update_mpf_t::dot_product(i_t col, const std::vector& mark, const std::vector& x) const +{ + f_t dot = 0.0; + const i_t col_start = S_.col_start[col]; + const i_t col_end = S_.col_start[col + 1]; + for (i_t p = col_start; p < col_end; ++p) { + const i_t i = S_.i[p]; + if (mark[i]) { + dot += S_.x[p] * x[i]; + } + } + return dot; +} + +// x <- x + theta * S(:, col) +template +void basis_update_mpf_t::add_sparse_column(const csc_matrix_t& S, i_t col, f_t theta, std::vector& x) const +{ + const i_t col_start = S.col_start[col]; + const i_t col_end = S.col_start[col + 1]; + for (i_t p = col_start; p < col_end; ++p) { + const i_t i = S.i[p]; + x[i] += theta * S.x[p]; + } +} + +template +void basis_update_mpf_t::add_sparse_column(const csc_matrix_t& S, i_t col, f_t theta, std::vector& mark, i_t& nz, std::vector& x) const +{ + const i_t m = L0_.m; + const i_t col_start = S.col_start[col]; + const i_t col_end = S.col_start[col + 1]; + for (i_t p = col_start; p < col_end; ++p) { + const i_t i = S.i[p]; + if (!mark[i]) { + // Fill occured + mark[i] = 1; + mark[m + nz] = i; + nz++; + } + x[i] += theta * S.x[p]; + } +} + template i_t basis_update_mpf_t::b_transpose_solve(const std::vector& rhs, std::vector& solution) const @@ -1317,12 +1390,13 @@ i_t 
basis_update_mpf_t::b_transpose_solve(const sparse_vector_t 1e-4) { printf( - "B transpose solve L transpose solve error %e: index %d multiply %e rhs %e. update %d\n", + "B transpose solve L transpose solve error %e: index %d multiply %e rhs %e. update %d. use hypersparse %d\n", std::abs(solution_dense[k] - r_dense[k]), k, solution_dense[k], r_dense[k], - num_updates_); + num_updates_, + use_hypersparse); } max_error = std::max(max_error, std::abs(solution_dense[k] - r_dense[k])); @@ -1363,6 +1437,7 @@ i_t basis_update_mpf_t::l_transpose_solve(std::vector& rhs) const // L'*x = b // L0^T *x = T_0^-T * T_1^-T * ... * T_{num_updates_ - 1}^-T * b = b' + const f_t zero_tol = 1e-13; // Compute b' for (i_t k = num_updates_ - 1; k >= 0; --k) { // T_k^{-T} = ( I - v u^T/(1 + u^T v)) @@ -1373,18 +1448,11 @@ i_t basis_update_mpf_t::l_transpose_solve(std::vector& rhs) const const f_t mu = mu_values_[k]; // dot = u^T * b - f_t dot = 0.0; - for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) { - const i_t i = S_.i[p]; - dot += S_.x[p] * rhs[i]; - } + f_t dot = dot_product(u_col, rhs); const f_t theta = dot / mu; - if (std::abs(theta) > 1e-13) { - for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) { - const i_t i = S_.i[p]; - rhs[i] -= theta * S_.x[p]; - } + if (std::abs(theta) > zero_tol) { + add_sparse_column(S_, v_col, -theta, rhs); } } @@ -1409,6 +1477,7 @@ i_t basis_update_mpf_t::l_transpose_solve(sparse_vector_t& r std::vector rhs_dense_0; rhs.to_dense(rhs_dense_0); #endif + const f_t zero_tol = 1e-13; // Compute b' for (i_t k = num_updates_ - 1; k >= 0; --k) { // T_k^{-T} = ( I - v u^T/(1 + u^T v)) @@ -1419,11 +1488,7 @@ i_t basis_update_mpf_t::l_transpose_solve(sparse_vector_t& r const f_t mu = mu_values_[k]; // dot = u^T * b - f_t dot = 0.0; - for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) { - const i_t i = S_.i[p]; - if (xi_workspace_[i]) { dot += S_.x[p] * x_workspace_[i]; } - } + f_t dot = dot_product(u_col, 
xi_workspace_, x_workspace_); #ifdef CHECK_MULTIPLY f_t dot_check = 0.0; @@ -1437,17 +1502,8 @@ i_t basis_update_mpf_t::l_transpose_solve(sparse_vector_t& r #endif const f_t theta = dot / mu; - if (std::abs(theta) > 1e-13) { - for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) { - const i_t i = S_.i[p]; - if (!xi_workspace_[i]) { - // Fill occured - xi_workspace_[i] = 1; - xi_workspace_[m + nz] = i; - nz++; - } - x_workspace_[i] -= theta * S_.x[p]; - } + if (std::abs(theta) > zero_tol) { + add_sparse_column(S_, v_col, -theta, xi_workspace_, nz, x_workspace_); } #ifdef CHECK_MULTIPLY @@ -1483,8 +1539,8 @@ i_t basis_update_mpf_t::l_transpose_solve(sparse_vector_t& r std::vector b_dense(m, 0.0); for (i_t p = 0; p < nz; ++p) { - const i_t i = B.i[p]; - b_dense[i] = B.x[p]; + const i_t i = b.i[p]; + b_dense[i] = b.x[p]; } matrix_vector_multiply(L0_transpose_, 1.0, rhs_dense, -1.0, b_dense); if (vector_norm_inf(b_dense) > 1e-9) { @@ -1698,6 +1754,7 @@ i_t basis_update_mpf_t::l_solve(std::vector& rhs) const // Then T0 * T1 * ... 
* T_{num_updates_ - 1} * x = x0 // Or x = T_{num_updates}^{-1} * T_1^{-1} * T_0^{-1} x0 + const f_t zero_tol = 1e-16; // Any higher and pilot_ja fails for (i_t k = 0; k < num_updates_; ++k) { // T = I + u*v^T // T^{-1} = I - u*v^T / (1 + v^T*u) @@ -1706,18 +1763,11 @@ i_t basis_update_mpf_t::l_solve(std::vector& rhs) const const f_t mu = mu_values_[k]; const i_t u_col = 2 * k; const i_t v_col = 2 * k + 1; - f_t dot = 0.0; - for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) { - const i_t i = S_.i[p]; - dot += S_.x[p] * rhs[i]; - } + f_t dot = dot_product(v_col, rhs); const f_t theta = dot / mu; - if (std::abs(theta) > 1e-13) { - for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) { - const i_t i = S_.i[p]; - rhs[i] -= theta * S_.x[p]; - } + if (std::abs(theta) > zero_tol) { + add_sparse_column(S_, u_col, -theta, rhs); } } @@ -1750,6 +1800,7 @@ i_t basis_update_mpf_t::l_solve(sparse_vector_t& rhs) const i_t nz = m - top; // Then T0 * T1 * ... * T_{num_updates_ - 1} * x = x0 // Or x = T_{num_updates}^{-1} * T_1^{-1} * T_0^{-1} x0 + const f_t zero_tol = 1e-13; for (i_t k = 0; k < num_updates_; ++k) { // T = I + u*v^T // T^{-1} = I - u*v^T / (1 + v^T*u) @@ -1759,24 +1810,12 @@ i_t basis_update_mpf_t::l_solve(sparse_vector_t& rhs) const const i_t u_col = 2 * k; const i_t v_col = 2 * k + 1; - f_t dot = 0.0; - for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) { - const i_t i = S_.i[p]; - if (xi_workspace_[i]) { dot += S_.x[p] * x_workspace_[i]; } - } + // dot = v^T * x + f_t dot = dot_product(v_col, xi_workspace_, x_workspace_); const f_t theta = dot / mu; - if (std::abs(theta) > 1e-13) { - for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) { - const i_t i = S_.i[p]; - if (!xi_workspace_[i]) { - // Fill occured - xi_workspace_[i] = 1; - xi_workspace_[m + nz] = i; - nz++; - } - x_workspace_[i] -= theta * S_.x[p]; - } + if (std::abs(theta) > zero_tol) { + add_sparse_column(S_, u_col, -theta, xi_workspace_, 
nz, x_workspace_); } } @@ -1802,51 +1841,28 @@ i_t basis_update_mpf_t::update(const std::vector& utilde, const i_t col_end = U0_.col_start[leaving_index + 1]; std::vector u = utilde; // u = utilde - U0(:, leaving_index) - for (i_t p = col_start; p < col_end; ++p) { - const i_t i = U0_.i[p]; - u[i] -= U0_.x[p]; - } + add_sparse_column(U0_, leaving_index, -1.0, u); - i_t u_nz = 0; - for (i_t i = 0; i < m; ++i) { - if (u[i] != 0.0) { u_nz++; } - } + i_t u_nz = nonzeros(u); // v = etilde - i_t v_nz = 0; - for (i_t i = 0; i < m; ++i) { - if (etilde[i] != 0.0) { v_nz++; } - } + i_t v_nz = nonzeros(etilde); i_t nz = u_nz + v_nz; i_t S_start; i_t S_nz; grow_storage(nz, S_start, S_nz); -#ifdef PRINT_S_INFO - printf("Update: S_start %d S_nz %d\n", S_start, S_nz); +#ifdef PRINT_NZ_INFO + printf("Update: S_start %d S_nz %d num updates %d S.n %d\n", S_start, S_nz, num_updates_, S_.n); #endif i_t S_nz_start = S_nz; // Scatter u into S - for (i_t i = 0; i < m; ++i) { - if (u[i] != 0.0) { - S_.i[S_nz] = i; - S_.x[S_nz] = u[i]; - S_nz++; - } - } - S_.col_start[S_start + 1] = S_nz; + S_.append_column(u); // Scatter v into S - for (i_t i = 0; i < m; ++i) { - if (etilde[i] != 0.0) { - S_.i[S_nz] = i; - S_.x[S_nz] = etilde[i]; - S_nz++; - } - } - S_.col_start[S_start + 2] = S_nz; + S_.append_column(etilde); // Compute mu = 1 + v^T * u const f_t mu = 1.0 + sparse_dot(S_.i.data() + S_.col_start[S_start], @@ -1891,20 +1907,9 @@ i_t basis_update_mpf_t::update(const sparse_vector_t& utilde i_t nz = scatter_into_workspace(utilde); // Subtract the column of U0 corresponding to the leaving index - const i_t col_start = U0_.col_start[leaving_index]; - const i_t col_end = U0_.col_start[leaving_index + 1]; - for (i_t p = col_start; p < col_end; ++p) { - const i_t i = U0_.i[p]; - if (!xi_workspace_[i]) { - // Fill occured - xi_workspace_[i] = 1; - xi_workspace_[m + nz] = i; - nz++; - } - x_workspace_[i] -= U0_.x[p]; - } + add_sparse_column(U0_, leaving_index, -1.0, xi_workspace_, nz, 
x_workspace_); - // Ensure the workspace is sorted + // Ensure the workspace is sorted. Otherwise, the sparse dot will be incorrect. std::sort(xi_workspace_.begin() + m, xi_workspace_.begin() + m + nz, std::less()); // Gather the workspace into a column of S @@ -1912,30 +1917,11 @@ i_t basis_update_mpf_t::update(const sparse_vector_t& utilde i_t S_nz; grow_storage(nz + etilde.i.size(), S_start, S_nz); - for (i_t k = 0; k < nz; ++k) { - const i_t i = xi_workspace_[m + k]; - const f_t x_val = x_workspace_[i]; - xi_workspace_[i] = 0; - x_workspace_[i] = 0.0; - xi_workspace_[m + k] = 0; - - if (x_val == 0.0) { continue; } - S_.i[S_nz] = i; - S_.x[S_nz] = x_val; - S_nz++; - } - S_.col_start[S_start + 1] = S_nz; + S_.append_column(nz, xi_workspace_.data() + m, x_workspace_.data()); // Gather etilde into a column of S - etilde.sort(); - const i_t etilde_nz = etilde.i.size(); - for (i_t k = 0; k < etilde_nz; ++k) { - if (etilde.x[k] == 0.0) { continue; } - S_.i[S_nz] = etilde.i[k]; - S_.x[S_nz] = etilde.x[k]; - S_nz++; - } - S_.col_start[S_start + 2] = S_nz; + etilde.sort(); // Needs to be sorted for the sparse dot. TODO(CMM): Is etilde sorted on input? 
+ S_.append_column(etilde); // Compute mu = 1 + v^T * u mu_values_.push_back(1.0 + sparse_dot(S_.i.data() + S_.col_start[S_start], @@ -1944,6 +1930,14 @@ i_t basis_update_mpf_t::update(const sparse_vector_t& utilde S_.i.data() + S_.col_start[S_start + 1], S_.x.data() + S_.col_start[S_start + 1], S_.col_start[S_start + 2] - S_.col_start[S_start + 1])); + // Clear the workspace + for (i_t k = 0; k < nz; ++k) { + const i_t i = xi_workspace_[m + k]; + xi_workspace_[i] = 0; + x_workspace_[i] = 0.0; + xi_workspace_[m + k] = 0; + } + #ifdef PRINT_MU_INFO printf("Update mu %e u nz %d v nz %d\n", mu_values_.back(), @@ -1971,16 +1965,9 @@ void basis_update_mpf_t::l_multiply(std::vector& inout) const const f_t mu = mu_values_[k]; // dot = v^T b - f_t dot = 0.0; - for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) { - const i_t i = S_.i[p]; - dot += S_.x[p] * inout[i]; - } + f_t dot = dot_product(v_col, inout); const f_t theta = dot; - for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) { - const i_t i = S_.i[p]; - inout[i] += theta * S_.x[p]; - } + add_sparse_column(S_, u_col, theta, inout); } std::vector out(m, 0.0); @@ -1997,6 +1984,7 @@ void basis_update_mpf_t::l_transpose_multiply(std::vector& inout) inout = out; + const f_t zero_tol = 1e-13; for (i_t k = 0; k < num_updates_; ++k) { const i_t u_col = 2 * k; const i_t v_col = 2 * k + 1; @@ -2005,17 +1993,10 @@ void basis_update_mpf_t::l_transpose_multiply(std::vector& inout) // T_k = ( I + u v^T) // T_k^T = ( I + v u^T) // T_k^T * b = b + v * (u^T * b) = b + theta * v, theta = u^T * b - f_t dot = 0.0; - for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) { - const i_t i = S_.i[p]; - dot += S_.x[p] * inout[i]; - } + f_t dot = dot_product(u_col, inout); const f_t theta = dot; - if (std::abs(theta) > 1e-13) { - for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) { - const i_t i = S_.i[p]; - inout[i] += theta * S_.x[p]; - } + if (std::abs(theta) > zero_tol) { + 
add_sparse_column(S_, v_col, theta, inout); } } } @@ -2039,10 +2020,7 @@ void basis_update_mpf_t::multiply_lu(csc_matrix_t& out) cons out.col_start[j] = B_nz; std::vector Uj(m, 0.0); - for (i_t p = U0_.col_start[j]; p < U0_.col_start[j + 1]; ++p) { - const i_t i = U0_.i[p]; - Uj[i] = U0_.x[p]; - } + U0_.load_a_column(j, Uj); l_multiply(Uj); for (i_t i = 0; i < m; ++i) { if (Uj[i] != 0.0) { diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp index bd830a1edf..f260688cd3 100644 --- a/cpp/src/dual_simplex/basis_updates.hpp +++ b/cpp/src/dual_simplex/basis_updates.hpp @@ -21,6 +21,8 @@ #include #include +#include + namespace cuopt::linear_programming::dual_simplex { // Forrest-Tomlin update to the LU factorization of a basis matrix B @@ -170,6 +172,8 @@ class basis_update_t { mutable csc_matrix_t L0_transpose_; // Needed for sparse solves }; + +// Middle product form update to the LU factorization of a basis matrix B template class basis_update_mpf_t { public: @@ -181,7 +185,7 @@ class basis_update_mpf_t { U0_(Uinit), row_permutation_(p), inverse_row_permutation_(p.size()), - S_(Linit.m, 1, 0), + S_(Linit.m, 0, 0), col_permutation_(Linit.m), inverse_col_permutation_(Linit.m), xi_workspace_(2 * Linit.m, 0), @@ -333,15 +337,14 @@ class basis_update_mpf_t { { pivot_indices_.clear(); pivot_indices_.reserve(L0_.m); - for (i_t k = 0; k < L0_.m; ++k) { - col_permutation_[k] = k; - inverse_col_permutation_[k] = k; - } + std::iota(col_permutation_.begin(), col_permutation_.end(), 0); + std::iota(inverse_col_permutation_.begin(), inverse_col_permutation_.end(), 0); S_.col_start.resize(refactor_frequency_ + 1); S_.col_start[0] = 0; S_.col_start[1] = 0; S_.i.clear(); S_.x.clear(); + S_.n = 0; mu_values_.clear(); mu_values_.reserve(refactor_frequency_); num_updates_ = 0; @@ -361,6 +364,11 @@ class basis_update_mpf_t { void solve_to_sparse_vector(i_t top, sparse_vector_t& out) const; i_t scatter_into_workspace(const sparse_vector_t& in) 
const; void gather_into_sparse_vector(i_t nz, sparse_vector_t& out) const; + i_t nonzeros(const std::vector& x) const; + f_t dot_product(i_t col, const std::vector& x) const; + f_t dot_product(i_t col, const std::vector& mark, const std::vector& x) const; + void add_sparse_column(const csc_matrix_t& S, i_t col, f_t theta, std::vector& x) const; + void add_sparse_column(const csc_matrix_t& S, i_t col, f_t theta, std::vector& mark, i_t& nz, std::vector& x) const; void l_multiply(std::vector& inout) const; void l_transpose_multiply(std::vector& inout) const; diff --git a/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp b/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp index edf62915f7..1d741ba288 100644 --- a/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp +++ b/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp @@ -33,7 +33,7 @@ class bound_flipping_ratio_test_t { f_t initial_slope, const std::vector& lower, const std::vector& upper, - const std::vector& bounded_variables, + const std::vector& bounded_variables, const std::vector& vstatus, const std::vector& nonbasic_list, const std::vector& z, @@ -87,7 +87,7 @@ class bound_flipping_ratio_test_t { const std::vector& lower_; const std::vector& upper_; - const std::vector& bounded_variables_; + const std::vector& bounded_variables_; const std::vector& nonbasic_list_; const std::vector& vstatus_; const std::vector& z_; diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 606542da52..88a6b0ff78 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -241,6 +242,8 @@ void initial_perturbation(const lp_problem_t& lp, objective.resize(n); f_t sum_perturb = 0.0; i_t num_perturb = 0; + + random_t random(settings.seed); for (i_t j = 0; j < n; ++j) { f_t obj = objective[j] = lp.objective[j]; @@ -252,7 +255,7 @@ void initial_perturbation(const lp_problem_t& lp, continue; } - const f_t 
rand_val = static_cast(std::rand() / (RAND_MAX + 1.0)); + const f_t rand_val = random.random(); const f_t perturb = (1e-5 * std::abs(obj) + 1e-7 * max_abs_obj_coeff + 10 * dual_tol) * (1.0 + rand_val); @@ -306,7 +309,7 @@ void compute_reduced_cost_update(const lp_problem_t& lp, } delta_z[j] = -dot; if (dot != 0.0) { - delta_z_indices.push_back(j); + delta_z_indices.push_back(j); // Note delta_z_indices has n elements reserved delta_z_mark[j] = 1; } } @@ -336,7 +339,7 @@ void compute_delta_z(const csc_matrix_t& A_transpose, delta_z[j] -= delta_y_i * A_transpose.x[p]; if (!delta_z_mark[j]) { delta_z_mark[j] = 1; - delta_z_indices.push_back(j); + delta_z_indices.push_back(j); // Note delta_z_indices has n elements reserved } } } @@ -526,12 +529,12 @@ void vstatus_changes(const std::vector& vstatus, template void compute_bounded_info(const std::vector& lower, const std::vector& upper, - std::vector& bounded_variables) + std::vector& bounded_variables) { const size_t n = lower.size(); for (size_t j = 0; j < n; j++) { const bool bounded = (lower[j] > -inf) && (upper[j] < inf) && (lower[j] != upper[j]); - bounded_variables[j] = bounded; + bounded_variables[j] = static_cast(bounded); } } @@ -665,7 +668,6 @@ void update_single_primal_infeasibility(const std::vector& lower, i_t j, f_t& primal_inf) { - const f_t now_feasible = std::numeric_limits::denorm_min(); const f_t old_val = squared_infeasibilities[j]; // x_j < l_j - epsilon => -x_j + l_j > epsilon const f_t lower_infeas = lower[j] - x[j]; @@ -689,7 +691,7 @@ void update_single_primal_infeasibility(const std::vector& lower, if (old_val != 0.0) { // We were previously infeasible, primal_inf = std::max(0.0, primal_inf - old_val); - squared_infeasibilities[j] = now_feasible; + squared_infeasibilities[j] = 0.0; } else { // Still feasible } @@ -708,7 +710,6 @@ void update_primal_infeasibilities(const lp_problem_t& lp, std::vector& infeasibility_indices, f_t& primal_inf) { - const f_t now_feasible = 
std::numeric_limits::denorm_min(); const f_t primal_tol = settings.primal_tol; const i_t nz = basic_change_list.size(); for (i_t k = 0; k < nz; ++k) { @@ -719,7 +720,7 @@ void update_primal_infeasibilities(const lp_problem_t& lp, if (j == leaving_index) { // Force the leaving variable to be feasible const f_t old_val = squared_infeasibilities[j]; - squared_infeasibilities[j] = now_feasible; + squared_infeasibilities[j] = 0.0; primal_inf = std::max(0.0, primal_inf - old_val); continue; } @@ -738,26 +739,24 @@ template void clean_up_infeasibilities(std::vector& squared_infeasibilities, std::vector& infeasibility_indices) { - const f_t now_feasible = std::numeric_limits::denorm_min(); bool needs_clean_up = false; for (i_t k = 0; k < infeasibility_indices.size(); ++k) { const i_t j = infeasibility_indices[k]; const f_t squared_infeas = squared_infeasibilities[j]; - if (squared_infeas == now_feasible) { needs_clean_up = true; } + if (squared_infeas == 0.0) { needs_clean_up = true; } } if (needs_clean_up) { for (i_t k = 0; k < infeasibility_indices.size(); ++k) { const i_t j = infeasibility_indices[k]; const f_t squared_infeas = squared_infeasibilities[j]; - if (squared_infeas == now_feasible) { + if (squared_infeas == 0.0) { // Set to the last element const i_t sz = infeasibility_indices.size(); infeasibility_indices[k] = infeasibility_indices[sz - 1]; infeasibility_indices.pop_back(); - squared_infeasibilities[j] = 0.0; i_t new_j = infeasibility_indices[k]; - if (squared_infeasibilities[new_j] == now_feasible) { k--; } + if (squared_infeasibilities[new_j] == 0.0) { k--; } } } } @@ -775,10 +774,8 @@ i_t steepest_edge_pricing_with_infeasibilities(const lp_problem_t& lp, i_t& basic_leaving, f_t& max_val) { - const f_t now_feasible = std::numeric_limits::denorm_min(); max_val = 0.0; i_t leaving_index = -1; - bool needs_clean_up = false; const i_t nz = infeasibility_indices.size(); for (i_t k = 0; k < nz; ++k) { const i_t j = infeasibility_indices[k]; @@ -1043,7 +1040,7 @@ 
i_t phase2_ratio_test(const lp_problem_t& lp, template i_t flip_bounds(const lp_problem_t& lp, const simplex_solver_settings_t& settings, - const std::vector& bounded_variables, + const std::vector& bounded_variables, const std::vector& objective, const std::vector& z, const std::vector& delta_z_indices, @@ -2318,9 +2315,11 @@ dual::status_t dual_phase2(i_t phase, std::vector squared_infeasibilities; std::vector infeasibility_indices; + delta_z_indices.reserve(n); + phase2::reset_basis_mark(basic_list, nonbasic_list, basic_mark, nonbasic_mark); - std::vector bounded_variables(n, false); + std::vector bounded_variables(n, 0); phase2::compute_bounded_info(lp.lower, lp.upper, bounded_variables); f_t primal_infeasibility = phase2::compute_initial_primal_infeasibilities( @@ -2828,6 +2827,7 @@ dual::status_t dual_phase2(i_t phase, basis_repair(lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); if (factorize_basis( lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) { + settings.log.printf("Failed to repair basis. %d deficient columns.\n", static_cast(deficient.size())); return dual::status_t::NUMERICAL; } } @@ -2855,7 +2855,6 @@ dual::status_t dual_phase2(i_t phase, iter++; - // TODO(CMM): Do we also need to clear delta_y? 
// Clear delta_z phase2::clear_delta_z(entering_index, leaving_index, delta_z_mark, delta_z_indices, delta_z); diff --git a/cpp/src/dual_simplex/random.hpp b/cpp/src/dual_simplex/random.hpp index e1ad01fef2..dfc60dbd53 100644 --- a/cpp/src/dual_simplex/random.hpp +++ b/cpp/src/dual_simplex/random.hpp @@ -21,7 +21,7 @@ namespace cuopt::linear_programming::dual_simplex { -template +template class random_t { public: random_t(i_t seed) : gen(seed) {} @@ -34,6 +34,12 @@ class random_t { return distrib(gen); } + f_t random() + { + std::uniform_real_distribution<> distrib(0.0, 1.0); + return distrib(gen); + } + private: std::mt19937 gen; }; diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp index ca5f31c5bf..bb20a5becf 100644 --- a/cpp/src/dual_simplex/sparse_matrix.cpp +++ b/cpp/src/dual_simplex/sparse_matrix.cpp @@ -16,6 +16,7 @@ */ #include +#include #include @@ -148,6 +149,61 @@ i_t csc_matrix_t::load_a_column(i_t j, std::vector& Aj) const return (col_end - col_start); } +template +void csc_matrix_t::append_column(const std::vector& x) +{ + const i_t m = this->m; + assert(x.size() == m); + const i_t xsz = x.size(); + i_t nz = this->col_start[this->n]; + for (i_t j = 0; j < xsz; ++j) { + if (x[j] != 0.0) { + this->i[nz] = j; + this->x[nz] = x[j]; + nz++; + } + } + this->col_start[this->n + 1] = nz; + this->n++; +} + +template +void csc_matrix_t::append_column(const sparse_vector_t& x) +{ + const i_t m = this->m; + assert(x.n == m); + i_t nz = this->col_start[this->n]; + const i_t xnz = x.i.size(); + for (i_t k = 0; k < xnz; ++k) { + const i_t i = x.i[k]; + const f_t x_val = x.x[k]; + if (x_val != 0.0) { + this->i[nz] = i; + this->x[nz] = x_val; + nz++; + } + } + this->col_start[this->n + 1] = nz; + this->n++; +} + +template +void csc_matrix_t::append_column(i_t x_nz, i_t* i, f_t* x) +{ + i_t nz = this->col_start[this->n]; + for (i_t k = 0; k < x_nz; ++k) { + const i_t i_val = i[k]; + const f_t x_val = x[i_val]; + if (x_val != 
0.0) { + this->i[nz] = i_val; + this->x[nz] = x_val; + nz++; + } + } + this->col_start[this->n + 1] = nz; + this->n++; +} + template i_t csc_matrix_t::transpose(csc_matrix_t& AT) const { diff --git a/cpp/src/dual_simplex/sparse_matrix.hpp b/cpp/src/dual_simplex/sparse_matrix.hpp index 5e12f6a5da..0b765ecfb3 100644 --- a/cpp/src/dual_simplex/sparse_matrix.hpp +++ b/cpp/src/dual_simplex/sparse_matrix.hpp @@ -30,6 +30,9 @@ namespace cuopt::linear_programming::dual_simplex { template class csr_matrix_t; // Forward declaration of CSR matrix needed to define CSC matrix +template +class sparse_vector_t; // Forward declaration of sparse vector needed to define CSC matrix + // A sparse matrix stored in compressed sparse column format template class csc_matrix_t { @@ -60,6 +63,16 @@ class csc_matrix_t { // Compute the transpose of A i_t transpose(csc_matrix_t& AT) const; + + // Append a dense column to the matrix. Assumes the matrix has already been resized accordingly + void append_column(const std::vector& x); + + // Append a sparse column to the matrix. Assumes the matrix has already been resized accordingly + void append_column(const sparse_vector_t& x); + + // Append a sparse column to the matrix. 
Assumes the matrix has already been resized accordingly + void append_column(i_t nz, i_t* i, f_t* x); + // Remove columns from the matrix i_t remove_columns(const std::vector& cols_to_remove); From 45f5a436cb089d84bf3690efd05898cbca5c94e7 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Mon, 28 Jul 2025 19:33:19 -0700 Subject: [PATCH 21/28] Formatting --- cpp/src/dual_simplex/basis_updates.cpp | 64 +++++++++++++------------- cpp/src/dual_simplex/basis_updates.hpp | 13 ++++-- cpp/src/dual_simplex/phase2.cpp | 21 +++++---- cpp/src/dual_simplex/sparse_matrix.cpp | 6 +-- cpp/src/dual_simplex/sparse_matrix.hpp | 1 - 5 files changed, 55 insertions(+), 50 deletions(-) diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index 7ee674ef8d..a7654ce124 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -1127,7 +1127,7 @@ void basis_update_mpf_t::gather_into_sparse_vector(i_t nz, out.x.reserve(nz); const f_t zero_tol = 1e-13; for (i_t k = 0; k < nz; ++k) { - const i_t i = xi_workspace_[m + k]; + const i_t i = xi_workspace_[m + k]; if (std::abs(x_workspace_[i]) > zero_tol) { out.i.push_back(i); out.x.push_back(x_workspace_[i]); @@ -1166,10 +1166,10 @@ void basis_update_mpf_t::solve_to_sparse_vector(i_t top, out.i.clear(); out.x.reserve(nz); out.i.reserve(nz); - i_t k = 0; + i_t k = 0; const f_t zero_tol = 1e-13; for (i_t p = top; p < m; ++p) { - const i_t i = xi_workspace_[p]; + const i_t i = xi_workspace_[p]; if (std::abs(x_workspace_[i]) > zero_tol) { out.i.push_back(i); out.x.push_back(x_workspace_[i]); @@ -1217,7 +1217,7 @@ void basis_update_mpf_t::grow_storage(i_t nz, i_t& S_start, i_t& S_nz) template i_t basis_update_mpf_t::nonzeros(const std::vector& x) const { - i_t nz = 0; + i_t nz = 0; const i_t xsz = x.size(); for (i_t i = 0; i < xsz; ++i) { if (x[i] != 0.0) { nz++; } @@ -1229,7 +1229,7 @@ i_t basis_update_mpf_t::nonzeros(const std::vector& x) const template f_t 
basis_update_mpf_t::dot_product(i_t col, const std::vector& x) const { - f_t dot = 0.0; + f_t dot = 0.0; const i_t col_start = S_.col_start[col]; const i_t col_end = S_.col_start[col + 1]; for (i_t p = col_start; p < col_end; ++p) { @@ -1241,23 +1241,26 @@ f_t basis_update_mpf_t::dot_product(i_t col, const std::vector& x // dot = S(:, col)' * x template -f_t basis_update_mpf_t::dot_product(i_t col, const std::vector& mark, const std::vector& x) const +f_t basis_update_mpf_t::dot_product(i_t col, + const std::vector& mark, + const std::vector& x) const { - f_t dot = 0.0; + f_t dot = 0.0; const i_t col_start = S_.col_start[col]; const i_t col_end = S_.col_start[col + 1]; for (i_t p = col_start; p < col_end; ++p) { const i_t i = S_.i[p]; - if (mark[i]) { - dot += S_.x[p] * x[i]; - } + if (mark[i]) { dot += S_.x[p] * x[i]; } } return dot; } // x <- x + theta * S(:, col) template -void basis_update_mpf_t::add_sparse_column(const csc_matrix_t& S, i_t col, f_t theta, std::vector& x) const +void basis_update_mpf_t::add_sparse_column(const csc_matrix_t& S, + i_t col, + f_t theta, + std::vector& x) const { const i_t col_start = S.col_start[col]; const i_t col_end = S.col_start[col + 1]; @@ -1268,9 +1271,14 @@ void basis_update_mpf_t::add_sparse_column(const csc_matrix_t -void basis_update_mpf_t::add_sparse_column(const csc_matrix_t& S, i_t col, f_t theta, std::vector& mark, i_t& nz, std::vector& x) const +void basis_update_mpf_t::add_sparse_column(const csc_matrix_t& S, + i_t col, + f_t theta, + std::vector& mark, + i_t& nz, + std::vector& x) const { - const i_t m = L0_.m; + const i_t m = L0_.m; const i_t col_start = S.col_start[col]; const i_t col_end = S.col_start[col + 1]; for (i_t p = col_start; p < col_end; ++p) { @@ -1390,7 +1398,8 @@ i_t basis_update_mpf_t::b_transpose_solve(const sparse_vector_t 1e-4) { printf( - "B transpose solve L transpose solve error %e: index %d multiply %e rhs %e. update %d. 
use hypersparse %d\n", + "B transpose solve L transpose solve error %e: index %d multiply %e rhs %e. update %d. use " + "hypersparse %d\n", std::abs(solution_dense[k] - r_dense[k]), k, solution_dense[k], @@ -1448,12 +1457,10 @@ i_t basis_update_mpf_t::l_transpose_solve(std::vector& rhs) const const f_t mu = mu_values_[k]; // dot = u^T * b - f_t dot = dot_product(u_col, rhs); + f_t dot = dot_product(u_col, rhs); const f_t theta = dot / mu; - if (std::abs(theta) > zero_tol) { - add_sparse_column(S_, v_col, -theta, rhs); - } + if (std::abs(theta) > zero_tol) { add_sparse_column(S_, v_col, -theta, rhs); } } // Solve for x such that L0^T * x = b' @@ -1526,7 +1533,6 @@ i_t basis_update_mpf_t::l_transpose_solve(sparse_vector_t& r } #endif - sparse_vector_t b(m, nz); gather_into_sparse_vector(nz, b); i_t top = dual_simplex::sparse_triangle_solve( @@ -1580,9 +1586,7 @@ i_t basis_update_mpf_t::b_solve(const std::vector& rhs, std::vector rhs_permuted = solution; #endif l_solve(solution); - if (need_Lsol) { - Lsol = solution; - } + if (need_Lsol) { Lsol = solution; } #ifdef CHECK_L_SOLVE std::vector Lsol_check = Lsol; @@ -1656,9 +1660,7 @@ i_t basis_update_mpf_t::b_solve(const sparse_vector_t& rhs, l_solve(solution_dense); solution.from_dense(solution_dense); } - if (need_Lsol) { - Lsol = solution; - } + if (need_Lsol) { Lsol = solution; } sum_L_ += static_cast(solution.i.size()) / input_size; #ifdef CHECK_L_SOLVE @@ -1766,9 +1768,7 @@ i_t basis_update_mpf_t::l_solve(std::vector& rhs) const f_t dot = dot_product(v_col, rhs); const f_t theta = dot / mu; - if (std::abs(theta) > zero_tol) { - add_sparse_column(S_, u_col, -theta, rhs); - } + if (std::abs(theta) > zero_tol) { add_sparse_column(S_, u_col, -theta, rhs); } } #ifdef CHECK_L_SOLVE @@ -1965,7 +1965,7 @@ void basis_update_mpf_t::l_multiply(std::vector& inout) const const f_t mu = mu_values_[k]; // dot = v^T b - f_t dot = dot_product(v_col, inout); + f_t dot = dot_product(v_col, inout); const f_t theta = dot; 
add_sparse_column(S_, u_col, theta, inout); } @@ -1993,11 +1993,9 @@ void basis_update_mpf_t::l_transpose_multiply(std::vector& inout) // T_k = ( I + u v^T) // T_k^T = ( I + v u^T) // T_k^T * b = b + v * (u^T * b) = b + theta * v, theta = u^T * b - f_t dot = dot_product(u_col, inout); + f_t dot = dot_product(u_col, inout); const f_t theta = dot; - if (std::abs(theta) > zero_tol) { - add_sparse_column(S_, v_col, theta, inout); - } + if (std::abs(theta) > zero_tol) { add_sparse_column(S_, v_col, theta, inout); } } } diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp index f260688cd3..73592e1801 100644 --- a/cpp/src/dual_simplex/basis_updates.hpp +++ b/cpp/src/dual_simplex/basis_updates.hpp @@ -172,7 +172,6 @@ class basis_update_t { mutable csc_matrix_t L0_transpose_; // Needed for sparse solves }; - // Middle product form update to the LU factorization of a basis matrix B template class basis_update_mpf_t { @@ -367,8 +366,16 @@ class basis_update_mpf_t { i_t nonzeros(const std::vector& x) const; f_t dot_product(i_t col, const std::vector& x) const; f_t dot_product(i_t col, const std::vector& mark, const std::vector& x) const; - void add_sparse_column(const csc_matrix_t& S, i_t col, f_t theta, std::vector& x) const; - void add_sparse_column(const csc_matrix_t& S, i_t col, f_t theta, std::vector& mark, i_t& nz, std::vector& x) const; + void add_sparse_column(const csc_matrix_t& S, + i_t col, + f_t theta, + std::vector& x) const; + void add_sparse_column(const csc_matrix_t& S, + i_t col, + f_t theta, + std::vector& mark, + i_t& nz, + std::vector& x) const; void l_multiply(std::vector& inout) const; void l_transpose_multiply(std::vector& inout) const; diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 88a6b0ff78..9a9f0746f7 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -21,9 +21,9 @@ #include #include #include +#include #include #include -#include #include 
#include @@ -668,7 +668,7 @@ void update_single_primal_infeasibility(const std::vector& lower, i_t j, f_t& primal_inf) { - const f_t old_val = squared_infeasibilities[j]; + const f_t old_val = squared_infeasibilities[j]; // x_j < l_j - epsilon => -x_j + l_j > epsilon const f_t lower_infeas = lower[j] - x[j]; // x_j > u_j + epsilon => x_j - u_j > epsilon @@ -710,8 +710,8 @@ void update_primal_infeasibilities(const lp_problem_t& lp, std::vector& infeasibility_indices, f_t& primal_inf) { - const f_t primal_tol = settings.primal_tol; - const i_t nz = basic_change_list.size(); + const f_t primal_tol = settings.primal_tol; + const i_t nz = basic_change_list.size(); for (i_t k = 0; k < nz; ++k) { const i_t j = basic_list[basic_change_list[k]]; // The change list will contain the leaving variable, @@ -739,7 +739,7 @@ template void clean_up_infeasibilities(std::vector& squared_infeasibilities, std::vector& infeasibility_indices) { - bool needs_clean_up = false; + bool needs_clean_up = false; for (i_t k = 0; k < infeasibility_indices.size(); ++k) { const i_t j = infeasibility_indices[k]; const f_t squared_infeas = squared_infeasibilities[j]; @@ -755,7 +755,7 @@ void clean_up_infeasibilities(std::vector& squared_infeasibilities, const i_t sz = infeasibility_indices.size(); infeasibility_indices[k] = infeasibility_indices[sz - 1]; infeasibility_indices.pop_back(); - i_t new_j = infeasibility_indices[k]; + i_t new_j = infeasibility_indices[k]; if (squared_infeasibilities[new_j] == 0.0) { k--; } } } @@ -774,9 +774,9 @@ i_t steepest_edge_pricing_with_infeasibilities(const lp_problem_t& lp, i_t& basic_leaving, f_t& max_val) { - max_val = 0.0; - i_t leaving_index = -1; - const i_t nz = infeasibility_indices.size(); + max_val = 0.0; + i_t leaving_index = -1; + const i_t nz = infeasibility_indices.size(); for (i_t k = 0; k < nz; ++k) { const i_t j = infeasibility_indices[k]; const f_t squared_infeas = squared_infeasibilities[j]; @@ -2827,7 +2827,8 @@ dual::status_t dual_phase2(i_t 
phase, basis_repair(lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); if (factorize_basis( lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) { - settings.log.printf("Failed to repair basis. %d deficient columns.\n", static_cast(deficient.size())); + settings.log.printf("Failed to repair basis. %d deficient columns.\n", + static_cast(deficient.size())); return dual::status_t::NUMERICAL; } } diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp index bb20a5becf..dc4df39904 100644 --- a/cpp/src/dual_simplex/sparse_matrix.cpp +++ b/cpp/src/dual_simplex/sparse_matrix.cpp @@ -155,7 +155,7 @@ void csc_matrix_t::append_column(const std::vector& x) const i_t m = this->m; assert(x.size() == m); const i_t xsz = x.size(); - i_t nz = this->col_start[this->n]; + i_t nz = this->col_start[this->n]; for (i_t j = 0; j < xsz; ++j) { if (x[j] != 0.0) { this->i[nz] = j; @@ -172,10 +172,10 @@ void csc_matrix_t::append_column(const sparse_vector_t& x) { const i_t m = this->m; assert(x.n == m); - i_t nz = this->col_start[this->n]; + i_t nz = this->col_start[this->n]; const i_t xnz = x.i.size(); for (i_t k = 0; k < xnz; ++k) { - const i_t i = x.i[k]; + const i_t i = x.i[k]; const f_t x_val = x.x[k]; if (x_val != 0.0) { this->i[nz] = i; diff --git a/cpp/src/dual_simplex/sparse_matrix.hpp b/cpp/src/dual_simplex/sparse_matrix.hpp index 0b765ecfb3..9cc3d6380c 100644 --- a/cpp/src/dual_simplex/sparse_matrix.hpp +++ b/cpp/src/dual_simplex/sparse_matrix.hpp @@ -63,7 +63,6 @@ class csc_matrix_t { // Compute the transpose of A i_t transpose(csc_matrix_t& AT) const; - // Append a dense column to the matrix. 
Assumes the matrix has already been resized accordingly void append_column(const std::vector& x); From 60c03bb486b4fa07745229b43301f88106169e55 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Wed, 30 Jul 2025 08:22:49 -0700 Subject: [PATCH 22/28] Use std::is_sorted --- cpp/src/dual_simplex/sparse_vector.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/dual_simplex/sparse_vector.cpp b/cpp/src/dual_simplex/sparse_vector.cpp index 8a617ae28d..bcb05d63b2 100644 --- a/cpp/src/dual_simplex/sparse_vector.cpp +++ b/cpp/src/dual_simplex/sparse_vector.cpp @@ -183,8 +183,8 @@ void sparse_vector_t::sort() // Check #ifdef CHECK_SORT - for (i_t k = 0; k < i.size() - 1; ++k) { - if (i[k] > i[k + 1]) { printf("Sort error %d %d\n", i[k], i[k + 1]); } + if (!std::is_sorted(i.begin(), i.end())) { + printf("Sort error\n"); } #endif } From ee5b17955e1335c369e620a171fa85d9d49f4cfc Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Wed, 30 Jul 2025 11:23:43 -0700 Subject: [PATCH 23/28] Catch an issue when primal step length would be inf. Add more leeway to test case --- cpp/src/dual_simplex/phase2.cpp | 18 ++++++++++++++---- .../c_api_tests/c_api_tests.cpp | 2 +- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 9a9f0746f7..f2f8dda7ad 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -1563,7 +1563,7 @@ void adjust_for_flips(const basis_update_mpf_t& ft, } template -void compute_delta_x(const lp_problem_t& lp, +i_t compute_delta_x(const lp_problem_t& lp, const basis_update_mpf_t& ft, i_t entering_index, i_t leaving_index, @@ -1602,7 +1602,11 @@ void compute_delta_x(const lp_problem_t& lp, #endif f_t scale = scaled_delta_xB_sparse.find_coefficient(basic_leaving_index); - assert(!std::isnan(scale)); + if (scale != scale) { + // We couldn't find a coefficient for the basic leaving index.
+ // Either this is a bug or the primal step length is inf. + return -1; + } const f_t primal_step_length = delta_x_leaving / scale; const i_t scaled_delta_xB_nz = scaled_delta_xB_sparse.i.size(); for (i_t k = 0; k < scaled_delta_xB_nz; ++k) { @@ -1611,6 +1615,7 @@ void compute_delta_x(const lp_problem_t& lp, } delta_x[leaving_index] = delta_x_leaving; delta_x[entering_index] = primal_step_length; + return 0; } template @@ -2680,7 +2685,7 @@ dual::status_t dual_phase2(i_t phase, sparse_vector_t utilde_sparse(m, 0); sparse_vector_t scaled_delta_xB_sparse(m, 0); sparse_vector_t rhs_sparse(lp.A, entering_index); - phase2::compute_delta_x(lp, + if (phase2::compute_delta_x(lp, ft, entering_index, leaving_index, @@ -2692,7 +2697,10 @@ dual::status_t dual_phase2(i_t phase, x, utilde_sparse, scaled_delta_xB_sparse, - delta_x); + delta_x) == -1) { + settings.log.printf("Failed to compute delta_x. Iter %d\n", iter); + return dual::status_t::NUMERICAL; + } timers.ftran_time += timers.stop_timer(); @@ -2810,6 +2818,8 @@ dual::status_t dual_phase2(i_t phase, basic_mark[leaving_index] = -1; basic_mark[entering_index] = basic_leaving_index; + phase2::check_primal_infeasibilities_basic(basic_list, basic_mark, squared_infeasibilities, infeasibility_indices, 7); + timers.start_timer(); // Refactor or update the basis factorization bool should_refactor = ft.num_updates() > settings.refactor_frequency; diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp index e675a3d5d3..57a4f432ba 100644 --- a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp +++ b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp @@ -57,7 +57,7 @@ TEST_P(TimeLimitTestFixture, time_limit) method), CUOPT_SUCCESS); EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_TIME_LIMIT); - EXPECT_NEAR(solve_time, target_solve_time, 0.1); + EXPECT_NEAR(solve_time, target_solve_time, 0.5); } INSTANTIATE_TEST_SUITE_P( c_api, From 
3d543851a7bc4889b0281949983edbe7ceb23ee5 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Wed, 30 Jul 2025 11:24:17 -0700 Subject: [PATCH 24/28] Formatting --- cpp/src/dual_simplex/phase2.cpp | 51 +++++++++++++------------- cpp/src/dual_simplex/sparse_vector.cpp | 4 +- 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index f2f8dda7ad..4889a1b84d 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -1564,18 +1564,18 @@ void adjust_for_flips(const basis_update_mpf_t& ft, template i_t compute_delta_x(const lp_problem_t& lp, - const basis_update_mpf_t& ft, - i_t entering_index, - i_t leaving_index, - i_t basic_leaving_index, - i_t direction, - const std::vector& basic_list, - const std::vector& delta_x_flip, - const sparse_vector_t& rhs_sparse, - const std::vector& x, - sparse_vector_t& utilde_sparse, - sparse_vector_t& scaled_delta_xB_sparse, - std::vector& delta_x) + const basis_update_mpf_t& ft, + i_t entering_index, + i_t leaving_index, + i_t basic_leaving_index, + i_t direction, + const std::vector& basic_list, + const std::vector& delta_x_flip, + const sparse_vector_t& rhs_sparse, + const std::vector& x, + sparse_vector_t& utilde_sparse, + sparse_vector_t& scaled_delta_xB_sparse, + std::vector& delta_x) { f_t delta_x_leaving = direction == 1 ? 
lp.lower[leaving_index] - x[leaving_index] : lp.upper[leaving_index] - x[leaving_index]; @@ -2686,18 +2686,18 @@ dual::status_t dual_phase2(i_t phase, sparse_vector_t scaled_delta_xB_sparse(m, 0); sparse_vector_t rhs_sparse(lp.A, entering_index); if (phase2::compute_delta_x(lp, - ft, - entering_index, - leaving_index, - basic_leaving_index, - direction, - basic_list, - delta_x_flip, - rhs_sparse, - x, - utilde_sparse, - scaled_delta_xB_sparse, - delta_x) == -1) { + ft, + entering_index, + leaving_index, + basic_leaving_index, + direction, + basic_list, + delta_x_flip, + rhs_sparse, + x, + utilde_sparse, + scaled_delta_xB_sparse, + delta_x) == -1) { settings.log.printf("Failed to compute delta_x. Iter %d\n", iter); return dual::status_t::NUMERICAL; } @@ -2818,7 +2818,8 @@ dual::status_t dual_phase2(i_t phase, basic_mark[leaving_index] = -1; basic_mark[entering_index] = basic_leaving_index; - phase2::check_primal_infeasibilities_basic(basic_list, basic_mark, squared_infeasibilities, infeasibility_indices, 7); + phase2::check_primal_infeasibilities_basic( + basic_list, basic_mark, squared_infeasibilities, infeasibility_indices, 7); timers.start_timer(); // Refactor or update the basis factorization diff --git a/cpp/src/dual_simplex/sparse_vector.cpp b/cpp/src/dual_simplex/sparse_vector.cpp index bcb05d63b2..73a0c0a8f0 100644 --- a/cpp/src/dual_simplex/sparse_vector.cpp +++ b/cpp/src/dual_simplex/sparse_vector.cpp @@ -183,9 +183,7 @@ void sparse_vector_t::sort() // Check #ifdef CHECK_SORT - if (!std::is_sorted(i.begin(), i.end())) { - printf("Sort error\n"); - } + if (!std::is_sorted(i.begin(), i.end())) { printf("Sort error\n"); } #endif } From be60ae7a30faaaf405950216f01969530dd1921f Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Wed, 30 Jul 2025 11:25:28 -0700 Subject: [PATCH 25/28] Remove debug --- cpp/src/dual_simplex/phase2.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 
4889a1b84d..4749be83bc 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -2818,9 +2818,6 @@ dual::status_t dual_phase2(i_t phase, basic_mark[leaving_index] = -1; basic_mark[entering_index] = basic_leaving_index; - phase2::check_primal_infeasibilities_basic( - basic_list, basic_mark, squared_infeasibilities, infeasibility_indices, 7); - timers.start_timer(); // Refactor or update the basis factorization bool should_refactor = ft.num_updates() > settings.refactor_frequency; From 458e1dd5d0595fbad89185a3b2088adee588f75d Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Wed, 30 Jul 2025 15:25:08 -0700 Subject: [PATCH 26/28] Loosen tolerance on test again --- cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp index 57a4f432ba..bfd100946c 100644 --- a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp +++ b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp @@ -57,7 +57,7 @@ TEST_P(TimeLimitTestFixture, time_limit) method), CUOPT_SUCCESS); EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_TIME_LIMIT); - EXPECT_NEAR(solve_time, target_solve_time, 0.5); + EXPECT_NEAR(solve_time, target_solve_time, 1.0); } INSTANTIATE_TEST_SUITE_P( c_api, From c7a1b0efe30e1891dd817b8646433f512030a0b6 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Wed, 30 Jul 2025 20:49:31 -0700 Subject: [PATCH 27/28] Fix bug with nonbasic in infeasible list. 
Try to recover from numerical errors --- cpp/src/dual_simplex/basis_solves.cpp | 2 +- cpp/src/dual_simplex/branch_and_bound.cpp | 28 ++++++++++++ cpp/src/dual_simplex/phase2.cpp | 44 ++++++++++++++++++- cpp/src/dual_simplex/right_looking_lu.cpp | 4 +- .../dual_simplex/simplex_solver_settings.hpp | 2 + 5 files changed, 75 insertions(+), 5 deletions(-) diff --git a/cpp/src/dual_simplex/basis_solves.cpp b/cpp/src/dual_simplex/basis_solves.cpp index d363c3ee84..5f9375cdb5 100644 --- a/cpp/src/dual_simplex/basis_solves.cpp +++ b/cpp/src/dual_simplex/basis_solves.cpp @@ -364,7 +364,7 @@ i_t factorize_basis(const csc_matrix_t& A, for (i_t h = 0; h < Sdim; ++h) { identity[h] = h; } - Srank = right_looking_lu(S, medium_tol, identity, S_col_perm, SL, SU, S_perm_inv); + Srank = right_looking_lu(S, settings.threshold_partial_pivoting_tol, identity, S_col_perm, SL, SU, S_perm_inv); if (Srank != Sdim) { // Get the rank deficient columns deficient.resize(Sdim - Srank); diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 3f1798425c..147bc9bfb0 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -205,6 +205,29 @@ void graphviz_edge(const simplex_solver_settings_t& settings, } } +dual::status_t convert_lp_status_to_dual_status(lp_status_t status) +{ + if (status == lp_status_t::OPTIMAL) { + return dual::status_t::OPTIMAL; + } else if (status == lp_status_t::INFEASIBLE) { + return dual::status_t::DUAL_UNBOUNDED; + } else if (status == lp_status_t::ITERATION_LIMIT) { + return dual::status_t::ITERATION_LIMIT; + } else if (status == lp_status_t::TIME_LIMIT) { + return dual::status_t::TIME_LIMIT; + } else if (status == lp_status_t::NUMERICAL_ISSUES) { + return dual::status_t::NUMERICAL; + } else if (status == lp_status_t::CUTOFF) { + return dual::status_t::CUTOFF; + } else if (status == lp_status_t::CONCURRENT_LIMIT) { + return dual::status_t::CONCURRENT_LIMIT; + } else if (status 
== lp_status_t::UNSET) { + return dual::status_t::UNSET; + } else { + return dual::status_t::NUMERICAL; + } +} + } // namespace template @@ -674,6 +697,11 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut leaf_solution, node_iter, leaf_edge_norms); + if (lp_status == dual::status_t::NUMERICAL) { + settings.log.printf("Numerical issue node %d. Resolving from scratch.\n", nodes_explored); + lp_status_t second_status = solve_linear_program_advanced(leaf_problem, lp_start_time, lp_settings, leaf_solution, leaf_vstatus, leaf_edge_norms); + lp_status = convert_lp_status_to_dual_status(second_status); + } total_lp_solve_time += toc(lp_start_time); total_lp_iters += node_iter; diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 4749be83bc..5abecc3314 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -1811,6 +1811,20 @@ void check_primal_infeasibilities(const lp_problem_t& lp, } } +template +void check_basic_infeasibilities(const std::vector& basic_list, + const std::vector& basic_mark, + const std::vector& infeasibility_indices, + i_t info) +{ + for (i_t k = 0; k < infeasibility_indices.size(); ++k) { + const i_t j = infeasibility_indices[k]; + if (basic_mark[j] < 0) { + printf("%d basic_infeasibilities basic_mark[%d] < 0\n", info, j); + } + } +} + template void check_update(const lp_problem_t& lp, const simplex_solver_settings_t& settings, @@ -2330,6 +2344,10 @@ dual::status_t dual_phase2(i_t phase, f_t primal_infeasibility = phase2::compute_initial_primal_infeasibilities( lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); +#ifdef CHECK_BASIC_INFEASIBILITIES + phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 0); +#endif + csc_matrix_t A_transpose(1, 1, 0); lp.A.transpose(A_transpose); @@ -2756,6 +2774,9 @@ dual::status_t dual_phase2(i_t phase, timers.start_timer(); // Update primal infeasibilities due to changes in basic variables 
// from flipping bounds +#ifdef CHECK_BASIC_INFEASIBILITIES + phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 2); +#endif phase2::update_primal_infeasibilities(lp, settings, basic_list, @@ -2818,6 +2839,10 @@ dual::status_t dual_phase2(i_t phase, basic_mark[leaving_index] = -1; basic_mark[entering_index] = basic_leaving_index; +#ifdef CHECK_BASIC_INFEASIBILITIES + phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 5); +#endif + timers.start_timer(); // Refactor or update the basis factorization bool should_refactor = ft.num_updates() > settings.refactor_frequency; @@ -2829,21 +2854,36 @@ dual::status_t dual_phase2(i_t phase, should_refactor = recommend_refactor == 1; } +#ifdef CHECK_BASIC_INFEASIBILITIES + phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 6); +#endif if (should_refactor) { if (factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) { + if (toc(start_time) > settings.time_limit) { return dual::status_t::TIME_LIMIT; } basis_repair(lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); - if (factorize_basis( + i_t count = 0; + while (factorize_basis( lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) { settings.log.printf("Failed to repair basis. 
%d deficient columns.\n", static_cast(deficient.size())); - return dual::status_t::NUMERICAL; + if (toc(start_time) > settings.time_limit) { return dual::status_t::TIME_LIMIT; } + settings.threshold_partial_pivoting_tol = 1.0; + count++; + if (count > 100) { + return dual::status_t::NUMERICAL; + } + basis_repair(lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); } } reorder_basic_list(q, basic_list); ft.reset(L, U, p); phase2::reset_basis_mark(basic_list, nonbasic_list, basic_mark, nonbasic_mark); + phase2::compute_initial_primal_infeasibilities(lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); } +#ifdef CHECK_BASIC_INFEASIBILITIES + phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 7); +#endif timers.lu_update_time += timers.stop_timer(); timers.start_timer(); diff --git a/cpp/src/dual_simplex/right_looking_lu.cpp b/cpp/src/dual_simplex/right_looking_lu.cpp index 57eb9b01d5..2e5f572483 100644 --- a/cpp/src/dual_simplex/right_looking_lu.cpp +++ b/cpp/src/dual_simplex/right_looking_lu.cpp @@ -575,8 +575,8 @@ i_t right_looking_lu(const csc_matrix_t& A, i_t pivot_j = -1; i_t pivot_p = kNone; constexpr f_t pivot_tol = 1e-11; - constexpr f_t drop_tol = 1e-13; - constexpr f_t threshold_tol = 1.0 / 10.0; + const f_t drop_tol = tol == 1.0 ? 
0.0 : 1e-13; + const f_t threshold_tol = tol; markowitz_search(Cdegree, Rdegree, col_count, diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index a93a727083..a51ed19bcf 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -47,6 +47,7 @@ struct simplex_solver_settings_t { steepest_edge_ratio(0.5), steepest_edge_primal_tol(1e-9), hypersparse_threshold(0.05), + threshold_partial_pivoting_tol(1.0 / 10.0), use_steepest_edge_pricing(true), use_harris_ratio(false), use_bound_flip_ratio(true), @@ -89,6 +90,7 @@ struct simplex_solver_settings_t { steepest_edge_ratio; // the ratio of computed steepest edge mismatch from updated steepest edge f_t steepest_edge_primal_tol; // Primal tolerance divided by steepest edge norm f_t hypersparse_threshold; + mutable f_t threshold_partial_pivoting_tol; bool use_steepest_edge_pricing; // true if using steepest edge pricing, false if using max // infeasibility pricing bool use_harris_ratio; // true if using the harris ratio test From 8d15c2e023ae43111a1f88c7d2971d1d83af0a2f Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Wed, 30 Jul 2025 20:59:59 -0700 Subject: [PATCH 28/28] Formatting --- cpp/src/dual_simplex/basis_solves.cpp | 3 ++- cpp/src/dual_simplex/branch_and_bound.cpp | 3 ++- cpp/src/dual_simplex/phase2.cpp | 16 +++++++--------- cpp/src/dual_simplex/right_looking_lu.cpp | 8 ++++---- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/cpp/src/dual_simplex/basis_solves.cpp b/cpp/src/dual_simplex/basis_solves.cpp index 5f9375cdb5..53464fc664 100644 --- a/cpp/src/dual_simplex/basis_solves.cpp +++ b/cpp/src/dual_simplex/basis_solves.cpp @@ -364,7 +364,8 @@ i_t factorize_basis(const csc_matrix_t& A, for (i_t h = 0; h < Sdim; ++h) { identity[h] = h; } - Srank = right_looking_lu(S, settings.threshold_partial_pivoting_tol, identity, S_col_perm, SL, SU, S_perm_inv); + Srank = 
right_looking_lu( + S, settings.threshold_partial_pivoting_tol, identity, S_col_perm, SL, SU, S_perm_inv); if (Srank != Sdim) { // Get the rank deficient columns deficient.resize(Sdim - Srank); diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 147bc9bfb0..9067040129 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -699,7 +699,8 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut leaf_edge_norms); if (lp_status == dual::status_t::NUMERICAL) { settings.log.printf("Numerical issue node %d. Resolving from scratch.\n", nodes_explored); - lp_status_t second_status = solve_linear_program_advanced(leaf_problem, lp_start_time, lp_settings, leaf_solution, leaf_vstatus, leaf_edge_norms); + lp_status_t second_status = solve_linear_program_advanced( + leaf_problem, lp_start_time, lp_settings, leaf_solution, leaf_vstatus, leaf_edge_norms); lp_status = convert_lp_status_to_dual_status(second_status); } total_lp_solve_time += toc(lp_start_time); diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 5abecc3314..a83e3bf72a 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -1819,9 +1819,7 @@ void check_basic_infeasibilities(const std::vector& basic_list, { for (i_t k = 0; k < infeasibility_indices.size(); ++k) { const i_t j = infeasibility_indices[k]; - if (basic_mark[j] < 0) { - printf("%d basic_infeasibilities basic_mark[%d] < 0\n", info, j); - } + if (basic_mark[j] < 0) { printf("%d basic_infeasibilities basic_mark[%d] < 0\n", info, j); } } } @@ -2864,22 +2862,22 @@ dual::status_t dual_phase2(i_t phase, basis_repair(lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); i_t count = 0; while (factorize_basis( - lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) { + lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) 
{ settings.log.printf("Failed to repair basis. %d deficient columns.\n", static_cast(deficient.size())); if (toc(start_time) > settings.time_limit) { return dual::status_t::TIME_LIMIT; } settings.threshold_partial_pivoting_tol = 1.0; count++; - if (count > 100) { - return dual::status_t::NUMERICAL; - } - basis_repair(lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); + if (count > 100) { return dual::status_t::NUMERICAL; } + basis_repair( + lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); } } reorder_basic_list(q, basic_list); ft.reset(L, U, p); phase2::reset_basis_mark(basic_list, nonbasic_list, basic_mark, nonbasic_mark); - phase2::compute_initial_primal_infeasibilities(lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); + phase2::compute_initial_primal_infeasibilities( + lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); } #ifdef CHECK_BASIC_INFEASIBILITIES phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 7); diff --git a/cpp/src/dual_simplex/right_looking_lu.cpp b/cpp/src/dual_simplex/right_looking_lu.cpp index 2e5f572483..7fe276ec63 100644 --- a/cpp/src/dual_simplex/right_looking_lu.cpp +++ b/cpp/src/dual_simplex/right_looking_lu.cpp @@ -571,10 +571,10 @@ i_t right_looking_lu(const csc_matrix_t& A, // Find pivot that satisfies // abs(pivot) >= abstol, // abs(pivot) >= threshold_tol * max abs[pivot column] - i_t pivot_i = -1; - i_t pivot_j = -1; - i_t pivot_p = kNone; - constexpr f_t pivot_tol = 1e-11; + i_t pivot_i = -1; + i_t pivot_j = -1; + i_t pivot_p = kNone; + constexpr f_t pivot_tol = 1e-11; const f_t drop_tol = tol == 1.0 ? 0.0 : 1e-13; const f_t threshold_tol = tol; markowitz_search(Cdegree,