Dual simplex performance improvements #1043
Conversation
…ering, A row nonbasic end, dynamic refactorization. 1.16X on NETLIB
| reliability_branching_settings_t<i_t, f_t> reliability_branching_settings; | ||
|
|
||
| csc_matrix_t<i_t, f_t> AT; // Transpose of the constraint matrix A | ||
| csr_matrix_t<i_t, f_t> Arow; |
There was a problem hiding this comment.
Is it an invariant that all the nonbasic coefficients appear first, or only in some contexts? Consider documenting. I'm also wondering if there's scope for a helper struct to group Arow with nonbasic_end since nonbasic_end is a property of the stored representation of the matrix.
There was a problem hiding this comment.
In order to use the compute_delta_z function the nonbasic coefficients must appear first. I can add some documentation and consider grouping with a struct.
| @@ -143,29 +143,24 @@ void compute_delta_z(const csc_matrix_t<i_t, f_t>& A_transpose, | |||
| // delta_zN = - N'*delta_y | |||
| const i_t nz_delta_y = delta_y.i.size(); | |||
| size_t nnz_processed = 0; | |||
| size_t nonbasic_marked = 0; | |||
| for (i_t k = 0; k < nz_delta_y; k++) { | |||
| const i_t i = delta_y.i[k]; | |||
| const f_t delta_y_i = delta_y.x[k]; | |||
| if (std::abs(delta_y_i) < 1e-12) { continue; } | |||
| const i_t row_start = A_transpose.col_start[i]; | |||
| const i_t row_end = A_transpose.col_start[i + 1]; | |||
| const i_t row_start = Arow.row_start[i]; | |||
| const i_t row_end = nonbasic_end[i] + 1; | |||
| nnz_processed += row_end - row_start; | |||
| for (i_t p = row_start; p < row_end; ++p) { | |||
| const i_t j = A_transpose.i[p]; | |||
| if (nonbasic_mark[j] >= 0) { | |||
| delta_z[j] -= delta_y_i * A_transpose.x[p]; | |||
| nonbasic_marked++; | |||
| if (!delta_z_mark[j]) { | |||
| delta_z_mark[j] = 1; | |||
| delta_z_indices.push_back(j); | |||
| } | |||
| const i_t j = Arow.j[p]; | |||
| delta_z[j] -= delta_y_i * Arow.x[p]; | |||
| if (!delta_z_mark[j]) { | |||
| delta_z_mark[j] = 1; | |||
| delta_z_indices.push_back(j); | |||
| } | |||
| } | |||
| } | |||
| work_estimate += 4 * nz_delta_y; | |||
| work_estimate += 2 * nnz_processed; | |||
| work_estimate += 3 * nonbasic_marked; | |||
| work_estimate += 4 * nnz_processed; | |||
| work_estimate += 2 * delta_z_indices.size(); | |||
|
|
|||
| // delta_zB = sigma*ei | |||
| @@ -205,6 +200,102 @@ void compute_delta_z(const csc_matrix_t<i_t, f_t>& A_transpose, | |||
| #endif | |||
| } | |||
|
|
|||
| template <typename i_t, typename f_t> | |||
| void compute_initial_nonbasic_end(const std::vector<i_t>& basic_mark, | |||
| csr_matrix_t<i_t, f_t>& Arow, | |||
| std::vector<i_t>& nonbasic_end) | |||
| { | |||
| i_t m = Arow.m; | |||
| // Swap coefficients so that all coefficients for nonbasic variables in row i | |||
| // appear from Arow.row_start[i] to nonbasic_end[i] | |||
| for (i_t i = 0; i < m; i++) { | |||
| i_t lo = Arow.row_start[i]; | |||
| i_t hi = Arow.row_start[i + 1] - 1; | |||
| while (lo <= hi) { | |||
| const i_t j = Arow.j[lo]; | |||
| if (basic_mark[j] >= 0) { | |||
| // Swap coefficients | |||
| std::swap(Arow.j[lo], Arow.j[hi]); | |||
| std::swap(Arow.x[lo], Arow.x[hi]); | |||
| hi--; | |||
| } else { | |||
| lo++; | |||
| } | |||
| } | |||
| nonbasic_end[i] = hi; | |||
| } | |||
| } | |||
|
|
|||
| template <typename i_t, typename f_t> | |||
| void update_Arow(i_t leaving, | |||
| i_t entering, | |||
| const csc_matrix_t<i_t, f_t>& A, | |||
| std::vector<i_t>& row_mark, | |||
| std::vector<i_t>& nonbasic_end, | |||
| csr_matrix_t<i_t, f_t>& Arow, | |||
| f_t& work_estimate) | |||
| { | |||
| // Update the Arow matrix to reflect the new basis. So | |||
| // that the coefficients for all nonbasic variables in row i | |||
| // appear in Arow.row_start[i] to nonbasic_end[i]. | |||
| // Swap the coefficients for the leaving and entering variables | |||
| // The leaving variable is now nonbasic, the entering variable is now basic | |||
| row_mark.clear(); | |||
| const i_t col_start_enter = A.col_start[entering]; | |||
| const i_t col_end_enter = A.col_start[entering + 1]; | |||
| for (i_t p = col_start_enter; p < col_end_enter; p++) { | |||
| const i_t i = A.i[p]; | |||
| row_mark.push_back(i); | |||
| } | |||
| work_estimate += 2*(col_end_enter - col_start_enter); | |||
|
|
|||
| // Move the coefficients for the entering variable to the end of the nonbasics | |||
| // And decrement the nonbasic count for the row | |||
| for (i_t i: row_mark) { | |||
| const i_t row_start = Arow.row_start[i]; | |||
| const i_t nb_end = nonbasic_end[i]; | |||
| for (i_t p = row_start; p <= nb_end; p++) { | |||
| const i_t j = Arow.j[p]; | |||
| if (j == entering) { | |||
| std::swap(Arow.j[p], Arow.j[nb_end]); | |||
| std::swap(Arow.x[p], Arow.x[nb_end]); | |||
| nonbasic_end[i]--; | |||
| break; | |||
| } | |||
| } | |||
| work_estimate += nb_end - row_start; | |||
| } | |||
| work_estimate += 2*row_mark.size(); | |||
|
|
|||
| row_mark.clear(); | |||
| const i_t col_start_leave = A.col_start[leaving]; | |||
| const i_t col_end_leave = A.col_start[leaving + 1]; | |||
| for (i_t p = col_start_leave; p < col_end_leave; p++) { | |||
| const i_t i = A.i[p]; | |||
| row_mark.push_back(i); | |||
| } | |||
| work_estimate += 2*(col_end_leave - col_start_leave); | |||
|
|
|||
|
|
|||
| // Move the coefficient for the leaving variable to the end of the nonbasics | |||
| // And increment the nonbasic count for the row | |||
| for (i_t i: row_mark) { | |||
| const i_t nb_end = nonbasic_end[i]; | |||
| const i_t row_end = Arow.row_start[i + 1]; | |||
| for (i_t p = nb_end + 1; p < row_end; p++) { | |||
| const i_t j = Arow.j[p]; | |||
| if (j == leaving) { | |||
| std::swap(Arow.j[p], Arow.j[nb_end + 1]); | |||
| std::swap(Arow.x[p], Arow.x[nb_end + 1]); | |||
| nonbasic_end[i]++; | |||
| break; | |||
| } | |||
| } | |||
| work_estimate += row_end - nb_end; | |||
| } | |||
| work_estimate += 2*row_mark.size(); | |||
| } | |||
|
|
|||
| namespace phase2 { | |||
|
|
|||
| // Computes vectors farkas_y, farkas_zl, farkas_zu that satisfy | |||
| @@ -1480,15 +1571,27 @@ i_t compute_steepest_edge_norm_entering(const simplex_solver_settings_t<i_t, f_t | |||
| const basis_update_mpf_t<i_t, f_t>& ft, | |||
| i_t basic_leaving_index, | |||
| i_t entering_index, | |||
| i_t leaving_index, | |||
| std::vector<f_t>& steepest_edge_norms) | |||
| { | |||
| #if 0 | |||
There was a problem hiding this comment.
Why not delete this code block?
There was a problem hiding this comment.
Yep. This code will all be deleted as will the function compute_steepest_edge_norm_entering before merging.
|
No actionable comments were generated in the recent review. 🎉 ℹ️ Recent review info⚙️ Run configurationConfiguration used: Path: .coderabbit.yaml Review profile: CHILL Plan: Pro Plus Run ID: 📒 Files selected for processing (1)
✅ Files skipped from review due to trivial changes (1)
📝 WalkthroughWalkthroughReplaced CSC/transposed matrix usage with a CSR row-compressed representation ( Changes
Estimated code review effort🎯 4 (Complex) | ⏱️ ~60 minutes 🚥 Pre-merge checks | ✅ 2 | ❌ 1❌ Failed checks (1 warning)
✅ Passed checks (2 passed)
✏️ Tip: You can configure your own custom pre-merge checks in the settings. ✨ Finishing Touches🧪 Generate unit tests (beta)
Comment |
There was a problem hiding this comment.
Actionable comments posted: 2
Caution
Some comments are outside the diff and can’t be posted inline due to platform limitations.
⚠️ Outside diff range comments (2)
cpp/src/dual_simplex/phase2.cpp (2)
3366-3409:⚠️ Potential issue | 🟠 MajorCount the flip-adjustment solve in
solve_work. Lines 3366-3379 call
phase2::adjust_for_flips(), and that helper does a ft.b_solve(...), but only the later FTRAN on Lines 3386-3408 contributes to solve_work. On flip-heavy models, the Line 3562 trigger will undercount solve cost and delay the refactorization it is supposed to schedule.💡 Suggested fix
delta_xB_0_sparse.clear(); if (num_flipped > 0) { timers.start_timer(); + f_t flip_adjust_start_work = ft.work_estimate(); phase2::adjust_for_flips(ft, basic_list, delta_z_indices, atilde_index, atilde, atilde_mark, atilde_sparse, delta_xB_0_sparse, delta_x_flip, x, phase2_work_estimate); + solve_work += (ft.work_estimate() - flip_adjust_start_work); timers.ftran_time += timers.stop_timer(); }🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed. In `@cpp/src/dual_simplex/phase2.cpp` around lines 3366 - 3409, The flip-adjustment FTRAN performed inside phase2::adjust_for_flips (which calls ft.b_solve) is not being added to solve_work; capture ft.work_estimate() immediately before and after calling phase2::adjust_for_flips (similar to how ftran_start_work is used later) and add their difference to solve_work so the flip-related solve cost is included when deciding refactorization.
2656-2675:⚠️ Potential issue | 🟠 MajorGuard the work-based refactor trigger for warm-started bases.
Lines 2656-2675 only populate
refactor_work in the initialization branch. When this entry point reuses an existing basis (initialize_basis == false), Line 3562 compares solve_work against 0.0, so the new heuristic collapses into “refactor after 100 updates” on warm-started solves.💡 Suggested fix
- f_t refactor_work = 0.0; - f_t solve_work = 0.0; + f_t refactor_work = 0.0; + f_t solve_work = 0.0; + bool have_refactor_work = false; ... - refactor_work = ft.work_estimate() - refactor_start_work; + refactor_work = ft.work_estimate() - refactor_start_work; + have_refactor_work = true; ... - bool should_refactor = - (ft.num_updates() > 100 && solve_work > refactor_work) || (ft.num_updates() > 1000); + bool should_refactor = + (ft.num_updates() > 1000) || + (have_refactor_work && ft.num_updates() > 100 && solve_work > refactor_work); ... - refactor_work = ft.work_estimate() - refactor_start_work; + refactor_work = ft.work_estimate() - refactor_start_work; + have_refactor_work = true;Also applies to: 3560-3563, 3621-3650
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed. In `@cpp/src/dual_simplex/phase2.cpp` around lines 2656 - 2675, The refactor_work baseline is only set inside the initialize_basis branch so warm-started solves leave refactor_work == 0.0 and break the work-based refactor heuristic; fix by establishing a proper baseline when initialize_basis == false (e.g. assign refactor_work = ft.work_estimate() before any update/solve work) or by guarding the later work-comparison logic with initialize_basis so the work-based trigger is only evaluated when refactor_work was actually computed (refer to refactor_work, solve_work, initialize_basis, and ft.work_estimate()/ft.refactor_basis to locate the affected code).
🤖 Prompt for all review comments with AI agents
Verify each finding against the current code and only fix it if needed.
Inline comments:
In `@cpp/src/branch_and_bound/pseudo_costs.cpp`:
- Around line 1476-1492: compute_initial_nonbasic_end mutates the shared
pseudo_costs_t::Arow in-place, causing races when multiple BEST_FIRST workers
run concurrently; make a worker-local copy of Arow and use that copy for
compute_initial_nonbasic_end and all subsequent calls in this block (e.g., pass
local_Arow to compute_initial_nonbasic_end and to
single_pivot_objective_change_estimate) so nonbasic_end and the pivot estimate
operate on a stable, per-worker CSR ordering instead of the shared Arow.
In `@cpp/src/dual_simplex/phase2.cpp`:
- Around line 133-141: The CHECK_CHANGE_IN_REDUCED_COST validation path inside
compute_delta_z is stale because it references removed symbols (A_transpose, lp,
basic_list, nonbasic_list); either remove that macro block or update it to use
the current inputs (Arow, delta_y, leaving_index, direction, nonbasic_end,
delta_z_mark, delta_z_indices, delta_z) and any available LP state, ensuring the
validation only uses in-scope variables; specifically locate the
CHECK_CHANGE_IN_REDUCED_COST block in compute_delta_z and refactor or delete it
so the checker no longer references the out-of-scope identifiers.
---
Outside diff comments:
In `@cpp/src/dual_simplex/phase2.cpp`:
- Around line 3366-3409: The flip-adjustment FTRAN performed inside
phase2::adjust_for_flips (which calls ft.b_solve) is not being added to
solve_work; capture ft.work_estimate() immediately before and after calling
phase2::adjust_for_flips (similar to how ftran_start_work is used later) and add
their difference to solve_work so the flip-related solve cost is included when
deciding refactorization.
- Around line 2656-2675: The refactor_work baseline is only set inside the
initialize_basis branch so warm-started solves leave refactor_work == 0.0 and
break the work-based refactor heuristic; fix by establishing a proper baseline
when initialize_basis == false (e.g. assign refactor_work = ft.work_estimate()
before any update/solve work) or by guarding the later work-comparison logic
with initialize_basis so the work-based trigger is only evaluated when
refactor_work was actually computed (refer to refactor_work, solve_work,
initialize_basis, and ft.work_estimate()/ft.refactor_basis to locate the
affected code).
🪄 Autofix (Beta)
Fix all unresolved CodeRabbit comments on this PR:
- Push a commit to this branch (recommended)
- Create a new PR with the fixes
ℹ️ Review info
⚙️ Run configuration
Configuration used: Path: .coderabbit.yaml
Review profile: CHILL
Plan: Pro
Run ID: b1230117-8559-4b83-8b43-f42888816349
📒 Files selected for processing (5)
cpp/src/branch_and_bound/branch_and_bound.cppcpp/src/branch_and_bound/pseudo_costs.cppcpp/src/branch_and_bound/pseudo_costs.hppcpp/src/dual_simplex/phase2.cppcpp/src/dual_simplex/phase2.hpp
There was a problem hiding this comment.
Caution
Some comments are outside the diff and can’t be posted inline due to platform limitations.
⚠️ Outside diff range comments (1)
cpp/src/dual_simplex/phase2.cpp (1)
3333-3375:⚠️ Potential issue | 🟠 MajorCount the flip-adjustment solve in
solve_work.
adjust_for_flips() does a ft.b_solve(...), but only the later compute_delta_x() solve is added to solve_work. On bound-flip-heavy iterations this undercounts the new refactor metric, so should_refactor can stay false even after the basis has already spent more work in solves than in the last factorization.[suggested fix]
Patch
delta_xB_0_sparse.clear(); if (num_flipped > 0) { timers.start_timer(); + f_t flip_adjust_start_work = ft.work_estimate(); phase2::adjust_for_flips(ft, basic_list, delta_z_indices, atilde_index, atilde, atilde_mark, atilde_sparse, delta_xB_0_sparse, delta_x_flip, x, phase2_work_estimate); + solve_work += (ft.work_estimate() - flip_adjust_start_work); timers.ftran_time += timers.stop_timer(); }🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed. In `@cpp/src/dual_simplex/phase2.cpp` around lines 3333 - 3375, The flip-adjustment B-solve performed inside phase2::adjust_for_flips(ft, ...) isn't currently included in solve_work, causing undercounting; before calling phase2::adjust_for_flips capture ft.work_estimate(), call the function, then add the difference (ft.work_estimate() - prior_work) to solve_work (similar to how ftran_start_work is used around phase2::compute_delta_x) so the work from ft.b_solve during adjust_for_flips is accounted for when deciding should_refactor.
🤖 Prompt for all review comments with AI agents
Verify each finding against the current code and only fix it if needed.
Outside diff comments:
In `@cpp/src/dual_simplex/phase2.cpp`:
- Around line 3333-3375: The flip-adjustment B-solve performed inside
phase2::adjust_for_flips(ft, ...) isn't currently included in solve_work,
causing undercounting; before calling phase2::adjust_for_flips capture
ft.work_estimate(), call the function, then add the difference
(ft.work_estimate() - prior_work) to solve_work (similar to how ftran_start_work
is used around phase2::compute_delta_x) so the work from ft.b_solve during
adjust_for_flips is accounted for when deciding should_refactor.
ℹ️ Review info
⚙️ Run configuration
Configuration used: Path: .coderabbit.yaml
Review profile: CHILL
Plan: Pro Plus
Run ID: 700fb170-9e65-4296-9a09-62cd3304fb11
📒 Files selected for processing (2)
cpp/src/branch_and_bound/pseudo_costs.cppcpp/src/dual_simplex/phase2.cpp
This PR includes three performance improvements to dual simplex