Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions cpp/src/barrier/barrier.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1094,6 +1094,7 @@ class iteration_data_t {
std::sort(column_nz_permutation.begin(),
column_nz_permutation.end(),
[&column_nz](i_t i, i_t j) { return column_nz[i] < column_nz[j]; });
if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; }

// We then compute the exact sparsity pattern for columns of A where
// the number of nonzeros is less than a threshold. This part can be done
Expand Down Expand Up @@ -1124,6 +1125,7 @@ class iteration_data_t {
// The best way to do that is to have A stored in CSR format.
csr_matrix_t<i_t, f_t> A_row(0, 0, 0);
A.to_compressed_row(A_row);
if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; }

std::vector<i_t> histogram(m + 1, 0);
for (i_t j = 0; j < n; j++) {
Expand Down Expand Up @@ -1253,6 +1255,7 @@ class iteration_data_t {
std::sort(permutation.begin(), permutation.end(), [&delta_nz](i_t i, i_t j) {
return delta_nz[i] < delta_nz[j];
});
if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; }

// Now we make a forward pass and compute the number of nonzeros in C
// assuming we had included column j
Expand Down Expand Up @@ -2297,6 +2300,12 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_
if (use_augmented) {
RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_));
data.form_augmented();
// Check halt after form_augmented (synchronous) and before factorize (~1s).
// If halt was set while form_augmented ran, we catch it here and skip the
// expensive factorization entirely.
if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
return CONCURRENT_HALT_RETURN;
}
status = data.chol->factorize(data.device_augmented);

#ifdef CHOLESKY_DEBUG_CHECK
Expand All @@ -2305,6 +2314,12 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_
} else {
// compute ADAT = A Dinv * A^T
data.form_adat();
// Check halt after form_adat (synchronous) and before factorize (~1s).
// If halt was set while form_adat ran, we catch it here and skip the
// expensive Cholesky factorization entirely.
if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
return CONCURRENT_HALT_RETURN;
}
// factorize
status = data.chol->factorize(data.device_ADAT);
}
Expand Down
17 changes: 16 additions & 1 deletion cpp/src/dual_simplex/basis_updates.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2431,7 +2431,22 @@ int basis_update_mpf_t<i_t, f_t>::refactor_basis(
assert(q.size() == A.m);
reorder_basic_list(q, basic_list); // We no longer need q after reordering the basic list
work_estimate_ += 3 * q.size();
reset();

// Check halt before the transpose operations: these can take hundreds of ms
// on large problems (L0 and U0 each have O(fill-in) nonzeros) and have no
// internal halt checks. Catching the flag here avoids the dead zone.
if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
return CONCURRENT_HALT_RETURN;
}
// Inline reset() so we can check halt between the two transposes.
clear();
L0_.transpose(L0_transpose_);
if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
return CONCURRENT_HALT_RETURN;
}
U0_.transpose(U0_transpose_);
work_estimate_ += 6 * L0_.col_start[L0_.n] + 6 * U0_.col_start[U0_.n];
reset_stats();
return 0;
}

Expand Down
25 changes: 24 additions & 1 deletion cpp/src/dual_simplex/phase2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2488,7 +2488,6 @@ dual::status_t dual_phase2(i_t phase,
const i_t n = lp.num_cols;
std::vector<i_t> basic_list(m);
std::vector<i_t> nonbasic_list;
std::vector<i_t> superbasic_list;
basis_update_mpf_t<i_t, f_t> ft(m, settings.refactor_frequency);
const bool initialize_basis = true;
return dual_phase2_with_advanced_basis(phase,
Expand Down Expand Up @@ -2688,6 +2687,10 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
vector_norm2<i_t, f_t>(delta_y_steepest_edge));
}

if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
return dual::status_t::CONCURRENT_LIMIT;
}

if (phase == 2) {
settings.log.printf(" Iter Objective Num Inf. Sum Inf. Perturb Time\n");
}
Expand Down Expand Up @@ -2735,10 +2738,18 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 0);
#endif

if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
return dual::status_t::CONCURRENT_LIMIT;
}

csc_matrix_t<i_t, f_t> A_transpose(1, 1, 0);
lp.A.transpose(A_transpose);
phase2_work_estimate += 2 * lp.A.col_start[lp.A.n];

if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
return dual::status_t::CONCURRENT_LIMIT;
}

f_t obj = compute_objective(lp, x);
phase2_work_estimate += 2 * n;

Expand Down Expand Up @@ -2908,6 +2919,9 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
phase2::compute_delta_y(ft, basic_leaving_index, direction, delta_y_sparse, UTsol_sparse);
}
timers.btran_time += timers.stop_timer();
if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
return dual::status_t::CONCURRENT_LIMIT;
}

const f_t steepest_edge_norm_check = delta_y_sparse.norm2_squared();
phase2_work_estimate += 2 * delta_y_sparse.i.size();
Expand Down Expand Up @@ -2966,6 +2980,9 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
}
}
timers.delta_z_time += timers.stop_timer();
if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
return dual::status_t::CONCURRENT_LIMIT;
}

#ifdef COMPUTE_DUAL_RESIDUAL
std::vector<f_t> dual_residual;
Expand Down Expand Up @@ -3301,6 +3318,9 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
}

timers.ftran_time += timers.stop_timer();
if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
return dual::status_t::CONCURRENT_LIMIT;
}

#ifdef CHECK_PRIMAL_STEP
std::vector<f_t> residual(m);
Expand Down Expand Up @@ -3331,6 +3351,9 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
#endif
assert(steepest_edge_status == 0);
timers.se_norms_time += timers.stop_timer();
if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
return dual::status_t::CONCURRENT_LIMIT;
}

timers.start_timer();
// x <- x + delta_x
Expand Down
37 changes: 25 additions & 12 deletions cpp/src/dual_simplex/solve.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,16 +120,17 @@ lp_status_t solve_linear_program_advanced(const lp_problem_t<i_t, f_t>& original
std::vector<i_t> basic_list(m);
std::vector<i_t> nonbasic_list;
basis_update_mpf_t<i_t, f_t> ft(m, settings.refactor_frequency);
return solve_linear_program_with_advanced_basis(original_lp,
start_time,
settings,
original_solution,
ft,
basic_list,
nonbasic_list,
vstatus,
edge_norms,
work_unit_context);
lp_status_t result = solve_linear_program_with_advanced_basis(original_lp,
start_time,
settings,
original_solution,
ft,
basic_list,
nonbasic_list,
vstatus,
edge_norms,
work_unit_context);
return result;
}

template <typename i_t, typename f_t>
Expand Down Expand Up @@ -222,7 +223,10 @@ lp_status_t solve_linear_program_with_advanced_basis(
if (phase1_status == dual::status_t::TIME_LIMIT) { return lp_status_t::TIME_LIMIT; }
if (phase1_status == dual::status_t::WORK_LIMIT) { return lp_status_t::WORK_LIMIT; }
if (phase1_status == dual::status_t::ITERATION_LIMIT) { return lp_status_t::ITERATION_LIMIT; }
if (phase1_status == dual::status_t::CONCURRENT_LIMIT) { return lp_status_t::CONCURRENT_LIMIT; }
if (phase1_status == dual::status_t::CONCURRENT_LIMIT) {
original_solution.iterations = iter;
return lp_status_t::CONCURRENT_LIMIT;
}
phase1_obj = phase1_solution.objective;
if (phase1_obj > -settings.primal_tol) {
settings.log.printf("Dual feasible solution found.\n");
Expand Down Expand Up @@ -309,7 +313,10 @@ lp_status_t solve_linear_program_with_advanced_basis(
if (status == dual::status_t::TIME_LIMIT) { lp_status = lp_status_t::TIME_LIMIT; }
if (status == dual::status_t::WORK_LIMIT) { lp_status = lp_status_t::WORK_LIMIT; }
if (status == dual::status_t::ITERATION_LIMIT) { lp_status = lp_status_t::ITERATION_LIMIT; }
if (status == dual::status_t::CONCURRENT_LIMIT) { lp_status = lp_status_t::CONCURRENT_LIMIT; }
if (status == dual::status_t::CONCURRENT_LIMIT) {
original_solution.iterations = iter;
Comment thread
srib marked this conversation as resolved.
return lp_status_t::CONCURRENT_LIMIT;
}
if (status == dual::status_t::NUMERICAL) { lp_status = lp_status_t::NUMERICAL_ISSUES; }
if (status == dual::status_t::CUTOFF) { lp_status = lp_status_t::CUTOFF; }
original_solution.iterations = iter;
Expand Down Expand Up @@ -581,6 +588,8 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t<i_t, f_t>& us
solution.iterations = barrier_solution.iterations;
}

if (barrier_status == lp_status_t::CONCURRENT_LIMIT) { return lp_status_t::CONCURRENT_LIMIT; }

// If we aren't doing crossover, we're done
if (!settings.crossover || barrier_lp.Q.n > 0) { return barrier_status; }

Expand Down Expand Up @@ -681,6 +690,10 @@ lp_status_t solve_linear_program(const user_problem_t<i_t, f_t>& user_problem,
std::vector<f_t> edge_norms;
lp_status_t status = solve_linear_program_advanced(
original_lp, start_time, settings, lp_solution, vstatus, edge_norms);
if (status == lp_status_t::CONCURRENT_LIMIT) {
solution.iterations = lp_solution.iterations;
return lp_status_t::CONCURRENT_LIMIT;
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}
uncrush_primal_solution(user_problem, original_lp, lp_solution.x, solution.x);
uncrush_dual_solution(
user_problem, original_lp, lp_solution.y, lp_solution.z, solution.y, solution.z);
Expand Down