diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh index 79188cacc3..2dc95a23a1 100755 --- a/ci/validate_wheel.sh +++ b/ci/validate_wheel.sh @@ -22,11 +22,11 @@ PYDISTCHECK_ARGS=( if [[ "${package_dir}" == "python/libcuopt" ]]; then if [[ "${RAPIDS_CUDA_MAJOR}" == "12" ]]; then PYDISTCHECK_ARGS+=( - --max-allowed-size-compressed '650Mi' + --max-allowed-size-compressed '660Mi' ) else PYDISTCHECK_ARGS+=( - --max-allowed-size-compressed '495Mi' + --max-allowed-size-compressed '520Mi' ) fi elif [[ "${package_dir}" != "python/cuopt" ]] && \ diff --git a/cpp/src/branch_and_bound/CMakeLists.txt b/cpp/src/branch_and_bound/CMakeLists.txt index 5bb1017120..1e40c1bbf1 100644 --- a/cpp/src/branch_and_bound/CMakeLists.txt +++ b/cpp/src/branch_and_bound/CMakeLists.txt @@ -5,7 +5,6 @@ set(BRANCH_AND_BOUND_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/branch_and_bound.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/mip_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pseudo_costs.cpp ${CMAKE_CURRENT_SOURCE_DIR}/diving_heuristics.cpp ) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 33a2d983c9..8881565b6d 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -6,6 +6,7 @@ /* clang-format on */ #include +#include #include #include @@ -35,15 +36,12 @@ #include #include #include -#include #include #include #include -#include #include namespace cuopt::linear_programming::dual_simplex { - namespace { template @@ -1039,7 +1037,9 @@ struct deterministic_bfs_policy_t const std::vector& fractional, const std::vector& x) override { - i_t var = this->worker.pc_snapshot.variable_selection(fractional, x); + logger_t log; + log.log = false; + i_t var = this->worker.pc_snapshot.variable_selection(fractional, x, log); auto dir = martin_criteria(x[var], this->bnb.root_relax_soln_.x[var]); return {var, dir}; } @@ -1048,8 +1048,10 @@ struct deterministic_bfs_policy_t const std::vector& fractional, const std::vector& x) override { + logger_t log; + log.log = false; node->objective_estimate = - this->worker.pc_snapshot.obj_estimate(fractional, x, node->lower_bound); + this->worker.pc_snapshot.obj_estimate(fractional, x, node->lower_bound, log); } void on_node_completed(mip_node_t* node, @@ -1114,25 +1116,28 @@ struct deterministic_diving_policy_t const std::vector& fractional, const std::vector& x) override { + logger_t log; + log.log = false; + switch (this->worker.diving_type) { case search_strategy_t::PSEUDOCOST_DIVING: - return this->worker.variable_selection_from_snapshot(fractional, x); + return pseudocost_diving( + this->worker.pc_snapshot, fractional, x, *this->worker.root_solution, log); case search_strategy_t::LINE_SEARCH_DIVING: - if (this->worker.root_solution) { - logger_t log; - log.log = false; - return line_search_diving(fractional, x, *this->worker.root_solution, log); - } - return this->worker.variable_selection_from_snapshot(fractional, x); + return line_search_diving(fractional, x, *this->worker.root_solution, log); case search_strategy_t::GUIDED_DIVING: - return this->worker.guided_variable_selection(fractional, x); + if (this->worker.incumbent_snapshot.empty()) { + return pseudocost_diving( + this->worker.pc_snapshot, fractional, x, *this->worker.root_solution, log); + } else { + return guided_diving( + this->worker.pc_snapshot, fractional, x, this->worker.incumbent_snapshot, log); + } case search_strategy_t::COEFFICIENT_DIVING: { - logger_t log; - log.log = false; - return coefficient_diving(this->bnb.original_lp_, + return coefficient_diving(this->worker.leaf_problem, fractional, x, this->bnb.var_up_locks_, @@ -1140,7 +1145,7 @@ struct deterministic_diving_policy_t log); } - default: return this->worker.variable_selection_from_snapshot(fractional, x); + default: CUOPT_LOG_ERROR("Invalid diving method!"); return {-1, rounding_direction_t::NONE}; } } @@ -1188,6 +1193,9 @@ std::pair branch_and_bound_t::upd node_status_t status = node_status_t::PENDING; rounding_direction_t round_dir = rounding_direction_t::NONE; + worker->recompute_basis = true; + worker->recompute_bounds = true; + if (lp_status == dual::status_t::DUAL_UNBOUNDED) { node_ptr->lower_bound = inf; policy.graphviz(search_tree, node_ptr, "infeasible", 0.0); @@ -1247,6 +1255,8 @@ std::pair branch_and_bound_t::upd assert(dir != rounding_direction_t::NONE); policy.update_objective_estimate(node_ptr, leaf_fractional, leaf_solution.x); + worker->recompute_basis = false; + worker->recompute_bounds = false; logger_t log; log.log = false; @@ -2506,7 +2516,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut set_uninitialized_steepest_edge_norms(original_lp_, basic_list, edge_norms_); pc_.resize(original_lp_.num_cols); - original_lp_.A.transpose(pc_.AT); + original_lp_.A.transpose(*pc_.AT); { raft::common::nvtx::range scope_sb("BB::strong_branching"); strong_branching(original_lp_, @@ -3321,11 +3331,12 @@ template void branch_and_bound_t::deterministic_broadcast_snapshots( PoolT& pool, const std::vector& incumbent_snapshot) { - deterministic_snapshot_t snap; - snap.upper_bound = upper_bound_.load(); - snap.total_lp_iters = exploration_stats_.total_lp_iters.load(); - snap.incumbent = incumbent_snapshot; - snap.pc_snapshot = pc_.create_snapshot(); + deterministic_snapshot_t snap{ + .upper_bound = upper_bound_, + .pc_snapshot = pc_, + .incumbent = incumbent_snapshot, + .total_lp_iters = exploration_stats_.total_lp_iters, + }; for (auto& worker : pool) { worker.set_snapshots(snap); diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index f2917ba930..6a9269bb2f 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -8,12 +8,12 @@ #pragma once #include -#include #include -#include #include #include #include +#include +#include #include @@ -107,7 +107,7 @@ class branch_and_bound_t { } } - // Set a solution based on the user problem during the course of the solve + // Set a solution based on the user problem during solve time void set_new_solution(const std::vector& solution); // This queues the solution to be processed at the correct work unit timestamp diff --git a/cpp/src/branch_and_bound/constants.hpp b/cpp/src/branch_and_bound/constants.hpp new file mode 100644 index 0000000000..ab8677095c --- /dev/null +++ b/cpp/src/branch_and_bound/constants.hpp @@ -0,0 +1,31 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +namespace cuopt::linear_programming::dual_simplex { + +constexpr int num_search_strategies = 5; + +// Indicate the search and variable selection algorithms used by each thread +// in B&B (See [1]). +// +// [1] T. Achterberg, “Constraint Integer Programming,” PhD, Technischen Universität Berlin, +// Berlin, 2007. doi: 10.14279/depositonce-1634. +enum search_strategy_t : int { + BEST_FIRST = 0, // Best-First + Plunging. + PSEUDOCOST_DIVING = 1, // Pseudocost diving (9.2.5) + LINE_SEARCH_DIVING = 2, // Line search diving (9.2.4) + GUIDED_DIVING = 3, // Guided diving (9.2.3). + COEFFICIENT_DIVING = 4 // Coefficient diving (9.2.1) +}; + +enum class rounding_direction_t { NONE = -1, DOWN = 0, UP = 1 }; + +enum class branch_and_bound_mode_t { PARALLEL = 0, DETERMINISTIC = 1 }; + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/branch_and_bound/deterministic_workers.hpp b/cpp/src/branch_and_bound/deterministic_workers.hpp index 7a074051c6..a5c3769126 100644 --- a/cpp/src/branch_and_bound/deterministic_workers.hpp +++ b/cpp/src/branch_and_bound/deterministic_workers.hpp @@ -8,9 +8,9 @@ #pragma once #include -#include #include #include +#include #include @@ -58,7 +58,7 @@ struct deterministic_snapshot_t { f_t upper_bound; pseudo_cost_snapshot_t pc_snapshot; std::vector incumbent; - i_t total_lp_iters; + int64_t total_lp_iters; }; template @@ -74,7 +74,7 @@ class deterministic_worker_base_t : public branch_and_bound_worker_t { // Diving-specific snapshots (ignored by BFS workers) std::vector incumbent_snapshot; - i_t total_lp_iters_snapshot{0}; + int64_t total_lp_iters_snapshot{0}; std::vector> integer_solutions; int next_solution_seq{0}; @@ -90,7 +90,7 @@ class deterministic_worker_base_t : public branch_and_bound_worker_t { const std::vector& var_types, const simplex_solver_settings_t& settings, const std::string& context_name) - : base_t(id, original_lp, Arow, var_types, settings), work_context(context_name) + : base_t(id, original_lp, Arow, var_types, settings), work_context(context_name), pc_snapshot(1) { work_context.deterministic = true; } @@ -342,22 +342,6 @@ class deterministic_diving_worker_t {objective, solution, depth, this->worker_id, this->next_solution_seq++}); ++this->total_integer_solutions; } - - branch_variable_t variable_selection_from_snapshot(const std::vector& fractional, - const std::vector& solution) const - { - assert(root_solution != nullptr); - return this->pc_snapshot.pseudocost_diving(fractional, solution, *root_solution); - } - - branch_variable_t guided_variable_selection(const std::vector& fractional, - const std::vector& solution) const - { - if (this->incumbent_snapshot.empty()) { - return variable_selection_from_snapshot(fractional, solution); - } - return this->pc_snapshot.guided_diving(fractional, solution, this->incumbent_snapshot); - } }; template diff --git a/cpp/src/branch_and_bound/diving_heuristics.cpp b/cpp/src/branch_and_bound/diving_heuristics.cpp index f9791280a6..571027c1d7 100644 --- a/cpp/src/branch_and_bound/diving_heuristics.cpp +++ b/cpp/src/branch_and_bound/diving_heuristics.cpp @@ -65,38 +65,117 @@ branch_variable_t line_search_diving(const std::vector& fractional, return {branch_var, round_dir}; } -template -branch_variable_t pseudocost_diving(pseudo_costs_t& pc, +template +branch_variable_t pseudocost_diving(pseudo_costs_t& pc, const std::vector& fractional, const std::vector& solution, const std::vector& root_solution, logger_t& log) { - return pseudocost_diving_from_arrays(pc.pseudo_cost_sum_down.data(), - pc.pseudo_cost_sum_up.data(), - pc.pseudo_cost_num_down.data(), - pc.pseudo_cost_num_up.data(), - (i_t)pc.pseudo_cost_sum_down.size(), - fractional, - solution, - root_solution); + const i_t num_fractional = fractional.size(); + if (num_fractional == 0) return {-1, rounding_direction_t::NONE}; + + pseudo_cost_averages_t avgs = pc.compute_averages(); + + i_t branch_var = fractional[0]; + f_t max_score = std::numeric_limits::lowest(); + rounding_direction_t round_dir = rounding_direction_t::DOWN; + constexpr f_t eps = f_t(1e-6); + + for (i_t j : fractional) { + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; + f_t pc_down = pc.pseudo_cost_num_down[j] != 0 + ? pc.pseudo_cost_sum_down[j] / pc.pseudo_cost_num_down[j] + : avgs.down_avg; + f_t pc_up = pc.pseudo_cost_num_up[j] != 0 ? pc.pseudo_cost_sum_up[j] / pc.pseudo_cost_num_up[j] + : avgs.up_avg; + + f_t score_down = std::sqrt(f_up) * (1 + pc_up) / (1 + pc_down); + f_t score_up = std::sqrt(f_down) * (1 + pc_down) / (1 + pc_up); + + f_t score = 0; + rounding_direction_t dir = rounding_direction_t::DOWN; + + f_t root_val = (j < static_cast(root_solution.size())) ? root_solution[j] : solution[j]; + + if (solution[j] < root_val - f_t(0.4)) { + score = score_down; + dir = rounding_direction_t::DOWN; + } else if (solution[j] > root_val + f_t(0.4)) { + score = score_up; + dir = rounding_direction_t::UP; + } else if (f_down < f_t(0.3)) { + score = score_down; + dir = rounding_direction_t::DOWN; + } else if (f_down > f_t(0.7)) { + score = score_up; + dir = rounding_direction_t::UP; + } else if (pc_down < pc_up + eps) { + score = score_down; + dir = rounding_direction_t::DOWN; + } else { + score = score_up; + dir = rounding_direction_t::UP; + } + + if (score > max_score) { + max_score = score; + branch_var = j; + round_dir = dir; + } + } + + if (round_dir == rounding_direction_t::NONE) { + branch_var = fractional[0]; + round_dir = rounding_direction_t::DOWN; + } + + return {branch_var, round_dir}; } -template -branch_variable_t guided_diving(pseudo_costs_t& pc, +template +branch_variable_t guided_diving(pseudo_costs_t& pc, const std::vector& fractional, const std::vector& solution, const std::vector& incumbent, logger_t& log) { - return guided_diving_from_arrays(pc.pseudo_cost_sum_down.data(), - pc.pseudo_cost_sum_up.data(), - pc.pseudo_cost_num_down.data(), - pc.pseudo_cost_num_up.data(), - (i_t)pc.pseudo_cost_sum_down.size(), - fractional, - solution, - incumbent); + const i_t num_fractional = fractional.size(); + if (num_fractional == 0) return {-1, rounding_direction_t::NONE}; + + pseudo_cost_averages_t avgs = pc.compute_averages(); + + i_t branch_var = fractional[0]; + f_t max_score = std::numeric_limits::lowest(); + rounding_direction_t round_dir = rounding_direction_t::DOWN; + constexpr f_t eps = f_t(1e-6); + + for (i_t j : fractional) { + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; + f_t down_dist = std::abs(incumbent[j] - std::floor(solution[j])); + f_t up_dist = std::abs(std::ceil(solution[j]) - incumbent[j]); + rounding_direction_t dir = + down_dist < up_dist + eps ? rounding_direction_t::DOWN : rounding_direction_t::UP; + + f_t pc_down = pc.pseudo_cost_num_down[j] != 0 + ? pc.pseudo_cost_sum_down[j] / pc.pseudo_cost_num_down[j] + : avgs.down_avg; + f_t pc_up = pc.pseudo_cost_num_up[j] != 0 ? pc.pseudo_cost_sum_up[j] / pc.pseudo_cost_num_up[j] + : avgs.up_avg; + f_t score1 = dir == rounding_direction_t::DOWN ? 5 * pc_down * f_down : 5 * pc_up * f_up; + f_t score2 = dir == rounding_direction_t::DOWN ? pc_up * f_up : pc_down * f_down; + f_t score = (score1 + score2) / 6; + + if (score > max_score) { + max_score = score; + branch_var = j; + round_dir = dir; + } + } + + return {branch_var, round_dir}; } template @@ -187,12 +266,26 @@ template branch_variable_t pseudocost_diving(pseudo_costs_t& p const std::vector& root_solution, logger_t& log); +template branch_variable_t pseudocost_diving( + pseudo_costs_t& pc, + const std::vector& fractional, + const std::vector& solution, + const std::vector& root_solution, + logger_t& log); + template branch_variable_t guided_diving(pseudo_costs_t& pc, const std::vector& fractional, const std::vector& solution, const std::vector& incumbent, logger_t& log); +template branch_variable_t guided_diving( + pseudo_costs_t& pc, + const std::vector& fractional, + const std::vector& solution, + const std::vector& incumbent, + logger_t& log); + template void calculate_variable_locks(const lp_problem_t& lp_problem, std::vector& up_locks, std::vector& down_locks); diff --git a/cpp/src/branch_and_bound/diving_heuristics.hpp b/cpp/src/branch_and_bound/diving_heuristics.hpp index dfeabe3a5f..325aa0b878 100644 --- a/cpp/src/branch_and_bound/diving_heuristics.hpp +++ b/cpp/src/branch_and_bound/diving_heuristics.hpp @@ -22,15 +22,15 @@ branch_variable_t line_search_diving(const std::vector& fractional, const std::vector& root_solution, logger_t& log); -template -branch_variable_t pseudocost_diving(pseudo_costs_t& pc, +template +branch_variable_t pseudocost_diving(pseudo_costs_t& pc, const std::vector& fractional, const std::vector& solution, const std::vector& root_solution, logger_t& log); -template -branch_variable_t guided_diving(pseudo_costs_t& pc, +template +branch_variable_t guided_diving(pseudo_costs_t& pc, const std::vector& fractional, const std::vector& solution, const std::vector& incumbent, diff --git a/cpp/src/branch_and_bound/mip_node.cpp b/cpp/src/branch_and_bound/mip_node.cpp deleted file mode 100644 index 7b0f644f4e..0000000000 --- a/cpp/src/branch_and_bound/mip_node.cpp +++ /dev/null @@ -1,18 +0,0 @@ -/* clang-format off */ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - */ -/* clang-format on */ - -#include - -namespace cuopt::linear_programming::dual_simplex { - -bool inactive_status(node_status_t status) -{ - return (status == node_status_t::FATHOMED || status == node_status_t::INTEGER_FEASIBLE || - status == node_status_t::INFEASIBLE || status == node_status_t::NUMERICAL); -} - -} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/branch_and_bound/mip_node.hpp b/cpp/src/branch_and_bound/mip_node.hpp index a24f67c3bc..cce23c3bd7 100644 --- a/cpp/src/branch_and_bound/mip_node.hpp +++ b/cpp/src/branch_and_bound/mip_node.hpp @@ -7,6 +7,8 @@ #pragma once +#include + #include #include @@ -29,9 +31,11 @@ enum class node_status_t : int { NUMERICAL = 5 // Encountered numerical issue when solving the LP relaxation }; -enum class rounding_direction_t : int8_t { NONE = -1, DOWN = 0, UP = 1 }; - -bool inactive_status(node_status_t status); +inline bool inactive_status(node_status_t status) +{ + return (status == node_status_t::FATHOMED || status == node_status_t::INTEGER_FEASIBLE || + status == node_status_t::INFEASIBLE || status == node_status_t::NUMERICAL); +} template class mip_node_t { diff --git a/cpp/src/branch_and_bound/pseudo_costs.cpp b/cpp/src/branch_and_bound/pseudo_costs.cpp index c38e98e27d..cf67a69046 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.cpp +++ b/cpp/src/branch_and_bound/pseudo_costs.cpp @@ -218,8 +218,10 @@ void initialize_pseudo_costs_with_estimate(const lp_problem_t& lp, const std::vector& basic_list, const std::vector& nonbasic_list, const std::vector& fractional, + const csc_matrix_t& AT, basis_update_mpf_t& basis_factors, - pseudo_costs_t& pc) + std::vector& strong_branch_down, + std::vector& strong_branch_up) { i_t m = lp.num_rows; i_t n = lp.num_cols; @@ -246,7 +248,7 @@ void initialize_pseudo_costs_with_estimate(const lp_problem_t& lp, objective_change_estimate_t estimate = single_pivot_objective_change_estimate(lp, settings, - pc.AT, + AT, vstatus, j, basic_map[j], @@ -258,8 +260,8 @@ void initialize_pseudo_costs_with_estimate(const lp_problem_t& lp, workspace, delta_z, work_estimate); - pc.strong_branch_down[k] = estimate.down_obj_change; - pc.strong_branch_up[k] = estimate.up_obj_change; + strong_branch_down[k] = estimate.down_obj_change; + strong_branch_up[k] = estimate.up_obj_change; } } @@ -298,12 +300,14 @@ void strong_branch_helper(i_t start, f_t root_obj, f_t upper_bound, i_t iter_limit, - pseudo_costs_t& pc, + std::vector& strong_branch_down, + std::vector& strong_branch_up, std::vector& dual_simplex_obj_down, std::vector& dual_simplex_obj_up, std::vector& dual_simplex_status_down, std::vector& dual_simplex_status_up, - shared_strong_branching_context_view_t& sb_view) + shared_strong_branching_context_view_t& sb_view, + omp_atomic_t& num_strong_branches_completed) { raft::common::nvtx::range scope("BB::strong_branch_helper"); lp_problem_t child_problem = original_lp; @@ -380,7 +384,7 @@ void strong_branch_helper(i_t start, } if (branch == 0) { - pc.strong_branch_down[k] = std::max(obj - root_obj, 0.0); + strong_branch_down[k] = std::max(obj - root_obj, 0.0); dual_simplex_obj_down[k] = std::max(obj - root_obj, 0.0); dual_simplex_status_down[k] = status; if (verbose) { @@ -393,7 +397,7 @@ void strong_branch_helper(i_t start, toc(start_time)); } } else { - pc.strong_branch_up[k] = std::max(obj - root_obj, 0.0); + strong_branch_up[k] = std::max(obj - root_obj, 0.0); dual_simplex_obj_up[k] = std::max(obj - root_obj, 0.0); dual_simplex_status_up[k] = status; if (verbose) { @@ -431,7 +435,7 @@ void strong_branch_helper(i_t start, } if (toc(start_time) > settings.time_limit) { break; } - const i_t completed = pc.num_strong_branches_completed++; + const i_t completed = num_strong_branches_completed++; if (thread_id == 0 && toc(last_log) > 10) { last_log = tic(); @@ -463,7 +467,7 @@ std::pair trial_branching(const lp_problem_t& ori f_t upper_bound, f_t start_time, i_t iter_limit, - omp_atomic_t& total_lp_iter) + i_t& iter) { lp_problem_t child_problem = original_lp; child_problem.lower[branch_var] = branch_var_lower; @@ -479,7 +483,7 @@ std::pair trial_branching(const lp_problem_t& ori objective_upper_bound(child_problem, upper_bound, child_settings.dual_tol); lp_solution_t solution(original_lp.num_rows, original_lp.num_cols); - i_t iter = 0; + iter = 0; std::vector child_vstatus = vstatus; std::vector child_edge_norms = edge_norms; std::vector child_basic_list = basic_list; @@ -502,7 +506,7 @@ std::pair trial_branching(const lp_problem_t& ori solution, iter, child_edge_norms); - total_lp_iter += iter; + settings.log.debug("Trial branching on variable %d. Lo: %e Up: %e. Iter %d. Status %s. Obj %e\n", branch_var, child_problem.lower[branch_var], @@ -732,9 +736,9 @@ static void batch_pdlp_strong_branching_task( std::max(static_cast(0.0), settings.time_limit - batch_elapsed_time); if (warm_start_remaining_time <= 0.0) { return; } - assert(!pc.pdlp_warm_cache.populated && "PDLP warm cache should not be populated at this point"); + assert(!pc.pdlp_warm_cache->populated && "PDLP warm cache should not be populated at this point"); - if (!pc.pdlp_warm_cache.populated) { + if (!pc.pdlp_warm_cache->populated) { pdlp_solver_settings_t ws_settings; ws_settings.method = method_t::PDLP; ws_settings.presolver = presolver_t::None; @@ -756,14 +760,14 @@ static void batch_pdlp_strong_branching_task( ws_settings.inside_mip = true; if (effective_batch_pdlp == 1) { ws_settings.concurrent_halt = &concurrent_halt; } - auto start_time = std::chrono::high_resolution_clock::now(); + auto lp_start_time = std::chrono::high_resolution_clock::now(); - auto ws_solution = solve_lp(&pc.pdlp_warm_cache.batch_pdlp_handle, mps_model, ws_settings); + auto ws_solution = solve_lp(&pc.pdlp_warm_cache->batch_pdlp_handle, mps_model, ws_settings); if (verbose) { auto end_time = std::chrono::high_resolution_clock::now(); auto duration = - std::chrono::duration_cast(end_time - start_time).count(); + std::chrono::duration_cast(end_time - lp_start_time).count(); settings.log.printf( "Original problem solved in %d milliseconds" " and iterations: %d\n", @@ -777,21 +781,21 @@ static void batch_pdlp_strong_branching_task( const auto& ws_dual = ws_solution.get_dual_solution(); // Need to use the pc steam since the batch pdlp handle will get destroyed after the warm // start - cache.initial_primal = rmm::device_uvector(ws_primal, ws_primal.stream()); - cache.initial_dual = rmm::device_uvector(ws_dual, ws_dual.stream()); - cache.step_size = ws_solution.get_pdlp_warm_start_data().initial_step_size_; - cache.primal_weight = ws_solution.get_pdlp_warm_start_data().initial_primal_weight_; - cache.pdlp_iteration = ws_solution.get_pdlp_warm_start_data().total_pdlp_iterations_; - cache.populated = true; + cache->initial_primal = rmm::device_uvector(ws_primal, ws_primal.stream()); + cache->initial_dual = rmm::device_uvector(ws_dual, ws_dual.stream()); + cache->step_size = ws_solution.get_pdlp_warm_start_data().initial_step_size_; + cache->primal_weight = ws_solution.get_pdlp_warm_start_data().initial_primal_weight_; + cache->pdlp_iteration = ws_solution.get_pdlp_warm_start_data().total_pdlp_iterations_; + cache->populated = true; if (verbose) { settings.log.printf( "Cached PDLP warm start: primal=%zu dual=%zu step_size=%e primal_weight=%e iters=%d\n", - cache.initial_primal.size(), - cache.initial_dual.size(), - cache.step_size, - cache.primal_weight, - cache.pdlp_iteration); + cache->initial_primal.size(), + cache->initial_dual.size(), + cache->step_size, + cache->primal_weight, + cache->pdlp_iteration); } } else { if (verbose) { @@ -817,22 +821,23 @@ static void batch_pdlp_strong_branching_task( if (batch_remaining_time <= 0.0) { return; } pdlp_settings.time_limit = batch_remaining_time; - if (pc.pdlp_warm_cache.populated) { + if (pc.pdlp_warm_cache->populated) { auto& cache = pc.pdlp_warm_cache; - pdlp_settings.set_initial_primal_solution(cache.initial_primal.data(), - cache.initial_primal.size(), - cache.batch_pdlp_handle.get_stream()); - pdlp_settings.set_initial_dual_solution( - cache.initial_dual.data(), cache.initial_dual.size(), cache.batch_pdlp_handle.get_stream()); - pdlp_settings.set_initial_step_size(cache.step_size); - pdlp_settings.set_initial_primal_weight(cache.primal_weight); - pdlp_settings.set_initial_pdlp_iteration(cache.pdlp_iteration); + pdlp_settings.set_initial_primal_solution(cache->initial_primal.data(), + cache->initial_primal.size(), + cache->batch_pdlp_handle.get_stream()); + pdlp_settings.set_initial_dual_solution(cache->initial_dual.data(), + cache->initial_dual.size(), + cache->batch_pdlp_handle.get_stream()); + pdlp_settings.set_initial_step_size(cache->step_size); + pdlp_settings.set_initial_primal_weight(cache->primal_weight); + pdlp_settings.set_initial_pdlp_iteration(cache->pdlp_iteration); } if (concurrent_halt.load() == 1) { return; } const auto solutions = batch_pdlp_solve( - &pc.pdlp_warm_cache.batch_pdlp_handle, mps_model, fractional, fraction_values, pdlp_settings); + &pc.pdlp_warm_cache->batch_pdlp_handle, mps_model, fractional, fraction_values, pdlp_settings); f_t batch_pdlp_strong_branching_time = toc(start_batch); // Fail safe in case the batch PDLP failed and produced no solutions @@ -888,7 +893,7 @@ static void batch_pdlp_reliability_branching_task( const std::vector& candidate_vars, const simplex_solver_settings_t& settings, shared_strong_branching_context_view_t& sb_view, - batch_pdlp_warm_cache_t& pdlp_warm_cache, + batch_pdlp_warm_cache_t* pdlp_warm_cache, std::vector& pdlp_obj_down, std::vector& pdlp_obj_up) { @@ -935,15 +940,16 @@ static void batch_pdlp_reliability_branching_task( } pdlp_settings.time_limit = batch_remaining_time; - if (pdlp_warm_cache.populated) { - auto& cache = pdlp_warm_cache; - pdlp_settings.set_initial_primal_solution( - cache.initial_primal.data(), cache.initial_primal.size(), batch_pdlp_handle.get_stream()); - pdlp_settings.set_initial_dual_solution( - cache.initial_dual.data(), cache.initial_dual.size(), batch_pdlp_handle.get_stream()); - pdlp_settings.set_initial_step_size(cache.step_size); - pdlp_settings.set_initial_primal_weight(cache.primal_weight); - pdlp_settings.set_initial_pdlp_iteration(cache.pdlp_iteration); + if (pdlp_warm_cache->populated) { + pdlp_settings.set_initial_primal_solution(pdlp_warm_cache->initial_primal.data(), + pdlp_warm_cache->initial_primal.size(), + batch_pdlp_handle.get_stream()); + pdlp_settings.set_initial_dual_solution(pdlp_warm_cache->initial_dual.data(), + pdlp_warm_cache->initial_dual.size(), + batch_pdlp_handle.get_stream()); + pdlp_settings.set_initial_step_size(pdlp_warm_cache->step_size); + pdlp_settings.set_initial_primal_weight(pdlp_warm_cache->primal_weight); + pdlp_settings.set_initial_pdlp_iteration(pdlp_warm_cache->pdlp_iteration); } if (concurrent_halt.load() == 1) { return; } @@ -1002,9 +1008,9 @@ void strong_branching(const lp_problem_t& original_lp, constexpr bool verbose = false; pc.resize(original_lp.num_cols); - pc.strong_branch_down.assign(fractional.size(), 0); - pc.strong_branch_up.assign(fractional.size(), 0); - pc.num_strong_branches_completed = 0; + std::vector strong_branch_down(fractional.size(), std::numeric_limits::quiet_NaN()); + std::vector strong_branch_up(fractional.size(), std::numeric_limits::quiet_NaN()); + omp_atomic_t num_strong_branches_completed = 0; const f_t elapsed_time = toc(start_time); if (elapsed_time > settings.time_limit) { return; } @@ -1049,8 +1055,10 @@ void strong_branching(const lp_problem_t& original_lp, basic_list, nonbasic_list, fractional, + *pc.AT, basis_factors, - pc); + strong_branch_down, + strong_branch_up); } else { #pragma omp parallel num_threads(settings.num_threads) { @@ -1082,7 +1090,6 @@ void strong_branching(const lp_problem_t& original_lp, i_t start = std::floor(k * fractional.size() / n); i_t end = std::floor((k + 1) * fractional.size() / n); - constexpr bool verbose = false; if (verbose) { settings.log.printf("Thread id %d task id %d start %d end %d. size %d\n", omp_get_thread_num(), @@ -1105,12 +1112,14 @@ void strong_branching(const lp_problem_t& original_lp, root_obj, upper_bound, simplex_iteration_limit, - pc, + strong_branch_down, + strong_branch_up, dual_simplex_obj_down, dual_simplex_obj_up, dual_simplex_status_down, dual_simplex_status_up, - sb_view); + sb_view, + num_strong_branches_completed); } // DS done: signal PDLP to stop (time-limit or all work done) and wait if (effective_batch_pdlp == 1) { concurrent_halt.store(1); } @@ -1178,7 +1187,7 @@ void strong_branching(const lp_problem_t& original_lp, for (i_t k = 0; k < fractional.size(); k++) { for (i_t branch = 0; branch < 2; branch++) { const bool is_down = (branch == 0); - f_t& sb_dest = is_down ? pc.strong_branch_down[k] : pc.strong_branch_up[k]; + f_t& sb_dest = is_down ? strong_branch_down[k] : strong_branch_up[k]; f_t ds_obj = is_down ? dual_simplex_obj_down[k] : dual_simplex_obj_up[k]; dual::status_t ds_status = is_down ? dual_simplex_status_down[k] : dual_simplex_status_up[k]; @@ -1211,12 +1220,12 @@ void strong_branching(const lp_problem_t& original_lp, } } - pc.pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root = + pc.pdlp_warm_cache->percent_solved_by_batch_pdlp_at_root = (f_t(merged_from_pdlp) / f_t(fractional.size() * 2)) * 100.0; if (verbose) { settings.log.printf( "Batch PDLP for strong branching. Percent solved by batch PDLP at root: %f\n", - pc.pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root); + pc.pdlp_warm_cache->percent_solved_by_batch_pdlp_at_root); settings.log.printf( "Merged results: %d from DS, %d from PDLP, %d unresolved (NaN), %d solved by both\n", merged_from_ds, @@ -1226,28 +1235,29 @@ void strong_branching(const lp_problem_t& original_lp, } } - pc.update_pseudo_costs_from_strong_branching(fractional, root_solution.x); + pc.update_pseudo_costs_from_strong_branching( + fractional, strong_branch_down, strong_branch_up, root_solution.x); } -template -f_t pseudo_costs_t::calculate_pseudocost_score(i_t j, - const std::vector& solution, - f_t pseudo_cost_up_avg, - f_t pseudo_cost_down_avg) const +template +f_t pseudo_costs_t::calculate_pseudocost_score( + i_t j, const std::vector& solution, pseudo_cost_averages_t averages) const { constexpr f_t eps = 1e-6; i_t num_up = pseudo_cost_num_up[j]; + f_t sum_up = pseudo_cost_sum_up[j]; i_t num_down = pseudo_cost_num_down[j]; - f_t pc_up = num_up > 0 ? pseudo_cost_sum_up[j] / num_up : pseudo_cost_up_avg; - f_t pc_down = num_down > 0 ? pseudo_cost_sum_down[j] / num_down : pseudo_cost_down_avg; + f_t sum_down = pseudo_cost_sum_down[j]; + f_t pc_up = num_up > 0 ? sum_up / num_up : averages.up_avg; + f_t pc_down = num_down > 0 ? sum_down / num_down : averages.down_avg; f_t f_down = solution[j] - std::floor(solution[j]); f_t f_up = std::ceil(solution[j]) - solution[j]; return std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps); } -template -void pseudo_costs_t::update_pseudo_costs(mip_node_t* node_ptr, - f_t leaf_objective) +template +void pseudo_costs_t::update_pseudo_costs(mip_node_t* node_ptr, + f_t leaf_objective) { const f_t change_in_obj = std::max(leaf_objective - node_ptr->lower_bound, 0.0); const f_t frac = node_ptr->branch_dir == rounding_direction_t::DOWN @@ -1263,43 +1273,51 @@ void pseudo_costs_t::update_pseudo_costs(mip_node_t* node_pt } } -template -void pseudo_costs_t::initialized(i_t& num_initialized_down, - i_t& num_initialized_up, - f_t& pseudo_cost_down_avg, - f_t& pseudo_cost_up_avg) const +template +pseudo_cost_averages_t pseudo_costs_t::compute_averages() const { - auto avgs = compute_pseudo_cost_averages(pseudo_cost_sum_down.data(), - pseudo_cost_sum_up.data(), - pseudo_cost_num_down.data(), - pseudo_cost_num_up.data(), - pseudo_cost_sum_down.size()); - pseudo_cost_down_avg = avgs.down_avg; - pseudo_cost_up_avg = avgs.up_avg; + i_t num_initialized_down = 0; + i_t num_initialized_up = 0; + f_t pseudo_cost_down_avg = 0.0; + f_t pseudo_cost_up_avg = 0.0; + + for (size_t j = 0; j < pseudo_cost_sum_down.size(); ++j) { + if (pseudo_cost_num_down[j] > 0 && std::isfinite(pseudo_cost_sum_down[j])) { + ++num_initialized_down; + pseudo_cost_down_avg += pseudo_cost_sum_down[j] / pseudo_cost_num_down[j]; + } + + if (pseudo_cost_num_up[j] > 0 && std::isfinite(pseudo_cost_sum_up[j])) { + ++num_initialized_up; + pseudo_cost_up_avg += pseudo_cost_sum_up[j] / pseudo_cost_num_up[j]; + } + } + + pseudo_cost_averages_t averages{ + .down_avg = (num_initialized_down > 0) ? pseudo_cost_down_avg / num_initialized_down : 1.0, + .num_init_down = num_initialized_down, + .up_avg = (num_initialized_up > 0) ? pseudo_cost_up_avg / num_initialized_up : 1.0, + .num_init_up = num_initialized_up}; + return averages; } -template -i_t pseudo_costs_t::variable_selection(const std::vector& fractional, - const std::vector& solution, - logger_t& log) +template +i_t pseudo_costs_t::variable_selection(const std::vector& fractional, + const std::vector& solution, + logger_t& log) { - i_t branch_var = fractional[0]; - f_t max_score = -1; - i_t num_initialized_down; - i_t num_initialized_up; - f_t pseudo_cost_down_avg; - f_t pseudo_cost_up_avg; - - initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); + i_t branch_var = fractional[0]; + f_t max_score = -1; + pseudo_cost_averages_t averages = compute_averages(); log.printf("PC: num initialized down %d up %d avg down %e up %e\n", - num_initialized_down, - num_initialized_up, - pseudo_cost_down_avg, - pseudo_cost_up_avg); + averages.num_init_down, + averages.num_init_up, + averages.down_avg, + averages.up_avg); for (i_t j : fractional) { - f_t score = calculate_pseudocost_score(j, solution, pseudo_cost_up_avg, pseudo_cost_down_avg); + f_t score = calculate_pseudocost_score(j, solution, averages); if (score > max_score) { max_score = score; @@ -1315,8 +1333,8 @@ i_t pseudo_costs_t::variable_selection(const std::vector& fractio return branch_var; } -template -i_t pseudo_costs_t::reliable_variable_selection( +template +i_t pseudo_costs_t::reliable_variable_selection( const mip_node_t* node_ptr, const std::vector& fractional, branch_and_bound_worker_t* worker, @@ -1329,12 +1347,11 @@ i_t pseudo_costs_t::reliable_variable_selection( const std::vector& new_slacks, const lp_problem_t& original_lp) { - constexpr f_t eps = 1e-6; - f_t start_time = bnb_stats.start_time; - i_t branch_var = fractional[0]; - f_t max_score = -1; - f_t pseudo_cost_down_avg = -1; - f_t pseudo_cost_up_avg = -1; + constexpr f_t eps = 1e-6; + f_t start_time = bnb_stats.start_time; + i_t branch_var = fractional[0]; + f_t max_score = -1; + pseudo_cost_averages_t averages; lp_solution_t& leaf_solution = worker->leaf_solution; const int64_t branch_and_bound_lp_iters = bnb_stats.total_lp_iters; @@ -1367,14 +1384,12 @@ i_t pseudo_costs_t::reliable_variable_selection( // In the latter, we are not using the average pseudocost (which calculated in the `initialized` // method). if (reliable_threshold == 0) { - i_t num_initialized_up; - i_t num_initialized_down; - initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); + averages = compute_averages(); log.printf("PC: num initialized down %d up %d avg down %e up %e\n", - num_initialized_down, - num_initialized_up, - pseudo_cost_down_avg, - pseudo_cost_up_avg); + averages.num_init_down, + averages.num_init_up, + averages.down_avg, + averages.up_avg); } std::vector> unreliable_list; @@ -1386,8 +1401,7 @@ i_t pseudo_costs_t::reliable_variable_selection( unreliable_list.push_back(std::make_pair(-1, j)); continue; } - f_t score = - calculate_pseudocost_score(j, leaf_solution.x, pseudo_cost_up_avg, pseudo_cost_down_avg); + f_t score = calculate_pseudocost_score(j, leaf_solution.x, averages); if (score > max_score) { max_score = score; @@ -1416,12 +1430,12 @@ i_t pseudo_costs_t::reliable_variable_selection( constexpr f_t min_percent_solved_by_batch_pdlp_at_root_for_pdlp = 5.0; // Batch PDLP is either forced or we use the heuristic to decide if it should be used const bool use_pdlp = (rb_mode == 2) || (rb_mode != 0 && !settings.sub_mip && - !settings.deterministic && pdlp_warm_cache.populated && + !settings.deterministic && pdlp_warm_cache->populated && unreliable_list.size() > min_num_candidates_for_pdlp && - pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root > + pdlp_warm_cache->percent_solved_by_batch_pdlp_at_root > min_percent_solved_by_batch_pdlp_at_root_for_pdlp); - if (rb_mode != 0 && !pdlp_warm_cache.populated) { + if (rb_mode != 0 && !pdlp_warm_cache->populated) { log.printf("PDLP warm start data not populated, using DS only\n"); } else if (rb_mode != 0 && settings.sub_mip) { log.printf("Batch PDLP reliability branching is disabled because sub-MIP is enabled\n"); @@ -1430,7 +1444,7 @@ i_t pseudo_costs_t::reliable_variable_selection( "Batch PDLP reliability branching is disabled because deterministic mode is enabled\n"); } else if (rb_mode != 0 && unreliable_list.size() < min_num_candidates_for_pdlp) { log.printf("Not enough candidates to use batch PDLP, using DS only\n"); - } else if (rb_mode != 0 && pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root < 5.0) { + } else if (rb_mode != 0 && pdlp_warm_cache->percent_solved_by_batch_pdlp_at_root < 5.0) { log.printf("Percent solved by batch PDLP at root is too low, using DS only\n"); } else if (use_pdlp) { log.printf( @@ -1438,7 +1452,7 @@ i_t pseudo_costs_t::reliable_variable_selection( "by batch PDLP at root is %f%% (> %f%%)\n", static_cast(unreliable_list.size()), min_num_candidates_for_pdlp, - pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root, + pdlp_warm_cache->percent_solved_by_batch_pdlp_at_root, min_percent_solved_by_batch_pdlp_at_root_for_pdlp); } @@ -1456,7 +1470,7 @@ i_t pseudo_costs_t::reliable_variable_selection( log.printf( "RB iters = %d, B&B iters = %d, unreliable = %d, num_tasks = %d, reliable_threshold = %d\n", - strong_branching_lp_iter.load(), + static_cast(strong_branching_lp_iter), branch_and_bound_lp_iters, unreliable_list.size(), num_tasks, @@ -1487,7 +1501,7 @@ i_t pseudo_costs_t::reliable_variable_selection( objective_change_estimate_t estimate = single_pivot_objective_change_estimate(worker->leaf_problem, settings, - AT, + *AT, node_ptr->vstatus, j, basic_map[j], @@ -1503,8 +1517,7 @@ i_t pseudo_costs_t::reliable_variable_selection( score = std::max(estimate.up_obj_change, eps) * std::max(estimate.down_obj_change, eps); } else { // Use the previous score, even if it is unreliable - score = calculate_pseudocost_score( - j, leaf_solution.x, pseudo_cost_up_avg, pseudo_cost_down_avg); + score = calculate_pseudocost_score(j, leaf_solution.x, averages); } } } else { @@ -1554,7 +1567,7 @@ i_t pseudo_costs_t::reliable_variable_selection( candidate_vars, settings, sb_view, - pdlp_warm_cache, + pdlp_warm_cache.get(), pdlp_obj_down, pdlp_obj_up); } @@ -1596,6 +1609,7 @@ i_t pseudo_costs_t::reliable_variable_selection( pseudo_cost_mutex_down[j].lock(); if (pseudo_cost_num_down[j] < reliable_threshold) { // Do trial branching on the down branch + i_t iter = 0; const auto [obj, status] = trial_branching(worker->leaf_problem, settings, var_types, @@ -1610,7 +1624,8 @@ i_t pseudo_costs_t::reliable_variable_selection( upper_bound, start_time, iter_limit_per_trial, - strong_branching_lp_iter); + iter); + strong_branching_lp_iter += iter; dual_simplex_obj_down[i] = obj; dual_simplex_status_down[i] = status; @@ -1639,6 +1654,7 @@ i_t pseudo_costs_t::reliable_variable_selection( } else { pseudo_cost_mutex_up[j].lock(); if (pseudo_cost_num_up[j] < reliable_threshold) { + i_t iter = 0; const auto [obj, status] = trial_branching(worker->leaf_problem, settings, var_types, @@ -1653,7 +1669,8 @@ i_t pseudo_costs_t::reliable_variable_selection( upper_bound, start_time, iter_limit_per_trial, - strong_branching_lp_iter); + iter); + strong_branching_lp_iter += iter; dual_simplex_obj_up[i] = obj; dual_simplex_status_up[i] = status; @@ -1674,9 +1691,7 @@ i_t pseudo_costs_t::reliable_variable_selection( if (toc(start_time) > settings.time_limit) { continue; } - score = - calculate_pseudocost_score(j, leaf_solution.x, pseudo_cost_up_avg, pseudo_cost_down_avg); - + score = calculate_pseudocost_score(j, leaf_solution.x, averages); score_mutex.lock(); if (score > max_score) { max_score = score; @@ -1756,8 +1771,7 @@ i_t pseudo_costs_t::reliable_variable_selection( } } - f_t score = - calculate_pseudocost_score(j, leaf_solution.x, pseudo_cost_up_avg, pseudo_cost_down_avg); + f_t score = calculate_pseudocost_score(j, leaf_solution.x, averages); if (score > max_score) { max_score = score; branch_var = j; @@ -1777,28 +1791,23 @@ i_t pseudo_costs_t::reliable_variable_selection( return branch_var; } -template -f_t pseudo_costs_t::obj_estimate(const std::vector& fractional, - const std::vector& solution, - f_t lower_bound, - logger_t& log) +template +f_t pseudo_costs_t::obj_estimate(const std::vector& fractional, + const std::vector& solution, + f_t lower_bound, + logger_t& log) { const i_t num_fractional = fractional.size(); f_t estimate = lower_bound; - i_t num_initialized_down; - i_t num_initialized_up; - f_t pseudo_cost_down_avg; - f_t pseudo_cost_up_avg; - - initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); + pseudo_cost_averages_t averages = compute_averages(); for (i_t j : fractional) { constexpr f_t eps = 1e-6; i_t num_up = pseudo_cost_num_up[j]; i_t num_down = pseudo_cost_num_down[j]; - f_t pc_up = num_up > 0 ? pseudo_cost_sum_up[j] / num_up : pseudo_cost_up_avg; - f_t pc_down = num_down > 0 ? pseudo_cost_sum_down[j] / num_down : pseudo_cost_down_avg; + f_t pc_up = num_up > 0 ? pseudo_cost_sum_up[j] / num_up : averages.up_avg; + f_t pc_down = num_down > 0 ? pseudo_cost_sum_down[j] / num_down : averages.down_avg; f_t f_down = solution[j] - std::floor(solution[j]); f_t f_up = std::ceil(solution[j]) - solution[j]; estimate += std::min(pc_down * f_down, pc_up * f_up); @@ -1808,9 +1817,12 @@ f_t pseudo_costs_t::obj_estimate(const std::vector& fractional, return estimate; } -template -void pseudo_costs_t::update_pseudo_costs_from_strong_branching( - const std::vector& fractional, const std::vector& root_soln) +template +void pseudo_costs_t::update_pseudo_costs_from_strong_branching( + const std::vector& fractional, + const std::vector& strong_branch_down, + const std::vector& strong_branch_up, + const std::vector& root_soln) { for (i_t k = 0; k < fractional.size(); k++) { const i_t j = fractional[k]; @@ -1834,7 +1846,9 @@ void pseudo_costs_t::update_pseudo_costs_from_strong_branching( #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE -template class pseudo_costs_t; +template class pseudo_costs_t; +template class pseudo_costs_t; +template class pseudo_cost_snapshot_t; template void strong_branching(const lp_problem_t& original_lp, const simplex_solver_settings_t& settings, diff --git a/cpp/src/branch_and_bound/pseudo_costs.hpp b/cpp/src/branch_and_bound/pseudo_costs.hpp index 009bd8b81a..5db0d573a2 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.hpp +++ b/cpp/src/branch_and_bound/pseudo_costs.hpp @@ -7,8 +7,9 @@ #pragma once -#include +#include #include +#include #include #include @@ -27,354 +28,6 @@ namespace cuopt::linear_programming::dual_simplex { -template -struct branch_variable_t { - i_t variable; - rounding_direction_t direction; -}; - -template -struct pseudo_cost_update_t { - i_t variable; - rounding_direction_t direction; - f_t delta; - double work_timestamp; - int worker_id; - - bool operator<(const pseudo_cost_update_t& other) const - { - if (work_timestamp != other.work_timestamp) return work_timestamp < other.work_timestamp; - if (variable != other.variable) return variable < other.variable; - if (delta != other.delta) return delta < other.delta; - return worker_id < other.worker_id; - } -}; - -template -struct pseudo_cost_averages_t { - f_t down_avg; - f_t up_avg; -}; - -// used to get T from omp_atomic_t based on the fact that omp_atomic_t::operator++ returns T -template -using underlying_type = decltype(std::declval()++); - -// Necessary because omp_atomic_t may be passed instead of f_t -template -auto compute_pseudo_cost_averages(const MaybeWrappedF* pc_sum_down, - const MaybeWrappedF* pc_sum_up, - const MaybeWrappedI* pc_num_down, - const MaybeWrappedI* pc_num_up, - size_t n) -{ - using underlying_f_t = underlying_type; - using underlying_i_t = underlying_type; - - underlying_i_t num_initialized_down = 0; - underlying_i_t num_initialized_up = 0; - underlying_f_t pseudo_cost_down_avg = 0.0; - underlying_f_t pseudo_cost_up_avg = 0.0; - - for (size_t j = 0; j < n; ++j) { - if (pc_num_down[j] > 0) { - ++num_initialized_down; - if (std::isfinite(pc_sum_down[j])) { - pseudo_cost_down_avg += pc_sum_down[j] / pc_num_down[j]; - } - } - if (pc_num_up[j] > 0) { - ++num_initialized_up; - if (std::isfinite(pc_sum_up[j])) { pseudo_cost_up_avg += pc_sum_up[j] / pc_num_up[j]; } - } - } - - pseudo_cost_down_avg = - (num_initialized_down > 0) ? pseudo_cost_down_avg / num_initialized_down : 1.0; - pseudo_cost_up_avg = (num_initialized_up > 0) ? pseudo_cost_up_avg / num_initialized_up : 1.0; - - return pseudo_cost_averages_t{pseudo_cost_down_avg, pseudo_cost_up_avg}; -} - -// Variable selection using pseudo-cost product scoring -// Returns the best variable to branch on -template -i_t variable_selection_from_pseudo_costs(const f_t* pc_sum_down, - const f_t* pc_sum_up, - const i_t* pc_num_down, - const i_t* pc_num_up, - i_t n_vars, - const std::vector& fractional, - const std::vector& solution) -{ - const i_t num_fractional = fractional.size(); - if (num_fractional == 0) return -1; - - auto [pc_down_avg, pc_up_avg] = - compute_pseudo_cost_averages(pc_sum_down, pc_sum_up, pc_num_down, pc_num_up, n_vars); - - i_t branch_var = fractional[0]; - f_t max_score = std::numeric_limits::lowest(); - constexpr f_t eps = f_t(1e-6); - - for (i_t j : fractional) { - f_t pc_down = pc_num_down[j] != 0 ? pc_sum_down[j] / pc_num_down[j] : pc_down_avg; - f_t pc_up = pc_num_up[j] != 0 ? pc_sum_up[j] / pc_num_up[j] : pc_up_avg; - const f_t f_down = solution[j] - std::floor(solution[j]); - const f_t f_up = std::ceil(solution[j]) - solution[j]; - f_t score = std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps); - if (score > max_score) { - max_score = score; - branch_var = j; - } - } - - return branch_var; -} - -// Objective estimate using pseudo-costs (lock-free implementation) -// Returns lower_bound + estimated cost to reach integer feasibility -template -f_t obj_estimate_from_arrays(const f_t* pc_sum_down, - const f_t* pc_sum_up, - const i_t* pc_num_down, - const i_t* pc_num_up, - i_t n_vars, - const std::vector& fractional, - const std::vector& solution, - f_t lower_bound) -{ - auto [pc_down_avg, pc_up_avg] = - compute_pseudo_cost_averages(pc_sum_down, pc_sum_up, pc_num_down, pc_num_up, n_vars); - - f_t estimate = lower_bound; - constexpr f_t eps = f_t(1e-6); - - for (i_t j : fractional) { - f_t pc_down = pc_num_down[j] != 0 ? pc_sum_down[j] / pc_num_down[j] : pc_down_avg; - f_t pc_up = pc_num_up[j] != 0 ? pc_sum_up[j] / pc_num_up[j] : pc_up_avg; - const f_t f_down = solution[j] - std::floor(solution[j]); - const f_t f_up = std::ceil(solution[j]) - solution[j]; - estimate += std::min(std::max(pc_down * f_down, eps), std::max(pc_up * f_up, eps)); - } - - return estimate; -} - -template -branch_variable_t pseudocost_diving_from_arrays(const MaybeWrappedF* pc_sum_down, - const MaybeWrappedF* pc_sum_up, - const MaybeWrappedI* pc_num_down, - const MaybeWrappedI* pc_num_up, - i_t n_vars, - const std::vector& fractional, - const std::vector& solution, - const std::vector& root_solution) -{ - const i_t num_fractional = fractional.size(); - if (num_fractional == 0) return {-1, rounding_direction_t::NONE}; - - auto avgs = compute_pseudo_cost_averages(pc_sum_down, pc_sum_up, pc_num_down, pc_num_up, n_vars); - - i_t branch_var = fractional[0]; - f_t max_score = std::numeric_limits::lowest(); - rounding_direction_t round_dir = rounding_direction_t::DOWN; - constexpr f_t eps = f_t(1e-6); - - for (i_t j : fractional) { - f_t f_down = solution[j] - std::floor(solution[j]); - f_t f_up = std::ceil(solution[j]) - solution[j]; - f_t pc_down = pc_num_down[j] != 0 ? (f_t)pc_sum_down[j] / (f_t)pc_num_down[j] : avgs.down_avg; - f_t pc_up = pc_num_up[j] != 0 ? (f_t)pc_sum_up[j] / (f_t)pc_num_up[j] : avgs.up_avg; - - f_t score_down = std::sqrt(f_up) * (1 + pc_up) / (1 + pc_down); - f_t score_up = std::sqrt(f_down) * (1 + pc_down) / (1 + pc_up); - - f_t score = 0; - rounding_direction_t dir = rounding_direction_t::DOWN; - - f_t root_val = (j < static_cast(root_solution.size())) ? root_solution[j] : solution[j]; - - if (solution[j] < root_val - f_t(0.4)) { - score = score_down; - dir = rounding_direction_t::DOWN; - } else if (solution[j] > root_val + f_t(0.4)) { - score = score_up; - dir = rounding_direction_t::UP; - } else if (f_down < f_t(0.3)) { - score = score_down; - dir = rounding_direction_t::DOWN; - } else if (f_down > f_t(0.7)) { - score = score_up; - dir = rounding_direction_t::UP; - } else if (pc_down < pc_up + eps) { - score = score_down; - dir = rounding_direction_t::DOWN; - } else { - score = score_up; - dir = rounding_direction_t::UP; - } - - if (score > max_score) { - max_score = score; - branch_var = j; - round_dir = dir; - } - } - - if (round_dir == rounding_direction_t::NONE) { - branch_var = fractional[0]; - round_dir = rounding_direction_t::DOWN; - } - - return {branch_var, round_dir}; -} - -template -branch_variable_t guided_diving_from_arrays(const MaybeWrappedF* pc_sum_down, - const MaybeWrappedF* pc_sum_up, - const MaybeWrappedI* pc_num_down, - const MaybeWrappedI* pc_num_up, - i_t n_vars, - const std::vector& fractional, - const std::vector& solution, - const std::vector& incumbent) -{ - const i_t num_fractional = fractional.size(); - if (num_fractional == 0) return {-1, rounding_direction_t::NONE}; - - auto avgs = compute_pseudo_cost_averages(pc_sum_down, pc_sum_up, pc_num_down, pc_num_up, n_vars); - - i_t branch_var = fractional[0]; - f_t max_score = std::numeric_limits::lowest(); - rounding_direction_t round_dir = rounding_direction_t::DOWN; - constexpr f_t eps = f_t(1e-6); - - for (i_t j : fractional) { - f_t f_down = solution[j] - std::floor(solution[j]); - f_t f_up = std::ceil(solution[j]) - solution[j]; - f_t down_dist = std::abs(incumbent[j] - std::floor(solution[j])); - f_t up_dist = std::abs(std::ceil(solution[j]) - incumbent[j]); - rounding_direction_t dir = - down_dist < up_dist + eps ? rounding_direction_t::DOWN : rounding_direction_t::UP; - - f_t pc_down = pc_num_down[j] != 0 ? (f_t)pc_sum_down[j] / (f_t)pc_num_down[j] : avgs.down_avg; - f_t pc_up = pc_num_up[j] != 0 ? (f_t)pc_sum_up[j] / (f_t)pc_num_up[j] : avgs.up_avg; - - f_t score1 = dir == rounding_direction_t::DOWN ? 5 * pc_down * f_down : 5 * pc_up * f_up; - f_t score2 = dir == rounding_direction_t::DOWN ? pc_up * f_up : pc_down * f_down; - f_t score = (score1 + score2) / 6; - - if (score > max_score) { - max_score = score; - branch_var = j; - round_dir = dir; - } - } - - return {branch_var, round_dir}; -} - -template -class pseudo_cost_snapshot_t { - public: - pseudo_cost_snapshot_t() = default; - - pseudo_cost_snapshot_t(std::vector sum_down, - std::vector sum_up, - std::vector num_down, - std::vector num_up) - : sum_down_(std::move(sum_down)), - sum_up_(std::move(sum_up)), - num_down_(std::move(num_down)), - num_up_(std::move(num_up)) - { - } - - i_t variable_selection(const std::vector& fractional, const std::vector& solution) const - { - return variable_selection_from_pseudo_costs(sum_down_.data(), - sum_up_.data(), - num_down_.data(), - num_up_.data(), - n_vars(), - fractional, - solution); - } - - f_t obj_estimate(const std::vector& fractional, - const std::vector& solution, - f_t lower_bound) const - { - return obj_estimate_from_arrays(sum_down_.data(), - sum_up_.data(), - num_down_.data(), - num_up_.data(), - n_vars(), - fractional, - solution, - lower_bound); - } - - branch_variable_t pseudocost_diving(const std::vector& fractional, - const std::vector& solution, - const std::vector& root_solution) const - { - return pseudocost_diving_from_arrays(sum_down_.data(), - sum_up_.data(), - num_down_.data(), - num_up_.data(), - n_vars(), - fractional, - solution, - root_solution); - } - - branch_variable_t guided_diving(const std::vector& fractional, - const std::vector& solution, - const std::vector& incumbent) const - { - return guided_diving_from_arrays(sum_down_.data(), - sum_up_.data(), - num_down_.data(), - num_up_.data(), - n_vars(), - fractional, - solution, - incumbent); - } - - void queue_update( - i_t variable, rounding_direction_t direction, f_t delta, double clock, int worker_id) - { - updates_.push_back({variable, direction, delta, clock, worker_id}); - if (direction == rounding_direction_t::DOWN) { - sum_down_[variable] += delta; - num_down_[variable]++; - } else { - sum_up_[variable] += delta; - num_up_[variable]++; - } - } - - std::vector> take_updates() - { - std::vector> result; - result.swap(updates_); - return result; - } - - i_t n_vars() const { return (i_t)sum_down_.size(); } - - std::vector sum_down_; - std::vector sum_up_; - std::vector num_down_; - std::vector num_up_; - - private: - std::vector> updates_; -}; - template struct reliability_branching_settings_t { // Lower bound for the maximum number of LP iterations for a single trial branching @@ -413,6 +66,12 @@ struct reliability_branching_settings_t { bool rank_candidates_with_dual_pivot = true; }; +template +struct branch_variable_t { + i_t variable; + rounding_direction_t direction; +}; + template struct batch_pdlp_warm_cache_t { const raft::handle_t batch_pdlp_handle{}; @@ -426,8 +85,58 @@ struct batch_pdlp_warm_cache_t { }; template +struct pseudo_cost_averages_t { + f_t down_avg; + i_t num_init_down; + f_t up_avg; + i_t num_init_up; +}; + +template +struct pseudo_cost_update_t { + i_t variable; + rounding_direction_t direction; + f_t delta; + double work_timestamp; + int worker_id; + + bool operator<(const pseudo_cost_update_t& other) const + { + if (work_timestamp != other.work_timestamp) return work_timestamp < other.work_timestamp; + if (variable != other.variable) return variable < other.variable; + if (delta != other.delta) return delta < other.delta; + return worker_id < other.worker_id; + } +}; + +// `BnBMode` specify how we control the memory accesses: +// - If `BnBMode == branch_and_bound_mode_t::PARALLEL`, then we assume that this object is shared +// among the B&B threads, and thus, require atomics and mutexes to avoid data races. +// - If `BnBMode == branch_and_bound_mode_t::DETERMINISTIC`, then each thread has it own pseudocost +// snapshot, hence, we can disable all atomics and mutexes. +// `BnBMode` is automatically set depending if it is a `pseudo_costs_t` (PARALLEL) +// or a `pseudo_costs_snapshot_t` (DETERMINISTIC). +template class pseudo_costs_t { public: + // Define the types used for storing the pseudocost of each variable. + // Disable or enable atomics depending on if we are in REGULAR or DETERMINISTIC modes + using float_type = + std::conditional_t, f_t>; + + using int_type = + std::conditional_t, i_t>; + + // Counting the number of LP iterations might require more than an int32 can hold. + using int64_type = std:: + conditional_t, int64_t>; + + // Disable or enable mutexes depending on if we are in REGULAR or DETERMINISTIC modes + using mutex_type = + std::conditional_t; + explicit pseudo_costs_t(i_t num_variables) : pseudo_cost_sum_down(num_variables), pseudo_cost_sum_up(num_variables), @@ -435,27 +144,13 @@ class pseudo_costs_t { pseudo_cost_num_up(num_variables), pseudo_cost_mutex_up(num_variables), pseudo_cost_mutex_down(num_variables), - AT(1, 1, 1) + AT(std::make_shared>(1, 1, 1)), + pdlp_warm_cache(std::make_shared>()) { } void update_pseudo_costs(mip_node_t* node_ptr, f_t leaf_objective); - pseudo_cost_snapshot_t create_snapshot() const - { - const i_t n = (i_t)pseudo_cost_sum_down.size(); - std::vector sd(n), su(n); - std::vector nd(n), nu(n); - for (i_t j = 0; j < n; ++j) { - sd[j] = pseudo_cost_sum_down[j]; - su[j] = pseudo_cost_sum_up[j]; - nd[j] = pseudo_cost_num_down[j]; - nu[j] = pseudo_cost_num_up[j]; - } - return pseudo_cost_snapshot_t( - std::move(sd), std::move(su), std::move(nd), std::move(nu)); - } - void merge_updates(const std::vector>& updates) { for (const auto& upd : updates) { @@ -479,10 +174,7 @@ class pseudo_costs_t { pseudo_cost_mutex_down.resize(num_variables); } - void initialized(i_t& num_initialized_down, - i_t& num_initialized_up, - f_t& pseudo_cost_down_avg, - f_t& pseudo_cost_up_avg) const; + pseudo_cost_averages_t compute_averages() const; f_t obj_estimate(const std::vector& fractional, const std::vector& solution, @@ -506,6 +198,8 @@ class pseudo_costs_t { const lp_problem_t& original_lp); void update_pseudo_costs_from_strong_branching(const std::vector& fractional, + const std::vector& strong_branch_down, + const std::vector& strong_branch_up, const std::vector& root_soln); uint32_t compute_state_hash() const @@ -514,31 +208,99 @@ class pseudo_costs_t { detail::compute_hash(pseudo_cost_num_down) ^ detail::compute_hash(pseudo_cost_num_up); } - uint32_t compute_strong_branch_hash() const - { - return detail::compute_hash(strong_branch_down) ^ detail::compute_hash(strong_branch_up); - } - f_t calculate_pseudocost_score(i_t j, const std::vector& solution, - f_t pseudo_cost_up_avg, - f_t pseudo_cost_down_avg) const; + pseudo_cost_averages_t averages) const; reliability_branching_settings_t reliability_branching_settings; - csc_matrix_t AT; // Transpose of the constraint matrix A - std::vector> pseudo_cost_sum_up; - std::vector> pseudo_cost_sum_down; - std::vector> pseudo_cost_num_up; - std::vector> pseudo_cost_num_down; - std::vector strong_branch_down; - std::vector strong_branch_up; - std::vector pseudo_cost_mutex_up; - std::vector pseudo_cost_mutex_down; - omp_atomic_t num_strong_branches_completed = 0; - omp_atomic_t strong_branching_lp_iter = 0; - - batch_pdlp_warm_cache_t pdlp_warm_cache; + std::shared_ptr> AT; // Transpose of the constraint matrix A + std::vector pseudo_cost_sum_up; + std::vector pseudo_cost_sum_down; + std::vector pseudo_cost_num_up; + std::vector pseudo_cost_num_down; + std::vector pseudo_cost_mutex_up; + std::vector pseudo_cost_mutex_down; + int64_type strong_branching_lp_iter = 0; + + std::shared_ptr> pdlp_warm_cache; +}; + +template +class pseudo_cost_snapshot_t : public pseudo_costs_t { + public: + using Base = pseudo_costs_t; + + pseudo_cost_snapshot_t(i_t num_variables) : Base(num_variables) {}; + + pseudo_cost_snapshot_t(const pseudo_costs_t& other) + : Base(1) + { + *this = other; + } + + pseudo_cost_snapshot_t(const Base& other) : Base(1) { *this = other; } + pseudo_cost_snapshot_t& operator=( + const pseudo_costs_t& other) + { + Base::AT = other.AT; + Base::pdlp_warm_cache = other.pdlp_warm_cache; + + i_t n = other.pseudo_cost_num_down.size(); + Base::pseudo_cost_num_down.resize(n); + Base::pseudo_cost_num_up.resize(n); + Base::pseudo_cost_sum_down.resize(n); + Base::pseudo_cost_sum_up.resize(n); + + for (i_t i = 0; i < n; ++i) { + Base::pseudo_cost_num_down[i] = other.pseudo_cost_num_down[i].get_no_atomic(); + Base::pseudo_cost_num_up[i] = other.pseudo_cost_num_up[i].get_no_atomic(); + Base::pseudo_cost_sum_down[i] = other.pseudo_cost_sum_down[i].get_no_atomic(); + Base::pseudo_cost_sum_up[i] = other.pseudo_cost_sum_up[i].get_no_atomic(); + } + + return *this; + } + + pseudo_cost_snapshot_t& operator=(const Base& other) + { + if (this != &other) { + Base::AT = other.AT; + Base::pdlp_warm_cache = other.pdlp_warm_cache; + Base::pseudo_cost_num_down = other.pseudo_cost_num_down; + Base::pseudo_cost_num_up = other.pseudo_cost_num_up; + Base::pseudo_cost_sum_down = other.pseudo_cost_sum_down; + Base::pseudo_cost_sum_up = other.pseudo_cost_sum_up; + } + return *this; + }; + + void queue_update( + i_t variable, rounding_direction_t direction, f_t delta, double clock, int worker_id) + { + updates_.push_back({variable, direction, delta, clock, worker_id}); + if (direction == rounding_direction_t::DOWN) { + Base::pseudo_cost_sum_down[variable] += delta; + ++Base::pseudo_cost_num_down[variable]; + } else { + Base::pseudo_cost_sum_up[variable] += delta; + ++Base::pseudo_cost_num_up[variable]; + } + } + + std::vector> take_updates() + { + std::vector> result; + result.swap(updates_); + return result; + } + + i_t n_vars() const { return Base::pseudo_cost_sum_down.size(); } + + private: + std::vector> updates_; }; template diff --git a/cpp/src/branch_and_bound/branch_and_bound_worker.hpp b/cpp/src/branch_and_bound/worker.hpp similarity index 52% rename from cpp/src/branch_and_bound/branch_and_bound_worker.hpp rename to cpp/src/branch_and_bound/worker.hpp index 4de2b43cae..87689e57bb 100644 --- a/cpp/src/branch_and_bound/branch_and_bound_worker.hpp +++ b/cpp/src/branch_and_bound/worker.hpp @@ -7,36 +7,19 @@ #pragma once +#include #include #include #include -#include #include -#include #include -#include #include namespace cuopt::linear_programming::dual_simplex { -constexpr int num_search_strategies = 5; - -// Indicate the search and variable selection algorithms used by each thread -// in B&B (See [1]). -// -// [1] T. Achterberg, “Constraint Integer Programming,” PhD, Technischen Universität Berlin, -// Berlin, 2007. doi: 10.14279/depositonce-1634. -enum search_strategy_t : int { - BEST_FIRST = 0, // Best-First + Plunging. - PSEUDOCOST_DIVING = 1, // Pseudocost diving (9.2.5) - LINE_SEARCH_DIVING = 2, // Line search diving (9.2.4) - GUIDED_DIVING = 3, // Guided diving (9.2.3). - COEFFICIENT_DIVING = 4 // Coefficient diving (9.2.1) -}; - template struct branch_and_bound_stats_t { f_t start_time = 0.0; @@ -116,9 +99,8 @@ class branch_and_bound_worker_t { const lp_problem_t& original_lp, const simplex_solver_settings_t& settings) { - internal_node = node->detach_copy(); - start_node = &internal_node; - + internal_node = node->detach_copy(); + start_node = &internal_node; start_lower = original_lp.lower; start_upper = original_lp.upper; search_strategy = type; @@ -130,7 +112,7 @@ class branch_and_bound_worker_t { return node_presolver.bounds_strengthening(settings, bounds_changed, start_lower, start_upper); } - // Set the variables bounds for the LP relaxation of the current node. + // Set the variables bounds for the LP relaxation in the current node. bool set_lp_variable_bounds(mip_node_t* node_ptr, const simplex_solver_settings_t& settings) { @@ -162,120 +144,4 @@ class branch_and_bound_worker_t { mip_node_t internal_node; }; -template -class branch_and_bound_worker_pool_t { - public: - void init(i_t num_workers, - const lp_problem_t& original_lp, - const csr_matrix_t& Arow, - const std::vector& var_type, - const simplex_solver_settings_t& settings) - { - workers_.resize(num_workers); - num_idle_workers_ = num_workers; - for (i_t i = 0; i < num_workers; ++i) { - workers_[i] = std::make_unique>( - i, original_lp, Arow, var_type, settings); - idle_workers_.push_front(i); - } - - is_initialized = true; - } - - // Here, we are assuming that the scheduler is the only - // thread that can retrieve/pop an idle worker. - branch_and_bound_worker_t* get_idle_worker() - { - std::lock_guard lock(mutex_); - if (idle_workers_.empty()) { - return nullptr; - } else { - i_t idx = idle_workers_.front(); - return workers_[idx].get(); - } - } - - // Here, we are assuming that the scheduler is the only - // thread that can retrieve/pop an idle worker. - void pop_idle_worker() - { - std::lock_guard lock(mutex_); - if (!idle_workers_.empty()) { - idle_workers_.pop_front(); - num_idle_workers_--; - } - } - - void return_worker_to_pool(branch_and_bound_worker_t* worker) - { - worker->is_active = false; - std::lock_guard lock(mutex_); - idle_workers_.push_back(worker->worker_id); - num_idle_workers_++; - } - - f_t get_lower_bound() - { - f_t lower_bound = std::numeric_limits::infinity(); - - if (is_initialized) { - for (i_t i = 0; i < workers_.size(); ++i) { - if (workers_[i]->search_strategy == BEST_FIRST && workers_[i]->is_active) { - lower_bound = std::min(workers_[i]->lower_bound.load(), lower_bound); - } - } - } - - return lower_bound; - } - - i_t num_idle_workers() { return num_idle_workers_; } - - private: - // Worker pool - std::vector>> workers_; - bool is_initialized = false; - - omp_mutex_t mutex_; - std::deque idle_workers_; - omp_atomic_t num_idle_workers_; -}; - -template -std::vector get_search_strategies( - diving_heuristics_settings_t settings) -{ - std::vector types; - types.reserve(num_search_strategies); - types.push_back(BEST_FIRST); - if (settings.pseudocost_diving != 0) { types.push_back(PSEUDOCOST_DIVING); } - if (settings.line_search_diving != 0) { types.push_back(LINE_SEARCH_DIVING); } - if (settings.guided_diving != 0) { types.push_back(GUIDED_DIVING); } - if (settings.coefficient_diving != 0) { types.push_back(COEFFICIENT_DIVING); } - return types; -} - -template -std::array get_max_workers( - i_t num_workers, const std::vector& strategies) -{ - std::array max_num_workers; - max_num_workers.fill(0); - - i_t bfs_workers = std::max(strategies.size() == 1 ? num_workers : num_workers / 4, 1); - max_num_workers[BEST_FIRST] = bfs_workers; - - i_t diving_workers = (num_workers - bfs_workers); - i_t m = strategies.size() - 1; - - for (size_t i = 1, k = 0; i < strategies.size(); ++i) { - i_t start = (double)k * diving_workers / m; - i_t end = (double)(k + 1) * diving_workers / m; - max_num_workers[strategies[i]] = end - start; - ++k; - } - - return max_num_workers; -} - } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/branch_and_bound/worker_pool.hpp b/cpp/src/branch_and_bound/worker_pool.hpp new file mode 100644 index 0000000000..2b52b6e7bf --- /dev/null +++ b/cpp/src/branch_and_bound/worker_pool.hpp @@ -0,0 +1,130 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include + +namespace cuopt::linear_programming::dual_simplex { + +template +class branch_and_bound_worker_pool_t { + public: + void init(i_t num_workers, + const lp_problem_t& original_lp, + const csr_matrix_t& Arow, + const std::vector& var_type, + const simplex_solver_settings_t& settings) + { + workers_.resize(num_workers); + num_idle_workers_ = num_workers; + for (i_t i = 0; i < num_workers; ++i) { + workers_[i] = std::make_unique>( + i, original_lp, Arow, var_type, settings); + idle_workers_.push_front(i); + } + + is_initialized = true; + } + + // Here, we are assuming that the scheduler is the only + // thread that can retrieve/pop an idle worker. + branch_and_bound_worker_t* get_idle_worker() + { + std::lock_guard lock(mutex_); + if (idle_workers_.empty()) { + return nullptr; + } else { + i_t idx = idle_workers_.front(); + return workers_[idx].get(); + } + } + + // Here, we are assuming that the scheduler is the only + // thread that can retrieve/pop an idle worker. + void pop_idle_worker() + { + std::lock_guard lock(mutex_); + if (!idle_workers_.empty()) { + idle_workers_.pop_front(); + num_idle_workers_--; + } + } + + void return_worker_to_pool(branch_and_bound_worker_t* worker) + { + worker->is_active = false; + std::lock_guard lock(mutex_); + idle_workers_.push_back(worker->worker_id); + num_idle_workers_++; + } + + f_t get_lower_bound() + { + f_t lower_bound = std::numeric_limits::infinity(); + + if (is_initialized) { + for (i_t i = 0; i < workers_.size(); ++i) { + if (workers_[i]->search_strategy == BEST_FIRST && workers_[i]->is_active) { + lower_bound = std::min(workers_[i]->lower_bound.load(), lower_bound); + } + } + } + + return lower_bound; + } + + i_t num_idle_workers() { return num_idle_workers_; } + + private: + // Worker pool + std::vector>> workers_; + bool is_initialized = false; + + omp_mutex_t mutex_; + std::deque idle_workers_; + omp_atomic_t num_idle_workers_; +}; + +template +std::vector get_search_strategies( + diving_heuristics_settings_t settings) +{ + std::vector types; + types.reserve(num_search_strategies); + types.push_back(BEST_FIRST); + if (settings.pseudocost_diving != 0) { types.push_back(PSEUDOCOST_DIVING); } + if (settings.line_search_diving != 0) { types.push_back(LINE_SEARCH_DIVING); } + if (settings.guided_diving != 0) { types.push_back(GUIDED_DIVING); } + if (settings.coefficient_diving != 0) { types.push_back(COEFFICIENT_DIVING); } + return types; +} + +template +std::array get_max_workers( + i_t num_workers, const std::vector& strategies) +{ + std::array max_num_workers; + max_num_workers.fill(0); + + i_t bfs_workers = std::max(strategies.size() == 1 ? num_workers : num_workers / 4, 1); + max_num_workers[BEST_FIRST] = bfs_workers; + + i_t diving_workers = (num_workers - bfs_workers); + i_t m = strategies.size() - 1; + + for (size_t i = 1, k = 0; i < strategies.size(); ++i) { + i_t start = (double)k * diving_workers / m; + i_t end = (double)(k + 1) * diving_workers / m; + max_num_workers[strategies[i]] = end - start; + ++k; + } + + return max_num_workers; +} + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/utilities/omp_helpers.hpp b/cpp/src/utilities/omp_helpers.hpp index f6e66472dd..8890a7487a 100644 --- a/cpp/src/utilities/omp_helpers.hpp +++ b/cpp/src/utilities/omp_helpers.hpp @@ -54,6 +54,15 @@ class omp_mutex_t { std::unique_ptr mutex; }; +// Empty class with the same methods as `omp_mutex_t`. This is mainly used for cleanly disabling +// the `omp_mutex_t` via type alias (`lock` and `unlock` are replaced by NOOPs). +class fake_omp_mutex_t { + public: + static void lock() {} + static void unlock() {} + static bool try_lock() { return true; } +}; + // Wrapper for omp atomic operations. See // https://www.openmp.org/spec-html/5.1/openmpsu105.html. template @@ -117,6 +126,11 @@ class omp_atomic_t { T fetch_sub(T inc) { return fetch_add(-inc); } + // Get the underlying value without atomics + T& get_no_atomic() { return val; } + + T get_no_atomic() const { return val; } + private: T val;