diff --git a/benchmarks/linear_programming/cuopt/run_mip.cpp b/benchmarks/linear_programming/cuopt/run_mip.cpp index 64b1bb4bb1..a4f52cb4ed 100644 --- a/benchmarks/linear_programming/cuopt/run_mip.cpp +++ b/benchmarks/linear_programming/cuopt/run_mip.cpp @@ -36,6 +36,7 @@ #include #include +#include #include #include #include @@ -382,6 +383,8 @@ int main(int argc, char* argv[]) double memory_limit = program.get("--memory-limit"); bool track_allocations = program.get("--track-allocations")[0] == 't'; + if (num_cpu_threads < 0) { num_cpu_threads = omp_get_max_threads() / n_gpus; } + if (program.is_used("--out-dir")) { out_dir = program.get("--out-dir"); result_file = out_dir + "/final_result.csv"; diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 1a7ea7d825..5870a1b46a 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -15,9 +15,9 @@ * limitations under the License. */ -#include +#include +#include #include - #include #include #include @@ -29,10 +29,12 @@ #include #include +#include #include #include -#include +#include #include +#include #include #include @@ -133,37 +135,6 @@ void set_uninitialized_steepest_edge_norms(std::vector& edge_norms) } } -constexpr int write_graphviz = false; - -template -void graphviz_node(const simplex_solver_settings_t& settings, - mip_node_t* node_ptr, - std::string label, - f_t val) -{ - if (write_graphviz) { - settings.log.printf("Node%d [label=\"%s %.16e\"]\n", node_ptr->node_id, label.c_str(), val); - } -} - -template -void graphviz_edge(const simplex_solver_settings_t& settings, - mip_node_t* origin_ptr, - mip_node_t* dest_ptr, - i_t branch_var, - i_t branch_dir, - f_t bound) -{ - if (write_graphviz) { - settings.log.printf("Node%d -> Node%d [label=\"x%d %s %e\"]\n", - origin_ptr->node_id, - dest_ptr->node_id, - branch_var, - branch_dir == 0 ? "<=" : ">=", - bound); - } -} - dual::status_t convert_lp_status_to_dual_status(lp_status_t status) { if (status == lp_status_t::OPTIMAL) { @@ -199,7 +170,18 @@ f_t relative_gap(f_t obj_value, f_t lower_bound) f_t user_mip_gap = obj_value == 0.0 ? (lower_bound == 0.0 ? 0.0 : std::numeric_limits::infinity()) : std::abs(obj_value - lower_bound) / std::abs(obj_value); - // Handle NaNs (i.e., NaN != NaN) + if (std::isnan(user_mip_gap)) { return std::numeric_limits::infinity(); } + return user_mip_gap; +} + +template +f_t user_relative_gap(const lp_problem_t& lp, f_t obj_value, f_t lower_bound) +{ + f_t user_obj = compute_user_objective(lp, obj_value); + f_t user_lower_bound = compute_user_objective(lp, lower_bound); + f_t user_mip_gap = user_obj == 0.0 + ? (user_lower_bound == 0.0 ? 0.0 : std::numeric_limits::infinity()) + : std::abs(user_obj - user_lower_bound) / std::abs(user_obj); if (std::isnan(user_mip_gap)) { return std::numeric_limits::infinity(); } return user_mip_gap; } @@ -229,7 +211,8 @@ branch_and_bound_t::branch_and_bound_t( original_lp_(1, 1, 1), incumbent_(1), root_relax_soln_(1, 1), - pc_(1) + pc_(1), + status_(mip_exploration_status_t::UNSET) { stats_.start_time = tic(); convert_user_problem(original_problem_, settings_, original_lp_, new_slacks_); @@ -238,14 +221,6 @@ branch_and_bound_t::branch_and_bound_t( mutex_upper_.lock(); upper_bound_ = inf; mutex_upper_.unlock(); - - mutex_lower_.lock(); - lower_bound_ = -inf; - mutex_lower_.unlock(); - - mutex_branching_.lock(); - currently_branching_ = false; - mutex_branching_.unlock(); } template @@ -260,12 +235,27 @@ f_t branch_and_bound_t::get_upper_bound() template f_t branch_and_bound_t::get_lower_bound() { - mutex_lower_.lock(); - const f_t lower_bound = lower_bound_; - mutex_lower_.unlock(); + f_t lower_bound = lower_bound_ceiling_.load(); + mutex_heap_.lock(); + if (heap_.size() > 0) { lower_bound = std::min(heap_.top()->lower_bound, lower_bound); } + mutex_heap_.unlock(); + + for (i_t i = 0; i < local_lower_bounds_.size(); ++i) { + lower_bound = std::min(local_lower_bounds_[i].load(), lower_bound); + } + return lower_bound; } +template +i_t branch_and_bound_t::get_heap_size() +{ + mutex_heap_.lock(); + i_t size = heap_.size(); + mutex_heap_.unlock(); + return size; +} + template void branch_and_bound_t::set_new_solution(const std::vector& solution) { @@ -304,16 +294,13 @@ void branch_and_bound_t::set_new_solution(const std::vector& solu mutex_upper_.unlock(); if (is_feasible) { - mutex_branching_.lock(); - bool currently_branching = currently_branching_; - mutex_branching_.unlock(); - if (currently_branching) { + if (status_ == mip_exploration_status_t::RUNNING) { f_t user_obj = compute_user_objective(original_lp_, obj); f_t user_lower = compute_user_objective(original_lp_, get_lower_bound()); std::string gap = user_mip_gap(user_obj, user_lower); settings_.log.printf( - "H %+13.6e %+10.6e %s %9.2f\n", + "H %+13.6e %+10.6e %s %9.2f\n", user_obj, user_lower, gap.c_str(), @@ -424,6 +411,7 @@ void branch_and_bound_t::repair_heuristic_solutions() f_t obj = compute_user_objective(original_lp_, repaired_obj); f_t lower = compute_user_objective(original_lp_, get_lower_bound()); std::string user_gap = user_mip_gap(obj, lower); + settings_.log.printf( "H %+13.6e %+10.6e %s %9.2f\n", obj, @@ -445,58 +433,83 @@ void branch_and_bound_t::repair_heuristic_solutions() } template -void branch_and_bound_t::branch(mip_node_t* parent_node, - i_t branch_var, - f_t branch_var_val, - const std::vector& parent_vstatus) +mip_status_t branch_and_bound_t::set_final_solution(mip_solution_t& solution, + f_t lower_bound) { - // down child - auto down_child = std::make_unique>( - original_lp_, parent_node, ++stats_.num_nodes, branch_var, 0, branch_var_val, parent_vstatus); + mip_status_t mip_status = mip_status_t::UNSET; - graphviz_edge( - settings_, parent_node, down_child.get(), branch_var, 0, std::floor(branch_var_val)); + if (status_ == mip_exploration_status_t::NUMERICAL) { + settings_.log.printf("Numerical issue encountered. Stopping the solver...\n"); + mip_status = mip_status_t::NUMERICAL; + } - // up child - auto up_child = std::make_unique>( - original_lp_, parent_node, ++stats_.num_nodes, branch_var, 1, branch_var_val, parent_vstatus); + if (status_ == mip_exploration_status_t::TIME_LIMIT) { + settings_.log.printf("Time limit reached. Stopping the solver...\n"); + mip_status = mip_status_t::TIME_LIMIT; + } - graphviz_edge(settings_, parent_node, up_child.get(), branch_var, 1, std::ceil(branch_var_val)); + f_t upper_bound = get_upper_bound(); + f_t gap = upper_bound - lower_bound; + f_t obj = compute_user_objective(original_lp_, upper_bound); + f_t user_lower = compute_user_objective(original_lp_, lower_bound); + f_t gap_rel = user_relative_gap(original_lp_, upper_bound, lower_bound); - assert(parent_vstatus.size() == original_lp_.num_cols); - parent_node->add_children(std::move(down_child), - std::move(up_child)); // child pointers moved into the tree -} + settings_.log.printf( + "Explored %d nodes in %.2fs.\n", stats_.nodes_explored, toc(stats_.start_time)); + settings_.log.printf("Absolute Gap %e Objective %.16e Lower Bound %.16e\n", gap, obj, user_lower); -template -void branch_and_bound_t::update_tree(mip_node_t* node_ptr, node_status_t status) -{ - std::vector*> stack; - node_ptr->set_status(status, stack); - remove_fathomed_nodes(stack); + if (gap <= settings_.absolute_mip_gap_tol || gap_rel <= settings_.relative_mip_gap_tol) { + mip_status = mip_status_t::OPTIMAL; + if (gap > 0 && gap <= settings_.absolute_mip_gap_tol) { + settings_.log.printf("Optimal solution found within absolute MIP gap tolerance (%.1e)\n", + settings_.absolute_mip_gap_tol); + } else if (gap > 0 && gap_rel <= settings_.relative_mip_gap_tol) { + settings_.log.printf("Optimal solution found within relative MIP gap tolerance (%.1e)\n", + settings_.relative_mip_gap_tol); + } else { + settings_.log.printf("Optimal solution found.\n"); + } + if (settings_.heuristic_preemption_callback != nullptr) { + settings_.heuristic_preemption_callback(); + } + } + + if (stats_.nodes_explored > 0 && stats_.nodes_unexplored == 0 && upper_bound == inf) { + settings_.log.printf("Integer infeasible.\n"); + mip_status = mip_status_t::INFEASIBLE; + if (settings_.heuristic_preemption_callback != nullptr) { + settings_.heuristic_preemption_callback(); + } + } + + uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, solution.x); + solution.objective = incumbent_.objective; + solution.lower_bound = lower_bound; + solution.nodes_explored = stats_.nodes_explored; + solution.simplex_iterations = stats_.total_lp_iters; + + return mip_status; } template void branch_and_bound_t::add_feasible_solution(f_t leaf_objective, const std::vector& leaf_solution, i_t leaf_depth, - char symbol) + char thread_type) { bool send_solution = false; i_t nodes_explored = stats_.nodes_explored; i_t nodes_unexplored = stats_.nodes_unexplored; - f_t gap; mutex_upper_.lock(); if (leaf_objective < upper_bound_) { incumbent_.set_incumbent_solution(leaf_objective, leaf_solution); upper_bound_ = leaf_objective; f_t lower_bound = get_lower_bound(); - gap = upper_bound_ - lower_bound; f_t obj = compute_user_objective(original_lp_, upper_bound_); f_t lower = compute_user_objective(original_lp_, lower_bound); - settings_.log.printf("%c%8d %8lu %+13.6e %+10.6e %4d %7.1e %s %9.2f\n", - symbol, + settings_.log.printf("%c%10d %10lu %+13.6e %+10.6e %6d %7.1e %s %9.2f\n", + thread_type, nodes_explored, nodes_unexplored, obj, @@ -515,29 +528,57 @@ void branch_and_bound_t::add_feasible_solution(f_t leaf_objective, settings_.solution_callback(original_x, upper_bound_); } mutex_upper_.unlock(); +} - if (send_solution) { - mutex_gap_.lock(); - gap_ = gap; - mutex_gap_.unlock(); +template +std::pair*, mip_node_t*> +branch_and_bound_t::child_selection(mip_node_t* node_ptr) +{ + const i_t branch_var = node_ptr->get_down_child()->branch_var; + const f_t branch_var_val = node_ptr->get_down_child()->fractional_val; + const f_t down_val = std::floor(root_relax_soln_.x[branch_var]); + const f_t up_val = std::ceil(root_relax_soln_.x[branch_var]); + const f_t down_dist = branch_var_val - down_val; + const f_t up_dist = up_val - branch_var_val; + constexpr f_t eps = 1e-6; + + if (down_dist < up_dist + eps) { + return std::make_pair(node_ptr->get_down_child(), node_ptr->get_up_child()); + + } else { + return std::make_pair(node_ptr->get_up_child(), node_ptr->get_down_child()); } } template -dual::status_t branch_and_bound_t::node_dual_simplex( - i_t leaf_id, - lp_problem_t& leaf_problem, - std::vector& leaf_vstatus, - lp_solution_t& leaf_solution, - std::vector& bounds_changed, - csc_matrix_t& Arow, - f_t upper_bound, - logger_t& log) +node_status_t branch_and_bound_t::solve_node(search_tree_t& search_tree, + mip_node_t* node_ptr, + lp_problem_t& leaf_problem, + const csc_matrix_t& Arow, + f_t upper_bound, + logger_t& log, + char thread_type) { - i_t node_iter = 0; + f_t abs_fathom_tol = settings_.absolute_mip_gap_tol / 10; + + std::vector& leaf_vstatus = node_ptr->vstatus; + lp_solution_t leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols); assert(leaf_vstatus.size() == leaf_problem.num_cols); - f_t lp_start_time = tic(); - std::vector leaf_edge_norms = edge_norms_; // = node.steepest_edge_norms; + + // Set the correct bounds for the leaf problem + leaf_problem.lower = original_lp_.lower; + leaf_problem.upper = original_lp_.upper; + + std::vector bounds_changed(leaf_problem.num_cols, false); + // Technically, we can get the already strengthened bounds from the node/parent instead of + // getting it from the original problem and re-strengthening. But this requires storing + // two vectors at each node and potentially cause memory issues + node_ptr->get_variable_bounds(leaf_problem.lower, leaf_problem.upper, bounds_changed); + + i_t node_iter = 0; + f_t lp_start_time = tic(); + std::vector leaf_edge_norms = edge_norms_; // = node.steepest_edge_norms; + simplex_solver_settings_t lp_settings = settings_; lp_settings.set_log(false); lp_settings.cut_off = upper_bound + settings_.dual_tol; @@ -562,307 +603,410 @@ dual::status_t branch_and_bound_t::node_dual_simplex( leaf_edge_norms); if (lp_status == dual::status_t::NUMERICAL) { - log.printf("Numerical issue node %d. Resolving from scratch.\n", leaf_id); + log.printf("Numerical issue node %d. Resolving from scratch.\n", node_ptr->node_id); lp_status_t second_status = solve_linear_program_advanced( leaf_problem, lp_start_time, lp_settings, leaf_solution, leaf_vstatus, leaf_edge_norms); lp_status = convert_lp_status_to_dual_status(second_status); } } - mutex_stats_.lock(); stats_.total_lp_solve_time += toc(lp_start_time); stats_.total_lp_iters += node_iter; - mutex_stats_.unlock(); - - return lp_status; -} - -template -mip_status_t branch_and_bound_t::solve_node_lp( - mip_node_t* node_ptr, - lp_problem_t& leaf_problem, - csc_matrix_t& Arow, - const std::vector& var_types, - f_t upper_bound) -{ - logger_t log; - log.log = false; - std::vector& leaf_vstatus = node_ptr->vstatus; - lp_solution_t leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols); - - // Set the correct bounds for the leaf problem - leaf_problem.lower = original_lp_.lower; - leaf_problem.upper = original_lp_.upper; - - std::vector bounds_changed(original_lp_.num_cols, false); - // Technically, we can get the already strengthened bounds from the node/parent instead of - // getting it from the original problem and re-strengthening. But this requires storing - // two vectors at each node and potentially cause memory issues - node_ptr->get_variable_bounds(leaf_problem.lower, leaf_problem.upper, bounds_changed); - - dual::status_t lp_status = node_dual_simplex(node_ptr->node_id, - leaf_problem, - leaf_vstatus, - leaf_solution, - bounds_changed, - Arow, - upper_bound, - settings_.log); if (lp_status == dual::status_t::DUAL_UNBOUNDED) { - node_ptr->lower_bound = inf; - graphviz_node(settings_, node_ptr, "infeasible", 0.0); - update_tree(node_ptr, node_status_t::INFEASIBLE); // Node was infeasible. Do not branch + node_ptr->lower_bound = inf; + search_tree.graphviz_node(log, node_ptr, "infeasible", 0.0); + search_tree.update_tree(node_ptr, node_status_t::INFEASIBLE); + return node_status_t::INFEASIBLE; } else if (lp_status == dual::status_t::CUTOFF) { + // Node was cut off. Do not branch node_ptr->lower_bound = upper_bound; f_t leaf_objective = compute_objective(leaf_problem, leaf_solution.x); - graphviz_node(settings_, node_ptr, "cut off", leaf_objective); - update_tree(node_ptr, node_status_t::FATHOMED); - // Node was cut off. Do not branch + search_tree.graphviz_node(log, node_ptr, "cut off", leaf_objective); + search_tree.update_tree(node_ptr, node_status_t::FATHOMED); + return node_status_t::FATHOMED; + } else if (lp_status == dual::status_t::OPTIMAL) { // LP was feasible - std::vector fractional; - const i_t leaf_fractional = - fractional_variables(settings_, leaf_solution.x, var_types_, fractional); - f_t leaf_objective = compute_objective(leaf_problem, leaf_solution.x); - graphviz_node(settings_, node_ptr, "lower bound", leaf_objective); - - mutex_pc_.lock(); - pc_.update_pseudo_costs(node_ptr, leaf_objective); - mutex_pc_.unlock(); + std::vector leaf_fractional; + i_t leaf_num_fractional = + fractional_variables(settings_, leaf_solution.x, var_types_, leaf_fractional); + f_t leaf_objective = compute_objective(leaf_problem, leaf_solution.x); node_ptr->lower_bound = leaf_objective; + search_tree.graphviz_node(log, node_ptr, "lower bound", leaf_objective); + pc_.update_pseudo_costs(node_ptr, leaf_objective); - constexpr f_t fathom_tol = 1e-5; - if (leaf_fractional == 0) { - add_feasible_solution(leaf_objective, leaf_solution.x, node_ptr->depth, 'B'); - graphviz_node(settings_, node_ptr, "integer feasible", leaf_objective); - update_tree(node_ptr, node_status_t::INTEGER_FEASIBLE); + if (leaf_num_fractional == 0) { + // Found a integer feasible solution + add_feasible_solution(leaf_objective, leaf_solution.x, node_ptr->depth, thread_type); + search_tree.graphviz_node(log, node_ptr, "integer feasible", leaf_objective); + search_tree.update_tree(node_ptr, node_status_t::INTEGER_FEASIBLE); + return node_status_t::INTEGER_FEASIBLE; - } else if (leaf_objective <= upper_bound + fathom_tol) { + } else if (leaf_objective <= upper_bound + abs_fathom_tol) { // Choose fractional variable to branch on - mutex_pc_.lock(); - const i_t branch_var = pc_.variable_selection( - fractional, leaf_solution.x, leaf_problem.lower, leaf_problem.upper, log); - mutex_pc_.unlock(); + const i_t branch_var = + pc_.variable_selection(leaf_fractional, leaf_solution.x, lp_settings.log); assert(leaf_vstatus.size() == leaf_problem.num_cols); - branch(node_ptr, branch_var, leaf_solution.x[branch_var], leaf_vstatus); + search_tree.branch( + node_ptr, branch_var, leaf_solution.x[branch_var], leaf_vstatus, original_lp_, log); node_ptr->status = node_status_t::HAS_CHILDREN; + return node_status_t::HAS_CHILDREN; } else { - graphviz_node(settings_, node_ptr, "fathomed", leaf_objective); - update_tree(node_ptr, node_status_t::FATHOMED); + search_tree.graphviz_node(log, node_ptr, "fathomed", leaf_objective); + search_tree.update_tree(node_ptr, node_status_t::FATHOMED); + return node_status_t::FATHOMED; } + } else if (lp_status == dual::status_t::TIME_LIMIT) { + search_tree.graphviz_node(log, node_ptr, "timeout", 0.0); + search_tree.update_tree(node_ptr, node_status_t::TIME_LIMIT); + return node_status_t::TIME_LIMIT; + } else { - graphviz_node(settings_, node_ptr, "numerical", 0.0); - settings_.log.printf("Encountered LP status %d. This indicates a numerical issue.\n", - lp_status); - return mip_status_t::NUMERICAL; - } + if (thread_type == 'B') { + lower_bound_ceiling_.fetch_min(node_ptr->lower_bound); + log.printf( + "LP returned status %d on node %d. This indicates a numerical issue. The best bound is set " + "to " + "%+10.6e.\n", + lp_status, + node_ptr->node_id, + compute_user_objective(original_lp_, lower_bound_ceiling_.load())); + } - return mip_status_t::UNSET; + search_tree.graphviz_node(log, node_ptr, "numerical", 0.0); + search_tree.update_tree(node_ptr, node_status_t::NUMERICAL); + return node_status_t::NUMERICAL; + } } template -mip_status_t branch_and_bound_t::explore_tree(i_t branch_var, - mip_solution_t& solution) +void branch_and_bound_t::exploration_ramp_up(search_tree_t* search_tree, + mip_node_t* node, + lp_problem_t& leaf_problem, + const csc_matrix_t& Arow, + i_t initial_heap_size) { - mip_status_t status = mip_status_t::UNSET; - logger_t log; - log.log = false; - auto compare = [](mip_node_t* a, mip_node_t* b) { - return a->lower_bound > - b->lower_bound; // True if a comes before b, elements that come before are output last - }; + if (status_ != mip_exploration_status_t::RUNNING) { return; } + if (omp_get_thread_num() == 0) { repair_heuristic_solutions(); } + + f_t lower_bound = node->lower_bound; + f_t upper_bound = get_upper_bound(); + f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); + f_t abs_gap = upper_bound - lower_bound; + i_t nodes_explored = 0; + i_t nodes_unexplored = 0; + + nodes_explored = (stats_.nodes_explored++); + nodes_unexplored = (stats_.nodes_unexplored--); + stats_.nodes_since_last_log++; + + if (lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { + search_tree->graphviz_node(settings_.log, node, "cutoff", node->lower_bound); + search_tree->update_tree(node, node_status_t::FATHOMED); + return; + } - std::priority_queue*, std::vector*>, decltype(compare)> - heap(compare); + f_t now = toc(stats_.start_time); - mip_node_t root_node(root_objective_, root_vstatus_); - graphviz_node(settings_, &root_node, "lower bound", root_objective_); + if (omp_get_thread_num() == 0) { + f_t time_since_last_log = stats_.last_log == 0 ? 1.0 : toc(stats_.last_log); - branch(&root_node, branch_var, root_relax_soln_.x[branch_var], root_vstatus_); + if (((stats_.nodes_since_last_log >= 10 || abs_gap < 10 * settings_.absolute_mip_gap_tol) && + (time_since_last_log >= 1)) || + (time_since_last_log > 30) || now > settings_.time_limit) { + f_t obj = compute_user_objective(original_lp_, upper_bound); + f_t user_lower = compute_user_objective(original_lp_, root_objective_); + std::string gap_user = user_mip_gap(obj, user_lower); - // the stack does not own the unique_ptr the tree does - heap.push(root_node.get_down_child()); - heap.push(root_node.get_up_child()); + settings_.log.printf(" %10d %10lu %+13.6e %+10.6e %6d %7.1e %s %9.2f\n", + nodes_explored, + nodes_unexplored, + obj, + user_lower, + node->depth, + nodes_explored > 0 ? stats_.total_lp_iters / nodes_explored : 0, + gap_user.c_str(), + now); - // Make a copy of the original LP. We will modify its bounds at each leaf - lp_problem_t leaf_problem = original_lp_; - csc_matrix_t Arow(1, 1, 1); - original_lp_.A.transpose(Arow); + stats_.nodes_since_last_log = 0; + } + } + + if (toc(stats_.start_time) > settings_.time_limit) { + status_ = mip_exploration_status_t::TIME_LIMIT; + return; + } + node_status_t node_status = + solve_node(*search_tree, node, leaf_problem, Arow, upper_bound, settings_.log, 'B'); + + if (node_status == node_status_t::TIME_LIMIT) { + status_ = mip_exploration_status_t::TIME_LIMIT; + return; + + } else if (node_status == node_status_t::HAS_CHILDREN) { + stats_.nodes_unexplored += 2; + + // If we haven't generated enough nodes to keep the threads busy, continue the ramp up phase + if (stats_.nodes_unexplored < initial_heap_size) { +#pragma omp task + exploration_ramp_up( + search_tree, node->get_down_child(), leaf_problem, Arow, initial_heap_size); + +#pragma omp task + exploration_ramp_up(search_tree, node->get_up_child(), leaf_problem, Arow, initial_heap_size); + + } else { + // We've generated enough nodes, push further nodes onto the heap + mutex_heap_.lock(); + heap_.push(node->get_down_child()); + heap_.push(node->get_up_child()); + mutex_heap_.unlock(); + } + } +} - f_t lower_bound = get_lower_bound(); - f_t gap = get_upper_bound() - lower_bound; - f_t last_log = 0; +template +void branch_and_bound_t::explore_subtree(i_t id, + search_tree_t& search_tree, + mip_node_t* start_node, + lp_problem_t& leaf_problem, + const csc_matrix_t& Arow) +{ + std::deque*> stack; + stack.push_front(start_node); - f_t user_upper_bound = compute_user_objective(original_lp_, get_upper_bound()); - f_t user_lower_bound = compute_user_objective(original_lp_, lower_bound); - f_t rel_gap = relative_gap(user_upper_bound, user_lower_bound); - while (gap > settings_.absolute_mip_gap_tol && rel_gap > settings_.relative_mip_gap_tol && - heap.size() > 0) { - repair_heuristic_solutions(); + while (stack.size() > 0 && status_ == mip_exploration_status_t::RUNNING) { + if (omp_get_thread_num() == 0) { repair_heuristic_solutions(); } - // Get a node off the heap - mip_node_t* node_ptr = heap.top(); - heap.pop(); - stats_.nodes_explored++; + mip_node_t* node_ptr = stack.front(); + stack.pop_front(); + f_t lower_bound = node_ptr->lower_bound; f_t upper_bound = get_upper_bound(); - if (upper_bound < node_ptr->lower_bound) { - // This node was put on the heap earlier but its lower bound is now greater than the current - // upper bound - update_tree(node_ptr, node_status_t::FATHOMED); - graphviz_node(settings_, node_ptr, "cutoff", node_ptr->lower_bound); + f_t abs_gap = upper_bound - lower_bound; + f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); + + // This is based on three assumptions: + // - The stack only contains sibling nodes, i.e., the current node and it sibling (if + // applicable) + // - The current node and its siblings uses the lower bound of the parent before solving the LP + // relaxation + // - The lower bound of the parent is lower or equal to its children + assert(id < local_lower_bounds_.size()); + local_lower_bounds_[id] = lower_bound; + i_t nodes_explored = stats_.nodes_explored++; + i_t nodes_unexplored = stats_.nodes_unexplored--; + stats_.nodes_since_last_log++; + + if (lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { + search_tree.graphviz_node(settings_.log, node_ptr, "cutoff", node_ptr->lower_bound); + search_tree.update_tree(node_ptr, node_status_t::FATHOMED); continue; } - mutex_lower_.lock(); - lower_bound = lower_bound_ = node_ptr->lower_bound; - mutex_lower_.unlock(); - - mutex_gap_.lock(); - gap_ = gap = upper_bound - lower_bound; - mutex_gap_.unlock(); - - i_t nodes_explored = stats_.nodes_explored; - f_t now = toc(stats_.start_time); - f_t time_since_log = last_log == 0 ? 1.0 : toc(last_log); - if ((nodes_explored % 1000 == 0 || gap < 10 * settings_.absolute_mip_gap_tol || - nodes_explored < 1000) && - (time_since_log >= 1) || - (time_since_log > 60) || now > settings_.time_limit) { - f_t user_obj = compute_user_objective(original_lp_, upper_bound); - f_t user_lower = compute_user_objective(original_lp_, lower_bound); - std::string user_gap = user_mip_gap(user_obj, user_lower); - - settings_.log.printf(" %8d %8lu %+13.6e %+10.6e %4d %7.1e %s %9.2f\n", - nodes_explored, - heap.size(), - user_obj, - user_lower, - node_ptr->depth, - nodes_explored > 0 ? stats_.total_lp_iters / nodes_explored : 0, - user_gap.c_str(), - now); - last_log = tic(); + + f_t now = toc(stats_.start_time); + + if (id == 0) { + f_t time_since_last_log = stats_.last_log == 0 ? 1.0 : toc(stats_.last_log); + + if (((stats_.nodes_since_last_log >= 1000 || abs_gap < 10 * settings_.absolute_mip_gap_tol) && + time_since_last_log >= 1) || + (time_since_last_log > 30) || now > settings_.time_limit) { + f_t obj = compute_user_objective(original_lp_, upper_bound); + f_t user_lower = compute_user_objective(original_lp_, get_lower_bound()); + std::string gap_user = user_mip_gap(obj, user_lower); + settings_.log.printf(" %10d %10lu %+13.6e %+10.6e %6d %7.1e %s %9.2f\n", + nodes_explored, + nodes_unexplored, + obj, + user_lower, + node_ptr->depth, + nodes_explored > 0 ? stats_.total_lp_iters / nodes_explored : 0, + gap_user.c_str(), + now); + stats_.last_log = tic(); + stats_.nodes_since_last_log = 0; + } } if (toc(stats_.start_time) > settings_.time_limit) { - settings_.log.printf("Hit time limit. Stopping\n"); - status = mip_status_t::TIME_LIMIT; + status_ = mip_exploration_status_t::TIME_LIMIT; break; } - status = solve_node_lp(node_ptr, leaf_problem, Arow, var_types_, upper_bound); + node_status_t node_status = + solve_node(search_tree, node_ptr, leaf_problem, Arow, upper_bound, settings_.log, 'B'); + + if (node_status == node_status_t::TIME_LIMIT) { + status_ = mip_exploration_status_t::TIME_LIMIT; + break; + + } else if (node_status == node_status_t::HAS_CHILDREN) { + // The stack should only contain the children of the current parent. + // If the stack size is greater than 0, + // we pop the current node from the stack and place it in the global heap, + // since we are about to add the two children to the stack + if (stack.size() > 0) { + mip_node_t* node = stack.back(); + stack.pop_back(); + + // The order here matters. We want to create a copy of the node + // before adding to the global heap. Otherwise, + // some thread may consume the node (possibly fathoming it) + // before we had the chance to add to the diving queue. + // This lead to a SIGSEGV. Although, in this case, it + // would be better if we discard the node instead. + if (get_heap_size() > settings_.num_bfs_threads) { + mutex_dive_queue_.lock(); + dive_queue_.push(node->detach_copy()); + mutex_dive_queue_.unlock(); + } + + mutex_heap_.lock(); + heap_.push(node); + mutex_heap_.unlock(); + } - if (status == mip_status_t::NUMERICAL) { break; } + stats_.nodes_unexplored += 2; - if (node_ptr->status == node_status_t::HAS_CHILDREN) { - // the heap does not own the unique_ptr the tree does - heap.push(node_ptr->get_down_child()); - heap.push(node_ptr->get_up_child()); + auto [first, second] = child_selection(node_ptr); + stack.push_front(second); + stack.push_front(first); } } +} - stats_.nodes_unexplored = heap.size(); +template +void branch_and_bound_t::best_first_thread(i_t id, + search_tree_t& search_tree, + lp_problem_t& leaf_problem, + const csc_matrix_t& Arow) +{ + f_t lower_bound = -inf; + f_t upper_bound = inf; + f_t abs_gap = inf; + f_t rel_gap = inf; + + while (status_ == mip_exploration_status_t::RUNNING && abs_gap > settings_.absolute_mip_gap_tol && + rel_gap > settings_.relative_mip_gap_tol && + (active_subtrees_ > 0 || get_heap_size() > 0)) { + mip_node_t* node_ptr = nullptr; + + // If there any node left in the heap, we pop the top node and explore it. + mutex_heap_.lock(); + if (heap_.size() > 0) { + node_ptr = heap_.top(); + heap_.pop(); + active_subtrees_++; + } + mutex_heap_.unlock(); + + if (node_ptr != nullptr) { + if (get_upper_bound() < node_ptr->lower_bound) { + // This node was put on the heap earlier but its lower bound is now greater than the + // current upper bound + search_tree.graphviz_node(settings_.log, node_ptr, "cutoff", node_ptr->lower_bound); + search_tree.update_tree(node_ptr, node_status_t::FATHOMED); + active_subtrees_--; + continue; + } - if (stats_.nodes_unexplored == 0) { - mutex_lower_.lock(); - lower_bound = lower_bound_ = root_node.lower_bound; - mutex_lower_.unlock(); + // Best-first search with plunging + explore_subtree(id, search_tree, node_ptr, leaf_problem, Arow); + active_subtrees_--; + } - mutex_gap_.lock(); - gap_ = gap = get_upper_bound() - lower_bound; - mutex_gap_.unlock(); + lower_bound = get_lower_bound(); + upper_bound = get_upper_bound(); + abs_gap = upper_bound - lower_bound; + rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); } - return status; + // Check if it is the last thread that exited the loop and no + // timeout or numerical error has happen. + if (status_ == mip_exploration_status_t::RUNNING) { + if (active_subtrees_ == 0) { + status_ = mip_exploration_status_t::COMPLETED; + } else { + local_lower_bounds_[id] = inf; + } + } } template -mip_status_t branch_and_bound_t::dive(i_t branch_var, mip_solution_t& solution) +void branch_and_bound_t::diving_thread(lp_problem_t& leaf_problem, + const csc_matrix_t& Arow) { - mip_status_t status = mip_status_t::UNSET; - logger_t log; log.log = false; - std::vector*> node_stack; - - mip_node_t root_node(root_objective_, root_vstatus_); - graphviz_node(settings_, &root_node, "lower bound", root_objective_); - - branch(&root_node, branch_var, root_relax_soln_.x[branch_var], root_vstatus_); + while (status_ == mip_exploration_status_t::RUNNING && + (active_subtrees_ > 0 || get_heap_size() > 0)) { + std::optional> start_node; - // the stack does not own the unique_ptr the tree does - node_stack.push_back(root_node.get_down_child()); - node_stack.push_back(root_node.get_up_child()); + mutex_dive_queue_.lock(); + if (dive_queue_.size() > 0) { start_node = dive_queue_.pop(); } + mutex_dive_queue_.unlock(); - // Make a copy of the original LP. We will modify its bounds at each leaf - lp_problem_t leaf_problem = original_lp_; + if (start_node.has_value()) { + if (get_upper_bound() < start_node->lower_bound) { continue; } - csc_matrix_t Arow(1, 1, 1); - original_lp_.A.transpose(Arow); + search_tree_t subtree(std::move(start_node.value())); + std::deque*> stack; + stack.push_front(&subtree.root); - f_t lower_bound = get_lower_bound(); - f_t gap = get_upper_bound() - lower_bound; - i_t nodes_explored = 0; - - while (node_stack.size() > 0) { - // Get a node off the stack - mip_node_t* node_ptr = node_stack.back(); - node_stack.pop_back(); - nodes_explored++; - - f_t upper_bound = get_upper_bound(); - lower_bound = get_lower_bound(); - gap = upper_bound - lower_bound; - f_t user_upper_bound = compute_user_objective(original_lp_, get_upper_bound()); - f_t user_lower_bound = compute_user_objective(original_lp_, lower_bound); - f_t rel_gap = relative_gap(user_upper_bound, user_lower_bound); - if (gap < settings_.absolute_mip_gap_tol && rel_gap < settings_.relative_mip_gap_tol) { - update_tree(node_ptr, node_status_t::FATHOMED); - continue; - } + while (stack.size() > 0 && status_ == mip_exploration_status_t::RUNNING) { + mip_node_t* node_ptr = stack.front(); + stack.pop_front(); + f_t upper_bound = get_upper_bound(); + f_t rel_gap = user_relative_gap(original_lp_, upper_bound, node_ptr->lower_bound); - if (toc(stats_.start_time) > settings_.time_limit) { - status = mip_status_t::TIME_LIMIT; - break; - } + if (node_ptr->lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { + continue; + } - status = solve_node_lp(node_ptr, leaf_problem, Arow, var_types_, upper_bound); - - if (status == mip_status_t::NUMERICAL) { continue; } - - if (node_ptr->status == node_status_t::HAS_CHILDREN) { - // Martin's child selection - const i_t branch_var = node_ptr->get_down_child()->branch_var; - const f_t branch_var_val = node_ptr->get_down_child()->fractional_val; - const f_t down_val = std::floor(root_relax_soln_.x[branch_var]); - const f_t up_val = std::ceil(root_relax_soln_.x[branch_var]); - const f_t down_dist = branch_var_val - down_val; - const f_t up_dist = up_val - branch_var_val; - - if (down_dist < up_dist) { - node_stack.push_back(node_ptr->get_up_child()); - node_stack.push_back(node_ptr->get_down_child()); - } else { - node_stack.push_back(node_ptr->get_down_child()); - node_stack.push_back(node_ptr->get_up_child()); + node_status_t node_status = + solve_node(subtree, node_ptr, leaf_problem, Arow, upper_bound, log, 'D'); + + if (node_status == node_status_t::TIME_LIMIT) { + break; + + } else if (node_status == node_status_t::HAS_CHILDREN) { + auto [first, second] = child_selection(node_ptr); + stack.push_front(second); + stack.push_front(first); + + // If the diving thread is consuming the nodes faster than the + // best first search, then we split the current subtree at the + // lowest possible point and move to the queue, so it can + // be picked by another thread. + if (dive_queue_.size() < min_diving_queue_size_) { + mutex_dive_queue_.lock(); + mip_node_t* new_node = stack.back(); + stack.pop_back(); + dive_queue_.push(new_node->detach_copy()); + mutex_dive_queue_.unlock(); + } + } } } } - - return status; } template mip_status_t branch_and_bound_t::solve(mip_solution_t& solution) { - mip_status_t status = mip_status_t::UNSET; + logger_t log; + log.log = false; + status_ = mip_exploration_status_t::UNSET; + stats_.nodes_unexplored = 0; + stats_.nodes_explored = 0; if (guess_.size() != 0) { std::vector crushed_guess; @@ -888,6 +1032,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut lp_settings.inside_mip = 1; lp_status_t root_status = solve_linear_program_advanced( original_lp_, stats_.start_time, lp_settings, root_relax_soln_, root_vstatus_, edge_norms_); + stats_.total_lp_iters = root_relax_soln_.iterations; stats_.total_lp_solve_time = toc(stats_.start_time); assert(root_vstatus_.size() == original_lp_.num_cols); if (root_status == lp_status_t::INFEASIBLE) { @@ -907,13 +1052,17 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } return mip_status_t::UNBOUNDED; } + if (root_status == lp_status_t::TIME_LIMIT) { - settings_.log.printf("Hit time limit\n"); - return mip_status_t::TIME_LIMIT; + status_ = mip_exploration_status_t::TIME_LIMIT; + return set_final_solution(solution, -inf); } + set_uninitialized_steepest_edge_norms(edge_norms_); root_objective_ = compute_objective(original_lp_, root_relax_soln_.x); + local_lower_bounds_.assign(settings_.num_bfs_threads, root_objective_); + if (settings_.set_simplex_solution_callback != nullptr) { std::vector original_x; uncrush_primal_solution(original_problem_, original_lp_, root_relax_soln_.x, original_x); @@ -928,9 +1077,6 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut settings_.set_simplex_solution_callback( original_x, original_dual, compute_user_objective(original_lp_, root_objective_)); } - mutex_lower_.lock(); - lower_bound_ = root_objective_; - mutex_lower_.unlock(); std::vector fractional; const i_t num_fractional = @@ -944,12 +1090,13 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut // We should be done here uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, solution.x); solution.objective = incumbent_.objective; - solution.lower_bound = lower_bound_; + solution.lower_bound = root_objective_; solution.nodes_explored = 0; solution.simplex_iterations = root_relax_soln_.iterations; settings_.log.printf("Optimal solution found at root node. Objective %.16e. Time %.2f.\n", compute_user_objective(original_lp_, root_objective_), toc(stats_.start_time)); + if (settings_.solution_callback != nullptr) { settings_.solution_callback(solution.x, solution.objective); } @@ -971,81 +1118,84 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut edge_norms_, pc_); + if (toc(stats_.start_time) > settings_.time_limit) { + status_ = mip_exploration_status_t::TIME_LIMIT; + return set_final_solution(solution, root_objective_); + } + // Choose variable to branch on - logger_t log; - log.log = false; - i_t branch_var = pc_.variable_selection( - fractional, root_relax_soln_.x, original_lp_.lower, original_lp_.upper, log); + i_t branch_var = pc_.variable_selection(fractional, root_relax_soln_.x, log); - stats_.total_lp_iters = 0; - stats_.nodes_explored = 0; - stats_.nodes_unexplored = 0; - stats_.num_nodes = 1; + search_tree_t search_tree(root_objective_, root_vstatus_); + search_tree.graphviz_node(settings_.log, &search_tree.root, "lower bound", root_objective_); + search_tree.branch(&search_tree.root, + branch_var, + root_relax_soln_.x[branch_var], + root_vstatus_, + original_lp_, + log); settings_.log.printf( - "| Explored | Unexplored | Objective | Bound | Depth | Iter/Node | Gap | " - " Time \n"); + "Exploring the B&B tree using %d best-first threads and %d diving threads (%d threads)\n", + settings_.num_bfs_threads, + settings_.num_diving_threads, + settings_.num_threads); - mutex_branching_.lock(); - currently_branching_ = true; - mutex_branching_.unlock(); - - std::future diving_thread; - - if (settings_.num_threads > 0) { - diving_thread = std::async(std::launch::async, [&]() { return dive(branch_var, solution); }); - } + settings_.log.printf( + " | Explored | Unexplored | Objective | Bound | Depth | Iter/Node | Gap " + "| Time |\n"); - status = explore_tree(branch_var, solution); + csc_matrix_t Arow(1, 1, 1); + original_lp_.A.transpose(Arow); - if (settings_.num_threads > 0) { mip_status_t diving_status = diving_thread.get(); } + stats_.nodes_explored = 0; + stats_.nodes_unexplored = 2; + stats_.nodes_since_last_log = 0; + stats_.last_log = tic(); + active_subtrees_ = 0; + min_diving_queue_size_ = 4 * settings_.num_diving_threads; + status_ = mip_exploration_status_t::RUNNING; + lower_bound_ceiling_ = inf; + +#pragma omp parallel num_threads(settings_.num_threads) + { + // Make a copy of the original LP. We will modify its bounds at each leaf + lp_problem_t leaf_problem = original_lp_; + +#pragma omp master + { + auto down_child = search_tree.root.get_down_child(); + auto up_child = search_tree.root.get_up_child(); + i_t initial_size = 2 * settings_.num_threads; + +#pragma omp task + exploration_ramp_up(&search_tree, down_child, leaf_problem, Arow, initial_size); + +#pragma omp task + exploration_ramp_up(&search_tree, up_child, leaf_problem, Arow, initial_size); + } - mutex_branching_.lock(); - currently_branching_ = false; - mutex_branching_.unlock(); +#pragma omp barrier - f_t user_upper_bound = compute_user_objective(original_lp_, get_upper_bound()); - f_t user_lower_bound = compute_user_objective(original_lp_, lower_bound_); - settings_.log.printf( - "Explored %d nodes in %.2fs.\nAbsolute Gap %e Objective %.16e Lower Bound %.16e\n", - stats_.nodes_explored.load(), - toc(stats_.start_time), - gap_, - user_upper_bound, - user_lower_bound); - - if (gap_ <= settings_.absolute_mip_gap_tol || - relative_gap(user_upper_bound, user_lower_bound) <= settings_.relative_mip_gap_tol) { - status = mip_status_t::OPTIMAL; - if (gap_ > 0 && gap_ <= settings_.absolute_mip_gap_tol) { - settings_.log.printf("Optimal solution found within absolute MIP gap tolerance (%.1e)\n", - settings_.absolute_mip_gap_tol); - } else if (gap_ > 0 && - relative_gap(user_upper_bound, user_lower_bound) <= settings_.relative_mip_gap_tol) { - settings_.log.printf("Optimal solution found within relative MIP gap tolerance (%.1e)\n", - settings_.relative_mip_gap_tol); - } else { - settings_.log.printf("Optimal solution found.\n"); - } - if (settings_.heuristic_preemption_callback != nullptr) { - settings_.heuristic_preemption_callback(); - } - } +#pragma omp master + { + if (status_ == mip_exploration_status_t::RUNNING && + (active_subtrees_ > 0 || get_heap_size() > 0)) { + for (i_t i = 0; i < settings_.num_bfs_threads; i++) { +#pragma omp task + best_first_thread(i, search_tree, leaf_problem, Arow); + } - if (stats_.nodes_unexplored == 0 && get_upper_bound() == inf) { - settings_.log.printf("Integer infeasible.\n"); - status = mip_status_t::INFEASIBLE; - if (settings_.heuristic_preemption_callback != nullptr) { - settings_.heuristic_preemption_callback(); + for (i_t i = 0; i < settings_.num_diving_threads; i++) { +#pragma omp task + diving_thread(leaf_problem, Arow); + } + } } } - uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, solution.x); - solution.objective = incumbent_.objective; - solution.lower_bound = get_lower_bound(); - solution.nodes_explored = stats_.nodes_explored; - solution.simplex_iterations = stats_.total_lp_iters; - return status; + f_t lower_bound = heap_.size() > 0 ? heap_.top()->lower_bound : search_tree.root.lower_bound; + return set_final_solution(solution, lower_bound); } #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE diff --git a/cpp/src/dual_simplex/branch_and_bound.hpp b/cpp/src/dual_simplex/branch_and_bound.hpp index efe28000bd..7b80f88fa9 100644 --- a/cpp/src/dual_simplex/branch_and_bound.hpp +++ b/cpp/src/dual_simplex/branch_and_bound.hpp @@ -18,40 +18,82 @@ #pragma once #include +#include #include #include #include #include #include #include +#include -#include -#include -#include +#include #include -#include #include -#include "cuopt/linear_programming/mip/solver_settings.hpp" -#include "dual_simplex/mip_node.hpp" namespace cuopt::linear_programming::dual_simplex { enum class mip_status_t { - OPTIMAL = 0, - UNBOUNDED = 1, - INFEASIBLE = 2, - TIME_LIMIT = 3, - NODE_LIMIT = 4, - NUMERICAL = 5, - UNSET = 6 + OPTIMAL = 0, // The optimal integer solution was found + UNBOUNDED = 1, // The problem is unbounded + INFEASIBLE = 2, // The problem is infeasible + TIME_LIMIT = 3, // The solver reached a time limit + NODE_LIMIT = 4, // The maximum number of nodes was reached (not implemented) + NUMERICAL = 5, // The solver encountered a numerical error + UNSET = 6, // The status is not set +}; + +enum class mip_exploration_status_t { + UNSET = 0, // The status is not set + TIME_LIMIT = 1, // The solver reached a time limit + NODE_LIMIT = 2, // The maximum number of nodes was reached (not implemented) + NUMERICAL = 3, // The solver encountered a numerical error + RUNNING = 4, // The solver is currently exploring the tree + COMPLETED = 5, // The solver finished exploring the tree }; template void upper_bound_callback(f_t upper_bound); +// A min-heap for storing the starting nodes for the dives. +// This has a maximum size of 8192, such that the container +// will discard the least promising node if the queue is full. +template +class dive_queue_t { + private: + std::vector> buffer; + static constexpr i_t max_size_ = 2048; + + public: + dive_queue_t() { buffer.reserve(max_size_); } + + void push(mip_node_t&& node) + { + buffer.push_back(std::move(node)); + std::push_heap(buffer.begin(), buffer.end(), node_compare_t()); + if (buffer.size() > max_size()) { buffer.pop_back(); } + } + + mip_node_t pop() + { + std::pop_heap(buffer.begin(), buffer.end(), node_compare_t()); + mip_node_t node = std::move(buffer.back()); + buffer.pop_back(); + return node; + } + + i_t size() const { return buffer.size(); } + constexpr i_t max_size() const { return max_size_; } + const mip_node_t& top() const { return buffer.front(); } + void clear() { buffer.clear(); } +}; + template class branch_and_bound_t { public: + template + using mip_node_heap_t = std::priority_queue, node_compare_t>; + branch_and_bound_t(const user_problem_t& user_problem, const simplex_solver_settings_t& solver_settings); @@ -68,8 +110,8 @@ class branch_and_bound_t { std::vector& repaired_solution) const; f_t get_upper_bound(); - f_t get_lower_bound(); + i_t get_heap_size(); // The main entry routine. Returns the solver status and populates solution with the incumbent. mip_status_t solve(mip_solution_t& solution); @@ -81,18 +123,16 @@ class branch_and_bound_t { // Initial guess. std::vector guess_; + // LP relaxation lp_problem_t original_lp_; std::vector new_slacks_; std::vector var_types_; - // Mutex for lower bound - std::mutex mutex_lower_; - - // Global variable for lower bound - f_t lower_bound_; + // Local lower bounds for each thread + std::vector> local_lower_bounds_; // Mutex for upper bound - std::mutex mutex_upper_; + omp_mutex_t mutex_upper_; // Global variable for upper bound f_t upper_bound_; @@ -100,31 +140,21 @@ class branch_and_bound_t { // Global variable for incumbent. The incumbent should be updated with the upper bound mip_solution_t incumbent_; - // Mutex for gap - std::mutex mutex_gap_; - - // Global variable for gap - f_t gap_; - - // Mutex for branching - std::mutex mutex_branching_; - bool currently_branching_; - - // Global variable for stats - std::mutex mutex_stats_; - - // Note that floating point atomics are only supported in C++20. + // Structure with the general info of the solver. struct stats_t { - f_t start_time = 0.0; - f_t total_lp_solve_time = 0.0; - std::atomic nodes_explored = 0; - std::atomic nodes_unexplored = 0; - f_t total_lp_iters = 0; - std::atomic num_nodes = 0; + f_t start_time = 0.0; + omp_atomic_t total_lp_solve_time = 0.0; + omp_atomic_t nodes_explored = 0; + omp_atomic_t nodes_unexplored = 0; + omp_atomic_t total_lp_iters = 0; + + // This should only be used by the main thread + f_t last_log = 0.0; + omp_atomic_t nodes_since_last_log = 0; } stats_; // Mutex for repair - std::mutex mutex_repair_; + omp_mutex_t mutex_repair_; std::vector> repair_queue_; // Variables for the root node in the search tree. @@ -135,49 +165,77 @@ class branch_and_bound_t { // Pseudocosts pseudo_costs_t pc_; - std::mutex mutex_pc_; - // Update the status of the nodes in the search tree. - void update_tree(mip_node_t* node_ptr, node_status_t status); + // Heap storing the nodes to be explored. + omp_mutex_t mutex_heap_; + mip_node_heap_t*> heap_; + + // Count the number of subtrees that are currently being explored. + omp_atomic_t active_subtrees_; + + // Queue for storing the promising node for performing dives. + omp_mutex_t mutex_dive_queue_; + dive_queue_t dive_queue_; + i_t min_diving_queue_size_; + + // Global status of the solver. + omp_atomic_t status_; + + // In case, a best-first thread encounters a numerical issue when solving a node, + // its blocks the progression of the lower bound. + omp_atomic_t lower_bound_ceiling_; + + // Set the final solution. + mip_status_t set_final_solution(mip_solution_t& solution, f_t lower_bound); // Update the incumbent solution with the new feasible solution. // found during branch and bound. void add_feasible_solution(f_t leaf_objective, const std::vector& leaf_solution, i_t leaf_depth, - char symbol); + char thread_type); // Repairs low-quality solutions from the heuristics, if it is applicable. void repair_heuristic_solutions(); - // Explore the search tree using the best-first search strategy. - mip_status_t explore_tree(i_t branch_var, mip_solution_t& solution); - - // Explore the search tree using the depth-first search strategy. - mip_status_t dive(i_t branch_var, mip_solution_t& solution); - - // Branch the current node, creating two children. - void branch(mip_node_t* parent_node, - i_t branch_var, - f_t branch_var_val, - const std::vector& parent_vstatus); - - // Solve the LP relaxation of a leaf node. - mip_status_t solve_node_lp(mip_node_t* node_ptr, - lp_problem_t& leaf_problem, - csc_matrix_t& Arow, - const std::vector& var_types, - f_t upper_bound); - - // Solve the LP relaxation of a leaf node using the dual simplex method. - dual::status_t node_dual_simplex(i_t leaf_id, - lp_problem_t& leaf_problem, - std::vector& leaf_vstatus, - lp_solution_t& leaf_solution, - std::vector& bounds_changed, - csc_matrix_t& Arow, - f_t upper_bound, - logger_t& log); + // Ramp-up phase of the solver, where we greedily expand the tree until + // there is enough unexplored nodes. This is done recursively using OpenMP tasks. + void exploration_ramp_up(search_tree_t* search_tree, + mip_node_t* node, + lp_problem_t& leaf_problem, + const csc_matrix_t& Arow, + i_t initial_heap_size); + + // Explore the search tree using the best-first search with plunging strategy. + void explore_subtree(i_t id, + search_tree_t& search_tree, + mip_node_t* start_node, + lp_problem_t& leaf_problem, + const csc_matrix_t& Arow); + + // Each "main" thread pops a node from the global heap and then performs a plunge + // (i.e., a shallow dive) into the subtree determined by the node. + void best_first_thread(i_t id, + search_tree_t& search_tree, + lp_problem_t& leaf_problem, + const csc_matrix_t& Arow); + + // Each diving thread pops the first node from the dive queue and then performs + // a deep dive into the subtree determined by the node. + void diving_thread(lp_problem_t& leaf_problem, const csc_matrix_t& Arow); + + // Solve the LP relaxation of a leaf node and update the tree. + node_status_t solve_node(search_tree_t& search_tree, + mip_node_t* node_ptr, + lp_problem_t& leaf_problem, + const csc_matrix_t& Arow, + f_t upper_bound, + logger_t& log, + char thread_type); + + // Sort the children based on the Martin's criteria. + std::pair*, mip_node_t*> child_selection( + mip_node_t* node_ptr); }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/mip_node.cpp b/cpp/src/dual_simplex/mip_node.cpp index f0b275d0a4..2a907c4022 100644 --- a/cpp/src/dual_simplex/mip_node.cpp +++ b/cpp/src/dual_simplex/mip_node.cpp @@ -22,7 +22,7 @@ namespace cuopt::linear_programming::dual_simplex { bool inactive_status(node_status_t status) { return (status == node_status_t::FATHOMED || status == node_status_t::INTEGER_FEASIBLE || - status == node_status_t::INFEASIBLE); + status == node_status_t::INFEASIBLE || status == node_status_t::NUMERICAL); } } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/mip_node.hpp b/cpp/src/dual_simplex/mip_node.hpp index 2d315fe0e3..a3a34eb81d 100644 --- a/cpp/src/dual_simplex/mip_node.hpp +++ b/cpp/src/dual_simplex/mip_node.hpp @@ -19,8 +19,8 @@ #include #include +#include -#include #include #include #include @@ -30,11 +30,12 @@ namespace cuopt::linear_programming::dual_simplex { enum class node_status_t : int { ACTIVE = 0, // Node still in the tree - IN_PROGRESS = 1, // Node is currently being solved - INTEGER_FEASIBLE = 2, // Node has an integer feasible solution - INFEASIBLE = 3, // Node is infeasible - FATHOMED = 4, // Node objective is greater than the upper bound - HAS_CHILDREN = 5, // Node has children to explore + INTEGER_FEASIBLE = 1, // Node has an integer feasible solution + INFEASIBLE = 2, // Node is infeasible + FATHOMED = 3, // Node objective is greater than the upper bound + HAS_CHILDREN = 4, // Node has children to explore + NUMERICAL = 5, // Encountered numerical issue when solving the LP relaxation + TIME_LIMIT = 6 // Time out during the LP relaxation }; bool inactive_status(node_status_t status); @@ -201,6 +202,22 @@ class mip_node_t { } } + // This method creates a copy of the current node + // with its parent set to `nullptr`, `node_id = 0` + // and `depth = 0` such that it is the root + // of a separated tree. + mip_node_t detach_copy() const + { + mip_node_t copy(lower_bound, vstatus); + copy.branch_var = branch_var; + copy.branch_dir = branch_dir; + copy.branch_var_lower = branch_var_lower; + copy.branch_var_upper = branch_var_upper; + copy.fractional_val = fractional_val; + copy.node_id = node_id; + return copy; + } + node_status_t status; f_t lower_bound; i_t depth; @@ -230,11 +247,96 @@ void remove_fathomed_nodes(std::vector*>& stack) template class node_compare_t { public: - bool operator()(mip_node_t& a, mip_node_t& b) + bool operator()(const mip_node_t& a, const mip_node_t& b) const { return a.lower_bound > b.lower_bound; // True if a comes before b, elements that come before are output last } + + bool operator()(const mip_node_t* a, const mip_node_t* b) const + { + return a->lower_bound > + b->lower_bound; // True if a comes before b, elements that come before are output last + } +}; + +template +class search_tree_t { + public: + search_tree_t(f_t root_lower_bound, const std::vector& basis) + : root(root_lower_bound, basis), num_nodes(0) + { + } + + search_tree_t(mip_node_t&& node) : root(std::move(node)), num_nodes(0) {} + + void update_tree(mip_node_t* node_ptr, node_status_t status) + { + mutex.lock(); + std::vector*> stack; + node_ptr->set_status(status, stack); + remove_fathomed_nodes(stack); + mutex.unlock(); + } + + void branch(mip_node_t* parent_node, + const i_t branch_var, + const f_t fractional_val, + const std::vector& parent_vstatus, + const lp_problem_t& original_lp, + logger_t& log) + { + i_t id = num_nodes.fetch_add(2); + + // down child + auto down_child = std::make_unique>( + original_lp, parent_node, ++id, branch_var, 0, fractional_val, parent_vstatus); + + graphviz_edge(log, parent_node, down_child.get(), branch_var, 0, std::floor(fractional_val)); + + // up child + auto up_child = std::make_unique>( + original_lp, parent_node, ++id, branch_var, 1, fractional_val, parent_vstatus); + + graphviz_edge(log, parent_node, up_child.get(), branch_var, 1, std::ceil(fractional_val)); + + assert(parent_vstatus.size() == original_lp.num_cols); + parent_node->add_children(std::move(down_child), + std::move(up_child)); // child pointers moved into the tree + } + + void graphviz_node(logger_t& log, + const mip_node_t* node_ptr, + const std::string label, + const f_t val) + { + if (write_graphviz) { + log.printf("Node%d [label=\"%s %.16e\"]\n", node_ptr->node_id, label.c_str(), val); + } + } + + void graphviz_edge(logger_t& log, + const mip_node_t* origin_ptr, + const mip_node_t* dest_ptr, + const i_t branch_var, + const i_t branch_dir, + const f_t bound) + { + if (write_graphviz) { + log.printf("Node%d -> Node%d [label=\"x%d %s %e\"]\n", + origin_ptr->node_id, + dest_ptr->node_id, + branch_var, + branch_dir == 0 ? "<=" : ">=", + bound); + } + } + + mip_node_t root; + omp_mutex_t mutex; + omp_atomic_t num_nodes; + + static constexpr bool write_graphviz = false; }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 370badf332..01d995ba6b 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -29,9 +29,7 @@ #include #include #include -#include -#include -#include +#include namespace cuopt::linear_programming::dual_simplex { diff --git a/cpp/src/dual_simplex/pseudo_costs.cpp b/cpp/src/dual_simplex/pseudo_costs.cpp index 95c96f859f..4bd9590e16 100644 --- a/cpp/src/dual_simplex/pseudo_costs.cpp +++ b/cpp/src/dual_simplex/pseudo_costs.cpp @@ -21,15 +21,14 @@ #include #include -#include +#include namespace cuopt::linear_programming::dual_simplex { namespace { template -void strong_branch_helper(i_t thread_id, - i_t start, +void strong_branch_helper(i_t start, i_t end, f_t start_time, const lp_problem_t& original_lp, @@ -46,6 +45,7 @@ void strong_branch_helper(i_t thread_id, constexpr bool verbose = false; f_t last_log = tic(); + i_t thread_id = omp_get_thread_num(); for (i_t k = start; k < end; ++k) { const i_t j = fractional[k]; @@ -79,7 +79,6 @@ void strong_branch_helper(i_t thread_id, solution, iter, child_edge_norms); - const f_t lp_solve_time = toc(lp_start_time); f_t obj = std::numeric_limits::infinity(); if (status == dual::status_t::DUAL_UNBOUNDED) { @@ -126,15 +125,10 @@ void strong_branch_helper(i_t thread_id, } if (toc(start_time) > settings.time_limit) { break; } - pc.strong_branches_completed.lock(); - pc.num_strong_branches_completed++; - pc.strong_branches_completed.unlock(); + const i_t completed = pc.num_strong_branches_completed++; if (thread_id == 0 && toc(last_log) > 10) { last_log = tic(); - pc.strong_branches_completed.lock(); - const i_t completed = pc.num_strong_branches_completed; - pc.strong_branches_completed.unlock(); settings.log.printf("%d of %ld strong branches completed in %.1fs\n", completed, fractional.size(), @@ -148,58 +142,6 @@ void strong_branch_helper(i_t thread_id, } } -template -void get_partitioning(i_t N, i_t num_threads, i_t tid, i_t& begin, i_t& end) -{ - int base_tasks = N / num_threads; - int remaining_tasks = N % num_threads; - - if (tid < remaining_tasks) { - begin = tid * (base_tasks + 1); - end = begin + base_tasks + 1; - } else { - begin = tid * base_tasks + remaining_tasks; - end = begin + base_tasks; - } -} - -template -void thread_strong_branch(i_t thread_id, - i_t num_threads, - f_t start_time, - const lp_problem_t& original_lp, - const simplex_solver_settings_t& settings, - const std::vector& var_types, - const std::vector& fractional, - f_t root_obj, - const std::vector& root_soln, - const std::vector& root_vstatus, - const std::vector& edge_norms, - pseudo_costs_t& pc) -{ - i_t start, end; - get_partitioning(static_cast(fractional.size()), num_threads, thread_id, start, end); - constexpr bool verbose = false; - if (verbose) { - settings.log.printf( - "Thread id %d start %d end %d. size %d\n", thread_id, start, end, end - start); - } - - strong_branch_helper(thread_id, - start, - end, - start_time, - original_lp, - settings, - var_types, - fractional, - root_obj, - root_soln, - root_vstatus, - edge_norms, - pc); -} - } // namespace template @@ -214,51 +156,49 @@ void strong_branching(const lp_problem_t& original_lp, const std::vector& edge_norms, pseudo_costs_t& pc) { + pc.resize(original_lp.num_cols); pc.strong_branch_down.resize(fractional.size()); pc.strong_branch_up.resize(fractional.size()); - pc.num_strong_branches_completed = 0; - if (fractional.size() > settings.num_threads) { - int num_threads = settings.num_threads; - std::thread threads[num_threads]; - settings.log.printf("Strong branching using %d threads and %ld fractional variables\n", - num_threads, - fractional.size()); - for (int thread_id = 0; thread_id < num_threads; thread_id++) { - threads[thread_id] = std::thread(thread_strong_branch, - thread_id, - num_threads, - start_time, - original_lp, - settings, - var_types, - fractional, - root_obj, - root_soln, - root_vstatus, - edge_norms, - std::ref(pc)); - } + settings.log.printf("Strong branching using %d threads and %ld fractional variables\n", + settings.num_threads, + fractional.size()); + +#pragma omp parallel num_threads(settings.num_threads) + { + i_t n = std::min(4 * settings.num_threads, fractional.size()); + + // Here we are creating more tasks than the number of threads + // such that they can be scheduled dynamically to the threads. +#pragma omp for schedule(dynamic, 1) + for (i_t k = 0; k < n; k++) { + i_t start = std::floor(k * fractional.size() / n); + i_t end = std::floor((k + 1) * fractional.size() / n); + + constexpr bool verbose = false; + if (verbose) { + settings.log.printf("Thread id %d task id %d start %d end %d. size %d\n", + omp_get_thread_num(), + k, + start, + end, + end - start); + } - for (int thread_id = 0; thread_id < num_threads; thread_id++) { - threads[thread_id].join(); + strong_branch_helper(start, + end, + start_time, + original_lp, + settings, + var_types, + fractional, + root_obj, + root_soln, + root_vstatus, + edge_norms, + pc); } - } else { - settings.log.printf("Strong branching on %ld fractional variables\n", fractional.size()); - strong_branch_helper(0, - 0, - static_cast(fractional.size()), - start_time, - original_lp, - settings, - var_types, - fractional, - root_obj, - root_soln, - root_vstatus, - edge_norms, - pc); } pc.update_pseudo_costs_from_strong_branching(fractional, root_soln); @@ -268,6 +208,7 @@ template void pseudo_costs_t::update_pseudo_costs(mip_node_t* node_ptr, f_t leaf_objective) { + mutex.lock(); const f_t change_in_obj = leaf_objective - node_ptr->lower_bound; const f_t frac = node_ptr->branch_dir == 0 ? node_ptr->fractional_val - std::floor(node_ptr->fractional_val) @@ -279,6 +220,7 @@ void pseudo_costs_t::update_pseudo_costs(mip_node_t* node_pt pseudo_cost_sum_up[node_ptr->branch_var] += change_in_obj / frac; pseudo_cost_num_up[node_ptr->branch_var]++; } + mutex.unlock(); } template @@ -317,10 +259,10 @@ void pseudo_costs_t::initialized(i_t& num_initialized_down, template i_t pseudo_costs_t::variable_selection(const std::vector& fractional, const std::vector& solution, - const std::vector& lower, - const std::vector& upper, - logger_t& log) const + logger_t& log) { + mutex.lock(); + const i_t num_fractional = fractional.size(); std::vector pseudo_cost_up(num_fractional); std::vector pseudo_cost_down(num_fractional); @@ -373,6 +315,8 @@ i_t pseudo_costs_t::variable_selection(const std::vector& fractio log.printf( "pc branching on %d. Value %e. Score %e\n", branch_var, solution[branch_var], score[select]); + mutex.unlock(); + return branch_var; } diff --git a/cpp/src/dual_simplex/pseudo_costs.hpp b/cpp/src/dual_simplex/pseudo_costs.hpp index d26fe8d489..5bd03e3fcd 100644 --- a/cpp/src/dual_simplex/pseudo_costs.hpp +++ b/cpp/src/dual_simplex/pseudo_costs.hpp @@ -21,8 +21,9 @@ #include #include #include +#include -#include +#include namespace cuopt::linear_programming::dual_simplex { @@ -54,9 +55,7 @@ class pseudo_costs_t { i_t variable_selection(const std::vector& fractional, const std::vector& solution, - const std::vector& lower, - const std::vector& upper, - logger_t& log) const; + logger_t& log); void update_pseudo_costs_from_strong_branching(const std::vector& fractional, const std::vector& root_soln); @@ -67,8 +66,8 @@ class pseudo_costs_t { std::vector strong_branch_down; std::vector strong_branch_up; - std::mutex strong_branches_completed; - i_t num_strong_branches_completed = 0; + omp_mutex_t mutex; + omp_atomic_t num_strong_branches_completed = 0; }; template diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index 5b7e8bf0fa..9ff96ebbd0 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -20,11 +20,11 @@ #include #include +#include #include #include #include #include -#include namespace cuopt::linear_programming::dual_simplex { @@ -59,9 +59,9 @@ struct simplex_solver_settings_t { refactor_frequency(100), iteration_log_frequency(1000), first_iteration_log(2), - num_threads(std::thread::hardware_concurrency() > 8 - ? (std::thread::hardware_concurrency() / 8) - : std::thread::hardware_concurrency()), + num_threads(omp_get_max_threads() - 1), + num_bfs_threads(std::min(num_threads / 4, 1)), + num_diving_threads(std::min(num_threads - num_bfs_threads, 1)), random_seed(0), inside_mip(0), solution_callback(nullptr), @@ -97,15 +97,17 @@ struct simplex_solver_settings_t { bool use_bound_flip_ratio; // true if using the bound flip ratio test bool scale_columns; // true to scale the columns of A bool relaxation; // true to only solve the LP relaxation of a MIP - bool - use_left_looking_lu; // true to use left looking LU factorization, false to use right looking - bool eliminate_singletons; // true to eliminate singletons from the basis - bool print_presolve_stats; // true to print presolve stats - i_t refactor_frequency; // number of basis updates before refactorization - i_t iteration_log_frequency; // number of iterations between log updates - i_t first_iteration_log; // number of iterations to log at beginning of solve - i_t num_threads; // number of threads to use - i_t random_seed; // random seed + bool use_left_looking_lu; // true to use left looking LU factorization, + // false to use right looking + bool eliminate_singletons; // true to eliminate singletons from the basis + bool print_presolve_stats; // true to print presolve stats + i_t refactor_frequency; // number of basis updates before refactorization + i_t iteration_log_frequency; // number of iterations between log updates + i_t first_iteration_log; // number of iterations to log at beginning of solve + i_t num_threads; // number of threads to use + i_t num_bfs_threads; // number of threads dedicated to the best-first search + i_t num_diving_threads; // number of threads dedicated to diving + i_t random_seed; // random seed i_t inside_mip; // 0 if outside MIP, 1 if inside MIP at root node, 2 if inside MIP at leaf node std::function&, f_t)> solution_callback; std::function heuristic_preemption_callback; diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu index 14986df664..a8da9c3255 100644 --- a/cpp/src/math_optimization/solver_settings.cu +++ b/cpp/src/math_optimization/solver_settings.cu @@ -91,7 +91,7 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings {CUOPT_ITERATION_LIMIT, &pdlp_settings.iteration_limit, 0, std::numeric_limits::max(), std::numeric_limits::max()}, {CUOPT_PDLP_SOLVER_MODE, reinterpret_cast(&pdlp_settings.pdlp_solver_mode), CUOPT_PDLP_SOLVER_MODE_STABLE1, CUOPT_PDLP_SOLVER_MODE_STABLE3, CUOPT_PDLP_SOLVER_MODE_STABLE3}, {CUOPT_METHOD, reinterpret_cast(&pdlp_settings.method), CUOPT_METHOD_CONCURRENT, CUOPT_METHOD_DUAL_SIMPLEX, CUOPT_METHOD_CONCURRENT}, - {CUOPT_NUM_CPU_THREADS, &mip_settings.num_cpu_threads, -1, std::numeric_limits::max(), -1} + {CUOPT_NUM_CPU_THREADS, &mip_settings.num_cpu_threads, -1, std::numeric_limits::max(), -1}, }; // Bool parameters diff --git a/cpp/src/mip/diversity/diversity_manager.cu b/cpp/src/mip/diversity/diversity_manager.cu index 9df0966554..a9cd335a90 100644 --- a/cpp/src/mip/diversity/diversity_manager.cu +++ b/cpp/src/mip/diversity/diversity_manager.cu @@ -312,9 +312,9 @@ void diversity_manager_t::run_fj_alone(solution_t& solution) template void diversity_manager_t::run_fp_alone(solution_t& solution) { - CUOPT_LOG_INFO("Running FP alone!"); + CUOPT_LOG_DEBUG("Running FP alone!"); ls.run_fp(solution, timer, &population); - CUOPT_LOG_INFO("FP alone finished!"); + CUOPT_LOG_DEBUG("FP alone finished!"); } template @@ -543,7 +543,7 @@ void diversity_manager_t::recombine_and_ls_with_all( { raft::common::nvtx::range fun_scope("recombine_and_ls_with_all"); if (solutions.size() > 0) { - CUOPT_LOG_INFO("Running recombiners on B&B solutions with size %lu", solutions.size()); + CUOPT_LOG_DEBUG("Running recombiners on B&B solutions with size %lu", solutions.size()); // add all solutions because time limit might have been consumed and we might have exited before for (auto& sol : solutions) { cuopt_func_call(sol.test_feasibility(true)); diff --git a/cpp/src/mip/diversity/recombiners/sub_mip.cuh b/cpp/src/mip/diversity/recombiners/sub_mip.cuh index 1818092fc9..fa5968c6f5 100644 --- a/cpp/src/mip/diversity/recombiners/sub_mip.cuh +++ b/cpp/src/mip/diversity/recombiners/sub_mip.cuh @@ -111,8 +111,11 @@ class sub_mip_recombiner_t : public recombiner_t { branch_and_bound_settings.print_presolve_stats = false; branch_and_bound_settings.absolute_mip_gap_tol = context.settings.tolerances.absolute_mip_gap; branch_and_bound_settings.relative_mip_gap_tol = context.settings.tolerances.relative_mip_gap; - branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; - branch_and_bound_settings.solution_callback = [this](std::vector& solution, + branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; + branch_and_bound_settings.num_threads = 2; + branch_and_bound_settings.num_bfs_threads = 1; + branch_and_bound_settings.num_diving_threads = 1; + branch_and_bound_settings.solution_callback = [this](std::vector& solution, f_t objective) { this->solution_callback(solution, objective); }; diff --git a/cpp/src/mip/local_search/rounding/lb_constraint_prop.cu b/cpp/src/mip/local_search/rounding/lb_constraint_prop.cu index b9408755c8..be74611063 100644 --- a/cpp/src/mip/local_search/rounding/lb_constraint_prop.cu +++ b/cpp/src/mip/local_search/rounding/lb_constraint_prop.cu @@ -770,7 +770,6 @@ bool lb_constraint_prop_t::find_integer( { RAFT_CHECK_CUDA(problem.handle_ptr->get_stream()); if (orig_sol.problem_ptr->n_integer_vars == 0) { - CUOPT_LOG_ERROR("No integer variables provided in the bounds prop rounding"); cuopt_func_call(orig_sol.test_variable_bounds()); return orig_sol.compute_feasibility(); } diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu index a9b7615ed6..7c89e849d8 100644 --- a/cpp/src/mip/solver.cu +++ b/cpp/src/mip/solver.cu @@ -172,11 +172,19 @@ solution_t mip_solver_t::run_solver() branch_and_bound_settings.relative_mip_gap_tol = context.settings.tolerances.relative_mip_gap; branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; - if (context.settings.num_cpu_threads != -1) { + if (context.settings.num_cpu_threads < 0) { + branch_and_bound_settings.num_threads = omp_get_max_threads() - 1; + } else { branch_and_bound_settings.num_threads = std::max(1, context.settings.num_cpu_threads); } CUOPT_LOG_INFO("Using %d CPU threads for B&B", branch_and_bound_settings.num_threads); + i_t num_threads = branch_and_bound_settings.num_threads; + i_t num_bfs_threads = std::max(1, num_threads / 4); + i_t num_diving_threads = std::max(1, num_threads - num_bfs_threads); + branch_and_bound_settings.num_bfs_threads = num_bfs_threads; + branch_and_bound_settings.num_diving_threads = num_diving_threads; + // Set the branch and bound -> primal heuristics callback branch_and_bound_settings.solution_callback = std::bind(&branch_and_bound_solution_helper_t::solution_callback, diff --git a/cpp/src/utilities/omp_helpers.hpp b/cpp/src/utilities/omp_helpers.hpp new file mode 100644 index 0000000000..d5b91c6f91 --- /dev/null +++ b/cpp/src/utilities/omp_helpers.hpp @@ -0,0 +1,138 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#ifdef _OPENMP + +#include +#include + +namespace cuopt { + +// Wrapper of omp_lock_t. Optionally, you can provide a hint as defined in +// https://www.openmp.org/spec-html/5.1/openmpse39.html#x224-2570003.9 +class omp_mutex_t { + public: + omp_mutex_t() { omp_init_lock(&mutex); } + virtual ~omp_mutex_t() { omp_destroy_lock(&mutex); } + void lock() { omp_set_lock(&mutex); } + void unlock() { omp_unset_lock(&mutex); } + bool try_lock() { return omp_test_lock(&mutex); } + + private: + omp_lock_t mutex; +}; + +// Wrapper for omp atomic operations. See +// https://www.openmp.org/spec-html/5.1/openmpsu105.html. +template +class omp_atomic_t { + public: + omp_atomic_t() = default; + omp_atomic_t(T val) : val(val) {} + + T operator=(T new_val) + { + store(new_val); + return new_val; + } + + operator T() { return load(); } + T operator+=(T inc) { return fetch_add(inc) + inc; } + T operator-=(T inc) { return fetch_sub(inc) - inc; } + + // In theory, this should be enabled only for integers, + // but it works for any numerical types. + T operator++() { return fetch_add(T(1)) + 1; } + T operator++(int) { return fetch_add(T(1)); } + T operator--() { return fetch_sub(T(1)) - 1; } + T operator--(int) { return fetch_sub(T(1)); } + + T load() + { + T res; +#pragma omp atomic read + res = val; + return res; + } + + void store(T new_val) + { +#pragma omp atomic write + val = new_val; + } + + T exchange(T other) + { + T old; +#pragma omp atomic capture + { + old = val; + val = other; + } + return old; + } + + T fetch_add(T inc) + { + T old; +#pragma omp atomic capture + { + old = val; + val += inc; + } + return old; + } + + T fetch_sub(T inc) { return fetch_add(-inc); } + +// Atomic CAS are only supported in OpenMP v5.1 +// (gcc 12+ or clang 14+), however, nvcc (or the host compiler) cannot +// parse it correctly yet +#ifndef __NVCC__ + + T fetch_min(T other) + { + T old; +#pragma omp atomic compare capture + { + old = val; + val = other < val ? other : val; + } + return old; + } + + T fetch_max(T other) + { + T old; +#pragma omp atomic compare capture + { + old = val; + val = other > val ? other : val; + } + return old; + } +#endif + + private: + T val; +}; + +#endif + +} // namespace cuopt