diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 1b8a307d22..96f0e2ab22 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -321,6 +321,7 @@ bool branch_and_bound_t::repair_solution( f_t& repaired_obj, std::vector& repaired_solution) const { + raft::common::nvtx::range scope("repair_solution"); bool feasible = false; repaired_obj = std::numeric_limits::quiet_NaN(); i_t n = original_lp.num_cols; @@ -399,6 +400,7 @@ branch_and_bound_t::branch_and_bound_t( template mip_status_t branch_and_bound_t::solve(mip_solution_t& solution) { + raft::common::nvtx::range scope("solve_branch_and_bound"); mip_status_t status = mip_status_t::UNSET; mip_solution_t incumbent(original_lp.num_cols); if (guess.size() != 0) { diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp index 957b8d2ca5..13f0cc0306 100644 --- a/cpp/src/dual_simplex/crossover.cpp +++ b/cpp/src/dual_simplex/crossover.cpp @@ -1032,6 +1032,7 @@ crossover_status_t crossover(const lp_problem_t& lp, lp_solution_t& solution, std::vector& vstatus) { + raft::common::nvtx::range scope("crossover"); const i_t m = lp.num_rows; const i_t n = lp.num_cols; f_t crossover_start = tic(); diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index 29aecb21f5..7f423915c3 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -115,6 +115,7 @@ lp_status_t solve_linear_program_advanced(const lp_problem_t& original std::vector& vstatus, std::vector& edge_norms) { + raft::common::nvtx::range scope("solve_linear_program_advanced"); lp_status_t lp_status = lp_status_t::UNSET; lp_problem_t presolved_lp(1, 1, 1); presolve_info_t presolve_info; @@ -241,6 +242,7 @@ lp_status_t solve_linear_program(const user_problem_t& user_problem, const simplex_solver_settings_t& settings, lp_solution_t& solution) { + raft::common::nvtx::range scope("solve_linear_program"); f_t start_time = tic(); lp_problem_t original_lp(1, 1, 1); std::vector new_slacks; @@ -267,6 +269,7 @@ i_t solve(const user_problem_t& problem, const simplex_solver_settings_t& settings, std::vector& primal_solution) { + raft::common::nvtx::range scope("solve"); i_t status; if (is_mip(problem) && !settings.relaxation) { branch_and_bound_t branch_and_bound(problem, settings); @@ -305,6 +308,7 @@ i_t solve_mip_with_guess(const user_problem_t& problem, const std::vector& guess, mip_solution_t& solution) { + raft::common::nvtx::range scope("solve_mip_with_guess"); i_t status; if (is_mip(problem)) { branch_and_bound_t branch_and_bound(problem, settings); diff --git a/cpp/src/dual_simplex/solve.hpp b/cpp/src/dual_simplex/solve.hpp index f20844d6e6..533a8c3785 100644 --- a/cpp/src/dual_simplex/solve.hpp +++ b/cpp/src/dual_simplex/solve.hpp @@ -22,6 +22,8 @@ #include #include +#include + namespace cuopt::linear_programming::dual_simplex { template diff --git a/cpp/src/mip/diversity/diversity_manager.cu b/cpp/src/mip/diversity/diversity_manager.cu index 4a8f56e987..7281ce84ee 100644 --- a/cpp/src/mip/diversity/diversity_manager.cu +++ b/cpp/src/mip/diversity/diversity_manager.cu @@ -263,6 +263,7 @@ bool diversity_manager_t::run_presolve(f_t time_limit) template void diversity_manager_t::generate_quick_feasible_solution() { + raft::common::nvtx::range fun_scope("generate_quick_feasible_solution"); solution_t solution(*problem_ptr); // min 1 second, max 10 seconds const f_t generate_fast_solution_time = min(10., max(1., timer.remaining_time() / 20.)); diff --git a/cpp/src/mip/diversity/population.cu b/cpp/src/mip/diversity/population.cu index d82ac0f14b..e2ba61d0af 100644 --- a/cpp/src/mip/diversity/population.cu +++ b/cpp/src/mip/diversity/population.cu @@ -320,6 +320,7 @@ i_t population_t::add_solution(solution_t&& sol) template void population_t::normalize_weights() { + raft::common::nvtx::range fun_scope("normalize_weights"); CUOPT_LOG_DEBUG("Normalizing weights"); rmm::device_scalar l2_norm(problem_ptr->handle_ptr->get_stream()); diff --git a/cpp/src/mip/feasibility_jump/feasibility_jump.cu b/cpp/src/mip/feasibility_jump/feasibility_jump.cu index f3c60d094f..9950d1e77b 100644 --- a/cpp/src/mip/feasibility_jump/feasibility_jump.cu +++ b/cpp/src/mip/feasibility_jump/feasibility_jump.cu @@ -397,7 +397,7 @@ void fj_t::climber_init(i_t climber_idx, const rmm::cuda_stream_view& climber->best_l1_distance.set_value_to_zero_async(climber_stream); climber->weighted_violation_score.set_value_to_zero_async(climber_stream); init_lhs_and_violation<<<256, 256, 0, climber_stream.value()>>>(view); - + RAFT_CHECK_CUDA(climber_stream); // initialize the best_objective values according to the initial assignment f_t best_obj = compute_objective_from_vec( climber->incumbent_assignment, pb_ptr->objective_coefficients, climber_stream); @@ -508,6 +508,7 @@ void fj_t::climber_init(i_t climber_idx, const rmm::cuda_stream_view& load_balancing_init_cstr_bounds_csr<<<4096, 128, 0, climber_stream.value()>>>( view, view.row_size_nonbin_prefix_sum, view.work_id_to_nonbin_var_idx); + RAFT_CHECK_CUDA(climber_stream); cuopt_assert( pb_ptr->binary_indices.size() + pb_ptr->nonbinary_indices.size() == pb_ptr->n_variables, "invalid variable indices total"); @@ -542,6 +543,7 @@ template void fj_t::load_balancing_score_update(const rmm::cuda_stream_view& stream, i_t climber_idx) { + raft::common::nvtx::range scope("load_balancing_score_update"); auto [grid_load_balancing_prepare, blocks_load_balancing_prepare] = load_balancing_prepare_launch_dims; auto [grid_load_balancing_binary, blocks_load_balancing_binary] = @@ -573,6 +575,7 @@ void fj_t::load_balancing_score_update(const rmm::cuda_stream_view& st blocks_load_balancing_binary, 0, data.load_balancing_bin_stream.view()>>>(v); + RAFT_CHECK_CUDA(data.load_balancing_bin_stream); data.load_balancing_bin_finished_event.record(data.load_balancing_bin_stream.view()); } if (pb_ptr->nonbinary_indices.size() > 0) { @@ -582,10 +585,12 @@ void fj_t::load_balancing_score_update(const rmm::cuda_stream_view& st blocks_load_balancing_mtm_compute_candidates, 0, data.load_balancing_nonbin_stream.view()>>>(v); + RAFT_CHECK_CUDA(data.load_balancing_nonbin_stream); load_balancing_mtm_compute_scores<<>>(v); + RAFT_CHECK_CUDA(data.load_balancing_nonbin_stream); data.load_balancing_nonbin_finished_event.record(data.load_balancing_nonbin_stream.view()); } @@ -767,11 +772,13 @@ void fj_t::run_step_device(const rmm::cuda_stream_view& climber_stream } if (use_graph) cudaGraphLaunch(graph_instance, climber_stream); + climber_stream.synchronize(); } template void fj_t::refresh_lhs_and_violation(const rmm::cuda_stream_view& stream, i_t climber_idx) { + raft::common::nvtx::range scope("refresh_lhs_and_violation"); auto& data = *climbers[climber_idx]; auto v = data.view(); @@ -779,6 +786,7 @@ void fj_t::refresh_lhs_and_violation(const rmm::cuda_stream_view& stre data.violation_score.set_value_to_zero_async(stream); data.weighted_violation_score.set_value_to_zero_async(stream); init_lhs_and_violation<<<4096, 256, 0, stream>>>(v); + RAFT_CHECK_CUDA(stream); } template @@ -855,6 +863,7 @@ i_t fj_t::host_loop(solution_t& solution, i_t climber_idx) i_t iterations = data.iterations.value(climber_stream); // make sure we have the current incumbent saved (e.g. in the case of a timeout) update_best_solution_kernel<<<1, blocks_resetmoves, 0, climber_stream>>>(v); + RAFT_CHECK_CUDA(climber_stream); // check feasibility with the relative tolerance rather than the violation score raft::copy(solution.assignment.data(), data.best_assignment.data(), diff --git a/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cu b/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cu index c15a5d2f47..f72e66ecc5 100644 --- a/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cu +++ b/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cu @@ -146,6 +146,7 @@ bool feasibility_pump_t::linear_project_onto_polytope(solution_tvariable_upper_bounds, @@ -248,135 +249,11 @@ bool feasibility_pump_t::linear_project_onto_polytope(solution_t -bool feasibility_pump_t::random_round_with_fj(solution_t& solution, - timer_t& round_timer) -{ - const i_t n_tries = 200; - bool is_feasible = false; - rmm::device_uvector original_assign(solution.assignment, solution.handle_ptr->get_stream()); - rmm::device_uvector best_rounding(solution.assignment, solution.handle_ptr->get_stream()); - f_t best_fj_qual = std::numeric_limits::max(); - i_t i; - for (i = 0; i < n_tries; ++i) { - if (round_timer.check_time_limit()) { break; } - CUOPT_LOG_DEBUG("Trying random with FJ"); - is_feasible = solution.round_nearest(); - if (is_feasible) { - CUOPT_LOG_DEBUG("Feasible found after random round"); - return true; - } - // run fj_single descent - fj.settings.mode = fj_mode_t::GREEDY_DESCENT; - // fj.settings.n_of_minimums_for_exit = 1; - fj.settings.update_weights = false; - fj.settings.feasibility_run = true; - fj.settings.time_limit = round_timer.remaining_time(); - i_t original_sync_period = fj.settings.parameters.sync_period; - // iterations_per_graph is 10 - fj.settings.parameters.sync_period = 500; - is_feasible = fj.solve(solution); - fj.settings.parameters.sync_period = original_sync_period; - cuopt_assert(solution.test_number_all_integer(), "All integers must be rounded"); - if (is_feasible) { - CUOPT_LOG_DEBUG("Feasible found after random round FJ run"); - return true; - } - f_t current_qual = solution.get_quality(fj.cstr_weights, fj.objective_weight); - if (current_qual < best_fj_qual) { - CUOPT_LOG_DEBUG("Updating best quality of roundings %f", current_qual); - best_fj_qual = current_qual; - raft::copy(best_rounding.data(), - solution.assignment.data(), - solution.assignment.size(), - solution.handle_ptr->get_stream()); - } - raft::copy(solution.assignment.data(), - original_assign.data(), - solution.assignment.size(), - solution.handle_ptr->get_stream()); - } - n_fj_single_descents = i; - const i_t min_sols_to_run_fj = 1; - const i_t non_improving_local_minima_count = 200; - // if at least 5 solutions are explored, run longer fj - if (i >= min_sols_to_run_fj) { - // run longer fj on best sol - raft::copy(solution.assignment.data(), - best_rounding.data(), - solution.assignment.size(), - solution.handle_ptr->get_stream()); - rmm::device_uvector original_weights(fj.cstr_weights, solution.handle_ptr->get_stream()); - fj.settings.mode = fj_mode_t::EXIT_NON_IMPROVING; - fj.settings.update_weights = true; - fj.settings.feasibility_run = true; - fj.settings.time_limit = 0.5; - fj.settings.n_of_minimums_for_exit = non_improving_local_minima_count; - is_feasible = fj.solve(solution); - // restore the weights - raft::copy(fj.cstr_weights.data(), - original_weights.data(), - fj.cstr_weights.size(), - solution.handle_ptr->get_stream()); - } - if (is_feasible) { - CUOPT_LOG_DEBUG("Feasible found after %d minima FJ run", non_improving_local_minima_count); - return true; - } - raft::copy(solution.assignment.data(), - original_assign.data(), - solution.assignment.size(), - solution.handle_ptr->get_stream()); - solution.handle_ptr->sync_stream(); - return is_feasible; -} - -template -bool feasibility_pump_t::round_multiple_points(solution_t& solution) -{ - n_fj_single_descents = 0; - const f_t max_time_limit = last_lp_time * 0.1; - timer_t round_timer{min(max_time_limit, timer.remaining_time())}; - bool is_feasible = random_round_with_fj(solution, round_timer); - if (is_feasible) { - CUOPT_LOG_DEBUG("Feasible found after random round with fj"); - return true; - } - timer_t line_segment_timer{min(1., timer.remaining_time())}; - i_t n_points_to_search = n_fj_single_descents; - bool is_feasibility_run = true; - // create a copy, because assignment is changing within kernel and we want a separate point_1 - rmm::device_uvector starting_point(solution.assignment, solution.handle_ptr->get_stream()); - is_feasible = line_segment_search.search_line_segment(solution, - starting_point, - lp_optimal_solution, - n_points_to_search, - is_feasibility_run, - line_segment_timer); - if (is_feasible) { - CUOPT_LOG_DEBUG("Feasible found after line segment"); - return true; - } - // lns.config.run_lp_and_loop = false; - // timer_t lns_timer(min(last_lp_time * 0.1, timer.remaining_time())); - // is_feasible = lns.do_lns(solution, lns_timer); - // if (is_feasible) { - // CUOPT_LOG_DEBUG("Feasible found after inevitable feasibility"); - // return true; - // } - // TODO add the solution with the min distance to the population, if population is given - is_feasible = solution.round_nearest(); - if (is_feasible) { - CUOPT_LOG_DEBUG("Feasible found after nearest rounding"); - return true; - } - return is_feasible; -} - // round will use inevitable infeasibility while propagating the bounds template bool feasibility_pump_t::round(solution_t& solution) { + raft::common::nvtx::range fun_scope("round"); bool result; CUOPT_LOG_DEBUG("Rounding the point"); timer_t bounds_prop_timer(min(2., timer.remaining_time())); @@ -453,6 +330,7 @@ bool feasibility_pump_t::test_fj_feasible(solution_t& soluti template bool feasibility_pump_t::handle_cycle(solution_t& solution) { + raft::common::nvtx::range fun_scope("handle_cycle"); CUOPT_LOG_DEBUG("running handle cycle"); bool is_feasible = false; fp_fj_cycle_time_begin = timer.remaining_time(); @@ -560,6 +438,7 @@ void feasibility_pump_t::save_best_excess_solution(solution_t void feasibility_pump_t::relax_general_integers(solution_t& solution) { + raft::common::nvtx::range fun_scope("relax_general_integers"); orig_variable_types.resize(solution.problem_ptr->n_variables, solution.handle_ptr->get_stream()); auto var_types = make_span(solution.problem_ptr->variable_types); @@ -609,6 +488,7 @@ void feasibility_pump_t::revert_relaxation(solution_t& solut template bool feasibility_pump_t::run_single_fp_descent(solution_t& solution) { + raft::common::nvtx::range fun_scope("run_single_fp_descent"); // start by doing nearest rounding solution.round_nearest(); raft::copy(last_rounding.data(), diff --git a/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cuh b/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cuh index 8b7891b04b..61362ebeb6 100644 --- a/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cuh +++ b/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cuh @@ -140,8 +140,6 @@ class feasibility_pump_t { void reset(); void resize_vectors(problem_t& problem, const raft::handle_t* handle_ptr); void save_best_excess_solution(solution_t& solution); - bool random_round_with_fj(solution_t& solution, timer_t& round_timer); - bool round_multiple_points(solution_t& solution); void relax_general_integers(solution_t& solution); void revert_relaxation(solution_t& solution); bool test_fj_feasible(solution_t& solution, f_t time_limit); diff --git a/cpp/src/mip/local_search/line_segment_search/line_segment_search.cu b/cpp/src/mip/local_search/line_segment_search/line_segment_search.cu index 1f166bdf69..e71b835406 100644 --- a/cpp/src/mip/local_search/line_segment_search/line_segment_search.cu +++ b/cpp/src/mip/local_search/line_segment_search/line_segment_search.cu @@ -55,6 +55,7 @@ bool line_segment_search_t::search_line_segment(solution_t& bool is_feasibility_run, cuopt::timer_t& timer) { + raft::common::nvtx::range scope("search_line_segment"); CUOPT_LOG_DEBUG("Running line segment search"); cuopt_assert(point_1.size() == point_2.size(), "size mismatch"); cuopt_assert(point_1.size() == solution.assignment.size(), "size mismatch"); @@ -136,7 +137,9 @@ bool line_segment_search_t::search_line_segment(solution_t& best_assignment.data(), solution.assignment.size(), solution.handle_ptr->get_stream()); - return solution.compute_feasibility(); + bool is_feasible = solution.compute_feasibility(); + solution.handle_ptr->sync_stream(); + return is_feasible; } #if MIP_INSTANTIATE_FLOAT diff --git a/cpp/src/mip/local_search/local_search.cu b/cpp/src/mip/local_search/local_search.cu index 8e3ef2e11f..c8e23a33f7 100644 --- a/cpp/src/mip/local_search/local_search.cu +++ b/cpp/src/mip/local_search/local_search.cu @@ -38,17 +38,10 @@ local_search_t::local_search_t(mip_solver_context_t& context fj_sol_on_lp_opt(context.problem_ptr->n_variables, context.problem_ptr->handle_ptr->get_stream()), fj(context), - // fj_tree(fj), constraint_prop(context), lb_constraint_prop(context), line_segment_search(fj), - fp(context, - fj, - // fj_tree, - constraint_prop, - lb_constraint_prop, - line_segment_search, - lp_optimal_solution_), + fp(context, fj, constraint_prop, lb_constraint_prop, line_segment_search, lp_optimal_solution_), rng(cuopt::seed_generator::get_seed()) { } @@ -344,7 +337,7 @@ bool local_search_t::generate_solution(solution_t& solution, bool& early_exit, f_t time_limit) { - raft::common::nvtx::range fun_scope("LS FP Loop"); + raft::common::nvtx::range fun_scope("generate_solution"); timer_t timer(time_limit); auto n_vars = solution.problem_ptr->n_variables; diff --git a/cpp/src/mip/local_search/rounding/bounds_repair.cu b/cpp/src/mip/local_search/rounding/bounds_repair.cu index 4b9c0ca504..7a3e41b04d 100644 --- a/cpp/src/mip/local_search/rounding/bounds_repair.cu +++ b/cpp/src/mip/local_search/rounding/bounds_repair.cu @@ -67,6 +67,7 @@ void bounds_repair_t::reset() template f_t bounds_repair_t::get_ii_violation(problem_t& problem) { + raft::common::nvtx::range scope("get_ii_violation"); bound_presolve.calculate_activity_on_problem_bounds(problem); // calculate the violation and mark of violated constraints thrust::for_each( @@ -125,6 +126,7 @@ i_t bounds_repair_t::compute_best_shift(problem_t& problem, problem_t& original_problem, i_t curr_cstr) { + raft::common::nvtx::range scope("compute_best_shift"); // for each variable in the constraint, compute the best shift value. // if the shift value doesn't change the violation at all, set it to 0 i_t cstr_offset = problem.offsets.element(curr_cstr, handle_ptr->get_stream()); @@ -256,6 +258,7 @@ __global__ void compute_damages_kernel(typename problem_t::view_t prob template void bounds_repair_t::compute_damages(problem_t& problem, i_t n_candidates) { + raft::common::nvtx::range scope("compute_damages"); CUOPT_LOG_TRACE("Bounds repair: Computing damanges!"); // TODO check performance, we can apply load balancing here const i_t TPB = 256; @@ -292,6 +295,7 @@ i_t bounds_repair_t::find_cutoff_index(const candidates_t& c f_t best_damage, i_t n_candidates) { + raft::common::nvtx::range scope("find_cutoff_index"); auto iterator = thrust::make_zip_iterator( thrust::make_tuple(candidates.cstr_delta.data(), candidates.damage.data())); auto out_iter = thrust::partition_point( @@ -387,6 +391,7 @@ bool bounds_repair_t::repair_problem(problem_t& problem, timer_t timer_, const raft::handle_t* handle_ptr_) { + raft::common::nvtx::range scope("repair_problem"); CUOPT_LOG_DEBUG("Running bounds repair"); handle_ptr = handle_ptr_; timer = timer_; @@ -474,4 +479,4 @@ template class bounds_repair_t; template class bounds_repair_t; #endif -}; // namespace cuopt::linear_programming::detail \ No newline at end of file +}; // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip/local_search/rounding/constraint_prop.cu b/cpp/src/mip/local_search/rounding/constraint_prop.cu index b59e246df6..bdc43cc74e 100644 --- a/cpp/src/mip/local_search/rounding/constraint_prop.cu +++ b/cpp/src/mip/local_search/rounding/constraint_prop.cu @@ -185,6 +185,7 @@ void constraint_prop_t::sort_by_implied_slack_consumption(solution_tget_stream()); thrust::sort_by_key(sol.handle_ptr->get_thrust_policy(), implied_slack_consumption_per_var.begin(), implied_slack_consumption_per_var.end(), @@ -733,6 +734,7 @@ bool constraint_prop_t::find_integer( timer_t& timer, std::optional>> probing_candidates) { + raft::common::nvtx::range scope("find_integer"); using crit_t = termination_criterion_t; auto& unset_integer_vars = unset_vars; std::mt19937 rng(cuopt::seed_generator::get_seed()); diff --git a/cpp/src/mip/local_search/rounding/lb_constraint_prop.cu b/cpp/src/mip/local_search/rounding/lb_constraint_prop.cu index 50867a5315..f513b43073 100644 --- a/cpp/src/mip/local_search/rounding/lb_constraint_prop.cu +++ b/cpp/src/mip/local_search/rounding/lb_constraint_prop.cu @@ -387,6 +387,7 @@ void lb_constraint_prop_t::sort_by_implied_slack_consumption( make_span(implied_slack_consumption_per_var), problem_ii, context.settings.get_tolerances()); + RAFT_CHECK_CUDA(original_problem.handle_ptr->get_stream()); thrust::sort_by_key(original_problem.handle_ptr->get_thrust_policy(), implied_slack_consumption_per_var.begin(), implied_slack_consumption_per_var.end(), diff --git a/cpp/src/mip/local_search/rounding/simple_rounding.cu b/cpp/src/mip/local_search/rounding/simple_rounding.cu index 17c8793e3e..58bb49d4a0 100644 --- a/cpp/src/mip/local_search/rounding/simple_rounding.cu +++ b/cpp/src/mip/local_search/rounding/simple_rounding.cu @@ -36,6 +36,7 @@ namespace cuopt::linear_programming::detail { template bool check_brute_force_rounding(solution_t& solution) { + raft::common::nvtx::range scope("check_brute_force_rounding"); i_t TPB = 128; i_t n_integers = solution.compute_number_of_integers(); CUOPT_LOG_TRACE("before rounding n_integers %d total n_integers %d", @@ -68,11 +69,13 @@ bool check_brute_force_rounding(solution_t& solution) cuopt::make_span(var_map), cuopt::make_span(constraint_buf), best_config.data()); + RAFT_CHECK_CUDA(solution.handle_ptr->get_stream()); if (best_config.value(solution.handle_ptr->get_stream()) != -1) { CUOPT_LOG_DEBUG("Feasible found during brute force rounding!"); // apply the feasible rounding apply_feasible_rounding_kernel<<<1, TPB, 0, solution.handle_ptr->get_stream()>>>( solution.view(), n_integers_to_round, cuopt::make_span(var_map), best_config.data()); + RAFT_CHECK_CUDA(solution.handle_ptr->get_stream()); solution.handle_ptr->sync_stream(); bool feas = solution.compute_feasibility(); cuopt_assert(feas, "Solution must be feasible!"); @@ -85,6 +88,7 @@ bool check_brute_force_rounding(solution_t& solution) template void invoke_round_nearest(solution_t& solution) { + raft::common::nvtx::range scope("invoke_round_nearest"); i_t TPB = 128; bool brute_force_found_feas = check_brute_force_rounding(solution); if (brute_force_found_feas) { return; } @@ -92,11 +96,13 @@ void invoke_round_nearest(solution_t& solution) nearest_rounding_kernel<<get_stream()>>>( solution.view(), cuopt::seed_generator::get_seed()); RAFT_CHECK_CUDA(solution.handle_ptr->get_stream()); + solution.handle_ptr->sync_stream(); } template void invoke_random_round_nearest(solution_t& solution, i_t n_target_random_rounds) { + raft::common::nvtx::range fun_scope("invoke_random_round_nearest"); i_t TPB = 128; i_t n_blocks = (solution.problem_ptr->n_variables + TPB - 1) / TPB; i_t n_integers = solution.compute_number_of_integers(); @@ -106,6 +112,7 @@ void invoke_random_round_nearest(solution_t& solution, i_t n_target_ra rmm::device_scalar n_randomly_rounded(0, solution.handle_ptr->get_stream()); random_nearest_rounding_kernel<<get_stream()>>>( solution.view(), cuopt::seed_generator::get_seed(), n_randomly_rounded.data()); + RAFT_CHECK_CUDA(solution.handle_ptr->get_stream()); i_t h_n_random_rounds = n_randomly_rounded.value(solution.handle_ptr->get_stream()); CUOPT_LOG_TRACE("Randomly rounded integers %d", h_n_random_rounds); i_t additional_roundings_needed = n_target_random_rounds - h_n_random_rounds; @@ -125,12 +132,14 @@ void invoke_random_round_nearest(solution_t& solution, i_t n_target_ra shuffled_indices.data(), n_randomly_rounded.data(), additional_roundings_needed); + RAFT_CHECK_CUDA(solution.handle_ptr->get_stream()); h_n_random_rounds = n_randomly_rounded.value(solution.handle_ptr->get_stream()); CUOPT_LOG_TRACE("Randomly rounded integers, after adding close integers too %d", h_n_random_rounds); } solution.round_nearest(); RAFT_CHECK_CUDA(solution.handle_ptr->get_stream()); + solution.handle_ptr->sync_stream(); } template diff --git a/cpp/src/mip/presolve/bounds_presolve.cu b/cpp/src/mip/presolve/bounds_presolve.cu index 19527d9d08..f263c15c7d 100644 --- a/cpp/src/mip/presolve/bounds_presolve.cu +++ b/cpp/src/mip/presolve/bounds_presolve.cu @@ -101,6 +101,7 @@ void bound_presolve_t::resize(problem_t& problem) template void bound_presolve_t::calculate_activity(problem_t& pb) { + raft::common::nvtx::range scope("calculate_activity"); cuopt_assert(pb.n_variables == upd.lb.size(), "bounds array size inconsistent"); cuopt_assert(pb.n_variables == upd.ub.size(), "bounds array size inconsistent"); cuopt_assert(pb.n_constraints == upd.min_activity.size(), "activity array size inconsistent"); @@ -109,6 +110,8 @@ void bound_presolve_t::calculate_activity(problem_t& pb) constexpr auto n_threads = 256; calc_activity_kernel <<get_stream()>>>(pb.view(), upd.view()); + RAFT_CHECK_CUDA(pb.handle_ptr->get_stream()); + pb.handle_ptr->sync_stream(); } template @@ -178,6 +181,7 @@ template termination_criterion_t bound_presolve_t::bound_update_loop(problem_t& pb, timer_t timer) { + raft::common::nvtx::range scope("bound_update_loop"); termination_criterion_t criteria = termination_criterion_t::ITERATION_LIMIT; i_t iter; @@ -299,6 +303,7 @@ termination_criterion_t bound_presolve_t::solve(problem_t& p template bool bound_presolve_t::calculate_infeasible_redundant_constraints(problem_t& pb) { + raft::common::nvtx::range scope("calculate_infeasible_redundant_constraints"); auto detect_iter = thrust::make_transform_iterator( thrust::make_zip_iterator(thrust::make_tuple(upd.min_activity.begin(), upd.max_activity.begin(), diff --git a/cpp/src/mip/presolve/lb_probing_cache.cu b/cpp/src/mip/presolve/lb_probing_cache.cu index 598a4c6bce..4cacafdfd0 100644 --- a/cpp/src/mip/presolve/lb_probing_cache.cu +++ b/cpp/src/mip/presolve/lb_probing_cache.cu @@ -76,6 +76,7 @@ f_t lb_probing_cache_t::get_least_conflicting_rounding(problem_t compute_prioritized_integer_indices( load_balanced_bounds_presolve_t& bound_presolve, load_balanced_problem_t& problem) { + raft::common::nvtx::range fun_scope("compute_prioritized_integer_indices"); // sort the variables according to the min slack they have across constraints // we also need to consider the variable range // the priority is computed as the var_range * min_slack @@ -294,6 +296,7 @@ inline std::vector compute_prioritized_integer_indices( make_span(different_coefficient), make_span(max_excess_per_var), make_span(max_n_violated_per_constraint)); + RAFT_CHECK_CUDA(problem.handle_ptr->get_stream()); auto iterator = thrust::make_zip_iterator(thrust::make_tuple( max_n_violated_per_constraint.begin(), max_excess_per_var.begin(), min_slack_per_var.begin())); // sort the vars @@ -321,6 +324,7 @@ void compute_probing_cache(load_balanced_bounds_presolve_t& bound_pres load_balanced_problem_t& problem, timer_t timer) { + raft::common::nvtx::range fun_scope("compute_probing_cache"); // we dont want to compute the probing cache for all variables for time and computation resources auto priority_indices = compute_prioritized_integer_indices(bound_presolve, problem); // std::cout<<"priority_indices\n"; diff --git a/cpp/src/mip/presolve/load_balanced_bounds_presolve_helpers.cuh b/cpp/src/mip/presolve/load_balanced_bounds_presolve_helpers.cuh index fa6b717f6c..c255c01360 100644 --- a/cpp/src/mip/presolve/load_balanced_bounds_presolve_helpers.cuh +++ b/cpp/src/mip/presolve/load_balanced_bounds_presolve_helpers.cuh @@ -94,6 +94,7 @@ i_t create_heavy_item_block_segments(rmm::cuda_stream_view stream, const std::vector& bin_offsets, rmm::device_uvector const& offsets) { + raft::common::nvtx::range scope("create_heavy_item_block_segments"); // TODO : assert that bin_offsets.back() == offsets.size() - 1 auto heavy_id_beg = bin_offsets[ceil_log_2(heavy_degree_cutoff)]; auto n_items = offsets.size() - 1; @@ -167,15 +168,18 @@ void calc_activity_heavy_cnst(managed_stream_pool& streams, heavy_degree_cutoff, view, tmp_cnst_act); + RAFT_CHECK_CUDA(heavy_cnst_stream); auto num_heavy_cnst = cnst_bin_offsets.back() - heavy_cnst_beg_id; if (erase_inf_cnst) { finalize_calc_act_kernel <<>>( heavy_cnst_beg_id, make_span(heavy_cnst_block_segments), tmp_cnst_act, view); + RAFT_CHECK_CUDA(heavy_cnst_stream); } else { finalize_calc_act_kernel <<>>( heavy_cnst_beg_id, make_span(heavy_cnst_block_segments), tmp_cnst_act, view); + RAFT_CHECK_CUDA(heavy_cnst_stream); } } } @@ -201,9 +205,11 @@ void calc_activity_per_block(managed_stream_pool& streams, if (erase_inf_cnst) { lb_calc_act_block_kernel <<>>(cnst_id_beg, view); + RAFT_CHECK_CUDA(block_stream); } else { lb_calc_act_block_kernel <<>>(cnst_id_beg, view); + RAFT_CHECK_CUDA(block_stream); } } } @@ -260,9 +266,11 @@ void calc_activity_sub_warp(managed_stream_pool& streams, if (erase_inf_cnst) { lb_calc_act_sub_warp_kernel <<>>(cnst_id_beg, cnst_id_end, view); + RAFT_CHECK_CUDA(sub_warp_thread); } else { lb_calc_act_sub_warp_kernel <<>>(cnst_id_beg, cnst_id_end, view); + RAFT_CHECK_CUDA(sub_warp_thread); } } } @@ -303,10 +311,12 @@ void calc_activity_sub_warp(managed_stream_pool& streams, lb_calc_act_sub_warp_kernel <<>>( view, make_span(warp_cnst_offsets), make_span(warp_cnst_id_offsets)); + RAFT_CHECK_CUDA(sub_warp_stream); } else { lb_calc_act_sub_warp_kernel <<>>( view, make_span(warp_cnst_offsets), make_span(warp_cnst_id_offsets)); + RAFT_CHECK_CUDA(sub_warp_stream); } } } @@ -385,9 +395,11 @@ void upd_bounds_heavy_vars(managed_stream_pool& streams, heavy_degree_cutoff, view, tmp_vars_bnd); + RAFT_CHECK_CUDA(heavy_vars_stream); auto num_heavy_vars = vars_bin_offsets.back() - heavy_vars_beg_id; finalize_upd_bnd_kernel<<>>( heavy_vars_beg_id, make_span(heavy_vars_block_segments), tmp_vars_bnd, view); + RAFT_CHECK_CUDA(heavy_vars_stream); } } } @@ -414,9 +426,11 @@ void upd_bounds_heavy_vars(managed_stream_pool& streams, heavy_degree_cutoff, view, tmp_vars_bnd); + RAFT_CHECK_CUDA(heavy_vars_stream); auto num_heavy_vars = vars_bin_offsets.back() - heavy_vars_beg_id; finalize_upd_bnd_kernel<<>>( heavy_vars_beg_id, make_span(heavy_vars_block_segments), tmp_vars_bnd, view); + RAFT_CHECK_CUDA(heavy_vars_stream); } } } @@ -439,6 +453,7 @@ void upd_bounds_per_block(managed_stream_pool& streams, if (!dry_run) { lb_upd_bnd_block_kernel <<>>(vars_id_beg, view); + RAFT_CHECK_CUDA(block_stream); } } } @@ -486,6 +501,7 @@ void upd_bounds_sub_warp(managed_stream_pool& streams, if (!dry_run) { lb_upd_bnd_sub_warp_kernel <<>>(vars_id_beg, vars_id_end, view); + RAFT_CHECK_CUDA(sub_warp_stream); } } } @@ -507,6 +523,7 @@ void upd_bounds_sub_warp(managed_stream_pool& streams, lb_upd_bnd_sub_warp_kernel <<>>( view, make_span(warp_vars_offsets), make_span(warp_vars_id_offsets)); + RAFT_CHECK_CUDA(sub_warp_stream); } } } diff --git a/cpp/src/mip/presolve/load_balanced_partition_helpers.cuh b/cpp/src/mip/presolve/load_balanced_partition_helpers.cuh index 55c18a902e..974ed7c3c6 100644 --- a/cpp/src/mip/presolve/load_balanced_partition_helpers.cuh +++ b/cpp/src/mip/presolve/load_balanced_partition_helpers.cuh @@ -203,14 +203,15 @@ void bin_vertices(rmm::device_uvector& reorg_vertices, unsigned blocks = ((vertex_end - vertex_begin) + BLOCK_SIZE - 1) / BLOCK_SIZE; count_bin_sizes<<>>( bin_count.data(), offsets, vertex_begin, vertex_end, active_bitmap); - + RAFT_CHECK_CUDA(stream); exclusive_scan<<<1, 1, 0, stream>>>(bin_count.data(), bin_count_offsets.data()); - + RAFT_CHECK_CUDA(stream); i_t vertex_count = bin_count.back_element(stream); reorg_vertices.resize(vertex_count, stream); create_vertex_bins<<>>( reorg_vertices.data(), bin_count.data(), offsets, vertex_begin, vertex_end, active_bitmap); + RAFT_CHECK_CUDA(stream); } template diff --git a/cpp/src/mip/presolve/multi_probe.cu b/cpp/src/mip/presolve/multi_probe.cu index 5cb61070b7..6b7972035d 100644 --- a/cpp/src/mip/presolve/multi_probe.cu +++ b/cpp/src/mip/presolve/multi_probe.cu @@ -275,6 +275,7 @@ termination_criterion_t multi_probe_t::bound_update_loop(problem_t inline std::vector compute_prioritized_integer_indices( bound_presolve_t& bound_presolve, problem_t& problem) { + raft::common::nvtx::range scope("compute_prioritized_integer_indices"); // sort the variables according to the min slack they have across constraints // we also need to consider the variable range // the priority is computed as the var_range * min_slack @@ -337,6 +338,7 @@ inline std::vector compute_prioritized_integer_indices( make_span(different_coefficient), make_span(max_excess_per_var), make_span(max_n_violated_per_constraint)); + RAFT_CHECK_CUDA(problem.handle_ptr->get_stream()); auto iterator = thrust::make_zip_iterator(thrust::make_tuple( max_n_violated_per_constraint.begin(), max_excess_per_var.begin(), min_slack_per_var.begin())); // sort the vars @@ -372,6 +374,7 @@ void compute_cache_for_var(i_t var_idx, std::atomic& n_of_cached_probings, i_t device_id) { + raft::common::nvtx::range scope("compute_cache_for_var"); RAFT_CUDA_TRY(cudaSetDevice(device_id)); // test if we need per thread handle raft::handle_t handle{}; @@ -467,6 +470,7 @@ void compute_probing_cache(bound_presolve_t& bound_presolve, problem_t& problem, timer_t timer) { + raft::common::nvtx::range scope("compute_probing_cache"); // we dont want to compute the probing cache for all variables for time and computation resources auto priority_indices = compute_prioritized_integer_indices(bound_presolve, problem); CUOPT_LOG_DEBUG("Computing probing cache"); diff --git a/cpp/src/mip/problem/load_balanced_problem.cu b/cpp/src/mip/problem/load_balanced_problem.cu index 2fe6842f38..e2e1717864 100644 --- a/cpp/src/mip/problem/load_balanced_problem.cu +++ b/cpp/src/mip/problem/load_balanced_problem.cu @@ -215,6 +215,7 @@ void create_constraint_graph(const raft::handle_t* handle_ptr, // copy adjacency lists and vertex properties constraint_data_copy<<get_stream()>>>( make_span(reorg_ids), make_span(offsets), make_span(coeff), make_span(edge), bounds, pb.view()); + RAFT_CHECK_CUDA(handle_ptr->get_stream()); if (debug) { rmm::device_scalar errors(0, handle_ptr->get_stream()); @@ -226,6 +227,7 @@ void create_constraint_graph(const raft::handle_t* handle_ptr, bounds, pb.view(), errors.data()); + RAFT_CHECK_CUDA(handle_ptr->get_stream()); i_t error_count = errors.value(handle_ptr->get_stream()); if (error_count != 0) { std::cerr << "adjacency list copy mismatch\n"; } } @@ -262,6 +264,7 @@ void create_variable_graph(const raft::handle_t* handle_ptr, bounds, make_span(types), pb.view()); + RAFT_CHECK_CUDA(handle_ptr->get_stream()); if (debug) { rmm::device_scalar errors(0, handle_ptr->get_stream()); @@ -274,6 +277,7 @@ void create_variable_graph(const raft::handle_t* handle_ptr, make_span(types), pb.view(), errors.data()); + RAFT_CHECK_CUDA(handle_ptr->get_stream()); i_t error_count = errors.value(handle_ptr->get_stream()); if (error_count != 0) { std::cerr << "adjacency list copy mismatch\n"; } } diff --git a/cpp/src/mip/problem/problem.cu b/cpp/src/mip/problem/problem.cu index 1ba681d132..31c483b7d2 100644 --- a/cpp/src/mip/problem/problem.cu +++ b/cpp/src/mip/problem/problem.cu @@ -766,6 +766,7 @@ void problem_t::compute_binary_var_table() template void problem_t::compute_related_variables(double time_limit) { + raft::common::nvtx::range scope("compute_related_variables"); auto pb_view = view(); handle_ptr->sync_stream(); @@ -809,6 +810,7 @@ void problem_t::compute_related_variables(double time_limit) thrust::fill(handle_ptr->get_thrust_policy(), varmap.begin(), varmap.end(), 0); compute_related_vars_unique<<<1024, 128, 0, handle_ptr->get_stream()>>>( pb_view, slice_begin, slice_end, make_span(varmap)); + RAFT_CHECK_CUDA(handle_ptr->get_stream()); // prefix sum to generate offsets thrust::inclusive_scan(handle_ptr->get_thrust_policy(), @@ -1023,6 +1025,7 @@ void problem_t::fix_given_variables(problem_t& original_prob const rmm::device_uvector& variables_to_fix, const raft::handle_t* handle_ptr) { + raft::common::nvtx::range scope("fix_given_variables"); i_t TPB = 64; fix_given_variables_kernel<<get_stream()>>>( original_problem.view(), @@ -1030,6 +1033,7 @@ void problem_t::fix_given_variables(problem_t& original_prob raft::device_span{assignment.data(), assignment.size()}, raft::device_span{const_cast(variables_to_fix.data()), variables_to_fix.size()}); RAFT_CHECK_CUDA(handle_ptr->get_stream()); + handle_ptr->sync_stream(); } template @@ -1087,6 +1091,7 @@ void problem_t::remove_given_variables(problem_t& original_p rmm::device_uvector& variable_map, const raft::handle_t* handle_ptr) { + raft::common::nvtx::range scope("remove_given_variables"); thrust::fill(handle_ptr->get_thrust_policy(), offsets.begin(), offsets.end(), 0); cuopt_assert(assignment.size() == n_variables, "Variable size mismatch"); cuopt_assert(variable_map.size() < n_variables, "Too many variables to fix"); @@ -1250,6 +1255,7 @@ template void compute_csr(const std::vector>>& variable_constraint_map, problem_t& pb) { + raft::common::nvtx::range scope("compute_csr"); auto handle_ptr = pb.handle_ptr; std::vector> vars_per_constraint(pb.n_constraints); std::vector> coefficient_per_constraint(pb.n_constraints); diff --git a/cpp/src/mip/relaxed_lp/relaxed_lp.cu b/cpp/src/mip/relaxed_lp/relaxed_lp.cu index 05fcf32051..00f7eaf1e3 100644 --- a/cpp/src/mip/relaxed_lp/relaxed_lp.cu +++ b/cpp/src/mip/relaxed_lp/relaxed_lp.cu @@ -140,6 +140,7 @@ bool run_lp_with_vars_fixed(problem_t& op_problem, bool return_first_feasible, bound_presolve_t* bound_presolve) { + raft::common::nvtx::range scope("run_lp_with_vars_fixed"); // if we are fixing all vars, there is no lp to be run if (variables_to_fix.size() == (size_t)op_problem.n_variables) { return true; } auto [fixed_problem, fixed_assignment, variable_map] = solution.fix_variables(variables_to_fix); diff --git a/cpp/src/mip/solution/solution.cu b/cpp/src/mip/solution/solution.cu index 05309fb3aa..309127d1b9 100644 --- a/cpp/src/mip/solution/solution.cu +++ b/cpp/src/mip/solution/solution.cu @@ -389,6 +389,7 @@ template std::tuple, rmm::device_uvector, rmm::device_uvector> solution_t::fix_variables(const rmm::device_uvector& variable_indices) { + raft::common::nvtx::range scope("fix_variables"); rmm::device_uvector new_assignment(assignment, handle_ptr->get_stream()); rmm::device_uvector variable_map(assignment.size(), handle_ptr->get_stream()); @@ -408,6 +409,7 @@ template void solution_t::unfix_variables(rmm::device_uvector& fixed_assignment, const rmm::device_uvector& variable_map) { + raft::common::nvtx::range scope("unfix_variables"); f_t* fixed_assignment_ptr = fixed_assignment.data(); f_t* assignment_ptr = assignment.data(); const i_t* variable_map_ptr = variable_map.data();