From e3e7378e5600fb7dda5d7315b38cd6d599e86db7 Mon Sep 17 00:00:00 2001 From: Vitor Sessak Date: Tue, 24 Jun 2025 11:52:53 +0000 Subject: [PATCH] Several fixes needed to compile cuOpt with LLVM: - add std:: prefix to min() and max(); - add missing headers; - avoid calling std::optional::value() on device code since it can throw exceptions; - add "template" keyword to help parsing; - use a constexpr static method to define init_data(), to avoid using not-yet-finished classes. --- cpp/cuopt_cli.cpp | 6 ++--- .../mip/solver_settings.hpp | 2 ++ cpp/libmps_parser/src/utilities/error.hpp | 2 ++ cpp/src/linear_programming/pdlp_constants.hpp | 14 +++++----- cpp/src/linear_programming/utils.cuh | 2 +- cpp/src/math_optimization/solution_reader.cu | 3 +++ cpp/src/mip/diversity/diversity_manager.cu | 23 ++++++++-------- cpp/src/mip/diversity/population.cu | 16 +++++------ .../recombiners/recombiner_stats.hpp | 4 +-- .../mip/feasibility_jump/load_balancing.cuh | 8 +++--- .../feasibility_pump/feasibility_pump.cu | 14 +++++----- .../line_segment_search.cu | 2 +- cpp/src/mip/local_search/local_search.cu | 14 +++++----- .../local_search/rounding/constraint_prop.cu | 2 +- .../rounding/lb_constraint_prop.cu | 2 +- .../conditional_bound_strengthening.cu | 2 +- .../presolve/load_balanced_bounds_presolve.cu | 1 - cpp/src/mip/problem/problem.cu | 2 +- .../crossovers/inversion_recombiner.hpp | 2 +- cpp/src/routing/crossovers/ox_recombiner.cuh | 2 +- cpp/src/routing/diversity/diverse_solver.hpp | 27 ++++++++++--------- cpp/src/routing/generator/generator.cu | 5 ++-- cpp/src/routing/ges/eject_until_feasible.cu | 12 ++------- cpp/src/routing/ges/execute_insertion.cuh | 3 ++- cpp/src/routing/ges/guided_ejection_search.cu | 4 +-- .../brute_force_lexico.cu | 3 ++- .../lexicographic_search.cu | 11 ++++---- .../lexicographic_search.cuh | 2 +- .../local_search/compute_ejections.cuh | 5 ++-- .../local_search/compute_insertions.cu | 2 +- .../cycle_finder/hash_functions.cuh | 1 + 
cpp/src/routing/local_search/local_search.cuh | 16 ++++++++--- cpp/src/routing/local_search/sliding_tsp.cu | 7 ++--- cpp/src/routing/local_search/sliding_tsp.cuh | 12 ++++++--- .../routing/local_search/sliding_window.cu | 7 ++--- cpp/src/routing/local_search/two_opt.cu | 7 ++--- .../local_search/vrp/nodes_to_search.cu | 2 +- .../routing/local_search/vrp/vrp_execute.cu | 4 +-- cpp/src/routing/solution/solution.cu | 4 +-- cpp/src/routing/solution/solution.cuh | 2 +- cpp/src/routing/util_kernels/top_k.cuh | 2 +- cpp/src/routing/utilities/cuopt_utils.cuh | 26 +++++++----------- cpp/src/routing/vehicle_info.hpp | 2 +- cpp/tests/routing/routing_test.cuh | 2 +- 44 files changed, 153 insertions(+), 138 deletions(-) diff --git a/cpp/cuopt_cli.cpp b/cpp/cuopt_cli.cpp index a81804a70a..594fb745d5 100644 --- a/cpp/cuopt_cli.cpp +++ b/cpp/cuopt_cli.cpp @@ -177,9 +177,9 @@ std::string param_name_to_arg_name(const std::string& input) int main(int argc, char* argv[]) { // Get the version string from the version_config.hpp file - const auto version_string = std::string("cuOpt ") + std::to_string(CUOPT_VERSION_MAJOR) + "." + - std::to_string(CUOPT_VERSION_MINOR) + "." + - std::to_string(CUOPT_VERSION_PATCH); + const std::string version_string = std::string("cuOpt ") + std::to_string(CUOPT_VERSION_MAJOR) + + "." + std::to_string(CUOPT_VERSION_MINOR) + "." 
+ + std::to_string(CUOPT_VERSION_PATCH); // Create the argument parser argparse::ArgumentParser program("cuopt_cli", version_string); diff --git a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp index 037ef752b4..24257f920c 100644 --- a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp @@ -17,6 +17,8 @@ #pragma once +#include + #include #include diff --git a/cpp/libmps_parser/src/utilities/error.hpp b/cpp/libmps_parser/src/utilities/error.hpp index 0e2054ed3f..3aafa6340a 100644 --- a/cpp/libmps_parser/src/utilities/error.hpp +++ b/cpp/libmps_parser/src/utilities/error.hpp @@ -16,6 +16,8 @@ */ #pragma once +#include + #include #include diff --git a/cpp/src/linear_programming/pdlp_constants.hpp b/cpp/src/linear_programming/pdlp_constants.hpp index 0491fdb505..8bf9b6c2b7 100644 --- a/cpp/src/linear_programming/pdlp_constants.hpp +++ b/cpp/src/linear_programming/pdlp_constants.hpp @@ -20,13 +20,13 @@ #include namespace cuopt::linear_programming::detail { -constexpr int block_size = 128; +inline constexpr int block_size = 128; // When using APIs that handle variable stride sizes these are used to express that we assume that // the data accessed has a contigous layout in memory for both solutions // { -constexpr int primal_stride = 1; -constexpr int dual_stride = 1; +inline constexpr int primal_stride = 1; +inline constexpr int dual_stride = 1; // } // #define PDLP_DEBUG_MODE @@ -34,18 +34,18 @@ constexpr int dual_stride = 1; // Value used to determine what we see as too small (the value) or too large (1/value) values when // computing the new primal weight during the restart. 
template -constexpr f_t safe_guard_for_extreme_values_in_primal_weight_computation = 1.0e-10; +inline constexpr f_t safe_guard_for_extreme_values_in_primal_weight_computation = 1.0e-10; // } // used to detect divergence in the movement as should trigger a numerical_error template -constexpr f_t divergent_movement = f_t{}; +inline constexpr f_t divergent_movement = f_t{}; template <> -constexpr float divergent_movement = 1.0e20f; +inline constexpr float divergent_movement = 1.0e20f; template <> -constexpr double divergent_movement = 1.0e100; +inline constexpr double divergent_movement = 1.0e100; // } diff --git a/cpp/src/linear_programming/utils.cuh b/cpp/src/linear_programming/utils.cuh index 55684edf14..d4df2815b1 100644 --- a/cpp/src/linear_programming/utils.cuh +++ b/cpp/src/linear_programming/utils.cuh @@ -398,7 +398,7 @@ struct relative_residual_t { // Used for best primal so far, count how many constraints are violated if (abs_.has_value() && nb_violated_constraints_.has_value()) { - if (residual >= abs_.value() + rel_ * rhs) atomicAdd(nb_violated_constraints_.value(), 1); + if (residual >= *abs_ + rel_ * rhs) atomicAdd(*nb_violated_constraints_, 1); } return residual - rel_ * rhs; } diff --git a/cpp/src/math_optimization/solution_reader.cu b/cpp/src/math_optimization/solution_reader.cu index 2b662a3750..cec2da9b3b 100644 --- a/cpp/src/math_optimization/solution_reader.cu +++ b/cpp/src/math_optimization/solution_reader.cu @@ -17,12 +17,15 @@ #include "solution_reader.hpp" +#include #include #include #include #include #include #include +#include + namespace cuopt::linear_programming { /** diff --git a/cpp/src/mip/diversity/diversity_manager.cu b/cpp/src/mip/diversity/diversity_manager.cu index a858c10958..a0ef30a124 100644 --- a/cpp/src/mip/diversity/diversity_manager.cu +++ b/cpp/src/mip/diversity/diversity_manager.cu @@ -71,7 +71,7 @@ bool diversity_manager_t::regenerate_solutions() const i_t min_size = 2; while (population.current_size() <= min_size && 
(current_step == 0 || counter < 5)) { CUOPT_LOG_DEBUG("Trying to regenerate solution, pop size %d\n", population.current_size()); - time_limit = min(time_limit, timer.remaining_time()); + time_limit = std::min(time_limit, timer.remaining_time()); ls.fj.randomize_weights(problem_ptr->handle_ptr); population.add_solution(generate_solution(time_limit)); if (timer.check_time_limit()) { return false; } @@ -90,18 +90,18 @@ std::vector> diversity_manager_t::generate_more_s { std::vector> solutions; timer_t total_time_to_generate = timer_t(timer.remaining_time() / 5.); - f_t time_limit = min(60., total_time_to_generate.remaining_time()); - f_t ls_limit = min(5., timer.remaining_time() / 20.); + f_t time_limit = std::min(60., total_time_to_generate.remaining_time()); + f_t ls_limit = std::min(5., timer.remaining_time() / 20.); const i_t n_sols_to_generate = 2; for (i_t i = 0; i < n_sols_to_generate; ++i) { CUOPT_LOG_DEBUG("Trying to generate more solutions"); - time_limit = min(time_limit, timer.remaining_time()); + time_limit = std::min(time_limit, timer.remaining_time()); ls.fj.randomize_weights(problem_ptr->handle_ptr); auto sol = generate_solution(time_limit); population.run_solution_callbacks(sol); solutions.emplace_back(solution_t(sol)); if (total_time_to_generate.check_time_limit()) { return solutions; } - timer_t timer(min(ls_limit, timer.remaining_time())); + timer_t timer(std::min(ls_limit, timer.remaining_time())); ls.run_local_search(sol, population.weights, timer); population.run_solution_callbacks(sol); solutions.emplace_back(std::move(sol)); @@ -183,7 +183,7 @@ void diversity_manager_t::generate_initial_solutions() // solution if we can generate faster generate up to 10 sols const f_t generation_time_limit = 0.6 * timer.get_time_limit(); const f_t max_island_gen_time = 600; - f_t total_island_gen_time = min(generation_time_limit, max_island_gen_time); + f_t total_island_gen_time = std::min(generation_time_limit, max_island_gen_time); timer_t 
gen_timer(total_island_gen_time); f_t sol_time_limit = gen_timer.remaining_time(); for (i_t i = 0; i < maximum_island_size; ++i) { @@ -265,7 +265,7 @@ void diversity_manager_t::generate_quick_feasible_solution() { solution_t solution(*problem_ptr); // min 1 second, max 10 seconds - const f_t generate_fast_solution_time = min(10., max(1., timer.remaining_time() / 20.)); + const f_t generate_fast_solution_time = std::min(10., std::max(1., timer.remaining_time() / 20.)); timer_t sol_timer(generate_fast_solution_time); // do very short LP run to get somewhere close to the optimal point ls.generate_fast_solution(solution, sol_timer); @@ -306,7 +306,7 @@ solution_t diversity_manager_t::run_solver() const f_t time_limit = timer.remaining_time(); constexpr f_t time_ratio_on_init_lp = 0.1; constexpr f_t max_time_on_lp = 30; - const f_t lp_time_limit = min(max_time_on_lp, time_limit * time_ratio_on_init_lp); + const f_t lp_time_limit = std::min(max_time_on_lp, time_limit * time_ratio_on_init_lp); // after every change to the problem, we should resize all the relevant vars // we need to encapsulate that to prevent repetitions @@ -328,7 +328,8 @@ solution_t diversity_manager_t::run_solver() generate_quick_feasible_solution(); constexpr f_t time_ratio_of_probing_cache = 0.10; constexpr f_t max_time_on_probing = 60; - f_t time_for_probing_cache = min(max_time_on_probing, time_limit * time_ratio_of_probing_cache); + f_t time_for_probing_cache = + std::min(max_time_on_probing, time_limit * time_ratio_of_probing_cache); timer_t probing_timer{time_for_probing_cache}; if (check_b_b_preemption()) { return population.best_feasible(); } compute_probing_cache(ls.constraint_prop.bounds_update, *problem_ptr, probing_timer); @@ -542,7 +543,7 @@ diversity_manager_t::recombine_and_local_search(solution_t& cuopt_assert(population.test_invariant(), ""); cuopt_assert(lp_offspring.test_number_all_integer(), "All must be integers before LP"); f_t lp_run_time = offspring.get_feasible() ? 3. 
: 1.; - lp_run_time = min(lp_run_time, timer.remaining_time()); + lp_run_time = std::min(lp_run_time, timer.remaining_time()); run_lp_with_vars_fixed(*lp_offspring.problem_ptr, lp_offspring, lp_offspring.problem_ptr->integer_indices, @@ -553,7 +554,7 @@ diversity_manager_t::recombine_and_local_search(solution_t& cuopt_assert(lp_offspring.test_number_all_integer(), "All must be integers after LP"); f_t lp_qual = lp_offspring.get_quality(population.weights); CUOPT_LOG_DEBUG("After LP offspring sol cost:feas %f : %d", lp_qual, lp_offspring.get_feasible()); - f_t offspring_qual = min(offspring.get_quality(population.weights), lp_qual); + f_t offspring_qual = std::min(offspring.get_quality(population.weights), lp_qual); recombine_stats.update_improve_stats( offspring_qual, sol1.get_quality(population.weights), sol2.get_quality(population.weights)); return std::make_pair(std::move(offspring), std::move(lp_offspring)); diff --git a/cpp/src/mip/diversity/population.cu b/cpp/src/mip/diversity/population.cu index d82ac0f14b..0b4001772f 100644 --- a/cpp/src/mip/diversity/population.cu +++ b/cpp/src/mip/diversity/population.cu @@ -65,7 +65,7 @@ template void population_t::initialize_population() { var_threshold = - max(problem_ptr->n_variables - var_threshold, (problem_ptr->n_variables / 10) * 8); + std::max(problem_ptr->n_variables - var_threshold, (problem_ptr->n_variables / 10) * 8); initial_threshold_ratio = (f_t)var_threshold / problem_ptr->n_variables; solutions.reserve(max_solutions); indices.reserve(max_solutions); @@ -378,7 +378,7 @@ void population_t::compute_new_weights() infeasibility_importance *= weight_increase_ratio; } - infeasibility_importance = min(max_infeasibility_weight, infeasibility_importance); + infeasibility_importance = std::min(max_infeasibility_weight, infeasibility_importance); thrust::for_each(best_sol.handle_ptr->get_thrust_policy(), thrust::counting_iterator(0), thrust::counting_iterator(0) + weights.cstr_weights.size(), @@ -394,7 +394,7 @@ 
void population_t::compute_new_weights() } else { CUOPT_LOG_DEBUG("Decreasing weights!"); infeasibility_importance *= weight_decrease_ratio; - infeasibility_importance = max(min_infeasibility_weight, infeasibility_importance); + infeasibility_importance = std::max(min_infeasibility_weight, infeasibility_importance); thrust::for_each( best_sol.handle_ptr->get_thrust_policy(), @@ -535,7 +535,7 @@ template i_t get_max_var_threshold(i_t n_vars) { if (n_vars < 50) { - return max(1, n_vars - 1); + return std::max(1, n_vars - 1); } else if (n_vars < 80) { return n_vars - 2; } else if (n_vars < 200) { @@ -559,7 +559,7 @@ void population_t::halve_the_population() size_t max_var_threshold = get_max_var_threshold(problem_ptr->n_integer_vars); while (current_size() > max_solutions / 2) { clear_except_best_feasible(); - var_threshold = max(var_threshold * 0.97, 0.5 * problem_ptr->n_integer_vars); + var_threshold = std::max(var_threshold * 0.97, 0.5 * problem_ptr->n_integer_vars); for (auto& sol : sol_vec) { add_solution(solution_t(sol)); } @@ -569,9 +569,9 @@ void population_t::halve_the_population() // if we removed too many decrease the diversity a little while (current_size() < max_solutions / 4) { clear_except_best_feasible(); - var_threshold = - min(max_var_threshold, - min((size_t)(var_threshold * 0.97), (size_t)(0.995 * problem_ptr->n_integer_vars))); + var_threshold = std::min( + max_var_threshold, + std::min((size_t)(var_threshold * 0.97), (size_t)(0.995 * problem_ptr->n_integer_vars))); for (auto& sol : sol_vec) { add_solution(solution_t(sol)); } diff --git a/cpp/src/mip/diversity/recombiners/recombiner_stats.hpp b/cpp/src/mip/diversity/recombiners/recombiner_stats.hpp index 6afd16d36b..36091db662 100644 --- a/cpp/src/mip/diversity/recombiners/recombiner_stats.hpp +++ b/cpp/src/mip/diversity/recombiners/recombiner_stats.hpp @@ -39,8 +39,8 @@ struct recombine_stats { void update_improve_stats(double cost_new, double cost_first, double cost_second) { - if (cost_new < 
(min(cost_first, cost_second) - OBJECTIVE_EPSILON)) ++better_than_both; - if (cost_new < (max(cost_first, cost_second) - OBJECTIVE_EPSILON)) ++better_than_one; + if (cost_new < (std::min(cost_first, cost_second) - OBJECTIVE_EPSILON)) ++better_than_both; + if (cost_new < (std::max(cost_first, cost_second) - OBJECTIVE_EPSILON)) ++better_than_one; } void add_attempt() { ++attempts; } diff --git a/cpp/src/mip/feasibility_jump/load_balancing.cuh b/cpp/src/mip/feasibility_jump/load_balancing.cuh index a92c3431b9..f3515e5de0 100644 --- a/cpp/src/mip/feasibility_jump/load_balancing.cuh +++ b/cpp/src/mip/feasibility_jump/load_balancing.cuh @@ -15,6 +15,8 @@ * limitations under the License. */ +#include + #include "feasibility_jump_kernels.cuh" #include @@ -380,8 +382,7 @@ __global__ void load_balancing_mtm_compute_candidates( i_t lane_id = threadIdx.x % raft::WarpSize; const i_t stride = get_warp_id_stride(); - for (auto [var_idx, subworkid, offset_begin, offset_end, csr_offset, skip] : - csr_load_balancer{ + for (auto [var_idx, subworkid, offset_begin, offset_end, _, skip] : csr_load_balancer{ fj, fj.row_size_nonbin_prefix_sum, fj.work_id_to_nonbin_var_idx}) { cuopt_assert(!fj.pb.is_binary_variable[var_idx], "variable is binary"); @@ -483,8 +484,7 @@ __launch_bounds__(TPB_loadbalance, 16) __global__ i_t lane_id = threadIdx.x % raft::WarpSize; const i_t stride = get_warp_id_stride(); - for (auto [var_idx, subworkid, offset_begin, offset_end, csr_offset, skip] : - csr_load_balancer{ + for (auto [var_idx, subworkid, offset_begin, offset_end, _, skip] : csr_load_balancer{ fj, fj.row_size_nonbin_prefix_sum, fj.work_id_to_nonbin_var_idx}) { cuopt_assert(!fj.pb.is_binary_variable[var_idx], "variable is binary"); diff --git a/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cu b/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cu index c15a5d2f47..cd959b652e 100644 --- a/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cu +++ 
b/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cu @@ -218,7 +218,7 @@ bool feasibility_pump_t::linear_project_onto_polytope(solution_t::round_multiple_points(solution_t& s { n_fj_single_descents = 0; const f_t max_time_limit = last_lp_time * 0.1; - timer_t round_timer{min(max_time_limit, timer.remaining_time())}; + timer_t round_timer{std::min(max_time_limit, timer.remaining_time())}; bool is_feasible = random_round_with_fj(solution, round_timer); if (is_feasible) { CUOPT_LOG_DEBUG("Feasible found after random round with fj"); return true; } - timer_t line_segment_timer{min(1., timer.remaining_time())}; + timer_t line_segment_timer{std::min(1., timer.remaining_time())}; i_t n_points_to_search = n_fj_single_descents; bool is_feasibility_run = true; // create a copy, because assignment is changing within kernel and we want a separate point_1 @@ -379,8 +379,8 @@ bool feasibility_pump_t::round(solution_t& solution) { bool result; CUOPT_LOG_DEBUG("Rounding the point"); - timer_t bounds_prop_timer(min(2., timer.remaining_time())); - const f_t lp_run_time_after_feasible = min(3., timer.remaining_time() / 20.); + timer_t bounds_prop_timer(std::min(2., timer.remaining_time())); + const f_t lp_run_time_after_feasible = std::min(3., timer.remaining_time() / 20.); result = lb_constraint_prop.apply_round(solution, lp_run_time_after_feasible, bounds_prop_timer); cuopt_func_call(solution.test_variable_bounds(true)); // copy the last rounding @@ -409,7 +409,7 @@ bool feasibility_pump_t::run_fj_cycle_escape(solution_t& sol fj.settings.update_weights = true; fj.settings.feasibility_run = true; fj.settings.n_of_minimums_for_exit = 5000; - fj.settings.time_limit = min(3., timer.remaining_time()); + fj.settings.time_limit = std::min(3., timer.remaining_time()); fj.settings.termination = fj_termination_flags_t::FJ_TERMINATION_TIME_LIMIT; is_feasible = fj.solve(solution); // if FJ didn't change the solution, take last incumbent solution @@ -432,7 +432,7 @@ bool 
feasibility_pump_t::test_fj_feasible(solution_t& soluti fj.settings.update_weights = true; fj.settings.feasibility_run = true; fj.settings.n_of_minimums_for_exit = 5000; - fj.settings.time_limit = min(time_limit, timer.remaining_time()); + fj.settings.time_limit = std::min(time_limit, timer.remaining_time()); fj.settings.termination = fj_termination_flags_t::FJ_TERMINATION_TIME_LIMIT; cuopt_func_call(solution.test_variable_bounds(true)); is_feasible = fj.solve(solution); diff --git a/cpp/src/mip/local_search/line_segment_search/line_segment_search.cu b/cpp/src/mip/local_search/line_segment_search/line_segment_search.cu index 1f166bdf69..0d1147a90c 100644 --- a/cpp/src/mip/local_search/line_segment_search/line_segment_search.cu +++ b/cpp/src/mip/local_search/line_segment_search/line_segment_search.cu @@ -113,7 +113,7 @@ bool line_segment_search_t::search_line_segment(solution_t& fj.settings.mode = fj_mode_t::GREEDY_DESCENT; fj.settings.update_weights = false; fj.settings.feasibility_run = is_feasibility_run; - fj.settings.time_limit = min(0.5, timer.remaining_time()); + fj.settings.time_limit = std::min(0.5, timer.remaining_time()); is_feasible = fj.solve(solution); if (is_feasibility_run) { if (is_feasible) { diff --git a/cpp/src/mip/local_search/local_search.cu b/cpp/src/mip/local_search/local_search.cu index 8e3ef2e11f..7c02fefdcc 100644 --- a/cpp/src/mip/local_search/local_search.cu +++ b/cpp/src/mip/local_search/local_search.cu @@ -66,14 +66,14 @@ void local_search_t::generate_fast_solution(solution_t& solu fj.settings.update_weights = true; fj.settings.feasibility_run = true; fj.settings.termination = fj_termination_flags_t::FJ_TERMINATION_TIME_LIMIT; - fj.settings.time_limit = min(30., timer.remaining_time()); + fj.settings.time_limit = std::min(30., timer.remaining_time()); while (!timer.check_time_limit()) { - timer_t constr_prop_timer = timer_t(min(timer.remaining_time(), 2.)); + timer_t constr_prop_timer = timer_t(std::min(timer.remaining_time(), 2.)); // 
do constraint prop on lp optimal solution constraint_prop.apply_round(solution, 1., constr_prop_timer); if (solution.compute_feasibility()) { return; } if (timer.check_time_limit()) { return; }; - fj.settings.time_limit = min(3., timer.remaining_time()); + fj.settings.time_limit = std::min(3., timer.remaining_time()); // run fj on the solution fj.solve(solution); // TODO check if FJ returns the same solution @@ -150,7 +150,7 @@ bool local_search_t::run_fj_annealing(solution_t& solution, fj.settings.mode = fj_mode_t::EXIT_NON_IMPROVING; fj.settings.candidate_selection = fj_candidate_selection_t::FEASIBLE_FIRST; fj.settings.termination = fj_termination_flags_t::FJ_TERMINATION_TIME_LIMIT; - fj.settings.time_limit = min(10., timer.remaining_time()); + fj.settings.time_limit = std::min(10., timer.remaining_time()); fj.settings.parameters.allow_infeasibility_iterations = 100; fj.settings.update_weights = 1; fj.solve(solution); @@ -189,7 +189,7 @@ bool local_search_t::check_fj_on_lp_optimal(solution_t& solu } cuopt_func_call(solution.test_variable_bounds(false)); f_t lp_run_time_after_feasible = 1.; - timer_t bounds_prop_timer = timer_t(min(timer.remaining_time(), 10.)); + timer_t bounds_prop_timer = timer_t(std::min(timer.remaining_time(), 10.)); bool is_feasible = constraint_prop.apply_round(solution, lp_run_time_after_feasible, bounds_prop_timer); if (!is_feasible) { @@ -209,7 +209,7 @@ bool local_search_t::check_fj_on_lp_optimal(solution_t& solu fj.settings.update_weights = true; fj.settings.feasibility_run = true; fj.settings.termination = fj_termination_flags_t::FJ_TERMINATION_TIME_LIMIT; - fj.settings.time_limit = min(30., timer.remaining_time()); + fj.settings.time_limit = std::min(30., timer.remaining_time()); fj.solve(solution); return solution.get_feasible(); } @@ -227,7 +227,7 @@ bool local_search_t::run_fj_on_zero(solution_t& solution, ti fj.settings.update_weights = true; fj.settings.feasibility_run = true; fj.settings.termination = 
fj_termination_flags_t::FJ_TERMINATION_TIME_LIMIT; - fj.settings.time_limit = min(30., timer.remaining_time()); + fj.settings.time_limit = std::min(30., timer.remaining_time()); bool is_feasible = fj.solve(solution); return is_feasible; } diff --git a/cpp/src/mip/local_search/rounding/constraint_prop.cu b/cpp/src/mip/local_search/rounding/constraint_prop.cu index b59e246df6..e681474add 100644 --- a/cpp/src/mip/local_search/rounding/constraint_prop.cu +++ b/cpp/src/mip/local_search/rounding/constraint_prop.cu @@ -826,7 +826,7 @@ bool constraint_prop_t::find_integer( generate_bulk_rounding_vector(sol, orig_sol, host_vars_to_set, probing_candidates); probe(sol, orig_sol.problem_ptr, var_val_pairs, &set_count, unset_integer_vars); if (!repair_tried && rounding_ii && !timeout_happened) { - timer_t repair_timer{min(timer.remaining_time() / 5, timer.elapsed_time() / 3)}; + timer_t repair_timer{std::min(timer.remaining_time() / 5, timer.elapsed_time() / 3)}; save_bounds(sol); // update bounds and run repair procedure bool bounds_repaired = diff --git a/cpp/src/mip/local_search/rounding/lb_constraint_prop.cu b/cpp/src/mip/local_search/rounding/lb_constraint_prop.cu index 50867a5315..93eb936932 100644 --- a/cpp/src/mip/local_search/rounding/lb_constraint_prop.cu +++ b/cpp/src/mip/local_search/rounding/lb_constraint_prop.cu @@ -880,7 +880,7 @@ bool lb_constraint_prop_t::find_integer( &set_count, unset_integer_vars); if (!repair_tried && rounding_ii && !timeout_happened) { - timer_t repair_timer{min(timer.remaining_time() / 5, timer.elapsed_time() / 3)}; + timer_t repair_timer{std::min(timer.remaining_time() / 5, timer.elapsed_time() / 3)}; save_bounds(problem, assignment, orig_sol.handle_ptr); // update bounds and run repair procedure // infeasible cnst_slack invalid diff --git a/cpp/src/mip/presolve/conditional_bound_strengthening.cu b/cpp/src/mip/presolve/conditional_bound_strengthening.cu index 749609c018..2723a9b244 100644 --- 
a/cpp/src/mip/presolve/conditional_bound_strengthening.cu +++ b/cpp/src/mip/presolve/conditional_bound_strengthening.cu @@ -657,7 +657,7 @@ void conditional_bound_strengthening_t::solve(problem_t& pro if (n_blocks == 0) { return; } int max_row_size = get_max_row_size(problem.offsets, problem.handle_ptr->get_stream()); - max_row_size = min(TPB, max_row_size); + max_row_size = std::min(TPB, max_row_size); size_t sh_size = raft::alignTo(5 * sizeof(f_t) + sizeof(i_t) + sizeof(var_t), sizeof(i_t)) * max_row_size; diff --git a/cpp/src/mip/presolve/load_balanced_bounds_presolve.cu b/cpp/src/mip/presolve/load_balanced_bounds_presolve.cu index 9cee255b7e..091f8a53b5 100644 --- a/cpp/src/mip/presolve/load_balanced_bounds_presolve.cu +++ b/cpp/src/mip/presolve/load_balanced_bounds_presolve.cu @@ -30,7 +30,6 @@ #include #include -#include #include #include "load_balanced_bounds_presolve.cuh" #include "load_balanced_bounds_presolve_helpers.cuh" diff --git a/cpp/src/mip/problem/problem.cu b/cpp/src/mip/problem/problem.cu index d5be3ce7a4..c0c6f29c47 100644 --- a/cpp/src/mip/problem/problem.cu +++ b/cpp/src/mip/problem/problem.cu @@ -804,7 +804,7 @@ void problem_t::compute_related_variables(double time_limit) i_t related_var_offset = 0; auto start_time = std::chrono::high_resolution_clock::now(); for (i_t i = 0;; ++i) { - i_t slice_size = min(max_slice_size, n_variables - i * max_slice_size); + i_t slice_size = std::min(max_slice_size, n_variables - i * max_slice_size); if (slice_size <= 0) break; i_t slice_begin = i * max_slice_size; diff --git a/cpp/src/routing/crossovers/inversion_recombiner.hpp b/cpp/src/routing/crossovers/inversion_recombiner.hpp index 375322838c..577d92125c 100644 --- a/cpp/src/routing/crossovers/inversion_recombiner.hpp +++ b/cpp/src/routing/crossovers/inversion_recombiner.hpp @@ -194,7 +194,7 @@ struct inversion { } int routes_number = 0; int total_length = 0; - const int max_inverted_routes = max(1, min((int)different_routes.size() / 2, 5)); + const int 
max_inverted_routes = std::max(1, std::min((int)different_routes.size() / 2, 5)); while (total_length < 60 && routes_number < max_inverted_routes && different_routes.size() > 0) { size_t initial_id = next_random() % (different_routes.size()); diff --git a/cpp/src/routing/crossovers/ox_recombiner.cuh b/cpp/src/routing/crossovers/ox_recombiner.cuh index 8a04c98d01..e57fc8e6b1 100644 --- a/cpp/src/routing/crossovers/ox_recombiner.cuh +++ b/cpp/src/routing/crossovers/ox_recombiner.cuh @@ -748,7 +748,7 @@ struct OX { if (S.routes.size() == 1) { i = next_random() % (genome_A.size() - 1); } int j = i + 1 + (next_random() % (3 * max_route_len)); if ((size_t)j >= genome_A.size() - 1) { - j = i + next_random() % max(1, (((int)genome_A.size() - 1 - i) / 2)); + j = i + next_random() % std::max(1, (((int)genome_A.size() - 1 - i) / 2)); } for (int k = i; k <= j; k++) { diff --git a/cpp/src/routing/diversity/diverse_solver.hpp b/cpp/src/routing/diversity/diverse_solver.hpp index 94031c89cb..2147d23dd7 100644 --- a/cpp/src/routing/diversity/diverse_solver.hpp +++ b/cpp/src/routing/diversity/diverse_solver.hpp @@ -84,8 +84,8 @@ struct recombine_stats { void update_improve_stats(double cost_new, double cost_first, double cost_second) { - if (cost_new < (min(cost_first, cost_second) - MOVE_EPSILON)) ++better_than_both; - if (cost_new < (max(cost_first, cost_second) - MOVE_EPSILON)) ++better_than_one; + if (cost_new < (std::min(cost_first, cost_second) - MOVE_EPSILON)) ++better_than_both; + if (cost_new < (std::max(cost_first, cost_second) - MOVE_EPSILON)) ++better_than_one; } void add_attempt() { ++attempts; } @@ -419,8 +419,8 @@ struct solve { benchmark_print("%d solutions loaded to reserve with %f diversity\n", (int)reserve_population.current_size(), reserve_population.threshold); - const int n_best_solutions = min(5, (int)reserve_population.current_size()); - const int n_sampled_solutions = min(27, (int)reserve_population.current_size()); + const int n_best_solutions = std::min(5, 
(int)reserve_population.current_size()); + const int n_sampled_solutions = std::min(27, (int)reserve_population.current_size()); auto best_solutions = reserve_population.get_n_best(n_best_solutions); auto sampled_solutions = reserve_population.get_n_random(n_sampled_solutions, false); benchmark_print("%d solutions selected\n", n_sampled_solutions); @@ -618,7 +618,7 @@ struct solve { constexpr bool use_average = false; int threshold_index = p->is_cvrp() ? 1 : find_initial_diversity(working_vector, use_average); working_population.threshold = diversity_levels[threshold_index]; - if (!p->is_cvrp()) { threshold_index = min(4, max(2, threshold_index)); } + if (!p->is_cvrp()) { threshold_index = std::min(4, std::max(2, threshold_index)); } populate_working_population(); // for pure cvrp problems, we add more solutions to the reserve population @@ -627,7 +627,7 @@ struct solve { // We probably should generalize this for all easy problems if (p->is_cvrp()) { double time_left = timer.remaining_time(); - double single_gen_time = min(time_left * 0.05, 20.) * ges_time_fraction; + double single_gen_time = std::min(time_left * 0.05, 20.) * ges_time_fraction; // add five or so solutions to reserve for (int i = 0; i < 5; ++i) { g.generate_solution( @@ -729,7 +729,7 @@ struct solve { // TODO check refill reserve times double time_left = timer.remaining_time(); - double single_gen_time = min(time_left * 0.05, 20.) * ges_time_fraction; + double single_gen_time = std::min(time_left * 0.05, 20.) 
* ges_time_fraction; sols_num = std::min( sols_num, (int)reserve_population.max_solutions - (int)reserve_population.current_size()); for (int i = 0; i < sols_num; ++i) { @@ -814,9 +814,9 @@ struct solve { * 5 islands x 3 per island), and if all of these take the entire allocated * time */ - auto first_sol_gen_time = min(timer.get_time_limit() * 0.3, - 300.); // Upper limit of 5 mins (targetted for 15 min runs) - auto sol_gen_time = ges_time_fraction * min(timer.get_time_limit() * 0.05, 60.); + auto first_sol_gen_time = std::min(timer.get_time_limit() * 0.3, + 300.); // Upper limit of 5 mins (targetted for 15 min runs) + auto sol_gen_time = ges_time_fraction * std::min(timer.get_time_limit() * 0.05, 60.); auto const n_islands_size = islands_size; double max_island_generation_time; auto pop_size = p->is_tsp ? diversity_config_t::population_size() @@ -911,7 +911,7 @@ struct solve { // Give all the island generation time as some problems might consume // all the time improving the first threshold. 
double improve_time_limit = - max(0.0, max_island_generation_time - island_creation_timer.elapsed_time()); + std::max(0.0, max_island_generation_time - island_creation_timer.elapsed_time()); improvement_timer = timer_t(improve_time_limit); benchmark_print( @@ -980,8 +980,9 @@ struct solve { while (start_threshold_index >= 0) { // Lowering the threshold does not require updating the population - int valid_start_threshold_index = min(start_threshold_index, (int)step_lengths.size() - 1); - p.threshold = diversity_levels[valid_start_threshold_index]; + int valid_start_threshold_index = + std::min(start_threshold_index, (int)step_lengths.size() - 1); + p.threshold = diversity_levels[valid_start_threshold_index]; benchmark_print("time elapsed: %f \n", timer.elapsed_time()); benchmark_print("Improvement steps: %d\n", step_lengths[valid_start_threshold_index]); p.add_solutions_to_island(timer.elapsed_time(), reserve_population); diff --git a/cpp/src/routing/generator/generator.cu b/cpp/src/routing/generator/generator.cu index 93e7861f39..f3b7154e5b 100644 --- a/cpp/src/routing/generator/generator.cu +++ b/cpp/src/routing/generator/generator.cu @@ -229,8 +229,9 @@ d_mdarray_t generate_matrices(raft::handle_t const& handle, { constexpr f_t asymmetry_scalar = 0.01; dim3 n_threads(32, 32); - dim3 n_blocks(min((params.n_locations + n_threads.x - 1) / n_threads.x, CUDA_MAX_BLOCKS_2D), - min((params.n_locations + n_threads.y - 1) / n_threads.y, CUDA_MAX_BLOCKS_2D)); + dim3 n_blocks( + std::min((params.n_locations + n_threads.x - 1) / n_threads.x, (unsigned)CUDA_MAX_BLOCKS_2D), + std::min((params.n_locations + n_threads.y - 1) / n_threads.y, (unsigned)CUDA_MAX_BLOCKS_2D)); rmm::device_uvector cost_matrix(params.n_locations * params.n_locations, handle.get_stream()); diff --git a/cpp/src/routing/ges/eject_until_feasible.cu b/cpp/src/routing/ges/eject_until_feasible.cu index 6bd9e40e3c..7376e14635 100644 --- a/cpp/src/routing/ges/eject_until_feasible.cu +++ 
b/cpp/src/routing/ges/eject_until_feasible.cu @@ -27,14 +27,6 @@ namespace cuopt { namespace routing { namespace detail { -template -__device__ inline i_t get_lane_id() -{ - i_t id; - asm("mov.s32 %0, %laneid;" : "=r"(id)); - return id; -} - template __device__ inline T shfl_sync(T val, i_t srcLane, @@ -49,8 +41,8 @@ DI void weighted_random_warp_reduce(raft::random::PCGenerator& rng, T& weight, i { #pragma unroll for (i_t offset = raft::WarpSize / 2; offset > 0; offset /= 2) { - T tmp_weight = shfl_sync(weight, get_lane_id() + offset); - i_t tmp_idx = shfl_sync(idx, get_lane_id() + offset); + T tmp_weight = shfl_sync(weight, raft::laneId() + offset); + i_t tmp_idx = shfl_sync(idx, raft::laneId() + offset); T sum = (tmp_weight + weight); weight = sum; if (sum != 0) { diff --git a/cpp/src/routing/ges/execute_insertion.cuh b/cpp/src/routing/ges/execute_insertion.cuh index 6eea434841..e9c6edbca8 100644 --- a/cpp/src/routing/ges/execute_insertion.cuh +++ b/cpp/src/routing/ges/execute_insertion.cuh @@ -56,7 +56,8 @@ DI void execute_insert(typename solution_t::view_t& view, "Pickup should be smaller than delivery"); } - route_to_modify.insert_request(request_location, request_node, view.route_node_map); + route_to_modify.template insert_request( + request_location, request_node, view.route_node_map); route_t::view_t::compute_forward(route_to_modify); route_t::view_t::compute_backward(route_to_modify); diff --git a/cpp/src/routing/ges/guided_ejection_search.cu b/cpp/src/routing/ges/guided_ejection_search.cu index f1958be073..3dbebf15b2 100644 --- a/cpp/src/routing/ges/guided_ejection_search.cu +++ b/cpp/src/routing/ges/guided_ejection_search.cu @@ -231,7 +231,7 @@ bool guided_ejection_search_t::guided_ejection_search_loop(i_ cuopt_assert(K > 0, "number of routes should be positive!"); i_t N = solution_ptr->get_num_requests(); i_t cM = N * N / K; - iteration_limit = min(iteration_limit, cM); + iteration_limit = std::min(iteration_limit, cM); } i_t const 
n_max_multiple_insertions = @@ -240,7 +240,7 @@ bool guided_ejection_search_t::guided_ejection_search_loop(i_ const bool depot_included = solution_ptr->problem_ptr->order_info.depot_included_; - min_ep_size = min(EP.size(), min_ep_size); + min_ep_size = std::min(EP.size(), min_ep_size); while (EP.size() > desired_ep_size) { solution_ptr->global_runtime_checks(false, true, "ges_while_loop_begin"); diff --git a/cpp/src/routing/ges/lexicographic_search/brute_force_lexico.cu b/cpp/src/routing/ges/lexicographic_search/brute_force_lexico.cu index ac7afee445..f2e0c39b86 100644 --- a/cpp/src/routing/ges/lexicographic_search/brute_force_lexico.cu +++ b/cpp/src/routing/ges/lexicographic_search/brute_force_lexico.cu @@ -96,7 +96,8 @@ __global__ void brute_force_lexico_kernel( __syncthreads(); if (threadIdx.x == 0) { request_id_t request_locations(pickup_idx, delivery_idx); - s_route.insert_request(request_locations, request_node, s_route_node_map, true); + s_route.template insert_request( + request_locations, request_node, s_route_node_map, true); sequence_t<2 * b_k_max> sequence_including_delivery; i_t counter = 0; bool pd_feasible = true; diff --git a/cpp/src/routing/ges/lexicographic_search/lexicographic_search.cu b/cpp/src/routing/ges/lexicographic_search/lexicographic_search.cu index ff797a8135..a896bdf987 100644 --- a/cpp/src/routing/ges/lexicographic_search/lexicographic_search.cu +++ b/cpp/src/routing/ges/lexicographic_search/lexicographic_search.cu @@ -256,7 +256,7 @@ __global__ void lexicographic_search(typename solution_t::vie if (REQUEST == request_t::PDP && (node_stack.top().from_idx == node_stack.route_length || node_stack.template is_stack_top_insertion())) { time_between = - node_stack.get_dim_from_delivery(node_stack.top().intra_idx + 1); + node_stack.template get_dim_from_delivery(node_stack.top().intra_idx + 1); from_node = node_stack.delivery_node; @@ -265,9 +265,9 @@ __global__ void lexicographic_search(typename solution_t::vie 
node_stack.s_route.get_node(node_stack.top().intra_idx + 1)), "dim buffer mismatch"); } else { - time_between = node_stack.get_dim_between(node_stack.top().from_idx, - node_stack.top().intra_idx + 1); - from_node = node_stack.s_route.get_node(node_stack.top().from_idx); + time_between = node_stack.template get_dim_between( + node_stack.top().from_idx, node_stack.top().intra_idx + 1); + from_node = node_stack.s_route.get_node(node_stack.top().from_idx); copy_forward_data(from_node, node_stack.top()); cuopt_assert(node_stack.check_dim_between( node_stack.top().from_idx, @@ -624,7 +624,8 @@ __global__ void execute_lexico_move( request_locations = request_id_t(pickup_insert_idx); } // insert request - s_route.insert_request(request_locations, request_node, solution.route_node_map, true); + s_route.template insert_request( + request_locations, request_node, solution.route_node_map, true); i_t n_ejections_executed = 0; for (i_t i = 0; i < sequence_size; ++i) { bool eject = true; diff --git a/cpp/src/routing/ges/lexicographic_search/lexicographic_search.cuh b/cpp/src/routing/ges/lexicographic_search/lexicographic_search.cuh index 1a0a5f2c02..2d137205ee 100644 --- a/cpp/src/routing/ges/lexicographic_search/lexicographic_search.cuh +++ b/cpp/src/routing/ges/lexicographic_search/lexicographic_search.cuh @@ -25,7 +25,7 @@ namespace routing { namespace detail { struct p_val_seq_t { - __device__ p_val_seq_t(uint16_t p_v, uint16_t s_s) : p_val(p_v), sequence_size(s_s) {} + __host__ __device__ p_val_seq_t(uint16_t p_v, uint16_t s_s) : p_val(p_v), sequence_size(s_s) {} #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ uint p_val : 16; uint sequence_size : 16; diff --git a/cpp/src/routing/local_search/compute_ejections.cuh b/cpp/src/routing/local_search/compute_ejections.cuh index 23e6bf3b8a..a6b6f42f97 100644 --- a/cpp/src/routing/local_search/compute_ejections.cuh +++ b/cpp/src/routing/local_search/compute_ejections.cuh @@ -56,7 +56,7 @@ DI void compute_temp_route(typename 
route_t::view_t& temp_rou if (intra_idx == -1) { intra_idx = find_intra_idx(route, request_id.id()); } __syncthreads(); // copies the fixed route data (i.e tw values, demand, node_id etc.) - temp_route.copy_route_data_after_ejection(route, intra_idx, true); + temp_route.template copy_route_data_after_ejection(route, intra_idx, true); __syncthreads(); temp_route.copy_backward_data(route, intra_idx + 1, n_nodes_route + 1, intra_idx); __syncthreads(); @@ -110,7 +110,8 @@ DI void compute_temp_route(typename route_t::view_t& temp_rou } // copies the fixed route data (i.e tw values, demand, node_id etc.) __syncthreads(); - temp_route.copy_route_data_after_ejection(route, intra_idx, delivery_intra_idx, true); + temp_route.template copy_route_data_after_ejection( + route, intra_idx, delivery_intra_idx, true); __syncthreads(); if (threadIdx.x == 0) { auto prev_node = temp_route.get_node(intra_idx - 1); diff --git a/cpp/src/routing/local_search/compute_insertions.cu b/cpp/src/routing/local_search/compute_insertions.cu index 4f5dd306ec..5bad349a56 100644 --- a/cpp/src/routing/local_search/compute_insertions.cu +++ b/cpp/src/routing/local_search/compute_insertions.cu @@ -845,7 +845,7 @@ void find_insertions(solution_t& sol, } else { // for cross the load-balance factor is always 4 move_candidates.number_of_blocks_per_ls_route = - max(1, sol.get_max_active_nodes_for_all_routes() / 4); + std::max(1, sol.get_max_active_nodes_for_all_routes() / 4); if (search_type == search_type_t::CROSS) { n_blocks = move_candidates.number_of_blocks_per_ls_route * sol.get_n_routes() + sol.get_num_requests(); diff --git a/cpp/src/routing/local_search/cycle_finder/hash_functions.cuh b/cpp/src/routing/local_search/cycle_finder/hash_functions.cuh index e7383a702b..22ac7ff280 100644 --- a/cpp/src/routing/local_search/cycle_finder/hash_functions.cuh +++ b/cpp/src/routing/local_search/cycle_finder/hash_functions.cuh @@ -85,6 +85,7 @@ struct MurmurHash3_32 { const uint8_t* tail = (const uint8_t*)(data + 
nblocks * 4); uint32_t k1 = 0; switch (len & 3) { + case 0: break; case 3: k1 ^= tail[2] << 16; case 2: k1 ^= tail[1] << 8; case 1: diff --git a/cpp/src/routing/local_search/local_search.cuh b/cpp/src/routing/local_search/local_search.cuh index 6cbbc8f566..8f340f717f 100644 --- a/cpp/src/routing/local_search/local_search.cuh +++ b/cpp/src/routing/local_search/local_search.cuh @@ -51,8 +51,10 @@ struct found_sliding_solution_t { template struct is_sliding_uinitialized_t { - static constexpr found_sliding_solution_t init_data{ - std::numeric_limits::max(), -1, -1, -1, -1}; + static constexpr found_sliding_solution_t init_data() + { + return {std::numeric_limits::max(), -1, -1, -1, -1}; + } __device__ bool operator()(const found_sliding_solution_t& x) { @@ -73,7 +75,10 @@ struct two_opt_cand_t { i_t first; i_t second; double selection_delta; - static constexpr two_opt_cand_t init_data{-1, -1, std::numeric_limits::max()}; + static constexpr two_opt_cand_t init_data() + { + return two_opt_cand_t{-1, -1, std::numeric_limits::max()}; + } constexpr bool operator!=(const two_opt_cand_t& cand) const { return this->selection_delta != cand.selection_delta; @@ -82,7 +87,10 @@ struct two_opt_cand_t { template struct is_two_opt_uinitialized_t { - static constexpr two_opt_cand_t init_data{-1, -1, std::numeric_limits::max()}; + static constexpr two_opt_cand_t init_data() + { + return two_opt_cand_t{-1, -1, std::numeric_limits::max()}; + } __device__ bool operator()(const two_opt_cand_t& x) { diff --git a/cpp/src/routing/local_search/sliding_tsp.cu b/cpp/src/routing/local_search/sliding_tsp.cu index 6ba86e63e2..4dcf532464 100644 --- a/cpp/src/routing/local_search/sliding_tsp.cu +++ b/cpp/src/routing/local_search/sliding_tsp.cu @@ -152,7 +152,7 @@ __global__ void find_sliding_moves_tsp( const double excess_limit = s_route.get_weighted_excess(move_candidates.weights) * ls_excess_multiplier_route; - sliding_tsp_cand_t sliding_tsp_cand = is_sliding_tsp_uinitialized_t::init_data; + 
sliding_tsp_cand_t sliding_tsp_cand = is_sliding_tsp_uinitialized_t::init_data(); double cost_delta, selection_delta; constexpr bool exclude_self_in_neighbors = false; // for reverse op @@ -512,8 +512,9 @@ bool local_search_t::perform_sliding_tsp( sol, move_candidates, n_nodes, n_threads, temp_storage_bytes); auto n_blocks = move_candidates.nodes_to_search.n_sampled_nodes; - async_fill( - sampled_tsp_data_, is_sliding_tsp_uinitialized_t::init_data, sol.sol_handle->get_stream()); + async_fill(sampled_tsp_data_, + is_sliding_tsp_uinitialized_t::init_data(), + sol.sol_handle->get_stream()); auto sh_size = raft::alignTo(shared_route_size, sizeof(double)) + max_window_size * sizeof(double); diff --git a/cpp/src/routing/local_search/sliding_tsp.cuh b/cpp/src/routing/local_search/sliding_tsp.cuh index f14124d55e..f21b0c8c75 100644 --- a/cpp/src/routing/local_search/sliding_tsp.cuh +++ b/cpp/src/routing/local_search/sliding_tsp.cuh @@ -27,8 +27,10 @@ struct sliding_tsp_cand_t { i_t reverse; double selection_delta; - static constexpr sliding_tsp_cand_t init_data{ - -1, -1, -1, 0, std::numeric_limits::max()}; + static constexpr sliding_tsp_cand_t init_data() + { + return {-1, -1, -1, 0, std::numeric_limits::max()}; + } constexpr bool operator()(sliding_tsp_cand_t cand1, sliding_tsp_cand_t cand2) const { @@ -38,8 +40,10 @@ struct sliding_tsp_cand_t { template struct is_sliding_tsp_uinitialized_t { - static constexpr sliding_tsp_cand_t init_data{ - -1, -1, -1, 0, std::numeric_limits::max()}; + static constexpr sliding_tsp_cand_t init_data() + { + return {-1, -1, -1, 0, std::numeric_limits::max()}; + } __device__ bool operator()(const sliding_tsp_cand_t& x) { return x.window_size == -1; } }; diff --git a/cpp/src/routing/local_search/sliding_window.cu b/cpp/src/routing/local_search/sliding_window.cu index abe5554555..fa53203f4b 100644 --- a/cpp/src/routing/local_search/sliding_window.cu +++ b/cpp/src/routing/local_search/sliding_window.cu @@ -746,7 +746,8 @@ __global__ void 
kernel_perform_sliding_window( // All permutations of windows (size 2 to max_permutation_intra) are tryed along the whole route // Insertion with the lowest cost is recorded globally - found_sliding_solution_t found_sliding_solution = is_sliding_uinitialized_t::init_data; + found_sliding_solution_t found_sliding_solution = + is_sliding_uinitialized_t::init_data(); const double excess_limit = s_route.get_weighted_excess(move_candidates.weights) * ls_excess_multiplier_route; @@ -1047,7 +1048,7 @@ bool local_search_t::perform_sliding_window( sliding_cuda_graph.start_capture(solution.sol_handle->get_stream()); async_fill(found_sliding_solution_data_, - is_sliding_uinitialized_t::init_data, + is_sliding_uinitialized_t::init_data(), solution.sol_handle->get_stream()); async_fill(locks_, 0, solution.sol_handle->get_stream()); // So that it only trigger in debug @@ -1064,7 +1065,7 @@ bool local_search_t::perform_sliding_window( return false; } int ideal_blocks = 4 * solution.sol_handle->get_num_sms(); - int blocks_per_node = max(ideal_blocks / move_candidates.nodes_to_search.n_sampled_nodes, 1); + int blocks_per_node = std::max(ideal_blocks / move_candidates.nodes_to_search.n_sampled_nodes, 1); auto n_blocks = move_candidates.nodes_to_search.n_sampled_nodes * blocks_per_node; cuopt_assert(n_blocks > 0, "n_blocks should be positive"); diff --git a/cpp/src/routing/local_search/two_opt.cu b/cpp/src/routing/local_search/two_opt.cu index 7352d8af87..5d187942fc 100644 --- a/cpp/src/routing/local_search/two_opt.cu +++ b/cpp/src/routing/local_search/two_opt.cu @@ -129,7 +129,7 @@ __global__ void find_two_opt_moves(typename solution_t::view_ __syncthreads(); } - two_opt_cand_t two_opt_cand = is_two_opt_uinitialized_t::init_data; + two_opt_cand_t two_opt_cand = is_two_opt_uinitialized_t::init_data(); double cost_delta, selection_delta; auto nodes = route.get_num_nodes() - intra_idx; auto first = intra_idx; @@ -390,8 +390,9 @@ bool local_search_t::perform_two_opt( if 
(sol.problem_ptr->is_cvrp_intra()) { sampled_nodes_data_.resize(sol.get_n_routes() * sol.get_num_orders(), sol.sol_handle->get_stream()); - async_fill( - sampled_nodes_data_, is_two_opt_uinitialized_t::init_data, sol.sol_handle->get_stream()); + async_fill(sampled_nodes_data_, + is_two_opt_uinitialized_t::init_data(), + sol.sol_handle->get_stream()); } else { two_opt_cand_data_.resize(sol.get_n_routes(), sol.sol_handle->get_stream()); async_fill(locks_, 0, sol.sol_handle->get_stream()); diff --git a/cpp/src/routing/local_search/vrp/nodes_to_search.cu b/cpp/src/routing/local_search/vrp/nodes_to_search.cu index 818dd93b54..670d6c4065 100644 --- a/cpp/src/routing/local_search/vrp/nodes_to_search.cu +++ b/cpp/src/routing/local_search/vrp/nodes_to_search.cu @@ -147,7 +147,7 @@ bool nodes_to_search_t::sample_nodes_to_search( } cuopt_assert(n_sampled_nodes > 0, "There must be at least one operator!"); cuopt_assert(curr_n_nodes_to_search > 0, "There must be at least one operator!"); - n_sampled_nodes = min(n_sampled_nodes, curr_n_nodes_to_search); + n_sampled_nodes = std::min(n_sampled_nodes, curr_n_nodes_to_search); h_sampled_nodes.clear(); for (i_t i = 0; i < n_sampled_nodes; ++i) { std::uniform_int_distribution rng_dist(0, h_nodes_to_search.size() - 1); diff --git a/cpp/src/routing/local_search/vrp/vrp_execute.cu b/cpp/src/routing/local_search/vrp/vrp_execute.cu index 4e2c93e843..b19f2e25f5 100644 --- a/cpp/src/routing/local_search/vrp/vrp_execute.cu +++ b/cpp/src/routing/local_search/vrp/vrp_execute.cu @@ -444,8 +444,8 @@ bool execute_vrp_moves(solution_t& sol, cudaOccupancyMaxActiveBlocksPerMultiprocessor( &numBlocksPerSm, execute_vrp_moves_kernel, TPB, 0); // if the number of blocks are larger than the gpu can hold, only execute the max fitting moves - n_blocks = - min(n_blocks, sol.sol_handle->get_device_properties().multiProcessorCount * numBlocksPerSm); + n_blocks = std::min(n_blocks, + sol.sol_handle->get_device_properties().multiProcessorCount * numBlocksPerSm); 
auto sol_view = sol.view(); auto move_cand_view = move_candidates.view(); // launch diff --git a/cpp/src/routing/solution/solution.cu b/cpp/src/routing/solution/solution.cu index 66b0c8f7f9..c2162fc3ec 100644 --- a/cpp/src/routing/solution/solution.cu +++ b/cpp/src/routing/solution/solution.cu @@ -216,7 +216,7 @@ bool solution_t::remove_nodes(const std::vector>& rmm::device_scalar empty_route_produced(sol_handle->get_stream()); raft::copy(temp_nodes.data(), nodes_to_eject.data(), n_nodes_to_eject, sol_handle->get_stream()); compute_max_active(); - size_t sh_size = max(get_temp_route_shared_size(), n_routes * sizeof(i_t)); + size_t sh_size = std::max(get_temp_route_shared_size(), n_routes * sizeof(i_t)); bool is_set = set_shmem_of_kernel(remove_nodes_kernel, sh_size); cuopt_assert(is_set, "Not enough shared memory on device for remove_nodes!"); cuopt_expects(is_set, error_type_t::OutOfMemoryError, "Not enough shared memory on device"); @@ -529,7 +529,7 @@ void solution_t::copy_device_solution(solution_tget_stream()), temp_nodes(problem_.get_num_orders(), sol_handle_->get_stream()), temp_stack_counter(sol_handle_->get_stream()), - temp_int_vector(max(problem_.get_num_orders(), problem_.get_fleet_size()), + temp_int_vector(std::max(problem_.get_num_orders(), problem_.get_fleet_size()), sol_handle_->get_stream()) { raft::common::nvtx::range fun_scope("solution_t"); diff --git a/cpp/src/routing/util_kernels/top_k.cuh b/cpp/src/routing/util_kernels/top_k.cuh index de2c0a4532..f906f2ffe2 100644 --- a/cpp/src/routing/util_kernels/top_k.cuh +++ b/cpp/src/routing/util_kernels/top_k.cuh @@ -44,7 +44,7 @@ constexpr auto get_default() if constexpr (::cuda::std::is_same_v) { return std::numeric_limits::max(); } else { - return output_t::init_data; + return output_t::init_data(); } } diff --git a/cpp/src/routing/utilities/cuopt_utils.cuh b/cpp/src/routing/utilities/cuopt_utils.cuh index 053c6ec196..60628d6448 100644 --- a/cpp/src/routing/utilities/cuopt_utils.cuh +++ 
b/cpp/src/routing/utilities/cuopt_utils.cuh @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -62,20 +63,13 @@ __device__ inline T shfl(T val, i_t srcLane, i_t width = warp_size, uint mask = { return __shfl_sync(mask, val, srcLane, width); } -template -__device__ inline i_t lane_id() -{ - i_t id; - asm("mov.s32 %0, %laneid;" : "=r"(id)); - return id; -} template __device__ inline T warp_reduce(T val) { #pragma unroll for (i_t i = warp_size / 2; i > 0; i >>= 1) { - T tmp = shfl(val, lane_id() + i); + T tmp = shfl(val, raft::laneId() + i); val = min(val, tmp); } return val; @@ -86,7 +80,7 @@ template __device__ inline void block_reduce(T val, T* shmem, const i_t size = blockDim.x) { i_t nWarps = (size + warp_size - 1) / warp_size; - i_t lid = lane_id(); + i_t lid = raft::laneId(); i_t wid = threadIdx.x / warp_size; T warp_min = warp_reduce(val); if (lid == 0) shmem[wid] = warp_min; @@ -107,9 +101,9 @@ __inline__ __device__ void warp_reduce_ranked(val1_t& val1, val2_t& val2, i_t& i { #pragma unroll for (i_t offset = warp_size / 2; offset > 0; offset /= 2) { - val1_t tmp_val1 = shfl(val1, lane_id() + offset); - val2_t tmp_val2 = shfl(val2, lane_id() + offset); - i_t tmp_idx = shfl(idx, lane_id() + offset); + val1_t tmp_val1 = shfl(val1, raft::laneId() + offset); + val2_t tmp_val2 = shfl(val2, raft::laneId() + offset); + i_t tmp_idx = shfl(idx, raft::laneId() + offset); if (tmp_val1 < val1 || (tmp_val1 == val1 && tmp_val2 > val2)) { val1 = tmp_val1; val2 = tmp_val2; @@ -123,8 +117,8 @@ __inline__ __device__ void warp_reduce_ranked(T& val, i_t& idx) { #pragma unroll for (i_t offset = warp_size / 2; offset > 0; offset /= 2) { - T tmpVal = shfl(val, lane_id() + offset); - i_t tmpIdx = shfl(idx, lane_id() + offset); + T tmpVal = shfl(val, raft::laneId() + offset); + i_t tmpIdx = shfl(idx, raft::laneId() + offset); if (tmpVal < val) { val = tmpVal; idx = tmpIdx; @@ -140,7 +134,7 @@ __inline__ __device__ void block_reduce_ranked(T& val, i_t& idx, T* shbuf, 
i_t* i_t wid = threadIdx.x / warp_size; i_t nWarps = (blockDim.x + warp_size - 1) / warp_size; warp_reduce_ranked(val, idx); // Each warp performs partial reduction - i_t lane = lane_id(); + i_t lane = raft::laneId(); if (lane == 0) { values[wid] = val; // Write reduced value to shared memory indices[wid] = idx; // Write reduced value to shared memory @@ -174,7 +168,7 @@ __inline__ __device__ void block_reduce_ranked(__half& val, i_t& idx, __half* sh i_t wid = threadIdx.x / warp_size; i_t nWarps = (blockDim.x + warp_size - 1) / warp_size; warp_reduce_ranked(val, idx); // Each warp performs partial reduction - i_t lane = lane_id(); + i_t lane = raft::laneId(); if (lane == 0) { values[wid] = val; // Write reduced value to shared memory indices[wid] = idx; // Write reduced value to shared memory diff --git a/cpp/src/routing/vehicle_info.hpp b/cpp/src/routing/vehicle_info.hpp index 81c6710a3b..cd127a5cee 100644 --- a/cpp/src/routing/vehicle_info.hpp +++ b/cpp/src/routing/vehicle_info.hpp @@ -30,7 +30,7 @@ template struct VehicleInfo { constexpr bool has_time_matrix() const { return matrices.extent[1] > 1; } - bool operator==(VehicleInfo const& rhs) + bool operator==(VehicleInfo const& rhs) const { return drop_return_trip == rhs.drop_return_trip && skip_first_trip == rhs.skip_first_trip && type == rhs.type && order_service_times == rhs.order_service_times && diff --git a/cpp/tests/routing/routing_test.cuh b/cpp/tests/routing/routing_test.cuh index f50c080bde..2e8db07aae 100644 --- a/cpp/tests/routing/routing_test.cuh +++ b/cpp/tests/routing/routing_test.cuh @@ -576,7 +576,7 @@ class base_test_t { double order_latest = next_is_break_order ? break_latest_h[(break_dim + 1) * n_vehicles + id] : (next_is_depot ? depot_latest : latest_time_h[new_order]); - double curr_wait = max(0.0, order_earliest - order_arrival); + double curr_wait = std::max(0.0, order_earliest - order_arrival); order_arrival += curr_wait; arrival_stamp.push_back(order_arrival);