From e3e7378e5600fb7dda5d7315b38cd6d599e86db7 Mon Sep 17 00:00:00 2001
From: Vitor Sessak <vsessak@google.com>
Date: Tue, 24 Jun 2025 11:52:53 +0000
Subject: [PATCH] Several fixes needed to compile cuOpt with LLVM: - add std::
 prefix to min() and max(); - add missing headers; - avoid calling
 std::optional::value() on device code since it can throw exceptions; - add
 "template" keyword to help parsing; - use a constexpr static method to define
 init_data(), to avoid using not-yet-complete classes.

---
 cpp/cuopt_cli.cpp                             |  6 ++---
 .../mip/solver_settings.hpp                   |  2 ++
 cpp/libmps_parser/src/utilities/error.hpp     |  2 ++
 cpp/src/linear_programming/pdlp_constants.hpp | 14 +++++-----
 cpp/src/linear_programming/utils.cuh          |  2 +-
 cpp/src/math_optimization/solution_reader.cu  |  3 +++
 cpp/src/mip/diversity/diversity_manager.cu    | 23 ++++++++--------
 cpp/src/mip/diversity/population.cu           | 16 +++++------
 .../recombiners/recombiner_stats.hpp          |  4 +--
 .../mip/feasibility_jump/load_balancing.cuh   |  8 +++---
 .../feasibility_pump/feasibility_pump.cu      | 14 +++++-----
 .../line_segment_search.cu                    |  2 +-
 cpp/src/mip/local_search/local_search.cu      | 14 +++++-----
 .../local_search/rounding/constraint_prop.cu  |  2 +-
 .../rounding/lb_constraint_prop.cu            |  2 +-
 .../conditional_bound_strengthening.cu        |  2 +-
 .../presolve/load_balanced_bounds_presolve.cu |  1 -
 cpp/src/mip/problem/problem.cu                |  2 +-
 .../crossovers/inversion_recombiner.hpp       |  2 +-
 cpp/src/routing/crossovers/ox_recombiner.cuh  |  2 +-
 cpp/src/routing/diversity/diverse_solver.hpp  | 27 ++++++++++---------
 cpp/src/routing/generator/generator.cu        |  5 ++--
 cpp/src/routing/ges/eject_until_feasible.cu   | 12 ++-------
 cpp/src/routing/ges/execute_insertion.cuh     |  3 ++-
 cpp/src/routing/ges/guided_ejection_search.cu |  4 +--
 .../brute_force_lexico.cu                     |  3 ++-
 .../lexicographic_search.cu                   | 11 ++++----
 .../lexicographic_search.cuh                  |  2 +-
 .../local_search/compute_ejections.cuh        |  5 ++--
 .../local_search/compute_insertions.cu        |  2 +-
 .../cycle_finder/hash_functions.cuh           |  1 +
 cpp/src/routing/local_search/local_search.cuh | 16 ++++++++---
 cpp/src/routing/local_search/sliding_tsp.cu   |  7 ++---
 cpp/src/routing/local_search/sliding_tsp.cuh  | 12 ++++++---
 .../routing/local_search/sliding_window.cu    |  7 ++---
 cpp/src/routing/local_search/two_opt.cu       |  7 ++---
 .../local_search/vrp/nodes_to_search.cu       |  2 +-
 .../routing/local_search/vrp/vrp_execute.cu   |  4 +--
 cpp/src/routing/solution/solution.cu          |  4 +--
 cpp/src/routing/solution/solution.cuh         |  2 +-
 cpp/src/routing/util_kernels/top_k.cuh        |  2 +-
 cpp/src/routing/utilities/cuopt_utils.cuh     | 26 +++++++-----------
 cpp/src/routing/vehicle_info.hpp              |  2 +-
 cpp/tests/routing/routing_test.cuh            |  2 +-
 44 files changed, 153 insertions(+), 138 deletions(-)

diff --git a/cpp/cuopt_cli.cpp b/cpp/cuopt_cli.cpp
index a81804a70a..594fb745d5 100644
--- a/cpp/cuopt_cli.cpp
+++ b/cpp/cuopt_cli.cpp
@@ -177,9 +177,9 @@ std::string param_name_to_arg_name(const std::string& input)
 int main(int argc, char* argv[])
 {
   // Get the version string from the version_config.hpp file
-  const auto version_string = std::string("cuOpt ") + std::to_string(CUOPT_VERSION_MAJOR) + "." +
-                              std::to_string(CUOPT_VERSION_MINOR) + "." +
-                              std::to_string(CUOPT_VERSION_PATCH);
+  const std::string version_string = std::string("cuOpt ") + std::to_string(CUOPT_VERSION_MAJOR) +
+                                     "." + std::to_string(CUOPT_VERSION_MINOR) + "." +
+                                     std::to_string(CUOPT_VERSION_PATCH);
 
   // Create the argument parser
   argparse::ArgumentParser program("cuopt_cli", version_string);
diff --git a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp
index 037ef752b4..24257f920c 100644
--- a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp
+++ b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp
@@ -17,6 +17,8 @@
 
 #pragma once
 
+#include <vector>
+
 #include <cuopt/linear_programming/constants.h>
 #include <cuopt/linear_programming/utilities/internals.hpp>
 
diff --git a/cpp/libmps_parser/src/utilities/error.hpp b/cpp/libmps_parser/src/utilities/error.hpp
index 0e2054ed3f..3aafa6340a 100644
--- a/cpp/libmps_parser/src/utilities/error.hpp
+++ b/cpp/libmps_parser/src/utilities/error.hpp
@@ -16,6 +16,8 @@
  */
 #pragma once
 
+#include <string>
+
 #include <stdarg.h>
 #include <stdexcept>
 
diff --git a/cpp/src/linear_programming/pdlp_constants.hpp b/cpp/src/linear_programming/pdlp_constants.hpp
index 0491fdb505..8bf9b6c2b7 100644
--- a/cpp/src/linear_programming/pdlp_constants.hpp
+++ b/cpp/src/linear_programming/pdlp_constants.hpp
@@ -20,13 +20,13 @@
 #include <raft/util/cuda_utils.cuh>
 
 namespace cuopt::linear_programming::detail {
-constexpr int block_size = 128;
+inline constexpr int block_size = 128;
 
 // When using APIs that handle variable stride sizes these are used to express that we assume that
 // the data accessed has a contigous layout in memory for both solutions
 // {
-constexpr int primal_stride = 1;
-constexpr int dual_stride   = 1;
+inline constexpr int primal_stride = 1;
+inline constexpr int dual_stride   = 1;
 // }
 
 // #define PDLP_DEBUG_MODE
@@ -34,18 +34,18 @@ constexpr int dual_stride   = 1;
 // Value used to determine what we see as too small (the value) or too large (1/value) values when
 // computing the new primal weight during the restart.
 template <typename f_t>
-constexpr f_t safe_guard_for_extreme_values_in_primal_weight_computation = 1.0e-10;
+inline constexpr f_t safe_guard_for_extreme_values_in_primal_weight_computation = 1.0e-10;
 // }
 
 // used to detect divergence in the movement as should trigger a numerical_error
 template <typename f_t>
-constexpr f_t divergent_movement = f_t{};
+inline constexpr f_t divergent_movement = f_t{};
 
 template <>
-constexpr float divergent_movement<float> = 1.0e20f;
+inline constexpr float divergent_movement<float> = 1.0e20f;
 
 template <>
-constexpr double divergent_movement<double> = 1.0e100;
+inline constexpr double divergent_movement<double> = 1.0e100;
 
 // }
 
diff --git a/cpp/src/linear_programming/utils.cuh b/cpp/src/linear_programming/utils.cuh
index 55684edf14..d4df2815b1 100644
--- a/cpp/src/linear_programming/utils.cuh
+++ b/cpp/src/linear_programming/utils.cuh
@@ -398,7 +398,7 @@ struct relative_residual_t {
 
     // Used for best primal so far, count how many constraints are violated
     if (abs_.has_value() && nb_violated_constraints_.has_value()) {
-      if (residual >= abs_.value() + rel_ * rhs) atomicAdd(nb_violated_constraints_.value(), 1);
+      if (residual >= *abs_ + rel_ * rhs) atomicAdd(*nb_violated_constraints_, 1);
     }
     return residual - rel_ * rhs;
   }
diff --git a/cpp/src/math_optimization/solution_reader.cu b/cpp/src/math_optimization/solution_reader.cu
index 2b662a3750..cec2da9b3b 100644
--- a/cpp/src/math_optimization/solution_reader.cu
+++ b/cpp/src/math_optimization/solution_reader.cu
@@ -17,12 +17,15 @@
 
 #include "solution_reader.hpp"
 
+#include <algorithm>
 #include <fstream>
 #include <optional>
 #include <regex>
 #include <sstream>
 #include <stdexcept>
 #include <string>
+#include <unordered_map>
+
 namespace cuopt::linear_programming {
 
 /**
diff --git a/cpp/src/mip/diversity/diversity_manager.cu b/cpp/src/mip/diversity/diversity_manager.cu
index a858c10958..a0ef30a124 100644
--- a/cpp/src/mip/diversity/diversity_manager.cu
+++ b/cpp/src/mip/diversity/diversity_manager.cu
@@ -71,7 +71,7 @@ bool diversity_manager_t<i_t, f_t>::regenerate_solutions()
   const i_t min_size = 2;
   while (population.current_size() <= min_size && (current_step == 0 || counter < 5)) {
     CUOPT_LOG_DEBUG("Trying to regenerate solution, pop size %d\n", population.current_size());
-    time_limit = min(time_limit, timer.remaining_time());
+    time_limit = std::min(time_limit, timer.remaining_time());
     ls.fj.randomize_weights(problem_ptr->handle_ptr);
     population.add_solution(generate_solution(time_limit));
     if (timer.check_time_limit()) { return false; }
@@ -90,18 +90,18 @@ std::vector<solution_t<i_t, f_t>> diversity_manager_t<i_t, f_t>::generate_more_s
 {
   std::vector<solution_t<i_t, f_t>> solutions;
   timer_t total_time_to_generate = timer_t(timer.remaining_time() / 5.);
-  f_t time_limit                 = min(60., total_time_to_generate.remaining_time());
-  f_t ls_limit                   = min(5., timer.remaining_time() / 20.);
+  f_t time_limit                 = std::min(60., total_time_to_generate.remaining_time());
+  f_t ls_limit                   = std::min(5., timer.remaining_time() / 20.);
   const i_t n_sols_to_generate   = 2;
   for (i_t i = 0; i < n_sols_to_generate; ++i) {
     CUOPT_LOG_DEBUG("Trying to generate more solutions");
-    time_limit = min(time_limit, timer.remaining_time());
+    time_limit = std::min(time_limit, timer.remaining_time());
     ls.fj.randomize_weights(problem_ptr->handle_ptr);
     auto sol = generate_solution(time_limit);
     population.run_solution_callbacks(sol);
     solutions.emplace_back(solution_t<i_t, f_t>(sol));
     if (total_time_to_generate.check_time_limit()) { return solutions; }
-    timer_t timer(min(ls_limit, timer.remaining_time()));
+    timer_t timer(std::min(ls_limit, timer.remaining_time()));
     ls.run_local_search(sol, population.weights, timer);
     population.run_solution_callbacks(sol);
     solutions.emplace_back(std::move(sol));
@@ -183,7 +183,7 @@ void diversity_manager_t<i_t, f_t>::generate_initial_solutions()
   // solution if we can generate faster generate up to 10 sols
   const f_t generation_time_limit = 0.6 * timer.get_time_limit();
   const f_t max_island_gen_time   = 600;
-  f_t total_island_gen_time       = min(generation_time_limit, max_island_gen_time);
+  f_t total_island_gen_time       = std::min(generation_time_limit, max_island_gen_time);
   timer_t gen_timer(total_island_gen_time);
   f_t sol_time_limit = gen_timer.remaining_time();
   for (i_t i = 0; i < maximum_island_size; ++i) {
@@ -265,7 +265,7 @@ void diversity_manager_t<i_t, f_t>::generate_quick_feasible_solution()
 {
   solution_t<i_t, f_t> solution(*problem_ptr);
   // min 1 second, max 10 seconds
-  const f_t generate_fast_solution_time = min(10., max(1., timer.remaining_time() / 20.));
+  const f_t generate_fast_solution_time = std::min(10., std::max(1., timer.remaining_time() / 20.));
   timer_t sol_timer(generate_fast_solution_time);
   // do very short LP run to get somewhere close to the optimal point
   ls.generate_fast_solution(solution, sol_timer);
@@ -306,7 +306,7 @@ solution_t<i_t, f_t> diversity_manager_t<i_t, f_t>::run_solver()
   const f_t time_limit                = timer.remaining_time();
   constexpr f_t time_ratio_on_init_lp = 0.1;
   constexpr f_t max_time_on_lp        = 30;
-  const f_t lp_time_limit             = min(max_time_on_lp, time_limit * time_ratio_on_init_lp);
+  const f_t lp_time_limit = std::min(max_time_on_lp, time_limit * time_ratio_on_init_lp);
 
   // after every change to the problem, we should resize all the relevant vars
   // we need to encapsulate that to prevent repetitions
@@ -328,7 +328,8 @@ solution_t<i_t, f_t> diversity_manager_t<i_t, f_t>::run_solver()
   generate_quick_feasible_solution();
   constexpr f_t time_ratio_of_probing_cache = 0.10;
   constexpr f_t max_time_on_probing         = 60;
-  f_t time_for_probing_cache = min(max_time_on_probing, time_limit * time_ratio_of_probing_cache);
+  f_t time_for_probing_cache =
+    std::min(max_time_on_probing, time_limit * time_ratio_of_probing_cache);
   timer_t probing_timer{time_for_probing_cache};
   if (check_b_b_preemption()) { return population.best_feasible(); }
   compute_probing_cache(ls.constraint_prop.bounds_update, *problem_ptr, probing_timer);
@@ -542,7 +543,7 @@ diversity_manager_t<i_t, f_t>::recombine_and_local_search(solution_t<i_t, f_t>&
   cuopt_assert(population.test_invariant(), "");
   cuopt_assert(lp_offspring.test_number_all_integer(), "All must be integers before LP");
   f_t lp_run_time = offspring.get_feasible() ? 3. : 1.;
-  lp_run_time     = min(lp_run_time, timer.remaining_time());
+  lp_run_time     = std::min(lp_run_time, timer.remaining_time());
   run_lp_with_vars_fixed(*lp_offspring.problem_ptr,
                          lp_offspring,
                          lp_offspring.problem_ptr->integer_indices,
@@ -553,7 +554,7 @@ diversity_manager_t<i_t, f_t>::recombine_and_local_search(solution_t<i_t, f_t>&
   cuopt_assert(lp_offspring.test_number_all_integer(), "All must be integers after LP");
   f_t lp_qual = lp_offspring.get_quality(population.weights);
   CUOPT_LOG_DEBUG("After LP offspring sol cost:feas %f : %d", lp_qual, lp_offspring.get_feasible());
-  f_t offspring_qual = min(offspring.get_quality(population.weights), lp_qual);
+  f_t offspring_qual = std::min(offspring.get_quality(population.weights), lp_qual);
   recombine_stats.update_improve_stats(
     offspring_qual, sol1.get_quality(population.weights), sol2.get_quality(population.weights));
   return std::make_pair(std::move(offspring), std::move(lp_offspring));
diff --git a/cpp/src/mip/diversity/population.cu b/cpp/src/mip/diversity/population.cu
index d82ac0f14b..0b4001772f 100644
--- a/cpp/src/mip/diversity/population.cu
+++ b/cpp/src/mip/diversity/population.cu
@@ -65,7 +65,7 @@ template <typename i_t, typename f_t>
 void population_t<i_t, f_t>::initialize_population()
 {
   var_threshold =
-    max(problem_ptr->n_variables - var_threshold, (problem_ptr->n_variables / 10) * 8);
+    std::max(problem_ptr->n_variables - var_threshold, (problem_ptr->n_variables / 10) * 8);
   initial_threshold_ratio = (f_t)var_threshold / problem_ptr->n_variables;
   solutions.reserve(max_solutions);
   indices.reserve(max_solutions);
@@ -378,7 +378,7 @@ void population_t<i_t, f_t>::compute_new_weights()
       infeasibility_importance *= weight_increase_ratio;
     }
 
-    infeasibility_importance = min(max_infeasibility_weight, infeasibility_importance);
+    infeasibility_importance = std::min(max_infeasibility_weight, infeasibility_importance);
     thrust::for_each(best_sol.handle_ptr->get_thrust_policy(),
                      thrust::counting_iterator(0),
                      thrust::counting_iterator(0) + weights.cstr_weights.size(),
@@ -394,7 +394,7 @@ void population_t<i_t, f_t>::compute_new_weights()
   } else {
     CUOPT_LOG_DEBUG("Decreasing weights!");
     infeasibility_importance *= weight_decrease_ratio;
-    infeasibility_importance = max(min_infeasibility_weight, infeasibility_importance);
+    infeasibility_importance = std::max(min_infeasibility_weight, infeasibility_importance);
 
     thrust::for_each(
       best_sol.handle_ptr->get_thrust_policy(),
@@ -535,7 +535,7 @@ template <typename i_t>
 i_t get_max_var_threshold(i_t n_vars)
 {
   if (n_vars < 50) {
-    return max(1, n_vars - 1);
+    return std::max(1, n_vars - 1);
   } else if (n_vars < 80) {
     return n_vars - 2;
   } else if (n_vars < 200) {
@@ -559,7 +559,7 @@ void population_t<i_t, f_t>::halve_the_population()
   size_t max_var_threshold      = get_max_var_threshold(problem_ptr->n_integer_vars);
   while (current_size() > max_solutions / 2) {
     clear_except_best_feasible();
-    var_threshold = max(var_threshold * 0.97, 0.5 * problem_ptr->n_integer_vars);
+    var_threshold = std::max(var_threshold * 0.97, 0.5 * problem_ptr->n_integer_vars);
     for (auto& sol : sol_vec) {
       add_solution(solution_t<i_t, f_t>(sol));
     }
@@ -569,9 +569,9 @@ void population_t<i_t, f_t>::halve_the_population()
   // if we removed too many decrease the diversity a little
   while (current_size() < max_solutions / 4) {
     clear_except_best_feasible();
-    var_threshold =
-      min(max_var_threshold,
-          min((size_t)(var_threshold * 0.97), (size_t)(0.995 * problem_ptr->n_integer_vars)));
+    var_threshold = std::min(
+      max_var_threshold,
+      std::min((size_t)(var_threshold * 0.97), (size_t)(0.995 * problem_ptr->n_integer_vars)));
     for (auto& sol : sol_vec) {
       add_solution(solution_t<i_t, f_t>(sol));
     }
diff --git a/cpp/src/mip/diversity/recombiners/recombiner_stats.hpp b/cpp/src/mip/diversity/recombiners/recombiner_stats.hpp
index 6afd16d36b..36091db662 100644
--- a/cpp/src/mip/diversity/recombiners/recombiner_stats.hpp
+++ b/cpp/src/mip/diversity/recombiners/recombiner_stats.hpp
@@ -39,8 +39,8 @@ struct recombine_stats {
 
   void update_improve_stats(double cost_new, double cost_first, double cost_second)
   {
-    if (cost_new < (min(cost_first, cost_second) - OBJECTIVE_EPSILON)) ++better_than_both;
-    if (cost_new < (max(cost_first, cost_second) - OBJECTIVE_EPSILON)) ++better_than_one;
+    if (cost_new < (std::min(cost_first, cost_second) - OBJECTIVE_EPSILON)) ++better_than_both;
+    if (cost_new < (std::max(cost_first, cost_second) - OBJECTIVE_EPSILON)) ++better_than_one;
   }
 
   void add_attempt() { ++attempts; }
diff --git a/cpp/src/mip/feasibility_jump/load_balancing.cuh b/cpp/src/mip/feasibility_jump/load_balancing.cuh
index a92c3431b9..f3515e5de0 100644
--- a/cpp/src/mip/feasibility_jump/load_balancing.cuh
+++ b/cpp/src/mip/feasibility_jump/load_balancing.cuh
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#include <cuda_runtime_api.h>
+
 #include "feasibility_jump_kernels.cuh"
 
 #include <cub/block/block_merge_sort.cuh>
@@ -380,8 +382,7 @@ __global__ void load_balancing_mtm_compute_candidates(
   i_t lane_id = threadIdx.x % raft::WarpSize;
 
   const i_t stride = get_warp_id_stride();
-  for (auto [var_idx, subworkid, offset_begin, offset_end, csr_offset, skip] :
-       csr_load_balancer<i_t, f_t>{
+  for (auto [var_idx, subworkid, offset_begin, offset_end, _, skip] : csr_load_balancer<i_t, f_t>{
          fj, fj.row_size_nonbin_prefix_sum, fj.work_id_to_nonbin_var_idx}) {
     cuopt_assert(!fj.pb.is_binary_variable[var_idx], "variable is binary");
 
@@ -483,8 +484,7 @@ __launch_bounds__(TPB_loadbalance, 16) __global__
   i_t lane_id = threadIdx.x % raft::WarpSize;
 
   const i_t stride = get_warp_id_stride();
-  for (auto [var_idx, subworkid, offset_begin, offset_end, csr_offset, skip] :
-       csr_load_balancer<i_t, f_t>{
+  for (auto [var_idx, subworkid, offset_begin, offset_end, _, skip] : csr_load_balancer<i_t, f_t>{
          fj, fj.row_size_nonbin_prefix_sum, fj.work_id_to_nonbin_var_idx}) {
     cuopt_assert(!fj.pb.is_binary_variable[var_idx], "variable is binary");
 
diff --git a/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cu b/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cu
index c15a5d2f47..cd959b652e 100644
--- a/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cu
+++ b/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cu
@@ -218,7 +218,7 @@ bool feasibility_pump_t<i_t, f_t>::linear_project_onto_polytope(solution_t<i_t,
     get_tolerance_from_ratio(ratio_of_set_integers, context.settings.tolerances.absolute_tolerance);
   temp_p.check_problem_representation(true);
   f_t time_limit                 = longer_lp_run ? 5. : 1.;
-  time_limit                     = min(time_limit, timer.remaining_time());
+  time_limit                     = std::min(time_limit, timer.remaining_time());
   static f_t lp_time             = 0;
   static i_t n_calls             = 0;
   f_t old_remaining              = timer.remaining_time();
@@ -336,13 +336,13 @@ bool feasibility_pump_t<i_t, f_t>::round_multiple_points(solution_t<i_t, f_t>& s
 {
   n_fj_single_descents     = 0;
   const f_t max_time_limit = last_lp_time * 0.1;
-  timer_t round_timer{min(max_time_limit, timer.remaining_time())};
+  timer_t round_timer{std::min(max_time_limit, timer.remaining_time())};
   bool is_feasible = random_round_with_fj(solution, round_timer);
   if (is_feasible) {
     CUOPT_LOG_DEBUG("Feasible found after random round with fj");
     return true;
   }
-  timer_t line_segment_timer{min(1., timer.remaining_time())};
+  timer_t line_segment_timer{std::min(1., timer.remaining_time())};
   i_t n_points_to_search  = n_fj_single_descents;
   bool is_feasibility_run = true;
   // create a copy, because assignment is changing within kernel and we want a separate point_1
@@ -379,8 +379,8 @@ bool feasibility_pump_t<i_t, f_t>::round(solution_t<i_t, f_t>& solution)
 {
   bool result;
   CUOPT_LOG_DEBUG("Rounding the point");
-  timer_t bounds_prop_timer(min(2., timer.remaining_time()));
-  const f_t lp_run_time_after_feasible = min(3., timer.remaining_time() / 20.);
+  timer_t bounds_prop_timer(std::min(2., timer.remaining_time()));
+  const f_t lp_run_time_after_feasible = std::min(3., timer.remaining_time() / 20.);
   result = lb_constraint_prop.apply_round(solution, lp_run_time_after_feasible, bounds_prop_timer);
   cuopt_func_call(solution.test_variable_bounds(true));
   // copy the last rounding
@@ -409,7 +409,7 @@ bool feasibility_pump_t<i_t, f_t>::run_fj_cycle_escape(solution_t<i_t, f_t>& sol
   fj.settings.update_weights         = true;
   fj.settings.feasibility_run        = true;
   fj.settings.n_of_minimums_for_exit = 5000;
-  fj.settings.time_limit             = min(3., timer.remaining_time());
+  fj.settings.time_limit             = std::min(3., timer.remaining_time());
   fj.settings.termination            = fj_termination_flags_t::FJ_TERMINATION_TIME_LIMIT;
   is_feasible                        = fj.solve(solution);
   // if FJ didn't change the solution, take last incumbent solution
@@ -432,7 +432,7 @@ bool feasibility_pump_t<i_t, f_t>::test_fj_feasible(solution_t<i_t, f_t>& soluti
   fj.settings.update_weights         = true;
   fj.settings.feasibility_run        = true;
   fj.settings.n_of_minimums_for_exit = 5000;
-  fj.settings.time_limit             = min(time_limit, timer.remaining_time());
+  fj.settings.time_limit             = std::min(time_limit, timer.remaining_time());
   fj.settings.termination            = fj_termination_flags_t::FJ_TERMINATION_TIME_LIMIT;
   cuopt_func_call(solution.test_variable_bounds(true));
   is_feasible = fj.solve(solution);
diff --git a/cpp/src/mip/local_search/line_segment_search/line_segment_search.cu b/cpp/src/mip/local_search/line_segment_search/line_segment_search.cu
index 1f166bdf69..0d1147a90c 100644
--- a/cpp/src/mip/local_search/line_segment_search/line_segment_search.cu
+++ b/cpp/src/mip/local_search/line_segment_search/line_segment_search.cu
@@ -113,7 +113,7 @@ bool line_segment_search_t<i_t, f_t>::search_line_segment(solution_t<i_t, f_t>&
     fj.settings.mode            = fj_mode_t::GREEDY_DESCENT;
     fj.settings.update_weights  = false;
     fj.settings.feasibility_run = is_feasibility_run;
-    fj.settings.time_limit      = min(0.5, timer.remaining_time());
+    fj.settings.time_limit      = std::min(0.5, timer.remaining_time());
     is_feasible                 = fj.solve(solution);
     if (is_feasibility_run) {
       if (is_feasible) {
diff --git a/cpp/src/mip/local_search/local_search.cu b/cpp/src/mip/local_search/local_search.cu
index 8e3ef2e11f..7c02fefdcc 100644
--- a/cpp/src/mip/local_search/local_search.cu
+++ b/cpp/src/mip/local_search/local_search.cu
@@ -66,14 +66,14 @@ void local_search_t<i_t, f_t>::generate_fast_solution(solution_t<i_t, f_t>& solu
   fj.settings.update_weights         = true;
   fj.settings.feasibility_run        = true;
   fj.settings.termination            = fj_termination_flags_t::FJ_TERMINATION_TIME_LIMIT;
-  fj.settings.time_limit             = min(30., timer.remaining_time());
+  fj.settings.time_limit             = std::min(30., timer.remaining_time());
   while (!timer.check_time_limit()) {
-    timer_t constr_prop_timer = timer_t(min(timer.remaining_time(), 2.));
+    timer_t constr_prop_timer = timer_t(std::min(timer.remaining_time(), 2.));
     // do constraint prop on lp optimal solution
     constraint_prop.apply_round(solution, 1., constr_prop_timer);
     if (solution.compute_feasibility()) { return; }
     if (timer.check_time_limit()) { return; };
-    fj.settings.time_limit = min(3., timer.remaining_time());
+    fj.settings.time_limit = std::min(3., timer.remaining_time());
     // run fj on the solution
     fj.solve(solution);
     // TODO check if FJ returns the same solution
@@ -150,7 +150,7 @@ bool local_search_t<i_t, f_t>::run_fj_annealing(solution_t<i_t, f_t>& solution,
   fj.settings.mode                   = fj_mode_t::EXIT_NON_IMPROVING;
   fj.settings.candidate_selection    = fj_candidate_selection_t::FEASIBLE_FIRST;
   fj.settings.termination            = fj_termination_flags_t::FJ_TERMINATION_TIME_LIMIT;
-  fj.settings.time_limit             = min(10., timer.remaining_time());
+  fj.settings.time_limit             = std::min(10., timer.remaining_time());
   fj.settings.parameters.allow_infeasibility_iterations = 100;
   fj.settings.update_weights                            = 1;
   fj.solve(solution);
@@ -189,7 +189,7 @@ bool local_search_t<i_t, f_t>::check_fj_on_lp_optimal(solution_t<i_t, f_t>& solu
   }
   cuopt_func_call(solution.test_variable_bounds(false));
   f_t lp_run_time_after_feasible = 1.;
-  timer_t bounds_prop_timer      = timer_t(min(timer.remaining_time(), 10.));
+  timer_t bounds_prop_timer      = timer_t(std::min(timer.remaining_time(), 10.));
   bool is_feasible =
     constraint_prop.apply_round(solution, lp_run_time_after_feasible, bounds_prop_timer);
   if (!is_feasible) {
@@ -209,7 +209,7 @@ bool local_search_t<i_t, f_t>::check_fj_on_lp_optimal(solution_t<i_t, f_t>& solu
   fj.settings.update_weights         = true;
   fj.settings.feasibility_run        = true;
   fj.settings.termination            = fj_termination_flags_t::FJ_TERMINATION_TIME_LIMIT;
-  fj.settings.time_limit             = min(30., timer.remaining_time());
+  fj.settings.time_limit             = std::min(30., timer.remaining_time());
   fj.solve(solution);
   return solution.get_feasible();
 }
@@ -227,7 +227,7 @@ bool local_search_t<i_t, f_t>::run_fj_on_zero(solution_t<i_t, f_t>& solution, ti
   fj.settings.update_weights         = true;
   fj.settings.feasibility_run        = true;
   fj.settings.termination            = fj_termination_flags_t::FJ_TERMINATION_TIME_LIMIT;
-  fj.settings.time_limit             = min(30., timer.remaining_time());
+  fj.settings.time_limit             = std::min(30., timer.remaining_time());
   bool is_feasible                   = fj.solve(solution);
   return is_feasible;
 }
diff --git a/cpp/src/mip/local_search/rounding/constraint_prop.cu b/cpp/src/mip/local_search/rounding/constraint_prop.cu
index b59e246df6..e681474add 100644
--- a/cpp/src/mip/local_search/rounding/constraint_prop.cu
+++ b/cpp/src/mip/local_search/rounding/constraint_prop.cu
@@ -826,7 +826,7 @@ bool constraint_prop_t<i_t, f_t>::find_integer(
       generate_bulk_rounding_vector(sol, orig_sol, host_vars_to_set, probing_candidates);
     probe(sol, orig_sol.problem_ptr, var_val_pairs, &set_count, unset_integer_vars);
     if (!repair_tried && rounding_ii && !timeout_happened) {
-      timer_t repair_timer{min(timer.remaining_time() / 5, timer.elapsed_time() / 3)};
+      timer_t repair_timer{std::min(timer.remaining_time() / 5, timer.elapsed_time() / 3)};
       save_bounds(sol);
       // update bounds and run repair procedure
       bool bounds_repaired =
diff --git a/cpp/src/mip/local_search/rounding/lb_constraint_prop.cu b/cpp/src/mip/local_search/rounding/lb_constraint_prop.cu
index 50867a5315..93eb936932 100644
--- a/cpp/src/mip/local_search/rounding/lb_constraint_prop.cu
+++ b/cpp/src/mip/local_search/rounding/lb_constraint_prop.cu
@@ -880,7 +880,7 @@ bool lb_constraint_prop_t<i_t, f_t>::find_integer(
           &set_count,
           unset_integer_vars);
     if (!repair_tried && rounding_ii && !timeout_happened) {
-      timer_t repair_timer{min(timer.remaining_time() / 5, timer.elapsed_time() / 3)};
+      timer_t repair_timer{std::min(timer.remaining_time() / 5, timer.elapsed_time() / 3)};
       save_bounds(problem, assignment, orig_sol.handle_ptr);
       // update bounds and run repair procedure
       // infeasible cnst_slack invalid
diff --git a/cpp/src/mip/presolve/conditional_bound_strengthening.cu b/cpp/src/mip/presolve/conditional_bound_strengthening.cu
index 749609c018..2723a9b244 100644
--- a/cpp/src/mip/presolve/conditional_bound_strengthening.cu
+++ b/cpp/src/mip/presolve/conditional_bound_strengthening.cu
@@ -657,7 +657,7 @@ void conditional_bound_strengthening_t<i_t, f_t>::solve(problem_t<i_t, f_t>& pro
 
   if (n_blocks == 0) { return; }
   int max_row_size = get_max_row_size(problem.offsets, problem.handle_ptr->get_stream());
-  max_row_size     = min(TPB, max_row_size);
+  max_row_size     = std::min(TPB, max_row_size);
   size_t sh_size =
     raft::alignTo(5 * sizeof(f_t) + sizeof(i_t) + sizeof(var_t), sizeof(i_t)) * max_row_size;
 
diff --git a/cpp/src/mip/presolve/load_balanced_bounds_presolve.cu b/cpp/src/mip/presolve/load_balanced_bounds_presolve.cu
index 9cee255b7e..091f8a53b5 100644
--- a/cpp/src/mip/presolve/load_balanced_bounds_presolve.cu
+++ b/cpp/src/mip/presolve/load_balanced_bounds_presolve.cu
@@ -30,7 +30,6 @@
 #include <utilities/device_utils.cuh>
 
 #include <cub/cub.cuh>
-#include <nvtx3/nvtx3.hpp>
 #include <raft/common/nvtx.hpp>
 #include "load_balanced_bounds_presolve.cuh"
 #include "load_balanced_bounds_presolve_helpers.cuh"
diff --git a/cpp/src/mip/problem/problem.cu b/cpp/src/mip/problem/problem.cu
index d5be3ce7a4..c0c6f29c47 100644
--- a/cpp/src/mip/problem/problem.cu
+++ b/cpp/src/mip/problem/problem.cu
@@ -804,7 +804,7 @@ void problem_t<i_t, f_t>::compute_related_variables(double time_limit)
   i_t related_var_offset = 0;
   auto start_time        = std::chrono::high_resolution_clock::now();
   for (i_t i = 0;; ++i) {
-    i_t slice_size = min(max_slice_size, n_variables - i * max_slice_size);
+    i_t slice_size = std::min(max_slice_size, n_variables - i * max_slice_size);
     if (slice_size <= 0) break;
 
     i_t slice_begin = i * max_slice_size;
diff --git a/cpp/src/routing/crossovers/inversion_recombiner.hpp b/cpp/src/routing/crossovers/inversion_recombiner.hpp
index 375322838c..577d92125c 100644
--- a/cpp/src/routing/crossovers/inversion_recombiner.hpp
+++ b/cpp/src/routing/crossovers/inversion_recombiner.hpp
@@ -194,7 +194,7 @@ struct inversion {
     }
     int routes_number             = 0;
     int total_length              = 0;
-    const int max_inverted_routes = max(1, min((int)different_routes.size() / 2, 5));
+    const int max_inverted_routes = std::max(1, std::min((int)different_routes.size() / 2, 5));
     while (total_length < 60 && routes_number < max_inverted_routes &&
            different_routes.size() > 0) {
       size_t initial_id            = next_random() % (different_routes.size());
diff --git a/cpp/src/routing/crossovers/ox_recombiner.cuh b/cpp/src/routing/crossovers/ox_recombiner.cuh
index 8a04c98d01..e57fc8e6b1 100644
--- a/cpp/src/routing/crossovers/ox_recombiner.cuh
+++ b/cpp/src/routing/crossovers/ox_recombiner.cuh
@@ -748,7 +748,7 @@ struct OX {
     if (S.routes.size() == 1) { i = next_random() % (genome_A.size() - 1); }
     int j = i + 1 + (next_random() % (3 * max_route_len));
     if ((size_t)j >= genome_A.size() - 1) {
-      j = i + next_random() % max(1, (((int)genome_A.size() - 1 - i) / 2));
+      j = i + next_random() % std::max(1, (((int)genome_A.size() - 1 - i) / 2));
     }
 
     for (int k = i; k <= j; k++) {
diff --git a/cpp/src/routing/diversity/diverse_solver.hpp b/cpp/src/routing/diversity/diverse_solver.hpp
index 94031c89cb..2147d23dd7 100644
--- a/cpp/src/routing/diversity/diverse_solver.hpp
+++ b/cpp/src/routing/diversity/diverse_solver.hpp
@@ -84,8 +84,8 @@ struct recombine_stats {
 
   void update_improve_stats(double cost_new, double cost_first, double cost_second)
   {
-    if (cost_new < (min(cost_first, cost_second) - MOVE_EPSILON)) ++better_than_both;
-    if (cost_new < (max(cost_first, cost_second) - MOVE_EPSILON)) ++better_than_one;
+    if (cost_new < (std::min(cost_first, cost_second) - MOVE_EPSILON)) ++better_than_both;
+    if (cost_new < (std::max(cost_first, cost_second) - MOVE_EPSILON)) ++better_than_one;
   }
 
   void add_attempt() { ++attempts; }
@@ -419,8 +419,8 @@ struct solve {
     benchmark_print("%d solutions loaded to reserve with %f diversity\n",
                     (int)reserve_population.current_size(),
                     reserve_population.threshold);
-    const int n_best_solutions    = min(5, (int)reserve_population.current_size());
-    const int n_sampled_solutions = min(27, (int)reserve_population.current_size());
+    const int n_best_solutions    = std::min(5, (int)reserve_population.current_size());
+    const int n_sampled_solutions = std::min(27, (int)reserve_population.current_size());
     auto best_solutions           = reserve_population.get_n_best(n_best_solutions);
     auto sampled_solutions        = reserve_population.get_n_random(n_sampled_solutions, false);
     benchmark_print("%d solutions selected\n", n_sampled_solutions);
@@ -618,7 +618,7 @@ struct solve {
       constexpr bool use_average = false;
       int threshold_index = p->is_cvrp() ? 1 : find_initial_diversity(working_vector, use_average);
       working_population.threshold = diversity_levels[threshold_index];
-      if (!p->is_cvrp()) { threshold_index = min(4, max(2, threshold_index)); }
+      if (!p->is_cvrp()) { threshold_index = std::min(4, std::max(2, threshold_index)); }
       populate_working_population();
 
       // for pure cvrp problems, we add more solutions to the reserve population
@@ -627,7 +627,7 @@ struct solve {
       // We probably should generalize this for all easy problems
       if (p->is_cvrp()) {
         double time_left       = timer.remaining_time();
-        double single_gen_time = min(time_left * 0.05, 20.) * ges_time_fraction;
+        double single_gen_time = std::min(time_left * 0.05, 20.) * ges_time_fraction;
         // add five or so solutions to reserve
         for (int i = 0; i < 5; ++i) {
           g.generate_solution(
@@ -729,7 +729,7 @@ struct solve {
 
     // TODO check refill reserve times
     double time_left       = timer.remaining_time();
-    double single_gen_time = min(time_left * 0.05, 20.) * ges_time_fraction;
+    double single_gen_time = std::min(time_left * 0.05, 20.) * ges_time_fraction;
     sols_num               = std::min<int>(
       sols_num, (int)reserve_population.max_solutions - (int)reserve_population.current_size());
     for (int i = 0; i < sols_num; ++i) {
@@ -814,9 +814,9 @@ struct solve {
      * 5 islands x 3 per island), and if all of these take the entire allocated
      * time
      */
-    auto first_sol_gen_time   = min(timer.get_time_limit() * 0.3,
-                                  300.);  // Upper limit of 5 mins (targetted for 15 min runs)
-    auto sol_gen_time         = ges_time_fraction * min(timer.get_time_limit() * 0.05, 60.);
+    auto first_sol_gen_time   = std::min(timer.get_time_limit() * 0.3,
+                                       300.);  // Upper limit of 5 mins (targetted for 15 min runs)
+    auto sol_gen_time         = ges_time_fraction * std::min(timer.get_time_limit() * 0.05, 60.);
     auto const n_islands_size = islands_size;
     double max_island_generation_time;
     auto pop_size = p->is_tsp      ? diversity_config_t<int>::population_size<config_t::TSP>()
@@ -911,7 +911,7 @@ struct solve {
       // Give all the island generation time as some problems might consume
       // all the time improving the first threshold.
       double improve_time_limit =
-        max(0.0, max_island_generation_time - island_creation_timer.elapsed_time());
+        std::max(0.0, max_island_generation_time - island_creation_timer.elapsed_time());
 
       improvement_timer = timer_t(improve_time_limit);
       benchmark_print(
@@ -980,8 +980,9 @@ struct solve {
 
     while (start_threshold_index >= 0) {
       // Lowering the threshold does not require updating the population
-      int valid_start_threshold_index = min(start_threshold_index, (int)step_lengths.size() - 1);
-      p.threshold                     = diversity_levels[valid_start_threshold_index];
+      int valid_start_threshold_index =
+        std::min(start_threshold_index, (int)step_lengths.size() - 1);
+      p.threshold = diversity_levels[valid_start_threshold_index];
       benchmark_print("time elapsed: %f \n", timer.elapsed_time());
       benchmark_print("Improvement steps: %d\n", step_lengths[valid_start_threshold_index]);
       p.add_solutions_to_island(timer.elapsed_time(), reserve_population);
diff --git a/cpp/src/routing/generator/generator.cu b/cpp/src/routing/generator/generator.cu
index 93e7861f39..f3b7154e5b 100644
--- a/cpp/src/routing/generator/generator.cu
+++ b/cpp/src/routing/generator/generator.cu
@@ -229,8 +229,9 @@ d_mdarray_t<f_t> generate_matrices(raft::handle_t const& handle,
 {
   constexpr f_t asymmetry_scalar = 0.01;
   dim3 n_threads(32, 32);
-  dim3 n_blocks(min((params.n_locations + n_threads.x - 1) / n_threads.x, CUDA_MAX_BLOCKS_2D),
-                min((params.n_locations + n_threads.y - 1) / n_threads.y, CUDA_MAX_BLOCKS_2D));
+  dim3 n_blocks(
+    std::min((params.n_locations + n_threads.x - 1) / n_threads.x, (unsigned)CUDA_MAX_BLOCKS_2D),
+    std::min((params.n_locations + n_threads.y - 1) / n_threads.y, (unsigned)CUDA_MAX_BLOCKS_2D));
 
   rmm::device_uvector<f_t> cost_matrix(params.n_locations * params.n_locations,
                                        handle.get_stream());
diff --git a/cpp/src/routing/ges/eject_until_feasible.cu b/cpp/src/routing/ges/eject_until_feasible.cu
index 6bd9e40e3c..7376e14635 100644
--- a/cpp/src/routing/ges/eject_until_feasible.cu
+++ b/cpp/src/routing/ges/eject_until_feasible.cu
@@ -27,14 +27,6 @@ namespace cuopt {
 namespace routing {
 namespace detail {
 
-template <typename i_t = int, typename f_t = float>
-__device__ inline i_t get_lane_id()
-{
-  i_t id;
-  asm("mov.s32 %0, %laneid;" : "=r"(id));
-  return id;
-}
-
 template <typename T, typename i_t = int, typename f_t = float>
 __device__ inline T shfl_sync(T val,
                               i_t srcLane,
@@ -49,8 +41,8 @@ DI void weighted_random_warp_reduce(raft::random::PCGenerator& rng, T& weight, i
 {
 #pragma unroll
   for (i_t offset = raft::WarpSize / 2; offset > 0; offset /= 2) {
-    T tmp_weight = shfl_sync(weight, get_lane_id() + offset);
-    i_t tmp_idx  = shfl_sync(idx, get_lane_id() + offset);
+    T tmp_weight = shfl_sync(weight, raft::laneId() + offset);
+    i_t tmp_idx  = shfl_sync(idx, raft::laneId() + offset);
     T sum        = (tmp_weight + weight);
     weight       = sum;
     if (sum != 0) {
diff --git a/cpp/src/routing/ges/execute_insertion.cuh b/cpp/src/routing/ges/execute_insertion.cuh
index 6eea434841..e9c6edbca8 100644
--- a/cpp/src/routing/ges/execute_insertion.cuh
+++ b/cpp/src/routing/ges/execute_insertion.cuh
@@ -56,7 +56,8 @@ DI void execute_insert(typename solution_t<i_t, f_t, REQUEST>::view_t& view,
                  "Pickup should be smaller than delivery");
   }
 
-  route_to_modify.insert_request<REQUEST>(request_location, request_node, view.route_node_map);
+  route_to_modify.template insert_request<REQUEST>(
+    request_location, request_node, view.route_node_map);
 
   route_t<i_t, f_t, REQUEST>::view_t::compute_forward(route_to_modify);
   route_t<i_t, f_t, REQUEST>::view_t::compute_backward(route_to_modify);
diff --git a/cpp/src/routing/ges/guided_ejection_search.cu b/cpp/src/routing/ges/guided_ejection_search.cu
index f1958be073..3dbebf15b2 100644
--- a/cpp/src/routing/ges/guided_ejection_search.cu
+++ b/cpp/src/routing/ges/guided_ejection_search.cu
@@ -231,7 +231,7 @@ bool guided_ejection_search_t<i_t, f_t, REQUEST>::guided_ejection_search_loop(i_
     cuopt_assert(K > 0, "number of routes should be positive!");
     i_t N           = solution_ptr->get_num_requests();
     i_t cM          = N * N / K;
-    iteration_limit = min(iteration_limit, cM);
+    iteration_limit = std::min(iteration_limit, cM);
   }
 
   i_t const n_max_multiple_insertions =
@@ -240,7 +240,7 @@ bool guided_ejection_search_t<i_t, f_t, REQUEST>::guided_ejection_search_loop(i_
 
   const bool depot_included = solution_ptr->problem_ptr->order_info.depot_included_;
 
-  min_ep_size = min(EP.size(), min_ep_size);
+  min_ep_size = std::min(EP.size(), min_ep_size);
 
   while (EP.size() > desired_ep_size) {
     solution_ptr->global_runtime_checks(false, true, "ges_while_loop_begin");
diff --git a/cpp/src/routing/ges/lexicographic_search/brute_force_lexico.cu b/cpp/src/routing/ges/lexicographic_search/brute_force_lexico.cu
index ac7afee445..f2e0c39b86 100644
--- a/cpp/src/routing/ges/lexicographic_search/brute_force_lexico.cu
+++ b/cpp/src/routing/ges/lexicographic_search/brute_force_lexico.cu
@@ -96,7 +96,8 @@ __global__ void brute_force_lexico_kernel(
       __syncthreads();
       if (threadIdx.x == 0) {
         request_id_t<REQUEST> request_locations(pickup_idx, delivery_idx);
-        s_route.insert_request<REQUEST>(request_locations, request_node, s_route_node_map, true);
+        s_route.template insert_request<REQUEST>(
+          request_locations, request_node, s_route_node_map, true);
         sequence_t<2 * b_k_max> sequence_including_delivery;
         i_t counter      = 0;
         bool pd_feasible = true;
diff --git a/cpp/src/routing/ges/lexicographic_search/lexicographic_search.cu b/cpp/src/routing/ges/lexicographic_search/lexicographic_search.cu
index ff797a8135..a896bdf987 100644
--- a/cpp/src/routing/ges/lexicographic_search/lexicographic_search.cu
+++ b/cpp/src/routing/ges/lexicographic_search/lexicographic_search.cu
@@ -256,7 +256,7 @@ __global__ void lexicographic_search(typename solution_t<i_t, f_t, REQUEST>::vie
         if (REQUEST == request_t::PDP && (node_stack.top().from_idx == node_stack.route_length ||
                                           node_stack.template is_stack_top_insertion<REQUEST>())) {
           time_between =
-            node_stack.get_dim_from_delivery<dim_t::TIME>(node_stack.top().intra_idx + 1);
+            node_stack.template get_dim_from_delivery<dim_t::TIME>(node_stack.top().intra_idx + 1);
 
           from_node = node_stack.delivery_node;
 
@@ -265,9 +265,9 @@ __global__ void lexicographic_search(typename solution_t<i_t, f_t, REQUEST>::vie
                          node_stack.s_route.get_node(node_stack.top().intra_idx + 1)),
                        "dim buffer mismatch");
         } else {
-          time_between = node_stack.get_dim_between<dim_t::TIME>(node_stack.top().from_idx,
-                                                                 node_stack.top().intra_idx + 1);
-          from_node    = node_stack.s_route.get_node(node_stack.top().from_idx);
+          time_between = node_stack.template get_dim_between<dim_t::TIME>(
+            node_stack.top().from_idx, node_stack.top().intra_idx + 1);
+          from_node = node_stack.s_route.get_node(node_stack.top().from_idx);
           copy_forward_data(from_node, node_stack.top());
           cuopt_assert(node_stack.check_dim_between(
                          node_stack.top().from_idx,
@@ -624,7 +624,8 @@ __global__ void execute_lexico_move(
       request_locations = request_id_t<REQUEST>(pickup_insert_idx);
     }
     // insert request
-    s_route.insert_request<REQUEST>(request_locations, request_node, solution.route_node_map, true);
+    s_route.template insert_request<REQUEST>(
+      request_locations, request_node, solution.route_node_map, true);
     i_t n_ejections_executed = 0;
     for (i_t i = 0; i < sequence_size; ++i) {
       bool eject = true;
diff --git a/cpp/src/routing/ges/lexicographic_search/lexicographic_search.cuh b/cpp/src/routing/ges/lexicographic_search/lexicographic_search.cuh
index 1a0a5f2c02..2d137205ee 100644
--- a/cpp/src/routing/ges/lexicographic_search/lexicographic_search.cuh
+++ b/cpp/src/routing/ges/lexicographic_search/lexicographic_search.cuh
@@ -25,7 +25,7 @@ namespace routing {
 namespace detail {
 
 struct p_val_seq_t {
-  __device__ p_val_seq_t(uint16_t p_v, uint16_t s_s) : p_val(p_v), sequence_size(s_s) {}
+  __host__ __device__ p_val_seq_t(uint16_t p_v, uint16_t s_s) : p_val(p_v), sequence_size(s_s) {}
 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
   uint p_val         : 16;
   uint sequence_size : 16;
diff --git a/cpp/src/routing/local_search/compute_ejections.cuh b/cpp/src/routing/local_search/compute_ejections.cuh
index 23e6bf3b8a..a6b6f42f97 100644
--- a/cpp/src/routing/local_search/compute_ejections.cuh
+++ b/cpp/src/routing/local_search/compute_ejections.cuh
@@ -56,7 +56,7 @@ DI void compute_temp_route(typename route_t<i_t, f_t, REQUEST>::view_t& temp_rou
   if (intra_idx == -1) { intra_idx = find_intra_idx<i_t, f_t, REQUEST>(route, request_id.id()); }
   __syncthreads();
   // copies the fixed route data (i.e tw values, demand, node_id etc.)
-  temp_route.copy_route_data_after_ejection<REQUEST>(route, intra_idx, true);
+  temp_route.template copy_route_data_after_ejection<REQUEST>(route, intra_idx, true);
   __syncthreads();
   temp_route.copy_backward_data(route, intra_idx + 1, n_nodes_route + 1, intra_idx);
   __syncthreads();
@@ -110,7 +110,8 @@ DI void compute_temp_route(typename route_t<i_t, f_t, REQUEST>::view_t& temp_rou
   }
   // copies the fixed route data (i.e tw values, demand, node_id etc.)
   __syncthreads();
-  temp_route.copy_route_data_after_ejection<REQUEST>(route, intra_idx, delivery_intra_idx, true);
+  temp_route.template copy_route_data_after_ejection<REQUEST>(
+    route, intra_idx, delivery_intra_idx, true);
   __syncthreads();
   if (threadIdx.x == 0) {
     auto prev_node = temp_route.get_node(intra_idx - 1);
diff --git a/cpp/src/routing/local_search/compute_insertions.cu b/cpp/src/routing/local_search/compute_insertions.cu
index 4f5dd306ec..5bad349a56 100644
--- a/cpp/src/routing/local_search/compute_insertions.cu
+++ b/cpp/src/routing/local_search/compute_insertions.cu
@@ -845,7 +845,7 @@ void find_insertions(solution_t<i_t, f_t, REQUEST>& sol,
   } else {
     // for cross the load-balance factor is always 4
     move_candidates.number_of_blocks_per_ls_route =
-      max(1, sol.get_max_active_nodes_for_all_routes() / 4);
+      std::max(1, sol.get_max_active_nodes_for_all_routes() / 4);
     if (search_type == search_type_t::CROSS) {
       n_blocks =
         move_candidates.number_of_blocks_per_ls_route * sol.get_n_routes() + sol.get_num_requests();
diff --git a/cpp/src/routing/local_search/cycle_finder/hash_functions.cuh b/cpp/src/routing/local_search/cycle_finder/hash_functions.cuh
index e7383a702b..22ac7ff280 100644
--- a/cpp/src/routing/local_search/cycle_finder/hash_functions.cuh
+++ b/cpp/src/routing/local_search/cycle_finder/hash_functions.cuh
@@ -85,6 +85,7 @@ struct MurmurHash3_32 {
     const uint8_t* tail = (const uint8_t*)(data + nblocks * 4);
     uint32_t k1         = 0;
     switch (len & 3) {
+      case 0: break;
       case 3: k1 ^= tail[2] << 16;
       case 2: k1 ^= tail[1] << 8;
       case 1:
diff --git a/cpp/src/routing/local_search/local_search.cuh b/cpp/src/routing/local_search/local_search.cuh
index 6cbbc8f566..8f340f717f 100644
--- a/cpp/src/routing/local_search/local_search.cuh
+++ b/cpp/src/routing/local_search/local_search.cuh
@@ -51,8 +51,10 @@ struct found_sliding_solution_t {
 
 template <typename i_t>
 struct is_sliding_uinitialized_t {
-  static constexpr found_sliding_solution_t<i_t> init_data{
-    std::numeric_limits<double>::max(), -1, -1, -1, -1};
+  static constexpr found_sliding_solution_t<i_t> init_data()
+  {
+    return {std::numeric_limits<double>::max(), -1, -1, -1, -1};
+  }
 
   __device__ bool operator()(const found_sliding_solution_t<i_t>& x)
   {
@@ -73,7 +75,10 @@ struct two_opt_cand_t {
   i_t first;
   i_t second;
   double selection_delta;
-  static constexpr two_opt_cand_t<i_t> init_data{-1, -1, std::numeric_limits<double>::max()};
+  static constexpr two_opt_cand_t<i_t> init_data()
+  {
+    return two_opt_cand_t<i_t>{-1, -1, std::numeric_limits<double>::max()};
+  }
   constexpr bool operator!=(const two_opt_cand_t<i_t>& cand) const
   {
     return this->selection_delta != cand.selection_delta;
@@ -82,7 +87,10 @@ struct two_opt_cand_t {
 
 template <typename i_t>
 struct is_two_opt_uinitialized_t {
-  static constexpr two_opt_cand_t<i_t> init_data{-1, -1, std::numeric_limits<double>::max()};
+  static constexpr two_opt_cand_t<i_t> init_data()
+  {
+    return two_opt_cand_t<i_t>{-1, -1, std::numeric_limits<double>::max()};
+  }
 
   __device__ bool operator()(const two_opt_cand_t<i_t>& x)
   {
diff --git a/cpp/src/routing/local_search/sliding_tsp.cu b/cpp/src/routing/local_search/sliding_tsp.cu
index 6ba86e63e2..4dcf532464 100644
--- a/cpp/src/routing/local_search/sliding_tsp.cu
+++ b/cpp/src/routing/local_search/sliding_tsp.cu
@@ -152,7 +152,7 @@ __global__ void find_sliding_moves_tsp(
   const double excess_limit =
     s_route.get_weighted_excess(move_candidates.weights) * ls_excess_multiplier_route;
 
-  sliding_tsp_cand_t<i_t> sliding_tsp_cand = is_sliding_tsp_uinitialized_t<i_t>::init_data;
+  sliding_tsp_cand_t<i_t> sliding_tsp_cand = is_sliding_tsp_uinitialized_t<i_t>::init_data();
   double cost_delta, selection_delta;
 
   constexpr bool exclude_self_in_neighbors = false;  // for reverse op
@@ -512,8 +512,9 @@ bool local_search_t<i_t, f_t, REQUEST>::perform_sliding_tsp(
     sol, move_candidates, n_nodes, n_threads, temp_storage_bytes);
 
   auto n_blocks = move_candidates.nodes_to_search.n_sampled_nodes;
-  async_fill(
-    sampled_tsp_data_, is_sliding_tsp_uinitialized_t<i_t>::init_data, sol.sol_handle->get_stream());
+  async_fill(sampled_tsp_data_,
+             is_sliding_tsp_uinitialized_t<i_t>::init_data(),
+             sol.sol_handle->get_stream());
 
   auto sh_size =
     raft::alignTo(shared_route_size, sizeof(double)) + max_window_size * sizeof(double);
diff --git a/cpp/src/routing/local_search/sliding_tsp.cuh b/cpp/src/routing/local_search/sliding_tsp.cuh
index f14124d55e..f21b0c8c75 100644
--- a/cpp/src/routing/local_search/sliding_tsp.cuh
+++ b/cpp/src/routing/local_search/sliding_tsp.cuh
@@ -27,8 +27,10 @@ struct sliding_tsp_cand_t {
   i_t reverse;
   double selection_delta;
 
-  static constexpr sliding_tsp_cand_t<i_t> init_data{
-    -1, -1, -1, 0, std::numeric_limits<double>::max()};
+  static constexpr sliding_tsp_cand_t<i_t> init_data()
+  {
+    return {-1, -1, -1, 0, std::numeric_limits<double>::max()};
+  }
 
   constexpr bool operator()(sliding_tsp_cand_t<i_t> cand1, sliding_tsp_cand_t<i_t> cand2) const
   {
@@ -38,8 +40,10 @@ struct sliding_tsp_cand_t {
 
 template <typename i_t>
 struct is_sliding_tsp_uinitialized_t {
-  static constexpr sliding_tsp_cand_t<i_t> init_data{
-    -1, -1, -1, 0, std::numeric_limits<double>::max()};
+  static constexpr sliding_tsp_cand_t<i_t> init_data()
+  {
+    return {-1, -1, -1, 0, std::numeric_limits<double>::max()};
+  }
 
   __device__ bool operator()(const sliding_tsp_cand_t<i_t>& x) { return x.window_size == -1; }
 };
diff --git a/cpp/src/routing/local_search/sliding_window.cu b/cpp/src/routing/local_search/sliding_window.cu
index abe5554555..fa53203f4b 100644
--- a/cpp/src/routing/local_search/sliding_window.cu
+++ b/cpp/src/routing/local_search/sliding_window.cu
@@ -746,7 +746,8 @@ __global__ void kernel_perform_sliding_window(
   // All permutations of windows (size 2 to max_permutation_intra) are tryed along the whole route
   // Insertion with the lowest cost is recorded globally
 
-  found_sliding_solution_t<i_t> found_sliding_solution = is_sliding_uinitialized_t<i_t>::init_data;
+  found_sliding_solution_t<i_t> found_sliding_solution =
+    is_sliding_uinitialized_t<i_t>::init_data();
 
   const double excess_limit =
     s_route.get_weighted_excess(move_candidates.weights) * ls_excess_multiplier_route;
@@ -1047,7 +1048,7 @@ bool local_search_t<i_t, f_t, REQUEST>::perform_sliding_window(
 
   sliding_cuda_graph.start_capture(solution.sol_handle->get_stream());
   async_fill(found_sliding_solution_data_,
-             is_sliding_uinitialized_t<i_t>::init_data,
+             is_sliding_uinitialized_t<i_t>::init_data(),
              solution.sol_handle->get_stream());
   async_fill(locks_, 0, solution.sol_handle->get_stream());
   // So that it only trigger in debug
@@ -1064,7 +1065,7 @@ bool local_search_t<i_t, f_t, REQUEST>::perform_sliding_window(
     return false;
   }
   int ideal_blocks    = 4 * solution.sol_handle->get_num_sms();
-  int blocks_per_node = max(ideal_blocks / move_candidates.nodes_to_search.n_sampled_nodes, 1);
+  int blocks_per_node = std::max(ideal_blocks / move_candidates.nodes_to_search.n_sampled_nodes, 1);
 
   auto n_blocks = move_candidates.nodes_to_search.n_sampled_nodes * blocks_per_node;
   cuopt_assert(n_blocks > 0, "n_blocks should be positive");
diff --git a/cpp/src/routing/local_search/two_opt.cu b/cpp/src/routing/local_search/two_opt.cu
index 7352d8af87..5d187942fc 100644
--- a/cpp/src/routing/local_search/two_opt.cu
+++ b/cpp/src/routing/local_search/two_opt.cu
@@ -129,7 +129,7 @@ __global__ void find_two_opt_moves(typename solution_t<i_t, f_t, REQUEST>::view_
     __syncthreads();
   }
 
-  two_opt_cand_t<i_t> two_opt_cand = is_two_opt_uinitialized_t<i_t>::init_data;
+  two_opt_cand_t<i_t> two_opt_cand = is_two_opt_uinitialized_t<i_t>::init_data();
   double cost_delta, selection_delta;
   auto nodes = route.get_num_nodes() - intra_idx;
   auto first = intra_idx;
@@ -390,8 +390,9 @@ bool local_search_t<i_t, f_t, REQUEST>::perform_two_opt(
   if (sol.problem_ptr->is_cvrp_intra()) {
     sampled_nodes_data_.resize(sol.get_n_routes() * sol.get_num_orders(),
                                sol.sol_handle->get_stream());
-    async_fill(
-      sampled_nodes_data_, is_two_opt_uinitialized_t<i_t>::init_data, sol.sol_handle->get_stream());
+    async_fill(sampled_nodes_data_,
+               is_two_opt_uinitialized_t<i_t>::init_data(),
+               sol.sol_handle->get_stream());
   } else {
     two_opt_cand_data_.resize(sol.get_n_routes(), sol.sol_handle->get_stream());
     async_fill(locks_, 0, sol.sol_handle->get_stream());
diff --git a/cpp/src/routing/local_search/vrp/nodes_to_search.cu b/cpp/src/routing/local_search/vrp/nodes_to_search.cu
index 818dd93b54..670d6c4065 100644
--- a/cpp/src/routing/local_search/vrp/nodes_to_search.cu
+++ b/cpp/src/routing/local_search/vrp/nodes_to_search.cu
@@ -147,7 +147,7 @@ bool nodes_to_search_t<i_t, f_t>::sample_nodes_to_search(
   }
   cuopt_assert(n_sampled_nodes > 0, "There must be at least one operator!");
   cuopt_assert(curr_n_nodes_to_search > 0, "There must be at least one operator!");
-  n_sampled_nodes = min(n_sampled_nodes, curr_n_nodes_to_search);
+  n_sampled_nodes = std::min(n_sampled_nodes, curr_n_nodes_to_search);
   h_sampled_nodes.clear();
   for (i_t i = 0; i < n_sampled_nodes; ++i) {
     std::uniform_int_distribution<i_t> rng_dist(0, h_nodes_to_search.size() - 1);
diff --git a/cpp/src/routing/local_search/vrp/vrp_execute.cu b/cpp/src/routing/local_search/vrp/vrp_execute.cu
index 4e2c93e843..b19f2e25f5 100644
--- a/cpp/src/routing/local_search/vrp/vrp_execute.cu
+++ b/cpp/src/routing/local_search/vrp/vrp_execute.cu
@@ -444,8 +444,8 @@ bool execute_vrp_moves(solution_t<i_t, f_t, REQUEST>& sol,
   cudaOccupancyMaxActiveBlocksPerMultiprocessor(
     &numBlocksPerSm, execute_vrp_moves_kernel<i_t, f_t, REQUEST>, TPB, 0);
   // if the number of blocks are larger than the gpu can hold, only execute the max fitting moves
-  n_blocks =
-    min(n_blocks, sol.sol_handle->get_device_properties().multiProcessorCount * numBlocksPerSm);
+  n_blocks            = std::min(n_blocks,
+                      sol.sol_handle->get_device_properties().multiProcessorCount * numBlocksPerSm);
   auto sol_view       = sol.view();
   auto move_cand_view = move_candidates.view();
   // launch
diff --git a/cpp/src/routing/solution/solution.cu b/cpp/src/routing/solution/solution.cu
index 66b0c8f7f9..c2162fc3ec 100644
--- a/cpp/src/routing/solution/solution.cu
+++ b/cpp/src/routing/solution/solution.cu
@@ -216,7 +216,7 @@ bool solution_t<i_t, f_t, REQUEST>::remove_nodes(const std::vector<NodeInfo<>>&
   rmm::device_scalar<i_t> empty_route_produced(sol_handle->get_stream());
   raft::copy(temp_nodes.data(), nodes_to_eject.data(), n_nodes_to_eject, sol_handle->get_stream());
   compute_max_active();
-  size_t sh_size = max(get_temp_route_shared_size(), n_routes * sizeof(i_t));
+  size_t sh_size = std::max(get_temp_route_shared_size(), n_routes * sizeof(i_t));
   bool is_set    = set_shmem_of_kernel(remove_nodes_kernel<i_t, f_t, REQUEST>, sh_size);
   cuopt_assert(is_set, "Not enough shared memory on device for remove_nodes!");
   cuopt_expects(is_set, error_type_t::OutOfMemoryError, "Not enough shared memory on device");
@@ -529,7 +529,7 @@ void solution_t<i_t, f_t, REQUEST>::copy_device_solution(solution_t<i_t, f_t, RE
 
   n_routes = src_sol.n_routes;
 
-  const auto common_max_size = max(src_sol.max_nodes_per_route, max_nodes_per_route);
+  const auto common_max_size = std::max(src_sol.max_nodes_per_route, max_nodes_per_route);
   resize_routes(common_max_size);
   const i_t TPB       = 256;
   const auto n_blocks = n_routes;
diff --git a/cpp/src/routing/solution/solution.cuh b/cpp/src/routing/solution/solution.cuh
index a436f2d765..9c0ff8da31 100644
--- a/cpp/src/routing/solution/solution.cuh
+++ b/cpp/src/routing/solution/solution.cuh
@@ -349,7 +349,7 @@ class solution_t {
       max_active_nodes_for_all_routes(sol_handle_->get_stream()),
       temp_nodes(problem_.get_num_orders(), sol_handle_->get_stream()),
       temp_stack_counter(sol_handle_->get_stream()),
-      temp_int_vector(max(problem_.get_num_orders(), problem_.get_fleet_size()),
+      temp_int_vector(std::max(problem_.get_num_orders(), problem_.get_fleet_size()),
                       sol_handle_->get_stream())
   {
     raft::common::nvtx::range fun_scope("solution_t");
diff --git a/cpp/src/routing/util_kernels/top_k.cuh b/cpp/src/routing/util_kernels/top_k.cuh
index de2c0a4532..f906f2ffe2 100644
--- a/cpp/src/routing/util_kernels/top_k.cuh
+++ b/cpp/src/routing/util_kernels/top_k.cuh
@@ -44,7 +44,7 @@ constexpr auto get_default()
   if constexpr (::cuda::std::is_same_v<output_t, double>) {
     return std::numeric_limits<double>::max();
   } else {
-    return output_t::init_data;
+    return output_t::init_data();
   }
 }
 
diff --git a/cpp/src/routing/utilities/cuopt_utils.cuh b/cpp/src/routing/utilities/cuopt_utils.cuh
index 053c6ec196..60628d6448 100644
--- a/cpp/src/routing/utilities/cuopt_utils.cuh
+++ b/cpp/src/routing/utilities/cuopt_utils.cuh
@@ -23,6 +23,7 @@
 #include <utilities/seed_generator.cuh>
 
 #include <raft/random/rng_device.cuh>
+#include <raft/util/cuda_dev_essentials.cuh>
 
 #include <cuda_fp16.h>
 #include <thrust/reduce.h>
@@ -62,20 +63,13 @@ __device__ inline T shfl(T val, i_t srcLane, i_t width = warp_size, uint mask =
 {
   return __shfl_sync(mask, val, srcLane, width);
 }
-template <typename i_t = int, typename f_t = float>
-__device__ inline i_t lane_id()
-{
-  i_t id;
-  asm("mov.s32 %0, %laneid;" : "=r"(id));
-  return id;
-}
 
 template <typename T, typename i_t = int, typename f_t = float>
 __device__ inline T warp_reduce(T val)
 {
 #pragma unroll
   for (i_t i = warp_size / 2; i > 0; i >>= 1) {
-    T tmp = shfl(val, lane_id() + i);
+    T tmp = shfl(val, raft::laneId() + i);
     val   = min(val, tmp);
   }
   return val;
@@ -86,7 +80,7 @@ template <typename T, typename i_t = int>
 __device__ inline void block_reduce(T val, T* shmem, const i_t size = blockDim.x)
 {
   i_t nWarps = (size + warp_size - 1) / warp_size;
-  i_t lid    = lane_id();
+  i_t lid    = raft::laneId();
   i_t wid    = threadIdx.x / warp_size;
   T warp_min = warp_reduce<T>(val);
   if (lid == 0) shmem[wid] = warp_min;
@@ -107,9 +101,9 @@ __inline__ __device__ void warp_reduce_ranked(val1_t& val1, val2_t& val2, i_t& i
 {
 #pragma unroll
   for (i_t offset = warp_size / 2; offset > 0; offset /= 2) {
-    val1_t tmp_val1 = shfl(val1, lane_id() + offset);
-    val2_t tmp_val2 = shfl(val2, lane_id() + offset);
-    i_t tmp_idx     = shfl(idx, lane_id() + offset);
+    val1_t tmp_val1 = shfl(val1, raft::laneId() + offset);
+    val2_t tmp_val2 = shfl(val2, raft::laneId() + offset);
+    i_t tmp_idx     = shfl(idx, raft::laneId() + offset);
     if (tmp_val1 < val1 || (tmp_val1 == val1 && tmp_val2 > val2)) {
       val1 = tmp_val1;
       val2 = tmp_val2;
@@ -123,8 +117,8 @@ __inline__ __device__ void warp_reduce_ranked(T& val, i_t& idx)
 {
 #pragma unroll
   for (i_t offset = warp_size / 2; offset > 0; offset /= 2) {
-    T tmpVal   = shfl(val, lane_id() + offset);
-    i_t tmpIdx = shfl(idx, lane_id() + offset);
+    T tmpVal   = shfl(val, raft::laneId() + offset);
+    i_t tmpIdx = shfl(idx, raft::laneId() + offset);
     if (tmpVal < val) {
       val = tmpVal;
       idx = tmpIdx;
@@ -140,7 +134,7 @@ __inline__ __device__ void block_reduce_ranked(T& val, i_t& idx, T* shbuf, i_t*
   i_t wid      = threadIdx.x / warp_size;
   i_t nWarps   = (blockDim.x + warp_size - 1) / warp_size;
   warp_reduce_ranked(val, idx);  // Each warp performs partial reduction
-  i_t lane = lane_id();
+  i_t lane = raft::laneId();
   if (lane == 0) {
     values[wid]  = val;  // Write reduced value to shared memory
     indices[wid] = idx;  // Write reduced value to shared memory
@@ -174,7 +168,7 @@ __inline__ __device__ void block_reduce_ranked(__half& val, i_t& idx, __half* sh
   i_t wid        = threadIdx.x / warp_size;
   i_t nWarps     = (blockDim.x + warp_size - 1) / warp_size;
   warp_reduce_ranked(val, idx);  // Each warp performs partial reduction
-  i_t lane = lane_id();
+  i_t lane = raft::laneId();
   if (lane == 0) {
     values[wid]  = val;  // Write reduced value to shared memory
     indices[wid] = idx;  // Write reduced value to shared memory
diff --git a/cpp/src/routing/vehicle_info.hpp b/cpp/src/routing/vehicle_info.hpp
index 81c6710a3b..cd127a5cee 100644
--- a/cpp/src/routing/vehicle_info.hpp
+++ b/cpp/src/routing/vehicle_info.hpp
@@ -30,7 +30,7 @@ template <typename f_t, bool is_device = true>
 struct VehicleInfo {
   constexpr bool has_time_matrix() const { return matrices.extent[1] > 1; }
 
-  bool operator==(VehicleInfo<f_t, is_device> const& rhs)
+  bool operator==(VehicleInfo<f_t, is_device> const& rhs) const
   {
     return drop_return_trip == rhs.drop_return_trip && skip_first_trip == rhs.skip_first_trip &&
            type == rhs.type && order_service_times == rhs.order_service_times &&
diff --git a/cpp/tests/routing/routing_test.cuh b/cpp/tests/routing/routing_test.cuh
index f50c080bde..2e8db07aae 100644
--- a/cpp/tests/routing/routing_test.cuh
+++ b/cpp/tests/routing/routing_test.cuh
@@ -576,7 +576,7 @@ class base_test_t {
         double order_latest = next_is_break_order
                                 ? break_latest_h[(break_dim + 1) * n_vehicles + id]
                                 : (next_is_depot ? depot_latest : latest_time_h[new_order]);
-        double curr_wait    = max(0.0, order_earliest - order_arrival);
+        double curr_wait    = std::max(0.0, order_earliest - order_arrival);
         order_arrival += curr_wait;
 
         arrival_stamp.push_back(order_arrival);