diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh index f21f152e25..6541c0e2d1 100755 --- a/ci/validate_wheel.sh +++ b/ci/validate_wheel.sh @@ -22,11 +22,11 @@ PYDISTCHECK_ARGS=( if [[ "${package_dir}" == "python/libcuopt" ]]; then if [[ "${RAPIDS_CUDA_MAJOR}" == "12" ]]; then PYDISTCHECK_ARGS+=( - --max-allowed-size-compressed '635Mi' + --max-allowed-size-compressed '645Mi' ) else PYDISTCHECK_ARGS+=( - --max-allowed-size-compressed '475Mi' + --max-allowed-size-compressed '485Mi' ) fi elif [[ "${package_dir}" != "python/cuopt" ]] && \ diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index c3a14ce11c..b48c8f89eb 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -489,7 +489,7 @@ void branch_and_bound_t::set_new_solution(const std::vector& solu if (is_feasible) { report_heuristic(obj); } if (attempt_repair) { mutex_repair_.lock(); - repair_queue_.push_back(crushed_solution); + repair_queue_.push_back(solution); mutex_repair_.unlock(); } } @@ -522,9 +522,11 @@ void branch_and_bound_t::queue_external_solution_deterministic( mutex_original_lp_.unlock(); if (!is_feasible) { - // Queue for repair + // Queue the uncrushed solution for repair; it will be crushed at + // consumption time so that the crush reflects the current LP state + // (which may have gained slack columns from cuts added after this point). 
mutex_repair_.lock(); - repair_queue_.push_back(crushed_solution); + repair_queue_.push_back(solution); mutex_repair_.unlock(); return; } @@ -613,11 +615,14 @@ void branch_and_bound_t::repair_heuristic_solutions() if (to_repair.size() > 0) { settings_.log.debug("Attempting to repair %ld injected solutions\n", to_repair.size()); - for (const std::vector& potential_solution : to_repair) { + for (const std::vector& uncrushed_solution : to_repair) { + std::vector crushed_solution; + crush_primal_solution( + original_problem_, original_lp_, uncrushed_solution, new_slacks_, crushed_solution); std::vector repaired_solution; f_t repaired_obj; bool is_feasible = - repair_solution(edge_norms_, potential_solution, repaired_obj, repaired_solution); + repair_solution(edge_norms_, crushed_solution, repaired_obj, repaired_solution); if (is_feasible) { mutex_upper_.lock(); @@ -1195,6 +1200,9 @@ std::pair branch_and_bound_t::upd policy.graphviz(search_tree, node_ptr, "lower bound", leaf_obj); policy.update_pseudo_costs(node_ptr, leaf_obj); node_ptr->lower_bound = leaf_obj; + if (original_lp_.objective_is_integral) { + node_ptr->lower_bound = std::ceil(leaf_obj - settings_.integer_tol); + } policy.on_optimal_callback(leaf_solution.x, leaf_obj); if (num_frac == 0) { @@ -1308,7 +1316,11 @@ dual::status_t branch_and_bound_t::solve_node_lp( simplex_solver_settings_t lp_settings = settings_; lp_settings.set_log(false); - lp_settings.cut_off = upper_bound_ + settings_.dual_tol; + if (original_lp_.objective_is_integral) { + lp_settings.cut_off = std::ceil(upper_bound_ - settings_.integer_tol) - 1 + settings_.dual_tol; + } else { + lp_settings.cut_off = upper_bound_ + settings_.dual_tol; + } lp_settings.inside_mip = 2; lp_settings.time_limit = settings_.time_limit - toc(exploration_stats_.start_time); lp_settings.scale_columns = false; @@ -3158,11 +3170,14 @@ void branch_and_bound_t::deterministic_sort_replay_events( if (to_repair.size() > 0) { settings_.log.debug("Deterministic sync: 
Attempting to repair %ld injected solutions\n", to_repair.size()); - for (const std::vector& potential_solution : to_repair) { + for (const std::vector& uncrushed_solution : to_repair) { + std::vector crushed_solution; + crush_primal_solution( + original_problem_, original_lp_, uncrushed_solution, new_slacks_, crushed_solution); std::vector repaired_solution; f_t repaired_obj; bool success = - repair_solution(edge_norms_, potential_solution, repaired_obj, repaired_solution); + repair_solution(edge_norms_, crushed_solution, repaired_obj, repaired_solution); if (success) { // Queue repaired solution with work unit timestamp (...workstamp?) mutex_heuristic_queue_.lock(); diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index d104036eae..8d4a533e93 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -569,15 +569,16 @@ void convert_user_problem(const user_problem_t& user_problem, } // Copy info from user_problem to problem - problem.num_rows = user_problem.num_rows; - problem.num_cols = user_problem.num_cols; - problem.A = user_problem.A; - problem.objective = user_problem.objective; - problem.obj_scale = user_problem.obj_scale; - problem.obj_constant = user_problem.obj_constant; - problem.rhs = user_problem.rhs; - problem.lower = user_problem.lower; - problem.upper = user_problem.upper; + problem.num_rows = user_problem.num_rows; + problem.num_cols = user_problem.num_cols; + problem.A = user_problem.A; + problem.objective = user_problem.objective; + problem.obj_scale = user_problem.obj_scale; + problem.obj_constant = user_problem.obj_constant; + problem.objective_is_integral = user_problem.objective_is_integral; + problem.rhs = user_problem.rhs; + problem.lower = user_problem.lower; + problem.upper = user_problem.upper; // Make a copy of row_sense so we can modify it std::vector row_sense = user_problem.row_sense; diff --git a/cpp/src/dual_simplex/presolve.hpp b/cpp/src/dual_simplex/presolve.hpp 
index 557ebe648b..17e6176e3b 100644 --- a/cpp/src/dual_simplex/presolve.hpp +++ b/cpp/src/dual_simplex/presolve.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -41,6 +41,7 @@ struct lp_problem_t { std::vector upper; f_t obj_constant; f_t obj_scale; // 1.0 for min, -1.0 for max + bool objective_is_integral{false}; }; template diff --git a/cpp/src/dual_simplex/user_problem.hpp b/cpp/src/dual_simplex/user_problem.hpp index 6bdbb10db6..f50a6d33a5 100644 --- a/cpp/src/dual_simplex/user_problem.hpp +++ b/cpp/src/dual_simplex/user_problem.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -47,6 +47,7 @@ struct user_problem_t { std::vector col_names; f_t obj_constant; f_t obj_scale; // 1.0 for min, -1.0 for max + bool objective_is_integral{false}; std::vector var_types; std::vector Q_offsets; std::vector Q_indices; diff --git a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu index 0976160714..9d79f34620 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu +++ b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu @@ -39,15 +39,14 @@ namespace cuopt::linear_programming::detail { template -HDI thrust::tuple get_mtm_for_bound( - const typename fj_t::climber_data_t::view_t& fj, - i_t var_idx, - i_t cstr_idx, - f_t cstr_coeff, - f_t bound, - f_t sign, - const ArrayType& assignment, - const ArrayType& lhs_vector) +thrust::tuple get_mtm_for_bound(const typename fj_t::climber_data_t::view_t& fj, + i_t var_idx, + i_t cstr_idx, + f_t cstr_coeff, + f_t bound, + f_t sign, + const ArrayType& assignment, + const ArrayType& lhs_vector) { f_t delta_ij = 0; f_t slack = 0; @@ -63,7 +62,7 @@ HDI thrust::tuple get_mtm_for_bound( } template -HDI thrust::tuple get_mtm_for_constraint( +thrust::tuple get_mtm_for_constraint( const typename fj_t::climber_data_t::view_t& fj, i_t var_idx, i_t cstr_idx, @@ -109,17 +108,16 @@ HDI thrust::tuple get_mtm_for_constraint( } template -HDI std::pair feas_score_constraint( - const typename fj_t::climber_data_t::view_t& fj, - i_t var_idx, - f_t delta, - i_t cstr_idx, - f_t cstr_coeff, - f_t c_lb, - f_t c_ub, - f_t current_lhs, - f_t left_weight, - f_t right_weight) +std::pair feas_score_constraint(const typename fj_t::climber_data_t::view_t& fj, + i_t var_idx, + f_t delta, + i_t cstr_idx, + f_t cstr_coeff, + f_t c_lb, + f_t c_ub, + f_t current_lhs, + f_t left_weight, + f_t right_weight) { cuopt_assert(isfinite(delta), "invalid delta"); cuopt_assert(cstr_coeff != 0 && isfinite(cstr_coeff), "invalid coefficient"); 
diff --git a/cpp/src/mip_heuristics/problem/problem.cu b/cpp/src/mip_heuristics/problem/problem.cu
index b2e221df0c..df7f37c541 100644
--- a/cpp/src/mip_heuristics/problem/problem.cu
+++ b/cpp/src/mip_heuristics/problem/problem.cu
@@ -1195,6 +1195,124 @@ void problem_t::insert_constraints(constraints_delta_t& h_co
   combine_constraint_bounds(*this, combined_bounds);
 }
 
+// Rational approximation p/q of x (q <= max_denom) from its continued-fraction convergents.
+// Stops once a convergent is within epsilon of x, or before one that would exceed max_denom or
+// overflow int64_t. Returns {0, 1} if |x| < epsilon; {0, 0} if x is non-finite or |x| >= 2^63.
+std::pair<int64_t, int64_t> rational_approximation(double x, int64_t max_denom, double epsilon)
+{
+  double ax = std::abs(x);
+  if (ax < epsilon) { return {0, 1}; }
+  if (!(ax < (double)std::numeric_limits<int64_t>::max())) { return {0, 0}; }  // cast would be UB
+
+  if (x < 0) {
+    auto [p, q] = rational_approximation(-x, max_denom, epsilon);
+    return {-p, q};
+  }
+
+  int64_t p_prev2 = 1, q_prev2 = 0;
+  int64_t p_prev1 = (int64_t)std::floor(x), q_prev1 = 1;
+  double remainder = x - std::floor(x);
+
+  for (int iter = 0; iter < 100; ++iter) {
+    if (std::abs(remainder) < 1e-15) break;
+    remainder = 1.0 / remainder;  // in (1, 1e15], so the cast below is in range
+    int64_t a = (int64_t)std::floor(remainder);
+    remainder -= a;
+
+    // a * prev1 + prev2 can exceed int64_t (signed overflow is UB): use checked arithmetic.
+    int64_t p_curr = 0, q_curr = 0;
+    if (__builtin_mul_overflow(a, q_prev1, &q_curr)) break;
+    if (__builtin_add_overflow(q_curr, q_prev2, &q_curr)) break;
+    if (q_curr > max_denom) break;
+    if (__builtin_mul_overflow(a, p_prev1, &p_curr)) break;
+    if (__builtin_add_overflow(p_curr, p_prev2, &p_curr)) break;
+
+    p_prev2 = p_prev1;
+    q_prev2 = q_prev1;
+    p_prev1 = p_curr;
+    q_prev1 = q_curr;
+
+    if (std::abs(x - (double)p_curr / (double)q_curr) < epsilon) break;
+  }
+
+  return {p_prev1, q_prev1};
+}
+
+// Brute-force: try scalars 1..max_brute and return the smallest that makes all coefficients
+// integral.
+double find_scaling_brute_force(const std::vector& coefficients,
+                                int max_brute = 100,
+                                double tol = 1e-6)
+{
+  for (int s = 1; s <= max_brute; ++s) {
+    bool ok = true;
+    for (double c : coefficients) {
+      double scaled = s * c;
+      if (std::abs(scaled - std::round(scaled)) > tol) {
+        ok = false;
+        break;
+      }
+    }
+    if (ok) return (double)s;  // smallest s for which every s * c is within tol of an integer
+  }
+  return std::numeric_limits::quiet_NaN();  // no scalar in 1..max_brute works
+}
+
+// Continued fractions: rationalize each coefficient as num/den; scale = lcm(den) / gcd(|num|).
+double find_scaling_rational(const std::vector& coefficients,
+                             double maxscale = 1e6,
+                             int64_t maxdnom = 10000000,
+                             double maxfinal = 10000,
+                             double intcheck_tol = 1e-6)
+{
+  constexpr double no_scaling = std::numeric_limits::quiet_NaN();
+  double epsilon = 1.0 / maxscale;  // accuracy requested from each rational approximation
+
+  int64_t gcd = 0;  // gcd of the numerators so far; 0 means none seen yet
+  int64_t scm = 1;  // smallest common multiple of the denominators so far
+
+  for (double c : coefficients) {
+    auto [num, den] = rational_approximation(c, maxdnom, epsilon);
+    if (den == 0 || num == 0) continue;  // skipped: does not contribute to gcd/scm
+
+    int64_t abs_num = std::abs(num);
+    if (gcd == 0) {
+      gcd = abs_num;
+      scm = den;
+    } else {
+      gcd = std::gcd(gcd, abs_num);
+      int64_t factor = den / std::gcd(scm, den);  // lcm(scm, den) == scm * factor
+      int64_t new_scm;
+      if (__builtin_mul_overflow(scm, factor, &new_scm)) return no_scaling;
+      scm = new_scm;
+    }
+
+    if ((double)scm / (double)gcd > maxscale) return no_scaling;
+  }
+
+  if (gcd == 0) return 1.0;  // every coefficient was skipped: nothing to scale
+
+  double intscalar = (double)scm / (double)gcd;  // may be fractional when gcd > 1
+  if (intscalar > maxfinal) return no_scaling;
+
+  for (double c : coefficients) {  // final check against the original coefficients
+    double scaled = intscalar * c;
+    if (std::abs(scaled - std::round(scaled)) > intcheck_tol) return no_scaling;
+  }
+
+  return intscalar;
+}
+
+// Finds a scaling factor s such that s * c_i is integral (within tolerance) for all i.
+// Tries a brute-force sweep over s = 1..100 first (cheap, numerically robust), then falls back
+// to continued fractions, whose result need not be an integer. Returns NaN if nothing works.
+double find_objective_scaling_factor(const std::vector& coefficients)
+{
+  double s = find_scaling_brute_force(coefficients);
+  if (!std::isnan(s)) return s;
+  return find_scaling_rational(coefficients);
+}
+
 template 
 void problem_t::set_implied_integers(const std::vector& implied_integer_indices)
 {
@@ -1209,15 +1327,54 @@ void problem_t::set_implied_integers(const std::vector& implied_i
     cuopt_assert(var_types[idx] == var_t::CONTINUOUS, "Variable is integer");
     var_flags[idx] |= (i_t)VAR_IMPLIED_INTEGER;
   });
+}
+
+template 
+void problem_t::recompute_objective_integrality()
+{
+  // Recomputes objective_is_integral, rescaling the objective when that makes fractional
+  // coefficients on integer variables integral. FIXME: implied integers are not counted as
+  // integer: it wrongly treats neos-827175 as having an integer optimal (Papilo flag wrong?).
   objective_is_integral = thrust::all_of(handle_ptr->get_thrust_policy(),
                                          thrust::make_counting_iterator(0),
                                          thrust::make_counting_iterator(n_variables),
                                          [v = view()] __device__(i_t var_idx) -> bool {
                                            if (v.objective_coefficients[var_idx] == 0) return true;
                                            return v.is_integer(v.objective_coefficients[var_idx]) &&
-                                                  (v.variable_types[var_idx] == var_t::INTEGER ||
-                                                   (v.var_flags[var_idx] & VAR_IMPLIED_INTEGER));
+                                                  (v.variable_types[var_idx] == var_t::INTEGER);
                                          });
+
+  bool objvars_all_integral = thrust::all_of(handle_ptr->get_thrust_policy(),
+                                             thrust::make_counting_iterator(0),
+                                             thrust::make_counting_iterator(n_variables),
+                                             [v = view()] __device__(i_t var_idx) -> bool {
+                                               if (v.objective_coefficients[var_idx] == 0)
+                                                 return true;
+                                               return (v.variable_types[var_idx] == var_t::INTEGER);
+                                             });
+  if (objvars_all_integral && !objective_is_integral) {  // only the coefficients are fractional
+    auto h_objective_coefficients =
+      cuopt::host_copy(objective_coefficients, handle_ptr->get_stream());
+    std::vector h_nonzero_obj_coefs;
+    for (i_t i = 0; i < n_variables; ++i) {
+      if (h_objective_coefficients[i] != 0) {
+        h_nonzero_obj_coefs.push_back(h_objective_coefficients[i]);
+      }
+    }
+    double scaling_factor = find_objective_scaling_factor(h_nonzero_obj_coefs);
+    if (!std::isnan(scaling_factor)) {
+      CUOPT_LOG_INFO("Scaling objective coefficients by %g to allow integrality", scaling_factor);
+      thrust::for_each(
+        handle_ptr->get_thrust_policy(),
+        thrust::make_counting_iterator(0),
+        thrust::make_counting_iterator(n_variables),
+        [objective_coefficients = make_span(objective_coefficients),
+         scaling_factor] __device__(i_t idx) { objective_coefficients[idx] *= scaling_factor; });
+      presolve_data.objective_scaling_factor /= scaling_factor;
+      presolve_data.objective_offset *= scaling_factor;
+      objective_is_integral = true;  // NOTE(review): coefficients are not rounded after scaling
+    }
+  }
 }
 
 template 
@@ -1287,7 +1444,8 @@ void problem_t::substitute_variables(const std::vector& var_indic
      var_to_substitute_indices = make_span(d_var_to_substitute_indices),
      objective_coefficients = make_span(objective_coefficients),
      objective_offset_delta_per_variable = make_span(objective_offset_delta_per_variable),
-     objective_offset = objective_offset.data()] __device__(i_t idx) {
+     objective_offset = objective_offset.data(),
+     var_flags = make_span(presolve_data.var_flags)] __device__(i_t idx) {
       i_t var_idx = var_indices[idx];
       i_t substituting_var_idx = var_to_substitute_indices[idx];
       variable_fix_mask[var_idx] = idx;
@@ -1296,6 +1454,9 @@ void problem_t::substitute_variables(const std::vector& var_indic
       // atomicAdd(objective_offset, objective_offset_difference);
       atomicAdd(&objective_coefficients[substituting_var_idx],
                 objective_coefficients[var_idx] * substitute_coefficient[idx]);
+      // Substitution changes the constraint coefficients on the substituting variable,
+      // invalidating any implied-integrality proof that relied on the original structure.
+ var_flags[substituting_var_idx] &= ~(i_t)VAR_IMPLIED_INTEGER; }); presolve_data.objective_offset += thrust::reduce(handle_ptr->get_thrust_policy(), objective_offset_delta_per_variable.begin(), @@ -1380,23 +1541,17 @@ void problem_t::substitute_variables(const std::vector& var_indic offsets = make_span(offsets), objective_coefficients = make_span(objective_coefficients), dummy_substituted_variable] __device__(i_t cstr_idx) { - i_t offset_begin = offsets[cstr_idx]; - i_t offset_end = offsets[cstr_idx + 1]; - i_t duplicate_start_idx = -1; - while (offset_begin < offset_end - 1) { - i_t var_idx = variables[offset_begin]; - i_t next_var_idx = variables[offset_begin + 1]; - if (var_idx == next_var_idx) { - if (duplicate_start_idx == -1) { duplicate_start_idx = offset_begin; } - coefficients[duplicate_start_idx] += coefficients[offset_begin + 1]; - variables[duplicate_start_idx] = variables[offset_begin + 1]; - // mark those for elimination - variables[offset_begin + 1] = dummy_substituted_variable; - coefficients[offset_begin + 1] = 0.; + i_t offset_begin = offsets[cstr_idx]; + i_t offset_end = offsets[cstr_idx + 1]; + i_t run_start = offset_begin; + for (i_t j = offset_begin + 1; j < offset_end; ++j) { + if (variables[j] == variables[run_start]) { + coefficients[run_start] += coefficients[j]; + variables[j] = dummy_substituted_variable; + coefficients[j] = 0.; } else { - duplicate_start_idx = -1; + run_start = j; } - offset_begin++; } }); // in case we use this function in context other than propagation, it is possible that substituted diff --git a/cpp/src/mip_heuristics/problem/problem.cuh b/cpp/src/mip_heuristics/problem/problem.cuh index d51af702fe..6cd180a800 100644 --- a/cpp/src/mip_heuristics/problem/problem.cuh +++ b/cpp/src/mip_heuristics/problem/problem.cuh @@ -88,6 +88,7 @@ class problem_t { void insert_variables(variables_delta_t& h_vars); void insert_constraints(constraints_delta_t& h_constraints); void set_implied_integers(const std::vector& 
implied_integer_indices); + void recompute_objective_integrality(); void resize_variables(size_t size); void resize_constraints(size_t matrix_size, size_t constraint_size, size_t var_size); void preprocess_problem(); diff --git a/cpp/src/mip_heuristics/solution/solution.cu b/cpp/src/mip_heuristics/solution/solution.cu index b8e828f261..5f1c13199b 100644 --- a/cpp/src/mip_heuristics/solution/solution.cu +++ b/cpp/src/mip_heuristics/solution/solution.cu @@ -607,9 +607,12 @@ mip_solution_t solution_t::get_solution(bool output_feasible if (output_feasible) { // TODO we can streamline these info in class - f_t solution_bound = stats.get_solution_bound(); - f_t rel_mip_gap = compute_rel_mip_gap(h_user_obj, solution_bound); - f_t abs_mip_gap = fabs(h_user_obj - solution_bound); + f_t solution_bound = stats.get_solution_bound(); + f_t rel_mip_gap = compute_rel_mip_gap(h_user_obj, solution_bound); + f_t abs_mip_gap = fabs(h_user_obj - solution_bound); + if ((problem_ptr->presolve_data.objective_scaling_factor > 0 && h_user_obj <= solution_bound) || + (problem_ptr->presolve_data.objective_scaling_factor < 0 && h_user_obj >= solution_bound)) + rel_mip_gap = 0; f_t max_constraint_violation = compute_max_constraint_violation(); f_t max_int_violation = compute_max_int_violation(); f_t max_variable_bound_violation = compute_max_variable_violation(); diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu index 301a168135..1d29cba4d1 100644 --- a/cpp/src/mip_heuristics/solve.cu +++ b/cpp/src/mip_heuristics/solve.cu @@ -286,7 +286,6 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, if (presolve_result->implied_integer_indices.size() > 0) { CUOPT_LOG_INFO("%d implied integers", presolve_result->implied_integer_indices.size()); } - if (problem.is_objective_integral()) { CUOPT_LOG_INFO("Objective function is integral"); } CUOPT_LOG_INFO("Papilo presolve time: %.2f", presolve_time); } if (settings.user_problem_file != "") { diff --git 
a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu index 664a2fac49..6300114c07 100644 --- a/cpp/src/mip_heuristics/solver.cu +++ b/cpp/src/mip_heuristics/solver.cu @@ -169,6 +169,12 @@ solution_t mip_solver_t::run_solver() namespace dual_simplex = cuopt::linear_programming::dual_simplex; std::future branch_and_bound_status_future; dual_simplex::user_problem_t branch_and_bound_problem(context.problem_ptr->handle_ptr); + context.problem_ptr->recompute_objective_integrality(); + if (context.problem_ptr->is_objective_integral()) { + CUOPT_LOG_INFO("Objective function is integral, scale %g", + context.problem_ptr->presolve_data.objective_scaling_factor); + } + branch_and_bound_problem.objective_is_integral = context.problem_ptr->is_objective_integral(); dual_simplex::simplex_solver_settings_t branch_and_bound_settings; std::unique_ptr> branch_and_bound; branch_and_bound_solution_helper_t solution_helper(&dm, branch_and_bound_settings); diff --git a/cpp/src/utilities/memory_instrumentation.hpp b/cpp/src/utilities/memory_instrumentation.hpp index d33e7de0b9..645266d337 100644 --- a/cpp/src/utilities/memory_instrumentation.hpp +++ b/cpp/src/utilities/memory_instrumentation.hpp @@ -495,32 +495,11 @@ struct memop_instrumentation_wrapper_t : public memory_instrumentation_base_t { using const_reverse_iterator = std::reverse_iterator; // Constructors - memop_instrumentation_wrapper_t() : array_() - { - if constexpr (type_traits_utils::has_data::value) { - data_ptr = array_.data(); - } else { - data_ptr = nullptr; - } - } + memop_instrumentation_wrapper_t() : array_() {} // Copy/move from underlying type - memop_instrumentation_wrapper_t(const T& arr) : array_(arr) - { - if constexpr (type_traits_utils::has_data::value) { - data_ptr = const_cast(array_.data()); - } else { - data_ptr = nullptr; - } - } - memop_instrumentation_wrapper_t(T&& arr) : array_(std::move(arr)) - { - if constexpr (type_traits_utils::has_data::value) { - data_ptr = array_.data(); - } 
else { - data_ptr = nullptr; - } - } + memop_instrumentation_wrapper_t(const T& arr) : array_(arr) {} + memop_instrumentation_wrapper_t(T&& arr) : array_(std::move(arr)) {} // Forwarding constructor for underlying container initialization // Only enabled for types that aren't the wrapper itself or the underlying type @@ -533,31 +512,16 @@ struct memop_instrumentation_wrapper_t : public memory_instrumentation_base_t { explicit memop_instrumentation_wrapper_t(Arg&& arg, Args&&... args) : array_(std::forward(arg), std::forward(args)...) { - if constexpr (type_traits_utils::has_data::value) { - data_ptr = array_.data(); - } else { - data_ptr = nullptr; - } } memop_instrumentation_wrapper_t(const memop_instrumentation_wrapper_t& other) : memory_instrumentation_base_t(), array_(other.array_) { - if constexpr (type_traits_utils::has_data::value) { - data_ptr = array_.data(); - } else { - data_ptr = nullptr; - } } memop_instrumentation_wrapper_t(memop_instrumentation_wrapper_t&& other) noexcept : memory_instrumentation_base_t(), array_(std::move(other.array_)) { - if constexpr (type_traits_utils::has_data::value) { - data_ptr = array_.data(); - } else { - data_ptr = nullptr; - } } memop_instrumentation_wrapper_t& operator=(const memop_instrumentation_wrapper_t& other) @@ -565,11 +529,6 @@ struct memop_instrumentation_wrapper_t : public memory_instrumentation_base_t { if (this != &other) { reset_counters(); array_ = other.array_; - if constexpr (type_traits_utils::has_data::value) { - data_ptr = array_.data(); - } else { - data_ptr = nullptr; - } } return *this; } @@ -579,11 +538,6 @@ struct memop_instrumentation_wrapper_t : public memory_instrumentation_base_t { if (this != &other) { reset_counters(); array_ = std::move(other.array_); - if constexpr (type_traits_utils::has_data::value) { - data_ptr = array_.data(); - } else { - data_ptr = nullptr; - } } return *this; } @@ -593,15 +547,10 @@ struct memop_instrumentation_wrapper_t : public memory_instrumentation_base_t { 
return element_proxy_t(underlying()[index], *this); } - HDI value_type operator[](size_type index) const + value_type operator[](size_type index) const { this->template record_load(); - // really ugly hack because otherwise nvcc complains about vector operator[] being __host__ only - if constexpr (type_traits_utils::has_data::value) { - return data_ptr[index]; - } else { - return underlying()[index]; - } + return underlying()[index]; } template @@ -674,21 +623,18 @@ struct memop_instrumentation_wrapper_t : public memory_instrumentation_base_t { std::enable_if_t::value> reserve(size_type new_cap) { underlying().reserve(new_cap); - if constexpr (type_traits_utils::has_data::value) { data_ptr = underlying().data(); } } template std::enable_if_t::value> shrink_to_fit() { underlying().shrink_to_fit(); - if constexpr (type_traits_utils::has_data::value) { data_ptr = underlying().data(); } } template std::enable_if_t::value> clear() noexcept { underlying().clear(); - if constexpr (type_traits_utils::has_data::value) { data_ptr = underlying().data(); } } template @@ -698,7 +644,6 @@ struct memop_instrumentation_wrapper_t : public memory_instrumentation_base_t { // hot loops shouldn't be doing such operations anyway this->template record_store(); underlying().push_back(value); - if constexpr (type_traits_utils::has_data::value) { data_ptr = underlying().data(); } } template @@ -706,7 +651,6 @@ struct memop_instrumentation_wrapper_t : public memory_instrumentation_base_t { { this->template record_store(); underlying().push_back(std::move(value)); - if constexpr (type_traits_utils::has_data::value) { data_ptr = underlying().data(); } } template @@ -714,7 +658,6 @@ struct memop_instrumentation_wrapper_t : public memory_instrumentation_base_t { { this->template record_store(); underlying().emplace_back(std::forward(args)...); - if constexpr (type_traits_utils::has_data::value) { data_ptr = underlying().data(); } } template @@ -722,7 +665,6 @@ struct 
memop_instrumentation_wrapper_t : public memory_instrumentation_base_t { { this->template record_load(); // Reading the element before removal underlying().pop_back(); - if constexpr (type_traits_utils::has_data::value) { data_ptr = underlying().data(); } } template @@ -733,7 +675,6 @@ struct memop_instrumentation_wrapper_t : public memory_instrumentation_base_t { if (count > old_size) { this->byte_stores += (count - old_size) * type_size; // New elements initialized } - if constexpr (type_traits_utils::has_data::value) { data_ptr = underlying().data(); } } template @@ -743,7 +684,6 @@ struct memop_instrumentation_wrapper_t : public memory_instrumentation_base_t { size_type old_size = underlying().size(); underlying().resize(count, value); if (count > old_size) { this->byte_stores += (count - old_size) * type_size; } - if constexpr (type_traits_utils::has_data::value) { data_ptr = underlying().data(); } } template @@ -769,7 +709,6 @@ struct memop_instrumentation_wrapper_t : public memory_instrumentation_base_t { private: T array_; - value_type* data_ptr{nullptr}; }; #else // !CUOPT_ENABLE_MEMORY_INSTRUMENTATION