From 67f293b1cbb21314e372c0b3e4c9b9fc86450af0 Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Thu, 28 Aug 2025 09:26:19 +0000 Subject: [PATCH 01/33] tmp --- cpp/src/linear_programming/cuopt_c.cpp | 10 + .../utilities/problem_checking.cu | 2 + .../utilities/problem_checking.cuh | 10 +- .../c_api_tests/c_api_test.c | 152 +++++++++++++++ .../c_api_tests/c_api_tests.cpp | 182 +++++++++--------- .../c_api_tests/c_api_tests.h | 1 + 6 files changed, 268 insertions(+), 89 deletions(-) diff --git a/cpp/src/linear_programming/cuopt_c.cpp b/cpp/src/linear_programming/cuopt_c.cpp index fc77e2323e..abc4478756 100644 --- a/cpp/src/linear_programming/cuopt_c.cpp +++ b/cpp/src/linear_programming/cuopt_c.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -148,7 +149,11 @@ cuopt_int_t cuOptCreateProblem(cuopt_int_t num_constraints, variable_types[j] == CUOPT_CONTINUOUS ? var_t::CONTINUOUS : var_t::INTEGER; } problem_and_stream->op_problem->set_variable_types(variable_types_host.data(), num_variables); + problem_checking_t::check_problem_representation( + *problem_and_stream->op_problem); *problem_ptr = static_cast(problem_and_stream); + } catch (const cuopt::logic_error& e) { + return static_cast(e.get_error_type()); } catch (const raft::exception& e) { return CUOPT_INVALID_ARGUMENT; } @@ -205,7 +210,12 @@ cuopt_int_t cuOptCreateRangedProblem(cuopt_int_t num_constraints, variable_types[j] == CUOPT_CONTINUOUS ? 
var_t::CONTINUOUS : var_t::INTEGER; } problem_and_stream->op_problem->set_variable_types(variable_types_host.data(), num_variables); + problem_checking_t::check_problem_representation( + *problem_and_stream->op_problem); *problem_ptr = static_cast(problem_and_stream); + } catch (const cuopt::logic_error& e) { + printf("Error: %s, type %d\n", e.what(), static_cast(e.get_error_type())); + return static_cast(e.get_error_type()); } catch (const raft::exception& e) { return CUOPT_INVALID_ARGUMENT; } diff --git a/cpp/src/linear_programming/utilities/problem_checking.cu b/cpp/src/linear_programming/utilities/problem_checking.cu index d0fc6811b3..3a74012147 100644 --- a/cpp/src/linear_programming/utilities/problem_checking.cu +++ b/cpp/src/linear_programming/utilities/problem_checking.cu @@ -22,6 +22,8 @@ #include #include +#include + #include #include #include diff --git a/cpp/src/linear_programming/utilities/problem_checking.cuh b/cpp/src/linear_programming/utilities/problem_checking.cuh index 2df0a517a7..91be9160b1 100644 --- a/cpp/src/linear_programming/utilities/problem_checking.cuh +++ b/cpp/src/linear_programming/utilities/problem_checking.cuh @@ -20,10 +20,18 @@ #include #include -#include +namespace rmm { +template +class device_uvector; +} // namespace rmm namespace cuopt::linear_programming { +namespace detail { +template +class problem_t; +} // namespace detail + template class problem_checking_t { public: diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_test.c b/cpp/tests/linear_programming/c_api_tests/c_api_test.c index 28472b2012..c2955b80c4 100644 --- a/cpp/tests/linear_programming/c_api_tests/c_api_test.c +++ b/cpp/tests/linear_programming/c_api_tests/c_api_test.c @@ -872,3 +872,155 @@ cuopt_int_t test_ranged_problem(cuopt_int_t *termination_status_ptr, cuopt_float return status; } + +// Test invalid bounds scenario (what MOI wrapper was producing) +cuopt_int_t test_invalid_bounds() +{ + cuOptOptimizationProblem problem = NULL; + 
cuOptSolverSettings settings = NULL; + cuOptSolution solution = NULL; + + /* Test the invalid bounds scenario: + maximize 2*x + subject to: + x >= 0.2 + x <= 0.5 + x is binary (0 or 1) + + After MOI wrapper processing: + - Lower bound = ceil(max(0.0, 0.2)) = 1.0 + - Upper bound = floor(min(1.0, 0.5)) = 0.0 + - Result: 1.0 <= x <= 0.0 (INVALID!) + */ + + cuopt_int_t num_variables = 1; + cuopt_int_t num_constraints = 2; + cuopt_int_t nnz = 2; + + // CSR format constraint matrix + // From the constraints: + // x >= 0.2 + // x <= 0.5 + cuopt_int_t row_offsets[] = {0, 1, 2}; + cuopt_int_t column_indices[] = {0, 0}; + cuopt_float_t values[] = {1.0, 1.0}; + + // Objective coefficients + // From the objective function: maximize 2*x + cuopt_float_t objective_coefficients[] = {2.0}; + + // Constraint bounds + // From the constraints: + // x >= 0.2 + // x <= 0.5 + cuopt_float_t constraint_upper_bounds[] = {CUOPT_INFINITY, 0.5}; + cuopt_float_t constraint_lower_bounds[] = {0.2, -CUOPT_INFINITY}; + + // Variable bounds - INVALID: lower > upper + // After MOI wrapper processing: + cuopt_float_t var_lower_bounds[] = {1.0}; // ceil(max(0.0, 0.2)) = 1.0 + cuopt_float_t var_upper_bounds[] = {0.0}; // floor(min(1.0, 0.5)) = 0.0 + + // Variable types (binary) + char variable_types[] = {CUOPT_INTEGER}; // Binary variable + + cuopt_int_t status; + cuopt_float_t time; + cuopt_int_t termination_status; + cuopt_float_t objective_value; + + printf("Testing invalid bounds scenario (MOI wrapper issue)...\n"); + printf("Problem: Binary variable with bounds 1.0 <= x <= 0.0 (INVALID!)\n"); + + // Create the problem + status = cuOptCreateRangedProblem(num_constraints, + num_variables, + CUOPT_MAXIMIZE, // maximize + 0.0, // objective offset + objective_coefficients, + row_offsets, + column_indices, + values, + constraint_lower_bounds, + constraint_upper_bounds, + var_lower_bounds, + var_upper_bounds, + variable_types, + &problem); + + printf("cuOptCreateRangedProblem returned: %d\n", status); + + 
if (status == CUOPT_VALIDATION_ERROR) { + printf("✓ Validation error triggered as expected!\n"); + printf("This reproduces the 'Variable bounds are invalid' error from MOI wrapper\n"); + return CUOPT_SUCCESS; // This is the expected result + } else if (status != CUOPT_SUCCESS) { + printf("✗ Unexpected error: %d\n", status); + goto DONE; + } + + // If we get here, the problem was created successfully + printf("✓ Problem created successfully (unexpected!)\n"); + + // Create solver settings + status = cuOptCreateSolverSettings(&settings); + if (status != CUOPT_SUCCESS) { + printf("Error creating solver settings: %d\n", status); + goto DONE; + } + + // Solve the problem + status = cuOptSolve(problem, settings, &solution); + if (status != CUOPT_SUCCESS) { + printf("Error solving problem: %d\n", status); + goto DONE; + } + + // Get solution information + status = cuOptGetSolveTime(solution, &time); + if (status != CUOPT_SUCCESS) { + printf("Error getting solve time: %d\n", status); + goto DONE; + } + + status = cuOptGetTerminationStatus(solution, &termination_status); + if (status != CUOPT_SUCCESS) { + printf("Error getting termination status: %d\n", status); + goto DONE; + } + + status = cuOptGetObjectiveValue(solution, &objective_value); + if (status != CUOPT_SUCCESS) { + printf("Error getting objective value: %d\n", status); + goto DONE; + } + + // Print results + printf("\nResults:\n"); + printf("--------\n"); + printf("Termination status: %s (%d)\n", termination_status_to_string(termination_status), termination_status); + printf("Solve time: %f seconds\n", time); + printf("Objective value: %f\n", objective_value); + + // Get and print solution variables + cuopt_float_t* solution_values = (cuopt_float_t*)malloc(num_variables * sizeof(cuopt_float_t)); + status = cuOptGetPrimalSolution(solution, solution_values); + if (status != CUOPT_SUCCESS) { + printf("Error getting solution values: %d\n", status); + free(solution_values); + goto DONE; + } + + printf("\nSolution: 
\n"); + for (cuopt_int_t i = 0; i < num_variables; i++) { + printf("x%d = %f\n", i + 1, solution_values[i]); + } + free(solution_values); + +DONE: + cuOptDestroyProblem(&problem); + cuOptDestroySolverSettings(&settings); + cuOptDestroySolution(&solution); + + return status; +} diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp index d01f4319b9..36bb21ab27 100644 --- a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp +++ b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp @@ -24,91 +24,97 @@ #include -TEST(c_api, int_size) { EXPECT_EQ(test_int_size(), sizeof(int32_t)); } - -TEST(c_api, float_size) { EXPECT_EQ(test_float_size(), sizeof(double)); } - -TEST(c_api, afiro) -{ - const std::string& rapidsDatasetRootDir = cuopt::test::get_rapids_dataset_root_dir(); - std::string filename = rapidsDatasetRootDir + "/linear_programming/" + "afiro_original.mps"; - int termination_status; - EXPECT_EQ(solve_mps_file(filename.c_str(), 60, CUOPT_INFINITY, &termination_status), - CUOPT_SUCCESS); - EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_OPTIMAL); -} - -// Test both LP and MIP codepaths -class TimeLimitTestFixture : public ::testing::TestWithParam> { -}; -TEST_P(TimeLimitTestFixture, time_limit) -{ - const std::string& rapidsDatasetRootDir = cuopt::test::get_rapids_dataset_root_dir(); - std::string filename = rapidsDatasetRootDir + std::get<0>(GetParam()); - double target_solve_time = std::get<1>(GetParam()); - int method = std::get<2>(GetParam()); - int termination_status; - double solve_time = std::numeric_limits::quiet_NaN(); - EXPECT_EQ(solve_mps_file(filename.c_str(), - target_solve_time, - CUOPT_INFINITY, - &termination_status, - &solve_time, - method), - CUOPT_SUCCESS); - EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_TIME_LIMIT); - - // Dual simplex is spending some time for factorizing the basis, and this computation does not - // check for time limit - 
double excess_allowed_time = 2.0; - EXPECT_NEAR(solve_time, target_solve_time, excess_allowed_time); -} -INSTANTIATE_TEST_SUITE_P( - c_api, - TimeLimitTestFixture, - ::testing::Values( - std::make_tuple("/linear_programming/square41/square41.mps", - 5, - CUOPT_METHOD_DUAL_SIMPLEX), // LP, Dual Simplex - std::make_tuple("/linear_programming/square41/square41.mps", 5, CUOPT_METHOD_PDLP), // LP, PDLP - std::make_tuple("/mip/enlight_hard.mps", 5, CUOPT_METHOD_DUAL_SIMPLEX) // MIP - )); - -TEST(c_api, iteration_limit) -{ - const std::string& rapidsDatasetRootDir = cuopt::test::get_rapids_dataset_root_dir(); - std::string filename = rapidsDatasetRootDir + "/linear_programming/" + "afiro_original.mps"; - int termination_status; - EXPECT_EQ(solve_mps_file(filename.c_str(), 60, 1, &termination_status), CUOPT_SUCCESS); - EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_ITERATION_LIMIT); -} - -TEST(c_api, solve_time_bb_preemption) -{ - const std::string& rapidsDatasetRootDir = cuopt::test::get_rapids_dataset_root_dir(); - std::string filename = rapidsDatasetRootDir + "/mip/" + "bb_optimality.mps"; - int termination_status; - double solve_time = std::numeric_limits::quiet_NaN(); - EXPECT_EQ(solve_mps_file(filename.c_str(), 5, CUOPT_INFINITY, &termination_status, &solve_time), - CUOPT_SUCCESS); - EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_OPTIMAL); - EXPECT_GT(solve_time, 0); // solve time should not be equal to 0, even on very simple instances - // solved by B&B before the diversity solver has time to run -} - -TEST(c_api, bad_parameter_name) { EXPECT_EQ(test_bad_parameter_name(), CUOPT_INVALID_ARGUMENT); } - -TEST(c_api, burglar) { EXPECT_EQ(burglar_problem(), CUOPT_SUCCESS); } - -TEST(c_api, test_missing_file) { EXPECT_EQ(test_missing_file(), CUOPT_MPS_FILE_ERROR); } - -TEST(c_api, test_infeasible_problem) { EXPECT_EQ(test_infeasible_problem(), CUOPT_SUCCESS); } - -TEST(c_api, test_ranged_problem) -{ - cuopt_int_t termination_status; - cuopt_float_t 
objective; - EXPECT_EQ(test_ranged_problem(&termination_status, &objective), CUOPT_SUCCESS); - EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_OPTIMAL); - EXPECT_NEAR(objective, 32.0, 1e-3); -} +// TEST(c_api, int_size) { EXPECT_EQ(test_int_size(), sizeof(int32_t)); } + +// TEST(c_api, float_size) { EXPECT_EQ(test_float_size(), sizeof(double)); } + +// TEST(c_api, afiro) +// { +// const std::string& rapidsDatasetRootDir = cuopt::test::get_rapids_dataset_root_dir(); +// std::string filename = rapidsDatasetRootDir + "/linear_programming/" + "afiro_original.mps"; +// int termination_status; +// EXPECT_EQ(solve_mps_file(filename.c_str(), 60, CUOPT_INFINITY, &termination_status), +// CUOPT_SUCCESS); +// EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_OPTIMAL); +// } + +// // Test both LP and MIP codepaths +// class TimeLimitTestFixture : public ::testing::TestWithParam> { +// }; +// TEST_P(TimeLimitTestFixture, time_limit) +// { +// const std::string& rapidsDatasetRootDir = cuopt::test::get_rapids_dataset_root_dir(); +// std::string filename = rapidsDatasetRootDir + std::get<0>(GetParam()); +// double target_solve_time = std::get<1>(GetParam()); +// int method = std::get<2>(GetParam()); +// int termination_status; +// double solve_time = std::numeric_limits::quiet_NaN(); +// EXPECT_EQ(solve_mps_file(filename.c_str(), +// target_solve_time, +// CUOPT_INFINITY, +// &termination_status, +// &solve_time, +// method), +// CUOPT_SUCCESS); +// EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_TIME_LIMIT); +// +// Dual simplex is spending some time for factorizing the basis, and this computation does not +// check for time limit +// double excess_allowed_time = 2.0; +// EXPECT_NEAR(solve_time, target_solve_time, excess_allowed_time); +// } +// INSTANTIATE_TEST_SUITE_P( +// c_api, +// TimeLimitTestFixture, +// ::testing::Values( +// std::make_tuple("/linear_programming/square41/square41.mps", +// 5, +// CUOPT_METHOD_DUAL_SIMPLEX), // LP, Dual Simplex +// 
std::make_tuple("/linear_programming/square41/square41.mps", 5, CUOPT_METHOD_PDLP), // LP, +// PDLP std::make_tuple("/mip/enlight_hard.mps", 5, CUOPT_METHOD_DUAL_SIMPLEX) // +// MIP +// )); + +// TEST(c_api, iteration_limit) +// { +// const std::string& rapidsDatasetRootDir = cuopt::test::get_rapids_dataset_root_dir(); +// std::string filename = rapidsDatasetRootDir + "/linear_programming/" + "afiro_original.mps"; +// int termination_status; +// EXPECT_EQ(solve_mps_file(filename.c_str(), 60, 1, &termination_status), CUOPT_SUCCESS); +// EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_ITERATION_LIMIT); +// } + +// TEST(c_api, solve_time_bb_preemption) +// { +// const std::string& rapidsDatasetRootDir = cuopt::test::get_rapids_dataset_root_dir(); +// std::string filename = rapidsDatasetRootDir + "/mip/" + "bb_optimality.mps"; +// int termination_status; +// double solve_time = std::numeric_limits::quiet_NaN(); +// EXPECT_EQ(solve_mps_file(filename.c_str(), 5, CUOPT_INFINITY, &termination_status, +// &solve_time), +// CUOPT_SUCCESS); +// EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_OPTIMAL); +// EXPECT_GT(solve_time, 0); // solve time should not be equal to 0, even on very simple +// instances +// // solved by B&B before the diversity solver has time to run +// } + +// TEST(c_api, bad_parameter_name) { EXPECT_EQ(test_bad_parameter_name(), CUOPT_INVALID_ARGUMENT); } + +// TEST(c_api, burglar) { EXPECT_EQ(burglar_problem(), CUOPT_SUCCESS); } + +// TEST(c_api, test_missing_file) { EXPECT_EQ(test_missing_file(), CUOPT_MPS_FILE_ERROR); } + +// TEST(c_api, test_infeasible_problem) { EXPECT_EQ(test_infeasible_problem(), CUOPT_SUCCESS); } + +// TEST(c_api, test_ranged_problem) +// { +// cuopt_int_t termination_status; +// cuopt_float_t objective; +// EXPECT_EQ(test_ranged_problem(&termination_status, &objective), CUOPT_SUCCESS); +// EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_OPTIMAL); +// EXPECT_NEAR(objective, 32.0, 1e-3); +// } + +TEST(c_api, 
test_invalid_bounds) { EXPECT_EQ(test_invalid_bounds(), CUOPT_SUCCESS); } diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_tests.h b/cpp/tests/linear_programming/c_api_tests/c_api_tests.h index e5c9a965c8..66a202f41f 100644 --- a/cpp/tests/linear_programming/c_api_tests/c_api_tests.h +++ b/cpp/tests/linear_programming/c_api_tests/c_api_tests.h @@ -39,6 +39,7 @@ cuopt_int_t test_missing_file(); cuopt_int_t test_infeasible_problem(); cuopt_int_t test_bad_parameter_name(); cuopt_int_t test_ranged_problem(cuopt_int_t* termination_status_ptr, cuopt_float_t* objective_ptr); +cuopt_int_t test_invalid_bounds(); #ifdef __cplusplus } From ef26750e5e12498161a7ef5701b73267bd216450 Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Fri, 29 Aug 2025 08:46:56 -0700 Subject: [PATCH 02/33] added tests --- cpp/libmps_parser/src/mps_parser.cpp | 9 +- .../utilities/problem_checking.cu | 28 +++ .../utilities/problem_checking.cuh | 1 + cpp/src/mip/presolve/third_party_presolve.cpp | 11 +- cpp/src/mip/solve.cu | 9 +- .../c_api_tests/c_api_test.c | 20 +- .../c_api_tests/c_api_tests.cpp | 180 +++++++++--------- cpp/tests/mip/termination_test.cu | 6 + datasets/mip/crossing_var_bounds.mps | 27 +++ 9 files changed, 179 insertions(+), 112 deletions(-) create mode 100644 datasets/mip/crossing_var_bounds.mps diff --git a/cpp/libmps_parser/src/mps_parser.cpp b/cpp/libmps_parser/src/mps_parser.cpp index b9d3daaa37..f88568db9e 100644 --- a/cpp/libmps_parser/src/mps_parser.cpp +++ b/cpp/libmps_parser/src/mps_parser.cpp @@ -505,9 +505,12 @@ void mps_parser_t::parse_string(char* buf) variable_lower_bounds[i] = 0; variable_upper_bounds[i] = 1; } - mps_parser_expects(variable_lower_bounds[i] <= variable_upper_bounds[i], - error_type_t::ValidationError, - "MPS Parser Internal Error - Please contact cuOpt team"); + if (variable_lower_bounds[i] > variable_upper_bounds[i]) { + printf("WARNING: Variable %d has crossing bounds: %f > %f\n", + i, + variable_lower_bounds[i], + 
variable_upper_bounds[i]); + } } } diff --git a/cpp/src/linear_programming/utilities/problem_checking.cu b/cpp/src/linear_programming/utilities/problem_checking.cu index 3a74012147..a4d979cf94 100644 --- a/cpp/src/linear_programming/utilities/problem_checking.cu +++ b/cpp/src/linear_programming/utilities/problem_checking.cu @@ -338,6 +338,34 @@ void problem_checking_t::check_unscaled_solution( } } +template +bool problem_checking_t::has_crossing_bounds( + const optimization_problem_t& op_problem) +{ + // Check if all variable bounds are valid (upper >= lower) + bool all_variable_bounds_valid = thrust::all_of( + op_problem.get_handle_ptr()->get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + op_problem.get_variable_upper_bounds().size(), + [upper_bounds = make_span(op_problem.get_variable_upper_bounds()), + lower_bounds = make_span(op_problem.get_variable_lower_bounds())] __device__(size_t i) { + return upper_bounds[i] >= lower_bounds[i]; + }); + + // Check if all constraint bounds are valid (upper >= lower) + bool all_constraint_bounds_valid = thrust::all_of( + op_problem.get_handle_ptr()->get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + op_problem.get_constraint_upper_bounds().size(), + [upper_bounds = make_span(op_problem.get_constraint_upper_bounds()), + lower_bounds = make_span(op_problem.get_constraint_lower_bounds())] __device__(size_t i) { + return upper_bounds[i] >= lower_bounds[i]; + }); + + // Return true if any bounds are invalid (crossing) + return !all_variable_bounds_valid || !all_constraint_bounds_valid; +} + #define INSTANTIATE(F_TYPE) template class problem_checking_t; #if MIP_INSTANTIATE_FLOAT diff --git a/cpp/src/linear_programming/utilities/problem_checking.cuh b/cpp/src/linear_programming/utilities/problem_checking.cuh index 91be9160b1..aeeb5a1154 100644 --- a/cpp/src/linear_programming/utilities/problem_checking.cuh +++ 
b/cpp/src/linear_programming/utilities/problem_checking.cuh @@ -38,6 +38,7 @@ class problem_checking_t { static void check_csr_representation(const optimization_problem_t& op_problem); // Check all fields and convert row_types to constraints lower/upper bounds if needed static void check_problem_representation(const optimization_problem_t& op_problem); + static bool has_crossing_bounds(const optimization_problem_t& op_problem); static void check_scaled_problem(detail::problem_t const& scaled_problem, detail::problem_t const& op_problem); diff --git a/cpp/src/mip/presolve/third_party_presolve.cpp b/cpp/src/mip/presolve/third_party_presolve.cpp index 7c9867d0e7..bf863f5d65 100644 --- a/cpp/src/mip/presolve/third_party_presolve.cpp +++ b/cpp/src/mip/presolve/third_party_presolve.cpp @@ -26,8 +26,7 @@ namespace cuopt::linear_programming::detail { static papilo::PostsolveStorage post_solve_storage_; -static int presolve_calls_ = 0; -static bool maximize_ = false; +static bool maximize_ = false; template papilo::Problem build_papilo_problem(const optimization_problem_t& op_problem) @@ -317,10 +316,6 @@ std::pair, bool> third_party_presolve_t papilo_problem = build_papilo_problem(op_problem); CUOPT_LOG_INFO("Unpresolved problem:: %d constraints, %d variables, %d nonzeros", @@ -340,7 +335,6 @@ std::pair, bool> third_party_presolve_t(op_problem.get_handle_ptr()), false); } post_solve_storage_ = result.postsolve; @@ -365,9 +359,6 @@ void third_party_presolve_t::undo(rmm::device_uvector& primal_sol bool status_to_skip, rmm::cuda_stream_view stream_view) { - --presolve_calls_; - cuopt_expects( - presolve_calls_ == 0, error_type_t::ValidationError, "Postsolve can only be called once"); if (status_to_skip) { return; } std::vector primal_sol_vec_h(primal_solution.size()); raft::copy(primal_sol_vec_h.data(), primal_solution.data(), primal_solution.size(), stream_view); diff --git a/cpp/src/mip/solve.cu b/cpp/src/mip/solve.cu index de84e2c236..94637eb82f 100644 --- 
a/cpp/src/mip/solve.cu +++ b/cpp/src/mip/solve.cu @@ -176,6 +176,13 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, problem_checking_t::check_problem_representation(op_problem); problem_checking_t::check_initial_solution_representation(op_problem, settings); + // Check for crossing bounds. Return infeasible if there are any + if (problem_checking_t::has_crossing_bounds(op_problem)) { + return mip_solution_t(mip_termination_status_t::Infeasible, + solver_stats_t{}, + op_problem.get_handle_ptr()->get_stream()); + } + auto timer = cuopt::timer_t(time_limit); double presolve_time = 0.0; @@ -204,7 +211,7 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, op_problem.get_handle_ptr()->get_stream()); } - problem = detail::problem_t(reduced_op_problem); + problem = detail::problem_t(reduced_op_problem, settings.get_tolerances()); presolve_time = timer.elapsed_time(); CUOPT_LOG_INFO("Third party presolve time: %f", presolve_time); } diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_test.c b/cpp/tests/linear_programming/c_api_tests/c_api_test.c index c2955b80c4..4a976277aa 100644 --- a/cpp/tests/linear_programming/c_api_tests/c_api_test.c +++ b/cpp/tests/linear_programming/c_api_tests/c_api_test.c @@ -950,17 +950,13 @@ cuopt_int_t test_invalid_bounds() printf("cuOptCreateRangedProblem returned: %d\n", status); - if (status == CUOPT_VALIDATION_ERROR) { - printf("✓ Validation error triggered as expected!\n"); - printf("This reproduces the 'Variable bounds are invalid' error from MOI wrapper\n"); - return CUOPT_SUCCESS; // This is the expected result - } else if (status != CUOPT_SUCCESS) { + if (status != CUOPT_SUCCESS) { printf("✗ Unexpected error: %d\n", status); goto DONE; } // If we get here, the problem was created successfully - printf("✓ Problem created successfully (unexpected!)\n"); + printf("✓ Problem created successfully\n"); // Create solver settings status = cuOptCreateSolverSettings(&settings); @@ -988,6 +984,18 @@ cuopt_int_t 
test_invalid_bounds() printf("Error getting termination status: %d\n", status); goto DONE; } + if (termination_status != CUOPT_TERIMINATION_STATUS_INFEASIBLE) { + printf("Error: expected termination status to be %d, but got %d\n", + CUOPT_TERIMINATION_STATUS_INFEASIBLE, + termination_status); + status = CUOPT_VALIDATION_ERROR; + goto DONE; + } + else { + printf("✓ Problem found infeasible as expected\n"); + status = CUOPT_SUCCESS; + goto DONE; + } status = cuOptGetObjectiveValue(solution, &objective_value); if (status != CUOPT_SUCCESS) { diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp index 36bb21ab27..8ae49c48cc 100644 --- a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp +++ b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp @@ -24,97 +24,93 @@ #include -// TEST(c_api, int_size) { EXPECT_EQ(test_int_size(), sizeof(int32_t)); } - -// TEST(c_api, float_size) { EXPECT_EQ(test_float_size(), sizeof(double)); } - -// TEST(c_api, afiro) -// { -// const std::string& rapidsDatasetRootDir = cuopt::test::get_rapids_dataset_root_dir(); -// std::string filename = rapidsDatasetRootDir + "/linear_programming/" + "afiro_original.mps"; -// int termination_status; -// EXPECT_EQ(solve_mps_file(filename.c_str(), 60, CUOPT_INFINITY, &termination_status), -// CUOPT_SUCCESS); -// EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_OPTIMAL); -// } - -// // Test both LP and MIP codepaths -// class TimeLimitTestFixture : public ::testing::TestWithParam> { -// }; -// TEST_P(TimeLimitTestFixture, time_limit) -// { -// const std::string& rapidsDatasetRootDir = cuopt::test::get_rapids_dataset_root_dir(); -// std::string filename = rapidsDatasetRootDir + std::get<0>(GetParam()); -// double target_solve_time = std::get<1>(GetParam()); -// int method = std::get<2>(GetParam()); -// int termination_status; -// double solve_time = std::numeric_limits::quiet_NaN(); -// 
EXPECT_EQ(solve_mps_file(filename.c_str(), -// target_solve_time, -// CUOPT_INFINITY, -// &termination_status, -// &solve_time, -// method), -// CUOPT_SUCCESS); -// EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_TIME_LIMIT); -// -// Dual simplex is spending some time for factorizing the basis, and this computation does not -// check for time limit -// double excess_allowed_time = 2.0; -// EXPECT_NEAR(solve_time, target_solve_time, excess_allowed_time); -// } -// INSTANTIATE_TEST_SUITE_P( -// c_api, -// TimeLimitTestFixture, -// ::testing::Values( -// std::make_tuple("/linear_programming/square41/square41.mps", -// 5, -// CUOPT_METHOD_DUAL_SIMPLEX), // LP, Dual Simplex -// std::make_tuple("/linear_programming/square41/square41.mps", 5, CUOPT_METHOD_PDLP), // LP, -// PDLP std::make_tuple("/mip/enlight_hard.mps", 5, CUOPT_METHOD_DUAL_SIMPLEX) // -// MIP -// )); - -// TEST(c_api, iteration_limit) -// { -// const std::string& rapidsDatasetRootDir = cuopt::test::get_rapids_dataset_root_dir(); -// std::string filename = rapidsDatasetRootDir + "/linear_programming/" + "afiro_original.mps"; -// int termination_status; -// EXPECT_EQ(solve_mps_file(filename.c_str(), 60, 1, &termination_status), CUOPT_SUCCESS); -// EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_ITERATION_LIMIT); -// } - -// TEST(c_api, solve_time_bb_preemption) -// { -// const std::string& rapidsDatasetRootDir = cuopt::test::get_rapids_dataset_root_dir(); -// std::string filename = rapidsDatasetRootDir + "/mip/" + "bb_optimality.mps"; -// int termination_status; -// double solve_time = std::numeric_limits::quiet_NaN(); -// EXPECT_EQ(solve_mps_file(filename.c_str(), 5, CUOPT_INFINITY, &termination_status, -// &solve_time), -// CUOPT_SUCCESS); -// EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_OPTIMAL); -// EXPECT_GT(solve_time, 0); // solve time should not be equal to 0, even on very simple -// instances -// // solved by B&B before the diversity solver has time to run -// } - -// 
TEST(c_api, bad_parameter_name) { EXPECT_EQ(test_bad_parameter_name(), CUOPT_INVALID_ARGUMENT); } - -// TEST(c_api, burglar) { EXPECT_EQ(burglar_problem(), CUOPT_SUCCESS); } - -// TEST(c_api, test_missing_file) { EXPECT_EQ(test_missing_file(), CUOPT_MPS_FILE_ERROR); } - -// TEST(c_api, test_infeasible_problem) { EXPECT_EQ(test_infeasible_problem(), CUOPT_SUCCESS); } - -// TEST(c_api, test_ranged_problem) -// { -// cuopt_int_t termination_status; -// cuopt_float_t objective; -// EXPECT_EQ(test_ranged_problem(&termination_status, &objective), CUOPT_SUCCESS); -// EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_OPTIMAL); -// EXPECT_NEAR(objective, 32.0, 1e-3); -// } +TEST(c_api, int_size) { EXPECT_EQ(test_int_size(), sizeof(int32_t)); } + +TEST(c_api, float_size) { EXPECT_EQ(test_float_size(), sizeof(double)); } + +TEST(c_api, afiro) +{ + const std::string& rapidsDatasetRootDir = cuopt::test::get_rapids_dataset_root_dir(); + std::string filename = rapidsDatasetRootDir + "/linear_programming/" + "afiro_original.mps"; + int termination_status; + EXPECT_EQ(solve_mps_file(filename.c_str(), 60, CUOPT_INFINITY, &termination_status), + CUOPT_SUCCESS); + EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_OPTIMAL); +} + +// Test both LP and MIP codepaths +class TimeLimitTestFixture : public ::testing::TestWithParam> { +}; +TEST_P(TimeLimitTestFixture, time_limit) +{ + const std::string& rapidsDatasetRootDir = cuopt::test::get_rapids_dataset_root_dir(); + std::string filename = rapidsDatasetRootDir + std::get<0>(GetParam()); + double target_solve_time = std::get<1>(GetParam()); + int method = std::get<2>(GetParam()); + int termination_status; + double solve_time = std::numeric_limits::quiet_NaN(); + EXPECT_EQ(solve_mps_file(filename.c_str(), + target_solve_time, + CUOPT_INFINITY, + &termination_status, + &solve_time, + method), + CUOPT_SUCCESS); + EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_TIME_LIMIT); + + // Dual simplex is spending some time for 
factorizing the basis, and this computation does not + // check for time limit + double excess_allowed_time = 2.0; + EXPECT_NEAR(solve_time, target_solve_time, excess_allowed_time); +} +INSTANTIATE_TEST_SUITE_P( + c_api, + TimeLimitTestFixture, + ::testing::Values( + std::make_tuple("/linear_programming/square41/square41.mps", + 5, + CUOPT_METHOD_DUAL_SIMPLEX), // LP, Dual Simplex + std::make_tuple("/linear_programming/square41/square41.mps", 5, CUOPT_METHOD_PDLP), // LP, PDLP + std::make_tuple("/mip/enlight_hard.mps", 5, CUOPT_METHOD_DUAL_SIMPLEX) // MIP + )); + +TEST(c_api, iteration_limit) +{ + const std::string& rapidsDatasetRootDir = cuopt::test::get_rapids_dataset_root_dir(); + std::string filename = rapidsDatasetRootDir + "/linear_programming/" + "afiro_original.mps"; + int termination_status; + EXPECT_EQ(solve_mps_file(filename.c_str(), 60, 1, &termination_status), CUOPT_SUCCESS); + EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_ITERATION_LIMIT); +} + +TEST(c_api, solve_time_bb_preemption) +{ + const std::string& rapidsDatasetRootDir = cuopt::test::get_rapids_dataset_root_dir(); + std::string filename = rapidsDatasetRootDir + "/mip/" + "bb_optimality.mps"; + int termination_status; + double solve_time = std::numeric_limits::quiet_NaN(); + EXPECT_EQ(solve_mps_file(filename.c_str(), 5, CUOPT_INFINITY, &termination_status, &solve_time), + CUOPT_SUCCESS); + EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_OPTIMAL); + EXPECT_GT(solve_time, 0); // solve time should not be equal to 0, even on very simple instances + // solved by B&B before the diversity solver has time to run +} + +TEST(c_api, bad_parameter_name) { EXPECT_EQ(test_bad_parameter_name(), CUOPT_INVALID_ARGUMENT); } + +TEST(c_api, burglar) { EXPECT_EQ(burglar_problem(), CUOPT_SUCCESS); } + +TEST(c_api, test_missing_file) { EXPECT_EQ(test_missing_file(), CUOPT_MPS_FILE_ERROR); } + +TEST(c_api, test_infeasible_problem) { EXPECT_EQ(test_infeasible_problem(), CUOPT_SUCCESS); } + 
+TEST(c_api, test_ranged_problem) +{ + cuopt_int_t termination_status; + cuopt_float_t objective; + EXPECT_EQ(test_ranged_problem(&termination_status, &objective), CUOPT_SUCCESS); + EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_OPTIMAL); + EXPECT_NEAR(objective, 32.0, 1e-3); +} TEST(c_api, test_invalid_bounds) { EXPECT_EQ(test_invalid_bounds(), CUOPT_SUCCESS); } diff --git a/cpp/tests/mip/termination_test.cu b/cpp/tests/mip/termination_test.cu index 54be62c591..ce2003c40a 100644 --- a/cpp/tests/mip/termination_test.cu +++ b/cpp/tests/mip/termination_test.cu @@ -110,6 +110,12 @@ TEST(termination_status, lower_bound_bb_timeout) EXPECT_GE(lb, obj_val); } +TEST(termination_status, crossing_bounds_infeasible) +{ + auto [termination_status, obj_val, lb] = test_mps_file("mip/crossing_var_bounds.mps", 0.5, false); + EXPECT_EQ(termination_status, mip_termination_status_t::Infeasible); +} + TEST(termination_status, bb_infeasible_test) { // First, check that presolve doesn't reduce the problem to infeasibility diff --git a/datasets/mip/crossing_var_bounds.mps b/datasets/mip/crossing_var_bounds.mps new file mode 100644 index 0000000000..3ed790bbcf --- /dev/null +++ b/datasets/mip/crossing_var_bounds.mps @@ -0,0 +1,27 @@ +* Optimal solution -28 +NAME MIP_SAMPLE +ROWS + N OBJ + L C1 + L C2 + L C3 +COLUMNS + MARK0001 'MARKER' 'INTORG' + X1 OBJ -7 + X1 C1 -1 + X1 C2 5 + X1 C3 -2 + X2 OBJ -2 + X2 C1 2 + X2 C2 1 + X2 C3 -2 + MARK0001 'MARKER' 'INTEND' +RHS + RHS C1 4 + RHS C2 20 + RHS C3 -7 +BOUNDS + UP BOUND X1 10 + LO BOUNDS X1 20 + UP BOUND X2 10 +ENDATA From 480c4f1f2e6bdb1e4f99a46557607a84b316be46 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 29 Aug 2025 11:38:21 -0500 Subject: [PATCH 03/33] use GCC 14, consolidate dependency groups, update pre-commit hooks (#358) Contributes to https://github.com/rapidsai/build-planning/issues/208 and #294, by doing some of the pre-work to support CUDA 13 * updates from GCC 13 to GCC 14 (ref: 
https://github.com/rapidsai/build-planning/issues/188) * consolidates some dependency groups in `dependencies.yaml` marked with `cuda: "12.*"` that also should apply for CUDA 13 Other small packaging / CI changes: * updates to latest versions of all RAPIDS `pre-commit` hooks - *including the new `--strict` flag for `rapids-dependency-file-generator` from https://github.com/rapidsai/dependency-file-generator/pull/163* * removes empty `conda/recipes/cuopt-sh-client/conda_build_config.yaml` ## Issue #294 Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Ramakrishnap (https://github.com/rgsl888prabhu) URL: https://github.com/NVIDIA/cuopt/pull/358 --- .pre-commit-config.yaml | 8 +-- .../all_cuda-129_arch-aarch64.yaml | 4 +- .../all_cuda-129_arch-x86_64.yaml | 4 +- .../cuopt-sh-client/conda_build_config.yaml | 0 conda/recipes/cuopt/conda_build_config.yaml | 4 +- .../recipes/libcuopt/conda_build_config.yaml | 4 +- .../mps-parser/conda_build_config.yaml | 6 +++ dependencies.yaml | 50 ++++--------------- 8 files changed, 27 insertions(+), 53 deletions(-) delete mode 100644 conda/recipes/cuopt-sh-client/conda_build_config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d5ac1c0ac7..20355b4836 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: 'v5.0.0' + rev: 'v6.0.0' hooks: - id: end-of-file-fixer exclude: ^(datasets|helmchart)/.*\.(mps|json|yaml|yml|txt)$ @@ -75,17 +75,17 @@ repos: types_or: [file] args: ['-fallback-style=none', '-style=file', '-i'] - repo: https://github.com/rapidsai/dependency-file-generator - rev: v1.18.1 + rev: v1.20.0 hooks: - id: rapids-dependency-file-generator - args: ["--clean"] + args: ["--clean", "--warn-all", "--strict"] - repo: https://github.com/shellcheck-py/shellcheck-py rev: v0.10.0.1 hooks: - id: shellcheck args: ["--severity=warning"] - repo: https://github.com/rapidsai/pre-commit-hooks - rev: 
v0.6.0 + rev: v0.7.0 hooks: - id: verify-copyright files: | diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 3565eae6a1..9f9a642c26 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -13,10 +13,8 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cpp-argparse -- cuda-crt - cuda-nvcc - cuda-nvtx-dev -- cuda-nvvm - cuda-sanitizer-api - cuda-version=12.9 - cudf==25.10.*,>=0.0.0a0 @@ -28,7 +26,7 @@ dependencies: - exhale - fastapi - folium -- gcc_linux-aarch64=13.* +- gcc_linux-aarch64=14.* - geopandas - gmock - gtest diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 04132f8c7a..f2a73c08a4 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -13,10 +13,8 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cpp-argparse -- cuda-crt - cuda-nvcc - cuda-nvtx-dev -- cuda-nvvm - cuda-sanitizer-api - cuda-version=12.9 - cudf==25.10.*,>=0.0.0a0 @@ -28,7 +26,7 @@ dependencies: - exhale - fastapi - folium -- gcc_linux-64=13.* +- gcc_linux-64=14.* - geopandas - gmock - gtest diff --git a/conda/recipes/cuopt-sh-client/conda_build_config.yaml b/conda/recipes/cuopt-sh-client/conda_build_config.yaml deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/conda/recipes/cuopt/conda_build_config.yaml b/conda/recipes/cuopt/conda_build_config.yaml index 02a5582323..4f1ae065c4 100644 --- a/conda/recipes/cuopt/conda_build_config.yaml +++ b/conda/recipes/cuopt/conda_build_config.yaml @@ -1,8 +1,8 @@ c_compiler_version: - - 13 + - 14 cxx_compiler_version: - - 13 + - 14 cuda_compiler: - cuda-nvcc diff --git a/conda/recipes/libcuopt/conda_build_config.yaml b/conda/recipes/libcuopt/conda_build_config.yaml index eddb7ec94d..e7d25dc436 100644 --- a/conda/recipes/libcuopt/conda_build_config.yaml +++ 
b/conda/recipes/libcuopt/conda_build_config.yaml @@ -1,8 +1,8 @@ c_compiler_version: - - 13 + - 14 cxx_compiler_version: - - 13 + - 14 cuda_compiler: - cuda-nvcc diff --git a/conda/recipes/mps-parser/conda_build_config.yaml b/conda/recipes/mps-parser/conda_build_config.yaml index b5dfe2b140..bc330ea431 100644 --- a/conda/recipes/mps-parser/conda_build_config.yaml +++ b/conda/recipes/mps-parser/conda_build_config.yaml @@ -1,6 +1,12 @@ numpy_version: - ">=1.21,<1.25" +c_compiler_version: + - 14 + +cxx_compiler_version: + - 14 + c_stdlib: - sysroot diff --git a/dependencies.yaml b/dependencies.yaml index ae7c345a79..1a8e1f8d77 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -282,6 +282,7 @@ dependencies: - output_types: conda packages: - c-compiler + - cuda-nvcc - cxx-compiler - ccache specific: @@ -289,22 +290,14 @@ dependencies: matrices: - matrix: arch: x86_64 - cuda: "12.*" packages: - - gcc_linux-64=13.* + - gcc_linux-64=14.* - sysroot_linux-64==2.28 - matrix: arch: aarch64 - cuda: "12.*" packages: - - gcc_linux-aarch64=13.* + - gcc_linux-aarch64=14.* - sysroot_linux-aarch64==2.28 - - output_types: conda - matrices: - - matrix: - cuda: "12.*" - packages: - - cuda-nvcc build_cpp: common: - output_types: conda @@ -317,17 +310,9 @@ dependencies: common: - output_types: [conda] packages: + - cuda-sanitizer-api - gmock - gtest - specific: - - output_types: conda - matrices: - - matrix: - cuda: "12.*" - packages: - - cuda-sanitizer-api - - matrix: - packages: build_wheels: common: - output_types: [requirements, pyproject] @@ -369,7 +354,6 @@ dependencies: - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file - # This index is needed for cubinlinker, ptxcompiler. 
- --extra-index-url=https://pypi.nvidia.com - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple specific: @@ -682,25 +666,13 @@ dependencies: packages: - cuda-version=12.9 cuda: - specific: - - output_types: conda - matrices: - - matrix: - cuda: "12.[01]" - packages: - - libcurand-dev - - libcusolver-dev - - libcusparse-dev - - cuda-nvtx-dev - - matrix: - cuda: "12.*" - packages: - - libcurand-dev - - libcusolver-dev - - libcusparse-dev - - cuda-nvtx-dev - - cuda-nvvm - - cuda-crt + common: + - output_types: [conda] + packages: + - libcurand-dev + - libcusolver-dev + - libcusparse-dev + - cuda-nvtx-dev cuda_wheels: specific: From 91541703def25b7b1efc2fbd7283103f8dfadeb0 Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Mon, 1 Sep 2025 09:18:29 +0000 Subject: [PATCH 04/33] fix failing test --- cpp/tests/mip/unit_test.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/mip/unit_test.cu b/cpp/tests/mip/unit_test.cu index b7eb242220..1114c7aba7 100644 --- a/cpp/tests/mip/unit_test.cu +++ b/cpp/tests/mip/unit_test.cu @@ -159,7 +159,7 @@ TEST(ErrorTest, TestError) // Set constraint bounds std::vector lower_bounds = {1.0}; - std::vector upper_bounds = {0.0}; + std::vector upper_bounds = {1.0, 1.0}; problem.set_constraint_lower_bounds(lower_bounds.data(), lower_bounds.size()); problem.set_constraint_upper_bounds(upper_bounds.data(), upper_bounds.size()); From 81c8553ba66ea73bc425628502ed414f95ae8273 Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Mon, 1 Sep 2025 02:20:15 -0700 Subject: [PATCH 05/33] Revert "use GCC 14, consolidate dependency groups, update pre-commit hooks (#358)" This reverts commit 480c4f1f2e6bdb1e4f99a46557607a84b316be46. 
--- .pre-commit-config.yaml | 8 +-- .../all_cuda-129_arch-aarch64.yaml | 4 +- .../all_cuda-129_arch-x86_64.yaml | 4 +- .../cuopt-sh-client/conda_build_config.yaml | 0 conda/recipes/cuopt/conda_build_config.yaml | 4 +- .../recipes/libcuopt/conda_build_config.yaml | 4 +- .../mps-parser/conda_build_config.yaml | 6 --- dependencies.yaml | 50 +++++++++++++++---- 8 files changed, 53 insertions(+), 27 deletions(-) create mode 100644 conda/recipes/cuopt-sh-client/conda_build_config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 20355b4836..d5ac1c0ac7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: 'v6.0.0' + rev: 'v5.0.0' hooks: - id: end-of-file-fixer exclude: ^(datasets|helmchart)/.*\.(mps|json|yaml|yml|txt)$ @@ -75,17 +75,17 @@ repos: types_or: [file] args: ['-fallback-style=none', '-style=file', '-i'] - repo: https://github.com/rapidsai/dependency-file-generator - rev: v1.20.0 + rev: v1.18.1 hooks: - id: rapids-dependency-file-generator - args: ["--clean", "--warn-all", "--strict"] + args: ["--clean"] - repo: https://github.com/shellcheck-py/shellcheck-py rev: v0.10.0.1 hooks: - id: shellcheck args: ["--severity=warning"] - repo: https://github.com/rapidsai/pre-commit-hooks - rev: v0.7.0 + rev: v0.6.0 hooks: - id: verify-copyright files: | diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 9f9a642c26..3565eae6a1 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -13,8 +13,10 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cpp-argparse +- cuda-crt - cuda-nvcc - cuda-nvtx-dev +- cuda-nvvm - cuda-sanitizer-api - cuda-version=12.9 - cudf==25.10.*,>=0.0.0a0 @@ -26,7 +28,7 @@ dependencies: - exhale - fastapi - folium -- gcc_linux-aarch64=14.* +- gcc_linux-aarch64=13.* - geopandas - gmock - gtest diff 
--git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index f2a73c08a4..04132f8c7a 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -13,8 +13,10 @@ dependencies: - clang==20.1.4 - cmake>=3.30.4 - cpp-argparse +- cuda-crt - cuda-nvcc - cuda-nvtx-dev +- cuda-nvvm - cuda-sanitizer-api - cuda-version=12.9 - cudf==25.10.*,>=0.0.0a0 @@ -26,7 +28,7 @@ dependencies: - exhale - fastapi - folium -- gcc_linux-64=14.* +- gcc_linux-64=13.* - geopandas - gmock - gtest diff --git a/conda/recipes/cuopt-sh-client/conda_build_config.yaml b/conda/recipes/cuopt-sh-client/conda_build_config.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/conda/recipes/cuopt/conda_build_config.yaml b/conda/recipes/cuopt/conda_build_config.yaml index 4f1ae065c4..02a5582323 100644 --- a/conda/recipes/cuopt/conda_build_config.yaml +++ b/conda/recipes/cuopt/conda_build_config.yaml @@ -1,8 +1,8 @@ c_compiler_version: - - 14 + - 13 cxx_compiler_version: - - 14 + - 13 cuda_compiler: - cuda-nvcc diff --git a/conda/recipes/libcuopt/conda_build_config.yaml b/conda/recipes/libcuopt/conda_build_config.yaml index e7d25dc436..eddb7ec94d 100644 --- a/conda/recipes/libcuopt/conda_build_config.yaml +++ b/conda/recipes/libcuopt/conda_build_config.yaml @@ -1,8 +1,8 @@ c_compiler_version: - - 14 + - 13 cxx_compiler_version: - - 14 + - 13 cuda_compiler: - cuda-nvcc diff --git a/conda/recipes/mps-parser/conda_build_config.yaml b/conda/recipes/mps-parser/conda_build_config.yaml index bc330ea431..b5dfe2b140 100644 --- a/conda/recipes/mps-parser/conda_build_config.yaml +++ b/conda/recipes/mps-parser/conda_build_config.yaml @@ -1,12 +1,6 @@ numpy_version: - ">=1.21,<1.25" -c_compiler_version: - - 14 - -cxx_compiler_version: - - 14 - c_stdlib: - sysroot diff --git a/dependencies.yaml b/dependencies.yaml index 1a8e1f8d77..ae7c345a79 100644 --- a/dependencies.yaml +++ b/dependencies.yaml 
@@ -282,7 +282,6 @@ dependencies: - output_types: conda packages: - c-compiler - - cuda-nvcc - cxx-compiler - ccache specific: @@ -290,14 +289,22 @@ dependencies: matrices: - matrix: arch: x86_64 + cuda: "12.*" packages: - - gcc_linux-64=14.* + - gcc_linux-64=13.* - sysroot_linux-64==2.28 - matrix: arch: aarch64 + cuda: "12.*" packages: - - gcc_linux-aarch64=14.* + - gcc_linux-aarch64=13.* - sysroot_linux-aarch64==2.28 + - output_types: conda + matrices: + - matrix: + cuda: "12.*" + packages: + - cuda-nvcc build_cpp: common: - output_types: conda @@ -310,9 +317,17 @@ dependencies: common: - output_types: [conda] packages: - - cuda-sanitizer-api - gmock - gtest + specific: + - output_types: conda + matrices: + - matrix: + cuda: "12.*" + packages: + - cuda-sanitizer-api + - matrix: + packages: build_wheels: common: - output_types: [requirements, pyproject] @@ -354,6 +369,7 @@ dependencies: - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for cubinlinker, ptxcompiler. 
- --extra-index-url=https://pypi.nvidia.com - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple specific: @@ -666,13 +682,25 @@ dependencies: packages: - cuda-version=12.9 cuda: - common: - - output_types: [conda] - packages: - - libcurand-dev - - libcusolver-dev - - libcusparse-dev - - cuda-nvtx-dev + specific: + - output_types: conda + matrices: + - matrix: + cuda: "12.[01]" + packages: + - libcurand-dev + - libcusolver-dev + - libcusparse-dev + - cuda-nvtx-dev + - matrix: + cuda: "12.*" + packages: + - libcurand-dev + - libcusolver-dev + - libcusparse-dev + - cuda-nvtx-dev + - cuda-nvvm + - cuda-crt cuda_wheels: specific: From d614f64cc9d1cc8f8c2450a2672050f4d339e570 Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Mon, 1 Sep 2025 09:26:43 +0000 Subject: [PATCH 06/33] remove undeeded change --- cpp/src/linear_programming/cuopt_c.cpp | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/cpp/src/linear_programming/cuopt_c.cpp b/cpp/src/linear_programming/cuopt_c.cpp index abc4478756..fc77e2323e 100644 --- a/cpp/src/linear_programming/cuopt_c.cpp +++ b/cpp/src/linear_programming/cuopt_c.cpp @@ -21,7 +21,6 @@ #include #include #include -#include #include @@ -149,11 +148,7 @@ cuopt_int_t cuOptCreateProblem(cuopt_int_t num_constraints, variable_types[j] == CUOPT_CONTINUOUS ? var_t::CONTINUOUS : var_t::INTEGER; } problem_and_stream->op_problem->set_variable_types(variable_types_host.data(), num_variables); - problem_checking_t::check_problem_representation( - *problem_and_stream->op_problem); *problem_ptr = static_cast(problem_and_stream); - } catch (const cuopt::logic_error& e) { - return static_cast(e.get_error_type()); } catch (const raft::exception& e) { return CUOPT_INVALID_ARGUMENT; } @@ -210,12 +205,7 @@ cuopt_int_t cuOptCreateRangedProblem(cuopt_int_t num_constraints, variable_types[j] == CUOPT_CONTINUOUS ? 
var_t::CONTINUOUS : var_t::INTEGER; } problem_and_stream->op_problem->set_variable_types(variable_types_host.data(), num_variables); - problem_checking_t::check_problem_representation( - *problem_and_stream->op_problem); *problem_ptr = static_cast(problem_and_stream); - } catch (const cuopt::logic_error& e) { - printf("Error: %s, type %d\n", e.what(), static_cast(e.get_error_type())); - return static_cast(e.get_error_type()); } catch (const raft::exception& e) { return CUOPT_INVALID_ARGUMENT; } From aa58f62c5feff807efc812d4edfb01db5446221b Mon Sep 17 00:00:00 2001 From: Ramakrishnap <42624703+rgsl888prabhu@users.noreply.github.com> Date: Tue, 2 Sep 2025 10:05:20 -0500 Subject: [PATCH 07/33] Add Commit Sha to container for reference (#362) Adding commit SHA for future reference Authors: - Ramakrishnap (https://github.com/rgsl888prabhu) - James Lamb (https://github.com/jameslamb) - Ishika Roy (https://github.com/Iroy30) Approvers: - James Lamb (https://github.com/jameslamb) - Ishika Roy (https://github.com/Iroy30) URL: https://github.com/NVIDIA/cuopt/pull/362 --- .github/workflows/build_images.yaml | 8 ++++++++ .github/workflows/build_test_publish_images.yaml | 4 ++++ .github/workflows/test_images.yaml | 6 +++++- ci/docker/Dockerfile | 2 +- python/cuopt_server/cuopt_server/utils/utils.py | 4 +--- 5 files changed, 19 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_images.yaml b/.github/workflows/build_images.yaml index 7964272644..ee1bcbe4cc 100644 --- a/.github/workflows/build_images.yaml +++ b/.github/workflows/build_images.yaml @@ -36,6 +36,10 @@ on: PYTHON_VER: required: true type: string + sha: + description: "sha: full git commit SHA to check out" + required: true + type: string jobs: build: @@ -52,6 +56,7 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 0 + ref: ${{ inputs.sha }} - name: Login to DockerHub uses: docker/login-action@v3 with: @@ -62,6 +67,9 @@ jobs: cp ./LICENSE ./ci/docker/context/LICENSE cp ./VERSION 
./ci/docker/context/VERSION cp ./thirdparty/THIRD_PARTY_LICENSES ./ci/docker/context/THIRD_PARTY_LICENSES + - name: Copy Commit SHA + run: | + git rev-parse HEAD > ./ci/docker/context/COMMIT_SHA - name: Login to NGC uses: docker/login-action@v3 with: diff --git a/.github/workflows/build_test_publish_images.yaml b/.github/workflows/build_test_publish_images.yaml index af066e41ee..22ba97859f 100644 --- a/.github/workflows/build_test_publish_images.yaml +++ b/.github/workflows/build_test_publish_images.yaml @@ -79,6 +79,7 @@ jobs: with: fetch-depth: 0 # unshallow fetch for setuptools-scm persist-credentials: false + ref: ${{ inputs.sha }} - name: Compute matrix id: compute-matrix @@ -128,6 +129,7 @@ jobs: IMAGE_TAG_PREFIX: ${{ needs.compute-matrix.outputs.IMAGE_TAG_PREFIX }} LINUX_VER: ${{ matrix.linux_ver }} PYTHON_VER: ${{ matrix.python_ver }} + sha: ${{ inputs.sha }} build-cuopt-multiarch-manifest: name: Build cuopt multiarch manifest @@ -141,6 +143,7 @@ jobs: - name: Checkout code repo uses: actions/checkout@v4 with: + ref: ${{ inputs.sha }} fetch-depth: 0 - name: Login to DockerHub uses: docker/login-action@v3 @@ -183,3 +186,4 @@ jobs: CUDA_VER: ${{ matrix.CUDA_VER }} PYTHON_VER: ${{ matrix.PYTHON_VER }} IMAGE_TAG_PREFIX: ${{ needs.compute-matrix.outputs.IMAGE_TAG_PREFIX }} + sha: ${{ inputs.sha }} diff --git a/.github/workflows/test_images.yaml b/.github/workflows/test_images.yaml index bff2bb6d11..bfeda82f61 100644 --- a/.github/workflows/test_images.yaml +++ b/.github/workflows/test_images.yaml @@ -27,7 +27,10 @@ on: IMAGE_TAG_PREFIX: required: true type: string - + sha: + description: "sha: full git commit SHA to check out" + required: true + type: string jobs: @@ -56,6 +59,7 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 0 + ref: ${{ inputs.sha }} - name: Test cuopt run: | bash ./ci/docker/test_image.sh diff --git a/ci/docker/Dockerfile b/ci/docker/Dockerfile index e3ee6c73ca..f00564fcf2 100644 --- a/ci/docker/Dockerfile +++ b/ci/docker/Dockerfile @@ 
-107,7 +107,7 @@ RUN mkdir -p /opt/cuopt && \ WORKDIR /opt/cuopt # Copy all static files in a single layer -COPY ./LICENSE ./VERSION ./THIRD_PARTY_LICENSES /opt/cuopt/ +COPY ./LICENSE ./VERSION ./THIRD_PARTY_LICENSES ./COMMIT_SHA /opt/cuopt/ # Copy CUDA libraries from the cuda-libs stage COPY --from=cuda-libs /usr/local/cuda/lib64/libnvrtc* /usr/local/cuda/lib64/ diff --git a/python/cuopt_server/cuopt_server/utils/utils.py b/python/cuopt_server/cuopt_server/utils/utils.py index 9384957ca1..5eb493644c 100644 --- a/python/cuopt_server/cuopt_server/utils/utils.py +++ b/python/cuopt_server/cuopt_server/utils/utils.py @@ -16,7 +16,7 @@ import json import os -from cuopt_server.utils.job_queue import SolverLPJob, lp_datamodel_compat +from cuopt_server.utils.job_queue import SolverLPJob from cuopt_server.utils.linear_programming.data_definition import LPData from cuopt_server.utils.linear_programming.solver import ( create_data_model as lp_create_data_model, @@ -73,14 +73,12 @@ def build_lp_datamodel_from_json(data): """ if isinstance(data, dict): - lp_datamodel_compat(data) data = LPData.parse_obj(data) elif os.path.isfile(data): with open(data, "r") as f: data = json.loads(f.read()) # Remove this once we support variable names data.pop("variable_names") - lp_datamodel_compat(data) data = LPData.parse_obj(data) else: raise ValueError( From 2eccc32a264d2ddc357e3e015452d49b4ca9461c Mon Sep 17 00:00:00 2001 From: Franc-Z <85468633+Franc-Z@users.noreply.github.com> Date: Wed, 3 Sep 2025 10:25:59 +0800 Subject: [PATCH 08/33] QPS extension for MPS (#352) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # QPS (Quadratic Programming Specification) Support This library now supports the QPS format, which is an extension of the standard MPS format for representing quadratic programming problems. 
## QPS Format Extensions QPS files are a superset of MPS files, adding the following new section to the standard MPS sections: ### QUADOBJ Section Defines quadratic terms in the objective function. Format: ``` QUADOBJ variable1 variable2 coefficient X1 X1 2.0 X1 X2 1.0 X2 X2 2.0 ``` This represents quadratic terms in the objective function: 2.0*X1² + 1.0*X1*X2 + 2.0*X2² **Note**: QUADOBJ stores only the upper triangular elements of the quadratic matrix, which are automatically expanded to create the full symmetric matrix during parsing. ## API Usage ### Parsing QPS Files ```cpp #include // Parse QPS file (using the same API as MPS files) auto qp_model = cuopt::mps_parser::parse_mps("problem.qps", false); ``` ### Checking Quadratic Terms ```cpp // Check for quadratic objective function if (qp_model.has_quadratic_objective()) { const auto& Q_values = qp_model.get_quadratic_objective_values(); const auto& Q_indices = qp_model.get_quadratic_objective_indices(); const auto& Q_offsets = qp_model.get_quadratic_objective_offsets(); // Quadratic objective matrix stored in CSR format // Matrix is automatically expanded from upper triangular to full symmetric form } ``` ### Manually Setting Quadratic Data ```cpp // Set quadratic objective matrix std::vector Q_values = {2.0, 1.0, 1.0, 2.0}; std::vector Q_indices = {0, 1, 0, 1}; std::vector Q_offsets = {0, 2, 4}; qp_model.set_quadratic_objective_matrix(Q_values.data(), Q_values.size(), Q_indices.data(), Q_indices.size(), Q_offsets.data(), Q_offsets.size()); ``` ## Data Storage Format Quadratic matrix data is stored in CSR (Compressed Sparse Row) format, consistent with the linear constraint matrix A: - `Q_values`: Non-zero element values - `Q_indices`: Column indices of non-zero elements - `Q_offsets`: Row offset positions ## Backward Compatibility - All existing MPS parsing functionality remains unchanged - Standard MPS files are still fully compatible - QPS-specific features are activated only when corresponding sections are 
detected ## Example Files Refer to the `tests/test_quadratic.qps` file for a complete example of the QPS format. ## Testing Run tests to verify QPS functionality: ```bash # Build and run tests mkdir build && cd build cmake .. -DBUILD_TESTS=ON make ./MPS_PARSER_TEST ``` ## Technical Details ### CSR Matrix Representation The quadratic matrices use the same efficient sparse storage format as the linear constraint matrices: ```cpp // For a 2x2 quadratic matrix: // [2.0 1.0] // [0.0 2.0] Q_values = [2.0, 1.0, 2.0] // Non-zero values Q_indices = [0, 1, 1] // Column indices Q_offsets = [0, 2, 3] // Row start positions ``` ### Format Detection The library automatically detects QPS format by scanning for: - `QUADOBJ` section headers - Quadratic coefficient entries This enables seamless handling of both MPS and QPS files with the same API. ### Performance Considerations - QPS parsing performance scales linearly with problem size: **O(m + n + nnz)** - Uses efficient double transpose algorithm instead of sorting: **O(m + n + nnz)** vs **O(nnz log nnz)** - CSR storage provides optimal memory usage for sparse quadratic matrices - Upper triangular QUADOBJ input automatically expanded to full symmetric CSR format - No performance penalty for standard MPS files without quadratic terms ## Supported QPS Features ### Quadratic Objective Functions - ✅ Full support for `QUADOBJ` sections - ✅ Upper triangular storage format (QUADOBJ standard) - ✅ Automatic symmetric matrix expansion using double transpose algorithm - ✅ CSR format storage for efficient computation - ✅ Automatic sparsity detection - ✅ Linear complexity parsing: O(m + n + nnz) ### Validation and Error Handling - ✅ Comprehensive format validation - ✅ Detailed error messages for malformed QPS files - ✅ Graceful handling of missing sections - ✅ Variable name consistency checking ## Integration Examples ### With Optimization Solvers ```cpp // Example integration with optimization libraries auto qp_data = 
cuopt::mps_parser::parse_mps("portfolio.qps"); if (qp_data.has_quadratic_objective()) { // Pass CSR matrices directly to solver // Matrix is automatically expanded from QUADOBJ upper triangular format solver.set_quadratic_objective( qp_data.get_quadratic_objective_values(), qp_data.get_quadratic_objective_indices(), qp_data.get_quadratic_objective_offsets() ); } ``` ### Data Analysis ```cpp // Analyze problem characteristics std::cout << "Problem type: " << (qp_data.has_quadratic_objective() ? "QP" : "LP") << std::endl; std::cout << "Quadratic density: " << qp_data.get_quadratic_objective_values().size() << " / " << (qp_data.get_n_variables() * qp_data.get_n_variables()) << std::endl; ``` Authors: - https://github.com/Franc-Z - Ramakrishnap (https://github.com/rgsl888prabhu) Approvers: - Chris Maes (https://github.com/chris-maes) - Ramakrishnap (https://github.com/rgsl888prabhu) URL: https://github.com/NVIDIA/cuopt/pull/352 --- .gitattributes | 1 + .gitignore | 2 + .../include/mps_parser/data_model_view.hpp | 55 ++++++++ .../include/mps_parser/mps_data_model.hpp | 37 +++++ .../include/mps_parser/parser.hpp | 13 +- cpp/libmps_parser/src/data_model_view.cpp | 52 +++++++ cpp/libmps_parser/src/mps_data_model.cpp | 73 ++++++++++ cpp/libmps_parser/src/mps_parser.cpp | 132 ++++++++++++++++++ cpp/libmps_parser/src/mps_parser.hpp | 9 ++ cpp/libmps_parser/tests/mps_parser_test.cpp | 69 +++++++++ datasets/quadratic_programming/QP_Test_1.qps | 20 +++ datasets/quadratic_programming/QP_Test_2.qps | 20 +++ 12 files changed, 479 insertions(+), 4 deletions(-) create mode 100644 datasets/quadratic_programming/QP_Test_1.qps create mode 100644 datasets/quadratic_programming/QP_Test_2.qps diff --git a/.gitattributes b/.gitattributes index cd8f8830bf..f16f39ab5a 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,2 @@ *.mps linguist-detectable=false +*.qps linguist-detectable=false diff --git a/.gitignore b/.gitignore index 9755d86dce..9edc9823c1 100644 --- a/.gitignore +++ 
b/.gitignore @@ -66,3 +66,5 @@ docs/cuopt/build # generated version file cpp/include/cuopt/semantic_version.hpp +!datasets/quadratic_programming +!datasets/quadratic_programming/** diff --git a/cpp/libmps_parser/include/mps_parser/data_model_view.hpp b/cpp/libmps_parser/include/mps_parser/data_model_view.hpp index 05f75f7340..17f74a6c2c 100644 --- a/cpp/libmps_parser/include/mps_parser/data_model_view.hpp +++ b/cpp/libmps_parser/include/mps_parser/data_model_view.hpp @@ -229,6 +229,30 @@ class data_model_view_t { */ void set_initial_dual_solution(const f_t* initial_dual_solution, i_t size); + /** + * @brief Set the quadratic objective matrix (Q) in CSR format for QPS files. + * + * @note This is used for quadratic programming problems where the objective + * function contains quadratic terms: (1/2) * x^T * Q * x + c^T * x + * cuOpt does not own or copy this data. + * + * @param[in] Q_values Device memory pointer to values of the CSR representation of the quadratic + * objective matrix + * @param size_values Size of the Q_values array + * @param[in] Q_indices Device memory pointer to indices of the CSR representation of the + * quadratic objective matrix + * @param size_indices Size of the Q_indices array + * @param[in] Q_offsets Device memory pointer to offsets of the CSR representation of the + * quadratic objective matrix + * @param size_offsets Size of the Q_offsets array + */ + void set_quadratic_objective_matrix(const f_t* Q_values, + i_t size_values, + const i_t* Q_indices, + i_t size_indices, + const i_t* Q_offsets, + i_t size_offsets); + /** * @brief Get the sense value (false:minimize, true:maximize) * @@ -339,6 +363,32 @@ class data_model_view_t { */ std::string get_objective_name() const noexcept; + // QPS-specific getters + /** + * @brief Get the quadratic objective matrix values + * + * @return span + */ + span get_quadratic_objective_values() const noexcept; + /** + * @brief Get the quadratic objective matrix indices + * + * @return span + */ + span 
get_quadratic_objective_indices() const noexcept; + /** + * @brief Get the quadratic objective matrix offsets + * + * @return span + */ + span get_quadratic_objective_offsets() const noexcept; + /** + * @brief Check if the problem has quadratic objective terms + * + * @return bool + */ + bool has_quadratic_objective() const noexcept; + private: bool maximize_{false}; span A_; @@ -361,6 +411,11 @@ class data_model_view_t { span initial_primal_solution_; span initial_dual_solution_; + // QPS-specific data members for quadratic programming support + span Q_objective_; + span Q_objective_indices_; + span Q_objective_offsets_; + }; // class data_model_view_t } // namespace cuopt::mps_parser diff --git a/cpp/libmps_parser/include/mps_parser/mps_data_model.hpp b/cpp/libmps_parser/include/mps_parser/mps_data_model.hpp index 9101b98cf0..9ebc480533 100644 --- a/cpp/libmps_parser/include/mps_parser/mps_data_model.hpp +++ b/cpp/libmps_parser/include/mps_parser/mps_data_model.hpp @@ -251,6 +251,26 @@ class mps_data_model_t { */ void set_initial_dual_solution(const f_t* initial_dual_solution, i_t size); + /** + * @brief Set the quadratic objective matrix (Q) in CSR format for QPS files. 
+ * + * @note This is used for quadratic programming problems where the objective + * function contains quadratic terms: (1/2) * x^T * Q * x + c^T * x + * + * @param[in] Q_values Values of the CSR representation of the quadratic objective matrix + * @param size_values Size of the Q_values array + * @param[in] Q_indices Indices of the CSR representation of the quadratic objective matrix + * @param size_indices Size of the Q_indices array + * @param[in] Q_offsets Offsets of the CSR representation of the quadratic objective matrix + * @param size_offsets Size of the Q_offsets array + */ + void set_quadratic_objective_matrix(const f_t* Q_values, + i_t size_values, + const i_t* Q_indices, + i_t size_indices, + const i_t* Q_offsets, + i_t size_offsets); + i_t get_n_variables() const; i_t get_n_constraints() const; i_t get_nnz() const; @@ -285,6 +305,16 @@ class mps_data_model_t { const std::vector& get_variable_names() const; const std::vector& get_row_names() const; + // QPS-specific getters + const std::vector& get_quadratic_objective_values() const; + std::vector& get_quadratic_objective_values(); + const std::vector& get_quadratic_objective_indices() const; + std::vector& get_quadratic_objective_indices(); + const std::vector& get_quadratic_objective_offsets() const; + std::vector& get_quadratic_objective_offsets(); + + bool has_quadratic_objective() const noexcept; + /** whether to maximize or minimize the objective function */ bool maximize_; /** @@ -333,6 +363,13 @@ class mps_data_model_t { std::vector initial_primal_solution_; /** Initial dual solution */ std::vector initial_dual_solution_; + + // QPS-specific data members for quadratic programming support + /** Quadratic objective matrix in CSR format (for (1/2) * x^T * Q * x term) */ + std::vector Q_objective_; + std::vector Q_objective_indices_; + std::vector Q_objective_offsets_; + }; // class mps_data_model_t } // namespace cuopt::mps_parser diff --git a/cpp/libmps_parser/include/mps_parser/parser.hpp 
b/cpp/libmps_parser/include/mps_parser/parser.hpp index 9d55aaac0e..fedfca514e 100644 --- a/cpp/libmps_parser/include/mps_parser/parser.hpp +++ b/cpp/libmps_parser/include/mps_parser/parser.hpp @@ -22,14 +22,19 @@ namespace cuopt::mps_parser { /** - * @brief Reads the equation from the input text file which is MPS formatted + * @brief Reads the equation from the input text file which is MPS or QPS formatted * * Read this link http://lpsolve.sourceforge.net/5.5/mps-format.htm for more * details on both free and fixed MPS format. * - * @param[in] mps_file_path Path to MPS formatted file. - * @param[in] fixed_mps_format If MPS file should be parsed as fixed, false by default - * @return mps_data_model_t A fully formed LP problem which represents the given MPS file + * This function supports both standard MPS files (for linear programming) and + * QPS files (for quadratic programming). QPS files are MPS files with additional + * sections: + * - QUADOBJ: Defines quadratic terms in the objective function + * + * @param[in] mps_file_path Path to MPS or QPS formatted file. 
+ * @param[in] fixed_mps_format If MPS/QPS file should be parsed as fixed format, false by default + * @return mps_data_model_t A fully formed LP/QP problem which represents the given MPS/QPS file */ template mps_data_model_t parse_mps(const std::string& mps_file_path, diff --git a/cpp/libmps_parser/src/data_model_view.cpp b/cpp/libmps_parser/src/data_model_view.cpp index 4bca8c0563..efbe1a0f25 100644 --- a/cpp/libmps_parser/src/data_model_view.cpp +++ b/cpp/libmps_parser/src/data_model_view.cpp @@ -152,6 +152,33 @@ void data_model_view_t::set_initial_dual_solution(const f_t* initial_d initial_dual_solution_ = span(initial_dual_solution, size); } +template +void data_model_view_t::set_quadratic_objective_matrix(const f_t* Q_values, + i_t size_values, + const i_t* Q_indices, + i_t size_indices, + const i_t* Q_offsets, + i_t size_offsets) +{ + if (size_values != 0) { + mps_parser_expects( + Q_values != nullptr, error_type_t::ValidationError, "Q_values cannot be null"); + } + Q_objective_ = span(Q_values, size_values); + + if (size_indices != 0) { + mps_parser_expects( + Q_indices != nullptr, error_type_t::ValidationError, "Q_indices cannot be null"); + } + Q_objective_indices_ = span(Q_indices, size_indices); + + mps_parser_expects( + Q_offsets != nullptr, error_type_t::ValidationError, "Q_offsets cannot be null"); + mps_parser_expects( + size_offsets > 0, error_type_t::ValidationError, "size_offsets cannot be empty"); + Q_objective_offsets_ = span(Q_offsets, size_offsets); +} + template void data_model_view_t::set_row_types(const char* row_types, i_t size) { @@ -279,6 +306,31 @@ bool data_model_view_t::get_sense() const noexcept return maximize_; } +// QPS-specific getter implementations +template +span data_model_view_t::get_quadratic_objective_values() const noexcept +{ + return Q_objective_; +} + +template +span data_model_view_t::get_quadratic_objective_indices() const noexcept +{ + return Q_objective_indices_; +} + +template +span 
data_model_view_t::get_quadratic_objective_offsets() const noexcept +{ + return Q_objective_offsets_; +} + +template +bool data_model_view_t::has_quadratic_objective() const noexcept +{ + return Q_objective_.size() > 0; +} + // NOTE: Explicitly instantiate all types here in order to avoid linker error template class data_model_view_t; diff --git a/cpp/libmps_parser/src/mps_data_model.cpp b/cpp/libmps_parser/src/mps_data_model.cpp index ee92cd38c2..6e756a255a 100644 --- a/cpp/libmps_parser/src/mps_data_model.cpp +++ b/cpp/libmps_parser/src/mps_data_model.cpp @@ -199,6 +199,36 @@ void mps_data_model_t::set_initial_dual_solution(const f_t* initial_du std::copy(initial_dual_solution, initial_dual_solution + size, initial_dual_solution_.data()); } +template +void mps_data_model_t::set_quadratic_objective_matrix(const f_t* Q_values, + i_t size_values, + const i_t* Q_indices, + i_t size_indices, + const i_t* Q_offsets, + i_t size_offsets) +{ + if (size_values != 0) { + mps_parser_expects( + Q_values != nullptr, error_type_t::ValidationError, "Q_values cannot be null"); + } + Q_objective_.resize(size_values); + std::copy(Q_values, Q_values + size_values, Q_objective_.data()); + + if (size_indices != 0) { + mps_parser_expects( + Q_indices != nullptr, error_type_t::ValidationError, "Q_indices cannot be null"); + } + Q_objective_indices_.resize(size_indices); + std::copy(Q_indices, Q_indices + size_indices, Q_objective_indices_.data()); + + mps_parser_expects( + Q_offsets != nullptr, error_type_t::ValidationError, "Q_offsets cannot be null"); + mps_parser_expects( + size_offsets > 0, error_type_t::ValidationError, "size_offsets cannot be empty"); + Q_objective_offsets_.resize(size_offsets); + std::copy(Q_offsets, Q_offsets + size_offsets, Q_objective_offsets_.data()); +} + template const std::vector& mps_data_model_t::get_constraint_matrix_values() const { @@ -397,6 +427,49 @@ i_t mps_data_model_t::get_nnz() const return A_.size(); } +// QPS-specific getter implementations 
+template +const std::vector& mps_data_model_t::get_quadratic_objective_values() const +{ + return Q_objective_; +} + +template +std::vector& mps_data_model_t::get_quadratic_objective_values() +{ + return Q_objective_; +} + +template +const std::vector& mps_data_model_t::get_quadratic_objective_indices() const +{ + return Q_objective_indices_; +} + +template +std::vector& mps_data_model_t::get_quadratic_objective_indices() +{ + return Q_objective_indices_; +} + +template +const std::vector& mps_data_model_t::get_quadratic_objective_offsets() const +{ + return Q_objective_offsets_; +} + +template +std::vector& mps_data_model_t::get_quadratic_objective_offsets() +{ + return Q_objective_offsets_; +} + +template +bool mps_data_model_t::has_quadratic_objective() const noexcept +{ + return !Q_objective_.empty(); +} + // NOTE: Explicitly instantiate all types here in order to avoid linker error template class mps_data_model_t; diff --git a/cpp/libmps_parser/src/mps_parser.cpp b/cpp/libmps_parser/src/mps_parser.cpp index f88568db9e..a4b4223569 100644 --- a/cpp/libmps_parser/src/mps_parser.cpp +++ b/cpp/libmps_parser/src/mps_parser.cpp @@ -264,6 +264,77 @@ void mps_parser_t::fill_problem(mps_data_model_t& problem) problem.set_variable_types(std::move(var_types)); problem.set_row_names(std::move(row_names)); problem.set_maximize(maximize); + + // Helper function to build CSR format using double transpose (O(m+n+nnz) instead of + // O(nnz*log(nnz))) For QUADOBJ: handles upper triangular input by expanding to full symmetric + // matrix + auto build_csr_via_transpose = [](const std::vector>& entries, + i_t num_rows, + i_t num_cols, + bool is_quadobj = false) { + struct CSRResult { + std::vector values; + std::vector indices; + std::vector offsets; + }; + + if (entries.empty()) { + CSRResult result; + result.offsets.resize(num_rows + 1, 0); + return result; + } + + // First transpose: build CSC format (entries sorted by column) + std::vector>> csc_data(num_cols); + for (const 
auto& entry : entries) { + i_t row = std::get<0>(entry); + i_t col = std::get<1>(entry); + f_t val = std::get<2>(entry); + + // For QUADOBJ (upper triangular), add both (row,col) and (col,row) if off-diagonal + csc_data[col].emplace_back(row, val); + if (is_quadobj && row != col) { csc_data[row].emplace_back(col, val); } + } + + // Second transpose: convert CSC to CSR (entries sorted by row, columns within rows sorted) + std::vector>> csr_data(num_rows); + for (i_t col = 0; col < num_cols; ++col) { + for (const auto& [row, val] : csc_data[col]) { + csr_data[row].emplace_back(col, val); + } + } + + // Build final CSR format + CSRResult result; + result.offsets.reserve(num_rows + 1); + result.offsets.push_back(0); + + for (i_t row = 0; row < num_rows; ++row) { + for (const auto& [col, val] : csr_data[row]) { + result.values.push_back(val); + result.indices.push_back(col); + } + result.offsets.push_back(result.values.size()); + } + + return result; + }; + + // Process QUADOBJ data if present (upper triangular format) + if (!quadobj_entries.empty()) { + // Convert quadratic objective entries to CSR format using double transpose + // QUADOBJ stores upper triangular elements, so we expand to full symmetric matrix + i_t num_vars = static_cast(var_names.size()); + auto csr_result = build_csr_via_transpose(quadobj_entries, num_vars, num_vars, true); + + // Use optimized double transpose method - O(m+n+nnz) instead of O(nnz*log(nnz)) + problem.set_quadratic_objective_matrix(csr_result.values.data(), + csr_result.values.size(), + csr_result.indices.data(), + csr_result.indices.size(), + csr_result.offsets.data(), + csr_result.offsets.size()); + } } template @@ -426,6 +497,16 @@ void mps_parser_t::parse_string(char* buf) inside_ranges_ = false; inside_objname_ = true; inside_objsense_ = false; + } else if (line.find("QUADOBJ", 0, 7) == 0) { + encountered_sections.insert("QUADOBJ"); + inside_rows_ = false; + inside_columns_ = false; + inside_rhs_ = false; + inside_bounds_ = 
false; + inside_ranges_ = false; + inside_objname_ = false; + inside_objsense_ = false; + inside_quadobj_ = true; } else if (line.find("ENDATA", 0, 6) == 0) { encountered_sections.insert("ENDATA"); break; @@ -462,6 +543,8 @@ void mps_parser_t::parse_string(char* buf) parse_objsense(line); } else if (inside_objname_) { parse_objname(line); + } else if (inside_quadobj_) { + parse_quadobj(line); } else { mps_parser_expects(false, error_type_t::ValidationError, @@ -981,6 +1064,55 @@ void mps_parser_t::parse_objname(std::string_view line) } } +template +void mps_parser_t::parse_quadobj(std::string_view line) +{ + // Parse QUADOBJ section for quadratic objective terms + // Format: variable1 variable2 value + + std::string var1_name, var2_name; + f_t value; + + if (fixed_mps_format) { + mps_parser_expects(line.size() >= 25, + error_type_t::ValidationError, + "QUADOBJ should have at least 3 entities! line=%s", + std::string(line).c_str()); + + var1_name = std::string(trim(line.substr(4, 8))); // max of 8 chars allowed + var2_name = std::string(trim(line.substr(14, 8))); // max of 8 chars allowed + if (var1_name[0] == '$' || var2_name[0] == '$') return; + + i_t pos = 24; + value = get_numerical_bound(line, pos); + } else { + std::stringstream ss{std::string(line)}; + ss >> var1_name >> var2_name >> value; + if (var1_name[0] == '$' || var2_name[0] == '$') return; + } + + // Find variable indices + auto var1_it = var_names_map.find(var1_name); + auto var2_it = var_names_map.find(var2_name); + + mps_parser_expects(var1_it != var_names_map.end(), + error_type_t::ValidationError, + "Variable '%s' not found in QUADOBJ! line=%s", + var1_name.c_str(), + std::string(line).c_str()); + mps_parser_expects(var2_it != var_names_map.end(), + error_type_t::ValidationError, + "Variable '%s' not found in QUADOBJ! 
line=%s", + var2_name.c_str(), + std::string(line).c_str()); + + i_t var1_id = var1_it->second; + i_t var2_id = var2_it->second; + + // Store quadratic objective entry (QUADOBJ stores upper triangular elements only) + quadobj_entries.emplace_back(var1_id, var2_id, value); +} + template template f_t mps_parser_t::get_numerical_bound(std::string_view line, i_t& start) diff --git a/cpp/libmps_parser/src/mps_parser.hpp b/cpp/libmps_parser/src/mps_parser.hpp index afaf470f31..e06b2bbe63 100644 --- a/cpp/libmps_parser/src/mps_parser.hpp +++ b/cpp/libmps_parser/src/mps_parser.hpp @@ -123,6 +123,10 @@ class mps_parser_t { /** Objection function sense (maximize of minimize) */ bool maximize{false}; + // QPS-specific data for quadratic programming + /** Quadratic objective matrix entries */ + std::vector> quadobj_entries{}; + private: bool inside_rows_{false}; bool inside_columns_{false}; @@ -132,6 +136,8 @@ class mps_parser_t { bool inside_objsense_{false}; bool inside_intcapture_{false}; bool inside_objname_{false}; + // QPS-specific parsing states + bool inside_quadobj_{false}; std::unordered_set encountered_sections{}; std::unordered_map row_names_map{}; std::unordered_map var_names_map{}; @@ -169,6 +175,9 @@ class mps_parser_t { void parse_ranges(std::string_view line); i_t insert_range_value(std::string_view line, bool skip_range = true); + // QPS-specific parsing methods + void parse_quadobj(std::string_view line); + }; // class mps_parser_t } // namespace cuopt::mps_parser diff --git a/cpp/libmps_parser/tests/mps_parser_test.cpp b/cpp/libmps_parser/tests/mps_parser_test.cpp index b70e8ff290..1e7f218c82 100644 --- a/cpp/libmps_parser/tests/mps_parser_test.cpp +++ b/cpp/libmps_parser/tests/mps_parser_test.cpp @@ -757,4 +757,73 @@ TEST(mps_parser, good_mps_file_partial_bounds) EXPECT_EQ(10.0, mps.variable_upper_bounds[1]); } +// ================================================================================================ +// QPS (Quadratic Programming) Support Tests 
+// ================================================================================================ + +// QPS-specific tests for quadratic programming support +TEST(qps_parser, quadratic_objective_basic) +{ + // Create a simple QPS test to verify quadratic objective parsing + // This would require actual QPS test files - for now, test the API + mps_data_model_t model; + + // Test setting quadratic objective matrix + std::vector Q_values = {2.0, 1.0, 1.0, 2.0}; // 2x2 matrix + std::vector Q_indices = {0, 1, 0, 1}; + std::vector Q_offsets = {0, 2, 4}; // CSR offsets + + model.set_quadratic_objective_matrix(Q_values.data(), + Q_values.size(), + Q_indices.data(), + Q_indices.size(), + Q_offsets.data(), + Q_offsets.size()); + + // Verify the data was stored correctly + EXPECT_TRUE(model.has_quadratic_objective()); + EXPECT_EQ(4, model.get_quadratic_objective_values().size()); + EXPECT_EQ(2.0, model.get_quadratic_objective_values()[0]); + EXPECT_EQ(1.0, model.get_quadratic_objective_values()[1]); +} + +// Test actual QPS files from the dataset +TEST(qps_parser, test_qps_files) +{ + // Test QP_Test_1.qps if it exists + if (file_exists("quadratic_programming/QP_Test_1.qps")) { + auto parsed_data = parse_mps( + cuopt::test::get_rapids_dataset_root_dir() + "/quadratic_programming/QP_Test_1.qps", false); + + EXPECT_EQ("QP_Test_1", parsed_data.get_problem_name()); + EXPECT_EQ(2, parsed_data.get_n_variables()); // C------1 and C------2 + EXPECT_EQ(1, parsed_data.get_n_constraints()); // R------1 + EXPECT_TRUE(parsed_data.has_quadratic_objective()); + + // Check variable bounds + const auto& lower_bounds = parsed_data.get_variable_lower_bounds(); + const auto& upper_bounds = parsed_data.get_variable_upper_bounds(); + + EXPECT_NEAR(2.0, lower_bounds[0], tolerance); // C------1 lower bound + EXPECT_NEAR(50.0, upper_bounds[0], tolerance); // C------1 upper bound + EXPECT_NEAR(-50.0, lower_bounds[1], tolerance); // C------2 lower bound + EXPECT_NEAR(50.0, upper_bounds[1], 
tolerance); // C------2 upper bound + } + + // Test QP_Test_2.qps if it exists + if (file_exists("quadratic_programming/QP_Test_2.qps")) { + auto parsed_data = parse_mps( + cuopt::test::get_rapids_dataset_root_dir() + "/quadratic_programming/QP_Test_2.qps", false); + + EXPECT_EQ("QP_Test_2", parsed_data.get_problem_name()); + EXPECT_EQ(3, parsed_data.get_n_variables()); // C------1, C------2, C------3 + EXPECT_EQ(1, parsed_data.get_n_constraints()); // R------1 + EXPECT_TRUE(parsed_data.has_quadratic_objective()); + + // Check that quadratic objective matrix has values + const auto& Q_values = parsed_data.get_quadratic_objective_values(); + EXPECT_GT(Q_values.size(), 0) << "Quadratic objective should have non-zero elements"; + } +} + } // namespace cuopt::mps_parser diff --git a/datasets/quadratic_programming/QP_Test_1.qps b/datasets/quadratic_programming/QP_Test_1.qps new file mode 100644 index 0000000000..e9fa9c9024 --- /dev/null +++ b/datasets/quadratic_programming/QP_Test_1.qps @@ -0,0 +1,20 @@ +NAME QP_Test_1 +ROWS + N OBJ.FUNC + G R------1 +COLUMNS + C------1 R------1 0.100000e+02 + C------2 R------1 -.100000e+01 +RHS + RHS OBJ.FUNC 0.100000e+03 + RHS R------1 0.100000e+02 +RANGES +BOUNDS + LO BOUNDS C------1 0.200000e+01 + UP BOUNDS C------1 0.500000e+02 + LO BOUNDS C------2 -.500000e+02 + UP BOUNDS C------2 0.500000e+02 +QUADOBJ + C------1 C------1 0.200000e-01 + C------2 C------2 0.200000e+01 +ENDATA diff --git a/datasets/quadratic_programming/QP_Test_2.qps b/datasets/quadratic_programming/QP_Test_2.qps new file mode 100644 index 0000000000..fe07c33258 --- /dev/null +++ b/datasets/quadratic_programming/QP_Test_2.qps @@ -0,0 +1,20 @@ +NAME QP_Test_2 +ROWS + N OBJ.FUNC + G R------1 +COLUMNS + C------1 OBJ.FUNC -.800000e+01 R------1 -.100000e+01 + C------2 OBJ.FUNC -.600000e+01 R------1 -.100000e+01 + C------3 OBJ.FUNC -.400000e+01 R------1 -.200000e+01 +RHS + RHS OBJ.FUNC -.900000e+01 + RHS R------1 -.300000e+01 +RANGES +BOUNDS +QUADOBJ + C------1 C------1 
0.400000e+01 + C------1 C------2 0.200000e+01 + C------1 C------3 0.200000e+01 + C------2 C------2 0.400000e+01 + C------3 C------3 0.200000e+01 +ENDATA From 1e716586f1d5daddbcd12b61506f28ab7de2e3e8 Mon Sep 17 00:00:00 2001 From: Nicolas Blin <31096601+Kh4ster@users.noreply.github.com> Date: Wed, 3 Sep 2025 16:48:44 +0200 Subject: [PATCH 09/33] Faster engine compile time (#316) This PR aims at making the compile time faster by adding several options: - build lp only (extracted MIP files necessary in LP), also meant we needed a skip c and python adapter option - put back no fetch rapids - put back not building test option - added skip routing - added skip write of fatbin - adding ccache to libcuopt target - added two benchmarking targets, one for LP and for MIP Authors: - Nicolas Blin (https://github.com/Kh4ster) Approvers: - Alice Boucher (https://github.com/aliceb-nv) - Rajesh Gandham (https://github.com/rg20) - Ramakrishnap (https://github.com/rgsl888prabhu) URL: https://github.com/NVIDIA/cuopt/pull/316 --- .../linear_programming/cuopt/run_mip.cpp | 2 +- .../linear_programming/cuopt/run_pdlp.cu | 2 +- .../utils/benchmark_lp_mittelmann.sh | 4 +- build.sh | 44 ++++++- cpp/CMakeLists.txt | 109 +++++++++++------- cpp/libmps_parser/CMakeLists.txt | 20 ++-- cpp/src/CMakeLists.txt | 7 +- cpp/src/linear_programming/CMakeLists.txt | 16 ++- cpp/src/mip/CMakeLists.txt | 29 +++-- cpp/tests/CMakeLists.txt | 63 +++++----- 10 files changed, 203 insertions(+), 93 deletions(-) diff --git a/benchmarks/linear_programming/cuopt/run_mip.cpp b/benchmarks/linear_programming/cuopt/run_mip.cpp index 713d55f16b..e3e8641b4f 100644 --- a/benchmarks/linear_programming/cuopt/run_mip.cpp +++ b/benchmarks/linear_programming/cuopt/run_mip.cpp @@ -292,7 +292,7 @@ void return_gpu_to_the_queue(std::unordered_map& pid_gpu_map, int main(int argc, char* argv[]) { - argparse::ArgumentParser program("solve_mps_file"); + argparse::ArgumentParser program("solve_MIP"); // Define all arguments with 
appropriate defaults and help messages program.add_argument("--path").help("input path").required(); diff --git a/benchmarks/linear_programming/cuopt/run_pdlp.cu b/benchmarks/linear_programming/cuopt/run_pdlp.cu index e4fad3c266..396ff6fbd1 100644 --- a/benchmarks/linear_programming/cuopt/run_pdlp.cu +++ b/benchmarks/linear_programming/cuopt/run_pdlp.cu @@ -113,7 +113,7 @@ static cuopt::linear_programming::pdlp_solver_settings_t create_sol int main(int argc, char* argv[]) { // Parse binary arguments - argparse::ArgumentParser program("solve_mps_file"); + argparse::ArgumentParser program("solve_LP"); parse_arguments(program); try { diff --git a/benchmarks/linear_programming/utils/benchmark_lp_mittelmann.sh b/benchmarks/linear_programming/utils/benchmark_lp_mittelmann.sh index bf561fceba..0f2b475cc8 100755 --- a/benchmarks/linear_programming/utils/benchmark_lp_mittelmann.sh +++ b/benchmarks/linear_programming/utils/benchmark_lp_mittelmann.sh @@ -30,12 +30,12 @@ echo "Download done" # EAGER module loading to simulate real-life condition export CUDA_MODULE_LOADING=EAGER -# Benchmark all instances (cuOpt needs to be compiled first) +# Benchmark all instances (cuOpt needs to be compiled first, you can compile in LP only mode and you should turn on BUILD_LP_BENCHMARKS) for instance in ${CUOPT_HOME}/benchmarks/linear_programming/datasets/*/ ; do # Will generate the solver log for each instance. 
Could addtionally generate the solution file by uncommenting the --solution-path instance_name=$(basename $instance) echo "Parsing ${instance_name}.mps then solving" - ${CUOPT_HOME}/cpp/build/solve_MPS_file --path ${CUOPT_HOME}/benchmarks/linear_programming/datasets/${instance_name}/${instance_name}.mps --time-limit 3600 # --solution-path $CUOPT_HOME/benchmarks/linear_programming/datasets/$instance.sol + ${CUOPT_HOME}/cpp/build/solve_LP --path ${CUOPT_HOME}/benchmarks/linear_programming/datasets/${instance_name}/${instance_name}.mps --time-limit 3600 # --solution-path $CUOPT_HOME/benchmarks/linear_programming/datasets/$instance.sol done echo "Benchmark done" diff --git a/build.sh b/build.sh index 0d376c5380..15367eb3ee 100755 --- a/build.sh +++ b/build.sh @@ -27,7 +27,7 @@ REPODIR=$(cd "$(dirname "$0")"; pwd) LIBCUOPT_BUILD_DIR=${LIBCUOPT_BUILD_DIR:=${REPODIR}/cpp/build} LIBMPS_PARSER_BUILD_DIR=${LIBMPS_PARSER_BUILD_DIR:=${REPODIR}/cpp/libmps_parser/build} -VALIDARGS="clean libcuopt libmps_parser cuopt_mps_parser cuopt cuopt_server cuopt_sh_client docs deb -a -b -g -v -l= --verbose-pdlp [--cmake-args=\\\"\\\"] [--cache-tool=] -n --allgpuarch --ci-only-arch --show_depr_warn -h --help" +VALIDARGS="clean libcuopt libmps_parser cuopt_mps_parser cuopt cuopt_server cuopt_sh_client docs deb -a -b -g -v -l= --verbose-pdlp --build-lp-only --no-fetch-rapids --skip-c-python-adapters --skip-tests-build --skip-routing-build --skip-fatbin-write [--cmake-args=\\\"\\\"] [--cache-tool=] -n --allgpuarch --ci-only-arch --show_depr_warn -h --help" HELP="$0 [ ...] [ ...] where is: clean - remove all existing build artifacts and configuration (start over) @@ -45,8 +45,14 @@ HELP="$0 [ ...] [ ...] -a - Enable assertion (by default in debug mode) -b - Build with benchmark settings -n - no install step + --no-fetch-rapids - don't fetch rapids dependencies -l= - log level. Options are: TRACE | DEBUG | INFO | WARN | ERROR | CRITICAL | OFF. 
Default=INFO --verbose-pdlp - verbose mode for pdlp solver + --build-lp-only - build only linear programming components, excluding routing package and MIP-specific files + --skip-c-python-adapters - skip building C and Python adapter files (cython_solve.cu and cuopt_c.cpp) + --skip-tests-build - disable building of all tests + --skip-routing-build - skip building routing components + --skip-fatbin-write - skip the fatbin write --cache-tool= - pass the build cache tool (eg: ccache, sccache, distcc) that will be used to speedup the build process. --cmake-args=\\\"\\\" - pass arbitrary list of CMake configuration options (escape all quotes in argument) @@ -78,6 +84,11 @@ INSTALL_TARGET=install BUILD_DISABLE_DEPRECATION_WARNING=ON BUILD_ALL_GPU_ARCH=0 BUILD_CI_ONLY=0 +BUILD_LP_ONLY=0 +SKIP_C_PYTHON_ADAPTERS=0 +SKIP_TESTS_BUILD=0 +SKIP_ROUTING_BUILD=0 +WRITE_FATBIN=1 CACHE_ARGS=() PYTHON_ARGS_FOR_INSTALL=("-m" "pip" "install" "--no-build-isolation" "--no-deps") LOGGING_ACTIVE_LEVEL="INFO" @@ -208,6 +219,9 @@ fi if hasArg -n; then INSTALL_TARGET="" fi +if hasArg --no-fetch-rapids; then + FETCH_RAPIDS=OFF +fi if hasArg --allgpuarch; then BUILD_ALL_GPU_ARCH=1 fi @@ -217,6 +231,22 @@ fi if hasArg --show_depr_warn; then BUILD_DISABLE_DEPRECATION_WARNING=OFF fi +if hasArg --build-lp-only; then + BUILD_LP_ONLY=1 + SKIP_ROUTING_BUILD=1 # Automatically skip routing when building LP-only +fi +if hasArg --skip-c-python-adapters; then + SKIP_C_PYTHON_ADAPTERS=1 +fi +if hasArg --skip-tests-build; then + SKIP_TESTS_BUILD=1 +fi +if hasArg --skip-routing-build; then + SKIP_ROUTING_BUILD=1 +fi +if hasArg --skip-fatbin-write; then + WRITE_FATBIN=0 +fi function contains_string { local search_string="$1" @@ -264,6 +294,12 @@ if [ ${BUILD_CI_ONLY} -eq 1 ] && [ ${BUILD_ALL_GPU_ARCH} -eq 1 ]; then exit 1 fi +if [ ${BUILD_LP_ONLY} -eq 1 ] && [ ${SKIP_C_PYTHON_ADAPTERS} -eq 0 ]; then + echo "ERROR: When using --build-lp-only, you must also specify --skip-c-python-adapters" + echo "The C and 
Python adapter files (cython_solve.cu and cuopt_c.cpp) are not compatible with LP-only builds" + exit 1 +fi + if [ ${BUILD_ALL_GPU_ARCH} -eq 1 ]; then CUOPT_CMAKE_CUDA_ARCHITECTURES="RAPIDS" echo "Building for *ALL* supported GPU architectures..." @@ -308,6 +344,12 @@ if buildAll || hasArg libcuopt; then -DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ -DFETCH_RAPIDS=${FETCH_RAPIDS} \ + -DBUILD_LP_ONLY=${BUILD_LP_ONLY} \ + -DSKIP_C_PYTHON_ADAPTERS=${SKIP_C_PYTHON_ADAPTERS} \ + -DBUILD_TESTS=$((1 - ${SKIP_TESTS_BUILD})) \ + -DSKIP_ROUTING_BUILD=${SKIP_ROUTING_BUILD} \ + -DWRITE_FATBIN=${WRITE_FATBIN} \ + "${CACHE_ARGS[@]}" \ "${EXTRA_CMAKE_ARGS[@]}" \ "${REPODIR}"/cpp if hasArg -n; then diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 76c856e0e5..919064b872 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -52,11 +52,19 @@ option(CMAKE_CUDA_LINEINFO "Enable the -lineinfo option for nvcc useful for cuda option(BUILD_TESTS "Configure CMake to build tests" ON) option(DISABLE_OPENMP "Disable OpenMP" OFF) option(CUDA_STATIC_RUNTIME "Statically link the CUDA toolkit runtime and libraries" OFF) +option(BUILD_LP_ONLY "Build only linear programming components, exclude routing and MIP-specific files" OFF) +option(SKIP_C_PYTHON_ADAPTERS "Skip building C and Python adapter files (cython_solve.cu and cuopt_c.cpp)" OFF) +option(SKIP_ROUTING_BUILD "Skip building routing components" OFF) +option(WRITE_FATBIN "Enable fatbin writing" ON) message(VERBOSE "cuOpt: Enable nvcc -lineinfo: ${CMAKE_CUDA_LINEINFO}") message(VERBOSE "cuOpt: Build cuOpt unit-tests: ${BUILD_TESTS}") message(VERBOSE "cuOpt: Build cuOpt multigpu tests: ${BUILD_TESTS}") message(VERBOSE "cuOpt: Disable OpenMP: ${DISABLE_OPENMP}") +message(VERBOSE "cuOpt: Build LP-only mode: ${BUILD_LP_ONLY}") +message(VERBOSE "cuOpt: Skip C/Python adapters: ${SKIP_C_PYTHON_ADAPTERS}") +message(VERBOSE "cuOpt: Skip routing build: ${SKIP_ROUTING_BUILD}") 
+message(VERBOSE "cuOpt: fatbin: ${WRITE_FATBIN}") # ################################################################################################## # - compiler options ------------------------------------------------------------------------------ @@ -209,15 +217,17 @@ target_compile_options(cuopt "$<$:${CUOPT_CUDA_FLAGS}>" ) -file(WRITE "${CUOPT_BINARY_DIR}/fatbin.ld" - [=[ -SECTIONS -{ - .nvFatBinSegment : { *(.nvFatBinSegment) } - .nv_fatbin : { *(.nv_fatbin) } -} -]=]) -target_link_options(cuopt PRIVATE "${CUOPT_BINARY_DIR}/fatbin.ld") +if(WRITE_FATBIN) + file(WRITE "${CUOPT_BINARY_DIR}/fatbin.ld" + [=[ + SECTIONS + { + .nvFatBinSegment : { *(.nvFatBinSegment) } + .nv_fatbin : { *(.nv_fatbin) } + } + ]=]) + target_link_options(cuopt PRIVATE "${CUOPT_BINARY_DIR}/fatbin.ld") +endif() add_library(cuopt::cuopt ALIAS cuopt) # ################################################################################################## @@ -361,44 +371,63 @@ if(Doxygen_FOUND) endif() -add_executable(cuopt_cli cuopt_cli.cpp) -target_compile_options(cuopt_cli - PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" - "$<$:${CUOPT_CUDA_FLAGS}>" -) +if(NOT BUILD_LP_ONLY) + add_executable(cuopt_cli cuopt_cli.cpp) + target_compile_options(cuopt_cli + PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" + "$<$:${CUOPT_CUDA_FLAGS}>" + ) -target_include_directories(cuopt_cli - PRIVATE - "${CMAKE_CURRENT_SOURCE_DIR}/src" - PUBLIC - "$" - "$" - "$" -) + target_include_directories(cuopt_cli + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/src" + PUBLIC + "$" + "$" + "$" + ) -target_link_libraries(cuopt_cli - PUBLIC - cuopt - OpenMP::OpenMP_CXX - PRIVATE - papilo-core -) -set_property(TARGET cuopt_cli PROPERTY INSTALL_RPATH "$ORIGIN/../${lib_dir}") + target_link_libraries(cuopt_cli + PUBLIC + cuopt + OpenMP::OpenMP_CXX + PRIVATE + papilo-core + ) + set_property(TARGET cuopt_cli PROPERTY INSTALL_RPATH "$ORIGIN/../${lib_dir}") -# adds the cuopt_cli executable to the runtime deb package -install(TARGETS cuopt_cli - COMPONENT runtime - 
RUNTIME DESTINATION ${_BIN_DEST} -) + # adds the cuopt_cli executable to the runtime deb package + install(TARGETS cuopt_cli + COMPONENT runtime + RUNTIME DESTINATION ${_BIN_DEST} + ) +endif() + + +option(BUILD_MIP_BENCHMARKS "Build MIP benchmarks" OFF) +if(BUILD_MIP_BENCHMARKS AND NOT BUILD_LP_ONLY) + add_executable(solve_MIP ../benchmarks/linear_programming/cuopt/run_mip.cpp) + target_compile_options(solve_MIP + PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" + "$<$:${CUOPT_CUDA_FLAGS}>" + ) + target_link_libraries(solve_MIP + PUBLIC + cuopt + OpenMP::OpenMP_CXX + PRIVATE + papilo-core + ) +endif() -option(BUILD_BENCHMARKS "Build benchmarks" ON) -if(BUILD_BENCHMARKS) - add_executable(solve_MPS_file ../benchmarks/linear_programming/cuopt/run_mip.cpp) - target_compile_options(solve_MPS_file +option(BUILD_LP_BENCHMARKS "Build LP benchmarks" OFF) +if(BUILD_LP_BENCHMARKS) + add_executable(solve_LP ../benchmarks/linear_programming/cuopt/run_pdlp.cu) + target_compile_options(solve_LP PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" "$<$:${CUOPT_CUDA_FLAGS}>" ) - target_link_libraries(solve_MPS_file + target_link_libraries(solve_LP PUBLIC cuopt OpenMP::OpenMP_CXX diff --git a/cpp/libmps_parser/CMakeLists.txt b/cpp/libmps_parser/CMakeLists.txt index 33f0a7b581..0c0ead6ce3 100644 --- a/cpp/libmps_parser/CMakeLists.txt +++ b/cpp/libmps_parser/CMakeLists.txt @@ -84,15 +84,17 @@ target_compile_options(mps_parser PRIVATE "$<$:${MPS_PARSER_CXX_FLAGS}>" ) -file(WRITE "${MPS_PARSER_BINARY_DIR}/fatbin.ld" - [=[ -SECTIONS -{ - .nvFatBinSegment : { *(.nvFatBinSegment) } - .nv_fatbin : { *(.nv_fatbin) } -} -]=]) -target_link_options(mps_parser PRIVATE "${MPS_PARSER_BINARY_DIR}/fatbin.ld") +if(WRITE_FATBIN) + file(WRITE "${MPS_PARSER_BINARY_DIR}/fatbin.ld" + [=[ + SECTIONS + { + .nvFatBinSegment : { *(.nvFatBinSegment) } + .nv_fatbin : { *(.nv_fatbin) } + } + ]=]) + target_link_options(mps_parser PRIVATE "${MPS_PARSER_BINARY_DIR}/fatbin.ld") +endif() add_library(cuopt::mps_parser ALIAS mps_parser) diff --git 
a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index 18d83f393b..1ffa94fc0d 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -19,7 +19,12 @@ set(UTIL_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/utilities/seed_generator.cu add_subdirectory(linear_programming) add_subdirectory(math_optimization) add_subdirectory(mip) -add_subdirectory(routing) + +# Only build routing for full builds, not LP-only builds +if(NOT SKIP_ROUTING_BUILD) + add_subdirectory(routing) +endif() + add_subdirectory(dual_simplex) set(CUOPT_SRC_FILES ${CUOPT_SRC_FILES} ${UTIL_SRC_FILES} PARENT_SCOPE) diff --git a/cpp/src/linear_programming/CMakeLists.txt b/cpp/src/linear_programming/CMakeLists.txt index 45fa591341..e2deef2e40 100644 --- a/cpp/src/linear_programming/CMakeLists.txt +++ b/cpp/src/linear_programming/CMakeLists.txt @@ -13,10 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -set(LP_SRC_FILES +# Core LP files always included +set(LP_CORE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/solver_settings.cu ${CMAKE_CURRENT_SOURCE_DIR}/optimization_problem.cu - ${CMAKE_CURRENT_SOURCE_DIR}/utilities/cython_solve.cu ${CMAKE_CURRENT_SOURCE_DIR}/utilities/problem_checking.cu ${CMAKE_CURRENT_SOURCE_DIR}/solve.cu ${CMAKE_CURRENT_SOURCE_DIR}/pdlp.cu @@ -34,7 +34,19 @@ set(LP_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/termination_strategy/termination_strategy.cu ${CMAKE_CURRENT_SOURCE_DIR}/termination_strategy/infeasibility_information.cu ${CMAKE_CURRENT_SOURCE_DIR}/termination_strategy/convergence_information.cu +) + +# C and Python adapter files +set(LP_ADAPTER_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/utilities/cython_solve.cu ${CMAKE_CURRENT_SOURCE_DIR}/cuopt_c.cpp ) +# Choose which files to include based on build mode +if(SKIP_C_PYTHON_ADAPTERS) + set(LP_SRC_FILES ${LP_CORE_FILES}) +else() + set(LP_SRC_FILES ${LP_CORE_FILES} ${LP_ADAPTER_FILES}) +endif() + set(CUOPT_SRC_FILES ${CUOPT_SRC_FILES} ${LP_SRC_FILES} PARENT_SCOPE) diff --git 
a/cpp/src/mip/CMakeLists.txt b/cpp/src/mip/CMakeLists.txt index 43c0072802..8f859e2d0d 100644 --- a/cpp/src/mip/CMakeLists.txt +++ b/cpp/src/mip/CMakeLists.txt @@ -13,15 +13,21 @@ # See the License for the specific language governing permissions and # limitations under the License. -set(MIP_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/problem/problem.cu) - -list(PREPEND - MIP_SRC_FILES +# Files necessary for Linear Programming functionality +set(MIP_LP_NECESSARY_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/problem/problem.cu + ${CMAKE_CURRENT_SOURCE_DIR}/solver_settings.cu + ${CMAKE_CURRENT_SOURCE_DIR}/solver_solution.cu ${CMAKE_CURRENT_SOURCE_DIR}/problem/write_mps.cu + ${CMAKE_CURRENT_SOURCE_DIR}/local_search/rounding/simple_rounding.cu + ${CMAKE_CURRENT_SOURCE_DIR}/presolve/third_party_presolve.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/solution/solution.cu +) + +# Files that are MIP-specific and not needed for pure LP +set(MIP_NON_LP_FILES ${CMAKE_CURRENT_SOURCE_DIR}/solve.cu ${CMAKE_CURRENT_SOURCE_DIR}/solver.cu - ${CMAKE_CURRENT_SOURCE_DIR}/solver_settings.cu - ${CMAKE_CURRENT_SOURCE_DIR}/solver_solution.cu ${CMAKE_CURRENT_SOURCE_DIR}/diversity/assignment_hash_map.cu ${CMAKE_CURRENT_SOURCE_DIR}/diversity/diversity_manager.cu ${CMAKE_CURRENT_SOURCE_DIR}/diversity/multi_armed_bandit.cu @@ -32,7 +38,6 @@ list(PREPEND ${CMAKE_CURRENT_SOURCE_DIR}/local_search/rounding/constraint_prop.cu ${CMAKE_CURRENT_SOURCE_DIR}/local_search/rounding/lb_bounds_repair.cu ${CMAKE_CURRENT_SOURCE_DIR}/local_search/rounding/lb_constraint_prop.cu - ${CMAKE_CURRENT_SOURCE_DIR}/local_search/rounding/simple_rounding.cu ${CMAKE_CURRENT_SOURCE_DIR}/local_search/feasibility_pump/feasibility_pump.cu ${CMAKE_CURRENT_SOURCE_DIR}/local_search/line_segment_search/line_segment_search.cu ${CMAKE_CURRENT_SOURCE_DIR}/presolve/bounds_presolve.cu @@ -42,12 +47,18 @@ list(PREPEND ${CMAKE_CURRENT_SOURCE_DIR}/presolve/load_balanced_bounds_presolve.cu ${CMAKE_CURRENT_SOURCE_DIR}/presolve/multi_probe.cu 
${CMAKE_CURRENT_SOURCE_DIR}/presolve/probing_cache.cu - ${CMAKE_CURRENT_SOURCE_DIR}/presolve/third_party_presolve.cpp ${CMAKE_CURRENT_SOURCE_DIR}/presolve/trivial_presolve.cu ${CMAKE_CURRENT_SOURCE_DIR}/problem/load_balanced_problem.cu ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump.cu ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump_kernels.cu - ${CMAKE_CURRENT_SOURCE_DIR}/solution/solution.cu) +) + +# Choose which files to include based on build mode +if(BUILD_LP_ONLY) + set(MIP_SRC_FILES ${MIP_LP_NECESSARY_FILES}) +else() + set(MIP_SRC_FILES ${MIP_LP_NECESSARY_FILES} ${MIP_NON_LP_FILES}) +endif() set(CUOPT_SRC_FILES ${CUOPT_SRC_FILES} ${MIP_SRC_FILES} PARENT_SCOPE) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 6e413319b5..691b6992ef 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -13,28 +13,31 @@ # See the License for the specific language governing permissions and # limitations under the License. -add_library(cuopttestutils STATIC - routing/utilities/check_constraints.cu -) +if(BUILD_TESTS) + add_library(cuopttestutils STATIC + routing/utilities/check_constraints.cu + ) -target_compile_options(cuopttestutils - PUBLIC "$<$:${CUOPT_CXX_FLAGS}>" - "$:${CUOPT_CUDA_FLAGS}>>" -) + target_compile_options(cuopttestutils + PUBLIC "$<$:${CUOPT_CXX_FLAGS}>" + "$:${CUOPT_CUDA_FLAGS}>>" + ) -target_include_directories(cuopttestutils - PRIVATE - "${CMAKE_CURRENT_SOURCE_DIR}/../src" - "${CMAKE_CURRENT_SOURCE_DIR}" -) + target_include_directories(cuopttestutils + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/../src" + "${CMAKE_CURRENT_SOURCE_DIR}" + ) + + target_link_libraries(cuopttestutils + PUBLIC + cuopt + GTest::gmock + GTest::gtest + papilo-core + ) +endif() -target_link_libraries(cuopttestutils - PUBLIC - cuopt - GTest::gmock - GTest::gtest - papilo-core -) set(CUOPT_TEST_DIR ${CMAKE_CURRENT_SOURCE_DIR}) @@ -82,12 +85,18 @@ if(RAPIDS_DATASET_ROOT_DIR) endif(RAPIDS_DATASET_ROOT_DIR) # ## test sources 
-add_subdirectory(routing) -add_subdirectory(linear_programming) -add_subdirectory(distance_engine) -add_subdirectory(mip) -add_subdirectory(dual_simplex) -add_subdirectory(examples) -add_subdirectory(utilities) +if(BUILD_TESTS) + if(NOT SKIP_ROUTING_BUILD) + add_subdirectory(routing) + add_subdirectory(distance_engine) + add_subdirectory(examples) + endif() + if(NOT BUILD_LP_ONLY) + add_subdirectory(mip) + endif() + add_subdirectory(linear_programming) + add_subdirectory(dual_simplex) + add_subdirectory(utilities) -enable_testing() + enable_testing() +endif() From f13ea6f597b37f8f15e99e640372852acbda9195 Mon Sep 17 00:00:00 2001 From: Clement Courbet Date: Wed, 3 Sep 2025 17:16:52 +0200 Subject: [PATCH 10/33] Fix out-of-bound access in `clean_up_infeasibilities`. (#346) This OOB access happens when the last element is the one being removed: the second access to `infeasibility_indices[k]` after the `pop_back` is invalid. This is another example for issue #150. Authors: - Clement Courbet (https://github.com/legrosbuffle) - Ramakrishnap (https://github.com/rgsl888prabhu) Approvers: - Rajesh Gandham (https://github.com/rg20) URL: https://github.com/NVIDIA/cuopt/pull/346 --- cpp/src/dual_simplex/phase2.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 76f4768abd..7383f42216 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -752,10 +752,9 @@ void clean_up_infeasibilities(std::vector& squared_infeasibilities, const f_t squared_infeas = squared_infeasibilities[j]; if (squared_infeas == 0.0) { // Set to the last element - const i_t sz = infeasibility_indices.size(); - infeasibility_indices[k] = infeasibility_indices[sz - 1]; + const i_t new_j = infeasibility_indices.back(); + infeasibility_indices[k] = new_j; infeasibility_indices.pop_back(); - i_t new_j = infeasibility_indices[k]; if (squared_infeasibilities[new_j] == 0.0) { k--; } } } From 
c2b34c84ebfd51f8d56eb2fb7fd2a115570d8219 Mon Sep 17 00:00:00 2001 From: ahehn-nv Date: Wed, 3 Sep 2025 19:30:36 +0200 Subject: [PATCH 11/33] Decompression for .mps.gz and .mps.bz2 files (#357) Adding zlib and bzip2 decompression to mps_parser, such that .mps.gz and .mps.bz2 files can be opened directly. Authors: - https://github.com/ahehn-nv - Ramakrishnap (https://github.com/rgsl888prabhu) Approvers: - Kyle Edwards (https://github.com/KyleFromNVIDIA) - Nicolas Blin (https://github.com/Kh4ster) - Alice Boucher (https://github.com/aliceb-nv) - Ishika Roy (https://github.com/Iroy30) URL: https://github.com/NVIDIA/cuopt/pull/357 --- ci/build_wheel_cuopt_mps_parser.sh | 8 +- .../all_cuda-129_arch-aarch64.yaml | 2 + .../all_cuda-129_arch-x86_64.yaml | 2 + conda/recipes/libcuopt/recipe.yaml | 14 ++ conda/recipes/mps-parser/recipe.yaml | 1 + cpp/libmps_parser/CMakeLists.txt | 20 ++ .../include/mps_parser/parser.hpp | 15 +- cpp/libmps_parser/src/mps_parser.cpp | 196 ++++++++++++++++++ cpp/libmps_parser/src/mps_parser.hpp | 6 +- cpp/libmps_parser/src/utilities/error.hpp | 32 ++- cpp/libmps_parser/tests/mps_parser_test.cpp | 72 +++++++ .../linear_programming/good-mps-1.mps.bz2 | Bin 0 -> 230 bytes datasets/linear_programming/good-mps-1.mps.gz | Bin 0 -> 219 bytes dependencies.yaml | 2 + thirdparty/THIRD_PARTY_LICENSES | 73 +++++++ 15 files changed, 433 insertions(+), 10 deletions(-) create mode 100644 datasets/linear_programming/good-mps-1.mps.bz2 create mode 100644 datasets/linear_programming/good-mps-1.mps.gz diff --git a/ci/build_wheel_cuopt_mps_parser.sh b/ci/build_wheel_cuopt_mps_parser.sh index 826b229e5b..3ca2220dfd 100755 --- a/ci/build_wheel_cuopt_mps_parser.sh +++ b/ci/build_wheel_cuopt_mps_parser.sh @@ -33,7 +33,13 @@ fi ci/build_wheel.sh cuopt_mps_parser ${package_dir} + +EXCLUDE_ARGS=( + --exclude "libzlib.so" + --exclude "libbz2.so" +) + # repair wheels and write to the location that artifact-uploading code expects to find them -python -m auditwheel repair -w 
"${RAPIDS_WHEEL_BLD_OUTPUT_DIR}" ${package_dir}/dist/* +python -m auditwheel repair "${EXCLUDE_ARGS[@]}" -w "${RAPIDS_WHEEL_BLD_OUTPUT_DIR}" ${package_dir}/dist/* ci/validate_wheel.sh "${package_dir}" "${RAPIDS_WHEEL_BLD_OUTPUT_DIR}" diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 9f9a642c26..0db519d773 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -7,6 +7,7 @@ channels: dependencies: - boost - breathe +- bzip2 - c-compiler - ccache - clang-tools=20.1.4 @@ -79,6 +80,7 @@ dependencies: - sphinxcontrib-websupport - sysroot_linux-aarch64==2.28 - uvicorn==0.34.* +- zlib - pip: - nvidia_sphinx_theme - swagger-plugin-for-sphinx diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index f2a73c08a4..ac47f68083 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -7,6 +7,7 @@ channels: dependencies: - boost - breathe +- bzip2 - c-compiler - ccache - clang-tools=20.1.4 @@ -79,6 +80,7 @@ dependencies: - sphinxcontrib-websupport - sysroot_linux-64==2.28 - uvicorn==0.34.* +- zlib - pip: - nvidia_sphinx_theme - swagger-plugin-for-sphinx diff --git a/conda/recipes/libcuopt/recipe.yaml b/conda/recipes/libcuopt/recipe.yaml index 9924107d27..176e296ddf 100644 --- a/conda/recipes/libcuopt/recipe.yaml +++ b/conda/recipes/libcuopt/recipe.yaml @@ -57,6 +57,8 @@ cache: - cuda-version =${{ cuda_version }} - cmake ${{ cmake_version }} - ninja + - zlib + - bzip2 host: - cpp-argparse - cuda-version =${{ cuda_version }} @@ -70,6 +72,8 @@ cache: - libcusparse-dev - cuda-cudart-dev - boost + - zlib + - bzip2 outputs: - package: @@ -90,6 +94,14 @@ outputs: build: - cmake ${{ cmake_version }} - ${{ stdlib("c") }} + - zlib + - bzip2 + host: + - zlib + - bzip2 + run: + - zlib + - bzip2 ignore_run_exports: by_name: - cuda-cudart 
@@ -99,6 +111,8 @@ outputs: - libcurand - libcusparse - librmm + - libzlib + - libbz2 tests: - package_contents: files: diff --git a/conda/recipes/mps-parser/recipe.yaml b/conda/recipes/mps-parser/recipe.yaml index c9e10c8a87..bf3666f089 100644 --- a/conda/recipes/mps-parser/recipe.yaml +++ b/conda/recipes/mps-parser/recipe.yaml @@ -32,6 +32,7 @@ requirements: build: - cmake ${{ cmake_version }} - ninja + - libmps-parser =${{ version }} - ${{ compiler("c") }} - ${{ compiler("cxx") }} - ${{ stdlib("c") }} diff --git a/cpp/libmps_parser/CMakeLists.txt b/cpp/libmps_parser/CMakeLists.txt index 0c0ead6ce3..d0cbd8a295 100644 --- a/cpp/libmps_parser/CMakeLists.txt +++ b/cpp/libmps_parser/CMakeLists.txt @@ -38,6 +38,8 @@ rapids_cmake_build_type(Release) # ############################################################################# # - User Options ------------------------------------------------------------ option(BUILD_TESTS "Configure CMake to build tests" ON) +option(MPS_PARSER_WITH_BZIP2 "Build with bzip2 decompression" ON) +option(MPS_PARSER_WITH_ZLIB "Build with zlib decompression" ON) message(VERBOSE "cuOpt: Build mps-parser unit-tests: ${BUILD_TESTS}") @@ -50,6 +52,16 @@ if(CMAKE_COMPILER_IS_GNUCXX) list(APPEND MPS_PARSER_CXX_FLAGS -Werror -Wno-error=deprecated-declarations) endif(CMAKE_COMPILER_IS_GNUCXX) +if(MPS_PARSER_WITH_BZIP2) + find_package(BZip2 REQUIRED) + add_compile_definitions(MPS_PARSER_WITH_BZIP2) +endif(MPS_PARSER_WITH_BZIP2) + +if(MPS_PARSER_WITH_ZLIB) + find_package(ZLIB REQUIRED) + add_compile_definitions(MPS_PARSER_WITH_ZLIB) +endif(MPS_PARSER_WITH_ZLIB) + if(DEFINE_ASSERT) add_definitions(-DASSERT_MODE) endif(DEFINE_ASSERT) @@ -109,6 +121,14 @@ target_include_directories(mps_parser "$" ) +if(MPS_PARSER_WITH_BZIP2) + target_include_directories(mps_parser PRIVATE ${BZIP2_INCLUDE_DIRS}) +endif(MPS_PARSER_WITH_BZIP2) + +if(MPS_PARSER_WITH_ZLIB) + target_include_directories(mps_parser PRIVATE ${ZLIB_INCLUDE_DIRS}) +endif(MPS_PARSER_WITH_ZLIB) + # 
################################################################################################## # - generate tests -------------------------------------------------------------------------------- if(BUILD_TESTS) diff --git a/cpp/libmps_parser/include/mps_parser/parser.hpp b/cpp/libmps_parser/include/mps_parser/parser.hpp index fedfca514e..0660491c70 100644 --- a/cpp/libmps_parser/include/mps_parser/parser.hpp +++ b/cpp/libmps_parser/include/mps_parser/parser.hpp @@ -22,19 +22,24 @@ namespace cuopt::mps_parser { /** - * @brief Reads the equation from the input text file which is MPS or QPS formatted + * @brief Reads the equation from an MPS or QPS file. + * + * The input file can be a plain text file in MPS-/QPS-format or a compressed MPS/QPS + * file (.mps.gz or .mps.bz2). * * Read this link http://lpsolve.sourceforge.net/5.5/mps-format.htm for more * details on both free and fixed MPS format. - * * This function supports both standard MPS files (for linear programming) and * QPS files (for quadratic programming). QPS files are MPS files with additional * sections: * - QUADOBJ: Defines quadratic terms in the objective function * - * @param[in] mps_file_path Path to MPS or QPS formatted file. - * @param[in] fixed_mps_format If MPS/QPS file should be parsed as fixed format, false by default - * @return mps_data_model_t A fully formed LP/QP problem which represents the given MPS/QPS file + * Note: Compressed MPS files .mps.gz, .mps.bz2 can only be read if the compression + * libraries zlib or libbzip2 are installed, respectively. + * + * @param[in] mps_file_path Path to MPS/QPS file. 
+ * @param[in] fixed_mps_format If MPS/QPS file should be parsed as fixed, false by default + * @return mps_data_model_t A fully formed LP/QP problem which represents the given file */ template mps_data_model_t parse_mps(const std::string& mps_file_path, diff --git a/cpp/libmps_parser/src/mps_parser.cpp b/cpp/libmps_parser/src/mps_parser.cpp index a4b4223569..4e5c1af5a8 100644 --- a/cpp/libmps_parser/src/mps_parser.cpp +++ b/cpp/libmps_parser/src/mps_parser.cpp @@ -26,9 +26,193 @@ #include #include #include +#include #include #include +#ifdef MPS_PARSER_WITH_BZIP2 +#include +#endif // MPS_PARSER_WITH_BZIP2 + +#ifdef MPS_PARSER_WITH_ZLIB +#include +#endif // MPS_PARSER_WITH_ZLIB + +#if defined(MPS_PARSER_WITH_BZIP2) || defined(MPS_PARSER_WITH_ZLIB) +#include +#endif // MPS_PARSER_WITH_BZIP2 || MPS_PARSER_WITH_ZLIB + +namespace { +using cuopt::mps_parser::error_type_t; +using cuopt::mps_parser::mps_parser_expects; +using cuopt::mps_parser::mps_parser_expects_fatal; + +struct FcloseDeleter { + void operator()(FILE* fp) + { + mps_parser_expects_fatal( + fclose(fp) == 0, error_type_t::ValidationError, "Error closing MPS file!"); + } +}; +} // end namespace + +#ifdef MPS_PARSER_WITH_BZIP2 +namespace { +using BZ2_bzReadOpen_t = decltype(&BZ2_bzReadOpen); +using BZ2_bzReadClose_t = decltype(&BZ2_bzReadClose); +using BZ2_bzRead_t = decltype(&BZ2_bzRead); + +std::vector bz2_file_to_string(const std::string& file) +{ + struct DlCloseDeleter { + void operator()(void* fp) + { + mps_parser_expects_fatal( + dlclose(fp) == 0, error_type_t::ValidationError, "Error closing libbz2.so!"); + } + }; + struct BzReadCloseDeleter { + void operator()(void* f) + { + int bzerror; + if (f != nullptr) fptr(&bzerror, f); + mps_parser_expects_fatal( + bzerror == BZ_OK, error_type_t::ValidationError, "Error closing bzip2 file!"); + } + BZ2_bzReadClose_t fptr = nullptr; + }; + + std::unique_ptr lbz2handle{dlopen("libbz2.so", RTLD_LAZY)}; + mps_parser_expects( + lbz2handle != nullptr, + 
error_type_t::ValidationError, + "Could not open .mps.bz2 file since libbz2.so was not found. In order to open .mps.bz2 files " + "directly, please ensure libbzip2 is installed. Alternatively, decompress the .mps.bz2 file " + "manually and open the uncompressed .mps file. Given path: %s", + file.c_str()); + + BZ2_bzReadOpen_t BZ2_bzReadOpen = + reinterpret_cast(dlsym(lbz2handle.get(), "BZ2_bzReadOpen")); + BZ2_bzReadClose_t BZ2_bzReadClose = + reinterpret_cast(dlsym(lbz2handle.get(), "BZ2_bzReadClose")); + BZ2_bzRead_t BZ2_bzRead = reinterpret_cast(dlsym(lbz2handle.get(), "BZ2_bzRead")); + mps_parser_expects( + BZ2_bzReadOpen != nullptr && BZ2_bzReadClose != nullptr && BZ2_bzRead != nullptr, + error_type_t::ValidationError, + "Error loading libbzip2! Library version might be incompatible. Please decompress the .mps.bz2 " + "file manually and open the uncompressed .mps file. Given path: %s", + file.c_str()); + + std::unique_ptr fp{fopen(file.c_str(), "rb")}; + mps_parser_expects(fp != nullptr, + error_type_t::ValidationError, + "Error opening MPS file! Given path: %s", + file.c_str()); + int bzerror = BZ_OK; + std::unique_ptr bzfile{ + BZ2_bzReadOpen(&bzerror, fp.get(), 0, 0, nullptr, 0), {BZ2_bzReadClose}}; + mps_parser_expects(bzerror == BZ_OK, + error_type_t::ValidationError, + "Could not open bzip2 compressed file! Given path: %s", + file.c_str()); + + std::vector buf; + const size_t readbufsize = 1ull << 24; // 16MiB - just a guess. + std::vector readbuf(readbufsize); + while (bzerror == BZ_OK) { + const size_t bytes_read = BZ2_bzRead(&bzerror, bzfile.get(), readbuf.data(), readbuf.size()); + if (bzerror == BZ_OK || bzerror == BZ_STREAM_END) { + buf.insert(buf.end(), begin(readbuf), begin(readbuf) + bytes_read); + } + } + buf.push_back('\0'); + mps_parser_expects(bzerror == BZ_STREAM_END, + error_type_t::ValidationError, + "Error in bzip2 decompression of MPS file! 
Given path: %s", + file.c_str()); + return buf; +} +} // end namespace +#endif // MPS_PARSER_WITH_BZIP2 + +#ifdef MPS_PARSER_WITH_ZLIB +namespace { +using gzopen_t = decltype(&gzopen); +using gzclose_r_t = decltype(&gzclose_r); +using gzbuffer_t = decltype(&gzbuffer); +using gzread_t = decltype(&gzread); +using gzerror_t = decltype(&gzerror); +std::vector zlib_file_to_string(const std::string& file) +{ + struct DlCloseDeleter { + void operator()(void* fp) + { + mps_parser_expects_fatal( + dlclose(fp) == 0, error_type_t::ValidationError, "Error closing libz.so!"); + } + }; + struct GzCloseDeleter { + void operator()(gzFile_s* f) + { + int err = fptr(f); + mps_parser_expects_fatal( + err == Z_OK, error_type_t::ValidationError, "Error closing gz file!"); + } + gzclose_r_t fptr = nullptr; + }; + + std::unique_ptr lzhandle{dlopen("libz.so.1", RTLD_LAZY)}; + mps_parser_expects( + lzhandle != nullptr, + error_type_t::ValidationError, + "Could not open .mps.gz file since libz.so was not found. In order to open .mps.gz files " + "directly, please ensure zlib is installed. Alternatively, decompress the .mps.gz file " + "manually and open the uncompressed .mps file. Given path: %s", + file.c_str()); + gzopen_t gzopen = reinterpret_cast(dlsym(lzhandle.get(), "gzopen")); + gzclose_r_t gzclose_r = reinterpret_cast(dlsym(lzhandle.get(), "gzclose_r")); + gzbuffer_t gzbuffer = reinterpret_cast(dlsym(lzhandle.get(), "gzbuffer")); + gzread_t gzread = reinterpret_cast(dlsym(lzhandle.get(), "gzread")); + gzerror_t gzerror = reinterpret_cast(dlsym(lzhandle.get(), "gzerror")); + mps_parser_expects( + gzopen != nullptr && gzclose_r != nullptr && gzbuffer != nullptr && gzread != nullptr && + gzerror != nullptr, + error_type_t::ValidationError, + "Error loading zlib! Library version might be incompatible. Please decompress the .mps.gz file " + "manually and open the uncompressed .mps file. 
Given path: %s", + file.c_str()); + std::unique_ptr gzfp{gzopen(file.c_str(), "rb"), {gzclose_r}}; + mps_parser_expects(gzfp != nullptr, + error_type_t::ValidationError, + "Error opening compressed MPS file! Given path: %s", + file.c_str()); + int zlib_status = gzbuffer(gzfp.get(), 1 << 20); // 1 MiB + mps_parser_expects(zlib_status == Z_OK, + error_type_t::ValidationError, + "Could not set zlib internal buffer size for decompression! Given path: %s", + file.c_str()); + std::vector buf; + const size_t readbufsize = 1ull << 24; // 16MiB + std::vector readbuf(readbufsize); + int bytes_read = -1; + while (bytes_read != 0) { + bytes_read = gzread(gzfp.get(), readbuf.data(), readbuf.size()); + if (bytes_read > 0) { buf.insert(buf.end(), begin(readbuf), begin(readbuf) + bytes_read); } + if (bytes_read < 0) { + gzerror(gzfp.get(), &zlib_status); + break; + } + } + buf.push_back('\0'); + mps_parser_expects(zlib_status == Z_OK, + error_type_t::ValidationError, + "Error in zlib decompression of MPS file! 
Given path: %s", + file.c_str()); + return buf; +} +} // end namespace +#endif // MPS_PARSER_WITH_ZLIB + namespace cuopt::mps_parser { template @@ -342,6 +526,18 @@ std::vector mps_parser_t::file_to_string(const std::string& file { // raft::common::nvtx::range fun_scope("file to string"); +#ifdef MPS_PARSER_WITH_BZIP2 + if (file.size() > 4 && file.substr(file.size() - 4, 4) == ".bz2") { + return bz2_file_to_string(file); + } +#endif // MPS_PARSER_WITH_BZIP2 + +#ifdef MPS_PARSER_WITH_ZLIB + if (file.size() > 3 && file.substr(file.size() - 3, 3) == ".gz") { + return zlib_file_to_string(file); + } +#endif // MPS_PARSER_WITH_ZLIB + // Faster than using C++ I/O FILE* fp = fopen(file.c_str(), "r"); mps_parser_expects(fp != nullptr, diff --git a/cpp/libmps_parser/src/mps_parser.hpp b/cpp/libmps_parser/src/mps_parser.hpp index e06b2bbe63..58b4a6128b 100644 --- a/cpp/libmps_parser/src/mps_parser.hpp +++ b/cpp/libmps_parser/src/mps_parser.hpp @@ -145,10 +145,10 @@ class mps_parser_t { std::unordered_set bounds_defined_for_var_id{}; static constexpr f_t unset_range_value = std::numeric_limits::infinity(); - /* Reads the equation from the input text file which is MPS formatted + /* Reads an MPS input file into a buffer. * - * Read this link http://lpsolve.sourceforge.net/5.5/mps-format.htm for more - * details on this format. + * If the file has a .gz or .bz2 suffix and zlib or libbzip2 are installed, respectively, + * the function directly reads and decompresses the compressed MPS file. 
*/ std::vector file_to_string(const std::string& file); void fill_problem(mps_data_model_t& problem); diff --git a/cpp/libmps_parser/src/utilities/error.hpp b/cpp/libmps_parser/src/utilities/error.hpp index 3aafa6340a..17518d8daf 100644 --- a/cpp/libmps_parser/src/utilities/error.hpp +++ b/cpp/libmps_parser/src/utilities/error.hpp @@ -45,7 +45,7 @@ inline std::string error_to_string(error_type_t error) } /** - * @brief Macro for checking (pre-)conditions that throws an exception when a + * @brief Function for checking (pre-)conditions that throws an exception when a * condition is false * * @param[bool] cond From expression that evaluates to true or false @@ -70,6 +70,36 @@ inline void mps_parser_expects(bool cond, error_type_t error_type, const char* f } } +/** + * @brief Function for checking (pre-)conditions that aborts the program when a + * condition is false + * + * @param[bool] cond From expression that evaluates to true or false + * @param[error_type_t] error enum error type + * @param[const char *] fmt String format for error message + * @param variable set of arguments used for fmt + * @note Never throws: prints the error to stderr and calls std::abort() if the condition is false. + */ +inline void mps_parser_expects_fatal(bool cond, error_type_t error_type, const char* fmt, ...) +{ + if (not cond) { + char msg[2048]; + va_list args; + + // va_start must be paired with exactly one va_end; starting the list twice is undefined behavior. + va_start(args, fmt); + vsnprintf(msg, sizeof(msg), fmt, args); + va_end(args); + const std::string error_string = error_to_string(error_type); + std::fprintf(stderr, + "{\"MPS_PARSER_ERROR_TYPE\": \"%s\", \"msg\": \"%s\"}\n", + error_string.c_str(), + msg); + std::fflush(stderr); + std::abort(); + } +} + #define MPS_PARSER_SET_ERROR_MSG(msg, location_prefix, fmt, ...) 
\ do { \ char err_msg[2048]; /* NOLINT */ \ diff --git a/cpp/libmps_parser/tests/mps_parser_test.cpp b/cpp/libmps_parser/tests/mps_parser_test.cpp index 1e7f218c82..508cc3abfd 100644 --- a/cpp/libmps_parser/tests/mps_parser_test.cpp +++ b/cpp/libmps_parser/tests/mps_parser_test.cpp @@ -757,6 +757,78 @@ TEST(mps_parser, good_mps_file_partial_bounds) EXPECT_EQ(10.0, mps.variable_upper_bounds[1]); } +TEST(mps_parser, good_mps_file_bzip2_compressed) +{ + auto mps = read_from_mps("linear_programming/good-mps-1.mps.bz2"); + EXPECT_EQ("good-1", mps.problem_name); + ASSERT_EQ(int(2), mps.row_names.size()); + EXPECT_EQ("ROW1", mps.row_names[0]); + EXPECT_EQ("ROW2", mps.row_names[1]); + ASSERT_EQ(int(2), mps.row_types.size()); + EXPECT_EQ(LesserThanOrEqual, mps.row_types[0]); + EXPECT_EQ(LesserThanOrEqual, mps.row_types[1]); + EXPECT_EQ("COST", mps.objective_name); + ASSERT_EQ(int(2), mps.var_names.size()); + EXPECT_EQ("VAR1", mps.var_names[0]); + EXPECT_EQ("VAR2", mps.var_names[1]); + ASSERT_EQ(int(2), mps.A_indices.size()); + ASSERT_EQ(int(2), mps.A_indices[0].size()); + EXPECT_EQ(int(0), mps.A_indices[0][0]); + EXPECT_EQ(int(1), mps.A_indices[0][1]); + ASSERT_EQ(int(2), mps.A_indices[1].size()); + EXPECT_EQ(int(0), mps.A_indices[1][0]); + EXPECT_EQ(int(1), mps.A_indices[1][1]); + ASSERT_EQ(int(2), mps.A_values.size()); + ASSERT_EQ(int(2), mps.A_values[0].size()); + EXPECT_EQ(3., mps.A_values[0][0]); + EXPECT_EQ(4., mps.A_values[0][1]); + ASSERT_EQ(int(2), mps.A_values[1].size()); + EXPECT_EQ(2.7, mps.A_values[1][0]); + EXPECT_EQ(10.1, mps.A_values[1][1]); + ASSERT_EQ(int(2), mps.b_values.size()); + EXPECT_EQ(5.4, mps.b_values[0]); + EXPECT_EQ(4.9, mps.b_values[1]); + ASSERT_EQ(int(2), mps.c_values.size()); + EXPECT_EQ(0.2, mps.c_values[0]); + EXPECT_EQ(0.1, mps.c_values[1]); +} + +TEST(mps_parser, good_mps_file_zlib_compressed) +{ + auto mps = read_from_mps("linear_programming/good-mps-1.mps.gz"); + EXPECT_EQ("good-1", mps.problem_name); + ASSERT_EQ(int(2), 
mps.row_names.size()); + EXPECT_EQ("ROW1", mps.row_names[0]); + EXPECT_EQ("ROW2", mps.row_names[1]); + ASSERT_EQ(int(2), mps.row_types.size()); + EXPECT_EQ(LesserThanOrEqual, mps.row_types[0]); + EXPECT_EQ(LesserThanOrEqual, mps.row_types[1]); + EXPECT_EQ("COST", mps.objective_name); + ASSERT_EQ(int(2), mps.var_names.size()); + EXPECT_EQ("VAR1", mps.var_names[0]); + EXPECT_EQ("VAR2", mps.var_names[1]); + ASSERT_EQ(int(2), mps.A_indices.size()); + ASSERT_EQ(int(2), mps.A_indices[0].size()); + EXPECT_EQ(int(0), mps.A_indices[0][0]); + EXPECT_EQ(int(1), mps.A_indices[0][1]); + ASSERT_EQ(int(2), mps.A_indices[1].size()); + EXPECT_EQ(int(0), mps.A_indices[1][0]); + EXPECT_EQ(int(1), mps.A_indices[1][1]); + ASSERT_EQ(int(2), mps.A_values.size()); + ASSERT_EQ(int(2), mps.A_values[0].size()); + EXPECT_EQ(3., mps.A_values[0][0]); + EXPECT_EQ(4., mps.A_values[0][1]); + ASSERT_EQ(int(2), mps.A_values[1].size()); + EXPECT_EQ(2.7, mps.A_values[1][0]); + EXPECT_EQ(10.1, mps.A_values[1][1]); + ASSERT_EQ(int(2), mps.b_values.size()); + EXPECT_EQ(5.4, mps.b_values[0]); + EXPECT_EQ(4.9, mps.b_values[1]); + ASSERT_EQ(int(2), mps.c_values.size()); + EXPECT_EQ(0.2, mps.c_values[0]); + EXPECT_EQ(0.1, mps.c_values[1]); +} + // ================================================================================================ // QPS (Quadratic Programming) Support Tests // ================================================================================================ diff --git a/datasets/linear_programming/good-mps-1.mps.bz2 b/datasets/linear_programming/good-mps-1.mps.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..ee96fb0558b3ca29366a11c17f8ee5c0581502d9 GIT binary patch literal 230 zcmVEV1c!MAVV8q9CG= gK>X=f<1&< z@X!aeKJL~c6(mCI#jkhMSc0wYC9|2?VRk*}N7vk&SIs=2(rt$j3ohZ|s0x7@HNrNM z6z%hTtEHgeyof{zB95KA5yR`QkOg|jh|fUjAm(ip&1>&slQB0rT384%^=o*@C~ zM+`u+C{GS)MiQ#tvo9mIh%%2jGjEZPuQGlXQN&?LP7TF~bB4h`L$f9;b~YxxlPGkX V-;?Gdakf=^#oqSTOHh;n003T%V(kC` literal 0 HcmV?d00001 
diff --git a/dependencies.yaml b/dependencies.yaml index 1a8e1f8d77..1ea0f59528 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -306,6 +306,8 @@ dependencies: - cpp-argparse - librmm==25.10.* - libraft-headers==25.10.* + - zlib + - bzip2 test_cpp: common: - output_types: [conda] diff --git a/thirdparty/THIRD_PARTY_LICENSES b/thirdparty/THIRD_PARTY_LICENSES index e6cb70a114..6bce42d313 100644 --- a/thirdparty/THIRD_PARTY_LICENSES +++ b/thirdparty/THIRD_PARTY_LICENSES @@ -878,3 +878,76 @@ may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read . + + +----------------------------------------------------------------------------------------- +== bzip2 + +Usage: libmps_parser uses libbzip2 + +This program, "bzip2", the associated library "libbzip2", and all +documentation, are copyright (C) 1996-2019 Julian R Seward. All +rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + +3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + +4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Julian Seward, jseward@acm.org +bzip2/libbzip2 version 1.0.8 of 13 July 2019 + + +----------------------------------------------------------------------------------------- +== zlib + +Usage: libmps_parser uses zlib + +Copyright notice: + + (C) 1995-2024 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. 
+ + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu From 90095dfd626463e963ce27f8a7387abe7987d498 Mon Sep 17 00:00:00 2001 From: Ramakrishnap <42624703+rgsl888prabhu@users.noreply.github.com> Date: Wed, 3 Sep 2025 17:34:39 -0500 Subject: [PATCH 12/33] Add documentation on nightly installation commands (#367) Add documentation on nightly installation commands Authors: - Ramakrishnap (https://github.com/rgsl888prabhu) Approvers: - Ishika Roy (https://github.com/Iroy30) - Trevor McKay (https://github.com/tmckayus) - Cindy Wilkinson (https://github.com/cwilkinson76) URL: https://github.com/NVIDIA/cuopt/pull/367 --- README.md | 12 +++++++++++- docs/cuopt/source/cuopt-c/quick-start.rst | 16 +++++++++++++++- docs/cuopt/source/cuopt-python/quick-start.rst | 10 +++++++++- docs/cuopt/source/cuopt-server/quick-start.rst | 12 +++++++++++- 4 files changed, 46 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index f7390b9586..f34020f4d2 100644 --- a/README.md +++ b/README.md @@ -62,10 +62,20 @@ on the major version of CUDA available in your environment: For CUDA 12.x: ```bash -pip install --extra-index-url=https://pypi.nvidia.com cuopt-server-cu12==25.10.* cuopt-sh-client==25.10.* nvidia-cuda-runtime-cu12==12.9.* +pip install \ + --extra-index-url=https://pypi.nvidia.com \ + nvidia-cuda-runtime-cu12==12.9.* \ + cuopt-server-cu12==25.10.* cuopt-sh-client==25.10.* ``` Development wheels are available as nightlies, please update `--extra-index-url` to `https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/` to install latest nightly packages. 
+```bash +pip install --pre \ + --extra-index-url=https://pypi.nvidia.com \ + --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/ \ + nvidia-cuda-runtime-cu12==12.9.* \ + cuopt-server-cu12==25.10.* cuopt-sh-client==25.10.* +``` ### Conda diff --git a/docs/cuopt/source/cuopt-c/quick-start.rst b/docs/cuopt/source/cuopt-c/quick-start.rst index 624ca6a2ab..04d5454c01 100644 --- a/docs/cuopt/source/cuopt-c/quick-start.rst +++ b/docs/cuopt/source/cuopt-c/quick-start.rst @@ -20,9 +20,20 @@ This wheel is a Python wrapper around the C++ library and eases installation and # This is a deprecated module and no longer used, but it shares the same name for the CLI, so we need to uninstall it first if it exists. pip uninstall cuopt-thin-client - pip install --extra-index-url=https://pypi.nvidia.com libcuopt-cu12==25.10.* nvidia-cuda-runtime-cu12==12.9.* + pip install --extra-index-url=https://pypi.nvidia.com \ + nvidia-cuda-runtime-cu12==12.9.* \ + libcuopt-cu12==25.10.* +.. note:: + For development wheels which are available as nightlies, please update `--extra-index-url` to `https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/`. + +.. code-block:: bash + + pip install --pre --extra-index-url=https://pypi.nvidia.com --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/ \ + nvidia-cuda-runtime-cu12==12.9.* \ + libcuopt-cu12==25.10.* + Conda ----- @@ -36,3 +47,6 @@ NVIDIA cuOpt can be installed with Conda (via `miniforge Date: Fri, 5 Sep 2025 09:39:33 -0500 Subject: [PATCH 13/33] Warn in case a dependent library is not found in libcuopt load (#375) While loading libcuopt using load, sometimes due to a missing dependent library we get error as libcuopt.so is missing even though libcuopt.so is present but some other dependency is missing. This PR adds a warning in such cases so it doesn't fail in the corner cases, but provide a better details through a warning. 
## Issue closes #340 Authors: - Ramakrishnap (https://github.com/rgsl888prabhu) Approvers: - Trevor McKay (https://github.com/tmckayus) URL: https://github.com/NVIDIA/cuopt/pull/375 --- python/libcuopt/libcuopt/load.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/python/libcuopt/libcuopt/load.py b/python/libcuopt/libcuopt/load.py index 0857398793..353e80f16e 100644 --- a/python/libcuopt/libcuopt/load.py +++ b/python/libcuopt/libcuopt/load.py @@ -85,12 +85,23 @@ def load_library(): libcuopt_lib = _load_wheel_installation(soname) if libcuopt_lib is None: libcuopt_lib = _load_system_installation(soname) - except OSError: + except OSError as e: # If none of the searches above succeed, just silently return None # and rely on other mechanisms (like RPATHs on other DSOs) to # help the loader find the library. - pass + import warnings + + warnings.warn( + f"Failed to load libcuopt library: {soname}. " + f"Error: {str(e)}. " + "Falling back to relying on system loader. " + "cuOpt functionality may be unavailable. " + "This might lead to a generic error such as " + "'libcuopt.so missing' if the library cannot be found.", + RuntimeWarning, + ) + pass # The caller almost never needs to do anything with this library, but no # harm in offering the option since this object at least provides a handle # to inspect where libcuopt was loaded from. From d4a4e5eac671d63578277ddf3483eabd3ea83515 Mon Sep 17 00:00:00 2001 From: Alice Boucher <160623740+aliceb-nv@users.noreply.github.com> Date: Fri, 5 Sep 2025 18:40:25 +0200 Subject: [PATCH 14/33] Print cuOpt version / machine info before solving (#370) closes https://github.com/NVIDIA/cuopt/issues/369 Most solvers report the version in their logs, along with some extra information such as build date or commit hash. 
This PR adds code to cuopt_cli (and solve_MPS_problem, until we deprecate it) to print out the cuOpt version, build hash, and target architectures, along with host/device details when starting the solver. Example output: ``` cuOpt version: 25.10.0, git hash: 9006ad7, host arch: x86_64, device archs: 90-real CPU: AMD EPYC 7742 64-Core Processor, threads (physical/logical): 128/256, RAM: 726.66 GiB CUDA 12.9, device: NVIDIA H100 PCIe (ID 0), VRAM: 79.18 GiB CUDA device UUID: 1effffffa4ffffff9937-fffffff928-ffff ``` Authors: - Alice Boucher (https://github.com/aliceb-nv) Approvers: - Ramakrishnap (https://github.com/rgsl888prabhu) - Rajesh Gandham (https://github.com/rg20) URL: https://github.com/NVIDIA/cuopt/pull/370 --- cpp/CMakeLists.txt | 18 ++- cpp/src/CMakeLists.txt | 3 +- cpp/src/linear_programming/solve.cu | 3 + cpp/src/mip/solve.cu | 3 + cpp/src/utilities/version_info.cpp | 227 ++++++++++++++++++++++++++++ cpp/src/utilities/version_info.hpp | 21 +++ 6 files changed, 273 insertions(+), 2 deletions(-) create mode 100644 cpp/src/utilities/version_info.cpp create mode 100644 cpp/src/utilities/version_info.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 919064b872..76e5b16a03 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -108,7 +108,8 @@ message(VERBOSE "CUOPT: LIBCUOPT_LOGGING_LEVEL = '${LIBCUOPT_LOGGING_LEVEL}'.") message("-- Building with logging level = ${LIBCUOPT_LOGGING_LEVEL}") -message("-- Building for GPU_ARCHS = ${CMAKE_CUDA_ARCHITECTURES}") +message("-- Building for GPU_ARCHS = '${CMAKE_CUDA_ARCHITECTURES}'") +message("-- Host target architecture = '${CMAKE_SYSTEM_PROCESSOR}'") # make the flags global in order to propagate flags to test cmake files set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --expt-extended-lambda") @@ -261,6 +262,21 @@ add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/libmps_parser) set(CMAKE_LIBRARY_PATH ${CMAKE_CURRENT_BINARY_DIR}/libmps_parser/) +execute_process( + COMMAND git rev-parse 
--short HEAD + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_COMMIT_HASH + OUTPUT_STRIP_TRAILING_WHITESPACE +) +message("-- Building with GIT_COMMIT_HASH = '${GIT_COMMIT_HASH}'") + + +list(JOIN CMAKE_CUDA_ARCHITECTURES "," JOINED_CUDA_ARCHITECTURES) # ';' breaks compile_definitions, replace it +target_compile_definitions(cuopt PUBLIC + CUOPT_GIT_COMMIT_HASH="${GIT_COMMIT_HASH}" + CUOPT_CUDA_ARCHITECTURES="${JOINED_CUDA_ARCHITECTURES}" + CUOPT_CPU_ARCHITECTURE="${CMAKE_SYSTEM_PROCESSOR}") + target_link_libraries(cuopt PUBLIC CUDA::cublas diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index 1ffa94fc0d..76a7426178 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -14,7 +14,8 @@ # limitations under the License. set(UTIL_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/utilities/seed_generator.cu - ${CMAKE_CURRENT_SOURCE_DIR}/utilities/logger_helper.cpp) + ${CMAKE_CURRENT_SOURCE_DIR}/utilities/logger_helper.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/utilities/version_info.cpp) add_subdirectory(linear_programming) add_subdirectory(math_optimization) diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu index 8dc1909c87..e90312067b 100644 --- a/cpp/src/linear_programming/solve.cu +++ b/cpp/src/linear_programming/solve.cu @@ -35,6 +35,7 @@ #include #include +#include #include #include @@ -571,6 +572,8 @@ optimization_problem_solution_t solve_lp(optimization_problem_t #include #include +#include #include #include @@ -170,6 +171,8 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, // This needs to be called before pdlp is initialized init_handler(op_problem.get_handle_ptr()); + print_version_info(); + raft::common::nvtx::range fun_scope("Running solver"); // This is required as user might forget to set some fields diff --git a/cpp/src/utilities/version_info.cpp b/cpp/src/utilities/version_info.cpp new file mode 100644 index 0000000000..902495a48c --- /dev/null +++ b/cpp/src/utilities/version_info.cpp @@ 
-0,0 +1,227 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "version_info.hpp" + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace cuopt { + +static int get_physical_cores() +{ + std::ifstream cpuinfo("/proc/cpuinfo"); + if (!cpuinfo.is_open()) return 0; + + std::string line; + int physical_id = -1, core_id = -1; + std::set> cores; + + while (std::getline(cpuinfo, line)) { + if (line.find("physical id") != std::string::npos) { + physical_id = std::stoi(line.substr(line.find(":") + 1)); + } else if (line.find("core id") != std::string::npos) { + core_id = std::stoi(line.substr(line.find(":") + 1)); + } + + if (physical_id != -1 && core_id != -1) { + cores.insert({physical_id, core_id}); + physical_id = -1; + core_id = -1; + } + } + + if (cores.empty()) { + cpuinfo.clear(); + cpuinfo.seekg(0); + while (std::getline(cpuinfo, line)) { + if (line.find("cpu cores") != std::string::npos) { + return std::stoi(line.substr(line.find(":") + 1)); + } + } + return 1; + } + return cores.size(); +} + +static std::string get_cpu_model_from_proc() +{ + std::ifstream cpuinfo("/proc/cpuinfo"); + if (!cpuinfo.is_open()) return ""; + + std::string line; + while (std::getline(cpuinfo, line)) { + std::size_t pos = line.find("model name"); + if (pos == 
std::string::npos) pos = line.find("Processor"); + if (pos != std::string::npos) { + std::size_t colon = line.find(':', pos); + if (colon != std::string::npos) return line.substr(colon + 2); // Skip ": " + } + } + return ""; +} + +// From https://gcc.gnu.org/onlinedocs/gcc/x86-Built-in-Functions.html +// Also supported by clang +static std::string get_cpu_model_builtin() +{ +#if (defined(__x86_64__) || defined(__i386__)) && (defined(__GNUC__) || defined(__clang__)) + __builtin_cpu_init(); + return __builtin_cpu_is("amd") ? "AMD CPU" + : __builtin_cpu_is("intel") ? "Intel CPU" + : __builtin_cpu_is("atom") ? "Intel Atom CPU" + : __builtin_cpu_is("slm") ? "Intel Silvermont CPU" + : __builtin_cpu_is("core2") ? "Intel Core 2 CPU" + : __builtin_cpu_is("corei7") ? "Intel Core i7 CPU" + : __builtin_cpu_is("nehalem") ? "Intel Core i7 Nehalem CPU" + : __builtin_cpu_is("westmere") ? "Intel Core i7 Westmere CPU" + : __builtin_cpu_is("sandybridge") ? "Intel Core i7 Sandy Bridge CPU" + : __builtin_cpu_is("ivybridge") ? "Intel Core i7 Ivy Bridge CPU" + : __builtin_cpu_is("haswell") ? "Intel Core i7 Haswell CPU" + : __builtin_cpu_is("broadwell") ? "Intel Core i7 Broadwell CPU" + : __builtin_cpu_is("skylake") ? "Intel Core i7 Skylake CPU" + : __builtin_cpu_is("skylake-avx512") ? "Intel Core i7 Skylake AVX512 CPU" + : __builtin_cpu_is("cannonlake") ? "Intel Core i7 Cannon Lake CPU" + : __builtin_cpu_is("icelake-client") ? "Intel Core i7 Ice Lake Client CPU" + : __builtin_cpu_is("icelake-server") ? "Intel Core i7 Ice Lake Server CPU" + : __builtin_cpu_is("cascadelake") ? "Intel Core i7 Cascadelake CPU" + : __builtin_cpu_is("tigerlake") ? "Intel Core i7 Tigerlake CPU" + : __builtin_cpu_is("cooperlake") ? "Intel Core i7 Cooperlake CPU" + : __builtin_cpu_is("sapphirerapids") ? "Intel Core i7 sapphirerapids CPU" + : __builtin_cpu_is("alderlake") ? "Intel Core i7 Alderlake CPU" + : __builtin_cpu_is("rocketlake") ? "Intel Core i7 Rocketlake CPU" + : __builtin_cpu_is("graniterapids") ? 
"Intel Core i7 graniterapids CPU" + : __builtin_cpu_is("graniterapids-d") ? "Intel Core i7 graniterapids D CPU" + : __builtin_cpu_is("bonnell") ? "Intel Atom Bonnell CPU" + : __builtin_cpu_is("silvermont") ? "Intel Atom Silvermont CPU" + : __builtin_cpu_is("goldmont") ? "Intel Atom Goldmont CPU" + : __builtin_cpu_is("goldmont-plus") ? "Intel Atom Goldmont Plus CPU" + : __builtin_cpu_is("tremont") ? "Intel Atom Tremont CPU" + : __builtin_cpu_is("sierraforest") ? "Intel Atom Sierra Forest CPU" + : __builtin_cpu_is("grandridge") ? "Intel Atom Grand Ridge CPU" + : __builtin_cpu_is("amdfam10h") ? "AMD Family 10h CPU" + : __builtin_cpu_is("barcelona") ? "AMD Family 10h Barcelona CPU" + : __builtin_cpu_is("shanghai") ? "AMD Family 10h Shanghai CPU" + : __builtin_cpu_is("istanbul") ? "AMD Family 10h Istanbul CPU" + : __builtin_cpu_is("btver1") ? "AMD Family 14h CPU" + : __builtin_cpu_is("amdfam15h") ? "AMD Family 15h CPU" + : __builtin_cpu_is("bdver1") ? "AMD Family 15h Bulldozer version 1" + : __builtin_cpu_is("bdver2") ? "AMD Family 15h Bulldozer version 2" + : __builtin_cpu_is("bdver3") ? "AMD Family 15h Bulldozer version 3" + : __builtin_cpu_is("bdver4") ? "AMD Family 15h Bulldozer version 4" + : __builtin_cpu_is("btver2") ? "AMD Family 16h CPU" + : __builtin_cpu_is("amdfam17h") ? "AMD Family 17h CPU" + : __builtin_cpu_is("znver1") ? "AMD Family 17h Zen version 1" + : __builtin_cpu_is("znver2") ? "AMD Family 17h Zen version 2" + : __builtin_cpu_is("amdfam19h") ? 
"AMD Family 19h CPU" + : "Unknown"; +#else + return "Unknown"; +#endif +} + +static std::string get_cpu_model() +{ + if (auto model_from_proc = get_cpu_model_from_proc(); !model_from_proc.empty()) { + return model_from_proc; + } else if (auto model_from_builtin = get_cpu_model_builtin(); !model_from_builtin.empty()) { + return model_from_builtin; + } + return "Unknown"; +} + +static double get_available_memory_gb() +{ + std::ifstream meminfo("/proc/meminfo"); + if (!meminfo.is_open()) return 0.0; + + std::string line; + long kb = 0; + while (std::getline(meminfo, line)) { + if (line.find("MemAvailable:") == 0 || line.find("MemFree:") == 0) { + std::size_t pos = line.find_first_of("0123456789"); + if (pos != std::string::npos) { + kb = std::stol(line.substr(pos)); + break; + } + } + } + + return kb / (1024.0 * 1024.0); // Convert KB to GB +} + +void print_version_info() +{ + int device_id = 0; + cudaGetDevice(&device_id); + cudaDeviceProp device_prop; + cudaGetDeviceProperties(&device_prop, device_id); + cudaUUID_t uuid = device_prop.uuid; + char uuid_str[37] = {0}; + snprintf(uuid_str, + sizeof(uuid_str), + "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", + uuid.bytes[0], + uuid.bytes[1], + uuid.bytes[2], + uuid.bytes[3], + uuid.bytes[4], + uuid.bytes[5], + uuid.bytes[6], + uuid.bytes[7], + uuid.bytes[8], + uuid.bytes[9], + uuid.bytes[10], + uuid.bytes[11], + uuid.bytes[12], + uuid.bytes[13], + uuid.bytes[14], + uuid.bytes[15]); + int version = 0; + cudaRuntimeGetVersion(&version); + int major = version / 1000; + int minor = (version % 1000) / 10; + CUOPT_LOG_INFO("cuOpt version: %d.%d.%d, git hash: %s, host arch: %s, device archs: %s", + CUOPT_VERSION_MAJOR, + CUOPT_VERSION_MINOR, + CUOPT_VERSION_PATCH, + CUOPT_GIT_COMMIT_HASH, + CUOPT_CPU_ARCHITECTURE, + CUOPT_CUDA_ARCHITECTURES); + CUOPT_LOG_INFO("CPU: %s, threads (physical/logical): %d/%d, RAM: %.2f GiB", + get_cpu_model().c_str(), + get_physical_cores(), + 
std::thread::hardware_concurrency(), + get_available_memory_gb()); + CUOPT_LOG_INFO("CUDA %d.%d, device: %s (ID %d), VRAM: %.2f GiB", + major, + minor, + device_prop.name, + device_id, + (double)device_prop.totalGlobalMem / (1024.0 * 1024.0 * 1024.0)); + CUOPT_LOG_INFO("CUDA device UUID: %s\n", uuid_str); +} + +} // namespace cuopt diff --git a/cpp/src/utilities/version_info.hpp b/cpp/src/utilities/version_info.hpp new file mode 100644 index 0000000000..7549fcdccd --- /dev/null +++ b/cpp/src/utilities/version_info.hpp @@ -0,0 +1,21 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +namespace cuopt { +void print_version_info(); +} From 29fae5f19f48f2725f74bd2358af110ab6f78940 Mon Sep 17 00:00:00 2001 From: Hugo Linsenmaier Date: Fri, 5 Sep 2025 10:28:16 -0700 Subject: [PATCH 15/33] Enable parallelism for root node presolve (#371) Papilo's problem builder was spending a long amount of time building the constraint matrix. Rewrote that step -> provides ~1.8x speedup on presolve With TBB there is another ~1.2x additional speedup so ~2x. 
[Parallel Presolve.xlsx](https://github.com/user-attachments/files/22145182/Parallel.Presolve.xlsx) Authors: - Hugo Linsenmaier (https://github.com/hlinsen) Approvers: - Rajesh Gandham (https://github.com/rg20) - Ramakrishnap (https://github.com/rgsl888prabhu) URL: https://github.com/NVIDIA/cuopt/pull/371 --- ci/build_wheel_libcuopt.sh | 4 +- ...{install_boost.sh => install_boost_tbb.sh} | 10 +-- .../all_cuda-129_arch-aarch64.yaml | 1 + .../all_cuda-129_arch-x86_64.yaml | 1 + conda/recipes/libcuopt/recipe.yaml | 2 + cpp/CMakeLists.txt | 3 +- cpp/cmake/thirdparty/FindTBB.cmake | 90 +++++++++++++++++++ cpp/src/mip/presolve/third_party_presolve.cpp | 64 ++++++++++--- cpp/src/mip/presolve/third_party_presolve.hpp | 3 +- cpp/src/mip/solve.cu | 3 +- dependencies.yaml | 1 + python/libcuopt/CMakeLists.txt | 1 - 12 files changed, 159 insertions(+), 24 deletions(-) rename ci/utils/{install_boost.sh => install_boost_tbb.sh} (82%) create mode 100644 cpp/cmake/thirdparty/FindTBB.cmake diff --git a/ci/build_wheel_libcuopt.sh b/ci/build_wheel_libcuopt.sh index 7651b52f8c..2b3a8b0fb5 100755 --- a/ci/build_wheel_libcuopt.sh +++ b/ci/build_wheel_libcuopt.sh @@ -21,8 +21,8 @@ source rapids-init-pip package_name="libcuopt" package_dir="python/libcuopt" -# Install Boost -bash ci/utils/install_boost.sh +# Install Boost and TBB +bash ci/utils/install_boost_tbb.sh export SKBUILD_CMAKE_ARGS="-DCUOPT_BUILD_WHEELS=ON;-DDISABLE_DEPRECATION_WARNING=ON" diff --git a/ci/utils/install_boost.sh b/ci/utils/install_boost_tbb.sh similarity index 82% rename from ci/utils/install_boost.sh rename to ci/utils/install_boost_tbb.sh index bd85d41e90..c3e1eb0bdc 100644 --- a/ci/utils/install_boost.sh +++ b/ci/utils/install_boost_tbb.sh @@ -17,19 +17,19 @@ set -euo pipefail -# Install Boost +# Install Boost and TBB if [ -f /etc/os-release ]; then . /etc/os-release if [[ "$ID" == "rocky" ]]; then - echo "Detected Rocky Linux. Installing Boost via dnf..." 
- dnf install -y boost-devel + echo "Detected Rocky Linux. Installing Boost and TBB via dnf..." + dnf install -y boost-devel tbb-devel if [[ "$(uname -m)" == "x86_64" ]]; then dnf install -y gcc-toolset-14-libquadmath-devel fi elif [[ "$ID" == "ubuntu" ]]; then - echo "Detected Ubuntu. Installing Boost via apt..." + echo "Detected Ubuntu. Installing Boost and TBB via apt..." apt-get update - apt-get install -y libboost-dev + apt-get install -y libboost-dev libtbb-dev else echo "Unknown OS: $ID. Please install Boost development libraries manually." exit 1 diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 0db519d773..adf1d9d9f1 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -79,6 +79,7 @@ dependencies: - sphinxcontrib-openapi - sphinxcontrib-websupport - sysroot_linux-aarch64==2.28 +- tbb-devel - uvicorn==0.34.* - zlib - pip: diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index ac47f68083..23a5ecef35 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -79,6 +79,7 @@ dependencies: - sphinxcontrib-openapi - sphinxcontrib-websupport - sysroot_linux-64==2.28 +- tbb-devel - uvicorn==0.34.* - zlib - pip: diff --git a/conda/recipes/libcuopt/recipe.yaml b/conda/recipes/libcuopt/recipe.yaml index 176e296ddf..4af09ce4d6 100644 --- a/conda/recipes/libcuopt/recipe.yaml +++ b/conda/recipes/libcuopt/recipe.yaml @@ -57,6 +57,7 @@ cache: - cuda-version =${{ cuda_version }} - cmake ${{ cmake_version }} - ninja + - tbb-devel - zlib - bzip2 host: @@ -72,6 +73,7 @@ cache: - libcusparse-dev - cuda-cudart-dev - boost + - tbb-devel - zlib - bzip2 diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 76e5b16a03..d2e31ad1f4 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -177,8 +177,8 @@ 
FetchContent_Declare( SYSTEM ) +find_package(TBB REQUIRED) set(BUILD_TESTING OFF CACHE BOOL "Disable test build for papilo") -set(TBB OFF CACHE BOOL "Disable TBB") set(PAPILO_NO_BINARIES ON) option(LUSOL "Disable LUSOL" OFF) @@ -254,6 +254,7 @@ target_include_directories(cuopt set(CUOPT_PRIVATE_CUDA_LIBS CUDA::curand CUDA::cusolver + TBB::tbb OpenMP::OpenMP_CXX) list(PREPEND CUOPT_PRIVATE_CUDA_LIBS CUDA::cublasLt) diff --git a/cpp/cmake/thirdparty/FindTBB.cmake b/cpp/cmake/thirdparty/FindTBB.cmake new file mode 100644 index 0000000000..f71f816208 --- /dev/null +++ b/cpp/cmake/thirdparty/FindTBB.cmake @@ -0,0 +1,90 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# FindTBB.cmake - Find TBB (Threading Building Blocks) library +# +# This module defines the following variables: +# TBB_FOUND - True if TBB is found +# TBB_INCLUDE_DIRS - TBB include directories +# TBB_LIBRARIES - TBB libraries +# TBB::tbb - Imported target for TBB + +# Try pkg-config first +find_package(PkgConfig QUIET) +if(PkgConfig_FOUND) + pkg_check_modules(PC_TBB QUIET tbb) +endif() + +find_path(TBB_INCLUDE_DIR + NAMES tbb/tbb.h + PATHS + ${PC_TBB_INCLUDE_DIRS} + /usr/include + /usr/local/include + /opt/intel/tbb/include + /opt/intel/oneapi/tbb/latest/include +) + +find_library(TBB_LIBRARY + NAMES tbb + PATHS + ${PC_TBB_LIBRARY_DIRS} + /usr/lib + /usr/lib64 + /usr/local/lib + /usr/local/lib64 + /opt/intel/tbb/lib + /opt/intel/oneapi/tbb/latest/lib +) + +find_library(TBB_MALLOC_LIBRARY + NAMES tbbmalloc + PATHS + ${PC_TBB_LIBRARY_DIRS} + /usr/lib + /usr/lib64 + /usr/local/lib + /usr/local/lib64 + /opt/intel/tbb/lib + /opt/intel/oneapi/tbb/latest/lib +) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(TBB + REQUIRED_VARS TBB_INCLUDE_DIR TBB_LIBRARY +) + +if(TBB_FOUND AND NOT TARGET TBB::tbb) + add_library(TBB::tbb UNKNOWN IMPORTED) + set_target_properties(TBB::tbb PROPERTIES + IMPORTED_LOCATION "${TBB_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${TBB_INCLUDE_DIR}" + ) + + if(TBB_MALLOC_LIBRARY) + set_target_properties(TBB::tbb PROPERTIES + INTERFACE_LINK_LIBRARIES "${TBB_MALLOC_LIBRARY}" + ) + endif() + + # Add compile definitions from pkg-config if available + if(PC_TBB_CFLAGS_OTHER) + set_target_properties(TBB::tbb PROPERTIES + INTERFACE_COMPILE_OPTIONS "${PC_TBB_CFLAGS_OTHER}" + ) + endif() +endif() + +mark_as_advanced(TBB_INCLUDE_DIR TBB_LIBRARY TBB_MALLOC_LIBRARY) diff --git a/cpp/src/mip/presolve/third_party_presolve.cpp b/cpp/src/mip/presolve/third_party_presolve.cpp index bf863f5d65..aa32bd8716 100644 --- a/cpp/src/mip/presolve/third_party_presolve.cpp +++ b/cpp/src/mip/presolve/third_party_presolve.cpp @@ -19,9 +19,13 @@ 
#include #include #include +#include +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstringop-overflow" // ignore boost error for pip wheel build #include #include +#pragma GCC diagnostic pop namespace cuopt::linear_programming::detail { @@ -123,18 +127,32 @@ papilo::Problem build_papilo_problem(const optimization_problem_t builder.setRowRhsAll(h_constr_ub); } + std::vector h_row_flags(h_constr_lb.size()); + std::vector> h_entries; // Add constraints row by row for (size_t i = 0; i < h_constr_lb.size(); ++i) { // Get row entries i_t row_start = h_offsets[i]; i_t row_end = h_offsets[i + 1]; i_t num_entries = row_end - row_start; - builder.addRowEntries( - i, num_entries, h_variables.data() + row_start, h_coefficients.data() + row_start); - builder.setRowLhsInf(i, h_constr_lb[i] == -std::numeric_limits::infinity()); - builder.setRowRhsInf(i, h_constr_ub[i] == std::numeric_limits::infinity()); - if (h_constr_lb[i] == -std::numeric_limits::infinity()) { builder.setRowLhs(i, 0); } - if (h_constr_ub[i] == std::numeric_limits::infinity()) { builder.setRowRhs(i, 0); } + for (size_t j = 0; j < num_entries; ++j) { + h_entries.push_back( + std::make_tuple(i, h_variables[row_start + j], h_coefficients[row_start + j])); + } + + if (h_constr_lb[i] == -std::numeric_limits::infinity()) { + h_row_flags[i].set(papilo::RowFlag::kLhsInf); + } else { + h_row_flags[i].unset(papilo::RowFlag::kLhsInf); + } + if (h_constr_ub[i] == std::numeric_limits::infinity()) { + h_row_flags[i].set(papilo::RowFlag::kRhsInf); + } else { + h_row_flags[i].unset(papilo::RowFlag::kRhsInf); + } + + if (h_constr_lb[i] == -std::numeric_limits::infinity()) { h_constr_lb[i] = 0; } + if (h_constr_ub[i] == std::numeric_limits::infinity()) { h_constr_ub[i] = 0; } } for (size_t i = 0; i < h_var_lb.size(); ++i) { @@ -143,7 +161,24 @@ papilo::Problem build_papilo_problem(const optimization_problem_t if (h_var_lb[i] == -std::numeric_limits::infinity()) { builder.setColLb(i, 0); } if (h_var_ub[i] == 
std::numeric_limits::infinity()) { builder.setColUb(i, 0); } } - return builder.build(); + + auto problem = builder.build(); + + if (h_entries.size()) { + auto constexpr const sorted_entries = true; + auto csr_storage = papilo::SparseStorage(h_entries, num_rows, num_cols, sorted_entries); + problem.setConstraintMatrix(csr_storage, h_constr_lb, h_constr_ub, h_row_flags); + + papilo::ConstraintMatrix& matrix = problem.getConstraintMatrix(); + for (int i = 0; i < problem.getNRows(); ++i) { + papilo::RowFlags rowFlag = matrix.getRowFlags()[i]; + if (!rowFlag.test(papilo::RowFlag::kRhsInf) && !rowFlag.test(papilo::RowFlag::kLhsInf) && + matrix.getLeftHandSides()[i] == matrix.getRightHandSides()[i]) + matrix.getRowFlags()[i].set(papilo::RowFlag::kEquation); + } + } + + return problem; } template @@ -298,14 +333,16 @@ void set_presolve_methods(papilo::Presolve& presolver, problem_category_t c presolver.addPresolveMethod(uptr(new papilo::Substitution())); } -template +template void set_presolve_options(papilo::Presolve& presolver, problem_category_t category, f_t absolute_tolerance, f_t relative_tolerance, - double time_limit) + double time_limit, + i_t num_cpu_threads) { - presolver.getPresolveOptions().tlim = time_limit; + presolver.getPresolveOptions().tlim = time_limit; + presolver.getPresolveOptions().threads = num_cpu_threads; // user setting or 0 (automatic) } template @@ -314,7 +351,8 @@ std::pair, bool> third_party_presolve_t papilo_problem = build_papilo_problem(op_problem); @@ -325,8 +363,8 @@ std::pair, bool> third_party_presolve_t presolver; set_presolve_methods(presolver, category); - set_presolve_options( - presolver, category, absolute_tolerance, relative_tolerance, time_limit); + set_presolve_options( + presolver, category, absolute_tolerance, relative_tolerance, time_limit, num_cpu_threads); // Disable papilo logs presolver.setVerbosityLevel(papilo::VerbosityLevel::kQuiet); diff --git a/cpp/src/mip/presolve/third_party_presolve.hpp 
b/cpp/src/mip/presolve/third_party_presolve.hpp index d7096a1a2f..5631fc12e2 100644 --- a/cpp/src/mip/presolve/third_party_presolve.hpp +++ b/cpp/src/mip/presolve/third_party_presolve.hpp @@ -31,7 +31,8 @@ class third_party_presolve_t { problem_category_t category, f_t absolute_tolerance, f_t relative_tolerance, - double time_limit); + double time_limit, + i_t num_cpu_threads = 0); void undo(rmm::device_uvector& primal_solution, rmm::device_uvector& dual_solution, diff --git a/cpp/src/mip/solve.cu b/cpp/src/mip/solve.cu index a3814f2654..dc2f77d906 100644 --- a/cpp/src/mip/solve.cu +++ b/cpp/src/mip/solve.cu @@ -207,7 +207,8 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, cuopt::linear_programming::problem_category_t::MIP, settings.tolerances.absolute_tolerance, settings.tolerances.relative_tolerance, - presolve_time_limit); + presolve_time_limit, + settings.num_cpu_threads); if (!feasible) { return mip_solution_t(mip_termination_status_t::Infeasible, solver_stats_t{}, diff --git a/dependencies.yaml b/dependencies.yaml index 1ea0f59528..d08a5ef960 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -306,6 +306,7 @@ dependencies: - cpp-argparse - librmm==25.10.* - libraft-headers==25.10.* + - tbb-devel - zlib - bzip2 test_cpp: diff --git a/python/libcuopt/CMakeLists.txt b/python/libcuopt/CMakeLists.txt index 85b7c98513..f4124477de 100644 --- a/python/libcuopt/CMakeLists.txt +++ b/python/libcuopt/CMakeLists.txt @@ -43,7 +43,6 @@ FetchContent_Declare( ) FetchContent_MakeAvailable(argparse) -set(TBB OFF CACHE BOOL "Disable TBB") set(BUILD_TESTING OFF CACHE BOOL "Disable test build for papilo") set(PAPILO_NO_BINARIES ON) option(LUSOL "Disable LUSOL" OFF) From bbdf0e6f035210f28e01e5dbd854e2513a9357d2 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 5 Sep 2025 13:55:42 -0500 Subject: [PATCH 16/33] Build and test with CUDA 13.0.0 (#366) Contributes to https://github.com/rapidsai/build-planning/issues/208 * uses CUDA 13.0.0 to build and test * 
moves some dependency pins: - `cuda-python`: `>=12.9.2` (CUDA 12), `>=13.0.1` (CUDA 13) - `cupy`: `>=13.6.0` * declares `cuda-python` runtime dependency for wheels ([it was previously only declared for conda packages](https://github.com/NVIDIA/cuopt/blob/c62320447414c47f25ea67bb2570e05c7d0d29ac/conda/recipes/cuopt/recipe.yaml#L70)) Contributes to https://github.com/rapidsai/build-planning/issues/68 * updates to CUDA 13 dependencies in fallback entries in `dependencies.yaml` matrices (i.e., the ones that get written to `pyproject.toml` in source control) ## Notes for Reviewers This switches GitHub Actions workflows to the `cuda13.0` branch from here: https://github.com/rapidsai/shared-workflows/pull/413 A future round of PRs will revert that back to `branch-25.10`, once all of RAPIDS supports CUDA 13. ## Issue Closes #294 Authors: - James Lamb (https://github.com/jameslamb) - Ramakrishnap (https://github.com/rgsl888prabhu) Approvers: - Ramakrishnap (https://github.com/rgsl888prabhu) - Hugo Linsenmaier (https://github.com/hlinsen) URL: https://github.com/NVIDIA/cuopt/pull/366 --- .github/workflows/build.yaml | 33 ++++--- .github/workflows/pr.yaml | 35 ++++---- .github/workflows/test.yaml | 8 +- .../trigger-breaking-change-alert.yaml | 2 +- CONTRIBUTING.md | 6 +- ci/test_wheel_cuopt.sh | 2 +- .../all_cuda-129_arch-aarch64.yaml | 7 +- .../all_cuda-129_arch-x86_64.yaml | 7 +- .../all_cuda-130_arch-aarch64.yaml | 89 ++++++++++++++++++ .../all_cuda-130_arch-x86_64.yaml | 89 ++++++++++++++++++ conda/recipes/cuopt/recipe.yaml | 14 +-- cpp/CMakeLists.txt | 2 +- .../load_balanced_bounds_presolve_helpers.cuh | 84 ++++++++++++++--- dependencies.yaml | 90 +++++++++++++++++-- docs/cuopt/source/cuopt-c/quick-start.rst | 30 +++++-- .../cuopt/source/cuopt-python/quick-start.rst | 28 ++++-- .../cuopt/source/cuopt-server/quick-start.rst | 28 ++++-- .../cuopt/linear_programming/pyproject.toml | 1 + python/cuopt/pyproject.toml | 8 +- python/cuopt_server/pyproject.toml | 1 + 
python/libcuopt/pyproject.toml | 2 +- 21 files changed, 470 insertions(+), 96 deletions(-) create mode 100644 conda/environments/all_cuda-130_arch-aarch64.yaml create mode 100644 conda/environments/all_cuda-130_arch-x86_64.yaml diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 263b8e5418..7fdf5ee5d8 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -43,7 +43,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@cuda13.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -53,7 +53,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@cuda13.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -63,7 +63,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@cuda13.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -71,7 +71,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-cuopt-mps-parser: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -81,10 +81,13 @@ jobs: package-name: cuopt_mps_parser package-type: python append-cuda-suffix: false + pure-wheel: true + # need 1 build per Python version and arch (but CUDA version doesn't matter so choose the latest) + matrix_filter: 
'group_by([.ARCH, (.PY_VER |split(".") | map(tonumber))])|map(max_by([(.CUDA_VER|split(".")|map(tonumber))]))' wheel-publish-cuopt-mps-parser: needs: wheel-build-cuopt-mps-parser secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda13.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -95,7 +98,7 @@ jobs: wheel-build-libcuopt: needs: wheel-build-cuopt-mps-parser secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -108,7 +111,7 @@ jobs: wheel-publish-libcuopt: needs: wheel-build-libcuopt secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda13.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -119,7 +122,7 @@ jobs: wheel-build-cuopt: needs: [wheel-build-cuopt-mps-parser, wheel-build-libcuopt] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -131,7 +134,7 @@ jobs: wheel-publish-cuopt: needs: wheel-build-cuopt secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda13.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -142,7 +145,7 @@ jobs: wheel-build-cuopt-server: needs: wheel-build-cuopt secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -154,7 +157,7 @@ jobs: wheel-publish-cuopt-server: needs: wheel-build-cuopt-server secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda13.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -165,7 +168,7 @@ jobs: docs-build: needs: [python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda13.0 with: build_type: ${{ inputs.build_type || 'branch' }} node_type: "gpu-l4-latest-1" @@ -180,7 +183,7 @@ jobs: wheel-build-cuopt-sh-client: needs: wheel-build-cuopt secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -190,10 +193,12 @@ jobs: package-name: cuopt_sh_client package-type: python append-cuda-suffix: false + # need 1 build per Python version and arch (but CUDA version doesn't matter) + matrix_filter: '[max_by((.CUDA_VER | split(".") | map(tonumber))]' wheel-publish-cuopt-sh-client: needs: wheel-build-cuopt-sh-client secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda13.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index b23bf5cfc4..2b34641888 100644 --- a/.github/workflows/pr.yaml +++ 
b/.github/workflows/pr.yaml @@ -44,10 +44,10 @@ jobs: - wheel-build-cuopt-sh-client - test-self-hosted-server secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@cuda13.0 changed-files: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@cuda13.0 with: files_yaml: | test_cpp: @@ -112,21 +112,21 @@ jobs: - '!python/nvcf_client/**' checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@cuda13.0 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@cuda13.0 with: build_type: pull-request script: ci/build_cpp.sh conda-cpp-tests: needs: [conda-cpp-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@cuda13.0 #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp with: build_type: pull-request @@ -134,14 +134,14 @@ jobs: conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@cuda13.0 with: build_type: pull-request script: ci/build_python.sh conda-python-tests: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@cuda13.0 #if: 
fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: run_codecov: false @@ -150,7 +150,7 @@ jobs: docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda13.0 with: build_type: pull-request node_type: "gpu-l4-latest-1" @@ -161,17 +161,19 @@ jobs: script: "ci/build_docs.sh" wheel-build-cuopt-mps-parser: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 with: build_type: pull-request script: ci/build_wheel_cuopt_mps_parser.sh package-name: cuopt_mps_parser package-type: python append-cuda-suffix: false + # need 1 build per Python version and arch (but CUDA version doesn't matter so choose the latest) + matrix_filter: 'group_by([.ARCH, (.PY_VER |split(".") | map(tonumber))])|map(max_by([(.CUDA_VER|split(".")|map(tonumber))]))' wheel-build-libcuopt: needs: wheel-build-cuopt-mps-parser secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 with: # build for every combination of arch and CUDA version, but only for the latest Python matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) @@ -182,7 +184,7 @@ jobs: wheel-build-cuopt: needs: [wheel-build-cuopt-mps-parser, wheel-build-libcuopt] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 with: build_type: pull-request script: ci/build_wheel_cuopt.sh @@ -191,7 +193,7 @@ jobs: wheel-tests-cuopt: needs: [wheel-build-cuopt, wheel-build-cuopt-mps-parser, wheel-build-cuopt-sh-client, changed-files] secrets: 
inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda13.0 #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_cuopt with: build_type: pull-request @@ -199,7 +201,7 @@ jobs: wheel-build-cuopt-server: needs: wheel-build-cuopt secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 with: build_type: pull-request script: ci/build_wheel_cuopt_server.sh @@ -207,17 +209,20 @@ jobs: package-type: python wheel-build-cuopt-sh-client: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 with: build_type: pull-request script: ci/build_wheel_cuopt_sh_client.sh package-name: cuopt_sh_client package-type: python append-cuda-suffix: false + pure-wheel: true + # only need 1 build (noarch package): this selects amd64, oldest-supported Python, latest-supported CUDA + matrix_filter: '[map(select(.ARCH == "amd64")) | min_by((.PY_VER | split(".") | map(tonumber)), (.CUDA_VER | split(".") | map(-tonumber)))]' wheel-tests-cuopt-server: needs: [wheel-build-cuopt-server, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda13.0 #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_cuopt_server with: build_type: pull-request diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 80ba1f869f..6bd4caac04 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -34,7 +34,7 @@ on: jobs: conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.10 + uses: 
rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@cuda13.0 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -43,7 +43,7 @@ jobs: script: ci/test_cpp.sh conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@cuda13.0 with: run_codecov: false build_type: ${{ inputs.build_type }} @@ -53,7 +53,7 @@ jobs: script: ci/test_python.sh wheel-tests-cuopt: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda13.0 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -62,7 +62,7 @@ jobs: script: ci/test_wheel_cuopt.sh wheel-tests-cuopt-server: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda13.0 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml index 1317f4a8f9..800f780e7b 100644 --- a/.github/workflows/trigger-breaking-change-alert.yaml +++ b/.github/workflows/trigger-breaking-change-alert.yaml @@ -27,7 +27,7 @@ jobs: trigger-notifier: if: contains(github.event.pull_request.labels.*.name, 'breaking') secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@cuda13.0 with: sender_login: ${{ github.event.sender.login }} sender_avatar: ${{ github.event.sender.avatar_url }} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 133c86e28b..9ff4944b7d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -70,12 +70,12 @@ library features. 
The following instructions are for building with a conda envir CUDA/GPU Runtime: -* CUDA 12.9 +* CUDA 12.0 or higher * Volta architecture or better ([Compute Capability](https://docs.nvidia.com/deploy/cuda-compatibility/) >=7.0) Python: -* Python >=3.10.x, <= 3.12.x +* Python >=3.10.x, <= 3.13.x OS: @@ -110,7 +110,7 @@ Please install conda if you don't have it already. You can install [miniforge](h # create the conda environment (assuming in base `cuopt` directory) # note: cuOpt currently doesn't support `channel_priority: strict`; # use `channel_priority: flexible` instead -conda env create --name cuopt_dev --file conda/environments/all_cuda-128_arch-x86_64.yaml +conda env create --name cuopt_dev --file conda/environments/all_cuda-130_arch-x86_64.yaml # activate the environment conda activate cuopt_dev ``` diff --git a/ci/test_wheel_cuopt.sh b/ci/test_wheel_cuopt.sh index 7a16db43f1..77da9c1991 100755 --- a/ci/test_wheel_cuopt.sh +++ b/ci/test_wheel_cuopt.sh @@ -24,7 +24,7 @@ source rapids-init-pip # Download the packages built in the previous step RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")" CUOPT_MPS_PARSER_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="cuopt_mps_parser" rapids-download-wheels-from-github python) -CUOPT_SH_CLIENT_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="cuopt_sh_client" rapids-download-wheels-from-github python) +CUOPT_SH_CLIENT_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="cuopt_sh_client" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) CUOPT_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="cuopt_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github python) LIBCUOPT_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libcuopt_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index adf1d9d9f1..5353890305 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ 
b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -16,10 +16,11 @@ dependencies: - cpp-argparse - cuda-nvcc - cuda-nvtx-dev +- cuda-python>=12.9.2,<13.0a0 - cuda-sanitizer-api - cuda-version=12.9 - cudf==25.10.*,>=0.0.0a0 -- cupy>=12.0.0 +- cupy>=13.6.0 - cuvs==25.10.*,>=0.0.0a0 - cxx-compiler - cython>=3.0.3 @@ -48,8 +49,8 @@ dependencies: - myst-parser - ninja - notebook -- numba-cuda>=0.14.0 -- numba>=0.59.1 +- numba-cuda>=0.19.1,<0.20.0a0 +- numba>=0.60.0 - numpy>=1.23.5,<3.0a0 - numpydoc - pandas>=2.0 diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 23a5ecef35..63fb69d765 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -16,10 +16,11 @@ dependencies: - cpp-argparse - cuda-nvcc - cuda-nvtx-dev +- cuda-python>=12.9.2,<13.0a0 - cuda-sanitizer-api - cuda-version=12.9 - cudf==25.10.*,>=0.0.0a0 -- cupy>=12.0.0 +- cupy>=13.6.0 - cuvs==25.10.*,>=0.0.0a0 - cxx-compiler - cython>=3.0.3 @@ -48,8 +49,8 @@ dependencies: - myst-parser - ninja - notebook -- numba-cuda>=0.14.0 -- numba>=0.59.1 +- numba-cuda>=0.19.1,<0.20.0a0 +- numba>=0.60.0 - numpy>=1.23.5,<3.0a0 - numpydoc - pandas>=2.0 diff --git a/conda/environments/all_cuda-130_arch-aarch64.yaml b/conda/environments/all_cuda-130_arch-aarch64.yaml new file mode 100644 index 0000000000..5648d8799a --- /dev/null +++ b/conda/environments/all_cuda-130_arch-aarch64.yaml @@ -0,0 +1,89 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+channels: +- rapidsai +- rapidsai-nightly +- conda-forge +dependencies: +- boost +- breathe +- bzip2 +- c-compiler +- ccache +- clang-tools=20.1.4 +- clang==20.1.4 +- cmake>=3.30.4 +- cpp-argparse +- cuda-nvcc +- cuda-nvtx-dev +- cuda-python>=13.0.1,<14.0a0 +- cuda-sanitizer-api +- cuda-version=13.0 +- cudf==25.10.*,>=0.0.0a0 +- cupy>=13.6.0 +- cuvs==25.10.*,>=0.0.0a0 +- cxx-compiler +- cython>=3.0.3 +- doxygen=1.9.1 +- exhale +- fastapi +- folium +- gcc_linux-aarch64=14.* +- geopandas +- gmock +- gtest +- httpx +- ipython +- jsonref==1.1.0 +- libcurand-dev +- libcusolver-dev +- libcusparse-dev +- libgdal<3.9.0 +- libraft-headers==25.10.* +- librmm==25.10.* +- make +- matplotlib +- msgpack-numpy==0.4.8 +- msgpack-python==1.1.0 +- myst-nb +- myst-parser +- ninja +- notebook +- numba-cuda>=0.19.1,<0.20.0a0 +- numba>=0.60.0 +- numpy>=1.23.5,<3.0a0 +- numpydoc +- pandas>=2.0 +- pexpect +- pip +- polyline +- pre-commit +- psutil>=5.9,<6.0a0 +- pylibraft==25.10.*,>=0.0.0a0 +- pyrsistent +- pytest-cov +- pytest<8 +- python>=3.10,<3.14 +- raft-dask==25.10.*,>=0.0.0a0 +- rapids-build-backend>=0.4.0,<0.5.0.dev0 +- rapids-dask-dependency==25.10.*,>=0.0.0a0 +- rapids-logger==0.1.*,>=0.0.0a0 +- requests +- rmm==25.10.*,>=0.0.0a0 +- scikit-build-core>=0.10.0 +- scipy +- sphinx +- sphinx-copybutton +- sphinx-design +- sphinx-markdown-tables +- sphinx_rtd_theme +- sphinxcontrib-openapi +- sphinxcontrib-websupport +- sysroot_linux-aarch64==2.28 +- uvicorn==0.34.* +- zlib +- pip: + - nvidia_sphinx_theme + - swagger-plugin-for-sphinx + - veroviz +name: all_cuda-130_arch-aarch64 diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml b/conda/environments/all_cuda-130_arch-x86_64.yaml new file mode 100644 index 0000000000..36e522b235 --- /dev/null +++ b/conda/environments/all_cuda-130_arch-x86_64.yaml @@ -0,0 +1,89 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+channels: +- rapidsai +- rapidsai-nightly +- conda-forge +dependencies: +- boost +- breathe +- bzip2 +- c-compiler +- ccache +- clang-tools=20.1.4 +- clang==20.1.4 +- cmake>=3.30.4 +- cpp-argparse +- cuda-nvcc +- cuda-nvtx-dev +- cuda-python>=13.0.1,<14.0a0 +- cuda-sanitizer-api +- cuda-version=13.0 +- cudf==25.10.*,>=0.0.0a0 +- cupy>=13.6.0 +- cuvs==25.10.*,>=0.0.0a0 +- cxx-compiler +- cython>=3.0.3 +- doxygen=1.9.1 +- exhale +- fastapi +- folium +- gcc_linux-64=14.* +- geopandas +- gmock +- gtest +- httpx +- ipython +- jsonref==1.1.0 +- libcurand-dev +- libcusolver-dev +- libcusparse-dev +- libgdal<3.9.0 +- libraft-headers==25.10.* +- librmm==25.10.* +- make +- matplotlib +- msgpack-numpy==0.4.8 +- msgpack-python==1.1.0 +- myst-nb +- myst-parser +- ninja +- notebook +- numba-cuda>=0.19.1,<0.20.0a0 +- numba>=0.60.0 +- numpy>=1.23.5,<3.0a0 +- numpydoc +- pandas>=2.0 +- pexpect +- pip +- polyline +- pre-commit +- psutil>=5.9,<6.0a0 +- pylibraft==25.10.*,>=0.0.0a0 +- pyrsistent +- pytest-cov +- pytest<8 +- python>=3.10,<3.14 +- raft-dask==25.10.*,>=0.0.0a0 +- rapids-build-backend>=0.4.0,<0.5.0.dev0 +- rapids-dask-dependency==25.10.*,>=0.0.0a0 +- rapids-logger==0.1.*,>=0.0.0a0 +- requests +- rmm==25.10.*,>=0.0.0a0 +- scikit-build-core>=0.10.0 +- scipy +- sphinx +- sphinx-copybutton +- sphinx-design +- sphinx-markdown-tables +- sphinx_rtd_theme +- sphinxcontrib-openapi +- sphinxcontrib-websupport +- sysroot_linux-64==2.28 +- uvicorn==0.34.* +- zlib +- pip: + - nvidia_sphinx_theme + - swagger-plugin-for-sphinx + - veroviz +name: all_cuda-130_arch-x86_64 diff --git a/conda/recipes/cuopt/recipe.yaml b/conda/recipes/cuopt/recipe.yaml index f3b8cca9ab..84fec08793 100644 --- a/conda/recipes/cuopt/recipe.yaml +++ b/conda/recipes/cuopt/recipe.yaml @@ -67,17 +67,19 @@ requirements: - rmm =${{ dep_minor_version }} - scikit-build-core >=0.10.0 - cuda-cudart-dev - - cuda-python >=12.6.2,<13.0a0 + - if: cuda_major == "12" + then: cuda-python >=12.9.2,<13.0a0 + else: cuda-python 
>=13.0.1,<14.0a0 run: - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }} - cudf =${{ dep_minor_version }} - cuopt-mps-parser =${{ version }} - - cupy >=12.0.0 + - cupy >=13.6.0 - cuvs =${{ dep_minor_version }} - h5py - libcuopt =${{ version }} - - numba >=0.59.1 - - numba-cuda >=0.11.0 + - numba >=0.60.0 + - numba-cuda>=0.19.1,<0.20.0a0 - numpy >=1.23,<3.0a0 - pandas >=2.0 - pylibraft =${{ dep_minor_version }} @@ -89,7 +91,9 @@ requirements: - cuda-nvcc-impl # TODO: Add nvjitlink here # xref: https://github.com/rapidsai/cudf/issues/12822 - - cuda-python >=12.6.2,<13.0a0 + - if: cuda_major == "12" + then: cuda-python >=12.9.2,<13.0a0 + else: cuda-python >=13.0.1,<14.0a0 ignore_run_exports: by_name: - cuda-cudart diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d2e31ad1f4..a7897e7df8 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -113,7 +113,7 @@ message("-- Host target architecture = '${CMAKE_SYSTEM_PROCESSOR}'") # make the flags global in order to propagate flags to test cmake files set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --expt-extended-lambda") -if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.9 AND CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 13.0) +if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.9) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -static-global-template-stub=false") endif() list(APPEND CUOPT_CUDA_FLAGS -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xcompiler=-Werror) diff --git a/cpp/src/mip/presolve/load_balanced_bounds_presolve_helpers.cuh b/cpp/src/mip/presolve/load_balanced_bounds_presolve_helpers.cuh index 7eb2b41a9d..f3e5e2ee0e 100644 --- a/cpp/src/mip/presolve/load_balanced_bounds_presolve_helpers.cuh +++ b/cpp/src/mip/presolve/load_balanced_bounds_presolve_helpers.cuh @@ -29,8 +29,12 @@ #include #include +#include + namespace cuopt::linear_programming::detail { +#define CUDA_VER_13_0_UP (CUDART_VERSION >= 13000) + template i_t 
get_id_offset(const std::vector& bin_offsets, i_t degree_cutoff) { @@ -416,7 +420,13 @@ void create_activity_sub_warp(cudaGraph_t act_graph, } cudaGraphAddKernelNode(&act_sub_warp_node, act_graph, NULL, 0, &kernelNodeParams); - cudaGraphAddDependencies(act_graph, &act_sub_warp_node, &set_bounds_changed_node, 1); + cudaGraphAddDependencies(act_graph, + &act_sub_warp_node, // "from" nodes + &set_bounds_changed_node, // "to" nodes +#if CUDA_VER_13_0_UP + nullptr, // edge data +#endif + 1); // number of dependencies } } @@ -471,7 +481,13 @@ void create_activity_sub_warp(cudaGraph_t act_graph, } cudaGraphAddKernelNode(&act_sub_warp_node, act_graph, NULL, 0, &kernelNodeParams); - cudaGraphAddDependencies(act_graph, &act_sub_warp_node, &set_bounds_changed_node, 1); + cudaGraphAddDependencies(act_graph, + &act_sub_warp_node, // "from" nodes + &set_bounds_changed_node, // "to" nodes +#if CUDA_VER_13_0_UP + nullptr, // edge data +#endif + 1); // number of dependencies } } @@ -555,7 +571,13 @@ void create_activity_per_block(cudaGraph_t act_graph, } cudaGraphAddKernelNode(&act_block_node, act_graph, NULL, 0, &kernelNodeParams); - cudaGraphAddDependencies(act_graph, &act_block_node, &set_bounds_changed_node, 1); + cudaGraphAddDependencies(act_graph, + &act_block_node, // "from" nodes + &set_bounds_changed_node, // "to" nodes +#if CUDA_VER_13_0_UP + nullptr, // edge data +#endif + 1); // number of dependencies } } @@ -660,8 +682,20 @@ void create_activity_heavy_cnst(cudaGraph_t act_graph, cudaGraphAddKernelNode(&finalize_heavy_node, act_graph, NULL, 0, &kernelNodeParams); } - cudaGraphAddDependencies(act_graph, &act_heavy_node, &finalize_heavy_node, 1); - cudaGraphAddDependencies(act_graph, &finalize_heavy_node, &set_bounds_changed_node, 1); + cudaGraphAddDependencies(act_graph, + &act_heavy_node, // "from" nodes + &finalize_heavy_node, // "to" nodes +#if CUDA_VER_13_0_UP + nullptr, // edge data +#endif + 1); // number of dependencies + cudaGraphAddDependencies(act_graph, + 
&finalize_heavy_node, // "from" nodes + &set_bounds_changed_node, // "to" nodes +#if CUDA_VER_13_0_UP + nullptr, // edge data +#endif + 1); // number of dependencies } } @@ -873,7 +907,13 @@ void create_update_bounds_sub_warp(cudaGraph_t upd_graph, cudaGraphAddKernelNode(&upd_bnd_sub_warp_node, upd_graph, NULL, 0, &kernelNodeParams); RAFT_CUDA_TRY(cudaGetLastError()); - cudaGraphAddDependencies(upd_graph, &upd_bnd_sub_warp_node, &bounds_changed_node, 1); + cudaGraphAddDependencies(upd_graph, + &upd_bnd_sub_warp_node, // "from" nodes + &bounds_changed_node, // "to" nodes +#if CUDA_VER_13_0_UP + nullptr, // edge data +#endif + 1); // number of dependencies RAFT_CUDA_TRY(cudaGetLastError()); } } @@ -925,7 +965,13 @@ void create_update_bounds_sub_warp(cudaGraph_t upd_graph, cudaGraphAddKernelNode(&upd_bnd_sub_warp_node, upd_graph, NULL, 0, &kernelNodeParams); RAFT_CUDA_TRY(cudaGetLastError()); - cudaGraphAddDependencies(upd_graph, &upd_bnd_sub_warp_node, &bounds_changed_node, 1); + cudaGraphAddDependencies(upd_graph, + &upd_bnd_sub_warp_node, // "from" nodes + &bounds_changed_node, // "to" nodes +#if CUDA_VER_13_0_UP + nullptr, // edge data +#endif + 1); // number of dependencies RAFT_CUDA_TRY(cudaGetLastError()); } } @@ -1003,7 +1049,13 @@ void create_update_bounds_per_block(cudaGraph_t upd_graph, cudaGraphAddKernelNode(&upd_bnd_block_node, upd_graph, NULL, 0, &kernelNodeParams); RAFT_CUDA_TRY(cudaGetLastError()); - cudaGraphAddDependencies(upd_graph, &upd_bnd_block_node, &bounds_changed_node, 1); + cudaGraphAddDependencies(upd_graph, + &upd_bnd_block_node, // "from" nodes + &bounds_changed_node, // "to" nodes +#if CUDA_VER_13_0_UP + nullptr, // edge data +#endif + 1); // number of dependencies RAFT_CUDA_TRY(cudaGetLastError()); } } @@ -1097,9 +1149,21 @@ void create_update_bounds_heavy_vars(cudaGraph_t upd_graph, cudaGraphAddKernelNode(&finalize_heavy_node, upd_graph, NULL, 0, &kernelNodeParams); RAFT_CUDA_TRY(cudaGetLastError()); } - 
cudaGraphAddDependencies(upd_graph, &upd_bnd_heavy_node, &finalize_heavy_node, 1); + cudaGraphAddDependencies(upd_graph, + &upd_bnd_heavy_node, // "from" nodes + &finalize_heavy_node, // "to" nodes +#if CUDA_VER_13_0_UP + nullptr, // edge data +#endif + 1); // number of dependencies RAFT_CUDA_TRY(cudaGetLastError()); - cudaGraphAddDependencies(upd_graph, &finalize_heavy_node, &bounds_changed_node, 1); + cudaGraphAddDependencies(upd_graph, + &finalize_heavy_node, // "from" nodes + &bounds_changed_node, // "to" nodes +#if CUDA_VER_13_0_UP + nullptr, // edge data +#endif + 1); // number of dependencies RAFT_CUDA_TRY(cudaGetLastError()); } } diff --git a/dependencies.yaml b/dependencies.yaml index d08a5ef960..d1dbc07f2e 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -19,7 +19,7 @@ files: all: output: conda matrix: - cuda: ["12.9"] + cuda: ["12.9", "13.0"] arch: [x86_64, aarch64] includes: - build_common @@ -350,8 +350,8 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - numba-cuda>=0.14.0 - - numba>=0.59.1 + - numba-cuda>=0.19.1,<0.20.0a0 + - numba>=0.60.0 - rapids-dask-dependency==25.10.*,>=0.0.0a0 - &pandas pandas>=2.0 - output_types: requirements @@ -360,16 +360,16 @@ dependencies: - --extra-index-url=https://pypi.nvidia.com - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple specific: - - output_types: [requirements, pyproject] + - output_types: [conda, requirements, pyproject] matrices: - matrix: cuda: "12.*" - cuda_suffixed: "true" packages: - - cupy-cuda12x - - matrix: null + - cuda-python>=12.9.2,<13.0a0 + # fallback to CUDA 13 versions if 'cuda' is '13.*' or not provided + - matrix: packages: - - cupy-cuda12x + - cuda-python>=13.0.1,<14.0a0 test_python_cuopt_server: common: @@ -437,6 +437,11 @@ dependencies: cuda_suffixed: "true" packages: - libcuopt-cu12==25.10.*,>=0.0.0a0 + - matrix: + cuda: "13.*" + cuda_suffixed: "true" + packages: + - libcuopt-cu13==25.10.*,>=0.0.0a0 - {matrix: null, 
packages: [*libcuopt_unsuffixed]} depends_on_cuopt: common: @@ -456,6 +461,11 @@ dependencies: cuda_suffixed: "true" packages: - cuopt-cu12==25.10.*,>=0.0.0a0 + - matrix: + cuda: "13.*" + cuda_suffixed: "true" + packages: + - cuopt-cu13==25.10.*,>=0.0.0a0 - {matrix: null, packages: [*cuopt_unsuffixed]} depends_on_cuopt_server: common: @@ -475,6 +485,11 @@ dependencies: cuda_suffixed: "true" packages: - cuopt-server-cu12==25.10.*,>=0.0.0a0 + - matrix: + cuda: "13.*" + cuda_suffixed: "true" + packages: + - cuopt-server-cu13==25.10.*,>=0.0.0a0 - {matrix: null, packages: [*cuopt_server_unsuffixed]} depends_on_cuopt_sh_client: common: @@ -519,12 +534,31 @@ dependencies: cuda_suffixed: "true" packages: - librmm-cu12==25.10.*,>=0.0.0a0 + - matrix: + cuda: "13.*" + cuda_suffixed: "true" + packages: + - librmm-cu13==25.10.*,>=0.0.0a0 - {matrix: null, packages: [*librmm_unsuffixed]} depends_on_cupy: common: - output_types: conda packages: - - cupy>=12.0.0 + - cupy>=13.6.0 + # NOTE: This is intentionally not broken into groups by a 'cuda_suffixed' selector like + # other packages with -cu{nn}x suffixes in this file. + # All RAPIDS wheel builds (including in devcontainers) expect cupy to be suffixed. 
+ specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + packages: + - cupy-cuda12x>=13.6.0 + # fallback to CUDA 13 versions if 'cuda' is '13.*' or not provided + - matrix: + packages: + - cupy-cuda13x>=13.6.0 depends_on_rapids_logger: common: - output_types: [conda, requirements, pyproject] @@ -553,6 +587,11 @@ dependencies: cuda_suffixed: "true" packages: - rmm-cu12==25.10.*,>=0.0.0a0 + - matrix: + cuda: "13.*" + cuda_suffixed: "true" + packages: + - rmm-cu13==25.10.*,>=0.0.0a0 - matrix: packages: - *rmm_unsuffixed @@ -574,6 +613,11 @@ dependencies: cuda_suffixed: "true" packages: - cudf-cu12==25.10.*,>=0.0.0a0 + - matrix: + cuda: "13.*" + cuda_suffixed: "true" + packages: + - cudf-cu13==25.10.*,>=0.0.0a0 - matrix: packages: - *cudf_unsuffixed @@ -595,6 +639,11 @@ dependencies: cuda_suffixed: "true" packages: - cuvs-cu12==25.10.*,>=0.0.0a0 + - matrix: + cuda: "13.*" + cuda_suffixed: "true" + packages: + - cuvs-cu13==25.10.*,>=0.0.0a0 - matrix: packages: - *cuvs_unsuffixed @@ -616,6 +665,11 @@ dependencies: cuda_suffixed: "true" packages: - raft-dask-cu12==25.10.*,>=0.0.0a0 + - matrix: + cuda: "13.*" + cuda_suffixed: "true" + packages: + - raft-dask-cu13==25.10.*,>=0.0.0a0 - matrix: packages: - *raft_dask_unsuffixed @@ -637,6 +691,11 @@ dependencies: cuda_suffixed: "true" packages: - pylibraft-cu12==25.10.*,>=0.0.0a0 + - matrix: + cuda: "13.*" + cuda_suffixed: "true" + packages: + - pylibraft-cu13==25.10.*,>=0.0.0a0 - matrix: packages: - *pylibraft_unsuffixed @@ -668,6 +727,10 @@ dependencies: cuda: "12.9" packages: - cuda-version=12.9 + - matrix: + cuda: "13.0" + packages: + - cuda-version=13.0 cuda: common: - output_types: [conda] @@ -690,6 +753,15 @@ dependencies: - nvidia-cusparse-cu12 - nvidia-cusolver-cu12 - nvidia-nvtx-cu12 + - matrix: + cuda: "13.*" + use_cuda_wheels: "true" + packages: + - nvidia-cublas-cu13 + - nvidia-curand-cu13 + - nvidia-cusparse-cu13 + - nvidia-cusolver-cu13 + - nvidia-nvtx-cu13 # if 
use_cuda_wheels=false is provided, do not add dependencies on any CUDA wheels # (e.g. for DLFW and pip devcontainers) - matrix: diff --git a/docs/cuopt/source/cuopt-c/quick-start.rst b/docs/cuopt/source/cuopt-c/quick-start.rst index 04d5454c01..b04cfee52a 100644 --- a/docs/cuopt/source/cuopt-c/quick-start.rst +++ b/docs/cuopt/source/cuopt-c/quick-start.rst @@ -11,18 +11,22 @@ Installation pip --- -For CUDA 12.x: - This wheel is a Python wrapper around the C++ library and eases installation and access to libcuopt. This also helps in the pip environment to load libraries dynamically while using the Python SDK. - .. code-block:: bash # This is a deprecated module and no longer used, but it shares the same name for the CLI, so we need to uninstall it first if it exists. pip uninstall cuopt-thin-client + + # CUDA 13 + pip install --extra-index-url=https://pypi.nvidia.com \ + 'nvidia-cuda-runtime-cu12==13.0.*' \ + 'libcuopt-cu12==25.10.*' + + # CUDA 12 pip install --extra-index-url=https://pypi.nvidia.com \ - nvidia-cuda-runtime-cu12==12.9.* \ - libcuopt-cu12==25.10.* + 'nvidia-cuda-runtime-cu12==12.9.*' \ + 'libcuopt-cu12==25.10.*' .. note:: @@ -30,9 +34,15 @@ This wheel is a Python wrapper around the C++ library and eases installation and .. 
code-block:: bash + # CUDA 13 + pip install --pre --extra-index-url=https://pypi.nvidia.com --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/ \ + 'nvidia-cuda-runtime-cu13==13.0.*' \ + 'libcuopt-cu13==25.10.*' + + # CUDA 12 pip install --pre --extra-index-url=https://pypi.nvidia.com --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/ \ - nvidia-cuda-runtime-cu12==12.9.* \ - libcuopt-cu12==25.10.* + 'nvidia-cuda-runtime-cu12==12.9.*' \ + 'libcuopt-cu12==25.10.*' Conda ----- @@ -43,8 +53,12 @@ NVIDIA cuOpt can be installed with Conda (via `miniforge =13.0.1,<14.0a0", "cudf==25.10.*,>=0.0.0a0", "cuopt-mps-parser==25.10.*,>=0.0.0a0", - "cupy-cuda12x", + "cupy-cuda13x>=13.6.0", "cuvs==25.10.*,>=0.0.0a0", "libcuopt==25.10.*,>=0.0.0a0", - "numba-cuda>=0.14.0", - "numba>=0.59.1", + "numba-cuda>=0.19.1,<0.20.0a0", + "numba>=0.60.0", "numpy>=1.23.5,<3.0a0", "pandas>=2.0", "pylibraft==25.10.*,>=0.0.0a0", @@ -151,6 +152,7 @@ matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" requires = [ "cmake>=3.30.4", "cuopt-mps-parser==25.10.*,>=0.0.0a0", + "cupy-cuda13x>=13.6.0", "cython>=3.0.3", "libcuopt==25.10.*,>=0.0.0a0", "ninja", diff --git a/python/cuopt_server/pyproject.toml b/python/cuopt_server/pyproject.toml index b640264e91..83e764f23d 100644 --- a/python/cuopt_server/pyproject.toml +++ b/python/cuopt_server/pyproject.toml @@ -33,6 +33,7 @@ license = { text = "Apache-2.0" } requires-python = ">=3.10" dependencies = [ "cuopt==25.10.*,>=0.0.0a0", + "cupy-cuda13x>=13.6.0", "fastapi", "httpx", "jsonref==1.1.0", diff --git a/python/libcuopt/pyproject.toml b/python/libcuopt/pyproject.toml index 908d9e28c1..7a1bfdf942 100644 --- a/python/libcuopt/pyproject.toml +++ b/python/libcuopt/pyproject.toml @@ -64,7 +64,7 @@ libcuopt = "libcuopt" select = [ "distro-too-large-compressed", ] -max_allowed_size_compressed = '775M' +max_allowed_size_compressed = '850M' [project.scripts] cuopt_cli = "libcuopt._cli_wrapper:main" From 
738f43cef572a90d6acc67b063ffc5dd512e2f03 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sat, 6 Sep 2025 09:12:50 -0500 Subject: [PATCH 17/33] CUDA 13 support: follow-ups (#377) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Contributes to https://github.com/rapidsai/build-planning/issues/208 * updates all GitHub Actions branch references back to `branch-25.10`, now that https://github.com/rapidsai/shared-workflows/pull/413 is merged * fixes docs mistakes (https://github.com/NVIDIA/cuopt/pull/366#discussion_r2325719184) * fixes nightly builds Nightly wheel builds of `cuopt-mps-parser` are failing like this: > Error: Failed to CreateArtifact: Received non-retryable error: Failed request: (409) Conflict: an artifact with this name already exists on the workflow run ([wheel-build-cuopt-mps-parser link](https://github.com/NVIDIA/cuopt/actions/runs/17501959410/job/49716771454)) Because I forgot to bring over all of the artifact-naming changes made in `pr.yaml` to the corresponding entries on `build.yaml`, sorry 😬 Authors: - James Lamb (https://github.com/jameslamb) - Ramakrishnap (https://github.com/rgsl888prabhu) Approvers: - Ramakrishnap (https://github.com/rgsl888prabhu) URL: https://github.com/NVIDIA/cuopt/pull/377 --- .github/workflows/build.yaml | 33 +++++++++---------- .github/workflows/pr.yaml | 30 ++++++++--------- .github/workflows/test.yaml | 8 ++--- .../trigger-breaking-change-alert.yaml | 2 +- .../all_cuda-130_arch-aarch64.yaml | 1 + .../all_cuda-130_arch-x86_64.yaml | 1 + docs/cuopt/source/cuopt-c/quick-start.rst | 4 +-- python/libcuopt/pyproject.toml | 2 +- 8 files changed, 41 insertions(+), 40 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 7fdf5ee5d8..7b5f430dfa 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -43,7 +43,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -53,7 +53,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -63,7 +63,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -71,7 +71,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-cuopt-mps-parser: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -81,13 +81,12 @@ jobs: package-name: cuopt_mps_parser package-type: python append-cuda-suffix: false - pure-wheel: true # need 1 build per Python version and arch (but CUDA version doesn't matter so choose the latest) matrix_filter: 'group_by([.ARCH, (.PY_VER |split(".") | map(tonumber))])|map(max_by([(.CUDA_VER|split(".")|map(tonumber))]))' wheel-publish-cuopt-mps-parser: needs: wheel-build-cuopt-mps-parser secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -98,7 +97,7 @@ jobs: wheel-build-libcuopt: 
needs: wheel-build-cuopt-mps-parser secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -111,7 +110,7 @@ jobs: wheel-publish-libcuopt: needs: wheel-build-libcuopt secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -122,7 +121,7 @@ jobs: wheel-build-cuopt: needs: [wheel-build-cuopt-mps-parser, wheel-build-libcuopt] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -134,7 +133,7 @@ jobs: wheel-publish-cuopt: needs: wheel-build-cuopt secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -145,7 +144,7 @@ jobs: wheel-build-cuopt-server: needs: wheel-build-cuopt secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -157,7 +156,7 @@ jobs: wheel-publish-cuopt-server: needs: wheel-build-cuopt-server secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.10 with: build_type: ${{ inputs.build_type 
|| 'branch' }} branch: ${{ inputs.branch }} @@ -168,7 +167,7 @@ jobs: docs-build: needs: [python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} node_type: "gpu-l4-latest-1" @@ -183,7 +182,7 @@ jobs: wheel-build-cuopt-sh-client: needs: wheel-build-cuopt secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -193,12 +192,12 @@ jobs: package-name: cuopt_sh_client package-type: python append-cuda-suffix: false - # need 1 build per Python version and arch (but CUDA version doesn't matter) - matrix_filter: '[max_by((.CUDA_VER | split(".") | map(tonumber))]' + # only need 1 build (noarch package): this selects amd64, oldest-supported Python, latest-supported CUDA + matrix_filter: '[map(select(.ARCH == "amd64")) | min_by((.PY_VER | split(".") | map(tonumber)), (.CUDA_VER | split(".") | map(-tonumber)))]' wheel-publish-cuopt-sh-client: needs: wheel-build-cuopt-sh-client secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 2b34641888..e13e02b88a 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -44,10 +44,10 @@ jobs: - wheel-build-cuopt-sh-client - test-self-hosted-server secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.10 changed-files: secrets: inherit - 
uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.10 with: files_yaml: | test_cpp: @@ -112,21 +112,21 @@ jobs: - '!python/nvcf_client/**' checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.10 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.10 with: build_type: pull-request script: ci/build_cpp.sh conda-cpp-tests: needs: [conda-cpp-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.10 #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp with: build_type: pull-request @@ -134,14 +134,14 @@ jobs: conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.10 with: build_type: pull-request script: ci/build_python.sh conda-python-tests: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.10 #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: run_codecov: false @@ -150,7 +150,7 @@ jobs: docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 with: 
build_type: pull-request node_type: "gpu-l4-latest-1" @@ -161,7 +161,7 @@ jobs: script: "ci/build_docs.sh" wheel-build-cuopt-mps-parser: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 with: build_type: pull-request script: ci/build_wheel_cuopt_mps_parser.sh @@ -173,7 +173,7 @@ jobs: wheel-build-libcuopt: needs: wheel-build-cuopt-mps-parser secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 with: # build for every combination of arch and CUDA version, but only for the latest Python matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) @@ -184,7 +184,7 @@ jobs: wheel-build-cuopt: needs: [wheel-build-cuopt-mps-parser, wheel-build-libcuopt] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 with: build_type: pull-request script: ci/build_wheel_cuopt.sh @@ -193,7 +193,7 @@ jobs: wheel-tests-cuopt: needs: [wheel-build-cuopt, wheel-build-cuopt-mps-parser, wheel-build-cuopt-sh-client, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_cuopt with: build_type: pull-request @@ -201,7 +201,7 @@ jobs: wheel-build-cuopt-server: needs: wheel-build-cuopt secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 with: build_type: pull-request script: ci/build_wheel_cuopt_server.sh @@ -209,7 +209,7 @@ 
jobs: package-type: python wheel-build-cuopt-sh-client: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 with: build_type: pull-request script: ci/build_wheel_cuopt_sh_client.sh @@ -222,7 +222,7 @@ jobs: wheel-tests-cuopt-server: needs: [wheel-build-cuopt-server, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_cuopt_server with: build_type: pull-request diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 6bd4caac04..80ba1f869f 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -34,7 +34,7 @@ on: jobs: conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.10 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -43,7 +43,7 @@ jobs: script: ci/test_cpp.sh conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.10 with: run_codecov: false build_type: ${{ inputs.build_type }} @@ -53,7 +53,7 @@ jobs: script: ci/test_python.sh wheel-tests-cuopt: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -62,7 +62,7 @@ jobs: script: ci/test_wheel_cuopt.sh wheel-tests-cuopt-server: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda13.0 + 
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml index 800f780e7b..1317f4a8f9 100644 --- a/.github/workflows/trigger-breaking-change-alert.yaml +++ b/.github/workflows/trigger-breaking-change-alert.yaml @@ -27,7 +27,7 @@ jobs: trigger-notifier: if: contains(github.event.pull_request.labels.*.name, 'breaking') secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-25.10 with: sender_login: ${{ github.event.sender.login }} sender_avatar: ${{ github.event.sender.avatar_url }} diff --git a/conda/environments/all_cuda-130_arch-aarch64.yaml b/conda/environments/all_cuda-130_arch-aarch64.yaml index 5648d8799a..c473a9713b 100644 --- a/conda/environments/all_cuda-130_arch-aarch64.yaml +++ b/conda/environments/all_cuda-130_arch-aarch64.yaml @@ -80,6 +80,7 @@ dependencies: - sphinxcontrib-openapi - sphinxcontrib-websupport - sysroot_linux-aarch64==2.28 +- tbb-devel - uvicorn==0.34.* - zlib - pip: diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml b/conda/environments/all_cuda-130_arch-x86_64.yaml index 36e522b235..0eddbfeeea 100644 --- a/conda/environments/all_cuda-130_arch-x86_64.yaml +++ b/conda/environments/all_cuda-130_arch-x86_64.yaml @@ -80,6 +80,7 @@ dependencies: - sphinxcontrib-openapi - sphinxcontrib-websupport - sysroot_linux-64==2.28 +- tbb-devel - uvicorn==0.34.* - zlib - pip: diff --git a/docs/cuopt/source/cuopt-c/quick-start.rst b/docs/cuopt/source/cuopt-c/quick-start.rst index b04cfee52a..e7ca890127 100644 --- a/docs/cuopt/source/cuopt-c/quick-start.rst +++ b/docs/cuopt/source/cuopt-c/quick-start.rst @@ -20,8 +20,8 @@ This wheel is a Python wrapper around the C++ library and eases installation 
and # CUDA 13 pip install --extra-index-url=https://pypi.nvidia.com \ - 'nvidia-cuda-runtime-cu12==13.0.*' \ - 'libcuopt-cu12==25.10.*' + 'nvidia-cuda-runtime-cu13==13.0.*' \ + 'libcuopt-cu13==25.10.*' # CUDA 12 pip install --extra-index-url=https://pypi.nvidia.com \ diff --git a/python/libcuopt/pyproject.toml b/python/libcuopt/pyproject.toml index 7a1bfdf942..3bbefc2c17 100644 --- a/python/libcuopt/pyproject.toml +++ b/python/libcuopt/pyproject.toml @@ -64,7 +64,7 @@ libcuopt = "libcuopt" select = [ "distro-too-large-compressed", ] -max_allowed_size_compressed = '850M' +max_allowed_size_compressed = '900M' [project.scripts] cuopt_cli = "libcuopt._cli_wrapper:main" From db1b630ee1a0e77ef3bbf4c37431becf3b3b585c Mon Sep 17 00:00:00 2001 From: Ramakrishnap <42624703+rgsl888prabhu@users.noreply.github.com> Date: Mon, 8 Sep 2025 13:47:43 -0500 Subject: [PATCH 18/33] Adding support nightly cuopt-examples notebook testing (#342) This adds testing for https://github.com/NVIDIA/cuopt-examples to nightly testing suite. 
## Issue closes #334 closes #212 Authors: - Ramakrishnap (https://github.com/rgsl888prabhu) Approvers: - James Lamb (https://github.com/jameslamb) - Trevor McKay (https://github.com/tmckayus) URL: https://github.com/NVIDIA/cuopt/pull/342 --- .github/workflows/pr.yaml | 1 - .github/workflows/test.yaml | 12 + ci/test_notebooks.sh | 46 +-- ci/utils/nbtest.sh | 101 +++---- ci/utils/notebook_command_extractor.py | 276 ++++++++++++++++++ ci/utils/notebook_list.py | 21 -- .../all_cuda-129_arch-aarch64.yaml | 6 - .../all_cuda-129_arch-x86_64.yaml | 6 - .../all_cuda-130_arch-aarch64.yaml | 6 - .../all_cuda-130_arch-x86_64.yaml | 6 - dependencies.yaml | 9 +- 11 files changed, 361 insertions(+), 129 deletions(-) create mode 100644 ci/utils/notebook_command_extractor.py diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index e13e02b88a..51203186fa 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -35,7 +35,6 @@ jobs: - conda-python-tests - docs-build - wheel-build-libcuopt - # - conda-notebook-tests - wheel-build-cuopt - wheel-tests-cuopt - wheel-build-cuopt-server diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 80ba1f869f..080d81a7ae 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -69,3 +69,15 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} script: ci/test_wheel_cuopt_server.sh + conda-notebook-tests: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 + with: + build_type: ${{ inputs.build_type }} + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + node_type: "gpu-l4-latest-1" + arch: "amd64" + container_image: "rapidsai/ci-conda:25.10-latest" + script: ci/test_notebooks.sh diff --git a/ci/test_notebooks.sh b/ci/test_notebooks.sh index feb711e78e..7ff6f2424b 100755 --- a/ci/test_notebooks.sh +++ b/ci/test_notebooks.sh @@ -21,13 +21,22 @@ set -euo pipefail CUOPT_VERSION="$(rapids-version)" 
+rapids-logger "Downloading artifacts from previous jobs" +CPP_CHANNEL=$(rapids-download-conda-from-github cpp) +PYTHON_CHANNEL=$(rapids-download-conda-from-github python) + rapids-logger "Generate notebook testing dependencies" + +ENV_YAML_DIR="$(mktemp -d)" + rapids-dependency-file-generator \ --output conda \ --file-key test_notebooks \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml + --prepend-channel "${CPP_CHANNEL}" \ + --prepend-channel "${PYTHON_CHANNEL}" \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee "${ENV_YAML_DIR}/env.yaml" -rapids-mamba-retry env create --yes -f env.yaml -n test +rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n test # Temporarily allow unbound variables for conda activation. set +u @@ -36,25 +45,20 @@ set -u rapids-print-env -rapids-logger "Downloading artifacts from previous jobs" -CPP_CHANNEL=$(rapids-download-conda-from-github cpp) -PYTHON_CHANNEL=$(rapids-download-conda-from-github python) +EXAMPLES_BRANCH="branch-${CUOPT_VERSION%.*}" -rapids-mamba-retry install \ - --channel "${CPP_CHANNEL}" \ - --channel "${PYTHON_CHANNEL}" \ - "libcuopt=${CUOPT_VERSION}" \ - "cuopt=${CUOPT_VERSION}" \ - "cuopt-server=${CUOPT_VERSION}" +# Remove any existing cuopt-examples directory -pip install python/cuopt_self_hosted/ +rapids-logger "Cloning cuopt-examples repository for branch: ${EXAMPLES_BRANCH}" +rm -rf cuopt-examples +git clone --single-branch --branch "${EXAMPLES_BRANCH}" https://github.com/NVIDIA/cuopt-examples.git NBTEST="$(realpath "$(dirname "$0")/utils/nbtest.sh")" NBLIST_PATH="$(realpath "$(dirname "$0")/utils/notebook_list.py")" -NBLIST=$(python "${NBLIST_PATH}") -SERVER_WAIT_DELAY=10 -pushd notebooks +pushd cuopt-examples + +NBLIST=$(python "${NBLIST_PATH}") EXITCODE=0 trap "EXITCODE=1" ERR @@ -62,20 +66,16 @@ trap "EXITCODE=1" ERR rapids-logger "Start cuopt-server" set +e -#python -c "from cuopt_server.cuopt_service 
import run_server; run_server()" & - -python -m cuopt_server.cuopt_service & -export SERVER_PID=$! -sleep "${SERVER_WAIT_DELAY}" -curl http://0.0.0.0:5000/cuopt/health rapids-logger "Start notebooks tests" for nb in ${NBLIST}; do nvidia-smi ${NBTEST} "${nb}" + if [ $? -ne 0 ]; then + echo "Notebook ${nb} failed to execute. Exiting." + exit 1 + fi done rapids-logger "Notebook test script exiting with value: $EXITCODE" -kill -s SIGTERM $SERVER_PID -wait $SERVER_PID exit ${EXITCODE} diff --git a/ci/utils/nbtest.sh b/ci/utils/nbtest.sh index 1b99d68247..356e92795f 100755 --- a/ci/utils/nbtest.sh +++ b/ci/utils/nbtest.sh @@ -12,75 +12,70 @@ # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# See the License for the specific language governing permissions and limitations. +# +# This script executes Jupyter notebooks directly using nbconvert. set +e # do not abort the script on error set -o pipefail # piped commands propagate their error set -E # ERR traps are inherited by subcommands trap "EXITCODE=1" ERR -# Prepend the following code to all scripts generated from nbconvert. This -# allows all cell and line magic code to run and update the namespace as if -# running in jupyter, but will also tolerate failures due to running in a -# non-jupyter env. -# Note: depending on the assumptions of the notebook script, ignoring failures -# may not be acceptable (meaning the converted notebook simply cannot run -# outside of jupyter as-is), hence the warning. 
-MAGIC_OVERRIDE_CODE=" -def my_run_line_magic(*args, **kwargs): - g=globals() - l={} - for a in args: - try: - exec(str(a),g,l) - except Exception as e: - print('WARNING: %s\n While executing this magic function code:\n%s\n continuing...\n' % (e, a)) - else: - g.update(l) - -def my_run_cell_magic(*args, **kwargs): - my_run_line_magic(*args, **kwargs) - -get_ipython().run_line_magic=my_run_line_magic -get_ipython().run_cell_magic=my_run_cell_magic - -" - -NO_COLORS=--colors=NoColor -EXITCODE=0 +# Save the original directory +ORIGINAL_DIR=$(pwd) -# PWD is REPO_ROOT/notebooks -NBTMPDIR="${PWD}/../tmp" -mkdir -p "${NBTMPDIR}" -NBUTILS="${PWD}/external" -cp -r "${NBUTILS}/python/utils" "${NBTMPDIR}/." -cp -r "${NBUTILS}/server/notebook_utils" "${NBTMPDIR}/." -cp -r "${NBUTILS}/dli/helper_function" "${NBTMPDIR}/." -cd "${NBTMPDIR}" || exit 1 +EXITCODE=0 for nb in "$@"; do NBFILENAME=$nb NBNAME=${NBFILENAME%.*} NBNAME=${NBNAME##*/} - NBTESTSCRIPT=${NBTMPDIR}/${NBNAME}-test.py - shift + + # Get the directory where the notebook is located + NBDIR=$(dirname "$NBFILENAME") + + cd "${NBDIR}" || exit 1 + + # Output the executed notebook in the same folder + EXECUTED_NOTEBOOK="${NBNAME}-executed.ipynb" echo -------------------------------------------------------------------------------- echo STARTING: "${NBNAME}" echo -------------------------------------------------------------------------------- - jupyter nbconvert --to script ../"${NBFILENAME}" --output "${NBTMPDIR}"/"${NBNAME}"-test - python "${PWD}/../ci/utils/dli_nb_strip.py" "${NBTESTSCRIPT}" - echo "${MAGIC_OVERRIDE_CODE}" > "${NBTMPDIR}"/tmpfile - cat "${NBTESTSCRIPT}" >> "${NBTMPDIR}"/tmpfile - mv "${NBTMPDIR}"/tmpfile "${NBTESTSCRIPT}" - - echo "Running \"ipython ${NO_COLORS} ${NBTESTSCRIPT}\" on $(date)" - echo - time timeout 30m bash -c "ipython ${NO_COLORS} ${NBTESTSCRIPT}; EC=\$?; echo -------------------------------------------------------------------------------- ; echo DONE: ${NBNAME}; exit \$EC" - 
NBEXITCODE=$? - echo EXIT CODE: ${NBEXITCODE} - echo + + # Skip notebooks that are not yet supported + SKIP_NOTEBOOKS=( + "trnsport_cuopt" + "Production_Planning_Example_Pulp" + "Simple_LP_pulp" + "Simple_MIP_pulp" + "Sudoku_pulp" + ) + + for skip in "${SKIP_NOTEBOOKS[@]}"; do + if [[ "$NBNAME" == "$skip"* ]]; then + echo "Skipping notebook '${NBNAME}' as it matches skip pattern '${skip}'" + cd "$ORIGINAL_DIR" || exit 1 + continue 2 + fi + done + + rapids-logger "Running commands from notebook: ${NBNAME}.ipynb" + + python3 "$ORIGINAL_DIR/../ci/utils/notebook_command_extractor.py" "$NBNAME.ipynb" --verbose + + rapids-logger "Executing notebook: ${NBNAME}.ipynb" + # Execute notebook with default kernel + jupyter nbconvert --execute "${NBNAME}.ipynb" --to notebook --output "${EXECUTED_NOTEBOOK}" --ExecutePreprocessor.kernel_name="python3" + + if [ $? -eq 0 ]; then + echo "Notebook executed successfully: ${EXECUTED_NOTEBOOK}" + else + echo "ERROR: Failed to execute notebook: ${NBFILENAME}" + EXITCODE=1 + fi + + cd "${ORIGINAL_DIR}" || exit 1 done exit ${EXITCODE} diff --git a/ci/utils/notebook_command_extractor.py b/ci/utils/notebook_command_extractor.py new file mode 100644 index 0000000000..7b443c7bd2 --- /dev/null +++ b/ci/utils/notebook_command_extractor.py @@ -0,0 +1,276 @@ +#!/usr/bin/env python3 + +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +Notebook Command Extractor + +This script extracts pip install and other shell commands from Jupyter notebooks +and can optionally execute them. It's designed to be used by the nbtest.sh script. +""" + +import argparse +import json +import subprocess +import sys +from typing import List, Tuple + + +def extract_pip_commands(notebook_path: str) -> List[str]: + """Extract pip install commands from a Jupyter notebook.""" + try: + with open(notebook_path, "r") as f: + notebook = json.load(f) + + pip_commands = [] + for cell in notebook.get("cells", []): + if cell.get("cell_type") == "code": + source = "".join(cell.get("source", [])) + lines = source.split("\n") + for line in lines: + line = line.strip() + if line.startswith("!pip install") or line.startswith( + "pip install" + ): + # Clean up the line but preserve quotes + clean_line = line.strip() + if clean_line: + pip_commands.append(clean_line) + + return pip_commands + + except Exception as e: + print(f"Error parsing notebook: {e}", file=sys.stderr) + return [] + + +def extract_shell_commands(notebook_path: str) -> List[str]: + """Extract other shell commands from a Jupyter notebook.""" + try: + with open(notebook_path, "r") as f: + notebook = json.load(f) + + shell_commands = [] + allowed_commands = [ + "wget", + "curl", + "git", + "python", + "cd", + "mkdir", + "rm", + "cp", + "mv", + "unzip", + "tar", + ] + + for cell in notebook.get("cells", []): + if cell.get("cell_type") == "code": + source = "".join(cell.get("source", [])) + lines = source.split("\n") + for line in lines: + line = line.strip() + if line.startswith("!"): + # Check if it's a shell command we want to execute + cmd = ( + line[1:].strip().split()[0] + if line[1:].strip() + else "" + ) + if cmd in allowed_commands: + shell_commands.append(line) + + return shell_commands + + except Exception as e: + print(f"Error parsing notebook: 
{e}", file=sys.stderr) + return [] + + +def execute_pip_command(cmd: str, verbose: bool = False) -> bool: + """Execute a pip install command.""" + if verbose: + print(f"Processing command: '{cmd}'") + + # Remove the ! prefix if present for execution + exec_cmd = cmd.lstrip("!").strip() + + if verbose: + print(f"DEBUG: Original command: '{cmd}'") + print(f"DEBUG: Cleaned command: '{exec_cmd}'") + print(f"DEBUG: Command length: {len(exec_cmd)}") + print( + f"DEBUG: Command contains 'numpy': {'YES' if 'numpy' in exec_cmd else 'NO'}" + ) + print(f"Executing: {exec_cmd}") + + # Add --pre to exec_cmd if not already present + if exec_cmd.startswith("pip install") and "--pre" not in exec_cmd: + exec_cmd += " --pre --extra-index-url https://pypi.anaconda.org/rapidsai-nightly/simple" + + if verbose: + print(f"Final command: {exec_cmd}") + + try: + # Execute pip install commands + if exec_cmd.startswith("pip install"): + # Use shell=True for pip install to handle quoted arguments properly + # This is safe since we're only executing pip install commands + result = subprocess.run( + exec_cmd, shell=True, capture_output=True, text=True + ) + if result.returncode == 0: + if verbose: + print(f"✓ Successfully executed: {cmd}") + return True + else: + if verbose: + print(f"✗ Failed to execute: {cmd}") + print(f"Error: {result.stderr}") + return False + else: + if verbose: + print(f"✗ Invalid pip install command format: {exec_cmd}") + return False + except Exception as e: + if verbose: + print(f"✗ Exception executing {cmd}: {e}") + return False + + +def execute_shell_command(cmd: str, verbose: bool = False) -> bool: + """Execute a shell command.""" + if verbose: + print(f"Processing command: '{cmd}'") + + # Remove the ! 
prefix for execution + exec_cmd = cmd.lstrip("!").strip() + + if verbose: + print(f"DEBUG: Original command: '{cmd}'") + print(f"DEBUG: Cleaned command: '{exec_cmd}'") + print(f"DEBUG: Command length: {len(exec_cmd)}") + print(f"Executing: {exec_cmd}") + + # Skip potentially dangerous commands + dangerous_commands = ["chmod", "chown", "sudo", "su"] + if any(exec_cmd.startswith(dangerous) for dangerous in dangerous_commands): + if verbose: + print(f"⚠ Skipping potentially dangerous command: {cmd}") + return False + + try: + if verbose: + print("Executing shell command...") + + result = subprocess.run( + exec_cmd, shell=True, capture_output=True, text=True + ) + if result.returncode == 0: + if verbose: + print(f"✓ Successfully executed: {cmd}") + return True + else: + if verbose: + print(f"✗ Failed to execute: {cmd}") + print(f"Error: {result.stderr}") + return False + except Exception as e: + if verbose: + print(f"✗ Exception executing {cmd}: {e}") + return False + + +def main(): + parser = argparse.ArgumentParser( + description="Extract and optionally execute commands from Jupyter notebooks" + ) + parser.add_argument("notebook_path", help="Path to the Jupyter notebook") + parser.add_argument( + "--extract-only", + action="store_true", + help="Only extract commands, do not execute", + ) + parser.add_argument( + "--verbose", "-v", action="store_true", help="Verbose output" + ) + parser.add_argument( + "--output-format", + choices=["json", "text"], + default="text", + help="Output format for extracted commands", + ) + + args = parser.parse_args() + + # Extract commands + pip_commands = extract_pip_commands(args.notebook_path) + shell_commands = extract_shell_commands(args.notebook_path) + + print(f"Pip commands: {pip_commands}") + print(f"Shell commands: {shell_commands}") + + if args.output_format == "json": + # Output as JSON for shell script processing + output = { + "pip_commands": pip_commands, + "shell_commands": shell_commands, + } + print(json.dumps(output)) + 
else: + # Output as text (default) + if pip_commands: + print("PIP_COMMANDS:") + for cmd in pip_commands: + print(cmd) + + if shell_commands: + print("SHELL_COMMANDS:") + for cmd in shell_commands: + print(cmd) + + # Execute commands if not extract-only mode + if not args.extract_only: + success_count = 0 + total_count = 0 + + if pip_commands: + print(f"\nExecuting {len(pip_commands)} pip install commands...") + for cmd in pip_commands: + if execute_pip_command(cmd, args.verbose): + success_count += 1 + total_count += 1 + + if shell_commands: + print(f"\nExecuting {len(shell_commands)} shell commands...") + for cmd in shell_commands: + if execute_shell_command(cmd, args.verbose): + success_count += 1 + total_count += 1 + + if total_count > 0: + print( + f"\nExecution summary: {success_count}/{total_count} commands succeeded" + ) + return 0 if success_count == total_count else 1 + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/ci/utils/notebook_list.py b/ci/utils/notebook_list.py index 0ecbddcc3b..1b4e5a26ba 100644 --- a/ci/utils/notebook_list.py +++ b/ci/utils/notebook_list.py @@ -41,27 +41,6 @@ skip = True print(f"SKIPPING {filename} (marked as skip)", file=sys.stderr) break - elif re.search("dask", line): - print( - f"SKIPPING {filename} (suspected Dask usage, not currently automatable)", - file=sys.stderr, - ) - skip = True - break - elif pascal and re.search("# Does not run on Pascal", line): - print( - f"SKIPPING {filename} (does not run on Pascal)", - file=sys.stderr, - ) - skip = True - break - elif re.search("CVRPTW Exercise", line): - print( - f"SKIPPING {filename} (user exercise notebook)", - file=sys.stderr, - ) - skip = True - break if not skip: print(filename) diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 5353890305..2a3120f5a3 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ 
-27,9 +27,7 @@ dependencies: - doxygen=1.9.1 - exhale - fastapi -- folium - gcc_linux-aarch64=14.* -- geopandas - gmock - gtest - httpx @@ -38,11 +36,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libgdal<3.9.0 - libraft-headers==25.10.* - librmm==25.10.* - make -- matplotlib - msgpack-numpy==0.4.8 - msgpack-python==1.1.0 - myst-nb @@ -56,7 +52,6 @@ dependencies: - pandas>=2.0 - pexpect - pip -- polyline - pre-commit - psutil>=5.9,<6.0a0 - pylibraft==25.10.*,>=0.0.0a0 @@ -71,7 +66,6 @@ dependencies: - requests - rmm==25.10.*,>=0.0.0a0 - scikit-build-core>=0.10.0 -- scipy - sphinx - sphinx-copybutton - sphinx-design diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 63fb69d765..da6d19c193 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -27,9 +27,7 @@ dependencies: - doxygen=1.9.1 - exhale - fastapi -- folium - gcc_linux-64=14.* -- geopandas - gmock - gtest - httpx @@ -38,11 +36,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libgdal<3.9.0 - libraft-headers==25.10.* - librmm==25.10.* - make -- matplotlib - msgpack-numpy==0.4.8 - msgpack-python==1.1.0 - myst-nb @@ -56,7 +52,6 @@ dependencies: - pandas>=2.0 - pexpect - pip -- polyline - pre-commit - psutil>=5.9,<6.0a0 - pylibraft==25.10.*,>=0.0.0a0 @@ -71,7 +66,6 @@ dependencies: - requests - rmm==25.10.*,>=0.0.0a0 - scikit-build-core>=0.10.0 -- scipy - sphinx - sphinx-copybutton - sphinx-design diff --git a/conda/environments/all_cuda-130_arch-aarch64.yaml b/conda/environments/all_cuda-130_arch-aarch64.yaml index c473a9713b..cf306f74e6 100644 --- a/conda/environments/all_cuda-130_arch-aarch64.yaml +++ b/conda/environments/all_cuda-130_arch-aarch64.yaml @@ -27,9 +27,7 @@ dependencies: - doxygen=1.9.1 - exhale - fastapi -- folium - gcc_linux-aarch64=14.* -- geopandas - gmock - gtest - httpx @@ -38,11 +36,9 @@ dependencies: - 
libcurand-dev - libcusolver-dev - libcusparse-dev -- libgdal<3.9.0 - libraft-headers==25.10.* - librmm==25.10.* - make -- matplotlib - msgpack-numpy==0.4.8 - msgpack-python==1.1.0 - myst-nb @@ -56,7 +52,6 @@ dependencies: - pandas>=2.0 - pexpect - pip -- polyline - pre-commit - psutil>=5.9,<6.0a0 - pylibraft==25.10.*,>=0.0.0a0 @@ -71,7 +66,6 @@ dependencies: - requests - rmm==25.10.*,>=0.0.0a0 - scikit-build-core>=0.10.0 -- scipy - sphinx - sphinx-copybutton - sphinx-design diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml b/conda/environments/all_cuda-130_arch-x86_64.yaml index 0eddbfeeea..c9ba5beba0 100644 --- a/conda/environments/all_cuda-130_arch-x86_64.yaml +++ b/conda/environments/all_cuda-130_arch-x86_64.yaml @@ -27,9 +27,7 @@ dependencies: - doxygen=1.9.1 - exhale - fastapi -- folium - gcc_linux-64=14.* -- geopandas - gmock - gtest - httpx @@ -38,11 +36,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libgdal<3.9.0 - libraft-headers==25.10.* - librmm==25.10.* - make -- matplotlib - msgpack-numpy==0.4.8 - msgpack-python==1.1.0 - myst-nb @@ -56,7 +52,6 @@ dependencies: - pandas>=2.0 - pexpect - pip -- polyline - pre-commit - psutil>=5.9,<6.0a0 - pylibraft==25.10.*,>=0.0.0a0 @@ -71,7 +66,6 @@ dependencies: - requests - rmm==25.10.*,>=0.0.0a0 - scikit-build-core>=0.10.0 -- scipy - sphinx - sphinx-copybutton - sphinx-design diff --git a/dependencies.yaml b/dependencies.yaml index d1dbc07f2e..6508a60a69 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -69,6 +69,8 @@ files: - cuda_version - notebooks - py_version + - depends_on_cuopt_server + - depends_on_cuopt_sh_client checks: output: none includes: @@ -817,15 +819,8 @@ dependencies: common: - output_types: [conda, requirements] packages: - - breathe - - folium - - geopandas - ipython - - matplotlib - notebook - - polyline - - scipy - - libgdal<3.9.0 - output_types: [conda] packages: - *jsonref From e07f9a71a4003b6024f9376e6b96e8398bac28e5 Mon Sep 17 00:00:00 2001 
From: Ramakrishnap <42624703+rgsl888prabhu@users.noreply.github.com> Date: Tue, 9 Sep 2025 10:05:56 -0500 Subject: [PATCH 19/33] Add support for cuda13 container and fix cuda13 lib issues in wheel (#379) - This PR adds support for cuda 13 container for nightly and release - Also updates python version to 3.13 - Also cuda-toolkits have stopped using cu13 suffixes and they need to be fixed in dependencies - This also requires addition to RPATH since the paths are different for cu13 Authors: - Ramakrishnap (https://github.com/rgsl888prabhu) Approvers: - Trevor McKay (https://github.com/tmckayus) - James Lamb (https://github.com/jameslamb) URL: https://github.com/NVIDIA/cuopt/pull/379 --- .../workflows/build_test_publish_images.yaml | 4 +-- README.md | 27 +++++++++++++++++-- ci/docker/Dockerfile | 6 +++-- dependencies.yaml | 12 ++------- docs/cuopt/source/cuopt-c/quick-start.rst | 4 +-- .../cuopt/source/cuopt-python/quick-start.rst | 4 +-- .../cuopt/source/cuopt-server/quick-start.rst | 4 +-- python/libcuopt/CMakeLists.txt | 2 ++ 8 files changed, 41 insertions(+), 22 deletions(-) diff --git a/.github/workflows/build_test_publish_images.yaml b/.github/workflows/build_test_publish_images.yaml index 22ba97859f..5f107cc57a 100644 --- a/.github/workflows/build_test_publish_images.yaml +++ b/.github/workflows/build_test_publish_images.yaml @@ -32,11 +32,11 @@ on: description: 'JSON array of architectures to build for' cuda_ver: type: string - default: '["12.9.0"]' + default: '["12.9.0", "13.0.0"]' description: 'JSON array of CUDA versions to build for' python_ver: type: string - default: '["3.12.11"]' + default: '["3.13.7"]' description: 'JSON array of Python versions to build for' linux_ver: type: string diff --git a/README.md b/README.md index f34020f4d2..8c73898506 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ This repo is also hosted as a [COIN-OR](http://github.com/coin-or/cuopt/) projec ### CUDA/GPU requirements -* CUDA 12.0+ +* CUDA 12.0+ or CUDA 13.0+ * 
NVIDIA driver >= 525.60.13 (Linux) and >= 527.41 (Windows) * Volta architecture or better (Compute Capability >=7.0) @@ -77,6 +77,25 @@ pip install --pre \ cuopt-server-cu12==25.10.* cuopt-sh-client==25.10.* ``` +For CUDA 13.x: + +```bash +pip install \ + --extra-index-url=https://pypi.nvidia.com \ + nvidia-cuda-runtime==13.0.* \ + cuopt-server-cu13==25.10.* cuopt-sh-client==25.10.* +``` + +Development wheels are available as nightlies, please update `--extra-index-url` to `https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/` to install latest nightly packages. +```bash +pip install --pre \ + --extra-index-url=https://pypi.nvidia.com \ + --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/ \ + nvidia-cuda-runtime==13.0.* \ + cuopt-server-cu13==25.10.* cuopt-sh-client==25.10.* +``` + + ### Conda cuOpt can be installed with conda (via [miniforge](https://github.com/conda-forge/miniforge)): @@ -95,10 +114,14 @@ of our latest development branch. Just replace `-c rapidsai` with `-c rapidsai-n Users can pull the cuOpt container from the NVIDIA container registry. ```bash +# For CUDA 12.x docker pull nvidia/cuopt:latest-cuda12.9-py312 + +# For CUDA 13.x +docker pull nvidia/cuopt:latest-cuda13.0-py312 ``` -Note: The ``latest`` tag is the latest stable release of cuOpt. If you want to use a specific version, you can use the ``-cuda12.9-py312`` tag. For example, to use cuOpt 25.5.0, you can use the ``25.5.0-cuda12.8-py312`` tag. Please refer to `cuOpt dockerhub page `_ for the list of available tags. +Note: The ``latest`` tag is the latest stable release of cuOpt. If you want to use a specific version, you can use the ``-cuda12.9-py312`` or ``-cuda13.0-py312`` tag. For example, to use cuOpt 25.5.0, you can use the ``25.5.0-cuda12.8-py312`` or ``25.5.0-cuda13.0-py312`` tag. Please refer to `cuOpt dockerhub page `_ for the list of available tags. 
More information about the cuOpt container can be found [here](https://docs.nvidia.com/cuopt/user-guide/latest/cuopt-server/quick-start.html#container-from-docker-hub). diff --git a/ci/docker/Dockerfile b/ci/docker/Dockerfile index f00564fcf2..e7f023040b 100644 --- a/ci/docker/Dockerfile +++ b/ci/docker/Dockerfile @@ -54,7 +54,8 @@ ARG CUOPT_VER ARG PYTHON_SHORT_VER # Install cuOpt as root to make it available to all users -RUN cuda_suffix=cu$(echo ${CUDA_VER} | cut -d'.' -f1) && \ +RUN \ + cuda_suffix=cu$(echo ${CUDA_VER} | cut -d'.' -f1) && \ cuda_major_minor=$(echo ${CUDA_VER} | cut -d'.' -f1-2) && \ python -m pip install \ --extra-index-url https://pypi.nvidia.com \ @@ -62,7 +63,8 @@ RUN cuda_suffix=cu$(echo ${CUDA_VER} | cut -d'.' -f1) && \ --no-cache-dir \ "cuopt-server-${cuda_suffix}==${CUOPT_VER}" \ "cuopt-sh-client==${CUOPT_VER}" \ - "nvidia-cuda-runtime-${cuda_suffix}==${cuda_major_minor}.*" && \ + "cuda-toolkit[cudart]==${cuda_major_minor}.*" \ + ${nvidia_cuda_runtime_pkg} && \ python -m pip list # Remove gcc to save space, gcc was required for building psutils diff --git a/dependencies.yaml b/dependencies.yaml index 6508a60a69..5abb3197e8 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -750,20 +750,12 @@ dependencies: cuda: "12.*" use_cuda_wheels: "true" packages: - - nvidia-cublas-cu12 - - nvidia-curand-cu12 - - nvidia-cusparse-cu12 - - nvidia-cusolver-cu12 - - nvidia-nvtx-cu12 + - cuda-toolkit[cublas,curand,cusolver,cusparse,nvtx]==12.* - matrix: cuda: "13.*" use_cuda_wheels: "true" packages: - - nvidia-cublas-cu13 - - nvidia-curand-cu13 - - nvidia-cusparse-cu13 - - nvidia-cusolver-cu13 - - nvidia-nvtx-cu13 + - cuda-toolkit[cublas,curand,cusolver,cusparse,nvtx]==13.* # if use_cuda_wheels=false is provided, do not add dependencies on any CUDA wheels # (e.g. 
for DLFW and pip devcontainers) - matrix: diff --git a/docs/cuopt/source/cuopt-c/quick-start.rst b/docs/cuopt/source/cuopt-c/quick-start.rst index e7ca890127..4ae4e1ef8c 100644 --- a/docs/cuopt/source/cuopt-c/quick-start.rst +++ b/docs/cuopt/source/cuopt-c/quick-start.rst @@ -20,7 +20,7 @@ This wheel is a Python wrapper around the C++ library and eases installation and # CUDA 13 pip install --extra-index-url=https://pypi.nvidia.com \ - 'nvidia-cuda-runtime-cu13==13.0.*' \ + 'nvidia-cuda-runtime==13.0.*' \ 'libcuopt-cu13==25.10.*' # CUDA 12 @@ -36,7 +36,7 @@ This wheel is a Python wrapper around the C++ library and eases installation and # CUDA 13 pip install --pre --extra-index-url=https://pypi.nvidia.com --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/ \ - 'nvidia-cuda-runtime-cu13==13.0.*' \ + 'nvidia-cuda-runtime==13.0.*' \ 'libcuopt-cu13==25.10.*' # CUDA 12 diff --git a/docs/cuopt/source/cuopt-python/quick-start.rst b/docs/cuopt/source/cuopt-python/quick-start.rst index 5077ff6991..65acea5db6 100644 --- a/docs/cuopt/source/cuopt-python/quick-start.rst +++ b/docs/cuopt/source/cuopt-python/quick-start.rst @@ -14,7 +14,7 @@ pip # CUDA 13 pip install --extra-index-url=https://pypi.nvidia.com \ - 'nvidia-cuda-runtime-cu13==13.0.*' \ + 'nvidia-cuda-runtime==13.0.*' \ 'cuopt-cu13==25.10.*' # CUDA 12 @@ -30,7 +30,7 @@ pip # CUDA 13 pip install --pre --extra-index-url=https://pypi.nvidia.com --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/ \ - 'nvidia-cuda-runtime-cu13==13.0.*' \ + 'nvidia-cuda-runtime==13.0.*' \ 'cuopt-cu13==25.10.*' # CUDA 12 diff --git a/docs/cuopt/source/cuopt-server/quick-start.rst b/docs/cuopt/source/cuopt-server/quick-start.rst index 85a7c78e9c..1e6fce235b 100644 --- a/docs/cuopt/source/cuopt-server/quick-start.rst +++ b/docs/cuopt/source/cuopt-server/quick-start.rst @@ -12,7 +12,7 @@ pip # CUDA 13 pip install --extra-index-url=https://pypi.nvidia.com \ - 'nvidia-cuda-runtime-cu13==13.0.*' \ + 
'nvidia-cuda-runtime==13.0.*' \ 'cuopt-server-cu13==25.10.*' \ 'cuopt-sh-client==25.10.*' @@ -29,7 +29,7 @@ pip # CUDA 13 pip install --pre --extra-index-url=https://pypi.nvidia.com --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/ \ - 'nvidia-cuda-runtime-cu13==13.0.*' \ + 'nvidia-cuda-runtime==13.0.*' \ 'cuopt-server-cu13==25.10.*' \ 'cuopt-sh-client==25.10.*' diff --git a/python/libcuopt/CMakeLists.txt b/python/libcuopt/CMakeLists.txt index f4124477de..b1cf980b7e 100644 --- a/python/libcuopt/CMakeLists.txt +++ b/python/libcuopt/CMakeLists.txt @@ -69,6 +69,8 @@ set(rpaths "$ORIGIN/../../nvidia/curand/lib" "$ORIGIN/../../nvidia/cusolver/lib" "$ORIGIN/../../nvidia/cusparse/lib" + # For CUDA 13.x + "$ORIGIN/../../nvidia/cu13/lib" ) set_property(TARGET cuopt PROPERTY INSTALL_RPATH ${rpaths} APPEND) From a1a15d235ebdfe774cf247778a4027c3e6f6cc78 Mon Sep 17 00:00:00 2001 From: Kumar Aatish Date: Wed, 10 Sep 2025 08:43:13 -0400 Subject: [PATCH 20/33] Combined variable bounds (#372) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR combined the buffers problem_t::variable_lower_bounds and problem_t::variable_upper_bounds into a single buffer. This is done so as to improve the cache utilization when randomly accessing variable bounds. 
## Issue Authors: - Kumar Aatish (https://github.com/kaatish) Approvers: - Hugo Linsenmaier (https://github.com/hlinsen) - Alice Boucher (https://github.com/aliceb-nv) - Akif ÇÖRDÜK (https://github.com/akifcorduk) - Ramakrishnap (https://github.com/rgsl888prabhu) URL: https://github.com/NVIDIA/cuopt/pull/372 --- .../initial_scaling.cu | 19 +- cpp/src/linear_programming/pdhg.cu | 6 +- cpp/src/linear_programming/pdlp.cu | 29 +- .../restart_strategy/pdlp_restart_strategy.cu | 35 +- .../convergence_information.cu | 30 +- .../infeasibility_information.cu | 42 ++- cpp/src/linear_programming/translate.hpp | 9 +- .../utilities/problem_checking.cu | 62 ++-- cpp/src/linear_programming/utils.cuh | 72 ++++- cpp/src/mip/CMakeLists.txt | 6 +- cpp/src/mip/diversity/assignment_hash_map.cu | 1 + cpp/src/mip/diversity/diversity_manager.cu | 10 +- .../recombiners/bound_prop_recombiner.cuh | 5 +- .../mip/feasibility_jump/feasibility_jump.cu | 9 +- .../feasibility_jump_kernels.cu | 66 ++-- .../mip/feasibility_jump/load_balancing.cuh | 15 +- .../feasibility_pump/feasibility_pump.cu | 40 ++- .../feasibility_pump/feasibility_pump.cuh | 3 - cpp/src/mip/local_search/local_search.cu | 6 - cpp/src/mip/local_search/local_search.cuh | 1 - .../local_search/rounding/bounds_repair.cu | 75 ++--- .../local_search/rounding/bounds_repair.cuh | 24 +- .../local_search/rounding/constraint_prop.cu | 306 +++++++++--------- .../local_search/rounding/constraint_prop.cuh | 11 + .../rounding/simple_rounding_kernels.cuh | 5 +- cpp/src/mip/presolve/bounds_presolve.cu | 68 ++-- cpp/src/mip/presolve/bounds_presolve.cuh | 6 +- .../conditional_bound_strengthening.cu | 14 +- cpp/src/mip/presolve/lb_probing_cache.cu | 6 +- .../load_balanced_partition_helpers.cuh | 39 --- cpp/src/mip/presolve/multi_probe.cu | 74 +++-- cpp/src/mip/presolve/multi_probe.cuh | 6 +- cpp/src/mip/presolve/probing_cache.cu | 44 ++- cpp/src/mip/presolve/probing_cache.cuh | 13 - cpp/src/mip/presolve/trivial_presolve.cuh | 64 ++-- 
.../mip/presolve/trivial_presolve_helpers.cuh | 60 ++-- cpp/src/mip/problem/host_helper.cuh | 10 +- cpp/src/mip/problem/problem.cu | 157 ++++----- cpp/src/mip/problem/problem.cuh | 24 +- cpp/src/mip/problem/problem_helpers.cuh | 68 ++-- cpp/src/mip/problem/write_mps.cu | 3 +- cpp/src/mip/solution/feasibility_test.cuh | 8 +- cpp/src/mip/solution/solution.cu | 45 ++- cpp/src/mip/solve.cu | 6 +- cpp/src/mip/utils.cuh | 38 ++- cpp/src/utilities/copy_helpers.hpp | 106 ++++++ cpp/tests/mip/CMakeLists.txt | 3 - cpp/tests/mip/elim_var_remap_test.cu | 8 +- cpp/tests/mip/load_balancing_test.cu | 23 +- cpp/tests/mip/multi_probe_test.cu | 18 +- 50 files changed, 956 insertions(+), 842 deletions(-) diff --git a/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cu b/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cu index 4c6cbf475b..72931267ad 100644 --- a/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cu +++ b/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cu @@ -375,18 +375,13 @@ void pdlp_initial_scaling_strategy_t::scale_problem() primal_size_h_, stream_view_); - raft::linalg::eltwiseDivideCheckZero( - const_cast&>(op_problem_scaled_.variable_lower_bounds).data(), - op_problem_scaled_.variable_lower_bounds.data(), - cummulative_variable_scaling_.data(), - primal_size_h_, - stream_view_); - raft::linalg::eltwiseDivideCheckZero( - const_cast&>(op_problem_scaled_.variable_upper_bounds).data(), - op_problem_scaled_.variable_upper_bounds.data(), - cummulative_variable_scaling_.data(), - primal_size_h_, - stream_view_); + using f_t2 = typename type_2::type; + cub::DeviceTransform::Transform(cuda::std::make_tuple(op_problem_scaled_.variable_bounds.data(), + cummulative_variable_scaling_.data()), + op_problem_scaled_.variable_bounds.data(), + primal_size_h_, + divide_check_zero(), + stream_view_); raft::linalg::eltwiseMultiply( const_cast&>(op_problem_scaled_.constraint_lower_bounds).data(), diff --git 
a/cpp/src/linear_programming/pdhg.cu b/cpp/src/linear_programming/pdhg.cu index f932eeb8d8..38373391aa 100644 --- a/cpp/src/linear_programming/pdhg.cu +++ b/cpp/src/linear_programming/pdhg.cu @@ -143,18 +143,18 @@ void pdhg_solver_t::compute_primal_projection_with_gradient( // project by max(min(x[i], upperbound[i]),lowerbound[i]) // compute delta_primal x'-x + using f_t2 = typename type_2::type; // All is fused in a single call to limit number of read / write in memory cub::DeviceTransform::Transform( cuda::std::make_tuple(current_saddle_point_state_.get_primal_solution().data(), problem_ptr->objective_coefficients.data(), current_saddle_point_state_.get_current_AtY().data(), - problem_ptr->variable_lower_bounds.data(), - problem_ptr->variable_upper_bounds.data()), + problem_ptr->variable_bounds.data()), thrust::make_zip_iterator(potential_next_primal_solution_.data(), current_saddle_point_state_.get_delta_primal().data(), tmp_primal_.data()), primal_size_h_, - primal_projection(primal_step_size.data()), + primal_projection(primal_step_size.data()), stream_view_); } diff --git a/cpp/src/linear_programming/pdlp.cu b/cpp/src/linear_programming/pdlp.cu index 84b04d43f9..824539e0ca 100644 --- a/cpp/src/linear_programming/pdlp.cu +++ b/cpp/src/linear_programming/pdlp.cu @@ -1041,20 +1041,21 @@ optimization_problem_solution_t pdlp_solver_t::run_solver( // Project initial primal solution if (pdlp_hyper_params::project_initial_primal) { - raft::linalg::ternaryOp(pdhg_solver_.get_primal_solution().data(), - pdhg_solver_.get_primal_solution().data(), - op_problem_scaled_.variable_lower_bounds.data(), - op_problem_scaled_.variable_upper_bounds.data(), - primal_size_h_, - clamp(), - stream_view_); - raft::linalg::ternaryOp(unscaled_primal_avg_solution_.data(), - unscaled_primal_avg_solution_.data(), - op_problem_scaled_.variable_lower_bounds.data(), - op_problem_scaled_.variable_upper_bounds.data(), - primal_size_h_, - clamp(), - stream_view_); + using f_t2 = typename 
type_2::type; + cub::DeviceTransform::Transform( + cuda::std::make_tuple(pdhg_solver_.get_primal_solution().data(), + op_problem_scaled_.variable_bounds.data()), + pdhg_solver_.get_primal_solution().data(), + primal_size_h_, + clamp(), + stream_view_); + cub::DeviceTransform::Transform( + cuda::std::make_tuple(unscaled_primal_avg_solution_.data(), + op_problem_scaled_.variable_bounds.data()), + unscaled_primal_avg_solution_.data(), + primal_size_h_, + clamp(), + stream_view_); } if (verbose) { diff --git a/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu b/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu index 236ec373d8..cf23c8a1d7 100644 --- a/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu +++ b/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu @@ -1388,6 +1388,14 @@ median breakpoint is evaluated, eliminating half of the components. The process is iterated until the argmin is identified. */ +template +struct extract_bounds_t { + __device__ thrust::tuple operator()(f_t2 bounds) + { + return thrust::make_tuple(get_lower(bounds), get_upper(bounds)); + } +}; + template void pdlp_restart_strategy_t::solve_bound_constrained_trust_region( localized_duality_gap_container_t& duality_gap, @@ -1451,10 +1459,13 @@ void pdlp_restart_strategy_t::solve_bound_constrained_trust_region( // component becomes fixed by its bounds // Copying primal / dual bound before sorting them according to threshold - raft::copy( - lower_bound_.data(), problem_ptr->variable_lower_bounds.data(), primal_size_h_, stream_view_); - raft::copy( - upper_bound_.data(), problem_ptr->variable_upper_bounds.data(), primal_size_h_, stream_view_); + using f_t2 = typename type_2::type; + cub::DeviceTransform::Transform( + problem_ptr->variable_bounds.data(), + thrust::make_zip_iterator(thrust::make_tuple(lower_bound_.data(), upper_bound_.data())), + primal_size_h_, + extract_bounds_t(), + stream_view_); 
raft::copy(lower_bound_.data() + primal_size_h_, transformed_constraint_lower_bounds_.data(), dual_size_h_, @@ -1632,13 +1643,13 @@ void pdlp_restart_strategy_t::solve_bound_constrained_trust_region( a_add_scalar_times_b(target_threshold_.data()), stream_view_); // project by max(min(x[i], upperbound[i]),lowerbound[i]) for primal part - raft::linalg::ternaryOp(duality_gap.primal_solution_tr_.data(), - duality_gap.primal_solution_tr_.data(), - problem_ptr->variable_lower_bounds.data(), - problem_ptr->variable_upper_bounds.data(), - primal_size_h_, - clamp(), - stream_view_); + using f_t2 = typename type_2::type; + cub::DeviceTransform::Transform(cuda::std::make_tuple(duality_gap.primal_solution_tr_.data(), + problem_ptr->variable_bounds.data()), + duality_gap.primal_solution_tr_.data(), + primal_size_h_, + clamp(), + stream_view_); // project by max(min(y[i], upperbound[i]),lowerbound[i]) raft::linalg::ternaryOp(duality_gap.dual_solution_tr_.data(), @@ -1646,7 +1657,7 @@ void pdlp_restart_strategy_t::solve_bound_constrained_trust_region( transformed_constraint_lower_bounds_.data(), transformed_constraint_upper_bounds_.data(), dual_size_h_, - clamp(), + constraint_clamp(), stream_view_); // } } diff --git a/cpp/src/linear_programming/termination_strategy/convergence_information.cu b/cpp/src/linear_programming/termination_strategy/convergence_information.cu index 2378b8b9bf..ce579e3104 100644 --- a/cpp/src/linear_programming/termination_strategy/convergence_information.cu +++ b/cpp/src/linear_programming/termination_strategy/convergence_information.cu @@ -336,7 +336,7 @@ void convergence_information_t::compute_dual_objective( problem_ptr->constraint_lower_bounds.data(), problem_ptr->constraint_upper_bounds.data(), dual_size_h_, - bound_value_reduced_cost_product(), + constraint_bound_value_reduced_cost_product(), stream_view_); cub::DeviceReduce::Sum(rmm_tmp_buffer_.data(), @@ -371,13 +371,13 @@ void convergence_information_t::compute_reduced_cost_from_primal_gradi { 
raft::common::nvtx::range fun_scope("compute_reduced_cost_from_primal_gradient"); - raft::linalg::ternaryOp(bound_value_.data(), - primal_gradient.data(), - problem_ptr->variable_lower_bounds.data(), - problem_ptr->variable_upper_bounds.data(), - primal_size_h_, - bound_value_gradient(), - stream_view_); + using f_t2 = typename type_2::type; + cub::DeviceTransform::Transform( + cuda::std::make_tuple(primal_gradient.data(), problem_ptr->variable_bounds.data()), + bound_value_.data(), + primal_size_h_, + bound_value_gradient(), + stream_view_); if (pdlp_hyper_params::handle_some_primal_gradients_on_finite_bounds_as_residuals) { raft::linalg::ternaryOp(reduced_cost_.data(), @@ -402,15 +402,15 @@ void convergence_information_t::compute_reduced_costs_dual_objective_c { raft::common::nvtx::range fun_scope("compute_reduced_costs_dual_objective_contribution"); + using f_t2 = typename type_2::type; // if reduced cost is positive -> lower bound, negative -> upper bounds, 0 -> 0 // if bound_val is not finite let element be -inf, otherwise bound_value*reduced_cost - raft::linalg::ternaryOp(bound_value_.data(), - reduced_cost_.data(), - problem_ptr->variable_lower_bounds.data(), - problem_ptr->variable_upper_bounds.data(), - primal_size_h_, - bound_value_reduced_cost_product(), - stream_view_); + cub::DeviceTransform::Transform( + cuda::std::make_tuple(reduced_cost_.data(), problem_ptr->variable_bounds.data()), + bound_value_.data(), + primal_size_h_, + bound_value_reduced_cost_product(), + stream_view_); // sum over bound_value*reduced_cost, but should be -inf if any element is -inf cub::DeviceReduce::Sum(rmm_tmp_buffer_.data(), diff --git a/cpp/src/linear_programming/termination_strategy/infeasibility_information.cu b/cpp/src/linear_programming/termination_strategy/infeasibility_information.cu index fd9651d9a2..1e1bf3abf2 100644 --- a/cpp/src/linear_programming/termination_strategy/infeasibility_information.cu +++ 
b/cpp/src/linear_programming/termination_strategy/infeasibility_information.cu @@ -254,16 +254,14 @@ void infeasibility_information_t::compute_max_violation( // Convert raw pointer to thrust::device_ptr to write directly device side through reduce thrust::device_ptr primal_ray_max_violation(primal_ray_max_violation_.data()); + using f_t2 = typename type_2::type; *primal_ray_max_violation = thrust::transform_reduce( handle_ptr_->get_thrust_policy(), - thrust::make_zip_iterator(thrust::make_tuple(primal_ray.data(), - problem_ptr->variable_lower_bounds.data(), - problem_ptr->variable_upper_bounds.data())), thrust::make_zip_iterator( - thrust::make_tuple(primal_ray.data() + primal_size_h_, - problem_ptr->variable_lower_bounds.data() + primal_size_h_, - problem_ptr->variable_upper_bounds.data() + primal_size_h_)), - max_violation(), + thrust::make_tuple(primal_ray.data(), problem_ptr->variable_bounds.data())), + thrust::make_zip_iterator(thrust::make_tuple( + primal_ray.data() + primal_size_h_, problem_ptr->variable_bounds.data() + primal_size_h_)), + max_violation(), f_t(0.0), thrust::maximum()); } @@ -329,7 +327,7 @@ void infeasibility_information_t::compute_homogenous_dual_objective( problem_ptr->constraint_lower_bounds.data(), problem_ptr->constraint_upper_bounds.data(), dual_size_h_, - bound_value_reduced_cost_product(), + constraint_bound_value_reduced_cost_product(), stream_view_); cub::DeviceReduce::Sum(rmm_tmp_buffer_.data(), @@ -364,13 +362,13 @@ template void infeasibility_information_t::compute_reduced_cost_from_primal_gradient( rmm::device_uvector& primal_gradient, rmm::device_uvector& primal_ray) { - raft::linalg::ternaryOp(bound_value_.data(), - primal_gradient.data(), - problem_ptr->variable_lower_bounds.data(), - problem_ptr->variable_upper_bounds.data(), - primal_size_h_, - bound_value_gradient(), - stream_view_); + using f_t2 = typename type_2::type; + cub::DeviceTransform::Transform( + cuda::std::make_tuple(primal_gradient.data(), 
problem_ptr->variable_bounds.data()), + bound_value_.data(), + primal_size_h_, + bound_value_gradient(), + stream_view_); if (pdlp_hyper_params::handle_some_primal_gradients_on_finite_bounds_as_residuals) { raft::linalg::ternaryOp(reduced_cost_.data(), @@ -393,16 +391,16 @@ void infeasibility_information_t::compute_reduced_cost_from_primal_gra template void infeasibility_information_t::compute_reduced_costs_dual_objective_contribution() { + using f_t2 = typename type_2::type; // Check if these bounds are the same as computed above // if reduced cost is positive -> lower bound, negative -> upper bounds, 0 -> 0 // if bound_val is not finite let element be -inf, otherwise bound_value*reduced_cost - raft::linalg::ternaryOp(bound_value_.data(), - reduced_cost_.data(), - problem_ptr->variable_lower_bounds.data(), - problem_ptr->variable_upper_bounds.data(), - primal_size_h_, - bound_value_reduced_cost_product(), - stream_view_); + cub::DeviceTransform::Transform( + cuda::std::make_tuple(reduced_cost_.data(), problem_ptr->variable_bounds.data()), + bound_value_.data(), + primal_size_h_, + bound_value_reduced_cost_product(), + stream_view_); // sum over bound_value*reduced_cost cub::DeviceReduce::Sum(rmm_tmp_buffer_.data(), diff --git a/cpp/src/linear_programming/translate.hpp b/cpp/src/linear_programming/translate.hpp index 6731be6ec3..0f1fc9f1c1 100644 --- a/cpp/src/linear_programming/translate.hpp +++ b/cpp/src/linear_programming/translate.hpp @@ -81,9 +81,9 @@ static dual_simplex::user_problem_t cuopt_problem_to_simplex_problem( } } user_problem.num_range_rows = user_problem.range_rows.size(); - user_problem.lower = cuopt::host_copy(model.variable_lower_bounds); - user_problem.upper = cuopt::host_copy(model.variable_upper_bounds); - user_problem.problem_name = model.original_problem_ptr->get_problem_name(); + std::tie(user_problem.lower, user_problem.upper) = + extract_host_bounds(model.variable_bounds, model.handle_ptr); + user_problem.problem_name = 
model.original_problem_ptr->get_problem_name(); if (model.row_names.size() > 0) { user_problem.row_names.resize(m); for (int i = 0; i < m; ++i) { @@ -164,8 +164,7 @@ void translate_to_crossover_problem(const detail::problem_t& problem, lp.obj_constant = problem.presolve_data.objective_offset; lp.obj_scale = problem.presolve_data.objective_scaling_factor; - std::vector lower = cuopt::host_copy(problem.variable_lower_bounds); - std::vector upper = cuopt::host_copy(problem.variable_upper_bounds); + auto [lower, upper] = extract_host_bounds(problem.variable_bounds, problem.handle_ptr); std::vector constraint_lower = cuopt::host_copy(problem.constraint_lower_bounds); std::vector constraint_upper = cuopt::host_copy(problem.constraint_upper_bounds); diff --git a/cpp/src/linear_programming/utilities/problem_checking.cu b/cpp/src/linear_programming/utilities/problem_checking.cu index a4d979cf94..fbbdd1ad93 100644 --- a/cpp/src/linear_programming/utilities/problem_checking.cu +++ b/cpp/src/linear_programming/utilities/problem_checking.cu @@ -265,21 +265,16 @@ template void problem_checking_t::check_scaled_problem( detail::problem_t const& scaled_problem, detail::problem_t const& op_problem) { + using f_t2 = typename type_2::type; // original problem to host - auto& d_variable_upper_bounds = op_problem.variable_upper_bounds; - auto& d_variable_lower_bounds = op_problem.variable_lower_bounds; - auto& d_variable_types = op_problem.variable_types; - std::vector variable_upper_bounds(d_variable_upper_bounds.size()); - std::vector variable_lower_bounds(d_variable_lower_bounds.size()); + auto& d_variable_bounds = op_problem.variable_bounds; + auto& d_variable_types = op_problem.variable_types; + std::vector variable_bounds(d_variable_bounds.size()); std::vector variable_types(d_variable_types.size()); - raft::copy(variable_upper_bounds.data(), - d_variable_upper_bounds.data(), - d_variable_upper_bounds.size(), - op_problem.handle_ptr->get_stream()); - 
raft::copy(variable_lower_bounds.data(), - d_variable_lower_bounds.data(), - d_variable_lower_bounds.size(), + raft::copy(variable_bounds.data(), + d_variable_bounds.data(), + d_variable_bounds.size(), op_problem.handle_ptr->get_stream()); raft::copy(variable_types.data(), d_variable_types.data(), @@ -287,26 +282,20 @@ void problem_checking_t::check_scaled_problem( op_problem.handle_ptr->get_stream()); // scaled problem to host - std::vector scaled_variable_upper_bounds(scaled_problem.variable_upper_bounds.size()); - std::vector scaled_variable_lower_bounds(scaled_problem.variable_lower_bounds.size()); - std::vector scaled_variables(scaled_problem.variable_lower_bounds.size()); + std::vector scaled_variable_bounds(scaled_problem.variable_bounds.size()); - raft::copy(scaled_variable_upper_bounds.data(), - scaled_problem.variable_upper_bounds.data(), - scaled_problem.variable_upper_bounds.size(), - op_problem.handle_ptr->get_stream()); - raft::copy(scaled_variable_lower_bounds.data(), - scaled_problem.variable_lower_bounds.data(), - scaled_problem.variable_lower_bounds.size(), + raft::copy(scaled_variable_bounds.data(), + scaled_problem.variable_bounds.data(), + scaled_problem.variable_bounds.size(), op_problem.handle_ptr->get_stream()); for (size_t i = 0; i < variable_types.size(); ++i) { auto var_type = variable_types[i]; if (var_type == var_t::INTEGER) { // Integers should be untouched - cuopt_assert(variable_upper_bounds[i] == scaled_variable_upper_bounds[i], - "Mismatch upper scaling"); - cuopt_assert(variable_lower_bounds[i] == scaled_variable_lower_bounds[i], + cuopt_assert(get_lower(variable_bounds[i]) == get_lower(scaled_variable_bounds[i]), "Mismatch lower scaling"); + cuopt_assert(get_upper(variable_bounds[i]) == get_upper(scaled_variable_bounds[i]), + "Mismatch upper scaling"); } } } @@ -315,26 +304,21 @@ template void problem_checking_t::check_unscaled_solution( detail::problem_t& op_problem, rmm::device_uvector const& assignment) { - auto& 
d_variable_upper_bounds = op_problem.variable_upper_bounds; - auto& d_variable_lower_bounds = op_problem.variable_lower_bounds; - std::vector variable_upper_bounds(d_variable_upper_bounds.size()); - std::vector variable_lower_bounds(d_variable_lower_bounds.size()); + using f_t2 = typename type_2::type; + auto& d_variable_bounds = op_problem.variable_bounds; + std::vector variable_bounds(d_variable_bounds.size()); std::vector h_assignment(assignment.size()); - raft::copy(variable_upper_bounds.data(), - d_variable_upper_bounds.data(), - d_variable_upper_bounds.size(), - op_problem.handle_ptr->get_stream()); - raft::copy(variable_lower_bounds.data(), - d_variable_lower_bounds.data(), - d_variable_lower_bounds.size(), + raft::copy(variable_bounds.data(), + d_variable_bounds.data(), + d_variable_bounds.size(), op_problem.handle_ptr->get_stream()); raft::copy( h_assignment.data(), assignment.data(), assignment.size(), op_problem.handle_ptr->get_stream()); const f_t int_tol = op_problem.tolerances.integrality_tolerance; - for (size_t i = 0; i < variable_upper_bounds.size(); ++i) { - cuopt_assert(h_assignment[i] <= variable_upper_bounds[i] + int_tol, "Excess upper bound"); - cuopt_assert(h_assignment[i] >= variable_lower_bounds[i] - int_tol, "Excess lower bound"); + for (size_t i = 0; i < variable_bounds.size(); ++i) { + cuopt_assert(h_assignment[i] >= get_lower(variable_bounds[i]) - int_tol, "Excess lower bound"); + cuopt_assert(h_assignment[i] <= get_upper(variable_bounds[i]) + int_tol, "Excess upper bound"); } } diff --git a/cpp/src/linear_programming/utils.cuh b/cpp/src/linear_programming/utils.cuh index d4df2815b1..de107a66fb 100644 --- a/cpp/src/linear_programming/utils.cuh +++ b/cpp/src/linear_programming/utils.cuh @@ -77,13 +77,17 @@ struct a_sub_scalar_times_b { const f_t* scalar_; }; -template +template struct primal_projection { primal_projection(const f_t* step_size) : step_size_(step_size) {} - __device__ __forceinline__ thrust::tuple operator()( - f_t primal, 
f_t obj_coeff, f_t AtY, f_t lower, f_t upper) + __device__ __forceinline__ thrust::tuple operator()(f_t primal, + f_t obj_coeff, + f_t AtY, + f_t2 bounds) { + f_t lower = get_lower(bounds); + f_t upper = get_upper(bounds); f_t gradient = obj_coeff - AtY; f_t next = primal - (*step_size_ * gradient); next = raft::max(raft::min(next, upper), lower); @@ -129,13 +133,21 @@ struct a_divides_sqrt_b_bounded { }; template -struct clamp { +struct constraint_clamp { __device__ f_t operator()(f_t value, f_t lower, f_t upper) { return raft::min(raft::max(value, lower), upper); } }; +template +struct clamp { + __device__ f_t operator()(f_t value, f_t2 bounds) + { + return raft::min(raft::max(value, get_lower(bounds)), get_upper(bounds)); + } +}; + template struct combine_finite_abs_bounds { __device__ __host__ f_t operator()(f_t lower, f_t upper) @@ -177,32 +189,47 @@ struct violation { } }; -template +template struct max_violation { max_violation() {} - __device__ f_t operator()(const thrust::tuple& t) const + __device__ f_t operator()(const thrust::tuple& t) const { - const f_t value = thrust::get<0>(t); - const f_t lower = thrust::get<1>(t); - const f_t upper = thrust::get<2>(t); - f_t local_max = f_t(0.0); + const f_t value = thrust::get<0>(t); + const f_t2 bounds = thrust::get<1>(t); + const f_t lower = get_lower(bounds); + const f_t upper = get_upper(bounds); + f_t local_max = f_t(0.0); if (isfinite(lower)) { local_max = raft::max(local_max, -value); } if (isfinite(upper)) { local_max = raft::max(local_max, value); } return local_max; } }; -template +template +struct divide_check_zero { + __device__ f_t2 operator()(f_t2 bounds, f_t value) + { + if (value == f_t{0}) { + return f_t2{0, 0}; + } else { + return f_t2{get_lower(bounds) / value, get_upper(bounds) / value}; + } + } +}; + +template struct bound_value_gradient { - __device__ f_t operator()(f_t value, f_t lower, f_t upper) + __device__ f_t operator()(f_t value, f_t2 bounds) { + f_t lower = get_lower(bounds); + f_t 
upper = get_upper(bounds); if (value > f_t(0) && value < f_t(0)) { return 0; } return value > f_t(0) ? lower : upper; } }; template -struct bound_value_reduced_cost_product { +struct constraint_bound_value_reduced_cost_product { __device__ f_t operator()(f_t value, f_t lower, f_t upper) { f_t bound_value = f_t(0); @@ -218,6 +245,25 @@ struct bound_value_reduced_cost_product { } }; +template +struct bound_value_reduced_cost_product { + __device__ f_t operator()(f_t value, f_t2 variable_bounds) + { + f_t lower = get_lower(variable_bounds); + f_t upper = get_upper(variable_bounds); + f_t bound_value = f_t(0); + if (value > f_t(0)) { + // A positive reduced cost is associated with a binding lower bound. + bound_value = lower; + } else if (value < f_t(0)) { + // A negative reduced cost is associated with a binding upper bound. + bound_value = upper; + } + f_t val = isfinite(bound_value) ? value * bound_value : f_t(0); + return val; + } +}; + template struct copy_gradient_if_should_be_reduced_cost { __device__ f_t operator()(f_t value, f_t bound, f_t gradient) diff --git a/cpp/src/mip/CMakeLists.txt b/cpp/src/mip/CMakeLists.txt index 8f859e2d0d..7165b3bb56 100644 --- a/cpp/src/mip/CMakeLists.txt +++ b/cpp/src/mip/CMakeLists.txt @@ -36,19 +36,15 @@ set(MIP_NON_LP_FILES ${CMAKE_CURRENT_SOURCE_DIR}/local_search/local_search.cu ${CMAKE_CURRENT_SOURCE_DIR}/local_search/rounding/bounds_repair.cu ${CMAKE_CURRENT_SOURCE_DIR}/local_search/rounding/constraint_prop.cu - ${CMAKE_CURRENT_SOURCE_DIR}/local_search/rounding/lb_bounds_repair.cu - ${CMAKE_CURRENT_SOURCE_DIR}/local_search/rounding/lb_constraint_prop.cu + ${CMAKE_CURRENT_SOURCE_DIR}/local_search/rounding/simple_rounding.cu ${CMAKE_CURRENT_SOURCE_DIR}/local_search/feasibility_pump/feasibility_pump.cu ${CMAKE_CURRENT_SOURCE_DIR}/local_search/line_segment_search/line_segment_search.cu ${CMAKE_CURRENT_SOURCE_DIR}/presolve/bounds_presolve.cu ${CMAKE_CURRENT_SOURCE_DIR}/presolve/bounds_update_data.cu 
${CMAKE_CURRENT_SOURCE_DIR}/presolve/conditional_bound_strengthening.cu - ${CMAKE_CURRENT_SOURCE_DIR}/presolve/lb_probing_cache.cu - ${CMAKE_CURRENT_SOURCE_DIR}/presolve/load_balanced_bounds_presolve.cu ${CMAKE_CURRENT_SOURCE_DIR}/presolve/multi_probe.cu ${CMAKE_CURRENT_SOURCE_DIR}/presolve/probing_cache.cu ${CMAKE_CURRENT_SOURCE_DIR}/presolve/trivial_presolve.cu - ${CMAKE_CURRENT_SOURCE_DIR}/problem/load_balanced_problem.cu ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump.cu ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump_kernels.cu ) diff --git a/cpp/src/mip/diversity/assignment_hash_map.cu b/cpp/src/mip/diversity/assignment_hash_map.cu index 91ef05bd1f..24d0051b37 100644 --- a/cpp/src/mip/diversity/assignment_hash_map.cu +++ b/cpp/src/mip/diversity/assignment_hash_map.cu @@ -91,6 +91,7 @@ template size_t assignment_hash_map_t::hash_solution(solution_t& solution) { const int TPB = 1024; + fill_integer_assignment(solution); thrust::fill( solution.handle_ptr->get_thrust_policy(), reduction_buffer.begin(), reduction_buffer.end(), 0); diff --git a/cpp/src/mip/diversity/diversity_manager.cu b/cpp/src/mip/diversity/diversity_manager.cu index b406d56a39..726eb5b41a 100644 --- a/cpp/src/mip/diversity/diversity_manager.cu +++ b/cpp/src/mip/diversity/diversity_manager.cu @@ -402,13 +402,13 @@ solution_t diversity_manager_t::run_solver() cuopt::scope_guard([&]() { stats.total_solve_time = timer.elapsed_time(); }); // after every change to the problem, we should resize all the relevant vars // we need to encapsulate that to prevent repetitions + ls.resize_vectors(*problem_ptr, problem_ptr->handle_ptr); - ls.lb_constraint_prop.temp_problem.setup(*problem_ptr); - ls.lb_constraint_prop.bounds_update.setup(ls.lb_constraint_prop.temp_problem); ls.constraint_prop.bounds_update.resize(*problem_ptr); problem_ptr->check_problem_representation(true); // have the structure ready for reusing later problem_ptr->compute_integer_fixed_problem(); + // test problem 
is not ii cuopt_func_call( ls.constraint_prop.bounds_update.calculate_activity_on_problem_bounds(*problem_ptr)); @@ -428,9 +428,6 @@ solution_t diversity_manager_t::run_solver() if (!fj_only_run) { compute_probing_cache(ls.constraint_prop.bounds_update, *problem_ptr, probing_timer); } - // careful, assign the correct probing cache - ls.lb_constraint_prop.bounds_update.probing_cache.probing_cache = - ls.constraint_prop.bounds_update.probing_cache.probing_cache; if (check_b_b_preemption()) { return population.best_feasible(); } lp_state_t& lp_state = problem_ptr->lp_state; @@ -482,6 +479,7 @@ solution_t diversity_manager_t::run_solver() // in case the pdlp returned var boudns that are out of bounds clamp_within_var_bounds(lp_optimal_solution, problem_ptr, problem_ptr->handle_ptr); } + population.allocate_solutions(); if (check_b_b_preemption()) { return population.best_feasible(); } if (!fp_only_run) { @@ -505,7 +503,9 @@ solution_t diversity_manager_t::run_solver() } if (timer.check_time_limit()) { return population.best_feasible(); } + main_loop(); + return population.best_feasible(); }; diff --git a/cpp/src/mip/diversity/recombiners/bound_prop_recombiner.cuh b/cpp/src/mip/diversity/recombiners/bound_prop_recombiner.cuh index f38cc5759e..52347fc8b7 100644 --- a/cpp/src/mip/diversity/recombiners/bound_prop_recombiner.cuh +++ b/cpp/src/mip/diversity/recombiners/bound_prop_recombiner.cuh @@ -86,8 +86,9 @@ class bound_prop_recombiner_t : public recombiner_t { f_t second_val = round(avg_val) == other_val ? 
guiding_val : round(avg_val); probing_values[idx] = thrust::make_pair(other_val, second_val); // assign some floating value, so that they can be rounded by bounds prop - f_t lb = guiding_view.problem.variable_lower_bounds[idx]; - f_t ub = guiding_view.problem.variable_upper_bounds[idx]; + auto bounds = guiding_view.problem.variable_bounds[idx]; + f_t lb = get_lower(bounds); + f_t ub = get_upper(bounds); if (integer_equal(lb, ub, int_tol)) { cuopt_assert(false, "The var values must be different in A and B!"); } else if (isfinite(lb)) { diff --git a/cpp/src/mip/feasibility_jump/feasibility_jump.cu b/cpp/src/mip/feasibility_jump/feasibility_jump.cu index 8e864dcf20..7520d1431e 100644 --- a/cpp/src/mip/feasibility_jump/feasibility_jump.cu +++ b/cpp/src/mip/feasibility_jump/feasibility_jump.cu @@ -291,8 +291,8 @@ void fj_t::device_init(const rmm::cuda_stream_view& stream) cuopt_assert(var_idx < pb.is_binary_variable.size(), ""); if (pb.is_binary_variable[var_idx]) { - cuopt_assert(pb.variable_lower_bounds[var_idx] == 0 && - pb.variable_upper_bounds[var_idx] == 1, + cuopt_assert(get_lower(pb.variable_bounds[var_idx]) == 0 && + get_upper(pb.variable_bounds[var_idx]) == 1, "invalid bounds for binary variable"); } }); @@ -392,9 +392,9 @@ void fj_t::climber_init(i_t climber_idx, const rmm::cuda_stream_view& incumbent_assignment[var_idx] = round(incumbent_assignment[var_idx]); } // clamp to bounds + auto bounds = pb.variable_bounds[var_idx]; incumbent_assignment[var_idx] = - max(pb.variable_lower_bounds[var_idx], - min(pb.variable_upper_bounds[var_idx], incumbent_assignment[var_idx])); + max(get_lower(bounds), min(get_upper(bounds), incumbent_assignment[var_idx])); }); thrust::for_each( @@ -1102,6 +1102,7 @@ i_t fj_t::solve(solution_t& solution) settings.iteration_limit * settings.parameters.rounding_second_stage_split; round_remaining_fractionals(solution); + // if time limit exceeded: round all remaining fractionals if any by nearest rounding. 
if (climbers[0]->fractional_variables.set_size.value(handle_ptr->get_stream()) > 0) { solution.round_nearest(); diff --git a/cpp/src/mip/feasibility_jump/feasibility_jump_kernels.cu b/cpp/src/mip/feasibility_jump/feasibility_jump_kernels.cu index 213d31bc5a..0d96cd4b2a 100644 --- a/cpp/src/mip/feasibility_jump/feasibility_jump_kernels.cu +++ b/cpp/src/mip/feasibility_jump/feasibility_jump_kernels.cu @@ -438,16 +438,15 @@ DI std::pair::move_score_info_t> compute_best_mtm( auto best_score_info = fj_t::move_score_info_t::invalid(); // fixed variables - if (fj.pb.integer_equal(fj.pb.variable_lower_bounds[var_idx], - fj.pb.variable_upper_bounds[var_idx])) { - return std::make_pair(fj.pb.variable_lower_bounds[var_idx], - fj_t::move_score_info_t::invalid()); + auto bounds = fj.pb.variable_bounds[var_idx]; + if (fj.pb.integer_equal(get_lower(bounds), get_upper(bounds))) { + return std::make_pair(get_lower(bounds), fj_t::move_score_info_t::invalid()); } f_t old_val = fj.incumbent_assignment[var_idx]; f_t obj_coeff = fj.pb.objective_coefficients[var_idx]; - f_t v_lb = fj.pb.variable_lower_bounds[var_idx]; - f_t v_ub = fj.pb.variable_upper_bounds[var_idx]; + f_t v_lb = get_lower(bounds); + f_t v_ub = get_upper(bounds); raft::random::PCGenerator rng(fj.settings->seed + *fj.iterations, 0, 0); cuopt_assert(isfinite(v_lb) || isfinite(v_ub), "unexpected free variable"); @@ -549,9 +548,9 @@ DI void update_jump_value(typename fj_t::climber_data_t::view_t fj, i_ cuopt_assert(fj.pb.integer_equal(fj.incumbent_assignment[var_idx], 0) || fj.pb.integer_equal(fj.incumbent_assignment[var_idx], 1), "Current assignment is not binary!"); - cuopt_assert( - fj.pb.variable_lower_bounds[var_idx] == 0 && fj.pb.variable_upper_bounds[var_idx] == 1, - ""); + cuopt_assert(get_lower(fj.pb.variable_bounds[var_idx]) == 0 && + get_upper(fj.pb.variable_bounds[var_idx]) == 1, + ""); cuopt_assert( fj.pb.check_variable_within_bounds(var_idx, fj.incumbent_assignment[var_idx] + delta), "Var not within bounds!"); 
@@ -566,8 +565,9 @@ DI void update_jump_value(typename fj_t::climber_data_t::view_t fj, i_ } else { delta = round(1.0 - 2 * fj.incumbent_assignment[var_idx]); if (threadIdx.x == 0) { - cuopt_assert( - fj.pb.variable_lower_bounds[var_idx] == 0 && fj.pb.variable_upper_bounds[var_idx] == 1, ""); + cuopt_assert(get_lower(fj.pb.variable_bounds[var_idx]) == 0 && + get_upper(fj.pb.variable_bounds[var_idx]) == 1, + ""); cuopt_assert( fj.pb.check_variable_within_bounds(var_idx, fj.incumbent_assignment[var_idx] + delta), "Var not within bounds!"); @@ -795,7 +795,8 @@ __global__ void update_assignment_kernel(typename fj_t::climber_data_t } } - i_t var_range = fj.pb.variable_upper_bounds[var_idx] - fj.pb.variable_lower_bounds[var_idx]; + auto bounds = fj.pb.variable_bounds[var_idx]; + i_t var_range = get_upper(bounds) - get_lower(bounds); double delta_rel_err = fabs(fj.jump_move_delta[var_idx]) / var_range; if (delta_rel_err < fj.settings->parameters.small_move_tabu_threshold) { *fj.small_move_tabu = *fj.iterations; @@ -807,8 +808,8 @@ __global__ void update_assignment_kernel(typename fj_t::climber_data_t "err_range %.2g%%, infeas %.2g, total viol %d\n", *fj.iterations, var_idx, - fj.pb.variable_lower_bounds[var_idx], - fj.pb.variable_upper_bounds[var_idx], + get_lower(fj.pb.variable_bounds[var_idx]), + get_upper(fj.pb.variable_bounds[var_idx]), fj.incumbent_assignment[var_idx], fj.jump_move_delta[var_idx], fj.incumbent_assignment[var_idx] + fj.jump_move_delta[var_idx], @@ -891,8 +892,9 @@ DI void update_lift_moves(typename fj_t::climber_data_t::view_t fj) f_t obj_coeff = fj.pb.objective_coefficients[var_idx]; f_t delta = -std::numeric_limits::infinity(); - f_t th_lower_delta = fj.pb.variable_lower_bounds[var_idx] - fj.incumbent_assignment[var_idx]; - f_t th_upper_delta = fj.pb.variable_upper_bounds[var_idx] - fj.incumbent_assignment[var_idx]; + auto bounds = fj.pb.variable_bounds[var_idx]; + f_t th_lower_delta = get_lower(bounds) - fj.incumbent_assignment[var_idx]; + f_t 
th_upper_delta = get_upper(bounds) - fj.incumbent_assignment[var_idx]; auto [offset_begin, offset_end] = fj.pb.reverse_range_for_var(var_idx); for (i_t j = threadIdx.x + offset_begin; j < offset_end; j += blockDim.x) { auto cstr_idx = fj.pb.reverse_constraints[j]; @@ -992,8 +994,9 @@ template DI f_t get_breakthrough_move(typename fj_t::climber_data_t::view_t fj, i_t var_idx) { f_t obj_coeff = fj.pb.objective_coefficients[var_idx]; - f_t v_lb = fj.pb.variable_lower_bounds[var_idx]; - f_t v_ub = fj.pb.variable_upper_bounds[var_idx]; + auto bounds = fj.pb.variable_bounds[var_idx]; + f_t v_lb = get_lower(bounds); + f_t v_ub = get_upper(bounds); cuopt_assert(isfinite(v_lb) || isfinite(v_ub), "unexpected free variable"); cuopt_assert(v_lb <= v_ub, "invalid bounds"); cuopt_assert(fj.pb.check_variable_within_bounds(var_idx, fj.incumbent_assignment[var_idx]), @@ -1217,8 +1220,8 @@ __device__ void compute_mtm_moves(typename fj_t::climber_data_t::view_ bool exclude_from_search = false; // "fixed" variables are to be excluded (as they cannot take any other value) - exclude_from_search |= fj.pb.integer_equal(fj.pb.variable_lower_bounds[var_idx], - fj.pb.variable_upper_bounds[var_idx]); + auto bounds = fj.pb.variable_bounds[var_idx]; + exclude_from_search |= fj.pb.integer_equal(get_lower(bounds), get_upper(bounds)); if (exclude_from_search) { if (threadIdx.x == 0) { @@ -1272,7 +1275,8 @@ __global__ void select_variable_kernel(typename fj_t::climber_data_t:: auto var_idx = fj.candidate_variables.contents[setidx]; auto move_score = fj.jump_move_scores[var_idx]; - i_t var_range = fj.pb.variable_upper_bounds[var_idx] - fj.pb.variable_lower_bounds[var_idx]; + auto bounds = fj.pb.variable_bounds[var_idx]; + i_t var_range = get_upper(bounds) - get_lower(bounds); double delta_rel_err = fabs(fj.jump_move_delta[var_idx]) / var_range; // tabu for small moves to avoid very long descents/numerical issues if (delta_rel_err < fj.settings->parameters.small_move_tabu_threshold && @@ -1319,16 
+1323,16 @@ __global__ void select_variable_kernel(typename fj_t::climber_data_t:: *fj.selected_var = selected_var; if (selected_var != std::numeric_limits::max()) { #if FJ_SINGLE_STEP - i_t var_range = - fj.pb.variable_upper_bounds[selected_var] - fj.pb.variable_lower_bounds[selected_var]; + auto bounds = fj.pb.variable_bounds[selected_var]; + i_t var_range = get_upper(bounds) - get_lower(bounds); double delta_rel_err = fabs(fj.jump_move_delta[selected_var]) / var_range * 100; DEVICE_LOG_INFO( "=---- FJ: selected %d [%g/%g] %c :%.4g+{%.4g}=%.4g score {%g,%g}, d_obj %.2g+%.2g->%.2g, " "delta_rel_err %.2g%%, " "infeas %.2g, total viol %d, out of %d\n", selected_var, - fj.pb.variable_lower_bounds[selected_var], - fj.pb.variable_upper_bounds[selected_var], + get_lower(bounds), + get_upper(bounds), fj.pb.variable_types[selected_var] == var_t::INTEGER ? 'I' : 'C', fj.incumbent_assignment[selected_var], fj.jump_move_delta[selected_var], @@ -1553,9 +1557,10 @@ __global__ void handle_local_minimum_kernel(typename fj_t::climber_dat // if no move was found, fallback to round-nearest if (fj.pb.integer_equal(delta, 0)) { - delta = round_nearest(fj.incumbent_assignment[selected], - fj.pb.variable_lower_bounds[selected], - fj.pb.variable_upper_bounds[selected], + auto bounds = fj.pb.variable_bounds[selected]; + delta = round_nearest(fj.incumbent_assignment[selected], + get_lower(bounds), + get_upper(bounds), fj.pb.tolerances.integrality_tolerance, rng) - fj.incumbent_assignment[selected]; @@ -1564,10 +1569,11 @@ __global__ void handle_local_minimum_kernel(typename fj_t::climber_dat if (FIRST_THREAD) { fj.jump_move_delta[selected] = delta; *fj.selected_var = selected; + auto bounds = fj.pb.variable_bounds[*fj.selected_var]; DEVICE_LOG_TRACE("selected_var: %d bounds [%.4g/%.4g], delta %g, old val %g\n", *fj.selected_var, - fj.pb.variable_lower_bounds[*fj.selected_var], - fj.pb.variable_upper_bounds[*fj.selected_var], + get_lower(bounds), + get_upper(bounds), 
fj.jump_move_delta[*fj.selected_var], fj.incumbent_assignment[*fj.selected_var]); } diff --git a/cpp/src/mip/feasibility_jump/load_balancing.cuh b/cpp/src/mip/feasibility_jump/load_balancing.cuh index b539687400..67af9c06ae 100644 --- a/cpp/src/mip/feasibility_jump/load_balancing.cuh +++ b/cpp/src/mip/feasibility_jump/load_balancing.cuh @@ -322,8 +322,9 @@ __global__ void load_balancing_compute_scores_binary( if (threadIdx.x == 0) { cuopt_assert(fj.incumbent_assignment[var_idx] == 0 || fj.incumbent_assignment[var_idx] == 1, "Current assignment is not binary!"); - cuopt_assert( - fj.pb.variable_lower_bounds[var_idx] == 0 && fj.pb.variable_upper_bounds[var_idx] == 1, ""); + cuopt_assert(get_lower(fj.pb.variable_bounds[var_idx]) == 0 && + get_upper(fj.pb.variable_bounds[var_idx]) == 1, + ""); cuopt_assert( fj.pb.check_variable_within_bounds(var_idx, fj.incumbent_assignment[var_idx] + delta), "Var not within bounds!"); @@ -400,8 +401,9 @@ __global__ void load_balancing_mtm_compute_candidates( auto rcp_cstr_coeff = fj.cstr_coeff_reciprocal[csr_offset]; f_t c_lb = fj.constraint_lower_bounds_csr[csr_offset]; f_t c_ub = fj.constraint_upper_bounds_csr[csr_offset]; - f_t v_lb = fj.pb.variable_lower_bounds[var_idx]; - f_t v_ub = fj.pb.variable_upper_bounds[var_idx]; + auto v_bnd = fj.pb.variable_bounds[var_idx]; + f_t v_lb = get_lower(v_bnd); + f_t v_ub = get_upper(v_bnd); cuopt_assert(c_lb == fj.pb.constraint_lower_bounds[cstr_idx], ""); cuopt_assert(c_ub == fj.pb.constraint_upper_bounds[cstr_idx], ""); @@ -512,8 +514,9 @@ __launch_bounds__(TPB_loadbalance, 16) __global__ cuopt_assert(cstr_idx >= 0 && cstr_idx < fj.pb.n_constraints, ""); } - f_t v_lb = fj.pb.variable_lower_bounds[var_idx]; - f_t v_ub = fj.pb.variable_upper_bounds[var_idx]; + auto v_bnd = fj.pb.variable_bounds[var_idx]; + f_t v_lb = get_lower(v_bnd); + f_t v_ub = get_upper(v_bnd); // candidate counts is usually very small (<4) thanks to early duplicate deletion in the // previous kernel rarely limits the 
thoroughput nor leads to noticeable imbalance diff --git a/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cu b/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cu index 8286d8148e..f8b7d34fc5 100644 --- a/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cu +++ b/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cu @@ -46,7 +46,6 @@ feasibility_pump_t::feasibility_pump_t( fj_t& fj_, // fj_tree_t& fj_tree_, constraint_prop_t& constraint_prop_, - lb_constraint_prop_t& lb_constraint_prop_, line_segment_search_t& line_segment_search_, rmm::device_uvector& lp_optimal_solution_) : context(context_), @@ -55,7 +54,6 @@ feasibility_pump_t::feasibility_pump_t( line_segment_search(line_segment_search_), cycle_queue(*context.problem_ptr), constraint_prop(constraint_prop_), - lb_constraint_prop(lb_constraint_prop_), last_rounding(context.problem_ptr->n_variables, context.problem_ptr->handle_ptr->get_stream()), last_projection(context.problem_ptr->n_variables, context.problem_ptr->handle_ptr->get_stream()), @@ -147,11 +145,9 @@ bool feasibility_pump_t::linear_project_onto_polytope(solution_tvariable_upper_bounds, - solution.handle_ptr->get_stream()); - auto h_variable_lower_bounds = cuopt::host_copy(solution.problem_ptr->variable_lower_bounds, - solution.handle_ptr->get_stream()); + auto h_assignment = solution.get_host_assignment(); + auto h_variable_bounds = + cuopt::host_copy(solution.problem_ptr->variable_bounds, solution.handle_ptr->get_stream()); auto h_last_projection = cuopt::host_copy(last_projection, solution.handle_ptr->get_stream()); const f_t int_tol = context.settings.tolerances.integrality_tolerance; constraints_delta_t h_constraints; @@ -164,23 +160,24 @@ bool feasibility_pump_t::linear_project_onto_polytope(solution_tinteger_equal(h_assignment[i], h_variable_upper_bounds[i])) { - obj_offset += h_variable_upper_bounds[i]; + auto h_var_bounds = h_variable_bounds[i]; + if (solution.problem_ptr->integer_equal(h_assignment[i], 
get_upper(h_var_bounds))) { + obj_offset += get_upper(h_var_bounds); // set the objective weight to -1, u - x obj_coefficients[i] = -1; - } else if (solution.problem_ptr->integer_equal(h_assignment[i], h_variable_lower_bounds[i])) { - obj_offset -= h_variable_lower_bounds[i]; + } else if (solution.problem_ptr->integer_equal(h_assignment[i], get_lower(h_var_bounds))) { + obj_offset -= get_lower(h_var_bounds); // set the objective weight to +1, x - l obj_coefficients[i] = 1; } else { // objective weight is 1 const f_t obj_weight = 1.; // the distance should always be positive - i_t var_id = h_variables.add_variable( - 0, - (h_variable_upper_bounds[i] - h_variable_lower_bounds[i]) + int_tol, - obj_weight, - var_t::CONTINUOUS); + i_t var_id = + h_variables.add_variable(0, + (get_upper(h_var_bounds) - get_lower(h_var_bounds)) + int_tol, + obj_weight, + var_t::CONTINUOUS); obj_coefficients.push_back(obj_weight); f_t dist_val = abs(h_assignment[i] - h_last_projection[i]); // if it is out of bounds, because of the approximation issues,or init issues @@ -442,8 +439,7 @@ void feasibility_pump_t::relax_general_integers(solution_t& orig_variable_types.resize(solution.problem_ptr->n_variables, solution.handle_ptr->get_stream()); auto var_types = make_span(solution.problem_ptr->variable_types); - auto var_lb = make_span(solution.problem_ptr->variable_lower_bounds); - auto var_ub = make_span(solution.problem_ptr->variable_upper_bounds); + auto var_bnds = make_span(solution.problem_ptr->variable_bounds); auto copy_types = make_span(orig_variable_types); raft::copy(orig_variable_types.data(), @@ -454,11 +450,11 @@ void feasibility_pump_t::relax_general_integers(solution_t& solution.handle_ptr->get_thrust_policy(), thrust::make_counting_iterator(0), thrust::make_counting_iterator(solution.problem_ptr->n_variables), - [var_types, var_lb, var_ub, copy_types, pb = solution.problem_ptr->view()] __device__( - auto v_idx) { + [var_types, var_bnds, copy_types, pb = 
solution.problem_ptr->view()] __device__(auto v_idx) { auto orig_v_type = var_types[v_idx]; - auto lb = var_lb[v_idx]; - auto ub = var_ub[v_idx]; + auto var_bounds = var_bnds[v_idx]; + auto lb = get_lower(var_bounds); + auto ub = get_upper(var_bounds); bool var_binary = (pb.integer_equal(lb, 0) && pb.integer_equal(ub, 1)); auto copy_type = (orig_v_type == var_t::INTEGER) && var_binary ? var_t::INTEGER : var_t::CONTINUOUS; diff --git a/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cuh b/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cuh index 60a249f893..2013e80f51 100644 --- a/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cuh +++ b/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cuh @@ -20,7 +20,6 @@ #include #include #include -#include #include #include @@ -118,7 +117,6 @@ class feasibility_pump_t { fj_t& fj, // fj_tree_t& fj_tree_, constraint_prop_t& constraint_prop_, - lb_constraint_prop_t& lb_constraint_prop_, line_segment_search_t& line_segment_search_, rmm::device_uvector& lp_optimal_solution_); @@ -153,7 +151,6 @@ class feasibility_pump_t { line_segment_search_t& line_segment_search; cycle_queue_t cycle_queue; constraint_prop_t& constraint_prop; - lb_constraint_prop_t& lb_constraint_prop; fp_config_t config; rmm::device_uvector last_rounding; rmm::device_uvector last_projection; diff --git a/cpp/src/mip/local_search/local_search.cu b/cpp/src/mip/local_search/local_search.cu index ae8a416fa9..ad444568fd 100644 --- a/cpp/src/mip/local_search/local_search.cu +++ b/cpp/src/mip/local_search/local_search.cu @@ -40,13 +40,11 @@ local_search_t::local_search_t(mip_solver_context_t& context fj(context), // fj_tree(fj), constraint_prop(context), - lb_constraint_prop(context), line_segment_search(fj, constraint_prop), fp(context, fj, // fj_tree, constraint_prop, - lb_constraint_prop, line_segment_search, lp_optimal_solution_), rng(cuopt::seed_generator::get_seed()), @@ -390,8 +388,6 @@ bool 
local_search_t::run_fp(solution_t& solution, solution.problem_ptr = &problem_with_objective_cut; solution.resize_to_problem(); resize_vectors(problem_with_objective_cut, solution.handle_ptr); - lb_constraint_prop.temp_problem.setup(problem_with_objective_cut); - lb_constraint_prop.bounds_update.setup(lb_constraint_prop.temp_problem); constraint_prop.bounds_update.resize(problem_with_objective_cut); } for (i_t i = 0; i < n_fp_iterations && !timer.check_time_limit(); ++i) { @@ -441,8 +437,6 @@ bool local_search_t::run_fp(solution_t& solution, solution.problem_ptr = old_problem_ptr; solution.resize_to_problem(); resize_vectors(*old_problem_ptr, solution.handle_ptr); - lb_constraint_prop.temp_problem.setup(*old_problem_ptr); - lb_constraint_prop.bounds_update.setup(lb_constraint_prop.temp_problem); constraint_prop.bounds_update.resize(*old_problem_ptr); solution.handle_ptr->sync_stream(); } diff --git a/cpp/src/mip/local_search/local_search.cuh b/cpp/src/mip/local_search/local_search.cuh index a2664e890b..bb95a8dc55 100644 --- a/cpp/src/mip/local_search/local_search.cuh +++ b/cpp/src/mip/local_search/local_search.cuh @@ -89,7 +89,6 @@ class local_search_t { fj_t fj; // fj_tree_t fj_tree; constraint_prop_t constraint_prop; - lb_constraint_prop_t lb_constraint_prop; line_segment_search_t line_segment_search; feasibility_pump_t fp; std::mt19937 rng; diff --git a/cpp/src/mip/local_search/rounding/bounds_repair.cu b/cpp/src/mip/local_search/rounding/bounds_repair.cu index a63c6c8669..e77702c38f 100644 --- a/cpp/src/mip/local_search/rounding/bounds_repair.cu +++ b/cpp/src/mip/local_search/rounding/bounds_repair.cu @@ -152,10 +152,12 @@ i_t bounds_repair_t::compute_best_shift(problem_t& problem, shift_amount = (down_vio / var_coeff); } if (shift_amount != 0.) 
{ - f_t var_lb = pb_v.variable_lower_bounds[var_idx]; - f_t var_ub = pb_v.variable_upper_bounds[var_idx]; - f_t o_var_lb = o_pb_v.variable_lower_bounds[var_idx]; - f_t o_var_ub = o_pb_v.variable_upper_bounds[var_idx]; + auto var_bnd = pb_v.variable_bounds[var_idx]; + auto o_var_bnd = o_pb_v.variable_bounds[var_idx]; + f_t var_lb = get_lower(var_bnd); + f_t var_ub = get_upper(var_bnd); + f_t o_var_lb = get_lower(o_var_bnd); + f_t o_var_ub = get_upper(o_var_bnd); cuopt_assert(var_lb + pb_v.tolerances.integrality_tolerance >= o_var_lb, ""); cuopt_assert(o_var_ub + pb_v.tolerances.integrality_tolerance >= var_ub, ""); // round the shift amount of integer @@ -211,8 +213,9 @@ __global__ void compute_damages_kernel(typename problem_t::view_t prob { i_t var_idx = candidates.variable_index[blockIdx.x]; f_t shift_amount = candidates.bound_shift[blockIdx.x]; - f_t v_lb = problem.variable_lower_bounds[var_idx]; - f_t v_ub = problem.variable_upper_bounds[var_idx]; + auto v_bnd = problem.variable_bounds[var_idx]; + f_t v_lb = get_lower(v_bnd); + f_t v_ub = get_upper(v_bnd); f_t th_damage = 0.; i_t n_infeasible_cstr_delta = 0; auto [offset_begin, offset_end] = problem.reverse_range_for_var(var_idx); @@ -348,37 +351,37 @@ void bounds_repair_t::apply_move(problem_t& problem, problem_t& original_problem, i_t move_idx) { - run_device_lambda( - handle_ptr->get_stream(), - [move_idx, - candidates = candidates.view(), - problem = problem.view(), - original_problem = original_problem.view()] __device__() { - i_t var_idx = candidates.variable_index[move_idx]; - f_t shift_value = candidates.bound_shift[move_idx]; - DEVICE_LOG_TRACE("Applying move on var %d with shift %f lb %f ub %f o_lb %f o_ub %f \n", - var_idx, - shift_value, - problem.variable_lower_bounds[var_idx], - problem.variable_upper_bounds[var_idx], - original_problem.variable_lower_bounds[var_idx], - original_problem.variable_upper_bounds[var_idx]); - if (problem.integer_equal(problem.variable_lower_bounds[var_idx], - 
problem.variable_upper_bounds[var_idx])) { - *candidates.at_least_one_singleton_moved = 1; - } + run_device_lambda(handle_ptr->get_stream(), + [move_idx, + candidates = candidates.view(), + problem = problem.view(), + original_problem = original_problem.view()] __device__() { + i_t var_idx = candidates.variable_index[move_idx]; + f_t shift_value = candidates.bound_shift[move_idx]; + auto bounds = problem.variable_bounds[var_idx]; + DEVICE_LOG_TRACE( + "Applying move on var %d with shift %f lb %f ub %f o_lb %f o_ub %f \n", + var_idx, + shift_value, + get_lower(bounds), + get_upper(bounds), + get_lower(original_problem.variable_bounds[var_idx]), + get_upper(original_problem.variable_bounds[var_idx])); + if (problem.integer_equal(get_lower(bounds), get_upper(bounds))) { + *candidates.at_least_one_singleton_moved = 1; + } - problem.variable_lower_bounds[var_idx] += shift_value; - problem.variable_upper_bounds[var_idx] += shift_value; - cuopt_assert( - original_problem.variable_lower_bounds[var_idx] <= - problem.variable_lower_bounds[var_idx] + problem.tolerances.integrality_tolerance, - ""); - cuopt_assert(original_problem.variable_upper_bounds[var_idx] + - problem.tolerances.integrality_tolerance >= - problem.variable_upper_bounds[var_idx], - ""); - }); + get_lower(bounds) += shift_value; + get_upper(bounds) += shift_value; + problem.variable_bounds[var_idx] = bounds; + cuopt_assert(get_lower(original_problem.variable_bounds[var_idx]) <= + get_lower(bounds) + problem.tolerances.integrality_tolerance, + ""); + cuopt_assert(get_upper(original_problem.variable_bounds[var_idx]) + + problem.tolerances.integrality_tolerance >= + get_upper(bounds), + ""); + }); } template diff --git a/cpp/src/mip/local_search/rounding/bounds_repair.cuh b/cpp/src/mip/local_search/rounding/bounds_repair.cuh index 2a57d06600..8cc392c0b9 100644 --- a/cpp/src/mip/local_search/rounding/bounds_repair.cuh +++ b/cpp/src/mip/local_search/rounding/bounds_repair.cuh @@ -43,17 +43,25 @@ struct bounds_t { 
} void update_from(const problem_t& pb, const raft::handle_t* handle_ptr) { - cuopt_assert(lb.size() == pb.variable_lower_bounds.size(), ""); - cuopt_assert(ub.size() == pb.variable_upper_bounds.size(), ""); - raft::copy(lb.data(), pb.variable_lower_bounds.data(), lb.size(), handle_ptr->get_stream()); - raft::copy(ub.data(), pb.variable_upper_bounds.data(), ub.size(), handle_ptr->get_stream()); + cuopt_assert(lb.size() == pb.variable_bounds.size(), ""); + cuopt_assert(ub.size() == pb.variable_bounds.size(), ""); + thrust::transform( + handle_ptr->get_thrust_policy(), + pb.variable_bounds.begin(), + pb.variable_bounds.end(), + thrust::make_zip_iterator(thrust::make_tuple(lb.begin(), ub.begin())), + [] __device__(auto i) { return thrust::make_tuple(get_lower(i), get_upper(i)); }); }; void update_to(problem_t& pb, const raft::handle_t* handle_ptr) { - cuopt_assert(lb.size() == pb.variable_lower_bounds.size(), ""); - cuopt_assert(ub.size() == pb.variable_upper_bounds.size(), ""); - raft::copy(pb.variable_lower_bounds.data(), lb.data(), lb.size(), handle_ptr->get_stream()); - raft::copy(pb.variable_upper_bounds.data(), ub.data(), ub.size(), handle_ptr->get_stream()); + cuopt_assert(lb.size() == pb.variable_bounds.size(), ""); + cuopt_assert(ub.size() == pb.variable_bounds.size(), ""); + using f_t2 = typename type_2::type; + thrust::transform(handle_ptr->get_thrust_policy(), + thrust::make_zip_iterator(thrust::make_tuple(lb.begin(), ub.begin())), + thrust::make_zip_iterator(thrust::make_tuple(lb.end(), ub.end())), + pb.variable_bounds.begin(), + [] __device__(auto i) { return f_t2{thrust::get<0>(i), thrust::get<1>(i)}; }); }; rmm::device_uvector lb; rmm::device_uvector ub; diff --git a/cpp/src/mip/local_search/rounding/constraint_prop.cu b/cpp/src/mip/local_search/rounding/constraint_prop.cu index 6e6a5deb37..61e8e08675 100644 --- a/cpp/src/mip/local_search/rounding/constraint_prop.cu +++ b/cpp/src/mip/local_search/rounding/constraint_prop.cu @@ -130,8 +130,9 @@ 
__global__ void compute_implied_slack_consumption_per_var( i_t var_offset = pb.reverse_offsets[var_idx]; i_t var_degree = pb.reverse_offsets[var_idx + 1] - var_offset; f_t th_var_implied_slack_consumption = 0.; - f_t lb = pb.variable_lower_bounds[var_idx]; - f_t ub = pb.variable_upper_bounds[var_idx]; + auto var_bnd = pb.variable_bounds[var_idx]; + f_t lb = get_lower(var_bnd); + f_t ub = get_upper(var_bnd); for (i_t i = threadIdx.x; i < var_degree; i += blockDim.x) { auto a = pb.reverse_coefficients[var_offset + i]; auto cnst_idx = pb.reverse_constraints[var_offset + i]; @@ -206,25 +207,26 @@ void constraint_prop_t::sort_by_interval_and_frac(solution_t // we can't call this function when the problem is ii. it causes false offset computations // TODO add assert that the problem is not ii auto assgn = make_span(sol.assignment); - thrust::stable_sort(sol.handle_ptr->get_thrust_policy(), - vars.begin(), - vars.end(), - [lb = sol.problem_ptr->variable_lower_bounds.data(), - ub = sol.problem_ptr->variable_upper_bounds.data(), - assgn] __device__(i_t v_idx_1, i_t v_idx_2) { - f_t bounds_interval_1 = ub[v_idx_1] - lb[v_idx_1]; - f_t bounds_interval_2 = ub[v_idx_2] - lb[v_idx_2]; - // if bounds interval are equal (binary and ternary) check fraction - // if both bounds intervals are greater than 2. 
then do fraction - if ((bounds_interval_1 == bounds_interval_2) || - (bounds_interval_1 > 2 && bounds_interval_2 > 2)) { - f_t frac_1 = get_fractionality_of_val(assgn[v_idx_1]); - f_t frac_2 = get_fractionality_of_val(assgn[v_idx_2]); - return frac_1 < frac_2; - } else { - return bounds_interval_1 < bounds_interval_2; - } - }); + thrust::stable_sort( + sol.handle_ptr->get_thrust_policy(), + vars.begin(), + vars.end(), + [bnds = sol.problem_ptr->variable_bounds.data(), assgn] __device__(i_t v_idx_1, i_t v_idx_2) { + auto bnd_1 = bnds[v_idx_1]; + auto bnd_2 = bnds[v_idx_2]; + f_t bounds_interval_1 = get_upper(bnd_1) - get_lower(bnd_1); + f_t bounds_interval_2 = get_upper(bnd_2) - get_lower(bnd_2); + // if bounds interval are equal (binary and ternary) check fraction + // if both bounds intervals are greater than 2. then do fraction + if ((bounds_interval_1 == bounds_interval_2) || + (bounds_interval_1 > 2 && bounds_interval_2 > 2)) { + f_t frac_1 = get_fractionality_of_val(assgn[v_idx_1]); + f_t frac_2 = get_fractionality_of_val(assgn[v_idx_2]); + return frac_1 < frac_2; + } else { + return bounds_interval_1 < bounds_interval_2; + } + }); // now do the suffling, for that we need to assign some random values to rnd array // we will sort this rnd array and the vars in subsections, so that each subsection will be // shuffled in total we will have 3(binary, ternary and rest) x 7 intervals = 21 subsections. 
@@ -237,15 +239,16 @@ void constraint_prop_t::sort_by_interval_and_frac(solution_t thrust::for_each(sol.handle_ptr->get_thrust_policy(), thrust::make_counting_iterator(0), thrust::make_counting_iterator((i_t)vars.size() - 1), - [lb = make_span(sol.problem_ptr->variable_lower_bounds), - ub = make_span(sol.problem_ptr->variable_upper_bounds), + [bnds = make_span(sol.problem_ptr->variable_bounds), offsets = make_span(subsection_offsets), vars, assgn] __device__(i_t idx) { i_t var_1 = vars[idx]; i_t var_2 = vars[idx + 1]; - f_t bounds_interval_1 = ub[var_1] - lb[var_1]; - f_t bounds_interval_2 = ub[var_2] - lb[var_2]; + auto bnd_1 = bnds[var_1]; + auto bnd_2 = bnds[var_2]; + f_t bounds_interval_1 = get_upper(bnd_1) - get_lower(bnd_1); + f_t bounds_interval_2 = get_upper(bnd_2) - get_lower(bnd_2); f_t frac_1 = get_fractionality_of_val(assgn[var_1]); f_t frac_2 = get_fractionality_of_val(assgn[var_2]); if (bounds_interval_1 == 1 && bounds_interval_2 == 1) { @@ -390,24 +393,22 @@ void constraint_prop_t::collapse_crossing_bounds(problem_t& problem_t& orig_problem, const raft::handle_t* handle_ptr) { - auto lb = make_span(problem.variable_lower_bounds); - auto ub = make_span(problem.variable_upper_bounds); - auto original_lb = make_span(orig_problem.variable_lower_bounds); - auto original_ub = make_span(orig_problem.variable_upper_bounds); + auto v_bnds = make_span(problem.variable_bounds); + auto original_v_bnds = make_span(orig_problem.variable_bounds); thrust::for_each( handle_ptr->get_thrust_policy(), thrust::make_counting_iterator(0), - thrust::make_counting_iterator((i_t)lb.size()), - [lb, - ub, - original_lb, - original_ub, + thrust::make_counting_iterator((i_t)v_bnds.size()), + [v_bnds, + original_v_bnds, variable_types = make_span(problem.variable_types), int_tol = problem.tolerances.integrality_tolerance] __device__(i_t idx) { - auto v_lb = lb[idx]; - auto v_ub = ub[idx]; - auto o_lb = original_lb[idx]; - auto o_ub = original_ub[idx]; + auto v_bnd = v_bnds[idx]; + 
auto ov_bnd = original_v_bnds[idx]; + auto v_lb = get_lower(v_bnd); + auto v_ub = get_upper(v_bnd); + auto o_lb = get_lower(ov_bnd); + auto o_ub = get_upper(ov_bnd); if (v_lb > v_ub) { f_t val_to_collapse; if (variable_types[idx] == var_t::INTEGER) { @@ -422,8 +423,7 @@ void constraint_prop_t::collapse_crossing_bounds(problem_t& cuopt_assert(o_lb - int_tol <= val_to_collapse && val_to_collapse <= o_ub + int_tol, "Out of original bounds!"); - lb[idx] = val_to_collapse; - ub[idx] = val_to_collapse; + v_bnds[idx] = typename type_2::type{val_to_collapse, val_to_collapse}; } }); } @@ -431,51 +431,42 @@ void constraint_prop_t::collapse_crossing_bounds(problem_t& template void constraint_prop_t::set_bounds_on_fixed_vars(solution_t& sol) { - auto assgn = make_span(sol.assignment); - auto lb = make_span(sol.problem_ptr->variable_lower_bounds); - auto ub = make_span(sol.problem_ptr->variable_upper_bounds); + auto assgn = make_span(sol.assignment); + auto var_bounds = make_span(sol.problem_ptr->variable_bounds); thrust::for_each(sol.handle_ptr->get_thrust_policy(), sol.problem_ptr->integer_indices.begin(), sol.problem_ptr->integer_indices.end(), - [pb = sol.problem_ptr->view(), assgn, lb, ub] __device__(i_t idx) { + [pb = sol.problem_ptr->view(), assgn, var_bounds] __device__(i_t idx) { auto var_val = assgn[idx]; if (pb.is_integer(var_val)) { - lb[idx] = var_val; - ub[idx] = var_val; + var_bounds[idx] = typename type_2::type{var_val, var_val}; } }); } -template +template struct is_bound_fixed_t { // This functor should be called only on integer variables f_t eps; - raft::device_span lb; - raft::device_span ub; - raft::device_span original_lb; - raft::device_span original_ub; + raft::device_span::type> bnd; + raft::device_span::type> original_bnd; raft::device_span assignment; is_bound_fixed_t(f_t eps_, - raft::device_span lb_, - raft::device_span ub_, - raft::device_span original_lb_, - raft::device_span original_ub_, + raft::device_span bnd_, + raft::device_span 
original_bnd_, raft::device_span assignment_) - : eps(eps_), - lb(lb_), - ub(ub_), - original_lb(original_lb_), - original_ub(original_ub_), - assignment(assignment_) + : eps(eps_), bnd(bnd_), original_bnd(original_bnd_), assignment(assignment_) { } HDI bool operator()(i_t idx) { - auto v_lb = lb[idx]; - auto v_ub = ub[idx]; - auto o_lb = original_lb[idx]; - auto o_ub = original_ub[idx]; + auto v_bnd = bnd[idx]; + auto v_lb = get_lower(v_bnd); + auto v_ub = get_upper(v_bnd); + auto ov_bnd = original_bnd[idx]; + auto o_lb = get_lower(ov_bnd); + auto o_ub = get_upper(ov_bnd); bool is_singleton = round_val_on_singleton_and_crossing(assignment[idx], v_lb, v_ub, o_lb, o_ub); return is_singleton; @@ -518,6 +509,40 @@ struct greater_than_threshold_t { __host__ __device__ bool operator()(const i_t& x) const { return assignment[x] > threshold; } }; +template +void constraint_prop_t::copy_bounds( + rmm::device_uvector::type>& output_bounds, + const rmm::device_uvector& input_lb, + const rmm::device_uvector& input_ub, + const raft::handle_t* handle_ptr) +{ + thrust::transform( + handle_ptr->get_thrust_policy(), + thrust::make_zip_iterator(thrust::make_tuple(input_lb.begin(), input_ub.begin())), + thrust::make_zip_iterator(thrust::make_tuple(input_lb.end(), input_ub.end())), + output_bounds.begin(), + [] __device__(auto bounds) { + return typename type_2::type{thrust::get<0>(bounds), thrust::get<1>(bounds)}; + }); +} + +template +void constraint_prop_t::copy_bounds( + rmm::device_uvector& output_lb, + rmm::device_uvector& output_ub, + const rmm::device_uvector::type>& input_bounds, + const raft::handle_t* handle_ptr) +{ + thrust::transform( + handle_ptr->get_thrust_policy(), + input_bounds.begin(), + input_bounds.end(), + thrust::make_zip_iterator(thrust::make_tuple(output_lb.begin(), output_ub.begin())), + [] __device__(auto bounds) { + return thrust::make_tuple(get_lower(bounds), get_upper(bounds)); + }); +} + template void constraint_prop_t::copy_bounds(rmm::device_uvector& 
output_lb, rmm::device_uvector& output_ub, @@ -548,38 +573,35 @@ void constraint_prop_t::copy_bounds(rmm::device_uvector& output_l template void constraint_prop_t::save_bounds(solution_t& sol) { - copy_bounds(lb_restore, - ub_restore, - assignment_restore, - sol.problem_ptr->variable_lower_bounds, - sol.problem_ptr->variable_upper_bounds, - sol.assignment, - sol.handle_ptr); + copy_bounds(lb_restore, ub_restore, sol.problem_ptr->variable_bounds, sol.handle_ptr); + raft::copy(assignment_restore.data(), + sol.assignment.data(), + sol.assignment.size(), + sol.handle_ptr->get_stream()); } template void constraint_prop_t::restore_bounds(solution_t& sol) { - copy_bounds(sol.problem_ptr->variable_lower_bounds, - sol.problem_ptr->variable_upper_bounds, - sol.assignment, - lb_restore, - ub_restore, - assignment_restore, - sol.handle_ptr); + copy_bounds(sol.problem_ptr->variable_bounds, lb_restore, ub_restore, sol.handle_ptr); + raft::copy(sol.assignment.data(), + assignment_restore.data(), + assignment_restore.size(), + sol.handle_ptr->get_stream()); } template void constraint_prop_t::restore_original_bounds(solution_t& sol, solution_t& orig_sol) { - copy_bounds(sol.problem_ptr->variable_lower_bounds, - sol.problem_ptr->variable_upper_bounds, - sol.assignment, - orig_sol.problem_ptr->variable_lower_bounds, - orig_sol.problem_ptr->variable_upper_bounds, - orig_sol.assignment, - orig_sol.handle_ptr); + raft::copy(sol.problem_ptr->variable_bounds.data(), + orig_sol.problem_ptr->variable_bounds.data(), + orig_sol.problem_ptr->variable_bounds.size(), + orig_sol.handle_ptr->get_stream()); + raft::copy(sol.assignment.data(), + orig_sol.assignment.data(), + orig_sol.assignment.size(), + orig_sol.handle_ptr->get_stream()); } template @@ -635,6 +657,18 @@ thrust::pair constraint_prop_t::generate_double_probing_pair return thrust::make_pair(first_probe, second_probe); } +template +bool test_var_out_of_bounds(const solution_t& orig_sol, + i_t unset_var_idx, + f_t probe, + f_t int_tol, 
+ const raft::handle_t* handle_ptr) +{ + auto var_bnd = + orig_sol.problem_ptr->variable_bounds.element(unset_var_idx, handle_ptr->get_stream()); + return (get_lower(var_bnd) <= probe + int_tol) && (probe - int_tol <= get_upper(var_bnd)); +} + template std::tuple, std::vector, std::vector> constraint_prop_t::generate_bulk_rounding_vector( @@ -660,16 +694,12 @@ constraint_prop_t::generate_bulk_rounding_vector( cuda::std::tie(first_probe, second_probe) = generate_double_probing_pair(sol, orig_sol, unset_var_idx, probing_config, false); } - cuopt_assert(orig_sol.problem_ptr->variable_lower_bounds.element( - unset_var_idx, sol.handle_ptr->get_stream()) <= first_probe + int_tol && - first_probe - int_tol <= orig_sol.problem_ptr->variable_upper_bounds.element( - unset_var_idx, sol.handle_ptr->get_stream()), - "Variable out of original bounds!"); - cuopt_assert(orig_sol.problem_ptr->variable_lower_bounds.element( - unset_var_idx, sol.handle_ptr->get_stream()) <= second_probe + int_tol && - second_probe - int_tol <= orig_sol.problem_ptr->variable_upper_bounds.element( - unset_var_idx, sol.handle_ptr->get_stream()), - "Variable out of original bounds!"); + cuopt_assert( + test_var_out_of_bounds(orig_sol, unset_var_idx, first_probe, int_tol, sol.handle_ptr), + "Variable out of original bounds!"); + cuopt_assert( + test_var_out_of_bounds(orig_sol, unset_var_idx, second_probe, int_tol, sol.handle_ptr), + "Variable out of original bounds!"); cuopt_assert(orig_sol.problem_ptr->is_integer(first_probe), "Probing value must be an integer"); cuopt_assert(orig_sol.problem_ptr->is_integer(second_probe), "Probing value must be an integer"); @@ -687,16 +717,12 @@ constraint_prop_t::generate_bulk_rounding_vector( int_tol); if (val_to_round == second_probe) { second_probe = first_probe; } } - cuopt_assert(orig_sol.problem_ptr->variable_lower_bounds.element( - unset_var_idx, sol.handle_ptr->get_stream()) <= val_to_round + int_tol && - val_to_round - int_tol <= 
orig_sol.problem_ptr->variable_upper_bounds.element( - unset_var_idx, sol.handle_ptr->get_stream()), - "Variable out of original bounds!"); - cuopt_assert(orig_sol.problem_ptr->variable_lower_bounds.element( - unset_var_idx, sol.handle_ptr->get_stream()) <= second_probe + int_tol && - second_probe - int_tol <= orig_sol.problem_ptr->variable_upper_bounds.element( - unset_var_idx, sol.handle_ptr->get_stream()), - "Variable out of original bounds!"); + cuopt_assert( + test_var_out_of_bounds(orig_sol, unset_var_idx, val_to_round, int_tol, sol.handle_ptr), + "Variable out of original bounds!"); + cuopt_assert( + test_var_out_of_bounds(orig_sol, unset_var_idx, second_probe, int_tol, sol.handle_ptr), + "Variable out of original bounds!"); std::get<0>(var_probe_vals)[i] = unset_var_idx; std::get<1>(var_probe_vals)[i] = val_to_round; std::get<2>(var_probe_vals)[i] = second_probe; @@ -718,18 +744,8 @@ void constraint_prop_t::update_host_assignment(const solution_t void constraint_prop_t::set_host_bounds(const solution_t& sol) { - cuopt_assert(sol.problem_ptr->variable_lower_bounds.size() == multi_probe.host_lb.size(), - "size of variable lower bound mismatch"); - raft::copy(multi_probe.host_lb.data(), - sol.problem_ptr->variable_lower_bounds.data(), - sol.problem_ptr->variable_lower_bounds.size(), - sol.handle_ptr->get_stream()); - cuopt_assert(sol.problem_ptr->variable_upper_bounds.size() == multi_probe.host_ub.size(), - "size of variable upper bound mismatch"); - raft::copy(multi_probe.host_ub.data(), - sol.problem_ptr->variable_upper_bounds.data(), - sol.problem_ptr->variable_upper_bounds.size(), - sol.handle_ptr->get_stream()); + std::tie(multi_probe.host_lb, multi_probe.host_ub) = + extract_host_bounds(sol.problem_ptr->variable_bounds, sol.handle_ptr); } template @@ -742,11 +758,10 @@ void constraint_prop_t::restore_original_bounds_on_unfixed( thrust::make_counting_iterator(0), thrust::make_counting_iterator(problem.n_variables), [p_v = problem.view(), op_v = 
original_problem.view()] __device__(i_t var_idx) { - if (!p_v.integer_equal(p_v.variable_lower_bounds[var_idx], - p_v.variable_upper_bounds[var_idx]) || + auto p_v_var_bnd = p_v.variable_bounds[var_idx]; + if (!p_v.integer_equal(get_lower(p_v_var_bnd), get_upper(p_v_var_bnd)) || !p_v.is_integer_var(var_idx)) { - p_v.variable_lower_bounds[var_idx] = op_v.variable_lower_bounds[var_idx]; - p_v.variable_upper_bounds[var_idx] = op_v.variable_upper_bounds[var_idx]; + p_v.variable_bounds[var_idx] = op_v.variable_bounds[var_idx]; } }); } @@ -947,15 +962,14 @@ bool constraint_prop_t::find_integer( rounding_ii = false; n_iter_in_recovery = 0; // during repair procedure some variables might be collapsed - auto iter = thrust::stable_partition( - sol.handle_ptr->get_thrust_policy(), - unset_vars.begin() + set_count, - unset_vars.end(), - is_bound_fixed_t{orig_sol.problem_ptr->tolerances.integrality_tolerance, - make_span(sol.problem_ptr->variable_lower_bounds), - make_span(sol.problem_ptr->variable_upper_bounds), - make_span(orig_sol.problem_ptr->variable_lower_bounds), - make_span(orig_sol.problem_ptr->variable_upper_bounds), + auto iter = + thrust::stable_partition(sol.handle_ptr->get_thrust_policy(), + unset_vars.begin() + set_count, + unset_vars.end(), + is_bound_fixed_t::type>{ + orig_sol.problem_ptr->tolerances.integrality_tolerance, + make_span(sol.problem_ptr->variable_bounds), + make_span(orig_sol.problem_ptr->variable_bounds), make_span(sol.assignment)}); i_t n_fixed_vars = (iter - (unset_vars.begin() + set_count)); CUOPT_LOG_TRACE("After repair procedure, number of additional fixed vars %d", n_fixed_vars); @@ -983,9 +997,7 @@ bool constraint_prop_t::find_integer( // we update from the problem bounds and not the final bounds of bounds update // because we might be in a recovery mode where we want to continue with the bounds before bulk // which is the unchanged problem bounds - multi_probe.update_host_bounds(sol.handle_ptr, - 
make_span(sol.problem_ptr->variable_lower_bounds), - make_span(sol.problem_ptr->variable_upper_bounds)); + multi_probe.update_host_bounds(sol.handle_ptr, make_span(sol.problem_ptr->variable_bounds)); } CUOPT_LOG_DEBUG( "Bounds propagation rounding end: ii constraint count first buffer %d, second buffer %d", @@ -1096,10 +1108,10 @@ std::tuple constraint_prop_t::probing_values( "probing value out of bounds"); return std::make_tuple(first_round_val, var_val, second_round_val); } else { - auto orig_v_lb = - orig_sol.problem_ptr->variable_lower_bounds.element(idx, sol.handle_ptr->get_stream()); - auto orig_v_ub = - orig_sol.problem_ptr->variable_upper_bounds.element(idx, sol.handle_ptr->get_stream()); + auto orig_v_bnd = + orig_sol.problem_ptr->variable_bounds.element(idx, sol.handle_ptr->get_stream()); + auto orig_v_lb = get_lower(orig_v_bnd); + auto orig_v_ub = get_upper(orig_v_bnd); cuopt_assert(v_lb >= orig_v_lb, "Current lb should be greater than original lb"); cuopt_assert(v_ub <= orig_v_ub, "Current ub should be smaller than original ub"); v_lb = std::max(v_lb, orig_v_lb); @@ -1137,16 +1149,14 @@ bool constraint_prop_t::handle_fixed_vars( auto set_count = *set_count_ptr; const f_t int_tol = sol.problem_ptr->tolerances.integrality_tolerance; // which other variables were affected? 
- auto iter = thrust::stable_partition( - sol.handle_ptr->get_thrust_policy(), - unset_vars.begin() + set_count, - unset_vars.end(), - is_bound_fixed_t{int_tol, - make_span(sol.problem_ptr->variable_lower_bounds), - make_span(sol.problem_ptr->variable_upper_bounds), - make_span(original_problem->variable_lower_bounds), - make_span(original_problem->variable_upper_bounds), - make_span(sol.assignment)}); + auto iter = thrust::stable_partition(sol.handle_ptr->get_thrust_policy(), + unset_vars.begin() + set_count, + unset_vars.end(), + is_bound_fixed_t::type>{ + int_tol, + make_span(sol.problem_ptr->variable_bounds), + make_span(original_problem->variable_bounds), + make_span(sol.assignment)}); i_t n_fixed_vars = (iter - (unset_vars.begin() + set_count)); cuopt_assert(n_fixed_vars >= std::get<0>(var_probe_vals).size(), "Error in number of vars fixed!"); diff --git a/cpp/src/mip/local_search/rounding/constraint_prop.cuh b/cpp/src/mip/local_search/rounding/constraint_prop.cuh index cb9cbbd00b..3b01da2749 100644 --- a/cpp/src/mip/local_search/rounding/constraint_prop.cuh +++ b/cpp/src/mip/local_search/rounding/constraint_prop.cuh @@ -95,6 +95,17 @@ struct constraint_prop_t { void sort_by_frac(solution_t& sol, raft::device_span vars); void restore_bounds(solution_t& sol); void save_bounds(solution_t& sol); + + void copy_bounds(rmm::device_uvector& output_lb, + rmm::device_uvector& output_ub, + const rmm::device_uvector::type>& input_bounds, + const raft::handle_t* handle_ptr); + + void copy_bounds(rmm::device_uvector::type>& output_bounds, + const rmm::device_uvector& input_lb, + const rmm::device_uvector& input_ub, + const raft::handle_t* handle_ptr); + void copy_bounds(rmm::device_uvector& output_lb, rmm::device_uvector& output_ub, const rmm::device_uvector& input_lb, diff --git a/cpp/src/mip/local_search/rounding/simple_rounding_kernels.cuh b/cpp/src/mip/local_search/rounding/simple_rounding_kernels.cuh index 9c1ac7f3b1..1c0103110c 100644 --- 
a/cpp/src/mip/local_search/rounding/simple_rounding_kernels.cuh +++ b/cpp/src/mip/local_search/rounding/simple_rounding_kernels.cuh @@ -36,8 +36,9 @@ __global__ void nearest_rounding_kernel(typename solution_t::view_t so f_t curr_val = solution.assignment[var_id]; if (solution.problem.is_integer(curr_val)) { return; } const f_t int_tol = solution.problem.tolerances.integrality_tolerance; - f_t lb = solution.problem.variable_lower_bounds[var_id]; - f_t ub = solution.problem.variable_upper_bounds[var_id]; + auto var_bnd = solution.problem.variable_bounds[var_id]; + f_t lb = get_lower(var_bnd); + f_t ub = get_upper(var_bnd); f_t nearest_val = round_nearest(curr_val, lb, ub, int_tol, rng); solution.assignment[var_id] = nearest_val; } diff --git a/cpp/src/mip/presolve/bounds_presolve.cu b/cpp/src/mip/presolve/bounds_presolve.cu index 72440cd9a8..35d2ee0821 100644 --- a/cpp/src/mip/presolve/bounds_presolve.cu +++ b/cpp/src/mip/presolve/bounds_presolve.cu @@ -212,14 +212,7 @@ void bound_presolve_t::calculate_activity_on_problem_bounds(problem_t< { auto& handle_ptr = pb.handle_ptr; upd.init_changed_constraints(handle_ptr); - cuopt_assert(upd.lb.size() == pb.variable_lower_bounds.size(), - "size of variable lower bound mismatch"); - raft::copy( - upd.lb.data(), pb.variable_lower_bounds.data(), upd.lb.size(), handle_ptr->get_stream()); - cuopt_assert(upd.ub.size() == pb.variable_upper_bounds.size(), - "size of variable upper bound mismatch"); - raft::copy( - upd.ub.data(), pb.variable_upper_bounds.data(), upd.ub.size(), handle_ptr->get_stream()); + copy_input_bounds(pb); calculate_activity(pb); } @@ -228,14 +221,15 @@ void bound_presolve_t::copy_input_bounds(problem_t& pb) { auto& handle_ptr = pb.handle_ptr; - cuopt_assert(upd.lb.size() == pb.variable_lower_bounds.size(), - "size of variable lower bound mismatch"); - raft::copy( - upd.lb.data(), pb.variable_lower_bounds.data(), upd.lb.size(), handle_ptr->get_stream()); - cuopt_assert(upd.ub.size() == 
pb.variable_upper_bounds.size(), - "size of variable upper bound mismatch"); - raft::copy( - upd.ub.data(), pb.variable_upper_bounds.data(), upd.ub.size(), handle_ptr->get_stream()); + cuopt_assert(upd.lb.size() == pb.variable_bounds.size(), "size of variable lower bound mismatch"); + cuopt_assert(upd.ub.size() == pb.variable_bounds.size(), "size of variable upper bound mismatch"); + + thrust::transform( + handle_ptr->get_thrust_policy(), + pb.variable_bounds.begin(), + pb.variable_bounds.end(), + thrust::make_zip_iterator(thrust::make_tuple(upd.lb.begin(), upd.ub.begin())), + [] __device__(auto i) { return thrust::make_tuple(get_lower(i), get_upper(i)); }); } template @@ -271,30 +265,11 @@ termination_criterion_t bound_presolve_t::solve( } template -termination_criterion_t bound_presolve_t::solve(problem_t& pb, - raft::device_span input_lb, - raft::device_span input_ub) +termination_criterion_t bound_presolve_t::solve(problem_t& pb) { timer_t timer(settings.time_limit); auto& handle_ptr = pb.handle_ptr; - if (input_lb.size() == 0) { - cuopt_assert(upd.lb.size() == pb.variable_lower_bounds.size(), - "size of variable lower bound mismatch"); - raft::copy( - upd.lb.data(), pb.variable_lower_bounds.data(), upd.lb.size(), handle_ptr->get_stream()); - } else { - cuopt_assert(input_lb.size() == upd.lb.size(), "size of variable lower bound mismatch"); - raft::copy(upd.lb.data(), input_lb.data(), input_lb.size(), handle_ptr->get_stream()); - } - if (input_ub.size() == 0) { - cuopt_assert(upd.ub.size() == pb.variable_upper_bounds.size(), - "size of variable upper bound mismatch"); - raft::copy( - upd.ub.data(), pb.variable_upper_bounds.data(), upd.ub.size(), handle_ptr->get_stream()); - } else { - cuopt_assert(input_ub.size() == upd.ub.size(), "size of variable lower bound mismatch"); - raft::copy(upd.ub.data(), input_ub.data(), upd.ub.size(), handle_ptr->get_stream()); - } + copy_input_bounds(pb); return bound_update_loop(pb, timer); } @@ -329,9 +304,7 @@ bool 
bound_presolve_t::calculate_infeasible_redundant_constraints(prob template void bound_presolve_t::set_updated_bounds(problem_t& pb) { - set_updated_bounds(pb.handle_ptr, - cuopt::make_span(pb.variable_lower_bounds), - cuopt::make_span(pb.variable_upper_bounds)); + set_updated_bounds(pb.handle_ptr, cuopt::make_span(pb.variable_bounds)); pb.compute_n_integer_vars(); pb.compute_binary_var_table(); } @@ -347,6 +320,21 @@ void bound_presolve_t::set_updated_bounds(const raft::handle_t* handle raft::copy(output_ub.data(), upd.ub.data(), upd.ub.size(), handle_ptr->get_stream()); } +template +void bound_presolve_t::set_updated_bounds( + const raft::handle_t* handle_ptr, raft::device_span::type> output_bounds) +{ + cuopt_assert(upd.ub.size() == output_bounds.size(), "size of variable upper bound mismatch"); + cuopt_assert(upd.lb.size() == output_bounds.size(), "size of variable lower bound mismatch"); + thrust::transform(handle_ptr->get_thrust_policy(), + thrust::make_zip_iterator(thrust::make_tuple(upd.lb.begin(), upd.ub.begin())), + thrust::make_zip_iterator(thrust::make_tuple(upd.lb.end(), upd.ub.end())), + output_bounds.begin(), + [] __device__(auto i) { + return typename type_2::type{thrust::get<0>(i), thrust::get<1>(i)}; + }); +} + template void bound_presolve_t::calc_and_set_updated_constraint_bounds(problem_t& pb) { diff --git a/cpp/src/mip/presolve/bounds_presolve.cuh b/cpp/src/mip/presolve/bounds_presolve.cuh index 84853a7812..0cb818e8ca 100644 --- a/cpp/src/mip/presolve/bounds_presolve.cuh +++ b/cpp/src/mip/presolve/bounds_presolve.cuh @@ -50,9 +50,7 @@ class bound_presolve_t { // when we need to accept a vector, we can use input_lb version termination_criterion_t solve(problem_t& pb, f_t lb, f_t ub, i_t var_idx); - termination_criterion_t solve(problem_t& pb, - raft::device_span input_lb = {}, - raft::device_span input_ub = {}); + termination_criterion_t solve(problem_t& pb); termination_criterion_t solve(problem_t& pb, const std::vector>& var_probe_val_pairs, @@ 
-62,6 +60,8 @@ class bound_presolve_t { void calculate_activity_on_problem_bounds(problem_t& pb); bool calculate_bounds_update(problem_t& pb); void set_updated_bounds(problem_t& pb); + void set_updated_bounds(const raft::handle_t* handle_ptr, + raft::device_span::type> output_bounds); void set_updated_bounds(const raft::handle_t* handle_ptr, raft::device_span output_lb, raft::device_span output_ub); diff --git a/cpp/src/mip/presolve/conditional_bound_strengthening.cu b/cpp/src/mip/presolve/conditional_bound_strengthening.cu index 2723a9b244..4eb8417b73 100644 --- a/cpp/src/mip/presolve/conditional_bound_strengthening.cu +++ b/cpp/src/mip/presolve/conditional_bound_strengthening.cu @@ -497,10 +497,10 @@ __global__ void update_constraint_bounds_kernel(typename problem_t::vi raft::device_span lock_per_constraint) { auto constraint_pair = constraint_pairs[blockIdx.x]; - int constr_i = constraint_pair.x; + int constr_i = get_lower(constraint_pair); if (constr_i == -1) { return; } - int constr_j = constraint_pair.y; + int constr_j = get_upper(constraint_pair); // FIXME:: for now handle only the constraints that fit in shared i_t offset_j = pb.offsets[constr_j]; @@ -550,8 +550,9 @@ __global__ void update_constraint_bounds_kernel(typename problem_t::vi if (tid < n_variables_in_constraint) { i_t variable_j = pb.variables[offset_j + tid]; a[tid] = pb.coefficients[offset_j + tid]; - lb[tid] = pb.variable_lower_bounds[variable_j]; - ub[tid] = pb.variable_upper_bounds[variable_j]; + auto bounds = pb.variable_bounds[variable_j]; + lb[tid] = get_lower(bounds); + ub[tid] = get_upper(bounds); vtypes[tid] = pb.variable_types[variable_j]; c[tid] = 0.; @@ -575,8 +576,9 @@ __global__ void update_constraint_bounds_kernel(typename problem_t::vi if (jj < 0) { f_t coeff = pb.coefficients[offset_i + index]; - f_t li = pb.variable_lower_bounds[variable_i]; - f_t ui = pb.variable_upper_bounds[variable_i]; + auto bounds = pb.variable_bounds[variable_i]; + f_t li = get_lower(bounds); + f_t ui = 
get_upper(bounds); min_activity_if_not_participating += (coeff > 0. ? coeff * li : coeff * ui); max_activity_if_not_participating += (coeff > 0. ? coeff * ui : coeff * li); } diff --git a/cpp/src/mip/presolve/lb_probing_cache.cu b/cpp/src/mip/presolve/lb_probing_cache.cu index 598a4c6bce..00c28d3450 100644 --- a/cpp/src/mip/presolve/lb_probing_cache.cu +++ b/cpp/src/mip/presolve/lb_probing_cache.cu @@ -201,9 +201,9 @@ __global__ void compute_min_slack_per_var(typename problem_t::view_t p if (std::signbit(a) != std::signbit(first_coeff)) { different_coeff = true; } auto cnst_idx = pb.reverse_constraints[var_offset + i]; auto cnstr_slack = cnst_slack[cnst_idx]; - auto delta_min_act = cnstr_slack.x + ((a < 0) ? a * ub : a * lb); + auto delta_min_act = get_lower(cnstr_slack) + ((a < 0) ? a * ub : a * lb); th_var_unit_slack = min(th_var_unit_slack, (delta_min_act / a)); - auto delta_max_act = cnstr_slack.y + ((a > 0) ? a * ub : a * lb); + auto delta_max_act = get_upper(cnstr_slack) + ((a > 0) ? 
a * ub : a * lb); th_var_unit_slack = min(th_var_unit_slack, (delta_max_act / a)); } __shared__ f_t shmem[raft::WarpSize]; @@ -232,7 +232,7 @@ __global__ void compute_min_slack_per_var(typename problem_t::view_t p th_max_excess = max(th_max_excess, excess); th_n_of_excess++; } - excess = max(0., cnstr_slack.y + diff); + excess = max(0., get_upper(cnstr_slack) + diff); if (excess > 0) { th_max_excess = max(th_max_excess, excess); th_n_of_excess++; diff --git a/cpp/src/mip/presolve/load_balanced_partition_helpers.cuh b/cpp/src/mip/presolve/load_balanced_partition_helpers.cuh index 55c18a902e..cffc5debb6 100644 --- a/cpp/src/mip/presolve/load_balanced_partition_helpers.cuh +++ b/cpp/src/mip/presolve/load_balanced_partition_helpers.cuh @@ -27,45 +27,6 @@ namespace cuopt::linear_programming::detail { -template -struct type_2 { - using type = void; -}; - -template <> -struct type_2 { - using type = int2; -}; - -template <> -struct type_2 { - using type = float2; -}; - -template <> -struct type_2 { - using type = double2; -}; - -template -raft::device_span::type> make_span_2(rmm::device_uvector& container) -{ - // TODO : ceildiv or throw assert - using T2 = typename type_2::type; - return raft::device_span(reinterpret_cast(container.data()), - sizeof(T) * container.size() / sizeof(T2)); -} - -template -raft::device_span::type> make_span_2( - rmm::device_uvector const& container) -{ - // TODO : ceildiv or throw assert - using T2 = typename type_2::type; - return raft::device_span(reinterpret_cast(container.data()), - sizeof(T) * container.size() / sizeof(T2)); -} - template constexpr int BitsPWrd = sizeof(degree_t) * 8; diff --git a/cpp/src/mip/presolve/multi_probe.cu b/cpp/src/mip/presolve/multi_probe.cu index 699a5f1dd4..ccd3f19511 100644 --- a/cpp/src/mip/presolve/multi_probe.cu +++ b/cpp/src/mip/presolve/multi_probe.cu @@ -331,36 +331,47 @@ void multi_probe_t::update_device_bounds(const raft::handle_t* handle_ } template -void multi_probe_t::update_host_bounds(const 
raft::handle_t* handle_ptr, - const raft::device_span variable_lb, - const raft::device_span variable_ub) +void multi_probe_t::update_host_bounds( + const raft::handle_t* handle_ptr, + const raft::device_span::type> variable_bounds) { - cuopt_assert(variable_lb.size() == host_lb.size(), "size of variable lower bound mismatch"); - raft::copy(host_lb.data(), variable_lb.data(), variable_lb.size(), handle_ptr->get_stream()); - cuopt_assert(variable_ub.size() == host_ub.size(), "size of variable upper bound mismatch"); - raft::copy(host_ub.data(), variable_ub.data(), variable_ub.size(), handle_ptr->get_stream()); + cuopt_assert(variable_bounds.size() == host_lb.size(), "size of variable lower bound mismatch"); + cuopt_assert(variable_bounds.size() == host_ub.size(), "size of variable upper bound mismatch"); + + rmm::device_uvector var_lb(variable_bounds.size(), handle_ptr->get_stream()); + rmm::device_uvector var_ub(variable_bounds.size(), handle_ptr->get_stream()); + thrust::transform( + handle_ptr->get_thrust_policy(), + variable_bounds.begin(), + variable_bounds.end(), + thrust::make_zip_iterator(thrust::make_tuple(var_lb.begin(), var_ub.begin())), + [] __device__(auto i) { return thrust::make_tuple(get_lower(i), get_upper(i)); }); + raft::copy(host_lb.data(), var_lb.data(), var_lb.size(), handle_ptr->get_stream()); + raft::copy(host_ub.data(), var_ub.data(), var_ub.size(), handle_ptr->get_stream()); } template void multi_probe_t::copy_problem_into_probing_buffers(problem_t& pb, const raft::handle_t* handle_ptr) { - cuopt_assert(upd_0.lb.size() == pb.variable_lower_bounds.size(), + cuopt_assert(upd_0.lb.size() == pb.variable_bounds.size(), "size of variable lower bound mismatch"); - raft::copy( - upd_0.lb.data(), pb.variable_lower_bounds.data(), upd_0.lb.size(), handle_ptr->get_stream()); - cuopt_assert(upd_1.lb.size() == pb.variable_lower_bounds.size(), + cuopt_assert(upd_1.lb.size() == pb.variable_bounds.size(), "size of variable lower bound mismatch"); - 
raft::copy( - upd_1.lb.data(), pb.variable_lower_bounds.data(), upd_1.lb.size(), handle_ptr->get_stream()); - cuopt_assert(upd_0.ub.size() == pb.variable_upper_bounds.size(), + cuopt_assert(upd_0.ub.size() == pb.variable_bounds.size(), "size of variable upper bound mismatch"); - raft::copy( - upd_0.ub.data(), pb.variable_upper_bounds.data(), upd_0.ub.size(), handle_ptr->get_stream()); - cuopt_assert(upd_1.ub.size() == pb.variable_upper_bounds.size(), + cuopt_assert(upd_1.ub.size() == pb.variable_bounds.size(), "size of variable upper bound mismatch"); - raft::copy( - upd_1.ub.data(), pb.variable_upper_bounds.data(), upd_1.ub.size(), handle_ptr->get_stream()); + + thrust::transform( + handle_ptr->get_thrust_policy(), + pb.variable_bounds.begin(), + pb.variable_bounds.end(), + thrust::make_zip_iterator( + thrust::make_tuple(upd_0.lb.begin(), upd_0.ub.begin(), upd_1.lb.begin(), upd_1.ub.begin())), + [] __device__(auto i) { + return thrust::make_tuple(get_lower(i), get_upper(i), get_lower(i), get_upper(i)); + }); } template @@ -410,6 +421,26 @@ void multi_probe_t::set_updated_bounds(const raft::handle_t* handle_pt raft::copy(output_lb.data(), lb.data(), lb.size(), handle_ptr->get_stream()); } +template +void multi_probe_t::set_updated_bounds( + const raft::handle_t* handle_ptr, + raft::device_span::type> output_bounds, + i_t select_update) +{ + auto& lb = select_update ? upd_1.lb : upd_0.lb; + auto& ub = select_update ? 
upd_1.ub : upd_0.ub; + + cuopt_assert(ub.size() == output_bounds.size(), "size of variable upper bound mismatch"); + cuopt_assert(lb.size() == output_bounds.size(), "size of variable lower bound mismatch"); + thrust::transform(handle_ptr->get_thrust_policy(), + thrust::make_zip_iterator(thrust::make_tuple(lb.begin(), ub.begin())), + thrust::make_zip_iterator(thrust::make_tuple(lb.end(), ub.end())), + output_bounds.begin(), + [] __device__(auto i) { + return typename type_2::type{thrust::get<0>(i), thrust::get<1>(i)}; + }); +} + template void multi_probe_t::constraint_stats(problem_t& pb, const raft::handle_t* handle_ptr) @@ -454,10 +485,7 @@ void multi_probe_t::set_updated_bounds(problem_t& pb, i_t select_update, const raft::handle_t* handle_ptr) { - set_updated_bounds(handle_ptr, - make_span(pb.variable_lower_bounds), - make_span(pb.variable_upper_bounds), - select_update); + set_updated_bounds(handle_ptr, make_span(pb.variable_bounds), select_update); } #if MIP_INSTANTIATE_FLOAT diff --git a/cpp/src/mip/presolve/multi_probe.cuh b/cpp/src/mip/presolve/multi_probe.cuh index 609702e909..db0b59f4ba 100644 --- a/cpp/src/mip/presolve/multi_probe.cuh +++ b/cpp/src/mip/presolve/multi_probe.cuh @@ -55,6 +55,9 @@ class multi_probe_t { void set_updated_bounds(problem_t& pb, i_t select_update, const raft::handle_t* handle_ptr); + void set_updated_bounds(const raft::handle_t* handle_ptr, + raft::device_span::type> output_bounds, + i_t select_update); void set_updated_bounds(const raft::handle_t* handle_ptr, raft::device_span output_lb, raft::device_span output_ub, @@ -72,8 +75,7 @@ class multi_probe_t { void constraint_stats(problem_t& pb, const raft::handle_t* handle_ptr); void copy_problem_into_probing_buffers(problem_t& pb, const raft::handle_t* handle_ptr); void update_host_bounds(const raft::handle_t* handle_ptr, - const raft::device_span variable_lb, - const raft::device_span variable_ub); + const raft::device_span::type> variable_bounds); void 
update_device_bounds(const raft::handle_t* handle_ptr); mip_solver_context_t& context; bounds_update_data_t upd_0; diff --git a/cpp/src/mip/presolve/probing_cache.cu b/cpp/src/mip/presolve/probing_cache.cu index 36140c5b31..7389ec206f 100644 --- a/cpp/src/mip/presolve/probing_cache.cu +++ b/cpp/src/mip/presolve/probing_cache.cu @@ -155,12 +155,11 @@ bool probing_cache_t::contains(problem_t& problem, i_t var_i return probing_cache.count(problem.original_ids[var_id]) > 0; } -template +template void inline insert_current_probing_to_cache(i_t var_idx, const val_interval_t& probe_val, bound_presolve_t& bound_presolve, - const std::vector& original_lb, - const std::vector& original_ub, + const std::vector& original_bounds, const std::vector& modified_lb, const std::vector& modified_ub, const std::vector& h_integer_indices, @@ -171,15 +170,16 @@ void inline insert_current_probing_to_cache(i_t var_idx, cache_entry_t cache_item; cache_item.val_interval = probe_val; for (auto impacted_var_idx : h_integer_indices) { - if (original_lb[impacted_var_idx] != modified_lb[impacted_var_idx] || - original_ub[impacted_var_idx] != modified_ub[impacted_var_idx]) { + auto original_var_bounds = original_bounds[impacted_var_idx]; + if (get_lower(original_var_bounds) != modified_lb[impacted_var_idx] || + get_upper(original_var_bounds) != modified_ub[impacted_var_idx]) { if (integer_equal( modified_lb[impacted_var_idx], modified_ub[impacted_var_idx], int_tol)) { ++n_implied_singletons; } - cuopt_assert(modified_lb[impacted_var_idx] >= original_lb[impacted_var_idx], + cuopt_assert(modified_lb[impacted_var_idx] >= get_lower(original_var_bounds), "Lower bound must be greater than or equal to original lower bound"); - cuopt_assert(modified_ub[impacted_var_idx] <= original_ub[impacted_var_idx], + cuopt_assert(modified_ub[impacted_var_idx] <= get_upper(original_var_bounds), "Upper bound must be less than or equal to original upper bound"); cached_bound_t new_bound{modified_lb[impacted_var_idx], 
modified_ub[impacted_var_idx]}; cache_item.var_to_cached_bound_map.insert({impacted_var_idx, new_bound}); @@ -210,8 +210,9 @@ __global__ void compute_min_slack_per_var(typename problem_t::view_t p i_t var_offset = pb.reverse_offsets[var_idx]; i_t var_degree = pb.reverse_offsets[var_idx + 1] - var_offset; f_t th_var_unit_slack = std::numeric_limits::max(); - f_t lb = pb.variable_lower_bounds[var_idx]; - f_t ub = pb.variable_upper_bounds[var_idx]; + auto var_bounds = pb.variable_bounds[var_idx]; + f_t lb = get_lower(var_bounds); + f_t ub = get_upper(var_bounds); f_t first_coeff = pb.reverse_coefficients[var_offset]; bool different_coeff = false; for (i_t i = threadIdx.x; i < var_degree; i += blockDim.x) { @@ -360,13 +361,12 @@ inline std::vector compute_prioritized_integer_indices( return h_priority_indices; } -template +template void compute_cache_for_var(i_t var_idx, bound_presolve_t& bound_presolve, problem_t& problem, multi_probe_t& multi_probe_presolve, - const std::vector& h_var_lower_bounds, - const std::vector& h_var_upper_bounds, + const std::vector& h_var_bounds, const std::vector& h_integer_indices, std::atomic& n_of_implied_singletons, std::atomic& n_of_cached_probings, @@ -375,11 +375,12 @@ void compute_cache_for_var(i_t var_idx, RAFT_CUDA_TRY(cudaSetDevice(device_id)); // test if we need per thread handle raft::handle_t handle{}; - std::vector h_improved_lower_bounds(h_var_lower_bounds.size()); - std::vector h_improved_upper_bounds(h_var_upper_bounds.size()); + std::vector h_improved_lower_bounds(h_var_bounds.size()); + std::vector h_improved_upper_bounds(h_var_bounds.size()); std::pair, val_interval_t> probe_vals; - f_t lb = h_var_lower_bounds[var_idx]; - f_t ub = h_var_upper_bounds[var_idx]; + auto bounds = h_var_bounds[var_idx]; + f_t lb = get_lower(bounds); + f_t ub = get_upper(bounds); for (i_t i = 0; i < 2; ++i) { auto& probe_val = i == 0 ? 
probe_vals.first : probe_vals.second; // if binary, probe both values @@ -451,8 +452,7 @@ void compute_cache_for_var(i_t var_idx, insert_current_probing_to_cache(var_idx, probe_val, bound_presolve, - h_var_lower_bounds, - h_var_upper_bounds, + h_var_bounds, h_improved_lower_bounds, h_improved_upper_bounds, h_integer_indices, @@ -470,9 +470,8 @@ void compute_probing_cache(bound_presolve_t& bound_presolve, // we dont want to compute the probing cache for all variables for time and computation resources auto priority_indices = compute_prioritized_integer_indices(bound_presolve, problem); CUOPT_LOG_DEBUG("Computing probing cache"); - auto h_integer_indices = host_copy(problem.integer_indices); - const auto h_var_upper_bounds = host_copy(problem.variable_upper_bounds); - const auto h_var_lower_bounds = host_copy(problem.variable_lower_bounds); + auto h_integer_indices = host_copy(problem.integer_indices); + const auto h_var_bounds = host_copy(problem.variable_bounds); // TODO adjust the iteration limit depending on the total time limit and time it takes for single // var bound_presolve.settings.iteration_limit = 50; @@ -512,8 +511,7 @@ void compute_probing_cache(bound_presolve_t& bound_presolve, bound_presolve, problem, multi_probe_presolve, - h_var_lower_bounds, - h_var_upper_bounds, + h_var_bounds, h_integer_indices, n_of_implied_singletons, n_of_cached_probings, diff --git a/cpp/src/mip/presolve/probing_cache.cuh b/cpp/src/mip/presolve/probing_cache.cuh index 2ba5010c6b..755c18b0bb 100644 --- a/cpp/src/mip/presolve/probing_cache.cuh +++ b/cpp/src/mip/presolve/probing_cache.cuh @@ -17,8 +17,6 @@ #pragma once -#include -#include #include "bounds_presolve.cuh" #include @@ -30,12 +28,6 @@ namespace cuopt::linear_programming::detail { template class bound_presolve_t; -template -class load_balanced_bounds_presolve_t; - -template -class load_balanced_problem_t; - /* Probing cache is a set of implied bounds when we set a variable to some value. 
We keep two sets of changed bounds for each interval: @@ -137,9 +129,4 @@ void compute_probing_cache(bound_presolve_t& bound_presolve, problem_t& problem, timer_t timer); -template -void compute_probing_cache(load_balanced_bounds_presolve_t& bound_presolve, - load_balanced_problem_t& problem, - timer_t timer); - } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip/presolve/trivial_presolve.cuh b/cpp/src/mip/presolve/trivial_presolve.cuh index f4940a62f6..803bfca800 100644 --- a/cpp/src/mip/presolve/trivial_presolve.cuh +++ b/cpp/src/mip/presolve/trivial_presolve.cuh @@ -77,16 +77,11 @@ void cleanup_vectors(problem_t& pb, handle_ptr->get_stream()); handle_ptr->sync_stream(); - auto lb_iter = thrust::remove_if(handle_ptr->get_thrust_policy(), - pb.variable_lower_bounds.begin(), - pb.variable_lower_bounds.end(), - var_map.begin(), - is_zero_t{}); - auto ub_iter = thrust::remove_if(handle_ptr->get_thrust_policy(), - pb.variable_upper_bounds.begin(), - pb.variable_upper_bounds.end(), - var_map.begin(), - is_zero_t{}); + auto bnd_iter = thrust::remove_if(handle_ptr->get_thrust_policy(), + pb.variable_bounds.begin(), + pb.variable_bounds.end(), + var_map.begin(), + is_zero_t{}); auto type_iter = thrust::remove_if(handle_ptr->get_thrust_policy(), pb.variable_types.begin(), pb.variable_types.end(), @@ -102,10 +97,7 @@ void cleanup_vectors(problem_t& pb, pb.objective_coefficients.end(), var_map.begin(), is_zero_t{}); - pb.variable_lower_bounds.resize(lb_iter - pb.variable_lower_bounds.begin(), - handle_ptr->get_stream()); - pb.variable_upper_bounds.resize(ub_iter - pb.variable_upper_bounds.begin(), - handle_ptr->get_stream()); + pb.variable_bounds.resize(bnd_iter - pb.variable_bounds.begin(), handle_ptr->get_stream()); pb.variable_types.resize(type_iter - pb.variable_types.begin(), handle_ptr->get_stream()); pb.is_binary_variable.resize(binary_iter - pb.is_binary_variable.begin(), handle_ptr->get_stream()); @@ -117,6 +109,7 @@ void 
cleanup_vectors(problem_t& pb, template void update_from_csr(problem_t& pb) { + using f_t2 = typename type_2::type; auto handle_ptr = pb.handle_ptr; rmm::device_uvector cnst(pb.coefficients.size(), handle_ptr->get_stream()); thrust::uninitialized_fill(handle_ptr->get_thrust_policy(), cnst.begin(), cnst.end(), 0); @@ -145,9 +138,8 @@ void update_from_csr(problem_t& pb) thrust::stable_partition(handle_ptr->get_thrust_policy(), coo_begin, coo_begin + cnst.size(), - is_variable_free_t{pb.tolerances.integrality_tolerance, - make_span(pb.variable_lower_bounds), - make_span(pb.variable_upper_bounds)}); + is_variable_free_t{pb.tolerances.integrality_tolerance, + make_span(pb.variable_bounds)}); RAFT_CHECK_CUDA(handle_ptr->get_stream()); nnz_edge_count = partition_iter - coo_begin; } @@ -180,12 +172,11 @@ void update_from_csr(problem_t& pb) handle_ptr->get_thrust_policy(), thrust::make_counting_iterator(0), thrust::make_counting_iterator(pb.n_variables), - assign_fixed_var_t{make_span(var_map), - make_span(pb.variable_lower_bounds), - make_span(pb.variable_upper_bounds), - make_span(pb.objective_coefficients), - make_span(pb.presolve_data.variable_mapping), - make_span(pb.presolve_data.fixed_var_assignment)}); + assign_fixed_var_t{make_span(var_map), + make_span(pb.variable_bounds), + make_span(pb.objective_coefficients), + make_span(pb.presolve_data.variable_mapping), + make_span(pb.presolve_data.fixed_var_assignment)}); auto used_iter = thrust::stable_partition(handle_ptr->get_thrust_policy(), pb.presolve_data.variable_mapping.begin(), pb.presolve_data.variable_mapping.end(), @@ -203,11 +194,10 @@ void update_from_csr(problem_t& pb) handle_ptr->get_stream()); rmm::device_uvector unused_coo_cnst_bound_updates(cnst.size() - nnz_edge_count, handle_ptr->get_stream()); - elem_multi_t mul{make_span(pb.coefficients), - make_span(pb.variables), - make_span(pb.objective_coefficients), - make_span(pb.variable_lower_bounds), - make_span(pb.variable_upper_bounds)}; + elem_multi_t 
mul{make_span(pb.coefficients), + make_span(pb.variables), + make_span(pb.objective_coefficients), + make_span(pb.variable_bounds)}; auto iter = thrust::reduce_by_key( handle_ptr->get_thrust_policy(), @@ -233,16 +223,14 @@ void update_from_csr(problem_t& pb) } // update objective_offset - pb.presolve_data.objective_offset += - thrust::transform_reduce(handle_ptr->get_thrust_policy(), - thrust::counting_iterator(0), - thrust::counting_iterator(pb.n_variables), - unused_var_obj_offset_t{make_span(var_map), - make_span(pb.objective_coefficients), - make_span(pb.variable_lower_bounds), - make_span(pb.variable_upper_bounds)}, - 0., - thrust::plus{}); + pb.presolve_data.objective_offset += thrust::transform_reduce( + handle_ptr->get_thrust_policy(), + thrust::counting_iterator(0), + thrust::counting_iterator(pb.n_variables), + unused_var_obj_offset_t{ + make_span(var_map), make_span(pb.objective_coefficients), make_span(pb.variable_bounds)}, + 0., + thrust::plus{}); RAFT_CHECK_CUDA(handle_ptr->get_stream()); // create renumbering maps diff --git a/cpp/src/mip/presolve/trivial_presolve_helpers.cuh b/cpp/src/mip/presolve/trivial_presolve_helpers.cuh index b7e3f4b464..cf7e5064d2 100644 --- a/cpp/src/mip/presolve/trivial_presolve_helpers.cuh +++ b/cpp/src/mip/presolve/trivial_presolve_helpers.cuh @@ -30,40 +30,34 @@ struct non_zero_degree_t { __device__ i_t operator()(i_t i) { return offsets[i] != offsets[i + 1]; } }; -template +template struct is_variable_free_t { f_t tol; - raft::device_span lb; - raft::device_span ub; - is_variable_free_t(f_t tol_, raft::device_span lb_, raft::device_span ub_) - : tol(tol_), lb(lb_), ub(ub_) - { - } + raft::device_span bnd; + is_variable_free_t(f_t tol_, raft::device_span bnd_) : tol(tol_), bnd(bnd_) {} template __device__ bool operator()(tuple_t edge) { - auto var = thrust::get<2>(edge); - return abs(ub[var] - lb[var]) > tol; + auto var = thrust::get<2>(edge); + auto bounds = bnd[var]; + return abs(get_upper(bounds) - get_lower(bounds)) 
> tol; } }; -template +template struct assign_fixed_var_t { raft::device_span is_var_used; - raft::device_span variable_lower_bounds; - raft::device_span variable_upper_bounds; + raft::device_span variable_bounds; raft::device_span objective_coefficients; raft::device_span variable_mapping; raft::device_span fixed_assignment; assign_fixed_var_t(raft::device_span is_var_used_, - raft::device_span variable_lower_bounds_, - raft::device_span variable_upper_bounds_, + raft::device_span variable_bounds_, raft::device_span objective_coefficients_, raft::device_span variable_mapping_, raft::device_span fixed_assignment_) : is_var_used(is_var_used_), - variable_lower_bounds(variable_lower_bounds_), - variable_upper_bounds(variable_upper_bounds_), + variable_bounds(variable_bounds_), objective_coefficients(objective_coefficients_), variable_mapping(variable_mapping_), fixed_assignment(fixed_assignment_) @@ -74,39 +68,38 @@ struct assign_fixed_var_t { { if (!is_var_used[i]) { auto orig_v_idx = variable_mapping[i]; + auto bounds = variable_bounds[i]; fixed_assignment[orig_v_idx] = - (objective_coefficients[i] > 0) ? variable_lower_bounds[i] : variable_upper_bounds[i]; + (objective_coefficients[i] > 0) ? 
get_lower(bounds) : get_upper(bounds); } } }; -template +template struct elem_multi_t { raft::device_span coefficients; raft::device_span variables; raft::device_span obj_coefficients; - raft::device_span variable_lower_bounds; - raft::device_span variable_upper_bounds; + raft::device_span variable_bounds; elem_multi_t(raft::device_span coefficients_, raft::device_span variables_, raft::device_span obj_coefficients_, - raft::device_span variable_lower_bounds_, - raft::device_span variable_upper_bounds_) + raft::device_span variable_bounds_) : coefficients(coefficients_), variables(variables_), obj_coefficients(obj_coefficients_), - variable_lower_bounds(variable_lower_bounds_), - variable_upper_bounds(variable_upper_bounds_) + variable_bounds(variable_bounds_) { } __device__ f_t operator()(i_t i) const { - auto var = variables[i]; + auto var = variables[i]; + auto bounds = variable_bounds[var]; if (obj_coefficients[var] > 0) { - return variable_lower_bounds[var] * coefficients[i]; + return get_lower(bounds) * coefficients[i]; } else { - return variable_upper_bounds[var] * coefficients[i]; + return get_upper(bounds) * coefficients[i]; } } }; @@ -136,18 +129,16 @@ struct update_constraint_bounds_t { } }; -template +template struct unused_var_obj_offset_t { raft::device_span var_map; raft::device_span objective_coefficients; - raft::device_span lb; - raft::device_span ub; + raft::device_span bnd; unused_var_obj_offset_t(raft::device_span var_map_, raft::device_span objective_coefficients_, - raft::device_span lb_, - raft::device_span ub_) - : var_map(var_map_), objective_coefficients(objective_coefficients_), lb(lb_), ub(ub_) + raft::device_span bnd_) + : var_map(var_map_), objective_coefficients(objective_coefficients_), bnd(bnd_) { } @@ -156,7 +147,8 @@ struct unused_var_obj_offset_t { auto obj_coeff = objective_coefficients[i]; // in case both bounds are infinite if (obj_coeff == 0.) return 0.; - auto obj_off = (obj_coeff > 0) ? 
obj_coeff * lb[i] : obj_coeff * ub[i]; + auto bounds = bnd[i]; + auto obj_off = (obj_coeff > 0) ? obj_coeff * get_lower(bounds) : obj_coeff * get_upper(bounds); return var_map[i] ? 0. : obj_off; } }; diff --git a/cpp/src/mip/problem/host_helper.cuh b/cpp/src/mip/problem/host_helper.cuh index 4023a32d27..9136e6899e 100644 --- a/cpp/src/mip/problem/host_helper.cuh +++ b/cpp/src/mip/problem/host_helper.cuh @@ -17,6 +17,7 @@ #pragma once +#include #include #include @@ -52,22 +53,21 @@ struct constraints_delta_t { template struct variables_delta_t { + using f_t2 = typename type_2::type; std::vector objective_coefficients; - std::vector lower_bounds; - std::vector upper_bounds; + std::vector variable_bounds; std::vector variable_types; std::vector is_binary_variable; i_t n_vars; - i_t size() const { return lower_bounds.size(); } + i_t size() const { return variable_bounds.size(); } // returns the added variable id i_t add_variable(f_t lower_bound, f_t upper_bound, f_t obj_weight, var_t var_type) { cuopt_assert(lower_bound >= 0, "Variable bounds must be non-negative!"); - lower_bounds.push_back(lower_bound); - upper_bounds.push_back(upper_bound); + variable_bounds.push_back(f_t2{lower_bound, upper_bound}); objective_coefficients.push_back(obj_weight); variable_types.push_back(var_type); is_binary_variable.push_back(0); diff --git a/cpp/src/mip/problem/problem.cu b/cpp/src/mip/problem/problem.cu index 1a5f76b038..37064b5c63 100644 --- a/cpp/src/mip/problem/problem.cu +++ b/cpp/src/mip/problem/problem.cu @@ -70,6 +70,8 @@ void problem_t::op_problem_cstr_body(const optimization_problem_tget_problem_category() != problem_category_t::LP; if (is_mip) { variable_types = @@ -121,10 +123,7 @@ problem_t::problem_t( offsets(problem_.get_constraint_matrix_offsets(), problem_.get_handle_ptr()->get_stream()), objective_coefficients(problem_.get_objective_coefficients(), problem_.get_handle_ptr()->get_stream()), - variable_lower_bounds(problem_.get_variable_lower_bounds(), - 
problem_.get_handle_ptr()->get_stream()), - variable_upper_bounds(problem_.get_variable_upper_bounds(), - problem_.get_handle_ptr()->get_stream()), + variable_bounds(0, problem_.get_handle_ptr()->get_stream()), constraint_lower_bounds(problem_.get_constraint_lower_bounds(), problem_.get_handle_ptr()->get_stream()), constraint_upper_bounds(problem_.get_constraint_upper_bounds(), @@ -173,8 +172,7 @@ problem_t::problem_t(const problem_t& problem_) variables(problem_.variables, handle_ptr->get_stream()), offsets(problem_.offsets, handle_ptr->get_stream()), objective_coefficients(problem_.objective_coefficients, handle_ptr->get_stream()), - variable_lower_bounds(problem_.variable_lower_bounds, handle_ptr->get_stream()), - variable_upper_bounds(problem_.variable_upper_bounds, handle_ptr->get_stream()), + variable_bounds(problem_.variable_bounds, handle_ptr->get_stream()), constraint_lower_bounds(problem_.constraint_lower_bounds, handle_ptr->get_stream()), constraint_upper_bounds(problem_.constraint_upper_bounds, handle_ptr->get_stream()), combined_bounds(problem_.combined_bounds, handle_ptr->get_stream()), @@ -247,16 +245,10 @@ problem_t::problem_t(const problem_t& problem_, bool no_deep ? rmm::device_uvector(problem_.objective_coefficients, handle_ptr->get_stream()) : rmm::device_uvector(problem_.objective_coefficients.size(), handle_ptr->get_stream())), - variable_lower_bounds( - (!no_deep_copy) - ? rmm::device_uvector(problem_.variable_lower_bounds, handle_ptr->get_stream()) - : rmm::device_uvector(problem_.variable_lower_bounds.size(), - handle_ptr->get_stream())), - variable_upper_bounds( + variable_bounds( (!no_deep_copy) - ? rmm::device_uvector(problem_.variable_upper_bounds, handle_ptr->get_stream()) - : rmm::device_uvector(problem_.variable_upper_bounds.size(), - handle_ptr->get_stream())), + ? 
rmm::device_uvector(problem_.variable_bounds, handle_ptr->get_stream()) + : rmm::device_uvector(problem_.variable_bounds.size(), handle_ptr->get_stream())), constraint_lower_bounds( (!no_deep_copy) ? rmm::device_uvector(problem_.constraint_lower_bounds, handle_ptr->get_stream()) @@ -389,16 +381,12 @@ void problem_t::check_problem_representation(bool check_transposed, } // Check variable bounds are set and with the correct size - if (!empty) { - cuopt_assert(!variable_lower_bounds.is_empty() && !variable_upper_bounds.is_empty(), - "Variable lower bounds and variable upper bounds must be set."); - } - cuopt_assert(variable_lower_bounds.size() == objective_coefficients.size(), + if (!empty) { cuopt_assert(!variable_bounds.is_empty(), "Variable bounds must be set."); } + cuopt_assert(variable_bounds.size() == objective_coefficients.size(), "Sizes for vectors related to the variables are not the same."); - cuopt_assert(variable_upper_bounds.size() == objective_coefficients.size(), - "Sizes for vectors related to the variables are not the same"); - cuopt_assert(variable_upper_bounds.size() == (std::size_t)n_variables, + cuopt_assert(variable_bounds.size() == (std::size_t)n_variables, "Sizes for vectors related to the variables are not the same."); + cuopt_assert(variable_types.size() == (std::size_t)n_variables, "Sizes for vectors related to the variables are not the same."); // Check constraints bounds sizes @@ -418,16 +406,15 @@ void problem_t::check_problem_representation(bool check_transposed, "Sizes for vectors related to the constraints are not the same."); // Check the validity of bounds - cuopt_expects( - thrust::all_of(handle_ptr->get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(n_variables), - [variable_lower_bounds = variable_lower_bounds.data(), - variable_upper_bounds = variable_upper_bounds.data()] __device__(i_t idx) { - return variable_lower_bounds[idx] <= variable_upper_bounds[idx]; - }), - 
error_type_t::ValidationError, - "Variable bounds are invalid"); + cuopt_expects(thrust::all_of(handle_ptr->get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(n_variables), + [vars_bnd = make_span(variable_bounds)] __device__(i_t idx) { + auto bounds = vars_bnd[idx]; + return get_lower(bounds) <= get_upper(bounds); + }), + error_type_t::ValidationError, + "Variable bounds are invalid"); cuopt_expects( thrust::all_of(handle_ptr->get_thrust_policy(), thrust::make_counting_iterator(0), @@ -574,23 +561,23 @@ void problem_t::check_problem_representation(bool check_transposed, return true; }), "Some variables aren't referenced in the appropriate indice tables"); - cuopt_assert( - thrust::all_of( - handle_ptr->get_thrust_policy(), - thrust::make_zip_iterator(thrust::make_counting_iterator(0), - is_binary_variable.cbegin()), - thrust::make_zip_iterator(thrust::make_counting_iterator(is_binary_variable.size()), + cuopt_assert(thrust::all_of(handle_ptr->get_thrust_policy(), + thrust::make_zip_iterator(thrust::make_counting_iterator(0), + is_binary_variable.cbegin()), + thrust::make_zip_iterator( + thrust::make_counting_iterator(is_binary_variable.size()), is_binary_variable.cend()), - [types = variable_types.data(), - lb = variable_lower_bounds.data(), - ub = variable_upper_bounds.data(), - v = view()] __device__(const thrust::tuple tuple) { - i_t idx = thrust::get<0>(tuple); - i_t pred = thrust::get<1>(tuple); - return pred == (types[idx] != var_t::CONTINUOUS && v.integer_equal(lb[idx], 0.) && - v.integer_equal(ub[idx], 1.)); - }), - "The binary variable table is incorrect."); + [types = variable_types.data(), + vars_bnd = make_span(variable_bounds), + v = view()] __device__(const thrust::tuple tuple) { + i_t idx = thrust::get<0>(tuple); + i_t pred = thrust::get<1>(tuple); + auto bounds = vars_bnd[idx]; + return pred == (types[idx] != var_t::CONTINUOUS && + v.integer_equal(get_lower(bounds), 0.) 
&& + v.integer_equal(get_upper(bounds), 1.)); + }), + "The binary variable table is incorrect."); if (!empty) { cuopt_assert(is_binary_pb == (n_variables == thrust::count(handle_ptr->get_thrust_policy(), is_binary_variable.begin(), @@ -768,9 +755,10 @@ void problem_t::compute_binary_var_table() is_binary_variable.begin(), is_binary_variable.end(), [pb_view] __device__(i_t i) { + auto bounds = pb_view.variable_bounds[i]; return pb_view.variable_types[i] != var_t::CONTINUOUS && - (pb_view.integer_equal(pb_view.variable_lower_bounds[i], 0) && - pb_view.integer_equal(pb_view.variable_upper_bounds[i], 1)); + (pb_view.integer_equal(get_lower(bounds), 0) && + pb_view.integer_equal(get_upper(bounds), 1)); }); get_n_binary_variables(); @@ -919,10 +907,7 @@ typename problem_t::view_t problem_t::view() v.offsets = raft::device_span{offsets.data(), offsets.size()}; v.objective_coefficients = raft::device_span{objective_coefficients.data(), objective_coefficients.size()}; - v.variable_lower_bounds = - raft::device_span{variable_lower_bounds.data(), variable_lower_bounds.size()}; - v.variable_upper_bounds = - raft::device_span{variable_upper_bounds.data(), variable_upper_bounds.size()}; + v.variable_bounds = make_span(variable_bounds); v.constraint_lower_bounds = raft::device_span{constraint_lower_bounds.data(), constraint_lower_bounds.size()}; v.constraint_upper_bounds = @@ -945,8 +930,7 @@ typename problem_t::view_t problem_t::view() template void problem_t::resize_variables(size_t size) { - variable_lower_bounds.resize(size, handle_ptr->get_stream()); - variable_upper_bounds.resize(size, handle_ptr->get_stream()); + variable_bounds.resize(size, handle_ptr->get_stream()); variable_types.resize(size, handle_ptr->get_stream()); objective_coefficients.resize(size, handle_ptr->get_stream()); is_binary_variable.resize(size, handle_ptr->get_stream()); @@ -978,13 +962,9 @@ void problem_t::insert_variables(variables_delta_t& h_vars) CUOPT_LOG_DEBUG("problem added variable size %d prev 
%d", h_vars.size(), n_variables); // resize the variable arrays if it can't fit the variables resize_variables(n_variables + h_vars.size()); - raft::copy(variable_lower_bounds.data() + n_variables, - h_vars.lower_bounds.data(), - h_vars.lower_bounds.size(), - handle_ptr->get_stream()); - raft::copy(variable_upper_bounds.data() + n_variables, - h_vars.upper_bounds.data(), - h_vars.upper_bounds.size(), + raft::copy(variable_bounds.data() + n_variables, + h_vars.variable_bounds.data(), + h_vars.variable_bounds.size(), handle_ptr->get_stream()); raft::copy(variable_types.data() + n_variables, h_vars.variable_types.data(), @@ -1225,15 +1205,9 @@ void problem_t::remove_given_variables(problem_t& original_p thrust::gather(handle_ptr->get_thrust_policy(), variable_map.begin(), variable_map.end(), - original_problem.variable_lower_bounds.begin(), - variable_lower_bounds.begin()); - variable_lower_bounds.resize(variable_map.size(), handle_ptr->get_stream()); - thrust::gather(handle_ptr->get_thrust_policy(), - variable_map.begin(), - variable_map.end(), - original_problem.variable_upper_bounds.begin(), - variable_upper_bounds.begin()); - variable_upper_bounds.resize(variable_map.size(), handle_ptr->get_stream()); + original_problem.variable_bounds.begin(), + variable_bounds.begin()); + variable_bounds.resize(variable_map.size(), handle_ptr->get_stream()); thrust::gather(handle_ptr->get_thrust_policy(), variable_map.begin(), variable_map.end(), @@ -1351,20 +1325,20 @@ void standardize_bounds(std::vector>>& variable_ problem_t& pb) { auto handle_ptr = pb.handle_ptr; - auto h_var_lower_bounds = cuopt::host_copy(pb.variable_lower_bounds); - auto h_var_upper_bounds = cuopt::host_copy(pb.variable_upper_bounds); + auto h_var_bounds = cuopt::host_copy(pb.variable_bounds); auto h_objective_coefficients = cuopt::host_copy(pb.objective_coefficients); auto h_variable_types = cuopt::host_copy(pb.variable_types); handle_ptr->sync_stream(); - const i_t n_vars_originally = 
(i_t)h_var_lower_bounds.size(); + const i_t n_vars_originally = (i_t)h_var_bounds.size(); for (i_t i = 0; i < n_vars_originally; ++i) { // if variable has free bounds, replace it with two vars // but add only one var and use it in all constraints // TODO create one var for integrals and one var for continuous - if (h_var_lower_bounds[i] == -std::numeric_limits::infinity() && - h_var_upper_bounds[i] == std::numeric_limits::infinity()) { + auto h_var_bound = h_var_bounds[i]; + if (get_lower(h_var_bound) == -std::numeric_limits::infinity() && + get_upper(h_var_bound) == std::numeric_limits::infinity()) { // add new variable auto var_coeff_vec = variable_constraint_map[i]; // negate all values in vec @@ -1372,16 +1346,16 @@ void standardize_bounds(std::vector>>& variable_ coeff = -coeff; } - h_var_lower_bounds[i] = 0.; + h_var_bounds[i].x = 0.; pb.presolve_data.variable_offsets[i] = 0.; pb.presolve_data.additional_var_used[i] = true; pb.presolve_data.additional_var_id_per_var[i] = pb.n_variables; + using f_t2 = typename type_2::type; // new var data std::stable_sort(var_coeff_vec.begin(), var_coeff_vec.end()); variable_constraint_map.push_back(var_coeff_vec); - h_var_lower_bounds.push_back(0.); - h_var_upper_bounds.push_back(std::numeric_limits::infinity()); + h_var_bounds.push_back(f_t2{0., std::numeric_limits::infinity()}); pb.presolve_data.variable_offsets.push_back(0.); h_objective_coefficients.push_back(-h_objective_coefficients[i]); h_variable_types.push_back(h_variable_types[i]); @@ -1397,21 +1371,14 @@ void standardize_bounds(std::vector>>& variable_ // TODO add some tests // resize the device vectors is sizes are smaller - if (pb.variable_lower_bounds.size() < h_var_lower_bounds.size()) { - pb.variable_lower_bounds.resize(h_var_lower_bounds.size(), handle_ptr->get_stream()); - pb.variable_upper_bounds.resize(h_var_lower_bounds.size(), handle_ptr->get_stream()); + if (pb.variable_bounds.size() < h_var_bounds.size()) { + 
pb.variable_bounds.resize(h_var_bounds.size(), handle_ptr->get_stream()); pb.objective_coefficients.resize(h_objective_coefficients.size(), handle_ptr->get_stream()); pb.variable_types.resize(h_variable_types.size(), handle_ptr->get_stream()); } - raft::copy(pb.variable_lower_bounds.data(), - h_var_lower_bounds.data(), - h_var_lower_bounds.size(), - handle_ptr->get_stream()); - raft::copy(pb.variable_upper_bounds.data(), - h_var_upper_bounds.data(), - h_var_upper_bounds.size(), - handle_ptr->get_stream()); + raft::copy( + pb.variable_bounds.data(), h_var_bounds.data(), h_var_bounds.size(), handle_ptr->get_stream()); raft::copy(pb.objective_coefficients.data(), h_objective_coefficients.data(), h_objective_coefficients.size(), @@ -1539,9 +1506,9 @@ void problem_t::get_host_user_problem( } } user_problem.num_range_rows = user_problem.range_rows.size(); - user_problem.lower = cuopt::host_copy(variable_lower_bounds); - user_problem.upper = cuopt::host_copy(variable_upper_bounds); - user_problem.problem_name = original_problem_ptr->get_problem_name(); + std::tie(user_problem.lower, user_problem.upper) = + extract_host_bounds(variable_bounds, handle_ptr); + user_problem.problem_name = original_problem_ptr->get_problem_name(); if (static_cast(row_names.size()) == m) { user_problem.row_names.resize(m); for (int i = 0; i < m; ++i) { diff --git a/cpp/src/mip/problem/problem.cuh b/cpp/src/mip/problem/problem.cuh index 49103fc955..9d63e18579 100644 --- a/cpp/src/mip/problem/problem.cuh +++ b/cpp/src/mip/problem/problem.cuh @@ -136,8 +136,9 @@ class problem_t { DI bool check_variable_within_bounds(i_t v, f_t val) const { const f_t int_tol = tolerances.integrality_tolerance; + auto bounds = variable_bounds[v]; bool within_bounds = - val <= (variable_upper_bounds[v] + int_tol) && val >= (variable_lower_bounds[v] - int_tol); + val <= (get_upper(bounds) + int_tol) && val >= (get_lower(bounds) - int_tol); return within_bounds; } @@ -158,21 +159,21 @@ class problem_t { { 
cuopt_assert(var_t::INTEGER != variable_types[v], "Random value can only be called on continuous values"); - f_t lower_bound = variable_lower_bounds[v]; - f_t upper_bound = variable_upper_bounds[v]; + auto bounds = variable_bounds[v]; f_t val; - if (isfinite(lower_bound) && isfinite(upper_bound)) { - f_t diff = upper_bound - lower_bound; - val = diff * rng.next_float() + lower_bound; + if (isfinite(get_lower(bounds)) && isfinite(get_upper(bounds))) { + f_t diff = get_upper(bounds) - get_lower(bounds); + val = diff * rng.next_float() + get_lower(bounds); } else { - auto finite_bound = isfinite(lower_bound) ? lower_bound : upper_bound; + auto finite_bound = isfinite(get_lower(bounds)) ? get_lower(bounds) : get_upper(bounds); val = finite_bound; } - cuopt_assert(isfinite(lower_bound), "Value must be finite"); + cuopt_assert(isfinite(get_lower(bounds)), "Value must be finite"); return val; } + using f_t2 = typename type_2::type; typename mip_solver_settings_t::tolerances_t tolerances; i_t n_variables; i_t n_integer_vars; @@ -187,8 +188,7 @@ class problem_t { raft::device_span variables; raft::device_span offsets; raft::device_span objective_coefficients; - raft::device_span variable_lower_bounds; - raft::device_span variable_upper_bounds; + raft::device_span variable_bounds; raft::device_span constraint_lower_bounds; raft::device_span constraint_upper_bounds; raft::device_span variable_types; @@ -242,8 +242,8 @@ class problem_t { /** weights in the objective function */ rmm::device_uvector objective_coefficients; - rmm::device_uvector variable_lower_bounds; - rmm::device_uvector variable_upper_bounds; + using f_t2 = typename type_2::type; + rmm::device_uvector variable_bounds; rmm::device_uvector constraint_lower_bounds; rmm::device_uvector constraint_upper_bounds; /* biggest between cstr lower and upper */ diff --git a/cpp/src/mip/problem/problem_helpers.cuh b/cpp/src/mip/problem/problem_helpers.cuh index bdc4635c03..8451983a99 100644 --- 
a/cpp/src/mip/problem/problem_helpers.cuh +++ b/cpp/src/mip/problem/problem_helpers.cuh @@ -58,6 +58,36 @@ struct transform_bounds_functor { } }; +template +static void set_variable_bounds(detail::problem_t& op_problem) +{ + op_problem.variable_bounds.resize(op_problem.n_variables, op_problem.handle_ptr->get_stream()); + auto vars_bnd = make_span(op_problem.variable_bounds); + + auto orig_problem = op_problem.original_problem_ptr; + auto variable_lower_bounds = make_span(orig_problem->get_variable_lower_bounds()); + auto variable_upper_bounds = make_span(orig_problem->get_variable_upper_bounds()); + + bool default_variable_lb = (orig_problem->get_variable_lower_bounds().is_empty()); + bool default_variable_ub = (orig_problem->get_variable_upper_bounds().is_empty()); + + thrust::for_each(op_problem.handle_ptr->get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(op_problem.n_variables), + [vars_bnd, + variable_lower_bounds, + variable_upper_bounds, + default_variable_lb, + default_variable_ub] __device__(auto i) { + using f_t2 = typename type_2::type; + auto lb = f_t{0}; + auto ub = std::numeric_limits::infinity(); + if (!default_variable_lb) { lb = variable_lower_bounds[i]; } + if (!default_variable_ub) { ub = variable_upper_bounds[i]; } + vars_bnd[i] = f_t2{lb, ub}; + }); +} + template static void set_bounds_if_not_set(detail::problem_t& op_problem) { @@ -90,25 +120,7 @@ static void set_bounds_if_not_set(detail::problem_t& op_problem) transform_bounds_functor()); } - // If variable bound was not set, set it to default value - if (op_problem.variable_lower_bounds.is_empty() && - !op_problem.objective_coefficients.is_empty()) { - op_problem.variable_lower_bounds.resize(op_problem.objective_coefficients.size(), - op_problem.handle_ptr->get_stream()); - thrust::fill(op_problem.handle_ptr->get_thrust_policy(), - op_problem.variable_lower_bounds.begin(), - op_problem.variable_lower_bounds.end(), - f_t(0)); - } - if 
(op_problem.variable_upper_bounds.is_empty() && - !op_problem.objective_coefficients.is_empty()) { - op_problem.variable_upper_bounds.resize(op_problem.objective_coefficients.size(), - op_problem.handle_ptr->get_stream()); - thrust::fill(op_problem.handle_ptr->get_thrust_policy(), - op_problem.variable_upper_bounds.begin(), - op_problem.variable_upper_bounds.end(), - std::numeric_limits::infinity()); - } + set_variable_bounds(op_problem); if (op_problem.variable_types.is_empty() && !op_problem.objective_coefficients.is_empty()) { op_problem.variable_types.resize(op_problem.objective_coefficients.size(), op_problem.handle_ptr->get_stream()); @@ -261,11 +273,11 @@ static bool check_var_bounds_sanity(const detail::problem_t& problem) bool crossing_bounds_detected = thrust::any_of(problem.handle_ptr->get_thrust_policy(), thrust::counting_iterator(0), - thrust::counting_iterator((i_t)problem.variable_lower_bounds.size()), + thrust::counting_iterator((i_t)problem.variable_bounds.size()), [tolerance = problem.tolerances.presolve_absolute_tolerance, - lb = make_span(problem.variable_lower_bounds), - ub = make_span(problem.variable_upper_bounds)] __device__(i_t index) { - return (lb[index] > ub[index] + tolerance); + var_bnd = make_span(problem.variable_bounds)] __device__(i_t index) { + auto var_bounds = var_bnd[index]; + return (get_lower(var_bounds) > get_upper(var_bounds) + tolerance); }); return !crossing_bounds_detected; } @@ -292,12 +304,12 @@ static void round_bounds(detail::problem_t& problem) thrust::for_each(problem.handle_ptr->get_thrust_policy(), thrust::make_counting_iterator(0), thrust::make_counting_iterator(problem.n_variables), - [lb = make_span(problem.variable_lower_bounds), - ub = make_span(problem.variable_upper_bounds), - types = make_span(problem.variable_types)] __device__(i_t index) { + [bounds = make_span(problem.variable_bounds), + types = make_span(problem.variable_types)] __device__(i_t index) { if (types[index] == var_t::INTEGER) { - lb[index] 
= ceil(lb[index]); - ub[index] = floor(ub[index]); + using f_t2 = typename type_2::type; + auto bnd = bounds[index]; + bounds[index] = f_t2{ceil(get_lower(bnd)), floor(get_upper(bnd))}; } }); } diff --git a/cpp/src/mip/problem/write_mps.cu b/cpp/src/mip/problem/write_mps.cu index cca3cd5b1e..ce3e562383 100644 --- a/cpp/src/mip/problem/write_mps.cu +++ b/cpp/src/mip/problem/write_mps.cu @@ -36,8 +36,7 @@ void problem_t::write_as_mps(const std::string& path) auto h_reverse_constraints = cuopt::host_copy(reverse_constraints, handle_ptr->get_stream()); auto h_reverse_offsets = cuopt::host_copy(reverse_offsets, handle_ptr->get_stream()); auto h_obj_coeffs = cuopt::host_copy(objective_coefficients, handle_ptr->get_stream()); - auto h_var_lb = cuopt::host_copy(variable_lower_bounds, handle_ptr->get_stream()); - auto h_var_ub = cuopt::host_copy(variable_upper_bounds, handle_ptr->get_stream()); + auto [h_var_lb, h_var_ub] = extract_host_bounds(variable_bounds, handle_ptr); auto h_cstr_lb = cuopt::host_copy(constraint_lower_bounds, handle_ptr->get_stream()); auto h_cstr_ub = cuopt::host_copy(constraint_upper_bounds, handle_ptr->get_stream()); auto h_var_types = cuopt::host_copy(variable_types, handle_ptr->get_stream()); diff --git a/cpp/src/mip/solution/feasibility_test.cuh b/cpp/src/mip/solution/feasibility_test.cuh index eafb8a67cb..5c452e45fd 100644 --- a/cpp/src/mip/solution/feasibility_test.cuh +++ b/cpp/src/mip/solution/feasibility_test.cuh @@ -55,8 +55,8 @@ __global__ void test_variable_bounds_kernel(typename solution_t::view_ printf("inf var %d val %f l %f u %f integer %d\n", v, val, - sol.problem.variable_lower_bounds[v], - sol.problem.variable_upper_bounds[v], + get_lower(sol.problem.variable_bounds[v]), + get_upper(sol.problem.variable_bounds[v]), sol.problem.is_integer_var(v)); } cuopt_assert(isfinite(val), "assignment should be finite!"); @@ -72,8 +72,8 @@ __global__ void test_variable_bounds_kernel(typename solution_t::view_ printf("oob var %d val %f l %f u %f 
integer %d\n", v, val, - sol.problem.variable_lower_bounds[v], - sol.problem.variable_upper_bounds[v], + get_lower(sol.problem.variable_bounds[v]), + get_upper(sol.problem.variable_bounds[v]), sol.problem.is_integer_var(v)); } cuopt_assert(feasible, "Variables should be feasible"); diff --git a/cpp/src/mip/solution/solution.cu b/cpp/src/mip/solution/solution.cu index cabb0edda0..7c95aaeed2 100644 --- a/cpp/src/mip/solution/solution.cu +++ b/cpp/src/mip/solution/solution.cu @@ -35,11 +35,25 @@ namespace cuopt::linear_programming::detail { +template +rmm::device_uvector get_lower_bounds( + rmm::device_uvector::type> const& bounds, const raft::handle_t* handle_ptr) +{ + using f_t2 = typename type_2::type; + rmm::device_uvector lower_bounds(bounds.size(), handle_ptr->get_stream()); + thrust::transform(handle_ptr->get_thrust_policy(), + bounds.begin(), + bounds.end(), + lower_bounds.begin(), + [] __device__(auto bnd) { return bnd.x; }); + return lower_bounds; +} + template solution_t::solution_t(problem_t& problem_) : problem_ptr(&problem_), handle_ptr(problem_.handle_ptr), - assignment(problem_.variable_lower_bounds, handle_ptr->get_stream()), + assignment(std::move(get_lower_bounds(problem_.variable_bounds, handle_ptr))), lower_excess(problem_.n_constraints, handle_ptr->get_stream()), upper_excess(problem_.n_constraints, handle_ptr->get_stream()), lower_slack(problem_.n_constraints, handle_ptr->get_stream()), @@ -220,16 +234,16 @@ void solution_t::assign_random_within_bounds(f_t ratio_of_vars_to_rand std::vector h_assignment = host_copy(assignment); std::uniform_real_distribution unif_prob(0, 1); - auto variable_lower_bounds = cuopt::host_copy(problem_ptr->variable_lower_bounds); - auto variable_upper_bounds = cuopt::host_copy(problem_ptr->variable_upper_bounds); - auto variable_types = cuopt::host_copy(problem_ptr->variable_types); + auto variable_bounds = cuopt::host_copy(problem_ptr->variable_bounds); + auto variable_types = 
cuopt::host_copy(problem_ptr->variable_types); problem_ptr->handle_ptr->sync_stream(); - for (size_t i = 0; i < problem_ptr->variable_lower_bounds.size(); ++i) { + for (size_t i = 0; i < problem_ptr->variable_bounds.size(); ++i) { if (only_integers && variable_types[i] != var_t::INTEGER) { continue; } bool skip = unif_prob(rng) > ratio_of_vars_to_random_assign; if (skip) { continue; } - f_t lower_bound = variable_lower_bounds[i]; - f_t upper_bound = variable_upper_bounds[i]; + auto var_bounds = variable_bounds[i]; + auto lower_bound = get_lower(var_bounds); + auto upper_bound = get_upper(var_bounds); if (lower_bound == -std::numeric_limits::infinity()) { h_assignment[i] = upper_bound; } else if (upper_bound == std::numeric_limits::infinity()) { @@ -322,7 +336,7 @@ template void solution_t::compute_objective() { h_obj = compute_objective_from_vec( - assignment, problem_ptr->objective_coefficients, handle_ptr->get_stream()); + assignment, problem_ptr->objective_coefficients, handle_ptr); // to save from memory transactions, don't update the device objective // when needed we can update the device objective here h_user_obj = problem_ptr->get_user_obj_from_solver_obj(h_obj); @@ -443,10 +457,11 @@ i_t solution_t::calculate_similarity_radius(solution_t& othe problem_ptr->integer_indices.end(), cuda::proclaim_return_type( [other_ptr, curr_assignment, p_view = problem_ptr->view()] __device__(i_t idx) -> bool { + auto var_bounds = p_view.variable_bounds[idx]; return diverse_equal(other_ptr[idx], curr_assignment[idx], - p_view.variable_lower_bounds[idx], - p_view.variable_upper_bounds[idx], + get_lower(var_bounds), + get_upper(var_bounds), p_view.is_integer_var(idx), p_view.tolerances.integrality_tolerance); })); @@ -542,17 +557,15 @@ template f_t solution_t::compute_max_variable_violation() { cuopt_assert(problem_ptr->n_variables == assignment.size(), "Size mismatch"); - cuopt_assert(problem_ptr->n_variables == problem_ptr->variable_lower_bounds.size(), - "Size mismatch"); 
- cuopt_assert(problem_ptr->n_variables == problem_ptr->variable_upper_bounds.size(), - "Size mismatch"); + cuopt_assert(problem_ptr->n_variables == problem_ptr->variable_bounds.size(), "Size mismatch"); return thrust::transform_reduce( handle_ptr->get_thrust_policy(), thrust::make_counting_iterator(0), thrust::make_counting_iterator(0) + problem_ptr->n_variables, cuda::proclaim_return_type([v = view()] __device__(i_t idx) -> f_t { - f_t lower_vio = max(0., v.problem.variable_lower_bounds[idx] - v.assignment[idx]); - f_t upper_vio = max(0., v.assignment[idx] - v.problem.variable_upper_bounds[idx]); + auto var_bounds = v.problem.variable_bounds[idx]; + f_t lower_vio = max(0., get_lower(var_bounds) - v.assignment[idx]); + f_t upper_vio = max(0., v.assignment[idx] - get_upper(var_bounds)); return max(lower_vio, upper_vio); }), 0., diff --git a/cpp/src/mip/solve.cu b/cpp/src/mip/solve.cu index dc2f77d906..24c52b45eb 100644 --- a/cpp/src/mip/solve.cu +++ b/cpp/src/mip/solve.cu @@ -82,9 +82,9 @@ mip_solution_t run_mip(detail::problem_t& problem, thrust::make_counting_iterator(0), thrust::make_counting_iterator(problem.n_variables), [sol = solution.assignment.data(), pb = problem.view()] __device__(i_t index) { - sol[index] = pb.objective_coefficients[index] > 0 - ? pb.variable_lower_bounds[index] - : pb.variable_upper_bounds[index]; + auto bounds = pb.variable_bounds[index]; + sol[index] = pb.objective_coefficients[index] > 0 ? 
get_lower(bounds) + : get_upper(bounds); }); problem.post_process_solution(solution); solution.compute_objective(); // just to ensure h_user_obj is set diff --git a/cpp/src/mip/utils.cuh b/cpp/src/mip/utils.cuh index 18759737d5..47f1bbc48b 100644 --- a/cpp/src/mip/utils.cuh +++ b/cpp/src/mip/utils.cuh @@ -218,6 +218,20 @@ bool check_integer_equal_on_indices(const rmm::device_uvector& indices, }); } +template +f_t compute_objective_from_vec(const rmm::device_uvector& assignment, + const rmm::device_uvector& objective_coefficients, + const raft::handle_t* handle_ptr) +{ + cuopt_assert(assignment.size() == objective_coefficients.size(), "Size mismatch!"); + f_t computed_obj = thrust::inner_product(handle_ptr->get_thrust_policy(), + assignment.begin(), + assignment.end(), + objective_coefficients.begin(), + 0.); + return computed_obj; +} + template f_t compute_objective_from_vec(const rmm::device_uvector& assignment, const rmm::device_uvector& objective_coefficients, @@ -239,18 +253,18 @@ void clamp_within_var_bounds(rmm::device_uvector& assignment, { cuopt_assert(assignment.size() == problem_ptr->n_variables, "Size mismatch!"); f_t* assignment_ptr = assignment.data(); - thrust::for_each(handle_ptr->get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(0) + problem_ptr->n_variables, - [assignment_ptr, - lower_bound = problem_ptr->variable_lower_bounds.data(), - upper_bound = problem_ptr->variable_upper_bounds.data()] __device__(i_t idx) { - if (assignment_ptr[idx] < lower_bound[idx]) { - assignment_ptr[idx] = lower_bound[idx]; - } else if (assignment_ptr[idx] > upper_bound[idx]) { - assignment_ptr[idx] = upper_bound[idx]; - } - }); + thrust::for_each( + handle_ptr->get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + problem_ptr->n_variables, + [assignment_ptr, variable_bound = problem_ptr->variable_bounds.data()] __device__(i_t idx) { + auto bound = variable_bound[idx]; + if 
(assignment_ptr[idx] < get_lower(bound)) { + assignment_ptr[idx] = get_lower(bound); + } else if (assignment_ptr[idx] > get_upper(bound)) { + assignment_ptr[idx] = get_upper(bound); + } + }); } template diff --git a/cpp/src/utilities/copy_helpers.hpp b/cpp/src/utilities/copy_helpers.hpp index 78593d3e1a..7207ad72aa 100644 --- a/cpp/src/utilities/copy_helpers.hpp +++ b/cpp/src/utilities/copy_helpers.hpp @@ -18,6 +18,7 @@ #pragma once #include +#include #include #include @@ -27,6 +28,93 @@ #include namespace cuopt { + +template +struct type_2 { + using type = void; +}; + +template <> +struct type_2 { + using type = int2; +}; + +template <> +struct type_2 { + using type = float2; +}; + +template <> +struct type_2 { + using type = double2; +}; + +template +struct scalar_type { + using type = void; +}; + +template <> +struct scalar_type { + using type = int; +}; + +template <> +struct scalar_type { + using type = float; +}; + +template <> +struct scalar_type { + using type = double; +}; + +template <> +struct scalar_type { + using type = const int; +}; + +template <> +struct scalar_type { + using type = const float; +}; + +template <> +struct scalar_type { + using type = const double; +}; + +template +raft::device_span::type> make_span_2(rmm::device_uvector& container) +{ + using T2 = typename type_2::type; + static_assert(sizeof(T2) == 2 * sizeof(T)); + return raft::device_span(reinterpret_cast(container.data()), + sizeof(T) * container.size() / sizeof(T2)); +} + +template +raft::device_span::type> make_span_2( + rmm::device_uvector const& container) +{ + using T2 = typename type_2::type; + static_assert(sizeof(T2) == 2 * sizeof(T)); + return raft::device_span(reinterpret_cast(container.data()), + sizeof(T) * container.size() / sizeof(T2)); +} + +template +__host__ __device__ inline typename scalar_type::type& get_lower(f_t2& val) +{ + return val.x; +} + +template +__host__ __device__ inline typename scalar_type::type& get_upper(f_t2& val) +{ + return val.y; +} + /** * 
@brief Simple utility function to copy device ptr to host * @@ -253,4 +341,22 @@ inline void expand_device_copy(rmm::device_uvector& dst_vec, raft::copy(dst_vec.data(), src_vec.data(), src_vec.size(), stream_view); } +template +std::tuple, std::vector> extract_host_bounds( + const rmm::device_uvector& variable_bounds, const raft::handle_t* handle_ptr) +{ + rmm::device_uvector var_lb(variable_bounds.size(), handle_ptr->get_stream()); + rmm::device_uvector var_ub(variable_bounds.size(), handle_ptr->get_stream()); + thrust::transform( + handle_ptr->get_thrust_policy(), + variable_bounds.begin(), + variable_bounds.end(), + thrust::make_zip_iterator(thrust::make_tuple(var_lb.begin(), var_ub.begin())), + [] __device__(auto i) { return thrust::make_tuple(get_lower(i), get_upper(i)); }); + handle_ptr->sync_stream(); + auto h_var_lb = cuopt::host_copy(var_lb); + auto h_var_ub = cuopt::host_copy(var_ub); + return std::make_tuple(h_var_lb, h_var_ub); +} + } // namespace cuopt diff --git a/cpp/tests/mip/CMakeLists.txt b/cpp/tests/mip/CMakeLists.txt index 020c537f6a..b9fd249a56 100644 --- a/cpp/tests/mip/CMakeLists.txt +++ b/cpp/tests/mip/CMakeLists.txt @@ -27,9 +27,6 @@ ConfigureTest(ELIM_VAR_REMAP_TEST ConfigureTest(STANDARDIZATION_TEST ${CMAKE_CURRENT_SOURCE_DIR}/bounds_standardization_test.cu ) -ConfigureTest(LB_TEST - ${CMAKE_CURRENT_SOURCE_DIR}/load_balancing_test.cu -) ConfigureTest(MULTI_PROBE_TEST ${CMAKE_CURRENT_SOURCE_DIR}/multi_probe_test.cu ) diff --git a/cpp/tests/mip/elim_var_remap_test.cu b/cpp/tests/mip/elim_var_remap_test.cu index aeb48fe5d6..c486d98c81 100644 --- a/cpp/tests/mip/elim_var_remap_test.cu +++ b/cpp/tests/mip/elim_var_remap_test.cu @@ -100,8 +100,8 @@ void test_elim_var_remap(std::string test_instance) auto fixed_vars = select_k_random(problem.n_variables - 1, 5); for (auto& v : fixed_vars) { double v_val = -v - 1; - problem.variable_lower_bounds.set_element(v, v_val, handle_.get_stream()); - problem.variable_upper_bounds.set_element(v, v_val, 
handle_.get_stream()); + double2 val = double2{v_val, v_val}; + problem.variable_bounds.set_element(v, val, handle_.get_stream()); full_assignment.set_element(v, v_val, handle_.get_stream()); } // Set free var assignments to 0 @@ -182,8 +182,8 @@ void test_elim_var_solution(std::string test_instance) auto fixed_vars = select_k_random(standardized_problem.n_variables - 1, 5); for (auto& v : fixed_vars) { double v_val = opt_sol_1.get_solution().element(v, handle_.get_stream()); - sub_problem.variable_lower_bounds.set_element(v, v_val, handle_.get_stream()); - sub_problem.variable_upper_bounds.set_element(v, v_val, handle_.get_stream()); + double2 val = double2{v_val, v_val}; + sub_problem.variable_bounds.set_element(v, val, handle_.get_stream()); } handle_.sync_stream(); diff --git a/cpp/tests/mip/load_balancing_test.cu b/cpp/tests/mip/load_balancing_test.cu index deed9ea85a..fb0d8b6e86 100644 --- a/cpp/tests/mip/load_balancing_test.cu +++ b/cpp/tests/mip/load_balancing_test.cu @@ -59,15 +59,14 @@ std::tuple, std::vector, std::vector> select_k_ auto seed = std::random_device{}(); std::cerr << "Tested with seed " << seed << "\n"; problem.compute_n_integer_vars(); - auto v_lb = host_copy(problem.variable_lower_bounds); - auto v_ub = host_copy(problem.variable_upper_bounds); + auto v_bnd = host_copy(problem.variable_bounds); auto int_var_id = host_copy(problem.integer_indices); - int_var_id.erase(std::remove_if(int_var_id.begin(), - int_var_id.end(), - [v_lb, v_ub](auto id) { - return !(std::isfinite(v_lb[id]) && std::isfinite(v_ub[id])); - }), - int_var_id.end()); + int_var_id.erase( + std::remove_if( + int_var_id.begin(), + int_var_id.end(), + [v_bnd](auto id) { return !(std::isfinite(v_bnd[id].x) && std::isfinite(v_bnd[id].y)); }), + int_var_id.end()); sample_size = std::min(sample_size, static_cast(int_var_id.size())); std::vector random_int_vars; std::mt19937 m{seed}; @@ -77,11 +76,11 @@ std::tuple, std::vector, std::vector> select_k_ std::vector 
probe_1(sample_size); for (int i = 0; i < static_cast(random_int_vars.size()); ++i) { if (i % 2) { - probe_0[i] = v_lb[random_int_vars[i]]; - probe_1[i] = v_ub[random_int_vars[i]]; + probe_0[i] = v_bnd[random_int_vars[i]].x; + probe_1[i] = v_bnd[random_int_vars[i]].y; } else { - probe_1[i] = v_lb[random_int_vars[i]]; - probe_0[i] = v_ub[random_int_vars[i]]; + probe_1[i] = v_bnd[random_int_vars[i]].x; + probe_0[i] = v_bnd[random_int_vars[i]].y; } } return std::make_tuple(std::move(random_int_vars), std::move(probe_0), std::move(probe_1)); diff --git a/cpp/tests/mip/multi_probe_test.cu b/cpp/tests/mip/multi_probe_test.cu index 63cf93c792..1473c84bff 100644 --- a/cpp/tests/mip/multi_probe_test.cu +++ b/cpp/tests/mip/multi_probe_test.cu @@ -58,15 +58,15 @@ std::tuple, std::vector, std::vector> select_k_ auto seed = std::random_device{}(); std::cerr << "Tested with seed " << seed << "\n"; problem.compute_n_integer_vars(); - auto v_lb = host_copy(problem.variable_lower_bounds); - auto v_ub = host_copy(problem.variable_upper_bounds); - auto int_var_id = host_copy(problem.integer_indices); - int_var_id.erase(std::remove_if(int_var_id.begin(), - int_var_id.end(), - [v_lb, v_ub](auto id) { - return !(std::isfinite(v_lb[id]) && std::isfinite(v_ub[id])); - }), - int_var_id.end()); + auto [v_lb, v_ub] = extract_host_bounds(problem.variable_bounds, problem.handle_ptr); + auto int_var_id = host_copy(problem.integer_indices); + int_var_id.erase( + std::remove_if(int_var_id.begin(), + int_var_id.end(), + [v_lb_sp = v_lb, v_ub_sp = v_ub](auto id) { + return !(std::isfinite(v_lb_sp[id]) && std::isfinite(v_ub_sp[id])); + }), + int_var_id.end()); sample_size = std::min(sample_size, static_cast(int_var_id.size())); std::vector random_int_vars; std::mt19937 m{seed}; From 7a9e7bfa96d35cb32461cf961e6967ebac1454d5 Mon Sep 17 00:00:00 2001 From: Alice Boucher <160623740+aliceb-nv@users.noreply.github.com> Date: Wed, 10 Sep 2025 17:19:02 +0200 Subject: [PATCH 21/33] [FIX] Fix high GPU memory 
usage (#351) closes https://github.com/NVIDIA/cuopt/issues/349 compute_related_variables was heuristically allocating memory based on A100/H100 with >=40GB of VRAM. This is now automatically adjusted based on the total VRAM of the device. A command-line option has also been added to solve_MPS_file to specify device memory allocation limits for ease of testing. Authors: - Alice Boucher (https://github.com/aliceb-nv) - Nicolas L. Guidotti (https://github.com/nguidotti) Approvers: - Nicolas L. Guidotti (https://github.com/nguidotti) URL: https://github.com/NVIDIA/cuopt/pull/351 --- .../linear_programming/cuopt/run_mip.cpp | 38 ++++++++++++++++--- cpp/src/mip/problem/problem.cu | 12 ++++-- cpp/src/utilities/cuda_helpers.cuh | 25 ++++++++++++ 3 files changed, 66 insertions(+), 9 deletions(-) diff --git a/benchmarks/linear_programming/cuopt/run_mip.cpp b/benchmarks/linear_programming/cuopt/run_mip.cpp index e3e8641b4f..fab2eea90e 100644 --- a/benchmarks/linear_programming/cuopt/run_mip.cpp +++ b/benchmarks/linear_programming/cuopt/run_mip.cpp @@ -28,7 +28,10 @@ #include #include +#include +#include #include +#include #include @@ -256,7 +259,9 @@ void run_single_file_mp(std::string file_path, { std::cout << "running file " << file_path << " on gpu : " << device << std::endl; auto memory_resource = make_async(); - rmm::mr::set_current_device_resource(memory_resource.get()); + auto limiting_adaptor = + rmm::mr::limiting_resource_adaptor(memory_resource.get(), 6ULL * 1024ULL * 1024ULL * 1024ULL); + rmm::mr::set_current_device_resource(&limiting_adaptor); int sol_found = run_single_file(file_path, device, batch_id, @@ -340,6 +345,15 @@ int main(int argc, char* argv[]) .scan<'g', double>() .default_value(std::numeric_limits::max()); + program.add_argument("--memory-limit") + .help("memory limit in MB") + .scan<'g', double>() + .default_value(0.0); + + program.add_argument("--track-allocations") + .help("track allocations (t/f)") + .default_value(std::string("f")); + // Parse
arguments try { program.parse_args(argc, argv); @@ -362,10 +376,12 @@ int main(int argc, char* argv[]) std::string result_file; int batch_num = -1; - bool heuristics_only = program.get("--heuristics-only")[0] == 't'; - int num_cpu_threads = program.get("--num-cpu-threads"); - bool write_log_file = program.get("--write-log-file")[0] == 't'; - bool log_to_console = program.get("--log-to-console")[0] == 't'; + bool heuristics_only = program.get("--heuristics-only")[0] == 't'; + int num_cpu_threads = program.get("--num-cpu-threads"); + bool write_log_file = program.get("--write-log-file")[0] == 't'; + bool log_to_console = program.get("--log-to-console")[0] == 't'; + double memory_limit = program.get("--memory-limit"); + bool track_allocations = program.get("--track-allocations")[0] == 't'; if (program.is_used("--out-dir")) { out_dir = program.get("--out-dir"); @@ -469,7 +485,17 @@ int main(int argc, char* argv[]) merge_result_files(out_dir, result_file, n_gpus, batch_num); } else { auto memory_resource = make_async(); - rmm::mr::set_current_device_resource(memory_resource.get()); + if (memory_limit > 0) { + auto limiting_adaptor = + rmm::mr::limiting_resource_adaptor(memory_resource.get(), memory_limit * 1024ULL * 1024ULL); + rmm::mr::set_current_device_resource(&limiting_adaptor); + } else if (track_allocations) { + rmm::mr::tracking_resource_adaptor tracking_adaptor(memory_resource.get(), + /*capture_stacks=*/true); + rmm::mr::set_current_device_resource(&tracking_adaptor); + } else { + rmm::mr::set_current_device_resource(memory_resource.get()); + } run_single_file(path, 0, 0, diff --git a/cpp/src/mip/problem/problem.cu b/cpp/src/mip/problem/problem.cu index 37064b5c63..b237caea95 100644 --- a/cpp/src/mip/problem/problem.cu +++ b/cpp/src/mip/problem/problem.cu @@ -21,6 +21,7 @@ #include "problem_kernels.cuh" #include +#include #include #include @@ -798,16 +799,21 @@ void problem_t::compute_related_variables(double time_limit) handle_ptr->sync_stream(); + // 
previously used constants were based on 40GB of memory. Scale accordingly on smaller GPUs + // We can't rely on querying free memory or allocation try/catch + // since this would break determinism guarantees (GPU may be shared by other processes) + f_t size_factor = std::min(1.0, cuopt::get_device_memory_size() / 1e9 / 40.0); + // TODO: determine optimal number of slices based on available GPU memory? This used to be 2e9 / // n_variables - i_t max_slice_size = 6e8 / n_variables; + i_t max_slice_size = 6e8 * size_factor / n_variables; rmm::device_uvector varmap(max_slice_size * n_variables, handle_ptr->get_stream()); rmm::device_uvector offsets(max_slice_size * n_variables, handle_ptr->get_stream()); related_variables.resize(0, handle_ptr->get_stream()); // TODO: this used to be 1e8 - related_variables.reserve(1e8, handle_ptr->get_stream()); // reserve space + related_variables.reserve(1e8 * size_factor, handle_ptr->get_stream()); // reserve space related_variables_offsets.resize(n_variables + 1, handle_ptr->get_stream()); related_variables_offsets.set_element_to_zero_async(0, handle_ptr->get_stream()); @@ -851,7 +857,7 @@ void problem_t::compute_related_variables(double time_limit) auto current_time = std::chrono::high_resolution_clock::now(); // if the related variable array would wind up being too large for available memory, abort // TODO this used to be 1e9 - if (related_variables.size() > 1e9 || + if (related_variables.size() > 1e9 * size_factor || std::chrono::duration_cast(current_time - start_time).count() > time_limit) { CUOPT_LOG_DEBUG( diff --git a/cpp/src/utilities/cuda_helpers.cuh b/cpp/src/utilities/cuda_helpers.cuh index 3de8206993..d70eb2d525 100644 --- a/cpp/src/utilities/cuda_helpers.cuh +++ b/cpp/src/utilities/cuda_helpers.cuh @@ -24,6 +24,8 @@ #include #include #include +#include +#include namespace cuopt { @@ -208,4 +210,27 @@ DI void sorted_insert(T* array, T item, int curr_size, int max_size) array[0] = item; } +inline size_t 
get_device_memory_size() +{ + // Otherwise, we need to get the free memory from the device + size_t free_mem, total_mem; + cudaMemGetInfo(&free_mem, &total_mem); + + auto res = rmm::mr::get_current_device_resource(); + auto limiting_adaptor = + dynamic_cast*>(res); + // Did we specifiy an explicit memory limit? + if (limiting_adaptor) { + printf("limiting_adaptor->get_allocation_limit(): %fMiB\n", + limiting_adaptor->get_allocation_limit() / (double)1e6); + printf("used_mem: %fMiB\n", limiting_adaptor->get_allocated_bytes() / (double)1e6); + printf("free_mem: %fMiB\n", + (limiting_adaptor->get_allocation_limit() - limiting_adaptor->get_allocated_bytes()) / + (double)1e6); + return std::min(total_mem, limiting_adaptor->get_allocation_limit()); + } else { + return total_mem; + } +} + } // namespace cuopt From 75f52615d5465a1bbe4c72d1d928a61b6efc4e71 Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Wed, 10 Sep 2025 12:59:54 -0400 Subject: [PATCH 22/33] Implement node presolve (#368) This PR implements node presolve using the bounds strengthening algorithm. At each node in the branch-and-bound tree, host presolve is run (i) to improve the bounds on the variables and (ii) to detect infeasibility and fathom nodes accordingly **Details** - The bounds strengthening takes integrality constraints into account. This leads to more infeasibility detection than doing the dual simplex. - Bounds strengthening is done starting only with the constraints that are associated with the branched variables (all the way up to the root node). For example, if the current node is at depth 7, only constraints relating to these 7 variables are considered changed for the first iteration of bounds strengthening.
This greatly improves the performance of the node presolve - This bounds strengthening is performed on the cpu ## Issue Closes #276 Authors: - Rajesh Gandham (https://github.com/rg20) Approvers: - Kumar Aatish (https://github.com/kaatish) URL: https://github.com/NVIDIA/cuopt/pull/368 --- .../linear_programming/run_mps_files.sh | 53 ++- cpp/src/dual_simplex/branch_and_bound.cpp | 55 ++- cpp/src/dual_simplex/mip_node.hpp | 13 +- cpp/src/dual_simplex/phase2.hpp | 15 + cpp/src/dual_simplex/presolve.cpp | 340 +++++++++++++----- cpp/src/dual_simplex/presolve.hpp | 9 + 6 files changed, 369 insertions(+), 116 deletions(-) diff --git a/benchmarks/linear_programming/run_mps_files.sh b/benchmarks/linear_programming/run_mps_files.sh index 61a8af0e04..5baf729b90 100755 --- a/benchmarks/linear_programming/run_mps_files.sh +++ b/benchmarks/linear_programming/run_mps_files.sh @@ -79,6 +79,7 @@ Optional Arguments: --batch-num Batch number --n-batches Number of batches --log-to-console Log to console + --model-list File containing a list of models to run -h, --help Show this help message and exit Examples: @@ -168,6 +169,11 @@ while [[ $# -gt 0 ]]; do LOG_TO_CONSOLE="$2" shift 2 ;; + --model-list) + echo "MODEL_LIST: $2" + MODEL_LIST="$2" + shift 2 + ;; *) echo "Unknown argument: $1" print_help @@ -194,7 +200,7 @@ PRESOLVE=${PRESOLVE:-true} BATCH_NUM=${BATCH_NUM:-0} N_BATCHES=${N_BATCHES:-1} LOG_TO_CONSOLE=${LOG_TO_CONSOLE:-true} - +MODEL_LIST=${MODEL_LIST:-} # Determine GPU list if [[ -n "$CUDA_VISIBLE_DEVICES" ]]; then IFS=',' read -ra GPU_LIST <<< "$CUDA_VISIBLE_DEVICES" @@ -206,8 +212,49 @@ else fi GPU_COUNT=${#GPU_LIST[@]} -# Gather all mps files into an array -mapfile -t mps_files < <(ls "$MPS_DIR"/*.mps) +# Ensure all entries in MODEL_LIST have .mps extension +if [[ -n "$MODEL_LIST" && -f "$MODEL_LIST" ]]; then + # Create a temporary file to store the updated model list + TMP_MODEL_LIST=$(mktemp) + while IFS= read -r line || [[ -n "$line" ]]; do + # Skip empty lines + [[ -z 
"$line" ]] && continue + # If the line does not end with .mps, append it + if [[ "$line" != *.mps ]]; then + echo "${line}.mps" >> "$TMP_MODEL_LIST" + else + echo "$line" >> "$TMP_MODEL_LIST" + fi + done < "$MODEL_LIST" + # Overwrite the original MODEL_LIST with the updated one + mv "$TMP_MODEL_LIST" "$MODEL_LIST" +fi + + +# Gather all mps files into an array, either from the model list or from the directory +if [[ -n "$MODEL_LIST" ]]; then + if [[ ! -f "$MODEL_LIST" ]]; then + echo "Model list file not found: $MODEL_LIST" + exit 1 + fi + mapfile -t mps_files < <(grep -v '^\s*$' "$MODEL_LIST" | sed "s|^|$MPS_DIR/|") + # Optionally, check that all files exist + missing_files=() + for f in "${mps_files[@]}"; do + if [[ ! -f "$f" ]]; then + missing_files+=("$f") + fi + done + if (( ${#missing_files[@]} > 0 )); then + echo "The following files from the model list do not exist in $MPS_DIR:" + for f in "${missing_files[@]}"; do + echo " $f" + done + exit 1 + fi +else + mapfile -t mps_files < <(ls "$MPS_DIR"/*.mps) +fi # Calculate batch size and start/end indices batch_size=$(( (${#mps_files[@]} + N_BATCHES - 1) / N_BATCHES )) diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index ce92532044..898f0a85ef 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -529,6 +529,8 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut std::move(up_child)); // child pointers moved into the tree lp_problem_t leaf_problem = original_lp; // Make a copy of the original LP. 
We will modify its bounds at each leaf + csc_matrix_t Arow(1, 1, 1); + original_lp.A.transpose(Arow); f_t gap = get_upper_bound() - lower_bound; i_t nodes_explored = 0; settings.log.printf( @@ -631,7 +633,11 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut // Set the correct bounds for the leaf problem leaf_problem.lower = original_lp.lower; leaf_problem.upper = original_lp.upper; - node_ptr->get_variable_bounds(leaf_problem.lower, leaf_problem.upper); + std::vector bounds_changed(original_lp.num_cols, false); + // Technically, we can get the already strengthened bounds from the node/parent instead of + // getting it from the original problem and re-strengthening. But this requires storing + // two vectors at each node and potentially cause memory issues + node_ptr->get_variable_bounds(leaf_problem.lower, leaf_problem.upper, bounds_changed); std::vector& leaf_vstatus = node_ptr->vstatus; lp_solution_t leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols); @@ -642,28 +648,43 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut std::vector leaf_edge_norms = edge_norms; // = node.steepest_edge_norms; simplex_solver_settings_t lp_settings = settings; lp_settings.set_log(false); - lp_settings.cut_off = upper_bound + settings.dual_tol; - lp_settings.inside_mip = 2; - dual::status_t lp_status = dual_phase2(2, - 0, - lp_start_time, - leaf_problem, - lp_settings, - leaf_vstatus, - leaf_solution, - node_iter, - leaf_edge_norms); - if (lp_status == dual::status_t::NUMERICAL) { - settings.log.printf("Numerical issue node %d. 
Resolving from scratch.\n", nodes_explored); - lp_status_t second_status = solve_linear_program_advanced( - leaf_problem, lp_start_time, lp_settings, leaf_solution, leaf_vstatus, leaf_edge_norms); - lp_status = convert_lp_status_to_dual_status(second_status); + lp_settings.cut_off = upper_bound + settings.dual_tol; + lp_settings.inside_mip = 2; + + // in B&B we only have equality constraints, leave it empty for default + std::vector row_sense; + bool feasible = + bound_strengthening(row_sense, lp_settings, leaf_problem, Arow, var_types, bounds_changed); + + dual::status_t lp_status = dual::status_t::DUAL_UNBOUNDED; + + // If the problem is infeasible after bounds strengthening, we don't need to solve the LP + if (feasible) { + lp_status = dual_phase2(2, + 0, + lp_start_time, + leaf_problem, + lp_settings, + leaf_vstatus, + leaf_solution, + node_iter, + leaf_edge_norms); + if (lp_status == dual::status_t::NUMERICAL) { + settings.log.printf("Numerical issue node %d. Resolving from scratch.\n", nodes_explored); + lp_status_t second_status = solve_linear_program_advanced( + leaf_problem, lp_start_time, lp_settings, leaf_solution, leaf_vstatus, leaf_edge_norms); + lp_status = convert_lp_status_to_dual_status(second_status); + } } total_lp_solve_time += toc(lp_start_time); total_lp_iters += node_iter; nodes_explored++; if (lp_status == dual::status_t::DUAL_UNBOUNDED) { + if (!feasible) { + settings.log.printf("Infeasible after bounds strengthening. 
Fathoming node %d.\n", + nodes_explored); + } node_ptr->lower_bound = inf; std::vector*> stack; node_ptr->set_status(node_status_t::INFEASIBLE, stack); diff --git a/cpp/src/dual_simplex/mip_node.hpp b/cpp/src/dual_simplex/mip_node.hpp index cae93a4dc7..c4d83a8333 100644 --- a/cpp/src/dual_simplex/mip_node.hpp +++ b/cpp/src/dual_simplex/mip_node.hpp @@ -79,22 +79,27 @@ class mip_node_t { children[1] = nullptr; } - void get_variable_bounds(std::vector& lower, std::vector& upper) const + void get_variable_bounds(std::vector& lower, + std::vector& upper, + std::vector& bounds_changed) const { + std::fill(bounds_changed.begin(), bounds_changed.end(), false); // Apply the bounds at the current node assert(lower.size() > branch_var); assert(upper.size() > branch_var); lower[branch_var] = branch_var_lower; upper[branch_var] = branch_var_upper; + bounds_changed[branch_var] = true; mip_node_t* parent_ptr = parent; while (parent_ptr != nullptr) { if (parent_ptr->node_id == 0) { break; } assert(parent_ptr->branch_var >= 0); assert(lower.size() > parent_ptr->branch_var); assert(upper.size() > parent_ptr->branch_var); - lower[parent_ptr->branch_var] = parent_ptr->branch_var_lower; - upper[parent_ptr->branch_var] = parent_ptr->branch_var_upper; - parent_ptr = parent_ptr->parent; + lower[parent_ptr->branch_var] = parent_ptr->branch_var_lower; + upper[parent_ptr->branch_var] = parent_ptr->branch_var_upper; + bounds_changed[parent_ptr->branch_var] = true; + parent_ptr = parent_ptr->parent; } } diff --git a/cpp/src/dual_simplex/phase2.hpp b/cpp/src/dual_simplex/phase2.hpp index ef52c0a190..39311e0607 100644 --- a/cpp/src/dual_simplex/phase2.hpp +++ b/cpp/src/dual_simplex/phase2.hpp @@ -38,7 +38,22 @@ enum class status_t { CONCURRENT_LIMIT = 6, UNSET = 7 }; + +static std::string status_to_string(status_t status) +{ + switch (status) { + case status_t::OPTIMAL: return "OPTIMAL"; + case status_t::DUAL_UNBOUNDED: return "DUAL_UNBOUNDED"; + case status_t::NUMERICAL: return "NUMERICAL"; + 
case status_t::CUTOFF: return "CUTOFF"; + case status_t::TIME_LIMIT: return "TIME_LIMIT"; + case status_t::ITERATION_LIMIT: return "ITERATION_LIMIT"; + case status_t::CONCURRENT_LIMIT: return "CONCURRENT_LIMIT"; + case status_t::UNSET: return "UNSET"; + } + return "UNKNOWN"; } +} // namespace dual template dual::status_t dual_phase2(i_t phase, diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index 457da9113d..48d696bf96 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -18,131 +18,277 @@ #include #include +#include #include +#include namespace cuopt::linear_programming::dual_simplex { +template +static inline f_t update_lb(f_t curr_lb, f_t coeff, f_t delta_min_act, f_t delta_max_act) +{ + auto comp_bnd = (coeff < 0.) ? delta_min_act / coeff : delta_max_act / coeff; + return std::max(curr_lb, comp_bnd); +} + +template +static inline f_t update_ub(f_t curr_ub, f_t coeff, f_t delta_min_act, f_t delta_max_act) +{ + auto comp_bnd = (coeff < 0.) ? 
delta_max_act / coeff : delta_min_act / coeff; + return std::min(curr_ub, comp_bnd); +} + template -void bound_strengthening(const std::vector& row_sense, +static inline bool check_infeasibility(f_t min_a, f_t max_a, f_t cnst_lb, f_t cnst_ub, f_t eps) +{ + return (min_a > cnst_ub + eps) || (max_a < cnst_lb - eps); +} + +#define DEBUG_BOUND_STRENGTHENING 0 + +template +void print_bounds_stats(const std::vector& lower, + const std::vector& upper, + const simplex_solver_settings_t& settings, + const std::string msg) +{ +#if DEBUG_BOUND_STRENGTHENING + f_t lb_norm = 0.0; + f_t ub_norm = 0.0; + + i_t sz = lower.size(); + for (i_t i = 0; i < sz; ++i) { + if (std::isfinite(lower[i])) { lb_norm += abs(lower[i]); } + if (std::isfinite(upper[i])) { ub_norm += abs(upper[i]); } + } + settings.log.printf("%s :: lb norm %e, ub norm %e\n", msg.c_str(), lb_norm, ub_norm); +#endif +} + +template +bool bound_strengthening(const std::vector& row_sense, const simplex_solver_settings_t& settings, - lp_problem_t& problem) + lp_problem_t& problem, + const csc_matrix_t& Arow, + const std::vector& var_types, + const std::vector& bounds_changed) { const i_t m = problem.num_rows; const i_t n = problem.num_cols; - std::vector constraint_lower(m); - std::vector num_lower_infinity(m); - std::vector num_upper_infinity(m); - - csc_matrix_t Arow(1, 1, 1); - problem.A.transpose(Arow); - - std::vector less_rows; - less_rows.reserve(m); + std::vector delta_min_activity(m); + std::vector delta_max_activity(m); + std::vector constraint_lb(m); + std::vector constraint_ub(m); + + // FIXME:: Instead of initializing constraint_changed to true, we can only look + // at the constraints corresponding to branched variable in branch and bound + // This is because, the parent LP already checked for feasibility of the constraints + // without the branched variable bounds + std::vector constraint_changed(m, true); + std::vector variable_changed(n, false); + std::vector constraint_changed_next(m, false); + + if 
(false && !bounds_changed.empty()) { + std::fill(constraint_changed.begin(), constraint_changed.end(), false); + for (i_t i = 0; i < n; ++i) { + if (bounds_changed[i]) { + const i_t row_start = problem.A.col_start[i]; + const i_t row_end = problem.A.col_start[i + 1]; + for (i_t p = row_start; p < row_end; ++p) { + const i_t j = problem.A.i[p]; + constraint_changed[j] = true; + } + } + } + } - for (i_t i = 0; i < m; ++i) { - if (row_sense[i] == 'L') { less_rows.push_back(i); } + const bool is_row_sense_empty = row_sense.empty(); + if (is_row_sense_empty) { + std::copy(problem.rhs.begin(), problem.rhs.end(), constraint_lb.begin()); + std::copy(problem.rhs.begin(), problem.rhs.end(), constraint_ub.begin()); + } else { + // Set the constraint bounds + for (i_t i = 0; i < m; ++i) { + if (row_sense[i] == 'E') { + constraint_lb[i] = problem.rhs[i]; + constraint_ub[i] = problem.rhs[i]; + } else if (row_sense[i] == 'L') { + constraint_ub[i] = problem.rhs[i]; + constraint_lb[i] = -inf; + } else { + constraint_lb[i] = problem.rhs[i]; + constraint_ub[i] = inf; + } + } } std::vector lower = problem.lower; std::vector upper = problem.upper; + print_bounds_stats(lower, upper, settings, "Initial bounds"); + + i_t iter = 0; + const i_t iter_limit = 10; + while (iter < iter_limit) { + for (i_t i = 0; i < m; ++i) { + if (!constraint_changed[i]) { continue; } + const i_t row_start = Arow.col_start[i]; + const i_t row_end = Arow.col_start[i + 1]; - std::vector updated_variables_list; - updated_variables_list.reserve(n); - std::vector updated_variables_mark(n, 0); - - i_t iter = 0; - const i_t iter_limit = 10; - i_t total_strengthened_variables = 0; - settings.log.printf("Less equal rows %d\n", less_rows.size()); - while (iter < iter_limit && less_rows.size() > 0) { - // Derive bounds on the constraints - settings.log.printf("Running bound strengthening on %d rows\n", - static_cast(less_rows.size())); - for (i_t i : less_rows) { - const i_t row_start = Arow.col_start[i]; - const i_t 
row_end = Arow.col_start[i + 1]; - num_lower_infinity[i] = 0; - num_upper_infinity[i] = 0; - - f_t lower_limit = 0.0; + f_t min_a = 0.0; + f_t max_a = 0.0; for (i_t p = row_start; p < row_end; ++p) { const i_t j = Arow.i[p]; const f_t a_ij = Arow.x[p]; + + variable_changed[j] = true; if (a_ij > 0) { - lower_limit += a_ij * lower[j]; + min_a += a_ij * lower[j]; + max_a += a_ij * upper[j]; } else if (a_ij < 0) { - lower_limit += a_ij * upper[j]; - } - if (lower[j] == -inf && a_ij > 0) { - num_lower_infinity[i]++; - lower_limit = -inf; - } - if (upper[j] == inf && a_ij < 0) { - num_lower_infinity[i]++; - lower_limit = -inf; + min_a += a_ij * upper[j]; + max_a += a_ij * lower[j]; } + if (upper[j] == inf && a_ij > 0) { max_a = inf; } + if (lower[j] == -inf && a_ij < 0) { max_a = inf; } + + if (lower[j] == -inf && a_ij > 0) { min_a = -inf; } + if (upper[j] == inf && a_ij < 0) { min_a = -inf; } } - constraint_lower[i] = lower_limit; - } - // Use the constraint bounds to derive new bounds on the variables - for (i_t i : less_rows) { - if (std::isfinite(constraint_lower[i]) && num_lower_infinity[i] == 0) { - const i_t row_start = Arow.col_start[i]; - const i_t row_end = Arow.col_start[i + 1]; - for (i_t p = row_start; p < row_end; ++p) { - const i_t k = Arow.i[p]; - const f_t a_ik = Arow.x[p]; - if (a_ik > 0) { - const f_t new_upper = lower[k] + (problem.rhs[i] - constraint_lower[i]) / a_ik; - if (new_upper < upper[k]) { - upper[k] = new_upper; - if (lower[k] > upper[k]) { - settings.log.printf( - "\t INFEASIBLE!!!!!!!!!!!!!!!!! 
constraint_lower %e lower %e rhs %e\n", - constraint_lower[i], - lower[k], - problem.rhs[i]); - } - if (!updated_variables_mark[k]) { updated_variables_list.push_back(k); } - } - } else if (a_ik < 0) { - const f_t new_lower = upper[k] + (problem.rhs[i] - constraint_lower[i]) / a_ik; - if (new_lower > lower[k]) { - lower[k] = new_lower; - if (lower[k] > upper[k]) { - settings.log.printf("\t INFEASIBLE !!!!!!!!!!!!!!!!!!1\n"); - } - if (!updated_variables_mark[k]) { updated_variables_list.push_back(k); } - } - } - } + f_t cnst_lb = constraint_lb[i]; + f_t cnst_ub = constraint_ub[i]; + bool is_infeasible = + check_infeasibility(min_a, max_a, cnst_lb, cnst_ub, settings.primal_tol); + if (is_infeasible) { + settings.log.printf( + "Iter:: %d, Infeasible constraint %d, cnst_lb %e, cnst_ub %e, min_a %e, max_a %e\n", + iter, + i, + cnst_lb, + cnst_ub, + min_a, + max_a); + return false; } + + delta_min_activity[i] = cnst_ub - min_a; + delta_max_activity[i] = cnst_lb - max_a; } - less_rows.clear(); - - // Update the bounds on the constraints - settings.log.printf("Round %d: Strengthend %d variables\n", - iter, - static_cast(updated_variables_list.size())); - total_strengthened_variables += updated_variables_list.size(); - for (i_t j : updated_variables_list) { - updated_variables_mark[j] = 0; - const i_t col_start = problem.A.col_start[j]; - const i_t col_end = problem.A.col_start[j + 1]; - for (i_t p = col_start; p < col_end; ++p) { + + i_t num_bounds_changed = 0; + + for (i_t k = 0; k < n; ++k) { + if (!variable_changed[k]) { continue; } + f_t old_lb = lower[k]; + f_t old_ub = upper[k]; + + f_t new_lb = old_lb; + f_t new_ub = old_ub; + + const i_t row_start = problem.A.col_start[k]; + const i_t row_end = problem.A.col_start[k + 1]; + for (i_t p = row_start; p < row_end; ++p) { const i_t i = problem.A.i[p]; - less_rows.push_back(i); + + if (!constraint_changed[i]) { continue; } + const f_t a_ik = problem.A.x[p]; + + f_t delta_min_act = delta_min_activity[i]; + f_t 
delta_max_act = delta_max_activity[i]; + + delta_min_act += (a_ik < 0) ? a_ik * old_ub : a_ik * old_lb; + delta_max_act += (a_ik > 0) ? a_ik * old_ub : a_ik * old_lb; + + new_lb = std::max(new_lb, update_lb(old_lb, a_ik, delta_min_act, delta_max_act)); + new_ub = std::min(new_ub, update_ub(old_ub, a_ik, delta_min_act, delta_max_act)); + } + + // Integer rounding + if (!var_types.empty() && + (var_types[k] == variable_type_t::INTEGER || var_types[k] == variable_type_t::BINARY)) { + new_lb = std::ceil(new_lb - settings.integer_tol); + new_ub = std::floor(new_ub + settings.integer_tol); + } + + bool lb_updated = abs(new_lb - old_lb) > 1e3 * settings.primal_tol; + bool ub_updated = abs(new_ub - old_ub) > 1e3 * settings.primal_tol; + + new_lb = std::max(new_lb, problem.lower[k]); + new_ub = std::min(new_ub, problem.upper[k]); + + if (new_lb > new_ub + 1e-6) { + settings.log.printf( + "Iter:: %d, Infeasible variable after update %d, %e > %e\n", iter, k, new_lb, new_ub); + return false; + } + if (new_lb != old_lb || new_ub != old_ub) { + for (i_t p = row_start; p < row_end; ++p) { + const i_t i = problem.A.i[p]; + constraint_changed_next[i] = true; + } } + + lower[k] = std::min(new_lb, new_ub); + upper[k] = std::max(new_lb, new_ub); + + bool bounds_changed = lb_updated || ub_updated; + if (bounds_changed) { num_bounds_changed++; } } - updated_variables_list.clear(); + + if (num_bounds_changed == 0) { break; } + + std::swap(constraint_changed, constraint_changed_next); + std::fill(constraint_changed_next.begin(), constraint_changed_next.end(), false); + std::fill(variable_changed.begin(), variable_changed.end(), false); + iter++; } - settings.log.printf("Total strengthened variables %d\n", total_strengthened_variables); + + // settings.log.printf("Total strengthened variables %d\n", total_strengthened_variables); + +#if DEBUG_BOUND_STRENGTHENING + f_t lb_change = 0.0; + f_t ub_change = 0.0; + int num_lb_changed = 0; + int num_ub_changed = 0; + + for (i_t i = 0; i < n; ++i) 
{ + if (lower[i] > problem.lower[i] + settings.primal_tol || + (!std::isfinite(problem.lower[i]) && std::isfinite(lower[i]))) { + num_lb_changed++; + lb_change += + std::isfinite(problem.lower[i]) + ? (lower[i] - problem.lower[i]) / (1e-6 + std::max(abs(lower[i]), abs(problem.lower[i]))) + : 1.0; + } + if (upper[i] < problem.upper[i] - settings.primal_tol || + (!std::isfinite(problem.upper[i]) && std::isfinite(upper[i]))) { + num_ub_changed++; + ub_change += + std::isfinite(problem.upper[i]) + ? (problem.upper[i] - upper[i]) / (1e-6 + std::max(abs(problem.upper[i]), abs(upper[i]))) + : 1.0; + } + } + + if (num_lb_changed > 0 || num_ub_changed > 0) { + settings.log.printf( + "lb change %e, ub change %e, num lb changed %d, num ub changed %d, iter %d\n", + 100 * lb_change / std::max(1, num_lb_changed), + 100 * ub_change / std::max(1, num_ub_changed), + num_lb_changed, + num_ub_changed, + iter); + } + print_bounds_stats(lower, upper, settings, "Final bounds"); +#endif + problem.lower = lower; problem.upper = upper; + + return true; } template @@ -691,7 +837,9 @@ void convert_user_problem(const user_problem_t& user_problem, constexpr bool run_bound_strengthening = false; if constexpr (run_bound_strengthening) { settings.log.printf("Running bound strengthening\n"); - bound_strengthening(row_sense, settings, problem); + csc_matrix_t Arow(1, 1, 1); + problem.A.transpose(Arow); + bound_strengthening(row_sense, settings, problem, Arow); } // The original problem may have a variable without a lower bound @@ -1077,6 +1225,14 @@ template void uncrush_solution(const presolve_info_t& const std::vector& crushed_z, std::vector& uncrushed_x, std::vector& uncrushed_z); + +template bool bound_strengthening( + const std::vector& row_sense, + const simplex_solver_settings_t& settings, + lp_problem_t& problem, + const csc_matrix_t& Arow, + const std::vector& var_types, + const std::vector& bounds_changed); #endif } // namespace cuopt::linear_programming::dual_simplex diff --git 
a/cpp/src/dual_simplex/presolve.hpp b/cpp/src/dual_simplex/presolve.hpp index 7a307e6f73..83b8c7d751 100644 --- a/cpp/src/dual_simplex/presolve.hpp +++ b/cpp/src/dual_simplex/presolve.hpp @@ -131,4 +131,13 @@ void uncrush_solution(const presolve_info_t& presolve_info, std::vector& uncrushed_x, std::vector& uncrushed_z); +// For pure LP bounds strengthening, var_types should be defaulted (i.e. left empty) +template +bool bound_strengthening(const std::vector& row_sense, + const simplex_solver_settings_t& settings, + lp_problem_t& problem, + const csc_matrix_t& Arow, + const std::vector& var_types = {}, + const std::vector& bounds_changed = {}); + } // namespace cuopt::linear_programming::dual_simplex From f37631f0cae7783b5c566d3f59e1c27576b9ca7c Mon Sep 17 00:00:00 2001 From: Hugo Linsenmaier Date: Mon, 15 Sep 2025 09:21:39 -0700 Subject: [PATCH 23/33] Loosen presolve tolerance and update timers to report cumulative presolve/solve time (#381) This PR updates: - Presolve tolerance setting from 9e-7 to 1e-5 as some models were incorrectly detected as infeasible. - Usage of `timer_t` class to propagate and report more precisely the cumulative time of presolve + solve time (concurrent, dual, pdlp).
Authors: - Hugo Linsenmaier (https://github.com/hlinsen) Approvers: - Rajesh Gandham (https://github.com/rg20) URL: https://github.com/NVIDIA/cuopt/pull/381 --- cpp/src/linear_programming/pdlp.cu | 67 ++++++-------- cpp/src/linear_programming/pdlp.cuh | 17 ++-- cpp/src/linear_programming/solve.cu | 78 ++++++++-------- cpp/src/linear_programming/solve.cuh | 1 + cpp/src/mip/presolve/third_party_presolve.cpp | 1 + cpp/src/mip/relaxed_lp/relaxed_lp.cu | 2 +- cpp/src/mip/solver.cu | 3 +- cpp/tests/linear_programming/pdlp_test.cu | 88 +++++++++---------- 8 files changed, 123 insertions(+), 134 deletions(-) diff --git a/cpp/src/linear_programming/pdlp.cu b/cpp/src/linear_programming/pdlp.cu index 824539e0ca..9f2de16f00 100644 --- a/cpp/src/linear_programming/pdlp.cu +++ b/cpp/src/linear_programming/pdlp.cu @@ -253,22 +253,14 @@ void pdlp_solver_t::set_initial_dual_solution( initial_dual_.data(), initial_dual_solution.data(), initial_dual_solution.size(), stream_view_); } -static bool time_limit_reached(const std::chrono::high_resolution_clock::time_point& start_time, - double seconds) -{ - auto current_time = std::chrono::high_resolution_clock::now(); - auto elapsed = - std::chrono::duration_cast(current_time - start_time).count(); - - return elapsed >= (seconds * 1000.0); -} +static bool time_limit_reached(const timer_t& timer) { return timer.check_time_limit(); } template std::optional> pdlp_solver_t::check_limits( - const std::chrono::high_resolution_clock::time_point& start_time) + const timer_t& timer) { // Check for time limit - if (time_limit_reached(start_time, settings_.time_limit)) { + if (time_limit_reached(timer)) { if (settings_.save_best_primal_so_far) { #ifdef PDLP_VERBOSE_MODE RAFT_CUDA_TRY(cudaDeviceSynchronize()); @@ -491,14 +483,10 @@ pdlp_warm_start_data_t pdlp_solver_t::get_filled_warmed_star } template -void pdlp_solver_t::print_termination_criteria( - const std::chrono::high_resolution_clock::time_point& start_time, bool is_average) +void 
pdlp_solver_t::print_termination_criteria(const timer_t& timer, bool is_average) { if (!inside_mip_) { - const auto current_time = std::chrono::high_resolution_clock::now(); - const f_t elapsed = - std::chrono::duration_cast(current_time - start_time).count() / - 1000.0; + auto elapsed = timer.elapsed_time(); if (is_average) { average_termination_strategy_.print_termination_criteria(total_pdlp_iterations_, elapsed); } else { @@ -509,13 +497,13 @@ void pdlp_solver_t::print_termination_criteria( template void pdlp_solver_t::print_final_termination_criteria( - const std::chrono::high_resolution_clock::time_point& start_time, + const timer_t& timer, const convergence_information_t& convergence_information, const pdlp_termination_status_t& termination_status, bool is_average) { if (!inside_mip_) { - print_termination_criteria(start_time, is_average); + print_termination_criteria(timer, is_average); CUOPT_LOG_INFO( "LP Solver status: %s", optimization_problem_solution_t::get_termination_status_string(termination_status) @@ -538,7 +526,7 @@ void pdlp_solver_t::print_final_termination_criteria( template std::optional> pdlp_solver_t::check_termination( - const std::chrono::high_resolution_clock::time_point& start_time) + const timer_t& timer) { raft::common::nvtx::range fun_scope("Check termination"); @@ -580,8 +568,8 @@ std::optional> pdlp_solver_t // enough) We still need to check iteration and time limit prior without breaking the logic below // of first checking termination before the limit if (total_pdlp_iterations_ <= 1) { - print_termination_criteria(start_time); - return check_limits(start_time); + print_termination_criteria(timer); + return check_limits(timer); } // First check for pdlp_termination_reason_t::Optimality and handle the first primal feasible case @@ -654,9 +642,8 @@ std::optional> pdlp_solver_t std::cout << "Optimal. 
End total number of iteration current=" << internal_solver_iterations_ << std::endl; #endif - print_final_termination_criteria(start_time, - current_termination_strategy_.get_convergence_information(), - termination_current); + print_final_termination_criteria( + timer, current_termination_strategy_.get_convergence_information(), termination_current); return current_termination_strategy_.fill_return_problem_solution( internal_solver_iterations_, pdhg_solver_, @@ -669,7 +656,7 @@ std::optional> pdlp_solver_t std::cout << "Optimal. End total number of iteration average=" << internal_solver_iterations_ << std::endl; #endif - print_final_termination_criteria(start_time, + print_final_termination_criteria(timer, average_termination_strategy_.get_convergence_information(), termination_average, true); @@ -689,7 +676,7 @@ std::optional> pdlp_solver_t std::cout << "Optimal. End total number of iteration average=" << internal_solver_iterations_ << std::endl; #endif - print_final_termination_criteria(start_time, + print_final_termination_criteria(timer, average_termination_strategy_.get_convergence_information(), termination_average, true); @@ -707,7 +694,7 @@ std::optional> pdlp_solver_t << std::endl; #endif print_final_termination_criteria( - start_time, current_termination_strategy_.get_convergence_information(), termination_current); + timer, current_termination_strategy_.get_convergence_information(), termination_current); return current_termination_strategy_.fill_return_problem_solution( internal_solver_iterations_, pdhg_solver_, @@ -729,9 +716,8 @@ std::optional> pdlp_solver_t std::cout << "Current Infeasible. 
End total number of iteration current=" << internal_solver_iterations_ << std::endl; #endif - print_final_termination_criteria(start_time, - current_termination_strategy_.get_convergence_information(), - termination_current); + print_final_termination_criteria( + timer, current_termination_strategy_.get_convergence_information(), termination_current); return current_termination_strategy_.fill_return_problem_solution( internal_solver_iterations_, pdhg_solver_, @@ -745,7 +731,7 @@ std::optional> pdlp_solver_t std::cout << "Average Infeasible. End total number of iteration current=" << internal_solver_iterations_ << std::endl; #endif - print_final_termination_criteria(start_time, + print_final_termination_criteria(timer, average_termination_strategy_.get_convergence_information(), termination_average, true); @@ -765,9 +751,8 @@ std::optional> pdlp_solver_t std::cout << "Infeasible. End total number of iteration current=" << internal_solver_iterations_ << std::endl; #endif - print_final_termination_criteria(start_time, - current_termination_strategy_.get_convergence_information(), - termination_current); + print_final_termination_criteria( + timer, current_termination_strategy_.get_convergence_information(), termination_current); return current_termination_strategy_.fill_return_problem_solution( internal_solver_iterations_, pdhg_solver_, @@ -785,7 +770,7 @@ std::optional> pdlp_solver_t << internal_solver_iterations_ << std::endl; #endif print_final_termination_criteria( - start_time, current_termination_strategy_.get_convergence_information(), termination_current); + timer, current_termination_strategy_.get_convergence_information(), termination_current); return optimization_problem_solution_t{pdlp_termination_status_t::NumericalError, stream_view_}; } @@ -797,10 +782,10 @@ std::optional> pdlp_solver_t average_termination_strategy_, termination_current, termination_average); - if (total_pdlp_iterations_ % 1000 == 0) { print_termination_criteria(start_time); } + if 
(total_pdlp_iterations_ % 1000 == 0) { print_termination_criteria(timer); } // No reason to terminate - return check_limits(start_time); + return check_limits(timer); } template @@ -983,8 +968,7 @@ void pdlp_solver_t::update_primal_dual_solutions( } template -optimization_problem_solution_t pdlp_solver_t::run_solver( - const std::chrono::high_resolution_clock::time_point& start_time) +optimization_problem_solution_t pdlp_solver_t::run_solver(const timer_t& timer) { bool verbose; #ifdef PDLP_VERBOSE_MODE @@ -1139,8 +1123,7 @@ optimization_problem_solution_t pdlp_solver_t::run_solver( pdhg_solver_.get_dual_solution()); // Check for termination - std::optional> solution = - check_termination(start_time); + std::optional> solution = check_termination(timer); if (solution.has_value()) { return std::move(solution.value()); } diff --git a/cpp/src/linear_programming/pdlp.cuh b/cpp/src/linear_programming/pdlp.cuh index 24db449726..ae75505abe 100644 --- a/cpp/src/linear_programming/pdlp.cuh +++ b/cpp/src/linear_programming/pdlp.cuh @@ -29,6 +29,8 @@ #include +#include + #include #include @@ -68,8 +70,7 @@ class pdlp_solver_t { pdlp_solver_settings_t const& settings = pdlp_solver_settings_t{}, bool is_batch_mode = false); - optimization_problem_solution_t run_solver( - const std::chrono::high_resolution_clock::time_point& start_time); + optimization_problem_solution_t run_solver(const timer_t& timer); f_t get_primal_weight_h() const; f_t get_step_size_h() const; @@ -99,19 +100,16 @@ class pdlp_solver_t { void set_inside_mip(bool inside_mip); private: - void print_termination_criteria(const std::chrono::high_resolution_clock::time_point& start_time, - bool is_average = false); + void print_termination_criteria(const timer_t& timer, bool is_average = false); void print_final_termination_criteria( - const std::chrono::high_resolution_clock::time_point& start_time, + const timer_t& timer, const convergence_information_t& convergence_information, const pdlp_termination_status_t& 
termination_status, bool is_average = false); void compute_initial_step_size(); void compute_initial_primal_weight(); - std::optional> check_termination( - const std::chrono::high_resolution_clock::time_point& start_time); - std::optional> check_limits( - const std::chrono::high_resolution_clock::time_point& start_time); + std::optional> check_termination(const timer_t& timer); + std::optional> check_limits(const timer_t& timer); void record_best_primal_so_far(const detail::pdlp_termination_strategy_t& current, const detail::pdlp_termination_strategy_t& average, const pdlp_termination_status_t& termination_current, @@ -212,7 +210,6 @@ class pdlp_solver_t { // Only used if save_best_primal_so_far is toggeled optimization_problem_solution_t best_primal_solution_so_far; primal_quality_adapter_t best_primal_quality_so_far_; - // Flag to indicate if solver is being called from MIP. No logging is done in this case. bool inside_mip_{false}; }; diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu index e90312067b..95a62df2a6 100644 --- a/cpp/src/linear_programming/solve.cu +++ b/cpp/src/linear_programming/solve.cu @@ -41,7 +41,6 @@ #include #include #include -#include #include #include @@ -298,15 +297,15 @@ optimization_problem_solution_t convert_dual_simplex_sol( template std::tuple, dual_simplex::lp_status_t, f_t, f_t, f_t> run_dual_simplex(dual_simplex::user_problem_t& user_problem, - pdlp_solver_settings_t const& settings) + pdlp_solver_settings_t const& settings, + const timer_t& timer) { - auto start_solver = std::chrono::high_resolution_clock::now(); - + timer_t timer_dual_simplex(timer.remaining_time()); f_t norm_user_objective = dual_simplex::vector_norm2(user_problem.objective); f_t norm_rhs = dual_simplex::vector_norm2(user_problem.rhs); dual_simplex::simplex_solver_settings_t dual_simplex_settings; - dual_simplex_settings.time_limit = settings.time_limit; + dual_simplex_settings.time_limit = timer.remaining_time(); 
dual_simplex_settings.iteration_limit = settings.iteration_limit; dual_simplex_settings.concurrent_halt = settings.concurrent_halt; if (dual_simplex_settings.concurrent_halt != nullptr) { @@ -318,10 +317,9 @@ run_dual_simplex(dual_simplex::user_problem_t& user_problem, auto status = dual_simplex::solve_linear_program(user_problem, dual_simplex_settings, solution); - auto end = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(end - start_solver); - - CUOPT_LOG_INFO("Dual simplex finished in %.2f seconds", duration.count() / 1000.0); + CUOPT_LOG_INFO("Dual simplex finished in %.2f seconds, total time %.2f", + timer_dual_simplex.elapsed_time(), + timer.elapsed_time()); if (settings.concurrent_halt != nullptr && (status == dual_simplex::lp_status_t::OPTIMAL || status == dual_simplex::lp_status_t::UNBOUNDED || @@ -330,17 +328,19 @@ run_dual_simplex(dual_simplex::user_problem_t& user_problem, settings.concurrent_halt->store(1, std::memory_order_release); } - return {std::move(solution), status, duration.count() / 1000.0, norm_user_objective, norm_rhs}; + return {std::move(solution), status, timer.elapsed_time(), norm_user_objective, norm_rhs}; } template optimization_problem_solution_t run_dual_simplex( - detail::problem_t& problem, pdlp_solver_settings_t const& settings) + detail::problem_t& problem, + pdlp_solver_settings_t const& settings, + const timer_t& timer) { // Convert data structures to dual simplex format and back dual_simplex::user_problem_t dual_simplex_problem = cuopt_problem_to_simplex_problem(problem); - auto sol_dual_simplex = run_dual_simplex(dual_simplex_problem, settings); + auto sol_dual_simplex = run_dual_simplex(dual_simplex_problem, settings, timer); return convert_dual_simplex_sol(problem, std::get<0>(sol_dual_simplex), std::get<1>(sol_dual_simplex), @@ -353,7 +353,7 @@ template static optimization_problem_solution_t run_pdlp_solver( detail::problem_t& problem, pdlp_solver_settings_t const& settings, - 
const std::chrono::high_resolution_clock::time_point& start_time, + const timer_t& timer, bool is_batch_mode) { if (problem.n_constraints == 0) { @@ -362,26 +362,28 @@ static optimization_problem_solution_t run_pdlp_solver( problem.handle_ptr->get_stream()}; } detail::pdlp_solver_t solver(problem, settings, is_batch_mode); - return solver.run_solver(start_time); + return solver.run_solver(timer); } template optimization_problem_solution_t run_pdlp(detail::problem_t& problem, pdlp_solver_settings_t const& settings, + const timer_t& timer, bool is_batch_mode) { auto start_solver = std::chrono::high_resolution_clock::now(); f_t start_time = dual_simplex::tic(); - auto sol = run_pdlp_solver(problem, settings, start_solver, is_batch_mode); - auto end = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(end - start_solver); - sol.set_solve_time(duration.count() / 1000.0); + timer_t timer_pdlp(timer.remaining_time()); + auto sol = run_pdlp_solver(problem, settings, timer, is_batch_mode); + auto pdlp_solve_time = timer_pdlp.elapsed_time(); + sol.set_solve_time(timer.elapsed_time()); CUOPT_LOG_INFO("PDLP finished"); if (sol.get_termination_status() != pdlp_termination_status_t::ConcurrentLimit) { - CUOPT_LOG_INFO("Status: %s Objective: %.8e Iterations: %d Time: %.3fs", + CUOPT_LOG_INFO("Status: %s Objective: %.8e Iterations: %d Time: %.3fs, Total time %.3fs", sol.get_termination_status_string().c_str(), sol.get_objective_value(), sol.get_additional_termination_information().number_of_steps_taken, + pdlp_solve_time, sol.get_solve_time()); } @@ -394,7 +396,7 @@ optimization_problem_solution_t run_pdlp(detail::problem_t& dual_simplex::lp_solution_t initial_solution(1, 1); translate_to_crossover_problem(problem, sol, lp, initial_solution); dual_simplex::simplex_solver_settings_t dual_simplex_settings; - dual_simplex_settings.time_limit = settings.time_limit; + dual_simplex_settings.time_limit = timer.remaining_time(); 
dual_simplex_settings.iteration_limit = settings.iteration_limit; dual_simplex_settings.concurrent_halt = settings.concurrent_halt; dual_simplex::lp_solution_t vertex_solution(lp.num_rows, lp.num_cols); @@ -460,12 +462,13 @@ void run_dual_simplex_thread( pdlp_solver_settings_t const& settings, std::unique_ptr< std::tuple, dual_simplex::lp_status_t, f_t, f_t, f_t>>& - sol_ptr) + sol_ptr, + const timer_t& timer) { // We will return the solution from the thread as a unique_ptr sol_ptr = std::make_unique< std::tuple, dual_simplex::lp_status_t, f_t, f_t, f_t>>( - run_dual_simplex(problem, settings)); + run_dual_simplex(problem, settings, timer)); } template @@ -473,10 +476,11 @@ optimization_problem_solution_t run_concurrent( const optimization_problem_t& op_problem, detail::problem_t& problem, pdlp_solver_settings_t const& settings, + const timer_t& timer, bool is_batch_mode) { CUOPT_LOG_INFO("Running concurrent\n"); - f_t start_time = dual_simplex::tic(); + timer_t timer_concurrent(timer.remaining_time()); // Copy the settings so that we can set the concurrent halt pointer pdlp_solver_settings_t settings_pdlp(settings, @@ -498,10 +502,11 @@ optimization_problem_solution_t run_concurrent( std::thread dual_simplex_thread(run_dual_simplex_thread, std::ref(dual_simplex_problem), std::ref(settings_pdlp), - std::ref(sol_dual_simplex_ptr)); + std::ref(sol_dual_simplex_ptr), + std::ref(timer)); // Run pdlp in the main thread - auto sol_pdlp = run_pdlp(problem, settings_pdlp, is_batch_mode); + auto sol_pdlp = run_pdlp(problem, settings_pdlp, timer, is_batch_mode); // Wait for dual simplex thread to finish dual_simplex_thread.join(); @@ -514,8 +519,9 @@ optimization_problem_solution_t run_concurrent( std::get<3>(*sol_dual_simplex_ptr), std::get<4>(*sol_dual_simplex_ptr)); - f_t end_time = dual_simplex::toc(start_time); - CUOPT_LOG_INFO("Concurrent time: %.3fs", end_time); + f_t end_time = timer.elapsed_time(); + CUOPT_LOG_INFO( + "Concurrent time: %.3fs, total time %.3fs", 
timer_concurrent.elapsed_time(), end_time); // Check status to see if we should return the pdlp solution or the dual simplex solution if (sol_dual_simplex.get_termination_status() == pdlp_termination_status_t::Optimal || sol_dual_simplex.get_termination_status() == pdlp_termination_status_t::PrimalInfeasible || @@ -546,14 +552,15 @@ optimization_problem_solution_t solve_lp_with_method( const optimization_problem_t& op_problem, detail::problem_t& problem, pdlp_solver_settings_t const& settings, + const timer_t& timer, bool is_batch_mode) { if (settings.method == method_t::DualSimplex) { - return run_dual_simplex(problem, settings); + return run_dual_simplex(problem, settings, timer); } else if (settings.method == method_t::Concurrent) { - return run_concurrent(op_problem, problem, settings, is_batch_mode); + return run_concurrent(op_problem, problem, settings, timer, is_batch_mode); } else { - return run_pdlp(problem, settings, is_batch_mode); + return run_pdlp(problem, settings, timer, is_batch_mode); } } @@ -583,7 +590,7 @@ optimization_problem_solution_t solve_lp(optimization_problem_t::check_initial_solution_representation(op_problem, settings); } - auto presolve_timer = cuopt::timer_t(settings.time_limit); + auto lp_timer = cuopt::timer_t(settings.time_limit); detail::problem_t problem(op_problem); double presolve_time = 0.0; @@ -595,7 +602,7 @@ optimization_problem_solution_t solve_lp(optimization_problem_t>(); auto [reduced_problem, feasible] = presolver->apply(op_problem, @@ -608,7 +615,7 @@ optimization_problem_solution_t solve_lp(optimization_problem_tget_stream()); } problem = detail::problem_t(reduced_problem); - presolve_time = presolve_timer.elapsed_time(); + presolve_time = lp_timer.elapsed_time(); CUOPT_LOG_INFO("Third party presolve time: %f", presolve_time); } @@ -632,7 +639,7 @@ optimization_problem_solution_t solve_lp(optimization_problem_tget_stream()); - auto solution = solve_lp_with_method(op_problem, problem, settings, is_batch_mode); + auto 
solution = solve_lp_with_method(op_problem, problem, settings, lp_timer, is_batch_mode); if (run_presolve) { auto primal_solution = cuopt::device_copy(solution.get_primal_solution(), @@ -660,8 +667,6 @@ optimization_problem_solution_t solve_lp(optimization_problem_t::signaling_NaN()); auto full_stats = solution.get_additional_termination_information(); - // add third party presolve time to cuopt presolve time - full_stats.solve_time += presolve_time; // Create a new solution with the full problem solution solution = optimization_problem_solution_t(primal_solution, @@ -793,6 +798,7 @@ optimization_problem_solution_t solve_lp( const optimization_problem_t& op_problem, \ detail::problem_t& problem, \ pdlp_solver_settings_t const& settings, \ + const timer_t& timer, \ bool is_batch_mode); \ \ template optimization_problem_t mps_data_model_to_optimization_problem( \ diff --git a/cpp/src/linear_programming/solve.cuh b/cpp/src/linear_programming/solve.cuh index 3024d6774b..3098fd1b96 100644 --- a/cpp/src/linear_programming/solve.cuh +++ b/cpp/src/linear_programming/solve.cuh @@ -35,6 +35,7 @@ cuopt::linear_programming::optimization_problem_solution_t solve_lp_wi const optimization_problem_t& op_problem, detail::problem_t& problem, pdlp_solver_settings_t const& settings, + const timer_t& timer, bool is_batch_mode = false); } // namespace cuopt::linear_programming diff --git a/cpp/src/mip/presolve/third_party_presolve.cpp b/cpp/src/mip/presolve/third_party_presolve.cpp index aa32bd8716..bc6ebdffa7 100644 --- a/cpp/src/mip/presolve/third_party_presolve.cpp +++ b/cpp/src/mip/presolve/third_party_presolve.cpp @@ -343,6 +343,7 @@ void set_presolve_options(papilo::Presolve& presolver, { presolver.getPresolveOptions().tlim = time_limit; presolver.getPresolveOptions().threads = num_cpu_threads; // user setting or 0 (automatic) + presolver.getPresolveOptions().feastol = 1e-5; } template diff --git a/cpp/src/mip/relaxed_lp/relaxed_lp.cu b/cpp/src/mip/relaxed_lp/relaxed_lp.cu index 
d7a337dd28..790c50b179 100644 --- a/cpp/src/mip/relaxed_lp/relaxed_lp.cu +++ b/cpp/src/mip/relaxed_lp/relaxed_lp.cu @@ -87,7 +87,7 @@ optimization_problem_solution_t get_relaxed_lp_solution( // before LP flush the logs as it takes quite some time cuopt::default_logger().flush(); // temporarily add timer - auto start_time = std::chrono::high_resolution_clock::now(); + auto start_time = timer_t(pdlp_settings.time_limit); lp_solver.set_inside_mip(true); auto solver_response = lp_solver.run_solver(start_time); diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu index 0f2117991f..5e27a65eaf 100644 --- a/cpp/src/mip/solver.cu +++ b/cpp/src/mip/solver.cu @@ -132,10 +132,11 @@ solution_t mip_solver_t::run_solver() CUOPT_LOG_INFO("Problem reduced to a LP, running concurrent LP"); pdlp_solver_settings_t settings{}; settings.time_limit = timer_.remaining_time(); + auto lp_timer = timer_t(settings.time_limit); settings.method = method_t::Concurrent; auto opt_sol = solve_lp_with_method( - *context.problem_ptr->original_problem_ptr, *context.problem_ptr, settings); + *context.problem_ptr->original_problem_ptr, *context.problem_ptr, settings, lp_timer); solution_t sol(*context.problem_ptr); sol.copy_new_assignment(host_copy(opt_sol.get_primal_solution())); diff --git a/cpp/tests/linear_programming/pdlp_test.cu b/cpp/tests/linear_programming/pdlp_test.cu index 2598561211..de40d8d322 100644 --- a/cpp/tests/linear_programming/pdlp_test.cu +++ b/cpp/tests/linear_programming/pdlp_test.cu @@ -279,8 +279,8 @@ TEST(pdlp_class, initial_solution_test) { cuopt::linear_programming::detail::pdlp_solver_t solver(problem, solver_settings); - auto start_solver = std::chrono::high_resolution_clock::now(); - solver.run_solver(start_solver); + auto pdlp_timer = timer_t(solver_settings.time_limit); + solver.run_solver(pdlp_timer); RAFT_CUDA_TRY(cudaStreamSynchronize(handle_.get_stream())); EXPECT_NEAR(initial_step_size_afiro, solver.get_step_size_h(), factor_tolerance); 
EXPECT_NEAR(initial_primal_weight_afiro, solver.get_primal_weight_h(), factor_tolerance); @@ -290,36 +290,36 @@ TEST(pdlp_class, initial_solution_test) // scale on initial option is not toggled { cuopt::linear_programming::detail::pdlp_solver_t solver(problem, solver_settings); - auto start_solver = std::chrono::high_resolution_clock::now(); + auto pdlp_timer = timer_t(solver_settings.time_limit); std::vector initial_primal(op_problem.get_n_variables(), 1); auto d_initial_primal = device_copy(initial_primal, handle_.get_stream()); solver.set_initial_primal_solution(d_initial_primal); - solver.run_solver(start_solver); + solver.run_solver(pdlp_timer); RAFT_CUDA_TRY(cudaStreamSynchronize(handle_.get_stream())); EXPECT_NEAR(initial_step_size_afiro, solver.get_step_size_h(), factor_tolerance); EXPECT_NEAR(initial_primal_weight_afiro, solver.get_primal_weight_h(), factor_tolerance); } { cuopt::linear_programming::detail::pdlp_solver_t solver(problem, solver_settings); - auto start_solver = std::chrono::high_resolution_clock::now(); + auto pdlp_timer = timer_t(solver_settings.time_limit); std::vector initial_dual(op_problem.get_n_constraints(), 1); auto d_initial_dual = device_copy(initial_dual, handle_.get_stream()); solver.set_initial_dual_solution(d_initial_dual); - solver.run_solver(start_solver); + solver.run_solver(pdlp_timer); RAFT_CUDA_TRY(cudaStreamSynchronize(handle_.get_stream())); EXPECT_NEAR(initial_step_size_afiro, solver.get_step_size_h(), factor_tolerance); EXPECT_NEAR(initial_primal_weight_afiro, solver.get_primal_weight_h(), factor_tolerance); } { cuopt::linear_programming::detail::pdlp_solver_t solver(problem, solver_settings); - auto start_solver = std::chrono::high_resolution_clock::now(); + auto pdlp_timer = timer_t(solver_settings.time_limit); std::vector initial_primal(op_problem.get_n_variables(), 1); auto d_initial_primal = device_copy(initial_primal, handle_.get_stream()); solver.set_initial_primal_solution(d_initial_primal); std::vector 
initial_dual(op_problem.get_n_constraints(), 1); auto d_initial_dual = device_copy(initial_dual, handle_.get_stream()); solver.set_initial_dual_solution(d_initial_dual); - solver.run_solver(start_solver); + solver.run_solver(pdlp_timer); RAFT_CUDA_TRY(cudaStreamSynchronize(handle_.get_stream())); EXPECT_NEAR(initial_step_size_afiro, solver.get_step_size_h(), factor_tolerance); EXPECT_NEAR(initial_primal_weight_afiro, solver.get_primal_weight_h(), factor_tolerance); @@ -328,9 +328,9 @@ TEST(pdlp_class, initial_solution_test) // Toggle the scale on initial solution while not providing should yield the same { cuopt::linear_programming::detail::pdlp_solver_t solver(problem, solver_settings); - auto start_solver = std::chrono::high_resolution_clock::now(); + auto pdlp_timer = timer_t(solver_settings.time_limit); cuopt::linear_programming::pdlp_hyper_params::update_step_size_on_initial_solution = true; - solver.run_solver(start_solver); + solver.run_solver(pdlp_timer); RAFT_CUDA_TRY(cudaStreamSynchronize(handle_.get_stream())); EXPECT_NEAR(initial_step_size_afiro, solver.get_step_size_h(), factor_tolerance); EXPECT_NEAR(initial_primal_weight_afiro, solver.get_primal_weight_h(), factor_tolerance); @@ -338,9 +338,9 @@ TEST(pdlp_class, initial_solution_test) } { cuopt::linear_programming::detail::pdlp_solver_t solver(problem, solver_settings); - auto start_solver = std::chrono::high_resolution_clock::now(); + auto pdlp_timer = timer_t(solver_settings.time_limit); cuopt::linear_programming::pdlp_hyper_params::update_primal_weight_on_initial_solution = true; - solver.run_solver(start_solver); + solver.run_solver(pdlp_timer); RAFT_CUDA_TRY(cudaStreamSynchronize(handle_.get_stream())); EXPECT_NEAR(initial_step_size_afiro, solver.get_step_size_h(), factor_tolerance); EXPECT_NEAR(initial_primal_weight_afiro, solver.get_primal_weight_h(), factor_tolerance); @@ -348,10 +348,10 @@ TEST(pdlp_class, initial_solution_test) } { cuopt::linear_programming::detail::pdlp_solver_t 
solver(problem, solver_settings); - auto start_solver = std::chrono::high_resolution_clock::now(); + auto pdlp_timer = timer_t(solver_settings.time_limit); cuopt::linear_programming::pdlp_hyper_params::update_primal_weight_on_initial_solution = true; cuopt::linear_programming::pdlp_hyper_params::update_step_size_on_initial_solution = true; - solver.run_solver(start_solver); + solver.run_solver(pdlp_timer); RAFT_CUDA_TRY(cudaStreamSynchronize(handle_.get_stream())); EXPECT_NEAR(initial_step_size_afiro, solver.get_step_size_h(), factor_tolerance); EXPECT_NEAR(initial_primal_weight_afiro, solver.get_primal_weight_h(), factor_tolerance); @@ -364,11 +364,11 @@ TEST(pdlp_class, initial_solution_test) { cuopt::linear_programming::pdlp_hyper_params::update_step_size_on_initial_solution = true; cuopt::linear_programming::detail::pdlp_solver_t solver(problem, solver_settings); - auto start_solver = std::chrono::high_resolution_clock::now(); + auto pdlp_timer = timer_t(solver_settings.time_limit); std::vector initial_primal(op_problem.get_n_variables(), 1); auto d_initial_primal = device_copy(initial_primal, handle_.get_stream()); solver.set_initial_primal_solution(d_initial_primal); - solver.run_solver(start_solver); + solver.run_solver(pdlp_timer); RAFT_CUDA_TRY(cudaStreamSynchronize(handle_.get_stream())); EXPECT_NEAR(initial_step_size_afiro, solver.get_step_size_h(), factor_tolerance); EXPECT_NEAR(initial_primal_weight_afiro, solver.get_primal_weight_h(), factor_tolerance); @@ -377,11 +377,11 @@ TEST(pdlp_class, initial_solution_test) { cuopt::linear_programming::pdlp_hyper_params::update_step_size_on_initial_solution = true; cuopt::linear_programming::detail::pdlp_solver_t solver(problem, solver_settings); - auto start_solver = std::chrono::high_resolution_clock::now(); + auto pdlp_timer = timer_t(solver_settings.time_limit); std::vector initial_dual(op_problem.get_n_constraints(), 1); auto d_initial_dual = device_copy(initial_dual, handle_.get_stream()); 
solver.set_initial_dual_solution(d_initial_dual); - solver.run_solver(start_solver); + solver.run_solver(pdlp_timer); RAFT_CUDA_TRY(cudaStreamSynchronize(handle_.get_stream())); EXPECT_NEAR(initial_step_size_afiro, solver.get_step_size_h(), factor_tolerance); EXPECT_NEAR(initial_primal_weight_afiro, solver.get_primal_weight_h(), factor_tolerance); @@ -393,11 +393,11 @@ TEST(pdlp_class, initial_solution_test) { cuopt::linear_programming::pdlp_hyper_params::update_primal_weight_on_initial_solution = true; cuopt::linear_programming::detail::pdlp_solver_t solver(problem, solver_settings); - auto start_solver = std::chrono::high_resolution_clock::now(); + auto pdlp_timer = timer_t(solver_settings.time_limit); std::vector initial_primal(op_problem.get_n_variables(), 1); auto d_initial_primal = device_copy(initial_primal, handle_.get_stream()); solver.set_initial_primal_solution(d_initial_primal); - solver.run_solver(start_solver); + solver.run_solver(pdlp_timer); EXPECT_NEAR(initial_step_size_afiro, solver.get_step_size_h(), factor_tolerance); EXPECT_NEAR(initial_primal_weight_afiro, solver.get_primal_weight_h(), factor_tolerance); cuopt::linear_programming::pdlp_hyper_params::update_primal_weight_on_initial_solution = false; @@ -405,11 +405,11 @@ TEST(pdlp_class, initial_solution_test) { cuopt::linear_programming::pdlp_hyper_params::update_primal_weight_on_initial_solution = true; cuopt::linear_programming::detail::pdlp_solver_t solver(problem, solver_settings); - auto start_solver = std::chrono::high_resolution_clock::now(); + auto pdlp_timer = timer_t(solver_settings.time_limit); std::vector initial_dual(op_problem.get_n_constraints(), 1); auto d_initial_dual = device_copy(initial_dual, handle_.get_stream()); solver.set_initial_dual_solution(d_initial_dual); - solver.run_solver(start_solver); + solver.run_solver(pdlp_timer); EXPECT_NEAR(initial_step_size_afiro, solver.get_step_size_h(), factor_tolerance); EXPECT_NEAR(initial_primal_weight_afiro, 
solver.get_primal_weight_h(), factor_tolerance); cuopt::linear_programming::pdlp_hyper_params::update_primal_weight_on_initial_solution = false; @@ -420,14 +420,14 @@ TEST(pdlp_class, initial_solution_test) { cuopt::linear_programming::pdlp_hyper_params::update_step_size_on_initial_solution = true; cuopt::linear_programming::detail::pdlp_solver_t solver(problem, solver_settings); - auto start_solver = std::chrono::high_resolution_clock::now(); + auto pdlp_timer = timer_t(solver_settings.time_limit); std::vector initial_primal(op_problem.get_n_variables(), 0); auto d_initial_primal = device_copy(initial_primal, handle_.get_stream()); solver.set_initial_primal_solution(d_initial_primal); std::vector initial_dual(op_problem.get_n_constraints(), 0); auto d_initial_dual = device_copy(initial_dual, handle_.get_stream()); solver.set_initial_dual_solution(d_initial_dual); - solver.run_solver(start_solver); + solver.run_solver(pdlp_timer); EXPECT_NEAR(initial_step_size_afiro, solver.get_step_size_h(), factor_tolerance); EXPECT_NEAR(initial_primal_weight_afiro, solver.get_primal_weight_h(), factor_tolerance); cuopt::linear_programming::pdlp_hyper_params::update_step_size_on_initial_solution = false; @@ -438,11 +438,11 @@ TEST(pdlp_class, initial_solution_test) { cuopt::linear_programming::pdlp_hyper_params::update_primal_weight_on_initial_solution = true; cuopt::linear_programming::detail::pdlp_solver_t solver(problem, solver_settings); - auto start_solver = std::chrono::high_resolution_clock::now(); + auto pdlp_timer = timer_t(solver_settings.time_limit); std::vector initial_primal(op_problem.get_n_variables(), 0); auto d_initial_primal = device_copy(initial_primal, handle_.get_stream()); solver.set_initial_primal_solution(d_initial_primal); - solver.run_solver(start_solver); + solver.run_solver(pdlp_timer); EXPECT_NEAR(initial_step_size_afiro, solver.get_step_size_h(), factor_tolerance); EXPECT_NEAR(initial_primal_weight_afiro, solver.get_primal_weight_h(), 
factor_tolerance); cuopt::linear_programming::pdlp_hyper_params::update_primal_weight_on_initial_solution = false; @@ -450,11 +450,11 @@ TEST(pdlp_class, initial_solution_test) { cuopt::linear_programming::pdlp_hyper_params::update_primal_weight_on_initial_solution = true; cuopt::linear_programming::detail::pdlp_solver_t solver(problem, solver_settings); - auto start_solver = std::chrono::high_resolution_clock::now(); + auto pdlp_timer = timer_t(solver_settings.time_limit); std::vector initial_dual(op_problem.get_n_constraints(), 0); auto d_initial_dual = device_copy(initial_dual, handle_.get_stream()); solver.set_initial_dual_solution(d_initial_dual); - solver.run_solver(start_solver); + solver.run_solver(pdlp_timer); EXPECT_NEAR(initial_step_size_afiro, solver.get_step_size_h(), factor_tolerance); EXPECT_NEAR(initial_primal_weight_afiro, solver.get_primal_weight_h(), factor_tolerance); cuopt::linear_programming::pdlp_hyper_params::update_primal_weight_on_initial_solution = false; @@ -462,14 +462,14 @@ TEST(pdlp_class, initial_solution_test) { cuopt::linear_programming::pdlp_hyper_params::update_primal_weight_on_initial_solution = true; cuopt::linear_programming::detail::pdlp_solver_t solver(problem, solver_settings); - auto start_solver = std::chrono::high_resolution_clock::now(); + auto pdlp_timer = timer_t(solver_settings.time_limit); std::vector initial_primal(op_problem.get_n_variables(), 0); auto d_initial_primal = device_copy(initial_primal, handle_.get_stream()); solver.set_initial_primal_solution(d_initial_primal); std::vector initial_dual(op_problem.get_n_constraints(), 0); auto d_initial_dual = device_copy(initial_dual, handle_.get_stream()); solver.set_initial_dual_solution(d_initial_dual); - solver.run_solver(start_solver); + solver.run_solver(pdlp_timer); EXPECT_NEAR(initial_step_size_afiro, solver.get_step_size_h(), factor_tolerance); EXPECT_NEAR(initial_primal_weight_afiro, solver.get_primal_weight_h(), factor_tolerance); 
cuopt::linear_programming::pdlp_hyper_params::update_primal_weight_on_initial_solution = false; @@ -480,14 +480,14 @@ TEST(pdlp_class, initial_solution_test) { cuopt::linear_programming::pdlp_hyper_params::update_primal_weight_on_initial_solution = true; cuopt::linear_programming::detail::pdlp_solver_t solver(problem, solver_settings); - auto start_solver = std::chrono::high_resolution_clock::now(); + auto pdlp_timer = timer_t(solver_settings.time_limit); std::vector initial_primal(op_problem.get_n_variables(), 1); auto d_initial_primal = device_copy(initial_primal, handle_.get_stream()); solver.set_initial_primal_solution(d_initial_primal); std::vector initial_dual(op_problem.get_n_constraints(), 1); auto d_initial_dual = device_copy(initial_dual, handle_.get_stream()); solver.set_initial_dual_solution(d_initial_dual); - solver.run_solver(start_solver); + solver.run_solver(pdlp_timer); EXPECT_NEAR(initial_step_size_afiro, solver.get_step_size_h(), factor_tolerance); EXPECT_NOT_NEAR(initial_primal_weight_afiro, solver.get_primal_weight_h(), factor_tolerance); cuopt::linear_programming::pdlp_hyper_params::update_primal_weight_on_initial_solution = false; @@ -495,14 +495,14 @@ TEST(pdlp_class, initial_solution_test) { cuopt::linear_programming::pdlp_hyper_params::update_step_size_on_initial_solution = true; cuopt::linear_programming::detail::pdlp_solver_t solver(problem, solver_settings); - auto start_solver = std::chrono::high_resolution_clock::now(); + auto pdlp_timer = timer_t(solver_settings.time_limit); std::vector initial_primal(op_problem.get_n_variables(), 1); auto d_initial_primal = device_copy(initial_primal, handle_.get_stream()); solver.set_initial_primal_solution(d_initial_primal); std::vector initial_dual(op_problem.get_n_constraints(), 1); auto d_initial_dual = device_copy(initial_dual, handle_.get_stream()); solver.set_initial_dual_solution(d_initial_dual); - solver.run_solver(start_solver); + solver.run_solver(pdlp_timer); 
EXPECT_NOT_NEAR(initial_step_size_afiro, solver.get_step_size_h(), factor_tolerance); EXPECT_NEAR(initial_primal_weight_afiro, solver.get_primal_weight_h(), factor_tolerance); cuopt::linear_programming::pdlp_hyper_params::update_step_size_on_initial_solution = false; @@ -511,14 +511,14 @@ TEST(pdlp_class, initial_solution_test) cuopt::linear_programming::pdlp_hyper_params::update_primal_weight_on_initial_solution = true; cuopt::linear_programming::pdlp_hyper_params::update_step_size_on_initial_solution = true; cuopt::linear_programming::detail::pdlp_solver_t solver(problem, solver_settings); - auto start_solver = std::chrono::high_resolution_clock::now(); + auto pdlp_timer = timer_t(solver_settings.time_limit); std::vector initial_primal(op_problem.get_n_variables(), 1); auto d_initial_primal = device_copy(initial_primal, handle_.get_stream()); solver.set_initial_primal_solution(d_initial_primal); std::vector initial_dual(op_problem.get_n_constraints(), 1); auto d_initial_dual = device_copy(initial_dual, handle_.get_stream()); solver.set_initial_dual_solution(d_initial_dual); - solver.run_solver(start_solver); + solver.run_solver(pdlp_timer); EXPECT_NOT_NEAR(initial_step_size_afiro, solver.get_step_size_h(), factor_tolerance); EXPECT_NOT_NEAR(initial_primal_weight_afiro, solver.get_primal_weight_h(), factor_tolerance); cuopt::linear_programming::pdlp_hyper_params::update_primal_weight_on_initial_solution = false; @@ -551,12 +551,12 @@ TEST(pdlp_class, initial_primal_weight_step_size_test) // Check setting an initial primal weight and step size { cuopt::linear_programming::detail::pdlp_solver_t solver(problem, solver_settings); - auto start_solver = std::chrono::high_resolution_clock::now(); + auto pdlp_timer = timer_t(solver_settings.time_limit); constexpr double test_initial_step_size = 1.0; constexpr double test_initial_primal_weight = 2.0; solver.set_initial_primal_weight(test_initial_primal_weight); solver.set_initial_step_size(test_initial_step_size); - 
solver.run_solver(start_solver); + solver.run_solver(pdlp_timer); RAFT_CUDA_TRY(cudaStreamSynchronize(handle_.get_stream())); EXPECT_EQ(test_initial_step_size, solver.get_step_size_h()); EXPECT_EQ(test_initial_primal_weight, solver.get_primal_weight_h()); @@ -569,27 +569,27 @@ TEST(pdlp_class, initial_primal_weight_step_size_test) cuopt::linear_programming::pdlp_hyper_params::update_primal_weight_on_initial_solution = true; cuopt::linear_programming::pdlp_hyper_params::update_step_size_on_initial_solution = true; cuopt::linear_programming::detail::pdlp_solver_t solver(problem, solver_settings); - auto start_solver = std::chrono::high_resolution_clock::now(); + auto pdlp_timer = timer_t(solver_settings.time_limit); std::vector initial_primal(op_problem.get_n_variables(), 1); auto d_initial_primal = device_copy(initial_primal, handle_.get_stream()); solver.set_initial_primal_solution(d_initial_primal); std::vector initial_dual(op_problem.get_n_constraints(), 1); auto d_initial_dual = device_copy(initial_dual, handle_.get_stream()); solver.set_initial_dual_solution(d_initial_dual); - solver.run_solver(start_solver); + solver.run_solver(pdlp_timer); const double previous_step_size = solver.get_step_size_h(); const double previous_primal_weight = solver.get_primal_weight_h(); // Start again but with an initial and check the impact cuopt::linear_programming::detail::pdlp_solver_t solver2(problem, solver_settings); - start_solver = std::chrono::high_resolution_clock::now(); + pdlp_timer = timer_t(solver_settings.time_limit); constexpr double test_initial_step_size = 1.0; constexpr double test_initial_primal_weight = 2.0; solver2.set_initial_primal_weight(test_initial_primal_weight); solver2.set_initial_step_size(test_initial_step_size); solver2.set_initial_primal_solution(d_initial_primal); solver2.set_initial_dual_solution(d_initial_dual); - solver2.run_solver(start_solver); + solver2.run_solver(pdlp_timer); RAFT_CUDA_TRY(cudaStreamSynchronize(handle_.get_stream())); 
const double sovler2_step_size = solver2.get_step_size_h(); const double sovler2_primal_weight = solver2.get_primal_weight_h(); @@ -598,14 +598,14 @@ TEST(pdlp_class, initial_primal_weight_step_size_test) // Again but with an initial k which should change the step size only, not the primal weight cuopt::linear_programming::detail::pdlp_solver_t solver3(problem, solver_settings); - start_solver = std::chrono::high_resolution_clock::now(); + pdlp_timer = timer_t(solver_settings.time_limit); solver3.set_initial_primal_weight(test_initial_primal_weight); solver3.set_initial_step_size(test_initial_step_size); solver3.set_initial_primal_solution(d_initial_primal); solver3.set_initial_k(10000); solver3.set_initial_dual_solution(d_initial_dual); solver3.set_initial_dual_solution(d_initial_dual); - solver3.run_solver(start_solver); + solver3.run_solver(pdlp_timer); RAFT_CUDA_TRY(cudaStreamSynchronize(handle_.get_stream())); EXPECT_NOT_NEAR(sovler2_step_size, solver3.get_step_size_h(), factor_tolerance); EXPECT_NEAR(sovler2_primal_weight, solver3.get_primal_weight_h(), factor_tolerance); From dfe4966091bbbb53b23b91621066c95ec18466a4 Mon Sep 17 00:00:00 2001 From: Ramakrishnap <42624703+rgsl888prabhu@users.noreply.github.com> Date: Mon, 15 Sep 2025 12:16:51 -0500 Subject: [PATCH 24/33] Doc update for container version update and add nvidia-cuda-runtime as dependency (#384) Adds doc updates for docker container version update Also add nvidia-cuda-runtime as dependency so we don't need to install it explicitly. 
Authors: - Ramakrishnap (https://github.com/rgsl888prabhu) Approvers: - Cindy Wilkinson (https://github.com/cwilkinson76) - Trevor McKay (https://github.com/tmckayus) URL: https://github.com/NVIDIA/cuopt/pull/384 --- README.md | 9 +++------ ci/docker/Dockerfile | 4 +--- dependencies.yaml | 5 +++-- docs/cuopt/source/cuopt-c/quick-start.rst | 10 ++-------- docs/cuopt/source/cuopt-python/quick-start.rst | 18 ++++++------------ docs/cuopt/source/cuopt-server/quick-start.rst | 10 ++++------ python/libcuopt/pyproject.toml | 1 + 7 files changed, 20 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index 8c73898506..e3a165f058 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,6 @@ Development wheels are available as nightlies, please update `--extra-index-url` pip install --pre \ --extra-index-url=https://pypi.nvidia.com \ --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/ \ - nvidia-cuda-runtime-cu12=12.9.* \ cuopt-server-cu12==25.10.* cuopt-sh-client==25.10.* ``` @@ -82,7 +81,6 @@ For CUDA 13.x: ```bash pip install \ --extra-index-url=https://pypi.nvidia.com \ - nvidia-cuda-runtime==13.0.* \ cuopt-server-cu13==25.10.* cuopt-sh-client==25.10.* ``` @@ -91,7 +89,6 @@ Development wheels are available as nightlies, please update `--extra-index-url` pip install --pre \ --extra-index-url=https://pypi.nvidia.com \ --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/ \ - nvidia-cuda-runtime==13.0.* \ cuopt-server-cu13==25.10.* cuopt-sh-client==25.10.* ``` @@ -115,13 +112,13 @@ Users can pull the cuOpt container from the NVIDIA container registry. ```bash # For CUDA 12.x -docker pull nvidia/cuopt:latest-cuda12.9-py312 +docker pull nvidia/cuopt:latest-cuda12.9-py3.13 # For CUDA 13.x -docker pull nvidia/cuopt:latest-cuda13.0-py312 +docker pull nvidia/cuopt:latest-cuda13.0-py3.13 ``` -Note: The ``latest`` tag is the latest stable release of cuOpt. 
If you want to use a specific version, you can use the ``-cuda12.9-py312`` or ``-cuda13.0-py312`` tag. For example, to use cuOpt 25.5.0, you can use the ``25.5.0-cuda12.8-py312`` or ``25.5.0-cuda13.0-py312`` tag. Please refer to `cuOpt dockerhub page `_ for the list of available tags. +Note: The ``latest`` tag is the latest stable release of cuOpt. If you want to use a specific version, you can use the ``-cuda12.9-py3.13`` or ``-cuda13.0-py3.13`` tag. For example, to use cuOpt 25.10.0, you can use the ``25.10.0-cuda12.9-py3.13`` or ``25.10.0-cuda13.0-py3.13`` tag. Please refer to `cuOpt dockerhub page `_ for the list of available tags. More information about the cuOpt container can be found [here](https://docs.nvidia.com/cuopt/user-guide/latest/cuopt-server/quick-start.html#container-from-docker-hub). diff --git a/ci/docker/Dockerfile b/ci/docker/Dockerfile index e7f023040b..d7629fcd0c 100644 --- a/ci/docker/Dockerfile +++ b/ci/docker/Dockerfile @@ -62,9 +62,7 @@ RUN \ --extra-index-url https://pypi.anaconda.org/rapidsai-wheels-nightly/simple \ --no-cache-dir \ "cuopt-server-${cuda_suffix}==${CUOPT_VER}" \ - "cuopt-sh-client==${CUOPT_VER}" \ - "cuda-toolkit[cudart]==${cuda_major_minor}.*" \ - ${nvidia_cuda_runtime_pkg} && \ + "cuopt-sh-client==${CUOPT_VER}" && \ python -m pip list # Remove gcc to save space, gcc was required for building psutils diff --git a/dependencies.yaml b/dependencies.yaml index 5abb3197e8..d153a02915 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -750,12 +750,12 @@ dependencies: cuda: "12.*" use_cuda_wheels: "true" packages: - - cuda-toolkit[cublas,curand,cusolver,cusparse,nvtx]==12.* + - cuda-toolkit[cublas,cudart,curand,cusolver,cusparse,nvtx]==12.* - matrix: cuda: "13.*" use_cuda_wheels: "true" packages: - - cuda-toolkit[cublas,curand,cusolver,cusparse,nvtx]==13.* + - cuda-toolkit[cublas,cudart,curand,cusolver,cusparse,nvtx]==13.* # if use_cuda_wheels=false is provided, do not add dependencies on any CUDA wheels # (e.g. 
for DLFW and pip devcontainers) - matrix: @@ -765,6 +765,7 @@ dependencies: # (just as a source of documentation, as this populates pyproject.toml in source control) - matrix: packages: + - nvidia-cudart - nvidia-cublas - nvidia-curand - nvidia-cusparse diff --git a/docs/cuopt/source/cuopt-c/quick-start.rst b/docs/cuopt/source/cuopt-c/quick-start.rst index 4ae4e1ef8c..c3da7449ff 100644 --- a/docs/cuopt/source/cuopt-c/quick-start.rst +++ b/docs/cuopt/source/cuopt-c/quick-start.rst @@ -19,14 +19,10 @@ This wheel is a Python wrapper around the C++ library and eases installation and pip uninstall cuopt-thin-client # CUDA 13 - pip install --extra-index-url=https://pypi.nvidia.com \ - 'nvidia-cuda-runtime==13.0.*' \ - 'libcuopt-cu13==25.10.*' + pip install --extra-index-url=https://pypi.nvidia.com 'libcuopt-cu13==25.10.*' # CUDA 12 - pip install --extra-index-url=https://pypi.nvidia.com \ - 'nvidia-cuda-runtime-cu12==12.9.*' \ - 'libcuopt-cu12==25.10.*' + pip install --extra-index-url=https://pypi.nvidia.com 'libcuopt-cu12==25.10.*' .. note:: @@ -36,12 +32,10 @@ This wheel is a Python wrapper around the C++ library and eases installation and # CUDA 13 pip install --pre --extra-index-url=https://pypi.nvidia.com --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/ \ - 'nvidia-cuda-runtime==13.0.*' \ 'libcuopt-cu13==25.10.*' # CUDA 12 pip install --pre --extra-index-url=https://pypi.nvidia.com --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/ \ - 'nvidia-cuda-runtime-cu12==12.9.*' \ 'libcuopt-cu12==25.10.*' Conda diff --git a/docs/cuopt/source/cuopt-python/quick-start.rst b/docs/cuopt/source/cuopt-python/quick-start.rst index 65acea5db6..616b994d1a 100644 --- a/docs/cuopt/source/cuopt-python/quick-start.rst +++ b/docs/cuopt/source/cuopt-python/quick-start.rst @@ -13,14 +13,10 @@ pip .. 
code-block:: bash # CUDA 13 - pip install --extra-index-url=https://pypi.nvidia.com \ - 'nvidia-cuda-runtime==13.0.*' \ - 'cuopt-cu13==25.10.*' + pip install --extra-index-url=https://pypi.nvidia.com 'cuopt-cu13==25.10.*' # CUDA 12 - pip install --extra-index-url=https://pypi.nvidia.com \ - 'nvidia-cuda-runtime-cu12==12.9.*' \ - 'cuopt-cu12==25.10.*' + pip install --extra-index-url=https://pypi.nvidia.com 'cuopt-cu12==25.10.*' .. note:: @@ -30,12 +26,10 @@ pip # CUDA 13 pip install --pre --extra-index-url=https://pypi.nvidia.com --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/ \ - 'nvidia-cuda-runtime==13.0.*' \ 'cuopt-cu13==25.10.*' # CUDA 12 pip install --pre --extra-index-url=https://pypi.nvidia.com --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/ \ - 'nvidia-cuda-runtime-cu12==12.9.*' \ 'cuopt-cu12==25.10.*' @@ -63,19 +57,19 @@ NVIDIA cuOpt is also available as a container from Docker Hub: .. code-block:: bash - docker pull nvidia/cuopt:latest-cuda12.9-py3.12 + docker pull nvidia/cuopt:latest-cuda12.9-py3.13 .. note:: - The ``latest`` tag is the latest stable release of cuOpt. If you want to use a specific version, you can use the ``-cuda12.9-py3.12`` tag. For example, to use cuOpt 25.5.0, you can use the ``25.5.0-cuda12.9-py3.12`` tag. Please refer to `cuOpt dockerhub page `_ for the list of available tags. + The ``latest`` tag is the latest stable release of cuOpt. If you want to use a specific version, you can use the ``-cuda12.9-py3.13`` tag. For example, to use cuOpt 25.10.0, you can use the ``25.10.0-cuda12.9-py3.13`` tag. Please refer to `cuOpt dockerhub page `_ for the list of available tags. .. note:: - The nightly version of cuOpt is available as ``[VERSION]a-cuda12.9-py3.12`` tag. For example, to use cuOpt 25.8.0a, you can use the ``25.8.0a-cuda12.9-py3.12`` tag. + The nightly version of cuOpt is available as ``[VERSION]a-cuda12.9-py3.13`` tag. 
For example, to use cuOpt 25.10.0a, you can use the ``25.10.0a-cuda12.9-py3.13`` tag. Also the cuda version and python version might change in the future. Please refer to `cuOpt dockerhub page `_ for the list of available tags. The container includes both the Python API and self-hosted server components. To run the container: .. code-block:: bash - docker run --gpus all -it --rm nvidia/cuopt:latest-cuda12.9-py3.12 /bin/bash + docker run --gpus all -it --rm nvidia/cuopt:latest-cuda12.9-py3.13 /bin/bash This will start an interactive session with cuOpt pre-installed and ready to use. diff --git a/docs/cuopt/source/cuopt-server/quick-start.rst b/docs/cuopt/source/cuopt-server/quick-start.rst index 1e6fce235b..782feac834 100644 --- a/docs/cuopt/source/cuopt-server/quick-start.rst +++ b/docs/cuopt/source/cuopt-server/quick-start.rst @@ -29,13 +29,11 @@ pip # CUDA 13 pip install --pre --extra-index-url=https://pypi.nvidia.com --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/ \ - 'nvidia-cuda-runtime==13.0.*' \ 'cuopt-server-cu13==25.10.*' \ 'cuopt-sh-client==25.10.*' # CUDA 12 pip install --pre --extra-index-url=https://pypi.nvidia.com --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple/ \ - 'nvidia-cuda-runtime-cu12==12.9.*' \ 'cuopt-server-cu12==25.10.*' \ 'cuopt-sh-client==25.10.*' @@ -59,19 +57,19 @@ NVIDIA cuOpt is also available as a container from Docker Hub: .. code-block:: bash - docker pull nvidia/cuopt:latest-cuda12.9-py3.12 + docker pull nvidia/cuopt:latest-cuda12.9-py3.13 .. note:: - The ``latest`` tag is the latest stable release of cuOpt. If you want to use a specific version, you can use the ``-cuda12.9-py3.12`` tag. For example, to use cuOpt 25.5.0, you can use the ``25.5.0-cuda12.9-py3.12`` tag. Please refer to `cuOpt dockerhub page `_ for the list of available tags. + The ``latest`` tag is the latest stable release of cuOpt. If you want to use a specific version, you can use the ``-cuda12.9-py3.13`` tag. 
For example, to use cuOpt 25.10.0, you can use the ``25.10.0-cuda12.9-py3.13`` tag. Please refer to `cuOpt dockerhub page `_ for the list of available tags. The container includes both the Python API and self-hosted server components. To run the container: .. code-block:: bash - docker run --gpus all -it --rm -p 8000:8000 -e CUOPT_SERVER_PORT=8000 nvidia/cuopt:latest-cuda12.9-py3.12 + docker run --gpus all -it --rm -p 8000:8000 -e CUOPT_SERVER_PORT=8000 nvidia/cuopt:latest-cuda12.9-py3.13 .. note:: - The nightly version of cuOpt is available as ``[VERSION]a-cuda12.9-py3.12`` tag. For example, to use cuOpt 25.8.0a, you can use the ``25.8.0a-cuda12.9-py3.12`` tag. + The nightly version of cuOpt is available as ``[VERSION]a-cuda12.9-py3.13`` tag. For example, to use cuOpt 25.10.0a, you can use the ``25.10.0a-cuda12.9-py3.13`` tag. Also the cuda version and python version might change in the future. Please refer to `cuOpt dockerhub page `_ for the list of available tags. .. note:: Make sure you have the NVIDIA Container Toolkit installed on your system to enable GPU support in containers. See the `installation guide `_ for details. 
diff --git a/python/libcuopt/pyproject.toml b/python/libcuopt/pyproject.toml index 3bbefc2c17..93271addf9 100644 --- a/python/libcuopt/pyproject.toml +++ b/python/libcuopt/pyproject.toml @@ -46,6 +46,7 @@ dependencies = [ "cuopt-mps-parser==25.10.*,>=0.0.0a0", "librmm==25.10.*,>=0.0.0a0", "nvidia-cublas", + "nvidia-cudart", "nvidia-curand", "nvidia-cusolver", "nvidia-cusparse", From 49177a44dc8f916070b95c3747546e1b50b5047a Mon Sep 17 00:00:00 2001 From: Ramakrishnap <42624703+rgsl888prabhu@users.noreply.github.com> Date: Mon, 15 Sep 2025 14:26:55 -0500 Subject: [PATCH 25/33] Add video link to the docs and Readme (#393) Add video link to the docs and readme Authors: - Ramakrishnap (https://github.com/rgsl888prabhu) Approvers: - Trevor McKay (https://github.com/tmckayus) URL: https://github.com/NVIDIA/cuopt/pull/393 --- README.md | 1 + docs/cuopt/source/resources.rst | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/README.md b/README.md index e3a165f058..f3d040a813 100644 --- a/README.md +++ b/README.md @@ -140,3 +140,4 @@ Review the [CONTRIBUTING.md](CONTRIBUTING.md) file for information on how to con - [Examples and Notebooks](https://github.com/NVIDIA/cuopt-examples) - [Test cuopt with NVIDIA Launchable](https://brev.nvidia.com/launchable/deploy?launchableID=env-2qIG6yjGKDtdMSjXHcuZX12mDNJ): Examples notebooks are pulled and hosted on [NVIDIA Launchable](https://docs.nvidia.com/brev/latest/). - [Test cuopt on Google Colab](https://colab.research.google.com/github/nvidia/cuopt-examples/): Examples notebooks can be opened in Google Colab. Please note that you need to choose a `Runtime` as `GPU` in order to run the notebooks. 
+- [cuOpt Examples and Tutorial Videos](https://docs.nvidia.com/cuopt/user-guide/latest/resources.html#cuopt-examples-and-tutorials-videos) diff --git a/docs/cuopt/source/resources.rst b/docs/cuopt/source/resources.rst index 752ec2f3be..d8c98811c2 100644 --- a/docs/cuopt/source/resources.rst +++ b/docs/cuopt/source/resources.rst @@ -6,6 +6,14 @@ Resources `Sample Notebooks `_ ---------------------------------------------------------------------------------- + +cuOpt Examples and Tutorials Videos +----------------------------------- + +.. dropdown:: + + - `Quick Start to GPU-Accelerated Large-Scale Logistics and Supply Chain Optimization with NVIDIA cuOpt `_ + `Test cuopt with NVIDIA Launchable `_ ------------------------------------------------------------------------------------------------------------------------------ From 8d9587ee07f4e37cc4b4febaba9ff19ccdc12317 Mon Sep 17 00:00:00 2001 From: Ramakrishnap <42624703+rgsl888prabhu@users.noreply.github.com> Date: Tue, 16 Sep 2025 11:41:12 -0500 Subject: [PATCH 26/33] Add name to drop down for video link (#396) Add name to drop down for video link Authors: - Ramakrishnap (https://github.com/rgsl888prabhu) Approvers: - Cindy Wilkinson (https://github.com/cwilkinson76) - Ishika Roy (https://github.com/Iroy30) URL: https://github.com/NVIDIA/cuopt/pull/396 --- docs/cuopt/source/resources.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/cuopt/source/resources.rst b/docs/cuopt/source/resources.rst index d8c98811c2..9121800ee3 100644 --- a/docs/cuopt/source/resources.rst +++ b/docs/cuopt/source/resources.rst @@ -10,11 +10,11 @@ Resources cuOpt Examples and Tutorials Videos ----------------------------------- -.. dropdown:: +.. 
dropdown:: Tutorial List - `Quick Start to GPU-Accelerated Large-Scale Logistics and Supply Chain Optimization with NVIDIA cuOpt `_ -`Test cuopt with NVIDIA Launchable `_ +`Test cuOpt with NVIDIA Launchable `_ ------------------------------------------------------------------------------------------------------------------------------ `Test cuOpt on Google Colab `_ From 4609fbd5b6608785b5c93eee51aba0dc9c68fe50 Mon Sep 17 00:00:00 2001 From: Ishika Roy <41401566+Iroy30@users.noreply.github.com> Date: Tue, 16 Sep 2025 22:56:33 -0500 Subject: [PATCH 27/33] Add read/write MPS and relaxation to python API (#323) Add the ability to read from MPS , write to MPS and relax a MIP problem into an LP in python API ## Issue Authors: - Ishika Roy (https://github.com/Iroy30) - Trevor McKay (https://github.com/tmckayus) - Alice Boucher (https://github.com/aliceb-nv) Approvers: - Trevor McKay (https://github.com/tmckayus) - Rajesh Gandham (https://github.com/rg20) - Ramakrishnap (https://github.com/rgsl888prabhu) URL: https://github.com/NVIDIA/cuopt/pull/323 --- .../optimization_problem.hpp | 7 + .../cuopt/linear_programming/solve.hpp | 1 + .../utilities/cython_solve.hpp | 4 +- cpp/libmps_parser/CMakeLists.txt | 2 + .../include/mps_parser/data_model_view.hpp | 29 ++ .../include/mps_parser/mps_writer.hpp | 59 ++++ .../include/mps_parser/writer.hpp | 38 +++ cpp/libmps_parser/src/data_model_view.cpp | 25 ++ cpp/libmps_parser/src/mps_writer.cpp | 264 ++++++++++++++++++ cpp/libmps_parser/src/writer.cpp | 36 +++ .../optimization_problem.cu | 95 ++++++- cpp/src/linear_programming/solve.cu | 10 +- .../utilities/cython_solve.cu | 11 + cpp/src/mip/CMakeLists.txt | 2 +- cpp/src/mip/problem/problem.cu | 1 + cpp/src/mip/problem/problem.cuh | 2 +- cpp/src/mip/solve.cu | 2 +- cpp/tests/mip/doc_example_test.cu | 16 +- docs/cuopt/source/conf.py | 1 + .../cuopt_mps_parser/parser.pxd | 2 + .../cuopt_mps_parser/parser_wrapper.pyx | 2 + .../data_model/data_model.pxd | 13 + .../data_model/data_model.py 
| 32 +++ .../data_model/data_model_wrapper.pyx | 184 +++++++++++- .../cuopt/cuopt/linear_programming/problem.py | 207 +++++++++++--- .../solver/solver_wrapper.pyx | 132 +-------- .../linear_programming/test_python_API.py | 48 ++++ 27 files changed, 1035 insertions(+), 190 deletions(-) create mode 100644 cpp/libmps_parser/include/mps_parser/mps_writer.hpp create mode 100644 cpp/libmps_parser/include/mps_parser/writer.hpp create mode 100644 cpp/libmps_parser/src/mps_writer.cpp create mode 100644 cpp/libmps_parser/src/writer.cpp diff --git a/cpp/include/cuopt/linear_programming/optimization_problem.hpp b/cpp/include/cuopt/linear_programming/optimization_problem.hpp index db61723676..6d9ba5cda7 100644 --- a/cpp/include/cuopt/linear_programming/optimization_problem.hpp +++ b/cpp/include/cuopt/linear_programming/optimization_problem.hpp @@ -304,6 +304,13 @@ class optimization_problem_t { */ void set_row_names(const std::vector& row_names); + /** + * @brief Write the problem to an MPS formatted file + * + * @param[in] mps_file_path Path to the MPS file to write + */ + void write_to_mps(const std::string& mps_file_path); + i_t get_n_variables() const; i_t get_n_constraints() const; i_t get_nnz() const; diff --git a/cpp/include/cuopt/linear_programming/solve.hpp b/cpp/include/cuopt/linear_programming/solve.hpp index 04ee5530c0..11e8f9dcfa 100644 --- a/cpp/include/cuopt/linear_programming/solve.hpp +++ b/cpp/include/cuopt/linear_programming/solve.hpp @@ -25,6 +25,7 @@ #include #include #include +#include #include namespace cuopt::linear_programming { diff --git a/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp b/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp index eef185d0d0..46d672cb13 100644 --- a/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp +++ b/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp @@ -22,10 +22,10 @@ #include #include #include +#include #include #include - -#include +#include #include 
#include diff --git a/cpp/libmps_parser/CMakeLists.txt b/cpp/libmps_parser/CMakeLists.txt index d0cbd8a295..af09ebd2cd 100644 --- a/cpp/libmps_parser/CMakeLists.txt +++ b/cpp/libmps_parser/CMakeLists.txt @@ -78,7 +78,9 @@ add_library(mps_parser SHARED src/data_model_view.cpp src/mps_data_model.cpp src/mps_parser.cpp + src/mps_writer.cpp src/parser.cpp + src/writer.cpp src/utilities/cython_mps_parser.cpp ) diff --git a/cpp/libmps_parser/include/mps_parser/data_model_view.hpp b/cpp/libmps_parser/include/mps_parser/data_model_view.hpp index 17f74a6c2c..6b32d4fae3 100644 --- a/cpp/libmps_parser/include/mps_parser/data_model_view.hpp +++ b/cpp/libmps_parser/include/mps_parser/data_model_view.hpp @@ -186,6 +186,20 @@ class data_model_view_t { * @param[in] problem_name Problem name value. */ void set_problem_name(const std::string& problem_name); + /** + * @brief Set the variables names. + * @note Setting before calling the solver is optional. + * + * @param[in] variable_names Variable names values. + */ + void set_variable_names(const std::vector& variables_names); + /** + * @brief Set the row names. + * @note Setting before calling the solver is optional. + * + * @param[in] row_names Row names value. + */ + void set_row_names(const std::vector& row_names); /** * @brief Set the constraints lower bounds. 
* @note Setting before calling the solver is optional if you set the row type, else it's @@ -350,6 +364,19 @@ class data_model_view_t { */ span get_initial_dual_solution() const noexcept; + /** + * @brief Get the variable names + * + * @return span + */ + const std::vector& get_variable_names() const noexcept; + /** + * @brief Get the row names + * + * @return span + */ + const std::vector& get_row_names() const noexcept; + /** * @brief Get the problem name * @@ -404,6 +431,8 @@ class data_model_view_t { span row_types_; std::string objective_name_; std::string problem_name_; + std::vector variable_names_; + std::vector row_names_; span constraint_lower_bounds_; span constraint_upper_bounds_; diff --git a/cpp/libmps_parser/include/mps_parser/mps_writer.hpp b/cpp/libmps_parser/include/mps_parser/mps_writer.hpp new file mode 100644 index 0000000000..da919a68e9 --- /dev/null +++ b/cpp/libmps_parser/include/mps_parser/mps_writer.hpp @@ -0,0 +1,59 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include + +namespace cuopt::mps_parser { + +/** + * @brief Main writer class for MPS files + * + * @tparam f_t data type of the weights and variables + * @tparam i_t data type of the indices + */ +template +class mps_writer_t { + public: + /** + * @brief Ctor. Takes a data model view as input and writes it out as a MPS formatted file + * + * @param[in] problem Data model view to write + * @param[in] file Path to the MPS file to write + */ + mps_writer_t(const data_model_view_t& problem); + + /** + * @brief Writes the problem to an MPS formatted file + * + * @param[in] mps_file_path Path to the MPS file to write + */ + void write(const std::string& mps_file_path); + + private: + const data_model_view_t& problem_; +}; // class mps_writer_t + +} // namespace cuopt::mps_parser diff --git a/cpp/libmps_parser/include/mps_parser/writer.hpp b/cpp/libmps_parser/include/mps_parser/writer.hpp new file mode 100644 index 0000000000..8f193af137 --- /dev/null +++ b/cpp/libmps_parser/include/mps_parser/writer.hpp @@ -0,0 +1,38 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// TODO: we might want to eventually rename libmps_parser to libmps_io +// (or libcuopt_io if we want to support other hypothetical formats) +namespace cuopt::mps_parser { + +/** + * @brief Writes the problem to an MPS formatted file + * + * Read this link http://lpsolve.sourceforge.net/5.5/mps-format.htm for more + * details on both free and fixed MPS format. + * + * @param[in] problem The problem data model view to write + * @param[in] mps_file_path Path to the MPS file to write + */ +template +void write_mps(const data_model_view_t& problem, const std::string& mps_file_path); + +} // namespace cuopt::mps_parser diff --git a/cpp/libmps_parser/src/data_model_view.cpp b/cpp/libmps_parser/src/data_model_view.cpp index efbe1a0f25..25558e37ea 100644 --- a/cpp/libmps_parser/src/data_model_view.cpp +++ b/cpp/libmps_parser/src/data_model_view.cpp @@ -198,6 +198,19 @@ void data_model_view_t::set_problem_name(const std::string& problem_na problem_name_ = problem_name; } +template +void data_model_view_t::set_variable_names( + const std::vector& variables_names) +{ + variable_names_ = variables_names; +} + +template +void data_model_view_t::set_row_names(const std::vector& row_names) +{ + row_names_ = row_names; +} + template span data_model_view_t::get_constraint_matrix_values() const noexcept { @@ -306,6 +319,18 @@ bool data_model_view_t::get_sense() const noexcept return maximize_; } +template +const std::vector& data_model_view_t::get_variable_names() const noexcept +{ + return variable_names_; +} + +template +const std::vector& data_model_view_t::get_row_names() const noexcept +{ + return row_names_; +} + // QPS-specific getter implementations template span data_model_view_t::get_quadratic_objective_values() const noexcept diff --git a/cpp/libmps_parser/src/mps_writer.cpp b/cpp/libmps_parser/src/mps_writer.cpp new file mode 100644 index 0000000000..7eb1a4f42e --- /dev/null +++ b/cpp/libmps_parser/src/mps_writer.cpp @@ -0,0 +1,264 @@ 
+/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace cuopt::mps_parser { + +template +mps_writer_t::mps_writer_t(const data_model_view_t& problem) : problem_(problem) +{ +} + +template +void mps_writer_t::write(const std::string& mps_file_path) +{ + std::ofstream mps_file(mps_file_path); + + mps_parser_expects(mps_file.is_open(), + error_type_t::ValidationError, + "Error creating output MPS file! 
Given path: %s", + mps_file_path.c_str()); + + i_t n_variables = problem_.get_variable_lower_bounds().size(); + i_t n_constraints; + if (problem_.get_constraint_bounds().size() > 0) + n_constraints = problem_.get_constraint_bounds().size(); + else + n_constraints = problem_.get_constraint_lower_bounds().size(); + + std::vector objective_coefficients(problem_.get_objective_coefficients().size()); + std::vector constraint_lower_bounds(n_constraints); + std::vector constraint_upper_bounds(n_constraints); + std::vector constraint_bounds(problem_.get_constraint_bounds().size()); + std::vector variable_lower_bounds(problem_.get_variable_lower_bounds().size()); + std::vector variable_upper_bounds(problem_.get_variable_upper_bounds().size()); + std::vector variable_types(problem_.get_variable_types().size()); + std::vector row_types(problem_.get_row_types().size()); + std::vector constraint_matrix_offsets(problem_.get_constraint_matrix_offsets().size()); + std::vector constraint_matrix_indices(problem_.get_constraint_matrix_indices().size()); + std::vector constraint_matrix_values(problem_.get_constraint_matrix_values().size()); + + std::copy( + problem_.get_objective_coefficients().data(), + problem_.get_objective_coefficients().data() + problem_.get_objective_coefficients().size(), + objective_coefficients.data()); + std::copy(problem_.get_constraint_bounds().data(), + problem_.get_constraint_bounds().data() + problem_.get_constraint_bounds().size(), + constraint_bounds.data()); + std::copy( + problem_.get_variable_lower_bounds().data(), + problem_.get_variable_lower_bounds().data() + problem_.get_variable_lower_bounds().size(), + variable_lower_bounds.data()); + std::copy( + problem_.get_variable_upper_bounds().data(), + problem_.get_variable_upper_bounds().data() + problem_.get_variable_upper_bounds().size(), + variable_upper_bounds.data()); + std::copy(problem_.get_variable_types().data(), + problem_.get_variable_types().data() + problem_.get_variable_types().size(), 
+ variable_types.data()); + std::copy(problem_.get_row_types().data(), + problem_.get_row_types().data() + problem_.get_row_types().size(), + row_types.data()); + std::copy(problem_.get_constraint_matrix_offsets().data(), + problem_.get_constraint_matrix_offsets().data() + + problem_.get_constraint_matrix_offsets().size(), + constraint_matrix_offsets.data()); + std::copy(problem_.get_constraint_matrix_indices().data(), + problem_.get_constraint_matrix_indices().data() + + problem_.get_constraint_matrix_indices().size(), + constraint_matrix_indices.data()); + std::copy( + problem_.get_constraint_matrix_values().data(), + problem_.get_constraint_matrix_values().data() + problem_.get_constraint_matrix_values().size(), + constraint_matrix_values.data()); + + if (problem_.get_constraint_lower_bounds().size() == 0 || + problem_.get_constraint_upper_bounds().size() == 0) { + for (size_t i = 0; i < (size_t)n_constraints; i++) { + constraint_lower_bounds[i] = constraint_bounds[i]; + constraint_upper_bounds[i] = constraint_bounds[i]; + if (row_types[i] == 'L') { + constraint_lower_bounds[i] = -std::numeric_limits::infinity(); + } else if (row_types[i] == 'G') { + constraint_upper_bounds[i] = std::numeric_limits::infinity(); + } + } + } else { + std::copy( + problem_.get_constraint_lower_bounds().data(), + problem_.get_constraint_lower_bounds().data() + problem_.get_constraint_lower_bounds().size(), + constraint_lower_bounds.data()); + std::copy( + problem_.get_constraint_upper_bounds().data(), + problem_.get_constraint_upper_bounds().data() + problem_.get_constraint_upper_bounds().size(), + constraint_upper_bounds.data()); + } + + // save coefficients with full precision + mps_file << std::setprecision(std::numeric_limits::max_digits10); + + // NAME section + mps_file << "NAME " << problem_.get_problem_name() << "\n"; + + if (problem_.get_sense()) { mps_file << "OBJSENSE\n MAXIMIZE\n"; } + + // ROWS section + mps_file << "ROWS\n"; + mps_file << " N " + << 
(problem_.get_objective_name().empty() ? "OBJ" : problem_.get_objective_name()) + << "\n"; + for (size_t i = 0; i < (size_t)n_constraints; i++) { + std::string row_name = + i < problem_.get_row_names().size() ? problem_.get_row_names()[i] : "R" + std::to_string(i); + char type = 'L'; + if (constraint_lower_bounds[i] == constraint_upper_bounds[i]) + type = 'E'; + else if (std::isinf(constraint_upper_bounds[i])) + type = 'G'; + mps_file << " " << type << " " << row_name << "\n"; + } + + // COLUMNS section + mps_file << "COLUMNS\n"; + + // Keep a single integer section marker by going over constraints twice and writing out + // integral/nonintegral nonzeros ordered map + std::map>> integral_col_nnzs; + std::map>> continuous_col_nnzs; + for (size_t row_id = 0; row_id < (size_t)n_constraints; row_id++) { + for (size_t k = (size_t)constraint_matrix_offsets[row_id]; + k < (size_t)constraint_matrix_offsets[row_id + 1]; + k++) { + size_t var = (size_t)constraint_matrix_indices[k]; + if (variable_types[var] == 'I') { + integral_col_nnzs[var].emplace_back(row_id, constraint_matrix_values[k]); + } else { + continuous_col_nnzs[var].emplace_back(row_id, constraint_matrix_values[k]); + } + } + } + + for (size_t is_integral = 0; is_integral < 2; is_integral++) { + auto& col_map = is_integral ? integral_col_nnzs : continuous_col_nnzs; + if (is_integral) mps_file << " MARK0001 'MARKER' 'INTORG'\n"; + for (auto& [var_id, nnzs] : col_map) { + std::string col_name = var_id < problem_.get_variable_names().size() + ? problem_.get_variable_names()[var_id] + : "C" + std::to_string(var_id); + for (auto& nnz : nnzs) { + std::string row_name = nnz.first < problem_.get_row_names().size() + ? 
problem_.get_row_names()[nnz.first] + : "R" + std::to_string(nnz.first); + mps_file << " " << col_name << " " << row_name << " " << nnz.second << "\n"; + } + // Write objective coefficients + if (objective_coefficients[var_id] != 0.0) { + mps_file << " " << col_name << " " + << (problem_.get_objective_name().empty() ? "OBJ" : problem_.get_objective_name()) + << " " << objective_coefficients[var_id] << "\n"; + } + } + if (is_integral) mps_file << " MARK0001 'MARKER' 'INTEND'\n"; + } + + // RHS section + mps_file << "RHS\n"; + for (size_t i = 0; i < (size_t)n_constraints; i++) { + std::string row_name = + i < problem_.get_row_names().size() ? problem_.get_row_names()[i] : "R" + std::to_string(i); + + f_t rhs; + if (constraint_bounds.size() > 0) + rhs = constraint_bounds[i]; + else if (std::isinf(constraint_lower_bounds[i])) { + rhs = constraint_upper_bounds[i]; + } else if (std::isinf(constraint_upper_bounds[i])) { + rhs = constraint_lower_bounds[i]; + } else { // RANGES, encode the lower bound + rhs = constraint_lower_bounds[i]; + } + + if (std::isfinite(rhs) && rhs != 0.0) { + mps_file << " RHS1 " << row_name << " " << rhs << "\n"; + } + } + if (std::isfinite(problem_.get_objective_offset()) && problem_.get_objective_offset() != 0.0) { + mps_file << " RHS1 " + << (problem_.get_objective_name().empty() ? 
"OBJ" : problem_.get_objective_name()) + << " " << -problem_.get_objective_offset() << "\n"; + } + + // RANGES section if needed + bool has_ranges = false; + for (size_t i = 0; i < (size_t)n_constraints; i++) { + if (constraint_lower_bounds[i] != -std::numeric_limits::infinity() && + constraint_upper_bounds[i] != std::numeric_limits::infinity() && + constraint_lower_bounds[i] != constraint_upper_bounds[i]) { + if (!has_ranges) { + mps_file << "RANGES\n"; + has_ranges = true; + } + std::string row_name = i < problem_.get_row_names().size() ? problem_.get_row_names()[i] : "R" + std::to_string(i); + mps_file << " RNG1 " << row_name << " " + << (constraint_upper_bounds[i] - constraint_lower_bounds[i]) << "\n"; + } + } + + // BOUNDS section + mps_file << "BOUNDS\n"; + for (size_t j = 0; j < (size_t)n_variables; j++) { + std::string col_name = j < problem_.get_variable_names().size() + ? problem_.get_variable_names()[j] + : "C" + std::to_string(j); + + if (variable_lower_bounds[j] == -std::numeric_limits::infinity() && + variable_upper_bounds[j] == std::numeric_limits::infinity()) { + mps_file << " FR BOUND1 " << col_name << "\n"; + } else { + if (variable_lower_bounds[j] != 0.0 || objective_coefficients[j] == 0.0 || + variable_types[j] != 'C') { + if (variable_lower_bounds[j] == -std::numeric_limits::infinity()) { + mps_file << " MI BOUND1 " << col_name << "\n"; + } else { + mps_file << " LO BOUND1 " << col_name << " " << variable_lower_bounds[j] << "\n"; + } + } + if (variable_upper_bounds[j] != std::numeric_limits::infinity()) { + mps_file << " UP BOUND1 " << col_name << " " << variable_upper_bounds[j] << "\n"; + } + } + } + + mps_file << "ENDATA\n"; + mps_file.close(); +} + +template class mps_writer_t; +template class mps_writer_t; + +} // namespace cuopt::mps_parser diff --git a/cpp/libmps_parser/src/writer.cpp b/cpp/libmps_parser/src/writer.cpp new file mode 100644 index 0000000000..cf05653c33 --- /dev/null +++ b/cpp/libmps_parser/src/writer.cpp @@ -0,0 +1,36 @@ +/* + * 
SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA 
CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +namespace cuopt::mps_parser { + +template +void write_mps(const data_model_view_t& problem, const std::string& mps_file_path) +{ + mps_writer_t writer(problem); + writer.write(mps_file_path); +} + +template void write_mps(const data_model_view_t& problem, + const std::string& mps_file_path); +template void write_mps(const data_model_view_t& problem, + const std::string& mps_file_path); + +} // namespace cuopt::mps_parser diff --git a/cpp/src/linear_programming/optimization_problem.cu b/cpp/src/linear_programming/optimization_problem.cu index cdd6cf2931..5847c503bc 100644 --- a/cpp/src/linear_programming/optimization_problem.cu +++ b/cpp/src/linear_programming/optimization_problem.cu @@ -16,10 +16,11 @@ */ #include -#include +#include #include #include +#include #include #include @@ -490,6 +491,98 @@ void optimization_problem_t::set_maximize(bool _maximize) maximize_ = _maximize; } +template +void optimization_problem_t::write_to_mps(const std::string& mps_file_path) +{ + cuopt::mps_parser::data_model_view_t data_model_view; + + // Set optimization sense + data_model_view.set_maximize(get_sense()); + + // Copy to host + auto constraint_matrix_values = cuopt::host_copy(get_constraint_matrix_values()); + auto constraint_matrix_indices = cuopt::host_copy(get_constraint_matrix_indices()); + auto 
constraint_matrix_offsets = cuopt::host_copy(get_constraint_matrix_offsets()); + auto constraint_bounds = cuopt::host_copy(get_constraint_bounds()); + auto objective_coefficients = cuopt::host_copy(get_objective_coefficients()); + auto variable_lower_bounds = cuopt::host_copy(get_variable_lower_bounds()); + auto variable_upper_bounds = cuopt::host_copy(get_variable_upper_bounds()); + auto constraint_lower_bounds = cuopt::host_copy(get_constraint_lower_bounds()); + auto constraint_upper_bounds = cuopt::host_copy(get_constraint_upper_bounds()); + auto row_types = cuopt::host_copy(get_row_types()); + + // Set constraint matrix in CSR format + if (get_nnz() != 0) { + data_model_view.set_csr_constraint_matrix(constraint_matrix_values.data(), + constraint_matrix_values.size(), + constraint_matrix_indices.data(), + constraint_matrix_indices.size(), + constraint_matrix_offsets.data(), + constraint_matrix_offsets.size()); + } + + // Set constraint bounds (RHS) + if (get_n_constraints() != 0) { + data_model_view.set_constraint_bounds(constraint_bounds.data(), constraint_bounds.size()); + } + + // Set objective coefficients + if (get_n_variables() != 0) { + data_model_view.set_objective_coefficients(objective_coefficients.data(), + objective_coefficients.size()); + } + + // Set objective scaling and offset + data_model_view.set_objective_scaling_factor(get_objective_scaling_factor()); + data_model_view.set_objective_offset(get_objective_offset()); + + // Set variable bounds + if (get_n_variables() != 0) { + data_model_view.set_variable_lower_bounds(variable_lower_bounds.data(), + variable_lower_bounds.size()); + data_model_view.set_variable_upper_bounds(variable_upper_bounds.data(), + variable_upper_bounds.size()); + } + + // Set row types (constraint types) + if (get_row_types().size() != 0) { + data_model_view.set_row_types(row_types.data(), row_types.size()); + } + + // Set constraint bounds (lower and upper) + if (get_constraint_lower_bounds().size() != 0 && 
get_constraint_upper_bounds().size() != 0) { + data_model_view.set_constraint_lower_bounds(constraint_lower_bounds.data(), + constraint_lower_bounds.size()); + data_model_view.set_constraint_upper_bounds(constraint_upper_bounds.data(), + constraint_upper_bounds.size()); + } + + // Create a temporary vector to hold the converted variable types + std::vector variable_types(get_n_variables()); + // Set variable types (convert from enum to char) + if (get_n_variables() != 0) { + auto enum_variable_types = cuopt::host_copy(get_variable_types()); + + // Convert enum types to char types + for (size_t i = 0; i < variable_types.size(); ++i) { + variable_types[i] = (enum_variable_types[i] == var_t::INTEGER) ? 'I' : 'C'; + } + + data_model_view.set_variable_types(variable_types.data(), variable_types.size()); + } + + // Set problem and variable names if available + if (!get_problem_name().empty()) { data_model_view.set_problem_name(get_problem_name()); } + + if (!get_objective_name().empty()) { data_model_view.set_objective_name(get_objective_name()); } + + if (!get_variable_names().empty()) { data_model_view.set_variable_names(get_variable_names()); } + + if (!get_row_names().empty()) { data_model_view.set_row_names(get_row_names()); } + + cuopt::mps_parser::write_mps(data_model_view, mps_file_path); +} + // NOTE: Explicitly instantiate all types here in order to avoid linker error #if MIP_INSTANTIATE_FLOAT template class optimization_problem_t; diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu index 95a62df2a6..156dd52961 100644 --- a/cpp/src/linear_programming/solve.cu +++ b/cpp/src/linear_programming/solve.cu @@ -619,11 +619,6 @@ optimization_problem_solution_t solve_lp(optimization_problem_t solve_lp(optimization_problem_t #include #include +#include +#include +#include #include #include @@ -106,6 +109,14 @@ data_model_to_optimization_problem( op_problem.set_variable_types(enum_variable_types.data(), enum_variable_types.size()); } + 
if (data_model->get_variable_names().size() != 0) { + op_problem.set_variable_names(data_model->get_variable_names()); + } + + if (data_model->get_row_names().size() != 0) { + op_problem.set_row_names(data_model->get_row_names()); + } + return op_problem; } diff --git a/cpp/src/mip/CMakeLists.txt b/cpp/src/mip/CMakeLists.txt index 7165b3bb56..8e72ca70eb 100644 --- a/cpp/src/mip/CMakeLists.txt +++ b/cpp/src/mip/CMakeLists.txt @@ -13,12 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. + # Files necessary for Linear Programming functionality set(MIP_LP_NECESSARY_FILES ${CMAKE_CURRENT_SOURCE_DIR}/problem/problem.cu ${CMAKE_CURRENT_SOURCE_DIR}/solver_settings.cu ${CMAKE_CURRENT_SOURCE_DIR}/solver_solution.cu - ${CMAKE_CURRENT_SOURCE_DIR}/problem/write_mps.cu ${CMAKE_CURRENT_SOURCE_DIR}/local_search/rounding/simple_rounding.cu ${CMAKE_CURRENT_SOURCE_DIR}/presolve/third_party_presolve.cpp ${CMAKE_CURRENT_SOURCE_DIR}/solution/solution.cu diff --git a/cpp/src/mip/problem/problem.cu b/cpp/src/mip/problem/problem.cu index b237caea95..0e5cf510c4 100644 --- a/cpp/src/mip/problem/problem.cu +++ b/cpp/src/mip/problem/problem.cu @@ -140,6 +140,7 @@ problem_t::problem_t( var_names(problem_.get_variable_names()), row_names(problem_.get_row_names()), objective_name(problem_.get_objective_name()), + objective_offset(problem_.get_objective_offset()), lp_state(*this, problem_.get_handle_ptr()->get_stream()), fixing_helpers(n_constraints, n_variables, handle_ptr) { diff --git a/cpp/src/mip/problem/problem.cuh b/cpp/src/mip/problem/problem.cuh index 9d63e18579..f9c147b891 100644 --- a/cpp/src/mip/problem/problem.cuh +++ b/cpp/src/mip/problem/problem.cuh @@ -112,7 +112,6 @@ class problem_t { void get_host_user_problem( cuopt::linear_programming::dual_simplex::user_problem_t& user_problem) const; - void write_as_mps(const std::string& path); void add_cutting_plane_at_objective(f_t objective); void 
compute_vars_with_objective_coeffs(); @@ -266,6 +265,7 @@ class problem_t { std::vector row_names{}; /** name of the objective (only a single objective is currently allowed) */ std::string objective_name; + f_t objective_offset; bool is_scaled_{false}; bool preprocess_called{false}; // this LP state keeps the warm start data of some solution of diff --git a/cpp/src/mip/solve.cu b/cpp/src/mip/solve.cu index 24c52b45eb..cef0f3467b 100644 --- a/cpp/src/mip/solve.cu +++ b/cpp/src/mip/solve.cu @@ -221,7 +221,7 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, } if (settings.user_problem_file != "") { CUOPT_LOG_INFO("Writing user problem to file: %s", settings.user_problem_file.c_str()); - problem.write_as_mps(settings.user_problem_file); + op_problem.write_to_mps(settings.user_problem_file); } // this is for PDLP, i think this should be part of pdlp solver diff --git a/cpp/tests/mip/doc_example_test.cu b/cpp/tests/mip/doc_example_test.cu index 3506eb00d2..e154e8c693 100644 --- a/cpp/tests/mip/doc_example_test.cu +++ b/cpp/tests/mip/doc_example_test.cu @@ -160,12 +160,16 @@ TEST(docs, user_problem_file) // Get solution values const auto& sol_values = solution.get_solution(); // x should be approximately 37 and integer - EXPECT_NEAR(37.0, sol_values.element(0, handle_.get_stream()), 0.1); - EXPECT_NEAR(std::round(sol_values.element(0, handle_.get_stream())), - sol_values.element(0, handle_.get_stream()), - settings.tolerances.integrality_tolerance); // Check x is integer - // y should be approximately 39.5 - EXPECT_NEAR(39.5, sol_values.element(1, handle_.get_stream()), 0.1); + for (int i = 0; i < problem2.get_n_variables(); i++) { + if (problem2.get_variable_names()[i] == "x") { + EXPECT_NEAR(37.0, sol_values.element(i, handle_.get_stream()), 0.1); + EXPECT_NEAR(std::round(sol_values.element(i, handle_.get_stream())), + sol_values.element(i, handle_.get_stream()), + settings.tolerances.integrality_tolerance); // Check x is integer + } else { // y should be 
approximately 39.5 + EXPECT_NEAR(39.5, sol_values.element(i, handle_.get_stream()), 0.1); + } + } } } // namespace cuopt::linear_programming::test diff --git a/docs/cuopt/source/conf.py b/docs/cuopt/source/conf.py index 517a57da9b..765ec1c968 100644 --- a/docs/cuopt/source/conf.py +++ b/docs/cuopt/source/conf.py @@ -300,6 +300,7 @@ ("py:obj", "cuopt_sh_client.PDLPSolverMode.is_integer"), ("py:obj", "cuopt_sh_client.PDLPSolverMode.bit_count"), ("py:obj", "cuopt_sh_client.PDLPSolverMode.bit_length"), + ("py:obj", "data_model.DataModel.set_data_model_view"), ("c:type", "size_t"), ("c:identifier", "int32_t"), ("c:identifier", "int8_t"), diff --git a/python/cuopt/cuopt/linear_programming/cuopt_mps_parser/parser.pxd b/python/cuopt/cuopt/linear_programming/cuopt_mps_parser/parser.pxd index 3ab51cd7a8..7a99503871 100644 --- a/python/cuopt/cuopt/linear_programming/cuopt_mps_parser/parser.pxd +++ b/python/cuopt/cuopt/linear_programming/cuopt_mps_parser/parser.pxd @@ -44,6 +44,8 @@ cdef extern from "mps_parser/mps_data_model.hpp" namespace "cuopt::mps_parser": vector[string] var_names_ vector[string] row_names_ vector[char] row_types_ + string objective_name_ + string problem_name_ cdef extern from "mps_parser/utilities/cython_mps_parser.hpp" namespace "cuopt::cython": # noqa diff --git a/python/cuopt/cuopt/linear_programming/cuopt_mps_parser/parser_wrapper.pyx b/python/cuopt/cuopt/linear_programming/cuopt_mps_parser/parser_wrapper.pyx index 9b150a7eb4..819c12ceb6 100644 --- a/python/cuopt/cuopt/linear_programming/cuopt_mps_parser/parser_wrapper.pyx +++ b/python/cuopt/cuopt/linear_programming/cuopt_mps_parser/parser_wrapper.pyx @@ -129,5 +129,7 @@ def ParseMps(mps_file_path, fixed_mps_formats): data_model.set_row_types(row_types) data_model.set_variable_names(var_names_) data_model.set_row_names(row_names_) + data_model.set_objective_name(dm_ret.objective_name_.decode()) + data_model.set_problem_name(dm_ret.problem_name_.decode()) return data_model diff --git 
a/python/cuopt/cuopt/linear_programming/data_model/data_model.pxd b/python/cuopt/cuopt/linear_programming/data_model/data_model.pxd index 8f5911bf03..247d1c53b6 100644 --- a/python/cuopt/cuopt/linear_programming/data_model/data_model.pxd +++ b/python/cuopt/cuopt/linear_programming/data_model/data_model.pxd @@ -20,6 +20,8 @@ # cython: language_level = 3 from libcpp cimport bool +from libcpp.string cimport string +from libcpp.vector cimport vector cdef extern from "mps_parser/data_model_view.hpp" namespace "cuopt::mps_parser" nogil: # noqa @@ -56,3 +58,14 @@ cdef extern from "mps_parser/data_model_view.hpp" namespace "cuopt::mps_parser" i_t size) except + void set_row_types(const char* row_types, i_t size) except + void set_variable_types(const char* var_types, i_t size) except + + void set_variable_names(const vector[string] variables_names) except + + void set_row_names(const vector[string] row_names) except + + void set_problem_name(const string problem_name) except + + void set_objective_name(const string objective_name) except + + + +cdef extern from "mps_parser/writer.hpp" namespace "cuopt::mps_parser" nogil: # noqa + + cdef void write_mps( + const data_model_view_t[int, double] data_model, + const string user_problem_file) except + diff --git a/python/cuopt/cuopt/linear_programming/data_model/data_model.py b/python/cuopt/cuopt/linear_programming/data_model/data_model.py index 0301b3603f..d5e43302de 100644 --- a/python/cuopt/cuopt/linear_programming/data_model/data_model.py +++ b/python/cuopt/cuopt/linear_programming/data_model/data_model.py @@ -411,6 +411,20 @@ def set_row_names(self, row_names): """ super().set_row_names(row_names) + @catch_cuopt_exception + def set_objective_name(self, objective_name): + """ + Set the objective name as string. + """ + super().set_objective_name(objective_name) + + @catch_cuopt_exception + def set_problem_name(self, problem_name): + """ + Set the problem name as string. 
+ """ + super().set_problem_name(problem_name) + @catch_cuopt_exception def set_initial_primal_solution(self, initial_primal_solution): """ @@ -603,3 +617,21 @@ def get_row_names(self): """ return super().get_row_names() + + @catch_cuopt_exception + def get_objective_name(self): + """ + Get the objective name as string. + """ + return super().get_objective_name() + + @catch_cuopt_exception + def get_problem_name(self): + """ + Get the problem name as string. + """ + return super().get_problem_name() + + @catch_cuopt_exception + def writeMPS(self, user_problem_file): + return super().writeMPS(user_problem_file) diff --git a/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pyx b/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pyx index 11f3308bb7..50641d331f 100644 --- a/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pyx +++ b/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pyx @@ -19,12 +19,15 @@ # cython: embedsignature = True # cython: language_level = 3 -from .data_model cimport data_model_view_t +from .data_model cimport data_model_view_t, write_mps import warnings import numpy as np +import cudf + +from libc.stdint cimport uintptr_t from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -42,6 +45,17 @@ def type_cast(np_obj, np_type, name): return np_obj +def get_data_ptr(array): + if isinstance(array, cudf.Series): + return array.__cuda_array_interface__['data'][0] + elif isinstance(array, np.ndarray): + return array.__array_interface__['data'][0] + else: + raise Exception( + "get_data_ptr must be called with cudf.Series or np.ndarray" + ) + + cdef class DataModel: def __init__(self): @@ -133,6 +147,12 @@ cdef class DataModel: def set_row_names(self, row_names): self.row_names = row_names + def set_objective_name(self, objective_name): + self.objective_name = objective_name + + def set_problem_name(self, problem_name): + self.problem_name = problem_name + def 
get_sense(self): return self.maximize @@ -189,3 +209,165 @@ cdef class DataModel: def get_row_names(self): return self.row_names + + def get_objective_name(self): + return self.objective_name + + def get_problem_name(self): + return self.problem_name + + def set_data_model_view(self): + cdef data_model_view_t[int, double]* c_data_model_view = ( + self.c_data_model_view.get() + ) + + # Set self.fields on the C++ side if set on the Python side + cdef uintptr_t c_A_values = ( + get_data_ptr(self.get_constraint_matrix_values()) + ) + cdef uintptr_t c_A_indices = ( + get_data_ptr(self.get_constraint_matrix_indices()) + ) + cdef uintptr_t c_A_offsets = ( + get_data_ptr(self.get_constraint_matrix_offsets()) + ) + if self.get_constraint_matrix_values().shape[0] != 0 and self.get_constraint_matrix_indices().shape[0] != 0 and self.get_constraint_matrix_offsets().shape[0] != 0: # noqa + c_data_model_view.set_csr_constraint_matrix( + c_A_values, + self.get_constraint_matrix_values().shape[0], + c_A_indices, + self.get_constraint_matrix_indices().shape[0], + c_A_offsets, + self.get_constraint_matrix_offsets().shape[0] + ) + + cdef uintptr_t c_b = ( + get_data_ptr(self.get_constraint_bounds()) + ) + if self.get_constraint_bounds().shape[0] != 0: + c_data_model_view.set_constraint_bounds( + c_b, + self.get_constraint_bounds().shape[0] + ) + + cdef uintptr_t c_c = ( + get_data_ptr(self.get_objective_coefficients()) + ) + if self.get_objective_coefficients().shape[0] != 0: + c_data_model_view.set_objective_coefficients( + c_c, + self.get_objective_coefficients().shape[0] + ) + + c_data_model_view.set_objective_scaling_factor( + self.get_objective_scaling_factor() + ) + c_data_model_view.set_objective_offset( + self.get_objective_offset() + ) + c_data_model_view.set_maximize( self.maximize) + + cdef uintptr_t c_variable_lower_bounds = ( + get_data_ptr(self.get_variable_lower_bounds()) + ) + if self.get_variable_lower_bounds().shape[0] != 0: + 
c_data_model_view.set_variable_lower_bounds( + c_variable_lower_bounds, + self.get_variable_lower_bounds().shape[0] + ) + + cdef uintptr_t c_variable_upper_bounds = ( + get_data_ptr(self.get_variable_upper_bounds()) + ) + if self.get_variable_upper_bounds().shape[0] != 0: + c_data_model_view.set_variable_upper_bounds( + c_variable_upper_bounds, + self.get_variable_upper_bounds().shape[0] + ) + cdef uintptr_t c_constraint_lower_bounds = ( + get_data_ptr(self.get_constraint_lower_bounds()) + ) + if self.get_constraint_lower_bounds().shape[0] != 0: + c_data_model_view.set_constraint_lower_bounds( + c_constraint_lower_bounds, + self.get_constraint_lower_bounds().shape[0] + ) + cdef uintptr_t c_constraint_upper_bounds = ( + get_data_ptr(self.get_constraint_upper_bounds()) + ) + if self.get_constraint_upper_bounds().shape[0] != 0: + c_data_model_view.set_constraint_upper_bounds( + c_constraint_upper_bounds, + self.get_constraint_upper_bounds().shape[0] + ) + cdef uintptr_t c_row_types = ( + get_data_ptr(self.get_ascii_row_types()) + ) + if self.get_ascii_row_types().shape[0] != 0: + c_data_model_view.set_row_types( + c_row_types, + self.get_ascii_row_types().shape[0] + ) + + cdef uintptr_t c_var_types = ( + get_data_ptr(self.get_variable_types()) + ) + if self.get_variable_types().shape[0] != 0: + c_data_model_view.set_variable_types( + c_var_types, + self.get_variable_types().shape[0] + ) + + cdef vector[string] c_var_names + for s in self.get_variable_names(): + c_var_names.push_back(s.encode()) + + if len(self.get_variable_names()) != 0: + c_data_model_view.set_variable_names( + c_var_names + ) + + cdef vector[string] c_row_names + for s in self.get_row_names(): + c_row_names.push_back(s.encode()) + + if len(self.get_row_names()) != 0: + c_data_model_view.set_row_names( + c_row_names + ) + + if self.get_problem_name(): + c_data_model_view.set_problem_name( + self.get_problem_name().encode() + ) + + if self.get_objective_name(): + c_data_model_view.set_objective_name( 
+ self.get_objective_name().encode() + ) + + # Set initial solution on the C++ side if set on the Python side + cdef uintptr_t c_initial_primal_solution = ( + get_data_ptr(self.get_initial_primal_solution()) + ) + if self.get_initial_primal_solution().shape[0] != 0: + c_data_model_view.set_initial_primal_solution( + c_initial_primal_solution, + self.get_initial_primal_solution().shape[0] + ) + cdef uintptr_t c_initial_dual_solution = ( + get_data_ptr(self.get_initial_dual_solution()) + ) + if self.get_initial_dual_solution().shape[0] != 0: + c_data_model_view.set_initial_dual_solution( + c_initial_dual_solution, + self.get_initial_dual_solution().shape[0] + ) + + def writeMPS(self, user_problem_file): + self.variable_types = type_cast( + self.variable_types, "S1", "variable_types" + ) + self.set_data_model_view() + write_mps(self.c_data_model_view.get()[0], + user_problem_file.encode('utf-8')) diff --git a/python/cuopt/cuopt/linear_programming/problem.py b/python/cuopt/cuopt/linear_programming/problem.py index 1a14e17cf1..9ad77ca58b 100644 --- a/python/cuopt/cuopt/linear_programming/problem.py +++ b/python/cuopt/cuopt/linear_programming/problem.py @@ -13,8 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import copy from enum import Enum +import cuopt_mps_parser import numpy as np import cuopt.linear_programming.data_model as data_model @@ -699,12 +701,11 @@ def __init__(self, model_name=""): self.Status = -1 self.ObjValue = float("nan") + self.model = None self.solved = False self.rhs = None self.row_sense = None - self.row_pointers = None - self.column_indicies = None - self.values = None + self.constraint_csr_matrix = None self.lower_bound = None self.upper_bound = None self.var_type = None @@ -714,6 +715,124 @@ def __init__(self, mdict): for key, value in mdict.items(): setattr(self, key, value) + def _from_data_model(self, dm): + self.Name = dm.get_problem_name() + obj_coeffs = dm.get_objective_coefficients() + obj_constant = dm.get_objective_offset() + num_vars = len(obj_coeffs) + sense = dm.get_sense() + if sense: + sense = MAXIMIZE + else: + sense = MINIMIZE + v_lb = dm.get_variable_lower_bounds() + v_ub = dm.get_variable_upper_bounds() + v_types = dm.get_variable_types() + v_names = dm.get_variable_names().tolist() + + # Add all Variables and Objective Coefficients + for i in range(num_vars): + v_name = "" + if v_names: + v_name = v_names[i] + self.addVariable(v_lb[i], v_ub[i], vtype=v_types[i], name=v_name) + vars = self.getVariables() + expr = LinearExpression(vars, obj_coeffs, obj_constant) + self.setObjective(expr, sense) + + # Add all Constraints + c_lb = dm.get_constraint_lower_bounds() + c_ub = dm.get_constraint_upper_bounds() + c_b = dm.get_constraint_bounds() + c_names = dm.get_row_names() + offsets = dm.get_constraint_matrix_offsets() + indices = dm.get_constraint_matrix_indices() + values = dm.get_constraint_matrix_values() + + num_constrs = len(offsets) - 1 + for i in range(num_constrs): + start = offsets[i] + end = offsets[i + 1] + c_coeffs = values[start:end] + c_indices = indices[start:end] + c_vars = [vars[j] for j in c_indices] + expr = LinearExpression(c_vars, c_coeffs, 0.0) + if c_lb[i] == c_ub[i]: + self.addConstraint(expr == c_b[i], 
name=c_names[i]) + elif c_lb[i] == c_b[i]: + self.addConstraint(expr >= c_b[i], name=c_names[i]) + elif c_ub[i] == c_b[i]: + self.addConstraint(expr <= c_b[i], name=c_names[i]) + else: + raise Exception("Couldn't initialize constraints") + + def _to_data_model(self): + # iterate through the constraints and construct the constraint matrix + n = len(self.vars) + self.rhs = [] + self.row_sense = [] + self.row_names = [] + + if self.constraint_csr_matrix is None: + csr_dict = { + "row_pointers": [0], + "column_indices": [], + "values": [], + } + for constr in self.constrs: + csr_dict["column_indices"].extend( + list(constr.vindex_coeff_dict.keys()) + ) + csr_dict["values"].extend( + list(constr.vindex_coeff_dict.values()) + ) + csr_dict["row_pointers"].append( + len(csr_dict["column_indices"]) + ) + self.rhs.append(constr.RHS) + self.row_sense.append(constr.Sense) + self.row_names.append(constr.ConstraintName) + self.constraint_csr_matrix = csr_dict + else: + for constr in self.constrs: + self.rhs.append(constr.RHS) + self.row_sense.append(constr.Sense) + self.row_names.append(constr.ConstraintName) + + self.objective = np.zeros(n) + self.lower_bound, self.upper_bound = np.zeros(n), np.zeros(n) + self.var_type = np.empty(n, dtype="S1") + self.var_names = [] + + for j in range(n): + self.objective[j] = self.vars[j].getObjectiveCoefficient() + self.var_type[j] = self.vars[j].getVariableType() + self.lower_bound[j] = self.vars[j].getLowerBound() + self.upper_bound[j] = self.vars[j].getUpperBound() + self.var_names.append(self.vars[j].VariableName) + + # Initialize datamodel + dm = data_model.DataModel() + dm.set_csr_constraint_matrix( + np.array(self.constraint_csr_matrix["values"]), + np.array(self.constraint_csr_matrix["column_indices"]), + np.array(self.constraint_csr_matrix["row_pointers"]), + ) + if self.ObjSense == -1: + dm.set_maximize(True) + dm.set_constraint_bounds(np.array(self.rhs)) + dm.set_row_types(np.array(self.row_sense, dtype="S1")) + 
dm.set_objective_coefficients(self.objective) + 
dm.set_objective_offset(self.ObjConstant) + dm.set_variable_lower_bounds(self.lower_bound) + dm.set_variable_upper_bounds(self.upper_bound) + dm.set_variable_types(self.var_type) + dm.set_variable_names(self.var_names) + dm.set_row_names(self.row_names) + dm.set_problem_name(self.Name) + + self.model = dm + def reset_solved_values(self): # Resets all post solve values for var in self.vars: @@ -724,6 +843,8 @@ def reset_solved_values(self): constr.Slack = float("nan") constr.DualValue = float("nan") + self.model = None + self.constraint_csr_matrix = None self.ObjValue = float("nan") self.solved = False @@ -823,7 +944,7 @@ def setObjective(self, expr, sense=MINIMIZE): case int() | float(): for var in self.vars: var.setObjectiveCoefficient(0.0) - self.ObjCon = float(expr) + self.ObjConstant = float(expr) case Variable(): for var in self.vars: var.setObjectiveCoefficient(0.0) @@ -832,6 +953,7 @@ def setObjective(self, expr, sense=MINIMIZE): case LinearExpression(): for var, coeff in expr.zipVarCoefficients(): self.vars[var.getIndex()].setObjectiveCoefficient(coeff) + self.ObjConstant = expr.getConstant() case _: raise ValueError( "Objective must be a LinearExpression or a constant" @@ -856,6 +978,22 @@ def getConstraints(self): """ return self.constrs + @classmethod + def readMPS(cls, mps_file): + """ + Initialize a problem from an MPS file. + """ + problem = cls() + data_model = cuopt_mps_parser.ParseMps(mps_file) + problem._from_data_model(data_model) + problem.model = data_model + return problem + + def writeMPS(self, mps_file): + if self.model is None: + self._to_data_model() + self.model.writeMPS(mps_file) + @property def NumVariables(self): # Returns number of variables in the problem @@ -887,6 +1025,8 @@ def getCSR(self): Computes and returns the CSR representation of the constraint matrix. 
""" + if self.constraint_csr_matrix is not None: + return self.dict_to_object(self.constraint_csr_matrix) csr_dict = {"row_pointers": [0], "column_indices": [], "values": []} for constr in self.constrs: csr_dict["column_indices"].extend( @@ -894,6 +1034,7 @@ def getCSR(self): ) csr_dict["values"].extend(list(constr.vindex_coeff_dict.values())) csr_dict["row_pointers"].append(len(csr_dict["column_indices"])) + self.constraint_csr_matrix = csr_dict return self.dict_to_object(csr_dict) def get_incumbent_values(self, solution, vars): @@ -905,7 +1046,18 @@ def get_incumbent_values(self, solution, vars): values.append(solution[var.index]) return values - def post_solve(self, solution): + def relax(self): + """ + Relax a MIP problem into an LP problem and return the relaxed model. + """ + self.reset_solved_values() + relaxed_problem = copy.deepcopy(self) + vars = relaxed_problem.getVariables() + for v in vars: + v.VariableType = CONTINUOUS + return relaxed_problem + + def populate_solution(self, solution): self.Status = solution.get_termination_status() self.SolveTime = solution.get_solve_time() @@ -950,48 +1102,11 @@ def solve(self, settings=solver_settings.SolverSettings()): >>> problem.solve() """ - # iterate through the constraints and construct the constraint matrix - n = len(self.vars) - self.row_pointers = [0] - self.column_indicies = [] - self.values = [] - self.rhs = [] - self.row_sense = [] - for constr in self.constrs: - self.column_indicies.extend(list(constr.vindex_coeff_dict.keys())) - self.values.extend(list(constr.vindex_coeff_dict.values())) - self.row_pointers.append(len(self.column_indicies)) - self.rhs.append(constr.RHS) - self.row_sense.append(constr.Sense) - - self.objective = np.zeros(n) - self.lower_bound, self.upper_bound = np.zeros(n), np.zeros(n) - self.var_type = np.empty(n, dtype="S1") - - for j in range(n): - self.objective[j] = self.vars[j].getObjectiveCoefficient() - self.var_type[j] = self.vars[j].getVariableType() - self.lower_bound[j] = 
self.vars[j].getLowerBound() - self.upper_bound[j] = self.vars[j].getUpperBound() - - # Initialize datamodel - dm = data_model.DataModel() - dm.set_csr_constraint_matrix( - np.array(self.values), - np.array(self.column_indicies), - np.array(self.row_pointers), - ) - if self.ObjSense == -1: - dm.set_maximize(True) - dm.set_constraint_bounds(np.array(self.rhs)) - dm.set_row_types(np.array(self.row_sense, dtype="S1")) - dm.set_objective_coefficients(self.objective) - dm.set_variable_lower_bounds(self.lower_bound) - dm.set_variable_upper_bounds(self.upper_bound) - dm.set_variable_types(self.var_type) + if self.model is None: + self._to_data_model() # Call Solver - solution = solver.Solve(dm, settings) + solution = solver.Solve(self.model, settings) # Post Solve - self.post_solve(solution) + self.populate_solution(solution) diff --git a/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx b/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx index 02782b8f9b..a468d57ae1 100644 --- a/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx +++ b/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx @@ -154,127 +154,6 @@ def type_cast(cudf_obj, np_type, name): return cudf_obj -cdef set_data_model_view(DataModel data_model_obj): - cdef data_model_view_t[int, double]* c_data_model_view = ( - data_model_obj.c_data_model_view.get() - ) - - # Set data_model_obj fields on the C++ side if set on the Python side - cdef uintptr_t c_A_values = ( - get_data_ptr(data_model_obj.get_constraint_matrix_values()) - ) - cdef uintptr_t c_A_indices = ( - get_data_ptr(data_model_obj.get_constraint_matrix_indices()) - ) - cdef uintptr_t c_A_offsets = ( - get_data_ptr(data_model_obj.get_constraint_matrix_offsets()) - ) - if data_model_obj.get_constraint_matrix_values().shape[0] != 0 and data_model_obj.get_constraint_matrix_indices().shape[0] != 0 and data_model_obj.get_constraint_matrix_offsets().shape[0] != 0: # noqa - 
c_data_model_view.set_csr_constraint_matrix( - c_A_values, - data_model_obj.get_constraint_matrix_values().shape[0], - c_A_indices, - data_model_obj.get_constraint_matrix_indices().shape[0], - c_A_offsets, - data_model_obj.get_constraint_matrix_offsets().shape[0] - ) - - cdef uintptr_t c_b = ( - get_data_ptr(data_model_obj.get_constraint_bounds()) - ) - if data_model_obj.get_constraint_bounds().shape[0] != 0: - c_data_model_view.set_constraint_bounds( - c_b, - data_model_obj.get_constraint_bounds().shape[0] - ) - - cdef uintptr_t c_c = ( - get_data_ptr(data_model_obj.get_objective_coefficients()) - ) - if data_model_obj.get_objective_coefficients().shape[0] != 0: - c_data_model_view.set_objective_coefficients( - c_c, - data_model_obj.get_objective_coefficients().shape[0] - ) - - c_data_model_view.set_objective_scaling_factor( - data_model_obj.get_objective_scaling_factor() - ) - c_data_model_view.set_objective_offset( - data_model_obj.get_objective_offset() - ) - c_data_model_view.set_maximize( data_model_obj.maximize) - - cdef uintptr_t c_variable_lower_bounds = ( - get_data_ptr(data_model_obj.get_variable_lower_bounds()) - ) - if data_model_obj.get_variable_lower_bounds().shape[0] != 0: - c_data_model_view.set_variable_lower_bounds( - c_variable_lower_bounds, - data_model_obj.get_variable_lower_bounds().shape[0] - ) - - cdef uintptr_t c_variable_upper_bounds = ( - get_data_ptr(data_model_obj.get_variable_upper_bounds()) - ) - if data_model_obj.get_variable_upper_bounds().shape[0] != 0: - c_data_model_view.set_variable_upper_bounds( - c_variable_upper_bounds, - data_model_obj.get_variable_upper_bounds().shape[0] - ) - cdef uintptr_t c_constraint_lower_bounds = ( - get_data_ptr(data_model_obj.get_constraint_lower_bounds()) - ) - if data_model_obj.get_constraint_lower_bounds().shape[0] != 0: - c_data_model_view.set_constraint_lower_bounds( - c_constraint_lower_bounds, - data_model_obj.get_constraint_lower_bounds().shape[0] - ) - cdef uintptr_t 
c_constraint_upper_bounds = ( - get_data_ptr(data_model_obj.get_constraint_upper_bounds()) - ) - if data_model_obj.get_constraint_upper_bounds().shape[0] != 0: - c_data_model_view.set_constraint_upper_bounds( - c_constraint_upper_bounds, - data_model_obj.get_constraint_upper_bounds().shape[0] - ) - cdef uintptr_t c_row_types = ( - get_data_ptr(data_model_obj.get_ascii_row_types()) - ) - if data_model_obj.get_ascii_row_types().shape[0] != 0: - c_data_model_view.set_row_types( - c_row_types, - data_model_obj.get_ascii_row_types().shape[0] - ) - - cdef uintptr_t c_var_types = ( - get_data_ptr(data_model_obj.get_variable_types()) - ) - if data_model_obj.get_variable_types().shape[0] != 0: - c_data_model_view.set_variable_types( - c_var_types, - data_model_obj.get_variable_types().shape[0] - ) - - # Set initial solution on the C++ side if set on the Python side - cdef uintptr_t c_initial_primal_solution = ( - get_data_ptr(data_model_obj.get_initial_primal_solution()) - ) - if data_model_obj.get_initial_primal_solution().shape[0] != 0: - c_data_model_view.set_initial_primal_solution( - c_initial_primal_solution, - data_model_obj.get_initial_primal_solution().shape[0] - ) - cdef uintptr_t c_initial_dual_solution = ( - get_data_ptr(data_model_obj.get_initial_dual_solution()) - ) - if data_model_obj.get_initial_dual_solution().shape[0] != 0: - c_data_model_view.set_initial_dual_solution( - c_initial_dual_solution, - data_model_obj.get_initial_dual_solution().shape[0] - ) - - cdef set_solver_setting( unique_ptr[solver_settings_t[int, double]]& unique_solver_settings, settings, @@ -675,7 +554,7 @@ def Solve(py_data_model_obj, settings, mip=False): set_solver_setting( unique_solver_settings, settings, data_model_obj, mip ) - set_data_model_view(data_model_obj) + data_model_obj.set_data_model_view() return create_solution(move(call_solve( data_model_obj.c_data_model_view.get(), @@ -683,8 +562,10 @@ def Solve(py_data_model_obj, settings, mip=False): )), data_model_obj) -cdef 
insert_vector(DataModel data_model_obj, - vector[data_model_view_t[int, double] *]& data_model_views): +cdef set_and_insert_vector( + DataModel data_model_obj, + vector[data_model_view_t[int, double] *]& data_model_views): + data_model_obj.set_data_model_view() data_model_views.push_back(data_model_obj.c_data_model_view.get()) @@ -699,8 +580,7 @@ def BatchSolve(py_data_model_list, settings): cdef vector[data_model_view_t[int, double] *] data_model_views for data_model_obj in py_data_model_list: - set_data_model_view(data_model_obj) - insert_vector(data_model_obj, data_model_views) + set_and_insert_vector(data_model_obj, data_model_views) cdef pair[ vector[unique_ptr[solver_ret_t]], diff --git a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py index 132920a865..1cc4993d29 100644 --- a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py +++ b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py @@ -26,6 +26,7 @@ CONTINUOUS, INTEGER, MAXIMIZE, + MINIMIZE, CType, Problem, VType, @@ -265,6 +266,53 @@ def test_constraint_matrix(): assert rhs == exp_rhs +def test_read_write_mps_and_relaxation(): + + # Create MIP model + m = Problem("SMALLMIP") + + # Vars: continuous, nonnegative by default + x1 = m.addVariable(name="x1", lb=0.0, vtype=INTEGER) + x2 = m.addVariable(name="x2", lb=0.0, ub=4.0, vtype=INTEGER) + x3 = m.addVariable(name="x3", lb=0.0, ub=6.0, vtype=INTEGER) + x4 = m.addVariable(name="x4", lb=0.0, vtype=INTEGER) + x5 = m.addVariable(name="x5", lb=0.0, vtype=INTEGER) + + # Objective (minimize) + m.setObjective(2 * x1 + 3 * x2 + x3 + 1 * x4 + 4 * x5, MINIMIZE) + + # Constraints (5 total) + m.addConstraint(x1 + x2 + x3 <= 10, name="c1") + m.addConstraint(2 * x1 + x3 - x4 >= 3, name="c2") + m.addConstraint(x2 + 3 * x5 == 7, name="c3") + m.addConstraint(x4 + x5 <= 8, name="c4") + m.addConstraint(x1 + x2 + x3 + x4 + x5 >= 5, name="c5") + + # Write MPS + 
m.writeMPS("small_mip.mps") + + # Read MPS and solve + prob = Problem.readMPS("small_mip.mps") + assert prob.Name == "SMALLMIP" + assert prob.IsMIP + prob.solve() + + expected_values_mip = [1.0, 1.0, 1.0, 0.0, 2.0] + assert prob.Status.name == "Optimal" + for i, v in enumerate(prob.getVariables()): + assert v.getValue() == pytest.approx(expected_values_mip[i]) + + # Relax the Problem into LP and solve + lp_prob = prob.relax() + assert not lp_prob.IsMIP + lp_prob.solve() + + expected_values_lp = [0.33333333, 0.0, 2.33333333, 0.0, 2.33333333] + assert lp_prob.Status.name == "Optimal" + for i, v in enumerate(lp_prob.getVariables()): + assert v.getValue() == pytest.approx(expected_values_lp[i]) + + def test_incumbent_solutions(): # Callback for incumbent solution From 3d4a42c39562a3f79c05aee246bbce16e350b29f Mon Sep 17 00:00:00 2001 From: Alice Boucher <160623740+aliceb-nv@users.noreply.github.com> Date: Wed, 17 Sep 2025 09:36:58 +0200 Subject: [PATCH 28/33] Remove limiting_resource_adaptor leftover (#398) The previous VRAM fix PR left a limiting_resource_adaptor in the code limiting runs on multiple GPUs to 6GB of VRAM. This was not meant to be part of the push, and this PR fixes this mistake. Authors: - Alice Boucher (https://github.com/aliceb-nv) Approvers: - Nicolas L. 
Guidotti (https://github.com/nguidotti) URL: https://github.com/NVIDIA/cuopt/pull/398 --- benchmarks/linear_programming/cuopt/run_mip.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/benchmarks/linear_programming/cuopt/run_mip.cpp b/benchmarks/linear_programming/cuopt/run_mip.cpp index fab2eea90e..41dbd9aa37 100644 --- a/benchmarks/linear_programming/cuopt/run_mip.cpp +++ b/benchmarks/linear_programming/cuopt/run_mip.cpp @@ -259,9 +259,7 @@ void run_single_file_mp(std::string file_path, { std::cout << "running file " << file_path << " on gpu : " << device << std::endl; auto memory_resource = make_async(); - auto limiting_adaptor = - rmm::mr::limiting_resource_adaptor(memory_resource.get(), 6ULL * 1024ULL * 1024ULL * 1024ULL); - rmm::mr::set_current_device_resource(&limiting_adaptor); + rmm::mr::set_current_device_resource(memory_resource.get()); int sol_found = run_single_file(file_path, device, batch_id, From 5ce1e14c2ee51559e508b61112e3f93add74705c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Akif=20=C3=87=C3=96RD=C3=9CK?= Date: Wed, 17 Sep 2025 14:54:45 +0200 Subject: [PATCH 29/33] Add sanitizer build option (#385) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR adds sanitizer build option. To use sanitizer with cuda runtime, one must follow a few steps: 1. Find libasan.so with "gcc -print-file-name=libasan.so" 2. Run the binary with env var set: LD_PRELOAD=$PATH_TO_LIBASAN ASAN_OPTIONS='protect_shadow_gap=0:replace_intrin=0' 3. 
(Optional) To run with a debugger (gdb or cuda-gdb) use the additional ASAN option alloc_dealloc_mismatch=0 Authors: - Akif ÇÖRDÜK (https://github.com/akifcorduk) Approvers: - Hugo Linsenmaier (https://github.com/hlinsen) - Trevor McKay (https://github.com/tmckayus) URL: https://github.com/NVIDIA/cuopt/pull/385 --- build.sh | 8 +++++++- cpp/CMakeLists.txt | 9 +++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/build.sh b/build.sh index 15367eb3ee..a5f57f9c9a 100755 --- a/build.sh +++ b/build.sh @@ -27,7 +27,7 @@ REPODIR=$(cd "$(dirname "$0")"; pwd) LIBCUOPT_BUILD_DIR=${LIBCUOPT_BUILD_DIR:=${REPODIR}/cpp/build} LIBMPS_PARSER_BUILD_DIR=${LIBMPS_PARSER_BUILD_DIR:=${REPODIR}/cpp/libmps_parser/build} -VALIDARGS="clean libcuopt libmps_parser cuopt_mps_parser cuopt cuopt_server cuopt_sh_client docs deb -a -b -g -v -l= --verbose-pdlp --build-lp-only --no-fetch-rapids --skip-c-python-adapters --skip-tests-build --skip-routing-build --skip-fatbin-write [--cmake-args=\\\"\\\"] [--cache-tool=] -n --allgpuarch --ci-only-arch --show_depr_warn -h --help" +VALIDARGS="clean libcuopt libmps_parser cuopt_mps_parser cuopt cuopt_server cuopt_sh_client docs deb -a -b -g -fsanitize -v -l= --verbose-pdlp --build-lp-only --no-fetch-rapids --skip-c-python-adapters --skip-tests-build --skip-routing-build --skip-fatbin-write [--cmake-args=\\\"\\\"] [--cache-tool=] -n --allgpuarch --ci-only-arch --show_depr_warn -h --help" HELP="$0 [ ...] [ ...] where is: clean - remove all existing build artifacts and configuration (start over) @@ -44,6 +44,7 @@ HELP="$0 [ ...] [ ...] -g - build for debug -a - Enable assertion (by default in debug mode) -b - Build with benchmark settings + -fsanitize - Build with sanitizer -n - no install step --no-fetch-rapids - don't fetch rapids dependencies -l= - log level. Options are: TRACE | DEBUG | INFO | WARN | ERROR | CRITICAL | OFF. 
Default=INFO @@ -85,6 +86,7 @@ BUILD_DISABLE_DEPRECATION_WARNING=ON BUILD_ALL_GPU_ARCH=0 BUILD_CI_ONLY=0 BUILD_LP_ONLY=0 +BUILD_SANITIZER=0 SKIP_C_PYTHON_ADAPTERS=0 SKIP_TESTS_BUILD=0 SKIP_ROUTING_BUILD=0 @@ -235,6 +237,9 @@ if hasArg --build-lp-only; then BUILD_LP_ONLY=1 SKIP_ROUTING_BUILD=1 # Automatically skip routing when building LP-only fi +if hasArg -fsanitize; then + BUILD_SANITIZER=1 +fi if hasArg --skip-c-python-adapters; then SKIP_C_PYTHON_ADAPTERS=1 fi @@ -345,6 +350,7 @@ if buildAll || hasArg libcuopt; then -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ -DFETCH_RAPIDS=${FETCH_RAPIDS} \ -DBUILD_LP_ONLY=${BUILD_LP_ONLY} \ + -DBUILD_SANITIZER=${BUILD_SANITIZER} \ -DSKIP_C_PYTHON_ADAPTERS=${SKIP_C_PYTHON_ADAPTERS} \ -DBUILD_TESTS=$((1 - ${SKIP_TESTS_BUILD})) \ -DSKIP_ROUTING_BUILD=${SKIP_ROUTING_BUILD} \ diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a7897e7df8..df4ad41465 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -84,6 +84,15 @@ if(CMAKE_COMPILER_IS_GNUCXX) list(APPEND CUOPT_CXX_FLAGS -Werror -Wno-error=deprecated-declarations) endif(CMAKE_COMPILER_IS_GNUCXX) +# To use sanitizer with cuda runtime, one must follow a few steps: +# 1. Find libasan.so with "gcc -print-file-name=libasan.so" +# 2. Run the binary with env var set: LD_PRELOAD=$PATH_TO_LIBASAN ASAN_OPTIONS='protect_shadow_gap=0:replace_intrin=0' +# 3. 
(Optional) To run with a debugger (gdb or cuda-gdb) use the additional ASAN option alloc_dealloc_mismatch=0 +if(BUILD_SANITIZER) + list(APPEND CUOPT_CXX_FLAGS -fsanitize=address,undefined -fno-omit-frame-pointer -g -Wno-error=maybe-uninitialized) + add_link_options(-fsanitize=address,undefined) +endif(BUILD_SANITIZER) + if(DEFINE_ASSERT) add_definitions(-DASSERT_MODE) endif(DEFINE_ASSERT) From c80d730045b2453c2acd36b890b3233d0e50d460 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Akif=20=C3=87=C3=96RD=C3=9CK?= Date: Thu, 18 Sep 2025 10:42:09 +0200 Subject: [PATCH 30/33] Heuristic Improvements: balance between generation and improvement heuristics (#382) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR changes the heuristic structure by creating a natural balance between generation and improvement. The FP/FJ loop now adds solution to the population and only if we have enough diverse solutions we exit the loop and execute the population improvement. The diversity is increased to `sqrt(n_integers)`. The recombiners are run between the current best and all other solutions in the current population, if stagnation is detected in FP/FJ loop and then the loop continues. The bounds prop rounding in the context of FP is also improved. When the dual simplex solution is set, the pdlp is warm started now with both primal and dual solutions. The default tolerance is now 1e-6 absolute tolerance and 1e-12 relative tolerance. This PR includes bug fixes on: - Appearance of inf/nan on `z` vector dual simplex phase2. - Invalid launch dimensions on FJ and hash kernels. - Timer diff and function time limit issues when the solver is run with unlimited time limit. Benchmark results in 10 mins run on H100: - Main branch: 207 feasible solutions and average gap: '28.54', 3 unfinished/crashed - This PR: 213 feasible and average gap: '23.11', 1 unfinished/crashed.
(The PR didn't have any crash before merge with main branch) closes #142 closes #374 closes #218 Authors: - Akif ÇÖRDÜK (https://github.com/akifcorduk) Approvers: - Ramakrishnap (https://github.com/rgsl888prabhu) - Alice Boucher (https://github.com/aliceb-nv) URL: https://github.com/NVIDIA/cuopt/pull/382 --- .../cuopt/mip_test_instances.hpp | 180 +------------- .../linear_programming/cuopt/run_mip.cpp | 1 + .../mip/solver_settings.hpp | 4 +- cpp/include/cuopt/logger.hpp | 4 +- .../bound_flipping_ratio_test.cpp | 6 + cpp/src/dual_simplex/branch_and_bound.cpp | 21 +- cpp/src/dual_simplex/phase2.cpp | 25 +- cpp/src/dual_simplex/presolve.cpp | 1 + .../dual_simplex/simplex_solver_settings.hpp | 2 +- .../initial_scaling.cu | 6 +- .../initial_scaling.cuh | 4 +- cpp/src/linear_programming/pdlp.cu | 4 +- .../utilities/logger_init.hpp | 2 +- cpp/src/mip/diversity/assignment_hash_map.cu | 3 +- cpp/src/mip/diversity/diversity_config.hpp | 4 +- cpp/src/mip/diversity/diversity_manager.cu | 234 +++++++----------- cpp/src/mip/diversity/diversity_manager.cuh | 22 +- cpp/src/mip/diversity/population.cu | 37 +-- cpp/src/mip/diversity/population.cuh | 15 +- .../diversity/recombiners/fp_recombiner.cuh | 10 +- .../mip/diversity/recombiners/recombiner.cuh | 3 +- .../recombiners/recombiner_stats.hpp | 5 + cpp/src/mip/diversity/recombiners/sub_mip.cuh | 15 ++ .../mip/feasibility_jump/feasibility_jump.cu | 10 +- .../mip/feasibility_jump/feasibility_jump.cuh | 16 +- .../feasibility_pump/feasibility_pump.cu | 34 +-- .../feasibility_pump/feasibility_pump.cuh | 2 - cpp/src/mip/local_search/local_search.cu | 224 +++++++++++------ cpp/src/mip/local_search/local_search.cuh | 21 +- .../local_search/rounding/constraint_prop.cu | 67 +++-- .../local_search/rounding/constraint_prop.cuh | 5 +- .../load_balanced_bounds_presolve_helpers.cuh | 2 +- cpp/src/mip/presolve/probing_cache.cu | 2 +- cpp/src/mip/presolve/trivial_presolve.cuh | 8 +- cpp/src/mip/problem/problem.cu | 2 +- 
cpp/src/mip/relaxed_lp/lp_state.cuh | 12 +- cpp/src/mip/relaxed_lp/relaxed_lp.cu | 6 +- cpp/src/mip/relaxed_lp/relaxed_lp.cuh | 3 +- cpp/src/mip/solve.cu | 16 +- cpp/src/mip/solver.cu | 9 +- cpp/tests/mip/bounds_standardization_test.cu | 5 +- cpp/tests/mip/elim_var_remap_test.cu | 10 +- cpp/tests/mip/load_balancing_test.cu | 2 +- cpp/tests/mip/multi_probe_test.cu | 3 +- docs/cuopt/source/lp-milp-settings.rst | 4 +- 45 files changed, 522 insertions(+), 549 deletions(-) diff --git a/benchmarks/linear_programming/cuopt/mip_test_instances.hpp b/benchmarks/linear_programming/cuopt/mip_test_instances.hpp index ac7c673590..22be74dadc 100644 --- a/benchmarks/linear_programming/cuopt/mip_test_instances.hpp +++ b/benchmarks/linear_programming/cuopt/mip_test_instances.hpp @@ -17,175 +17,11 @@ #pragma once #include #include -std::vector instances = {"30n20b8.mps", - "50v-10.mps", - "CMS750_4.mps", - "academictimetablesmall.mps", - "air05.mps", - "app1-1.mps", - "app1-2.mps", - "assign1-5-8.mps", - "atlanta-ip.mps", - "bab2.mps", - "bab6.mps", - "beasleyC3.mps", - "binkar10_1.mps", - "blp-ar98.mps", - "blp-ic98.mps", - "bppc4-08.mps", - "brazil3.mps", - "cmflsp50-24-8-8.mps", - "co-100.mps", - "cod105.mps", - "comp07-2idx.mps", - "comp21-2idx.mps", - "csched007.mps", - "csched008.mps", - "cvs16r128-89.mps", - "dano3_3.mps", - "decomp2.mps", - "drayage-100-23.mps", - "drayage-25-23.mps", - "eil33-2.mps", - "eilA101-2.mps", - "exp-1-500-5-5.mps", - "fast0507.mps", - "fastxgemm-n2r6s0t2.mps", - "fiball.mps", - "gen-ip002.mps", - "germanrr.mps", - "glass4.mps", - "graph20-20-1rand.mps", - "graphdraw-domain.mps", - "h80x6320d.mps", - "highschool1-aigio.mps", - "hypothyroid-k1.mps", - "icir97_tension.mps", - "irish-electricity.mps", - "istanbul-no-cutoff.mps", - "k1mushroom.mps", - "lectsched-5-obj.mps", - "leo1.mps", - "leo2.mps", - "lotsize.mps", - "mad.mps", - "map10.mps", - "map16715-04.mps", - "markshare2.mps", - "markshare_4_0.mps", - "mas74.mps", - "mc11.mps", - "mcsched.mps", 
- "mik-250-20-75-4.mps", - "momentum1.mps", - "mushroom-best.mps", - "mzzv11.mps", - "mzzv42z.mps", - "n2seq36q.mps", - "n3div36.mps", - "neos-1171448.mps", - "neos-1171737.mps", - "neos-1354092.mps", - "neos-1445765.mps", - "neos-1456979.mps", - "neos-1582420.mps", - "neos-2657525-crna.mps", - "neos-2746589-doon.mps", - "neos-3024952-loue.mps", - "neos-3046615-murg.mps", - "neos-3216931-puriri.mps", - "neos-3402294-bobin.mps", - "neos-3656078-kumeu.mps", - "neos-3754480-nidda.mps", - "neos-4300652-rahue.mps", - "neos-4338804-snowy.mps", - "neos-4387871-tavua.mps", - "neos-4413714-turia.mps", - "neos-4532248-waihi.mps", - "neos-4722843-widden.mps", - "neos-4738912-atrato.mps", - "neos-4763324-toguru.mps", - "neos-4954672-berkel.mps", - "neos-5049753-cuanza.mps", - "neos-5093327-huahum.mps", - "neos-5107597-kakapo.mps", - "neos-5114902-kasavu.mps", - "neos-5188808-nattai.mps", - "neos-5195221-niemur.mps", - "neos-662469.mps", - "neos-787933.mps", - "neos-848589.mps", - "neos-860300.mps", - "neos-911970.mps", - "neos-933966.mps", - "neos-950242.mps", - "neos17.mps", - "neos5.mps", - "net12.mps", - "netdiversion.mps", - "nexp-150-20-8-5.mps", - "ns1644855.mps", - "ns1760995.mps", - "ns1830653.mps", - "nursesched-medium-hint03.mps", - "nursesched-sprint02.mps", - "opm2-z10-s4.mps", - "pg.mps", - "physiciansched3-3.mps", - "piperout-08.mps", - "piperout-27.mps", - "pk1.mps", - "qap10.mps", - "radiationm18-12-05.mps", - "radiationm40-10-02.mps", - "rail01.mps", - "rail02.mps", - "rail507.mps", - "ran14x18-disj-8.mps", - "rmatr100-p10.mps", - "rmatr200-p5.mps", - "rocI-4-11.mps", - "rocII-5-11.mps", - "rococoB10-011000.mps", - "rococoC10-001000.mps", - "roi2alpha3n4.mps", - "roi5alpha10n8.mps", - "roll3000.mps", - "s100.mps", - "s250r10.mps", - "satellites2-40.mps", - "satellites2-60-fs.mps", - "savsched1.mps", - "sct2.mps", - "seymour.mps", - "seymour1.mps", - "sing326.mps", - "sing44.mps", - "sorrell3.mps", - "sp97ar.mps", - "sp98ar.mps", - "splice1k1.mps", - 
"square41.mps", - "square47.mps", - "supportcase10.mps", - "supportcase12.mps", - "supportcase18.mps", - "supportcase26.mps", - "supportcase33.mps", - "supportcase40.mps", - "supportcase42.mps", - "supportcase6.mps", - "supportcase7.mps", - "swath1.mps", - "swath3.mps", - "tbfp-network.mps", - "thor50dday.mps", - "timtab1.mps", - "tr12-30.mps", - "traininstance2.mps", - "traininstance6.mps", - "trento1.mps", - "uccase12.mps", - "uct-subprob.mps", - "unitcal_7.mps", - "var-smallemery-m6j6.mps"}; +std::vector instances = {"supportcase26_presolved.mps", + "supportcase26_presolved_2.mps", + "supportcase26_presolved_3.mps", + "supportcase26_presolved_4.mps", + "supportcase26_presolved_5.mps", + "supportcase26_presolved_6.mps", + "supportcase26_presolved_7.mps", + "supportcase26_presolved_8.mps"}; diff --git a/benchmarks/linear_programming/cuopt/run_mip.cpp b/benchmarks/linear_programming/cuopt/run_mip.cpp index 41dbd9aa37..f6b30e72ce 100644 --- a/benchmarks/linear_programming/cuopt/run_mip.cpp +++ b/benchmarks/linear_programming/cuopt/run_mip.cpp @@ -210,6 +210,7 @@ int run_single_file(std::string file_path, settings.log_to_console = log_to_console; settings.tolerances.relative_tolerance = 1e-12; settings.tolerances.absolute_tolerance = 1e-6; + settings.presolve = true; cuopt::linear_programming::benchmark_info_t benchmark_info; settings.benchmark_info_ptr = &benchmark_info; auto start_run_solver = std::chrono::high_resolution_clock::now(); diff --git a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp index 4d4d29eaf9..1750f2a03e 100644 --- a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp @@ -72,8 +72,8 @@ class mip_solver_settings_t { struct tolerances_t { f_t presolve_absolute_tolerance = 1.0e-6; - f_t absolute_tolerance = 1.0e-4; - f_t relative_tolerance = 1.0e-6; + f_t absolute_tolerance = 1.0e-6; + f_t 
relative_tolerance = 1.0e-12; f_t integrality_tolerance = 1.0e-5; f_t absolute_mip_gap = 1.0e-10; f_t relative_mip_gap = 1.0e-4; diff --git a/cpp/include/cuopt/logger.hpp b/cpp/include/cuopt/logger.hpp index 5fb42b62d3..f8f4e200e0 100644 --- a/cpp/include/cuopt/logger.hpp +++ b/cpp/include/cuopt/logger.hpp @@ -78,7 +78,7 @@ inline rapids_logger::logger& default_logger() logger_.set_pattern(default_pattern()); #endif logger_.set_level(default_level()); - logger_.flush_on(rapids_logger::level_enum::info); + logger_.flush_on(rapids_logger::level_enum::debug); return logger_; }(); @@ -100,7 +100,7 @@ inline void reset_default_logger() default_logger().set_pattern(default_pattern()); #endif default_logger().set_level(default_level()); - default_logger().flush_on(rapids_logger::level_enum::info); + default_logger().flush_on(rapids_logger::level_enum::debug); } } // namespace cuopt diff --git a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp index 11753cbcb7..1a513f4934 100644 --- a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp +++ b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp @@ -93,6 +93,11 @@ i_t bound_flipping_ratio_test_t::single_pass(i_t start, } step_length = min_val; nonbasic_entering = candidate; + // this should be temporary, find root causes where the candidate is not filled + if (nonbasic_entering == -1) { + // -1,-2 and -3 are reserved for other things + return -4; + } const i_t j = entering_index = nonbasic_list_[nonbasic_entering]; constexpr bool verbose = false; @@ -137,6 +142,7 @@ i_t bound_flipping_ratio_test_t::compute_step_length(f_t& step_length, i_t k_idx = single_pass( 0, num_breakpoints, indicies, ratios, slope, step_length, nonbasic_entering, entering_index); + if (k_idx == -4) { return -4; } bool continue_search = k_idx >= 0 && num_breakpoints > 1 && slope > 0.0; if (!continue_search) { if constexpr (0) { diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp 
b/cpp/src/dual_simplex/branch_and_bound.cpp index 898f0a85ef..2986de0184 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -409,9 +409,12 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut assert(root_vstatus.size() == original_lp.num_cols); if (root_status == lp_status_t::INFEASIBLE) { settings.log.printf("MIP Infeasible\n"); - if (settings.heuristic_preemption_callback != nullptr) { - settings.heuristic_preemption_callback(); - } + // FIXME: rarely dual simplex detects infeasible whereas it is feasible. + // to add a small safety net, check if there is a primal solution already. + // Uncomment this if the issue with cost266-UUE is resolved + // if (settings.heuristic_preemption_callback != nullptr) { + // settings.heuristic_preemption_callback(); + // } return mip_status_t::INFEASIBLE; } if (root_status == lp_status_t::UNBOUNDED) { @@ -434,8 +437,16 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut if (settings.set_simplex_solution_callback != nullptr) { std::vector original_x; uncrush_primal_solution(original_problem, original_lp, root_relax_soln.x, original_x); - settings.set_simplex_solution_callback(original_x, - compute_user_objective(original_lp, root_objective)); + std::vector original_dual; + std::vector original_z; + uncrush_dual_solution(original_problem, + original_lp, + root_relax_soln.y, + root_relax_soln.z, + original_dual, + original_z); + settings.set_simplex_solution_callback( + original_x, original_dual, compute_user_objective(original_lp, root_objective)); } mutex_lower.lock(); f_t lower_bound = lower_bound_ = root_objective; diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 7383f42216..370badf332 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -1495,13 +1495,13 @@ void compute_delta_y(const basis_update_mpf_t& ft, } template -void update_dual_variables(const sparse_vector_t& delta_y_sparse, 
- const std::vector& delta_z_indices, - const std::vector& delta_z, - f_t step_length, - i_t leaving_index, - std::vector& y, - std::vector& z) +i_t update_dual_variables(const sparse_vector_t& delta_y_sparse, + const std::vector& delta_z_indices, + const std::vector& delta_z, + f_t step_length, + i_t leaving_index, + std::vector& y, + std::vector& z) { // Update dual variables // y <- y + steplength * delta_y @@ -1517,6 +1517,7 @@ void update_dual_variables(const sparse_vector_t& delta_y_sparse, z[j] += step_length * delta_z[j]; } z[leaving_index] += step_length * delta_z[leaving_index]; + return 0; } template @@ -2514,6 +2515,10 @@ dual::status_t dual_phase2(i_t phase, delta_z_indices, nonbasic_mark); entering_index = bfrt.compute_step_length(step_length, nonbasic_entering_index); + if (entering_index == -4) { + settings.log.printf("Numerical issues encountered in ratio test.\n"); + return dual::status_t::NUMERICAL; + } timers.bfrt_time += timers.stop_timer(); } else { entering_index = phase2::phase2_ratio_test( @@ -2663,8 +2668,12 @@ dual::status_t dual_phase2(i_t phase, // Update dual variables // y <- y + steplength * delta_y // z <- z + steplength * delta_z - phase2::update_dual_variables( + i_t update_dual_variables_status = phase2::update_dual_variables( delta_y_sparse, delta_z_indices, delta_z, step_length, leaving_index, y, z); + if (update_dual_variables_status == -1) { + settings.log.printf("Numerical issues encountered in update_dual_variables.\n"); + return dual::status_t::NUMERICAL; + } timers.vector_time += timers.stop_timer(); #ifdef COMPUTE_DUAL_RESIDUAL diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index 48d696bf96..806f171d7a 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -1134,6 +1134,7 @@ void uncrush_dual_solution(const user_problem_t& user_problem, std::vector& user_y, std::vector& user_z) { + user_y.resize(user_problem.num_rows); // Reduced costs are uncrushed 
just like the primal solution uncrush_primal_solution(user_problem, problem, z, user_z); diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index a51ed19bcf..5b7e8bf0fa 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -109,7 +109,7 @@ struct simplex_solver_settings_t { i_t inside_mip; // 0 if outside MIP, 1 if inside MIP at root node, 2 if inside MIP at leaf node std::function&, f_t)> solution_callback; std::function heuristic_preemption_callback; - std::function&, f_t)> set_simplex_solution_callback; + std::function&, std::vector&, f_t)> set_simplex_solution_callback; mutable logger_t log; std::atomic* concurrent_halt; // if nullptr ignored, if !nullptr, 0 if solver should // continue, 1 if solver should halt diff --git a/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cu b/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cu index 72931267ad..3eae936902 100644 --- a/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cu +++ b/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cu @@ -39,17 +39,17 @@ pdlp_initial_scaling_strategy_t::pdlp_initial_scaling_strategy_t( problem_t& op_problem_scaled, i_t number_of_ruiz_iterations, f_t alpha, - pdhg_solver_t& pdhg_solver, rmm::device_uvector& A_T, rmm::device_uvector& A_T_offsets, rmm::device_uvector& A_T_indices, + pdhg_solver_t* pdhg_solver_ptr, bool running_mip) : handle_ptr_(handle_ptr), stream_view_(handle_ptr_->get_stream()), primal_size_h_(op_problem_scaled.n_variables), dual_size_h_(op_problem_scaled.n_constraints), op_problem_scaled_(op_problem_scaled), - pdhg_solver_(pdhg_solver), + pdhg_solver_ptr_(pdhg_solver_ptr), A_T_(A_T), A_T_offsets_(A_T_offsets), A_T_indices_(A_T_indices), @@ -398,7 +398,7 @@ void pdlp_initial_scaling_strategy_t::scale_problem() op_problem_scaled_.is_scaled_ = true; if (!running_mip_) { 
- scale_solutions(pdhg_solver_.get_primal_solution(), pdhg_solver_.get_dual_solution()); + scale_solutions(pdhg_solver_ptr_->get_primal_solution(), pdhg_solver_ptr_->get_dual_solution()); } } diff --git a/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cuh b/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cuh index 368b12770f..0cd01aa9ab 100644 --- a/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cuh +++ b/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cuh @@ -55,10 +55,10 @@ class pdlp_initial_scaling_strategy_t { problem_t& op_problem_scaled, i_t number_of_ruiz_iterations, f_t alpha, - pdhg_solver_t& pdhg_solver, rmm::device_uvector& A_T, rmm::device_uvector& A_T_offsets, rmm::device_uvector& A_T_indices, + pdhg_solver_t* pdhg_solver_ptr, bool running_mip = false); void scale_problem(); @@ -98,7 +98,7 @@ class pdlp_initial_scaling_strategy_t { rmm::device_uvector cummulative_constraint_matrix_scaling_; rmm::device_uvector cummulative_variable_scaling_; - pdhg_solver_t& pdhg_solver_; + pdhg_solver_t* pdhg_solver_ptr_; rmm::device_uvector& A_T_; rmm::device_uvector& A_T_offsets_; rmm::device_uvector& A_T_indices_; diff --git a/cpp/src/linear_programming/pdlp.cu b/cpp/src/linear_programming/pdlp.cu index 9f2de16f00..45d0b8b9c0 100644 --- a/cpp/src/linear_programming/pdlp.cu +++ b/cpp/src/linear_programming/pdlp.cu @@ -75,10 +75,10 @@ pdlp_solver_t::pdlp_solver_t(problem_t& op_problem, op_problem_scaled_, pdlp_hyper_params::default_l_inf_ruiz_iterations, (f_t)pdlp_hyper_params::default_alpha_pock_chambolle_rescaling, - pdhg_solver_, op_problem_scaled_.reverse_coefficients, op_problem_scaled_.reverse_offsets, - op_problem_scaled_.reverse_constraints}, + op_problem_scaled_.reverse_constraints, + &pdhg_solver_}, average_op_problem_evaluation_cusparse_view_{handle_ptr_, op_problem, unscaled_primal_avg_solution_, diff --git a/cpp/src/linear_programming/utilities/logger_init.hpp 
b/cpp/src/linear_programming/utilities/logger_init.hpp index 2448373915..093c4ae1c2 100644 --- a/cpp/src/linear_programming/utilities/logger_init.hpp +++ b/cpp/src/linear_programming/utilities/logger_init.hpp @@ -42,7 +42,7 @@ class init_logger_t { #else cuopt::default_logger().set_pattern(cuopt::default_pattern()); #endif - cuopt::default_logger().flush_on(rapids_logger::level_enum::info); + cuopt::default_logger().flush_on(rapids_logger::level_enum::debug); } } ~init_logger_t() { cuopt::reset_default_logger(); } diff --git a/cpp/src/mip/diversity/assignment_hash_map.cu b/cpp/src/mip/diversity/assignment_hash_map.cu index 24d0051b37..5e696a6789 100644 --- a/cpp/src/mip/diversity/assignment_hash_map.cu +++ b/cpp/src/mip/diversity/assignment_hash_map.cu @@ -90,8 +90,7 @@ void assignment_hash_map_t::fill_integer_assignment(solution_t size_t assignment_hash_map_t::hash_solution(solution_t& solution) { - const int TPB = 1024; - + const int TPB = 256; fill_integer_assignment(solution); thrust::fill( solution.handle_ptr->get_thrust_policy(), reduction_buffer.begin(), reduction_buffer.end(), 0); diff --git a/cpp/src/mip/diversity/diversity_config.hpp b/cpp/src/mip/diversity/diversity_config.hpp index c38555ab90..6acac8fbb5 100644 --- a/cpp/src/mip/diversity/diversity_config.hpp +++ b/cpp/src/mip/diversity/diversity_config.hpp @@ -22,7 +22,7 @@ namespace cuopt::linear_programming::detail { struct diversity_config_t { double time_ratio_on_init_lp = 0.1; double max_time_on_lp = 30; - double time_ratio_of_probing_cache = 0.10; + double time_ratio_of_probing_cache = 0.04; double max_time_on_probing = 60; size_t max_iterations_without_improvement = 15; int max_var_diff = 256; @@ -38,7 +38,7 @@ struct diversity_config_t { double max_fast_sol_time = 10; double lp_run_time_if_feasible = 15.; double lp_run_time_if_infeasible = 1; - bool halve_population = true; + bool halve_population = false; }; } // namespace cuopt::linear_programming::detail diff --git 
a/cpp/src/mip/diversity/diversity_manager.cu b/cpp/src/mip/diversity/diversity_manager.cu index 726eb5b41a..0fb3f9fc08 100644 --- a/cpp/src/mip/diversity/diversity_manager.cu +++ b/cpp/src/mip/diversity/diversity_manager.cu @@ -27,7 +27,6 @@ constexpr bool from_dir = false; constexpr bool fj_only_run = false; -constexpr bool fp_only_run = false; namespace cuopt::linear_programming::detail { @@ -47,19 +46,27 @@ diversity_manager_t::diversity_manager_t(mip_solver_context_tn_constraints), lp_optimal_solution(context.problem_ptr->n_variables, context.problem_ptr->handle_ptr->get_stream()), + lp_dual_optimal_solution(context.problem_ptr->n_constraints, + context.problem_ptr->handle_ptr->get_stream()), ls(context, lp_optimal_solution), timer(diversity_config.default_time_limit), bound_prop_recombiner(context, context.problem_ptr->n_variables, ls.constraint_prop, context.problem_ptr->handle_ptr), - fp_recombiner( - context, context.problem_ptr->n_variables, ls.fp, context.problem_ptr->handle_ptr), + fp_recombiner(context, + context.problem_ptr->n_variables, + ls.fj, + ls.constraint_prop, + ls.line_segment_search, + lp_optimal_solution, + context.problem_ptr->handle_ptr), line_segment_recombiner(context, context.problem_ptr->n_variables, ls.line_segment_search, @@ -73,7 +80,7 @@ diversity_manager_t::diversity_manager_t(mip_solver_context_t::n_of_arms, cuopt::seed_generator::get_seed(), ls_alpha, "ls"), - assignment_hash_map(*context.problem_ptr) + ls_hash_map(*context.problem_ptr) { // Read configuration ID from environment variable int max_config = -1; @@ -110,9 +117,9 @@ bool diversity_manager_t::run_local_search(solution_t& solut { i_t ls_mab_option = mab_ls.select_mab_option(); mab_ls_config_t::get_local_search_and_lm_from_config(ls_mab_option, ls_config); - assignment_hash_map.insert(solution); + ls_hash_map.insert(solution); constexpr i_t skip_solutions_threshold = 3; - if (assignment_hash_map.check_skip_solution(solution, skip_solutions_threshold)) { return false; } 
+ if (ls_hash_map.check_skip_solution(solution, skip_solutions_threshold)) { return false; } ls.run_local_search(solution, weights, timer, ls_config); return true; } @@ -172,30 +179,10 @@ solution_t diversity_manager_t::generate_solution(f_t time_l { solution_t sol(*problem_ptr); sol.compute_feasibility(); - ls.generate_solution(sol, random_start, population.early_exit_primal_generation, time_limit); + ls.generate_solution(sol, random_start, &population, time_limit); return sol; } -template -void diversity_manager_t::generate_add_solution( - std::vector>& initial_sol_vector, f_t time_limit, bool random_start) -{ - // TODO check weights here if they are all similar - // do a local search than add it searched solution as well - initial_sol_vector.emplace_back(generate_solution(time_limit, random_start)); -} - -template -void diversity_manager_t::average_fj_weights(i_t i) -{ - thrust::transform(problem_ptr->handle_ptr->get_thrust_policy(), - population.weights.cstr_weights.begin(), - population.weights.cstr_weights.end(), - ls.fj.cstr_weights.begin(), - population.weights.cstr_weights.begin(), - [i] __device__(f_t w1, f_t w2) { return (w1 * i + w2) / (i + 1); }); -} - template void diversity_manager_t::add_user_given_solutions( std::vector>& initial_sol_vector) @@ -227,74 +214,6 @@ void diversity_manager_t::add_user_given_solutions( } } -// if 60% of the time, exit -// if 20% of the time finishes and we generate 5 solutions -template -void diversity_manager_t::generate_initial_solutions() -{ - add_user_given_solutions(initial_sol_vector); - bool skip_initial_island_generation = - initial_sol_vector.size() > diversity_config.n_sol_for_skip_init_gen || from_dir; - // allocate maximum of 40% of the time to the initial island generation - // aim to generate at least 5 feasible solutions thus spending 8% of the time to generate a - // solution if we can generate faster generate up to 10 sols - const f_t generation_time_limit = - diversity_config.generation_time_limit_ratio 
* timer.get_time_limit(); - const f_t max_island_gen_time = diversity_config.max_island_gen_time; - f_t total_island_gen_time = std::min(generation_time_limit, max_island_gen_time); - timer_t gen_timer(total_island_gen_time); - f_t sol_time_limit = gen_timer.remaining_time(); - for (i_t i = 0; i < diversity_config.maximum_island_size && !skip_initial_island_generation; - ++i) { - if (check_b_b_preemption()) { return; } - if (i + population.get_external_solution_size() >= 5) { break; } - CUOPT_LOG_DEBUG("Generating sol %d", i); - bool is_first_sol = (i == 0); - if (i == 1) { - sol_time_limit = gen_timer.remaining_time() / (diversity_config.initial_island_size - 1); - } - // in first iteration, definitely generate feasible - if (is_first_sol) { - sol_time_limit = gen_timer.remaining_time(); - ls.fj.reset_weights(problem_ptr->handle_ptr->get_stream()); - } - // in other iterations(when there is at least one feasible) - else { - ls.fj.randomize_weights(problem_ptr->handle_ptr); - } - generate_add_solution(initial_sol_vector, sol_time_limit, !is_first_sol); - if (is_first_sol && initial_sol_vector.back().get_feasible()) { - CUOPT_LOG_DEBUG("First FP/FJ solution found at %f with objective %f", - timer.elapsed_time(), - initial_sol_vector.back().get_user_objective()); - } - population.run_solution_callbacks(initial_sol_vector.back()); - // run ls on the generated solutions - solution_t searched_sol(initial_sol_vector.back()); - ls_config_t ls_config; - run_local_search(searched_sol, population.weights, gen_timer, ls_config); - population.run_solution_callbacks(searched_sol); - initial_sol_vector.emplace_back(std::move(searched_sol)); - average_fj_weights(i); - // run ls on the solutions - // if at least initial_island_size solutions are generated and time limit is reached - if (i >= diversity_config.initial_island_size || gen_timer.check_time_limit()) { break; } - } - CUOPT_LOG_DEBUG("Initial unsearched solutions are generated!"); - i_t actual_island_size = 
initial_sol_vector.size(); - population.normalize_weights(); - // find diversity of the population - population.find_diversity(initial_sol_vector, diversity_config.use_avg_diversity); - population.add_solutions_from_vec(std::move(initial_sol_vector)); - population.update_qualities(); - CUOPT_LOG_DEBUG("Initial population generated, size %d var_threshold %d", - population.current_size(), - population.var_threshold); - population.print(); - auto new_sol_vector = population.get_external_solutions(); - if (!fj_only_run && !fp_only_run) { recombine_and_ls_with_all(new_sol_vector); } -} - template bool diversity_manager_t::run_presolve(f_t time_limit) { @@ -320,7 +239,13 @@ bool diversity_manager_t::run_presolve(f_t time_limit) } stats.presolve_time = presolve_timer.elapsed_time(); lp_optimal_solution.resize(problem_ptr->n_variables, problem_ptr->handle_ptr->get_stream()); + lp_dual_optimal_solution.resize(problem_ptr->n_constraints, + problem_ptr->handle_ptr->get_stream()); problem_ptr->handle_ptr->sync_stream(); + CUOPT_LOG_INFO("After trivial presolve #constraints %d #variables %d objective offset %f.", + problem_ptr->n_constraints, + problem_ptr->n_variables, + problem_ptr->presolve_data.objective_offset); return true; } @@ -356,7 +281,7 @@ void diversity_manager_t::generate_quick_feasible_solution() template bool diversity_manager_t::check_b_b_preemption() { - if (population.preempt_heuristic_solver_) { + if (population.preempt_heuristic_solver_.load()) { if (population.current_size() == 0) { population.allocate_solutions(); } auto new_sol_vector = population.get_external_solutions(); population.add_solutions_from_vec(std::move(new_sol_vector)); @@ -385,7 +310,7 @@ template void diversity_manager_t::run_fp_alone(solution_t& solution) { CUOPT_LOG_INFO("Running FP alone!"); - ls.run_fp(solution, timer, &population.weights, false); + ls.run_fp(solution, timer, &population); CUOPT_LOG_INFO("FP alone finished!"); } @@ -418,7 +343,7 @@ solution_t 
diversity_manager_t::run_solver() population.initialize_population(); if (check_b_b_preemption()) { return population.best_feasible(); } // before probing cache or LP, run FJ to generate initial primal feasible solution - if (!from_dir && !fp_only_run && !fj_only_run) { generate_quick_feasible_solution(); } + if (!from_dir && !fj_only_run) { generate_quick_feasible_solution(); } const f_t time_ratio_of_probing_cache = diversity_config.time_ratio_of_probing_cache; const f_t max_time_on_probing = diversity_config.max_time_on_probing; f_t time_for_probing_cache = @@ -433,11 +358,7 @@ solution_t diversity_manager_t::run_solver() lp_state_t& lp_state = problem_ptr->lp_state; // resize because some constructor might be called before the presolve lp_state.resize(*problem_ptr, problem_ptr->handle_ptr->get_stream()); - bool bb_thread_solution_exists = false; - { - std::lock_guard guard(relaxed_solution_mutex); - bb_thread_solution_exists = simplex_solution_exists; - } // Mutex is unlocked here + bool bb_thread_solution_exists = simplex_solution_exists.load(); if (bb_thread_solution_exists) { ls.lp_optimal_exists = true; } else if (!fj_only_run) { @@ -447,17 +368,28 @@ solution_t diversity_manager_t::run_solver() lp_settings.return_first_feasible = false; lp_settings.save_state = true; lp_settings.concurrent_halt = &global_concurrent_halt; + lp_settings.has_initial_primal = false; rmm::device_uvector lp_optimal_solution_copy(lp_optimal_solution.size(), problem_ptr->handle_ptr->get_stream()); auto lp_result = get_relaxed_lp_solution(*problem_ptr, lp_optimal_solution_copy, lp_state, lp_settings); { std::lock_guard guard(relaxed_solution_mutex); - if (!simplex_solution_exists) { + if (!simplex_solution_exists.load()) { raft::copy(lp_optimal_solution.data(), lp_optimal_solution_copy.data(), lp_optimal_solution.size(), problem_ptr->handle_ptr->get_stream()); + } else { + // copy the lp state + raft::copy(lp_state.prev_primal.data(), + lp_optimal_solution.data(), + 
lp_optimal_solution.size(), + problem_ptr->handle_ptr->get_stream()); + raft::copy(lp_state.prev_dual.data(), + lp_dual_optimal_solution.data(), + lp_dual_optimal_solution.size(), + problem_ptr->handle_ptr->get_stream()); } } problem_ptr->handle_ptr->sync_stream(); @@ -481,29 +413,35 @@ solution_t diversity_manager_t::run_solver() } population.allocate_solutions(); + population.add_solutions_from_vec(std::move(initial_sol_vector)); if (check_b_b_preemption()) { return population.best_feasible(); } - if (!fp_only_run) { - // generate a population with 5 solutions(FP+FJ) - generate_initial_solutions(); - } + if (context.settings.benchmark_info_ptr != nullptr) { context.settings.benchmark_info_ptr->objective_of_initial_population = population.best_feasible().get_user_objective(); } if (fj_only_run) { - run_fj_alone(population.best_feasible()); - return population.best_feasible(); - } - - if (fp_only_run) { - auto sol = generate_solution(timer.remaining_time(), false); - run_fp_alone(sol); + solution_t sol(*problem_ptr); + run_fj_alone(sol); return sol; } - if (timer.check_time_limit()) { return population.best_feasible(); } + auto sol = generate_solution(timer.remaining_time(), false); + population.add_solution(std::move(solution_t(sol))); + if (timer.check_time_limit()) { + auto new_sol_vector = population.get_external_solutions(); + population.add_solutions_from_vec(std::move(new_sol_vector)); + return population.best_feasible(); + } + run_fp_alone(sol); + population.update_weights(); + if (timer.check_time_limit()) { + auto new_sol_vector = population.get_external_solutions(); + population.add_solutions_from_vec(std::move(new_sol_vector)); + return population.best_feasible(); + } main_loop(); return population.best_feasible(); @@ -557,24 +495,33 @@ void diversity_manager_t::set_new_user_bound(f_t new_bound) } template -void diversity_manager_t::recombine_and_ls_with_all(solution_t& solution) +void diversity_manager_t::recombine_and_ls_with_all(solution_t& solution, 
+ bool add_only_feasible) { raft::common::nvtx::range fun_scope("recombine_and_ls_with_all"); + if (population.population_hash_map.check_skip_solution(solution, 1)) { return; } auto population_vector = population.population_to_vector(); for (auto& curr_sol : population_vector) { - if (check_b_b_preemption()) { return; } - if (curr_sol.get_feasible()) { - auto [offspring, lp_offspring] = recombine_and_local_search(curr_sol, solution); - i_t inserted_pos_1 = population.add_solution(std::move(lp_offspring)); - i_t inserted_pos_2 = population.add_solution(std::move(offspring)); - if (timer.check_time_limit()) { return; } + for (const auto recombiner_type : recombiner_types) { + if (check_b_b_preemption()) { return; } + if (curr_sol.get_feasible()) { + auto [offspring, lp_offspring] = + recombine_and_local_search(curr_sol, solution, recombiner_type); + if (!add_only_feasible || lp_offspring.get_feasible()) { + population.add_solution(std::move(lp_offspring)); + } + if (!add_only_feasible || offspring.get_feasible()) { + population.add_solution(std::move(offspring)); + } + if (timer.check_time_limit()) { return; } + } } } } template void diversity_manager_t::recombine_and_ls_with_all( - std::vector>& solutions) + std::vector>& solutions, bool add_only_feasible) { raft::common::nvtx::range fun_scope("recombine_and_ls_with_all"); if (solutions.size() > 0) { @@ -593,10 +540,10 @@ void diversity_manager_t::recombine_and_ls_with_all( // TODO try if running LP with integers fixed makes it feasible if (ls_solution.get_feasible()) { CUOPT_LOG_DEBUG("External LS searched solution feasible, running recombiners!"); - recombine_and_ls_with_all(ls_solution); + recombine_and_ls_with_all(ls_solution, add_only_feasible); } else { CUOPT_LOG_DEBUG("External solution feasible, running recombiners!"); - recombine_and_ls_with_all(sol); + recombine_and_ls_with_all(sol, add_only_feasible); } } } @@ -607,6 +554,7 @@ void diversity_manager_t::main_loop() { 
population.start_threshold_adjustment(); recombine_stats.reset(); + population.print(); while (true) { if (check_b_b_preemption()) { break; } CUOPT_LOG_DEBUG("Running a new step"); @@ -636,13 +584,11 @@ void diversity_manager_t::main_loop() population.find_diversity(current_population, diversity_config.use_avg_diversity); // if the threshold is lower than the threshold we progress with time // set it to the higher threshold - // population.var_threshold = max(population.var_threshold, prev_threshold); population.add_solutions_from_vec(std::move(current_population)); } else { // increase the threshold/decrease the diversity population.adjust_threshold(timer); } - // population.add_solutions_from_vec(std::move(new_solutions)); // idea to try, we can average the weights of the new solutions population.update_weights(); population.print(); @@ -678,7 +624,8 @@ void diversity_manager_t::check_better_than_both(solution_t& template std::pair, solution_t> diversity_manager_t::recombine_and_local_search(solution_t& sol1, - solution_t& sol2) + solution_t& sol2, + recombiner_enum_t recombiner_type) { raft::common::nvtx::range fun_scope("recombine_and_local_search"); CUOPT_LOG_DEBUG("Recombining sol cost:feas %f : %d and %f : %d", @@ -689,7 +636,7 @@ diversity_manager_t::recombine_and_local_search(solution_t& double best_objective_of_parents = std::min(sol1.get_objective(), sol2.get_objective()); bool at_least_one_parent_feasible = sol1.get_feasible() || sol2.get_feasible(); // randomly choose among 3 recombiners - auto [offspring, success] = recombine(sol1, sol2); + auto [offspring, success] = recombine(sol1, sol2, recombiner_type); if (!success) { // add the attempt mab_recombiner.add_mab_reward(static_cast(recombine_stats.get_last_attempt()), @@ -761,11 +708,11 @@ diversity_manager_t::recombine_and_local_search(solution_t& population.best().get_quality(population.weights), offspring_qual, recombiner_work_normalized_reward_t(recombine_stats.get_last_recombiner_time())); - // 
mab_ls.add_mab_reward(mab_ls_config_t::last_ls_mab_option, - // best_quality_of_parents, - // population.best_feasible().get_quality(population.weights), - // offspring_qual, - // ls_work_normalized_reward_t(mab_ls_config_t::last_lm_config)); + mab_ls.add_mab_reward(mab_ls_config_t::last_ls_mab_option, + best_quality_of_parents, + population.best_feasible().get_quality(population.weights), + offspring_qual, + ls_work_normalized_reward_t(mab_ls_config_t::last_lm_config)); if (context.settings.benchmark_info_ptr != nullptr) { check_better_than_both(offspring, sol1, sol2); check_better_than_both(lp_offspring, sol1, sol2); @@ -775,7 +722,7 @@ diversity_manager_t::recombine_and_local_search(solution_t& template std::pair, bool> diversity_manager_t::recombine( - solution_t& a, solution_t& b) + solution_t& a, solution_t& b, recombiner_enum_t recombiner_type) { recombiner_enum_t recombiner; if (run_only_ls_recombiner) { @@ -787,7 +734,12 @@ std::pair, bool> diversity_manager_t::recombine( } else if (run_only_sub_mip_recombiner) { recombiner = recombiner_enum_t::SUB_MIP; } else { - recombiner = static_cast(mab_recombiner.select_mab_option()); + // only run the given recombiner unless it is defult + if (recombiner_type == recombiner_enum_t::SIZE) { + recombiner = static_cast(mab_recombiner.select_mab_option()); + } else { + recombiner = recombiner_type; + } } recombine_stats.add_attempt((recombiner_enum_t)recombiner); recombine_stats.start_recombiner_time(); @@ -828,14 +780,16 @@ std::pair, bool> diversity_manager_t::recombine( template void diversity_manager_t::set_simplex_solution(const std::vector& solution, + const std::vector& dual_solution, f_t objective) { CUOPT_LOG_DEBUG("Setting simplex solution with objective %f", objective); using sol_t = solution_t; + cudaSetDevice(context.handle_ptr->get_device()); context.handle_ptr->sync_stream(); - RAFT_CUDA_TRY(cudaSetDevice(context.handle_ptr->get_device())); cuopt_func_call(sol_t new_sol(*problem_ptr)); 
cuopt_assert(new_sol.assignment.size() == solution.size(), "Assignment size mismatch"); + cuopt_assert(problem_ptr->n_constraints == dual_solution.size(), "Dual assignment size mismatch"); cuopt_func_call(new_sol.copy_new_assignment(solution)); cuopt_func_call(new_sol.compute_feasibility()); cuopt_assert(integer_equal(new_sol.get_user_objective(), objective, 1e-3), "Objective mismatch"); @@ -846,6 +800,10 @@ void diversity_manager_t::set_simplex_solution(const std::vector& // the operations are ordered as long as they are on the same stream raft::copy( lp_optimal_solution.data(), solution.data(), solution.size(), context.handle_ptr->get_stream()); + raft::copy(lp_dual_optimal_solution.data(), + dual_solution.data(), + dual_solution.size(), + context.handle_ptr->get_stream()); set_new_user_bound(objective); context.handle_ptr->sync_stream(); } diff --git a/cpp/src/mip/diversity/diversity_manager.cuh b/cpp/src/mip/diversity/diversity_manager.cuh index 3ad538d33b..e65b5697f1 100644 --- a/cpp/src/mip/diversity/diversity_manager.cuh +++ b/cpp/src/mip/diversity/diversity_manager.cuh @@ -52,7 +52,9 @@ class diversity_manager_t { // main loop of diversity improvements void main_loop(); // randomly chooses a recombiner and returns the offspring - std::pair, bool> recombine(solution_t& a, solution_t& b); + std::pair, bool> recombine(solution_t& a, + solution_t& b, + recombiner_enum_t recombiner_type); bool regenerate_solutions(); void generate_add_solution(std::vector>& initial_sol_vector, f_t time_limit, @@ -62,10 +64,13 @@ class diversity_manager_t { std::vector> generate_more_solutions(); void add_user_given_solutions(std::vector>& initial_sol_vector); population_t* get_population_pointer() { return &population; } - void recombine_and_ls_with_all(std::vector>& solutions); - void recombine_and_ls_with_all(solution_t& solution); + void recombine_and_ls_with_all(std::vector>& solutions, + bool add_only_feasible = false); + void recombine_and_ls_with_all(solution_t& solution, 
bool add_only_feasible = false); std::pair, solution_t> recombine_and_local_search( - solution_t& a, solution_t& b); + solution_t& a, + solution_t& b, + recombiner_enum_t recombiner_type = recombiner_enum_t::SIZE); void set_new_user_bound(f_t new_user_bound); void generate_quick_feasible_solution(); bool check_b_b_preemption(); @@ -77,14 +82,17 @@ class diversity_manager_t { timer_t& timer, ls_config_t& ls_config); - void set_simplex_solution(const std::vector& solution, f_t objective); + void set_simplex_solution(const std::vector& solution, + const std::vector& dual_solution, + f_t objective); mip_solver_context_t& context; problem_t* problem_ptr; diversity_config_t diversity_config; population_t population; rmm::device_uvector lp_optimal_solution; - bool simplex_solution_exists{false}; + rmm::device_uvector lp_dual_optimal_solution; + std::atomic simplex_solution_exists{false}; local_search_t ls; cuopt::timer_t timer; bound_prop_recombiner_t bound_prop_recombiner; @@ -98,7 +106,7 @@ class diversity_manager_t { std::vector> initial_sol_vector; mab_t mab_recombiner; mab_t mab_ls; - assignment_hash_map_t assignment_hash_map; + assignment_hash_map_t ls_hash_map; // mutex for the simplex solution update std::mutex relaxed_solution_mutex; // atomic for signalling pdlp to stop diff --git a/cpp/src/mip/diversity/population.cu b/cpp/src/mip/diversity/population.cu index 6a42cb39a1..63f4b3c855 100644 --- a/cpp/src/mip/diversity/population.cu +++ b/cpp/src/mip/diversity/population.cu @@ -15,6 +15,7 @@ * limitations under the License. 
*/ +#include "diversity_manager.cuh" #include "population.cuh" #include @@ -38,23 +39,32 @@ constexpr double halving_skip_ratio = 0.75; template population_t::population_t(std::string const& name_, mip_solver_context_t& context_, + diversity_manager_t& dm_, int var_threshold_, size_t max_solutions_, f_t infeasibility_weight_) : name(name_), context(context_), problem_ptr(context.problem_ptr), + dm(dm_), var_threshold(var_threshold_), max_solutions(max_solutions_), infeasibility_importance(infeasibility_weight_), weights(0, context.problem_ptr->handle_ptr), rng(cuopt::seed_generator::get_seed()), early_exit_primal_generation(false), + population_hash_map(*problem_ptr), timer(0) { best_feasible_objective = std::numeric_limits::max(); } +template +i_t get_max_var_threshold(i_t n_vars) +{ + return n_vars - sqrt(n_vars); +} + template void population_t::allocate_solutions() { @@ -67,8 +77,7 @@ void population_t::allocate_solutions() template void population_t::initialize_population() { - var_threshold = - std::max(problem_ptr->n_variables - var_threshold, (problem_ptr->n_variables / 10) * 8); + var_threshold = get_max_var_threshold(problem_ptr->n_integer_vars); solutions.reserve(max_solutions); indices.reserve(max_solutions); // indices[0] always points to solutions[0] - a special place for feasible solution @@ -308,6 +317,7 @@ template i_t population_t::add_solution(solution_t&& sol) { raft::common::nvtx::range fun_scope("add_solution"); + population_hash_map.insert(sol); double sol_cost = sol.get_quality(weights); CUOPT_LOG_TRACE("Adding solution with quality %f and objective %f n_integers %d!", sol_cost, @@ -576,21 +586,6 @@ std::vector> population_t::population_to_vector() return sol_vec; } -template -i_t get_max_var_threshold(i_t n_vars) -{ - if (n_vars < 50) { - return std::max(1, n_vars - 1); - } else if (n_vars < 80) { - return n_vars - 2; - } else if (n_vars < 200) { - return n_vars - 4; - } else if (n_vars < 1000) { - return n_vars - 8; - } - return n_vars - 
10; -} - template void population_t::halve_the_population() { @@ -756,6 +751,14 @@ void population_t::print() CUOPT_LOG_DEBUG(" -------------- "); } +template +void population_t::run_all_recombiners(solution_t& sol) +{ + std::vector> sol_vec; + sol_vec.emplace_back(std::move(solution_t(sol))); + dm.recombine_and_ls_with_all(sol_vec, true); +} + #if MIP_INSTANTIATE_FLOAT template class population_t; #endif diff --git a/cpp/src/mip/diversity/population.cuh b/cpp/src/mip/diversity/population.cuh index 0f0176341c..532b9f24d4 100644 --- a/cpp/src/mip/diversity/population.cuh +++ b/cpp/src/mip/diversity/population.cuh @@ -17,6 +17,7 @@ #pragma once +#include "assignment_hash_map.cuh" #include "population.cuh" #include @@ -30,11 +31,16 @@ namespace cuopt::linear_programming::detail { +// forward declare +template +class diversity_manager_t; + template class population_t { public: population_t(std::string const& name, mip_solver_context_t& context, + diversity_manager_t& dm, int var_threshold_, size_t max_solutions_, f_t infeasibility_weight_); @@ -64,6 +70,7 @@ class population_t { // initializes the population lazily. 
after presolve and var removals void initialize_population(); bool is_better_than_best_feasible(solution_t& sol); + void run_all_recombiners(solution_t& sol); void allocate_solutions(); @@ -154,6 +161,7 @@ class population_t { std::string name; mip_solver_context_t& context; problem_t* problem_ptr; + diversity_manager_t& dm; i_t var_threshold; i_t initial_threshold; double population_start_time; @@ -168,9 +176,10 @@ class population_t { std::mt19937 rng; i_t update_iter = 0; std::mutex solution_mutex; - bool early_exit_primal_generation = false; - f_t best_feasible_objective = std::numeric_limits::max(); - bool preempt_heuristic_solver_ = false; + std::atomic early_exit_primal_generation = false; + std::atomic preempt_heuristic_solver_ = false; + f_t best_feasible_objective = std::numeric_limits::max(); + assignment_hash_map_t population_hash_map; cuopt::timer_t timer; }; diff --git a/cpp/src/mip/diversity/recombiners/fp_recombiner.cuh b/cpp/src/mip/diversity/recombiners/fp_recombiner.cuh index 5597e8e845..f3d4d73556 100644 --- a/cpp/src/mip/diversity/recombiners/fp_recombiner.cuh +++ b/cpp/src/mip/diversity/recombiners/fp_recombiner.cuh @@ -34,11 +34,14 @@ class fp_recombiner_t : public recombiner_t { public: fp_recombiner_t(mip_solver_context_t& context, i_t n_vars, - feasibility_pump_t& fp_, + fj_t& fj, + constraint_prop_t& constraint_prop, + line_segment_search_t& line_segment_search, + rmm::device_uvector& lp_optimal_solution, const raft::handle_t* handle_ptr) : recombiner_t(context, n_vars, handle_ptr), vars_to_fix(n_vars, handle_ptr->get_stream()), - fp(fp_) + fp(context, fj, constraint_prop, line_segment_search, lp_optimal_solution) { } @@ -146,7 +149,8 @@ class fp_recombiner_t : public recombiner_t { return std::make_pair(offspring, !same_as_parents); } rmm::device_uvector vars_to_fix; - feasibility_pump_t& fp; + // keep a copy of FP to prevent interference with generation FP + feasibility_pump_t fp; }; } // namespace cuopt::linear_programming::detail diff 
--git a/cpp/src/mip/diversity/recombiners/recombiner.cuh b/cpp/src/mip/diversity/recombiners/recombiner.cuh index 94ca34ea18..3b09f1956f 100644 --- a/cpp/src/mip/diversity/recombiners/recombiner.cuh +++ b/cpp/src/mip/diversity/recombiners/recombiner.cuh @@ -124,7 +124,8 @@ class recombiner_t { CUOPT_LOG_DEBUG("n_objective_vars in different vars %d n_objective_vars %d", objective_indices_in_subproblem.size(), objective_indices.size()); - if (objective_indices_in_subproblem.size() < 0.4 * remaining_variables) { + if (objective_indices.size() > 0 && + objective_indices_in_subproblem.size() < 0.4 * remaining_variables) { std::default_random_engine rng_host(cuopt::seed_generator::get_seed()); std::vector objective_indices_not_in_subproblem; std::set_difference(objective_indices.begin(), diff --git a/cpp/src/mip/diversity/recombiners/recombiner_stats.hpp b/cpp/src/mip/diversity/recombiners/recombiner_stats.hpp index 7e3e25164a..c6f99a4e1f 100644 --- a/cpp/src/mip/diversity/recombiners/recombiner_stats.hpp +++ b/cpp/src/mip/diversity/recombiners/recombiner_stats.hpp @@ -23,6 +23,11 @@ namespace cuopt::linear_programming::detail { enum class recombiner_enum_t : int { BOUND_PROP = 0, FP, LINE_SEGMENT, SUB_MIP, SIZE }; +constexpr std::array recombiner_types = {recombiner_enum_t::BOUND_PROP, + recombiner_enum_t::FP, + recombiner_enum_t::LINE_SEGMENT, + recombiner_enum_t::SUB_MIP}; + struct recombine_stats { int attempts; int success; diff --git a/cpp/src/mip/diversity/recombiners/sub_mip.cuh b/cpp/src/mip/diversity/recombiners/sub_mip.cuh index 381f5b1258..eb94353515 100644 --- a/cpp/src/mip/diversity/recombiners/sub_mip.cuh +++ b/cpp/src/mip/diversity/recombiners/sub_mip.cuh @@ -79,6 +79,17 @@ class sub_mip_recombiner_t : public recombiner_t { "n_vars_from_guiding %d n_vars_from_other %d", n_vars_from_guiding, n_vars_from_other); this->compute_vars_to_fix(offspring, vars_to_fix, n_vars_from_other, n_vars_from_guiding); auto [fixed_problem, fixed_assignment, variable_map] = 
offspring.fix_variables(vars_to_fix); + pdlp_initial_scaling_strategy_t scaling( + fixed_problem.handle_ptr, + fixed_problem, + pdlp_hyper_params::default_l_inf_ruiz_iterations, + (f_t)pdlp_hyper_params::default_alpha_pock_chambolle_rescaling, + fixed_problem.reverse_coefficients, + fixed_problem.reverse_offsets, + fixed_problem.reverse_constraints, + nullptr, + true); + scaling.scale_problem(); fixed_problem.presolve_data.reset_additional_vars(fixed_problem, offspring.handle_ptr); fixed_problem.presolve_data.initialize_var_mapping(fixed_problem, offspring.handle_ptr); trivial_presolve(fixed_problem); @@ -131,6 +142,8 @@ class sub_mip_recombiner_t : public recombiner_t { offspring.handle_ptr->sync_stream(); } if (solution_vector.size() > 0) { + rmm::device_uvector dummy(0, offspring.handle_ptr->get_stream()); + scaling.unscale_solutions(fixed_assignment, dummy); // unfix the assignment on given result no matter if it is feasible offspring.unfix_variables(fixed_assignment, variable_map); } else { @@ -159,6 +172,8 @@ class sub_mip_recombiner_t : public recombiner_t { solution.size(), offspring.handle_ptr->get_stream()); fixed_problem.post_process_assignment(fixed_assignment, false); + rmm::device_uvector dummy(0, offspring.handle_ptr->get_stream()); + scaling.unscale_solutions(fixed_assignment, dummy); sol.unfix_variables(fixed_assignment, variable_map); sol.compute_feasibility(); cuopt_func_call(sol.test_variable_bounds()); diff --git a/cpp/src/mip/feasibility_jump/feasibility_jump.cu b/cpp/src/mip/feasibility_jump/feasibility_jump.cu index 7520d1431e..a30a9b1824 100644 --- a/cpp/src/mip/feasibility_jump/feasibility_jump.cu +++ b/cpp/src/mip/feasibility_jump/feasibility_jump.cu @@ -75,6 +75,10 @@ fj_t::fj_t(mip_solver_context_t& context_, fj_settings_t in_ { setval_launch_dims = get_launch_dims_max_occupancy( (void*)update_assignment_kernel, TPB_setval, pb_ptr->handle_ptr); + update_changed_constraints_launch_dims = + 
get_launch_dims_max_occupancy((void*)update_changed_constraints_kernel, + TPB_update_changed_constraints, + pb_ptr->handle_ptr); resetmoves_launch_dims = get_launch_dims_max_occupancy( (void*)compute_mtm_moves_kernel, TPB_resetmoves, pb_ptr->handle_ptr); resetmoves_bin_launch_dims = @@ -643,7 +647,9 @@ void fj_t::run_step_device(const rmm::cuda_stream_view& climber_stream bool use_graph) { raft::common::nvtx::range scope("run_step_device"); - auto [grid_setval, blocks_setval] = setval_launch_dims; + auto [grid_setval, blocks_setval] = setval_launch_dims; + auto [grid_update_changed_constraints, blocks_update_changed_constraints] = + update_changed_constraints_launch_dims; auto [grid_resetmoves, blocks_resetmoves] = resetmoves_launch_dims; auto [grid_resetmoves_bin, blocks_resetmoves_bin] = resetmoves_bin_launch_dims; auto [grid_update_weights, blocks_update_weights] = update_weights_launch_dims; @@ -795,7 +801,7 @@ void fj_t::run_step_device(const rmm::cuda_stream_view& climber_stream climber_stream); cudaLaunchKernel((void*)update_changed_constraints_kernel, 1, - blocks_setval, + blocks_update_changed_constraints, kernel_args, 0, climber_stream); diff --git a/cpp/src/mip/feasibility_jump/feasibility_jump.cuh b/cpp/src/mip/feasibility_jump/feasibility_jump.cuh index 5d362a3d3d..e5e7c6d978 100644 --- a/cpp/src/mip/feasibility_jump/feasibility_jump.cuh +++ b/cpp/src/mip/feasibility_jump/feasibility_jump.cuh @@ -34,13 +34,14 @@ namespace cuopt::linear_programming::detail { -static constexpr int TPB_resetmoves = raft::WarpSize * 4; -static constexpr int TPB_heavyvars = raft::WarpSize * 16; -static constexpr int TPB_heavycstrs = raft::WarpSize * 4; -static constexpr int TPB_localmin = raft::WarpSize * 4; -static constexpr int TPB_setval = raft::WarpSize * 16; -static constexpr int TPB_liftmoves = raft::WarpSize * 4; -static constexpr int TPB_loadbalance = raft::WarpSize * 4; +static constexpr int TPB_resetmoves = raft::WarpSize * 4; +static constexpr int TPB_heavyvars = 
raft::WarpSize * 16; +static constexpr int TPB_heavycstrs = raft::WarpSize * 4; +static constexpr int TPB_localmin = raft::WarpSize * 4; +static constexpr int TPB_setval = raft::WarpSize * 16; +static constexpr int TPB_update_changed_constraints = raft::WarpSize * 4; +static constexpr int TPB_liftmoves = raft::WarpSize * 4; +static constexpr int TPB_loadbalance = raft::WarpSize * 4; struct fj_hyper_parameters_t { // The number of moves to evaluate, if there are many positive-score @@ -253,6 +254,7 @@ class fj_t { // kernel launch dimensions, computed once inside the constructor std::pair setval_launch_dims; + std::pair update_changed_constraints_launch_dims; std::pair resetmoves_launch_dims; std::pair resetmoves_bin_launch_dims; std::pair update_weights_launch_dims; diff --git a/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cu b/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cu index f8b7d34fc5..d291efa4cf 100644 --- a/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cu +++ b/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cu @@ -59,8 +59,6 @@ feasibility_pump_t::feasibility_pump_t( context.problem_ptr->handle_ptr->get_stream()), orig_variable_types(context.problem_ptr->n_variables, context.problem_ptr->handle_ptr->get_stream()), - best_excess_solution(context.problem_ptr->n_variables, - context.problem_ptr->handle_ptr->get_stream()), lp_optimal_solution(lp_optimal_solution_), rng(cuopt::seed_generator::get_seed()), timer(20.) 
@@ -257,9 +255,16 @@ bool feasibility_pump_t::round(solution_t& solution) { bool result; CUOPT_LOG_DEBUG("Rounding the point"); - timer_t bounds_prop_timer(std::min(2., timer.remaining_time())); - const f_t lp_run_time_after_feasible = std::min(3., timer.remaining_time() / 20.); + timer_t bounds_prop_timer(std::min(0.5, timer.remaining_time())); + const f_t lp_run_time_after_feasible = 0.; + bool old_var = constraint_prop.round_all_vars; + f_t old_time = constraint_prop.max_time_for_bounds_prop; + constraint_prop.round_all_vars = false; + constraint_prop.max_time_for_bounds_prop = 0.7; result = constraint_prop.apply_round(solution, lp_run_time_after_feasible, bounds_prop_timer); + constraint_prop.round_all_vars = old_var; + constraint_prop.max_time_for_bounds_prop = old_time; + // result = solution.round_nearest(); cuopt_func_call(solution.test_variable_bounds(true)); // copy the last rounding raft::copy(last_rounding.data(), @@ -389,7 +394,6 @@ void feasibility_pump_t::resize_vectors(problem_t& problem, { last_rounding.resize(problem.n_variables, handle_ptr->get_stream()); last_projection.resize(problem.n_variables, handle_ptr->get_stream()); - best_excess_solution.resize(problem.n_variables, handle_ptr->get_stream()); } template @@ -419,20 +423,6 @@ bool feasibility_pump_t::check_distance_cycle(solution_t& so return is_cycle; } -template -void feasibility_pump_t::save_best_excess_solution(solution_t& solution) -{ - f_t sol_excess = solution.get_total_excess(); - if (sol_excess < best_excess) { - CUOPT_LOG_DEBUG("FP: updating excess from %f to %f", best_excess, sol_excess); - best_excess = sol_excess; - raft::copy(best_excess_solution.data(), - solution.assignment.data(), - solution.assignment.size(), - solution.handle_ptr->get_stream()); - } -} - template void feasibility_pump_t::relax_general_integers(solution_t& solution) { @@ -512,7 +502,6 @@ bool feasibility_pump_t::run_single_fp_descent(solution_t& s is_cycle = check_distance_cycle(solution); if (is_cycle) { 
is_feasible = round(solution); - save_best_excess_solution(solution); cuopt_func_call(solution.test_variable_bounds(true)); if (is_feasible) { bool res = solution.compute_feasibility(); @@ -530,7 +519,6 @@ bool feasibility_pump_t::run_single_fp_descent(solution_t& s if (n_integers == solution.problem_ptr->n_integer_vars) { if (is_feasible) { CUOPT_LOG_DEBUG("Feasible solution found after LP with relative tolerance"); - save_best_excess_solution(solution); return true; } // if the solution is almost on polytope @@ -550,8 +538,7 @@ bool feasibility_pump_t::run_single_fp_descent(solution_t& s is_feasible = solution.get_feasible(); n_integers = solution.compute_number_of_integers(); if (is_feasible && n_integers == solution.problem_ptr->n_integer_vars) { - CUOPT_LOG_DEBUG("Feasible solution verified with lower precision!"); - save_best_excess_solution(solution); + CUOPT_LOG_DEBUG("Feasible solution verified with LP!"); return true; } } @@ -564,7 +551,6 @@ bool feasibility_pump_t::run_single_fp_descent(solution_t& s const f_t time_ratio = 0.2; is_feasible = test_fj_feasible(solution, time_ratio * proj_and_round_time); } - save_best_excess_solution(solution); if (timer.check_time_limit()) { CUOPT_LOG_DEBUG("FP time limit reached!"); return false; diff --git a/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cuh b/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cuh index 2013e80f51..1806573f0a 100644 --- a/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cuh +++ b/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cuh @@ -137,7 +137,6 @@ class feasibility_pump_t { bool check_distance_cycle(solution_t& solution); void reset(); void resize_vectors(problem_t& problem, const raft::handle_t* handle_ptr); - void save_best_excess_solution(solution_t& solution); bool random_round_with_fj(solution_t& solution, timer_t& round_timer); bool round_multiple_points(solution_t& solution); void relax_general_integers(solution_t& solution); @@ -156,7 
+155,6 @@ class feasibility_pump_t { rmm::device_uvector last_projection; rmm::device_uvector orig_variable_types; f_t best_excess; - rmm::device_uvector best_excess_solution; rmm::device_uvector& lp_optimal_solution; std::mt19937 rng; std::deque last_distances; diff --git a/cpp/src/mip/local_search/local_search.cu b/cpp/src/mip/local_search/local_search.cu index ad444568fd..f0497d3827 100644 --- a/cpp/src/mip/local_search/local_search.cu +++ b/cpp/src/mip/local_search/local_search.cu @@ -105,21 +105,17 @@ bool local_search_t::run_local_search(solution_t& solution, fj_settings.feasibility_run = false; fj.set_fj_settings(fj_settings); bool is_feas = false; - ls_method_t rd = static_cast(std::uniform_int_distribution( - static_cast(ls_method_t::FJ_LINE_SEGMENT), static_cast(ls_method_t::FP_SEARCH))(rng)); + ls_method_t rd = static_cast( + std::uniform_int_distribution(static_cast(ls_method_t::FJ_ANNEALING), + static_cast(ls_method_t::FJ_LINE_SEGMENT))(rng)); if (ls_config.ls_method == ls_method_t::FJ_LINE_SEGMENT) { rd = ls_method_t::FJ_LINE_SEGMENT; } else if (ls_config.ls_method == ls_method_t::FJ_ANNEALING) { rd = ls_method_t::FJ_ANNEALING; - } else if (ls_config.ls_method == ls_method_t::FP_SEARCH) { - rd = ls_method_t::FP_SEARCH; } if (rd == ls_method_t::FJ_LINE_SEGMENT && lp_optimal_exists) { fj.copy_weights(weights, solution.handle_ptr); is_feas = run_fj_line_segment(solution, timer, ls_config); - } else if (rd == ls_method_t::FP_SEARCH) { - timer = timer_t(std::min(3., timer.remaining_time())); - is_feas = run_fp(solution, timer, &weights, false); } else { fj.copy_weights(weights, solution.handle_ptr); is_feas = run_fj_annealing(solution, timer, ls_config); @@ -201,6 +197,7 @@ bool local_search_t::check_fj_on_lp_optimal(solution_t& solu lp_optimal_solution.data(), solution.assignment.size(), solution.handle_ptr->get_stream()); + cuopt_func_call(solution.test_variable_bounds(false)); } if (perturb) { CUOPT_LOG_DEBUG("Perturbating solution on initial fj on 
optimal run!"); @@ -252,8 +249,9 @@ bool local_search_t::run_fj_on_zero(solution_t& solution, ti template bool local_search_t::run_staged_fp(solution_t& solution, timer_t timer, - bool& early_exit) + population_t* population_ptr) { + cuopt_assert(population_ptr != nullptr, "Population pointer must not be null"); auto n_vars = solution.problem_ptr->n_variables; auto n_binary_vars = solution.problem_ptr->get_n_binary_variables(); auto n_integer_vars = solution.problem_ptr->n_integer_vars; @@ -262,22 +260,27 @@ bool local_search_t::run_staged_fp(solution_t& solution, auto integer_only = (n_binary_vars == 0); bool is_feasible = false; - // TODO return the best solution instead of the last if (binary_only || integer_only) { - return run_fp(solution, timer); + return run_fp(solution, timer, population_ptr); } else { const i_t n_fp_iterations = 1000000; fp.cycle_queue.reset(solution); fp.reset(); fp.resize_vectors(*solution.problem_ptr, solution.handle_ptr); for (i_t i = 0; i < n_fp_iterations && !timer.check_time_limit(); ++i) { - if (early_exit) { return false; } + if (population_ptr->preempt_heuristic_solver_.load()) { + CUOPT_LOG_DEBUG("Preempting heuristic solver!"); + return false; + } CUOPT_LOG_DEBUG("Running staged FP from beginning it %d", i); fp.relax_general_integers(solution); timer_t binary_timer(timer.remaining_time() / 3); i_t binary_it_counter = 0; for (; binary_it_counter < 100; ++binary_it_counter) { - if (early_exit) { return false; } + if (population_ptr->preempt_heuristic_solver_.load()) { + CUOPT_LOG_DEBUG("Preempting heuristic solver!"); + return false; + } CUOPT_LOG_DEBUG( "Running binary problem from it %d large_restart_it %d", binary_it_counter, i); is_feasible = fp.run_single_fp_descent(solution); @@ -331,83 +334,134 @@ void local_search_t::resize_vectors(problem_t& problem, } template -void save_best_fp_solution(solution_t& solution, - rmm::device_uvector& best_solution, - f_t& best_objective, - bool feasibility_run) +void 
local_search_t::save_solution_and_add_cutting_plane( + solution_t& solution, rmm::device_uvector& best_solution, f_t& best_objective) { - if (feasibility_run || solution.get_objective() < best_objective) { - CUOPT_LOG_DEBUG("Found better feasible in FP with obj %f. Continue with FJ!", - solution.get_objective()); - best_objective = solution.get_objective(); + if (solution.get_objective() < best_objective) { raft::copy(best_solution.data(), solution.assignment.data(), solution.assignment.size(), solution.handle_ptr->get_stream()); - solution.problem_ptr->add_cutting_plane_at_objective(solution.get_objective() - - OBJECTIVE_EPSILON); + best_objective = solution.get_objective(); + f_t objective_cut = + best_objective - std::max(std::abs(0.001 * best_objective), OBJECTIVE_EPSILON); + problem_with_objective_cut.add_cutting_plane_at_objective(objective_cut); } } template -void local_search_t::save_solution_and_add_cutting_plane( - solution_t& solution, rmm::device_uvector& best_solution, f_t& best_objective) +void local_search_t::resize_to_new_problem() { - if (solution.get_objective() < best_objective) { - raft::copy(best_solution.data(), - solution.assignment.data(), - solution.assignment.size(), - solution.handle_ptr->get_stream()); - best_objective = solution.get_objective(); - solution.problem_ptr->add_cutting_plane_at_objective(solution.get_objective() - - OBJECTIVE_EPSILON); + resize_vectors(problem_with_objective_cut, problem_with_objective_cut.handle_ptr); + // hint for next PR in case load balanced is reintroduced + // lb_constraint_prop.temp_problem.setup(problem_with_objective_cut); + // lb_constraint_prop.bounds_update.setup(lb_constraint_prop.temp_problem); + constraint_prop.bounds_update.resize(problem_with_objective_cut); +} + +template +void local_search_t::resize_to_old_problem(problem_t* old_problem_ptr) +{ + resize_vectors(*old_problem_ptr, old_problem_ptr->handle_ptr); + // hint for next PR in case load balanced is reintroduced + // 
lb_constraint_prop.temp_problem.setup(*old_problem_ptr); + // lb_constraint_prop.bounds_update.setup(lb_constraint_prop.temp_problem); + constraint_prop.bounds_update.resize(*old_problem_ptr); +} + +template +void local_search_t::reset_alpha_and_run_recombiners( + solution_t& solution, + problem_t* old_problem_ptr, + population_t* population_ptr, + i_t i, + i_t last_unimproved_iteration, + rmm::device_uvector& best_solution, + f_t& best_objective) +{ + fp.config.alpha = default_alpha; + solution_t solution_copy(solution); + solution_copy.problem_ptr = old_problem_ptr; + solution_copy.resize_to_problem(); + population_ptr->add_solution(std::move(solution_copy)); + constexpr i_t iterations_for_stagnation = 3; + if (population_ptr->current_size() > 1 && + i - last_unimproved_iteration > iterations_for_stagnation) { + solution_t best_feasible_copy(population_ptr->best_feasible()); + population_ptr->run_all_recombiners(best_feasible_copy); + } + auto new_sol_vector = population_ptr->get_external_solutions(); + population_ptr->add_solutions_from_vec(std::move(new_sol_vector)); + if (!cutting_plane_added_for_active_run) { + fj.copy_weights( + population_ptr->weights, solution.handle_ptr, problem_with_objective_cut.n_constraints); + solution.problem_ptr = &problem_with_objective_cut; + solution.resize_to_problem(); + resize_to_new_problem(); + cutting_plane_added_for_active_run = true; } + save_solution_and_add_cutting_plane( + population_ptr->best_feasible(), best_solution, best_objective); } template bool local_search_t::run_fp(solution_t& solution, timer_t timer, - const weight_t* weights, - bool feasibility_run) + population_t* population_ptr) { - const i_t n_fp_iterations = 1000000; - bool is_feasible = solution.compute_feasibility(); - double best_objective = solution.get_objective(); + cuopt_assert(population_ptr != nullptr, "Population pointer must not be null"); + const i_t n_fp_iterations = 1000000; + constexpr i_t n_sol_in_population_for_exit = 4; + bool 
is_feasible = solution.compute_feasibility(); + cutting_plane_added_for_active_run = is_feasible; + double best_objective = + is_feasible ? solution.get_objective() : std::numeric_limits::max(); rmm::device_uvector best_solution(solution.assignment, solution.handle_ptr->get_stream()); problem_t* old_problem_ptr = solution.problem_ptr; fp.timer = timer_t(timer.remaining_time()); - if (!feasibility_run) { - // if it has not been initialized yet, create a new problem and move it to the cut problem - if (!problem_with_objective_cut.cutting_plane_added) { - problem_with_objective_cut = std::move(problem_t(*old_problem_ptr)); - } - problem_with_objective_cut.add_cutting_plane_at_objective(solution.get_objective() - - OBJECTIVE_EPSILON); + // if it has not been initialized yet, create a new problem and move it to the cut problem + if (!problem_with_objective_cut.cutting_plane_added) { + problem_with_objective_cut = std::move(problem_t(*old_problem_ptr)); + } + if (is_feasible) { + f_t objective_cut = + best_objective - std::max(std::abs(0.001 * best_objective), OBJECTIVE_EPSILON); + problem_with_objective_cut.add_cutting_plane_at_objective(objective_cut); // Do the copy here for proper handling of the added constraints weight - fj.copy_weights(*weights, solution.handle_ptr, problem_with_objective_cut.n_constraints); + fj.copy_weights( + population_ptr->weights, solution.handle_ptr, problem_with_objective_cut.n_constraints); solution.problem_ptr = &problem_with_objective_cut; solution.resize_to_problem(); - resize_vectors(problem_with_objective_cut, solution.handle_ptr); - constraint_prop.bounds_update.resize(problem_with_objective_cut); + resize_to_new_problem(); } + i_t last_unimproved_iteration = 0; for (i_t i = 0; i < n_fp_iterations && !timer.check_time_limit(); ++i) { if (timer.check_time_limit()) { is_feasible = false; break; } CUOPT_LOG_DEBUG("fp_loop it %d", i); + if (population_ptr->preempt_heuristic_solver_.load()) { + CUOPT_LOG_DEBUG("Preempting heuristic 
solver!"); + break; + } is_feasible = fp.run_single_fp_descent(solution); + if (population_ptr->preempt_heuristic_solver_.load()) { + CUOPT_LOG_DEBUG("Preempting heuristic solver!"); + break; + } // if feasible return true if (is_feasible) { - if (feasibility_run) { - is_feasible = true; - break; - } else { - CUOPT_LOG_DEBUG("Found feasible in FP with obj %f. Continue with FJ!", - solution.get_objective()); - save_solution_and_add_cutting_plane(solution, best_solution, best_objective); - fp.config.alpha = default_alpha; - } + CUOPT_LOG_DEBUG("Found feasible in FP with obj %f. Continue with FJ!", + solution.get_objective()); + reset_alpha_and_run_recombiners(solution, + old_problem_ptr, + population_ptr, + i, + last_unimproved_iteration, + best_solution, + best_objective); + if (population_ptr->current_size() >= n_sol_in_population_for_exit) { break; } } // if not feasible, it means it is a cycle else { @@ -416,41 +470,45 @@ bool local_search_t::run_fp(solution_t& solution, break; } is_feasible = fp.restart_fp(solution); + if (population_ptr->preempt_heuristic_solver_.load()) { + CUOPT_LOG_DEBUG("Preempting heuristic solver!"); + break; + } if (is_feasible) { - if (feasibility_run) { - is_feasible = true; - break; - } else { - CUOPT_LOG_DEBUG("Found feasible in FP with obj %f. Continue with FJ!", - solution.get_objective()); - save_solution_and_add_cutting_plane(solution, best_solution, best_objective); - fp.config.alpha = default_alpha; - } + CUOPT_LOG_DEBUG("Found feasible during restart with obj %f. 
Continue with FJ!", + solution.get_objective()); + reset_alpha_and_run_recombiners(solution, + old_problem_ptr, + population_ptr, + i, + last_unimproved_iteration, + best_solution, + best_objective); + if (population_ptr->current_size() >= n_sol_in_population_for_exit) { break; } + } else { + last_unimproved_iteration = i; } } } - if (!feasibility_run) { - raft::copy(solution.assignment.data(), - best_solution.data(), - solution.assignment.size(), - solution.handle_ptr->get_stream()); - solution.problem_ptr = old_problem_ptr; - solution.resize_to_problem(); - resize_vectors(*old_problem_ptr, solution.handle_ptr); - constraint_prop.bounds_update.resize(*old_problem_ptr); - solution.handle_ptr->sync_stream(); - } + raft::copy(solution.assignment.data(), + best_solution.data(), + solution.assignment.size(), + solution.handle_ptr->get_stream()); + solution.problem_ptr = old_problem_ptr; + solution.resize_to_problem(); + resize_to_old_problem(old_problem_ptr); + solution.handle_ptr->sync_stream(); return is_feasible; } template bool local_search_t::generate_solution(solution_t& solution, bool perturb, - bool& early_exit, + population_t* population_ptr, f_t time_limit) { raft::common::nvtx::range fun_scope("LS FP Loop"); - + cuopt_assert(population_ptr != nullptr, "Population pointer must not be null"); timer_t timer(time_limit); auto n_vars = solution.problem_ptr->n_variables; auto n_binary_vars = solution.problem_ptr->get_n_binary_variables(); @@ -460,6 +518,10 @@ bool local_search_t::generate_solution(solution_t& solution, CUOPT_LOG_DEBUG("Solution generated with FJ on LP optimal: is_feasible %d", is_feasible); return true; } + if (population_ptr->preempt_heuristic_solver_.load()) { + CUOPT_LOG_DEBUG("Preempting heuristic solver!"); + return is_feasible; + } if (!perturb) { raft::copy(fj_sol_on_lp_opt.data(), solution.assignment.data(), @@ -476,12 +538,16 @@ bool local_search_t::generate_solution(solution_t& solution, solution.assignment.size(), 
solution.handle_ptr->get_stream()); } + if (population_ptr->preempt_heuristic_solver_.load()) { + CUOPT_LOG_DEBUG("Preempting heuristic solver!"); + return is_feasible; + } fp.timer = timer; // continue with the solution with fj on lp optimal fp.cycle_queue.reset(solution); fp.reset(); fp.resize_vectors(*solution.problem_ptr, solution.handle_ptr); - is_feasible = run_staged_fp(solution, timer, early_exit); + is_feasible = run_staged_fp(solution, timer, population_ptr); // is_feasible = run_fp(solution, timer); CUOPT_LOG_DEBUG("Solution generated with FP: is_feasible %d", is_feasible); return is_feasible; diff --git a/cpp/src/mip/local_search/local_search.cuh b/cpp/src/mip/local_search/local_search.cuh index bb95a8dc55..d878b4b558 100644 --- a/cpp/src/mip/local_search/local_search.cuh +++ b/cpp/src/mip/local_search/local_search.cuh @@ -17,6 +17,7 @@ #pragma once +#include #include #include #include @@ -29,7 +30,6 @@ namespace cuopt::linear_programming::detail { enum class ls_method_t : int { FJ_ANNEALING = 0, FJ_LINE_SEGMENT, - FP_SEARCH, RANDOM, LS_METHODS_SIZE = RANDOM }; @@ -55,7 +55,7 @@ class local_search_t { void generate_fast_solution(solution_t& solution, timer_t timer); bool generate_solution(solution_t& solution, bool perturb, - bool& early_exit, + population_t* population_ptr, f_t time_limit = 300.); bool run_fj_until_timer(solution_t& solution, const weight_t& weights, @@ -72,15 +72,25 @@ class local_search_t { const ls_config_t& ls_config); bool run_fj_on_zero(solution_t& solution, timer_t timer); bool check_fj_on_lp_optimal(solution_t& solution, bool perturb, timer_t timer); - bool run_staged_fp(solution_t& solution, timer_t timer, bool& early_exit); + bool run_staged_fp(solution_t& solution, + timer_t timer, + population_t* population_ptr); bool run_fp(solution_t& solution, timer_t timer, - const weight_t* weights = nullptr, - bool feasibility_run = true); + population_t* population_ptr = nullptr); void resize_vectors(problem_t& problem, const 
raft::handle_t* handle_ptr); void save_solution_and_add_cutting_plane(solution_t& solution, rmm::device_uvector& best_solution, f_t& best_objective); + void resize_to_new_problem(); + void resize_to_old_problem(problem_t* old_problem_ptr); + void reset_alpha_and_run_recombiners(solution_t& solution, + problem_t* old_problem_ptr, + population_t* population_ptr, + i_t i, + i_t last_unimproved_iteration, + rmm::device_uvector& best_solution, + f_t& best_objective); mip_solver_context_t& context; rmm::device_uvector& lp_optimal_solution; @@ -93,6 +103,7 @@ class local_search_t { feasibility_pump_t fp; std::mt19937 rng; problem_t problem_with_objective_cut; + bool cutting_plane_added_for_active_run{false}; }; } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip/local_search/rounding/constraint_prop.cu b/cpp/src/mip/local_search/rounding/constraint_prop.cu index 61e8e08675..4dfd1b216b 100644 --- a/cpp/src/mip/local_search/rounding/constraint_prop.cu +++ b/cpp/src/mip/local_search/rounding/constraint_prop.cu @@ -17,6 +17,7 @@ #include #include +#include #include #include #include "constraint_prop.cuh" @@ -331,21 +332,16 @@ template struct find_unset_int_t { // This functor should be called only on integer variables f_t eps; - raft::device_span var_lb; - raft::device_span var_ub; raft::device_span assignment; - find_unset_int_t(f_t eps_, - raft::device_span lb_, - raft::device_span ub_, - raft::device_span assignment_) - : eps(eps_), var_lb(lb_), var_ub(ub_), assignment(assignment_) + find_unset_int_t(f_t eps_, raft::device_span assignment_) + : eps(eps_), assignment(assignment_) { } HDI bool operator()(i_t idx) { auto var_val = assignment[idx]; - bool is_set = is_integer(var_val); + bool is_set = is_integer(var_val, eps); return !is_set; } }; @@ -826,6 +822,21 @@ bool constraint_prop_t::run_repair_procedure(problem_t& prob return true; } +template +void constraint_prop_t::find_unset_integer_vars(solution_t& sol, + rmm::device_uvector& unset_vars) +{ + 
unset_vars.resize(sol.problem_ptr->n_integer_vars, sol.handle_ptr->get_stream()); + auto iter = + thrust::copy_if(sol.handle_ptr->get_thrust_policy(), + sol.problem_ptr->integer_indices.begin(), + sol.problem_ptr->integer_indices.end(), + unset_vars.begin(), + find_unset_int_t{sol.problem_ptr->tolerances.integrality_tolerance, + make_span(sol.assignment)}); + unset_vars.resize(iter - unset_vars.begin(), sol.handle_ptr->get_stream()); +} + template bool constraint_prop_t::is_problem_ii(problem_t& problem) { @@ -865,10 +876,35 @@ bool constraint_prop_t::find_integer( cuopt_func_call(orig_sol.test_variable_bounds()); return orig_sol.compute_feasibility(); } - raft::copy(unset_integer_vars.data(), - sol.problem_ptr->integer_indices.data(), - sol.problem_ptr->n_integer_vars, - sol.handle_ptr->get_stream()); + if (round_all_vars) { + raft::copy(unset_integer_vars.data(), + sol.problem_ptr->integer_indices.data(), + sol.problem_ptr->n_integer_vars, + sol.handle_ptr->get_stream()); + } else { + find_unset_integer_vars(sol, unset_integer_vars); + sort_by_frac(sol, make_span(unset_integer_vars)); + // round first unset_integer_vars.size() - 50, leave last 50 to be rounded by the algo + i_t n_to_round = std::max(unset_integer_vars.size() - 50, 0lu); + if (n_to_round > 0) { + thrust::for_each( + sol.handle_ptr->get_thrust_policy(), + unset_integer_vars.begin(), + unset_integer_vars.begin() + n_to_round, + [sol = sol.view(), seed = cuopt::seed_generator::get_seed()] __device__(i_t var_idx) { + raft::random::PCGenerator rng(seed, var_idx, 0); + auto var_bnd = sol.problem.variable_bounds[var_idx]; + sol.assignment[var_idx] = round_nearest(sol.assignment[var_idx], + get_lower(var_bnd), + get_upper(var_bnd), + sol.problem.tolerances.integrality_tolerance, + rng); + }); + find_unset_integer_vars(sol, unset_integer_vars); + } + set_bounds_on_fixed_vars(sol); + } + CUOPT_LOG_DEBUG("Bounds propagation rounding: unset vars %lu", unset_integer_vars.size()); if (unset_integer_vars.size() 
== 0) { CUOPT_LOG_ERROR("No integer variables provided in the bounds prop rounding"); @@ -1009,7 +1045,7 @@ bool constraint_prop_t::find_integer( // if the constraint is not ii, run LP if ((multi_probe.infeas_constraints_count_0 == 0 || multi_probe.infeas_constraints_count_1 == 0) && - !timeout_happened) { + !timeout_happened && lp_run_time_after_feasible > 0) { relaxed_lp_settings_t lp_settings; lp_settings.time_limit = lp_run_time_after_feasible; lp_settings.tolerance = orig_sol.problem_ptr->tolerances.absolute_tolerance; @@ -1034,10 +1070,7 @@ bool constraint_prop_t::apply_round( std::optional>> probing_config) { raft::common::nvtx::range fun_scope("constraint prop round"); - - // this is second timer that can continue but without recovery mode - const f_t max_time_for_bounds_prop = 5.; - max_timer = timer_t{max_time_for_bounds_prop}; + max_timer = timer_t{max_time_for_bounds_prop}; if (check_brute_force_rounding(sol)) { return true; } recovery_mode = false; rounding_ii = false; diff --git a/cpp/src/mip/local_search/rounding/constraint_prop.cuh b/cpp/src/mip/local_search/rounding/constraint_prop.cuh index 3b01da2749..591f25f36b 100644 --- a/cpp/src/mip/local_search/rounding/constraint_prop.cuh +++ b/cpp/src/mip/local_search/rounding/constraint_prop.cuh @@ -69,7 +69,7 @@ struct constraint_prop_t { std::optional>> probing_config = std::nullopt); void find_set_integer_vars(solution_t& sol, rmm::device_uvector& set_vars); - void find_unset_integer_vars(solution_t& sol, rmm::device_uvector& set_vars); + void find_unset_integer_vars(solution_t& sol, rmm::device_uvector& unset_vars); thrust::pair generate_double_probing_pair( const solution_t& sol, const solution_t& orig_sol, @@ -162,6 +162,9 @@ struct constraint_prop_t { bool use_probing_cache = true; static repair_stats_t repair_stats; bool single_rounding_only = false; + bool round_all_vars = true; + // this is second timer that can continue but without recovery mode + f_t max_time_for_bounds_prop = 5.; }; } // 
namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip/presolve/load_balanced_bounds_presolve_helpers.cuh b/cpp/src/mip/presolve/load_balanced_bounds_presolve_helpers.cuh index f3e5e2ee0e..078169f7c6 100644 --- a/cpp/src/mip/presolve/load_balanced_bounds_presolve_helpers.cuh +++ b/cpp/src/mip/presolve/load_balanced_bounds_presolve_helpers.cuh @@ -126,7 +126,7 @@ i_t create_heavy_item_block_segments(rmm::cuda_stream_view stream, thrust::for_each( rmm::exec_policy(stream), thrust::make_counting_iterator(0), - thrust::make_counting_iterator(item_block_segments.size()), + thrust::make_counting_iterator(item_block_segments.size() - 1), heavy_vertex_meta_t{ make_span(item_block_segments), make_span(vertex_id), make_span(pseudo_block_id)}); thrust::inclusive_scan(rmm::exec_policy(stream), diff --git a/cpp/src/mip/presolve/probing_cache.cu b/cpp/src/mip/presolve/probing_cache.cu index 7389ec206f..9e586a220e 100644 --- a/cpp/src/mip/presolve/probing_cache.cu +++ b/cpp/src/mip/presolve/probing_cache.cu @@ -478,7 +478,7 @@ void compute_probing_cache(bound_presolve_t& bound_presolve, bound_presolve.settings.time_limit = timer.remaining_time(); // Set the number of threads - const size_t max_threads = 10; + const size_t max_threads = 8; omp_set_num_threads(max_threads); // Create a vector of multi_probe_t objects diff --git a/cpp/src/mip/presolve/trivial_presolve.cuh b/cpp/src/mip/presolve/trivial_presolve.cuh index 803bfca800..d7fe142336 100644 --- a/cpp/src/mip/presolve/trivial_presolve.cuh +++ b/cpp/src/mip/presolve/trivial_presolve.cuh @@ -265,10 +265,10 @@ void update_from_csr(problem_t& pb) pb.n_constraints = updated_n_cnst; pb.n_variables = updated_n_vars; - CUOPT_LOG_INFO("After trivial presolve updated %d constraints %d variables. Objective offset %f", - updated_n_cnst, - updated_n_vars, - pb.presolve_data.objective_offset); + CUOPT_LOG_DEBUG("After trivial presolve #constraints %d #variables %d. 
Objective offset %f", + updated_n_cnst, + updated_n_vars, + pb.presolve_data.objective_offset); // check successive cnst in coo increases by atmost 1 // update csr offset pb.offsets.resize(pb.n_constraints + 1, handle_ptr->get_stream()); diff --git a/cpp/src/mip/problem/problem.cu b/cpp/src/mip/problem/problem.cu index 0e5cf510c4..e4ab0835de 100644 --- a/cpp/src/mip/problem/problem.cu +++ b/cpp/src/mip/problem/problem.cu @@ -1578,7 +1578,7 @@ void problem_t::compute_vars_with_objective_coeffs() template void problem_t::add_cutting_plane_at_objective(f_t objective) { - CUOPT_LOG_INFO("Adding cutting plane at objective %f", objective); + CUOPT_LOG_DEBUG("Adding cutting plane at objective %f", objective); if (cutting_plane_added) { // modify the RHS i_t last_constraint = n_constraints - 1; diff --git a/cpp/src/mip/relaxed_lp/lp_state.cuh b/cpp/src/mip/relaxed_lp/lp_state.cuh index 0961a537f8..3bfa00955c 100644 --- a/cpp/src/mip/relaxed_lp/lp_state.cuh +++ b/cpp/src/mip/relaxed_lp/lp_state.cuh @@ -31,14 +31,10 @@ class lp_state_t { lp_state_t(problem_t& problem, rmm::cuda_stream_view stream) : prev_primal(problem.n_variables, stream), prev_dual(problem.n_constraints, stream) { - thrust::fill(problem.handle_ptr->get_thrust_policy(), - prev_primal.data(), - prev_primal.data() + problem.n_variables, - 0); - thrust::fill(problem.handle_ptr->get_thrust_policy(), - prev_dual.data(), - prev_dual.data() + problem.n_constraints, - 0); + thrust::fill( + rmm::exec_policy(stream), prev_primal.data(), prev_primal.data() + problem.n_variables, 0); + thrust::fill( + rmm::exec_policy(stream), prev_dual.data(), prev_dual.data() + problem.n_constraints, 0); } lp_state_t(problem_t& problem) : lp_state_t(problem, problem.handle_ptr->get_stream()) diff --git a/cpp/src/mip/relaxed_lp/relaxed_lp.cu b/cpp/src/mip/relaxed_lp/relaxed_lp.cu index 790c50b179..d28ad7fbb3 100644 --- a/cpp/src/mip/relaxed_lp/relaxed_lp.cu +++ b/cpp/src/mip/relaxed_lp/relaxed_lp.cu @@ -56,10 +56,10 @@ 
optimization_problem_solution_t get_relaxed_lp_solution( pdlp_settings.tolerances.relative_dual_tolerance = settings.tolerance / 100.; pdlp_settings.time_limit = settings.time_limit; pdlp_settings.concurrent_halt = settings.concurrent_halt; - if (settings.return_first_feasible) { pdlp_settings.per_constraint_residual = true; } - pdlp_settings.first_primal_feasible = settings.return_first_feasible; + pdlp_settings.per_constraint_residual = settings.per_constraint_residual; + pdlp_settings.first_primal_feasible = settings.return_first_feasible; pdlp_solver_t lp_solver(op_problem, pdlp_settings); - if (settings.save_state) { + if (settings.has_initial_primal) { i_t prev_size = lp_state.prev_dual.size(); CUOPT_LOG_DEBUG( "setting initial primal solution of size %d dual size %d problem vars %d cstrs %d", diff --git a/cpp/src/mip/relaxed_lp/relaxed_lp.cuh b/cpp/src/mip/relaxed_lp/relaxed_lp.cuh index a5fe23adb8..0e44a1a555 100644 --- a/cpp/src/mip/relaxed_lp/relaxed_lp.cuh +++ b/cpp/src/mip/relaxed_lp/relaxed_lp.cuh @@ -32,7 +32,8 @@ struct relaxed_lp_settings_t { bool check_infeasibility = true; bool return_first_feasible = false; bool save_state = true; - bool per_constraint_residual = false; + bool per_constraint_residual = true; + bool has_initial_primal = true; std::atomic* concurrent_halt = nullptr; }; diff --git a/cpp/src/mip/solve.cu b/cpp/src/mip/solve.cu index cef0f3467b..75eafa8c2b 100644 --- a/cpp/src/mip/solve.cu +++ b/cpp/src/mip/solve.cu @@ -106,16 +106,15 @@ mip_solution_t run_mip(detail::problem_t& problem, "Size mismatch"); cuopt_assert(problem.original_problem_ptr->get_n_constraints() == scaled_problem.n_constraints, "Size mismatch"); - detail::pdhg_solver_t pdhg_solver(scaled_problem.handle_ptr, scaled_problem); detail::pdlp_initial_scaling_strategy_t scaling( scaled_problem.handle_ptr, scaled_problem, pdlp_hyper_params::default_l_inf_ruiz_iterations, (f_t)pdlp_hyper_params::default_alpha_pock_chambolle_rescaling, - pdhg_solver, 
scaled_problem.reverse_coefficients, scaled_problem.reverse_offsets, scaled_problem.reverse_constraints, + nullptr, running_mip); cuopt_func_call(auto saved_problem = scaled_problem); @@ -157,12 +156,13 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, { try { constexpr f_t max_time_limit = 1000000000; - const f_t time_limit = settings.time_limit == 0 ? max_time_limit : settings.time_limit; - if (settings.heuristics_only && time_limit == std::numeric_limits::max()) { - CUOPT_LOG_ERROR("Time limit cannot be infinity when heuristics only is set"); - cuopt_expects(false, - error_type_t::RuntimeError, - "Time limit cannot be infinity when heuristics only is set"); + f_t time_limit = + (settings.time_limit == 0 || settings.time_limit == std::numeric_limits::infinity()) + ? max_time_limit + : settings.time_limit; + if (settings.heuristics_only && (time_limit == std::numeric_limits::max() || + time_limit == std::numeric_limits::infinity())) { + time_limit = max_time_limit; } // Create log stream for file logging and add it to default logger diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu index 5e27a65eaf..ea32724191 100644 --- a/cpp/src/mip/solver.cu +++ b/cpp/src/mip/solver.cu @@ -76,9 +76,11 @@ struct branch_and_bound_solution_helper_t { dm->population.add_external_solution(solution, objective); } - void set_simplex_solution(std::vector& solution, f_t objective) + void set_simplex_solution(std::vector& solution, + std::vector& dual_solution, + f_t objective) { - dm->set_simplex_solution(solution, objective); + dm->set_simplex_solution(solution, dual_solution, objective); } void preempt_heuristic_solver() { dm->population.preempt_heuristic_solver(); } @@ -187,7 +189,8 @@ solution_t mip_solver_t::run_solver() std::bind(&branch_and_bound_solution_helper_t::set_simplex_solution, &solution_helper, std::placeholders::_1, - std::placeholders::_2); + std::placeholders::_2, + std::placeholders::_3); // Create the branch and bound object branch_and_bound 
= std::make_unique>( diff --git a/cpp/tests/mip/bounds_standardization_test.cu b/cpp/tests/mip/bounds_standardization_test.cu index 77b4acfd7b..14aa271cd7 100644 --- a/cpp/tests/mip/bounds_standardization_test.cu +++ b/cpp/tests/mip/bounds_standardization_test.cu @@ -78,8 +78,9 @@ void test_bounds_standardization_test(std::string test_instance) mip_solver_settings_t default_settings{}; detail::relaxed_lp_settings_t lp_settings; - lp_settings.time_limit = 120.; - lp_settings.tolerance = default_settings.tolerances.absolute_tolerance; + lp_settings.time_limit = 120.; + lp_settings.tolerance = default_settings.tolerances.absolute_tolerance; + lp_settings.per_constraint_residual = false; // run the problem through pdlp auto result_1 = detail::get_relaxed_lp_solution(standardized_problem, solution_1, lp_settings); diff --git a/cpp/tests/mip/elim_var_remap_test.cu b/cpp/tests/mip/elim_var_remap_test.cu index c486d98c81..e6aa6ec17f 100644 --- a/cpp/tests/mip/elim_var_remap_test.cu +++ b/cpp/tests/mip/elim_var_remap_test.cu @@ -163,8 +163,9 @@ void test_elim_var_solution(std::string test_instance) detail::solution_t solution_1(standardized_problem); detail::relaxed_lp_settings_t lp_settings; - lp_settings.time_limit = 120.; - lp_settings.tolerance = default_settings.tolerances.absolute_tolerance; + lp_settings.time_limit = 120.; + lp_settings.tolerance = default_settings.tolerances.absolute_tolerance; + lp_settings.per_constraint_residual = false; // run the problem through pdlp auto result_1 = detail::get_relaxed_lp_solution(standardized_problem, solution_1, lp_settings); solution_1.compute_feasibility(); @@ -192,8 +193,9 @@ void test_elim_var_solution(std::string test_instance) detail::solution_t solution_2(sub_problem); detail::relaxed_lp_settings_t lp_settings_2; - lp_settings_2.time_limit = 120.; - lp_settings_2.tolerance = default_settings.tolerances.absolute_tolerance; + lp_settings_2.time_limit = 120.; + lp_settings_2.tolerance = 
default_settings.tolerances.absolute_tolerance; + lp_settings_2.per_constraint_residual = false; // run the problem through pdlp auto result_2 = detail::get_relaxed_lp_solution(sub_problem, solution_2, lp_settings_2); solution_2.compute_feasibility(); diff --git a/cpp/tests/mip/load_balancing_test.cu b/cpp/tests/mip/load_balancing_test.cu index fb0d8b6e86..6d4762d301 100644 --- a/cpp/tests/mip/load_balancing_test.cu +++ b/cpp/tests/mip/load_balancing_test.cu @@ -141,10 +141,10 @@ void test_multi_probe(std::string path) problem, 10, 1.0, - pdhg_solver, problem.reverse_coefficients, problem.reverse_offsets, problem.reverse_constraints, + nullptr, true); detail::mip_solver_t solver(problem, default_settings, scaling, cuopt::timer_t(0)); detail::load_balanced_problem_t lb_problem(problem); diff --git a/cpp/tests/mip/multi_probe_test.cu b/cpp/tests/mip/multi_probe_test.cu index 1473c84bff..10d6bc7bce 100644 --- a/cpp/tests/mip/multi_probe_test.cu +++ b/cpp/tests/mip/multi_probe_test.cu @@ -158,15 +158,14 @@ void test_multi_probe(std::string path) problem_checking_t::check_problem_representation(op_problem); detail::problem_t problem(op_problem); mip_solver_settings_t default_settings{}; - detail::pdhg_solver_t pdhg_solver(problem.handle_ptr, problem); detail::pdlp_initial_scaling_strategy_t scaling(&handle_, problem, 10, 1.0, - pdhg_solver, problem.reverse_coefficients, problem.reverse_offsets, problem.reverse_constraints, + nullptr, true); detail::mip_solver_t solver(problem, default_settings, scaling, cuopt::timer_t(0)); detail::bound_presolve_t bnd_prb_0(solver.context); diff --git a/docs/cuopt/source/lp-milp-settings.rst b/docs/cuopt/source/lp-milp-settings.rst index bb9541f4ab..b97f31f168 100644 --- a/docs/cuopt/source/lp-milp-settings.rst +++ b/docs/cuopt/source/lp-milp-settings.rst @@ -280,14 +280,14 @@ Absolute Tolerance ``CUOPT_MIP_ABSOLUTE_TOLERANCE`` controls the MIP absolute tolerance. -Note: the default value is ``1e-4``. 
+Note: the default value is ``1e-6``. Relative Tolerance ^^^^^^^^^^^^^^^^^^ ``CUOPT_MIP_RELATIVE_TOLERANCE`` controls the MIP relative tolerance. -Note: the default value is ``1e-6``. +Note: the default value is ``1e-12``. Integrality Tolerance From e3f90c5fa7b9a799313e6553576877e65d0176e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Akif=20=C3=87=C3=96RD=C3=9CK?= Date: Fri, 19 Sep 2025 16:00:44 +0200 Subject: [PATCH 31/33] Fix bug in fixed_problem_computation (#403) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The issue was the problem check that some constraint values were going -inf and nan. This was caught by exceptions but it didn't crash the process as it is caught. We need more graceful handling of exceptions: #264 Authors: - Akif ÇÖRDÜK (https://github.com/akifcorduk) Approvers: - Alice Boucher (https://github.com/aliceb-nv) - Ramakrishnap (https://github.com/rgsl888prabhu) URL: https://github.com/NVIDIA/cuopt/pull/403 --- cpp/CMakeLists.txt | 5 ++--- cpp/src/mip/problem/problem.cu | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index df4ad41465..45789ceddc 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -85,9 +85,8 @@ if(CMAKE_COMPILER_IS_GNUCXX) endif(CMAKE_COMPILER_IS_GNUCXX) # To use sanitizer with cuda runtime, one must follow a few steps: -# 1. Find libasan.so with "gcc -print-file-name=libasan.so" -# 2. Run the binary with env var set: LD_PRELOAD=$PATH_TO_LIBASAN ASAN_OPTIONS='protect_shadow_gap=0:replace_intrin=0' -# 3. (Optional) To run with a debugger (gdb or cuda-gdb) use the additional ASAN option alloc_dealloc_mismatch=0 +# 1. Run the binary with env var set: LD_PRELOAD="$(gcc -print-file-name=libasan.so)" ASAN_OPTIONS='protect_shadow_gap=0:replace_intrin=0' +# 2. 
(Optional) To run with a debugger (gdb or cuda-gdb) use the additional ASAN option alloc_dealloc_mismatch=0 if(BUILD_SANITIZER) list(APPEND CUOPT_CXX_FLAGS -fsanitize=address,undefined -fno-omit-frame-pointer -g -Wno-error=maybe-uninitialized) add_link_options(-fsanitize=address,undefined) diff --git a/cpp/src/mip/problem/problem.cu b/cpp/src/mip/problem/problem.cu index e4ab0835de..598534c61e 100644 --- a/cpp/src/mip/problem/problem.cu +++ b/cpp/src/mip/problem/problem.cu @@ -1278,6 +1278,7 @@ void problem_t::compute_integer_fixed_problem() return; } rmm::device_uvector assignment(n_variables, handle_ptr->get_stream()); + thrust::fill(handle_ptr->get_thrust_policy(), assignment.begin(), assignment.end(), 0.); integer_fixed_problem = std::make_shared>(get_problem_after_fixing_vars( assignment, integer_indices, integer_fixed_variable_map, handle_ptr)); integer_fixed_problem->check_problem_representation(true); From ae659b96eb1a682d253a8d93b38a87aeb763cc15 Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Fri, 19 Sep 2025 22:03:53 +0200 Subject: [PATCH 32/33] Simple diving for Branch-and-Bound (#305) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR introduces the following changes: - Implements a simple diving procedure - Allows the branch-and-bound to switch between different search strategies: `BEST_FIRST`, `DEPTH_FIRST` and `MULTITHREADED_BEST_FIRST_WITH_DIVING` - Refactor the branch-and-bound code such that the `solve` function is now organized into separated methods - Moved some commonly used variables to be member variables in the branch-and-bound solver. Authors: - Nicolas L. 
Guidotti (https://github.com/nguidotti) - Ramakrishnap (https://github.com/rgsl888prabhu) - https://github.com/ahehn-nv Approvers: - Gil Forsyth (https://github.com/gforsyth) - Akif ÇÖRDÜK (https://github.com/akifcorduk) - Trevor McKay (https://github.com/tmckayus) - Chris Maes (https://github.com/chris-maes) URL: https://github.com/NVIDIA/cuopt/pull/305 --- .../linear_programming/cuopt/run_mip.cpp | 2 +- cpp/src/dual_simplex/branch_and_bound.cpp | 1150 ++++++++++------- cpp/src/dual_simplex/branch_and_bound.hpp | 119 +- cpp/src/dual_simplex/mip_node.hpp | 11 +- cpp/src/dual_simplex/pseudo_costs.hpp | 8 + cpp/src/mip/diversity/recombiners/sub_mip.cuh | 2 + cpp/tests/dual_simplex/unit_tests/solve.cpp | 1 + cpp/tests/mip/miplib_test.cu | 13 +- 8 files changed, 792 insertions(+), 514 deletions(-) diff --git a/benchmarks/linear_programming/cuopt/run_mip.cpp b/benchmarks/linear_programming/cuopt/run_mip.cpp index f6b30e72ce..64b1bb4bb1 100644 --- a/benchmarks/linear_programming/cuopt/run_mip.cpp +++ b/benchmarks/linear_programming/cuopt/run_mip.cpp @@ -17,6 +17,7 @@ #include "initial_solution_reader.hpp" #include "mip_test_instances.hpp" +#include #include #include #include @@ -24,7 +25,6 @@ #include #include -#include #include #include diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 2986de0184..3d41921054 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -15,6 +15,7 @@ * limitations under the License. 
*/ +#include #include #include @@ -30,8 +31,10 @@ #include #include +#include +#include #include -#include +#include namespace cuopt::linear_programming::dual_simplex { @@ -123,7 +126,7 @@ bool check_guess(const lp_problem_t& original_lp, } template -void set_uninitialized_steepest_edge_norms(i_t n, std::vector& edge_norms) +void set_uninitialized_steepest_edge_norms(std::vector& edge_norms) { for (i_t j = 0; j < edge_norms.size(); ++j) { if (edge_norms[j] <= 0.0) { edge_norms[j] = 1e-4; } @@ -184,17 +187,6 @@ dual::status_t convert_lp_status_to_dual_status(lp_status_t status) } } -} // namespace - -template -f_t branch_and_bound_t::get_upper_bound() -{ - mutex_upper.lock(); - const f_t upper_bound = upper_bound_; - mutex_upper.unlock(); - return upper_bound; -} - template f_t sgn(f_t x) { @@ -207,7 +199,8 @@ f_t relative_gap(f_t obj_value, f_t lower_bound) f_t user_mip_gap = obj_value == 0.0 ? (lower_bound == 0.0 ? 0.0 : std::numeric_limits::infinity()) : std::abs(obj_value - lower_bound) / std::abs(obj_value); - if (user_mip_gap != user_mip_gap) { return std::numeric_limits::infinity(); } + // Handle NaNs (i.e., NaN != NaN) + if (std::isnan(user_mip_gap)) { return std::numeric_limits::infinity(); } return user_mip_gap; } @@ -225,33 +218,81 @@ std::string user_mip_gap(f_t obj_value, f_t lower_bound) } } +} // namespace + +template +branch_and_bound_t::branch_and_bound_t( + const user_problem_t& user_problem, + const simplex_solver_settings_t& solver_settings) + : original_problem_(user_problem), + settings_(solver_settings), + original_lp_(1, 1, 1), + incumbent_(1), + root_relax_soln_(1, 1), + pc_(1) +{ + stats_.start_time = tic(); + convert_user_problem(original_problem_, settings_, original_lp_, new_slacks_); + full_variable_types(original_problem_, original_lp_, var_types_); + + mutex_upper_.lock(); + upper_bound_ = inf; + mutex_upper_.unlock(); + + mutex_lower_.lock(); + lower_bound_ = -inf; + mutex_lower_.unlock(); + + mutex_branching_.lock(); + 
currently_branching_ = false; + mutex_branching_.unlock(); +} + +template +f_t branch_and_bound_t::get_upper_bound() +{ + mutex_upper_.lock(); + const f_t upper_bound = upper_bound_; + mutex_upper_.unlock(); + return upper_bound; +} + +template +f_t branch_and_bound_t::get_lower_bound() +{ + mutex_lower_.lock(); + const f_t lower_bound = lower_bound_; + mutex_lower_.unlock(); + return lower_bound; +} + template void branch_and_bound_t::set_new_solution(const std::vector& solution) { - if (solution.size() != original_problem.num_cols) { - settings.log.printf( - "Solution size mismatch %ld %d\n", solution.size(), original_problem.num_cols); + if (solution.size() != original_problem_.num_cols) { + settings_.log.printf( + "Solution size mismatch %ld %d\n", solution.size(), original_problem_.num_cols); } std::vector crushed_solution; crush_primal_solution( - original_problem, original_lp, solution, new_slacks, crushed_solution); - f_t obj = compute_objective(original_lp, crushed_solution); + original_problem_, original_lp_, solution, new_slacks_, crushed_solution); + f_t obj = compute_objective(original_lp_, crushed_solution); bool is_feasible = false; bool attempt_repair = false; - mutex_upper.lock(); + mutex_upper_.lock(); if (obj < upper_bound_) { f_t primal_err; f_t bound_err; i_t num_fractional; is_feasible = check_guess( - original_lp, settings, var_types, crushed_solution, primal_err, bound_err, num_fractional); + original_lp_, settings_, var_types_, crushed_solution, primal_err, bound_err, num_fractional); if (is_feasible) { upper_bound_ = obj; } else { attempt_repair = true; constexpr bool verbose = false; if (verbose) { - settings.log.printf( + settings_.log.printf( "Injected solution infeasible. 
Constraint error %e bound error %e integer infeasible " "%d\n", primal_err, @@ -260,68 +301,65 @@ void branch_and_bound_t::set_new_solution(const std::vector& solu } } } - mutex_upper.unlock(); + mutex_upper_.unlock(); if (is_feasible) { - mutex_lower.lock(); - f_t lower_bound = lower_bound_; - mutex_lower.unlock(); - mutex_branching.lock(); - bool currently_branching = currently_branching; - mutex_branching.unlock(); + mutex_branching_.lock(); + bool currently_branching = currently_branching_; + mutex_branching_.unlock(); if (currently_branching) { - settings.log.printf( + f_t user_obj = compute_user_objective(original_lp_, obj); + f_t user_lower = compute_user_objective(original_lp_, get_lower_bound()); + std::string gap = user_mip_gap(user_obj, user_lower); + + settings_.log.printf( "H %+13.6e %+10.6e %s %9.2f\n", - compute_user_objective(original_lp, obj), - compute_user_objective(original_lp, lower_bound), - user_mip_gap(compute_user_objective(original_lp, obj), - compute_user_objective(original_lp, lower_bound)) - .c_str(), - toc(start_time)); + user_obj, + user_lower, + gap.c_str(), + toc(stats_.start_time)); } else { - settings.log.printf("New solution from primal heuristics. Objective %+.6e. Time %.2f\n", - compute_user_objective(original_lp, obj), - toc(start_time)); + settings_.log.printf("New solution from primal heuristics. Objective %+.6e. 
Time %.2f\n", + compute_user_objective(original_lp_, obj), + toc(stats_.start_time)); } } if (attempt_repair) { - mutex_repair.lock(); - repair_queue.push_back(crushed_solution); - mutex_repair.unlock(); + mutex_repair_.lock(); + repair_queue_.push_back(crushed_solution); + mutex_repair_.unlock(); } } template -bool branch_and_bound_t::repair_solution( - const std::vector& root_vstatus, - const std::vector& edge_norms, - const std::vector& potential_solution, - f_t& repaired_obj, - std::vector& repaired_solution) const +bool branch_and_bound_t::repair_solution(const std::vector& edge_norms, + const std::vector& potential_solution, + f_t& repaired_obj, + std::vector& repaired_solution) const { bool feasible = false; repaired_obj = std::numeric_limits::quiet_NaN(); - i_t n = original_lp.num_cols; + i_t n = original_lp_.num_cols; assert(potential_solution.size() == n); - lp_problem_t repair_lp = original_lp; + lp_problem_t repair_lp = original_lp_; // Fix integer variables for (i_t j = 0; j < n; ++j) { - if (var_types[j] == variable_type_t::INTEGER) { + if (var_types_[j] == variable_type_t::INTEGER) { const f_t fixed_val = std::round(potential_solution[j]); repair_lp.lower[j] = fixed_val; repair_lp.upper[j] = fixed_val; } } - lp_solution_t lp_solution(original_lp.num_rows, original_lp.num_cols); + lp_solution_t lp_solution(original_lp_.num_rows, original_lp_.num_cols); i_t iter = 0; f_t lp_start_time = tic(); - simplex_solver_settings_t lp_settings = settings; - std::vector vstatus = root_vstatus; + simplex_solver_settings_t lp_settings = settings_; + std::vector vstatus = root_vstatus_; lp_settings.set_log(false); lp_settings.inside_mip = true; std::vector leaf_edge_norms = edge_norms; @@ -334,12 +372,17 @@ bool branch_and_bound_t::repair_solution( f_t primal_error; f_t bound_error; i_t num_fractional; - feasible = check_guess( - original_lp, settings, var_types, lp_solution.x, primal_error, bound_error, num_fractional); - repaired_obj = 
compute_objective(original_lp, repaired_solution); + feasible = check_guess(original_lp_, + settings_, + var_types_, + lp_solution.x, + primal_error, + bound_error, + num_fractional); + repaired_obj = compute_objective(original_lp_, repaired_solution); constexpr bool verbose = false; if (verbose) { - settings.log.printf( + settings_.log.printf( "After repair: feasible %d primal error %e bound error %e fractional %d. Objective %e\n", feasible, primal_error, @@ -353,62 +396,501 @@ bool branch_and_bound_t::repair_solution( } template -branch_and_bound_t::branch_and_bound_t( - const user_problem_t& user_problem, - const simplex_solver_settings_t& solver_settings) - : original_problem(user_problem), settings(solver_settings), original_lp(1, 1, 1), incumbent(1) +void branch_and_bound_t::repair_heuristic_solutions() { - start_time = tic(); - convert_user_problem(original_problem, settings, original_lp, new_slacks); - full_variable_types(original_problem, original_lp, var_types); + // Check if there are any solutions to repair + std::vector> to_repair; + mutex_repair_.lock(); + if (repair_queue_.size() > 0) { + to_repair = repair_queue_; + repair_queue_.clear(); + } + mutex_repair_.unlock(); + + if (to_repair.size() > 0) { + settings_.log.debug("Attempting to repair %ld injected solutions\n", to_repair.size()); + for (const std::vector& potential_solution : to_repair) { + std::vector repaired_solution; + f_t repaired_obj; + bool is_feasible = + repair_solution(edge_norms_, potential_solution, repaired_obj, repaired_solution); + if (is_feasible) { + mutex_upper_.lock(); + + if (repaired_obj < upper_bound_) { + upper_bound_ = repaired_obj; + incumbent_.set_incumbent_solution(repaired_obj, repaired_solution); + + f_t obj = compute_user_objective(original_lp_, repaired_obj); + f_t lower = compute_user_objective(original_lp_, get_lower_bound()); + std::string user_gap = user_mip_gap(obj, lower); + settings_.log.printf( + "H %+13.6e %+10.6e %s %9.2f\n", + obj, + lower, + 
user_gap.c_str(), + toc(stats_.start_time)); + + if (settings_.solution_callback != nullptr) { + std::vector original_x; + uncrush_primal_solution(original_problem_, original_lp_, repaired_solution, original_x); + settings_.solution_callback(original_x, repaired_obj); + } + } - mutex_upper.lock(); - upper_bound_ = inf; - mutex_upper.unlock(); + mutex_upper_.unlock(); + } + } + } +} - mutex_lower.lock(); - lower_bound_ = -inf; - mutex_lower.unlock(); +template +void branch_and_bound_t::branch(mip_node_t* parent_node, + i_t branch_var, + f_t branch_var_val, + const std::vector& parent_vstatus) +{ + // down child + auto down_child = std::make_unique>( + original_lp_, parent_node, ++stats_.num_nodes, branch_var, 0, branch_var_val, parent_vstatus); + + graphviz_edge( + settings_, parent_node, down_child.get(), branch_var, 0, std::floor(branch_var_val)); + + // up child + auto up_child = std::make_unique>( + original_lp_, parent_node, ++stats_.num_nodes, branch_var, 1, branch_var_val, parent_vstatus); + + graphviz_edge(settings_, parent_node, up_child.get(), branch_var, 1, std::ceil(branch_var_val)); + + assert(parent_vstatus.size() == original_lp_.num_cols); + parent_node->add_children(std::move(down_child), + std::move(up_child)); // child pointers moved into the tree +} + +template +void branch_and_bound_t::update_tree(mip_node_t* node_ptr, node_status_t status) +{ + std::vector*> stack; + node_ptr->set_status(status, stack); + remove_fathomed_nodes(stack); +} + +template +void branch_and_bound_t::add_feasible_solution(f_t leaf_objective, + const std::vector& leaf_solution, + i_t leaf_depth, + char symbol) +{ + bool send_solution = false; + i_t nodes_explored = stats_.nodes_explored; + i_t nodes_unexplored = stats_.nodes_unexplored; + f_t gap; + + mutex_upper_.lock(); + if (leaf_objective < upper_bound_) { + incumbent_.set_incumbent_solution(leaf_objective, leaf_solution); + upper_bound_ = leaf_objective; + f_t lower_bound = get_lower_bound(); + gap = upper_bound_ - 
lower_bound; + f_t obj = compute_user_objective(original_lp_, upper_bound_); + f_t lower = compute_user_objective(original_lp_, lower_bound); + settings_.log.printf("%c%8d %8lu %+13.6e %+10.6e %4d %7.1e %s %9.2f\n", + symbol, + nodes_explored, + nodes_unexplored, + obj, + lower, + leaf_depth, + nodes_explored > 0 ? stats_.total_lp_iters / nodes_explored : 0, + user_mip_gap(obj, lower).c_str(), + toc(stats_.start_time)); + + send_solution = true; + } + + if (send_solution && settings_.solution_callback != nullptr) { + std::vector original_x; + uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, original_x); + settings_.solution_callback(original_x, upper_bound_); + } + mutex_upper_.unlock(); + + if (send_solution) { + mutex_gap_.lock(); + gap_ = gap; + mutex_gap_.unlock(); + } +} + +template +dual::status_t branch_and_bound_t::node_dual_simplex( + i_t leaf_id, + lp_problem_t& leaf_problem, + std::vector& leaf_vstatus, + lp_solution_t& leaf_solution, + std::vector& bounds_changed, + csc_matrix_t& Arow, + f_t upper_bound, + logger_t& log) +{ + i_t node_iter = 0; + assert(leaf_vstatus.size() == leaf_problem.num_cols); + f_t lp_start_time = tic(); + std::vector leaf_edge_norms = edge_norms_; // = node.steepest_edge_norms; + simplex_solver_settings_t lp_settings = settings_; + lp_settings.set_log(false); + lp_settings.cut_off = upper_bound + settings_.dual_tol; + lp_settings.inside_mip = 2; + + // in B&B we only have equality constraints, leave it empty for default + std::vector row_sense; + bool feasible = + bound_strengthening(row_sense, lp_settings, leaf_problem, Arow, var_types_, bounds_changed); + + dual::status_t lp_status = dual::status_t::DUAL_UNBOUNDED; + + if (feasible) { + lp_status = dual_phase2(2, + 0, + lp_start_time, + leaf_problem, + lp_settings, + leaf_vstatus, + leaf_solution, + node_iter, + leaf_edge_norms); + + if (lp_status == dual::status_t::NUMERICAL) { + log.printf("Numerical issue node %d. 
Resolving from scratch.\n", leaf_id); + lp_status_t second_status = solve_linear_program_advanced( + leaf_problem, lp_start_time, lp_settings, leaf_solution, leaf_vstatus, leaf_edge_norms); + lp_status = convert_lp_status_to_dual_status(second_status); + } + } else { + log.printf("Infeasible after bounds strengthening. Fathoming node %d.\n", leaf_id); + } + + mutex_stats_.lock(); + stats_.total_lp_solve_time += toc(lp_start_time); + stats_.total_lp_iters += node_iter; + mutex_stats_.unlock(); + + return lp_status; +} + +template +mip_status_t branch_and_bound_t::solve_node_lp( + mip_node_t* node_ptr, + lp_problem_t& leaf_problem, + csc_matrix_t& Arow, + const std::vector& var_types, + f_t upper_bound) +{ + logger_t log; + log.log = false; + std::vector& leaf_vstatus = node_ptr->vstatus; + lp_solution_t leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols); + + // Set the correct bounds for the leaf problem + leaf_problem.lower = original_lp_.lower; + leaf_problem.upper = original_lp_.upper; + + std::vector bounds_changed(original_lp_.num_cols, false); + // Technically, we can get the already strengthened bounds from the node/parent instead of + // getting it from the original problem and re-strengthening. But this requires storing + // two vectors at each node and potentially cause memory issues + node_ptr->get_variable_bounds(leaf_problem.lower, leaf_problem.upper, bounds_changed); + + dual::status_t lp_status = node_dual_simplex(node_ptr->node_id, + leaf_problem, + leaf_vstatus, + leaf_solution, + bounds_changed, + Arow, + upper_bound, + settings_.log); + + if (lp_status == dual::status_t::DUAL_UNBOUNDED) { + node_ptr->lower_bound = inf; + graphviz_node(settings_, node_ptr, "infeasible", 0.0); + update_tree(node_ptr, node_status_t::INFEASIBLE); + // Node was infeasible. 
Do not branch + + } else if (lp_status == dual::status_t::CUTOFF) { + node_ptr->lower_bound = upper_bound; + f_t leaf_objective = compute_objective(leaf_problem, leaf_solution.x); + graphviz_node(settings_, node_ptr, "cut off", leaf_objective); + update_tree(node_ptr, node_status_t::FATHOMED); + // Node was cut off. Do not branch + } else if (lp_status == dual::status_t::OPTIMAL) { + // LP was feasible + std::vector fractional; + const i_t leaf_fractional = + fractional_variables(settings_, leaf_solution.x, var_types_, fractional); + f_t leaf_objective = compute_objective(leaf_problem, leaf_solution.x); + graphviz_node(settings_, node_ptr, "lower bound", leaf_objective); + + mutex_pc_.lock(); + pc_.update_pseudo_costs(node_ptr, leaf_objective); + mutex_pc_.unlock(); + + node_ptr->lower_bound = leaf_objective; + + constexpr f_t fathom_tol = 1e-5; + if (leaf_fractional == 0) { + add_feasible_solution(leaf_objective, leaf_solution.x, node_ptr->depth, 'B'); + graphviz_node(settings_, node_ptr, "integer feasible", leaf_objective); + update_tree(node_ptr, node_status_t::INTEGER_FEASIBLE); + + } else if (leaf_objective <= upper_bound + fathom_tol) { + // Choose fractional variable to branch on + mutex_pc_.lock(); + const i_t branch_var = pc_.variable_selection( + fractional, leaf_solution.x, leaf_problem.lower, leaf_problem.upper, log); + mutex_pc_.unlock(); + + assert(leaf_vstatus.size() == leaf_problem.num_cols); + branch(node_ptr, branch_var, leaf_solution.x[branch_var], leaf_vstatus); + node_ptr->status = node_status_t::HAS_CHILDREN; + + } else { + graphviz_node(settings_, node_ptr, "fathomed", leaf_objective); + update_tree(node_ptr, node_status_t::FATHOMED); + } + } else { + graphviz_node(settings_, node_ptr, "numerical", 0.0); + settings_.log.printf("Encountered LP status %d. 
This indicates a numerical issue.\n", + lp_status); + return mip_status_t::NUMERICAL; + } + + return mip_status_t::UNSET; +} + +template +mip_status_t branch_and_bound_t::explore_tree(i_t branch_var, + mip_solution_t& solution) +{ + mip_status_t status = mip_status_t::UNSET; + logger_t log; + log.log = false; + auto compare = [](mip_node_t* a, mip_node_t* b) { + return a->lower_bound > + b->lower_bound; // True if a comes before b, elements that come before are output last + }; + + std::priority_queue*, std::vector*>, decltype(compare)> + heap(compare); + + mip_node_t root_node(root_objective_, root_vstatus_); + graphviz_node(settings_, &root_node, "lower bound", root_objective_); + + branch(&root_node, branch_var, root_relax_soln_.x[branch_var], root_vstatus_); + + // the stack does not own the unique_ptr the tree does + heap.push(root_node.get_down_child()); + heap.push(root_node.get_up_child()); + + // Make a copy of the original LP. We will modify its bounds at each leaf + lp_problem_t leaf_problem = original_lp_; + csc_matrix_t Arow(1, 1, 1); + original_lp_.A.transpose(Arow); + + f_t lower_bound = get_lower_bound(); + f_t gap = get_upper_bound() - lower_bound; + f_t last_log = 0; + + while (gap > settings_.absolute_mip_gap_tol && + relative_gap(get_upper_bound(), lower_bound) > settings_.relative_mip_gap_tol && + heap.size() > 0) { + repair_heuristic_solutions(); + + // Get a node off the heap + mip_node_t* node_ptr = heap.top(); + heap.pop(); + stats_.nodes_explored++; + + f_t upper_bound = get_upper_bound(); + if (upper_bound < node_ptr->lower_bound) { + // This node was put on the heap earlier but its lower bound is now greater than the current + // upper bound + update_tree(node_ptr, node_status_t::FATHOMED); + graphviz_node(settings_, node_ptr, "cutoff", node_ptr->lower_bound); + continue; + } + mutex_lower_.lock(); + lower_bound = lower_bound_ = node_ptr->lower_bound; + mutex_lower_.unlock(); + + mutex_gap_.lock(); + gap_ = gap = upper_bound - 
lower_bound; + mutex_gap_.unlock(); + + i_t nodes_explored = stats_.nodes_explored; + f_t now = toc(stats_.start_time); + f_t time_since_log = last_log == 0 ? 1.0 : toc(last_log); + if ((nodes_explored % 1000 == 0 || gap < 10 * settings_.absolute_mip_gap_tol || + nodes_explored < 1000) && + (time_since_log >= 1) || + (time_since_log > 60) || now > settings_.time_limit) { + f_t user_obj = compute_user_objective(original_lp_, upper_bound); + f_t user_lower = compute_user_objective(original_lp_, lower_bound); + std::string user_gap = user_mip_gap(user_obj, user_lower); + + settings_.log.printf(" %8d %8lu %+13.6e %+10.6e %4d %7.1e %s %9.2f\n", + nodes_explored, + heap.size(), + user_obj, + user_lower, + node_ptr->depth, + nodes_explored > 0 ? stats_.total_lp_iters / nodes_explored : 0, + user_gap.c_str(), + now); + last_log = tic(); + } + + if (toc(stats_.start_time) > settings_.time_limit) { + settings_.log.printf("Hit time limit. Stopping\n"); + status = mip_status_t::TIME_LIMIT; + break; + } + + status = solve_node_lp(node_ptr, leaf_problem, Arow, var_types_, upper_bound); + + if (status == mip_status_t::NUMERICAL) { break; } + + if (node_ptr->status == node_status_t::HAS_CHILDREN) { + // the heap does not own the unique_ptr the tree does + heap.push(node_ptr->get_down_child()); + heap.push(node_ptr->get_up_child()); + } + } + + stats_.nodes_unexplored = heap.size(); + + if (stats_.nodes_unexplored == 0) { + mutex_lower_.lock(); + lower_bound = lower_bound_ = root_node.lower_bound; + mutex_lower_.unlock(); + + mutex_gap_.lock(); + gap_ = gap = get_upper_bound() - lower_bound; + mutex_gap_.unlock(); + } + + return status; +} + +template +mip_status_t branch_and_bound_t::dive(i_t branch_var, mip_solution_t& solution) +{ + mip_status_t status = mip_status_t::UNSET; + + logger_t log; + log.log = false; - mutex_branching.lock(); - currently_branching = false; - mutex_branching.unlock(); + std::vector*> node_stack; + + mip_node_t root_node(root_objective_, root_vstatus_); 
+ graphviz_node(settings_, &root_node, "lower bound", root_objective_); + + branch(&root_node, branch_var, root_relax_soln_.x[branch_var], root_vstatus_); + + // the stack does not own the unique_ptr the tree does + node_stack.push_back(root_node.get_down_child()); + node_stack.push_back(root_node.get_up_child()); + + // Make a copy of the original LP. We will modify its bounds at each leaf + lp_problem_t leaf_problem = original_lp_; + + csc_matrix_t Arow(1, 1, 1); + original_lp_.A.transpose(Arow); + + f_t lower_bound = get_lower_bound(); + f_t gap = get_upper_bound() - lower_bound; + i_t nodes_explored = 0; + + while (node_stack.size() > 0) { + // Get a node off the stack + mip_node_t* node_ptr = node_stack.back(); + node_stack.pop_back(); + nodes_explored++; + + f_t upper_bound = get_upper_bound(); + lower_bound = get_lower_bound(); + gap = upper_bound - lower_bound; + + if (gap < settings_.absolute_mip_gap_tol && + relative_gap(get_upper_bound(), lower_bound) < settings_.relative_mip_gap_tol) { + update_tree(node_ptr, node_status_t::FATHOMED); + continue; + } + + if (toc(stats_.start_time) > settings_.time_limit) { + status = mip_status_t::TIME_LIMIT; + break; + } + + status = solve_node_lp(node_ptr, leaf_problem, Arow, var_types_, upper_bound); + + if (status == mip_status_t::NUMERICAL) { continue; } + + if (node_ptr->status == node_status_t::HAS_CHILDREN) { + // Martin's child selection + const i_t branch_var = node_ptr->get_down_child()->branch_var; + const f_t branch_var_val = node_ptr->get_down_child()->fractional_val; + const f_t down_val = std::floor(root_relax_soln_.x[branch_var]); + const f_t up_val = std::ceil(root_relax_soln_.x[branch_var]); + const f_t down_dist = branch_var_val - down_val; + const f_t up_dist = up_val - branch_var_val; + + if (down_dist < up_dist) { + node_stack.push_back(node_ptr->get_up_child()); + node_stack.push_back(node_ptr->get_down_child()); + } else { + node_stack.push_back(node_ptr->get_down_child()); + 
node_stack.push_back(node_ptr->get_up_child()); + } + } + } + + return status; } template mip_status_t branch_and_bound_t::solve(mip_solution_t& solution) { mip_status_t status = mip_status_t::UNSET; - mip_solution_t incumbent(original_lp.num_cols); - if (guess.size() != 0) { + + if (guess_.size() != 0) { std::vector crushed_guess; - crush_primal_solution(original_problem, original_lp, guess, new_slacks, crushed_guess); + crush_primal_solution(original_problem_, original_lp_, guess_, new_slacks_, crushed_guess); f_t primal_err; f_t bound_err; i_t num_fractional; const bool feasible = check_guess( - original_lp, settings, var_types, crushed_guess, primal_err, bound_err, num_fractional); + original_lp_, settings_, var_types_, crushed_guess, primal_err, bound_err, num_fractional); if (feasible) { - const f_t computed_obj = compute_objective(original_lp, crushed_guess); - mutex_upper.lock(); - incumbent.set_incumbent_solution(computed_obj, crushed_guess); + const f_t computed_obj = compute_objective(original_lp_, crushed_guess); + mutex_upper_.lock(); + incumbent_.set_incumbent_solution(computed_obj, crushed_guess); upper_bound_ = computed_obj; - mutex_upper.unlock(); + mutex_upper_.unlock(); } } - lp_solution_t root_relax_soln(original_lp.num_rows, original_lp.num_cols); - std::vector root_vstatus; - std::vector edge_norms; - settings.log.printf("Solving LP root relaxation\n"); - simplex_solver_settings_t lp_settings = settings; + root_relax_soln_.resize(original_lp_.num_rows, original_lp_.num_cols); + + settings_.log.printf("Solving LP root relaxation\n"); + simplex_solver_settings_t lp_settings = settings_; lp_settings.inside_mip = 1; lp_status_t root_status = solve_linear_program_advanced( - original_lp, start_time, lp_settings, root_relax_soln, root_vstatus, edge_norms); - f_t total_lp_solve_time = toc(start_time); - assert(root_vstatus.size() == original_lp.num_cols); + original_lp_, stats_.start_time, lp_settings, root_relax_soln_, root_vstatus_, edge_norms_); + 
stats_.total_lp_solve_time = toc(stats_.start_time); + assert(root_vstatus_.size() == original_lp_.num_cols); if (root_status == lp_status_t::INFEASIBLE) { - settings.log.printf("MIP Infeasible\n"); + settings_.log.printf("MIP Infeasible\n"); // FIXME: rarely dual simplex detects infeasible whereas it is feasible. // to add a small safety net, check if there is a primal solution already. // Uncomment this if the issue with cost266-UUE is resolved @@ -418,444 +900,148 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut return mip_status_t::INFEASIBLE; } if (root_status == lp_status_t::UNBOUNDED) { - settings.log.printf("MIP Unbounded\n"); - if (settings.heuristic_preemption_callback != nullptr) { - settings.heuristic_preemption_callback(); + settings_.log.printf("MIP Unbounded\n"); + if (settings_.heuristic_preemption_callback != nullptr) { + settings_.heuristic_preemption_callback(); } return mip_status_t::UNBOUNDED; } if (root_status == lp_status_t::TIME_LIMIT) { - settings.log.printf("Hit time limit\n"); + settings_.log.printf("Hit time limit\n"); return mip_status_t::TIME_LIMIT; } - set_uninitialized_steepest_edge_norms(original_lp.num_cols, edge_norms); + set_uninitialized_steepest_edge_norms(edge_norms_); - std::vector fractional; - const i_t num_fractional = - fractional_variables(settings, root_relax_soln.x, var_types, fractional); - const f_t root_objective = compute_objective(original_lp, root_relax_soln.x); - if (settings.set_simplex_solution_callback != nullptr) { + root_objective_ = compute_objective(original_lp_, root_relax_soln_.x); + if (settings_.set_simplex_solution_callback != nullptr) { std::vector original_x; - uncrush_primal_solution(original_problem, original_lp, root_relax_soln.x, original_x); + uncrush_primal_solution(original_problem_, original_lp_, root_relax_soln_.x, original_x); std::vector original_dual; std::vector original_z; - uncrush_dual_solution(original_problem, - original_lp, - root_relax_soln.y, - root_relax_soln.z, 
+ uncrush_dual_solution(original_problem_, + original_lp_, + root_relax_soln_.y, + root_relax_soln_.z, original_dual, original_z); - settings.set_simplex_solution_callback( - original_x, original_dual, compute_user_objective(original_lp, root_objective)); + settings_.set_simplex_solution_callback( + original_x, original_dual, compute_user_objective(original_lp_, root_objective_)); } - mutex_lower.lock(); - f_t lower_bound = lower_bound_ = root_objective; - mutex_lower.unlock(); + mutex_lower_.lock(); + lower_bound_ = root_objective_; + mutex_lower_.unlock(); + + std::vector fractional; + const i_t num_fractional = + fractional_variables(settings_, root_relax_soln_.x, var_types_, fractional); if (num_fractional == 0) { - mutex_upper.lock(); - incumbent.set_incumbent_solution(root_objective, root_relax_soln.x); - upper_bound_ = root_objective; - mutex_upper.unlock(); + mutex_upper_.lock(); + incumbent_.set_incumbent_solution(root_objective_, root_relax_soln_.x); + upper_bound_ = root_objective_; + mutex_upper_.unlock(); // We should be done here - uncrush_primal_solution(original_problem, original_lp, incumbent.x, solution.x); - solution.objective = incumbent.objective; - solution.lower_bound = lower_bound; + uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, solution.x); + solution.objective = incumbent_.objective; + solution.lower_bound = lower_bound_; solution.nodes_explored = 0; - solution.simplex_iterations = root_relax_soln.iterations; - settings.log.printf("Optimal solution found at root node. Objective %.16e. Time %.2f.\n", - compute_user_objective(original_lp, root_objective), - toc(start_time)); - if (settings.solution_callback != nullptr) { - settings.solution_callback(solution.x, solution.objective); + solution.simplex_iterations = root_relax_soln_.iterations; + settings_.log.printf("Optimal solution found at root node. Objective %.16e. 
Time %.2f.\n", + compute_user_objective(original_lp_, root_objective_), + toc(stats_.start_time)); + if (settings_.solution_callback != nullptr) { + settings_.solution_callback(solution.x, solution.objective); } - if (settings.heuristic_preemption_callback != nullptr) { - settings.heuristic_preemption_callback(); + if (settings_.heuristic_preemption_callback != nullptr) { + settings_.heuristic_preemption_callback(); } return mip_status_t::OPTIMAL; } - pseudo_costs_t pc(original_lp.num_cols); - strong_branching(original_lp, - settings, - start_time, - var_types, - root_relax_soln.x, + pc_.resize(original_lp_.num_cols); + strong_branching(original_lp_, + settings_, + stats_.start_time, + var_types_, + root_relax_soln_.x, fractional, - root_objective, - root_vstatus, - edge_norms, - pc); - - auto compare = [](mip_node_t* a, mip_node_t* b) { - return a->lower_bound > - b->lower_bound; // True if a comes before b, elements that come before are output last - }; - std::priority_queue*, std::vector*>, decltype(compare)> - heap(compare); - i_t num_nodes = 0; - mip_node_t root_node(root_objective, root_vstatus); - graphviz_node(settings, &root_node, "lower bound", lower_bound); + root_objective_, + root_vstatus_, + edge_norms_, + pc_); // Choose variable to branch on logger_t log; - log.log = false; - const i_t branch_var = - pc.variable_selection(fractional, root_relax_soln.x, original_lp.lower, original_lp.upper, log); + log.log = false; + i_t branch_var = pc_.variable_selection( + fractional, root_relax_soln_.x, original_lp_.lower, original_lp_.upper, log); - // down child - std::unique_ptr> down_child = - std::make_unique>(original_lp, - &root_node, - ++num_nodes, - branch_var, - 0, - root_relax_soln.x[branch_var], - root_vstatus); - - graphviz_edge(settings, - &root_node, - down_child.get(), - branch_var, - 0, - std::floor(root_relax_soln.x[branch_var])); - - // up child - std::unique_ptr> up_child = - std::make_unique>(original_lp, - &root_node, - ++num_nodes, - 
branch_var, - 1, - root_relax_soln.x[branch_var], - root_vstatus); + stats_.total_lp_iters = 0; + stats_.nodes_explored = 0; + stats_.nodes_unexplored = 0; + stats_.num_nodes = 1; - graphviz_edge( - settings, &root_node, up_child.get(), branch_var, 0, std::ceil(root_relax_soln.x[branch_var])); - - assert(root_vstatus.size() == original_lp.num_cols); - heap.push(down_child.get()); // the heap does not own the unique_ptr the tree does - heap.push(up_child.get()); // the heap does not own the unqiue_ptr the tree does - root_node.add_children(std::move(down_child), - std::move(up_child)); // child pointers moved into the tree - lp_problem_t leaf_problem = - original_lp; // Make a copy of the original LP. We will modify its bounds at each leaf - csc_matrix_t Arow(1, 1, 1); - original_lp.A.transpose(Arow); - f_t gap = get_upper_bound() - lower_bound; - i_t nodes_explored = 0; - settings.log.printf( + settings_.log.printf( "| Explored | Unexplored | Objective | Bound | Depth | Iter/Node | Gap | " " Time \n"); - mutex_branching.lock(); - currently_branching = true; - mutex_branching.unlock(); - - f_t total_lp_iters = 0.0; - f_t last_log = 0; - while (gap > settings.absolute_mip_gap_tol && - relative_gap(get_upper_bound(), lower_bound) > settings.relative_mip_gap_tol && - heap.size() > 0) { - // Check if there are any solutions to repair - std::vector> to_repair; - mutex_repair.lock(); - if (repair_queue.size() > 0) { - to_repair = repair_queue; - repair_queue.clear(); - } - mutex_repair.unlock(); - - if (to_repair.size() > 0) { - settings.log.debug("Attempting to repair %ld injected solutions\n", to_repair.size()); - for (const std::vector& potential_solution : to_repair) { - std::vector repaired_solution; - f_t repaired_obj; - bool is_feasible = repair_solution( - root_vstatus, edge_norms, potential_solution, repaired_obj, repaired_solution); - if (is_feasible) { - mutex_upper.lock(); - if (repaired_obj < upper_bound_) { - upper_bound_ = repaired_obj; - 
incumbent.set_incumbent_solution(repaired_obj, repaired_solution); - - settings.log.printf( - "H %+13.6e %+10.6e %s %9.2f\n", - compute_user_objective(original_lp, repaired_obj), - compute_user_objective(original_lp, lower_bound), - user_mip_gap(compute_user_objective(original_lp, repaired_obj), - compute_user_objective(original_lp, lower_bound)) - .c_str(), - toc(start_time)); - if (settings.solution_callback != nullptr) { - std::vector original_x; - uncrush_primal_solution(original_problem, original_lp, repaired_solution, original_x); - settings.solution_callback(original_x, repaired_obj); - } - } - mutex_upper.unlock(); - } - } - } - - // Get a node off the heap - mip_node_t* node_ptr = heap.top(); - heap.pop(); // Remove node from the heap - f_t upper_bound = get_upper_bound(); - if (upper_bound < node_ptr->lower_bound) { - // This node was put on the heap earlier but its lower bound is now greater than the current - // upper bound - std::vector*> stack; - node_ptr->set_status(node_status_t::FATHOMED, stack); - graphviz_node(settings, node_ptr, "cutoff", node_ptr->lower_bound); - remove_fathomed_nodes(stack); - continue; - } - mutex_lower.lock(); - lower_bound_ = lower_bound = node_ptr->lower_bound; - mutex_lower.unlock(); - gap = upper_bound - lower_bound; - const i_t leaf_depth = node_ptr->depth; - f_t now = toc(start_time); - f_t time_since_log = last_log == 0 ? 1.0 : toc(last_log); - if ((nodes_explored % 1000 == 0 || gap < 10 * settings.absolute_mip_gap_tol || - nodes_explored < 1000) && - (time_since_log >= 1) || - (time_since_log > 60) || now > settings.time_limit) { - settings.log.printf(" %8d %8lu %+13.6e %+10.6e %4d %7.1e %s %9.2f\n", - nodes_explored, - heap.size(), - compute_user_objective(original_lp, upper_bound), - compute_user_objective(original_lp, lower_bound), - leaf_depth, - nodes_explored > 0 ? 
total_lp_iters / nodes_explored : 0, - user_mip_gap(compute_user_objective(original_lp, upper_bound), - compute_user_objective(original_lp, lower_bound)) - .c_str(), - now); - last_log = tic(); - } - if (now > settings.time_limit) { - settings.log.printf("Hit time limit. Stoppping\n"); - status = mip_status_t::TIME_LIMIT; - break; - } + mutex_branching_.lock(); + currently_branching_ = true; + mutex_branching_.unlock(); - // Set the correct bounds for the leaf problem - leaf_problem.lower = original_lp.lower; - leaf_problem.upper = original_lp.upper; - std::vector bounds_changed(original_lp.num_cols, false); - // Technically, we can get the already strengthened bounds from the node/parent instead of - // getting it from the original problem and re-strengthening. But this requires storing - // two vectors at each node and potentially cause memory issues - node_ptr->get_variable_bounds(leaf_problem.lower, leaf_problem.upper, bounds_changed); - - std::vector& leaf_vstatus = node_ptr->vstatus; - lp_solution_t leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols); - - i_t node_iter = 0; - assert(leaf_vstatus.size() == leaf_problem.num_cols); - f_t lp_start_time = tic(); - std::vector leaf_edge_norms = edge_norms; // = node.steepest_edge_norms; - simplex_solver_settings_t lp_settings = settings; - lp_settings.set_log(false); - lp_settings.cut_off = upper_bound + settings.dual_tol; - lp_settings.inside_mip = 2; - - // in B&B we only have equality constraints, leave it empty for default - std::vector row_sense; - bool feasible = - bound_strengthening(row_sense, lp_settings, leaf_problem, Arow, var_types, bounds_changed); - - dual::status_t lp_status = dual::status_t::DUAL_UNBOUNDED; - - // If the problem is infeasible after bounds strengthening, we don't need to solve the LP - if (feasible) { - lp_status = dual_phase2(2, - 0, - lp_start_time, - leaf_problem, - lp_settings, - leaf_vstatus, - leaf_solution, - node_iter, - leaf_edge_norms); - if (lp_status == 
dual::status_t::NUMERICAL) { - settings.log.printf("Numerical issue node %d. Resolving from scratch.\n", nodes_explored); - lp_status_t second_status = solve_linear_program_advanced( - leaf_problem, lp_start_time, lp_settings, leaf_solution, leaf_vstatus, leaf_edge_norms); - lp_status = convert_lp_status_to_dual_status(second_status); - } - } - total_lp_solve_time += toc(lp_start_time); - total_lp_iters += node_iter; + std::future diving_thread; - nodes_explored++; - if (lp_status == dual::status_t::DUAL_UNBOUNDED) { - if (!feasible) { - settings.log.printf("Infeasible after bounds strengthening. Fathoming node %d.\n", - nodes_explored); - } - node_ptr->lower_bound = inf; - std::vector*> stack; - node_ptr->set_status(node_status_t::INFEASIBLE, stack); - graphviz_node(settings, node_ptr, "infeasible", 0.0); - remove_fathomed_nodes(stack); - // Node was infeasible. Do not branch - } else if (lp_status == dual::status_t::CUTOFF) { - node_ptr->lower_bound = upper_bound; - std::vector*> stack; - node_ptr->set_status(node_status_t::FATHOMED, stack); - f_t leaf_objective = compute_objective(leaf_problem, leaf_solution.x); - graphviz_node(settings, node_ptr, "cut off", leaf_objective); - remove_fathomed_nodes(stack); - // Node was cut off. 
Do not branch - } else if (lp_status == dual::status_t::OPTIMAL) { - // LP was feasible - std::vector fractional; - const i_t leaf_fractional = - fractional_variables(settings, leaf_solution.x, var_types, fractional); - f_t leaf_objective = compute_objective(leaf_problem, leaf_solution.x); - graphviz_node(settings, node_ptr, "lower bound", leaf_objective); - - pc.update_pseudo_costs(node_ptr, leaf_objective); - node_ptr->lower_bound = leaf_objective; - - constexpr f_t fathom_tol = 1e-5; - if (leaf_fractional == 0) { - bool send_solution = false; - mutex_upper.lock(); - if (leaf_objective < upper_bound_) { - incumbent.set_incumbent_solution(leaf_objective, leaf_solution.x); - upper_bound_ = upper_bound = leaf_objective; - gap = upper_bound - lower_bound; - settings.log.printf("B%8d %8lu %+13.6e %+10.6e %4d %7.1e %s %9.2f\n", - nodes_explored, - heap.size(), - compute_user_objective(original_lp, upper_bound), - compute_user_objective(original_lp, lower_bound), - leaf_depth, - nodes_explored > 0 ? 
total_lp_iters / nodes_explored : 0, - user_mip_gap(compute_user_objective(original_lp, upper_bound), - compute_user_objective(original_lp, lower_bound)) - .c_str(), - toc(start_time)); - send_solution = true; - } - mutex_upper.unlock(); - if (send_solution && settings.solution_callback != nullptr) { - std::vector original_x; - uncrush_primal_solution(original_problem, original_lp, incumbent.x, original_x); - settings.solution_callback(original_x, upper_bound); - } - graphviz_node(settings, node_ptr, "integer feasible", leaf_objective); - std::vector*> stack; - node_ptr->set_status(node_status_t::INTEGER_FEASIBLE, stack); - remove_fathomed_nodes(stack); - } else if (leaf_objective <= upper_bound + fathom_tol) { - // Choose fractional variable to branch on - const i_t branch_var = pc.variable_selection( - fractional, leaf_solution.x, leaf_problem.lower, leaf_problem.upper, log); - assert(leaf_vstatus.size() == leaf_problem.num_cols); - - // down child - std::unique_ptr> down_child = - std::make_unique>(original_lp, - node_ptr, - ++num_nodes, - branch_var, - 0, - leaf_solution.x[branch_var], - leaf_vstatus); - graphviz_edge(settings, - node_ptr, - down_child.get(), - branch_var, - 0, - std::floor(leaf_solution.x[branch_var])); - // up child - std::unique_ptr> up_child = - std::make_unique>(original_lp, - node_ptr, - ++num_nodes, - branch_var, - 1, - leaf_solution.x[branch_var], - leaf_vstatus); - graphviz_edge(settings, - node_ptr, - up_child.get(), - branch_var, - 0, - std::ceil(leaf_solution.x[branch_var])); - heap.push(down_child.get()); // the heap does not own the unique_ptr the tree does - heap.push(up_child.get()); // the heap does not own the unique_ptr the tree does - node_ptr->add_children(std::move(down_child), - std::move(up_child)); // child pointers moved into the tree - } else { - graphviz_node(settings, node_ptr, "fathomed", leaf_objective); - std::vector*> stack; - node_ptr->set_status(node_status_t::FATHOMED, stack); - remove_fathomed_nodes(stack); 
- } - } else { - graphviz_node(settings, node_ptr, "numerical", 0.0); - settings.log.printf("Encountered LP status %d. This indicates a numerical issue.\n", - lp_status); - status = mip_status_t::NUMERICAL; - break; - } + if (settings_.num_threads > 0) { + diving_thread = std::async(std::launch::async, [&]() { return dive(branch_var, solution); }); } - mutex_branching.lock(); - currently_branching = false; - mutex_branching.unlock(); - if (heap.size() == 0) { - mutex_lower.lock(); - lower_bound = lower_bound_ = root_node.lower_bound; - mutex_lower.unlock(); - gap = get_upper_bound() - lower_bound; - } + status = explore_tree(branch_var, solution); + + if (settings_.num_threads > 0) { mip_status_t diving_status = diving_thread.get(); } + + mutex_branching_.lock(); + currently_branching_ = false; + mutex_branching_.unlock(); - settings.log.printf( + settings_.log.printf( "Explored %d nodes in %.2fs.\nAbsolute Gap %e Objective %.16e Lower Bound %.16e\n", - nodes_explored, - toc(start_time), - gap, - compute_user_objective(original_lp, get_upper_bound()), - compute_user_objective(original_lp, lower_bound)); - - if (gap <= settings.absolute_mip_gap_tol || - relative_gap(get_upper_bound(), lower_bound) <= settings.relative_mip_gap_tol) { + stats_.nodes_explored.load(), + toc(stats_.start_time), + gap_, + compute_user_objective(original_lp_, get_upper_bound()), + compute_user_objective(original_lp_, lower_bound_)); + + if (gap_ <= settings_.absolute_mip_gap_tol || + relative_gap(get_upper_bound(), lower_bound_) <= settings_.relative_mip_gap_tol) { status = mip_status_t::OPTIMAL; - if (gap > 0 && gap <= settings.absolute_mip_gap_tol) { - settings.log.printf("Optimal solution found within absolute MIP gap tolerance (%.1e)\n", - settings.absolute_mip_gap_tol); - } else if (gap > 0 && - relative_gap(get_upper_bound(), lower_bound) <= settings.relative_mip_gap_tol) { - settings.log.printf("Optimal solution found within relative MIP gap tolerance (%.1e)\n", - 
settings.relative_mip_gap_tol); + if (gap_ > 0 && gap_ <= settings_.absolute_mip_gap_tol) { + settings_.log.printf("Optimal solution found within absolute MIP gap tolerance (%.1e)\n", + settings_.absolute_mip_gap_tol); + } else if (gap_ > 0 && + relative_gap(get_upper_bound(), lower_bound_) <= settings_.relative_mip_gap_tol) { + settings_.log.printf("Optimal solution found within relative MIP gap tolerance (%.1e)\n", + settings_.relative_mip_gap_tol); } else { - settings.log.printf("Optimal solution found.\n"); + settings_.log.printf("Optimal solution found.\n"); } - if (settings.heuristic_preemption_callback != nullptr) { - settings.heuristic_preemption_callback(); + if (settings_.heuristic_preemption_callback != nullptr) { + settings_.heuristic_preemption_callback(); } } - if (heap.size() == 0 && get_upper_bound() == inf) { - settings.log.printf("Integer infeasible.\n"); + if (stats_.nodes_unexplored == 0 && get_upper_bound() == inf) { + settings_.log.printf("Integer infeasible.\n"); status = mip_status_t::INFEASIBLE; - if (settings.heuristic_preemption_callback != nullptr) { - settings.heuristic_preemption_callback(); + if (settings_.heuristic_preemption_callback != nullptr) { + settings_.heuristic_preemption_callback(); } } - uncrush_primal_solution(original_problem, original_lp, incumbent.x, solution.x); - solution.objective = incumbent.objective; - solution.lower_bound = lower_bound; - solution.nodes_explored = nodes_explored; - solution.simplex_iterations = total_lp_iters; + uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, solution.x); + solution.objective = incumbent_.objective; + solution.lower_bound = get_lower_bound(); + solution.nodes_explored = stats_.nodes_explored; + solution.simplex_iterations = stats_.total_lp_iters; return status; } diff --git a/cpp/src/dual_simplex/branch_and_bound.hpp b/cpp/src/dual_simplex/branch_and_bound.hpp index 9c1bac9fbc..efe28000bd 100644 --- a/cpp/src/dual_simplex/branch_and_bound.hpp +++ 
b/cpp/src/dual_simplex/branch_and_bound.hpp @@ -18,15 +18,21 @@ #pragma once #include +#include #include +#include #include #include #include +#include +#include #include #include #include #include +#include "cuopt/linear_programming/mip/solver_settings.hpp" +#include "dual_simplex/mip_node.hpp" namespace cuopt::linear_programming::dual_simplex { @@ -50,65 +56,128 @@ class branch_and_bound_t { const simplex_solver_settings_t& solver_settings); // Set an initial guess based on the user_problem. This should be called before solve. - void set_initial_guess(const std::vector& user_guess) { guess = user_guess; } + void set_initial_guess(const std::vector& user_guess) { guess_ = user_guess; } // Set a solution based on the user problem during the course of the solve void set_new_solution(const std::vector& solution); - bool repair_solution(const std::vector& root_vstatus, - const std::vector& leaf_edge_norms, + // Repair a low-quality solution from the heuristics. + bool repair_solution(const std::vector& leaf_edge_norms, const std::vector& potential_solution, f_t& repaired_obj, std::vector& repaired_solution) const; f_t get_upper_bound(); + f_t get_lower_bound(); + // The main entry routine. Returns the solver status and populates solution with the incumbent. mip_status_t solve(mip_solution_t& solution); private: - const user_problem_t& original_problem; - const simplex_solver_settings_t settings; + const user_problem_t& original_problem_; + const simplex_solver_settings_t settings_; + + // Initial guess. 
+ std::vector guess_; - f_t start_time; - std::vector guess; + lp_problem_t original_lp_; + std::vector new_slacks_; + std::vector var_types_; - lp_problem_t original_lp; - std::vector new_slacks; - std::vector var_types; // Mutex for lower bound - std::mutex mutex_lower; + std::mutex mutex_lower_; + // Global variable for lower bound f_t lower_bound_; // Mutex for upper bound - std::mutex mutex_upper; + std::mutex mutex_upper_; + // Global variable for upper bound f_t upper_bound_; + // Global variable for incumbent. The incumbent should be updated with the upper bound - mip_solution_t incumbent; + mip_solution_t incumbent_; // Mutex for gap - std::mutex mutex_gap; + std::mutex mutex_gap_; + // Global variable for gap - f_t gap; + f_t gap_; // Mutex for branching - std::mutex mutex_branching; - bool currently_branching; + std::mutex mutex_branching_; + bool currently_branching_; - // Mutex for stats - std::mutex mutex_stats; // Global variable for stats + std::mutex mutex_stats_; + + // Note that floating point atomics are only supported in C++20. struct stats_t { - int nodes_explored; - f_t total_lp_solve_time; - f_t start_time; - } stats; + f_t start_time = 0.0; + f_t total_lp_solve_time = 0.0; + std::atomic nodes_explored = 0; + std::atomic nodes_unexplored = 0; + f_t total_lp_iters = 0; + std::atomic num_nodes = 0; + } stats_; // Mutex for repair - std::mutex mutex_repair; - std::vector> repair_queue; + std::mutex mutex_repair_; + std::vector> repair_queue_; + + // Variables for the root node in the search tree. + std::vector root_vstatus_; + f_t root_objective_; + lp_solution_t root_relax_soln_; + std::vector edge_norms_; + + // Pseudocosts + pseudo_costs_t pc_; + std::mutex mutex_pc_; + + // Update the status of the nodes in the search tree. + void update_tree(mip_node_t* node_ptr, node_status_t status); + + // Update the incumbent solution with the new feasible solution. + // found during branch and bound. 
+ void add_feasible_solution(f_t leaf_objective, + const std::vector& leaf_solution, + i_t leaf_depth, + char symbol); + + // Repairs low-quality solutions from the heuristics, if it is applicable. + void repair_heuristic_solutions(); + + // Explore the search tree using the best-first search strategy. + mip_status_t explore_tree(i_t branch_var, mip_solution_t& solution); + + // Explore the search tree using the depth-first search strategy. + mip_status_t dive(i_t branch_var, mip_solution_t& solution); + + // Branch the current node, creating two children. + void branch(mip_node_t* parent_node, + i_t branch_var, + f_t branch_var_val, + const std::vector& parent_vstatus); + + // Solve the LP relaxation of a leaf node. + mip_status_t solve_node_lp(mip_node_t* node_ptr, + lp_problem_t& leaf_problem, + csc_matrix_t& Arow, + const std::vector& var_types, + f_t upper_bound); + + // Solve the LP relaxation of a leaf node using the dual simplex method. + dual::status_t node_dual_simplex(i_t leaf_id, + lp_problem_t& leaf_problem, + std::vector& leaf_vstatus, + lp_solution_t& leaf_solution, + std::vector& bounds_changed, + csc_matrix_t& Arow, + f_t upper_bound, + logger_t& log); }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/mip_node.hpp b/cpp/src/dual_simplex/mip_node.hpp index c4d83a8333..2d315fe0e3 100644 --- a/cpp/src/dual_simplex/mip_node.hpp +++ b/cpp/src/dual_simplex/mip_node.hpp @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -33,6 +34,7 @@ enum class node_status_t : int { INTEGER_FEASIBLE = 2, // Node has an integer feasible solution INFEASIBLE = 3, // Node is infeasible FATHOMED = 4, // Node objective is greater than the upper bound + HAS_CHILDREN = 5, // Node has children to explore }; bool inactive_status(node_status_t status); @@ -40,7 +42,7 @@ bool inactive_status(node_status_t status); template class mip_node_t { public: - mip_node_t(f_t root_lower_bound, std::vector& basis) + 
mip_node_t(f_t root_lower_bound, const std::vector& basis) : status(node_status_t::ACTIVE), lower_bound(root_lower_bound), depth(0), @@ -53,13 +55,14 @@ class mip_node_t { children[0] = nullptr; children[1] = nullptr; } + mip_node_t(const lp_problem_t& problem, mip_node_t* parent_node, i_t node_num, i_t branch_variable, i_t branch_direction, f_t branch_var_value, - std::vector& basis) + const std::vector& basis) : status(node_status_t::ACTIVE), lower_bound(parent_node->lower_bound), depth(parent_node->depth + 1), @@ -103,6 +106,10 @@ class mip_node_t { } } + mip_node_t* get_down_child() const { return children[0].get(); } + + mip_node_t* get_up_child() const { return children[1].get(); } + void add_children(std::unique_ptr&& down_child, std::unique_ptr&& up_child) { diff --git a/cpp/src/dual_simplex/pseudo_costs.hpp b/cpp/src/dual_simplex/pseudo_costs.hpp index fef67b9b14..d26fe8d489 100644 --- a/cpp/src/dual_simplex/pseudo_costs.hpp +++ b/cpp/src/dual_simplex/pseudo_costs.hpp @@ -39,6 +39,14 @@ class pseudo_costs_t { void update_pseudo_costs(mip_node_t* node_ptr, f_t leaf_objective); + void resize(i_t num_variables) + { + pseudo_cost_sum_down.resize(num_variables); + pseudo_cost_sum_up.resize(num_variables); + pseudo_cost_num_down.resize(num_variables); + pseudo_cost_num_up.resize(num_variables); + } + void initialized(i_t& num_initialized_down, i_t& num_initialized_up, f_t& pseudo_cost_down_avg, diff --git a/cpp/src/mip/diversity/recombiners/sub_mip.cuh b/cpp/src/mip/diversity/recombiners/sub_mip.cuh index eb94353515..1818092fc9 100644 --- a/cpp/src/mip/diversity/recombiners/sub_mip.cuh +++ b/cpp/src/mip/diversity/recombiners/sub_mip.cuh @@ -17,6 +17,7 @@ #pragma once +#include "cuopt/linear_programming/mip/solver_settings.hpp" #include "recombiner.cuh" #include @@ -115,6 +116,7 @@ class sub_mip_recombiner_t : public recombiner_t { f_t objective) { this->solution_callback(solution, objective); }; + // disable B&B logs, so that it is not interfering with the main 
B&B thread branch_and_bound_settings.log.log = false; dual_simplex::branch_and_bound_t branch_and_bound(branch_and_bound_problem, diff --git a/cpp/tests/dual_simplex/unit_tests/solve.cpp b/cpp/tests/dual_simplex/unit_tests/solve.cpp index 923ad8c90a..964ea314cd 100644 --- a/cpp/tests/dual_simplex/unit_tests/solve.cpp +++ b/cpp/tests/dual_simplex/unit_tests/solve.cpp @@ -159,6 +159,7 @@ TEST(dual_simplex, burglar) for (int j = 0; j < num_items; ++j) { user_problem.var_types[j] = cuopt::linear_programming::dual_simplex::variable_type_t::INTEGER; } + cuopt::linear_programming::dual_simplex::simplex_solver_settings_t settings; std::vector solution(num_items); EXPECT_EQ((cuopt::linear_programming::dual_simplex::solve(user_problem, settings, solution)), 0); diff --git a/cpp/tests/mip/miplib_test.cu b/cpp/tests/mip/miplib_test.cu index 603fe60145..d0866455fa 100644 --- a/cpp/tests/mip/miplib_test.cu +++ b/cpp/tests/mip/miplib_test.cu @@ -16,6 +16,9 @@ */ #include "../linear_programming/utilities/pdlp_test_utilities.cuh" +#include "cuopt/linear_programming/mip/solver_settings.hpp" +#include "dual_simplex/branch_and_bound.hpp" +#include "dual_simplex/simplex_solver_settings.hpp" #include "mip_utils.cuh" #include @@ -40,7 +43,7 @@ struct result_map_t { double cost; }; -void test_miplib_file(result_map_t test_instance) +void test_miplib_file(result_map_t test_instance, mip_solver_settings_t settings) { const raft::handle_t handle_{}; @@ -48,7 +51,6 @@ void test_miplib_file(result_map_t test_instance) cuopt::mps_parser::mps_data_model_t problem = cuopt::mps_parser::parse_mps(path, false); handle_.sync_stream(); - mip_solver_settings_t settings; // set the time limit depending on we are in assert mode or not #ifdef ASSERT_MODE constexpr double test_time_limit = 60.; @@ -58,7 +60,9 @@ void test_miplib_file(result_map_t test_instance) settings.time_limit = test_time_limit; mip_solution_t solution = solve_mip(&handle_, problem, settings); - 
EXPECT_EQ(solution.get_termination_status(), mip_termination_status_t::FeasibleFound); + bool is_feasible = solution.get_termination_status() == mip_termination_status_t::FeasibleFound || + solution.get_termination_status() == mip_termination_status_t::Optimal; + EXPECT_TRUE(is_feasible); double obj_val = solution.get_objective_value(); // for now keep a 100% error rate EXPECT_NEAR(test_instance.cost, obj_val, test_instance.cost); @@ -68,10 +72,11 @@ void test_miplib_file(result_map_t test_instance) TEST(mip_solve, run_small_tests) { + mip_solver_settings_t settings; std::vector test_instances = { {"mip/50v-10.mps", 11311031.}, {"mip/neos5.mps", 15.}, {"mip/swath1.mps", 1300.}}; for (const auto& test_instance : test_instances) { - test_miplib_file(test_instance); + test_miplib_file(test_instance, settings); } } From ac080212639fe3a11c8e3e3c15dc47eb546986af Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Mon, 22 Sep 2025 09:48:51 +0000 Subject: [PATCH 33/33] amend --- cpp/src/linear_programming/solve.cu | 6 ++++++ cpp/src/mip/solve.cu | 2 +- cpp/tests/linear_programming/c_api_tests/c_api_test.c | 3 ++- cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp | 8 +++++++- cpp/tests/linear_programming/c_api_tests/c_api_tests.h | 2 +- 5 files changed, 17 insertions(+), 4 deletions(-) diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu index 156dd52961..ef75287510 100644 --- a/cpp/src/linear_programming/solve.cu +++ b/cpp/src/linear_programming/solve.cu @@ -590,6 +590,12 @@ optimization_problem_solution_t solve_lp(optimization_problem_t::check_initial_solution_representation(op_problem, settings); } + // Check for crossing bounds. 
Return infeasible if there are any + if (problem_checking_t::has_crossing_bounds(op_problem)) { + return optimization_problem_solution_t(pdlp_termination_status_t::PrimalInfeasible, + op_problem.get_handle_ptr()->get_stream()); + } + auto lp_timer = cuopt::timer_t(settings.time_limit); detail::problem_t problem(op_problem); diff --git a/cpp/src/mip/solve.cu b/cpp/src/mip/solve.cu index 75eafa8c2b..7941f41dc4 100644 --- a/cpp/src/mip/solve.cu +++ b/cpp/src/mip/solve.cu @@ -215,7 +215,7 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, op_problem.get_handle_ptr()->get_stream()); } - problem = detail::problem_t(reduced_op_problem, settings.get_tolerances()); + problem = detail::problem_t(reduced_op_problem); presolve_time = timer.elapsed_time(); CUOPT_LOG_INFO("Third party presolve time: %f", presolve_time); } diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_test.c b/cpp/tests/linear_programming/c_api_tests/c_api_test.c index 4a976277aa..9f841c3306 100644 --- a/cpp/tests/linear_programming/c_api_tests/c_api_test.c +++ b/cpp/tests/linear_programming/c_api_tests/c_api_test.c @@ -874,7 +874,7 @@ cuopt_int_t test_ranged_problem(cuopt_int_t *termination_status_ptr, cuopt_float } // Test invalid bounds scenario (what MOI wrapper was producing) -cuopt_int_t test_invalid_bounds() +cuopt_int_t test_invalid_bounds(cuopt_int_t test_mip) { cuOptOptimizationProblem problem = NULL; cuOptSolverSettings settings = NULL; @@ -923,6 +923,7 @@ cuopt_int_t test_invalid_bounds() // Variable types (binary) char variable_types[] = {CUOPT_INTEGER}; // Binary variable + if (!test_mip) variable_types[0] = CUOPT_CONTINUOUS; cuopt_int_t status; cuopt_float_t time; diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp index 8ae49c48cc..5ccc21a9ee 100644 --- a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp +++ b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp @@ -113,4 +113,10 @@ 
TEST(c_api, test_ranged_problem) EXPECT_NEAR(objective, 32.0, 1e-3); } -TEST(c_api, test_invalid_bounds) { EXPECT_EQ(test_invalid_bounds(), CUOPT_SUCCESS); } +TEST(c_api, test_invalid_bounds) +{ + // Test LP codepath + EXPECT_EQ(test_invalid_bounds(false), CUOPT_SUCCESS); + // Test MIP codepath + EXPECT_EQ(test_invalid_bounds(true), CUOPT_SUCCESS); +} diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_tests.h b/cpp/tests/linear_programming/c_api_tests/c_api_tests.h index 66a202f41f..0400a62e90 100644 --- a/cpp/tests/linear_programming/c_api_tests/c_api_tests.h +++ b/cpp/tests/linear_programming/c_api_tests/c_api_tests.h @@ -39,7 +39,7 @@ cuopt_int_t test_missing_file(); cuopt_int_t test_infeasible_problem(); cuopt_int_t test_bad_parameter_name(); cuopt_int_t test_ranged_problem(cuopt_int_t* termination_status_ptr, cuopt_float_t* objective_ptr); -cuopt_int_t test_invalid_bounds(); +cuopt_int_t test_invalid_bounds(cuopt_int_t test_mip); #ifdef __cplusplus }