diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 116a6beb67..06523bf404 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -138,7 +138,8 @@ endif(BUILD_MSAN) # infrastructure files. Those files include abseil headers, and abseil's shared library # on conda-forge doesn't export Mutex::Dtor() in NDEBUG builds (abseil-cpp#1624). # Keeping NDEBUG defined for gRPC files makes the header inline an empty Dtor(), -# avoiding the missing symbol at runtime. +# avoiding the missing symbol at runtime. Additionally, gRPC files are always +# compiled with -DNDEBUG (see below) so Debug builds also avoid the missing symbol. if(DEFINE_ASSERT) add_definitions(-DASSERT_MODE) list(APPEND CUOPT_CUDA_FLAGS -UNDEBUG) @@ -390,7 +391,7 @@ if(DEFINE_ASSERT) endif() # Add gRPC mapper files and generated protobuf sources -list(APPEND CUOPT_SRC_FILES +set(GRPC_INFRA_FILES ${PROTO_SRCS} ${GRPC_PROTO_SRCS} ${GRPC_SERVICE_SRCS} @@ -401,6 +402,15 @@ list(APPEND CUOPT_SRC_FILES src/grpc/client/grpc_client.cpp src/grpc/client/solve_remote.cpp ) +list(APPEND CUOPT_SRC_FILES ${GRPC_INFRA_FILES}) + +# Always keep NDEBUG defined for gRPC infrastructure files so that abseil +# headers inline Mutex::Dtor() instead of emitting an external call. +# The conda-forge abseil shared library is built with NDEBUG and does not +# export that symbol (abseil-cpp#1624). Without this, Debug builds fail +# at runtime with "undefined symbol: absl::…::Mutex::Dtor". 
+set_property(SOURCE ${GRPC_INFRA_FILES} DIRECTORY ${CMAKE_SOURCE_DIR} + APPEND PROPERTY COMPILE_OPTIONS "-DNDEBUG") add_library(cuopt SHARED ${CUOPT_SRC_FILES} diff --git a/cpp/cuopt_cli.cpp b/cpp/cuopt_cli.cpp index da5f606771..ac568e07cf 100644 --- a/cpp/cuopt_cli.cpp +++ b/cpp/cuopt_cli.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -85,30 +86,22 @@ inline cuopt::init_logger_t dummy_logger( * @brief Run a single file * @param file_path Path to the MPS format input file containing the optimization problem * @param initial_solution_file Path to initial solution file in SOL format - * @param settings_strings Map of solver parameters + * @param settings Merged solver settings (config file loaded in main, then CLI overrides applied) */ int run_single_file(const std::string& file_path, const std::string& initial_solution_file, bool solve_relaxation, - const std::map& settings_strings) + cuopt::linear_programming::solver_settings_t& settings) { - cuopt::linear_programming::solver_settings_t settings; - - try { - for (auto& [key, val] : settings_strings) { - settings.set_parameter_from_string(key, val); - } - } catch (const std::exception& e) { - auto log = dummy_logger(settings); - CUOPT_LOG_ERROR("Error: %s", e.what()); - return -1; - } + cuopt::init_logger_t log(settings.get_parameter(CUOPT_LOG_FILE), + settings.get_parameter(CUOPT_LOG_TO_CONSOLE)); std::string base_filename = file_path.substr(file_path.find_last_of("/\\") + 1); constexpr bool input_mps_strict = false; cuopt::mps_parser::mps_data_model_t mps_data_model; bool parsing_failed = false; + auto timer = cuopt::timer_t(settings.get_parameter(CUOPT_TIME_LIMIT)); { CUOPT_LOG_INFO("Reading file %s", base_filename.c_str()); try { @@ -123,6 +116,7 @@ int run_single_file(const std::string& file_path, CUOPT_LOG_ERROR("Parsing MPS failed. 
Exiting!"); return -1; } + CUOPT_LOG_INFO("Read file %s in %.2f seconds", base_filename.c_str(), timer.elapsed_time()); // Determine memory backend and create problem using interface // Create handle only for GPU memory backend (avoid CUDA init on CPU-only hosts) @@ -259,6 +253,21 @@ int set_cuda_module_loading(int argc, char* argv[]) */ int main(int argc, char* argv[]) { + // Handle dump flags before argparse so no other args are required + for (int i = 1; i < argc; ++i) { + std::string arg = argv[i]; + if (arg == "--dump-hyper-params") { + cuopt::linear_programming::solver_settings_t settings; + settings.dump_parameters_to_file("/dev/stdout", true); + return 0; + } + if (arg == "--dump-params") { + cuopt::linear_programming::solver_settings_t settings; + settings.dump_parameters_to_file("/dev/stdout", false); + return 0; + } + } + if (set_cuda_module_loading(argc, argv) != 0) { return 1; } // Get the version string from the version_config.hpp file @@ -287,6 +296,20 @@ int main(int argc, char* argv[]) .default_value(true) .implicit_value(true); + program.add_argument("--params-file") + .help("path to parameter config file (key = value format, supports all parameters)") + .default_value(std::string("")); + + program.add_argument("--dump-hyper-params") + .help("print hyper-parameters only in config file format and exit") + .default_value(false) + .implicit_value(true); + + program.add_argument("--dump-params") + .help("print all parameters in config file format and exit") + .default_value(false) + .implicit_value(true); + std::map arg_name_to_param_name; // Register --pdlp-precision with string-to-int mapping so that it flows @@ -312,16 +335,17 @@ int main(int argc, char* argv[]) std::string arg_name = param_name_to_arg_name(param.param_name); // handle duplicate parameters appearing in MIP and LP settings if (arg_name_to_param_name.count(arg_name) == 0) { - program.add_argument(arg_name.c_str()).default_value(param.default_value); + auto& arg = 
program.add_argument(arg_name.c_str()).default_value(param.default_value); + if (param.param_name.find("hyper_") != std::string::npos) { arg.hidden(); } arg_name_to_param_name[arg_name] = param.param_name; } } for (auto& param : double_params) { std::string arg_name = param_name_to_arg_name(param.param_name); - // handle duplicate parameters appearing in MIP and LP settings if (arg_name_to_param_name.count(arg_name) == 0) { - program.add_argument(arg_name.c_str()).default_value(param.default_value); + auto& arg = program.add_argument(arg_name.c_str()).default_value(param.default_value); + if (param.param_name.find("hyper_") != std::string::npos) { arg.hidden(); } arg_name_to_param_name[arg_name] = param.param_name; } } @@ -329,20 +353,21 @@ int main(int argc, char* argv[]) for (auto& param : bool_params) { std::string arg_name = param_name_to_arg_name(param.param_name); if (arg_name_to_param_name.count(arg_name) == 0) { - program.add_argument(arg_name.c_str()).default_value(param.default_value); + auto& arg = program.add_argument(arg_name.c_str()).default_value(param.default_value); + if (param.param_name.find("hyper_") != std::string::npos) { arg.hidden(); } arg_name_to_param_name[arg_name] = param.param_name; } } for (auto& param : string_params) { std::string arg_name = param_name_to_arg_name(param.param_name); - // handle duplicate parameters appearing in MIP and LP settings if (arg_name_to_param_name.count(arg_name) == 0) { - program.add_argument(arg_name.c_str()).default_value(param.default_value); + auto& arg = program.add_argument(arg_name.c_str()).default_value(param.default_value); + if (param.param_name.find("hyper_") != std::string::npos) { arg.hidden(); } arg_name_to_param_name[arg_name] = param.param_name; } - } // done with solver settings - } + } + } // done with solver settings // Parse arguments try { @@ -374,16 +399,26 @@ int main(int argc, char* argv[]) const auto initial_solution_file = program.get("--initial-solution"); const auto 
solve_relaxation = program.get("--relaxation"); + const auto params_file = program.get("--params-file"); + + cuopt::linear_programming::solver_settings_t settings; + try { + if (!params_file.empty()) { settings.load_parameters_from_file(params_file); } + for (auto& [key, val] : settings_strings) { + settings.set_parameter_from_string(key, val); + } + } catch (const std::exception& e) { + auto log = dummy_logger(settings); + CUOPT_LOG_ERROR("Error: %s", e.what()); + return -1; + } // Only initialize CUDA resources if using GPU memory backend (not remote execution) auto memory_backend = cuopt::linear_programming::get_memory_backend_type(); std::vector> memory_resources; if (memory_backend == cuopt::linear_programming::memory_backend_t::GPU) { - // All arguments are parsed as string, default values are parsed as int if unused. - const auto num_gpus = program.is_used("--num-gpus") - ? std::stoi(program.get("--num-gpus")) - : program.get("--num-gpus"); + const int num_gpus = settings.get_parameter(CUOPT_NUM_GPUS); for (int i = 0; i < std::min(raft::device_setter::get_device_count(), num_gpus); ++i) { RAFT_CUDA_TRY(cudaSetDevice(i)); @@ -393,5 +428,5 @@ int main(int argc, char* argv[]) RAFT_CUDA_TRY(cudaSetDevice(0)); } - return run_single_file(file_name, initial_solution_file, solve_relaxation, settings_strings); + return run_single_file(file_name, initial_solution_file, solve_relaxation, settings); } diff --git a/cpp/include/cuopt/linear_programming/constants.h b/cpp/include/cuopt/linear_programming/constants.h index 09a138eb54..06eacb3408 100644 --- a/cpp/include/cuopt/linear_programming/constants.h +++ b/cpp/include/cuopt/linear_programming/constants.h @@ -20,64 +20,90 @@ #define CUOPT_INSTANTIATE_INT64 0 /* @brief LP/MIP parameter string constants */ -#define CUOPT_ABSOLUTE_DUAL_TOLERANCE "absolute_dual_tolerance" -#define CUOPT_RELATIVE_DUAL_TOLERANCE "relative_dual_tolerance" -#define CUOPT_ABSOLUTE_PRIMAL_TOLERANCE "absolute_primal_tolerance" -#define 
CUOPT_RELATIVE_PRIMAL_TOLERANCE "relative_primal_tolerance" -#define CUOPT_ABSOLUTE_GAP_TOLERANCE "absolute_gap_tolerance" -#define CUOPT_RELATIVE_GAP_TOLERANCE "relative_gap_tolerance" -#define CUOPT_INFEASIBILITY_DETECTION "infeasibility_detection" -#define CUOPT_STRICT_INFEASIBILITY "strict_infeasibility" -#define CUOPT_PRIMAL_INFEASIBLE_TOLERANCE "primal_infeasible_tolerance" -#define CUOPT_DUAL_INFEASIBLE_TOLERANCE "dual_infeasible_tolerance" -#define CUOPT_ITERATION_LIMIT "iteration_limit" -#define CUOPT_TIME_LIMIT "time_limit" -#define CUOPT_WORK_LIMIT "work_limit" -#define CUOPT_PDLP_SOLVER_MODE "pdlp_solver_mode" -#define CUOPT_METHOD "method" -#define CUOPT_PER_CONSTRAINT_RESIDUAL "per_constraint_residual" -#define CUOPT_SAVE_BEST_PRIMAL_SO_FAR "save_best_primal_so_far" -#define CUOPT_FIRST_PRIMAL_FEASIBLE "first_primal_feasible" -#define CUOPT_LOG_FILE "log_file" -#define CUOPT_LOG_TO_CONSOLE "log_to_console" -#define CUOPT_CROSSOVER "crossover" -#define CUOPT_FOLDING "folding" -#define CUOPT_AUGMENTED "augmented" -#define CUOPT_DUALIZE "dualize" -#define CUOPT_ORDERING "ordering" -#define CUOPT_BARRIER_DUAL_INITIAL_POINT "barrier_dual_initial_point" -#define CUOPT_ELIMINATE_DENSE_COLUMNS "eliminate_dense_columns" -#define CUOPT_CUDSS_DETERMINISTIC "cudss_deterministic" -#define CUOPT_PRESOLVE "presolve" -#define CUOPT_DUAL_POSTSOLVE "dual_postsolve" -#define CUOPT_MIP_DETERMINISM_MODE "mip_determinism_mode" -#define CUOPT_MIP_ABSOLUTE_TOLERANCE "mip_absolute_tolerance" -#define CUOPT_MIP_RELATIVE_TOLERANCE "mip_relative_tolerance" -#define CUOPT_MIP_INTEGRALITY_TOLERANCE "mip_integrality_tolerance" -#define CUOPT_MIP_ABSOLUTE_GAP "mip_absolute_gap" -#define CUOPT_MIP_RELATIVE_GAP "mip_relative_gap" -#define CUOPT_MIP_HEURISTICS_ONLY "mip_heuristics_only" -#define CUOPT_MIP_SCALING "mip_scaling" -#define CUOPT_MIP_PRESOLVE "mip_presolve" -#define CUOPT_MIP_RELIABILITY_BRANCHING "mip_reliability_branching" -#define CUOPT_MIP_CUT_PASSES "mip_cut_passes" 
-#define CUOPT_MIP_MIXED_INTEGER_ROUNDING_CUTS "mip_mixed_integer_rounding_cuts" -#define CUOPT_MIP_MIXED_INTEGER_GOMORY_CUTS "mip_mixed_integer_gomory_cuts" -#define CUOPT_MIP_KNAPSACK_CUTS "mip_knapsack_cuts" -#define CUOPT_MIP_IMPLIED_BOUND_CUTS "mip_implied_bound_cuts" -#define CUOPT_MIP_CLIQUE_CUTS "mip_clique_cuts" -#define CUOPT_MIP_STRONG_CHVATAL_GOMORY_CUTS "mip_strong_chvatal_gomory_cuts" -#define CUOPT_MIP_REDUCED_COST_STRENGTHENING "mip_reduced_cost_strengthening" -#define CUOPT_MIP_CUT_CHANGE_THRESHOLD "mip_cut_change_threshold" -#define CUOPT_MIP_CUT_MIN_ORTHOGONALITY "mip_cut_min_orthogonality" -#define CUOPT_MIP_BATCH_PDLP_STRONG_BRANCHING "mip_batch_pdlp_strong_branching" -#define CUOPT_SOLUTION_FILE "solution_file" -#define CUOPT_NUM_CPU_THREADS "num_cpu_threads" -#define CUOPT_NUM_GPUS "num_gpus" -#define CUOPT_USER_PROBLEM_FILE "user_problem_file" -#define CUOPT_PRESOLVE_FILE "presolve_file" -#define CUOPT_RANDOM_SEED "random_seed" -#define CUOPT_PDLP_PRECISION "pdlp_precision" +#define CUOPT_ABSOLUTE_DUAL_TOLERANCE "absolute_dual_tolerance" +#define CUOPT_RELATIVE_DUAL_TOLERANCE "relative_dual_tolerance" +#define CUOPT_ABSOLUTE_PRIMAL_TOLERANCE "absolute_primal_tolerance" +#define CUOPT_RELATIVE_PRIMAL_TOLERANCE "relative_primal_tolerance" +#define CUOPT_ABSOLUTE_GAP_TOLERANCE "absolute_gap_tolerance" +#define CUOPT_RELATIVE_GAP_TOLERANCE "relative_gap_tolerance" +#define CUOPT_INFEASIBILITY_DETECTION "infeasibility_detection" +#define CUOPT_STRICT_INFEASIBILITY "strict_infeasibility" +#define CUOPT_PRIMAL_INFEASIBLE_TOLERANCE "primal_infeasible_tolerance" +#define CUOPT_DUAL_INFEASIBLE_TOLERANCE "dual_infeasible_tolerance" +#define CUOPT_ITERATION_LIMIT "iteration_limit" +#define CUOPT_TIME_LIMIT "time_limit" +#define CUOPT_WORK_LIMIT "work_limit" +#define CUOPT_PDLP_SOLVER_MODE "pdlp_solver_mode" +#define CUOPT_METHOD "method" +#define CUOPT_PER_CONSTRAINT_RESIDUAL "per_constraint_residual" +#define CUOPT_SAVE_BEST_PRIMAL_SO_FAR 
"save_best_primal_so_far" +#define CUOPT_FIRST_PRIMAL_FEASIBLE "first_primal_feasible" +#define CUOPT_LOG_FILE "log_file" +#define CUOPT_LOG_TO_CONSOLE "log_to_console" +#define CUOPT_CROSSOVER "crossover" +#define CUOPT_FOLDING "folding" +#define CUOPT_AUGMENTED "augmented" +#define CUOPT_DUALIZE "dualize" +#define CUOPT_ORDERING "ordering" +#define CUOPT_BARRIER_DUAL_INITIAL_POINT "barrier_dual_initial_point" +#define CUOPT_ELIMINATE_DENSE_COLUMNS "eliminate_dense_columns" +#define CUOPT_CUDSS_DETERMINISTIC "cudss_deterministic" +#define CUOPT_PRESOLVE "presolve" +#define CUOPT_DUAL_POSTSOLVE "dual_postsolve" +#define CUOPT_MIP_DETERMINISM_MODE "mip_determinism_mode" +#define CUOPT_MIP_ABSOLUTE_TOLERANCE "mip_absolute_tolerance" +#define CUOPT_MIP_RELATIVE_TOLERANCE "mip_relative_tolerance" +#define CUOPT_MIP_INTEGRALITY_TOLERANCE "mip_integrality_tolerance" +#define CUOPT_MIP_ABSOLUTE_GAP "mip_absolute_gap" +#define CUOPT_MIP_RELATIVE_GAP "mip_relative_gap" +#define CUOPT_MIP_HEURISTICS_ONLY "mip_heuristics_only" +#define CUOPT_MIP_SCALING "mip_scaling" +#define CUOPT_MIP_PRESOLVE "mip_presolve" +#define CUOPT_MIP_RELIABILITY_BRANCHING "mip_reliability_branching" +#define CUOPT_MIP_CUT_PASSES "mip_cut_passes" +#define CUOPT_MIP_MIXED_INTEGER_ROUNDING_CUTS "mip_mixed_integer_rounding_cuts" +#define CUOPT_MIP_MIXED_INTEGER_GOMORY_CUTS "mip_mixed_integer_gomory_cuts" +#define CUOPT_MIP_KNAPSACK_CUTS "mip_knapsack_cuts" +#define CUOPT_MIP_IMPLIED_BOUND_CUTS "mip_implied_bound_cuts" +#define CUOPT_MIP_CLIQUE_CUTS "mip_clique_cuts" +#define CUOPT_MIP_STRONG_CHVATAL_GOMORY_CUTS "mip_strong_chvatal_gomory_cuts" +#define CUOPT_MIP_REDUCED_COST_STRENGTHENING "mip_reduced_cost_strengthening" +#define CUOPT_MIP_CUT_CHANGE_THRESHOLD "mip_cut_change_threshold" +#define CUOPT_MIP_CUT_MIN_ORTHOGONALITY "mip_cut_min_orthogonality" +#define CUOPT_MIP_BATCH_PDLP_STRONG_BRANCHING "mip_batch_pdlp_strong_branching" +#define CUOPT_MIP_BATCH_PDLP_RELIABILITY_BRANCHING 
"mip_batch_pdlp_reliability_branching" +#define CUOPT_MIP_STRONG_BRANCHING_SIMPLEX_ITERATION_LIMIT \ + "mip_strong_branching_simplex_iteration_limit" +#define CUOPT_SOLUTION_FILE "solution_file" +#define CUOPT_NUM_CPU_THREADS "num_cpu_threads" +#define CUOPT_NUM_GPUS "num_gpus" +#define CUOPT_USER_PROBLEM_FILE "user_problem_file" +#define CUOPT_PRESOLVE_FILE "presolve_file" +#define CUOPT_RANDOM_SEED "random_seed" +#define CUOPT_PDLP_PRECISION "pdlp_precision" + +#define CUOPT_MIP_HYPER_HEURISTIC_POPULATION_SIZE "mip_hyper_heuristic_population_size" +#define CUOPT_MIP_HYPER_HEURISTIC_NUM_CPUFJ_THREADS "mip_hyper_heuristic_num_cpufj_threads" +#define CUOPT_MIP_HYPER_HEURISTIC_PRESOLVE_TIME_RATIO "mip_hyper_heuristic_presolve_time_ratio" +#define CUOPT_MIP_HYPER_HEURISTIC_PRESOLVE_MAX_TIME "mip_hyper_heuristic_presolve_max_time" +#define CUOPT_MIP_HYPER_HEURISTIC_ROOT_LP_TIME_RATIO "mip_hyper_heuristic_root_lp_time_ratio" +#define CUOPT_MIP_HYPER_HEURISTIC_ROOT_LP_MAX_TIME "mip_hyper_heuristic_root_lp_max_time" +#define CUOPT_MIP_HYPER_HEURISTIC_RINS_TIME_LIMIT "mip_hyper_heuristic_rins_time_limit" +#define CUOPT_MIP_HYPER_HEURISTIC_RINS_MAX_TIME_LIMIT "mip_hyper_heuristic_rins_max_time_limit" +#define CUOPT_MIP_HYPER_HEURISTIC_RINS_FIX_RATE "mip_hyper_heuristic_rins_fix_rate" +#define CUOPT_MIP_HYPER_HEURISTIC_STAGNATION_TRIGGER "mip_hyper_heuristic_stagnation_trigger" +#define CUOPT_MIP_HYPER_HEURISTIC_MAX_ITERS_WITHOUT_IMPROVEMENT \ + "mip_hyper_heuristic_max_iterations_without_improvement" +#define CUOPT_MIP_HYPER_HEURISTIC_INITIAL_INFEASIBILITY_WEIGHT \ + "mip_hyper_heuristic_initial_infeasibility_weight" +#define CUOPT_MIP_HYPER_HEURISTIC_N_OF_MINIMUMS_FOR_EXIT \ + "mip_hyper_heuristic_n_of_minimums_for_exit" +#define CUOPT_MIP_HYPER_HEURISTIC_ENABLED_RECOMBINERS "mip_hyper_heuristic_enabled_recombiners" +#define CUOPT_MIP_HYPER_HEURISTIC_CYCLE_DETECTION_LENGTH \ + "mip_hyper_heuristic_cycle_detection_length" +#define 
CUOPT_MIP_HYPER_HEURISTIC_RELAXED_LP_TIME_LIMIT "mip_hyper_heuristic_relaxed_lp_time_limit" +#define CUOPT_MIP_HYPER_HEURISTIC_RELATED_VARS_TIME_LIMIT \ + "mip_hyper_heuristic_related_vars_time_limit" /* @brief MIP determinism mode constants */ #define CUOPT_MODE_OPPORTUNISTIC 0 @@ -129,6 +155,7 @@ #define CUOPT_METHOD_PDLP 1 #define CUOPT_METHOD_DUAL_SIMPLEX 2 #define CUOPT_METHOD_BARRIER 3 +#define CUOPT_METHOD_UNSET 4 /* @brief PDLP precision mode constants */ #define CUOPT_PDLP_DEFAULT_PRECISION -1 @@ -153,4 +180,9 @@ #define CUOPT_PRESOLVE_PAPILO 1 #define CUOPT_PRESOLVE_PSLP 2 +/* @brief MIP scaling mode constants */ +#define CUOPT_MIP_SCALING_OFF 0 +#define CUOPT_MIP_SCALING_ON 1 +#define CUOPT_MIP_SCALING_NO_OBJECTIVE 2 + #endif // CUOPT_CONSTANTS_H diff --git a/cpp/include/cuopt/linear_programming/cpu_optimization_problem_solution.hpp b/cpp/include/cuopt/linear_programming/cpu_optimization_problem_solution.hpp index ba22e81480..e86dd0341a 100644 --- a/cpp/include/cuopt/linear_programming/cpu_optimization_problem_solution.hpp +++ b/cpp/include/cuopt/linear_programming/cpu_optimization_problem_solution.hpp @@ -46,7 +46,7 @@ class cpu_lp_solution_t : public lp_solution_interface_t { l2_dual_residual_(std::numeric_limits::signaling_NaN()), gap_(std::numeric_limits::signaling_NaN()), num_iterations_(0), - solved_by_pdlp_(false) + solved_by_(Unset) { } @@ -65,7 +65,7 @@ class cpu_lp_solution_t : public lp_solution_interface_t { f_t l2_dual_residual, f_t gap, i_t num_iterations, - bool solved_by_pdlp) + method_t solved_by) : primal_solution_(std::move(primal_solution)), dual_solution_(std::move(dual_solution)), reduced_cost_(std::move(reduced_cost)), @@ -78,7 +78,7 @@ class cpu_lp_solution_t : public lp_solution_interface_t { l2_dual_residual_(l2_dual_residual), gap_(gap), num_iterations_(num_iterations), - solved_by_pdlp_(solved_by_pdlp) + solved_by_(solved_by) { } @@ -97,7 +97,7 @@ class cpu_lp_solution_t : public lp_solution_interface_t { f_t l2_dual_residual, 
f_t gap, i_t num_iterations, - bool solved_by_pdlp, + method_t solved_by, cpu_pdlp_warm_start_data_t&& warmstart_data) : primal_solution_(std::move(primal_solution)), dual_solution_(std::move(dual_solution)), @@ -111,7 +111,7 @@ class cpu_lp_solution_t : public lp_solution_interface_t { l2_dual_residual_(l2_dual_residual), gap_(gap), num_iterations_(num_iterations), - solved_by_pdlp_(solved_by_pdlp), + solved_by_(solved_by), pdlp_warm_start_data_(std::move(warmstart_data)) { } @@ -149,7 +149,7 @@ class cpu_lp_solution_t : public lp_solution_interface_t { i_t get_num_iterations(i_t = 0) const override { return num_iterations_; } - bool is_solved_by_pdlp(i_t = 0) const override { return solved_by_pdlp_; } + method_t solved_by(i_t = 0) const override { return solved_by_; } const pdlp_warm_start_data_t& get_pdlp_warm_start_data() const override { @@ -266,7 +266,7 @@ class cpu_lp_solution_t : public lp_solution_interface_t { f_t l2_dual_residual_; f_t gap_; i_t num_iterations_; - bool solved_by_pdlp_; + method_t solved_by_; // PDLP warm start data (embedded struct, CPU-backed using std::vector) cpu_pdlp_warm_start_data_t pdlp_warm_start_data_; diff --git a/cpp/include/cuopt/linear_programming/mip/heuristics_hyper_params.hpp b/cpp/include/cuopt/linear_programming/mip/heuristics_hyper_params.hpp new file mode 100644 index 0000000000..c0b644544a --- /dev/null +++ b/cpp/include/cuopt/linear_programming/mip/heuristics_hyper_params.hpp @@ -0,0 +1,40 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +namespace cuopt::linear_programming { + +/** + * @brief Tuning knobs for MIP GPU heuristics. + * + * All fields carry their actual defaults. A config file only needs to list + * the knobs being changed; omitted keys keep the values shown here. 
+ * These are registered in the unified parameter framework via solver_settings_t + * and can be loaded from a config file with load_parameters_from_file(). + */ +struct mip_heuristics_hyper_params_t { + int population_size = 32; // max solutions in pool + int num_cpufj_threads = 8; // parallel CPU FJ climbers + double presolve_time_ratio = 0.1; // fraction of total time for presolve + double presolve_max_time = 60.0; // hard cap on presolve seconds + double root_lp_time_ratio = 0.1; // fraction of total time for root LP + double root_lp_max_time = 15.0; // hard cap on root LP seconds + double rins_time_limit = 3.0; // per-call RINS sub-MIP time + double rins_max_time_limit = 20.0; // ceiling for RINS adaptive time budget + double rins_fix_rate = 0.5; // RINS variable fix rate + int stagnation_trigger = 3; // FP loops w/o improvement before recombination + int max_iterations_without_improvement = 8; // diversity step depth after stagnation + double initial_infeasibility_weight = 1000.0; // constraint violation penalty seed + int n_of_minimums_for_exit = 7000; // FJ baseline local-minima exit threshold + int enabled_recombiners = 15; // bitmask: 1=BP 2=FP 4=LS 8=SubMIP + int cycle_detection_length = 30; // FP assignment cycle ring buffer + double relaxed_lp_time_limit = 1.0; // base relaxed LP time cap in heuristics + double related_vars_time_limit = 30.0; // time for related-variable structure build +}; + +} // namespace cuopt::linear_programming diff --git a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp index c2cd33df98..14c4d227bc 100644 --- a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -95,13 +96,17 @@ class mip_solver_settings_t { i_t knapsack_cuts = -1; i_t clique_cuts = -1; i_t implied_bound_cuts = -1; - i_t strong_chvatal_gomory_cuts 
= -1; - i_t reduced_cost_strengthening = -1; - f_t cut_change_threshold = -1.0; - f_t cut_min_orthogonality = 0.5; - i_t mip_batch_pdlp_strong_branching = 0; - i_t num_gpus = 1; - bool log_to_console = true; + i_t strong_chvatal_gomory_cuts = -1; + i_t reduced_cost_strengthening = -1; + f_t cut_change_threshold = -1.0; + f_t cut_min_orthogonality = 0.5; + i_t mip_batch_pdlp_strong_branching{ + 0}; // 0 = DS only, 1 = cooperative DS + PDLP, 2 = batch PDLP only + i_t mip_batch_pdlp_reliability_branching{ + 0}; // 0 = DS only, 1 = cooperative DS + PDLP, 2 = batch PDLP only + i_t strong_branching_simplex_iteration_limit = -1; + i_t num_gpus = 1; + bool log_to_console = true; std::string log_file; std::string sol_file; @@ -110,7 +115,7 @@ class mip_solver_settings_t { /** Initial primal solutions */ std::vector>> initial_solutions; - bool mip_scaling = false; + int mip_scaling = CUOPT_MIP_SCALING_NO_OBJECTIVE; presolver_t presolver{presolver_t::Default}; /** * @brief Determinism mode for MIP solver. 
@@ -136,6 +141,8 @@ class mip_solver_settings_t { // TODO check with Akif and Alice pdlp_hyper_params::pdlp_hyper_params_t hyper_params; + mip_heuristics_hyper_params_t heuristic_params; + private: std::vector mip_callbacks_; diff --git a/cpp/include/cuopt/linear_programming/optimization_problem_solution.hpp b/cpp/include/cuopt/linear_programming/optimization_problem_solution.hpp index bca0df3936..ac55256973 100644 --- a/cpp/include/cuopt/linear_programming/optimization_problem_solution.hpp +++ b/cpp/include/cuopt/linear_programming/optimization_problem_solution.hpp @@ -122,9 +122,9 @@ class gpu_lp_solution_t : public lp_solution_interface_t { return solution_.get_additional_termination_information(id).number_of_steps_taken; } - bool is_solved_by_pdlp(i_t id = 0) const override + method_t solved_by(i_t id = 0) const override { - return solution_.get_additional_termination_information(id).solved_by_pdlp; + return solution_.get_additional_termination_information(id).solved_by; } const pdlp_warm_start_data_t& get_pdlp_warm_start_data() const override @@ -338,7 +338,7 @@ class gpu_lp_solution_t : public lp_solution_interface_t { get_l2_dual_residual(), get_gap(), get_num_iterations(), - is_solved_by_pdlp(), + solved_by(), std::move(cpu_ws)); } @@ -353,7 +353,7 @@ class gpu_lp_solution_t : public lp_solution_interface_t { get_l2_dual_residual(), get_gap(), get_num_iterations(), - is_solved_by_pdlp()); + solved_by()); } /** diff --git a/cpp/include/cuopt/linear_programming/optimization_problem_solution_interface.hpp b/cpp/include/cuopt/linear_programming/optimization_problem_solution_interface.hpp index 8c1386f059..3dc1678c8b 100644 --- a/cpp/include/cuopt/linear_programming/optimization_problem_solution_interface.hpp +++ b/cpp/include/cuopt/linear_programming/optimization_problem_solution_interface.hpp @@ -291,10 +291,10 @@ class lp_solution_interface_t : public optimization_problem_solution_interface_t virtual i_t get_num_iterations(i_t id = 0) const = 0; /** - * 
@brief Check if solved by PDLP - * @return true if solved by PDLP + * @brief Method used for solving the LP. + * @return the method used for solving the LP. */ - virtual bool is_solved_by_pdlp(i_t id = 0) const = 0; + virtual method_t solved_by(i_t id = 0) const = 0; /** * @brief Get PDLP warm start data (GPU solutions only) diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp index 86ce4d8db5..bcf5a736f0 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp @@ -18,6 +18,8 @@ #include +#include + namespace cuopt::linear_programming { // Forward declare solver_settings_t for friend class @@ -50,9 +52,11 @@ enum pdlp_solver_mode_t : int { * @brief Enum representing the different methods that can be used to solve the * linear programming problem. * - * Concurrent: Use both PDLP and DualSimplex in parallel. + * Concurrent: Use PDLP, Barrier and DualSimplex in parallel. * PDLP: Use the PDLP method. * DualSimplex: Use the dual simplex method. + * Barrier: Use the barrier method + * Unset: The value was not set. * * @note Default method is Concurrent. */ @@ -60,9 +64,22 @@ enum method_t : int { Concurrent = CUOPT_METHOD_CONCURRENT, PDLP = CUOPT_METHOD_PDLP, DualSimplex = CUOPT_METHOD_DUAL_SIMPLEX, - Barrier = CUOPT_METHOD_BARRIER + Barrier = CUOPT_METHOD_BARRIER, + Unset = CUOPT_METHOD_UNSET }; +/// Returns the corresponding string from the enum `method_t`. +inline std::string method_to_string(method_t method) +{ + switch (method) { + case method_t::DualSimplex: return "Dual Simplex"; + case method_t::PDLP: return "PDLP"; + case method_t::Barrier: return "Barrier"; + case method_t::Concurrent: return "Concurrent"; + default: return "Unset"; + } +} + /** * @brief Enum representing the PDLP precision modes. 
* @@ -147,6 +164,12 @@ class pdlp_solver_settings_t { * @param[in] initial_primal_weight Initial primal weight. */ void set_initial_primal_weight(f_t initial_primal_weight); + /** + * @brief Set an initial pdlp iteration. + * + * @param[in] initial_pdlp_iteration Initial pdlp iteration. + */ + void set_initial_pdlp_iteration(i_t initial_pdlp_iteration); /** * @brief Set the pdlp warm start data. This allows to restart PDLP with a @@ -213,6 +236,8 @@ class pdlp_solver_settings_t { std::optional get_initial_step_size() const; // TODO batch mode: tmp std::optional get_initial_primal_weight() const; + // TODO batch mode: tmp + std::optional get_initial_pdlp_iteration() const; const rmm::device_uvector& get_initial_primal_solution() const; const rmm::device_uvector& get_initial_dual_solution() const; @@ -265,6 +290,8 @@ class pdlp_solver_settings_t { bool inside_mip{false}; // For concurrent termination std::atomic* concurrent_halt{nullptr}; + // Shared strong branching solved flags for cooperative DS + PDLP + cuda::std::span> shared_sb_solved; static constexpr f_t minimal_absolute_tolerance = 1.0e-12; pdlp_hyper_params::pdlp_hyper_params_t hyper_params; // Holds the information of new variable lower and upper bounds for each climber in the format: @@ -273,6 +300,12 @@ class pdlp_solver_settings_t { // concurrently i.e. 
if new_bounds.size() == 2, then 2 versions of the problem with updated bounds // will be solved concurrently std::vector> new_bounds; + // By default to save memory and speed we don't store and copy each climber's primal and dual + // solutions We only retrieve termination statistics and the objective values + bool generate_batch_primal_dual_solution{false}; + // Used to force batch PDLP to solve a subbatch of the problems at a time + // The 0 default value will make the solver use its heuristic to determine the subbatch size + i_t sub_batch_size{0}; private: /** Initial primal solution */ @@ -285,6 +318,9 @@ class pdlp_solver_settings_t { /** Initial primal weight */ // TODO batch mode: tmp std::optional initial_primal_weight_; + /** Initial pdlp iteration */ + // TODO batch mode: tmp + std::optional initial_pdlp_iteration_; /** GPU-backed warm start data (device_uvector), used by C++ API and local GPU solves */ pdlp_warm_start_data_t pdlp_warm_start_data_; /** Warm start data as spans over external memory, used by Cython/Python interface */ diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp index 8681690540..81e911df62 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -89,8 +90,8 @@ class optimization_problem_solution_t : public base_solution_t { /** Solve time in seconds */ double solve_time{std::numeric_limits::signaling_NaN()}; - /** Whether the problem was solved by PDLP or Dual Simplex */ - bool solved_by_pdlp{false}; + /** Whether the problem was solved by PDLP, Barrier or Dual Simplex */ + method_t solved_by = method_t::Unset; }; /** diff --git a/cpp/include/cuopt/linear_programming/solver_settings.hpp b/cpp/include/cuopt/linear_programming/solver_settings.hpp index 61e84c6cd8..1720b0e9f9 100644 --- 
a/cpp/include/cuopt/linear_programming/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/solver_settings.hpp @@ -96,6 +96,9 @@ class solver_settings_t { const std::vector>& get_string_parameters() const; const std::vector get_parameter_names() const; + void load_parameters_from_file(const std::string& path); + bool dump_parameters_to_file(const std::string& path, bool hyperparameters_only = true) const; + private: pdlp_solver_settings_t pdlp_settings; mip_solver_settings_t mip_settings; diff --git a/cpp/include/cuopt/linear_programming/utilities/cython_types.hpp b/cpp/include/cuopt/linear_programming/utilities/cython_types.hpp index 29b4963663..20db133512 100644 --- a/cpp/include/cuopt/linear_programming/utilities/cython_types.hpp +++ b/cpp/include/cuopt/linear_programming/utilities/cython_types.hpp @@ -83,7 +83,7 @@ struct linear_programming_ret_t { double gap_{}; int nb_iterations_{}; double solve_time_{}; - bool solved_by_pdlp_{}; + linear_programming::method_t solved_by_{}; bool is_gpu() const { return std::holds_alternative(solutions_); } }; diff --git a/cpp/include/cuopt/linear_programming/utilities/internals.hpp b/cpp/include/cuopt/linear_programming/utilities/internals.hpp index fc90dec04f..bdfbb969d2 100644 --- a/cpp/include/cuopt/linear_programming/utilities/internals.hpp +++ b/cpp/include/cuopt/linear_programming/utilities/internals.hpp @@ -79,8 +79,14 @@ class base_solution_t { template struct parameter_info_t { - parameter_info_t(std::string_view param_name, T* value, T min, T max, T def) - : param_name(param_name), value_ptr(value), min_value(min), max_value(max), default_value(def) + parameter_info_t( + std::string_view param_name, T* value, T min, T max, T def, const char* description = "") + : param_name(param_name), + value_ptr(value), + min_value(min), + max_value(max), + default_value(def), + description(description) { } std::string param_name; @@ -88,28 +94,34 @@ struct parameter_info_t { T min_value; T max_value; T default_value; 
+ const char* description; }; template <> struct parameter_info_t { - parameter_info_t(std::string_view name, bool* value, bool def) - : param_name(name), value_ptr(value), default_value(def) + parameter_info_t(std::string_view name, bool* value, bool def, const char* description = "") + : param_name(name), value_ptr(value), default_value(def), description(description) { } std::string param_name; bool* value_ptr; bool default_value; + const char* description; }; template <> struct parameter_info_t { - parameter_info_t(std::string_view name, std::string* value, std::string def) - : param_name(name), value_ptr(value), default_value(def) + parameter_info_t(std::string_view name, + std::string* value, + std::string def, + const char* description = "") + : param_name(name), value_ptr(value), default_value(def), description(description) { } std::string param_name; std::string* value_ptr; std::string default_value; + const char* description; }; /** diff --git a/cpp/src/barrier/barrier.cu b/cpp/src/barrier/barrier.cu index 76ed1927b1..902e691e64 100644 --- a/cpp/src/barrier/barrier.cu +++ b/cpp/src/barrier/barrier.cu @@ -2153,10 +2153,9 @@ void barrier_solver_t::gpu_compute_residual_norms(const rmm::device_uv std::max(device_vector_norm_inf(data.d_primal_residual_, stream_view_), device_vector_norm_inf(data.d_bound_residual_, stream_view_)); dual_residual_norm = device_vector_norm_inf(data.d_dual_residual_, stream_view_); - // TODO: CMM understand why rhs and not residual complementarity_residual_norm = - std::max(device_vector_norm_inf(data.d_complementarity_xz_rhs_, stream_view_), - device_vector_norm_inf(data.d_complementarity_wv_rhs_, stream_view_)); + std::max(device_vector_norm_inf(data.d_complementarity_xz_residual_, stream_view_), + device_vector_norm_inf(data.d_complementarity_wv_residual_, stream_view_)); } template @@ -3494,7 +3493,9 @@ lp_status_t barrier_solver_t::solve(f_t start_time, f_t relative_primal_residual = primal_residual_norm / (1.0 + norm_b); f_t 
relative_dual_residual = dual_residual_norm / (1.0 + norm_c); f_t relative_complementarity_residual = - complementarity_residual_norm / (1.0 + std::abs(primal_objective)); + complementarity_residual_norm / + (1.0 + std::min(std::abs(compute_user_objective(lp, primal_objective)), + std::abs(primal_objective))); dense_vector_t upper(lp.upper); data.gather_upper_bounds(upper, data.restrict_u_); @@ -3510,11 +3511,11 @@ lp_status_t barrier_solver_t::solve(f_t start_time, float64_t elapsed_time = toc(start_time); settings.log.printf("%3d %+.12e %+.12e %.2e %.2e %.2e %.1f\n", iter, - primal_objective, - dual_objective, - primal_residual_norm, - dual_residual_norm, - complementarity_residual_norm, + compute_user_objective(lp, primal_objective), + compute_user_objective(lp, dual_objective), + relative_primal_residual, + relative_dual_residual, + relative_complementarity_residual, elapsed_time); bool converged = primal_residual_norm < settings.barrier_relative_feasibility_tol && @@ -3656,7 +3657,9 @@ lp_status_t barrier_solver_t::solve(f_t start_time, relative_primal_residual = primal_residual_norm / (1.0 + norm_b); relative_dual_residual = dual_residual_norm / (1.0 + norm_c); relative_complementarity_residual = - complementarity_residual_norm / (1.0 + std::abs(primal_objective)); + complementarity_residual_norm / + (1.0 + std::min(std::abs(compute_user_objective(lp, primal_objective)), + std::abs(primal_objective))); if (relative_primal_residual < settings.barrier_relaxed_feasibility_tol && relative_dual_residual < settings.barrier_relaxed_optimality_tol && diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 7106d8dce3..e94e8c93e1 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -174,14 +174,14 @@ f_t sgn(f_t x) return x < 0 ? 
-1 : 1; } -template -f_t relative_gap(f_t obj_value, f_t lower_bound) +template +f_t compute_user_abs_gap(const lp_problem_t& lp, f_t obj_value, f_t lower_bound) { - f_t user_mip_gap = obj_value == 0.0 - ? (lower_bound == 0.0 ? 0.0 : std::numeric_limits::infinity()) - : std::abs(obj_value - lower_bound) / std::abs(obj_value); - if (std::isnan(user_mip_gap)) { return std::numeric_limits::infinity(); } - return user_mip_gap; + // abs_gap = |user_obj - user_lower| = |obj_scale| * |obj_value - lower_bound| + // obj_constant cancels out in the subtraction; obj_scale sign must be removed via abs + f_t gap = std::abs(lp.obj_scale) * (obj_value - lower_bound); + if (gap < -1e-4) { CUOPT_LOG_ERROR("Gap is negative %e", gap); } + return gap; } template @@ -191,15 +191,15 @@ f_t user_relative_gap(const lp_problem_t& lp, f_t obj_value, f_t lower f_t user_lower_bound = compute_user_objective(lp, lower_bound); f_t user_mip_gap = user_obj == 0.0 ? (user_lower_bound == 0.0 ? 0.0 : std::numeric_limits::infinity()) - : std::abs(user_obj - user_lower_bound) / std::abs(user_obj); + : compute_user_abs_gap(lp, obj_value, lower_bound) / std::abs(user_obj); if (std::isnan(user_mip_gap)) { return std::numeric_limits::infinity(); } return user_mip_gap; } -template -std::string user_mip_gap(f_t obj_value, f_t lower_bound) +template +std::string user_mip_gap(const lp_problem_t& lp, f_t obj_value, f_t lower_bound) { - const f_t user_mip_gap = relative_gap(obj_value, lower_bound); + const f_t user_mip_gap = user_relative_gap(lp, obj_value, lower_bound); if (user_mip_gap == std::numeric_limits::infinity()) { return " - "; } else { @@ -319,7 +319,7 @@ void branch_and_bound_t::report_heuristic(f_t obj) if (is_running_) { f_t user_obj = compute_user_objective(original_lp_, obj); f_t user_lower = compute_user_objective(original_lp_, get_lower_bound()); - std::string user_gap = user_mip_gap(user_obj, user_lower); + std::string user_gap = user_mip_gap(original_lp_, obj, get_lower_bound()); 
settings_.log.printf( "H %+13.6e %+10.6e %s %9.2f\n", @@ -329,9 +329,9 @@ void branch_and_bound_t::report_heuristic(f_t obj) toc(exploration_stats_.start_time)); } else { if (solving_root_relaxation_.load()) { - f_t user_obj = compute_user_objective(original_lp_, obj); - f_t user_lower = root_lp_current_lower_bound_.load(); - std::string user_gap = user_mip_gap(user_obj, user_lower); + f_t user_obj = compute_user_objective(original_lp_, obj); + std::string user_gap = + user_mip_gap(original_lp_, obj, root_lp_current_lower_bound_.load()); settings_.log.printf( "New solution from primal heuristics. Objective %+.6e. Gap %s. Time %.2f\n", user_obj, @@ -356,7 +356,7 @@ void branch_and_bound_t::report( const f_t user_lower = compute_user_objective(original_lp_, lower_bound); const f_t iters = static_cast(exploration_stats_.total_lp_iters); const f_t iter_node = nodes_explored > 0 ? iters / nodes_explored : iters; - const std::string user_gap = user_mip_gap(user_obj, user_lower); + const std::string user_gap = user_mip_gap(original_lp_, obj, lower_bound); if (work_time >= 0) { settings_.log.printf( "%c %10d %10lu %+13.6e %+10.6e %6d %6d %7.1e %s %9.2f %9.2f\n", @@ -717,9 +717,9 @@ void branch_and_bound_t::set_final_solution(mip_solution_t& settings_.heuristic_preemption_callback(); } - f_t gap = upper_bound_ - lower_bound; f_t obj = compute_user_objective(original_lp_, upper_bound_.load()); f_t user_bound = compute_user_objective(original_lp_, lower_bound); + f_t gap = std::abs(obj - user_bound); f_t gap_rel = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); bool is_maximization = original_lp_.obj_scale < 0.0; @@ -840,14 +840,15 @@ branch_variable_t branch_and_bound_t::variable_selection( if (settings_.reliability_branching != 0) { branch_var = pc_.reliable_variable_selection(node_ptr, fractional, - solution, - settings_, - var_types_, worker, + var_types_, exploration_stats_, + settings_, upper_bound_, worker_pool_.num_idle_workers(), - log); + log, + 
new_slacks_, + original_lp_); } else { branch_var = pc_.variable_selection(fractional, solution, log); } @@ -1176,7 +1177,6 @@ std::pair branch_and_bound_t::upd dual::status_t lp_status, Policy& policy) { - constexpr f_t inf = std::numeric_limits::infinity(); const f_t abs_fathom_tol = settings_.absolute_mip_gap_tol / 10; lp_problem_t& leaf_problem = worker->leaf_problem; lp_solution_t& leaf_solution = worker->leaf_solution; @@ -1437,7 +1437,7 @@ void branch_and_bound_t::plunge_with(branch_and_bound_worker_t 0 && (solver_status_ == mip_status_t::UNSET && is_running_) && rel_gap > settings_.relative_mip_gap_tol && abs_gap > settings_.absolute_mip_gap_tol) { @@ -1528,13 +1528,13 @@ void branch_and_bound_t::plunge_with(branch_and_bound_worker_t 0 && (rel_gap <= settings_.relative_mip_gap_tol || abs_gap <= settings_.absolute_mip_gap_tol)) { @@ -1581,7 +1581,7 @@ void branch_and_bound_t::dive_with(branch_and_bound_worker_t f_t lower_bound = get_lower_bound(); f_t upper_bound = upper_bound_; f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); - f_t abs_gap = upper_bound - lower_bound; + f_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound, lower_bound); while (stack.size() > 0 && (solver_status_ == mip_status_t::UNSET && is_running_) && rel_gap > settings_.relative_mip_gap_tol && abs_gap > settings_.absolute_mip_gap_tol) { @@ -1636,7 +1636,7 @@ void branch_and_bound_t::dive_with(branch_and_bound_worker_t lower_bound = get_lower_bound(); upper_bound = upper_bound_; rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); - abs_gap = upper_bound - lower_bound; + abs_gap = compute_user_abs_gap(original_lp_, upper_bound, lower_bound); } worker_pool_.return_worker_to_pool(worker); @@ -1667,7 +1667,7 @@ void branch_and_bound_t::run_scheduler() #endif f_t lower_bound = get_lower_bound(); - f_t abs_gap = upper_bound_ - lower_bound; + f_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); f_t rel_gap = 
user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); i_t last_node_depth = 0; i_t last_int_infeas = 0; @@ -1777,7 +1777,7 @@ void branch_and_bound_t::run_scheduler() } lower_bound = get_lower_bound(); - abs_gap = upper_bound_ - lower_bound; + abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); if (abs_gap <= settings_.absolute_mip_gap_tol || rel_gap <= settings_.relative_mip_gap_tol) { @@ -1799,7 +1799,7 @@ void branch_and_bound_t::single_threaded_solve() branch_and_bound_worker_t worker(0, original_lp_, Arow_, var_types_, settings_); f_t lower_bound = get_lower_bound(); - f_t abs_gap = upper_bound_ - lower_bound; + f_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); while (solver_status_ == mip_status_t::UNSET && abs_gap > settings_.absolute_mip_gap_tol && @@ -1844,7 +1844,7 @@ void branch_and_bound_t::single_threaded_solve() plunge_with(&worker); lower_bound = get_lower_bound(); - abs_gap = upper_bound_ - lower_bound; + abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); if (abs_gap <= settings_.absolute_mip_gap_tol || rel_gap <= settings_.relative_mip_gap_tol) { @@ -1974,18 +1974,21 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( set_uninitialized_steepest_edge_norms(original_lp_, basic_list, edge_norms); user_objective = root_crossover_soln_.user_objective; iter = root_crossover_soln_.iterations; - solver_name = "Barrier/PDLP and Crossover"; + solver_name = method_to_string(root_relax_solved_by); + } else { - root_status = root_status_future.get(); - user_objective = root_relax_soln_.user_objective; - iter = root_relax_soln_.iterations; - solver_name = "Dual Simplex"; + root_status = root_status_future.get(); + 
user_objective = root_relax_soln_.user_objective; + iter = root_relax_soln_.iterations; + root_relax_solved_by = DualSimplex; + solver_name = "Dual Simplex"; } } else { - root_status = root_status_future.get(); - user_objective = root_relax_soln_.user_objective; - iter = root_relax_soln_.iterations; - solver_name = "Dual Simplex"; + root_status = root_status_future.get(); + user_objective = root_relax_soln_.user_objective; + iter = root_relax_soln_.iterations; + root_relax_solved_by = DualSimplex; + solver_name = "Dual Simplex"; } settings_.log.printf("\n"); @@ -2463,7 +2466,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut report(' ', obj, root_objective_, 0, num_fractional); f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), root_objective_); - f_t abs_gap = upper_bound_.load() - root_objective_; + f_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), root_objective_); if (rel_gap < settings_.relative_mip_gap_tol || abs_gap < settings_.absolute_mip_gap_tol) { set_solution_at_root(solution, cut_info); set_final_solution(solution, root_objective_); @@ -2499,18 +2502,23 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut set_uninitialized_steepest_edge_norms(original_lp_, basic_list, edge_norms_); pc_.resize(original_lp_.num_cols); + original_lp_.A.transpose(pc_.AT); { raft::common::nvtx::range scope_sb("BB::strong_branching"); - strong_branching(original_problem_, - original_lp_, + strong_branching(original_lp_, settings_, exploration_stats_.start_time, + new_slacks_, var_types_, - root_relax_soln_.x, + root_relax_soln_, fractional, root_objective_, + upper_bound_, root_vstatus_, edge_norms_, + basic_list, + nonbasic_list, + basis_update, pc_); } @@ -2953,7 +2961,6 @@ void branch_and_bound_t::run_deterministic_bfs_loop( worker.current_node = node; f_t upper_bound = worker.local_upper_bound; - f_t rel_gap = user_relative_gap(original_lp_, upper_bound, node->lower_bound); if (node->lower_bound > upper_bound) 
{ worker.current_node = nullptr; worker.record_fathomed(node, node->lower_bound); @@ -3065,7 +3072,7 @@ void branch_and_bound_t::deterministic_sync_callback() f_t lower_bound = deterministic_compute_lower_bound(); f_t upper_bound = upper_bound_.load(); - f_t abs_gap = upper_bound - lower_bound; + f_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound, lower_bound); f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); if (abs_gap <= settings_.absolute_mip_gap_tol || rel_gap <= settings_.relative_mip_gap_tol) { @@ -3104,7 +3111,7 @@ void branch_and_bound_t::deterministic_sync_callback() f_t obj = compute_user_objective(original_lp_, upper_bound); f_t user_lower = compute_user_objective(original_lp_, lower_bound); - std::string gap_user = user_mip_gap(obj, user_lower); + std::string gap_user = user_mip_gap(original_lp_, upper_bound, lower_bound); std::string idle_workers; i_t idle_count = 0; @@ -3742,7 +3749,6 @@ void branch_and_bound_t::deterministic_dive( stack.pop_front(); // Prune check using snapshot upper bound - f_t rel_gap = user_relative_gap(original_lp_, worker.local_upper_bound, node_ptr->lower_bound); if (node_ptr->lower_bound > worker.local_upper_bound) { worker.recompute_bounds_and_basis = true; continue; diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index 4faadcc6b8..0d07cf12a5 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -30,6 +30,8 @@ #include #include +#include + #include #include @@ -89,7 +91,8 @@ class branch_and_bound_t { const std::vector& reduced_costs, f_t objective, f_t user_objective, - i_t iterations) + i_t iterations, + method_t method) { if (!is_root_solution_set) { root_crossover_soln_.x = primal; @@ -99,6 +102,7 @@ class branch_and_bound_t { root_crossover_soln_.objective = objective; root_crossover_soln_.user_objective = user_objective; root_crossover_soln_.iterations = iterations; + 
root_relax_solved_by = method; root_crossover_solution_set_.store(true, std::memory_order_release); } } @@ -218,6 +222,7 @@ class branch_and_bound_t { f_t root_objective_; lp_solution_t root_relax_soln_; lp_solution_t root_crossover_soln_; + method_t root_relax_solved_by{Unset}; std::vector edge_norms_; std::atomic root_crossover_solution_set_{false}; omp_atomic_t root_lp_current_lower_bound_; diff --git a/cpp/src/branch_and_bound/pseudo_costs.cpp b/cpp/src/branch_and_bound/pseudo_costs.cpp index ee7e2f7803..c38e98e27d 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.cpp +++ b/cpp/src/branch_and_bound/pseudo_costs.cpp @@ -6,14 +6,19 @@ /* clang-format on */ #include +#include #include #include #include #include +#include + #include +#include + #include #include @@ -22,6 +27,263 @@ namespace cuopt::linear_programming::dual_simplex { namespace { +static bool is_dual_simplex_done(dual::status_t status) +{ + return status == dual::status_t::DUAL_UNBOUNDED || status == dual::status_t::OPTIMAL || + status == dual::status_t::ITERATION_LIMIT || status == dual::status_t::CUTOFF; +} + +template +struct objective_change_estimate_t { + f_t down_obj_change; + f_t up_obj_change; +}; + +template +f_t compute_step_length(const simplex_solver_settings_t& settings, + const std::vector& vstatus, + const std::vector& z, + const std::vector& delta_z, + const std::vector& delta_z_indices) +{ + f_t step_length = inf; + f_t pivot_tol = settings.pivot_tol; + const i_t nz = delta_z_indices.size(); + for (i_t h = 0; h < nz; h++) { + const i_t j = delta_z_indices[h]; + if (vstatus[j] == variable_status_t::NONBASIC_LOWER && delta_z[j] < -pivot_tol) { + const f_t ratio = -z[j] / delta_z[j]; + if (ratio < step_length) { step_length = ratio; } + } else if (vstatus[j] == variable_status_t::NONBASIC_UPPER && delta_z[j] > pivot_tol) { + const f_t ratio = -z[j] / delta_z[j]; + if (ratio < step_length) { step_length = ratio; } + } + } + return step_length; +} + +template +objective_change_estimate_t 
single_pivot_objective_change_estimate( + const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const csc_matrix_t& A_transpose, + const std::vector& vstatus, + i_t variable_j, + i_t basic_j, + const lp_solution_t& lp_solution, + const std::vector& basic_list, + const std::vector& nonbasic_list, + const std::vector& nonbasic_mark, + basis_update_mpf_t& basis_factors, + std::vector& workspace, + std::vector& delta_z, + f_t& work_estimate) +{ + // Compute the objective estimate for the down and up branches of variable j + assert(variable_j >= 0); + assert(basic_j >= 0); + + // Down branch + i_t direction = -1; + sparse_vector_t e_k(lp.num_rows, 0); + e_k.i.push_back(basic_j); + e_k.x.push_back(-f_t(direction)); + + sparse_vector_t delta_y(lp.num_rows, 0); + basis_factors.b_transpose_solve(e_k, delta_y); + + // Compute delta_z_N = -N^T * delta_y + i_t delta_y_nz0 = 0; + const i_t nz_delta_y = delta_y.i.size(); + for (i_t k = 0; k < nz_delta_y; k++) { + if (std::abs(delta_y.x[k]) > settings.zero_tol) { delta_y_nz0++; } + } + work_estimate += nz_delta_y; + const f_t delta_y_nz_percentage = delta_y_nz0 / static_cast(lp.num_rows) * 100.0; + const bool use_transpose = delta_y_nz_percentage <= 30.0; + std::vector delta_z_indices; + // delta_z starts out all zero + if (use_transpose) { + compute_delta_z(A_transpose, + delta_y, + variable_j, + direction, + nonbasic_mark, + workspace, + delta_z_indices, + delta_z, + work_estimate); + } else { + std::vector delta_y_dense(lp.num_rows, 0); + delta_y.to_dense(delta_y_dense); + compute_reduced_cost_update(lp, + basic_list, + nonbasic_list, + delta_y_dense, + variable_j, + direction, + workspace, + delta_z_indices, + delta_z, + work_estimate); + } + + // Verify dual feasibility +#ifdef CHECK_DUAL_FEASIBILITY + { + std::vector dual_residual = lp_solution.z; + for (i_t j = 0; j < lp.num_cols; j++) { + dual_residual[j] -= lp.objective[j]; + } + matrix_transpose_vector_multiply(lp.A, 1.0, lp_solution.y, 1.0, 
dual_residual); + f_t dual_residual_norm = vector_norm_inf(dual_residual); + settings.log.printf("Dual residual norm: %e\n", dual_residual_norm); + } +#endif + + // Compute the step-length + f_t step_length = compute_step_length(settings, vstatus, lp_solution.z, delta_z, delta_z_indices); + + // Handle the leaving variable case + + f_t delta_obj_down = + step_length * (lp_solution.x[variable_j] - std::floor(lp_solution.x[variable_j])); +#ifdef CHECK_DELTA_OBJ + f_t delta_obj_check = 0.0; + for (i_t k = 0; k < delta_y.i.size(); k++) { + delta_obj_check += lp.rhs[delta_y.i[k]] * delta_y.x[k]; + } + for (i_t h = 0; h < delta_z_indices.size(); h++) { + const i_t j = delta_z_indices[h]; + if (vstatus[j] == variable_status_t::NONBASIC_LOWER) { + delta_obj_check += lp.lower[j] * delta_z[j]; + } else if (vstatus[j] == variable_status_t::NONBASIC_UPPER) { + delta_obj_check += lp.upper[j] * delta_z[j]; + } + } + delta_obj_check += std::floor(lp_solution.x[variable_j]) * delta_z[variable_j]; + delta_obj_check *= step_length; + if (std::abs(delta_obj_check - delta_obj) > 1e-6) { + settings.log.printf("Delta obj check %e. Delta obj %e. Step length %e.\n", + delta_obj_check, + delta_obj, + step_length); + } +#endif + + settings.log.debug( + "Down branch %d. Step length: %e. Delta obj: %e. \n", variable_j, step_length, delta_obj_down); + + // Up branch + direction = 1; + // Negate delta_z + for (i_t j : delta_z_indices) { + delta_z[j] *= -1.0; + } + + // Compute the step-length + step_length = compute_step_length(settings, vstatus, lp_solution.z, delta_z, delta_z_indices); + + f_t delta_obj_up = + step_length * (std::ceil(lp_solution.x[variable_j]) - lp_solution.x[variable_j]); + settings.log.debug( + "Up branch %d. Step length: %e. 
Delta obj: %e.\n", variable_j, step_length, delta_obj_up); + + delta_z_indices.push_back(variable_j); + + // Clear delta_z + for (i_t j : delta_z_indices) { + delta_z[j] = 0.0; + workspace[j] = 0; + } + +#ifdef CHECK_DELTA_Z + for (i_t j = 0; j < lp.num_cols; j++) { + if (delta_z[j] != 0.0) { settings.log.printf("Delta z %d: %e\n", j, delta_z[j]); } + } + for (i_t j = 0; j < lp.num_cols; j++) { + if (workspace[j] != 0) { settings.log.printf("Workspace %d: %d\n", j, workspace[j]); } + } +#endif + + return {.down_obj_change = std::max(delta_obj_down, 0), + .up_obj_change = std::max(delta_obj_up, 0)}; +} + +template +void initialize_pseudo_costs_with_estimate(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& vstatus, + const lp_solution_t& lp_solution, + const std::vector& basic_list, + const std::vector& nonbasic_list, + const std::vector& fractional, + basis_update_mpf_t& basis_factors, + pseudo_costs_t& pc) +{ + i_t m = lp.num_rows; + i_t n = lp.num_cols; + + std::vector delta_z(n, 0); + std::vector workspace(n, 0); + + f_t work_estimate = 0; + + std::vector basic_map(n, -1); + for (i_t i = 0; i < m; i++) { + basic_map[basic_list[i]] = i; + } + + std::vector nonbasic_mark(n, -1); + for (i_t i = 0; i < n - m; i++) { + nonbasic_mark[nonbasic_list[i]] = i; + } + + for (i_t k = 0; k < fractional.size(); k++) { + const i_t j = fractional[k]; + assert(j >= 0); + + objective_change_estimate_t estimate = + single_pivot_objective_change_estimate(lp, + settings, + pc.AT, + vstatus, + j, + basic_map[j], + lp_solution, + basic_list, + nonbasic_list, + nonbasic_mark, + basis_factors, + workspace, + delta_z, + work_estimate); + pc.strong_branch_down[k] = estimate.down_obj_change; + pc.strong_branch_up[k] = estimate.up_obj_change; + } +} + +template +f_t objective_upper_bound(const lp_problem_t& lp, f_t upper_bound, f_t dual_tol) +{ + f_t cut_off = 0; + + if (std::isfinite(upper_bound)) { + cut_off = upper_bound + dual_tol; + } else { + 
cut_off = 0; + for (i_t j = 0; j < lp.num_cols; ++j) { + if (lp.objective[j] > 0) { + cut_off += lp.objective[j] * lp.upper[j]; + } else if (lp.objective[j] < 0) { + cut_off += lp.objective[j] * lp.lower[j]; + } + } + } + + return cut_off; +} + template void strong_branch_helper(i_t start, i_t end, @@ -30,11 +292,18 @@ void strong_branch_helper(i_t start, const simplex_solver_settings_t& settings, const std::vector& var_types, const std::vector& fractional, - f_t root_obj, const std::vector& root_soln, const std::vector& root_vstatus, const std::vector& edge_norms, - pseudo_costs_t& pc) + f_t root_obj, + f_t upper_bound, + i_t iter_limit, + pseudo_costs_t& pc, + std::vector& dual_simplex_obj_down, + std::vector& dual_simplex_obj_up, + std::vector& dual_simplex_status_down, + std::vector& dual_simplex_status_up, + shared_strong_branching_context_view_t& sb_view) { raft::common::nvtx::range scope("BB::strong_branch_helper"); lp_problem_t child_problem = original_lp; @@ -47,6 +316,21 @@ void strong_branch_helper(i_t start, for (i_t branch = 0; branch < 2; branch++) { // Do the down branch + const i_t shared_idx = (branch == 0) ? k : k + static_cast(fractional.size()); + // Batch PDLP has already solved this subproblem, skip it + if (sb_view.is_valid() && sb_view.is_solved(shared_idx)) { + if (verbose) { + settings.log.printf( + "[COOP SB] DS thread %d skipping variable %d branch %s (shared_idx %d): already solved " + "by PDLP\n", + thread_id, + j, + branch == 0 ? 
"down" : "up", + shared_idx); + } + continue; + } + if (branch == 0) { child_problem.lower[j] = original_lp.lower[j]; child_problem.upper[j] = std::floor(root_soln[j]); @@ -61,7 +345,10 @@ void strong_branch_helper(i_t start, f_t elapsed_time = toc(start_time); if (elapsed_time > settings.time_limit) { break; } child_settings.time_limit = std::max(0.0, settings.time_limit - elapsed_time); - child_settings.iteration_limit = 200; + child_settings.iteration_limit = iter_limit; + child_settings.cut_off = + objective_upper_bound(child_problem, upper_bound, child_settings.dual_tol); + lp_solution_t solution(original_lp.num_rows, original_lp.num_cols); i_t iter = 0; std::vector vstatus = root_vstatus; @@ -80,7 +367,8 @@ void strong_branch_helper(i_t start, if (status == dual::status_t::DUAL_UNBOUNDED) { // LP was infeasible obj = std::numeric_limits::infinity(); - } else if (status == dual::status_t::OPTIMAL || status == dual::status_t::ITERATION_LIMIT) { + } else if (status == dual::status_t::OPTIMAL || status == dual::status_t::ITERATION_LIMIT || + status == dual::status_t::CUTOFF) { obj = compute_objective(child_problem, solution.x); } else { settings.log.debug("Thread id %2d remaining %d variable %d branch %d status %d\n", @@ -92,7 +380,9 @@ void strong_branch_helper(i_t start, } if (branch == 0) { - pc.strong_branch_down[k] = std::max(obj - root_obj, 0.0); + pc.strong_branch_down[k] = std::max(obj - root_obj, 0.0); + dual_simplex_obj_down[k] = std::max(obj - root_obj, 0.0); + dual_simplex_status_down[k] = status; if (verbose) { settings.log.printf("Thread id %2d remaining %d variable %d branch %d obj %e time %.2f\n", thread_id, @@ -103,7 +393,9 @@ void strong_branch_helper(i_t start, toc(start_time)); } } else { - pc.strong_branch_up[k] = std::max(obj - root_obj, 0.0); + pc.strong_branch_up[k] = std::max(obj - root_obj, 0.0); + dual_simplex_obj_up[k] = std::max(obj - root_obj, 0.0); + dual_simplex_status_up[k] = status; if (verbose) { settings.log.printf( "Thread id 
%2d remaining %d variable %d branch %d obj %e change down %e change up %e " @@ -113,11 +405,28 @@ void strong_branch_helper(i_t start, j, branch, obj, - pc.strong_branch_down[k], - pc.strong_branch_up[k], + dual_simplex_obj_down[k], + dual_simplex_obj_up[k], toc(start_time)); } } + // Mark the subproblem as solved so that batch PDLP removes it from the batch + if (sb_view.is_valid()) { + // We could not mark as solved nodes hitting iteration limit in DS + if ((branch == 0 && is_dual_simplex_done(dual_simplex_status_down[k])) || + (branch == 1 && is_dual_simplex_done(dual_simplex_status_up[k]))) { + sb_view.mark_solved(shared_idx); + if (verbose) { + settings.log.printf( + "[COOP SB] DS thread %d solved variable %d branch %s (shared_idx %d), marking in " + "shared context\n", + thread_id, + j, + branch == 0 ? "down" : "up", + shared_idx); + } + } + } if (toc(start_time) > settings.time_limit) { break; } } if (toc(start_time) > settings.time_limit) { break; } @@ -140,23 +449,21 @@ void strong_branch_helper(i_t start, } template -f_t trial_branching(const lp_problem_t& original_lp, - const simplex_solver_settings_t& settings, - const std::vector& var_types, - const std::vector& vstatus, - const std::vector& edge_norms, - const basis_update_mpf_t& basis_factors, - const std::vector& basic_list, - const std::vector& nonbasic_list, - i_t branch_var, - f_t branch_var_lower, - f_t branch_var_upper, - f_t upper_bound, - i_t bnb_lp_iter_per_node, - f_t start_time, - i_t upper_max_lp_iter, - i_t lower_max_lp_iter, - omp_atomic_t& total_lp_iter) +std::pair trial_branching(const lp_problem_t& original_lp, + const simplex_solver_settings_t& settings, + const std::vector& var_types, + const std::vector& vstatus, + const std::vector& edge_norms, + const basis_update_mpf_t& basis_factors, + const std::vector& basic_list, + const std::vector& nonbasic_list, + i_t branch_var, + f_t branch_var_lower, + f_t branch_var_upper, + f_t upper_bound, + f_t start_time, + i_t iter_limit, + 
omp_atomic_t& total_lp_iter) { lp_problem_t child_problem = original_lp; child_problem.lower[branch_var] = branch_var_lower; @@ -165,12 +472,11 @@ f_t trial_branching(const lp_problem_t& original_lp, const bool initialize_basis = false; simplex_solver_settings_t child_settings = settings; child_settings.set_log(false); - i_t lp_iter_upper = upper_max_lp_iter; - i_t lp_iter_lower = lower_max_lp_iter; - child_settings.iteration_limit = std::clamp(bnb_lp_iter_per_node, lp_iter_lower, lp_iter_upper); - child_settings.cut_off = upper_bound + settings.dual_tol; + child_settings.iteration_limit = iter_limit; child_settings.inside_mip = 2; child_settings.scale_columns = false; + child_settings.cut_off = + objective_upper_bound(child_problem, upper_bound, child_settings.dual_tol); lp_solution_t solution(original_lp.num_rows, original_lp.num_cols); i_t iter = 0; @@ -181,7 +487,7 @@ f_t trial_branching(const lp_problem_t& original_lp, basis_update_mpf_t child_basis_factors = basis_factors; // Only refactor the basis if we encounter numerical issues. 
- child_basis_factors.set_refactor_frequency(upper_max_lp_iter); + child_basis_factors.set_refactor_frequency(iter_limit); dual::status_t status = dual_phase2_with_advanced_basis(2, 0, @@ -207,12 +513,12 @@ f_t trial_branching(const lp_problem_t& original_lp, if (status == dual::status_t::DUAL_UNBOUNDED) { // LP was infeasible - return std::numeric_limits::infinity(); + return {std::numeric_limits::infinity(), dual::status_t::DUAL_UNBOUNDED}; } else if (status == dual::status_t::OPTIMAL || status == dual::status_t::ITERATION_LIMIT || status == dual::status_t::CUTOFF) { - return compute_objective(child_problem, solution.x); + return {compute_objective(child_problem, solution.x), status}; } else { - return std::numeric_limits::quiet_NaN(); + return {std::numeric_limits::quiet_NaN(), dual::status_t::NUMERICAL}; } } @@ -220,15 +526,44 @@ f_t trial_branching(const lp_problem_t& original_lp, template static cuopt::mps_parser::mps_data_model_t simplex_problem_to_mps_data_model( - const dual_simplex::user_problem_t& user_problem) + const dual_simplex::lp_problem_t& lp, + const std::vector& new_slacks, + const std::vector& root_soln, + std::vector& original_root_soln_x) { + // Branch and bound has a problem of the form: + // minimize c^T x + // subject to A*x + Es = b + // l <= x <= u + // E_{jj} = sigma_j, where sigma_j is +1 or -1 + + // We need to convert this into a problem that is better for PDLP + // to solve. PDLP prefers inequality constraints. 
Thus, we want + // to convert the above into the problem: + // minimize c^T x + // subject to lb <= A*x <= ub + // l <= x <= u + cuopt::mps_parser::mps_data_model_t mps_model; - int m = user_problem.num_rows; - int n = user_problem.num_cols; + int m = lp.num_rows; + int n = lp.num_cols - new_slacks.size(); + original_root_soln_x.resize(n); + + // Remove slacks from A + dual_simplex::csc_matrix_t A_no_slacks = lp.A; + std::vector cols_to_remove(lp.A.n, 0); + for (i_t j : new_slacks) { + cols_to_remove[j] = 1; + } + A_no_slacks.remove_columns(cols_to_remove); + + for (i_t j = 0; j < n; j++) { + original_root_soln_x[j] = root_soln[j]; + } // Convert CSC to CSR using built-in method dual_simplex::csr_matrix_t csr_A(m, n, 0); - user_problem.A.to_compressed_row(csr_A); + A_no_slacks.to_compressed_row(csr_A); int nz = csr_A.row_start[m]; @@ -237,78 +572,435 @@ static cuopt::mps_parser::mps_data_model_t simplex_problem_to_mps_data csr_A.x.data(), nz, csr_A.j.data(), nz, csr_A.row_start.data(), m + 1); // Set objective coefficients - mps_model.set_objective_coefficients(user_problem.objective.data(), n); + mps_model.set_objective_coefficients(lp.objective.data(), n); - // Set objective scaling and offset - mps_model.set_objective_scaling_factor(user_problem.obj_scale); - mps_model.set_objective_offset(user_problem.obj_constant); + // The LP is already in minimization form (objective negated for max problems). + // Pass identity scaling so PDLP returns the raw DS-space objective directly. 
+ mps_model.set_objective_scaling_factor(f_t(1.0)); + mps_model.set_objective_offset(f_t(0.0)); // Set variable bounds - mps_model.set_variable_lower_bounds(user_problem.lower.data(), n); - mps_model.set_variable_upper_bounds(user_problem.upper.data(), n); + mps_model.set_variable_lower_bounds(lp.lower.data(), n); + mps_model.set_variable_upper_bounds(lp.upper.data(), n); // Convert row sense and RHS to constraint bounds std::vector constraint_lower(m); std::vector constraint_upper(m); + std::vector slack_map(m, -1); + for (i_t j : new_slacks) { + const i_t col_start = lp.A.col_start[j]; + const i_t i = lp.A.i[col_start]; + slack_map[i] = j; + } + for (i_t i = 0; i < m; ++i) { - if (user_problem.row_sense[i] == 'L') { - constraint_lower[i] = -std::numeric_limits::infinity(); - constraint_upper[i] = user_problem.rhs[i]; - } else if (user_problem.row_sense[i] == 'G') { - constraint_lower[i] = user_problem.rhs[i]; - constraint_upper[i] = std::numeric_limits::infinity(); + // Each row is of the form a_i^T x + sigma * s_i = b_i + // with sigma = +1 or -1 + // and l_i <= s_i <= u_i + // We have that a_i^T x - b_i = -sigma * s_i + // If sigma = -1, then we have + // a_i^T x - b_i = s_i + // l_i <= a_i^T x - b_i <= u_i + // l_i + b_i <= a_i^T x <= u_i + b_i + // + // If sigma = +1, then we have + // a_i^T x - b_i = -s_i + // -a_i^T x + b_i = s_i + // l_i <= -a_i^T x + b_i <= u_i + // l_i - b_i <= -a_i^T x <= u_i - b_i + // -u_i + b_i <= a_i^T x <= -l_i + b_i + + const i_t slack = slack_map[i]; + assert(slack != -1); + const i_t col_start = lp.A.col_start[slack]; + const f_t sigma = lp.A.x[col_start]; + const f_t slack_lower = lp.lower[slack]; + const f_t slack_upper = lp.upper[slack]; + + if (sigma == -1) { + constraint_lower[i] = slack_lower + lp.rhs[i]; + constraint_upper[i] = slack_upper + lp.rhs[i]; + } else if (sigma == 1) { + constraint_lower[i] = -slack_upper + lp.rhs[i]; + constraint_upper[i] = -slack_lower + lp.rhs[i]; } else { - constraint_lower[i] = 
user_problem.rhs[i]; - constraint_upper[i] = user_problem.rhs[i]; + assert(sigma == 1.0 || sigma == -1.0); } } - for (i_t k = 0; k < user_problem.num_range_rows; ++k) { - i_t i = user_problem.range_rows[k]; - f_t r = user_problem.range_value[k]; - f_t b = user_problem.rhs[i]; - f_t h = -std::numeric_limits::infinity(); - f_t u = std::numeric_limits::infinity(); - if (user_problem.row_sense[i] == 'L') { - h = b - std::abs(r); - u = b; - } else if (user_problem.row_sense[i] == 'G') { - h = b; - u = b + std::abs(r); - } else if (user_problem.row_sense[i] == 'E') { - if (r > 0) { - h = b; - u = b + std::abs(r); - } else { - h = b - std::abs(r); - u = b; + mps_model.set_constraint_lower_bounds(constraint_lower.data(), m); + mps_model.set_constraint_upper_bounds(constraint_upper.data(), m); + mps_model.set_maximize(false); + + return mps_model; +} + +enum class sb_source_t { DUAL_SIMPLEX, PDLP, NONE }; + +// Merge a single strong branching result from Dual Simplex and PDLP. +// Rules: +// 1. If both found optimal -> keep DS (higher quality vertex solution) +// 2. Else if Dual Simplex found infeasible -> declare infeasible +// 3. Else if one is optimal -> keep the optimal one +// 4. Else if Dual Simplex hit iteration limit -> keep DS +// 5. 
Else if none converged -> NaN (original objective) +template +static std::pair merge_sb_result(f_t dual_simplex_val, + dual::status_t dual_simplex_status, + f_t pdlp_dual_obj, + bool pdlp_optimal) +{ + // Dual simplex always maintains dual feasibility, so OPTIMAL and ITERATION_LIMIT both qualify + + // Rule 1: Both optimal -> keep DS + if (dual_simplex_status == dual::status_t::OPTIMAL && pdlp_optimal) { + return {dual_simplex_val, sb_source_t::DUAL_SIMPLEX}; + } + + // Rule 2: Dual Simplex found infeasible -> declare infeasible + if (dual_simplex_status == dual::status_t::DUAL_UNBOUNDED) { + return {std::numeric_limits::infinity(), sb_source_t::DUAL_SIMPLEX}; + } + + // Rule 3: Only one converged -> keep that + if (dual_simplex_status == dual::status_t::OPTIMAL && !pdlp_optimal) { + return {dual_simplex_val, sb_source_t::DUAL_SIMPLEX}; + } + if (pdlp_optimal && dual_simplex_status != dual::status_t::OPTIMAL) { + return {pdlp_dual_obj, sb_source_t::PDLP}; + } + + // Rule 4: Dual Simplex hit iteration limit or work limit or cutoff -> keep DS + if (dual_simplex_status == dual::status_t::ITERATION_LIMIT || + dual_simplex_status == dual::status_t::WORK_LIMIT || + dual_simplex_status == dual::status_t::CUTOFF) { + return {dual_simplex_val, sb_source_t::DUAL_SIMPLEX}; + } + + // Rule 5: None converged -> NaN + return {std::numeric_limits::quiet_NaN(), sb_source_t::NONE}; +} + +template +static void batch_pdlp_strong_branching_task( + const simplex_solver_settings_t& settings, + i_t effective_batch_pdlp, + f_t start_time, + std::atomic& concurrent_halt, + const lp_problem_t& original_lp, + const std::vector& new_slacks, + const std::vector& root_soln, + const std::vector& fractional, + f_t root_obj, + pseudo_costs_t& pc, + shared_strong_branching_context_view_t& sb_view, + std::vector& pdlp_obj_down, + std::vector& pdlp_obj_up) +{ + constexpr bool verbose = false; + + settings.log.printf(effective_batch_pdlp == 2 + ? 
"Batch PDLP only for strong branching\n" + : "Cooperative batch PDLP and Dual Simplex for strong branching\n"); + + f_t start_batch = tic(); + std::vector original_root_soln_x; + + if (concurrent_halt.load() == 1) { return; } + + const auto mps_model = + simplex_problem_to_mps_data_model(original_lp, new_slacks, root_soln, original_root_soln_x); + + std::vector fraction_values; + + std::vector original_root_soln_y, original_root_soln_z; + // TODO put back later once Chris has this part + /*uncrush_dual_solution( + original_problem, original_lp, root_soln_y, root_soln_z, original_root_soln_y, + original_root_soln_z);*/ + + for (i_t k = 0; k < fractional.size(); k++) { + const i_t j = fractional[k]; + fraction_values.push_back(original_root_soln_x[j]); + } + + if (concurrent_halt.load() == 1) { return; } + + f_t batch_elapsed_time = toc(start_time); + const f_t warm_start_remaining_time = + std::max(static_cast(0.0), settings.time_limit - batch_elapsed_time); + if (warm_start_remaining_time <= 0.0) { return; } + + assert(!pc.pdlp_warm_cache.populated && "PDLP warm cache should not be populated at this point"); + + if (!pc.pdlp_warm_cache.populated) { + pdlp_solver_settings_t ws_settings; + ws_settings.method = method_t::PDLP; + ws_settings.presolver = presolver_t::None; + ws_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; + ws_settings.detect_infeasibility = false; + // Since the warm start will be used over and over again we want to maximize the chance of + // convergeance Batch PDLP is very compute intensive so we want to minimize the number of + // iterations + constexpr int warm_start_iteration_limit = 500000; + ws_settings.iteration_limit = warm_start_iteration_limit; + ws_settings.time_limit = warm_start_remaining_time; + constexpr f_t pdlp_tolerance = 1e-5; + ws_settings.tolerances.relative_dual_tolerance = pdlp_tolerance; + ws_settings.tolerances.absolute_dual_tolerance = pdlp_tolerance; + ws_settings.tolerances.relative_primal_tolerance = 
pdlp_tolerance; + ws_settings.tolerances.absolute_primal_tolerance = pdlp_tolerance; + ws_settings.tolerances.relative_gap_tolerance = pdlp_tolerance; + ws_settings.tolerances.absolute_gap_tolerance = pdlp_tolerance; + ws_settings.inside_mip = true; + if (effective_batch_pdlp == 1) { ws_settings.concurrent_halt = &concurrent_halt; } + + auto start_time = std::chrono::high_resolution_clock::now(); + + auto ws_solution = solve_lp(&pc.pdlp_warm_cache.batch_pdlp_handle, mps_model, ws_settings); + + if (verbose) { + auto end_time = std::chrono::high_resolution_clock::now(); + auto duration = + std::chrono::duration_cast(end_time - start_time).count(); + settings.log.printf( + "Original problem solved in %d milliseconds" + " and iterations: %d\n", + duration, + ws_solution.get_pdlp_warm_start_data().total_pdlp_iterations_); + } + + if (ws_solution.get_termination_status() == pdlp_termination_status_t::Optimal) { + auto& cache = pc.pdlp_warm_cache; + const auto& ws_primal = ws_solution.get_primal_solution(); + const auto& ws_dual = ws_solution.get_dual_solution(); + // Need to use the pc steam since the batch pdlp handle will get destroyed after the warm + // start + cache.initial_primal = rmm::device_uvector(ws_primal, ws_primal.stream()); + cache.initial_dual = rmm::device_uvector(ws_dual, ws_dual.stream()); + cache.step_size = ws_solution.get_pdlp_warm_start_data().initial_step_size_; + cache.primal_weight = ws_solution.get_pdlp_warm_start_data().initial_primal_weight_; + cache.pdlp_iteration = ws_solution.get_pdlp_warm_start_data().total_pdlp_iterations_; + cache.populated = true; + + if (verbose) { + settings.log.printf( + "Cached PDLP warm start: primal=%zu dual=%zu step_size=%e primal_weight=%e iters=%d\n", + cache.initial_primal.size(), + cache.initial_dual.size(), + cache.step_size, + cache.primal_weight, + cache.pdlp_iteration); + } + } else { + if (verbose) { + settings.log.printf( + "PDLP warm start solve did not reach optimality (%s), skipping cache and batch 
PDLP\n", + ws_solution.get_termination_status_string().c_str()); } + return; } - constraint_lower[i] = h; - constraint_upper[i] = u; } - mps_model.set_constraint_lower_bounds(constraint_lower.data(), m); - mps_model.set_constraint_upper_bounds(constraint_upper.data(), m); - mps_model.set_maximize(user_problem.obj_scale < 0); + if (concurrent_halt.load() == 1) { return; } - return mps_model; + pdlp_solver_settings_t pdlp_settings; + if (effective_batch_pdlp == 1) { + pdlp_settings.concurrent_halt = &concurrent_halt; + pdlp_settings.shared_sb_solved = sb_view.solved; + } + + batch_elapsed_time = toc(start_time); + const f_t batch_remaining_time = + std::max(static_cast(0.0), settings.time_limit - batch_elapsed_time); + if (batch_remaining_time <= 0.0) { return; } + pdlp_settings.time_limit = batch_remaining_time; + + if (pc.pdlp_warm_cache.populated) { + auto& cache = pc.pdlp_warm_cache; + pdlp_settings.set_initial_primal_solution(cache.initial_primal.data(), + cache.initial_primal.size(), + cache.batch_pdlp_handle.get_stream()); + pdlp_settings.set_initial_dual_solution( + cache.initial_dual.data(), cache.initial_dual.size(), cache.batch_pdlp_handle.get_stream()); + pdlp_settings.set_initial_step_size(cache.step_size); + pdlp_settings.set_initial_primal_weight(cache.primal_weight); + pdlp_settings.set_initial_pdlp_iteration(cache.pdlp_iteration); + } + + if (concurrent_halt.load() == 1) { return; } + + const auto solutions = batch_pdlp_solve( + &pc.pdlp_warm_cache.batch_pdlp_handle, mps_model, fractional, fraction_values, pdlp_settings); + f_t batch_pdlp_strong_branching_time = toc(start_batch); + + // Fail safe in case the batch PDLP failed and produced no solutions + if (solutions.get_additional_termination_informations().size() != fractional.size() * 2) { + if (verbose) { settings.log.printf("Batch PDLP failed and produced no solutions\n"); } + return; + } + + // Find max iteration on how many are done accross the batch + i_t max_iterations = 0; + i_t amount_done 
= 0; + for (i_t k = 0; k < solutions.get_additional_termination_informations().size(); k++) { + max_iterations = std::max( + max_iterations, solutions.get_additional_termination_information(k).number_of_steps_taken); + // TODO batch mode infeasible: should also count as done if infeasible + if (solutions.get_termination_status(k) == pdlp_termination_status_t::Optimal) { + amount_done++; + } + } + + if (verbose) { + settings.log.printf( + "Batch PDLP strong branching completed in %.2fs. Solved %d/%d with max %d iterations\n", + batch_pdlp_strong_branching_time, + amount_done, + fractional.size() * 2, + max_iterations); + } + + for (i_t k = 0; k < fractional.size(); k++) { + if (solutions.get_termination_status(k) == pdlp_termination_status_t::Optimal) { + pdlp_obj_down[k] = std::max(solutions.get_dual_objective_value(k) - root_obj, f_t(0.0)); + } + if (solutions.get_termination_status(k + fractional.size()) == + pdlp_termination_status_t::Optimal) { + pdlp_obj_up[k] = + std::max(solutions.get_dual_objective_value(k + fractional.size()) - root_obj, f_t(0.0)); + } + } +} + +template +static void batch_pdlp_reliability_branching_task( + logger_t& log, + i_t rb_mode, + i_t num_candidates, + f_t start_time, + std::atomic& concurrent_halt, + const lp_problem_t& original_lp, + const std::vector& new_slacks, + const std::vector& solution, + branch_and_bound_worker_t* worker, + const std::vector& candidate_vars, + const simplex_solver_settings_t& settings, + shared_strong_branching_context_view_t& sb_view, + batch_pdlp_warm_cache_t& pdlp_warm_cache, + std::vector& pdlp_obj_down, + std::vector& pdlp_obj_up) +{ + log.printf(rb_mode == 2 ? 
"RB batch PDLP only for %d candidates\n" + : "RB cooperative batch PDLP and DS for %d candidates\n", + num_candidates); + + f_t start_batch = tic(); + + std::vector original_soln_x; + + if (concurrent_halt.load() == 1) { return; } + + auto mps_model = + simplex_problem_to_mps_data_model(original_lp, new_slacks, solution, original_soln_x); + { + const i_t n_orig = original_lp.num_cols - new_slacks.size(); + for (i_t j = 0; j < n_orig; j++) { + mps_model.variable_lower_bounds_[j] = worker->leaf_problem.lower[j]; + mps_model.variable_upper_bounds_[j] = worker->leaf_problem.upper[j]; + } + } + + std::vector fraction_values; + fraction_values.reserve(num_candidates); + for (i_t j : candidate_vars) { + fraction_values.push_back(original_soln_x[j]); + } + + if (concurrent_halt.load() == 1) { return; } + + const f_t batch_elapsed_time = toc(start_time); + const f_t batch_remaining_time = + std::max(static_cast(0.0), settings.time_limit - batch_elapsed_time); + if (batch_remaining_time <= 0.0) { return; } + + // One handle per batch PDLP since there can be concurrent calls + const raft::handle_t batch_pdlp_handle; + + pdlp_solver_settings_t pdlp_settings; + if (rb_mode == 1) { + pdlp_settings.concurrent_halt = &concurrent_halt; + pdlp_settings.shared_sb_solved = sb_view.solved; + } + pdlp_settings.time_limit = batch_remaining_time; + + if (pdlp_warm_cache.populated) { + auto& cache = pdlp_warm_cache; + pdlp_settings.set_initial_primal_solution( + cache.initial_primal.data(), cache.initial_primal.size(), batch_pdlp_handle.get_stream()); + pdlp_settings.set_initial_dual_solution( + cache.initial_dual.data(), cache.initial_dual.size(), batch_pdlp_handle.get_stream()); + pdlp_settings.set_initial_step_size(cache.step_size); + pdlp_settings.set_initial_primal_weight(cache.primal_weight); + pdlp_settings.set_initial_pdlp_iteration(cache.pdlp_iteration); + } + + if (concurrent_halt.load() == 1) { return; } + + const auto solutions = + batch_pdlp_solve(&batch_pdlp_handle, 
mps_model, candidate_vars, fraction_values, pdlp_settings); + + f_t batch_pdlp_time = toc(start_batch); + + if (solutions.get_additional_termination_informations().size() != + static_cast(num_candidates) * 2) { + log.printf("RB batch PDLP failed and produced no solutions\n"); + return; + } + + i_t amount_done = 0; + for (i_t k = 0; k < num_candidates * 2; k++) { + if (solutions.get_termination_status(k) == pdlp_termination_status_t::Optimal) { + amount_done++; + } + } + + log.printf("RB batch PDLP completed in %.2fs. Solved %d/%d\n", + batch_pdlp_time, + amount_done, + num_candidates * 2); + + for (i_t k = 0; k < num_candidates; k++) { + if (solutions.get_termination_status(k) == pdlp_termination_status_t::Optimal) { + pdlp_obj_down[k] = solutions.get_dual_objective_value(k); + } + if (solutions.get_termination_status(k + num_candidates) == + pdlp_termination_status_t::Optimal) { + pdlp_obj_up[k] = solutions.get_dual_objective_value(k + num_candidates); + } + } } template -void strong_branching(const user_problem_t& original_problem, - const lp_problem_t& original_lp, +void strong_branching(const lp_problem_t& original_lp, const simplex_solver_settings_t& settings, f_t start_time, + const std::vector& new_slacks, const std::vector& var_types, - const std::vector root_soln, + const lp_solution_t& root_solution, const std::vector& fractional, f_t root_obj, + f_t upper_bound, const std::vector& root_vstatus, const std::vector& edge_norms, + const std::vector& basic_list, + const std::vector& nonbasic_list, + basis_update_mpf_t& basis_factors, pseudo_costs_t& pc) { + constexpr bool verbose = false; + pc.resize(original_lp.num_cols); pc.strong_branch_down.assign(fractional.size(), 0); pc.strong_branch_up.assign(fractional.size(), 0); @@ -317,128 +1009,224 @@ void strong_branching(const user_problem_t& original_problem, const f_t elapsed_time = toc(start_time); if (elapsed_time > settings.time_limit) { return; } - if (settings.mip_batch_pdlp_strong_branching) { - 
settings.log.printf("Batch PDLP strong branching enabled\n"); + // 0: no batch PDLP, 1: cooperative batch PDLP and DS, 2: batch PDLP only + const i_t effective_batch_pdlp = + (settings.sub_mip || (settings.deterministic && settings.mip_batch_pdlp_strong_branching == 1)) + ? 0 + : settings.mip_batch_pdlp_strong_branching; - f_t start_batch = tic(); + if (settings.mip_batch_pdlp_strong_branching != 0 && + (settings.sub_mip || settings.deterministic)) { + settings.log.printf( + "Batch PDLP strong branching is disabled because sub-MIP or deterministic mode is enabled\n"); + } - // Use original_problem to create the BatchLP problem - csr_matrix_t A_row(original_problem.A.m, original_problem.A.n, 0); - original_problem.A.to_compressed_row(A_row); + settings.log.printf("Strong branching using %d threads and %ld fractional variables\n", + settings.num_threads, + fractional.size()); + + // Cooperative DS + PDLP: shared context tracks which subproblems are solved + shared_strong_branching_context_t shared_ctx(2 * fractional.size()); + shared_strong_branching_context_view_t sb_view(shared_ctx.solved); + + std::atomic concurrent_halt{0}; + + std::vector pdlp_obj_down(fractional.size(), std::numeric_limits::quiet_NaN()); + std::vector pdlp_obj_up(fractional.size(), std::numeric_limits::quiet_NaN()); + + std::vector dual_simplex_status_down(fractional.size(), dual::status_t::UNSET); + std::vector dual_simplex_status_up(fractional.size(), dual::status_t::UNSET); + std::vector dual_simplex_obj_down(fractional.size(), std::numeric_limits::quiet_NaN()); + std::vector dual_simplex_obj_up(fractional.size(), std::numeric_limits::quiet_NaN()); + f_t strong_branching_start_time = tic(); + i_t simplex_iteration_limit = settings.strong_branching_simplex_iteration_limit; + + if (simplex_iteration_limit < 1) { + initialize_pseudo_costs_with_estimate(original_lp, + settings, + root_vstatus, + root_solution, + basic_list, + nonbasic_list, + fractional, + basis_factors, + pc); + } else { 
+#pragma omp parallel num_threads(settings.num_threads) + { +#pragma omp single nowait + { + if (effective_batch_pdlp != 0) { +#pragma omp task + batch_pdlp_strong_branching_task(settings, + effective_batch_pdlp, + start_time, + concurrent_halt, + original_lp, + new_slacks, + root_solution.x, + fractional, + root_obj, + pc, + sb_view, + pdlp_obj_down, + pdlp_obj_up); + } - // Convert the root_soln to the original problem space - std::vector original_root_soln_x; - uncrush_primal_solution(original_problem, original_lp, root_soln, original_root_soln_x); + if (effective_batch_pdlp != 2) { + i_t n = std::min(4 * settings.num_threads, fractional.size()); +// Here we are creating more tasks than the number of threads +// such that they can be scheduled dynamically to the threads. +#pragma omp taskloop num_tasks(n) + for (i_t k = 0; k < n; k++) { + i_t start = std::floor(k * fractional.size() / n); + i_t end = std::floor((k + 1) * fractional.size() / n); + + constexpr bool verbose = false; + if (verbose) { + settings.log.printf("Thread id %d task id %d start %d end %d. 
size %d\n", + omp_get_thread_num(), + k, + start, + end, + end - start); + } + + strong_branch_helper(start, + end, + start_time, + original_lp, + settings, + var_types, + fractional, + root_solution.x, + root_vstatus, + edge_norms, + root_obj, + upper_bound, + simplex_iteration_limit, + pc, + dual_simplex_obj_down, + dual_simplex_obj_up, + dual_simplex_status_down, + dual_simplex_status_up, + sb_view); + } + // DS done: signal PDLP to stop (time-limit or all work done) and wait + if (effective_batch_pdlp == 1) { concurrent_halt.store(1); } + } + } + } + } - std::vector fraction_values; + settings.log.printf("Strong branching completed in %.2fs\n", toc(strong_branching_start_time)); + if (verbose) { + // Collect Dual Simplex statistics + i_t dual_simplex_optimal = 0, dual_simplex_infeasible = 0, dual_simplex_iter_limit = 0; + i_t dual_simplex_numerical = 0, dual_simplex_cutoff = 0, dual_simplex_time_limit = 0; + i_t dual_simplex_concurrent = 0, dual_simplex_work_limit = 0, dual_simplex_unset = 0; + const i_t total_subproblems = fractional.size() * 2; for (i_t k = 0; k < fractional.size(); k++) { - const i_t j = fractional[k]; - fraction_values.push_back(original_root_soln_x[j]); - } - - const auto mps_model = simplex_problem_to_mps_data_model(original_problem); - const f_t batch_elapsed_time = toc(start_time); - const f_t batch_remaining_time = - std::max(static_cast(0.0), settings.time_limit - batch_elapsed_time); - if (batch_remaining_time <= 0.0) { return; } - pdlp_solver_settings_t pdlp_settings; - pdlp_settings.time_limit = batch_remaining_time; - const raft::handle_t batch_pdlp_handle; - const auto solutions = - batch_pdlp_solve(&batch_pdlp_handle, mps_model, fractional, fraction_values, pdlp_settings); - f_t batch_pdlp_strong_branching_time = toc(start_batch); - - // Find max iteration on how many are done accross the batch - i_t max_iterations = 0; - i_t amount_done = 0; - for (i_t k = 0; k < solutions.get_additional_termination_informations().size(); k++) 
{ - max_iterations = std::max( - max_iterations, solutions.get_additional_termination_information(k).number_of_steps_taken); - // TODO batch mode infeasible: should also count as done if infeasible - if (solutions.get_termination_status(k) == pdlp_termination_status_t::Optimal) { - amount_done++; + for (auto st : {dual_simplex_status_down[k], dual_simplex_status_up[k]}) { + switch (st) { + case dual::status_t::OPTIMAL: dual_simplex_optimal++; break; + case dual::status_t::DUAL_UNBOUNDED: dual_simplex_infeasible++; break; + case dual::status_t::ITERATION_LIMIT: dual_simplex_iter_limit++; break; + case dual::status_t::NUMERICAL: dual_simplex_numerical++; break; + case dual::status_t::CUTOFF: dual_simplex_cutoff++; break; + case dual::status_t::TIME_LIMIT: dual_simplex_time_limit++; break; + case dual::status_t::CONCURRENT_LIMIT: dual_simplex_concurrent++; break; + case dual::status_t::WORK_LIMIT: dual_simplex_work_limit++; break; + case dual::status_t::UNSET: dual_simplex_unset++; break; + } } } - settings.log.printf( - "Batch PDLP strong branching completed in %.2fs. 
Solved %d/%d with max %d iterations\n", - batch_pdlp_strong_branching_time, - amount_done, - fractional.size() * 2, - max_iterations); + settings.log.printf("Dual Simplex: %d/%d optimal, %d infeasible, %d iter-limit", + dual_simplex_optimal, + total_subproblems, + dual_simplex_infeasible, + dual_simplex_iter_limit); + if (dual_simplex_cutoff) settings.log.printf(", %d cutoff", dual_simplex_cutoff); + if (dual_simplex_time_limit) settings.log.printf(", %d time-limit", dual_simplex_time_limit); + if (dual_simplex_numerical) settings.log.printf(", %d numerical", dual_simplex_numerical); + if (dual_simplex_concurrent) + settings.log.printf(", %d concurrent-halt", dual_simplex_concurrent); + if (dual_simplex_work_limit) settings.log.printf(", %d work-limit", dual_simplex_work_limit); + if (dual_simplex_unset) settings.log.printf(", %d unset/skipped", dual_simplex_unset); + settings.log.printf("\n"); + } + if (effective_batch_pdlp != 0 && verbose) { + i_t pdlp_optimal_count = 0; for (i_t k = 0; k < fractional.size(); k++) { - // Call BatchLP solver. Solve 2*fractional.size() subproblems. - // Let j = fractional[k]. We want to solve the two trial branching problems - // Branch down: - // minimize c^T x - // subject to lb <= A*x <= ub - // x_j <= floor(root_soln[j]) - // l <= x < u - // Let the optimal objective value of thie problem be obj_down - f_t obj_down = (solutions.get_termination_status(k) == pdlp_termination_status_t::Optimal) - ? solutions.get_dual_objective_value(k) - : std::numeric_limits::quiet_NaN(); - - // Branch up: - // minimize c^T x - // subject to lb <= A*x <= ub - // x_j >= ceil(root_soln[j]) - // Let the optimal objective value of thie problem be obj_up - f_t obj_up = (solutions.get_termination_status(k + fractional.size()) == - pdlp_termination_status_t::Optimal) - ? 
solutions.get_dual_objective_value(k + fractional.size()) - : std::numeric_limits::quiet_NaN(); - - pc.strong_branch_down[k] = obj_down - root_obj; - pc.strong_branch_up[k] = obj_up - root_obj; + if (!std::isnan(pdlp_obj_down[k])) pdlp_optimal_count++; + if (!std::isnan(pdlp_obj_up[k])) pdlp_optimal_count++; } - } else { - settings.log.printf("Strong branching using %d threads and %ld fractional variables\n", - settings.num_threads, - fractional.size()); - f_t strong_branching_start_time = tic(); - -#pragma omp parallel num_threads(settings.num_threads) - { - i_t n = std::min(4 * settings.num_threads, fractional.size()); - // Here we are creating more tasks than the number of threads - // such that they can be scheduled dynamically to the threads. -#pragma omp for schedule(dynamic, 1) - for (i_t k = 0; k < n; k++) { - i_t start = std::floor(k * fractional.size() / n); - i_t end = std::floor((k + 1) * fractional.size() / n); + settings.log.printf("Batch PDLP found %d/%d optimal solutions\n", + pdlp_optimal_count, + static_cast(fractional.size() * 2)); + } - constexpr bool verbose = false; - if (verbose) { - settings.log.printf("Thread id %d task id %d start %d end %d. size %d\n", - omp_get_thread_num(), - k, - start, - end, - end - start); + if (effective_batch_pdlp != 0) { + i_t merged_from_ds = 0; + i_t merged_from_pdlp = 0; + i_t merged_nan = 0; + i_t solved_by_both = 0; + for (i_t k = 0; k < fractional.size(); k++) { + for (i_t branch = 0; branch < 2; branch++) { + const bool is_down = (branch == 0); + f_t& sb_dest = is_down ? pc.strong_branch_down[k] : pc.strong_branch_up[k]; + f_t ds_obj = is_down ? dual_simplex_obj_down[k] : dual_simplex_obj_up[k]; + dual::status_t ds_status = + is_down ? dual_simplex_status_down[k] : dual_simplex_status_up[k]; + f_t pdlp_obj = is_down ? 
pdlp_obj_down[k] : pdlp_obj_up[k]; + bool pdlp_has = !std::isnan(pdlp_obj); + bool ds_has = ds_status != dual::status_t::UNSET; + + const auto [value, source] = + merge_sb_result(ds_obj, ds_status, pdlp_obj, pdlp_has); + + if (source == sb_source_t::PDLP || effective_batch_pdlp == 2) { sb_dest = value; } + + if (source == sb_source_t::DUAL_SIMPLEX) + merged_from_ds++; + else if (source == sb_source_t::PDLP) + merged_from_pdlp++; + else + merged_nan++; + + if (ds_has && pdlp_has && verbose) { + solved_by_both++; + settings.log.printf( + "[COOP SB] Merge: variable %d %s solved by BOTH (DS=%e PDLP=%e) -> kept %s\n", + fractional[k], + is_down ? "DOWN" : "UP", + ds_obj, + pdlp_obj, + source == sb_source_t::DUAL_SIMPLEX ? "DS" : "PDLP"); } - - strong_branch_helper(start, - end, - start_time, - original_lp, - settings, - var_types, - fractional, - root_obj, - root_soln, - root_vstatus, - edge_norms, - pc); } } - settings.log.printf("Strong branching completed in %.2fs\n", toc(strong_branching_start_time)); + + pc.pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root = + (f_t(merged_from_pdlp) / f_t(fractional.size() * 2)) * 100.0; + if (verbose) { + settings.log.printf( + "Batch PDLP for strong branching. 
Percent solved by batch PDLP at root: %f\n", + pc.pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root); + settings.log.printf( + "Merged results: %d from DS, %d from PDLP, %d unresolved (NaN), %d solved by both\n", + merged_from_ds, + merged_from_pdlp, + merged_nan, + solved_by_both); + } } - pc.update_pseudo_costs_from_strong_branching(fractional, root_soln); + pc.update_pseudo_costs_from_strong_branching(fractional, root_solution.x); } template @@ -529,38 +1317,32 @@ i_t pseudo_costs_t::variable_selection(const std::vector& fractio template i_t pseudo_costs_t::reliable_variable_selection( - mip_node_t* node_ptr, + const mip_node_t* node_ptr, const std::vector& fractional, - const std::vector& solution, - const simplex_solver_settings_t& settings, - const std::vector& var_types, branch_and_bound_worker_t* worker, + const std::vector& var_types, const branch_and_bound_stats_t& bnb_stats, + const simplex_solver_settings_t& settings, f_t upper_bound, int max_num_tasks, - logger_t& log) + logger_t& log, + const std::vector& new_slacks, + const lp_problem_t& original_lp) { - constexpr f_t eps = 1e-6; - f_t start_time = bnb_stats.start_time; - i_t branch_var = fractional[0]; - f_t max_score = -1; - i_t num_initialized_down; - i_t num_initialized_up; - f_t pseudo_cost_down_avg; - f_t pseudo_cost_up_avg; - - initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); - - log.printf("PC: num initialized down %d up %d avg down %e up %e\n", - num_initialized_down, - num_initialized_up, - pseudo_cost_down_avg, - pseudo_cost_up_avg); + constexpr f_t eps = 1e-6; + f_t start_time = bnb_stats.start_time; + i_t branch_var = fractional[0]; + f_t max_score = -1; + f_t pseudo_cost_down_avg = -1; + f_t pseudo_cost_up_avg = -1; + lp_solution_t& leaf_solution = worker->leaf_solution; const int64_t branch_and_bound_lp_iters = bnb_stats.total_lp_iters; - const int64_t branch_and_bound_explored = bnb_stats.nodes_explored; const i_t 
branch_and_bound_lp_iter_per_node = - branch_and_bound_lp_iters / bnb_stats.nodes_explored; + bnb_stats.nodes_explored > 0 ? branch_and_bound_lp_iters / bnb_stats.nodes_explored : 0; + const i_t iter_limit_per_trial = std::clamp(2 * branch_and_bound_lp_iter_per_node, + reliability_branching_settings.lower_max_lp_iter, + reliability_branching_settings.upper_max_lp_iter); i_t reliable_threshold = settings.reliability_branching; if (reliable_threshold < 0) { @@ -580,17 +1362,32 @@ i_t pseudo_costs_t::reliable_variable_selection( reliable_threshold = strong_branching_lp_iter < max_reliability_iter ? reliable_threshold : 0; } - std::vector unreliable_list; + // If `reliable_threshold == 0`, then we set the uninitialized pseudocosts to the average. + // Otherwise, the best ones are initialized via strong branching, while the other are ignored. // + // In the latter, we are not using the average pseudocost (which calculated in the `initialized` + // method). + if (reliable_threshold == 0) { + i_t num_initialized_up; + i_t num_initialized_down; + initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); + log.printf("PC: num initialized down %d up %d avg down %e up %e\n", + num_initialized_down, + num_initialized_up, + pseudo_cost_down_avg, + pseudo_cost_up_avg); + } + + std::vector> unreliable_list; omp_mutex_t score_mutex; for (i_t j : fractional) { if (pseudo_cost_num_down[j] < reliable_threshold || pseudo_cost_num_up[j] < reliable_threshold) { - unreliable_list.push_back(j); + unreliable_list.push_back(std::make_pair(-1, j)); continue; } - - f_t score = calculate_pseudocost_score(j, solution, pseudo_cost_up_avg, pseudo_cost_down_avg); + f_t score = + calculate_pseudocost_score(j, leaf_solution.x, pseudo_cost_up_avg, pseudo_cost_down_avg); if (score > max_score) { max_score = score; @@ -599,15 +1396,57 @@ i_t pseudo_costs_t::reliable_variable_selection( } if (unreliable_list.empty()) { - log.printf( - "pc branching on %d. Value %e. 
Score %e\n", branch_var, solution[branch_var], max_score); + log.printf("pc branching on %d. Value %e. Score %e\n", + branch_var, + leaf_solution.x[branch_var], + max_score); return branch_var; } - const int num_tasks = std::max(max_num_tasks, 1); - const int task_priority = reliability_branching_settings.task_priority; - const i_t max_num_candidates = reliability_branching_settings.max_num_candidates; + // 0: no batch PDLP, 1: cooperative batch PDLP and DS, 2: batch PDLP only + const i_t rb_mode = settings.mip_batch_pdlp_reliability_branching; + // We don't use batch PDLP in reliability branching if the PDLP warm start data was not filled + // This indicates that PDLP alone (not batched) couldn't even run at the root node + // So it will most likely perform poorly compared to DS + // It is also off if the number of candidate is very small + // If warm start could run but almost none of the BPDLP results were used, we also want to avoid + // using batch PDLP + constexpr i_t min_num_candidates_for_pdlp = 5; + constexpr f_t min_percent_solved_by_batch_pdlp_at_root_for_pdlp = 5.0; + // Batch PDLP is either forced or we use the heuristic to decide if it should be used + const bool use_pdlp = (rb_mode == 2) || (rb_mode != 0 && !settings.sub_mip && + !settings.deterministic && pdlp_warm_cache.populated && + unreliable_list.size() > min_num_candidates_for_pdlp && + pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root > + min_percent_solved_by_batch_pdlp_at_root_for_pdlp); + + if (rb_mode != 0 && !pdlp_warm_cache.populated) { + log.printf("PDLP warm start data not populated, using DS only\n"); + } else if (rb_mode != 0 && settings.sub_mip) { + log.printf("Batch PDLP reliability branching is disabled because sub-MIP is enabled\n"); + } else if (rb_mode != 0 && settings.deterministic) { + log.printf( + "Batch PDLP reliability branching is disabled because deterministic mode is enabled\n"); + } else if (rb_mode != 0 && unreliable_list.size() < min_num_candidates_for_pdlp) { + 
log.printf("Not enough candidates to use batch PDLP, using DS only\n"); + } else if (rb_mode != 0 && pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root < 5.0) { + log.printf("Percent solved by batch PDLP at root is too low, using DS only\n"); + } else if (use_pdlp) { + log.printf( + "Using batch PDLP because populated, unreliable list size is %d (> %d), and percent solved " + "by batch PDLP at root is %f%% (> %f%%)\n", + static_cast(unreliable_list.size()), + min_num_candidates_for_pdlp, + pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root, + min_percent_solved_by_batch_pdlp_at_root_for_pdlp); + } + + const int num_tasks = std::max(max_num_tasks, 10); + const int task_priority = reliability_branching_settings.task_priority; + // If both batch PDLP and DS are used we double the max number of candidates + const i_t max_num_candidates = use_pdlp ? 2 * reliability_branching_settings.max_num_candidates + : reliability_branching_settings.max_num_candidates; const i_t num_candidates = std::min(unreliable_list.size(), max_num_candidates); assert(task_priority > 0); @@ -623,96 +1462,317 @@ i_t pseudo_costs_t::reliable_variable_selection( num_tasks, reliable_threshold); - // Shuffle the unreliable list so every variable has the same chance to be selected. 
- if (unreliable_list.size() > max_num_candidates) { worker->rng.shuffle(unreliable_list); } + if (unreliable_list.size() > max_num_candidates) { + if (reliability_branching_settings.rank_candidates_with_dual_pivot) { + i_t m = worker->leaf_problem.num_rows; + i_t n = worker->leaf_problem.num_cols; + f_t work_estimate = 0; + + std::vector delta_z(n, 0); + std::vector workspace(n, 0); + + std::vector basic_map(n, -1); + for (i_t i = 0; i < m; i++) { + basic_map[worker->basic_list[i]] = i; + } + + std::vector nonbasic_mark(n, -1); + for (i_t i = 0; i < n - m; i++) { + nonbasic_mark[worker->nonbasic_list[i]] = i; + } + + for (auto& [score, j] : unreliable_list) { + if (pseudo_cost_num_down[j] == 0 || pseudo_cost_num_up[j] == 0) { + // Estimate the objective change by performing a single pivot of dual simplex. + objective_change_estimate_t estimate = + single_pivot_objective_change_estimate(worker->leaf_problem, + settings, + AT, + node_ptr->vstatus, + j, + basic_map[j], + leaf_solution, + worker->basic_list, + worker->nonbasic_list, + nonbasic_mark, + worker->basis_factors, + workspace, + delta_z, + work_estimate); + + score = std::max(estimate.up_obj_change, eps) * std::max(estimate.down_obj_change, eps); + } else { + // Use the previous score, even if it is unreliable + score = calculate_pseudocost_score( + j, leaf_solution.x, pseudo_cost_up_avg, pseudo_cost_down_avg); + } + } + } else { + f_t high = max_score > 0 ? 
max_score : 1; + f_t low = 0; + + for (auto& [score, j] : unreliable_list) { + if (score == -1) { score = worker->rng.uniform(low, high); } + } + } + + // We only need to get the top-k elements in the list, where + // k = num_candidates + std::partial_sort(unreliable_list.begin(), + unreliable_list.begin() + num_candidates, + unreliable_list.end(), + [](auto el1, auto el2) { return el1.first > el2.first; }); + } + + // Both DS and PDLP work on the same candidate set + std::vector candidate_vars(num_candidates); + for (i_t i = 0; i < num_candidates; i++) { + candidate_vars[i] = unreliable_list[i].second; + } + + // Shared context for cooperative work-stealing (mode 1) + // [0..num_candidates) = down, [num_candidates..2*num_candidates) = up + shared_strong_branching_context_t shared_ctx(2 * num_candidates); + shared_strong_branching_context_view_t sb_view(shared_ctx.solved); + + std::vector pdlp_obj_down(num_candidates, std::numeric_limits::quiet_NaN()); + std::vector pdlp_obj_up(num_candidates, std::numeric_limits::quiet_NaN()); + + std::atomic concurrent_halt{0}; + + if (use_pdlp) { +#pragma omp task default(shared) + batch_pdlp_reliability_branching_task(log, + rb_mode, + num_candidates, + start_time, + concurrent_halt, + original_lp, + new_slacks, + leaf_solution.x, + worker, + candidate_vars, + settings, + sb_view, + pdlp_warm_cache, + pdlp_obj_down, + pdlp_obj_up); + } if (toc(start_time) > settings.time_limit) { - log.printf("Time limit reached"); + log.printf("Time limit reached\n"); + if (use_pdlp) { + concurrent_halt.store(1); +#pragma omp taskwait + } return branch_var; } + std::vector dual_simplex_obj_down(num_candidates, std::numeric_limits::quiet_NaN()); + std::vector dual_simplex_obj_up(num_candidates, std::numeric_limits::quiet_NaN()); + std::vector dual_simplex_status_down(num_candidates, dual::status_t::UNSET); + std::vector dual_simplex_status_up(num_candidates, dual::status_t::UNSET); + + f_t dual_simplex_start_time = tic(); + + if (rb_mode != 2) 
{ #pragma omp taskloop if (num_tasks > 1) priority(task_priority) num_tasks(num_tasks) \ - shared(score_mutex) - for (i_t i = 0; i < num_candidates; ++i) { - const i_t j = unreliable_list[i]; - - if (toc(start_time) > settings.time_limit) { continue; } - - pseudo_cost_mutex_down[j].lock(); - if (pseudo_cost_num_down[j] < reliable_threshold) { - // Do trial branching on the down branch - f_t obj = trial_branching(worker->leaf_problem, - settings, - var_types, - node_ptr->vstatus, - worker->leaf_edge_norms, - worker->basis_factors, - worker->basic_list, - worker->nonbasic_list, - j, - worker->leaf_problem.lower[j], - std::floor(solution[j]), - upper_bound, - branch_and_bound_lp_iter_per_node, - start_time, - reliability_branching_settings.upper_max_lp_iter, - reliability_branching_settings.lower_max_lp_iter, - strong_branching_lp_iter); - - if (!std::isnan(obj)) { - f_t change_in_obj = std::max(obj - node_ptr->lower_bound, eps); - f_t change_in_x = solution[j] - std::floor(solution[j]); - pseudo_cost_sum_down[j] += change_in_obj / change_in_x; - pseudo_cost_num_down[j]++; + shared(score_mutex, \ + sb_view, \ + dual_simplex_obj_down, \ + dual_simplex_obj_up, \ + dual_simplex_status_down, \ + dual_simplex_status_up, \ + unreliable_list) + for (i_t i = 0; i < num_candidates; ++i) { + auto [score, j] = unreliable_list[i]; + + if (toc(start_time) > settings.time_limit) { continue; } + + if (rb_mode == 1 && sb_view.is_solved(i)) { + log.printf( + "DS skipping variable %d branch down (shared_idx %d): already solved by PDLP\n", j, i); + } else { + pseudo_cost_mutex_down[j].lock(); + if (pseudo_cost_num_down[j] < reliable_threshold) { + // Do trial branching on the down branch + const auto [obj, status] = trial_branching(worker->leaf_problem, + settings, + var_types, + node_ptr->vstatus, + worker->leaf_edge_norms, + worker->basis_factors, + worker->basic_list, + worker->nonbasic_list, + j, + worker->leaf_problem.lower[j], + std::floor(leaf_solution.x[j]), + upper_bound, + 
start_time, + iter_limit_per_trial, + strong_branching_lp_iter); + + dual_simplex_obj_down[i] = obj; + dual_simplex_status_down[i] = status; + if (!std::isnan(obj)) { + f_t change_in_obj = std::max(obj - node_ptr->lower_bound, eps); + f_t change_in_x = leaf_solution.x[j] - std::floor(leaf_solution.x[j]); + pseudo_cost_sum_down[j] += change_in_obj / change_in_x; + pseudo_cost_num_down[j]++; + // Should be valid if we are already here + if (rb_mode == 1 && is_dual_simplex_done(status)) { sb_view.mark_solved(i); } + } + } else { + // Variable became reliable, mark it as solved so that batch PDLP does not solve it again + if (rb_mode == 1) sb_view.mark_solved(i); + } + pseudo_cost_mutex_down[j].unlock(); } - } - pseudo_cost_mutex_down[j].unlock(); - - if (toc(start_time) > settings.time_limit) { continue; } - - pseudo_cost_mutex_up[j].lock(); - if (pseudo_cost_num_up[j] < reliable_threshold) { - f_t obj = trial_branching(worker->leaf_problem, - settings, - var_types, - node_ptr->vstatus, - worker->leaf_edge_norms, - worker->basis_factors, - worker->basic_list, - worker->nonbasic_list, - j, - std::ceil(solution[j]), - worker->leaf_problem.upper[j], - upper_bound, - branch_and_bound_lp_iter_per_node, - start_time, - reliability_branching_settings.upper_max_lp_iter, - reliability_branching_settings.lower_max_lp_iter, - strong_branching_lp_iter); - - if (!std::isnan(obj)) { - f_t change_in_obj = std::max(obj - node_ptr->lower_bound, eps); - f_t change_in_x = std::ceil(solution[j]) - solution[j]; - pseudo_cost_sum_up[j] += change_in_obj / change_in_x; - pseudo_cost_num_up[j]++; + + if (toc(start_time) > settings.time_limit) { continue; } + + const i_t shared_idx = i + num_candidates; + if (rb_mode == 1 && sb_view.is_solved(shared_idx)) { + log.printf("DS skipping variable %d branch up (shared_idx %d): already solved by PDLP\n", + j, + shared_idx); + } else { + pseudo_cost_mutex_up[j].lock(); + if (pseudo_cost_num_up[j] < reliable_threshold) { + const auto [obj, status] = 
trial_branching(worker->leaf_problem, + settings, + var_types, + node_ptr->vstatus, + worker->leaf_edge_norms, + worker->basis_factors, + worker->basic_list, + worker->nonbasic_list, + j, + std::ceil(leaf_solution.x[j]), + worker->leaf_problem.upper[j], + upper_bound, + start_time, + iter_limit_per_trial, + strong_branching_lp_iter); + + dual_simplex_obj_up[i] = obj; + dual_simplex_status_up[i] = status; + if (!std::isnan(obj)) { + f_t change_in_obj = std::max(obj - node_ptr->lower_bound, eps); + f_t change_in_x = std::ceil(leaf_solution.x[j]) - leaf_solution.x[j]; + pseudo_cost_sum_up[j] += change_in_obj / change_in_x; + pseudo_cost_num_up[j]++; + // Should be valid if we are already here + if (rb_mode == 1 && is_dual_simplex_done(status)) { sb_view.mark_solved(shared_idx); } + } + } else { + // Variable became reliable, mark it as solved so that batch PDLP does not solve it again + if (rb_mode == 1) sb_view.mark_solved(shared_idx); + } + pseudo_cost_mutex_up[j].unlock(); } + + if (toc(start_time) > settings.time_limit) { continue; } + + score = + calculate_pseudocost_score(j, leaf_solution.x, pseudo_cost_up_avg, pseudo_cost_down_avg); + + score_mutex.lock(); + if (score > max_score) { + max_score = score; + branch_var = j; + } + score_mutex.unlock(); } - pseudo_cost_mutex_up[j].unlock(); - if (toc(start_time) > settings.time_limit) { continue; } + concurrent_halt.store(1); + } - f_t score = calculate_pseudocost_score(j, solution, pseudo_cost_up_avg, pseudo_cost_down_avg); + f_t dual_simplex_elapsed = toc(dual_simplex_start_time); + + // TODO put back + // if (rb_mode != 2) { + // if (rb_mode == 1) { + // log.printf( + // "RB Dual Simplex: %d candidates, %d/%d optimal, %d/%d infeasible, %d/%d failed, %d skipped + // (PDLP) in %.2fs\n", num_candidates, dual_simplex_optimal.load(), num_candidates * 2, + // dual_simplex_infeasible.load(), num_candidates * 2, + // dual_simplex_failed.load(), num_candidates * 2, + // dual_simplex_skipped.load(), 
dual_simplex_elapsed); + // } else { + // log.printf( + // "RB Dual Simplex: %d candidates, %d/%d optimal, %d/%d infeasible, %d/%d failed in + // %.2fs\n", num_candidates, dual_simplex_optimal.load(), num_candidates * 2, + // dual_simplex_infeasible.load(), num_candidates * 2, dual_simplex_failed.load(), + // num_candidates * 2, dual_simplex_elapsed); + // } + //} + + if (use_pdlp) { +#pragma omp taskwait + + i_t pdlp_applied = 0; + i_t pdlp_optimal = 0; + for (i_t i = 0; i < num_candidates; i++) { + const i_t j = candidate_vars[i]; + + // Down: check if PDLP should override DS + if (!std::isnan(pdlp_obj_down[i])) { + pdlp_optimal++; + const auto [merged_obj, source] = merge_sb_result( + dual_simplex_obj_down[i], dual_simplex_status_down[i], pdlp_obj_down[i], true); + // PDLP won the merge, update the pseudo-cost only if node is still unreliable (concurrent + // calls may have made it reliable) + if (source == sb_source_t::PDLP) { + pseudo_cost_mutex_down[j].lock(); + if (pseudo_cost_num_down[j] < reliable_threshold) { + f_t change_in_obj = std::max(merged_obj - node_ptr->lower_bound, eps); + f_t change_in_x = leaf_solution.x[j] - std::floor(leaf_solution.x[j]); + pseudo_cost_sum_down[j] += change_in_obj / change_in_x; + pseudo_cost_num_down[j]++; + pdlp_applied++; + } + pseudo_cost_mutex_down[j].unlock(); + } + } - score_mutex.lock(); - if (score > max_score) { - max_score = score; - branch_var = j; + // Up: check if PDLP should override DS + if (!std::isnan(pdlp_obj_up[i])) { + pdlp_optimal++; + const auto [merged_obj, source] = merge_sb_result( + dual_simplex_obj_up[i], dual_simplex_status_up[i], pdlp_obj_up[i], true); + // PDLP won the merge, update the pseudo-cost only if node is still unreliable (concurrent + // calls may have made it reliable) + if (source == sb_source_t::PDLP) { + pseudo_cost_mutex_up[j].lock(); + if (pseudo_cost_num_up[j] < reliable_threshold) { + f_t change_in_obj = std::max(merged_obj - node_ptr->lower_bound, eps); + f_t change_in_x = 
std::ceil(leaf_solution.x[j]) - leaf_solution.x[j]; + pseudo_cost_sum_up[j] += change_in_obj / change_in_x; + pseudo_cost_num_up[j]++; + pdlp_applied++; + } + pseudo_cost_mutex_up[j].unlock(); + } + } + + f_t score = + calculate_pseudocost_score(j, leaf_solution.x, pseudo_cost_up_avg, pseudo_cost_down_avg); + if (score > max_score) { + max_score = score; + branch_var = j; + } } - score_mutex.unlock(); + + log.printf("RB batch PDLP: %d candidates, %d/%d optimal, %d applied to pseudo-costs\n", + num_candidates, + pdlp_optimal, + num_candidates * 2, + pdlp_applied); } log.printf( - "pc branching on %d. Value %e. Score %e\n", branch_var, solution[branch_var], max_score); + "pc branching on %d. Value %e. Score %e\n", branch_var, leaf_solution.x[branch_var], max_score); return branch_var; } @@ -776,16 +1836,20 @@ void pseudo_costs_t::update_pseudo_costs_from_strong_branching( template class pseudo_costs_t; -template void strong_branching(const user_problem_t& original_problem, - const lp_problem_t& original_lp, +template void strong_branching(const lp_problem_t& original_lp, const simplex_solver_settings_t& settings, double start_time, + const std::vector& new_slacks, const std::vector& var_types, - const std::vector root_soln, + const lp_solution_t& root_solution, const std::vector& fractional, double root_obj, + double upper_bound, const std::vector& root_vstatus, const std::vector& edge_norms, + const std::vector& basic_list, + const std::vector& nonbasic_list, + basis_update_mpf_t& basis_factors, pseudo_costs_t& pc); #endif diff --git a/cpp/src/branch_and_bound/pseudo_costs.hpp b/cpp/src/branch_and_bound/pseudo_costs.hpp index 6b6c6917b6..009bd8b81a 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.hpp +++ b/cpp/src/branch_and_bound/pseudo_costs.hpp @@ -20,7 +20,10 @@ #include #include +#include + #include +#include namespace cuopt::linear_programming::dual_simplex { @@ -403,6 +406,23 @@ struct reliability_branching_settings_t { // Only used when 
`reliable_threshold` is negative i_t max_reliable_threshold = 5; i_t min_reliable_threshold = 1; + + // Estimate the objective change of each fractional variable + // using a single pivot of dual simplex. Then rank the candidates + // based on this estimation. + bool rank_candidates_with_dual_pivot = true; +}; + +template +struct batch_pdlp_warm_cache_t { + const raft::handle_t batch_pdlp_handle{}; + rmm::device_uvector initial_primal{0, batch_pdlp_handle.get_stream()}; + rmm::device_uvector initial_dual{0, batch_pdlp_handle.get_stream()}; + f_t step_size{std::numeric_limits::signaling_NaN()}; + f_t primal_weight{std::numeric_limits::signaling_NaN()}; + i_t pdlp_iteration{-1}; + f_t percent_solved_by_batch_pdlp_at_root{f_t(0.0)}; + bool populated{false}; }; template @@ -414,7 +434,8 @@ class pseudo_costs_t { pseudo_cost_num_down(num_variables), pseudo_cost_num_up(num_variables), pseudo_cost_mutex_up(num_variables), - pseudo_cost_mutex_down(num_variables) + pseudo_cost_mutex_down(num_variables), + AT(1, 1, 1) { } @@ -472,16 +493,17 @@ class pseudo_costs_t { const std::vector& solution, logger_t& log); - i_t reliable_variable_selection(mip_node_t* node_ptr, + i_t reliable_variable_selection(const mip_node_t* node_ptr, const std::vector& fractional, - const std::vector& solution, - const simplex_solver_settings_t& settings, - const std::vector& var_types, branch_and_bound_worker_t* worker, + const std::vector& var_types, const branch_and_bound_stats_t& bnb_stats, + const simplex_solver_settings_t& settings, f_t upper_bound, int max_num_tasks, - logger_t& log); + logger_t& log, + const std::vector& new_slacks, + const lp_problem_t& original_lp); void update_pseudo_costs_from_strong_branching(const std::vector& fractional, const std::vector& root_soln); @@ -504,6 +526,7 @@ class pseudo_costs_t { reliability_branching_settings_t reliability_branching_settings; + csc_matrix_t AT; // Transpose of the constraint matrix A std::vector> pseudo_cost_sum_up; std::vector> 
pseudo_cost_sum_down; std::vector> pseudo_cost_num_up; @@ -514,19 +537,25 @@ class pseudo_costs_t { std::vector pseudo_cost_mutex_down; omp_atomic_t num_strong_branches_completed = 0; omp_atomic_t strong_branching_lp_iter = 0; + + batch_pdlp_warm_cache_t pdlp_warm_cache; }; template -void strong_branching(const user_problem_t& original_problem, - const lp_problem_t& original_lp, +void strong_branching(const lp_problem_t& original_lp, const simplex_solver_settings_t& settings, f_t start_time, + const std::vector& new_slacks, const std::vector& var_types, - const std::vector root_soln, + const lp_solution_t& root_solution, const std::vector& fractional, f_t root_obj, + f_t upper_bound, const std::vector& root_vstatus, const std::vector& edge_norms, + const std::vector& basic_list, + const std::vector& nonbasic_list, + basis_update_mpf_t& basis_factors, pseudo_costs_t& pc); } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/branch_and_bound/shared_strong_branching_context.hpp b/cpp/src/branch_and_bound/shared_strong_branching_context.hpp new file mode 100644 index 0000000000..a9e697ae58 --- /dev/null +++ b/cpp/src/branch_and_bound/shared_strong_branching_context.hpp @@ -0,0 +1,60 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include +#include +#include +#include + +namespace cuopt::linear_programming::dual_simplex { + +template +struct shared_strong_branching_context_t { + std::vector> solved; + + explicit shared_strong_branching_context_t(size_t num_subproblems) : solved(num_subproblems) + { + for (auto& s : solved) + s.store(0); + } +}; + +template +struct shared_strong_branching_context_view_t { + std::span> solved; + + shared_strong_branching_context_view_t() = default; + + shared_strong_branching_context_view_t(std::span> s) : solved(s) {} + + bool is_valid() const { return !solved.empty(); } + + bool is_solved(i_t local_idx) const + { + assert(local_idx >= 0 && static_cast(local_idx) < solved.size() && + "local_idx out of bounds"); + return solved[local_idx].load() != 0; + } + + void mark_solved(i_t local_idx) const + { + assert(local_idx >= 0 && static_cast(local_idx) < solved.size() && + "local_idx out of bounds"); + solved[local_idx].store(1); + } + + shared_strong_branching_context_view_t subview(i_t offset, i_t count) const + { + assert(offset >= 0 && count >= 0 && static_cast(offset + count) <= solved.size() && + "subview out of bounds"); + return {solved.subspan(offset, count)}; + } +}; + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp index f55ee0837d..f83c37b922 100644 --- a/cpp/src/dual_simplex/crossover.cpp +++ b/cpp/src/dual_simplex/crossover.cpp @@ -612,7 +612,7 @@ i_t dual_push(const lp_problem_t& lp, return TIME_LIMIT_RETURN; } if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { - settings.log.printf("Concurrent halt\n"); + if (!settings.inside_mip) { settings.log.printf("Concurrent halt\n"); } return CONCURRENT_HALT_RETURN; } } @@ -989,7 +989,7 @@ i_t primal_push(const lp_problem_t& lp, return TIME_LIMIT_RETURN; } if (settings.concurrent_halt != nullptr && 
*settings.concurrent_halt == 1) { - settings.log.printf("Concurrent halt\n"); + if (!settings.inside_mip) { settings.log.printf("Concurrent halt\n"); } return CONCURRENT_HALT_RETURN; } } @@ -1353,7 +1353,7 @@ crossover_status_t crossover(const lp_problem_t& lp, return crossover_status_t::TIME_LIMIT; } if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { - settings.log.printf("Concurrent halt\n"); + if (!settings.inside_mip) { settings.log.printf("Concurrent halt\n"); } return crossover_status_t::CONCURRENT_LIMIT; } @@ -1415,7 +1415,7 @@ crossover_status_t crossover(const lp_problem_t& lp, return crossover_status_t::TIME_LIMIT; } if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { - settings.log.printf("Concurrent halt\n"); + if (!settings.inside_mip) { settings.log.printf("Concurrent halt\n"); } return crossover_status_t::CONCURRENT_LIMIT; } primal_infeas = primal_infeasibility(lp, settings, vstatus, solution.x); @@ -1577,7 +1577,7 @@ crossover_status_t crossover(const lp_problem_t& lp, return crossover_status_t::TIME_LIMIT; } if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { - settings.log.printf("Concurrent halt\n"); + if (!settings.inside_mip) { settings.log.printf("Concurrent halt\n"); } return crossover_status_t::CONCURRENT_LIMIT; } solution.iterations += iter; diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index a6a1b80b6f..5b1130796e 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -82,6 +82,129 @@ class nvtx_range_guard { bool active_; }; +template +void compute_reduced_cost_update(const lp_problem_t& lp, + const std::vector& basic_list, + const std::vector& nonbasic_list, + const std::vector& delta_y, + i_t leaving_index, + i_t direction, + std::vector& delta_z_mark, + std::vector& delta_z_indices, + std::vector& delta_z, + f_t& work_estimate) +{ + const i_t m = lp.num_rows; + const i_t n = lp.num_cols; + + size_t 
nnzs_processed = 0; + // delta_zB = sigma*ei + for (i_t k = 0; k < m; k++) { + const i_t j = basic_list[k]; + delta_z[j] = 0; + } + work_estimate += 2 * m; + delta_z[leaving_index] = direction; + // delta_zN = -N'*delta_y + const i_t num_nonbasic = n - m; + for (i_t k = 0; k < num_nonbasic; k++) { + const i_t j = nonbasic_list[k]; + // z_j <- -A(:, j)'*delta_y + const i_t col_start = lp.A.col_start[j]; + const i_t col_end = lp.A.col_start[j + 1]; + f_t dot = 0.0; + for (i_t p = col_start; p < col_end; ++p) { + dot += lp.A.x[p] * delta_y[lp.A.i[p]]; + } + nnzs_processed += col_end - col_start; + + delta_z[j] = -dot; + if (dot != 0.0) { + delta_z_indices.push_back(j); // Note delta_z_indices has n elements reserved + delta_z_mark[j] = 1; + } + } + work_estimate += 3 * num_nonbasic; + work_estimate += 3 * nnzs_processed; + work_estimate += 2 * delta_z_indices.size(); +} + +template +void compute_delta_z(const csc_matrix_t& A_transpose, + const sparse_vector_t& delta_y, + i_t leaving_index, + i_t direction, + const std::vector& nonbasic_mark, + std::vector& delta_z_mark, + std::vector& delta_z_indices, + std::vector& delta_z, + f_t& work_estimate) +{ + // delta_zN = - N'*delta_y + const i_t nz_delta_y = delta_y.i.size(); + size_t nnz_processed = 0; + size_t nonbasic_marked = 0; + for (i_t k = 0; k < nz_delta_y; k++) { + const i_t i = delta_y.i[k]; + const f_t delta_y_i = delta_y.x[k]; + if (std::abs(delta_y_i) < 1e-12) { continue; } + const i_t row_start = A_transpose.col_start[i]; + const i_t row_end = A_transpose.col_start[i + 1]; + nnz_processed += row_end - row_start; + for (i_t p = row_start; p < row_end; ++p) { + const i_t j = A_transpose.i[p]; + if (nonbasic_mark[j] >= 0) { + delta_z[j] -= delta_y_i * A_transpose.x[p]; + nonbasic_marked++; + if (!delta_z_mark[j]) { + delta_z_mark[j] = 1; + delta_z_indices.push_back(j); + } + } + } + } + work_estimate += 4 * nz_delta_y; + work_estimate += 2 * nnz_processed; + work_estimate += 3 * nonbasic_marked; + work_estimate 
+= 2 * delta_z_indices.size(); + + // delta_zB = sigma*ei + delta_z[leaving_index] = direction; + +#ifdef CHECK_CHANGE_IN_REDUCED_COST + const i_t m = A_transpose.n; + const i_t n = A_transpose.m; + std::vector delta_y_dense(m); + delta_y.to_dense(delta_y_dense); + std::vector delta_z_check(n); + std::vector delta_z_mark_check(n, 0); + std::vector delta_z_indices_check; + phase2::compute_reduced_cost_update(lp, + basic_list, + nonbasic_list, + delta_y_dense, + leaving_index, + direction, + delta_z_mark_check, + delta_z_indices_check, + delta_z_check, + work_estimate); + f_t error_check = 0.0; + for (i_t k = 0; k < n; ++k) { + const f_t diff = std::abs(delta_z[k] - delta_z_check[k]); + if (diff > 1e-6) { + printf("delta_z error %d transpose %e no transpose %e diff %e\n", + k, + delta_z[k], + delta_z_check[k], + diff); + } + error_check = std::max(error_check, diff); + } + if (error_check > 1e-6) { printf("delta_z error %e\n", error_check); } +#endif +} + namespace phase2 { // Computes vectors farkas_y, farkas_zl, farkas_zu that satisfy @@ -322,129 +445,6 @@ void initial_perturbation(const lp_problem_t& lp, n); } -template -void compute_reduced_cost_update(const lp_problem_t& lp, - const std::vector& basic_list, - const std::vector& nonbasic_list, - const std::vector& delta_y, - i_t leaving_index, - i_t direction, - std::vector& delta_z_mark, - std::vector& delta_z_indices, - std::vector& delta_z, - f_t& work_estimate) -{ - const i_t m = lp.num_rows; - const i_t n = lp.num_cols; - - size_t nnzs_processed = 0; - // delta_zB = sigma*ei - for (i_t k = 0; k < m; k++) { - const i_t j = basic_list[k]; - delta_z[j] = 0; - } - work_estimate += 2 * m; - delta_z[leaving_index] = direction; - // delta_zN = -N'*delta_y - const i_t num_nonbasic = n - m; - for (i_t k = 0; k < num_nonbasic; k++) { - const i_t j = nonbasic_list[k]; - // z_j <- -A(:, j)'*delta_y - const i_t col_start = lp.A.col_start[j]; - const i_t col_end = lp.A.col_start[j + 1]; - f_t dot = 0.0; - for (i_t p = 
col_start; p < col_end; ++p) { - dot += lp.A.x[p] * delta_y[lp.A.i[p]]; - } - nnzs_processed += col_end - col_start; - - delta_z[j] = -dot; - if (dot != 0.0) { - delta_z_indices.push_back(j); // Note delta_z_indices has n elements reserved - delta_z_mark[j] = 1; - } - } - work_estimate += 3 * num_nonbasic; - work_estimate += 3 * nnzs_processed; - work_estimate += 2 * delta_z_indices.size(); -} - -template -void compute_delta_z(const csc_matrix_t& A_transpose, - const sparse_vector_t& delta_y, - i_t leaving_index, - i_t direction, - std::vector& nonbasic_mark, - std::vector& delta_z_mark, - std::vector& delta_z_indices, - std::vector& delta_z, - f_t& work_estimate) -{ - // delta_zN = - N'*delta_y - const i_t nz_delta_y = delta_y.i.size(); - size_t nnz_processed = 0; - size_t nonbasic_marked = 0; - for (i_t k = 0; k < nz_delta_y; k++) { - const i_t i = delta_y.i[k]; - const f_t delta_y_i = delta_y.x[k]; - if (std::abs(delta_y_i) < 1e-12) { continue; } - const i_t row_start = A_transpose.col_start[i]; - const i_t row_end = A_transpose.col_start[i + 1]; - nnz_processed += row_end - row_start; - for (i_t p = row_start; p < row_end; ++p) { - const i_t j = A_transpose.i[p]; - if (nonbasic_mark[j] >= 0) { - delta_z[j] -= delta_y_i * A_transpose.x[p]; - nonbasic_marked++; - if (!delta_z_mark[j]) { - delta_z_mark[j] = 1; - delta_z_indices.push_back(j); - } - } - } - } - work_estimate += 4 * nz_delta_y; - work_estimate += 2 * nnz_processed; - work_estimate += 3 * nonbasic_marked; - work_estimate += 2 * delta_z_indices.size(); - - // delta_zB = sigma*ei - delta_z[leaving_index] = direction; - -#ifdef CHECK_CHANGE_IN_REDUCED_COST - const i_t m = A_transpose.n; - const i_t n = A_transpose.m; - std::vector delta_y_dense(m); - delta_y.to_dense(delta_y_dense); - std::vector delta_z_check(n); - std::vector delta_z_mark_check(n, 0); - std::vector delta_z_indices_check; - phase2::compute_reduced_cost_update(lp, - basic_list, - nonbasic_list, - delta_y_dense, - leaving_index, - 
direction, - delta_z_mark_check, - delta_z_indices_check, - delta_z_check, - work_estimate); - f_t error_check = 0.0; - for (i_t k = 0; k < n; ++k) { - const f_t diff = std::abs(delta_z[k] - delta_z_check[k]); - if (diff > 1e-6) { - printf("delta_z error %d transpose %e no transpose %e diff %e\n", - k, - delta_z[k], - delta_z_check[k], - diff); - } - error_check = std::max(error_check, diff); - } - if (error_check > 1e-6) { printf("delta_z error %e\n", error_check); } -#endif -} - template void compute_reduced_costs(const std::vector& objective, const csc_matrix_t& A, @@ -2939,30 +2939,30 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, PHASE2_NVTX_RANGE("DualSimplex::delta_z"); if (use_transpose) { sparse_delta_z++; - phase2::compute_delta_z(A_transpose, - delta_y_sparse, - leaving_index, - direction, - nonbasic_mark, - delta_z_mark, - delta_z_indices, - delta_z, - phase2_work_estimate); + compute_delta_z(A_transpose, + delta_y_sparse, + leaving_index, + direction, + nonbasic_mark, + delta_z_mark, + delta_z_indices, + delta_z, + phase2_work_estimate); } else { dense_delta_z++; // delta_zB = sigma*ei delta_y_sparse.to_dense(delta_y); phase2_work_estimate += delta_y.size(); - phase2::compute_reduced_cost_update(lp, - basic_list, - nonbasic_list, - delta_y, - leaving_index, - direction, - delta_z_mark, - delta_z_indices, - delta_z, - phase2_work_estimate); + compute_reduced_cost_update(lp, + basic_list, + nonbasic_list, + delta_y, + leaving_index, + direction, + delta_z_mark, + delta_z_indices, + delta_z, + phase2_work_estimate); } } timers.delta_z_time += timers.stop_timer(); @@ -3571,7 +3571,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, sum_perturb, now); if (phase == 2 && settings.inside_mip == 1 && settings.dual_simplex_objective_callback) { - settings.dual_simplex_objective_callback(user_obj); + settings.dual_simplex_objective_callback(obj); } } @@ -3641,6 +3641,27 @@ template dual::status_t dual_phase2_with_advanced_basis( int& 
iter, std::vector& steepest_edge_norms, work_limit_context_t* work_unit_context); + +template void compute_reduced_cost_update(const lp_problem_t& lp, + const std::vector& basic_list, + const std::vector& nonbasic_list, + const std::vector& delta_y, + int leaving_index, + int direction, + std::vector& delta_z_mark, + std::vector& delta_z_indices, + std::vector& delta_z, + double& work_estimate); + +template void compute_delta_z(const csc_matrix_t& A_transpose, + const sparse_vector_t& delta_y, + int leaving_index, + int direction, + const std::vector& nonbasic_mark, + std::vector& delta_z_mark, + std::vector& delta_z_indices, + std::vector& delta_z, + double& work_estimate); #endif } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/phase2.hpp b/cpp/src/dual_simplex/phase2.hpp index 7f01eb3cf7..5db797449c 100644 --- a/cpp/src/dual_simplex/phase2.hpp +++ b/cpp/src/dual_simplex/phase2.hpp @@ -81,4 +81,27 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, std::vector& delta_y_steepest_edge, work_limit_context_t* work_unit_context = nullptr); +template +void compute_reduced_cost_update(const lp_problem_t& lp, + const std::vector& basic_list, + const std::vector& nonbasic_list, + const std::vector& delta_y, + i_t leaving_index, + i_t direction, + std::vector& delta_z_mark, + std::vector& delta_z_indices, + std::vector& delta_z, + f_t& work_estimate); + +template +void compute_delta_z(const csc_matrix_t& A_transpose, + const sparse_vector_t& delta_y, + i_t leaving_index, + i_t direction, + const std::vector& nonbasic_mark, + std::vector& delta_z_mark, + std::vector& delta_z_indices, + std::vector& delta_z, + f_t& work_estimate); + } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index d2a68d96de..c5ef847106 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -821,6 +821,168 @@ i_t presolve(const 
lp_problem_t& original, { problem = original; std::vector row_sense(problem.num_rows, '='); + // Check for free variables + i_t free_variables = 0; + for (i_t j = 0; j < problem.num_cols; j++) { + if (problem.lower[j] == -inf && problem.upper[j] == inf) { free_variables++; } + } + + if (settings.barrier_presolve && free_variables > 0) { + // Try to remove free variables + std::vector constraints_to_check; + std::vector current_free_variables; + std::vector row_marked(problem.num_rows, 0); + current_free_variables.reserve(problem.num_cols); + constraints_to_check.reserve(problem.num_rows); + for (i_t j = 0; j < problem.num_cols; j++) { + if (problem.lower[j] == -inf && problem.upper[j] == inf) { + current_free_variables.push_back(j); + const i_t col_start = problem.A.col_start[j]; + const i_t col_end = problem.A.col_start[j + 1]; + for (i_t p = col_start; p < col_end; p++) { + const i_t i = problem.A.i[p]; + if (row_marked[i] == 0) { + row_marked[i] = 1; + constraints_to_check.push_back(i); + } + } + } + } + + i_t removed_free_variables = 0; + + if (constraints_to_check.size() > 0) { + // Check if the constraints are feasible + csr_matrix_t Arow(0, 0, 0); + problem.A.to_compressed_row(Arow); + + // The constraints are in the form: + // sum_j a_j x_j = beta + for (i_t i : constraints_to_check) { + const i_t row_start = Arow.row_start[i]; + const i_t row_end = Arow.row_start[i + 1]; + f_t lower_activity_i = 0.0; + f_t upper_activity_i = 0.0; + i_t lower_inf_i = 0; + i_t upper_inf_i = 0; + i_t last_free_i = -1; + f_t last_free_coeff_i = 0.0; + for (i_t p = row_start; p < row_end; p++) { + const i_t j = Arow.j[p]; + const f_t aij = Arow.x[p]; + const f_t lower_j = problem.lower[j]; + const f_t upper_j = problem.upper[j]; + if (lower_j == -inf && upper_j == inf) { + last_free_i = j; + last_free_coeff_i = aij; + } + if (aij > 0) { + if (lower_j > -inf) { + lower_activity_i += aij * lower_j; + } else { + lower_inf_i++; + } + if (upper_j < inf) { + upper_activity_i += aij * 
upper_j; + } else { + upper_inf_i++; + } + } else { + if (upper_j < inf) { + lower_activity_i += aij * upper_j; + } else { + lower_inf_i++; + } + if (lower_j > -inf) { + upper_activity_i += aij * lower_j; + } else { + upper_inf_i++; + } + } + } + + if (last_free_i == -1) { continue; } + + // sum_j a_ij x_j == beta + + const f_t rhs = problem.rhs[i]; + // sum_{k != j} a_ik x_k + a_ij x_j == rhs + // Suppose that -inf < x_j < inf and all other variables x_k with k != j are bounded + // a_ij x_j == rhs - sum_{k != j} a_ik x_k + // So if a_ij > 0, we have + // x_j == 1/a_ij * (rhs - sum_{k != j} a_ik x_k) + // We can derive two bounds from this: + // x_j <= 1/a_ij * (rhs - lower_activity_i) and + // x_j >= 1/a_ij * (rhs - upper_activity_i) + + // If a_ij < 0, we have + // x_j == 1/a_ij * (rhs - sum_{k != j} a_ik x_k + // And we can derive two bounds from this: + // x_j >= 1/a_ij * (rhs - lower_activity_i) + // x_j <= 1/a_ij * (rhs - upper_activity_i) + const i_t j = last_free_i; + const f_t a_ij = last_free_coeff_i; + const f_t max_bound = 1e10; + bool bounded = false; + if (a_ij > 0) { + if (lower_inf_i == 1) { + const f_t new_upper = 1.0 / a_ij * (rhs - lower_activity_i); + if (new_upper < max_bound) { + problem.upper[j] = new_upper; + bounded = true; + } + } + if (upper_inf_i == 1) { + const f_t new_lower = 1.0 / a_ij * (rhs - upper_activity_i); + if (new_lower > -max_bound) { + problem.lower[j] = new_lower; + bounded = true; + } + } + } else if (a_ij < 0) { + if (lower_inf_i == 1) { + const f_t new_lower = 1.0 / a_ij * (rhs - lower_activity_i); + if (new_lower > -max_bound) { + problem.lower[j] = new_lower; + bounded = true; + } + } + if (upper_inf_i == 1) { + const f_t new_upper = 1.0 / a_ij * (rhs - upper_activity_i); + if (new_upper < max_bound) { + problem.upper[j] = new_upper; + bounded = true; + } + } + } + + if (bounded) { removed_free_variables++; } + } + } + + for (i_t j : current_free_variables) { + if (problem.lower[j] > -inf && problem.upper[j] < inf) { 
+ // We don't need two bounds. Pick the smallest one. + if (std::abs(problem.lower[j]) < std::abs(problem.upper[j])) { + // Restore the inf in the upper bound. Barrier will not require an additional w variable + problem.upper[j] = inf; + } else { + // Restores the -inf in the lower bound. Barrier will require an additional w variable + problem.lower[j] = -inf; + } + } + } + + i_t new_free_variables = 0; + for (i_t j = 0; j < problem.num_cols; j++) { + if (problem.lower[j] == -inf && problem.upper[j] == inf) { new_free_variables++; } + } + if (removed_free_variables != 0) { + settings.log.printf("Bounded %d free variables\n", removed_free_variables); + } + assert(new_free_variables == free_variables - removed_free_variables); + free_variables = new_free_variables; + } // The original problem may have a variable without a lower bound // but a finite upper bound @@ -834,7 +996,43 @@ i_t presolve(const lp_problem_t& original, settings.log.printf("%d variables with no lower bound\n", no_lower_bound); } - // FIXME:: handle no lower bound case for barrier presolve + // Handle -inf < x_j <= u_j by substituting x'_j = -x_j, giving -u_j <= x'_j < inf + if (settings.barrier_presolve && no_lower_bound > 0) { + presolve_info.negated_variables.reserve(no_lower_bound); + for (i_t j = 0; j < problem.num_cols; j++) { + if (problem.lower[j] == -inf && problem.upper[j] < inf) { + presolve_info.negated_variables.push_back(j); + + problem.lower[j] = -problem.upper[j]; + problem.upper[j] = inf; + problem.objective[j] *= -1; + + const i_t col_start = problem.A.col_start[j]; + const i_t col_end = problem.A.col_start[j + 1]; + for (i_t p = col_start; p < col_end; p++) { + problem.A.x[p] *= -1.0; + } + } + } + + // (1/2) x^T Q x with x = D x' (D_ii = -1 for negated columns) is (1/2) x'^T D Q D x'. + // One pass: Q'_{ik} = D_{ii} D_{kk} Q_{ik} — flip iff exactly one of {i,k} is negated. 
+ if (problem.Q.n > 0 && !presolve_info.negated_variables.empty()) { + std::vector is_negated(static_cast(problem.num_cols), false); + for (i_t const j : presolve_info.negated_variables) { + is_negated[static_cast(j)] = true; + } + for (i_t row = 0; row < problem.Q.m; ++row) { + const i_t q_start = problem.Q.row_start[row]; + const i_t q_end = problem.Q.row_start[row + 1]; + const bool is_negated_row = is_negated[static_cast(row)]; + for (i_t p = q_start; p < q_end; ++p) { + const i_t col = problem.Q.j[p]; + if (is_negated_row != is_negated[static_cast(col)]) { problem.Q.x[p] *= -1.0; } + } + } + } + } // The original problem may have nonzero lower bounds // 0 != l_j <= x_j <= u_j @@ -939,12 +1137,6 @@ i_t presolve(const lp_problem_t& original, remove_empty_cols(problem, num_empty_cols, presolve_info); } - // Check for free variables - i_t free_variables = 0; - for (i_t j = 0; j < problem.num_cols; j++) { - if (problem.lower[j] == -inf && problem.upper[j] == inf) { free_variables++; } - } - problem.Q.check_matrix("Before free variable expansion"); if (settings.barrier_presolve && free_variables > 0) { @@ -1511,6 +1703,14 @@ void uncrush_solution(const presolve_info_t& presolve_info, } settings.log.printf("Post-solve: Handling removed lower bounds %d\n", num_lower_bounds); } + + if (presolve_info.negated_variables.size() > 0) { + for (const i_t j : presolve_info.negated_variables) { + input_x[j] *= -1.0; + input_z[j] *= -1.0; + } + } + assert(uncrushed_x.size() == input_x.size()); assert(uncrushed_y.size() == input_y.size()); assert(uncrushed_z.size() == input_z.size()); diff --git a/cpp/src/dual_simplex/presolve.hpp b/cpp/src/dual_simplex/presolve.hpp index a068ed04ab..d570ea933e 100644 --- a/cpp/src/dual_simplex/presolve.hpp +++ b/cpp/src/dual_simplex/presolve.hpp @@ -202,6 +202,9 @@ struct presolve_info_t { std::vector removed_constraints; folding_info_t folding_info; + + // Variables that were negated to handle -inf < x_j <= u_j + std::vector negated_variables; 
}; template diff --git a/cpp/src/dual_simplex/right_looking_lu.cpp b/cpp/src/dual_simplex/right_looking_lu.cpp index 37202000f8..5cb0185c8c 100644 --- a/cpp/src/dual_simplex/right_looking_lu.cpp +++ b/cpp/src/dual_simplex/right_looking_lu.cpp @@ -1258,7 +1258,7 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, } if (toc(start_time) > settings.time_limit) { return TIME_LIMIT_RETURN; } if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { - settings.log.printf("Concurrent halt\n"); + if (!settings.inside_mip) { settings.log.printf("Concurrent halt\n"); } return CONCURRENT_HALT_RETURN; } } diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index 1ba65aad06..cfc120e477 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -106,6 +106,9 @@ struct simplex_solver_settings_t { reduced_cost_strengthening(-1), cut_change_threshold(1e-3), cut_min_orthogonality(0.5), + mip_batch_pdlp_strong_branching(0), + mip_batch_pdlp_reliability_branching(0), + strong_branching_simplex_iteration_limit(-1), random_seed(0), reliability_branching(-1), inside_mip(0), @@ -189,8 +192,16 @@ struct simplex_solver_settings_t { // strengthening f_t cut_change_threshold; // threshold for cut change f_t cut_min_orthogonality; // minimum orthogonality for cuts - i_t mip_batch_pdlp_strong_branching{0}; // 0 if not using batch PDLP for strong branching, 1 if - // using batch PDLP for strong branching + i_t + mip_batch_pdlp_strong_branching; // 0 = DS only, 1 = cooperative DS + PDLP, 2 = batch PDLP only + i_t mip_batch_pdlp_reliability_branching; // 0 = DS only, 1 = cooperative DS + PDLP, 2 = batch + // PDLP only + // Set the maximum number of simplex iterations allowed per trial branch when applying + // strong branching to the root node. 
+ // -1 - Automatic (iteration limit = 200) + // 0, 1 - Estimate the objective change using a single pivot of dual simplex + // >1 - Set as the iteration limit in dual simplex + i_t strong_branching_simplex_iteration_limit; diving_heuristics_settings_t diving_settings; // Settings for the diving heuristics diff --git a/cpp/src/dual_simplex/user_problem.hpp b/cpp/src/dual_simplex/user_problem.hpp index f50a6d33a5..73c4c391be 100644 --- a/cpp/src/dual_simplex/user_problem.hpp +++ b/cpp/src/dual_simplex/user_problem.hpp @@ -46,7 +46,7 @@ struct user_problem_t { std::vector row_names; std::vector col_names; f_t obj_constant; - f_t obj_scale; // 1.0 for min, -1.0 for max + f_t obj_scale; // positive for min, negative for max bool objective_is_integral{false}; std::vector var_types; std::vector Q_offsets; diff --git a/cpp/src/grpc/cuopt_remote.proto b/cpp/src/grpc/cuopt_remote.proto index d2617e0ef8..cc7af2a1f7 100644 --- a/cpp/src/grpc/cuopt_remote.proto +++ b/cpp/src/grpc/cuopt_remote.proto @@ -162,7 +162,7 @@ message MIPSolverSettings { int32 num_cpu_threads = 12; int32 num_gpus = 13; int32 presolver = 14; - bool mip_scaling = 15; + int32 mip_scaling = 15; } // LP solve request @@ -202,7 +202,7 @@ message LPSolution { double gap = 24; int32 nb_iterations = 25; double solve_time = 26; - bool solved_by_pdlp = 27; + int32 solved_by = 27; } enum PDLPTerminationStatus { diff --git a/cpp/src/grpc/cuopt_remote_service.proto b/cpp/src/grpc/cuopt_remote_service.proto index 86777baba6..24c7517781 100644 --- a/cpp/src/grpc/cuopt_remote_service.proto +++ b/cpp/src/grpc/cuopt_remote_service.proto @@ -205,7 +205,7 @@ message ChunkedResultHeader { double gap = 16; int32 nb_iterations = 17; double solve_time = 18; - bool solved_by_pdlp = 19; + int32 solved_by = 19; // MIP result scalars MIPTerminationStatus mip_termination_status = 30; diff --git a/cpp/src/grpc/grpc_settings_mapper.cpp b/cpp/src/grpc/grpc_settings_mapper.cpp index 8885b2e358..0c52d766b0 100644 --- 
a/cpp/src/grpc/grpc_settings_mapper.cpp +++ b/cpp/src/grpc/grpc_settings_mapper.cpp @@ -230,7 +230,12 @@ void map_proto_to_mip_settings(const cuopt::remote::MIPSolverSettings& pb_settin ? static_cast(pv) : presolver_t::Default; } - settings.mip_scaling = pb_settings.mip_scaling(); + { + auto sv = pb_settings.mip_scaling(); + settings.mip_scaling = (sv >= CUOPT_MIP_SCALING_OFF && sv <= CUOPT_MIP_SCALING_NO_OBJECTIVE) + ? sv + : CUOPT_MIP_SCALING_ON; + } } // Explicit template instantiations diff --git a/cpp/src/grpc/grpc_solution_mapper.cpp b/cpp/src/grpc/grpc_solution_mapper.cpp index 700fd12c98..096b466804 100644 --- a/cpp/src/grpc/grpc_solution_mapper.cpp +++ b/cpp/src/grpc/grpc_solution_mapper.cpp @@ -110,7 +110,7 @@ void map_lp_solution_to_proto(const cpu_lp_solution_t& solution, pb_solution->set_gap(solution.get_gap()); pb_solution->set_nb_iterations(solution.get_num_iterations()); pb_solution->set_solve_time(solution.get_solve_time()); - pb_solution->set_solved_by_pdlp(solution.is_solved_by_pdlp()); + pb_solution->set_solved_by(static_cast(solution.solved_by())); if (solution.has_warm_start_data()) { auto* pb_ws = pb_solution->mutable_warm_start_data(); @@ -157,15 +157,15 @@ cpu_lp_solution_t map_proto_to_lp_solution(const cuopt::remote::LPSolu std::vector reduced_cost(pb_solution.reduced_cost().begin(), pb_solution.reduced_cost().end()); - auto status = from_proto_pdlp_status(pb_solution.termination_status()); - auto obj = static_cast(pb_solution.primal_objective()); - auto dual_obj = static_cast(pb_solution.dual_objective()); - auto solve_t = pb_solution.solve_time(); - auto l2_pr = static_cast(pb_solution.l2_primal_residual()); - auto l2_dr = static_cast(pb_solution.l2_dual_residual()); - auto g = static_cast(pb_solution.gap()); - auto iters = static_cast(pb_solution.nb_iterations()); - auto by_pdlp = pb_solution.solved_by_pdlp(); + auto status = from_proto_pdlp_status(pb_solution.termination_status()); + auto obj = 
static_cast(pb_solution.primal_objective()); + auto dual_obj = static_cast(pb_solution.dual_objective()); + auto solve_t = pb_solution.solve_time(); + auto l2_pr = static_cast(pb_solution.l2_primal_residual()); + auto l2_dr = static_cast(pb_solution.l2_dual_residual()); + auto g = static_cast(pb_solution.gap()); + auto iters = static_cast(pb_solution.nb_iterations()); + auto solved_by = static_cast(pb_solution.solved_by()); if (pb_solution.has_warm_start_data()) { const auto& pb_ws = pb_solution.warm_start_data(); @@ -211,7 +211,7 @@ cpu_lp_solution_t map_proto_to_lp_solution(const cuopt::remote::LPSolu l2_dr, g, iters, - by_pdlp, + solved_by, std::move(ws)); } @@ -226,7 +226,7 @@ cpu_lp_solution_t map_proto_to_lp_solution(const cuopt::remote::LPSolu l2_dr, g, iters, - by_pdlp); + solved_by); } template @@ -354,7 +354,7 @@ void populate_chunked_result_header_lp(const cpu_lp_solution_t& soluti header->set_gap(solution.get_gap()); header->set_nb_iterations(solution.get_num_iterations()); header->set_solve_time(solution.get_solve_time()); - header->set_solved_by_pdlp(solution.is_solved_by_pdlp()); + header->set_solved_by(static_cast(solution.solved_by())); const auto& primal = solution.get_primal_solution_host(); const auto& dual = solution.get_dual_solution_host(); @@ -551,15 +551,15 @@ cpu_lp_solution_t chunked_result_to_lp_solution( auto dual = bytes_to_typed(arrays, cuopt::remote::RESULT_DUAL_SOLUTION); auto reduced_cost = bytes_to_typed(arrays, cuopt::remote::RESULT_REDUCED_COST); - auto status = from_proto_pdlp_status(h.lp_termination_status()); - auto obj = static_cast(h.primal_objective()); - auto dual_obj = static_cast(h.dual_objective()); - auto solve_t = h.solve_time(); - auto l2_pr = static_cast(h.l2_primal_residual()); - auto l2_dr = static_cast(h.l2_dual_residual()); - auto g = static_cast(h.gap()); - auto iters = static_cast(h.nb_iterations()); - auto by_pdlp = h.solved_by_pdlp(); + auto status = from_proto_pdlp_status(h.lp_termination_status()); + auto 
obj = static_cast(h.primal_objective()); + auto dual_obj = static_cast(h.dual_objective()); + auto solve_t = h.solve_time(); + auto l2_pr = static_cast(h.l2_primal_residual()); + auto l2_dr = static_cast(h.l2_dual_residual()); + auto g = static_cast(h.gap()); + auto iters = static_cast(h.nb_iterations()); + auto solved_by = static_cast(h.solved_by()); auto ws_primal = bytes_to_typed(arrays, cuopt::remote::RESULT_WS_CURRENT_PRIMAL); if (!ws_primal.empty()) { @@ -598,7 +598,7 @@ cpu_lp_solution_t chunked_result_to_lp_solution( l2_dr, g, iters, - by_pdlp, + solved_by, std::move(ws)); } @@ -613,7 +613,7 @@ cpu_lp_solution_t chunked_result_to_lp_solution( l2_dr, g, iters, - by_pdlp); + solved_by); } template diff --git a/cpp/src/grpc/server/grpc_worker.cpp b/cpp/src/grpc/server/grpc_worker.cpp index 943212b80d..99b0e35b5e 100644 --- a/cpp/src/grpc/server/grpc_worker.cpp +++ b/cpp/src/grpc/server/grpc_worker.cpp @@ -378,7 +378,7 @@ static SolveResult run_lp_solve(DeserializedJob& dj, term_info.l2_dual_residual, term_info.gap, term_info.number_of_steps_taken, - term_info.solved_by_pdlp, + term_info.solved_by, std::move(cpu_ws)); populate_chunked_result_header_lp(cpu_solution, &sr.header); diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu index 183c964a90..c23b1d27ca 100644 --- a/cpp/src/math_optimization/solver_settings.cu +++ b/cpp/src/math_optimization/solver_settings.cu @@ -5,10 +5,17 @@ */ /* clang-format on */ +#include #include #include #include +#include +#include +#include +#include +#include + namespace cuopt::linear_programming { namespace { @@ -16,9 +23,10 @@ namespace { bool string_to_int(const std::string& value, int& result) { try { - result = std::stoi(value); - return true; - } catch (const std::invalid_argument& e) { + size_t pos = 0; + result = std::stoi(value, &pos); + return pos == value.size(); + } catch (const std::exception&) { return false; } } @@ -27,14 +35,32 @@ template bool 
string_to_float(const std::string& value, f_t& result) { try { - if constexpr (std::is_same_v) { result = std::stof(value); } - if constexpr (std::is_same_v) { result = std::stod(value); } - return true; - } catch (const std::invalid_argument& e) { + size_t pos = 0; + if constexpr (std::is_same_v) { result = std::stof(value, &pos); } + if constexpr (std::is_same_v) { result = std::stod(value, &pos); } + if (std::isnan(result)) { return false; } + return pos == value.size(); + } catch (const std::exception&) { return false; } } +std::string quote_if_needed(const std::string& s) +{ + bool needs_quoting = s.empty() || s.find(' ') != std::string::npos || + s.find('"') != std::string::npos || s.find('\t') != std::string::npos; + if (!needs_quoting) return s; + std::string out = "\""; + for (char c : s) { + if (c == '"') + out += "\\\""; + else + out += c; + } + out += '"'; + return out; +} + bool string_to_bool(const std::string& value, bool& result) { if (value == "true" || value == "True" || value == "TRUE" || value == "1" || value == "t" || @@ -75,7 +101,18 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings {CUOPT_PRIMAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.primal_infeasible_tolerance, f_t(0.0), f_t(1e-1), std::max(f_t(1e-10), std::numeric_limits::epsilon())}, {CUOPT_DUAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.dual_infeasible_tolerance, f_t(0.0), f_t(1e-1), std::max(f_t(1e-10), std::numeric_limits::epsilon())}, {CUOPT_MIP_CUT_CHANGE_THRESHOLD, &mip_settings.cut_change_threshold, f_t(-1.0), std::numeric_limits::infinity(), f_t(-1.0)}, - {CUOPT_MIP_CUT_MIN_ORTHOGONALITY, &mip_settings.cut_min_orthogonality, f_t(0.0), f_t(1.0), f_t(0.5)} + {CUOPT_MIP_CUT_MIN_ORTHOGONALITY, &mip_settings.cut_min_orthogonality, f_t(0.0), f_t(1.0), f_t(0.5)}, + // MIP heuristic hyper-parameters (hidden from default --help: name contains "hyper_") + {CUOPT_MIP_HYPER_HEURISTIC_PRESOLVE_TIME_RATIO, &mip_settings.heuristic_params.presolve_time_ratio, 
f_t(0.0), f_t(1.0), f_t(0.1), "fraction of total time for presolve"}, + {CUOPT_MIP_HYPER_HEURISTIC_PRESOLVE_MAX_TIME, &mip_settings.heuristic_params.presolve_max_time, f_t(0.0), std::numeric_limits::infinity(), f_t(60.0), "hard cap on presolve seconds"}, + {CUOPT_MIP_HYPER_HEURISTIC_ROOT_LP_TIME_RATIO, &mip_settings.heuristic_params.root_lp_time_ratio, f_t(0.0), f_t(1.0), f_t(0.1), "fraction of total time for root LP"}, + {CUOPT_MIP_HYPER_HEURISTIC_ROOT_LP_MAX_TIME, &mip_settings.heuristic_params.root_lp_max_time, f_t(0.0), std::numeric_limits::infinity(), f_t(15.0), "hard cap on root LP seconds"}, + {CUOPT_MIP_HYPER_HEURISTIC_RINS_TIME_LIMIT, &mip_settings.heuristic_params.rins_time_limit, f_t(0.0), std::numeric_limits::infinity(), f_t(3.0), "per-call RINS sub-MIP time"}, + {CUOPT_MIP_HYPER_HEURISTIC_RINS_MAX_TIME_LIMIT, &mip_settings.heuristic_params.rins_max_time_limit, f_t(0.0), std::numeric_limits::infinity(), f_t(20.0), "ceiling for RINS adaptive time budget"}, + {CUOPT_MIP_HYPER_HEURISTIC_RINS_FIX_RATE, &mip_settings.heuristic_params.rins_fix_rate, f_t(0.0), f_t(1.0), f_t(0.5), "RINS variable fix rate"}, + {CUOPT_MIP_HYPER_HEURISTIC_INITIAL_INFEASIBILITY_WEIGHT, &mip_settings.heuristic_params.initial_infeasibility_weight, f_t(1e-9), std::numeric_limits::infinity(), f_t(1000.0), "constraint violation penalty seed"}, + {CUOPT_MIP_HYPER_HEURISTIC_RELAXED_LP_TIME_LIMIT, &mip_settings.heuristic_params.relaxed_lp_time_limit, f_t(1e-9), std::numeric_limits::infinity(), f_t(1.0), "base relaxed LP time cap in heuristics"}, + {CUOPT_MIP_HYPER_HEURISTIC_RELATED_VARS_TIME_LIMIT, &mip_settings.heuristic_params.related_vars_time_limit, f_t(1e-9), std::numeric_limits::infinity(), f_t(30.0), "time for related-variable structure build"}, }; // Int parameters @@ -100,13 +137,24 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings {CUOPT_MIP_REDUCED_COST_STRENGTHENING, &mip_settings.reduced_cost_strengthening, -1, std::numeric_limits::max(), -1}, 
{CUOPT_NUM_GPUS, &pdlp_settings.num_gpus, 1, 2, 1}, {CUOPT_NUM_GPUS, &mip_settings.num_gpus, 1, 2, 1}, - {CUOPT_MIP_BATCH_PDLP_STRONG_BRANCHING, &mip_settings.mip_batch_pdlp_strong_branching, 0, 1, 0}, + {CUOPT_MIP_BATCH_PDLP_STRONG_BRANCHING, &mip_settings.mip_batch_pdlp_strong_branching, 0, 2, 0}, + {CUOPT_MIP_BATCH_PDLP_RELIABILITY_BRANCHING, &mip_settings.mip_batch_pdlp_reliability_branching, 0, 2, 0}, + {CUOPT_MIP_STRONG_BRANCHING_SIMPLEX_ITERATION_LIMIT, &mip_settings.strong_branching_simplex_iteration_limit, -1,std::numeric_limits::max(), -1}, {CUOPT_PRESOLVE, reinterpret_cast(&pdlp_settings.presolver), CUOPT_PRESOLVE_DEFAULT, CUOPT_PRESOLVE_PSLP, CUOPT_PRESOLVE_DEFAULT}, {CUOPT_PRESOLVE, reinterpret_cast(&mip_settings.presolver), CUOPT_PRESOLVE_DEFAULT, CUOPT_PRESOLVE_PSLP, CUOPT_PRESOLVE_DEFAULT}, {CUOPT_MIP_DETERMINISM_MODE, &mip_settings.determinism_mode, CUOPT_MODE_OPPORTUNISTIC, CUOPT_MODE_DETERMINISTIC, CUOPT_MODE_OPPORTUNISTIC}, {CUOPT_RANDOM_SEED, &mip_settings.seed, -1, std::numeric_limits::max(), -1}, {CUOPT_MIP_RELIABILITY_BRANCHING, &mip_settings.reliability_branching, -1, std::numeric_limits::max(), -1}, - {CUOPT_PDLP_PRECISION, reinterpret_cast(&pdlp_settings.pdlp_precision), CUOPT_PDLP_DEFAULT_PRECISION, CUOPT_PDLP_MIXED_PRECISION, CUOPT_PDLP_DEFAULT_PRECISION} + {CUOPT_PDLP_PRECISION, reinterpret_cast(&pdlp_settings.pdlp_precision), CUOPT_PDLP_DEFAULT_PRECISION, CUOPT_PDLP_MIXED_PRECISION, CUOPT_PDLP_DEFAULT_PRECISION}, + {CUOPT_MIP_SCALING, &mip_settings.mip_scaling, CUOPT_MIP_SCALING_OFF, CUOPT_MIP_SCALING_NO_OBJECTIVE, CUOPT_MIP_SCALING_ON}, + // MIP heuristic hyper-parameters (hidden from default --help: name contains "hyper_") + {CUOPT_MIP_HYPER_HEURISTIC_POPULATION_SIZE, &mip_settings.heuristic_params.population_size, 1, std::numeric_limits::max(), 32, "max solutions in pool"}, + {CUOPT_MIP_HYPER_HEURISTIC_NUM_CPUFJ_THREADS, &mip_settings.heuristic_params.num_cpufj_threads, 0, std::numeric_limits::max(), 8, "parallel CPU FJ climbers"}, 
+ {CUOPT_MIP_HYPER_HEURISTIC_STAGNATION_TRIGGER, &mip_settings.heuristic_params.stagnation_trigger, 1, std::numeric_limits::max(), 3, "FP loops w/o improvement before recombination"}, + {CUOPT_MIP_HYPER_HEURISTIC_MAX_ITERS_WITHOUT_IMPROVEMENT, &mip_settings.heuristic_params.max_iterations_without_improvement, 1, std::numeric_limits::max(), 8, "diversity step depth after stagnation"}, + {CUOPT_MIP_HYPER_HEURISTIC_N_OF_MINIMUMS_FOR_EXIT, &mip_settings.heuristic_params.n_of_minimums_for_exit, 1, std::numeric_limits::max(), 7000, "FJ baseline local-minima exit threshold"}, + {CUOPT_MIP_HYPER_HEURISTIC_ENABLED_RECOMBINERS, &mip_settings.heuristic_params.enabled_recombiners, 0, 15, 15, "bitmask: 1=BP 2=FP 4=LS 8=SubMIP"}, + {CUOPT_MIP_HYPER_HEURISTIC_CYCLE_DETECTION_LENGTH, &mip_settings.heuristic_params.cycle_detection_length, 1, std::numeric_limits::max(), 30, "FP assignment cycle ring buffer length"}, }; // Bool parameters @@ -116,7 +164,6 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings {CUOPT_PER_CONSTRAINT_RESIDUAL, &pdlp_settings.per_constraint_residual, false}, {CUOPT_SAVE_BEST_PRIMAL_SO_FAR, &pdlp_settings.save_best_primal_so_far, false}, {CUOPT_FIRST_PRIMAL_FEASIBLE, &pdlp_settings.first_primal_feasible, false}, - {CUOPT_MIP_SCALING, &mip_settings.mip_scaling, true}, {CUOPT_MIP_HEURISTICS_ONLY, &mip_settings.heuristics_only, false}, {CUOPT_LOG_TO_CONSOLE, &pdlp_settings.log_to_console, true}, {CUOPT_LOG_TO_CONSOLE, &mip_settings.log_to_console, true}, @@ -476,6 +523,111 @@ const std::vector solver_settings_t::get_parameter_names( return parameter_names; } +template +void solver_settings_t::load_parameters_from_file(const std::string& path) +{ + cuopt_expects(!std::filesystem::is_directory(path) && std::filesystem::exists(path), + error_type_t::ValidationError, + "Parameter config: not a valid file: %s", + path.c_str()); + std::ifstream file(path); + cuopt_expects(file.is_open(), + error_type_t::ValidationError, + "Parameter config: 
cannot open: %s", + path.c_str()); + std::string line; + while (std::getline(file, line)) { + auto first_non_ws = std::find_if_not(line.begin(), line.end(), ::isspace); + if (first_non_ws == line.end() || *first_non_ws == '#') continue; + line.erase(line.begin(), first_non_ws); + + std::istringstream iss(line); + std::string key; + cuopt_expects(iss >> key >> std::ws && iss.get() == '=', + error_type_t::ValidationError, + "Parameter config: bad line: %s", + line.c_str()); + iss >> std::ws; + cuopt_expects(!iss.eof(), + error_type_t::ValidationError, + "Parameter config: missing value: %s", + line.c_str()); + std::string val; + if (iss.peek() == '"') { + iss.get(); + val.clear(); + char ch; + bool closed = false; + while (iss.get(ch)) { + if (ch == '\\' && iss.peek() == '"') { + iss.get(ch); + val += '"'; + } else if (ch == '"') { + closed = true; + break; + } else { + val += ch; + } + } + cuopt_expects(closed, + error_type_t::ValidationError, + "Parameter config: unterminated quote: %s", + line.c_str()); + } else { + iss >> val; + } + std::string trailing; + cuopt_expects(!bool(iss >> trailing), + error_type_t::ValidationError, + "Parameter config: trailing junk: %s", + line.c_str()); + try { + set_parameter_from_string(key, val); + } catch (const std::invalid_argument& e) { + cuopt_expects(false, error_type_t::ValidationError, "Parameter config: %s", e.what()); + } + } + CUOPT_LOG_INFO("Parameters loaded from: %s", path.c_str()); +} + +template +bool solver_settings_t::dump_parameters_to_file(const std::string& path, + bool hyperparameters_only) const +{ + std::ofstream file(path); + if (!file.is_open()) { + CUOPT_LOG_ERROR("Cannot open file for writing: %s", path.c_str()); + return false; + } + file << "# cuOpt parameter configuration (auto-generated)\n"; + file << "# Uncomment and change the values you wish to override.\n\n"; + for (const auto& p : int_parameters) { + if (hyperparameters_only && p.param_name.find("hyper_") == std::string::npos) continue; + if 
(p.description && p.description[0] != '\0') + file << "# " << p.description << " (int, range: [" << p.min_value << ", " << p.max_value + << "])\n"; + file << "# " << p.param_name << " = " << *p.value_ptr << "\n\n"; + } + for (const auto& p : float_parameters) { + if (hyperparameters_only && p.param_name.find("hyper_") == std::string::npos) continue; + if (p.description && p.description[0] != '\0') + file << "# " << p.description << " (double, range: [" << p.min_value << ", " << p.max_value + << "])\n"; + file << "# " << p.param_name << " = " << *p.value_ptr << "\n\n"; + } + for (const auto& p : bool_parameters) { + if (hyperparameters_only && p.param_name.find("hyper_") == std::string::npos) continue; + if (p.description && p.description[0] != '\0') file << "# " << p.description << " (bool)\n"; + file << "# " << p.param_name << " = " << (*p.value_ptr ? "true" : "false") << "\n\n"; + } + for (const auto& p : string_parameters) { + if (hyperparameters_only && p.param_name.find("hyper_") == std::string::npos) continue; + if (p.description && p.description[0] != '\0') file << "# " << p.description << " (string)\n"; + file << "# " << p.param_name << " = " << quote_if_needed(*p.value_ptr) << "\n\n"; + } + return true; +} + #if MIP_INSTANTIATE_FLOAT template class solver_settings_t; template void solver_settings_t::set_parameter(const std::string& name, int value); diff --git a/cpp/src/mip_heuristics/CMakeLists.txt b/cpp/src/mip_heuristics/CMakeLists.txt index 2d9d2f1b8e..13649682a6 100644 --- a/cpp/src/mip_heuristics/CMakeLists.txt +++ b/cpp/src/mip_heuristics/CMakeLists.txt @@ -14,10 +14,12 @@ set(MIP_LP_NECESSARY_FILES ${CMAKE_CURRENT_SOURCE_DIR}/presolve/third_party_presolve.cpp ${CMAKE_CURRENT_SOURCE_DIR}/presolve/gf2_presolve.cpp ${CMAKE_CURRENT_SOURCE_DIR}/solution/solution.cu + ${CMAKE_CURRENT_SOURCE_DIR}/presolve/conflict_graph/clique_table.cu ) # Files that are MIP-specific and not needed for pure LP set(MIP_NON_LP_FILES + 
${CMAKE_CURRENT_SOURCE_DIR}/mip_scaling_strategy.cu ${CMAKE_CURRENT_SOURCE_DIR}/solve.cu ${CMAKE_CURRENT_SOURCE_DIR}/solver.cu ${CMAKE_CURRENT_SOURCE_DIR}/diversity/assignment_hash_map.cu @@ -38,7 +40,6 @@ set(MIP_NON_LP_FILES ${CMAKE_CURRENT_SOURCE_DIR}/presolve/multi_probe.cu ${CMAKE_CURRENT_SOURCE_DIR}/presolve/probing_cache.cu ${CMAKE_CURRENT_SOURCE_DIR}/presolve/trivial_presolve.cu - ${CMAKE_CURRENT_SOURCE_DIR}/presolve/conflict_graph/clique_table.cu ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump.cu ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump_kernels.cu ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/fj_cpu.cu diff --git a/cpp/src/mip_heuristics/diversity/diversity_config.hpp b/cpp/src/mip_heuristics/diversity/diversity_config.hpp index de14260794..dacf7773de 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_config.hpp +++ b/cpp/src/mip_heuristics/diversity/diversity_config.hpp @@ -12,24 +12,20 @@ namespace cuopt::linear_programming::detail { struct diversity_config_t { - double time_ratio_on_init_lp = 0.1; - double max_time_on_lp = 15.0; - double time_ratio_of_probing_cache = 0.1; - double max_time_on_probing = 60.0; - int max_var_diff = 256; - size_t max_solutions = 32; - double initial_infeasibility_weight = 1000.; - double default_time_limit = 10.; - int initial_island_size = 3; - int maximum_island_size = 8; - bool use_avg_diversity = false; - double generation_time_limit_ratio = 0.6; - double max_island_gen_time = 600; - size_t n_sol_for_skip_init_gen = 3; - double max_fast_sol_time = 10; - double lp_run_time_if_feasible = 2.; - double lp_run_time_if_infeasible = 1.; - bool halve_population = false; + double time_ratio_of_probing_cache = 0.1; + double max_time_on_probing = 60.0; + int max_var_diff = 256; + double default_time_limit = 10.; + int initial_island_size = 3; + int maximum_island_size = 8; + bool use_avg_diversity = false; + double generation_time_limit_ratio = 0.6; + double max_island_gen_time = 600; + 
size_t n_sol_for_skip_init_gen = 3; + double max_fast_sol_time = 10; + double lp_run_time_if_feasible = 2.; + double lp_run_time_if_infeasible = 1.; + bool halve_population = false; }; } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index 9187d82f21..e821c016c2 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -42,13 +42,13 @@ diversity_manager_t::diversity_manager_t(mip_solver_context_tn_constraints), + context_.settings.heuristic_params.population_size, + context_.settings.heuristic_params.initial_infeasibility_weight * + context.problem_ptr->n_constraints), lp_optimal_solution(context.problem_ptr->n_variables, context.problem_ptr->handle_ptr->get_stream()), lp_dual_optimal_solution(context.problem_ptr->n_constraints, @@ -225,6 +225,7 @@ bool diversity_manager_t::run_presolve(f_t time_limit, timer_t global_ raft::common::nvtx::range fun_scope("run_presolve"); CUOPT_LOG_INFO("Running presolve!"); timer_t presolve_timer(time_limit); + auto term_crit = ls.constraint_prop.bounds_update.solve(*problem_ptr); if (ls.constraint_prop.bounds_update.infeas_constraints_count > 0) { stats.presolve_time = timer.elapsed_time(); @@ -247,7 +248,8 @@ bool diversity_manager_t::run_presolve(f_t time_limit, timer_t global_ compute_probing_cache(ls.constraint_prop.bounds_update, *problem_ptr, probing_timer); if (problem_is_infeasible) { return false; } } - const bool remap_cache_ids = true; + const bool remap_cache_ids = true; + problem_ptr->related_vars_time_limit = context.settings.heuristic_params.related_vars_time_limit; if (!global_timer.check_time_limit()) { trivial_presolve(*problem_ptr, remap_cache_ids); } if (!problem_ptr->empty && !check_bounds_sanity(*problem_ptr)) { return false; } // if (!presolve_timer.check_time_limit() && !context.settings.heuristics_only && @@ -423,10 
+425,10 @@ solution_t diversity_manager_t::run_solver() return population.best_feasible(); } - population.timer = timer; - const f_t time_limit = timer.remaining_time(); - const f_t lp_time_limit = - std::min(diversity_config.max_time_on_lp, time_limit * diversity_config.time_ratio_on_init_lp); + population.timer = timer; + const f_t time_limit = timer.remaining_time(); + const auto& hp = context.settings.heuristic_params; + const f_t lp_time_limit = std::min(hp.root_lp_max_time, time_limit * hp.root_lp_time_ratio); // after every change to the problem, we should resize all the relevant vars // we need to encapsulate that to prevent repetitions recombine_stats.reset(); @@ -435,7 +437,8 @@ solution_t diversity_manager_t::run_solver() problem_ptr->check_problem_representation(true); // have the structure ready for reusing later problem_ptr->compute_integer_fixed_problem(); - recombiner_t::init_enabled_recombiners(*problem_ptr); + recombiner_t::init_enabled_recombiners( + *problem_ptr, context.settings.heuristic_params.enabled_recombiners); mab_recombiner.resize_mab_arm_stats(recombiner_t::enabled_recombiners.size()); // test problem is not ii cuopt_func_call( @@ -462,23 +465,25 @@ solution_t diversity_manager_t::run_solver() } else if (!fj_only_run) { convert_greater_to_less(*problem_ptr); - f_t tolerance_divisor = - problem_ptr->tolerances.absolute_tolerance / problem_ptr->tolerances.relative_tolerance; - if (tolerance_divisor == 0) { tolerance_divisor = 1; } f_t absolute_tolerance = context.settings.tolerances.absolute_tolerance; pdlp_solver_settings_t pdlp_settings{}; - pdlp_settings.tolerances.relative_primal_tolerance = absolute_tolerance / tolerance_divisor; - pdlp_settings.tolerances.relative_dual_tolerance = absolute_tolerance / tolerance_divisor; - pdlp_settings.time_limit = lp_time_limit; - pdlp_settings.first_primal_feasible = false; - pdlp_settings.concurrent_halt = &global_concurrent_halt; - pdlp_settings.method = method_t::Concurrent; - 
pdlp_settings.inside_mip = true; - pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable2; - pdlp_settings.num_gpus = context.settings.num_gpus; - pdlp_settings.presolver = presolver_t::None; - + pdlp_settings.tolerances.absolute_dual_tolerance = absolute_tolerance; + pdlp_settings.tolerances.relative_dual_tolerance = + context.settings.tolerances.relative_tolerance; + pdlp_settings.tolerances.absolute_primal_tolerance = absolute_tolerance; + pdlp_settings.tolerances.relative_primal_tolerance = + context.settings.tolerances.relative_tolerance; + pdlp_settings.time_limit = lp_time_limit; + pdlp_settings.first_primal_feasible = false; + pdlp_settings.concurrent_halt = &global_concurrent_halt; + pdlp_settings.method = method_t::Concurrent; + pdlp_settings.inside_mip = true; + pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable2; + pdlp_settings.num_gpus = context.settings.num_gpus; + pdlp_settings.presolver = presolver_t::None; + pdlp_settings.per_constraint_residual = true; + set_pdlp_solver_mode(pdlp_settings); timer_t lp_timer(lp_time_limit); auto lp_result = solve_lp_with_method(*problem_ptr, pdlp_settings, lp_timer); @@ -510,7 +515,11 @@ solution_t diversity_manager_t::run_solver() ls.lp_optimal_exists = true; if (!use_staged_simplex_solution) { if (lp_result.get_termination_status() == pdlp_termination_status_t::Optimal) { - set_new_user_bound(lp_result.get_objective_value()); + solution_t lp_sol(*problem_ptr); + lp_sol.copy_new_assignment(lp_optimal_solution); + const bool consider_integrality = false; + lp_sol.compute_feasibility(consider_integrality); + if (lp_sol.get_feasible()) { set_new_user_bound(lp_result.get_objective_value()); } } else if (lp_result.get_termination_status() == pdlp_termination_status_t::PrimalInfeasible) { CUOPT_LOG_ERROR("Problem is primal infeasible, continuing anyway!"); @@ -555,9 +564,10 @@ solution_t diversity_manager_t::run_solver() auto user_obj = lp_result.get_objective_value(); auto solver_obj = 
problem_ptr->get_solver_obj_from_user_obj(user_obj); auto iterations = lp_result.get_additional_termination_information().number_of_steps_taken; + auto method = lp_result.get_additional_termination_information().solved_by; // Set for the B&B (param4 expects solver space, param5 expects user space) problem_ptr->set_root_relaxation_solution_callback( - host_primal, host_dual, host_reduced_costs, solver_obj, user_obj, iterations); + host_primal, host_dual, host_reduced_costs, solver_obj, user_obj, iterations, method); } if (!use_staged_simplex_solution) { diff --git a/cpp/src/mip_heuristics/diversity/lns/rins.cu b/cpp/src/mip_heuristics/diversity/lns/rins.cu index 32ffa778d1..c4331343de 100644 --- a/cpp/src/mip_heuristics/diversity/lns/rins.cu +++ b/cpp/src/mip_heuristics/diversity/lns/rins.cu @@ -32,8 +32,8 @@ rins_t::rins_t(mip_solver_context_t& context_, rins_settings_t settings_) : context(context_), problem_ptr(context.problem_ptr), dm(dm_), settings(settings_) { - fixrate = settings.default_fixrate; - time_limit = settings.default_time_limit; + fixrate = context.settings.heuristic_params.rins_fix_rate; + time_limit = context.settings.heuristic_params.rins_time_limit; } template @@ -224,8 +224,7 @@ void rins_t::run_rins() std::vector> rins_solution_queue; - mip_solver_context_t fj_context( - &rins_handle, &fixed_problem, context.settings, context.scaling); + mip_solver_context_t fj_context(&rins_handle, &fixed_problem, context.settings); fj_t fj(fj_context); solution_t fj_solution(fixed_problem); fj_solution.copy_new_assignment(cuopt::host_copy(fixed_assignment, rins_handle.get_stream())); @@ -265,12 +264,13 @@ void rins_t::run_rins() std::min(current_mip_gap, (f_t)settings.target_mip_gap); branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; branch_and_bound_settings.num_threads = 1; - branch_and_bound_settings.reliability_branching = 0; - branch_and_bound_settings.max_cut_passes = 0; - branch_and_bound_settings.clique_cuts 
= 0; - branch_and_bound_settings.sub_mip = 1; - branch_and_bound_settings.log.log = false; - branch_and_bound_settings.log.log_prefix = "[RINS] "; + branch_and_bound_settings.reliability_branching = 0; + branch_and_bound_settings.max_cut_passes = 0; + branch_and_bound_settings.clique_cuts = 0; + branch_and_bound_settings.sub_mip = 1; + branch_and_bound_settings.strong_branching_simplex_iteration_limit = 200; + branch_and_bound_settings.log.log = false; + branch_and_bound_settings.log.log_prefix = "[RINS] "; branch_and_bound_settings.solution_callback = [&rins_solution_queue](std::vector& solution, f_t objective) { rins_solution_queue.push_back(solution); @@ -297,7 +297,8 @@ void rins_t::run_rins() CUOPT_LOG_DEBUG("RINS submip time limit"); // do goldilocks update fixrate = std::min(fixrate + f_t(0.05), static_cast(settings.max_fixrate)); - time_limit = std::min(time_limit + f_t(2), static_cast(settings.max_time_limit)); + time_limit = std::min(time_limit + f_t(2), + static_cast(context.settings.heuristic_params.rins_max_time_limit)); } else if (branch_and_bound_status == dual_simplex::mip_status_t::INFEASIBLE) { CUOPT_LOG_DEBUG("RINS submip infeasible"); // do goldilocks update, decreasing fixrate @@ -306,7 +307,8 @@ void rins_t::run_rins() CUOPT_LOG_DEBUG("RINS solution not found"); // do goldilocks update fixrate = std::min(fixrate + f_t(0.05), static_cast(settings.max_fixrate)); - time_limit = std::min(time_limit + f_t(2), static_cast(settings.max_time_limit)); + time_limit = std::min(time_limit + f_t(2), + static_cast(context.settings.heuristic_params.rins_max_time_limit)); } cpu_fj_thread.stop_cpu_solver(); diff --git a/cpp/src/mip_heuristics/diversity/lns/rins.cuh b/cpp/src/mip_heuristics/diversity/lns/rins.cuh index 7a04b24897..0a9133f848 100644 --- a/cpp/src/mip_heuristics/diversity/lns/rins.cuh +++ b/cpp/src/mip_heuristics/diversity/lns/rins.cuh @@ -43,11 +43,8 @@ struct rins_settings_t { int nodes_after_later_improvement = 200; double min_fixrate = 0.3; 
double max_fixrate = 0.8; - double default_fixrate = 0.5; double min_fractional_ratio = 0.3; double min_time_limit = 3.; - double max_time_limit = 20.; - double default_time_limit = 3.; double target_mip_gap = 0.03; bool objective_cut = true; }; diff --git a/cpp/src/mip_heuristics/diversity/population.cu b/cpp/src/mip_heuristics/diversity/population.cu index 7fa0df5486..bb0fdd6d11 100644 --- a/cpp/src/mip_heuristics/diversity/population.cu +++ b/cpp/src/mip_heuristics/diversity/population.cu @@ -265,11 +265,6 @@ void population_t::invoke_get_solution_callback( f_t user_bound = context.stats.get_solution_bound(); solution_t temp_sol(sol); problem_ptr->post_process_assignment(temp_sol.assignment); - if (context.settings.mip_scaling) { - cuopt_assert(context.scaling != nullptr, ""); - rmm::device_uvector dummy(0, temp_sol.handle_ptr->get_stream()); - context.scaling->unscale_solutions(temp_sol.assignment, dummy); - } if (problem_ptr->has_papilo_presolve_data()) { problem_ptr->papilo_uncrush_assignment(temp_sol.assignment); } @@ -309,10 +304,8 @@ void population_t::run_solution_callbacks(solution_t& sol) invoke_get_solution_callback(sol, get_sol_callback); } } - // save the best objective here, because we might not have been able to return the solution to - // the user because of the unscaling that causes infeasibility. - // This prevents an issue of repaired, or a fully feasible solution being reported in the call - // back in next run. + // Save the best objective here even if callback handling later exits early. + // This prevents older solutions from being reported as "new best" in subsequent callbacks. 
best_feasible_objective = sol.get_objective(); } @@ -345,10 +338,6 @@ void population_t::run_solution_callbacks(solution_t& sol) incumbent_assignment.size(), sol.handle_ptr->get_stream()); - if (context.settings.mip_scaling) { - cuopt_assert(context.scaling != nullptr, ""); - context.scaling->scale_solutions(incumbent_assignment); - } bool is_valid = problem_ptr->pre_process_assignment(incumbent_assignment); if (!is_valid) { return; } cuopt_assert(outside_sol.assignment.size() == incumbent_assignment.size(), diff --git a/cpp/src/mip_heuristics/diversity/recombiners/recombiner.cuh b/cpp/src/mip_heuristics/diversity/recombiners/recombiner.cuh index 89a5e86c17..4782e9612b 100644 --- a/cpp/src/mip_heuristics/diversity/recombiners/recombiner.cuh +++ b/cpp/src/mip_heuristics/diversity/recombiners/recombiner.cuh @@ -195,10 +195,14 @@ class recombiner_t { "vars_to_fix should be sorted!"); } - static void init_enabled_recombiners(const problem_t& problem) + static void init_enabled_recombiners(const problem_t& problem, + int user_enabled_mask = -1) { std::unordered_set enabled_recombiners; for (auto recombiner : recombiner_types) { + if (user_enabled_mask >= 0 && !(user_enabled_mask & (1 << (uint32_t)recombiner))) { + continue; + } enabled_recombiners.insert(recombiner); } if (problem.expensive_to_fix_vars) { diff --git a/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh b/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh index f6e4f172cf..5a637aae8e 100644 --- a/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh +++ b/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh @@ -14,6 +14,7 @@ #include #include #include +#include namespace cuopt::linear_programming::detail { @@ -106,11 +107,12 @@ class sub_mip_recombiner_t : public recombiner_t { branch_and_bound_settings.relative_mip_gap_tol = context.settings.tolerances.relative_mip_gap; branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; 
branch_and_bound_settings.num_threads = 1; - branch_and_bound_settings.reliability_branching = 0; - branch_and_bound_settings.max_cut_passes = 0; - branch_and_bound_settings.clique_cuts = 0; - branch_and_bound_settings.sub_mip = 1; - branch_and_bound_settings.solution_callback = [this](std::vector& solution, + branch_and_bound_settings.reliability_branching = 0; + branch_and_bound_settings.max_cut_passes = 0; + branch_and_bound_settings.clique_cuts = 0; + branch_and_bound_settings.sub_mip = 1; + branch_and_bound_settings.strong_branching_simplex_iteration_limit = 200; + branch_and_bound_settings.solution_callback = [this](std::vector& solution, f_t objective) { this->solution_callback(solution, objective); }; diff --git a/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu index 55726421d7..3f77427d87 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu +++ b/cpp/src/mip_heuristics/feasibility_jump/early_gpufj.cu @@ -26,7 +26,7 @@ early_gpufj_t::early_gpufj_t(const optimization_problem_t& o op_problem, settings.get_tolerances(), std::move(incumbent_callback)) { context_ptr_ = std::make_unique>( - &this->handle_, this->problem_ptr_.get(), settings, nullptr); + &this->handle_, this->problem_ptr_.get(), settings); } template diff --git a/cpp/src/mip_heuristics/local_search/feasibility_pump/feasibility_pump.cu b/cpp/src/mip_heuristics/local_search/feasibility_pump/feasibility_pump.cu index f28faec249..0a17e3ebfd 100644 --- a/cpp/src/mip_heuristics/local_search/feasibility_pump/feasibility_pump.cu +++ b/cpp/src/mip_heuristics/local_search/feasibility_pump/feasibility_pump.cu @@ -43,7 +43,7 @@ feasibility_pump_t::feasibility_pump_t( fj(fj_), // fj_tree(fj_tree_), line_segment_search(line_segment_search_), - cycle_queue(*context.problem_ptr), + cycle_queue(*context.problem_ptr, context.settings.heuristic_params.cycle_detection_length), constraint_prop(constraint_prop_), 
last_rounding(context.problem_ptr->n_variables, context.problem_ptr->handle_ptr->get_stream()), last_projection(context.problem_ptr->n_variables, @@ -208,7 +208,8 @@ bool feasibility_pump_t::linear_project_onto_polytope(solution_t struct cycle_queue_t { - cycle_queue_t(problem_t& problem) : curr_recent_sol(cycle_detection_length - 1) + cycle_queue_t(problem_t& problem, i_t cycle_len = 30) + : cycle_detection_length(cycle_len), curr_recent_sol(cycle_detection_length - 1) { for (i_t i = 0; i < cycle_detection_length; ++i) { recent_solutions.emplace_back( @@ -86,7 +87,7 @@ struct cycle_queue_t { } std::vector> recent_solutions; - const i_t cycle_detection_length = 30; + const i_t cycle_detection_length; i_t curr_recent_sol; i_t n_iterations_without_cycle = 0; }; diff --git a/cpp/src/mip_heuristics/local_search/local_search.cu b/cpp/src/mip_heuristics/local_search/local_search.cu index 118b7181ab..da29511d70 100644 --- a/cpp/src/mip_heuristics/local_search/local_search.cu +++ b/cpp/src/mip_heuristics/local_search/local_search.cu @@ -46,13 +46,16 @@ local_search_t::local_search_t(mip_solver_context_t& context rng(cuopt::seed_generator::get_seed()), problem_with_objective_cut(*context.problem_ptr, context.problem_ptr->handle_ptr) { - for (auto& cpu_fj : ls_cpu_fj) { - cpu_fj.fj_ptr = &fj; - } - for (auto& cpu_fj : scratch_cpu_fj) { - cpu_fj.fj_ptr = &fj; + const int n_cpufj = context.settings.heuristic_params.num_cpufj_threads; + for (int i = 0; i < n_cpufj; ++i) { + ls_cpu_fj.push_back(std::make_unique>()); + ls_cpu_fj.back()->fj_ptr = &fj; } + scratch_cpu_fj.push_back(std::make_unique>()); + scratch_cpu_fj.back()->fj_ptr = &fj; scratch_cpu_fj_on_lp_opt.fj_ptr = &fj; + + fj.settings.n_of_minimums_for_exit = context.settings.heuristic_params.n_of_minimums_for_exit; } static double local_search_best_obj = std::numeric_limits::max(); @@ -72,7 +75,8 @@ void local_search_t::start_cpufj_scratch_threads(population_t 0) solution.assign_random_within_bounds(0.4); cpu_fj.fj_cpu = 
cpu_fj.fj_ptr->create_cpu_climber(solution, default_weights, @@ -100,8 +104,8 @@ void local_search_t::start_cpufj_scratch_threads(population_tstart_cpu_solver(); } } @@ -141,8 +145,8 @@ void local_search_t::start_cpufj_lptopt_scratch_threads( template void local_search_t::stop_cpufj_scratch_threads() { - for (auto& cpu_fj : scratch_cpu_fj) { - cpu_fj.request_termination(); + for (auto& cpu_fj_ptr : scratch_cpu_fj) { + cpu_fj_ptr->request_termination(); } scratch_cpu_fj_on_lp_opt.request_termination(); } @@ -229,7 +233,8 @@ bool local_search_t::do_fj_solve(solution_t& solution, } auto h_weights = cuopt::host_copy(in_fj.cstr_weights, solution.handle_ptr->get_stream()); auto h_objective_weight = in_fj.objective_weight.value(solution.handle_ptr->get_stream()); - for (auto& cpu_fj : ls_cpu_fj) { + for (auto& cpu_fj_ptr : ls_cpu_fj) { + auto& cpu_fj = *cpu_fj_ptr; cpu_fj.fj_cpu = cpu_fj.fj_ptr->create_cpu_climber(solution, h_weights, h_weights, @@ -242,8 +247,8 @@ bool local_search_t::do_fj_solve(solution_t& solution, auto solution_copy = solution; // Start CPU solver in background thread - for (auto& cpu_fj : ls_cpu_fj) { - cpu_fj.start_cpu_solver(); + for (auto& cpu_fj_ptr : ls_cpu_fj) { + cpu_fj_ptr->start_cpu_solver(); } // Run GPU solver and measure execution time @@ -252,8 +257,8 @@ bool local_search_t::do_fj_solve(solution_t& solution, in_fj.solve(solution); // Stop CPU solver - for (auto& cpu_fj : ls_cpu_fj) { - cpu_fj.stop_cpu_solver(); + for (auto& cpu_fj_ptr : ls_cpu_fj) { + cpu_fj_ptr->stop_cpu_solver(); } auto gpu_fj_end = std::chrono::high_resolution_clock::now(); @@ -263,13 +268,13 @@ bool local_search_t::do_fj_solve(solution_t& solution, f_t best_cpu_obj = std::numeric_limits::max(); // // Wait for CPU solver to finish - for (auto& cpu_fj : ls_cpu_fj) { - bool cpu_sol_found = cpu_fj.wait_for_cpu_solver(); + for (auto& cpu_fj_ptr : ls_cpu_fj) { + bool cpu_sol_found = cpu_fj_ptr->wait_for_cpu_solver(); if (cpu_sol_found) { - f_t cpu_obj = 
cpu_fj.fj_cpu->h_best_objective; + f_t cpu_obj = cpu_fj_ptr->fj_cpu->h_best_objective; if (cpu_obj < best_cpu_obj) { best_cpu_obj = cpu_obj; - solution_cpu.copy_new_assignment(cpu_fj.fj_cpu->h_best_assignment); + solution_cpu.copy_new_assignment(cpu_fj_ptr->fj_cpu->h_best_assignment); solution_cpu.compute_feasibility(); } } @@ -686,8 +691,9 @@ void local_search_t::reset_alpha_and_run_recombiners( f_t& best_objective) { raft::common::nvtx::range fun_scope("reset_alpha_and_run_recombiners"); - constexpr i_t iterations_for_stagnation = 3; - constexpr i_t max_iterations_without_improvement = 8; + const auto& hp = context.settings.heuristic_params; + const i_t iterations_for_stagnation = hp.stagnation_trigger; + const i_t max_iterations_without_improvement = hp.max_iterations_without_improvement; population_ptr->add_external_solutions_to_population(); if (population_ptr->current_size() > 1 && i - last_improved_iteration > iterations_for_stagnation) { diff --git a/cpp/src/mip_heuristics/local_search/local_search.cuh b/cpp/src/mip_heuristics/local_search/local_search.cuh index a36688d71d..94493ebcb3 100644 --- a/cpp/src/mip_heuristics/local_search/local_search.cuh +++ b/cpp/src/mip_heuristics/local_search/local_search.cuh @@ -126,8 +126,8 @@ class local_search_t { feasibility_pump_t fp; std::mt19937 rng; - std::array, 8> ls_cpu_fj; - std::array, 1> scratch_cpu_fj; + std::vector>> ls_cpu_fj; + std::vector>> scratch_cpu_fj; cpu_fj_thread_t scratch_cpu_fj_on_lp_opt; cpu_fj_thread_t deterministic_cpu_fj; problem_t problem_with_objective_cut; diff --git a/cpp/src/mip_heuristics/mip_scaling_strategy.cu b/cpp/src/mip_heuristics/mip_scaling_strategy.cu new file mode 100644 index 0000000000..0aaa606aea --- /dev/null +++ b/cpp/src/mip_heuristics/mip_scaling_strategy.cu @@ -0,0 +1,883 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +namespace cuopt::linear_programming::detail { + +constexpr int row_scaling_max_iterations = 8; +constexpr double row_scaling_min_initial_log2_spread = 12.0; +constexpr int row_scaling_factor_exponent = 5; +constexpr int row_scaling_big_m_soft_factor_exponent = 4; +constexpr double row_scaling_min_factor = + 1.0 / static_cast(std::uint64_t{1} << row_scaling_factor_exponent); +constexpr double row_scaling_max_factor = + static_cast(std::uint64_t{1} << row_scaling_factor_exponent); +constexpr double row_scaling_big_m_soft_min_factor = + 1.0 / static_cast(std::uint64_t{1} << row_scaling_big_m_soft_factor_exponent); +constexpr double row_scaling_big_m_soft_max_factor = 1.0; +constexpr double row_scaling_spread_rel_tol = 1.0e-2; +constexpr double integer_coefficient_rel_tol = 1.0e-6; +constexpr double integer_multiplier_rounding_tolerance = 1.0e-6; +constexpr double min_abs_objective_coefficient_threshold = 1.0e-2; +constexpr double max_obj_scaling_coefficient = 1.0e3; + +constexpr int cumulative_row_scaling_exponent = 8; +constexpr double cumulative_row_scaling_min = + 1.0 / static_cast(std::uint64_t{1} << cumulative_row_scaling_exponent); +constexpr double cumulative_row_scaling_max = + static_cast(std::uint64_t{1} << cumulative_row_scaling_exponent); + +constexpr double post_scaling_max_ratio_warn = 1.0e15; + +constexpr double big_m_abs_threshold = 1.0e4; +constexpr double big_m_ratio_threshold = 1.0e4; + +template +struct abs_value_transform_t { + __device__ f_t operator()(f_t value) const { return raft::abs(value); } +}; + +template +struct nonzero_abs_or_inf_transform_t { + __device__ f_t operator()(f_t value) const + { + const f_t 
abs_value = raft::abs(value); + return abs_value > f_t(0) ? abs_value : std::numeric_limits::infinity(); + } +}; + +template +struct nonzero_count_transform_t { + __device__ i_t operator()(f_t value) const { return raft::abs(value) > f_t(0) ? i_t(1) : i_t(0); } +}; + +template +struct max_op_t { + __host__ __device__ item_t operator()(const item_t& lhs, const item_t& rhs) const + { + return lhs > rhs ? lhs : rhs; + } +}; + +template +struct min_op_t { + __host__ __device__ item_t operator()(const item_t& lhs, const item_t& rhs) const + { + return lhs < rhs ? lhs : rhs; + } +}; + +struct gcd_op_t { + __host__ __device__ std::int64_t operator()(std::int64_t lhs, std::int64_t rhs) const + { + lhs = lhs < 0 ? -lhs : lhs; + rhs = rhs < 0 ? -rhs : rhs; + if (lhs == 0) { return rhs; } + if (rhs == 0) { return lhs; } + while (rhs != 0) { + const std::int64_t remainder = lhs % rhs; + lhs = rhs; + rhs = remainder; + } + return lhs; + } +}; + +template +struct integer_coeff_for_integer_var_transform_t { + __device__ std::int64_t operator()(thrust::tuple coeff_with_type) const + { + const f_t coefficient = thrust::get<0>(coeff_with_type); + const var_t var_type = thrust::get<1>(coeff_with_type); + if (var_type != var_t::INTEGER) { return std::int64_t{0}; } + + const f_t abs_coefficient = raft::abs(coefficient); + if (!isfinite(abs_coefficient) || abs_coefficient <= f_t(0)) { return std::int64_t{0}; } + + const f_t rounded_abs_coefficient = round(abs_coefficient); + const f_t tolerance_scale = abs_coefficient > f_t(1) ? 
abs_coefficient : f_t(1); + const f_t integrality_tolerance = + static_cast(integer_coefficient_rel_tol) * tolerance_scale; + if (raft::abs(abs_coefficient - rounded_abs_coefficient) > integrality_tolerance) { + return std::int64_t{0}; + } + if (rounded_abs_coefficient <= f_t(0) || + rounded_abs_coefficient > static_cast(std::numeric_limits::max())) { + return std::int64_t{0}; + } + return static_cast(rounded_abs_coefficient); + } +}; + +template +void compute_row_inf_norm( + const cuopt::linear_programming::optimization_problem_t& op_problem, + rmm::device_uvector& temp_storage, + size_t temp_storage_bytes, + rmm::device_uvector& row_inf_norm, + rmm::cuda_stream_view stream_view) +{ + const auto& matrix_values = op_problem.get_constraint_matrix_values(); + const auto& matrix_offsets = op_problem.get_constraint_matrix_offsets(); + auto coeff_abs_iter = + thrust::make_transform_iterator(matrix_values.data(), abs_value_transform_t{}); + size_t current_bytes = temp_storage_bytes; + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), + current_bytes, + coeff_abs_iter, + row_inf_norm.data(), + op_problem.get_n_constraints(), + matrix_offsets.data(), + matrix_offsets.data() + 1, + max_op_t{}, + f_t(0), + stream_view)); +} + +template +void compute_row_integer_gcd( + const cuopt::linear_programming::optimization_problem_t& op_problem, + rmm::device_uvector& temp_storage, + size_t temp_storage_bytes, + rmm::device_uvector& row_integer_gcd, + rmm::cuda_stream_view stream_view) +{ + const auto& matrix_values = op_problem.get_constraint_matrix_values(); + const auto& matrix_indices = op_problem.get_constraint_matrix_indices(); + const auto& matrix_offsets = op_problem.get_constraint_matrix_offsets(); + const auto& variable_types = op_problem.get_variable_types(); + if (variable_types.size() != static_cast(op_problem.get_n_variables())) { + thrust::fill(op_problem.get_handle_ptr()->get_thrust_policy(), + row_integer_gcd.begin(), + row_integer_gcd.end(), + 
std::int64_t{0}); + return; + } + auto variable_type_per_nnz = + thrust::make_permutation_iterator(variable_types.data(), matrix_indices.data()); + auto coeff_and_type_iter = + thrust::make_zip_iterator(thrust::make_tuple(matrix_values.data(), variable_type_per_nnz)); + auto integer_coeff_iter = thrust::make_transform_iterator( + coeff_and_type_iter, integer_coeff_for_integer_var_transform_t{}); + size_t current_bytes = temp_storage_bytes; + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), + current_bytes, + integer_coeff_iter, + row_integer_gcd.data(), + op_problem.get_n_constraints(), + matrix_offsets.data(), + matrix_offsets.data() + 1, + gcd_op_t{}, + std::int64_t{0}, + stream_view)); +} + +template +void compute_big_m_skip_rows( + const cuopt::linear_programming::optimization_problem_t& op_problem, + rmm::device_uvector& temp_storage, + size_t temp_storage_bytes, + rmm::device_uvector& row_inf_norm, + rmm::device_uvector& row_min_nonzero, + rmm::device_uvector& row_nonzero_count, + rmm::device_uvector& row_skip_scaling) +{ + const auto& matrix_values = op_problem.get_constraint_matrix_values(); + const auto& matrix_offsets = op_problem.get_constraint_matrix_offsets(); + const auto stream_view = op_problem.get_handle_ptr()->get_stream(); + auto coeff_abs_iter = + thrust::make_transform_iterator(matrix_values.data(), abs_value_transform_t{}); + auto coeff_nonzero_min_iter = + thrust::make_transform_iterator(matrix_values.data(), nonzero_abs_or_inf_transform_t{}); + auto coeff_nonzero_count_iter = + thrust::make_transform_iterator(matrix_values.data(), nonzero_count_transform_t{}); + + size_t max_bytes = temp_storage_bytes; + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), + max_bytes, + coeff_abs_iter, + row_inf_norm.data(), + op_problem.get_n_constraints(), + matrix_offsets.data(), + matrix_offsets.data() + 1, + max_op_t{}, + f_t(0), + stream_view)); + size_t min_bytes = temp_storage_bytes; + 
RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), + min_bytes, + coeff_nonzero_min_iter, + row_min_nonzero.data(), + op_problem.get_n_constraints(), + matrix_offsets.data(), + matrix_offsets.data() + 1, + min_op_t{}, + std::numeric_limits::infinity(), + stream_view)); + size_t count_bytes = temp_storage_bytes; + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), + count_bytes, + coeff_nonzero_count_iter, + row_nonzero_count.data(), + op_problem.get_n_constraints(), + matrix_offsets.data(), + matrix_offsets.data() + 1, + thrust::plus{}, + i_t(0), + stream_view)); + + auto row_begin = thrust::make_zip_iterator( + thrust::make_tuple(row_inf_norm.begin(), row_min_nonzero.begin(), row_nonzero_count.begin())); + auto row_end = thrust::make_zip_iterator( + thrust::make_tuple(row_inf_norm.end(), row_min_nonzero.end(), row_nonzero_count.end())); + thrust::transform( + op_problem.get_handle_ptr()->get_thrust_policy(), + row_begin, + row_end, + row_skip_scaling.begin(), + [] __device__(auto row_info) -> i_t { + const f_t row_norm = thrust::get<0>(row_info); + const f_t row_min_non_zero = thrust::get<1>(row_info); + const i_t row_non_zero_size = thrust::get<2>(row_info); + if (row_non_zero_size < i_t(2) || row_min_non_zero >= std::numeric_limits::infinity()) { + return i_t(0); + } + + const f_t row_ratio = row_norm / row_min_non_zero; + return row_norm >= static_cast(big_m_abs_threshold) && + row_ratio >= static_cast(big_m_ratio_threshold) + ? 
i_t(1) + : i_t(0); + }); +} + +template +void scale_objective(cuopt::linear_programming::optimization_problem_t& op_problem) +{ + auto& obj_coefficients = op_problem.get_objective_coefficients(); + const i_t n_cols = op_problem.get_n_variables(); + if (n_cols == 0) { return; } + + const auto* handle_ptr = op_problem.get_handle_ptr(); + + f_t min_abs_obj = thrust::transform_reduce(handle_ptr->get_thrust_policy(), + obj_coefficients.begin(), + obj_coefficients.end(), + nonzero_abs_or_inf_transform_t{}, + std::numeric_limits::infinity(), + min_op_t{}); + + f_t max_abs_obj = thrust::transform_reduce(handle_ptr->get_thrust_policy(), + obj_coefficients.begin(), + obj_coefficients.end(), + abs_value_transform_t{}, + f_t(0), + max_op_t{}); + + if (!std::isfinite(static_cast(min_abs_obj)) || min_abs_obj <= f_t(0) || + max_abs_obj <= f_t(0)) { + CUOPT_LOG_INFO("MIP_OBJ_SCALING skipped: no finite nonzero objective coefficients"); + return; + } + + if (static_cast(min_abs_obj) >= min_abs_objective_coefficient_threshold) { + CUOPT_LOG_INFO("MIP_OBJ_SCALING skipped: min_abs_coeff=%g already above threshold=%g", + static_cast(min_abs_obj), + min_abs_objective_coefficient_threshold); + return; + } + + double raw_scale = min_abs_objective_coefficient_threshold / static_cast(min_abs_obj); + double scale = std::min(raw_scale, max_obj_scaling_coefficient); + + double post_max = static_cast(max_abs_obj) * scale; + if (post_max > 1.0e6) { + CUOPT_LOG_INFO("MIP_OBJ_SCALING skipped: would push max_coeff from %g to %g (limit 1e6)", + static_cast(max_abs_obj), + post_max); + return; + } + + f_t scale_f = static_cast(scale); + thrust::transform(handle_ptr->get_thrust_policy(), + obj_coefficients.begin(), + obj_coefficients.end(), + obj_coefficients.begin(), + [scale_f] __device__(f_t c) -> f_t { return c * scale_f; }); + + f_t old_sf = op_problem.get_objective_scaling_factor(); + f_t old_off = op_problem.get_objective_offset(); + op_problem.set_objective_scaling_factor(old_sf / scale_f); + 
op_problem.set_objective_offset(old_off * scale_f); + + CUOPT_LOG_INFO( + "MIP_OBJ_SCALING applied: min_abs_coeff=%g max_abs_coeff=%g scale=%g new_scaling_factor=%g", + static_cast(min_abs_obj), + static_cast(max_abs_obj), + scale, + static_cast(old_sf / scale_f)); +} + +template +rmm::device_uvector capture_pre_scaling_integer_gcd( + const cuopt::linear_programming::optimization_problem_t& op_problem, + rmm::device_uvector& temp_storage, + size_t temp_storage_bytes, + rmm::cuda_stream_view stream_view) +{ + const i_t n_rows = op_problem.get_n_constraints(); + rmm::device_uvector gcd(static_cast(n_rows), stream_view); + compute_row_integer_gcd(op_problem, temp_storage, temp_storage_bytes, gcd, stream_view); + return gcd; +} + +template +void assert_integer_coefficient_integrality( + const cuopt::linear_programming::optimization_problem_t& op_problem, + rmm::device_uvector& temp_storage, + size_t temp_storage_bytes, + const rmm::device_uvector& pre_scaling_gcd, + rmm::cuda_stream_view stream_view) +{ + const auto* handle_ptr = op_problem.get_handle_ptr(); + const i_t n_rows = op_problem.get_n_constraints(); + rmm::device_uvector post_scaling_gcd(static_cast(n_rows), stream_view); + compute_row_integer_gcd( + op_problem, temp_storage, temp_storage_bytes, post_scaling_gcd, stream_view); + + i_t broken_rows = thrust::inner_product( + handle_ptr->get_thrust_policy(), + pre_scaling_gcd.begin(), + pre_scaling_gcd.end(), + post_scaling_gcd.begin(), + i_t(0), + thrust::plus{}, + [] __device__(std::int64_t pre_gcd, std::int64_t post_gcd) -> i_t { + return (pre_gcd > std::int64_t{0} && post_gcd == std::int64_t{0}) ? 
i_t(1) : i_t(0); + }); + + if (broken_rows > 0) { + CUOPT_LOG_WARN("MIP row scaling: %d rows lost integer coefficient integrality after scaling", + broken_rows); + } + cuopt_assert(broken_rows == 0, + "MIP scaling must preserve integer coefficients for integer variables"); +} + +template +mip_scaling_strategy_t::mip_scaling_strategy_t( + typename mip_scaling_strategy_t::optimization_problem_type_t& op_problem_scaled) + : handle_ptr_(op_problem_scaled.get_handle_ptr()), + stream_view_(handle_ptr_->get_stream()), + op_problem_scaled_(op_problem_scaled) +{ +} + +template +size_t dry_run_cub(const cuopt::linear_programming::optimization_problem_t& op_problem, + i_t n_rows, + rmm::device_uvector& row_inf_norm, + rmm::device_uvector& row_min_nonzero, + rmm::device_uvector& row_nonzero_count, + rmm::device_uvector& row_integer_gcd, + rmm::cuda_stream_view stream_view) +{ + const auto& matrix_values = op_problem.get_constraint_matrix_values(); + const auto& matrix_indices = op_problem.get_constraint_matrix_indices(); + const auto& matrix_offsets = op_problem.get_constraint_matrix_offsets(); + const auto& variable_types = op_problem.get_variable_types(); + size_t temp_storage_bytes = 0; + size_t current_required_bytes = 0; + + auto coeff_abs_iter = + thrust::make_transform_iterator(matrix_values.data(), abs_value_transform_t{}); + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, + current_required_bytes, + coeff_abs_iter, + row_inf_norm.data(), + n_rows, + matrix_offsets.data(), + matrix_offsets.data() + 1, + max_op_t{}, + f_t(0), + stream_view)); + temp_storage_bytes = std::max(temp_storage_bytes, current_required_bytes); + + auto coeff_nonzero_min_iter = + thrust::make_transform_iterator(matrix_values.data(), nonzero_abs_or_inf_transform_t{}); + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, + current_required_bytes, + coeff_nonzero_min_iter, + row_min_nonzero.data(), + n_rows, + matrix_offsets.data(), + matrix_offsets.data() + 1, + min_op_t{}, + 
std::numeric_limits::infinity(), + stream_view)); + temp_storage_bytes = std::max(temp_storage_bytes, current_required_bytes); +/* Query 3: per-row sum of nonzero_count_transform_t values */ + auto coeff_nonzero_count_iter = + thrust::make_transform_iterator(matrix_values.data(), nonzero_count_transform_t{}); + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, + current_required_bytes, + coeff_nonzero_count_iter, + row_nonzero_count.data(), + n_rows, + matrix_offsets.data(), + matrix_offsets.data() + 1, + thrust::plus{}, + i_t(0), + stream_view)); + temp_storage_bytes = std::max(temp_storage_bytes, current_required_bytes); +/* Query 4 (row GCD) reads a variable type for every nonzero through matrix_indices, so it is only issued when variable_types holds exactly one entry per variable */ + if (variable_types.size() == static_cast(op_problem.get_n_variables())) { + auto variable_type_per_nnz = + thrust::make_permutation_iterator(variable_types.data(), matrix_indices.data()); + auto coeff_and_type_iter = + thrust::make_zip_iterator(thrust::make_tuple(matrix_values.data(), variable_type_per_nnz)); + auto integer_coeff_iter = thrust::make_transform_iterator( + coeff_and_type_iter, integer_coeff_for_integer_var_transform_t{}); + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, + current_required_bytes, + integer_coeff_iter, + row_integer_gcd.data(), + n_rows, + matrix_offsets.data(), + matrix_offsets.data() + 1, + gcd_op_t{}, + std::int64_t{0}, + stream_view)); + temp_storage_bytes = std::max(temp_storage_bytes, current_required_bytes); + } + + return temp_storage_bytes; +} +/* Scales the wrapped problem in place. When do_objective_scaling is true the objective is rescaled through scale_objective first. Row scaling then runs for at most row_scaling_max_iterations passes; each pass multiplies the coefficients and bounds of every row by a per-row factor that moves the row's largest absolute coefficient toward the median over the reference rows. Row scaling is skipped entirely when the problem has no rows or no nonzeros. */ +template +void mip_scaling_strategy_t::scale_problem(bool do_objective_scaling) +{ + raft::common::nvtx::range fun_scope("mip_scale_problem"); + + auto& matrix_values = op_problem_scaled_.get_constraint_matrix_values(); + auto& matrix_offsets = op_problem_scaled_.get_constraint_matrix_offsets(); + auto& constraint_bounds = op_problem_scaled_.get_constraint_bounds(); + auto& constraint_lower_bounds = op_problem_scaled_.get_constraint_lower_bounds(); + auto& constraint_upper_bounds = op_problem_scaled_.get_constraint_upper_bounds(); + const i_t n_rows = op_problem_scaled_.get_n_constraints(); + const i_t 
n_cols = op_problem_scaled_.get_n_variables(); + const i_t nnz = op_problem_scaled_.get_nnz(); +/* Objective scaling is decided before the early return below, so it also applies to problems without constraints */ + if (do_objective_scaling) { + scale_objective(op_problem_scaled_); + } else { + CUOPT_LOG_INFO("MIP_OBJ_SCALING skipped: disabled by user setting"); + } + + if (n_rows == 0 || nnz <= 0) { return; } + cuopt_assert(constraint_bounds.size() == size_t{0} || + constraint_bounds.size() == static_cast(n_rows), + "constraint_bounds must be empty or have one value per constraint"); +/* Per-row work buffers. row_skip_scaling starts at 0 and cumulative_scaling at 1 for every row; coefficient_row_index has one entry per nonzero. */ + rmm::device_uvector row_inf_norm(static_cast(n_rows), stream_view_); + rmm::device_uvector row_min_nonzero(static_cast(n_rows), stream_view_); + rmm::device_uvector row_nonzero_count(static_cast(n_rows), stream_view_); + rmm::device_uvector row_integer_gcd(static_cast(n_rows), stream_view_); + rmm::device_uvector row_rhs_magnitude(static_cast(n_rows), stream_view_); + rmm::device_uvector row_skip_scaling(static_cast(n_rows), stream_view_); + thrust::fill( + handle_ptr_->get_thrust_policy(), row_skip_scaling.begin(), row_skip_scaling.end(), i_t(0)); + rmm::device_uvector iteration_scaling(static_cast(n_rows), stream_view_); + rmm::device_uvector cumulative_scaling(static_cast(n_rows), stream_view_); + thrust::fill( + handle_ptr_->get_thrust_policy(), cumulative_scaling.begin(), cumulative_scaling.end(), f_t(1)); + rmm::device_uvector coefficient_row_index(static_cast(nnz), stream_view_); + rmm::device_uvector ref_log2_values(static_cast(n_rows), stream_view_); +/* Map every nonzero position to its row: the upper_bound of position p in the offsets array is the row index plus one, which the transform right after corrects */ + thrust::upper_bound(handle_ptr_->get_thrust_policy(), + matrix_offsets.begin(), + matrix_offsets.end(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(nnz), + coefficient_row_index.begin()); + thrust::transform( + handle_ptr_->get_thrust_policy(), + coefficient_row_index.begin(), + coefficient_row_index.end(), + coefficient_row_index.begin(), + [] __device__(i_t row_upper_bound_idx) -> i_t { return row_upper_bound_idx - 1; }); +/* One scratch buffer, sized for the most demanding segmented reduction, is reused by every compute_ helper below */ + size_t temp_storage_bytes = dry_run_cub(op_problem_scaled_, + n_rows, + 
row_inf_norm, + row_min_nonzero, + row_nonzero_count, + row_integer_gcd, + stream_view_); + + rmm::device_uvector temp_storage(temp_storage_bytes, stream_view_); +/* Snapshot of the row GCDs taken before any row is scaled; it is consumed by assert_integer_coefficient_integrality after the loop. NOTE(review): cuopt_func_call presumably compiles its argument only in assert-enabled builds; confirm. */ + cuopt_func_call(auto pre_scaling_gcd = capture_pre_scaling_integer_gcd( + op_problem_scaled_, temp_storage, temp_storage_bytes, stream_view_)); +/* Fills row_skip_scaling; rows flagged 1 are counted as big-M rows just below and get their own factor limits inside the loop */ + compute_big_m_skip_rows(op_problem_scaled_, + temp_storage, + temp_storage_bytes, + row_inf_norm, + row_min_nonzero, + row_nonzero_count, + row_skip_scaling); + + i_t big_m_rows = thrust::count( + handle_ptr_->get_thrust_policy(), row_skip_scaling.begin(), row_skip_scaling.end(), i_t(1)); + + CUOPT_LOG_INFO("MIP row scaling start: rows=%d cols=%d max_iterations=%d soft_big_m_rows=%d", + n_rows, + n_cols, + row_scaling_max_iterations, + big_m_rows); +/* Largest absolute coefficient before any scaling; the magnitude guard inside the loop compares against it */ + f_t original_max_coeff = thrust::transform_reduce(handle_ptr_->get_thrust_policy(), + matrix_values.begin(), + matrix_values.end(), + abs_value_transform_t{}, + f_t(0), + max_op_t{}); +/* Infinity marks that no earlier pass has recorded a spread yet */ + double previous_row_log2_spread = std::numeric_limits::infinity(); + for (int iteration = 0; iteration < row_scaling_max_iterations; ++iteration) { + compute_row_inf_norm( + op_problem_scaled_, temp_storage, temp_storage_bytes, row_inf_norm, stream_view_); + compute_row_integer_gcd( + op_problem_scaled_, temp_storage, temp_storage_bytes, row_integer_gcd, stream_view_); +/* A single reduction over the row norms yields (sum of log2 norms, number of rows with nonzero norm, min log2 norm, max log2 norm); rows with zero norm contribute the identity element */ + using row_stats_t = thrust::tuple; + auto row_norm_log2_stats = thrust::transform_reduce( + handle_ptr_->get_thrust_policy(), + row_inf_norm.begin(), + row_inf_norm.end(), + [] __device__(f_t row_norm) -> row_stats_t { + if (row_norm == f_t(0)) { + return {0.0, + 0.0, + std::numeric_limits::infinity(), + -std::numeric_limits::infinity()}; + } + const double row_log2 = log2(static_cast(row_norm)); + return {row_log2, 1.0, row_log2, row_log2}; + }, + row_stats_t{0.0, + 0.0, + std::numeric_limits::infinity(), + -std::numeric_limits::infinity()}, + [] __device__(row_stats_t a, row_stats_t b) -> row_stats_t { + return {thrust::get<0>(a) + thrust::get<0>(b), + 
thrust::get<1>(a) + thrust::get<1>(b), + min_op_t{}(thrust::get<2>(a), thrust::get<2>(b)), + max_op_t{}(thrust::get<3>(a), thrust::get<3>(b))}; + }); + const i_t active_row_count = static_cast(thrust::get<1>(row_norm_log2_stats)); + if (active_row_count == 0) { break; } /* every row norm is zero */ + const double row_log2_spread = + thrust::get<3>(row_norm_log2_stats) - thrust::get<2>(row_norm_log2_stats); /* largest minus smallest log2 row norm */ + if (iteration == 0 && row_log2_spread <= row_scaling_min_initial_log2_spread) { + CUOPT_LOG_INFO("MIP row scaling skipped: initial_log2_spread=%g threshold=%g", + row_log2_spread, + row_scaling_min_initial_log2_spread); + break; + } + if (std::isfinite(previous_row_log2_spread)) { /* skipped on the first pass, where no earlier spread exists */ + const double spread_improvement = previous_row_log2_spread - row_log2_spread; + if (spread_improvement <= + row_scaling_spread_rel_tol * std::max(1.0, previous_row_log2_spread)) { + break; + } + } + previous_row_log2_spread = row_log2_spread; +/* Per-row bound magnitude: the larger absolute value among the finite bounds, or 0 when neither bound is finite */ + thrust::transform(handle_ptr_->get_thrust_policy(), + thrust::make_zip_iterator(thrust::make_tuple( + constraint_lower_bounds.begin(), constraint_upper_bounds.begin())), + thrust::make_zip_iterator(thrust::make_tuple(constraint_lower_bounds.end(), + constraint_upper_bounds.end())), + row_rhs_magnitude.begin(), + [] __device__(auto row_bounds) -> f_t { + const f_t lower_bound = thrust::get<0>(row_bounds); + const f_t upper_bound = thrust::get<1>(row_bounds); + f_t rhs_norm = f_t(0); + if (isfinite(lower_bound)) { rhs_norm = raft::abs(lower_bound); } + if (isfinite(upper_bound)) { + const f_t upper_abs = raft::abs(upper_bound); + rhs_norm = upper_abs > rhs_norm ? 
upper_abs : rhs_norm; + } + return rhs_norm; + }); +/* Target norm is taken from the median log2 row norm over the reference rows. Big-M rows, rows with zero bound magnitude and rows with non-positive norm are mapped to minus infinity so they sort to the front; lower_bound against the finite sentinel then counts them as invalid. */ + constexpr double neg_inf_sentinel = -1.0e300; + thrust::transform(handle_ptr_->get_thrust_policy(), + thrust::make_zip_iterator(thrust::make_tuple( + row_inf_norm.begin(), row_rhs_magnitude.begin(), row_skip_scaling.begin())), + thrust::make_zip_iterator(thrust::make_tuple( + row_inf_norm.end(), row_rhs_magnitude.end(), row_skip_scaling.end())), + ref_log2_values.begin(), + [] __device__(auto row_info) -> double { + const f_t row_norm = thrust::get<0>(row_info); + const f_t rhs_norm = thrust::get<1>(row_info); + const i_t is_big_m = thrust::get<2>(row_info); + if (is_big_m) { return -std::numeric_limits::infinity(); } + if (rhs_norm == f_t(0)) { return -std::numeric_limits::infinity(); } + if (row_norm <= f_t(0)) { return -std::numeric_limits::infinity(); } + return log2(static_cast(row_norm)); + }); + thrust::sort(handle_ptr_->get_thrust_policy(), ref_log2_values.begin(), ref_log2_values.end()); + auto valid_begin_iter = thrust::lower_bound(handle_ptr_->get_thrust_policy(), + ref_log2_values.begin(), + ref_log2_values.end(), + neg_inf_sentinel); + i_t n_invalid = static_cast(valid_begin_iter - ref_log2_values.begin()); + i_t valid_count = n_rows - n_invalid; + if (valid_count == 0) { break; } + i_t median_idx = n_invalid + valid_count / 2; + double h_median_log2; + RAFT_CUDA_TRY(cudaMemcpyAsync(&h_median_log2, + ref_log2_values.data() + median_idx, + sizeof(double), + cudaMemcpyDeviceToHost, + stream_view_)); + handle_ptr_->sync_stream(); /* h_median_log2 is read on the host in the next statement, so the asynchronous copy has to be complete */ + f_t target_norm = static_cast(exp2(h_median_log2)); + cuopt_assert(std::isfinite(static_cast(target_norm)), "target_norm must be finite"); + cuopt_assert(target_norm > f_t(0), "target_norm must be positive"); +/* Compute this pass's per-row factor into iteration_scaling; a factor of 1 leaves the row untouched */ + thrust::transform( + handle_ptr_->get_thrust_policy(), + thrust::make_zip_iterator(thrust::make_tuple(row_inf_norm.begin(), + row_skip_scaling.begin(), + row_integer_gcd.begin(), + cumulative_scaling.begin(), + row_rhs_magnitude.begin())), + 
thrust::make_zip_iterator(thrust::make_tuple(row_inf_norm.end(), + row_skip_scaling.end(), + row_integer_gcd.end(), + cumulative_scaling.end(), + row_rhs_magnitude.end())), + iteration_scaling.begin(), + [target_norm] __device__(auto row_info) -> f_t { + const f_t row_norm = thrust::get<0>(row_info); + const i_t is_big_m = thrust::get<1>(row_info); + const std::int64_t row_coeff_gcd = thrust::get<2>(row_info); + const f_t cum_scale = thrust::get<3>(row_info); + const f_t rhs_norm = thrust::get<4>(row_info); + if (row_norm == f_t(0)) { return f_t(1); } + if (rhs_norm == f_t(0)) { return f_t(1); } +/* Factor that would bring this row's norm exactly to target_norm */ + const f_t desired_scaling = target_norm / row_norm; + if (!isfinite(desired_scaling) || desired_scaling <= f_t(0)) { return f_t(1); } +/* Per-pass limits; rows flagged big-M use the soft big-M limits instead of the regular ones */ + f_t min_scaling = is_big_m ? static_cast(row_scaling_big_m_soft_min_factor) + : static_cast(row_scaling_min_factor); + f_t max_scaling = is_big_m ? static_cast(row_scaling_big_m_soft_max_factor) + : static_cast(row_scaling_max_factor); +/* A row that is not flagged big-M but whose norm already reaches big_m_abs_threshold may be scaled down but never up */ + if (!is_big_m && row_norm >= static_cast(big_m_abs_threshold)) { + if (max_scaling > f_t(1)) { max_scaling = f_t(1); } + } +/* Tighten the limits so that the product with the factor accumulated in earlier passes stays between cumulative_row_scaling_min and cumulative_row_scaling_max; the row is left alone when no room remains */ + const f_t cum_lower = static_cast(cumulative_row_scaling_min) / cum_scale; + const f_t cum_upper = static_cast(cumulative_row_scaling_max) / cum_scale; + if (cum_lower > min_scaling) { min_scaling = cum_lower; } + if (cum_upper < max_scaling) { max_scaling = cum_upper; } + if (min_scaling > max_scaling) { return f_t(1); } +/* Clamp the desired factor into the allowed interval */ + f_t row_scaling = desired_scaling; + if (row_scaling < min_scaling) { row_scaling = min_scaling; } + if (row_scaling > max_scaling) { row_scaling = max_scaling; } + + // Fix E: prefer power-of-two scaling for integer rows (exact in IEEE 754) + if (row_coeff_gcd > std::int64_t{0}) { + const f_t gcd_value = static_cast(row_coeff_gcd); + if (isfinite(gcd_value) && gcd_value > f_t(0)) { + const double log2_scaling = log2(static_cast(row_scaling)); + int k_candidates[3] = {static_cast(round(log2_scaling)), + static_cast(floor(log2_scaling)), + 
static_cast(ceil(log2_scaling))}; + bool found_pow2 = false; /* candidates are the nearest, lower and upper exponents; a negative exponent k is accepted only when -k is below 63 and the row GCD is divisible by 2 to the power -k */ + for (int ci = 0; ci < 3 && !found_pow2; ++ci) { + int k = k_candidates[ci]; + f_t pow2 = static_cast(exp2(static_cast(k))); + if (pow2 < min_scaling || pow2 > max_scaling) { continue; } + bool preserves = + (k >= 0) || (-k < 63 && (row_coeff_gcd % (std::int64_t{1} << (-k))) == 0); + if (preserves) { + row_scaling = pow2; + found_pow2 = true; + } + } + if (!found_pow2) { /* fallback: use m / gcd with an integer multiplier m, rounded from row_scaling times gcd and clamped to the multiplier range derived from the limits (at least 1) */ + std::int64_t min_mult = static_cast( + ceil(static_cast(min_scaling * gcd_value - + static_cast(integer_multiplier_rounding_tolerance)))); + std::int64_t max_mult = static_cast(floor( + static_cast(max_scaling * gcd_value + + static_cast(integer_multiplier_rounding_tolerance)))); + if (min_mult < std::int64_t{1}) { min_mult = std::int64_t{1}; } + if (max_mult < min_mult) { max_mult = min_mult; } + std::int64_t proj_mult = static_cast(round(row_scaling * gcd_value)); + if (proj_mult < min_mult) { proj_mult = min_mult; } + if (proj_mult > max_mult) { proj_mult = max_mult; } + row_scaling = static_cast(proj_mult) / gcd_value; + } + } + } + return row_scaling; + }); +/* Stop when no row received a factor different from 1 */ + i_t scaled_rows = + thrust::count_if(handle_ptr_->get_thrust_policy(), + iteration_scaling.begin(), + iteration_scaling.end(), + [] __device__(f_t row_scale) -> bool { return row_scale != f_t(1); }); + CUOPT_LOG_INFO( + "MIP_SCALING_METRICS iteration=%d log2_spread=%g target_norm=%g scaled_rows=%d " + "valid_rows=%d", + iteration, + row_log2_spread, + static_cast(target_norm), + scaled_rows, + valid_count); + if (scaled_rows == 0) { break; } +/* Magnitude guard: stop before applying the factors if any scaled row norm would exceed the largest coefficient of the unscaled matrix */ + f_t predicted_max = thrust::inner_product(handle_ptr_->get_thrust_policy(), + row_inf_norm.begin(), + row_inf_norm.end(), + iteration_scaling.begin(), + f_t(0), + max_op_t{}, + thrust::multiplies{}); + if (predicted_max > original_max_coeff) { + CUOPT_LOG_INFO("MIP_SCALING magnitude guard: predicted_max=%g > original_max=%g, stopping", + static_cast(predicted_max), + static_cast(original_max_coeff)); + break; + } +/* Apply this pass: each coefficient is multiplied by the factor of its row, looked up through coefficient_row_index */ + 
thrust::transform( + handle_ptr_->get_thrust_policy(), + matrix_values.begin(), + matrix_values.end(), + thrust::make_permutation_iterator(iteration_scaling.begin(), coefficient_row_index.begin()), + matrix_values.begin(), + thrust::multiplies{}); +/* Track the product of all factors applied to each row so far */ + thrust::transform(handle_ptr_->get_thrust_policy(), + cumulative_scaling.begin(), + cumulative_scaling.end(), + iteration_scaling.begin(), + cumulative_scaling.begin(), + thrust::multiplies{}); +/* Row bounds are multiplied by the same per-row factor as the coefficients; constraint_bounds is only touched when it holds one value per row */ + thrust::transform(handle_ptr_->get_thrust_policy(), + constraint_lower_bounds.begin(), + constraint_lower_bounds.end(), + iteration_scaling.begin(), + constraint_lower_bounds.begin(), + thrust::multiplies{}); + thrust::transform(handle_ptr_->get_thrust_policy(), + constraint_upper_bounds.begin(), + constraint_upper_bounds.end(), + iteration_scaling.begin(), + constraint_upper_bounds.begin(), + thrust::multiplies{}); + if (constraint_bounds.size() == static_cast(n_rows)) { + thrust::transform(handle_ptr_->get_thrust_policy(), + constraint_bounds.begin(), + constraint_bounds.end(), + iteration_scaling.begin(), + constraint_bounds.begin(), + thrust::multiplies{}); + } + } +/* final_spread is the spread recorded by the last pass that got past the convergence checks; it is still infinite when no pass did */ + CUOPT_LOG_INFO("MIP_SCALING_SUMMARY rows=%d bigm_rows=%d final_spread=%g", + n_rows, + big_m_rows, + previous_row_log2_spread); + + cuopt_func_call(assert_integer_coefficient_integrality( + op_problem_scaled_, temp_storage, temp_storage_bytes, pre_scaling_gcd, stream_view_)); +/* Diagnostics only: warn when the ratio between the largest and the smallest nonzero absolute coefficient after scaling exceeds post_scaling_max_ratio_warn */ + const f_t post_max_coeff = thrust::transform_reduce(handle_ptr_->get_thrust_policy(), + matrix_values.begin(), + matrix_values.end(), + abs_value_transform_t{}, + f_t(0), + max_op_t{}); + const f_t post_min_nonzero_coeff = thrust::transform_reduce(handle_ptr_->get_thrust_policy(), + matrix_values.begin(), + matrix_values.end(), + nonzero_abs_or_inf_transform_t{}, + std::numeric_limits::infinity(), + min_op_t{}); + if (std::isfinite(static_cast(post_max_coeff)) && + std::isfinite(static_cast(post_min_nonzero_coeff)) && + post_min_nonzero_coeff > f_t(0)) { + const double post_ratio 
= + static_cast(post_max_coeff) / static_cast(post_min_nonzero_coeff); + if (post_ratio > post_scaling_max_ratio_warn) { + CUOPT_LOG_WARN( + "MIP row scaling: extreme coefficient ratio after scaling: max=%g min_nz=%g ratio=%g", + static_cast(post_max_coeff), + static_cast(post_min_nonzero_coeff), + post_ratio); + } + } + + CUOPT_LOG_INFO("MIP row scaling completed"); + op_problem_scaled_.print_scaling_information(); +} +/* Explicit instantiations of the strategy class, one per floating-point type enabled through MIP_INSTANTIATE_FLOAT and MIP_INSTANTIATE_DOUBLE */ +#define INSTANTIATE(F_TYPE) template class mip_scaling_strategy_t; + +#if MIP_INSTANTIATE_FLOAT +INSTANTIATE(float) +#endif + +#if MIP_INSTANTIATE_DOUBLE +INSTANTIATE(double) +#endif + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/mip_scaling_strategy.cuh b/cpp/src/mip_heuristics/mip_scaling_strategy.cuh new file mode 100644 index 0000000000..63d88dbec6 --- /dev/null +++ b/cpp/src/mip_heuristics/mip_scaling_strategy.cuh @@ -0,0 +1,32 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include + +#include + +#include + +namespace cuopt::linear_programming::detail { +/* In-place scaling of a MIP optimization problem. An instance keeps a reference to the problem passed to the constructor, so that problem must outlive the instance. */ +template +class mip_scaling_strategy_t { + public: + using optimization_problem_type_t = cuopt::linear_programming::optimization_problem_t; + explicit mip_scaling_strategy_t(optimization_problem_type_t& op_problem_scaled); +/* Scales the bound problem in place; pass false to leave the objective unscaled. The parameter carries the same name as in the out-of-line definition. */ + void scale_problem(bool do_objective_scaling = true); + + private: + raft::handle_t const* handle_ptr_{nullptr}; + rmm::cuda_stream_view stream_view_; + optimization_problem_type_t& op_problem_scaled_; +}; + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp b/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp index bf93b54e26..d94cf5aa67 100644 --- a/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp +++ b/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp @@ -739,7 +739,10 @@ third_party_presolve_result_t third_party_presolve_t::apply( auto opt_problem = build_optimization_problem( papilo_problem, op_problem.get_handle_ptr(), category, maximize_); + // metadata from original optimization problem that is not filled opt_problem.set_problem_name(op_problem.get_problem_name()); + opt_problem.set_objective_scaling_factor(op_problem.get_objective_scaling_factor()); + // when an objective offset outside (e.g. 
from mps file), handle accordingly auto col_flags = papilo_problem.getColFlags(); std::vector implied_integer_indices; for (size_t i = 0; i < col_flags.size(); i++) { diff --git a/cpp/src/mip_heuristics/problem/problem.cu b/cpp/src/mip_heuristics/problem/problem.cu index c03d9342d1..10d80586b4 100644 --- a/cpp/src/mip_heuristics/problem/problem.cu +++ b/cpp/src/mip_heuristics/problem/problem.cu @@ -206,6 +206,7 @@ problem_t::problem_t(const problem_t& problem_) clique_table(problem_.clique_table), vars_with_objective_coeffs(problem_.vars_with_objective_coeffs), expensive_to_fix_vars(problem_.expensive_to_fix_vars), + related_vars_time_limit(problem_.related_vars_time_limit), Q_offsets(problem_.Q_offsets), Q_indices(problem_.Q_indices), Q_values(problem_.Q_values) @@ -263,6 +264,7 @@ problem_t::problem_t(const problem_t& problem_, clique_table(problem_.clique_table), vars_with_objective_coeffs(problem_.vars_with_objective_coeffs), expensive_to_fix_vars(problem_.expensive_to_fix_vars), + related_vars_time_limit(problem_.related_vars_time_limit), Q_offsets(problem_.Q_offsets), Q_indices(problem_.Q_indices), Q_values(problem_.Q_values) @@ -363,6 +365,7 @@ problem_t::problem_t(const problem_t& problem_, bool no_deep fixing_helpers(problem_.fixing_helpers, handle_ptr), vars_with_objective_coeffs(problem_.vars_with_objective_coeffs), expensive_to_fix_vars(problem_.expensive_to_fix_vars), + related_vars_time_limit(problem_.related_vars_time_limit), Q_offsets(problem_.Q_offsets), Q_indices(problem_.Q_indices), Q_values(problem_.Q_values) @@ -805,8 +808,7 @@ void problem_t::recompute_auxilliary_data(bool check_representation) compute_binary_var_table(); compute_vars_with_objective_coeffs(); // TODO: speedup compute related variables - const double time_limit = 30.; - compute_related_variables(time_limit); + compute_related_variables(related_vars_time_limit); if (check_representation) cuopt_func_call(check_problem_representation(true)); } diff --git 
a/cpp/src/mip_heuristics/problem/problem.cuh b/cpp/src/mip_heuristics/problem/problem.cuh index 4c8bf3caad..a801cc4067 100644 --- a/cpp/src/mip_heuristics/problem/problem.cuh +++ b/cpp/src/mip_heuristics/problem/problem.cuh @@ -18,6 +18,7 @@ #include #include +#include #include #include "host_helper.cuh" #include "problem_fixing.cuh" @@ -249,8 +250,13 @@ class problem_t { rmm::device_uvector integer_fixed_variable_map; std::function&)> branch_and_bound_callback; - std::function&, const std::vector&, const std::vector&, f_t, f_t, i_t)> + std::function&, + const std::vector&, + const std::vector&, + f_t, + f_t, + i_t, + method_t)> set_root_relaxation_solution_callback; typename mip_solver_settings_t::tolerances_t tolerances{}; @@ -328,6 +334,7 @@ class problem_t { bool cutting_plane_added{false}; std::pair, std::vector> vars_with_objective_coeffs; bool expensive_to_fix_vars{false}; + double related_vars_time_limit{30.}; std::vector Q_offsets; std::vector Q_indices; std::vector Q_values; diff --git a/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu b/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu index e2bbc8feb1..84415f5372 100644 --- a/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu +++ b/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu @@ -8,7 +8,6 @@ #include "relaxed_lp.cuh" #include -#include #include #include #include diff --git a/cpp/src/mip_heuristics/solution/solution.cu b/cpp/src/mip_heuristics/solution/solution.cu index b5506024f9..09ff8be472 100644 --- a/cpp/src/mip_heuristics/solution/solution.cu +++ b/cpp/src/mip_heuristics/solution/solution.cu @@ -323,7 +323,7 @@ f_t solution_t::compute_l2_residual() } template -bool solution_t::compute_feasibility() +bool solution_t::compute_feasibility(bool consider_integrality) { n_feasible_constraints.set_value_to_zero_async(handle_ptr->get_stream()); compute_constraints(); @@ -331,7 +331,8 @@ bool solution_t::compute_feasibility() compute_infeasibility(); compute_number_of_integers(); i_t h_n_feas_constraints = 
n_feasible_constraints.value(handle_ptr->get_stream()); - is_feasible = h_n_feas_constraints == problem_ptr->n_constraints && test_number_all_integer(); + is_feasible = h_n_feas_constraints == problem_ptr->n_constraints; + if (consider_integrality) { is_feasible = is_feasible && test_number_all_integer(); } CUOPT_LOG_TRACE("is_feasible %d n_feasible_cstr %d all_cstr %d", is_feasible, h_n_feas_constraints, diff --git a/cpp/src/mip_heuristics/solution/solution.cuh b/cpp/src/mip_heuristics/solution/solution.cuh index f6c2c2f802..9de10ed980 100644 --- a/cpp/src/mip_heuristics/solution/solution.cuh +++ b/cpp/src/mip_heuristics/solution/solution.cuh @@ -58,7 +58,7 @@ class solution_t { // makes the approximate integer values up to INTEGRALITY TOLERANCE whole integers void correct_integer_precision(); // does a reduction and returns if the current solution is feasible - bool compute_feasibility(); + bool compute_feasibility(bool consider_integrality = true); // sets the is_feasible flag to 1 void set_feasible(); // sets the is_feasible flag to 0 diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu index 8e9d2358e4..4e9cd6a2a5 100644 --- a/cpp/src/mip_heuristics/solve.cu +++ b/cpp/src/mip_heuristics/solve.cu @@ -11,13 +11,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include #include @@ -87,12 +87,6 @@ mip_solution_t run_mip(detail::problem_t& problem, { try { raft::common::nvtx::range fun_scope("run_mip"); - auto constexpr const running_mip = true; - - // TODO ask Akif and Alice how was this passed down? 
- auto hyper_params = settings.hyper_params; - hyper_params.update_primal_weight_on_initial_solution = false; - hyper_params.update_step_size_on_initial_solution = true; if (settings.get_mip_callbacks().size() > 0) { auto callback_num_variables = problem.original_problem_ptr->get_n_variables(); if (problem.has_papilo_presolve_data()) { @@ -147,7 +141,7 @@ mip_solution_t run_mip(detail::problem_t& problem, } // problem contains unpreprocessed data detail::problem_t scaled_problem(problem); - + cuopt_func_call(auto saved_problem = scaled_problem); CUOPT_LOG_INFO("Objective offset %f scaling_factor %f", problem.presolve_data.objective_offset, problem.presolve_data.objective_scaling_factor); @@ -156,33 +150,12 @@ mip_solution_t run_mip(detail::problem_t& problem, "Size mismatch"); cuopt_assert(problem.original_problem_ptr->get_n_constraints() == scaled_problem.n_constraints, "Size mismatch"); - detail::pdlp_initial_scaling_strategy_t scaling( - scaled_problem.handle_ptr, - scaled_problem, - hyper_params.default_l_inf_ruiz_iterations, - (f_t)hyper_params.default_alpha_pock_chambolle_rescaling, - scaled_problem.reverse_coefficients, - scaled_problem.reverse_offsets, - scaled_problem.reverse_constraints, - nullptr, - hyper_params, - running_mip); - - cuopt_func_call(auto saved_problem = scaled_problem); - if (settings.mip_scaling) { - scaling.scale_problem(); - if (settings.initial_solutions.size() > 0) { - for (const auto& initial_solution : settings.initial_solutions) { - scaling.scale_primal(*initial_solution); - } - } - } // only call preprocess on scaled problem, so we can compute feasibility on the original problem scaled_problem.preprocess_problem(); - // cuopt_func_call((check_scaled_problem(scaled_problem, saved_problem))); + scaled_problem.related_vars_time_limit = settings.heuristic_params.related_vars_time_limit; detail::trivial_presolve(scaled_problem); - detail::mip_solver_t solver(scaled_problem, settings, scaling, timer); + detail::mip_solver_t 
solver(scaled_problem, settings, timer); // initial_cutoff is in user-space (representation-invariant). // It will be converted to the target solver-space at each consumption point. solver.context.initial_cutoff = initial_cutoff; @@ -228,22 +201,21 @@ mip_solution_t run_mip(detail::problem_t& problem, CUOPT_LOG_DEBUG("Started early CPUFJ on papilo-presolved problem during cuOpt presolve"); } - auto scaled_sol = solver.run_solver(); - bool is_feasible_before_scaling = scaled_sol.get_feasible(); - scaled_sol.problem_ptr = &problem; - - if (settings.mip_scaling) { scaling.unscale_solutions(scaled_sol); } + auto presolved_sol = solver.run_solver(); + bool is_feasible_on_presolved = presolved_sol.get_feasible(); + presolved_sol.problem_ptr = &problem; // at this point we need to compute the feasibility on the original problem not the presolved // one - bool is_feasible_after_unscaling = scaled_sol.compute_feasibility(); - if (!scaled_problem.empty && is_feasible_before_scaling != is_feasible_after_unscaling) { + bool is_feasible_on_original = presolved_sol.compute_feasibility(); + if (!scaled_problem.empty && is_feasible_on_presolved != is_feasible_on_original) { CUOPT_LOG_WARN( - "The feasibility does not match on scaled and unscaled problems. To overcome this issue, " + "The feasibility does not match on presolved and original problems. To overcome this " + "issue, " "please provide a more numerically stable problem."); } - auto sol = scaled_sol.get_solution( - is_feasible_before_scaling || is_feasible_after_unscaling, solver.get_solver_stats(), false); + auto sol = presolved_sol.get_solution( + is_feasible_on_presolved || is_feasible_on_original, solver.get_solver_stats(), false); int hidesol = std::getenv("CUOPT_MIP_HIDE_SOLUTION") ? 
atoi(std::getenv("CUOPT_MIP_HIDE_SOLUTION")) : 0; @@ -312,7 +284,10 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, } auto timer = timer_t(time_limit); - + if (settings.mip_scaling != CUOPT_MIP_SCALING_OFF) { + detail::mip_scaling_strategy_t scaling(op_problem); + scaling.scale_problem(settings.mip_scaling != CUOPT_MIP_SCALING_NO_OBJECTIVE); + } double presolve_time = 0.0; std::unique_ptr> presolver; std::optional> presolve_result_opt; @@ -387,7 +362,9 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, detail::sort_csr(op_problem); // allocate not more than 10% of the time limit to presolve. // Note that this is not the presolve time, but the time limit for presolve. - double presolve_time_limit = std::min(0.1 * time_limit, 60.0); + const auto& hp = settings.heuristic_params; + double presolve_time_limit = + std::min(hp.presolve_time_ratio * time_limit, hp.presolve_max_time); if (settings.determinism_mode == CUOPT_MODE_DETERMINISTIC) { presolve_time_limit = std::numeric_limits::infinity(); } diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu index 5d2f043ee4..0bbf48d95e 100644 --- a/cpp/src/mip_heuristics/solver.cu +++ b/cpp/src/mip_heuristics/solver.cu @@ -42,14 +42,10 @@ static void init_handler(const raft::handle_t* handle_ptr) template mip_solver_t::mip_solver_t(const problem_t& op_problem, const mip_solver_settings_t& solver_settings, - pdlp_initial_scaling_strategy_t& scaling, timer_t timer) : op_problem_(op_problem), solver_settings_(solver_settings), - context(op_problem.handle_ptr, - const_cast*>(&op_problem), - solver_settings, - &scaling), + context(op_problem.handle_ptr, const_cast*>(&op_problem), solver_settings), timer_(timer) { init_handler(op_problem.handle_ptr); @@ -211,11 +207,14 @@ solution_t mip_solver_t::run_solver() f_t time_limit = context.settings.determinism_mode == CUOPT_MODE_DETERMINISTIC ? 
std::numeric_limits::infinity() : timer_.remaining_time(); - double presolve_time_limit = std::min(0.1 * time_limit, 60.0); + const auto& hp = context.settings.heuristic_params; + double presolve_time_limit = std::min(hp.presolve_time_ratio * time_limit, hp.presolve_max_time); presolve_time_limit = context.settings.determinism_mode == CUOPT_MODE_DETERMINISTIC ? std::numeric_limits::infinity() : presolve_time_limit; - bool presolve_success = run_presolve ? dm.run_presolve(presolve_time_limit, timer_) : true; + if (std::isfinite(presolve_time_limit)) + CUOPT_LOG_DEBUG("Presolve time limit: %g", presolve_time_limit); + bool presolve_success = run_presolve ? dm.run_presolve(presolve_time_limit, timer_) : true; // Stop early CPUFJ after cuopt presolve (probing cache) but before main solve if (context.early_cpufj_ptr) { @@ -347,12 +346,18 @@ solution_t mip_solver_t::run_solver() branch_and_bound_settings.clique_cuts = context.settings.clique_cuts; branch_and_bound_settings.strong_chvatal_gomory_cuts = context.settings.strong_chvatal_gomory_cuts; - branch_and_bound_settings.reduced_cost_strengthening = - context.settings.reduced_cost_strengthening; branch_and_bound_settings.cut_change_threshold = context.settings.cut_change_threshold; branch_and_bound_settings.cut_min_orthogonality = context.settings.cut_min_orthogonality; branch_and_bound_settings.mip_batch_pdlp_strong_branching = context.settings.mip_batch_pdlp_strong_branching; + branch_and_bound_settings.mip_batch_pdlp_reliability_branching = + context.settings.mip_batch_pdlp_reliability_branching; + + branch_and_bound_settings.strong_branching_simplex_iteration_limit = + context.settings.strong_branching_simplex_iteration_limit < 0 + ? 200 + : context.settings.strong_branching_simplex_iteration_limit; + branch_and_bound_settings.reduced_cost_strengthening = context.settings.reduced_cost_strengthening == -1 ? 
2 @@ -441,7 +446,8 @@ solution_t mip_solver_t::run_solver() std::placeholders::_3, std::placeholders::_4, std::placeholders::_5, - std::placeholders::_6); + std::placeholders::_6, + std::placeholders::_7); if (timer_.check_time_limit()) { CUOPT_LOG_INFO("Time limit reached during B&B setup"); diff --git a/cpp/src/mip_heuristics/solver.cuh b/cpp/src/mip_heuristics/solver.cuh index 1b5fe17244..9b9024a1dc 100644 --- a/cpp/src/mip_heuristics/solver.cuh +++ b/cpp/src/mip_heuristics/solver.cuh @@ -20,7 +20,6 @@ class mip_solver_t { public: explicit mip_solver_t(const problem_t& op_problem, const mip_solver_settings_t& solver_settings, - pdlp_initial_scaling_strategy_t& scaling, timer_t timer); solution_t run_solver(); diff --git a/cpp/src/mip_heuristics/solver_context.cuh b/cpp/src/mip_heuristics/solver_context.cuh index 8fa852609b..3ea7377e15 100644 --- a/cpp/src/mip_heuristics/solver_context.cuh +++ b/cpp/src/mip_heuristics/solver_context.cuh @@ -9,7 +9,6 @@ #include #include -#include #include #include @@ -37,9 +36,8 @@ template struct mip_solver_context_t { explicit mip_solver_context_t(raft::handle_t const* handle_ptr_, problem_t* problem_ptr_, - mip_solver_settings_t settings_, - pdlp_initial_scaling_strategy_t* scaling) - : handle_ptr(handle_ptr_), problem_ptr(problem_ptr_), settings(settings_), scaling(scaling) + mip_solver_settings_t settings_) + : handle_ptr(handle_ptr_), problem_ptr(problem_ptr_), settings(settings_) { cuopt_assert(problem_ptr != nullptr, "problem_ptr is nullptr"); stats.set_solution_bound(problem_ptr->maximize ? 
std::numeric_limits::infinity() @@ -56,7 +54,6 @@ struct mip_solver_context_t { diversity_manager_t* diversity_manager_ptr{nullptr}; std::atomic preempt_heuristic_solver_ = false; const mip_solver_settings_t settings; - pdlp_initial_scaling_strategy_t* scaling; // nullptr when not available (early FJ) solver_stats_t stats; // Work limit context for tracking work units in deterministic mode (shared across all timers in // GPU heuristic loop) diff --git a/cpp/src/pdlp/pdlp.cu b/cpp/src/pdlp/pdlp.cu index 40d907e469..a759887fc5 100644 --- a/cpp/src/pdlp/pdlp.cu +++ b/cpp/src/pdlp/pdlp.cu @@ -501,8 +501,7 @@ std::optional> pdlp_solver_t } // Check for concurrent limit - if (settings_.method == method_t::Concurrent && settings_.concurrent_halt != nullptr && - *settings_.concurrent_halt == 1) { + if (settings_.concurrent_halt != nullptr && settings_.concurrent_halt->load() == 1) { #ifdef PDLP_VERBOSE_MODE RAFT_CUDA_TRY(cudaDeviceSynchronize()); std::cout << "Concurrent Limit reached, returning current solution" << std::endl; @@ -772,13 +771,34 @@ pdlp_solver_t::check_batch_termination(const timer_t& timer) if (current_termination_strategy_.is_done(term)) { std::cout << "[BATCH MODE]: Climber " << i << " is done with " << optimization_problem_solution_t::get_termination_status_string(term) - << " at step " << total_pdlp_iterations_ << ". It's original index is " + << " at step " << internal_solver_iterations_ << ". 
It's original index is " << climber_strategies_[i].original_index << std::endl; } } #endif - // All are optimal or infeasible + // Sync external solved status into internal termination strategy before all_done() check + if (sb_view_.is_valid()) { + for (size_t i = 0; i < climber_strategies_.size(); ++i) { + // If PDLP has solved it to optimality we want to keep it and resolved both solvers having + // solved the problem later + if (current_termination_strategy_.is_done( + current_termination_strategy_.get_termination_status(i))) + continue; + const i_t local_idx = climber_strategies_[i].original_index; + if (sb_view_.is_solved(local_idx)) { + current_termination_strategy_.set_termination_status( + i, pdlp_termination_status_t::ConcurrentLimit); +#ifdef BATCH_VERBOSE_MODE + std::cout << "[COOP SB] DS already solved climber " << i << " (original_index " << local_idx + << "), synced to ConcurrentLimit at step " << internal_solver_iterations_ + << std::endl; +#endif + } + } + } + + // All are optimal, infeasible, or externally solved if (current_termination_strategy_.all_done()) { const auto original_batch_size = settings_.new_bounds.size(); // Some climber got removed from the batch while the optimization was running @@ -821,10 +841,13 @@ pdlp_solver_t::check_batch_termination(const timer_t& timer) batch_solution_to_return_ .get_additional_termination_informations()[climber_strategies_[i].original_index] .total_number_of_attempted_steps = pdhg_solver_.get_total_pdhg_iterations(); - batch_solution_to_return_ - .get_additional_termination_informations()[climber_strategies_[i].original_index] - .solved_by_pdlp = (current_termination_strategy_.get_termination_status(i) != - pdlp_termination_status_t::ConcurrentLimit); + if (current_termination_strategy_.get_termination_status(i) != + pdlp_termination_status_t::ConcurrentLimit) { + batch_solution_to_return_ + .get_additional_termination_informations()[climber_strategies_[i].original_index] + .solved_by = method_t::PDLP; + 
} + if (sb_view_.is_valid()) { sb_view_.mark_solved(climber_strategies_[i].original_index); } } current_termination_strategy_.fill_gpu_terms_stats(total_pdlp_iterations_); RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_)); @@ -841,6 +864,11 @@ pdlp_solver_t::check_batch_termination(const timer_t& timer) std::move(batch_solution_to_return_.get_additional_termination_informations()), std::move(batch_solution_to_return_.get_terminations_status())}; } + if (sb_view_.is_valid()) { + for (size_t i = 0; i < climber_strategies_.size(); ++i) { + sb_view_.mark_solved(climber_strategies_[i].original_index); + } + } RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_)); return current_termination_strategy_.fill_return_problem_solution( internal_solver_iterations_, @@ -859,8 +887,11 @@ pdlp_solver_t::check_batch_termination(const timer_t& timer) current_termination_strategy_.get_termination_status(i))) { raft::common::nvtx::range fun_scope("remove_done_climber"); #ifdef BATCH_VERBOSE_MODE - std::cout << "Removing climber " << i << " because it is done. Its original index is " - << climber_strategies_[i].original_index << std::endl; + const bool externally_solved = (current_termination_strategy_.get_termination_status(i) == + pdlp_termination_status_t::ConcurrentLimit); + std::cout << "Removing climber " << i << " (original_index " + << climber_strategies_[i].original_index << ") because it is done" + << (externally_solved ? 
" [solved by DS]" : " [solved by PDLP]") << std::endl; #endif to_remove.emplace(i); // Copy current climber solution information @@ -889,10 +920,13 @@ pdlp_solver_t::check_batch_termination(const timer_t& timer) batch_solution_to_return_ .get_additional_termination_informations()[climber_strategies_[i].original_index] .total_number_of_attempted_steps = pdhg_solver_.get_total_pdhg_iterations(); - batch_solution_to_return_ - .get_additional_termination_informations()[climber_strategies_[i].original_index] - .solved_by_pdlp = (current_termination_strategy_.get_termination_status(i) != - pdlp_termination_status_t::ConcurrentLimit); + if (current_termination_strategy_.get_termination_status(i) != + pdlp_termination_status_t::ConcurrentLimit) { + batch_solution_to_return_ + .get_additional_termination_informations()[climber_strategies_[i].original_index] + .solved_by = method_t::PDLP; + } + if (sb_view_.is_valid()) { sb_view_.mark_solved(climber_strategies_[i].original_index); } } } if (to_remove.size() > 0) { @@ -966,7 +1000,7 @@ std::optional> pdlp_solver_t // To avoid that we allow at least two iterations at first before checking (in practice 0 wasn't // enough) We still need to check iteration and time limit prior without breaking the logic below // of first checking termination before the limit - if (total_pdlp_iterations_ <= 1) { + if (internal_solver_iterations_ <= 1) { print_termination_criteria(timer); return check_limits(timer); } @@ -1508,9 +1542,6 @@ HDI void fixed_error_computation(const f_t norm_squared_delta_primal, norm_squared_delta_primal * primal_weight + norm_squared_delta_dual / primal_weight; const f_t computed_interaction = f_t(2.0) * interaction * step_size; - cuopt_assert(movement + computed_interaction >= f_t(0.0), - "Movement + computed interaction must be >= 0"); - // Clamp to 0 to avoid NaN *fixed_point_error = cuda::std::sqrt(cuda::std::max(f_t(0.0), movement + computed_interaction)); @@ -1769,6 +1800,90 @@ void 
pdlp_solver_t::resize_and_swap_all_context_loop( pdhg_solver_.get_primal_tmp_resource().data(), CUSPARSE_ORDER_COL); + // Recalculate SpMM buffer sizes for the new batch dimensions. + // cuSparse may require different buffer sizes when the number of columns changes + // (e.g. SpMM with 1 column may internally fall back to SpMV with larger buffer needs). + { + size_t new_buf_size = 0; + + // PDHG row-row: A_T * batch_dual_solutions -> batch_current_AtYs + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm_bufferSize( + handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + pdhg_cusparse_view.A_T, + pdhg_cusparse_view.batch_dual_solutions, + reusable_device_scalar_value_0_.data(), + pdhg_cusparse_view.batch_current_AtYs, + (deterministic_batch_pdlp) ? CUSPARSE_SPMM_CSR_ALG3 : CUSPARSE_SPMM_CSR_ALG2, + &new_buf_size, + stream_view_)); + pdhg_cusparse_view.buffer_transpose_batch_row_row_.resize(new_buf_size, stream_view_); + + // PDHG row-row: A * batch_reflected_primal_solutions -> batch_dual_gradients + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm_bufferSize( + handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + pdhg_cusparse_view.A, + pdhg_cusparse_view.batch_reflected_primal_solutions, + reusable_device_scalar_value_0_.data(), + pdhg_cusparse_view.batch_dual_gradients, + (deterministic_batch_pdlp) ? 
CUSPARSE_SPMM_CSR_ALG3 : CUSPARSE_SPMM_CSR_ALG2, + &new_buf_size, + stream_view_)); + pdhg_cusparse_view.buffer_non_transpose_batch_row_row_.resize(new_buf_size, stream_view_); + + // Adaptive step size: A_T * batch_potential_next_dual_solution -> batch_next_AtYs + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm_bufferSize( + handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + pdhg_cusparse_view.A_T, + pdhg_cusparse_view.batch_potential_next_dual_solution, + reusable_device_scalar_value_0_.data(), + pdhg_cusparse_view.batch_next_AtYs, + CUSPARSE_SPMM_CSR_ALG3, + &new_buf_size, + stream_view_)); + pdhg_cusparse_view.buffer_transpose_batch.resize(new_buf_size, stream_view_); + + // Convergence info: A_T * batch_dual_solutions -> batch_tmp_primals + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm_bufferSize( + handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + current_op_problem_evaluation_cusparse_view_.A_T, + current_op_problem_evaluation_cusparse_view_.batch_dual_solutions, + reusable_device_scalar_value_0_.data(), + current_op_problem_evaluation_cusparse_view_.batch_tmp_primals, + CUSPARSE_SPMM_CSR_ALG3, + &new_buf_size, + stream_view_)); + current_op_problem_evaluation_cusparse_view_.buffer_transpose_batch.resize(new_buf_size, + stream_view_); + + // Convergence info: A * batch_primal_solutions -> batch_tmp_duals + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm_bufferSize( + handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + current_op_problem_evaluation_cusparse_view_.A, + current_op_problem_evaluation_cusparse_view_.batch_primal_solutions, + reusable_device_scalar_value_0_.data(), + current_op_problem_evaluation_cusparse_view_.batch_tmp_duals, + 
CUSPARSE_SPMM_CSR_ALG3, + &new_buf_size, + stream_view_)); + current_op_problem_evaluation_cusparse_view_.buffer_non_transpose_batch.resize(new_buf_size, + stream_view_); + } + // Rerun preprocess // PDHG SpMM preprocess @@ -2200,6 +2315,22 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co pdhg_solver_.total_pdhg_iterations_ = initial_k_.value(); pdhg_solver_.get_d_total_pdhg_iterations().set_value_async(initial_k_.value(), stream_view_); } + if (settings_.get_initial_pdlp_iteration().has_value()) { + total_pdlp_iterations_ = settings_.get_initial_pdlp_iteration().value(); + // This is meaningless in batch mode since pdhg step is never used, set it just to avoid + // assertions + pdhg_solver_.get_d_total_pdhg_iterations().set_value_async(total_pdlp_iterations_, + stream_view_); + pdhg_solver_.total_pdhg_iterations_ = total_pdlp_iterations_; + // Reset the fixed point error since at this pdlp iteration it is expected to already be + // initialized to some value + std::fill(restart_strategy_.initial_fixed_point_error_.begin(), + restart_strategy_.initial_fixed_point_error_.end(), + f_t(0.0)); + std::fill(restart_strategy_.fixed_point_error_.begin(), + restart_strategy_.fixed_point_error_.end(), + f_t(0.0)); + } // Only the primal_weight_ and step_size_ variables are initialized during the initial phase // The associated primal/dual step_size (computed using the two firstly mentionned) are not @@ -2321,13 +2452,6 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co bool warm_start_was_given = settings_.get_pdlp_warm_start_data().is_populated(); - // In batch mode, before running the solver, we need to transpose the primal and dual solution to - // row format - if (batch_mode_) - transpose_primal_dual_to_row(pdhg_solver_.get_potential_next_primal_solution(), - pdhg_solver_.get_potential_next_dual_solution(), - pdhg_solver_.get_dual_slack()); - if (!inside_mip_) { CUOPT_LOG_INFO( " Iter Primal Obj. Dual Obj. Gap Primal Res. Dual Res. 
Time"); @@ -2390,13 +2514,6 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co } } -#ifdef CUPDLP_DEBUG_MODE - print("before scale slack", pdhg_solver_.get_dual_slack()); - print("before scale potential next primal", - pdhg_solver_.get_potential_next_primal_solution()); - print("before scale potential next dual", pdhg_solver_.get_potential_next_dual_solution()); -#endif - // In case of batch mode, primal and dual matrices are in row format // We need to transpose them to column format before doing any checks if (batch_mode_) { @@ -2412,6 +2529,13 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co pdhg_solver_.get_primal_solution(), pdhg_solver_.get_dual_solution(), dummy); } +#ifdef CUPDLP_DEBUG_MODE + print("before scale slack", pdhg_solver_.get_dual_slack()); + print("before scale potential next primal", + pdhg_solver_.get_potential_next_primal_solution()); + print("before scale potential next dual", pdhg_solver_.get_potential_next_dual_solution()); +#endif + // We go back to the unscaled problem here. 
It ensures that we do not terminate 'too early' // because of the error margin being evaluated on the scaled problem diff --git a/cpp/src/pdlp/pdlp.cuh b/cpp/src/pdlp/pdlp.cuh index de0cf69c91..d03430f150 100644 --- a/cpp/src/pdlp/pdlp.cuh +++ b/cpp/src/pdlp/pdlp.cuh @@ -7,6 +7,7 @@ #pragma once +#include #include #include @@ -138,6 +139,8 @@ class pdlp_solver_t { rmm::cuda_stream_view stream_view_; // Intentionnaly take a copy to avoid an unintentional modification in the calling context const pdlp_solver_settings_t settings_; + dual_simplex::shared_strong_branching_context_view_t sb_view_{ + settings_.shared_sb_solved}; problem_t* problem_ptr; // Combined bounds in op_problem_scaled_ will only be scaled if diff --git a/cpp/src/pdlp/pdlp_constants.hpp b/cpp/src/pdlp/pdlp_constants.hpp index cf17cc985b..568d7d00b0 100644 --- a/cpp/src/pdlp/pdlp_constants.hpp +++ b/cpp/src/pdlp/pdlp_constants.hpp @@ -7,8 +7,6 @@ #pragma once -#include - #include namespace cuopt::linear_programming::detail { diff --git a/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu b/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu index 05da0350d0..821238fe84 100644 --- a/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu +++ b/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu @@ -693,6 +693,12 @@ void pdlp_restart_strategy_t::should_cupdlpx_restart(i_t total_number_ { std::fill(should_restart.begin(), should_restart.end(), 0); +#ifdef CUPDLP_DEBUG_MODE + // Print the current stats of initial fixed point error and fixed point error + print("initial_fixed_point_error", initial_fixed_point_error_); + print("fixed_point_error", fixed_point_error_); +#endif + if (total_number_of_iterations == hyper_params_.major_iteration) { #ifdef CUPDLP_DEBUG_MODE printf("forced restart at first major\n"); diff --git a/cpp/src/pdlp/solution_conversion.cu b/cpp/src/pdlp/solution_conversion.cu index 7993445a08..8ec3c20b27 100644 --- a/cpp/src/pdlp/solution_conversion.cu +++ 
b/cpp/src/pdlp/solution_conversion.cu @@ -95,7 +95,7 @@ cuopt::cython::linear_programming_ret_t gpu_lp_solution_t::to_linear_p ret.gap_ = term_info.gap; ret.nb_iterations_ = term_info.number_of_steps_taken; ret.solve_time_ = term_info.solve_time; - ret.solved_by_pdlp_ = term_info.solved_by_pdlp; + ret.solved_by_ = term_info.solved_by; } return ret; @@ -181,7 +181,7 @@ cpu_lp_solution_t::to_cpu_linear_programming_ret_t() ret.gap_ = gap_; ret.nb_iterations_ = num_iterations_; ret.solve_time_ = solve_time_; - ret.solved_by_pdlp_ = solved_by_pdlp_; + ret.solved_by_ = solved_by_; return ret; } diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index b8d714a663..29a7f32db6 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -351,7 +351,7 @@ optimization_problem_solution_t convert_dual_simplex_sol( f_t duration, f_t norm_user_objective, f_t norm_rhs, - i_t method) + method_t method) { auto to_termination_status = [](dual_simplex::lp_status_t status) { switch (status) { @@ -389,7 +389,7 @@ optimization_problem_solution_t convert_dual_simplex_sol( std::vector< typename optimization_problem_solution_t::additional_termination_information_t> info(1); - info[0].solved_by_pdlp = false; + info[0].solved_by = method; info[0].primal_objective = solution.user_objective; info[0].dual_objective = solution.user_objective; info[0].gap = 0.0; @@ -420,7 +420,7 @@ optimization_problem_solution_t convert_dual_simplex_sol( termination_status != pdlp_termination_status_t::TimeLimit && termination_status != pdlp_termination_status_t::ConcurrentLimit) { CUOPT_LOG_INFO("%s Solve status %s", - method == 0 ? "Dual Simplex" : "Barrier", + method == method_t::DualSimplex ? 
"Dual Simplex" : "Barrier", sol.get_termination_status_string().c_str()); } @@ -494,7 +494,7 @@ optimization_problem_solution_t run_barrier( std::get<2>(sol_dual_simplex), std::get<3>(sol_dual_simplex), std::get<4>(sol_dual_simplex), - 1); + method_t::Barrier); } template @@ -568,7 +568,7 @@ optimization_problem_solution_t run_dual_simplex( std::get<2>(sol_dual_simplex), std::get<3>(sol_dual_simplex), std::get<4>(sol_dual_simplex), - 0); + method_t::DualSimplex); } #if PDLP_INSTANTIATE_FLOAT || CUOPT_INSTANTIATE_FLOAT @@ -670,7 +670,7 @@ static optimization_problem_solution_t run_pdlp_solver_in_fp32( di.max_dual_ray_infeasibility = static_cast(fi.max_dual_ray_infeasibility); di.dual_ray_linear_objective = static_cast(fi.dual_ray_linear_objective); di.solve_time = fi.solve_time; - di.solved_by_pdlp = fi.solved_by_pdlp; + di.solved_by = fi.solved_by; term_infos.push_back(di); } @@ -845,17 +845,18 @@ optimization_problem_solution_t run_pdlp(detail::problem_t& return sol; } +// Compute in double as some cases overflow when using size_t template -static size_t batch_pdlp_memory_estimator(const optimization_problem_t& problem, - int trial_batch_size, - int max_batch_size) +static double batch_pdlp_memory_estimator(const optimization_problem_t& problem, + double trial_batch_size, + bool collect_solutions = false) { - size_t total_memory = 0; + double total_memory = 0.0; // In PDLP we store the scaled version of the problem which contains all of those total_memory += problem.get_constraint_matrix_indices().size() * sizeof(i_t); total_memory += problem.get_constraint_matrix_offsets().size() * sizeof(i_t); total_memory += problem.get_constraint_matrix_values().size() * sizeof(f_t); - total_memory *= 2; // To account for the A_t matrix + total_memory *= 2.0; // To account for the A_t matrix total_memory += problem.get_objective_coefficients().size() * sizeof(f_t); total_memory += problem.get_constraint_bounds().size() * sizeof(f_t); total_memory += 
problem.get_variable_lower_bounds().size() * sizeof(f_t); @@ -893,14 +894,16 @@ static size_t batch_pdlp_memory_estimator(const optimization_problem_t total_memory += trial_batch_size * problem.get_n_variables() * sizeof(f_t); total_memory += trial_batch_size * problem.get_n_constraints() * sizeof(f_t); - // Data for the solution - total_memory += problem.get_n_variables() * max_batch_size * sizeof(f_t); - total_memory += problem.get_n_constraints() * max_batch_size * sizeof(f_t); - total_memory += problem.get_n_variables() * max_batch_size * sizeof(f_t); + // Data for the solution (only allocated when collect_solutions is true) + if (collect_solutions) { + total_memory += problem.get_n_variables() * trial_batch_size * sizeof(f_t); + total_memory += problem.get_n_constraints() * trial_batch_size * sizeof(f_t); + total_memory += problem.get_n_variables() * trial_batch_size * sizeof(f_t); + } - // Add a 50% overhead to make sure we have enough memory considering other parts of the solver may - // allocate at the same time - total_memory *= 1.5; + // Add a 70% overhead to make sure we have enough memory considering other parts of the solver may + // need memory later while the batch PDLP is running + total_memory *= 1.7; // Data from saddle point state return total_memory; @@ -911,125 +914,168 @@ optimization_problem_solution_t run_batch_pdlp( optimization_problem_t& problem, pdlp_solver_settings_t const& settings) { // Hyper parameter than can be changed, I have put what I believe to be the best - bool primal_dual_init = true; - bool primal_weight_init = true; - bool use_optimal_batch_size = false; - constexpr int iteration_limit = 100000; - // Shouldn't we work on the unpresolved and/or unscaled problem for PDLP? - // Shouldn't we put an iteration limit? If yes what should we do with the partial solutions? 
+ constexpr bool pdlp_primal_dual_init = true; + constexpr bool primal_weight_init = true; + constexpr bool use_initial_pdlp_iterations = true; + bool use_optimal_batch_size = false; + constexpr int batch_iteration_limit = 100000; + constexpr f_t pdlp_tolerance = 1e-5; rmm::cuda_stream_view stream = problem.get_handle_ptr()->get_stream(); rmm::device_uvector initial_primal(0, stream); rmm::device_uvector initial_dual(0, stream); - f_t initial_step_size = std::numeric_limits::signaling_NaN(); - f_t initial_primal_weight = std::numeric_limits::signaling_NaN(); + f_t initial_step_size = std::numeric_limits::signaling_NaN(); + f_t initial_primal_weight = std::numeric_limits::signaling_NaN(); + i_t initial_pdlp_iteration = -1; cuopt_assert(settings.new_bounds.size() > 0, "Batch size should be greater than 0"); - const int max_batch_size = settings.new_bounds.size(); - int memory_max_batch_size = max_batch_size; + const size_t max_batch_size = settings.new_bounds.size(); + size_t memory_max_batch_size = max_batch_size; // Check if we don't hit the limit using max_batch_size - const size_t memory_estimate = - batch_pdlp_memory_estimator(problem, max_batch_size, max_batch_size); - size_t free_mem, total_mem; - RAFT_CUDA_TRY(cudaMemGetInfo(&free_mem, &total_mem)); + const bool collect_solutions = settings.generate_batch_primal_dual_solution; + const double memory_estimate = + batch_pdlp_memory_estimator(problem, max_batch_size, collect_solutions); + size_t st_free_mem, st_total_mem; + RAFT_CUDA_TRY(cudaMemGetInfo(&st_free_mem, &st_total_mem)); + const double free_mem = static_cast(st_free_mem); + const double total_mem = static_cast(st_total_mem); + +#ifdef BATCH_VERBOSE_MODE + std::cout << "Memory estimate: " << memory_estimate << std::endl; + std::cout << "Free memory: " << free_mem << std::endl; + std::cout << "Total memory: " << total_mem << std::endl; +#endif if (memory_estimate > free_mem) { use_optimal_batch_size = true; // Decrement batch size iteratively until we 
find a batch size that fits while (memory_max_batch_size > 1) { - const size_t memory_estimate = - batch_pdlp_memory_estimator(problem, memory_max_batch_size, max_batch_size); + const double memory_estimate = + batch_pdlp_memory_estimator(problem, memory_max_batch_size, collect_solutions); if (memory_estimate <= free_mem) { break; } +#ifdef BATCH_VERBOSE_MODE + std::cout << "Memory estimate: " << memory_estimate << std::endl; + std::cout << "Memory max batch size: " << memory_max_batch_size << std::endl; + std::cout << "Free memory: " << free_mem << std::endl; + std::cout << "Total memory: " << total_mem << std::endl; + std::cout << "--------------------------------" << std::endl; +#endif memory_max_batch_size--; } - const size_t min_estimate = - batch_pdlp_memory_estimator(problem, memory_max_batch_size, max_batch_size); - cuopt_expects(min_estimate <= free_mem, - error_type_t::OutOfMemoryError, - "Insufficient GPU memory for batch PDLP (min batch size still too large)"); + const double min_estimate = + batch_pdlp_memory_estimator(problem, memory_max_batch_size, collect_solutions); + if (min_estimate > free_mem) { + return optimization_problem_solution_t(pdlp_termination_status_t::NumericalError, + stream); + } } - int optimal_batch_size = use_optimal_batch_size - ? detail::optimal_batch_size_handler(problem, memory_max_batch_size) - : max_batch_size; + size_t optimal_batch_size = use_optimal_batch_size + ? 
detail::optimal_batch_size_handler(problem, memory_max_batch_size) + : max_batch_size; + if (settings.sub_batch_size > 0) { optimal_batch_size = settings.sub_batch_size; } cuopt_assert(optimal_batch_size != 0 && optimal_batch_size <= max_batch_size, "Optimal batch size should be between 1 and max batch size"); - using f_t2 = typename type_2::type; - - // If need warm start, solve the LP alone - if (primal_dual_init || primal_weight_init) { - pdlp_solver_settings_t warm_start_settings = settings; - warm_start_settings.new_bounds.clear(); - warm_start_settings.method = cuopt::linear_programming::method_t::PDLP; - warm_start_settings.presolver = cuopt::linear_programming::presolver_t::None; - warm_start_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; - warm_start_settings.detect_infeasibility = false; - warm_start_settings.iteration_limit = iteration_limit; - warm_start_settings.inside_mip = true; - optimization_problem_solution_t original_solution = - solve_lp(problem, warm_start_settings); - if (primal_dual_init) { - initial_primal = rmm::device_uvector(original_solution.get_primal_solution(), - original_solution.get_primal_solution().stream()); - initial_dual = rmm::device_uvector(original_solution.get_dual_solution(), - original_solution.get_dual_solution().stream()); - initial_step_size = original_solution.get_pdlp_warm_start_data().initial_step_size_; + + const bool warm_start_from_settings = settings.has_initial_primal_solution() || + settings.has_initial_dual_solution() || + settings.get_initial_step_size().has_value() || + settings.get_initial_primal_weight().has_value() || + settings.get_initial_pdlp_iteration().has_value(); + + if (warm_start_from_settings) { +#ifdef BATCH_VERBOSE_MODE + std::cout << "Using warm start from settings" << std::endl; +#endif + if (settings.has_initial_primal_solution() && pdlp_primal_dual_init) { + initial_primal = rmm::device_uvector(settings.get_initial_primal_solution(), + 
settings.get_initial_primal_solution().stream()); + } + if (settings.has_initial_dual_solution() && pdlp_primal_dual_init) { + initial_dual = rmm::device_uvector(settings.get_initial_dual_solution(), + settings.get_initial_dual_solution().stream()); + } + if (settings.get_initial_step_size().has_value() && pdlp_primal_dual_init) { + initial_step_size = *settings.get_initial_step_size(); } - if (primal_weight_init) { - initial_primal_weight = original_solution.get_pdlp_warm_start_data().initial_primal_weight_; + if (settings.get_initial_primal_weight().has_value() && primal_weight_init) { + initial_primal_weight = *settings.get_initial_primal_weight(); + } + if (settings.get_initial_pdlp_iteration().has_value() && use_initial_pdlp_iterations) { + initial_pdlp_iteration = *settings.get_initial_pdlp_iteration(); } } - rmm::device_uvector full_primal_solution(problem.get_n_variables() * max_batch_size, stream); - rmm::device_uvector full_dual_solution(problem.get_n_constraints() * max_batch_size, stream); - rmm::device_uvector full_reduced_cost(problem.get_n_variables() * max_batch_size, stream); + rmm::device_uvector full_primal_solution( + (collect_solutions) ? problem.get_n_variables() * max_batch_size : 0, stream); + rmm::device_uvector full_dual_solution( + (collect_solutions) ? problem.get_n_constraints() * max_batch_size : 0, stream); + rmm::device_uvector full_reduced_cost( + (collect_solutions) ? 
problem.get_n_variables() * max_batch_size : 0, stream); std::vector< typename optimization_problem_solution_t::additional_termination_information_t> full_info; std::vector full_status; - pdlp_solver_settings_t batch_settings = settings; - const auto original_new_bounds = batch_settings.new_bounds; - batch_settings.method = cuopt::linear_programming::method_t::PDLP; - batch_settings.presolver = presolver_t::None; - batch_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; - batch_settings.detect_infeasibility = false; - batch_settings.iteration_limit = iteration_limit; - batch_settings.inside_mip = true; - if (primal_dual_init) { + pdlp_solver_settings_t batch_settings = settings; + const auto original_new_bounds = batch_settings.new_bounds; + batch_settings.method = cuopt::linear_programming::method_t::PDLP; + batch_settings.presolver = presolver_t::None; + batch_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; + batch_settings.detect_infeasibility = false; + batch_settings.iteration_limit = batch_iteration_limit; + batch_settings.inside_mip = true; + batch_settings.tolerances.absolute_dual_tolerance = pdlp_tolerance; + batch_settings.tolerances.relative_dual_tolerance = pdlp_tolerance; + batch_settings.tolerances.absolute_primal_tolerance = pdlp_tolerance; + batch_settings.tolerances.relative_primal_tolerance = pdlp_tolerance; + batch_settings.tolerances.absolute_gap_tolerance = pdlp_tolerance; + batch_settings.tolerances.relative_gap_tolerance = pdlp_tolerance; + if (initial_primal.size() > 0) { batch_settings.set_initial_primal_solution( initial_primal.data(), initial_primal.size(), initial_primal.stream()); + } + if (initial_dual.size() > 0) { batch_settings.set_initial_dual_solution( initial_dual.data(), initial_dual.size(), initial_dual.stream()); - batch_settings.set_initial_step_size(initial_step_size); } - if (primal_weight_init) { batch_settings.set_initial_primal_weight(initial_primal_weight); } + if (!std::isnan(initial_step_size)) { 
batch_settings.set_initial_step_size(initial_step_size); } + if (initial_pdlp_iteration != -1) { + batch_settings.set_initial_pdlp_iteration(initial_pdlp_iteration); + } + if (!std::isnan(initial_primal_weight)) { + batch_settings.set_initial_primal_weight(initial_primal_weight); + } - for (int i = 0; i < max_batch_size; i += optimal_batch_size) { - const int current_batch_size = std::min(optimal_batch_size, max_batch_size - i); + for (size_t i = 0; i < max_batch_size; i += optimal_batch_size) { + const size_t current_batch_size = std::min(optimal_batch_size, max_batch_size - i); // Only take the new bounds from [i, i + current_batch_size) batch_settings.new_bounds = std::vector>( original_new_bounds.begin() + i, original_new_bounds.begin() + i + current_batch_size); - auto sol = solve_lp(problem, batch_settings); + if (!settings.shared_sb_solved.empty()) { + batch_settings.shared_sb_solved = settings.shared_sb_solved.subspan(i, current_batch_size); + } - // Copy results - raft::copy(full_primal_solution.data() + i * problem.get_n_variables(), - sol.get_primal_solution().data(), - problem.get_n_variables() * current_batch_size, - stream); - raft::copy(full_dual_solution.data() + i * problem.get_n_constraints(), - sol.get_dual_solution().data(), - problem.get_n_constraints() * current_batch_size, - stream); - raft::copy(full_reduced_cost.data() + i * problem.get_n_variables(), - sol.get_reduced_cost().data(), - problem.get_n_variables() * current_batch_size, - stream); + auto sol = solve_lp(problem, batch_settings); + if (collect_solutions) { + raft::copy(full_primal_solution.data() + i * problem.get_n_variables(), + sol.get_primal_solution().data(), + sol.get_primal_solution().size(), + stream); + raft::copy(full_dual_solution.data() + i * problem.get_n_constraints(), + sol.get_dual_solution().data(), + sol.get_dual_solution().size(), + stream); + raft::copy(full_reduced_cost.data() + i * problem.get_n_variables(), + sol.get_reduced_cost().data(), + 
sol.get_reduced_cost().size(), + stream); + } auto info = sol.get_additional_termination_informations(); full_info.insert(full_info.end(), info.begin(), info.end()); @@ -1210,7 +1256,7 @@ optimization_problem_solution_t run_concurrent( std::get<2>(*sol_dual_simplex_ptr), std::get<3>(*sol_dual_simplex_ptr), std::get<4>(*sol_dual_simplex_ptr), - 0) + method_t::DualSimplex) : optimization_problem_solution_t{pdlp_termination_status_t::ConcurrentLimit, problem.handle_ptr->get_stream()}; @@ -1221,7 +1267,7 @@ optimization_problem_solution_t run_concurrent( std::get<2>(*sol_barrier_ptr), std::get<3>(*sol_barrier_ptr), std::get<4>(*sol_barrier_ptr), - 1); + method_t::Barrier); f_t end_time = timer.elapsed_time(); CUOPT_LOG_CONDITIONAL_INFO(!settings.inside_mip, "Concurrent time: %.3fs", end_time); diff --git a/cpp/src/pdlp/solver_settings.cu b/cpp/src/pdlp/solver_settings.cu index 7acfc7481c..ac2564bb16 100644 --- a/cpp/src/pdlp/solver_settings.cu +++ b/cpp/src/pdlp/solver_settings.cu @@ -61,12 +61,30 @@ void pdlp_solver_settings_t::set_initial_dual_solution(const f_t* init template void pdlp_solver_settings_t::set_initial_step_size(f_t initial_step_size) { + cuopt_expects(initial_step_size > f_t(0), + error_type_t::ValidationError, + "Initial step size must be greater than 0"); + cuopt_expects(!std::isinf(initial_step_size), + error_type_t::ValidationError, + "Initial step size must be finite"); + cuopt_expects(!std::isnan(initial_step_size), + error_type_t::ValidationError, + "Initial step size must be a number"); initial_step_size_ = std::make_optional(initial_step_size); } template void pdlp_solver_settings_t::set_initial_primal_weight(f_t initial_primal_weight) { + cuopt_expects(initial_primal_weight > f_t(0), + error_type_t::ValidationError, + "Initial primal weight must be greater than 0"); + cuopt_expects(!std::isinf(initial_primal_weight), + error_type_t::ValidationError, + "Initial primal weight must be finite"); + 
cuopt_expects(!std::isnan(initial_primal_weight), + error_type_t::ValidationError, + "Initial primal weight must be a number"); initial_primal_weight_ = std::make_optional(initial_primal_weight); } @@ -348,6 +366,21 @@ std::optional pdlp_solver_settings_t::get_initial_primal_weight() return initial_primal_weight_; } +template +void pdlp_solver_settings_t::set_initial_pdlp_iteration(i_t initial_pdlp_iteration) +{ + cuopt_expects(initial_pdlp_iteration >= 0, + error_type_t::ValidationError, + "Initial pdlp iteration must be greater than or equal to 0"); + initial_pdlp_iteration_ = std::make_optional(initial_pdlp_iteration); +} + +template +std::optional pdlp_solver_settings_t::get_initial_pdlp_iteration() const +{ + return initial_pdlp_iteration_; +} + template const pdlp_warm_start_data_t& pdlp_solver_settings_t::get_pdlp_warm_start_data() const noexcept diff --git a/cpp/src/pdlp/solver_solution.cu b/cpp/src/pdlp/solver_solution.cu index ba68d20740..ec0492dac3 100644 --- a/cpp/src/pdlp/solver_solution.cu +++ b/cpp/src/pdlp/solver_solution.cu @@ -168,12 +168,12 @@ void optimization_problem_solution_t::write_additional_termination_sta myfile << "\t\"Additional termination information\" : { " << std::endl; myfile << "\t\"Number of steps taken\" : " << termination_stats.number_of_steps_taken << "," << std::endl; - if (termination_stats.solved_by_pdlp) { + if (termination_stats.solved_by == method_t::PDLP) { myfile << "\t\"Total number of attempted steps\" : " << termination_stats.total_number_of_attempted_steps << "," << std::endl; } myfile << "\t\"Total solve time\" : " << termination_stats.solve_time; - if (termination_stats.solved_by_pdlp) { + if (termination_stats.solved_by == method_t::PDLP) { myfile << "," << std::endl; myfile << "\t\t\"Convergence measures\" : { " << std::endl; myfile << "\t\t\t\"Absolute primal residual\" : " << termination_stats.l2_primal_residual << "," diff --git a/cpp/src/pdlp/termination_strategy/termination_strategy.cu 
b/cpp/src/pdlp/termination_strategy/termination_strategy.cu index 7179df6a49..5a621daaef 100644 --- a/cpp/src/pdlp/termination_strategy/termination_strategy.cu +++ b/cpp/src/pdlp/termination_strategy/termination_strategy.cu @@ -124,6 +124,14 @@ pdlp_termination_status_t pdlp_termination_strategy_t::get_termination return (pdlp_termination_status_t)termination_status_[id]; } +template +void pdlp_termination_strategy_t::set_termination_status(i_t id, + pdlp_termination_status_t status) +{ + cuopt_assert(id < termination_status_.size(), "id too big for batch size"); + termination_status_[id] = (i_t)status; +} + template std::vector pdlp_termination_strategy_t::get_terminations_status() @@ -389,7 +397,8 @@ __host__ __device__ bool pdlp_termination_strategy_t::is_done( { return termination_status == pdlp_termination_status_t::Optimal || termination_status == pdlp_termination_status_t::PrimalInfeasible || - termination_status == pdlp_termination_status_t::DualInfeasible; + termination_status == pdlp_termination_status_t::DualInfeasible || + termination_status == pdlp_termination_status_t::ConcurrentLimit; } template @@ -596,8 +605,10 @@ pdlp_termination_strategy_t::fill_return_problem_solution( &infeasibility_information_view.dual_ray_linear_objective[i], 1, stream_view_); - term_stats_vector[i].solved_by_pdlp = - (termination_status[i] != pdlp_termination_status_t::ConcurrentLimit); + + if (termination_status[i] != pdlp_termination_status_t::ConcurrentLimit) { + term_stats_vector[i].solved_by = method_t::PDLP; + } } RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_)); diff --git a/cpp/src/pdlp/termination_strategy/termination_strategy.hpp b/cpp/src/pdlp/termination_strategy/termination_strategy.hpp index 6fe118c488..efb7a41d7b 100644 --- a/cpp/src/pdlp/termination_strategy/termination_strategy.hpp +++ b/cpp/src/pdlp/termination_strategy/termination_strategy.hpp @@ -140,6 +140,7 @@ class pdlp_termination_strategy_t { f_t get_relative_primal_tolerance_factor() const; 
pdlp_termination_status_t get_termination_status(i_t id) const; + void set_termination_status(i_t id, pdlp_termination_status_t status); std::vector get_terminations_status(); bool all_optimal_status() const; bool all_done() const; diff --git a/cpp/src/pdlp/utilities/ping_pong_graph.cu b/cpp/src/pdlp/utilities/ping_pong_graph.cu index 4ec5bff8c1..0df3861b5a 100644 --- a/cpp/src/pdlp/utilities/ping_pong_graph.cu +++ b/cpp/src/pdlp/utilities/ping_pong_graph.cu @@ -8,6 +8,7 @@ #include #include +#include #include diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_test.c b/cpp/tests/linear_programming/c_api_tests/c_api_test.c index 689c8ed742..639aa8c379 100644 --- a/cpp/tests/linear_programming/c_api_tests/c_api_test.c +++ b/cpp/tests/linear_programming/c_api_tests/c_api_test.c @@ -18,7 +18,6 @@ #error "This file must be compiled as C code" #endif - int check_problem(cuOptOptimizationProblem problem, cuopt_int_t num_constraints, cuopt_int_t num_variables, @@ -58,27 +57,24 @@ const char* termination_status_to_string(cuopt_int_t termination_status) return "Unknown"; } +int test_int_size() { return cuOptGetIntSize(); } -int test_int_size() { - return cuOptGetIntSize(); -} - -int test_float_size() { - return cuOptGetFloatSize(); -} +int test_float_size() { return cuOptGetFloatSize(); } -cuopt_int_t test_missing_file() { +cuopt_int_t test_missing_file() +{ cuOptOptimizationProblem problem = NULL; - cuOptSolverSettings settings = NULL; - cuOptSolution solution = NULL; - cuopt_int_t status = cuOptReadProblem("missing_file.mps", &problem); + cuOptSolverSettings settings = NULL; + cuOptSolution solution = NULL; + cuopt_int_t status = cuOptReadProblem("missing_file.mps", &problem); cuOptDestroyProblem(&problem); cuOptDestroySolverSettings(&settings); cuOptDestroySolution(&solution); return status; } -cuopt_int_t test_bad_parameter_name() { +cuopt_int_t test_bad_parameter_name() +{ cuOptSolverSettings settings = NULL; cuopt_int_t status; cuopt_int_t value; @@ -152,16 
+148,13 @@ static void mip_get_solution_callback(const cuopt_float_t* solution, if (context == NULL) { return; } context->get_calls += 1; if (context->last_solution == NULL) { - context->last_solution = - (cuopt_float_t*)malloc(context->n_variables * sizeof(cuopt_float_t)); + context->last_solution = (cuopt_float_t*)malloc(context->n_variables * sizeof(cuopt_float_t)); if (context->last_solution == NULL) { context->error = 1; return; } } - memcpy(context->last_solution, - solution, - context->n_variables * sizeof(cuopt_float_t)); + memcpy(context->last_solution, solution, context->n_variables * sizeof(cuopt_float_t)); memcpy(&context->last_objective, objective_value, sizeof(cuopt_float_t)); memcpy(&context->last_solution_bound, solution_bound, sizeof(cuopt_float_t)); } @@ -176,18 +169,16 @@ static void mip_set_solution_callback(cuopt_float_t* solution, context->set_calls += 1; memcpy(&context->last_solution_bound, solution_bound, sizeof(cuopt_float_t)); if (context->last_solution == NULL) { return; } - memcpy(solution, - context->last_solution, - context->n_variables * sizeof(cuopt_float_t)); + memcpy(solution, context->last_solution, context->n_variables * sizeof(cuopt_float_t)); memcpy(objective_value, &context->last_objective, sizeof(cuopt_float_t)); } static cuopt_int_t test_mip_callbacks_internal(int include_set_callback) { cuOptOptimizationProblem problem = NULL; - cuOptSolverSettings settings = NULL; - cuOptSolution solution = NULL; - mip_callback_context_t context = {0}; + cuOptSolverSettings settings = NULL; + cuOptSolution solution = NULL; + mip_callback_context_t context = {0}; #define NUM_ITEMS 8 #define NUM_CONSTRAINTS 1 @@ -202,7 +193,7 @@ static cuopt_int_t test_mip_callbacks_internal(int include_set_callback) cuopt_int_t row_offsets[] = {0, NUM_ITEMS}; cuopt_int_t column_indices[NUM_ITEMS]; - cuopt_float_t rhs[] = {max_weight}; + cuopt_float_t rhs[] = {max_weight}; char constraint_sense[] = {CUOPT_LESS_THAN}; cuopt_float_t lower_bounds[NUM_ITEMS]; 
cuopt_float_t upper_bounds[NUM_ITEMS]; @@ -296,15 +287,9 @@ static cuopt_int_t test_mip_callbacks_internal(int include_set_callback) return status; } -cuopt_int_t test_mip_get_callbacks_only() -{ - return test_mip_callbacks_internal(0); -} +cuopt_int_t test_mip_get_callbacks_only() { return test_mip_callbacks_internal(0); } -cuopt_int_t test_mip_get_set_callbacks() -{ - return test_mip_callbacks_internal(1); -} +cuopt_int_t test_mip_get_set_callbacks() { return test_mip_callbacks_internal(1); } cuopt_int_t burglar_problem() { @@ -332,7 +317,7 @@ cuopt_int_t burglar_problem() cuopt_int_t row_offsets[] = {0, NUM_ITEMS}; cuopt_int_t column_indices[NUM_ITEMS]; - cuopt_float_t rhs[] = {max_weight}; + cuopt_float_t rhs[] = {max_weight}; char constraint_sense[] = {CUOPT_LESS_THAN}; cuopt_float_t lower_bounds[NUM_ITEMS]; cuopt_float_t upper_bounds[NUM_ITEMS]; @@ -453,11 +438,16 @@ cuopt_int_t burglar_problem() return status; } -int solve_mps_file(const char* filename, double time_limit, double iteration_limit, int* termination_status_ptr, double* solve_time_ptr, int method) +int solve_mps_file(const char* filename, + double time_limit, + double iteration_limit, + int* termination_status_ptr, + double* solve_time_ptr, + int method) { cuOptOptimizationProblem problem = NULL; - cuOptSolverSettings settings = NULL; - cuOptSolution solution = NULL; + cuOptSolverSettings settings = NULL; + cuOptSolution solution = NULL; cuopt_int_t status; cuopt_int_t is_mip; cuopt_int_t termination_status = -1; @@ -500,9 +490,10 @@ int solve_mps_file(const char* filename, double time_limit, double iteration_lim } status = cuOptSolve(problem, settings, &solution); if (status != CUOPT_SUCCESS) { - #define ERROR_BUFFER_SIZE 1024 +#define ERROR_BUFFER_SIZE 1024 char error_string[ERROR_BUFFER_SIZE]; - cuopt_int_t error_string_status = cuOptGetErrorString(solution, error_string, ERROR_BUFFER_SIZE); + cuopt_int_t error_string_status = + cuOptGetErrorString(solution, error_string, ERROR_BUFFER_SIZE); 
if (error_string_status != CUOPT_SUCCESS) { printf("Error getting error string\n"); goto DONE; @@ -572,14 +563,14 @@ int check_problem(cuOptOptimizationProblem problem, char* check_variable_types; cuopt_int_t status; check_objective_coefficients = (cuopt_float_t*)malloc(num_variables * sizeof(cuopt_float_t)); - check_row_offsets = (cuopt_int_t*)malloc((num_constraints + 1) * sizeof(cuopt_int_t)); - check_column_indices = (cuopt_int_t*)malloc(nnz * sizeof(cuopt_int_t)); - check_values = (cuopt_float_t*)malloc(nnz * sizeof(cuopt_float_t)); - check_constraint_sense = (char*)malloc(num_constraints * sizeof(char)); - check_rhs = (cuopt_float_t*)malloc(num_constraints * sizeof(cuopt_float_t)); - check_var_lower_bounds = (cuopt_float_t*)malloc(num_variables * sizeof(cuopt_float_t)); - check_var_upper_bounds = (cuopt_float_t*)malloc(num_variables * sizeof(cuopt_float_t)); - check_variable_types = (char*)malloc(num_variables * sizeof(char)); + check_row_offsets = (cuopt_int_t*)malloc((num_constraints + 1) * sizeof(cuopt_int_t)); + check_column_indices = (cuopt_int_t*)malloc(nnz * sizeof(cuopt_int_t)); + check_values = (cuopt_float_t*)malloc(nnz * sizeof(cuopt_float_t)); + check_constraint_sense = (char*)malloc(num_constraints * sizeof(char)); + check_rhs = (cuopt_float_t*)malloc(num_constraints * sizeof(cuopt_float_t)); + check_var_lower_bounds = (cuopt_float_t*)malloc(num_variables * sizeof(cuopt_float_t)); + check_var_upper_bounds = (cuopt_float_t*)malloc(num_variables * sizeof(cuopt_float_t)); + check_variable_types = (char*)malloc(num_variables * sizeof(char)); status = cuOptGetNumConstraints(problem, &check_num_constraints); if (status != CUOPT_SUCCESS) { @@ -637,7 +628,9 @@ int check_problem(cuOptOptimizationProblem problem, goto DONE; } if (check_objective_offset != objective_offset) { - printf("Error: expected objective offset to be %f, but got %f\n", objective_offset, check_objective_offset); + printf("Error: expected objective offset to be %f, but got %f\n", + 
objective_offset, + check_objective_offset); status = -1; goto DONE; } @@ -791,9 +784,8 @@ int check_problem(cuOptOptimizationProblem problem, cuopt_int_t test_infeasible_problem() { cuOptOptimizationProblem problem = NULL; - cuOptSolverSettings settings = NULL; - cuOptSolution solution = NULL; - + cuOptSolverSettings settings = NULL; + cuOptSolution solution = NULL; /* Solve the following problem minimize 0 @@ -812,21 +804,28 @@ cuopt_int_t test_infeasible_problem() 0 1 2 3 */ - cuopt_int_t num_variables = 4; + cuopt_int_t num_variables = 4; cuopt_int_t num_constraints = 9; - cuopt_int_t nnz = 17; - cuopt_int_t row_offsets[] = {0, 2, 4, 6, 7, 9, 10, 12, 15, 17}; + cuopt_int_t nnz = 17; + cuopt_int_t row_offsets[] = {0, 2, 4, 6, 7, 9, 10, 12, 15, 17}; // clang-format off // row1, row2, row3, row4, row5,row6, row7, row8, row9 cuopt_int_t column_indices[] = {0, 1, 0, 1, 0, 1, 3, 2, 3, 2, 0, 3, 0, 1, 2, 1, 2}; cuopt_float_t values[] = {-0.5, 1.0, 2.0, -1.0, 3.0, 1.0, 1.0, 3.0, -1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0}; // clang-format on - cuopt_float_t rhs[] = {0.5, 3.0, 6.0, 2.0, 2.0, 5.0, 10.0, 14.0, 1.0}; - char constraint_sense[] = {CUOPT_GREATER_THAN, CUOPT_GREATER_THAN, - CUOPT_LESS_THAN, CUOPT_LESS_THAN, CUOPT_LESS_THAN, - CUOPT_GREATER_THAN, CUOPT_LESS_THAN, CUOPT_LESS_THAN, CUOPT_GREATER_THAN}; + cuopt_float_t rhs[] = {0.5, 3.0, 6.0, 2.0, 2.0, 5.0, 10.0, 14.0, 1.0}; + char constraint_sense[] = {CUOPT_GREATER_THAN, + CUOPT_GREATER_THAN, + CUOPT_LESS_THAN, + CUOPT_LESS_THAN, + CUOPT_LESS_THAN, + CUOPT_GREATER_THAN, + CUOPT_LESS_THAN, + CUOPT_LESS_THAN, + CUOPT_GREATER_THAN}; cuopt_float_t var_lower_bounds[] = {0.0, 0.0, 0.0, 0.0}; - cuopt_float_t var_upper_bounds[] = {CUOPT_INFINITY, CUOPT_INFINITY, CUOPT_INFINITY, CUOPT_INFINITY}; + cuopt_float_t var_upper_bounds[] = { + CUOPT_INFINITY, CUOPT_INFINITY, CUOPT_INFINITY, CUOPT_INFINITY}; char variable_types[] = {CUOPT_CONTINUOUS, CUOPT_CONTINUOUS, CUOPT_CONTINUOUS, CUOPT_CONTINUOUS}; cuopt_float_t 
objective_coefficients[] = {0.0, 0.0, 0.0, 0.0}; @@ -835,19 +834,19 @@ cuopt_int_t test_infeasible_problem() cuopt_float_t objective_value; cuopt_int_t status = cuOptCreateProblem(num_constraints, - num_variables, - CUOPT_MINIMIZE, - 0.0, - objective_coefficients, - row_offsets, - column_indices, - values, - constraint_sense, - rhs, - var_lower_bounds, - var_upper_bounds, - variable_types, - &problem); + num_variables, + CUOPT_MINIMIZE, + 0.0, + objective_coefficients, + row_offsets, + column_indices, + values, + constraint_sense, + rhs, + var_lower_bounds, + var_upper_bounds, + variable_types, + &problem); if (status != CUOPT_SUCCESS) { printf("Error creating problem\n"); goto DONE; @@ -923,12 +922,11 @@ cuopt_int_t test_infeasible_problem() return status; } - -cuopt_int_t test_ranged_problem(cuopt_int_t *termination_status_ptr, cuopt_float_t *objective_ptr) +cuopt_int_t test_ranged_problem(cuopt_int_t* termination_status_ptr, cuopt_float_t* objective_ptr) { cuOptOptimizationProblem problem = NULL; - cuOptSolverSettings settings = NULL; - cuOptSolution solution = NULL; + cuOptSolverSettings settings = NULL; + cuOptSolution solution = NULL; // maximize obj: 5 * x + 8 * y; // subject to c1: 2*x + 3*y <= 12; @@ -937,22 +935,22 @@ cuopt_int_t test_ranged_problem(cuopt_int_t *termination_status_ptr, cuopt_float // subject to x_limit: 0 <= x <= 10; // subject to y_limit: 0 <= y <= 10; - cuopt_int_t num_variables = 2; - cuopt_int_t num_constraints = 3; - cuopt_int_t nnz = 6; - cuopt_int_t objective_sense = CUOPT_MAXIMIZE; - cuopt_float_t objective_offset = 0.0; - cuopt_float_t objective_coefficients[] = {5.0, 8.0}; - cuopt_int_t row_offsets[] = {0, 2, 4, 6}; - cuopt_int_t column_indices[] = {0, 1, 0, 1, 0, 1}; - cuopt_float_t values[] = {2.0, 3.0, 3.0, 1.0, 1.0, 2.0}; - cuopt_float_t constraint_lower_bounds[] = {-CUOPT_INFINITY, -CUOPT_INFINITY, 2.0}; - cuopt_float_t constraint_upper_bounds[] = {12.0, 6.0, 8.0}; + cuopt_int_t num_variables = 2; + cuopt_int_t 
num_constraints = 3; + cuopt_int_t nnz = 6; + cuopt_int_t objective_sense = CUOPT_MAXIMIZE; + cuopt_float_t objective_offset = 0.0; + cuopt_float_t objective_coefficients[] = {5.0, 8.0}; + cuopt_int_t row_offsets[] = {0, 2, 4, 6}; + cuopt_int_t column_indices[] = {0, 1, 0, 1, 0, 1}; + cuopt_float_t values[] = {2.0, 3.0, 3.0, 1.0, 1.0, 2.0}; + cuopt_float_t constraint_lower_bounds[] = {-CUOPT_INFINITY, -CUOPT_INFINITY, 2.0}; + cuopt_float_t constraint_upper_bounds[] = {12.0, 6.0, 8.0}; cuopt_float_t constraint_lower_bounds_check[] = {1.0, 1.0, 1.0}; cuopt_float_t constraint_upper_bounds_check[] = {1.0, 1.0, 1.0}; - cuopt_float_t variable_lower_bounds[] = {0.0, 0.0}; - cuopt_float_t variable_upper_bounds[] = {10.0, 10.0}; - char variable_types[] = {CUOPT_CONTINUOUS, CUOPT_CONTINUOUS}; + cuopt_float_t variable_lower_bounds[] = {0.0, 0.0}; + cuopt_float_t variable_upper_bounds[] = {10.0, 10.0}; + char variable_types[] = {CUOPT_CONTINUOUS, CUOPT_CONTINUOUS}; cuopt_int_t status; status = cuOptCreateRangedProblem(num_constraints, @@ -989,13 +987,17 @@ cuopt_int_t test_ranged_problem(cuopt_int_t *termination_status_ptr, cuopt_float for (cuopt_int_t i = 0; i < num_constraints; i++) { if (constraint_lower_bounds_check[i] != constraint_lower_bounds[i]) { printf("Error: expected constraint lower bound %d to be %f, but got %f\n", - i, constraint_lower_bounds[i], constraint_lower_bounds_check[i]); + i, + constraint_lower_bounds[i], + constraint_lower_bounds_check[i]); status = -1; goto DONE; } if (constraint_upper_bounds_check[i] != constraint_upper_bounds[i]) { printf("Error: expected constraint upper bound %d to be %f, but got %f\n", - i, constraint_upper_bounds[i], constraint_upper_bounds_check[i]); + i, + constraint_upper_bounds[i], + constraint_upper_bounds_check[i]); status = -1; goto DONE; } @@ -1043,8 +1045,8 @@ cuopt_int_t test_ranged_problem(cuopt_int_t *termination_status_ptr, cuopt_float cuopt_int_t test_invalid_bounds(cuopt_int_t test_mip) { cuOptOptimizationProblem 
problem = NULL; - cuOptSolverSettings settings = NULL; - cuOptSolution solution = NULL; + cuOptSolverSettings settings = NULL; + cuOptSolution solution = NULL; /* Test the invalid bounds scenario: maximize 2*x @@ -1059,17 +1061,17 @@ cuopt_int_t test_invalid_bounds(cuopt_int_t test_mip) - Result: 1.0 <= x <= 0.0 (INVALID!) */ - cuopt_int_t num_variables = 1; + cuopt_int_t num_variables = 1; cuopt_int_t num_constraints = 2; - cuopt_int_t nnz = 2; + cuopt_int_t nnz = 2; // CSR format constraint matrix // From the constraints: // x >= 0.2 // x <= 0.5 - cuopt_int_t row_offsets[] = {0, 1, 2}; + cuopt_int_t row_offsets[] = {0, 1, 2}; cuopt_int_t column_indices[] = {0, 0}; - cuopt_float_t values[] = {1.0, 1.0}; + cuopt_float_t values[] = {1.0, 1.0}; // Objective coefficients // From the objective function: maximize 2*x @@ -1101,19 +1103,19 @@ cuopt_int_t test_invalid_bounds(cuopt_int_t test_mip) // Create the problem status = cuOptCreateRangedProblem(num_constraints, - num_variables, - CUOPT_MAXIMIZE, // maximize - 0.0, // objective offset - objective_coefficients, - row_offsets, - column_indices, - values, - constraint_lower_bounds, - constraint_upper_bounds, - var_lower_bounds, - var_upper_bounds, - variable_types, - &problem); + num_variables, + CUOPT_MAXIMIZE, // maximize + 0.0, // objective offset + objective_coefficients, + row_offsets, + column_indices, + values, + constraint_lower_bounds, + constraint_upper_bounds, + var_lower_bounds, + var_upper_bounds, + variable_types, + &problem); printf("cuOptCreateRangedProblem returned: %d\n", status); @@ -1157,8 +1159,7 @@ cuopt_int_t test_invalid_bounds(cuopt_int_t test_mip) termination_status); status = CUOPT_VALIDATION_ERROR; goto DONE; - } - else { + } else { printf("✓ Problem found infeasible as expected\n"); status = CUOPT_SUCCESS; goto DONE; @@ -1173,13 +1174,15 @@ cuopt_int_t test_invalid_bounds(cuopt_int_t test_mip) // Print results printf("\nResults:\n"); printf("--------\n"); - printf("Termination status: %s 
(%d)\n", termination_status_to_string(termination_status), termination_status); + printf("Termination status: %s (%d)\n", + termination_status_to_string(termination_status), + termination_status); printf("Solve time: %f seconds\n", time); printf("Objective value: %f\n", objective_value); // Get and print solution variables cuopt_float_t* solution_values = (cuopt_float_t*)malloc(num_variables * sizeof(cuopt_float_t)); - status = cuOptGetPrimalSolution(solution, solution_values); + status = cuOptGetPrimalSolution(solution, solution_values); if (status != CUOPT_SUCCESS) { printf("Error getting solution values: %d\n", status); free(solution_values); @@ -1200,11 +1203,12 @@ cuopt_int_t test_invalid_bounds(cuopt_int_t test_mip) return status; } -cuopt_int_t test_quadratic_problem(cuopt_int_t* termination_status_ptr, cuopt_float_t* objective_ptr) +cuopt_int_t test_quadratic_problem(cuopt_int_t* termination_status_ptr, + cuopt_float_t* objective_ptr) { cuOptOptimizationProblem problem = NULL; - cuOptSolverSettings settings = NULL; - cuOptSolution solution = NULL; + cuOptSolverSettings settings = NULL; + cuOptSolution solution = NULL; // minimize x1^2 + 4*x2^2 - 8*x1 - 16*x2 // subject to x1 + x2 >= 5 @@ -1213,22 +1217,22 @@ cuopt_int_t test_quadratic_problem(cuopt_int_t* termination_status_ptr, cuopt_fl // x1 <= 10 // x2 <= 10 - cuopt_int_t num_variables = 2; - cuopt_int_t num_constraints = 1; - cuopt_int_t objective_sense = CUOPT_MINIMIZE; - cuopt_float_t objective_offset = 0.0; + cuopt_int_t num_variables = 2; + cuopt_int_t num_constraints = 1; + cuopt_int_t objective_sense = CUOPT_MINIMIZE; + cuopt_float_t objective_offset = 0.0; cuopt_float_t objective_coefficients[] = {-8.0, -16.0}; - cuopt_int_t quadratic_objective_matrix_row_offsets[] = {0, 1, 2}; - cuopt_int_t quadratic_objective_matrix_column_indices[] = {0, 1}; + cuopt_int_t quadratic_objective_matrix_row_offsets[] = {0, 1, 2}; + cuopt_int_t quadratic_objective_matrix_column_indices[] = {0, 1}; cuopt_float_t 
quadratic_objective_matrix_coefficent_values[] = {1.0, 4.0}; - cuopt_int_t row_offsets[] = {0, 2}; + cuopt_int_t row_offsets[] = {0, 2}; cuopt_int_t column_indices[] = {0, 1}; - cuopt_float_t values[] = {1.0, 1.0}; + cuopt_float_t values[] = {1.0, 1.0}; cuopt_float_t constraint_bounds[] = {5.0}; - char constraint_sense[] = {'G'}; + char constraint_sense[] = {'G'}; cuopt_float_t var_lower_bounds[] = {3.0, 0.0}; cuopt_float_t var_upper_bounds[] = {10.0, 10.0}; @@ -1281,20 +1285,20 @@ cuopt_int_t test_quadratic_problem(cuopt_int_t* termination_status_ptr, cuopt_fl goto DONE; } - DONE: -cuOptDestroyProblem(&problem); -cuOptDestroySolverSettings(&settings); -cuOptDestroySolution(&solution); + cuOptDestroyProblem(&problem); + cuOptDestroySolverSettings(&settings); + cuOptDestroySolution(&solution); -return status; + return status; } -cuopt_int_t test_quadratic_ranged_problem(cuopt_int_t* termination_status_ptr, cuopt_float_t* objective_ptr) +cuopt_int_t test_quadratic_ranged_problem(cuopt_int_t* termination_status_ptr, + cuopt_float_t* objective_ptr) { cuOptOptimizationProblem problem = NULL; - cuOptSolverSettings settings = NULL; - cuOptSolution solution = NULL; + cuOptSolverSettings settings = NULL; + cuOptSolution solution = NULL; // minimize x1^2 + 4*x2^2 - 8*x1 - 16*x2 // subject to x1 + x2 >= 5 @@ -1302,18 +1306,18 @@ cuopt_int_t test_quadratic_ranged_problem(cuopt_int_t* termination_status_ptr, c // x2 >= 0 // x1 <= 10 // x2 <= 10 - cuopt_int_t num_variables = 2; - cuopt_int_t num_constraints = 1; - cuopt_int_t objective_sense = CUOPT_MINIMIZE; - cuopt_float_t objective_offset = 0.0; - cuopt_float_t objective_coefficients[] = {-8.0, -16.0}; - cuopt_int_t quadratic_objective_matrix_row_offsets[] = {0, 1, 2}; - cuopt_int_t quadratic_objective_matrix_column_indices[] = {0, 1}; + cuopt_int_t num_variables = 2; + cuopt_int_t num_constraints = 1; + cuopt_int_t objective_sense = CUOPT_MINIMIZE; + cuopt_float_t objective_offset = 0.0; + cuopt_float_t 
objective_coefficients[] = {-8.0, -16.0}; + cuopt_int_t quadratic_objective_matrix_row_offsets[] = {0, 1, 2}; + cuopt_int_t quadratic_objective_matrix_column_indices[] = {0, 1}; cuopt_float_t quadratic_objective_matrix_coefficent_values[] = {1.0, 4.0}; - cuopt_int_t row_offsets[] = {0, 2}; + cuopt_int_t row_offsets[] = {0, 2}; cuopt_int_t column_indices[] = {0, 1}; - cuopt_float_t values[] = {1.0, 1.0}; + cuopt_float_t values[] = {1.0, 1.0}; cuopt_float_t constraint_lower_bounds[] = {5.0}; cuopt_float_t constraint_upper_bounds[] = {100.0}; @@ -1324,21 +1328,21 @@ cuopt_int_t test_quadratic_ranged_problem(cuopt_int_t* termination_status_ptr, c cuopt_int_t status; status = cuOptCreateQuadraticRangedProblem(num_constraints, - num_variables, - objective_sense, - objective_offset, - objective_coefficients, - quadratic_objective_matrix_row_offsets, - quadratic_objective_matrix_column_indices, - quadratic_objective_matrix_coefficent_values, - row_offsets, - column_indices, - values, - constraint_lower_bounds, - constraint_upper_bounds, - var_lower_bounds, - var_upper_bounds, - &problem); + num_variables, + objective_sense, + objective_offset, + objective_coefficients, + quadratic_objective_matrix_row_offsets, + quadratic_objective_matrix_column_indices, + quadratic_objective_matrix_coefficent_values, + row_offsets, + column_indices, + values, + constraint_lower_bounds, + constraint_upper_bounds, + var_lower_bounds, + var_upper_bounds, + &problem); if (status != CUOPT_SUCCESS) { printf("Error creating problem: %d\n", status); @@ -1370,19 +1374,19 @@ cuopt_int_t test_quadratic_ranged_problem(cuopt_int_t* termination_status_ptr, c } DONE: -cuOptDestroyProblem(&problem); -cuOptDestroySolverSettings(&settings); -cuOptDestroySolution(&solution); + cuOptDestroyProblem(&problem); + cuOptDestroySolverSettings(&settings); + cuOptDestroySolution(&solution); -return status; + return status; } cuopt_int_t test_write_problem(const char* input_filename, const char* output_filename) { - 
cuOptOptimizationProblem problem = NULL; + cuOptOptimizationProblem problem = NULL; cuOptOptimizationProblem problem_read = NULL; - cuOptSolverSettings settings = NULL; - cuOptSolution solution = NULL; + cuOptSolverSettings settings = NULL; + cuOptSolution solution = NULL; cuopt_int_t status; cuopt_int_t termination_status; cuopt_float_t objective_value; @@ -1458,12 +1462,16 @@ cuopt_int_t test_write_problem(const char* input_filename, const char* output_fi return status; } - -cuopt_int_t test_maximize_problem_dual_variables(cuopt_int_t method, cuopt_int_t* termination_status_ptr, cuopt_float_t* objective_ptr, cuopt_float_t* dual_variables, cuopt_float_t* reduced_costs, cuopt_float_t *dual_obj_ptr) +cuopt_int_t test_maximize_problem_dual_variables(cuopt_int_t method, + cuopt_int_t* termination_status_ptr, + cuopt_float_t* objective_ptr, + cuopt_float_t* dual_variables, + cuopt_float_t* reduced_costs, + cuopt_float_t* dual_obj_ptr) { cuOptOptimizationProblem problem = NULL; - cuOptSolverSettings settings = NULL; - cuOptSolution solution = NULL; + cuOptSolverSettings settings = NULL; + cuOptSolution solution = NULL; /* Solve the following problem maximize 4*x1 + x2 + 5*x3 + 3*x4 @@ -1473,20 +1481,17 @@ cuopt_int_t test_maximize_problem_dual_variables(cuopt_int_t method, cuopt_int_t x1, x2, x3, x4 >= 0 */ - cuopt_int_t num_variables = 4; - cuopt_int_t num_constraints = 3; - cuopt_int_t nnz = 12; - cuopt_int_t row_offsets[] = {0, 4, 8, 12}; - cuopt_int_t column_indices[] = {0, 1, 2, 3, - 0, 1, 2, 3, - 0, 1, 2, 3}; - cuopt_float_t values[] = {1.0, -1.0, -1.0, 3.0, - 5.0, 1.0, 3.0, 8.0, - -1.0, 2.0, 3.0, -5.0}; - cuopt_float_t rhs[] = {1.0, 55.0, 3.0}; - char constraint_sense[] = {CUOPT_LESS_THAN, CUOPT_LESS_THAN, CUOPT_LESS_THAN}; + cuopt_int_t num_variables = 4; + cuopt_int_t num_constraints = 3; + cuopt_int_t nnz = 12; + cuopt_int_t row_offsets[] = {0, 4, 8, 12}; + cuopt_int_t column_indices[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}; + cuopt_float_t values[] = {1.0, 
-1.0, -1.0, 3.0, 5.0, 1.0, 3.0, 8.0, -1.0, 2.0, 3.0, -5.0}; + cuopt_float_t rhs[] = {1.0, 55.0, 3.0}; + char constraint_sense[] = {CUOPT_LESS_THAN, CUOPT_LESS_THAN, CUOPT_LESS_THAN}; cuopt_float_t var_lower_bounds[] = {0.0, 0.0, 0.0, 0.0}; - cuopt_float_t var_upper_bounds[] = {CUOPT_INFINITY, CUOPT_INFINITY, CUOPT_INFINITY, CUOPT_INFINITY}; + cuopt_float_t var_upper_bounds[] = { + CUOPT_INFINITY, CUOPT_INFINITY, CUOPT_INFINITY, CUOPT_INFINITY}; char variable_types[] = {CUOPT_CONTINUOUS, CUOPT_CONTINUOUS, CUOPT_CONTINUOUS, CUOPT_CONTINUOUS}; cuopt_float_t objective_coefficients[] = {4.0, 1.0, 5.0, 3.0}; @@ -1569,10 +1574,9 @@ cuopt_int_t test_maximize_problem_dual_variables(cuopt_int_t method, cuopt_int_t time); printf("Objective value: %f\n", *objective_ptr); - /* Get and print solution variables */ cuopt_float_t* solution_values = (cuopt_float_t*)malloc(num_variables * sizeof(cuopt_float_t)); - status = cuOptGetPrimalSolution(solution, solution_values); + status = cuOptGetPrimalSolution(solution, solution_values); if (status != CUOPT_SUCCESS) { printf("Error getting solution values: %d\n", status); free(solution_values); @@ -1627,7 +1631,8 @@ cuopt_int_t test_deterministic_bb(const char* filename, cuopt_int_t status; cuopt_int_t run; - printf("Testing deterministic B&B: %s with %d threads, %d runs\n", filename, num_threads, num_runs); + printf( + "Testing deterministic B&B: %s with %d threads, %d runs\n", filename, num_threads, num_runs); status = cuOptReadProblem(filename, &problem); if (status != CUOPT_SUCCESS) { @@ -1766,21 +1771,20 @@ cuopt_int_t test_lp_solution_mip_methods() cuopt_float_t mip_gap; cuopt_float_t solution_bound; - cuopt_float_t obj[] = {1.0, 2.0}; - cuopt_int_t offsets[] = {0, 2}; - cuopt_int_t indices[] = {0, 1}; - cuopt_float_t vals[] = {1.0, 1.0}; - char sense[] = {CUOPT_LESS_THAN}; - cuopt_float_t rhs[] = {10.0}; - cuopt_float_t lb[] = {0.0, 0.0}; - cuopt_float_t ub[] = {100.0, 100.0}; - char vtypes[] = {CUOPT_CONTINUOUS, 
CUOPT_CONTINUOUS}; + cuopt_float_t obj[] = {1.0, 2.0}; + cuopt_int_t offsets[] = {0, 2}; + cuopt_int_t indices[] = {0, 1}; + cuopt_float_t vals[] = {1.0, 1.0}; + char sense[] = {CUOPT_LESS_THAN}; + cuopt_float_t rhs[] = {10.0}; + cuopt_float_t lb[] = {0.0, 0.0}; + cuopt_float_t ub[] = {100.0, 100.0}; + char vtypes[] = {CUOPT_CONTINUOUS, CUOPT_CONTINUOUS}; printf("Testing LP solution with MIP-only methods...\n"); - status = cuOptCreateProblem(1, 2, CUOPT_MINIMIZE, 0.0, - obj, offsets, indices, vals, - sense, rhs, lb, ub, vtypes, &problem); + status = cuOptCreateProblem( + 1, 2, CUOPT_MINIMIZE, 0.0, obj, offsets, indices, vals, sense, rhs, lb, ub, vtypes, &problem); if (status != CUOPT_SUCCESS) { printf("Error creating LP problem: %d\n", status); goto DONE; @@ -1840,21 +1844,20 @@ cuopt_int_t test_mip_solution_lp_methods() cuopt_float_t dual_solution[1]; cuopt_float_t reduced_costs[2]; - cuopt_float_t obj[] = {3.0, 5.0}; - cuopt_int_t offsets[] = {0, 2}; - cuopt_int_t indices[] = {0, 1}; - cuopt_float_t vals[] = {1.0, 2.0}; - char sense[] = {CUOPT_LESS_THAN}; - cuopt_float_t rhs[] = {4.0}; - cuopt_float_t lb[] = {0.0, 0.0}; - cuopt_float_t ub[] = {1.0, 1.0}; - char vtypes[] = {CUOPT_INTEGER, CUOPT_INTEGER}; + cuopt_float_t obj[] = {3.0, 5.0}; + cuopt_int_t offsets[] = {0, 2}; + cuopt_int_t indices[] = {0, 1}; + cuopt_float_t vals[] = {1.0, 2.0}; + char sense[] = {CUOPT_LESS_THAN}; + cuopt_float_t rhs[] = {4.0}; + cuopt_float_t lb[] = {0.0, 0.0}; + cuopt_float_t ub[] = {1.0, 1.0}; + char vtypes[] = {CUOPT_INTEGER, CUOPT_INTEGER}; printf("Testing MIP solution with LP-only methods...\n"); - status = cuOptCreateProblem(1, 2, CUOPT_MAXIMIZE, 0.0, - obj, offsets, indices, vals, - sense, rhs, lb, ub, vtypes, &problem); + status = cuOptCreateProblem( + 1, 2, CUOPT_MAXIMIZE, 0.0, obj, offsets, indices, vals, sense, rhs, lb, ub, vtypes, &problem); if (status != CUOPT_SUCCESS) { printf("Error creating MIP problem: %d\n", status); goto DONE; @@ -1929,8 +1932,10 @@ cuopt_int_t 
test_cpu_only_execution(const char* filename) cuopt_float_t* primal_solution = NULL; printf("Testing CPU-only execution (simulated remote mode)...\n"); - printf(" CUDA_VISIBLE_DEVICES=%s\n", getenv("CUDA_VISIBLE_DEVICES") ? getenv("CUDA_VISIBLE_DEVICES") : "(not set)"); - printf(" CUOPT_REMOTE_HOST=%s\n", getenv("CUOPT_REMOTE_HOST") ? getenv("CUOPT_REMOTE_HOST") : "(not set)"); + printf(" CUDA_VISIBLE_DEVICES=%s\n", + getenv("CUDA_VISIBLE_DEVICES") ? getenv("CUDA_VISIBLE_DEVICES") : "(not set)"); + printf(" CUOPT_REMOTE_HOST=%s\n", + getenv("CUOPT_REMOTE_HOST") ? getenv("CUOPT_REMOTE_HOST") : "(not set)"); status = cuOptReadProblem(filename, &problem); if (status != CUOPT_SUCCESS) { @@ -2008,9 +2013,7 @@ cuopt_int_t test_cpu_only_execution(const char* filename) printf(" Termination status: %s\n", termination_status_to_string(termination_status)); printf(" Objective value: %f\n", objective_value); printf(" Solve time: %f\n", solve_time); - if (num_variables > 0) { - printf(" Primal solution[0]: %f\n", primal_solution[0]); - } + if (num_variables > 0) { printf(" Primal solution[0]: %f\n", primal_solution[0]); } status = CUOPT_SUCCESS; @@ -2190,8 +2193,8 @@ cuopt_int_t test_pdlp_precision_mixed(const char* filename, } cuopt_int_t test_pdlp_precision_single(const char* filename, - cuopt_int_t* termination_status_ptr, - cuopt_float_t* objective_ptr) + cuopt_int_t* termination_status_ptr, + cuopt_float_t* objective_ptr) { cuOptOptimizationProblem problem = NULL; cuOptSolverSettings settings = NULL; diff --git a/cpp/tests/linear_programming/pdlp_test.cu b/cpp/tests/linear_programming/pdlp_test.cu index 50d4a8fb45..1588ff5e5d 100644 --- a/cpp/tests/linear_programming/pdlp_test.cu +++ b/cpp/tests/linear_programming/pdlp_test.cu @@ -5,6 +5,7 @@ */ /* clang-format on */ +#include #include #include #include @@ -43,6 +44,7 @@ #include #include #include +#include #include namespace cuopt::linear_programming::test { @@ -1036,7 +1038,7 @@ TEST(pdlp_class, 
run_empty_matrix_dual_simplex) optimization_problem_solution_t solution = solve_lp(&handle_, op_problem, solver_settings); EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERMINATION_STATUS_OPTIMAL); - EXPECT_FALSE(solution.get_additional_termination_information().solved_by_pdlp); + EXPECT_EQ(solution.get_additional_termination_information().solved_by, method_t::DualSimplex); } TEST(pdlp_class, test_max) @@ -1681,6 +1683,7 @@ TEST(pdlp_class, strong_branching_test) solver_settings.method = cuopt::linear_programming::method_t::PDLP; solver_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; solver_settings.presolver = cuopt::linear_programming::presolver_t::None; + solver_settings.generate_batch_primal_dual_solution = true; const int n_fractional = fractional.size(); const int batch_size = n_fractional * 2; @@ -2043,6 +2046,301 @@ TEST(pdlp_class, precision_single_pslp_presolve) afiro_primal_objective, solution.get_additional_termination_information().primal_objective)); } +// --------------------------------------------------------------------------- +// Cooperative strong branching tests +// --------------------------------------------------------------------------- + +TEST(pdlp_class, shared_sb_context_unit) +{ + using namespace cuopt::linear_programming::dual_simplex; + + constexpr int N = 10; + shared_strong_branching_context_t ctx(N); + shared_strong_branching_context_view_t view(ctx.solved); + + EXPECT_TRUE(view.is_valid()); + + shared_strong_branching_context_view_t empty_view; + EXPECT_FALSE(empty_view.is_valid()); + + for (int i = 0; i < N; ++i) { + EXPECT_FALSE(view.is_solved(i)); + } + + view.mark_solved(0); + view.mark_solved(3); + view.mark_solved(7); + + EXPECT_TRUE(view.is_solved(0)); + EXPECT_FALSE(view.is_solved(1)); + EXPECT_FALSE(view.is_solved(2)); + EXPECT_TRUE(view.is_solved(3)); + EXPECT_FALSE(view.is_solved(4)); + EXPECT_FALSE(view.is_solved(5)); + EXPECT_FALSE(view.is_solved(6)); + EXPECT_TRUE(view.is_solved(7)); + 
EXPECT_FALSE(view.is_solved(8)); + EXPECT_FALSE(view.is_solved(9)); + + // subview(2, 5) covers global indices [2..6] + auto sv = view.subview(2, 5); + EXPECT_TRUE(sv.is_valid()); + EXPECT_FALSE(sv.is_solved(0)); // global 2 + EXPECT_TRUE(sv.is_solved(1)); // global 3 + EXPECT_FALSE(sv.is_solved(2)); // global 4 + EXPECT_FALSE(sv.is_solved(3)); // global 5 + EXPECT_FALSE(sv.is_solved(4)); // global 6 + + // Mark through subview: local 4 -> global 6 + sv.mark_solved(4); + EXPECT_TRUE(view.is_solved(6)); + EXPECT_TRUE(sv.is_solved(4)); +} + +TEST(pdlp_class, shared_sb_view_batch_pre_solved) +{ + using namespace cuopt::linear_programming::dual_simplex; + + const raft::handle_t handle_{}; + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + const std::vector fractional = {1, 2, 4}; + const std::vector root_soln_x = {0.891, 0.109, 0.636429}; + const int n_fractional = fractional.size(); + const int batch_size = n_fractional * 2; // 6 + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; + solver_settings.presolver = cuopt::linear_programming::presolver_t::None; + + // Build new_bounds: down branches [0..2], up branches [3..5] + for (int i = 0; i < n_fractional; ++i) + solver_settings.new_bounds.push_back({fractional[i], + op_problem.get_variable_lower_bounds()[fractional[i]], + std::floor(root_soln_x[i])}); + for (int i = 0; i < n_fractional; ++i) + solver_settings.new_bounds.push_back({fractional[i], + std::ceil(root_soln_x[i]), + op_problem.get_variable_upper_bounds()[fractional[i]]}); + + shared_strong_branching_context_t shared_ctx(batch_size); + shared_strong_branching_context_view_t sb_view(shared_ctx.solved); + + // Pre-mark entries 1 and 4 as solved (simulating DS) + sb_view.mark_solved(1); + 
sb_view.mark_solved(4); + + solver_settings.shared_sb_solved = sb_view.solved; + + auto solution = solve_lp(&handle_, op_problem, solver_settings); + + ASSERT_EQ(solution.get_terminations_status().size(), batch_size); + + // Pre-solved entries should have ConcurrentLimit + EXPECT_EQ(solution.get_termination_status(1), pdlp_termination_status_t::ConcurrentLimit); + EXPECT_EQ(solution.get_termination_status(4), pdlp_termination_status_t::ConcurrentLimit); + + // Others should be Optimal + EXPECT_EQ(solution.get_termination_status(0), pdlp_termination_status_t::Optimal); + EXPECT_EQ(solution.get_termination_status(2), pdlp_termination_status_t::Optimal); + EXPECT_EQ(solution.get_termination_status(3), pdlp_termination_status_t::Optimal); + EXPECT_EQ(solution.get_termination_status(5), pdlp_termination_status_t::Optimal); + + // All entries should now be marked solved in the shared context + for (int i = 0; i < batch_size; ++i) { + EXPECT_TRUE(sb_view.is_solved(i)) << "Entry " << i << " should be solved"; + } +} + +TEST(pdlp_class, shared_sb_view_subbatch) +{ + using namespace cuopt::linear_programming::dual_simplex; + + const raft::handle_t handle_{}; + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + const std::vector fractional = {1, 2, 4}; + const std::vector root_soln_x = {0.891, 0.109, 0.636429}; + const int n_fractional = fractional.size(); + const int batch_size = n_fractional * 2; + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; + solver_settings.presolver = cuopt::linear_programming::presolver_t::None; + solver_settings.sub_batch_size = 2; + + shared_strong_branching_context_t shared_ctx(batch_size); + shared_strong_branching_context_view_t sb_view(shared_ctx.solved); + + // Pre-mark one entry in each 
sub-batch of size 2: indices 1, 4 + sb_view.mark_solved(1); + sb_view.mark_solved(4); + + solver_settings.shared_sb_solved = sb_view.solved; + + auto solution = batch_pdlp_solve(&handle_, op_problem, fractional, root_soln_x, solver_settings); + + ASSERT_EQ(solution.get_terminations_status().size(), batch_size); + + // Pre-solved entries should have ConcurrentLimit + EXPECT_EQ(solution.get_termination_status(1), pdlp_termination_status_t::ConcurrentLimit); + EXPECT_EQ(solution.get_termination_status(4), pdlp_termination_status_t::ConcurrentLimit); + + // Others should be Optimal + for (int i = 0; i < batch_size; ++i) { + if (i == 1 || i == 4) continue; + EXPECT_EQ(solution.get_termination_status(i), pdlp_termination_status_t::Optimal) + << "Entry " << i << " should be Optimal"; + } + + // All should be marked solved + for (int i = 0; i < batch_size; ++i) { + EXPECT_TRUE(sb_view.is_solved(i)) << "Entry " << i << " should be solved"; + } +} + +TEST(pdlp_class, shared_sb_view_concurrent_mark) +{ + using namespace cuopt::linear_programming::dual_simplex; + + const raft::handle_t handle_{}; + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + const std::vector fractional = {1, 2, 4}; + const std::vector root_soln_x = {0.891, 0.109, 0.636429}; + const int n_fractional = fractional.size(); + const int batch_size = n_fractional * 2; + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; + solver_settings.presolver = cuopt::linear_programming::presolver_t::None; + solver_settings.iteration_limit = 1000000; + + for (int i = 0; i < n_fractional; ++i) + solver_settings.new_bounds.push_back({fractional[0], -5, -5}); + + for (int i = 0; i < n_fractional; ++i) + solver_settings.new_bounds.push_back({fractional[i], + 
std::ceil(root_soln_x[i]), + op_problem.get_variable_upper_bounds()[fractional[i]]}); + + shared_strong_branching_context_t shared_ctx(batch_size); + shared_strong_branching_context_view_t sb_view(shared_ctx.solved); + + solver_settings.shared_sb_solved = sb_view.solved; + + optimization_problem_solution_t* result_ptr = nullptr; + + auto pdlp_thread = std::thread([&]() { + auto sol = new optimization_problem_solution_t( + solve_lp(&handle_, op_problem, solver_settings)); + result_ptr = sol; + }); + + // Wait a bit then mark the first n_fractional entries (indices 0, 1, 2) as solved (simulating DS) + std::this_thread::sleep_for(std::chrono::milliseconds(200)); + for (int i = 0; i < n_fractional; ++i) + sb_view.mark_solved(i); + + pdlp_thread.join(); + + ASSERT_NE(result_ptr, nullptr); + auto& solution = *result_ptr; + + ASSERT_EQ(solution.get_terminations_status().size(), batch_size); + + for (int i = 0; i < batch_size; ++i) { + auto status = solution.get_termination_status(i); + // Each entry should be either Optimal (PDLP solved it first) or ConcurrentLimit (DS marked it) + EXPECT_TRUE(status == pdlp_termination_status_t::Optimal || + status == pdlp_termination_status_t::ConcurrentLimit) + << "Entry " << i << " has unexpected status " + << cuopt::linear_programming::optimization_problem_solution_t:: + get_termination_status_string(status); + } + + // All entries should end up marked solved + for (int i = 0; i < batch_size; ++i) { + EXPECT_TRUE(sb_view.is_solved(i)) << "Entry " << i << " should be solved"; + } + + delete result_ptr; +} + +TEST(pdlp_class, shared_sb_view_all_infeasible) +{ + using namespace cuopt::linear_programming::dual_simplex; + + const raft::handle_t handle_{}; + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + const std::vector fractional = {1, 2, 4}; + const std::vector root_soln_x = {0.891, 0.109, 0.636429}; + const int n_fractional = fractional.size(); 
+ const int batch_size = n_fractional; + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; + solver_settings.presolver = cuopt::linear_programming::presolver_t::None; + solver_settings.iteration_limit = 1000000; + + for (int i = 0; i < n_fractional; ++i) + solver_settings.new_bounds.push_back({fractional[0], -5, -5}); + + shared_strong_branching_context_t shared_ctx(batch_size); + shared_strong_branching_context_view_t sb_view(shared_ctx.solved); + + solver_settings.shared_sb_solved = sb_view.solved; + + optimization_problem_solution_t* result_ptr = nullptr; + + auto pdlp_thread = std::thread([&]() { + auto sol = new optimization_problem_solution_t( + solve_lp(&handle_, op_problem, solver_settings)); + result_ptr = sol; + }); + + // Wait a bit then mark every entry (indices 0..batch_size-1) as solved (simulating DS) + std::this_thread::sleep_for(std::chrono::milliseconds(200)); + for (int i = 0; i < n_fractional; ++i) + sb_view.mark_solved(i); + + pdlp_thread.join(); + + ASSERT_NE(result_ptr, nullptr); + auto& solution = *result_ptr; + + ASSERT_EQ(solution.get_terminations_status().size(), batch_size); + + for (int i = 0; i < batch_size; ++i) { + auto status = solution.get_termination_status(i); + // Every entry is expected to end as ConcurrentLimit (DS marked it); Optimal is not accepted here + EXPECT_TRUE(status == pdlp_termination_status_t::ConcurrentLimit) + << "Entry " << i << " has unexpected status " + << cuopt::linear_programming::optimization_problem_solution_t:: + get_termination_status_string(status); + } + + // All entries should end up marked solved + for (int i = 0; i < batch_size; ++i) { + EXPECT_TRUE(sb_view.is_solved(i)) << "Entry " << i << " should be solved"; + } + + delete result_ptr; +} + +} // namespace cuopt::linear_programming::test CUOPT_TEST_PROGRAM_MAIN() diff --git 
a/cpp/tests/linear_programming/unit_tests/solution_interface_test.cu b/cpp/tests/linear_programming/unit_tests/solution_interface_test.cu index 08da011a3d..4683a5e3cc 100644 --- a/cpp/tests/linear_programming/unit_tests/solution_interface_test.cu +++ b/cpp/tests/linear_programming/unit_tests/solution_interface_test.cu @@ -87,7 +87,7 @@ static std::unique_ptr> make_cpu_lp_solution(bool /*l2_dual_residual=*/2e-8, /*gap=*/0.5, /*num_iterations=*/100, - /*solved_by_pdlp=*/true); + /*solved_by=*/method_t::PDLP); } cpu_pdlp_warm_start_data_t ws; @@ -120,7 +120,7 @@ static std::unique_ptr> make_cpu_lp_solution(bool /*l2_dual_residual=*/2e-8, /*gap=*/0.5, /*num_iterations=*/100, - /*solved_by_pdlp=*/true, + /*solved_by=*/method_t::PDLP, std::move(ws)); } @@ -167,7 +167,7 @@ static gpu_lp_solution_t make_gpu_lp_solution() term_stats[0].l2_dual_residual = 2e-8; term_stats[0].gap = 0.5; term_stats[0].number_of_steps_taken = 100; - term_stats[0].solved_by_pdlp = true; + term_stats[0].solved_by = method_t::PDLP; std::vector term_status = {pdlp_termination_status_t::Optimal}; diff --git a/cpp/tests/mip/CMakeLists.txt b/cpp/tests/mip/CMakeLists.txt index 2f2139890f..f2cf53ff6c 100644 --- a/cpp/tests/mip/CMakeLists.txt +++ b/cpp/tests/mip/CMakeLists.txt @@ -49,3 +49,6 @@ ConfigureTest(MIP_TERMINATION_STATUS_TEST ConfigureTest(DETERMINISM_TEST ${CMAKE_CURRENT_SOURCE_DIR}/determinism_test.cu ) +ConfigureTest(HEURISTICS_HYPER_PARAMS_TEST + ${CMAKE_CURRENT_SOURCE_DIR}/heuristics_hyper_params_test.cu +) diff --git a/cpp/tests/mip/determinism_test.cu b/cpp/tests/mip/determinism_test.cu index 1e59fba649..dcd6f7749d 100644 --- a/cpp/tests/mip/determinism_test.cu +++ b/cpp/tests/mip/determinism_test.cu @@ -233,10 +233,10 @@ INSTANTIATE_TEST_SUITE_P( std::make_tuple("/mip/gen-ip054.mps", 128, 120.0, 1), std::make_tuple("/mip/bb_optimality.mps", 4, 60.0, 4), std::make_tuple("/mip/neos5.mps", 16, 60.0, 1), - std::make_tuple("/mip/seymour1.mps", 16, 60.0, 1), + 
std::make_tuple("/mip/seymour1.mps", 16, 120.0, 1), // too heavy for CI // std::make_tuple("/mip/n2seq36q.mps", 16, 60.0, 4), - std::make_tuple("/mip/gmu-35-50.mps", 32, 60.0, 3)), + std::make_tuple("/mip/gmu-35-50.mps", 32, 60.0, 2)), [](const ::testing::TestParamInfo& info) { const auto& path = std::get<0>(info.param); int threads = std::get<1>(info.param); diff --git a/cpp/tests/mip/feasibility_jump_tests.cu b/cpp/tests/mip/feasibility_jump_tests.cu index baa3e9b803..4e8a518522 100644 --- a/cpp/tests/mip/feasibility_jump_tests.cu +++ b/cpp/tests/mip/feasibility_jump_tests.cu @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -77,16 +78,7 @@ static fj_state_t run_fj(std::string test_instance, // run the problem constructor of MIP, so that we do bounds standardization detail::problem_t problem(op_problem); problem.preprocess_problem(); - detail::pdhg_solver_t pdhg_solver(problem.handle_ptr, problem); - detail::pdlp_initial_scaling_strategy_t scaling(&handle_, - problem, - 10, - 1.0, - pdhg_solver, - problem.reverse_coefficients, - problem.reverse_offsets, - problem.reverse_constraints, - true); + detail::mip_scaling_strategy_t scaling(problem); auto settings = mip_solver_settings_t{}; settings.time_limit = 30.; diff --git a/cpp/tests/mip/heuristics_hyper_params_test.cu b/cpp/tests/mip/heuristics_hyper_params_test.cu new file mode 100644 index 0000000000..50e463b1fe --- /dev/null +++ b/cpp/tests/mip/heuristics_hyper_params_test.cu @@ -0,0 +1,283 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#include +#include +#include + +#include + +#include +#include +#include +#include + +namespace cuopt::linear_programming::test { + +using settings_t = solver_settings_t; + +class HeuristicsHyperParamsTest : public ::testing::Test { + protected: + std::string tmp_path; + + void SetUp() override + { + tmp_path = std::filesystem::temp_directory_path() / "cuopt_heuristic_params_test.config"; + } + + void TearDown() override { std::remove(tmp_path.c_str()); } +}; + +TEST_F(HeuristicsHyperParamsTest, DumpedFileIsAllCommentedOut) +{ + settings_t settings; + settings.dump_parameters_to_file(tmp_path, true); + + // Loading the commented-out dump should leave struct defaults unchanged + settings_t reloaded; + reloaded.get_mip_settings().heuristic_params.population_size = 9999; + reloaded.load_parameters_from_file(tmp_path); + EXPECT_EQ(reloaded.get_mip_settings().heuristic_params.population_size, 9999); +} + +TEST_F(HeuristicsHyperParamsTest, DumpedFileIsParseable) +{ + settings_t settings; + settings.dump_parameters_to_file(tmp_path, true); + + settings_t reloaded; + EXPECT_NO_THROW(reloaded.load_parameters_from_file(tmp_path)); +} + +TEST_F(HeuristicsHyperParamsTest, CustomValuesRoundTrip) +{ + { + std::ofstream f(tmp_path); + f << "mip_hyper_heuristic_population_size = 64\n"; + f << "mip_hyper_heuristic_num_cpufj_threads = 4\n"; + f << "mip_hyper_heuristic_presolve_time_ratio = 0.2\n"; + f << "mip_hyper_heuristic_presolve_max_time = 120\n"; + f << "mip_hyper_heuristic_root_lp_time_ratio = 0.05\n"; + f << "mip_hyper_heuristic_root_lp_max_time = 30\n"; + f << "mip_hyper_heuristic_rins_time_limit = 5\n"; + f << "mip_hyper_heuristic_rins_max_time_limit = 40\n"; + f << "mip_hyper_heuristic_rins_fix_rate = 0.7\n"; + f << "mip_hyper_heuristic_stagnation_trigger = 5\n"; + f << "mip_hyper_heuristic_max_iterations_without_improvement = 12\n"; + f << "mip_hyper_heuristic_initial_infeasibility_weight = 500\n"; + 
f << "mip_hyper_heuristic_n_of_minimums_for_exit = 10000\n"; + f << "mip_hyper_heuristic_enabled_recombiners = 5\n"; + f << "mip_hyper_heuristic_cycle_detection_length = 50\n"; + f << "mip_hyper_heuristic_relaxed_lp_time_limit = 2\n"; + f << "mip_hyper_heuristic_related_vars_time_limit = 60\n"; + } + + settings_t settings; + settings.load_parameters_from_file(tmp_path); + const auto& hp = settings.get_mip_settings().heuristic_params; + + EXPECT_EQ(hp.population_size, 64); + EXPECT_EQ(hp.num_cpufj_threads, 4); + EXPECT_DOUBLE_EQ(hp.presolve_time_ratio, 0.2); + EXPECT_DOUBLE_EQ(hp.presolve_max_time, 120.0); + EXPECT_DOUBLE_EQ(hp.root_lp_time_ratio, 0.05); + EXPECT_DOUBLE_EQ(hp.root_lp_max_time, 30.0); + EXPECT_DOUBLE_EQ(hp.rins_time_limit, 5.0); + EXPECT_DOUBLE_EQ(hp.rins_max_time_limit, 40.0); + EXPECT_DOUBLE_EQ(hp.rins_fix_rate, 0.7); + EXPECT_EQ(hp.stagnation_trigger, 5); + EXPECT_EQ(hp.max_iterations_without_improvement, 12); + EXPECT_DOUBLE_EQ(hp.initial_infeasibility_weight, 500.0); + EXPECT_EQ(hp.n_of_minimums_for_exit, 10000); + EXPECT_EQ(hp.enabled_recombiners, 5); + EXPECT_EQ(hp.cycle_detection_length, 50); + EXPECT_DOUBLE_EQ(hp.relaxed_lp_time_limit, 2.0); + EXPECT_DOUBLE_EQ(hp.related_vars_time_limit, 60.0); +} + +TEST_F(HeuristicsHyperParamsTest, PartialConfigKeepsDefaults) +{ + { + std::ofstream f(tmp_path); + f << "mip_hyper_heuristic_population_size = 128\n"; + f << "mip_hyper_heuristic_rins_fix_rate = 0.3\n"; + } + + settings_t settings; + settings.load_parameters_from_file(tmp_path); + const auto& hp = settings.get_mip_settings().heuristic_params; + + EXPECT_EQ(hp.population_size, 128); + EXPECT_DOUBLE_EQ(hp.rins_fix_rate, 0.3); + + mip_heuristics_hyper_params_t defaults; + EXPECT_EQ(hp.num_cpufj_threads, defaults.num_cpufj_threads); + EXPECT_DOUBLE_EQ(hp.presolve_time_ratio, defaults.presolve_time_ratio); + EXPECT_EQ(hp.n_of_minimums_for_exit, defaults.n_of_minimums_for_exit); + EXPECT_EQ(hp.enabled_recombiners, defaults.enabled_recombiners); +} + 
+TEST_F(HeuristicsHyperParamsTest, CommentsAndBlankLinesIgnored) +{ + { + std::ofstream f(tmp_path); + f << "# This is a comment\n"; + f << "\n"; + f << "# Another comment\n"; + f << "mip_hyper_heuristic_population_size = 42\n"; + f << "\n"; + } + + settings_t settings; + settings.load_parameters_from_file(tmp_path); + EXPECT_EQ(settings.get_mip_settings().heuristic_params.population_size, 42); +} + +TEST_F(HeuristicsHyperParamsTest, UnknownKeyThrows) +{ + { + std::ofstream f(tmp_path); + f << "bogus_key = 42\n"; + } + settings_t settings; + EXPECT_THROW(settings.load_parameters_from_file(tmp_path), cuopt::logic_error); +} + +TEST_F(HeuristicsHyperParamsTest, BadNumericValueThrows) +{ + { + std::ofstream f(tmp_path); + f << "mip_hyper_heuristic_population_size = not_a_number\n"; + } + settings_t settings; + EXPECT_THROW(settings.load_parameters_from_file(tmp_path), cuopt::logic_error); +} + +TEST_F(HeuristicsHyperParamsTest, TrailingJunkSpaceSeparatedThrows) +{ + { + std::ofstream f(tmp_path); + f << "mip_hyper_heuristic_population_size = 64 foo\n"; + } + settings_t settings; + EXPECT_THROW(settings.load_parameters_from_file(tmp_path), cuopt::logic_error); +} + +TEST_F(HeuristicsHyperParamsTest, TrailingJunkNoSpaceThrows) +{ + { + std::ofstream f(tmp_path); + f << "mip_hyper_heuristic_population_size = 64foo\n"; + } + settings_t settings; + EXPECT_THROW(settings.load_parameters_from_file(tmp_path), cuopt::logic_error); +} + +TEST_F(HeuristicsHyperParamsTest, TrailingJunkFloatThrows) +{ + { + std::ofstream f(tmp_path); + f << "mip_hyper_heuristic_rins_fix_rate = 0.5abc\n"; + } + settings_t settings; + EXPECT_THROW(settings.load_parameters_from_file(tmp_path), cuopt::logic_error); +} + +TEST_F(HeuristicsHyperParamsTest, RangeViolationCycleDetectionThrows) +{ + { + std::ofstream f(tmp_path); + f << "mip_hyper_heuristic_cycle_detection_length = 0\n"; + } + settings_t settings; + EXPECT_THROW(settings.load_parameters_from_file(tmp_path), cuopt::logic_error); +} + 
+TEST_F(HeuristicsHyperParamsTest, RangeViolationFixRateThrows) +{ + { + std::ofstream f(tmp_path); + f << "mip_hyper_heuristic_rins_fix_rate = 2.0\n"; + } + settings_t settings; + EXPECT_THROW(settings.load_parameters_from_file(tmp_path), cuopt::logic_error); +} + +TEST_F(HeuristicsHyperParamsTest, NonexistentFileThrows) +{ + settings_t settings; + EXPECT_THROW(settings.load_parameters_from_file("/tmp/does_not_exist_cuopt_test.config"), + cuopt::logic_error); +} + +TEST_F(HeuristicsHyperParamsTest, DirectoryPathThrows) +{ + settings_t settings; + EXPECT_THROW(settings.load_parameters_from_file("/tmp"), cuopt::logic_error); +} + +TEST_F(HeuristicsHyperParamsTest, IndentedCommentAndWhitespaceLinesIgnored) +{ + { + std::ofstream f(tmp_path); + f << " # indented comment\n"; + f << " \t \n"; + f << "mip_hyper_heuristic_population_size = 99\n"; + } + settings_t settings; + settings.load_parameters_from_file(tmp_path); + EXPECT_EQ(settings.get_mip_settings().heuristic_params.population_size, 99); +} + +TEST_F(HeuristicsHyperParamsTest, MixedSolverAndHyperParamsFromFile) +{ + { + std::ofstream f(tmp_path); + f << "mip_hyper_heuristic_population_size = 100\n"; + f << "time_limit = 42\n"; + } + settings_t settings; + settings.load_parameters_from_file(tmp_path); + EXPECT_EQ(settings.get_mip_settings().heuristic_params.population_size, 100); + EXPECT_DOUBLE_EQ(settings.get_mip_settings().time_limit, 42.0); +} + +TEST_F(HeuristicsHyperParamsTest, QuotedStringValue) +{ + { + std::ofstream f(tmp_path); + f << "log_file = \"/path/with spaces/log.txt\"\n"; + } + settings_t settings; + settings.load_parameters_from_file(tmp_path); + EXPECT_EQ(settings.template get_parameter(CUOPT_LOG_FILE), + "/path/with spaces/log.txt"); +} + +TEST_F(HeuristicsHyperParamsTest, QuotedStringWithEscapedQuote) +{ + { + std::ofstream f(tmp_path); + f << R"(log_file = "/path/with \"quotes\"/log.txt")" << "\n"; + } + settings_t settings; + settings.load_parameters_from_file(tmp_path); + 
EXPECT_EQ(settings.template get_parameter(CUOPT_LOG_FILE), + "/path/with \"quotes\"/log.txt"); +} + +TEST_F(HeuristicsHyperParamsTest, UnterminatedQuoteThrows) +{ + { + std::ofstream f(tmp_path); + f << "log_file = \"/path/no/close\n"; + } + settings_t settings; + EXPECT_THROW(settings.load_parameters_from_file(tmp_path), cuopt::logic_error); +} + +} // namespace cuopt::linear_programming::test diff --git a/cpp/tests/mip/load_balancing_test.cu b/cpp/tests/mip/load_balancing_test.cu index 5e2f08007d..1f825a26f7 100644 --- a/cpp/tests/mip/load_balancing_test.cu +++ b/cpp/tests/mip/load_balancing_test.cu @@ -9,11 +9,11 @@ #include "mip_utils.cuh" #include +#include #include #include #include #include -#include #include #include #include @@ -128,16 +128,7 @@ void test_multi_probe(std::string path) problem_checking_t::check_problem_representation(op_problem); detail::problem_t problem(op_problem); mip_solver_settings_t default_settings{}; - detail::pdhg_solver_t pdhg_solver(problem.handle_ptr, problem); - detail::pdlp_initial_scaling_strategy_t scaling(&handle_, - problem, - 10, - 1.0, - problem.reverse_coefficients, - problem.reverse_offsets, - problem.reverse_constraints, - nullptr, - true); + detail::mip_scaling_strategy_t scaling(problem); detail::mip_solver_t solver(problem, default_settings, scaling, cuopt::timer_t(0)); detail::load_balanced_problem_t lb_problem(problem); detail::load_balanced_bounds_presolve_t lb_prs(lb_problem, solver.context); diff --git a/cpp/tests/mip/multi_probe_test.cu b/cpp/tests/mip/multi_probe_test.cu index 073c153486..003220de9b 100644 --- a/cpp/tests/mip/multi_probe_test.cu +++ b/cpp/tests/mip/multi_probe_test.cu @@ -9,10 +9,10 @@ #include "mip_utils.cuh" #include +#include #include #include #include -#include #include #include #include @@ -150,18 +150,7 @@ void test_multi_probe(std::string path) problem_checking_t::check_problem_representation(op_problem); detail::problem_t problem(op_problem); mip_solver_settings_t 
default_settings{}; - pdlp_hyper_params::pdlp_hyper_params_t hyper_params{}; - detail::pdlp_initial_scaling_strategy_t scaling(&handle_, - problem, - 10, - 1.0, - problem.reverse_coefficients, - problem.reverse_offsets, - problem.reverse_constraints, - nullptr, - hyper_params, - true); - detail::mip_solver_t solver(problem, default_settings, scaling, cuopt::timer_t(0)); + detail::mip_solver_t solver(problem, default_settings, cuopt::timer_t(0)); detail::bound_presolve_t bnd_prb_0(solver.context); detail::bound_presolve_t bnd_prb_1(solver.context); detail::multi_probe_t multi_probe_prs(solver.context); diff --git a/cpp/tests/mip/server_test.cu b/cpp/tests/mip/server_test.cu index 2c47191b95..b027be897f 100644 --- a/cpp/tests/mip/server_test.cu +++ b/cpp/tests/mip/server_test.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -91,12 +91,12 @@ TEST(ServerTest, TestSampleLP) class MILPTestParams : public testing::TestWithParam< - std::tuple> {}; + std::tuple> {}; TEST_P(MILPTestParams, TestSampleMILP) { bool maximize = std::get<0>(GetParam()); - bool scaling = std::get<1>(GetParam()); + int scaling = std::get<1>(GetParam()); bool heuristics_only = std::get<2>(GetParam()); auto expected_termination_status = std::get<3>(GetParam()); @@ -104,9 +104,9 @@ TEST_P(MILPTestParams, TestSampleMILP) auto problem = create_std_milp_problem(maximize); cuopt::linear_programming::mip_solver_settings_t settings{}; - settings.set_time_limit(5); - settings.set_mip_scaling(scaling); - settings.set_heuristics_only(heuristics_only); + settings.time_limit = 5; + settings.mip_scaling = scaling; + settings.heuristics_only = heuristics_only; auto result = cuopt::linear_programming::solve_mip(&handle, problem, settings); @@ -117,13 +117,21 @@ INSTANTIATE_TEST_SUITE_P( MILPTests, MILPTestParams, testing::Values( - std::make_tuple( - true, true, true, cuopt::linear_programming::mip_termination_status_t::FeasibleFound), - std::make_tuple( - false, true, false, cuopt::linear_programming::mip_termination_status_t::Optimal), - std::make_tuple( - true, false, true, cuopt::linear_programming::mip_termination_status_t::FeasibleFound), - std::make_tuple( - false, false, false, cuopt::linear_programming::mip_termination_status_t::Optimal))); + std::make_tuple(true, + CUOPT_MIP_SCALING_ON, + true, + cuopt::linear_programming::mip_termination_status_t::FeasibleFound), + std::make_tuple(false, + CUOPT_MIP_SCALING_ON, + false, + cuopt::linear_programming::mip_termination_status_t::Optimal), + std::make_tuple(true, + CUOPT_MIP_SCALING_OFF, + true, + cuopt::linear_programming::mip_termination_status_t::FeasibleFound), + std::make_tuple(false, + CUOPT_MIP_SCALING_OFF, + false, + cuopt::linear_programming::mip_termination_status_t::Optimal))); } // namespace 
cuopt::linear_programming::test diff --git a/cpp/tests/mip/unit_test.cu b/cpp/tests/mip/unit_test.cu index 29b58b736b..68de599f0c 100644 --- a/cpp/tests/mip/unit_test.cu +++ b/cpp/tests/mip/unit_test.cu @@ -9,7 +9,9 @@ #include "mip_utils.cuh" #include +#include #include +#include #include #include #include @@ -226,12 +228,12 @@ TEST(ErrorTest, TestError) class MILPTestParams : public testing::TestWithParam< - std::tuple> {}; + std::tuple> {}; TEST_P(MILPTestParams, TestSampleMILP) { bool maximize = std::get<0>(GetParam()); - bool scaling = std::get<1>(GetParam()); + int scaling = std::get<1>(GetParam()); bool heuristics_only = std::get<2>(GetParam()); auto expected_termination_status = std::get<3>(GetParam()); @@ -252,7 +254,7 @@ TEST_P(MILPTestParams, TestSampleMILP) TEST_P(MILPTestParams, TestSingleVarMILP) { bool maximize = std::get<0>(GetParam()); - bool scaling = std::get<1>(GetParam()); + int scaling = std::get<1>(GetParam()); bool heuristics_only = std::get<2>(GetParam()); auto expected_termination_status = std::get<3>(GetParam()); @@ -274,13 +276,165 @@ TEST_P(MILPTestParams, TestSingleVarMILP) INSTANTIATE_TEST_SUITE_P( MILPTests, MILPTestParams, - testing::Values( - std::make_tuple(true, true, true, cuopt::linear_programming::mip_termination_status_t::Optimal), - std::make_tuple( - false, true, false, cuopt::linear_programming::mip_termination_status_t::Optimal), - std::make_tuple( - true, false, true, cuopt::linear_programming::mip_termination_status_t::Optimal), - std::make_tuple( - false, false, false, cuopt::linear_programming::mip_termination_status_t::Optimal))); + testing::Values(std::make_tuple(true, + CUOPT_MIP_SCALING_ON, + true, + cuopt::linear_programming::mip_termination_status_t::Optimal), + std::make_tuple(false, + CUOPT_MIP_SCALING_ON, + false, + cuopt::linear_programming::mip_termination_status_t::Optimal), + std::make_tuple(true, + CUOPT_MIP_SCALING_OFF, + true, + cuopt::linear_programming::mip_termination_status_t::Optimal), + 
std::make_tuple(false, + CUOPT_MIP_SCALING_OFF, + false, + cuopt::linear_programming::mip_termination_status_t::Optimal))); + +// --------------------------------------------------------------------------- +// Scaling integrality preservation test +// --------------------------------------------------------------------------- + +static mps_parser::mps_data_model_t create_wide_spread_milp() +{ + mps_parser::mps_data_model_t problem; + + // 6 rows, 4 variables (x0=INT, x1=INT, x2=INT, x3=CONT) + // Coefficient spread: ~log2(100000/1) ≈ 17, well above the 12-threshold. + // clang-format off + std::vector values = { + 3.0, 7.0, 2.0, 1.5, // row 0: small ints + cont + 100000.0, 50000.0, 25000.0, 999.9, // row 1: large ints + cont + 5.0, 11.0, 13.0, 0.3, // row 2: small primes + cont + 60000.0, 30000.0, 9000.0, 42.42, // row 3: large + cont + 1.0, 1.0, 1.0, 0.0, // row 4: unit row (no cont) + 8.0, 4.0, 6.0, 3.14 // row 5: small ints + cont + }; + // clang-format on + std::vector indices = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, + 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}; + std::vector offsets = {0, 4, 8, 12, 16, 20, 24}; + problem.set_csr_constraint_matrix( + values.data(), values.size(), indices.data(), indices.size(), offsets.data(), offsets.size()); + + std::vector cl = {0, 0, 0, 0, 0, 0}; + std::vector cu = {1e6, 1e8, 1e4, 1e8, 100, 1e4}; + problem.set_constraint_lower_bounds(cl.data(), cl.size()); + problem.set_constraint_upper_bounds(cu.data(), cu.size()); + + std::vector vl = {0, 0, 0, 0}; + std::vector vu = {1000, 1000, 1000, 1e6}; + problem.set_variable_lower_bounds(vl.data(), vl.size()); + problem.set_variable_upper_bounds(vu.data(), vu.size()); + + std::vector obj = {1.0, 2.0, 3.0, 0.5}; + problem.set_objective_coefficients(obj.data(), obj.size()); + problem.set_maximize(false); + + std::vector var_types = {'I', 'I', 'I', 'C'}; + problem.set_variable_types(var_types); + + return problem; +} + +TEST(ScalingIntegrity, IntegerCoefficientsPreservedAfterScaling) +{ + 
raft::handle_t handle; + auto mps_problem = create_wide_spread_milp(); + auto op_problem = mps_data_model_to_optimization_problem(&handle, mps_problem); + problem_checking_t::check_problem_representation(op_problem); + + const int nnz = op_problem.get_nnz(); + + auto pre_values = + cuopt::host_copy(op_problem.get_constraint_matrix_values(), handle.get_stream()); + auto col_indices = + cuopt::host_copy(op_problem.get_constraint_matrix_indices(), handle.get_stream()); + auto var_types = cuopt::host_copy(op_problem.get_variable_types(), handle.get_stream()); + handle.sync_stream(); + + std::vector was_integer(nnz, false); + for (int k = 0; k < nnz; ++k) { + int col = col_indices[k]; + if (var_types[col] == var_t::INTEGER) { + double abs_val = std::abs(pre_values[k]); + if (abs_val > 0.0 && + std::abs(abs_val - std::round(abs_val)) <= 1e-6 * std::max(1.0, abs_val)) { + was_integer[k] = true; + } + } + } + + detail::mip_scaling_strategy_t scaling(op_problem); + scaling.scale_problem(); + + auto post_values = + cuopt::host_copy(op_problem.get_constraint_matrix_values(), handle.get_stream()); + handle.sync_stream(); + + int violations = 0; + for (int k = 0; k < nnz; ++k) { + if (!was_integer[k]) { continue; } + double abs_val = std::abs(post_values[k]); + double frac_err = std::abs(abs_val - std::round(abs_val)); + double rel_tol = 1e-6 * std::max(1.0, abs_val); + if (frac_err > rel_tol) { + ++violations; + ADD_FAILURE() << "Coefficient [" << k << "] col=" << col_indices[k] << " was integer (" + << pre_values[k] << ") but after scaling is " << post_values[k] + << " (frac_err=" << frac_err << ")"; + } + } + EXPECT_EQ(violations, 0) << violations << " integer coefficients lost integrality after scaling"; +} + +TEST(ScalingIntegrity, NoObjectiveScalingPreservesIntegerCoefficients) +{ + raft::handle_t handle; + auto mps_problem = create_wide_spread_milp(); + auto op_problem = mps_data_model_to_optimization_problem(&handle, mps_problem); + 
problem_checking_t::check_problem_representation(op_problem); + + const int nnz = op_problem.get_nnz(); + + auto pre_values = + cuopt::host_copy(op_problem.get_constraint_matrix_values(), handle.get_stream()); + auto col_indices = + cuopt::host_copy(op_problem.get_constraint_matrix_indices(), handle.get_stream()); + auto var_types = cuopt::host_copy(op_problem.get_variable_types(), handle.get_stream()); + handle.sync_stream(); + + std::vector was_integer(nnz, false); + for (int k = 0; k < nnz; ++k) { + int col = col_indices[k]; + if (var_types[col] == var_t::INTEGER) { + double abs_val = std::abs(pre_values[k]); + if (abs_val > 0.0 && + std::abs(abs_val - std::round(abs_val)) <= 1e-6 * std::max(1.0, abs_val)) { + was_integer[k] = true; + } + } + } + + detail::mip_scaling_strategy_t scaling(op_problem); + scaling.scale_problem(/*scale_objective=*/false); + + auto post_values = + cuopt::host_copy(op_problem.get_constraint_matrix_values(), handle.get_stream()); + handle.sync_stream(); + + int violations = 0; + for (int k = 0; k < nnz; ++k) { + if (!was_integer[k]) { continue; } + double abs_val = std::abs(post_values[k]); + double frac_err = std::abs(abs_val - std::round(abs_val)); + double rel_tol = 1e-6 * std::max(1.0, abs_val); + if (frac_err > rel_tol) { ++violations; } + } + EXPECT_EQ(violations, 0) << violations + << " integer coefficients lost integrality after scaling (no-obj mode)"; +} } // namespace cuopt::linear_programming::test diff --git a/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/milp_mps_example.c b/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/milp_mps_example.c index 6d80679514..c61a29bd95 100644 --- a/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/milp_mps_example.c +++ b/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/milp_mps_example.c @@ -39,8 +39,8 @@ const char* termination_status_to_string(cuopt_int_t termination_status) cuopt_int_t solve_mps_file(const char* filename) { cuOptOptimizationProblem problem = NULL; - cuOptSolverSettings settings 
= NULL; - cuOptSolution solution = NULL; + cuOptSolverSettings settings = NULL; + cuOptSolution solution = NULL; cuopt_int_t status; cuopt_float_t time; cuopt_int_t termination_status; @@ -115,18 +115,20 @@ cuopt_int_t solve_mps_file(const char* filename) printf("\nResults:\n"); printf("--------\n"); printf("Number of variables: %d\n", num_variables); - printf("Termination status: %s (%d)\n", termination_status_to_string(termination_status), termination_status); + printf("Termination status: %s (%d)\n", + termination_status_to_string(termination_status), + termination_status); printf("Solve time: %f seconds\n", time); printf("Objective value: %f\n", objective_value); // Get and print solution variables if (has_primal_solution) { - solution_values = (cuopt_float_t*)malloc(num_variables * sizeof(cuopt_float_t)); - status = cuOptGetPrimalSolution(solution, solution_values); - if (status != CUOPT_SUCCESS) { - printf("Error getting solution values: %d\n", status); - goto DONE; - } + solution_values = (cuopt_float_t*)malloc(num_variables * sizeof(cuopt_float_t)); + status = cuOptGetPrimalSolution(solution, solution_values); + if (status != CUOPT_SUCCESS) { + printf("Error getting solution values: %d\n", status); + goto DONE; + } } printf("\nSolution: \n"); @@ -145,7 +147,8 @@ cuopt_int_t solve_mps_file(const char* filename) return status; } -int main(int argc, char* argv[]) { +int main(int argc, char* argv[]) +{ if (argc != 2) { printf("Usage: %s \n", argv[0]); return 1; diff --git a/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/mps_file_example.c b/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/mps_file_example.c index ac32e19afb..696d59cecd 100644 --- a/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/mps_file_example.c +++ b/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/mps_file_example.c @@ -60,8 +60,8 @@ const char* termination_status_to_string(cuopt_int_t termination_status) cuopt_int_t solve_mps_file(const char* filename) { cuOptOptimizationProblem problem = NULL; - 
cuOptSolverSettings settings = NULL; - cuOptSolution solution = NULL; + cuOptSolverSettings settings = NULL; + cuOptSolution solution = NULL; cuopt_int_t status; cuopt_float_t time; cuopt_int_t termination_status; @@ -129,13 +129,15 @@ cuopt_int_t solve_mps_file(const char* filename) printf("\nResults:\n"); printf("--------\n"); printf("Number of variables: %d\n", num_variables); - printf("Termination status: %s (%d)\n", termination_status_to_string(termination_status), termination_status); + printf("Termination status: %s (%d)\n", + termination_status_to_string(termination_status), + termination_status); printf("Solve time: %f seconds\n", time); printf("Objective value: %f\n", objective_value); // Get and print solution variables solution_values = (cuopt_float_t*)malloc(num_variables * sizeof(cuopt_float_t)); - status = cuOptGetPrimalSolution(solution, solution_values); + status = cuOptGetPrimalSolution(solution, solution_values); if (status != CUOPT_SUCCESS) { printf("Error getting solution values: %d\n", status); goto DONE; @@ -158,7 +160,8 @@ cuopt_int_t solve_mps_file(const char* filename) return status; } -int main(int argc, char* argv[]) { +int main(int argc, char* argv[]) +{ if (argc != 2) { printf("Usage: %s \n", argv[0]); return 1; diff --git a/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/simple_lp_example.c b/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/simple_lp_example.c index 2d675094c3..977a173c7b 100644 --- a/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/simple_lp_example.c +++ b/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/simple_lp_example.c @@ -62,8 +62,8 @@ const char* termination_status_to_string(cuopt_int_t termination_status) cuopt_int_t test_simple_lp() { cuOptOptimizationProblem problem = NULL; - cuOptSolverSettings settings = NULL; - cuOptSolution solution = NULL; + cuOptSolverSettings settings = NULL; + cuOptSolution solution = NULL; /* Solve the following LP: minimize -0.2*x1 + 0.1*x2 @@ -73,18 +73,18 @@ cuopt_int_t test_simple_lp() 
x1, x2 >= 0 */ - cuopt_int_t num_variables = 2; + cuopt_int_t num_variables = 2; cuopt_int_t num_constraints = 2; - cuopt_int_t nnz = 4; + cuopt_int_t nnz = 4; // CSR format constraint matrix // https://docs.nvidia.com/nvpl/latest/sparse/storage_format/sparse_matrix.html#compressed-sparse-row-csr // From the constraints: // 3.0*x1 + 4.0*x2 <= 5.4 // 2.7*x1 + 10.1*x2 <= 4.9 - cuopt_int_t row_offsets[] = {0, 2, 4}; + cuopt_int_t row_offsets[] = {0, 2, 4}; cuopt_int_t column_indices[] = {0, 1, 0, 1}; - cuopt_float_t values[] = {3.0, 4.0, 2.7, 10.1}; + cuopt_float_t values[] = {3.0, 4.0, 2.7, 10.1}; // Objective coefficients // From the objective function: minimize -0.2*x1 + 0.1*x2 @@ -119,19 +119,19 @@ cuopt_int_t test_simple_lp() // Create the problem status = cuOptCreateRangedProblem(num_constraints, - num_variables, - CUOPT_MINIMIZE, - 0.0, // objective offset - objective_coefficients, - row_offsets, - column_indices, - values, - constraint_lower_bounds, - constraint_upper_bounds, - var_lower_bounds, - var_upper_bounds, - variable_types, - &problem); + num_variables, + CUOPT_MINIMIZE, + 0.0, // objective offset + objective_coefficients, + row_offsets, + column_indices, + values, + constraint_lower_bounds, + constraint_upper_bounds, + var_lower_bounds, + var_upper_bounds, + variable_types, + &problem); if (status != CUOPT_SUCCESS) { printf("Error creating problem: %d\n", status); goto DONE; @@ -180,7 +180,9 @@ cuopt_int_t test_simple_lp() // Print results printf("\nResults:\n"); printf("--------\n"); - printf("Termination status: %s (%d)\n", termination_status_to_string(termination_status), termination_status); + printf("Termination status: %s (%d)\n", + termination_status_to_string(termination_status), + termination_status); printf("Solve time: %f seconds\n", time); printf("Objective value: %f\n", objective_value); @@ -211,7 +213,8 @@ cuopt_int_t test_simple_lp() return status; } -int main() { +int main() +{ // Run the test cuopt_int_t status = test_simple_lp(); 
diff --git a/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/simple_milp_example.c b/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/simple_milp_example.c index d406cc8e12..401c87a2dc 100644 --- a/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/simple_milp_example.c +++ b/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/simple_milp_example.c @@ -40,8 +40,8 @@ const char* termination_status_to_string(cuopt_int_t termination_status) cuopt_int_t test_simple_milp() { cuOptOptimizationProblem problem = NULL; - cuOptSolverSettings settings = NULL; - cuOptSolution solution = NULL; + cuOptSolverSettings settings = NULL; + cuOptSolution solution = NULL; /* Solve the following MILP: minimize -0.2*x1 + 0.1*x2 @@ -53,18 +53,18 @@ cuopt_int_t test_simple_milp() x2 is continuous */ - cuopt_int_t num_variables = 2; + cuopt_int_t num_variables = 2; cuopt_int_t num_constraints = 2; - cuopt_int_t nnz = 4; + cuopt_int_t nnz = 4; // CSR format constraint matrix // https://docs.nvidia.com/nvpl/latest/sparse/storage_format/sparse_matrix.html#compressed-sparse-row-csr // From the constraints: // 3.0*x1 + 4.0*x2 <= 5.4 // 2.7*x1 + 10.1*x2 <= 4.9 - cuopt_int_t row_offsets[] = {0, 2, 4}; + cuopt_int_t row_offsets[] = {0, 2, 4}; cuopt_int_t column_indices[] = {0, 1, 0, 1}; - cuopt_float_t values[] = {3.0, 4.0, 2.7, 10.1}; + cuopt_float_t values[] = {3.0, 4.0, 2.7, 10.1}; // Objective coefficients // From the objective function: minimize -0.2*x1 + 0.1*x2 @@ -99,19 +99,19 @@ cuopt_int_t test_simple_milp() // Create the problem status = cuOptCreateRangedProblem(num_constraints, - num_variables, - CUOPT_MINIMIZE, - 0.0, // objective offset - objective_coefficients, - row_offsets, - column_indices, - values, - constraint_lower_bounds, - constraint_upper_bounds, - var_lower_bounds, - var_upper_bounds, - variable_types, - &problem); + num_variables, + CUOPT_MINIMIZE, + 0.0, // objective offset + objective_coefficients, + row_offsets, + column_indices, + values, + constraint_lower_bounds, + 
constraint_upper_bounds, + var_lower_bounds, + var_upper_bounds, + variable_types, + &problem); if (status != CUOPT_SUCCESS) { printf("Error creating problem: %d\n", status); goto DONE; @@ -160,7 +160,9 @@ cuopt_int_t test_simple_milp() // Print results printf("\nResults:\n"); printf("--------\n"); - printf("Termination status: %s (%d)\n", termination_status_to_string(termination_status), termination_status); + printf("Termination status: %s (%d)\n", + termination_status_to_string(termination_status), + termination_status); printf("Solve time: %f seconds\n", time); printf("Objective value: %f\n", objective_value); @@ -191,7 +193,8 @@ cuopt_int_t test_simple_milp() return status; } -int main() { +int main() +{ // Run the test cuopt_int_t status = test_simple_milp(); diff --git a/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/simple_qp_example.c b/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/simple_qp_example.c index a68f360e3b..77b3f21f10 100644 --- a/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/simple_qp_example.c +++ b/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/simple_qp_example.c @@ -1,6 +1,6 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. 
SPDX-License-Identifier: Apache-2.0 */ /* * Simple QP C API Example @@ -57,8 +57,8 @@ const char* termination_status_to_string(cuopt_int_t termination_status) cuopt_int_t test_simple_qp() { cuOptOptimizationProblem problem = NULL; - cuOptSolverSettings settings = NULL; - cuOptSolution solution = NULL; + cuOptSolverSettings settings = NULL; + cuOptSolution solution = NULL; /* Solve the following QP: minimize x^2 + y^2 @@ -67,15 +67,15 @@ cuopt_int_t test_simple_qp() x, y >= 0 */ - cuopt_int_t num_variables = 2; + cuopt_int_t num_variables = 2; cuopt_int_t num_constraints = 1; - cuopt_int_t nnz = 2; + cuopt_int_t nnz = 2; // CSR format constraint matrix // https://docs.nvidia.com/nvpl/latest/sparse/storage_format/sparse_matrix.html#compressed-sparse-row-csr - cuopt_int_t row_offsets[] = {0, 2}; + cuopt_int_t row_offsets[] = {0, 2}; cuopt_int_t column_indices[] = {0, 1}; - cuopt_float_t values[] = {1.0, 1.0}; + cuopt_float_t values[] = {1.0, 1.0}; // Objective coefficients // From the objective function: minimize x^2 + y^2 @@ -87,16 +87,15 @@ cuopt_int_t test_simple_qp() // From the objective function: minimize x^2 + y^2 // 1 is the coefficient of the quadratic term on x^2 // 1 is the coefficient of the quadratic term on y^2 - cuopt_float_t quadratic_objective_matrix_values[] = {1.0, 1.0}; - cuopt_int_t quadratic_objective_matrix_row_offsets[] = {0, 1, 2}; + cuopt_float_t quadratic_objective_matrix_values[] = {1.0, 1.0}; + cuopt_int_t quadratic_objective_matrix_row_offsets[] = {0, 1, 2}; cuopt_int_t quadratic_objective_matrix_column_indices[] = {0, 1}; // Constraint bounds // From the constraints: // x + y >= 1 cuopt_float_t constraint_rhs[] = {1.0}; - char constraint_sense[] = { CUOPT_GREATER_THAN }; - + char constraint_sense[] = {CUOPT_GREATER_THAN}; // Variable bounds // From the constraints: @@ -174,7 +173,9 @@ cuopt_int_t test_simple_qp() // Print results printf("\nResults:\n"); printf("--------\n"); - printf("Termination status: %s (%d)\n", 
termination_status_to_string(termination_status), termination_status); + printf("Termination status: %s (%d)\n", + termination_status_to_string(termination_status), + termination_status); printf("Solve time: %f seconds\n", time); printf("Objective value: %f\n", objective_value); @@ -205,7 +206,8 @@ cuopt_int_t test_simple_qp() return status; } -int main() { +int main() +{ // Run the test cuopt_int_t status = test_simple_qp(); diff --git a/docs/cuopt/source/cuopt-c/lp-qp-milp/lp-qp-example.rst b/docs/cuopt/source/cuopt-c/lp-qp-milp/lp-qp-example.rst index 07182501fd..07fdc72d58 100644 --- a/docs/cuopt/source/cuopt-c/lp-qp-milp/lp-qp-example.rst +++ b/docs/cuopt/source/cuopt-c/lp-qp-milp/lp-qp-example.rst @@ -91,7 +91,7 @@ If you have built it locally, libcuopt.so will be in the build directory ``cpp/b # Find the libcuopt library and assign to LIBCUOPT_LIBRARY_PATH LIBCUOPT_LIBRARY_PATH=$(find / -name "libcuopt.so" 2>/dev/null) -A sample MPS file (:download:`download sample.mps `): +A sample MPS file (:download:`download sample.mps `): .. literalinclude:: examples/sample.mps :language: text diff --git a/docs/cuopt/source/cuopt-c/lp-qp-milp/milp-examples.rst b/docs/cuopt/source/cuopt-c/lp-qp-milp/milp-examples.rst index 3389b53d4a..a6d446d4b3 100644 --- a/docs/cuopt/source/cuopt-c/lp-qp-milp/milp-examples.rst +++ b/docs/cuopt/source/cuopt-c/lp-qp-milp/milp-examples.rst @@ -98,7 +98,7 @@ If you have built it locally, libcuopt.so will be in the build directory ``cpp/b # Find the libcuopt library and assign to LIBCUOPT_LIBRARY_PATH LIBCUOPT_LIBRARY_PATH=$(find / -name "libcuopt.so" 2>/dev/null) -A sample MILP MPS file (:download:`download mip_sample.mps `): +A sample MILP MPS file (:download:`download mip_sample.mps `): .. 
literalinclude:: examples/mip_sample.mps :language: text diff --git a/docs/cuopt/source/cuopt-server/examples/lp-examples.rst b/docs/cuopt/source/cuopt-server/examples/lp-examples.rst index 52d401281c..7bba75d046 100644 --- a/docs/cuopt/source/cuopt-server/examples/lp-examples.rst +++ b/docs/cuopt/source/cuopt-server/examples/lp-examples.rst @@ -407,7 +407,7 @@ In the case of batch mode, you can send a bunch of ``mps`` files at once, and ac .. note:: Batch mode is not available for MILP problems. -A sample MPS file (:download:`sample.mps `): +A sample MPS file (:download:`sample.mps `): .. literalinclude:: lp/examples/sample.mps :language: text diff --git a/docs/cuopt/source/faq.rst b/docs/cuopt/source/faq.rst index 0c3a0e219f..2770e1b507 100644 --- a/docs/cuopt/source/faq.rst +++ b/docs/cuopt/source/faq.rst @@ -283,7 +283,7 @@ Routing FAQ So in either case, task locations are actually integer indices into another structure. - If you have (lat, long) values, then you can generate a cost matrix using a map API. cuOpt does not directly connect to a third-party map engine, but that can be done outside of cuOpt as shown `here `__. + If you have (lat, long) values, then you can generate a cost matrix using a map API. cuOpt does not directly connect to a third-party map engine, but that can be done outside of cuOpt as shown `here `__. .. dropdown:: Is it possible to define constraints such as refrigerated vehicles required for certain orders? diff --git a/python/cuopt/cuopt/linear_programming/solution/solution.py b/python/cuopt/cuopt/linear_programming/solution/solution.py index e2533da8c1..93d224fdbd 100644 --- a/python/cuopt/cuopt/linear_programming/solution/solution.py +++ b/python/cuopt/cuopt/linear_programming/solution/solution.py @@ -1,6 +1,9 @@ # SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 +from cuopt.linear_programming.solver_settings.solver_settings import ( + SolverMethod, +) from cuopt.linear_programming.solver.solver_wrapper import ( LPTerminationStatus, MILPTerminationStatus, @@ -116,8 +119,10 @@ class Solution: Time used for pre-solve solve_time: Float64 Solve time in seconds - solved_by_pdlp: bool - Whether the problem was solved by PDLP or Dual Simplex + solved_by: enum + Note: Applicable to only LP + Whether the LP was solved by Dual Simplex, PDLP or Barrier. This is populated + by the solver using the values from SolverMethod. """ def __init__( @@ -154,7 +159,7 @@ def __init__( dual_objective=0.0, gap=0.0, nb_iterations=0, - solved_by_pdlp=None, + solved_by=SolverMethod.Unset, mip_gap=0.0, solution_bound=0.0, presolve_time=0.0, @@ -196,7 +201,7 @@ def __init__( self.primal_objective = primal_objective self.dual_objective = dual_objective self.solve_time = solve_time - self.solved_by_pdlp = solved_by_pdlp + self.solved_by = SolverMethod(solved_by) self.vars = vars self.lp_stats = { "primal_residual": primal_residual, @@ -302,10 +307,26 @@ def get_solve_time(self): return self.solve_time def get_solved_by_pdlp(self): + """ + Returns whether the problem was solved by PDLP or not. + + Deprecated: use get_solved_by() instead. + """ + from warnings import warn + + warn( + "get_solved_by_pdlp() will be deprecated in 26.08. Use get_solved_by() instead. ", + DeprecationWarning, + stacklevel=2, + ) + + return self.solved_by == SolverMethod.PDLP + + def get_solved_by(self): """ - Returns whether the problem was solved by PDLP or Dual Simplex + Returns whether the LP was solved by Dual Simplex, PDLP or Barrier. See SolverMethod for all possible values.
""" - return self.solved_by_pdlp + return self.solved_by def get_vars(self): """ diff --git a/python/cuopt/cuopt/linear_programming/solver/solver.pxd b/python/cuopt/cuopt/linear_programming/solver/solver.pxd index f5ed90055f..3bb2cba34a 100644 --- a/python/cuopt/cuopt/linear_programming/solver/solver.pxd +++ b/python/cuopt/cuopt/linear_programming/solver/solver.pxd @@ -29,6 +29,13 @@ cdef extern from "cuopt/linear_programming/pdlp/solver_settings.hpp" namespace " Fast1 "cuopt::linear_programming::pdlp_solver_mode_t::Fast1" # noqa Stable3 "cuopt::linear_programming::pdlp_solver_mode_t::Stable3" # noqa + ctypedef enum method_t "cuopt::linear_programming::method_t": # noqa + Concurrent "cuopt::linear_programming::method_t::Concurrent" # noqa + PDLP "cuopt::linear_programming::method_t::PDLP" # noqa + DualSimplex "cuopt::linear_programming::method_t::DualSimplex" # noqa + Barrier "cuopt::linear_programming::method_t::Barrier" # noqa + Unset "cuopt::linear_programming::method_t::Unset" # noqa + cdef extern from "cuopt/linear_programming/solver_settings.hpp" namespace "cuopt::linear_programming": # noqa cdef cppclass solver_settings_t[i_t, f_t]: @@ -178,7 +185,7 @@ cdef extern from "cuopt/linear_programming/utilities/cython_solve.hpp" namespace double gap_ int nb_iterations_ double solve_time_ - bool solved_by_pdlp_ + method_t solved_by_ bool is_gpu() # Unified MIP solution struct — solution_ variant accessed via helpers diff --git a/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx b/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx index cb4831a367..f5a7aaab48 100644 --- a/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx +++ b/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx @@ -62,6 +62,7 @@ import cudf from cuopt.linear_programming.solver_settings.solver_settings import ( PDLPSolverMode, + SolverMethod, SolverSettings, ) from cuopt.utilities import InputValidationError, series_from_buf @@ -479,7 +480,7 @@ cdef 
create_solution(unique_ptr[solver_ret_t] sol_ret_ptr, lp_ptr.dual_objective_, lp_ptr.gap_, lp_ptr.nb_iterations_, - lp_ptr.solved_by_pdlp_, + lp_ptr.solved_by_, ) else: return Solution( @@ -498,7 +499,7 @@ cdef create_solution(unique_ptr[solver_ret_t] sol_ret_ptr, dual_objective=lp_ptr.dual_objective_, gap=lp_ptr.gap_, nb_iterations=lp_ptr.nb_iterations_, - solved_by_pdlp=lp_ptr.solved_by_pdlp_, + solved_by=lp_ptr.solved_by_, ) diff --git a/python/cuopt/cuopt/linear_programming/solver_settings/solver_settings.py b/python/cuopt/cuopt/linear_programming/solver_settings/solver_settings.py index 19db315349..dc689b75fe 100644 --- a/python/cuopt/cuopt/linear_programming/solver_settings/solver_settings.py +++ b/python/cuopt/cuopt/linear_programming/solver_settings/solver_settings.py @@ -18,6 +18,7 @@ class SolverMethod(IntEnum): PDLP = auto() DualSimplex = auto() Barrier = auto() + Unset = auto() def __str__(self): """Convert the solver method to a string. diff --git a/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py b/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py index 08cb0b4a70..291c80d925 100644 --- a/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py +++ b/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py @@ -87,7 +87,7 @@ def test_solver(): assert solution.get_primal_objective() == pytest.approx(0.0) assert solution.get_dual_objective() == pytest.approx(0.0) assert solution.get_lp_stats()["gap"] == pytest.approx(0.0) - assert solution.get_solved_by_pdlp() + assert solution.get_solved_by() == SolverMethod.PDLP def test_parser_and_solver(): @@ -600,7 +600,7 @@ def test_dual_simplex(): assert solution.get_termination_status() == LPTerminationStatus.Optimal assert solution.get_primal_objective() == pytest.approx(-464.7531) - assert not solution.get_solved_by_pdlp() + assert solution.get_solved_by() == SolverMethod.DualSimplex def test_barrier(): @@ -768,7 +768,7 @@ def test_pdlp_precision_single(): assert 
solution.get_primal_objective() == pytest.approx( -464.7531, rel=1e-1 ) - assert solution.get_solved_by_pdlp() + assert solution.get_solved_by() == SolverMethod.PDLP def test_pdlp_precision_single_crossover(): diff --git a/python/cuopt_self_hosted/cuopt_sh_client/cuopt_self_host_client.py b/python/cuopt_self_hosted/cuopt_sh_client/cuopt_self_host_client.py index 066e81b026..8a4156c592 100644 --- a/python/cuopt_self_hosted/cuopt_sh_client/cuopt_self_host_client.py +++ b/python/cuopt_self_hosted/cuopt_sh_client/cuopt_self_host_client.py @@ -200,7 +200,7 @@ def create_solution_obj(solver_response): nb_iterations=sol["lp_statistics"]["nb_iterations"], primal_objective=sol["primal_objective"], dual_objective=sol["dual_objective"], - solved_by_pdlp=sol["solved_by_pdlp"], + solved_by=sol["solved_by"], ) return status, solution_obj diff --git a/python/cuopt_self_hosted/cuopt_sh_client/thin_client_solution.py b/python/cuopt_self_hosted/cuopt_sh_client/thin_client_solution.py index aa169abed9..1d519211fe 100644 --- a/python/cuopt_self_hosted/cuopt_sh_client/thin_client_solution.py +++ b/python/cuopt_self_hosted/cuopt_sh_client/thin_client_solution.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 @@ -66,8 +66,8 @@ class ThinClientSolution: Time used for pre-solve solve_time: Float64 Solve time in seconds - solved_by_pdlp: bool - Whether the problem was solved by PDLP or Dual Simplex + solved_by: str + Whether the problem was solved by PDLP, Barrier or Dual Simplex """ def __init__( @@ -87,7 +87,7 @@ def __init__( dual_objective=0.0, gap=0.0, nb_iterations=0, - solved_by_pdlp=None, + solved_by=None, mip_gap=0.0, solution_bound=0.0, presolve_time=0.0, @@ -107,7 +107,7 @@ def __init__( self.primal_objective = primal_objective self.dual_objective = dual_objective self.solve_time = solve_time - self.solved_by_pdlp = solved_by_pdlp + self.solved_by = solved_by self.vars = vars self.lp_stats = { "primal_residual": primal_residual, @@ -191,11 +191,11 @@ def get_solve_time(self): """ return self.solve_time - def get_solved_by_pdlp(self): + def get_solved_by(self): """ - Returns whether the problem was solved by PDLP or Dual Simplex + Returns whether the problem was solved by PDLP, Barrier or Dual Simplex """ - return self.solved_by_pdlp + return self.solved_by def get_vars(self): """ diff --git a/python/cuopt_server/cuopt_server/tests/test_lp.py b/python/cuopt_server/cuopt_server/tests/test_lp.py index 7b85899350..e3a683f8de 100644 --- a/python/cuopt_server/cuopt_server/tests/test_lp.py +++ b/python/cuopt_server/cuopt_server/tests/test_lp.py @@ -88,7 +88,7 @@ def get_std_data_for_milp(): data = get_std_data_for_lp() data["variable_types"] = ["I", "C"] data["maximize"] = True - data["solver_config"]["mip_scaling"] = False + data["solver_config"]["mip_scaling"] = 0 return data @@ -107,10 +107,10 @@ def test_sample_lp(cuoptproc): # noqa @pytest.mark.parametrize( "maximize, scaling, expected_status, heuristics_only", [ - (True, True, MILPTerminationStatus.Optimal.name, True), - (False, True, MILPTerminationStatus.Optimal.name, False), - (True, False, MILPTerminationStatus.Optimal.name, True), - (False, False, 
MILPTerminationStatus.Optimal.name, False), + (True, 1, MILPTerminationStatus.Optimal.name, True), + (False, 1, MILPTerminationStatus.Optimal.name, False), + (True, 0, MILPTerminationStatus.Optimal.name, True), + (False, 0, MILPTerminationStatus.Optimal.name, False), ], ) def test_sample_milp( diff --git a/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py b/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py index dba48f010a..78f3068014 100644 --- a/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py +++ b/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py @@ -441,9 +441,15 @@ class SolverConfig(BaseModel): "
" "Note: Not supported for MILP. ", ) - mip_scaling: Optional[bool] = Field( - default=True, - description="Set True to enable MIP scaling, False to disable.", + mip_scaling: Optional[int] = Field( + default=1, + description="MIP scaling mode:" + "
" + "- 0: No scaling" + "
" + "- 1: Full scaling (objective + row), default" + "
" + "- 2: Row scaling only (no objective scaling)", ) mip_heuristics_only: Optional[bool] = Field( default=False, @@ -452,8 +458,15 @@ class SolverConfig(BaseModel): ) mip_batch_pdlp_strong_branching: Optional[int] = Field( default=0, - description="Set 1 to enable batch PDLP strong branching " - "in the MIP solver, 0 to disable.", + description="Strong branching mode: 0 = Dual Simplex only, " + "1 = cooperative work-stealing (DS + batch PDLP), " + "2 = batch PDLP only.", + ) + mip_batch_pdlp_reliability_branching: Optional[int] = Field( + default=0, + description="Reliability branching mode: 0 = Dual Simplex only, " + "1 = cooperative work-stealing (DS + batch PDLP), " + "2 = batch PDLP only.", ) num_cpu_threads: Optional[int] = Field( default=None, @@ -700,10 +713,10 @@ class SolutionData(StrictModel): default=None, description=("Returns the engine solve time in seconds"), ) - solved_by_pdlp: bool = Field( + solved_by: str = Field( default=None, description=( - "Returns whether problem was solved by PDLP or Dual Simplex" + "Returns whether problem was solved by PDLP, Barrier or Dual Simplex" ), ) primal_objective: float = Field( diff --git a/python/cuopt_server/cuopt_server/utils/linear_programming/solver.py b/python/cuopt_server/cuopt_server/utils/linear_programming/solver.py index 87524f8715..0bc05a9b9f 100644 --- a/python/cuopt_server/cuopt_server/utils/linear_programming/solver.py +++ b/python/cuopt_server/cuopt_server/utils/linear_programming/solver.py @@ -317,7 +317,7 @@ def create_solution(sol): sol.get_dual_objective ) solution["solver_time"] = sol.get_solve_time() - solution["solved_by_pdlp"] = sol.get_solved_by_pdlp() + solution["solved_by"] = sol.get_solved_by().name solution["vars"] = sol.get_vars() solution["lp_statistics"] = {} if lp_stats is None else lp_stats solution["reduced_cost"] = reduced_cost