Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 78 additions & 52 deletions cpp/src/mip_heuristics/diversity/diversity_manager.cu
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,48 @@ diversity_manager_t<i_t, f_t>::diversity_manager_t(mip_solver_context_t<i_t, f_t
}
}

// NOTE: run_local_search() (defined further below) specializes the local search with the config
// supplied by the diversity manager
/**
 * @brief Loads the simplex solution staged in this manager into the LP buffers.
 *
 * Snapshots the staged primal solution and objective while holding relaxed_solution_mutex,
 * copies the primal values into lp_optimal_solution, clamps them within the variable bounds,
 * mirrors the clamped values into lp_state.prev_primal and finally forwards the staged
 * objective to set_new_user_bound().
 *
 * Expected to be called only after simplex_solution_exists has been observed as true
 * (this is asserted below).
 *
 * NOTE(review): the staged dual solution is only size-checked here; it is not copied into
 * lp_dual_optimal_solution or lp_state.prev_dual. Confirm that this is intentional.
 *
 * @param lp_state LP state whose prev_primal buffer receives the clamped primal solution.
 */
template <typename i_t, typename f_t>
void diversity_manager_t<i_t, f_t>::consume_staged_simplex_solution(lp_state_t<i_t, f_t>& lp_state)
{
  // Alias without a top-level comma so the type can be passed through cuopt_func_call(...).
  using sol_t = solution_t<i_t, f_t>;

  std::vector<f_t> staged_simplex_solution_local;
  f_t staged_simplex_objective_local = std::numeric_limits<f_t>::infinity();
  {
    // Take a host-side snapshot so no device work is issued while the mutex is held.
    std::lock_guard<std::mutex> guard(relaxed_solution_mutex);
    cuopt_assert(simplex_solution_exists.load(),
                 "Simplex solution flag set without a staged simplex solution");
    // The dual vector is only validated for size, so check it in place instead of copying it.
    cuopt_assert(problem_ptr->n_constraints == staged_simplex_dual_solution.size(),
                 "Dual assignment size mismatch");
    staged_simplex_solution_local  = staged_simplex_solution;
    staged_simplex_objective_local = staged_simplex_objective;
  }

  // Validation-only work: the temporary solution exists solely to feed the assertions, so it is
  // wrapped in cuopt_func_call (presumably compiled only where cuopt_assert is active — verify
  // against the macro definition) to avoid device allocations and kernels in other builds.
  cuopt_func_call(sol_t new_sol(*problem_ptr));
  cuopt_assert(new_sol.assignment.size() == staged_simplex_solution_local.size(),
               "Assignment size mismatch");
  cuopt_func_call(new_sol.copy_new_assignment(staged_simplex_solution_local));
  cuopt_func_call(new_sol.compute_feasibility());
  cuopt_assert(integer_equal(new_sol.get_user_objective(), staged_simplex_objective_local, 1e-3),
               "Objective mismatch");

  // Host -> device copy of the staged primal values, then clamp and mirror into the LP state.
  // All operations are issued on the problem handle's stream.
  raft::copy(lp_optimal_solution.data(),
             staged_simplex_solution_local.data(),
             staged_simplex_solution_local.size(),
             problem_ptr->handle_ptr->get_stream());
  clamp_within_var_bounds(lp_optimal_solution, problem_ptr, problem_ptr->handle_ptr);
  raft::copy(lp_state.prev_primal.data(),
             lp_optimal_solution.data(),
             lp_optimal_solution.size(),
             problem_ptr->handle_ptr->get_stream());
  // The host snapshot must stay alive until the asynchronous copies have completed.
  problem_ptr->handle_ptr->sync_stream();

  // Validation-only work: confirm the clamped LP solution respects every variable bound.
  cuopt_func_call(sol_t bounded_lp_sol(*problem_ptr));
  cuopt_func_call(bounded_lp_sol.copy_new_assignment(lp_optimal_solution));
  cuopt_func_call(bounded_lp_sol.handle_ptr->sync_stream());
  cuopt_func_call(auto max_lp_bound_violation = bounded_lp_sol.compute_max_variable_violation());
  cuopt_assert(max_lp_bound_violation == 0.0,
               "LP optimal solution must be within variable bounds after staged copy");

  set_new_user_bound(staged_simplex_objective_local);
}

template <typename i_t, typename f_t>
bool diversity_manager_t<i_t, f_t>::run_local_search(solution_t<i_t, f_t>& solution,
const weight_t<i_t, f_t>& weights,
Expand Down Expand Up @@ -416,6 +457,7 @@ solution_t<i_t, f_t> diversity_manager_t<i_t, f_t>::run_solver()
lp_state.resize(*problem_ptr, problem_ptr->handle_ptr->get_stream());
bool bb_thread_solution_exists = simplex_solution_exists.load();
if (bb_thread_solution_exists) {
consume_staged_simplex_solution(lp_state);
ls.lp_optimal_exists = true;
} else if (!fj_only_run) {
convert_greater_to_less(*problem_ptr);
Expand All @@ -440,9 +482,11 @@ solution_t<i_t, f_t> diversity_manager_t<i_t, f_t>::run_solver()
timer_t lp_timer(lp_time_limit);
auto lp_result = solve_lp_with_method<i_t, f_t>(*problem_ptr, pdlp_settings, lp_timer);

bool use_staged_simplex_solution = false;
{
std::lock_guard<std::mutex> guard(relaxed_solution_mutex);
if (!simplex_solution_exists.load()) {
use_staged_simplex_solution = simplex_solution_exists.load();
if (!use_staged_simplex_solution) {
cuopt_assert(lp_result.get_primal_solution().size() == lp_optimal_solution.size(),
"LP optimal solution size mismatch");
cuopt_assert(lp_result.get_dual_solution().size() == lp_dual_optimal_solution.size(),
Expand All @@ -455,42 +499,37 @@ solution_t<i_t, f_t> diversity_manager_t<i_t, f_t>::run_solver()
lp_result.get_dual_solution().data(),
lp_dual_optimal_solution.size(),
problem_ptr->handle_ptr->get_stream());
} else {
// copy the lp state
raft::copy(lp_state.prev_primal.data(),
lp_optimal_solution.data(),
lp_optimal_solution.size(),
problem_ptr->handle_ptr->get_stream());
raft::copy(lp_state.prev_dual.data(),
lp_dual_optimal_solution.data(),
lp_dual_optimal_solution.size(),
problem_ptr->handle_ptr->get_stream());
}
problem_ptr->handle_ptr->sync_stream();
}
if (use_staged_simplex_solution) { consume_staged_simplex_solution(lp_state); }
cuopt_assert(thrust::all_of(problem_ptr->handle_ptr->get_thrust_policy(),
lp_optimal_solution.begin(),
lp_optimal_solution.end(),
[] __host__ __device__(f_t val) { return std::isfinite(val); }),
"LP optimal solution contains non-finite values");
ls.lp_optimal_exists = true;
if (lp_result.get_termination_status() == pdlp_termination_status_t::Optimal) {
set_new_user_bound(lp_result.get_objective_value());
} else if (lp_result.get_termination_status() == pdlp_termination_status_t::PrimalInfeasible) {
CUOPT_LOG_ERROR("Problem is primal infeasible, continuing anyway!");
ls.lp_optimal_exists = false;
} else if (lp_result.get_termination_status() == pdlp_termination_status_t::DualInfeasible) {
CUOPT_LOG_ERROR("PDLP detected dual infeasibility, continuing anyway!");
ls.lp_optimal_exists = false;
} else if (lp_result.get_termination_status() == pdlp_termination_status_t::TimeLimit) {
CUOPT_LOG_DEBUG(
"Initial LP run exceeded time limit, continuing solver with partial LP result!");
// note to developer, in debug mode the LP run might be too slow and it might cause PDLP not
// to bring variables within the bounds
if (!use_staged_simplex_solution) {
if (lp_result.get_termination_status() == pdlp_termination_status_t::Optimal) {
set_new_user_bound(lp_result.get_objective_value());
} else if (lp_result.get_termination_status() ==
pdlp_termination_status_t::PrimalInfeasible) {
CUOPT_LOG_ERROR("Problem is primal infeasible, continuing anyway!");
ls.lp_optimal_exists = false;
} else if (lp_result.get_termination_status() == pdlp_termination_status_t::DualInfeasible) {
CUOPT_LOG_ERROR("PDLP detected dual infeasibility, continuing anyway!");
ls.lp_optimal_exists = false;
} else if (lp_result.get_termination_status() == pdlp_termination_status_t::TimeLimit) {
CUOPT_LOG_DEBUG(
"Initial LP run exceeded time limit, continuing solver with partial LP result!");
// note to developer, in debug mode the LP run might be too slow and it might cause PDLP
// not to bring variables within the bounds
}
}

// Send PDLP relaxed solution to branch and bound
if (problem_ptr->set_root_relaxation_solution_callback != nullptr) {
// Send relaxed solution to branch and bound only if PDLP found it (not dual simplex via
// set_simplex_solution)
if (!use_staged_simplex_solution &&
problem_ptr->set_root_relaxation_solution_callback != nullptr) {
auto& d_primal_solution = lp_result.get_primal_solution();
auto& d_dual_solution = lp_result.get_dual_solution();
auto& d_reduced_costs = lp_result.get_reduced_cost();
Expand Down Expand Up @@ -521,8 +560,10 @@ solution_t<i_t, f_t> diversity_manager_t<i_t, f_t>::run_solver()
host_primal, host_dual, host_reduced_costs, solver_obj, user_obj, iterations);
}

// in case PDLP returned variable values that are outside their bounds
clamp_within_var_bounds(lp_optimal_solution, problem_ptr, problem_ptr->handle_ptr);
if (!use_staged_simplex_solution) {
// in case PDLP returned variable values that are outside their bounds
clamp_within_var_bounds(lp_optimal_solution, problem_ptr, problem_ptr->handle_ptr);
}
}

if (ls.lp_optimal_exists) {
Expand Down Expand Up @@ -865,30 +906,15 @@ void diversity_manager_t<i_t, f_t>::set_simplex_solution(const std::vector<f_t>&
f_t objective)
{
CUOPT_LOG_DEBUG("Setting simplex solution with objective %f", objective);
using sol_t = solution_t<i_t, f_t>;
RAFT_CUDA_TRY(cudaSetDevice(context.handle_ptr->get_device()));
context.handle_ptr->sync_stream();
cuopt_func_call(sol_t new_sol(*problem_ptr));
cuopt_assert(new_sol.assignment.size() == solution.size(), "Assignment size mismatch");
cuopt_assert(problem_ptr->n_constraints == dual_solution.size(), "Dual assignment size mismatch");
cuopt_func_call(new_sol.copy_new_assignment(solution));
cuopt_func_call(new_sol.compute_feasibility());
cuopt_assert(integer_equal(new_sol.get_user_objective(), objective, 1e-3), "Objective mismatch");
std::lock_guard<std::mutex> lock(relaxed_solution_mutex);
simplex_solution_exists.store(true, std::memory_order_release);
global_concurrent_halt = 1;
CUOPT_LOG_DEBUG("Setting concurrent halt for PDLP inside diversity manager");
// global_concurrent_halt.store(1, std::memory_order_release);
// it is safe to use lp_optimal_solution while executing the copy operation
// the operations are ordered as long as they are on the same stream
raft::copy(
lp_optimal_solution.data(), solution.data(), solution.size(), context.handle_ptr->get_stream());
raft::copy(lp_dual_optimal_solution.data(),
dual_solution.data(),
dual_solution.size(),
context.handle_ptr->get_stream());
set_new_user_bound(objective);
context.handle_ptr->sync_stream();
cuopt_assert(lp_optimal_solution.size() == solution.size(), "Assignment size mismatch");
cuopt_assert(problem_ptr->n_constraints == dual_solution.size(), "Dual assignment size mismatch");
staged_simplex_solution = solution;
staged_simplex_dual_solution = dual_solution;
staged_simplex_objective = objective;
simplex_solution_exists.store(true, std::memory_order_release);
CUOPT_LOG_DEBUG("Staged simplex solution and requested concurrent halt");
}

#if MIP_INSTANTIATE_FLOAT
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/mip_heuristics/diversity/diversity_manager.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ class diversity_manager_t {
timer_t& timer,
ls_config_t<i_t, f_t>& ls_config);

void consume_staged_simplex_solution(lp_state_t<i_t, f_t>& lp_state);
void set_simplex_solution(const std::vector<f_t>& solution,
const std::vector<f_t>& dual_solution,
f_t objective);
Expand All @@ -79,6 +80,9 @@ class diversity_manager_t {
rmm::device_uvector<f_t> lp_optimal_solution;
rmm::device_uvector<f_t> lp_dual_optimal_solution;
std::atomic<bool> simplex_solution_exists{false};
std::vector<f_t> staged_simplex_solution;
std::vector<f_t> staged_simplex_dual_solution;
f_t staged_simplex_objective{std::numeric_limits<f_t>::infinity()};
local_search_t<i_t, f_t> ls;
cuopt::timer_t timer;
bound_prop_recombiner_t<i_t, f_t> bound_prop_recombiner;
Expand Down
15 changes: 11 additions & 4 deletions cpp/src/pdlp/solve.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1145,17 +1145,21 @@ optimization_problem_solution_t<i_t, f_t> run_concurrent(
std::ref(sol_dual_simplex_ptr),
std::ref(timer));
}
dual_simplex::user_problem_t<i_t, f_t> barrier_problem = dual_simplex_problem;
// Create a thread for barrier
// Create a thread for barrier.
// The barrier handle is owned here so that its destructor runs on the
// main thread after PDLP finishes. cublasDestroy internally calls cudaDeviceSynchronize, which
// is globally forbidden while any stream is in graph capture mode.
std::unique_ptr<raft::handle_t> barrier_handle_ptr;
std::unique_ptr<
std::tuple<dual_simplex::lp_solution_t<i_t, f_t>, dual_simplex::lp_status_t, f_t, f_t, f_t>>
sol_barrier_ptr;
auto barrier_thread = std::thread([&]() {
auto call_barrier_thread = [&]() {
rmm::cuda_stream_view barrier_stream = rmm::cuda_stream_per_thread;
auto barrier_handle = raft::handle_t(barrier_stream);
barrier_handle_ptr = std::make_unique<raft::handle_t>(barrier_stream);
auto barrier_problem = dual_simplex_problem;
barrier_problem.handle_ptr = &barrier_handle;
barrier_problem.handle_ptr = barrier_handle_ptr.get();

run_barrier_thread<i_t, f_t>(std::ref(barrier_problem),
std::ref(settings_pdlp),
std::ref(sol_barrier_ptr),
Expand Down Expand Up @@ -1193,6 +1197,9 @@ optimization_problem_solution_t<i_t, f_t> run_concurrent(
if (!settings.inside_mip) { dual_simplex_thread.join(); }

barrier_thread.join();
// At this point, it is safe to destroy the barrier context since we're outside of any PDLP graph
// capture.
barrier_handle_ptr.reset();
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That should do the trick yes, nice one!


// copy the dual simplex solution to the device
auto sol_dual_simplex =
Expand Down
2 changes: 2 additions & 0 deletions docs/cuopt/source/cuopt-server/examples/milp-examples.rst
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ The incumbent solution can be retrieved using a callback function as follows:

.. note::
Incumbent solution callback is only applicable to MILP.
The callback bound can be ``None`` when the solver has found an incumbent
but no finite global bound is available yet.

:download:`incumbent_callback_example.py <milp/examples/incumbent_callback_example.py>`

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,10 @@ def main():

# Incumbent callback - receives intermediate host solutions
def callback(solution, solution_cost, solution_bound):
"""Called when solver finds a new incumbent solution."""
"""Called when solver finds a new incumbent solution.

solution_bound can be None when no finite bound is available yet.
"""
print(
f"Solution : {solution} cost : {solution_cost} "
f"bound : {solution_bound}\n"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ def set_solution(
@pytest.mark.parametrize(
"file_name",
[
# ("/mip/swath1.mps"), # Disabled: https://github.com/NVIDIA/cuopt/issues/967 (PDLP concurrent / incumbent callbacks).
# ("/mip/neos5-free-bound.mps"), # Disabled: https://github.com/NVIDIA/cuopt/issues/967 (PDLP concurrent / incumbent callbacks).
("/mip/swath1.mps"),
("/mip/neos5-free-bound.mps"),
],
)
def test_incumbent_get_callback(file_name):
Expand All @@ -115,8 +115,8 @@ def test_incumbent_get_callback(file_name):
@pytest.mark.parametrize(
"file_name",
[
# ("/mip/swath1.mps"), # Disabled: https://github.com/NVIDIA/cuopt/issues/967 (PDLP concurrent / incumbent callbacks).
# ("/mip/neos5-free-bound.mps"), # Disabled: https://github.com/NVIDIA/cuopt/issues/967 (PDLP concurrent / incumbent callbacks).
("/mip/swath1.mps"),
("/mip/neos5-free-bound.mps"),
],
)
def test_incumbent_get_set_callback(file_name):
Expand Down
7 changes: 4 additions & 3 deletions python/cuopt_server/cuopt_server/tests/test_incumbents.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,16 @@ def _run_incumbent_callback(cuoptproc, include_set_callback): # noqa
cnt = 0
while cnt < 60:
res = client.get(f"/cuopt/solution/{reqId}/incumbents")
if res.json() != []:
i = res.json()[0]
payload = res.json()
if payload != []:
i = payload[0]
assert "solution" in i
assert isinstance(i["solution"], list)
assert len(i["solution"]) > 0
assert "cost" in i
assert isinstance(i["cost"], float)
assert "bound" in i
assert isinstance(i["bound"], float)
assert i["bound"] is None or isinstance(i["bound"], float)
break
time.sleep(1)
cnt += 1
Expand Down
2 changes: 1 addition & 1 deletion python/cuopt_server/cuopt_server/webserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ def getsolverlogs(
"this id since the last GET. Result will be a list of the form "
"[{'solution': [1.0, 1.0], 'cost': 2.0, 'bound': 1.5}] where each item "
"contains the fields 'solution' (a list of floats), "
"'cost' (a float), and 'bound' (a float). "
"'cost' (a float), and 'bound' (a float or None when no finite bound is available yet). "
"An empty list indicates that there are no current incumbent solutions "
"at this time. A sentinel value of [{'solution': [], 'cost': None, "
"'bound': None}] indicates that no future incumbent values will be produced. "
Expand Down
Loading