From 374e40d98cddec17dd7ba312a7cf510b0b507c26 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Thu, 9 Apr 2026 11:35:34 -0500 Subject: [PATCH 1/4] Improve C++ error handling, resource cleanup, and API return checks - Add fread return value checks and dimension validation in presolve binary reader - Wrap CUDA kernel launches and graph API calls with RAFT_CUDA_TRY in feasibility_jump - Wrap cudaMemcpy calls with RAFT_CUDA_TRY in optimization_problem tests - Add cudaGetDevice/cudaGetDeviceProperties return checks in version_info - Use PID-based shared memory segment names in gRPC server - Use RAII (unique_ptr) for FILE handle in MPS parser to prevent fd leak on error Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/libmps_parser/src/mps_parser.cpp | 15 ++-- cpp/src/dual_simplex/presolve.hpp | 47 +++++++---- cpp/src/grpc/server/grpc_server_main.cpp | 12 +-- cpp/src/grpc/server/grpc_server_types.hpp | 13 ++- cpp/src/grpc/server/grpc_worker_infra.cpp | 6 +- .../feasibility_jump/feasibility_jump.cu | 82 +++++++++---------- cpp/src/utilities/version_info.cpp | 44 +++++----- .../unit_tests/optimization_problem_test.cu | 72 ++++++++-------- 8 files changed, 159 insertions(+), 132 deletions(-) diff --git a/cpp/libmps_parser/src/mps_parser.cpp b/cpp/libmps_parser/src/mps_parser.cpp index 6a81b3b6c1..72311720bb 100644 --- a/cpp/libmps_parser/src/mps_parser.cpp +++ b/cpp/libmps_parser/src/mps_parser.cpp @@ -544,35 +544,30 @@ std::vector mps_parser_t::file_to_string(const std::string& file #endif // MPS_PARSER_WITH_ZLIB // Faster than using C++ I/O - FILE* fp = fopen(file.c_str(), "r"); + std::unique_ptr fp{fopen(file.c_str(), "r")}; mps_parser_expects(fp != nullptr, error_type_t::ValidationError, "Error opening MPS file! Given path: %s", mps_file.c_str()); - mps_parser_expects(fseek(fp, 0L, SEEK_END) == 0, + mps_parser_expects(fseek(fp.get(), 0L, SEEK_END) == 0, error_type_t::ValidationError, "File browsing MPS file! Given path: %s", mps_file.c_str()); - const long bufsize = ftell(fp); + const long bufsize = ftell(fp.get()); mps_parser_expects(bufsize != -1L, error_type_t::ValidationError, "File browsing MPS file! Given path: %s", mps_file.c_str()); std::vector buf(bufsize + 1); - rewind(fp); + rewind(fp.get()); - mps_parser_expects(fread(buf.data(), sizeof(char), bufsize, fp) == bufsize, + mps_parser_expects(fread(buf.data(), sizeof(char), bufsize, fp.get()) == bufsize, error_type_t::ValidationError, "Error reading MPS file! Given path: %s", mps_file.c_str()); buf[bufsize] = '\0'; - mps_parser_expects(fclose(fp) == 0, - error_type_t::ValidationError, - "Error closing MPS file! Given path: %s", - mps_file.c_str()); - return buf; } diff --git a/cpp/src/dual_simplex/presolve.hpp b/cpp/src/dual_simplex/presolve.hpp index d570ea933e..49c121157a 100644 --- a/cpp/src/dual_simplex/presolve.hpp +++ b/cpp/src/dual_simplex/presolve.hpp @@ -75,29 +75,48 @@ struct lp_problem_t { { FILE* fid = fopen(path.c_str(), "r"); if (fid) { - fread(&num_rows, sizeof(i_t), 1, fid); - fread(&num_cols, sizeof(i_t), 1, fid); - fread(&obj_constant, sizeof(f_t), 1, fid); - fread(&obj_scale, sizeof(f_t), 1, fid); + auto check_read = [&](size_t got, size_t expected, const char* field) { + if (got != expected) { + fclose(fid); + throw std::runtime_error(std::string("Failed to read field '") + field + + "' from file: " + path); + } + }; + constexpr i_t max_dim = 100000000; // 100M upper bound for sanity + + check_read(fread(&num_rows, sizeof(i_t), 1, fid), 1, "num_rows"); + check_read(fread(&num_cols, sizeof(i_t), 1, fid), 1, "num_cols"); + if (num_rows <= 0 || num_rows > max_dim || num_cols <= 0 || num_cols > max_dim) { + fclose(fid); + throw std::runtime_error("Invalid dimensions in file: " + path); + } + check_read(fread(&obj_constant, sizeof(f_t), 1, fid), 1, "obj_constant"); + check_read(fread(&obj_scale, sizeof(f_t), 1, fid), 1, "obj_scale"); i_t is_integral; - fread(&is_integral, sizeof(i_t), 1, fid); + check_read(fread(&is_integral, sizeof(i_t), 1, fid), 1, "is_integral"); objective_is_integral = is_integral == 1; objective.resize(num_cols); - fread(objective.data(), sizeof(f_t), num_cols, fid); + check_read(fread(objective.data(), sizeof(f_t), num_cols, fid), num_cols, "objective"); rhs.resize(num_rows); - fread(rhs.data(), sizeof(f_t), num_rows, fid); + check_read(fread(rhs.data(), sizeof(f_t), num_rows, fid), num_rows, "rhs"); lower.resize(num_cols); - fread(lower.data(), sizeof(f_t), num_cols, fid); + check_read(fread(lower.data(), sizeof(f_t), num_cols, fid), num_cols, "lower"); upper.resize(num_cols); - fread(upper.data(), sizeof(f_t), num_cols, fid); + check_read(fread(upper.data(), sizeof(f_t), num_cols, fid), num_cols, "upper"); A.n = num_cols; A.m = num_rows; A.col_start.resize(num_cols + 1); - fread(A.col_start.data(), sizeof(i_t), num_cols + 1, fid); - A.i.resize(A.col_start[num_cols]); - fread(A.i.data(), sizeof(i_t), A.i.size(), fid); - A.x.resize(A.i.size()); - fread(A.x.data(), sizeof(f_t), A.x.size(), fid); + check_read( + fread(A.col_start.data(), sizeof(i_t), num_cols + 1, fid), num_cols + 1, "A.col_start"); + i_t nnz = A.col_start[num_cols]; + if (nnz < 0 || nnz > max_dim * 10) { + fclose(fid); + throw std::runtime_error("Invalid nnz in file: " + path); + } + A.i.resize(nnz); + check_read(fread(A.i.data(), sizeof(i_t), nnz, fid), nnz, "A.i"); + A.x.resize(nnz); + check_read(fread(A.x.data(), sizeof(f_t), nnz, fid), nnz, "A.x"); fclose(fid); } } diff --git a/cpp/src/grpc/server/grpc_server_main.cpp b/cpp/src/grpc/server/grpc_server_main.cpp index d638c191b1..3c2f6e0c15 100644 --- a/cpp/src/grpc/server/grpc_server_main.cpp +++ b/cpp/src/grpc/server/grpc_server_main.cpp @@ -189,16 +189,16 @@ int main(int argc, char** argv) ensure_log_dir_exists(); - shm_unlink(SHM_JOB_QUEUE); - shm_unlink(SHM_RESULT_QUEUE); - shm_unlink(SHM_CONTROL); + shm_unlink(SHM_JOB_QUEUE.c_str()); + shm_unlink(SHM_RESULT_QUEUE.c_str()); + shm_unlink(SHM_CONTROL.c_str()); job_queue = static_cast( - create_shared_memory(SHM_JOB_QUEUE, sizeof(JobQueueEntry) * MAX_JOBS)); + create_shared_memory(SHM_JOB_QUEUE.c_str(), sizeof(JobQueueEntry) * MAX_JOBS)); result_queue = static_cast( - create_shared_memory(SHM_RESULT_QUEUE, sizeof(ResultQueueEntry) * MAX_RESULTS)); + create_shared_memory(SHM_RESULT_QUEUE.c_str(), sizeof(ResultQueueEntry) * MAX_RESULTS)); shm_ctrl = static_cast( - create_shared_memory(SHM_CONTROL, sizeof(SharedMemoryControl))); + create_shared_memory(SHM_CONTROL.c_str(), sizeof(SharedMemoryControl))); new (shm_ctrl) SharedMemoryControl{}; for (size_t i = 0; i < MAX_JOBS; ++i) { diff --git a/cpp/src/grpc/server/grpc_server_types.hpp b/cpp/src/grpc/server/grpc_server_types.hpp index dc6684dea5..a88d272242 100644 --- a/cpp/src/grpc/server/grpc_server_types.hpp +++ b/cpp/src/grpc/server/grpc_server_types.hpp @@ -255,9 +255,16 @@ inline std::map chunked_uploads; inline std::mutex chunked_downloads_mutex; inline std::map chunked_downloads; -inline const char* SHM_JOB_QUEUE = "/cuopt_job_queue"; -inline const char* SHM_RESULT_QUEUE = "/cuopt_result_queue"; -inline const char* SHM_CONTROL = "/cuopt_control"; +// Shared memory names include PID to prevent local users from accessing +// segments belonging to other server instances on the same host. +inline std::string make_shm_name(const char* base) +{ + return std::string(base) + "_" + std::to_string(getpid()); +} + +inline std::string SHM_JOB_QUEUE = make_shm_name("/cuopt_job_queue"); +inline std::string SHM_RESULT_QUEUE = make_shm_name("/cuopt_result_queue"); +inline std::string SHM_CONTROL = make_shm_name("/cuopt_control"); inline const std::string LOG_DIR = "/tmp/cuopt_logs"; diff --git a/cpp/src/grpc/server/grpc_worker_infra.cpp b/cpp/src/grpc/server/grpc_worker_infra.cpp index b2e28b4550..b1726ffc8b 100644 --- a/cpp/src/grpc/server/grpc_worker_infra.cpp +++ b/cpp/src/grpc/server/grpc_worker_infra.cpp @@ -12,15 +12,15 @@ void cleanup_shared_memory() { if (job_queue) { munmap(job_queue, sizeof(JobQueueEntry) * MAX_JOBS); - shm_unlink(SHM_JOB_QUEUE); + shm_unlink(SHM_JOB_QUEUE.c_str()); } if (result_queue) { munmap(result_queue, sizeof(ResultQueueEntry) * MAX_RESULTS); - shm_unlink(SHM_RESULT_QUEUE); + shm_unlink(SHM_RESULT_QUEUE.c_str()); } if (shm_ctrl) { munmap(shm_ctrl, sizeof(SharedMemoryControl)); - shm_unlink(SHM_CONTROL); + shm_unlink(SHM_CONTROL.c_str()); } } diff --git a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu index 5c86882f19..a700c09920 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu +++ b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu @@ -753,18 +753,18 @@ void fj_t::run_step_device(const rmm::cuda_stream_view& climber_stream } #endif - cudaLaunchKernel((void*)update_lift_moves_kernel, - grid_lift_move, - blocks_lift_move, - kernel_args, - 0, - climber_stream); - cudaLaunchKernel((void*)update_breakthrough_moves_kernel, - grid_lift_move, - blocks_lift_move, - kernel_args, - 0, - climber_stream); + RAFT_CUDA_TRY(cudaLaunchKernel((void*)update_lift_moves_kernel, + grid_lift_move, + blocks_lift_move, + kernel_args, + 0, + climber_stream)); + RAFT_CUDA_TRY(cudaLaunchKernel((void*)update_breakthrough_moves_kernel, + grid_lift_move, + blocks_lift_move, + kernel_args, + 0, + climber_stream)); } // compaction kernel @@ -777,44 +777,44 @@ void fj_t::run_step_device(const rmm::cuda_stream_view& climber_stream pb_ptr->n_variables, climber_stream); - cudaLaunchKernel((void*)select_variable_kernel, - dim3(1), - dim3(256), - kernel_args, - 0, - climber_stream); - - cudaLaunchCooperativeKernel((void*)handle_local_minimum_kernel, - grid_update_weights, - blocks_update_weights, - kernel_args, - 0, - climber_stream); + RAFT_CUDA_TRY(cudaLaunchKernel((void*)select_variable_kernel, + dim3(1), + dim3(256), + kernel_args, + 0, + climber_stream)); + + RAFT_CUDA_TRY(cudaLaunchCooperativeKernel((void*)handle_local_minimum_kernel, + grid_update_weights, + blocks_update_weights, + kernel_args, + 0, + climber_stream)); raft::copy(data.break_condition.data(), data.temp_break_condition.data(), 1, climber_stream); - cudaLaunchKernel((void*)update_assignment_kernel, - grid_setval, - blocks_setval, - update_assignment_args, - 0, - climber_stream); - cudaLaunchKernel((void*)update_changed_constraints_kernel, - 1, - blocks_update_changed_constraints, - kernel_args, - 0, - climber_stream); + RAFT_CUDA_TRY(cudaLaunchKernel((void*)update_assignment_kernel, + grid_setval, + blocks_setval, + update_assignment_args, + 0, + climber_stream)); + RAFT_CUDA_TRY(cudaLaunchKernel((void*)update_changed_constraints_kernel, + 1, + blocks_update_changed_constraints, + kernel_args, + 0, + climber_stream)); } if (use_graph) { - cudaStreamEndCapture(climber_stream, &graph); - cudaGraphInstantiate(&graph_instance, graph); + RAFT_CUDA_TRY(cudaStreamEndCapture(climber_stream, &graph)); + RAFT_CUDA_TRY(cudaGraphInstantiate(&graph_instance, graph)); RAFT_CHECK_CUDA(climber_stream); - cudaGraphDestroy(graph); + RAFT_CUDA_TRY(cudaGraphDestroy(graph)); graph_created = true; } } - if (use_graph) cudaGraphLaunch(graph_instance, climber_stream); + if (use_graph) RAFT_CUDA_TRY(cudaGraphLaunch(graph_instance, climber_stream)); } template diff --git a/cpp/src/utilities/version_info.cpp b/cpp/src/utilities/version_info.cpp index ec9db5130b..88269d9189 100644 --- a/cpp/src/utilities/version_info.cpp +++ b/cpp/src/utilities/version_info.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -166,30 +166,36 @@ static double get_available_memory_gb() void print_version_info() { int device_id = 0; - cudaGetDevice(&device_id); + if (cudaGetDevice(&device_id) != cudaSuccess) { + CUOPT_LOG_WARN("No CUDA device available, skipping GPU info"); + return; + } cudaDeviceProp device_prop; - cudaGetDeviceProperties(&device_prop, device_id); + if (cudaGetDeviceProperties(&device_prop, device_id) != cudaSuccess) { + CUOPT_LOG_WARN("Failed to query CUDA device properties"); + return; + } cudaUUID_t uuid = device_prop.uuid; char uuid_str[37] = {0}; snprintf(uuid_str, sizeof(uuid_str), "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", - uuid.bytes[0], - uuid.bytes[1], - uuid.bytes[2], - uuid.bytes[3], - uuid.bytes[4], - uuid.bytes[5], - uuid.bytes[6], - uuid.bytes[7], - uuid.bytes[8], - uuid.bytes[9], - uuid.bytes[10], - uuid.bytes[11], - uuid.bytes[12], - uuid.bytes[13], - uuid.bytes[14], - uuid.bytes[15]); + (unsigned char)uuid.bytes[0], + (unsigned char)uuid.bytes[1], + (unsigned char)uuid.bytes[2], + (unsigned char)uuid.bytes[3], + (unsigned char)uuid.bytes[4], + (unsigned char)uuid.bytes[5], + (unsigned char)uuid.bytes[6], + (unsigned char)uuid.bytes[7], + (unsigned char)uuid.bytes[8], + (unsigned char)uuid.bytes[9], + (unsigned char)uuid.bytes[10], + (unsigned char)uuid.bytes[11], + (unsigned char)uuid.bytes[12], + (unsigned char)uuid.bytes[13], + (unsigned char)uuid.bytes[14], + (unsigned char)uuid.bytes[15]); int version = 0; cudaRuntimeGetVersion(&version); int major = version / 1000; diff --git a/cpp/tests/linear_programming/unit_tests/optimization_problem_test.cu b/cpp/tests/linear_programming/unit_tests/optimization_problem_test.cu index ddee8a12c5..9ba5ca1e93 100644 --- a/cpp/tests/linear_programming/unit_tests/optimization_problem_test.cu +++ b/cpp/tests/linear_programming/unit_tests/optimization_problem_test.cu @@ -123,88 +123,88 @@ TEST(optimization_problem_t, test_set_get_fields) problem.set_csr_constraint_matrix(A_host, 3, indices_host, 3, indices_host, 3); // Test set_A_values - cudaMemcpy(result.data(), - problem.get_constraint_matrix_values().data(), - 3 * sizeof(double), - cudaMemcpyDeviceToHost); + RAFT_CUDA_TRY(cudaMemcpy(result.data(), + problem.get_constraint_matrix_values().data(), + 3 * sizeof(double), + cudaMemcpyDeviceToHost)); EXPECT_NEAR(1.0, result[0], 1e-5); EXPECT_NEAR(2.0, result[1], 1e-5); EXPECT_NEAR(3.0, result[2], 1e-5); // Test A_indices - cudaMemcpy(result_int.data(), - problem.get_constraint_matrix_indices().data(), - 3 * sizeof(int), - cudaMemcpyDeviceToHost); + RAFT_CUDA_TRY(cudaMemcpy(result_int.data(), + problem.get_constraint_matrix_indices().data(), + 3 * sizeof(int), + cudaMemcpyDeviceToHost)); EXPECT_EQ(0, result_int[0]); EXPECT_EQ(1, result_int[1]); EXPECT_EQ(2, result_int[2]); // Test A_offsets_ - cudaMemcpy(result_int.data(), - problem.get_constraint_matrix_offsets().data(), - 3 * sizeof(int), - cudaMemcpyDeviceToHost); + RAFT_CUDA_TRY(cudaMemcpy(result_int.data(), + problem.get_constraint_matrix_offsets().data(), + 3 * sizeof(int), + cudaMemcpyDeviceToHost)); EXPECT_EQ(0, result_int[0]); EXPECT_EQ(1, result_int[1]); EXPECT_EQ(2, result_int[2]); // Test b_ problem.set_constraint_bounds(b_host, 3); - cudaMemcpy(result.data(), - problem.get_constraint_bounds().data(), - 3 * sizeof(double), - cudaMemcpyDeviceToHost); + RAFT_CUDA_TRY(cudaMemcpy(result.data(), + problem.get_constraint_bounds().data(), + 3 * sizeof(double), + cudaMemcpyDeviceToHost)); EXPECT_NEAR(4.0, result[0], 1e-5); EXPECT_NEAR(5.0, result[1], 1e-5); EXPECT_NEAR(6.0, result[2], 1e-5); // Test c_ problem.set_objective_coefficients(c_host, 3); - cudaMemcpy(result.data(), - problem.get_objective_coefficients().data(), - 3 * sizeof(double), - cudaMemcpyDeviceToHost); + RAFT_CUDA_TRY(cudaMemcpy(result.data(), + problem.get_objective_coefficients().data(), + 3 * sizeof(double), + cudaMemcpyDeviceToHost)); EXPECT_NEAR(7.0, result[0], 1e-5); EXPECT_NEAR(8.0, result[1], 1e-5); EXPECT_NEAR(9.0, result[2], 1e-5); // Test variable_lower_bounds_ problem.set_variable_lower_bounds(var_lb_host, 3); - cudaMemcpy(result.data(), - problem.get_variable_lower_bounds().data(), - 3 * sizeof(double), - cudaMemcpyDeviceToHost); + RAFT_CUDA_TRY(cudaMemcpy(result.data(), + problem.get_variable_lower_bounds().data(), + 3 * sizeof(double), + cudaMemcpyDeviceToHost)); EXPECT_NEAR(0.0, result[0], 1e-5); EXPECT_NEAR(0.1, result[1], 1e-5); EXPECT_NEAR(0.2, result[2], 1e-5); // Test variable_upper_bounds_ problem.set_variable_upper_bounds(var_ub_host, 3); - cudaMemcpy(result.data(), - problem.get_variable_upper_bounds().data(), - 3 * sizeof(double), - cudaMemcpyDeviceToHost); + RAFT_CUDA_TRY(cudaMemcpy(result.data(), + problem.get_variable_upper_bounds().data(), + 3 * sizeof(double), + cudaMemcpyDeviceToHost)); EXPECT_NEAR(1.0, result[0], 1e-5); EXPECT_NEAR(1.1, result[1], 1e-5); EXPECT_NEAR(1.2, result[2], 1e-5); // Test constraint_lower_bounds_ problem.set_constraint_lower_bounds(con_lb_host, 3); - cudaMemcpy(result.data(), - problem.get_constraint_lower_bounds().data(), - 3 * sizeof(double), - cudaMemcpyDeviceToHost); + RAFT_CUDA_TRY(cudaMemcpy(result.data(), + problem.get_constraint_lower_bounds().data(), + 3 * sizeof(double), + cudaMemcpyDeviceToHost)); EXPECT_NEAR(0.5, result[0], 1e-5); EXPECT_NEAR(0.6, result[1], 1e-5); EXPECT_NEAR(0.7, result[2], 1e-5); // Test constraint_upper_bounds_ problem.set_constraint_upper_bounds(con_ub_host, 3); - cudaMemcpy(result.data(), - problem.get_constraint_upper_bounds().data(), - 3 * sizeof(double), - cudaMemcpyDeviceToHost); + RAFT_CUDA_TRY(cudaMemcpy(result.data(), + problem.get_constraint_upper_bounds().data(), + 3 * sizeof(double), + cudaMemcpyDeviceToHost)); EXPECT_NEAR(1.5, result[0], 1e-5); EXPECT_NEAR(1.6, result[1], 1e-5); EXPECT_NEAR(1.7, result[2], 1e-5); From 0c01c6132a846d09a3b58091cc2c29c63a119fed Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Thu, 9 Apr 2026 12:58:09 -0500 Subject: [PATCH 2/4] Address review feedback: RAII file handling, CSC validation, remaining CUDA checks - Use unique_ptr for FILE handle in presolve binary reader (RAII) - Throw on fopen failure instead of silent skip - Add CSC matrix validation (col_start monotonicity, row index bounds) - Check cudaRuntimeGetVersion return value in version_info - Wrap remaining cudaStreamBeginCapture and cudaLaunchCooperativeKernel calls with RAFT_CUDA_TRY in feasibility_jump Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/src/dual_simplex/presolve.hpp | 92 ++++++++++--------- .../feasibility_jump/feasibility_jump.cu | 36 ++++---- cpp/src/utilities/version_info.cpp | 5 +- 3 files changed, 73 insertions(+), 60 deletions(-) diff --git a/cpp/src/dual_simplex/presolve.hpp b/cpp/src/dual_simplex/presolve.hpp index 49c121157a..77738f7e89 100644 --- a/cpp/src/dual_simplex/presolve.hpp +++ b/cpp/src/dual_simplex/presolve.hpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -73,52 +74,59 @@ struct lp_problem_t { void read_problem(const std::string& path) { - FILE* fid = fopen(path.c_str(), "r"); - if (fid) { - auto check_read = [&](size_t got, size_t expected, const char* field) { - if (got != expected) { - fclose(fid); - throw std::runtime_error(std::string("Failed to read field '") + field + - "' from file: " + path); - } - }; - constexpr i_t max_dim = 100000000; // 100M upper bound for sanity + std::unique_ptr fid(fopen(path.c_str(), "rb"), &fclose); + if (!fid) { throw std::runtime_error("Failed to open file: " + path); } + + auto check_read = [&](size_t got, size_t expected, const char* field) { + if (got != expected) { + throw std::runtime_error(std::string("Failed to read field '") + field + + "' from file: " + path); + } + }; + constexpr i_t max_dim = 100000000; // 100M upper bound for sanity - check_read(fread(&num_rows, sizeof(i_t), 1, fid), 1, "num_rows"); - check_read(fread(&num_cols, sizeof(i_t), 1, fid), 1, "num_cols"); - if (num_rows <= 0 || num_rows > max_dim || num_cols <= 0 || num_cols > max_dim) { - fclose(fid); - throw std::runtime_error("Invalid dimensions in file: " + path); + check_read(fread(&num_rows, sizeof(i_t), 1, fid.get()), 1, "num_rows"); + check_read(fread(&num_cols, sizeof(i_t), 1, fid.get()), 1, "num_cols"); + if (num_rows <= 0 || num_rows > max_dim || num_cols <= 0 || num_cols > max_dim) { + throw std::runtime_error("Invalid dimensions in file: " + path); + } + check_read(fread(&obj_constant, sizeof(f_t), 1, fid.get()), 1, "obj_constant"); + check_read(fread(&obj_scale, sizeof(f_t), 1, fid.get()), 1, "obj_scale"); + i_t is_integral; + check_read(fread(&is_integral, sizeof(i_t), 1, fid.get()), 1, "is_integral"); + objective_is_integral = is_integral == 1; + objective.resize(num_cols); + check_read(fread(objective.data(), sizeof(f_t), num_cols, fid.get()), num_cols, "objective"); + rhs.resize(num_rows); + check_read(fread(rhs.data(), sizeof(f_t), num_rows, fid.get()), num_rows, "rhs"); + lower.resize(num_cols); + check_read(fread(lower.data(), sizeof(f_t), num_cols, fid.get()), num_cols, "lower"); + upper.resize(num_cols); + check_read(fread(upper.data(), sizeof(f_t), num_cols, fid.get()), num_cols, "upper"); + A.n = num_cols; + A.m = num_rows; + A.col_start.resize(num_cols + 1); + check_read( + fread(A.col_start.data(), sizeof(i_t), num_cols + 1, fid.get()), num_cols + 1, "A.col_start"); + i_t nnz = A.col_start[num_cols]; + if (nnz < 0 || nnz > max_dim * 10) { throw std::runtime_error("Invalid nnz in file: " + path); } + if (A.col_start[0] != 0) { + throw std::runtime_error("Invalid A.col_start[0] in file: " + path); + } + for (i_t j = 0; j < num_cols; ++j) { + if (A.col_start[j] < 0 || A.col_start[j] > A.col_start[j + 1] || A.col_start[j + 1] > nnz) { + throw std::runtime_error("Invalid A.col_start monotonicity/range in file: " + path); } - check_read(fread(&obj_constant, sizeof(f_t), 1, fid), 1, "obj_constant"); - check_read(fread(&obj_scale, sizeof(f_t), 1, fid), 1, "obj_scale"); - i_t is_integral; - check_read(fread(&is_integral, sizeof(i_t), 1, fid), 1, "is_integral"); - objective_is_integral = is_integral == 1; - objective.resize(num_cols); - check_read(fread(objective.data(), sizeof(f_t), num_cols, fid), num_cols, "objective"); - rhs.resize(num_rows); - check_read(fread(rhs.data(), sizeof(f_t), num_rows, fid), num_rows, "rhs"); - lower.resize(num_cols); - check_read(fread(lower.data(), sizeof(f_t), num_cols, fid), num_cols, "lower"); - upper.resize(num_cols); - check_read(fread(upper.data(), sizeof(f_t), num_cols, fid), num_cols, "upper"); - A.n = num_cols; - A.m = num_rows; - A.col_start.resize(num_cols + 1); - check_read( - fread(A.col_start.data(), sizeof(i_t), num_cols + 1, fid), num_cols + 1, "A.col_start"); - i_t nnz = A.col_start[num_cols]; - if (nnz < 0 || nnz > max_dim * 10) { - fclose(fid); - throw std::runtime_error("Invalid nnz in file: " + path); + } + A.i.resize(nnz); + check_read(fread(A.i.data(), sizeof(i_t), nnz, fid.get()), nnz, "A.i"); + for (i_t k = 0; k < nnz; ++k) { + if (A.i[k] < 0 || A.i[k] >= num_rows) { + throw std::runtime_error("Invalid row index in A.i in file: " + path); } - A.i.resize(nnz); - check_read(fread(A.i.data(), sizeof(i_t), nnz, fid), nnz, "A.i"); - A.x.resize(nnz); - check_read(fread(A.x.data(), sizeof(f_t), nnz, fid), nnz, "A.x"); - fclose(fid); } + A.x.resize(nnz); + check_read(fread(A.x.data(), sizeof(f_t), nnz, fid.get()), nnz, "A.x"); } void write_mps(const std::string& path) const diff --git a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu index a700c09920..a3494bda90 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu +++ b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu @@ -706,7 +706,9 @@ void fj_t::run_step_device(const rmm::cuda_stream_view& climber_stream data.cub_storage_bytes.resize(compaction_temp_storage_bytes, climber_stream); } - if (use_graph) { cudaStreamBeginCapture(climber_stream, cudaStreamCaptureModeThreadLocal); } + if (use_graph) { + RAFT_CUDA_TRY(cudaStreamBeginCapture(climber_stream, cudaStreamCaptureModeThreadLocal)); + } for (i_t i = 0; i < (use_graph ? iterations_per_graph : 1); ++i) { { // related varialbe array has to be dynamically computed each iteration @@ -719,37 +721,37 @@ void fj_t::run_step_device(const rmm::cuda_stream_view& climber_stream load_balancing_score_update(climber_stream, climber_idx); } else { if (is_binary_pb) { - cudaLaunchCooperativeKernel( + RAFT_CUDA_TRY(cudaLaunchCooperativeKernel( (void*)compute_mtm_moves_kernel, grid_resetmoves_bin, blocks_resetmoves_bin, reset_moves_args, 0, - climber_stream); + climber_stream)); } else { - cudaLaunchCooperativeKernel( + RAFT_CUDA_TRY(cudaLaunchCooperativeKernel( (void*)compute_mtm_moves_kernel, grid_resetmoves, blocks_resetmoves, reset_moves_args, 0, - climber_stream); + climber_stream)); } } #if FJ_DEBUG_LOAD_BALANCING if (use_load_balancing) { - cudaLaunchCooperativeKernel((void*)compute_mtm_moves_kernel, - grid_resetmoves_bin, - blocks_resetmoves_bin, - reset_moves_args, - 0, - climber_stream); - cudaLaunchCooperativeKernel((void*)load_balancing_sanity_checks, - 512, - 128, - kernel_args, - 0, - climber_stream); + RAFT_CUDA_TRY(cudaLaunchCooperativeKernel((void*)compute_mtm_moves_kernel, + grid_resetmoves_bin, + blocks_resetmoves_bin, + reset_moves_args, + 0, + climber_stream)); + RAFT_CUDA_TRY(cudaLaunchCooperativeKernel((void*)load_balancing_sanity_checks, + 512, + 128, + kernel_args, + 0, + climber_stream)); } #endif diff --git a/cpp/src/utilities/version_info.cpp b/cpp/src/utilities/version_info.cpp index 88269d9189..7a46a5c8cf 100644 --- a/cpp/src/utilities/version_info.cpp +++ b/cpp/src/utilities/version_info.cpp @@ -197,7 +197,10 @@ void print_version_info() (unsigned char)uuid.bytes[14], (unsigned char)uuid.bytes[15]); int version = 0; - cudaRuntimeGetVersion(&version); + if (cudaRuntimeGetVersion(&version) != cudaSuccess) { + CUOPT_LOG_WARN("Failed to query CUDA runtime version"); + version = 0; + } int major = version / 1000; int minor = (version % 1000) / 10; CUOPT_LOG_INFO("cuOpt version: %d.%d.%d, git hash: %s, host arch: %s, device archs: %s", From 741a825224f76c67f422f60c38bb86a71d4231c5 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Thu, 9 Apr 2026 13:58:36 -0500 Subject: [PATCH 3/4] Address review round 2: byte cap, metadata validation, graph cleanup, GPU flag - Add 2 GiB total serialized byte cap before allocations in presolve reader - Validate obj_scale (must be 1.0 or -1.0) and is_integral (must be 0 or 1) - Wrap cudaGraphInstantiate in try/catch to ensure graph cleanup on failure - Use has_gpu flag instead of early return so version/CPU info always logs Co-Authored-By: Claude Opus 4.6 (1M context) --- cpp/src/dual_simplex/presolve.hpp | 27 ++++++- .../feasibility_jump/feasibility_jump.cu | 7 +- cpp/src/utilities/version_info.cpp | 78 ++++++++++--------- 3 files changed, 74 insertions(+), 38 deletions(-) diff --git a/cpp/src/dual_simplex/presolve.hpp b/cpp/src/dual_simplex/presolve.hpp index 77738f7e89..cca4bf2e1a 100644 --- a/cpp/src/dual_simplex/presolve.hpp +++ b/cpp/src/dual_simplex/presolve.hpp @@ -83,7 +83,8 @@ struct lp_problem_t { "' from file: " + path); } }; - constexpr i_t max_dim = 100000000; // 100M upper bound for sanity + constexpr i_t max_dim = 100000000; // 100M upper bound for sanity + constexpr size_t max_serialized_bytes = size_t{1} << 31; // 2 GiB cap check_read(fread(&num_rows, sizeof(i_t), 1, fid.get()), 1, "num_rows"); check_read(fread(&num_cols, sizeof(i_t), 1, fid.get()), 1, "num_cols"); @@ -92,9 +93,26 @@ struct lp_problem_t { } check_read(fread(&obj_constant, sizeof(f_t), 1, fid.get()), 1, "obj_constant"); check_read(fread(&obj_scale, sizeof(f_t), 1, fid.get()), 1, "obj_scale"); + if (obj_scale != f_t{1} && obj_scale != f_t{-1}) { + throw std::runtime_error("Invalid obj_scale in file: " + path); + } i_t is_integral; check_read(fread(&is_integral, sizeof(i_t), 1, fid.get()), 1, "is_integral"); + if (is_integral != 0 && is_integral != 1) { + throw std::runtime_error("Invalid is_integral in file: " + path); + } objective_is_integral = is_integral == 1; + + // Compute total byte footprint before any allocation + size_t total_bytes = static_cast(num_cols) * sizeof(f_t) // objective + + static_cast(num_rows) * sizeof(f_t) // rhs + + static_cast(num_cols) * sizeof(f_t) // lower + + static_cast(num_cols) * sizeof(f_t) // upper + + (static_cast(num_cols) + 1) * sizeof(i_t); // col_start + if (total_bytes > max_serialized_bytes) { + throw std::runtime_error("Serialized problem too large in file: " + path); + } + objective.resize(num_cols); check_read(fread(objective.data(), sizeof(f_t), num_cols, fid.get()), num_cols, "objective"); rhs.resize(num_rows); @@ -118,6 +136,13 @@ struct lp_problem_t { throw std::runtime_error("Invalid A.col_start monotonicity/range in file: " + path); } } + + // Check nnz byte footprint before allocating sparse arrays + size_t nnz_bytes = static_cast(nnz) * (sizeof(i_t) + sizeof(f_t)); + if (total_bytes + nnz_bytes > max_serialized_bytes) { + throw std::runtime_error("Serialized problem too large in file: " + path); + } + A.i.resize(nnz); check_read(fread(A.i.data(), sizeof(i_t), nnz, fid.get()), nnz, "A.i"); for (i_t k = 0; k < nnz; ++k) { diff --git a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu index a3494bda90..6b440aed4f 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu +++ b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu @@ -809,7 +809,12 @@ void fj_t::run_step_device(const rmm::cuda_stream_view& climber_stream if (use_graph) { RAFT_CUDA_TRY(cudaStreamEndCapture(climber_stream, &graph)); - RAFT_CUDA_TRY(cudaGraphInstantiate(&graph_instance, graph)); + try { + RAFT_CUDA_TRY(cudaGraphInstantiate(&graph_instance, graph)); + } catch (...) { + RAFT_CUDA_TRY(cudaGraphDestroy(graph)); + throw; + } RAFT_CHECK_CUDA(climber_stream); RAFT_CUDA_TRY(cudaGraphDestroy(graph)); graph_created = true; diff --git a/cpp/src/utilities/version_info.cpp b/cpp/src/utilities/version_info.cpp index 7a46a5c8cf..54eb8f48bf 100644 --- a/cpp/src/utilities/version_info.cpp +++ b/cpp/src/utilities/version_info.cpp @@ -165,41 +165,45 @@ static double get_available_memory_gb() void print_version_info() { + bool has_gpu = true; int device_id = 0; + cudaDeviceProp device_prop{}; + char uuid_str[37] = {0}; + int version = 0; + if (cudaGetDevice(&device_id) != cudaSuccess) { CUOPT_LOG_WARN("No CUDA device available, skipping GPU info"); - return; + has_gpu = false; } - cudaDeviceProp device_prop; - if (cudaGetDeviceProperties(&device_prop, device_id) != cudaSuccess) { + if (has_gpu && cudaGetDeviceProperties(&device_prop, device_id) != cudaSuccess) { CUOPT_LOG_WARN("Failed to query CUDA device properties"); - return; + has_gpu = false; } - cudaUUID_t uuid = device_prop.uuid; - char uuid_str[37] = {0}; - snprintf(uuid_str, - sizeof(uuid_str), - "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", - (unsigned char)uuid.bytes[0], - (unsigned char)uuid.bytes[1], - (unsigned char)uuid.bytes[2], - (unsigned char)uuid.bytes[3], - (unsigned char)uuid.bytes[4], - (unsigned char)uuid.bytes[5], - (unsigned char)uuid.bytes[6], - (unsigned char)uuid.bytes[7], - (unsigned char)uuid.bytes[8], - (unsigned char)uuid.bytes[9], - (unsigned char)uuid.bytes[10], - (unsigned char)uuid.bytes[11], - (unsigned char)uuid.bytes[12], - (unsigned char)uuid.bytes[13], - (unsigned char)uuid.bytes[14], - (unsigned char)uuid.bytes[15]); - int version = 0; - if (cudaRuntimeGetVersion(&version) != cudaSuccess) { - CUOPT_LOG_WARN("Failed to query CUDA runtime version"); - version = 0; + if (has_gpu) { + cudaUUID_t uuid = device_prop.uuid; + snprintf(uuid_str, + sizeof(uuid_str), + "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", + (unsigned char)uuid.bytes[0], + (unsigned char)uuid.bytes[1], + (unsigned char)uuid.bytes[2], + (unsigned char)uuid.bytes[3], + (unsigned char)uuid.bytes[4], + (unsigned char)uuid.bytes[5], + (unsigned char)uuid.bytes[6], + (unsigned char)uuid.bytes[7], + (unsigned char)uuid.bytes[8], + (unsigned char)uuid.bytes[9], + (unsigned char)uuid.bytes[10], + (unsigned char)uuid.bytes[11], + (unsigned char)uuid.bytes[12], + (unsigned char)uuid.bytes[13], + (unsigned char)uuid.bytes[14], + (unsigned char)uuid.bytes[15]); + if (cudaRuntimeGetVersion(&version) != cudaSuccess) { + CUOPT_LOG_WARN("Failed to query CUDA runtime version"); + version = 0; + } } int major = version / 1000; int minor = (version % 1000) / 10; @@ -215,13 +219,15 @@ void print_version_info() get_physical_cores(), std::thread::hardware_concurrency(), get_available_memory_gb()); - CUOPT_LOG_INFO("CUDA %d.%d, device: %s (ID %d), VRAM: %.2f GiB", - major, - minor, - device_prop.name, - device_id, - (double)device_prop.totalGlobalMem / (1024.0 * 1024.0 * 1024.0)); - CUOPT_LOG_INFO("CUDA device UUID: %s\n", uuid_str); + if (has_gpu) { + CUOPT_LOG_INFO("CUDA %d.%d, device: %s (ID %d), VRAM: %.2f GiB", + major, + minor, + device_prop.name, + device_id, + (double)device_prop.totalGlobalMem / (1024.0 * 1024.0 * 1024.0)); + CUOPT_LOG_INFO("CUDA device UUID: %s\n", uuid_str); + } } } // namespace cuopt From 7130875d5fa20b58ea1ba02c31125617abc3a9c7 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 15 Apr 2026 11:12:24 -0500 Subject: [PATCH 4/4] Remove unused read_problem debug method from presolve.hpp --- cpp/src/dual_simplex/presolve.hpp | 83 ------------------------------- 1 file changed, 83 deletions(-) diff --git a/cpp/src/dual_simplex/presolve.hpp b/cpp/src/dual_simplex/presolve.hpp index cca4bf2e1a..afb63e9246 100644 --- a/cpp/src/dual_simplex/presolve.hpp +++ b/cpp/src/dual_simplex/presolve.hpp @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -72,88 +71,6 @@ struct lp_problem_t { } } - void read_problem(const std::string& path) - { - std::unique_ptr fid(fopen(path.c_str(), "rb"), &fclose); - if (!fid) { throw std::runtime_error("Failed to open file: " + path); } - - auto check_read = [&](size_t got, size_t expected, const char* field) { - if (got != expected) { - throw std::runtime_error(std::string("Failed to read field '") + field + - "' from file: " + path); - } - }; - constexpr i_t max_dim = 100000000; // 100M upper bound for sanity - constexpr size_t max_serialized_bytes = size_t{1} << 31; // 2 GiB cap - - check_read(fread(&num_rows, sizeof(i_t), 1, fid.get()), 1, "num_rows"); - check_read(fread(&num_cols, sizeof(i_t), 1, fid.get()), 1, "num_cols"); - if (num_rows <= 0 || num_rows > max_dim || num_cols <= 0 || num_cols > max_dim) { - throw std::runtime_error("Invalid dimensions in file: " + path); - } - check_read(fread(&obj_constant, sizeof(f_t), 1, fid.get()), 1, "obj_constant"); - check_read(fread(&obj_scale, sizeof(f_t), 1, fid.get()), 1, "obj_scale"); - if (obj_scale != f_t{1} && obj_scale != f_t{-1}) { - throw std::runtime_error("Invalid obj_scale in file: " + path); - } - i_t is_integral; - check_read(fread(&is_integral, sizeof(i_t), 1, fid.get()), 1, "is_integral"); - if (is_integral != 0 && is_integral != 1) { - throw std::runtime_error("Invalid is_integral in file: " + path); - } - objective_is_integral = is_integral == 1; - - // Compute total byte footprint before any allocation - size_t total_bytes = static_cast(num_cols) * sizeof(f_t) // objective - + static_cast(num_rows) * sizeof(f_t) // rhs - + static_cast(num_cols) * sizeof(f_t) // lower - + static_cast(num_cols) * sizeof(f_t) // upper - + (static_cast(num_cols) + 1) * sizeof(i_t); // col_start - if (total_bytes > max_serialized_bytes) { - throw std::runtime_error("Serialized problem too large in file: " + path); - } - - objective.resize(num_cols); - check_read(fread(objective.data(), sizeof(f_t), num_cols, fid.get()), num_cols, "objective"); - rhs.resize(num_rows); - check_read(fread(rhs.data(), sizeof(f_t), num_rows, fid.get()), num_rows, "rhs"); - lower.resize(num_cols); - check_read(fread(lower.data(), sizeof(f_t), num_cols, fid.get()), num_cols, "lower"); - upper.resize(num_cols); - check_read(fread(upper.data(), sizeof(f_t), num_cols, fid.get()), num_cols, "upper"); - A.n = num_cols; - A.m = num_rows; - A.col_start.resize(num_cols + 1); - check_read( - fread(A.col_start.data(), sizeof(i_t), num_cols + 1, fid.get()), num_cols + 1, "A.col_start"); - i_t nnz = A.col_start[num_cols]; - if (nnz < 0 || nnz > max_dim * 10) { throw std::runtime_error("Invalid nnz in file: " + path); } - if (A.col_start[0] != 0) { - throw std::runtime_error("Invalid A.col_start[0] in file: " + path); - } - for (i_t j = 0; j < num_cols; ++j) { - if (A.col_start[j] < 0 || A.col_start[j] > A.col_start[j + 1] || A.col_start[j + 1] > nnz) { - throw std::runtime_error("Invalid A.col_start monotonicity/range in file: " + path); - } - } - - // Check nnz byte footprint before allocating sparse arrays - size_t nnz_bytes = static_cast(nnz) * (sizeof(i_t) + sizeof(f_t)); - if (total_bytes + nnz_bytes > max_serialized_bytes) { - throw std::runtime_error("Serialized problem too large in file: " + path); - } - - A.i.resize(nnz); - check_read(fread(A.i.data(), sizeof(i_t), nnz, fid.get()), nnz, "A.i"); - for (i_t k = 0; k < nnz; ++k) { - if (A.i[k] < 0 || A.i[k] >= num_rows) { - throw std::runtime_error("Invalid row index in A.i in file: " + path); - } - } - A.x.resize(nnz); - check_read(fread(A.x.data(), sizeof(f_t), nnz, fid.get()), nnz, "A.x"); - } - void write_mps(const std::string& path) const { std::ofstream mps_file(path);