diff --git a/cpp/libmps_parser/src/mps_parser.cpp b/cpp/libmps_parser/src/mps_parser.cpp index 6a81b3b6c1..72311720bb 100644 --- a/cpp/libmps_parser/src/mps_parser.cpp +++ b/cpp/libmps_parser/src/mps_parser.cpp @@ -544,35 +544,30 @@ std::vector mps_parser_t::file_to_string(const std::string& file #endif // MPS_PARSER_WITH_ZLIB // Faster than using C++ I/O - FILE* fp = fopen(file.c_str(), "r"); + std::unique_ptr fp{fopen(file.c_str(), "r")}; mps_parser_expects(fp != nullptr, error_type_t::ValidationError, "Error opening MPS file! Given path: %s", mps_file.c_str()); - mps_parser_expects(fseek(fp, 0L, SEEK_END) == 0, + mps_parser_expects(fseek(fp.get(), 0L, SEEK_END) == 0, error_type_t::ValidationError, "File browsing MPS file! Given path: %s", mps_file.c_str()); - const long bufsize = ftell(fp); + const long bufsize = ftell(fp.get()); mps_parser_expects(bufsize != -1L, error_type_t::ValidationError, "File browsing MPS file! Given path: %s", mps_file.c_str()); std::vector buf(bufsize + 1); - rewind(fp); + rewind(fp.get()); - mps_parser_expects(fread(buf.data(), sizeof(char), bufsize, fp) == bufsize, + mps_parser_expects(fread(buf.data(), sizeof(char), bufsize, fp.get()) == bufsize, error_type_t::ValidationError, "Error reading MPS file! Given path: %s", mps_file.c_str()); buf[bufsize] = '\0'; - mps_parser_expects(fclose(fp) == 0, - error_type_t::ValidationError, - "Error closing MPS file! Given path: %s", - mps_file.c_str()); - return buf; } diff --git a/cpp/src/dual_simplex/presolve.hpp b/cpp/src/dual_simplex/presolve.hpp index d570ea933e..afb63e9246 100644 --- a/cpp/src/dual_simplex/presolve.hpp +++ b/cpp/src/dual_simplex/presolve.hpp @@ -71,37 +71,6 @@ struct lp_problem_t { } } - void read_problem(const std::string& path) - { - FILE* fid = fopen(path.c_str(), "r"); - if (fid) { - fread(&num_rows, sizeof(i_t), 1, fid); - fread(&num_cols, sizeof(i_t), 1, fid); - fread(&obj_constant, sizeof(f_t), 1, fid); - fread(&obj_scale, sizeof(f_t), 1, fid); - i_t is_integral; - fread(&is_integral, sizeof(i_t), 1, fid); - objective_is_integral = is_integral == 1; - objective.resize(num_cols); - fread(objective.data(), sizeof(f_t), num_cols, fid); - rhs.resize(num_rows); - fread(rhs.data(), sizeof(f_t), num_rows, fid); - lower.resize(num_cols); - fread(lower.data(), sizeof(f_t), num_cols, fid); - upper.resize(num_cols); - fread(upper.data(), sizeof(f_t), num_cols, fid); - A.n = num_cols; - A.m = num_rows; - A.col_start.resize(num_cols + 1); - fread(A.col_start.data(), sizeof(i_t), num_cols + 1, fid); - A.i.resize(A.col_start[num_cols]); - fread(A.i.data(), sizeof(i_t), A.i.size(), fid); - A.x.resize(A.i.size()); - fread(A.x.data(), sizeof(f_t), A.x.size(), fid); - fclose(fid); - } - } - void write_mps(const std::string& path) const { std::ofstream mps_file(path); diff --git a/cpp/src/grpc/server/grpc_server_main.cpp b/cpp/src/grpc/server/grpc_server_main.cpp index d638c191b1..3c2f6e0c15 100644 --- a/cpp/src/grpc/server/grpc_server_main.cpp +++ b/cpp/src/grpc/server/grpc_server_main.cpp @@ -189,16 +189,16 @@ int main(int argc, char** argv) ensure_log_dir_exists(); - shm_unlink(SHM_JOB_QUEUE); - shm_unlink(SHM_RESULT_QUEUE); - shm_unlink(SHM_CONTROL); + shm_unlink(SHM_JOB_QUEUE.c_str()); + shm_unlink(SHM_RESULT_QUEUE.c_str()); + shm_unlink(SHM_CONTROL.c_str()); job_queue = static_cast( - create_shared_memory(SHM_JOB_QUEUE, sizeof(JobQueueEntry) * MAX_JOBS)); + create_shared_memory(SHM_JOB_QUEUE.c_str(), sizeof(JobQueueEntry) * MAX_JOBS)); result_queue = static_cast( - create_shared_memory(SHM_RESULT_QUEUE, sizeof(ResultQueueEntry) * MAX_RESULTS)); + create_shared_memory(SHM_RESULT_QUEUE.c_str(), sizeof(ResultQueueEntry) * MAX_RESULTS)); shm_ctrl = static_cast( - create_shared_memory(SHM_CONTROL, sizeof(SharedMemoryControl))); + create_shared_memory(SHM_CONTROL.c_str(), sizeof(SharedMemoryControl))); new (shm_ctrl) SharedMemoryControl{}; for (size_t i = 0; i < MAX_JOBS; ++i) { diff --git a/cpp/src/grpc/server/grpc_server_types.hpp b/cpp/src/grpc/server/grpc_server_types.hpp index dc6684dea5..a88d272242 100644 --- a/cpp/src/grpc/server/grpc_server_types.hpp +++ b/cpp/src/grpc/server/grpc_server_types.hpp @@ -255,9 +255,16 @@ inline std::map chunked_uploads; inline std::mutex chunked_downloads_mutex; inline std::map chunked_downloads; -inline const char* SHM_JOB_QUEUE = "/cuopt_job_queue"; -inline const char* SHM_RESULT_QUEUE = "/cuopt_result_queue"; -inline const char* SHM_CONTROL = "/cuopt_control"; +// Shared memory names include PID to prevent local users from accessing +// segments belonging to other server instances on the same host. +inline std::string make_shm_name(const char* base) +{ + return std::string(base) + "_" + std::to_string(getpid()); +} + +inline std::string SHM_JOB_QUEUE = make_shm_name("/cuopt_job_queue"); +inline std::string SHM_RESULT_QUEUE = make_shm_name("/cuopt_result_queue"); +inline std::string SHM_CONTROL = make_shm_name("/cuopt_control"); inline const std::string LOG_DIR = "/tmp/cuopt_logs"; diff --git a/cpp/src/grpc/server/grpc_worker_infra.cpp b/cpp/src/grpc/server/grpc_worker_infra.cpp index b2e28b4550..b1726ffc8b 100644 --- a/cpp/src/grpc/server/grpc_worker_infra.cpp +++ b/cpp/src/grpc/server/grpc_worker_infra.cpp @@ -12,15 +12,15 @@ void cleanup_shared_memory() { if (job_queue) { munmap(job_queue, sizeof(JobQueueEntry) * MAX_JOBS); - shm_unlink(SHM_JOB_QUEUE); + shm_unlink(SHM_JOB_QUEUE.c_str()); } if (result_queue) { munmap(result_queue, sizeof(ResultQueueEntry) * MAX_RESULTS); - shm_unlink(SHM_RESULT_QUEUE); + shm_unlink(SHM_RESULT_QUEUE.c_str()); } if (shm_ctrl) { munmap(shm_ctrl, sizeof(SharedMemoryControl)); - shm_unlink(SHM_CONTROL); + shm_unlink(SHM_CONTROL.c_str()); } } diff --git a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu index 5c86882f19..6b440aed4f 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu +++ b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu @@ -706,7 +706,9 @@ void fj_t::run_step_device(const rmm::cuda_stream_view& climber_stream data.cub_storage_bytes.resize(compaction_temp_storage_bytes, climber_stream); } - if (use_graph) { cudaStreamBeginCapture(climber_stream, cudaStreamCaptureModeThreadLocal); } + if (use_graph) { + RAFT_CUDA_TRY(cudaStreamBeginCapture(climber_stream, cudaStreamCaptureModeThreadLocal)); + } for (i_t i = 0; i < (use_graph ? iterations_per_graph : 1); ++i) { { // related varialbe array has to be dynamically computed each iteration @@ -719,52 +721,52 @@ void fj_t::run_step_device(const rmm::cuda_stream_view& climber_stream load_balancing_score_update(climber_stream, climber_idx); } else { if (is_binary_pb) { - cudaLaunchCooperativeKernel( + RAFT_CUDA_TRY(cudaLaunchCooperativeKernel( (void*)compute_mtm_moves_kernel, grid_resetmoves_bin, blocks_resetmoves_bin, reset_moves_args, 0, - climber_stream); + climber_stream)); } else { - cudaLaunchCooperativeKernel( + RAFT_CUDA_TRY(cudaLaunchCooperativeKernel( (void*)compute_mtm_moves_kernel, grid_resetmoves, blocks_resetmoves, reset_moves_args, 0, - climber_stream); + climber_stream)); } } #if FJ_DEBUG_LOAD_BALANCING if (use_load_balancing) { - cudaLaunchCooperativeKernel((void*)compute_mtm_moves_kernel, - grid_resetmoves_bin, - blocks_resetmoves_bin, - reset_moves_args, - 0, - climber_stream); - cudaLaunchCooperativeKernel((void*)load_balancing_sanity_checks, - 512, - 128, - kernel_args, - 0, - climber_stream); + RAFT_CUDA_TRY(cudaLaunchCooperativeKernel((void*)compute_mtm_moves_kernel, + grid_resetmoves_bin, + blocks_resetmoves_bin, + reset_moves_args, + 0, + climber_stream)); + RAFT_CUDA_TRY(cudaLaunchCooperativeKernel((void*)load_balancing_sanity_checks, + 512, + 128, + kernel_args, + 0, + climber_stream)); } #endif - cudaLaunchKernel((void*)update_lift_moves_kernel, - grid_lift_move, - blocks_lift_move, - kernel_args, - 0, - climber_stream); - cudaLaunchKernel((void*)update_breakthrough_moves_kernel, - grid_lift_move, - blocks_lift_move, - kernel_args, - 0, - climber_stream); + RAFT_CUDA_TRY(cudaLaunchKernel((void*)update_lift_moves_kernel, + grid_lift_move, + blocks_lift_move, + kernel_args, + 0, + climber_stream)); + RAFT_CUDA_TRY(cudaLaunchKernel((void*)update_breakthrough_moves_kernel, + grid_lift_move, + blocks_lift_move, + kernel_args, + 0, + climber_stream)); } // compaction kernel @@ -777,44 +779,49 @@ void fj_t::run_step_device(const rmm::cuda_stream_view& climber_stream pb_ptr->n_variables, climber_stream); - cudaLaunchKernel((void*)select_variable_kernel, - dim3(1), - dim3(256), - kernel_args, - 0, - climber_stream); - - cudaLaunchCooperativeKernel((void*)handle_local_minimum_kernel, - grid_update_weights, - blocks_update_weights, - kernel_args, - 0, - climber_stream); + RAFT_CUDA_TRY(cudaLaunchKernel((void*)select_variable_kernel, + dim3(1), + dim3(256), + kernel_args, + 0, + climber_stream)); + + RAFT_CUDA_TRY(cudaLaunchCooperativeKernel((void*)handle_local_minimum_kernel, + grid_update_weights, + blocks_update_weights, + kernel_args, + 0, + climber_stream)); raft::copy(data.break_condition.data(), data.temp_break_condition.data(), 1, climber_stream); - cudaLaunchKernel((void*)update_assignment_kernel, - grid_setval, - blocks_setval, - update_assignment_args, - 0, - climber_stream); - cudaLaunchKernel((void*)update_changed_constraints_kernel, - 1, - blocks_update_changed_constraints, - kernel_args, - 0, - climber_stream); + RAFT_CUDA_TRY(cudaLaunchKernel((void*)update_assignment_kernel, + grid_setval, + blocks_setval, + update_assignment_args, + 0, + climber_stream)); + RAFT_CUDA_TRY(cudaLaunchKernel((void*)update_changed_constraints_kernel, + 1, + blocks_update_changed_constraints, + kernel_args, + 0, + climber_stream)); } if (use_graph) { - cudaStreamEndCapture(climber_stream, &graph); - cudaGraphInstantiate(&graph_instance, graph); + RAFT_CUDA_TRY(cudaStreamEndCapture(climber_stream, &graph)); + try { + RAFT_CUDA_TRY(cudaGraphInstantiate(&graph_instance, graph)); + } catch (...) { + RAFT_CUDA_TRY(cudaGraphDestroy(graph)); + throw; + } RAFT_CHECK_CUDA(climber_stream); - cudaGraphDestroy(graph); + RAFT_CUDA_TRY(cudaGraphDestroy(graph)); graph_created = true; } } - if (use_graph) cudaGraphLaunch(graph_instance, climber_stream); + if (use_graph) RAFT_CUDA_TRY(cudaGraphLaunch(graph_instance, climber_stream)); } template diff --git a/cpp/src/utilities/version_info.cpp b/cpp/src/utilities/version_info.cpp index ec9db5130b..54eb8f48bf 100644 --- a/cpp/src/utilities/version_info.cpp +++ b/cpp/src/utilities/version_info.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -165,33 +165,46 @@ static double get_available_memory_gb() void print_version_info() { + bool has_gpu = true; int device_id = 0; - cudaGetDevice(&device_id); - cudaDeviceProp device_prop; - cudaGetDeviceProperties(&device_prop, device_id); - cudaUUID_t uuid = device_prop.uuid; + cudaDeviceProp device_prop{}; char uuid_str[37] = {0}; - snprintf(uuid_str, - sizeof(uuid_str), - "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", - uuid.bytes[0], - uuid.bytes[1], - uuid.bytes[2], - uuid.bytes[3], - uuid.bytes[4], - uuid.bytes[5], - uuid.bytes[6], - uuid.bytes[7], - uuid.bytes[8], - uuid.bytes[9], - uuid.bytes[10], - uuid.bytes[11], - uuid.bytes[12], - uuid.bytes[13], - uuid.bytes[14], - uuid.bytes[15]); - int version = 0; - cudaRuntimeGetVersion(&version); + int version = 0; + + if (cudaGetDevice(&device_id) != cudaSuccess) { + CUOPT_LOG_WARN("No CUDA device available, skipping GPU info"); + has_gpu = false; + } + if (has_gpu && cudaGetDeviceProperties(&device_prop, device_id) != cudaSuccess) { + CUOPT_LOG_WARN("Failed to query CUDA device properties"); + has_gpu = false; + } + if (has_gpu) { + cudaUUID_t uuid = device_prop.uuid; + snprintf(uuid_str, + sizeof(uuid_str), + "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", + (unsigned char)uuid.bytes[0], + (unsigned char)uuid.bytes[1], + (unsigned char)uuid.bytes[2], + (unsigned char)uuid.bytes[3], + (unsigned char)uuid.bytes[4], + (unsigned char)uuid.bytes[5], + (unsigned char)uuid.bytes[6], + (unsigned char)uuid.bytes[7], + (unsigned char)uuid.bytes[8], + (unsigned char)uuid.bytes[9], + (unsigned char)uuid.bytes[10], + (unsigned char)uuid.bytes[11], + (unsigned char)uuid.bytes[12], + (unsigned char)uuid.bytes[13], + (unsigned char)uuid.bytes[14], + (unsigned char)uuid.bytes[15]); + if (cudaRuntimeGetVersion(&version) != cudaSuccess) { + CUOPT_LOG_WARN("Failed to query CUDA runtime version"); + version = 0; + } + } int major = version / 1000; int minor = (version % 1000) / 10; CUOPT_LOG_INFO("cuOpt version: %d.%d.%d, git hash: %s, host arch: %s, device archs: %s", @@ -206,13 +219,15 @@ void print_version_info() get_physical_cores(), std::thread::hardware_concurrency(), get_available_memory_gb()); - CUOPT_LOG_INFO("CUDA %d.%d, device: %s (ID %d), VRAM: %.2f GiB", - major, - minor, - device_prop.name, - device_id, - (double)device_prop.totalGlobalMem / (1024.0 * 1024.0 * 1024.0)); - CUOPT_LOG_INFO("CUDA device UUID: %s\n", uuid_str); + if (has_gpu) { + CUOPT_LOG_INFO("CUDA %d.%d, device: %s (ID %d), VRAM: %.2f GiB", + major, + minor, + device_prop.name, + device_id, + (double)device_prop.totalGlobalMem / (1024.0 * 1024.0 * 1024.0)); + CUOPT_LOG_INFO("CUDA device UUID: %s\n", uuid_str); + } } } // namespace cuopt diff --git a/cpp/tests/linear_programming/unit_tests/optimization_problem_test.cu b/cpp/tests/linear_programming/unit_tests/optimization_problem_test.cu index ddee8a12c5..9ba5ca1e93 100644 --- a/cpp/tests/linear_programming/unit_tests/optimization_problem_test.cu +++ b/cpp/tests/linear_programming/unit_tests/optimization_problem_test.cu @@ -123,88 +123,88 @@ TEST(optimization_problem_t, test_set_get_fields) problem.set_csr_constraint_matrix(A_host, 3, indices_host, 3, indices_host, 3); // Test set_A_values - cudaMemcpy(result.data(), - problem.get_constraint_matrix_values().data(), - 3 * sizeof(double), - cudaMemcpyDeviceToHost); + RAFT_CUDA_TRY(cudaMemcpy(result.data(), + problem.get_constraint_matrix_values().data(), + 3 * sizeof(double), + cudaMemcpyDeviceToHost)); EXPECT_NEAR(1.0, result[0], 1e-5); EXPECT_NEAR(2.0, result[1], 1e-5); EXPECT_NEAR(3.0, result[2], 1e-5); // Test A_indices - cudaMemcpy(result_int.data(), - problem.get_constraint_matrix_indices().data(), - 3 * sizeof(int), - cudaMemcpyDeviceToHost); + RAFT_CUDA_TRY(cudaMemcpy(result_int.data(), + problem.get_constraint_matrix_indices().data(), + 3 * sizeof(int), + cudaMemcpyDeviceToHost)); EXPECT_EQ(0, result_int[0]); EXPECT_EQ(1, result_int[1]); EXPECT_EQ(2, result_int[2]); // Test A_offsets_ - cudaMemcpy(result_int.data(), - problem.get_constraint_matrix_offsets().data(), - 3 * sizeof(int), - cudaMemcpyDeviceToHost); + RAFT_CUDA_TRY(cudaMemcpy(result_int.data(), + problem.get_constraint_matrix_offsets().data(), + 3 * sizeof(int), + cudaMemcpyDeviceToHost)); EXPECT_EQ(0, result_int[0]); EXPECT_EQ(1, result_int[1]); EXPECT_EQ(2, result_int[2]); // Test b_ problem.set_constraint_bounds(b_host, 3); - cudaMemcpy(result.data(), - problem.get_constraint_bounds().data(), - 3 * sizeof(double), - cudaMemcpyDeviceToHost); + RAFT_CUDA_TRY(cudaMemcpy(result.data(), + problem.get_constraint_bounds().data(), + 3 * sizeof(double), + cudaMemcpyDeviceToHost)); EXPECT_NEAR(4.0, result[0], 1e-5); EXPECT_NEAR(5.0, result[1], 1e-5); EXPECT_NEAR(6.0, result[2], 1e-5); // Test c_ problem.set_objective_coefficients(c_host, 3); - cudaMemcpy(result.data(), - problem.get_objective_coefficients().data(), - 3 * sizeof(double), - cudaMemcpyDeviceToHost); + RAFT_CUDA_TRY(cudaMemcpy(result.data(), + problem.get_objective_coefficients().data(), + 3 * sizeof(double), + cudaMemcpyDeviceToHost)); EXPECT_NEAR(7.0, result[0], 1e-5); EXPECT_NEAR(8.0, result[1], 1e-5); EXPECT_NEAR(9.0, result[2], 1e-5); // Test variable_lower_bounds_ problem.set_variable_lower_bounds(var_lb_host, 3); - cudaMemcpy(result.data(), - problem.get_variable_lower_bounds().data(), - 3 * sizeof(double), - cudaMemcpyDeviceToHost); + RAFT_CUDA_TRY(cudaMemcpy(result.data(), + problem.get_variable_lower_bounds().data(), + 3 * sizeof(double), + cudaMemcpyDeviceToHost)); EXPECT_NEAR(0.0, result[0], 1e-5); EXPECT_NEAR(0.1, result[1], 1e-5); EXPECT_NEAR(0.2, result[2], 1e-5); // Test variable_upper_bounds_ problem.set_variable_upper_bounds(var_ub_host, 3); - cudaMemcpy(result.data(), - problem.get_variable_upper_bounds().data(), - 3 * sizeof(double), - cudaMemcpyDeviceToHost); + RAFT_CUDA_TRY(cudaMemcpy(result.data(), + problem.get_variable_upper_bounds().data(), + 3 * sizeof(double), + cudaMemcpyDeviceToHost)); EXPECT_NEAR(1.0, result[0], 1e-5); EXPECT_NEAR(1.1, result[1], 1e-5); EXPECT_NEAR(1.2, result[2], 1e-5); // Test constraint_lower_bounds_ problem.set_constraint_lower_bounds(con_lb_host, 3); - cudaMemcpy(result.data(), - problem.get_constraint_lower_bounds().data(), - 3 * sizeof(double), - cudaMemcpyDeviceToHost); + RAFT_CUDA_TRY(cudaMemcpy(result.data(), + problem.get_constraint_lower_bounds().data(), + 3 * sizeof(double), + cudaMemcpyDeviceToHost)); EXPECT_NEAR(0.5, result[0], 1e-5); EXPECT_NEAR(0.6, result[1], 1e-5); EXPECT_NEAR(0.7, result[2], 1e-5); // Test constraint_upper_bounds_ problem.set_constraint_upper_bounds(con_ub_host, 3); - cudaMemcpy(result.data(), - problem.get_constraint_upper_bounds().data(), - 3 * sizeof(double), - cudaMemcpyDeviceToHost); + RAFT_CUDA_TRY(cudaMemcpy(result.data(), + problem.get_constraint_upper_bounds().data(), + 3 * sizeof(double), + cudaMemcpyDeviceToHost)); EXPECT_NEAR(1.5, result[0], 1e-5); EXPECT_NEAR(1.6, result[1], 1e-5); EXPECT_NEAR(1.7, result[2], 1e-5);