From 6b0f4deb33242342de9257f3744fd6b71b8f5e98 Mon Sep 17 00:00:00 2001 From: Alexander Novotny Date: Mon, 23 Feb 2026 13:58:51 -0500 Subject: [PATCH 01/14] Add elementwiseDivide vector operation and test --- resolve/cuda/cudaVectorKernels.cu | 44 +++++++++++++++++++++ resolve/cuda/cudaVectorKernels.h | 1 + resolve/hip/hipVectorKernels.h | 1 + resolve/hip/hipVectorKernels.hip | 44 +++++++++++++++++++++ resolve/vector/VectorHandler.cpp | 29 ++++++++++++++ resolve/vector/VectorHandler.hpp | 2 + resolve/vector/VectorHandlerCpu.cpp | 24 +++++++++++ resolve/vector/VectorHandlerCpu.hpp | 2 + resolve/vector/VectorHandlerCuda.cpp | 22 +++++++++++ resolve/vector/VectorHandlerCuda.hpp | 10 +++++ resolve/vector/VectorHandlerHip.cpp | 22 +++++++++++ resolve/vector/VectorHandlerHip.hpp | 10 +++++ resolve/vector/VectorHandlerImpl.hpp | 3 ++ tests/unit/vector/VectorHandlerTests.hpp | 42 ++++++++++++++++++++ tests/unit/vector/runVectorHandlerTests.cpp | 1 + 15 files changed, 257 insertions(+) diff --git a/resolve/cuda/cudaVectorKernels.cu b/resolve/cuda/cudaVectorKernels.cu index 0a879c61d..255a5b18d 100644 --- a/resolve/cuda/cudaVectorKernels.cu +++ b/resolve/cuda/cudaVectorKernels.cu @@ -81,6 +81,30 @@ namespace ReSolve } } + /** + * @brief Divides a vector's elements by another vector's elements + * + * @param[in] n - size of the vectors + * @param[in] d_val - divisor values + * @param[in, out] vec - vector to be divided. Changes in place. + * + * @todo Decide how to allow user to configure grid and block sizes. + */ + __global__ void elementwiseDivide(index_type n, + const real_type* d_val, + real_type* vec) + { + // Get the index of the element to be processed + index_type idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Check if the index is within bounds + if (idx < n) + { + // Divide the vector element by the corresponding divisor value + vec[idx] /= d_val[idx]; + } + } + } // namespace kernels void setArrayConst(index_type n, real_type val, real_type* arr) @@ -118,5 +142,25 @@ namespace ReSolve // Launch the kernel kernels::scale<<>>(n, diag, vec); } + + /** + * @brief Wrapper that divides a vector's elements by another vector's elements + * + * @param[in] n - size of the vectors + * @param[in] divisor - divisor values + * @param[in, out] vec - vector to be divided. Changes in place. + * + * @todo Decide how to allow user to configure grid and block sizes. + */ + void elementwiseDivide(index_type n, + const real_type* divisor, + real_type* vec) + { + // Define block size and number of blocks + const int block_size = 256; + int num_blocks = (n + block_size - 1) / block_size; + // Launch the kernel + kernels::elementwiseDivide<<>>(n, divisor, vec); + } } // namespace cuda } // namespace ReSolve diff --git a/resolve/cuda/cudaVectorKernels.h b/resolve/cuda/cudaVectorKernels.h index f8bef0d5b..8f8642fbd 100644 --- a/resolve/cuda/cudaVectorKernels.h +++ b/resolve/cuda/cudaVectorKernels.h @@ -18,5 +18,6 @@ namespace ReSolve void setArrayConst(index_type n, real_type val, real_type* arr); void addConst(index_type n, real_type val, real_type* arr); void scale(index_type n, const real_type* diag, real_type* vec); + void elementwiseDivide(index_type n, const real_type* divisor, real_type* vec); } // namespace cuda } // namespace ReSolve diff --git a/resolve/hip/hipVectorKernels.h b/resolve/hip/hipVectorKernels.h index f5790017d..ee3510ba0 100644 --- a/resolve/hip/hipVectorKernels.h +++ b/resolve/hip/hipVectorKernels.h @@ -18,5 +18,6 @@ namespace ReSolve void setArrayConst(index_type n, real_type val, real_type* arr); void addConst(index_type n, real_type val, real_type* arr); void scale(index_type n, const real_type* diag, real_type* vec); + void elementwiseDivide(index_type n, const real_type* divisor, real_type* vec); } // namespace hip } // namespace ReSolve diff --git a/resolve/hip/hipVectorKernels.hip b/resolve/hip/hipVectorKernels.hip index d904c69ca..18ec12015 100644 --- a/resolve/hip/hipVectorKernels.hip +++ b/resolve/hip/hipVectorKernels.hip @@ -69,6 +69,30 @@ namespace ReSolve { vec[idx] *= diag[idx]; } } + + /** + * @brief Divides a vector's elements by another vector's elements + * + * @param[in] n - size of the vectors + * @param[in] d_val - divisor values + * @param[in, out] vec - vector to be divided. Changes in place. + * + * @todo Decide how to allow user to configure grid and block sizes. + */ + __global__ void elementwiseDivide(index_type n, + const real_type* d_val, + real_type* vec) + { + // Get the index of the element to be processed + index_type idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Check if the index is within bounds + if (idx < n) + { + // Divide the vector element by the corresponding divisor value + vec[idx] /= d_val[idx]; + } + } } // namespace kernels void setArrayConst(index_type n, real_type c, real_type* v) @@ -106,5 +130,25 @@ namespace ReSolve { // Launch the kernel kernels::scale<<>>(n, diag, vec); } + + /** + * @brief Wrapper that divides a vector's elements by another vector's elements + * + * @param[in] n - size of the vectors + * @param[in] divisor - divisor values + * @param[in, out] vec - vector to be divided. Changes in place. + * + * @todo Decide how to allow user to configure grid and block sizes. + */ + void elementwiseDivide(index_type n, + const real_type* divisor, + real_type* vec) + { + // Define block size and number of blocks + const int block_size = 256; + int num_blocks = (n + block_size - 1) / block_size; + // Launch the kernel + kernels::elementwiseDivide<<>>(n, divisor, vec); + } } // namespace hip } // namespace ReSolve diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index 4f8be96aa..8c8ca525e 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -341,6 +341,35 @@ namespace ReSolve } } + /** + * @brief Divide the elements of a vector by the elements of another vector + * + * @param[in] divisor - vector divisor + * @param[in,out] vec - vector to be divided + * @param[in] memspace - Device where the operation is computed + * + * @pre The two vectors must be the same size + * + * @return 0 if successful, 1 otherwise + */ + int VectorHandler::elementwiseDivide(vector::Vector* divisor, vector::Vector* vec, memory::MemorySpace memspace) + { + assert(divisor->getSize() == vec->getSize() && "Diagonal vector must be of the same size as the vector."); + assert(divisor->getData(memspace) != nullptr && "Diagonal vector data is null!\n"); + assert(vec->getData(memspace) != nullptr && "Vector data is null!\n"); + using namespace ReSolve::memory; + switch (memspace) + { + case HOST: + return cpuImpl_->elementwiseDivide(divisor, vec); + break; + case DEVICE: + return devImpl_->elementwiseDivide(divisor, vec); + break; + } + return 1; + } + /** * @brief If CUDA support is enabled in the handler. * diff --git a/resolve/vector/VectorHandler.hpp b/resolve/vector/VectorHandler.hpp index 674a8c538..9d33d65c2 100644 --- a/resolve/vector/VectorHandler.hpp +++ b/resolve/vector/VectorHandler.hpp @@ -67,6 +67,8 @@ namespace ReSolve vector::Vector* x, memory::MemorySpace memspace); + int elementwiseDivide(vector::Vector* diag, vector::Vector* vec, memory::MemorySpace memspace); + // Vector infinity norm real_type amax(vector::Vector* x, memory::MemorySpace memspace); diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp index a1620c939..21f65a317 100644 --- a/resolve/vector/VectorHandlerCpu.cpp +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -316,4 +316,28 @@ namespace ReSolve vec->setDataUpdated(memory::HOST); } + /** + * @brief Divide the elements of a vector by the elements of another vector + * + * @param[in] divisor - vector divisor + * @param[in,out] vec - vector to be divided + * + * @pre The two vectors must be the same size + * + * @return 0 if successful, 1 otherwise + */ + int VectorHandlerCpu::elementwiseDivide(vector::Vector* divisor, vector::Vector* vec) + { + real_type* divisor_data = divisor->getData(memory::HOST); + real_type* vec_data = vec->getData(memory::HOST); + index_type n = vec->getSize(); + + for (index_type i = 0; i < n; ++i) + { + vec_data[i] /= divisor_data[i]; + } + vec->setDataUpdated(memory::HOST); + return 0; + } + } // namespace ReSolve diff --git a/resolve/vector/VectorHandlerCpu.hpp b/resolve/vector/VectorHandlerCpu.hpp index d00b06305..e3357921d 100644 --- a/resolve/vector/VectorHandlerCpu.hpp +++ b/resolve/vector/VectorHandlerCpu.hpp @@ -57,6 +57,8 @@ namespace ReSolve virtual void scal(vector::Vector* diag, vector::Vector* vec); + virtual int elementwiseDivide(vector::Vector* divisor, vector::Vector* vec); + private: LinAlgWorkspaceCpu* workspace_; }; diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index 9d4626b0c..2274bb7eb 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -339,4 +339,26 @@ namespace ReSolve vec->setDataUpdated(memory::DEVICE); } + /** + * @brief Divide a vector's elements by another vector's elements in CUDA + * + * @param[in] divisor - vector of divisors + * @param[in, out] vec - vector to be divided + * + * @pre The diagonal vector must be of the same size as the vector. + * @pre vec is undivided + * @post vec is divided + * + * @return 0 if successful, 1 otherwise + */ + int VectorHandlerCuda::elementwiseDivide(vector::Vector* divisor, vector::Vector* vec) + { + real_type* divisor_data = divisor->getData(memory::DEVICE); + real_type* vec_data = vec->getData(memory::DEVICE); + index_type n = vec->getSize(); + cuda::elementwiseDivide(n, divisor_data, vec_data); + vec->setDataUpdated(memory::DEVICE); + return 0; + } + } // namespace ReSolve diff --git a/resolve/vector/VectorHandlerCuda.hpp b/resolve/vector/VectorHandlerCuda.hpp index a18f8dbfe..a82deeeec 100644 --- a/resolve/vector/VectorHandlerCuda.hpp +++ b/resolve/vector/VectorHandlerCuda.hpp @@ -66,6 +66,16 @@ namespace ReSolve */ virtual void scal(vector::Vector* diag, vector::Vector* vec); + /** + * @brief elementwiseDivide: divides a vector's elements by another's + * + * @param[in] divisor vector of size n x 1 + * @param[in,out] vec vector of size n x 1 (this is where the result is stored) + * + * @return 0 if successful, 1 otherwise + */ + virtual int elementwiseDivide(vector::Vector* divisor, vector::Vector* vec); + private: MemoryHandler mem_; ///< Device memory manager object LinAlgWorkspaceCUDA* workspace_; diff --git a/resolve/vector/VectorHandlerHip.cpp b/resolve/vector/VectorHandlerHip.cpp index 9a006acb8..887de7c51 100644 --- a/resolve/vector/VectorHandlerHip.cpp +++ b/resolve/vector/VectorHandlerHip.cpp @@ -333,4 +333,26 @@ namespace ReSolve vec->setDataUpdated(memory::DEVICE); } + /** + * @brief Divide a vector's elements by another vector's elements in HIP + * + * @param[in] divisor - vector of divisors + * @param[in, out] vec - vector to be divided + * + * @pre The diagonal vector must be of the same size as the vector. + * @pre vec is undivided + * @post vec is divided + * + * @return 0 if successful, 1 otherwise + */ + int VectorHandlerHip::elementwiseDivide(vector::Vector* divisor, vector::Vector* vec) + { + real_type* divisor_data = divisor->getData(memory::DEVICE); + real_type* vec_data = vec->getData(memory::DEVICE); + index_type n = vec->getSize(); + hip::elementwiseDivide(n, divisor_data, vec_data); + vec->setDataUpdated(memory::DEVICE); + return 0; + } + } // namespace ReSolve diff --git a/resolve/vector/VectorHandlerHip.hpp b/resolve/vector/VectorHandlerHip.hpp index f890bf113..ce4066636 100644 --- a/resolve/vector/VectorHandlerHip.hpp +++ b/resolve/vector/VectorHandlerHip.hpp @@ -66,6 +66,16 @@ namespace ReSolve */ virtual void scal(vector::Vector* diag, vector::Vector* vec); + /** + * @brief elementwiseDivide: divides a vector's elements by another's + * + * @param[in] divisor vector of size n x 1 + * @param[in,out] vec vector of size n x 1 (this is where the result is stored) + * + * @return 0 if successful, 1 otherwise + */ + virtual int elementwiseDivide(vector::Vector* divisor, vector::Vector* vec); + private: LinAlgWorkspaceHIP* workspace_; MemoryHandler mem_; ///< Device memory manager object diff --git a/resolve/vector/VectorHandlerImpl.hpp b/resolve/vector/VectorHandlerImpl.hpp index 6e76091b4..d721a58cc 100644 --- a/resolve/vector/VectorHandlerImpl.hpp +++ b/resolve/vector/VectorHandlerImpl.hpp @@ -45,6 +45,9 @@ namespace ReSolve // Scale a vector by a diagonal matrix virtual void scal(vector::Vector* diag, vector::Vector* vec) = 0; + // Divide the elements of a vector by the elements of another vector + virtual int elementwiseDivide(vector::Vector* divisor, vector::Vector* vec) = 0; + /** gemv: * if `transpose = N` (no), `x = beta*x + alpha*V*y`, * where `x` is `[n x 1]`, `V` is `[n x k]` and `y` is `[k x 1]`. diff --git a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp index 76e558244..9382317e1 100644 --- a/tests/unit/vector/VectorHandlerTests.hpp +++ b/tests/unit/vector/VectorHandlerTests.hpp @@ -280,6 +280,48 @@ namespace ReSolve return status.report(__func__); } + TestOutcome elementwiseDivide(index_type N) + { + TestStatus status; + + vector::Vector divisor(N); + vector::Vector vec(N); + + // divisor[i] = i, vec[i] = 3.0 + // expected result vec[i] = i * 3.0 + divisor.allocate(memspace_); + vec.allocate(memspace_); + + vec.setToConst(3.0, memspace_); + + auto divisor_data = std::unique_ptr(new real_type[N]); + for (size_t i = 0; i < static_cast(N); ++i) + { + divisor_data[i] = (real_type) (i + 1); + } + divisor.copyDataFrom(divisor_data.get(), memory::HOST, memspace_); + + handler_.elementwiseDivide(&divisor, &vec, memspace_); + + if (memspace_ == memory::DEVICE) + { + vec.syncData(memory::HOST); + } + + for (index_type i = 0; i < N; ++i) + { + if (!isEqual(vec.getData(memory::HOST)[i], (real_type) 3.0 / (i + 1))) + { + std::cout << "Solution vector element vec[" << i << "] = " << vec.getData(memory::HOST)[i] + << ", expected: " << (real_type) 3.0 / (i + 1) << "\n"; + status *= false; + break; + } + } + + return status.report(__func__); + } + private: ReSolve::VectorHandler& handler_; ReSolve::memory::MemorySpace memspace_{memory::HOST}; diff --git a/tests/unit/vector/runVectorHandlerTests.cpp b/tests/unit/vector/runVectorHandlerTests.cpp index 0d4e02983..4d3f57483 100644 --- a/tests/unit/vector/runVectorHandlerTests.cpp +++ b/tests/unit/vector/runVectorHandlerTests.cpp @@ -25,6 +25,7 @@ int main(int, char**) result += test.axpyMulti(100, 10); result += test.massDot(100, 10); result += test.scale(100); + result += test.elementwiseDivide(100); std::cout << "\n"; } From eda3b3d6cd508eb9d2e810b59add3eafe1540fde Mon Sep 17 00:00:00 2001 From: Alexander Novotny Date: Mon, 23 Feb 2026 14:07:46 -0500 Subject: [PATCH 02/14] Update CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e5e08eeec..08597278c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,8 @@ - Made Vector::copyDataTo able to copy from device to host and vice versa +- Added `elementwiseDivide` vector operation. + ## Changes to Re::Solve in release 0.99.2 ### Major Features From 53c80a68c9b5ccb2794fb022f8d2f6dd8d7f6a4e Mon Sep 17 00:00:00 2001 From: Alexander Novotny Date: Tue, 24 Feb 2026 14:56:14 -0500 Subject: [PATCH 03/14] Add elementwiseMax and abs --- resolve/cuda/cudaVectorKernels.cu | 83 ++++++++++++++++++ resolve/cuda/cudaVectorKernels.h | 2 + resolve/hip/hipVectorKernels.h | 2 + resolve/hip/hipVectorKernels.hip | 84 ++++++++++++++++++ resolve/vector/VectorHandler.cpp | 53 ++++++++++++ resolve/vector/VectorHandler.hpp | 5 +- resolve/vector/VectorHandlerCpu.cpp | 45 ++++++++++ resolve/vector/VectorHandlerCpu.hpp | 4 + resolve/vector/VectorHandlerCuda.cpp | 36 ++++++++ resolve/vector/VectorHandlerCuda.hpp | 19 ++++ resolve/vector/VectorHandlerHip.cpp | 36 ++++++++ resolve/vector/VectorHandlerHip.hpp | 19 ++++ resolve/vector/VectorHandlerImpl.hpp | 6 ++ tests/unit/vector/VectorHandlerTests.hpp | 96 ++++++++++++++++++++- tests/unit/vector/runVectorHandlerTests.cpp | 8 ++ 15 files changed, 495 insertions(+), 3 deletions(-) diff --git a/resolve/cuda/cudaVectorKernels.cu b/resolve/cuda/cudaVectorKernels.cu index 255a5b18d..c604d7e75 100644 --- a/resolve/cuda/cudaVectorKernels.cu +++ b/resolve/cuda/cudaVectorKernels.cu @@ -105,6 +105,51 @@ namespace ReSolve } } + /** + * @brief Wrapper that computes the element-wise max of two vectors. + * + * @param[in] n - size of the vectors + * @param[in] x - First vector values + * @param[in, out] y - Second vector values. Changes in place. + * + * @todo Decide how to allow user to configure grid and block sizes. + */ + __global__ void elementwiseMax(index_type n, + const real_type* x, + real_type* y) + { + // Get the index of the element to be processed + index_type idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Check if the index is within bounds + if (idx < n) + { + // Compute maximum of elements + y[idx] = max(x[idx], y[idx]); + } + } + + /** + * @brief Computes the element-wise absolute value of a vector. + * + * @param[in] n - size of the vector + * @param[in, out] x - Vector values. Changes in place. + * + * @todo Decide how to allow user to configure grid and block sizes. + */ + __global__ void abs(index_type n, + real_type* x) + { + // Get the index of the element to be processed + index_type idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Check if the index is within bounds + if (idx < n) + { + // Compute absolute value of element + x[idx] = fabs(x[idx]); + } + } } // namespace kernels void setArrayConst(index_type n, real_type val, real_type* arr) @@ -162,5 +207,43 @@ namespace ReSolve // Launch the kernel kernels::elementwiseDivide<<>>(n, divisor, vec); } + + /** + * @brief Wrapper that computes the element-wise max of two vectors. + * + * @param[in] n - size of the vectors + * @param[in] x - First vector values + * @param[in, out] y - Second vector values. Changes in place. + * + * @todo Decide how to allow user to configure grid and block sizes. + */ + void elementwiseMax(index_type n, + const real_type* x, + real_type* y) + { + // Define block size and number of blocks + const int block_size = 256; + int num_blocks = (n + block_size - 1) / block_size; + // Launch the kernel + kernels::elementwiseMax<<>>(n, x, y); + } + + /** + * @brief Wrapper that computes the element-wise absolute value of a vector. + * + * @param[in] n - size of the vector + * @param[in, out] x - Vector values. Changes in place. + * + * @todo Decide how to allow user to configure grid and block sizes. + */ + void abs(index_type n, + real_type* x) + { + // Define block size and number of blocks + const int block_size = 256; + int num_blocks = (n + block_size - 1) / block_size; + // Launch the kernel + kernels::abs<<>>(n, x); + } } // namespace cuda } // namespace ReSolve diff --git a/resolve/cuda/cudaVectorKernels.h b/resolve/cuda/cudaVectorKernels.h index 8f8642fbd..fccef3447 100644 --- a/resolve/cuda/cudaVectorKernels.h +++ b/resolve/cuda/cudaVectorKernels.h @@ -19,5 +19,7 @@ namespace ReSolve void addConst(index_type n, real_type val, real_type* arr); void scale(index_type n, const real_type* diag, real_type* vec); void elementwiseDivide(index_type n, const real_type* divisor, real_type* vec); + void elementwiseMax(index_type n, const real_type* x, real_type* y); + void abs(index_type n, real_type* x); } // namespace cuda } // namespace ReSolve diff --git a/resolve/hip/hipVectorKernels.h b/resolve/hip/hipVectorKernels.h index ee3510ba0..c7057a9f3 100644 --- a/resolve/hip/hipVectorKernels.h +++ b/resolve/hip/hipVectorKernels.h @@ -19,5 +19,7 @@ namespace ReSolve void addConst(index_type n, real_type val, real_type* arr); void scale(index_type n, const real_type* diag, real_type* vec); void elementwiseDivide(index_type n, const real_type* divisor, real_type* vec); + void elementwiseMax(index_type n, const real_type* x, real_type* y); + void abs(index_type n, real_type* x); } // namespace hip } // namespace ReSolve diff --git a/resolve/hip/hipVectorKernels.hip b/resolve/hip/hipVectorKernels.hip index 18ec12015..51ad35c65 100644 --- a/resolve/hip/hipVectorKernels.hip +++ b/resolve/hip/hipVectorKernels.hip @@ -93,6 +93,52 @@ namespace ReSolve { vec[idx] /= d_val[idx]; } } + + /** + * @brief Wrapper that computes the element-wise max of two vectors. + * + * @param[in] n - size of the vectors + * @param[in] x - First vector values + * @param[in, out] y - Second vector values. Changes in place. + * + * @todo Decide how to allow user to configure grid and block sizes. + */ + __global__ void elementwiseMax(index_type n, + const real_type* x, + real_type* y) + { + // Get the index of the element to be processed + index_type idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Check if the index is within bounds + if (idx < n) + { + // Compute maximum of elements + y[idx] = max(x[idx], y[idx]); + } + } + + /** + * @brief Computes the element-wise absolute value of a vector. + * + * @param[in] n - size of the vector + * @param[in, out] x - Vector values. Changes in place. + * + * @todo Decide how to allow user to configure grid and block sizes. + */ + __global__ void abs(index_type n, + real_type* x) + { + // Get the index of the element to be processed + index_type idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Check if the index is within bounds + if (idx < n) + { + // Compute absolute value of element + x[idx] = fabs(x[idx]); + } + } } // namespace kernels void setArrayConst(index_type n, real_type c, real_type* v) @@ -150,5 +196,43 @@ namespace ReSolve { // Launch the kernel kernels::elementwiseDivide<<>>(n, divisor, vec); } + + /** + * @brief Wrapper that computes the element-wise max of two vectors. + * + * @param[in] n - size of the vectors + * @param[in] x - First vector values + * @param[in, out] y - Second vector values. Changes in place. + * + * @todo Decide how to allow user to configure grid and block sizes. + */ + void elementwiseMax(index_type n, + const real_type* x, + real_type* y) + { + // Define block size and number of blocks + const int block_size = 256; + int num_blocks = (n + block_size - 1) / block_size; + // Launch the kernel + kernels::elementwiseMax<<>>(n, x, y); + } + + /** + * @brief Wrapper that computes the element-wise absolute value of a vector. + * + * @param[in] n - size of the vector + * @param[in, out] x - Vector values. Changes in place. + * + * @todo Decide how to allow user to configure grid and block sizes. + */ + void abs(index_type n, + real_type* x) + { + // Define block size and number of blocks + const int block_size = 256; + int num_blocks = (n + block_size - 1) / block_size; + // Launch the kernel + kernels::abs<<>>(n, x); + } } // namespace hip } // namespace ReSolve diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index 8c8ca525e..de9dc9963 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -370,6 +370,59 @@ namespace ReSolve return 1; } + /** + * @brief Takes the element-wise max between two vectors. + * + * @param[in] x - The first vector + * @param[in,out] y - The second vector (result is returned in y) + * @param[in] memspace - Device where the operation is computed + * + * @pre The two vectors must be the same size + * + * @return 0 if successful, 1 otherwise + */ + int VectorHandler::elementwiseMax(vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace) + { + assert(x->getSize() == y->getSize() && "Vectors must be the same size."); + assert(x->getData(memspace) != nullptr && "Vector x data is null!\n"); + assert(y->getData(memspace) != nullptr && "Vector y data is null!\n"); + using namespace ReSolve::memory; + switch (memspace) + { + case HOST: + return cpuImpl_->elementwiseMax(x, y); + break; + case DEVICE: + return devImpl_->elementwiseMax(x, y); + break; + } + return 1; + } + + /** + * @brief Computes the element-wise absolute value of a vector. + * + * @param[in,out] x - Input and output vector + * @param[in] memspace - Device where the operation is computed + * + * @return 0 if successful, 1 otherwise + */ + int VectorHandler::abs(vector::Vector* x, memory::MemorySpace memspace) + { + assert(x->getData(memspace) != nullptr && "Vector data is null!\n"); + using namespace ReSolve::memory; + switch (memspace) + { + case HOST: + return cpuImpl_->abs(x); + break; + case DEVICE: + return devImpl_->abs(x); + break; + } + return 1; + } + /** * @brief If CUDA support is enabled in the handler. * diff --git a/resolve/vector/VectorHandler.hpp b/resolve/vector/VectorHandler.hpp index 9d33d65c2..e9f85a624 100644 --- a/resolve/vector/VectorHandler.hpp +++ b/resolve/vector/VectorHandler.hpp @@ -67,7 +67,10 @@ namespace ReSolve vector::Vector* x, memory::MemorySpace memspace); - int elementwiseDivide(vector::Vector* diag, vector::Vector* vec, memory::MemorySpace memspace); + int elementwiseDivide(vector::Vector* divisor, vector::Vector* vec, memory::MemorySpace memspace); + int elementwiseMax(vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace); + + int abs(vector::Vector* x, memory::MemorySpace memspace); // Vector infinity norm real_type amax(vector::Vector* x, memory::MemorySpace memspace); diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp index 21f65a317..11b31ea47 100644 --- a/resolve/vector/VectorHandlerCpu.cpp +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -340,4 +340,49 @@ namespace ReSolve return 0; } + /** + * @brief Take the element-wise max of two vectors. + * Each element of the output will be the maximum value of the corresponding elements in the input vectors. + * + * @param[in] x - First vector + * @param[in,out] y - Second vector (output) + * + * @pre The two vectors must be the same size + * + * @return 0 if successful, 1 otherwise + */ + int VectorHandlerCpu::elementwiseMax(vector::Vector* x, vector::Vector* y) + { + real_type* x_data = x->getData(memory::HOST); + real_type* y_data = y->getData(memory::HOST); + index_type n = y->getSize(); + + for (index_type i = 0; i < n; ++i) + { + y_data[i] = std::max(x_data[i], y_data[i]); + } + y->setDataUpdated(memory::HOST); + return 0; + } + + /** + * @brief Take the element-wise absolute value of a vector. + * + * @param[in,out] x - Input and output vector + * + * @return 0 if successful, 1 otherwise + */ + int VectorHandlerCpu::abs(vector::Vector* x) + { + real_type* x_data = x->getData(memory::HOST); + index_type n = x->getSize(); + + for (index_type i = 0; i < n; ++i) + { + x_data[i] = std::abs(x_data[i]); + } + x->setDataUpdated(memory::HOST); + return 0; + } + } // namespace ReSolve diff --git a/resolve/vector/VectorHandlerCpu.hpp b/resolve/vector/VectorHandlerCpu.hpp index e3357921d..51f7f2b63 100644 --- a/resolve/vector/VectorHandlerCpu.hpp +++ b/resolve/vector/VectorHandlerCpu.hpp @@ -59,6 +59,10 @@ namespace ReSolve virtual int elementwiseDivide(vector::Vector* divisor, vector::Vector* vec); + virtual int elementwiseMax(vector::Vector* x, vector::Vector* y); + + virtual int abs(vector::Vector* x); + private: LinAlgWorkspaceCpu* workspace_; }; diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index 2274bb7eb..a975df423 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -361,4 +361,40 @@ namespace ReSolve return 0; } + /** + * @brief Calculate element-wise maximum between two vectors in CUDA + * + * @param[in] x - The first vector + * @param[in, out] y - The second vector (result is returned in y) + * + * @pre The two vectors must be the same size + * + * @return 0 if successful, 1 otherwise + */ + int VectorHandlerCuda::elementwiseMax(vector::Vector* x, vector::Vector* y) + { + real_type* x_data = x->getData(memory::DEVICE); + real_type* y_data = y->getData(memory::DEVICE); + index_type n = y->getSize(); + cuda::elementwiseMax(n, x_data, y_data); + y->setDataUpdated(memory::DEVICE); + return 0; + } + + /** + * @brief Calculate element-wise absolute value of a vector in CUDA + * + * @param[in, out] x - The vector (result is returned in x) + * + * @return 0 if successful, 1 otherwise + */ + int VectorHandlerCuda::abs(vector::Vector* x) + { + real_type* x_data = x->getData(memory::DEVICE); + index_type n = x->getSize(); + cuda::abs(n, x_data); + x->setDataUpdated(memory::DEVICE); + return 0; + } + } // namespace ReSolve diff --git a/resolve/vector/VectorHandlerCuda.hpp b/resolve/vector/VectorHandlerCuda.hpp index a82deeeec..8b84bc28e 100644 --- a/resolve/vector/VectorHandlerCuda.hpp +++ b/resolve/vector/VectorHandlerCuda.hpp @@ -76,6 +76,25 @@ namespace ReSolve */ virtual int elementwiseDivide(vector::Vector* divisor, vector::Vector* vec); + /** + * @brief elementwiseMax: calculate the element-wise maximum of two vectors + * + * @param[in] x vector of size n x 1 + * @param[in,out] y vector of size n x 1 (this is where the result is stored) + * + * @return 0 if successful, 1 otherwise + */ + virtual int elementwiseMax(vector::Vector* x, vector::Vector* y); + + /** + * @brief abs: calculate the element-wise absolute value of a vector + * + * @param[in,out] x vector of size n x 1 (this is where the result is stored) + * + * @return 0 if successful, 1 otherwise + */ + virtual int abs(vector::Vector* x); + private: MemoryHandler mem_; ///< Device memory manager object LinAlgWorkspaceCUDA* workspace_; diff --git a/resolve/vector/VectorHandlerHip.cpp b/resolve/vector/VectorHandlerHip.cpp index 887de7c51..f6b1fe22a 100644 --- a/resolve/vector/VectorHandlerHip.cpp +++ b/resolve/vector/VectorHandlerHip.cpp @@ -355,4 +355,40 @@ namespace ReSolve return 0; } + /** + * @brief Calculate element-wise maximum between two vectors in HIP + * + * @param[in] x - The first vector + * @param[in, out] y - The second vector (result is returned in y) + * + * @pre The two vectors must be the same size + * + * @return 0 if successful, 1 otherwise + */ + int VectorHandlerHip::elementwiseMax(vector::Vector* x, vector::Vector* y) + { + real_type* x_data = x->getData(memory::DEVICE); + real_type* y_data = y->getData(memory::DEVICE); + index_type n = y->getSize(); + hip::elementwiseMax(n, x_data, y_data); + y->setDataUpdated(memory::DEVICE); + return 0; + } + + /** + * @brief Calculate element-wise absolute value of a vector in HIP + * + * @param[in, out] x - The vector (result is returned in x) + * + * @return 0 if successful, 1 otherwise + */ + int VectorHandlerHip::abs(vector::Vector* x) + { + real_type* x_data = x->getData(memory::DEVICE); + index_type n = x->getSize(); + hip::abs(n, x_data); + x->setDataUpdated(memory::DEVICE); + return 0; + } + } // namespace ReSolve diff --git a/resolve/vector/VectorHandlerHip.hpp b/resolve/vector/VectorHandlerHip.hpp index ce4066636..80df606ce 100644 --- a/resolve/vector/VectorHandlerHip.hpp +++ b/resolve/vector/VectorHandlerHip.hpp @@ -76,6 +76,25 @@ namespace ReSolve */ virtual int elementwiseDivide(vector::Vector* divisor, vector::Vector* vec); + /** + * @brief elementwiseMax: calculate the element-wise maximum of two vectors + * + * @param[in] x vector of size n x 1 + * @param[in,out] y vector of size n x 1 (this is where the result is stored) + * + * @return 0 if successful, 1 otherwise + */ + virtual int elementwiseMax(vector::Vector* x, vector::Vector* y); + + /** + * @brief abs: calculate the element-wise absolute value of a vector + * + * @param[in,out] x vector of size n x 1 (this is where the result is stored) + * + * @return 0 if successful, 1 otherwise + */ + virtual int abs(vector::Vector* x); + private: LinAlgWorkspaceHIP* workspace_; MemoryHandler mem_; ///< Device memory manager object diff --git a/resolve/vector/VectorHandlerImpl.hpp b/resolve/vector/VectorHandlerImpl.hpp index d721a58cc..12d9df21a 100644 --- a/resolve/vector/VectorHandlerImpl.hpp +++ b/resolve/vector/VectorHandlerImpl.hpp @@ -48,6 +48,12 @@ namespace ReSolve // Divide the elements of a vector by the elements of another vector virtual int elementwiseDivide(vector::Vector* divisor, vector::Vector* vec) = 0; + // Compute element-wise max of two vectors + virtual int elementwiseMax(vector::Vector* x, vector::Vector* y) = 0; + + // Compute element-wise absolute value of a vector + virtual int abs(vector::Vector* x) = 0; + /** gemv: * if `transpose = N` (no), `x = beta*x + alpha*V*y`, * where `x` is `[n x 1]`, `V` is `[n x k]` and `y` is `[k x 1]`. diff --git a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp index 9382317e1..75b3e1a0d 100644 --- a/tests/unit/vector/VectorHandlerTests.hpp +++ b/tests/unit/vector/VectorHandlerTests.hpp @@ -287,8 +287,8 @@ namespace ReSolve vector::Vector divisor(N); vector::Vector vec(N); - // divisor[i] = i, vec[i] = 3.0 - // expected result vec[i] = i * 3.0 + // divisor[i] = i + 1, vec[i] = 3.0 + // expected result vec[i] = 3.0 / (i + 1) divisor.allocate(memspace_); vec.allocate(memspace_); @@ -322,6 +322,98 @@ namespace ReSolve return status.report(__func__); } + TestOutcome elementwiseMax(index_type N) + { + TestStatus status; + + vector::Vector x(N); + vector::Vector y(N); + + x.allocate(memspace_); + y.allocate(memspace_); + + auto x_data = std::unique_ptr(new real_type[N]); + auto y_data = std::unique_ptr(new real_type[N]); + for (size_t i = 0; i < static_cast(N); ++i) + { + if (i % 3 == 0) + { + x_data[i] = (real_type) (i + 1); + y_data[i] = (real_type) i * .5; + } + else + { + x_data[i] = -(real_type) (i + 1); + y_data[i] = (real_type) (i + 1); + } + } + x.copyDataFrom(x_data.get(), memory::HOST, memspace_); + y.copyDataFrom(y_data.get(), memory::HOST, memspace_); + + handler_.elementwiseMax(&x, &y, memspace_); + + if (memspace_ == memory::DEVICE) + { + y.syncData(memory::HOST); + } + + for (index_type i = 0; i < N; ++i) + { + if (!isEqual(y.getData(memory::HOST)[i], (real_type) (i + 1))) + { + std::cout << "Solution vector element y[" << i << "] = " << y.getData(memory::HOST)[i] + << ", expected: " << (real_type) (i + 1) << "\n"; + status *= false; + break; + } + } + + return status.report(__func__); + } + + TestOutcome abs(index_type N) + { + TestStatus status; + + vector::Vector x(N); + + x.allocate(memspace_); + + auto x_data = std::unique_ptr(new real_type[N]); + for (size_t i = 0; i < static_cast(N); ++i) + { + if (i % 3 == 0) + { + x_data[i] = -(real_type) i; + } + else + { + x_data[i] = (real_type) i; + } + } + x.copyDataFrom(x_data.get(), memory::HOST, memspace_); + + handler_.abs(&x, memspace_); + + if (memspace_ == memory::DEVICE) + { + x.syncData(memory::HOST); + } + + for (index_type i = 0; i < N; ++i) + { + if (!isEqual(x.getData(memory::HOST)[i], (real_type) i)) + { + std::cout << "Solution vector element y[" << i << "] = " << x.getData(memory::HOST)[i] + << ", expected: " << (real_type) i << "\n"; + status *= false; + break; + } + } + + return status.report(__func__); + } + private: ReSolve::VectorHandler& handler_; ReSolve::memory::MemorySpace memspace_{memory::HOST}; diff --git a/tests/unit/vector/runVectorHandlerTests.cpp b/tests/unit/vector/runVectorHandlerTests.cpp index 4d3f57483..933061f70 100644 --- a/tests/unit/vector/runVectorHandlerTests.cpp +++ b/tests/unit/vector/runVectorHandlerTests.cpp @@ -26,6 +26,8 @@ int main(int, char**) result += test.massDot(100, 10); result += test.scale(100); result += test.elementwiseDivide(100); + result += test.elementwiseMax(100); + result += test.abs(100); std::cout << "\n"; } @@ -49,6 +51,9 @@ int main(int, char**) result += test.massDot(1000, 30); result += test.amax(1000); result += test.scale(1000); + result += test.elementwiseDivide(1000); + result += test.elementwiseMax(1000); + result += test.abs(1000); std::cout << "\n"; } @@ -73,6 +78,9 @@ int main(int, char**) result += test.massDot(1000, 30); result += test.amax(1000); result += test.scale(1000); + result += test.elementwiseDivide(1000); + result += test.elementwiseMax(1000); + result += test.abs(1000); std::cout << "\n"; } From 910c90df3c94cce0e230fc8ba679c4e898b175aa Mon Sep 17 00:00:00 2001 From: Alexander Novotny Date: Tue, 24 Feb 2026 14:58:14 -0500 Subject: [PATCH 04/14] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 08597278c..5eadc27d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,7 +18,7 @@ - Made Vector::copyDataTo able to copy from device to host and vice versa -- Added `elementwiseDivide` vector operation. +- Added `elementwiseDivide`, `elementwiseMax`, and `abs` vector operations. ## Changes to Re::Solve in release 0.99.2 From d364d79afbcd0ef32023cfb2a0f7659a9d692f92 Mon Sep 17 00:00:00 2001 From: Alexander Novotny Date: Tue, 3 Mar 2026 17:23:00 -0500 Subject: [PATCH 05/14] Update new vector copy method name --- tests/unit/vector/VectorHandlerTests.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp index 75b3e1a0d..290f05a7a 100644 --- a/tests/unit/vector/VectorHandlerTests.hpp +++ b/tests/unit/vector/VectorHandlerTests.hpp @@ -299,7 +299,7 @@ namespace ReSolve { divisor_data[i] = (real_type) (i + 1); } - divisor.copyDataFrom(divisor_data.get(), memory::HOST, memspace_); + divisor.copyFromExternal(divisor_data.get(), memory::HOST, memspace_); handler_.elementwiseDivide(&divisor, &vec, memspace_); @@ -347,8 +347,8 @@ namespace ReSolve y_data[i] = (real_type) (i + 1); } } - x.copyDataFrom(x_data.get(), memory::HOST, memspace_); - y.copyDataFrom(y_data.get(), memory::HOST, memspace_); + x.copyFromExternal(x_data.get(), memory::HOST, memspace_); + y.copyFromExternal(y_data.get(), memory::HOST, memspace_); handler_.elementwiseMax(&x, &y, memspace_); @@ -391,7 +391,7 @@ namespace ReSolve x_data[i] = (real_type) i; } } - x.copyDataFrom(x_data.get(), memory::HOST, memspace_); + x.copyFromExternal(x_data.get(), memory::HOST, memspace_); handler_.abs(&x, memspace_); From a97ce85f264ad289d35a6e23241240e98fabb5a0 Mon Sep 17 00:00:00 2001 From: Alexander Novotny Date: Tue, 3 Mar 2026 17:23:57 -0500 Subject: [PATCH 06/14] Update new method names --- CHANGELOG.md | 2 +- resolve/cuda/cudaVectorKernels.cu | 28 ++++++++++----------- resolve/cuda/cudaVectorKernels.h | 4 +-- resolve/hip/hipVectorKernels.h | 4 +-- resolve/hip/hipVectorKernels.hip | 12 ++++----- resolve/vector/VectorHandler.cpp | 12 ++++----- resolve/vector/VectorHandler.hpp | 4 +-- resolve/vector/VectorHandlerCpu.cpp | 4 +-- resolve/vector/VectorHandlerCpu.hpp | 4 +-- resolve/vector/VectorHandlerCuda.cpp | 8 +++--- resolve/vector/VectorHandlerCuda.hpp | 8 +++--- resolve/vector/VectorHandlerHip.cpp | 8 +++--- resolve/vector/VectorHandlerHip.hpp | 8 +++--- resolve/vector/VectorHandlerImpl.hpp | 4 +-- tests/unit/vector/VectorHandlerTests.hpp | 8 +++--- tests/unit/vector/runVectorHandlerTests.cpp | 12 ++++----- 16 files changed, 65 insertions(+), 65 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5eadc27d7..eb5cbbe92 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,7 +18,7 @@ - Made Vector::copyDataTo able to copy from device to host and vice versa -- Added `elementwiseDivide`, `elementwiseMax`, and `abs` vector operations. +- Added `scaleInv`, `max`, and `abs` vector operations. ## Changes to Re::Solve in release 0.99.2 diff --git a/resolve/cuda/cudaVectorKernels.cu b/resolve/cuda/cudaVectorKernels.cu index c604d7e75..ad68ad767 100644 --- a/resolve/cuda/cudaVectorKernels.cu +++ b/resolve/cuda/cudaVectorKernels.cu @@ -90,9 +90,9 @@ namespace ReSolve * * @todo Decide how to allow user to configure grid and block sizes. */ - __global__ void elementwiseDivide(index_type n, - const real_type* d_val, - real_type* vec) + __global__ void scaleInv(index_type n, + const real_type* d_val, + real_type* vec) { // Get the index of the element to be processed index_type idx = blockIdx.x * blockDim.x + threadIdx.x; @@ -114,9 +114,9 @@ namespace ReSolve * * @todo Decide how to allow user to configure grid and block sizes. */ - __global__ void elementwiseMax(index_type n, - const real_type* x, - real_type* y) + __global__ void max(index_type n, + const real_type* x, + real_type* y) { // Get the index of the element to be processed index_type idx = blockIdx.x * blockDim.x + threadIdx.x; @@ -197,15 +197,15 @@ namespace ReSolve * * @todo Decide how to allow user to configure grid and block sizes. */ - void elementwiseDivide(index_type n, - const real_type* divisor, - real_type* vec) + void scaleInv(index_type n, + const real_type* divisor, + real_type* vec) { // Define block size and number of blocks const int block_size = 256; int num_blocks = (n + block_size - 1) / block_size; // Launch the kernel - kernels::elementwiseDivide<<>>(n, divisor, vec); + kernels::scaleInv<<>>(n, divisor, vec); } /** @@ -217,15 +217,15 @@ namespace ReSolve * * @todo Decide how to allow user to configure grid and block sizes. */ - void elementwiseMax(index_type n, - const real_type* x, - real_type* y) + void max(index_type n, + const real_type* x, + real_type* y) { // Define block size and number of blocks const int block_size = 256; int num_blocks = (n + block_size - 1) / block_size; // Launch the kernel - kernels::elementwiseMax<<>>(n, x, y); + kernels::max<<>>(n, x, y); } /** diff --git a/resolve/cuda/cudaVectorKernels.h b/resolve/cuda/cudaVectorKernels.h index fccef3447..bd2f00d6e 100644 --- a/resolve/cuda/cudaVectorKernels.h +++ b/resolve/cuda/cudaVectorKernels.h @@ -18,8 +18,8 @@ namespace ReSolve void setArrayConst(index_type n, real_type val, real_type* arr); void addConst(index_type n, real_type val, real_type* arr); void scale(index_type n, const real_type* diag, real_type* vec); - void elementwiseDivide(index_type n, const real_type* divisor, real_type* vec); - void elementwiseMax(index_type n, const real_type* x, real_type* y); + void scaleInv(index_type n, const real_type* divisor, real_type* vec); + void max(index_type n, const real_type* x, real_type* y); void abs(index_type n, real_type* x); } // namespace cuda } // namespace ReSolve diff --git a/resolve/hip/hipVectorKernels.h b/resolve/hip/hipVectorKernels.h index c7057a9f3..c42569319 100644 --- a/resolve/hip/hipVectorKernels.h +++ b/resolve/hip/hipVectorKernels.h @@ -18,8 +18,8 @@ namespace ReSolve void setArrayConst(index_type n, real_type val, real_type* arr); void addConst(index_type n, real_type val, real_type* arr); void scale(index_type n, const real_type* diag, real_type* vec); - void elementwiseDivide(index_type n, const real_type* divisor, real_type* vec); - void elementwiseMax(index_type n, const real_type* x, real_type* y); + void scaleInv(index_type n, const real_type* divisor, real_type* vec); + void max(index_type n, const real_type* x, real_type* y); void abs(index_type n, real_type* x); } // namespace hip } // namespace ReSolve diff --git a/resolve/hip/hipVectorKernels.hip b/resolve/hip/hipVectorKernels.hip index 51ad35c65..bdcd548c6 100644 --- a/resolve/hip/hipVectorKernels.hip +++ b/resolve/hip/hipVectorKernels.hip @@ -79,7 +79,7 @@ namespace ReSolve { * * @todo Decide how to allow user to configure grid and block sizes. */ - __global__ void elementwiseDivide(index_type n, + __global__ void scaleInv(index_type n, const real_type* d_val, real_type* vec) { @@ -103,7 +103,7 @@ namespace ReSolve { * * @todo Decide how to allow user to configure grid and block sizes. */ - __global__ void elementwiseMax(index_type n, + __global__ void max(index_type n, const real_type* x, real_type* y) { @@ -186,7 +186,7 @@ namespace ReSolve { * * @todo Decide how to allow user to configure grid and block sizes. */ - void elementwiseDivide(index_type n, + void scaleInv(index_type n, const real_type* divisor, real_type* vec) { @@ -194,7 +194,7 @@ namespace ReSolve { const int block_size = 256; int num_blocks = (n + block_size - 1) / block_size; // Launch the kernel - kernels::elementwiseDivide<<>>(n, divisor, vec); + kernels::scaleInv<<>>(n, divisor, vec); } /** @@ -206,7 +206,7 @@ namespace ReSolve { * * @todo Decide how to allow user to configure grid and block sizes. */ - void elementwiseMax(index_type n, + void max(index_type n, const real_type* x, real_type* y) { @@ -214,7 +214,7 @@ namespace ReSolve { const int block_size = 256; int num_blocks = (n + block_size - 1) / block_size; // Launch the kernel - kernels::elementwiseMax<<>>(n, x, y); + kernels::max<<>>(n, x, y); } /** diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index de9dc9963..af81bd4ee 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -352,7 +352,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandler::elementwiseDivide(vector::Vector* divisor, vector::Vector* vec, memory::MemorySpace memspace) + int VectorHandler::scaleInv(vector::Vector* divisor, vector::Vector* vec, memory::MemorySpace memspace) { assert(divisor->getSize() == vec->getSize() && "Diagonal vector must be of the same size as the vector."); assert(divisor->getData(memspace) != nullptr && "Diagonal vector data is null!\n"); @@ -361,10 +361,10 @@ namespace ReSolve switch (memspace) { case HOST: - return cpuImpl_->elementwiseDivide(divisor, vec); + return cpuImpl_->scaleInv(divisor, vec); break; case DEVICE: - return devImpl_->elementwiseDivide(divisor, vec); + return devImpl_->scaleInv(divisor, vec); break; } return 1; @@ -381,7 +381,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandler::elementwiseMax(vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace) + int VectorHandler::max(vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace) { assert(x->getSize() == y->getSize() && "Vectors must be the same size."); assert(x->getData(memspace) != nullptr && "Vector x data is null!\n"); @@ -390,10 +390,10 @@ namespace ReSolve switch (memspace) { case HOST: - return cpuImpl_->elementwiseMax(x, y); + return cpuImpl_->max(x, y); break; case DEVICE: - return devImpl_->elementwiseMax(x, y); + return devImpl_->max(x, y); break; } return 1; diff --git a/resolve/vector/VectorHandler.hpp b/resolve/vector/VectorHandler.hpp index e9f85a624..4ed46ad7b 100644 --- a/resolve/vector/VectorHandler.hpp +++ b/resolve/vector/VectorHandler.hpp @@ -67,8 +67,8 @@ namespace ReSolve vector::Vector* x, memory::MemorySpace memspace); - int elementwiseDivide(vector::Vector* divisor, vector::Vector* vec, memory::MemorySpace memspace); - int elementwiseMax(vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace); + int scaleInv(vector::Vector* divisor, vector::Vector* vec, memory::MemorySpace memspace); + int max(vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace); int abs(vector::Vector* x, memory::MemorySpace memspace); diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp index 11b31ea47..d9f285dd1 100644 --- a/resolve/vector/VectorHandlerCpu.cpp +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -326,7 +326,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandlerCpu::elementwiseDivide(vector::Vector* divisor, vector::Vector* vec) + int VectorHandlerCpu::scaleInv(vector::Vector* divisor, vector::Vector* vec) { real_type* divisor_data = divisor->getData(memory::HOST); real_type* vec_data = vec->getData(memory::HOST); @@ -351,7 +351,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandlerCpu::elementwiseMax(vector::Vector* x, vector::Vector* y) + int VectorHandlerCpu::max(vector::Vector* x, vector::Vector* y) { real_type* x_data = x->getData(memory::HOST); real_type* y_data = y->getData(memory::HOST); diff --git a/resolve/vector/VectorHandlerCpu.hpp b/resolve/vector/VectorHandlerCpu.hpp index 51f7f2b63..79714bd8d 100644 --- a/resolve/vector/VectorHandlerCpu.hpp +++ b/resolve/vector/VectorHandlerCpu.hpp @@ -57,9 +57,9 @@ namespace ReSolve virtual void scal(vector::Vector* diag, vector::Vector* vec); - virtual int elementwiseDivide(vector::Vector* divisor, vector::Vector* vec); + virtual int scaleInv(vector::Vector* divisor, vector::Vector* vec); - virtual int elementwiseMax(vector::Vector* x, vector::Vector* y); + virtual int max(vector::Vector* x, vector::Vector* y); virtual int abs(vector::Vector* x); diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index a975df423..040614748 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -351,12 +351,12 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandlerCuda::elementwiseDivide(vector::Vector* divisor, vector::Vector* vec) + int VectorHandlerCuda::scaleInv(vector::Vector* divisor, vector::Vector* vec) { real_type* divisor_data = divisor->getData(memory::DEVICE); real_type* vec_data = vec->getData(memory::DEVICE); index_type n = vec->getSize(); - cuda::elementwiseDivide(n, divisor_data, vec_data); + cuda::scaleInv(n, divisor_data, vec_data); vec->setDataUpdated(memory::DEVICE); return 0; } @@ -371,12 +371,12 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandlerCuda::elementwiseMax(vector::Vector* x, vector::Vector* y) + int VectorHandlerCuda::max(vector::Vector* x, vector::Vector* y) { real_type* x_data = x->getData(memory::DEVICE); real_type* y_data = y->getData(memory::DEVICE); index_type n = y->getSize(); - cuda::elementwiseMax(n, x_data, y_data); + cuda::max(n, x_data, y_data); y->setDataUpdated(memory::DEVICE); return 0; } diff --git a/resolve/vector/VectorHandlerCuda.hpp b/resolve/vector/VectorHandlerCuda.hpp index 8b84bc28e..f2793361b 100644 --- a/resolve/vector/VectorHandlerCuda.hpp +++ b/resolve/vector/VectorHandlerCuda.hpp @@ -67,24 +67,24 @@ namespace ReSolve virtual void scal(vector::Vector* diag, vector::Vector* vec); /** - * @brief elementwiseDivide: divides a vector's elements by another's + * @brief scaleInv: divides a vector's elements by another's * * @param[in] divisor vector of size n x 1 * @param[in,out] vec vector of size n x 1 (this is where the result is stored) * * @return 0 if successful, 1 otherwise */ - virtual int elementwiseDivide(vector::Vector* divisor, vector::Vector* vec); + virtual int scaleInv(vector::Vector* divisor, vector::Vector* vec); /** - * @brief elementwiseMax: calculate the element-wise maximum of two vectors + * @brief max: calculate the element-wise maximum of two vectors * * @param[in] x vector of size n x 1 * @param[in,out] y vector of size n x 1 (this is where the result is stored) * * @return 0 if successful, 1 otherwise */ - virtual int elementwiseMax(vector::Vector* x, vector::Vector* y); + virtual int max(vector::Vector* x, vector::Vector* y); /** * @brief abs: calculate the element-wise absolute value of a vector diff --git a/resolve/vector/VectorHandlerHip.cpp b/resolve/vector/VectorHandlerHip.cpp index f6b1fe22a..5ae20c026 100644 --- a/resolve/vector/VectorHandlerHip.cpp +++ b/resolve/vector/VectorHandlerHip.cpp @@ -345,12 +345,12 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandlerHip::elementwiseDivide(vector::Vector* divisor, vector::Vector* vec) + int VectorHandlerHip::scaleInv(vector::Vector* divisor, vector::Vector* vec) { real_type* divisor_data = divisor->getData(memory::DEVICE); real_type* vec_data = vec->getData(memory::DEVICE); index_type n = vec->getSize(); - hip::elementwiseDivide(n, divisor_data, vec_data); + hip::scaleInv(n, divisor_data, vec_data); vec->setDataUpdated(memory::DEVICE); return 0; } @@ -365,12 +365,12 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandlerHip::elementwiseMax(vector::Vector* x, vector::Vector* y) + int VectorHandlerHip::max(vector::Vector* x, vector::Vector* y) { real_type* x_data = x->getData(memory::DEVICE); real_type* y_data = y->getData(memory::DEVICE); index_type n = y->getSize(); - hip::elementwiseMax(n, x_data, y_data); + hip::max(n, x_data, y_data); y->setDataUpdated(memory::DEVICE); return 0; } diff --git a/resolve/vector/VectorHandlerHip.hpp b/resolve/vector/VectorHandlerHip.hpp index 80df606ce..70c6ab8ca 100644 --- a/resolve/vector/VectorHandlerHip.hpp +++ b/resolve/vector/VectorHandlerHip.hpp @@ -67,24 +67,24 @@ namespace ReSolve virtual void scal(vector::Vector* diag, vector::Vector* vec); /** - * @brief elementwiseDivide: divides a vector's elements by another's + * @brief scaleInv: divides a vector's elements by another's * * @param[in] divisor vector of size n x 1 * @param[in,out] vec vector of size n x 1 (this is where the result is stored) * * @return 0 if successful, 1 otherwise */ - virtual int elementwiseDivide(vector::Vector* divisor, vector::Vector* vec); + virtual int scaleInv(vector::Vector* divisor, vector::Vector* vec); /** - * @brief elementwiseMax: calculate the element-wise maximum of two vectors + * @brief max: calculate the element-wise maximum of two vectors * * @param[in] x vector of size n x 1 * @param[in,out] y vector of size n x 1 (this is where the result is stored) * * @return 0 if successful, 1 otherwise */ - virtual int elementwiseMax(vector::Vector* x, vector::Vector* y); + virtual int max(vector::Vector* x, vector::Vector* y); /** * @brief abs: calculate the element-wise absolute value of a vector diff --git a/resolve/vector/VectorHandlerImpl.hpp b/resolve/vector/VectorHandlerImpl.hpp index 12d9df21a..18df3832a 100644 --- a/resolve/vector/VectorHandlerImpl.hpp +++ b/resolve/vector/VectorHandlerImpl.hpp @@ -46,10 +46,10 @@ namespace ReSolve virtual void scal(vector::Vector* diag, vector::Vector* vec) = 0; // Divide the elements of a vector by the elements of another vector - virtual int elementwiseDivide(vector::Vector* divisor, vector::Vector* vec) = 0; + virtual int scaleInv(vector::Vector* divisor, vector::Vector* vec) = 0; // Compute element-wise max of two vectors - virtual int elementwiseMax(vector::Vector* x, vector::Vector* y) = 0; + virtual int max(vector::Vector* x, vector::Vector* y) = 0; // Compute element-wise absolute value of a vector virtual int abs(vector::Vector* x) = 0; diff --git a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp index 290f05a7a..98ad77806 100644 --- a/tests/unit/vector/VectorHandlerTests.hpp +++ b/tests/unit/vector/VectorHandlerTests.hpp @@ -280,7 +280,7 @@ namespace ReSolve return status.report(__func__); } - TestOutcome elementwiseDivide(index_type N) + TestOutcome scaleInv(index_type N) { TestStatus status; @@ -301,7 +301,7 @@ namespace ReSolve } divisor.copyFromExternal(divisor_data.get(), memory::HOST, memspace_); - handler_.elementwiseDivide(&divisor, &vec, memspace_); + handler_.scaleInv(&divisor, &vec, memspace_); if (memspace_ == memory::DEVICE) { @@ -322,7 +322,7 @@ namespace ReSolve return status.report(__func__); } - TestOutcome elementwiseMax(index_type N) + TestOutcome max(index_type N) { TestStatus status; @@ -350,7 +350,7 @@ namespace ReSolve x.copyFromExternal(x_data.get(), memory::HOST, memspace_); y.copyFromExternal(y_data.get(), memory::HOST, memspace_); - handler_.elementwiseMax(&x, &y, memspace_); + handler_.max(&x, &y, memspace_); if (memspace_ == memory::DEVICE) { diff --git a/tests/unit/vector/runVectorHandlerTests.cpp b/tests/unit/vector/runVectorHandlerTests.cpp index 933061f70..8263811de 100644 --- a/tests/unit/vector/runVectorHandlerTests.cpp +++ b/tests/unit/vector/runVectorHandlerTests.cpp @@ -25,8 +25,8 @@ int main(int, char**) result += test.axpyMulti(100, 10); result += test.massDot(100, 10); result += test.scale(100); - result += test.elementwiseDivide(100); - result += test.elementwiseMax(100); + result += test.scaleInv(100); + result += test.max(100); result += test.abs(100); std::cout << "\n"; @@ -51,8 +51,8 @@ int main(int, char**) result += test.massDot(1000, 30); result += test.amax(1000); result += test.scale(1000); - result += test.elementwiseDivide(1000); - result += test.elementwiseMax(1000); + result += test.scaleInv(1000); + result += test.max(1000); result += test.abs(1000); std::cout << "\n"; @@ -78,8 +78,8 @@ int main(int, char**) result += test.massDot(1000, 30); result += test.amax(1000); result += test.scale(1000); - result += test.elementwiseDivide(1000); - result += test.elementwiseMax(1000); + result += test.scaleInv(1000); + result += test.max(1000); result += test.abs(1000); std::cout << "\n"; From c26d2110f3076a83089b55d1f8187ff810b12929 Mon Sep 17 00:00:00 2001 From: Alexander Novotny Date: Wed, 4 Mar 2026 11:38:52 -0500 Subject: [PATCH 07/14] Update abs signature --- resolve/cuda/cudaVectorKernels.cu | 26 +++++++++++++++--------- resolve/cuda/cudaVectorKernels.h | 2 +- resolve/hip/hipVectorKernels.h | 2 +- resolve/hip/hipVectorKernels.hip | 24 +++++++++++++--------- resolve/vector/VectorHandler.cpp | 16 +++++++++------ resolve/vector/VectorHandler.hpp | 2 +- resolve/vector/VectorHandlerCpu.cpp | 11 +++++----- resolve/vector/VectorHandlerCpu.hpp | 2 +- resolve/vector/VectorHandlerCuda.cpp | 14 +++++++------ resolve/vector/VectorHandlerCuda.hpp | 2 +- resolve/vector/VectorHandlerHip.cpp | 14 +++++++------ resolve/vector/VectorHandlerHip.hpp | 2 +- resolve/vector/VectorHandlerImpl.hpp | 2 +- tests/unit/vector/VectorHandlerTests.hpp | 16 +++++++++++++-- 14 files changed, 83 insertions(+), 52 deletions(-) diff --git a/resolve/cuda/cudaVectorKernels.cu b/resolve/cuda/cudaVectorKernels.cu index ad68ad767..43fae95d7 100644 --- a/resolve/cuda/cudaVectorKernels.cu +++ b/resolve/cuda/cudaVectorKernels.cu @@ -12,6 +12,8 @@ #include #include +#include "resolve/Common.hpp" + namespace ReSolve { namespace cuda @@ -132,13 +134,15 @@ namespace ReSolve /** * @brief Computes the element-wise absolute value of a vector. * - * @param[in] n - size of the vector - * @param[in, out] x - Vector values. Changes in place. + * @param[in] n - size of the vector + * @param[in] in - Vector input + * @param[out] out - Vector output * * @todo Decide how to allow user to configure grid and block sizes. */ - __global__ void abs(index_type n, - real_type* x) + __global__ void abs(index_type n, + const real_type* in, + real_type* out) { // Get the index of the element to be processed index_type idx = blockIdx.x * blockDim.x + threadIdx.x; @@ -147,7 +151,7 @@ namespace ReSolve if (idx < n) { // Compute absolute value of element - x[idx] = fabs(x[idx]); + out[idx] = fabs(in[idx]); } } } // namespace kernels @@ -231,19 +235,21 @@ namespace ReSolve /** * @brief Wrapper that computes the element-wise absolute value of a vector. * - * @param[in] n - size of the vector - * @param[in, out] x - Vector values. Changes in place. + * @param[in] n - size of the vector + * @param[in] in - Vector input + * @param[out] out - Vector output * * @todo Decide how to allow user to configure grid and block sizes. */ - void abs(index_type n, - real_type* x) + void abs(index_type n, + const real_type* in, + real_type* out) { // Define block size and number of blocks const int block_size = 256; int num_blocks = (n + block_size - 1) / block_size; // Launch the kernel - kernels::abs<<>>(n, x); + kernels::abs<<>>(n, in, out); } } // namespace cuda } // namespace ReSolve diff --git a/resolve/cuda/cudaVectorKernels.h b/resolve/cuda/cudaVectorKernels.h index bd2f00d6e..bc5928dfd 100644 --- a/resolve/cuda/cudaVectorKernels.h +++ b/resolve/cuda/cudaVectorKernels.h @@ -20,6 +20,6 @@ namespace ReSolve void scale(index_type n, const real_type* diag, real_type* vec); void scaleInv(index_type n, const real_type* divisor, real_type* vec); void max(index_type n, const real_type* x, real_type* y); - void abs(index_type n, real_type* x); + void abs(index_type n, const real_type* in, real_type* out); } // namespace cuda } // namespace ReSolve diff --git a/resolve/hip/hipVectorKernels.h b/resolve/hip/hipVectorKernels.h index c42569319..0db575725 100644 --- a/resolve/hip/hipVectorKernels.h +++ b/resolve/hip/hipVectorKernels.h @@ -20,6 +20,6 @@ namespace ReSolve void scale(index_type n, const real_type* diag, real_type* vec); void scaleInv(index_type n, const real_type* divisor, real_type* vec); void max(index_type n, const real_type* x, real_type* y); - void abs(index_type n, real_type* x); + void abs(index_type n, const real_type* in, real_type* out); } // namespace hip } // namespace ReSolve diff --git a/resolve/hip/hipVectorKernels.hip b/resolve/hip/hipVectorKernels.hip index bdcd548c6..8b0e97c4a 100644 --- a/resolve/hip/hipVectorKernels.hip +++ b/resolve/hip/hipVectorKernels.hip @@ -121,13 +121,15 @@ namespace ReSolve { /** * @brief Computes the element-wise absolute value of a vector. * - * @param[in] n - size of the vector - * @param[in, out] x - Vector values. Changes in place. + * @param[in] n - size of the vector + * @param[in] in - Vector input + * @param[out] out - Vector output * * @todo Decide how to allow user to configure grid and block sizes. */ - __global__ void abs(index_type n, - real_type* x) + __global__ void abs(index_type n, + const real_type* in, + real_type* out) { // Get the index of the element to be processed index_type idx = blockIdx.x * blockDim.x + threadIdx.x; @@ -136,7 +138,7 @@ namespace ReSolve { if (idx < n) { // Compute absolute value of element - x[idx] = fabs(x[idx]); + out[idx] = fabs(in[idx]); } } } // namespace kernels @@ -220,19 +222,21 @@ namespace ReSolve { /** * @brief Wrapper that computes the element-wise absolute value of a vector. * - * @param[in] n - size of the vector - * @param[in, out] x - Vector values. Changes in place. + * @param[in] n - size of the vector + * @param[in] in - Vector input + * @param[out] out - Vector output * * @todo Decide how to allow user to configure grid and block sizes. */ - void abs(index_type n, - real_type* x) + void abs(index_type n, + const real_type* in, + real_type* out) { // Define block size and number of blocks const int block_size = 256; int num_blocks = (n + block_size - 1) / block_size; // Launch the kernel - kernels::abs<<>>(n, x); + kernels::abs<<>>(n, in, out); } } // namespace hip } // namespace ReSolve diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index af81bd4ee..026783d28 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -402,22 +402,26 @@ namespace ReSolve /** * @brief Computes the element-wise absolute value of a vector. * - * @param[in,out] x - Input and output vector - * @param[in] memspace - Device where the operation is computed + * @param[in] in - Input vector + * @param[out] out - Output vector + * @param[in] memspace - Device where the operation is computed * * @return 0 if successful, 1 otherwise */ - int VectorHandler::abs(vector::Vector* x, memory::MemorySpace memspace) + int VectorHandler::abs(const vector::Vector* in, vector::Vector* out, memory::MemorySpace memspace) { - assert(x->getData(memspace) != nullptr && "Vector data is null!\n"); + assert(in->getData(memspace) != nullptr && "Vector in data is null!"); + assert(out->getData(memspace) != nullptr && "Vector out data is null!"); + assert(in->getSize() == out->getSize() && "Vector sizes do not match!"); + using namespace ReSolve::memory; switch (memspace) { case HOST: - return cpuImpl_->abs(x); + return cpuImpl_->abs(in, out); break; case DEVICE: - return devImpl_->abs(x); + return devImpl_->abs(in, out); break; } return 1; diff --git a/resolve/vector/VectorHandler.hpp b/resolve/vector/VectorHandler.hpp index 4ed46ad7b..7e36b34c6 100644 --- a/resolve/vector/VectorHandler.hpp +++ b/resolve/vector/VectorHandler.hpp @@ -70,7 +70,7 @@ namespace ReSolve int scaleInv(vector::Vector* divisor, vector::Vector* vec, memory::MemorySpace memspace); int max(vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace); - int abs(vector::Vector* x, memory::MemorySpace memspace); + int abs(const vector::Vector* in, vector::Vector* out, memory::MemorySpace memspace); // Vector infinity norm real_type amax(vector::Vector* x, memory::MemorySpace memspace); diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp index d9f285dd1..780ba4777 100644 --- a/resolve/vector/VectorHandlerCpu.cpp +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -372,16 +372,17 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandlerCpu::abs(vector::Vector* x) + int VectorHandlerCpu::abs(const vector::Vector* in, vector::Vector* out) { - real_type* x_data = x->getData(memory::HOST); - index_type n = x->getSize(); + const real_type* in_data = in->getData(memory::HOST); + real_type* out_data = out->getData(memory::HOST); + index_type n = in->getSize(); for (index_type i = 0; i < n; ++i) { - x_data[i] = std::abs(x_data[i]); + out_data[i] = std::abs(in_data[i]); } - x->setDataUpdated(memory::HOST); + out->setDataUpdated(memory::HOST); return 0; } diff --git a/resolve/vector/VectorHandlerCpu.hpp b/resolve/vector/VectorHandlerCpu.hpp index 79714bd8d..7748fa4a1 100644 --- a/resolve/vector/VectorHandlerCpu.hpp +++ b/resolve/vector/VectorHandlerCpu.hpp @@ -61,7 +61,7 @@ namespace ReSolve virtual int max(vector::Vector* x, vector::Vector* y); - virtual int abs(vector::Vector* x); + virtual int abs(const vector::Vector* in, vector::Vector* out); private: LinAlgWorkspaceCpu* workspace_; diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index 040614748..e4c9896c7 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -384,16 +384,18 @@ namespace ReSolve /** * @brief Calculate element-wise absolute value of a vector in CUDA * - * @param[in, out] x - The vector (result is returned in x) + * @param[in] in - The input vector + * @param[out] out - The output * * @return 0 if successful, 1 otherwise */ - int VectorHandlerCuda::abs(vector::Vector* x) + int VectorHandlerCuda::abs(const vector::Vector* in, vector::Vector* out) { - real_type* x_data = x->getData(memory::DEVICE); - index_type n = x->getSize(); - cuda::abs(n, x_data); - x->setDataUpdated(memory::DEVICE); + const real_type* in_data = in->getData(memory::DEVICE); + real_type* out_data = out->getData(memory::DEVICE); + index_type n = in->getSize(); + cuda::abs(n, in_data, out_data); + out->setDataUpdated(memory::DEVICE); return 0; } diff --git a/resolve/vector/VectorHandlerCuda.hpp b/resolve/vector/VectorHandlerCuda.hpp index f2793361b..97f1229de 100644 --- a/resolve/vector/VectorHandlerCuda.hpp +++ b/resolve/vector/VectorHandlerCuda.hpp @@ -93,7 +93,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - virtual int abs(vector::Vector* x); + virtual int abs(const vector::Vector* in, vector::Vector* out); private: MemoryHandler mem_; ///< Device memory manager object diff --git a/resolve/vector/VectorHandlerHip.cpp b/resolve/vector/VectorHandlerHip.cpp index 5ae20c026..f87f9a1c6 100644 --- a/resolve/vector/VectorHandlerHip.cpp +++ b/resolve/vector/VectorHandlerHip.cpp @@ -378,16 +378,18 @@ namespace ReSolve /** * @brief Calculate element-wise absolute value of a vector in HIP * - * @param[in, out] x - The vector (result is returned in x) + * @param[in] in - The input vector + * @param[out] out - The output * * @return 0 if successful, 1 otherwise */ - int VectorHandlerHip::abs(vector::Vector* x) + int VectorHandlerHip::abs(const vector::Vector* in, vector::Vector* out) { - real_type* x_data = x->getData(memory::DEVICE); - index_type n = x->getSize(); - hip::abs(n, x_data); - x->setDataUpdated(memory::DEVICE); + const real_type* in_data = in->getData(memory::DEVICE); + real_type* out_data = out->getData(memory::DEVICE); + index_type n = in->getSize(); + hip::abs(n, in_data, out_data); + out->setDataUpdated(memory::DEVICE); return 0; } diff --git a/resolve/vector/VectorHandlerHip.hpp b/resolve/vector/VectorHandlerHip.hpp index 70c6ab8ca..af5a6b89d 100644 --- a/resolve/vector/VectorHandlerHip.hpp +++ b/resolve/vector/VectorHandlerHip.hpp @@ -93,7 +93,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - virtual int abs(vector::Vector* x); + virtual int abs(const vector::Vector* in, vector::Vector* out); private: LinAlgWorkspaceHIP* workspace_; diff --git a/resolve/vector/VectorHandlerImpl.hpp b/resolve/vector/VectorHandlerImpl.hpp index 18df3832a..8ab3de234 100644 --- a/resolve/vector/VectorHandlerImpl.hpp +++ b/resolve/vector/VectorHandlerImpl.hpp @@ -52,7 +52,7 @@ namespace ReSolve virtual int max(vector::Vector* x, vector::Vector* y) = 0; // Compute element-wise absolute value of a vector - virtual int abs(vector::Vector* x) = 0; + virtual int abs(const vector::Vector* in, vector::Vector* out) = 0; /** gemv: * if `transpose = N` (no), `x = beta*x + alpha*V*y`, diff --git a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp index 98ad77806..6780d632a 100644 --- a/tests/unit/vector/VectorHandlerTests.hpp +++ b/tests/unit/vector/VectorHandlerTests.hpp @@ -376,8 +376,10 @@ namespace ReSolve TestStatus status; vector::Vector x(N); + vector::Vector y(N); x.allocate(memspace_); + y.allocate(memspace_); auto x_data = std::unique_ptr(new real_type[N]); for (size_t i = 0; i < static_cast(N); ++i) @@ -393,18 +395,28 @@ namespace ReSolve } x.copyFromExternal(x_data.get(), memory::HOST, memspace_); - handler_.abs(&x, memspace_); + handler_.abs(&x, &y, memspace_); + handler_.abs(&x, &x, memspace_); if (memspace_ == memory::DEVICE) { x.syncData(memory::HOST); + y.syncData(memory::HOST); } for (index_type i = 0; i < N; ++i) { if (!isEqual(x.getData(memory::HOST)[i], (real_type) i)) { - std::cout << "Solution vector element y[" << i << "] = " << x.getData(memory::HOST)[i] + std::cout << "Solution vector element x[" << i << "] = " << x.getData(memory::HOST)[i] + << ", expected: " << (real_type) i << "\n"; + status *= false; + break; + } + + if (!isEqual(y.getData(memory::HOST)[i], (real_type) i)) + { + std::cout << "Solution vector element y[" << i << "] = " << y.getData(memory::HOST)[i] << ", expected: " << (real_type) i << "\n"; status *= false; break; From a321661d20a981cffcaa668fe6a27c83d4749910 Mon Sep 17 00:00:00 2001 From: Alexander Novotny Date: Wed, 4 Mar 2026 13:29:18 -0500 Subject: [PATCH 08/14] Update max signature --- resolve/cuda/cudaVectorKernels.cu | 26 +++++++++++--------- resolve/cuda/cudaVectorKernels.h | 2 +- resolve/hip/hipVectorKernels.h | 2 +- resolve/hip/hipVectorKernels.hip | 30 ++++++++++++++---------- resolve/vector/VectorHandler.cpp | 19 ++++++++------- resolve/vector/VectorHandler.hpp | 2 +- resolve/vector/VectorHandlerCpu.cpp | 20 +++++++++------- resolve/vector/VectorHandlerCpu.hpp | 2 +- resolve/vector/VectorHandlerCuda.cpp | 20 +++++++++------- resolve/vector/VectorHandlerCuda.hpp | 7 +++--- resolve/vector/VectorHandlerHip.cpp | 20 +++++++++------- resolve/vector/VectorHandlerHip.hpp | 7 +++--- resolve/vector/VectorHandlerImpl.hpp | 2 +- tests/unit/vector/VectorHandlerTests.hpp | 13 +++++++++- 14 files changed, 101 insertions(+), 71 deletions(-) diff --git a/resolve/cuda/cudaVectorKernels.cu b/resolve/cuda/cudaVectorKernels.cu index 43fae95d7..af25849a1 100644 --- a/resolve/cuda/cudaVectorKernels.cu +++ b/resolve/cuda/cudaVectorKernels.cu @@ -108,17 +108,19 @@ namespace ReSolve } /** - * @brief Wrapper that computes the element-wise max of two vectors. + * @brief Computes the element-wise max of two vectors. * - * @param[in] n - size of the vectors - * @param[in] x - First vector values - * @param[in, out] y - Second vector values. Changes in place. + * @param[in] n - size of the vectors + * @param[in] x - First vector values + * @param[in] y - Second vector values + * @param[out] out - Output vector values * * @todo Decide how to allow user to configure grid and block sizes. */ __global__ void max(index_type n, const real_type* x, - real_type* y) + const real_type* y, + real_type* out) { // Get the index of the element to be processed index_type idx = blockIdx.x * blockDim.x + threadIdx.x; @@ -127,7 +129,7 @@ namespace ReSolve if (idx < n) { // Compute maximum of elements - y[idx] = max(x[idx], y[idx]); + out[idx] = max(x[idx], y[idx]); } } @@ -215,21 +217,23 @@ namespace ReSolve /** * @brief Wrapper that computes the element-wise max of two vectors. * - * @param[in] n - size of the vectors - * @param[in] x - First vector values - * @param[in, out] y - Second vector values. Changes in place. + * @param[in] n - size of the vectors + * @param[in] x - First vector values + * @param[in] y - Second vector values + * @param[out] out - Output vector values * * @todo Decide how to allow user to configure grid and block sizes. */ void max(index_type n, const real_type* x, - real_type* y) + const real_type* y, + real_type* out) { // Define block size and number of blocks const int block_size = 256; int num_blocks = (n + block_size - 1) / block_size; // Launch the kernel - kernels::max<<>>(n, x, y); + kernels::max<<>>(n, x, y, out); } /** diff --git a/resolve/cuda/cudaVectorKernels.h b/resolve/cuda/cudaVectorKernels.h index bc5928dfd..943b85207 100644 --- a/resolve/cuda/cudaVectorKernels.h +++ b/resolve/cuda/cudaVectorKernels.h @@ -19,7 +19,7 @@ namespace ReSolve void addConst(index_type n, real_type val, real_type* arr); void scale(index_type n, const real_type* diag, real_type* vec); void scaleInv(index_type n, const real_type* divisor, real_type* vec); - void max(index_type n, const real_type* x, real_type* y); + void max(index_type n, const real_type* x, const real_type* y, real_type* out); void abs(index_type n, const real_type* in, real_type* out); } // namespace cuda } // namespace ReSolve diff --git a/resolve/hip/hipVectorKernels.h b/resolve/hip/hipVectorKernels.h index 0db575725..327cb3775 100644 --- a/resolve/hip/hipVectorKernels.h +++ b/resolve/hip/hipVectorKernels.h @@ -19,7 +19,7 @@ namespace ReSolve void addConst(index_type n, real_type val, real_type* arr); void scale(index_type n, const real_type* diag, real_type* vec); void scaleInv(index_type n, const real_type* divisor, real_type* vec); - void max(index_type n, const real_type* x, real_type* y); + void max(index_type n, const real_type* x, const real_type* y, real_type* out); void abs(index_type n, const real_type* in, real_type* out); } // namespace hip } // namespace ReSolve diff --git a/resolve/hip/hipVectorKernels.hip b/resolve/hip/hipVectorKernels.hip index 8b0e97c4a..a3a245511 100644 --- a/resolve/hip/hipVectorKernels.hip +++ b/resolve/hip/hipVectorKernels.hip @@ -95,17 +95,19 @@ namespace ReSolve { } /** - * @brief Wrapper that computes the element-wise max of two vectors. + * @brief Computes the element-wise max of two vectors. * - * @param[in] n - size of the vectors - * @param[in] x - First vector values - * @param[in, out] y - Second vector values. Changes in place. + * @param[in] n - size of the vectors + * @param[in] x - First vector values + * @param[in] y - Second vector values + * @param[out] out - Output vector values * * @todo Decide how to allow user to configure grid and block sizes. */ __global__ void max(index_type n, - const real_type* x, - real_type* y) + const real_type* x, + const real_type* y, + real_type* out) { // Get the index of the element to be processed index_type idx = blockIdx.x * blockDim.x + threadIdx.x; @@ -114,7 +116,7 @@ namespace ReSolve { if (idx < n) { // Compute maximum of elements - y[idx] = max(x[idx], y[idx]); + out[idx] = max(x[idx], y[idx]); } } @@ -202,21 +204,23 @@ namespace ReSolve { /** * @brief Wrapper that computes the element-wise max of two vectors. * - * @param[in] n - size of the vectors - * @param[in] x - First vector values - * @param[in, out] y - Second vector values. Changes in place. + * @param[in] n - size of the vectors + * @param[in] x - First vector values + * @param[in] y - Second vector values + * @param[out] out - Output vector values * * @todo Decide how to allow user to configure grid and block sizes. */ void max(index_type n, - const real_type* x, - real_type* y) + const real_type* x, + const real_type* y, + real_type* out) { // Define block size and number of blocks const int block_size = 256; int num_blocks = (n + block_size - 1) / block_size; // Launch the kernel - kernels::max<<>>(n, x, y); + kernels::max<<>>(n, x, y, out); } /** diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index 026783d28..e8b1e168b 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -373,27 +373,30 @@ namespace ReSolve /** * @brief Takes the element-wise max between two vectors. * - * @param[in] x - The first vector - * @param[in,out] y - The second vector (result is returned in y) - * @param[in] memspace - Device where the operation is computed + * @param[in] x - The first vector + * @param[in] y - The second vector + * @param[out] out - The output vector + * @param[in] memspace - Device where the operation is computed * * @pre The two vectors must be the same size * * @return 0 if successful, 1 otherwise */ - int VectorHandler::max(vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace) + int VectorHandler::max(const vector::Vector* x, const vector::Vector* y, vector::Vector* out, memory::MemorySpace memspace) { assert(x->getSize() == y->getSize() && "Vectors must be the same size."); - assert(x->getData(memspace) != nullptr && "Vector x data is null!\n"); - assert(y->getData(memspace) != nullptr && "Vector y data is null!\n"); + assert(x->getSize() == out->getSize() && "Vectors must be the same size."); + assert(x->getData(memspace) != nullptr && "Vector x data is null!"); + assert(y->getData(memspace) != nullptr && "Vector y data is null!"); + assert(out->getData(memspace) != nullptr && "Vector out data is null!"); using namespace ReSolve::memory; switch (memspace) { case HOST: - return cpuImpl_->max(x, y); + return cpuImpl_->max(x, y, out); break; case DEVICE: - return devImpl_->max(x, y); + return devImpl_->max(x, y, out); break; } return 1; diff --git a/resolve/vector/VectorHandler.hpp b/resolve/vector/VectorHandler.hpp index 7e36b34c6..b276168e9 100644 --- a/resolve/vector/VectorHandler.hpp +++ b/resolve/vector/VectorHandler.hpp @@ -68,7 +68,7 @@ namespace ReSolve memory::MemorySpace memspace); int scaleInv(vector::Vector* divisor, vector::Vector* vec, memory::MemorySpace memspace); - int max(vector::Vector* x, vector::Vector* y, memory::MemorySpace memspace); + int max(const vector::Vector* x, const vector::Vector* y, vector::Vector* out, memory::MemorySpace memspace); int abs(const vector::Vector* in, vector::Vector* out, memory::MemorySpace memspace); diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp index 780ba4777..70f1bb007 100644 --- a/resolve/vector/VectorHandlerCpu.cpp +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -344,24 +344,26 @@ namespace ReSolve * @brief Take the element-wise max of two vectors. * Each element of the output will be the maximum value of the corresponding elements in the input vectors. * - * @param[in] x - First vector - * @param[in,out] y - Second vector (output) + * @param[in] x - First input vector + * @param[in] y - Second input vector + * @param[out] out - Output vector * - * @pre The two vectors must be the same size + * @pre The three vectors must be the same size * * @return 0 if successful, 1 otherwise */ - int VectorHandlerCpu::max(vector::Vector* x, vector::Vector* y) + int VectorHandlerCpu::max(const vector::Vector* x, const vector::Vector* y, vector::Vector* out) { - real_type* x_data = x->getData(memory::HOST); - real_type* y_data = y->getData(memory::HOST); - index_type n = y->getSize(); + const real_type* x_data = x->getData(memory::HOST); + const real_type* y_data = y->getData(memory::HOST); + real_type* out_data = out->getData(memory::HOST); + index_type n = y->getSize(); for (index_type i = 0; i < n; ++i) { - y_data[i] = std::max(x_data[i], y_data[i]); + out_data[i] = std::max(x_data[i], y_data[i]); } - y->setDataUpdated(memory::HOST); + out->setDataUpdated(memory::HOST); return 0; } diff --git a/resolve/vector/VectorHandlerCpu.hpp b/resolve/vector/VectorHandlerCpu.hpp index 7748fa4a1..13dddd9ef 100644 --- a/resolve/vector/VectorHandlerCpu.hpp +++ b/resolve/vector/VectorHandlerCpu.hpp @@ -59,7 +59,7 @@ namespace ReSolve virtual int scaleInv(vector::Vector* divisor, vector::Vector* vec); - virtual int max(vector::Vector* x, vector::Vector* y); + virtual int max(const vector::Vector* x, const vector::Vector* y, vector::Vector* out); virtual int abs(const vector::Vector* in, vector::Vector* out); diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index e4c9896c7..483608ae5 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -364,20 +364,22 @@ namespace ReSolve /** * @brief Calculate element-wise maximum between two vectors in CUDA * - * @param[in] x - The first vector - * @param[in, out] y - The second vector (result is returned in y) + * @param[in] x - The first vector + * @param[in] y - The second vector + * @param[out] out - The output vector * - * @pre The two vectors must be the same size + * @pre The three vectors must be the same size * * @return 0 if successful, 1 otherwise */ - int VectorHandlerCuda::max(vector::Vector* x, vector::Vector* y) + int VectorHandlerCuda::max(const vector::Vector* x, const vector::Vector* y, vector::Vector* out) { - real_type* x_data = x->getData(memory::DEVICE); - real_type* y_data = y->getData(memory::DEVICE); - index_type n = y->getSize(); - cuda::max(n, x_data, y_data); - y->setDataUpdated(memory::DEVICE); + real_type* x_data = x->getData(memory::DEVICE); + real_type* y_data = y->getData(memory::DEVICE); + real_type* out_data = out->getData(memory::DEVICE); + index_type n = y->getSize(); + cuda::max(n, x_data, y_data, out_data); + out->setDataUpdated(memory::DEVICE); return 0; } diff --git a/resolve/vector/VectorHandlerCuda.hpp b/resolve/vector/VectorHandlerCuda.hpp index 97f1229de..b674eec37 100644 --- a/resolve/vector/VectorHandlerCuda.hpp +++ b/resolve/vector/VectorHandlerCuda.hpp @@ -79,12 +79,13 @@ namespace ReSolve /** * @brief max: calculate the element-wise maximum of two vectors * - * @param[in] x vector of size n x 1 - * @param[in,out] y vector of size n x 1 (this is where the result is stored) + * @param[in] x vector of size n x 1 + * @param[in] y vector of size n x 1 + * @param[out] out output vector of size n x 1 * * @return 0 if successful, 1 otherwise */ - virtual int max(vector::Vector* x, vector::Vector* y); + virtual int max(const vector::Vector* x, const vector::Vector* y, vector::Vector* out); /** * @brief abs: calculate the element-wise absolute value of a vector diff --git a/resolve/vector/VectorHandlerHip.cpp b/resolve/vector/VectorHandlerHip.cpp index f87f9a1c6..87d6f692c 100644 --- a/resolve/vector/VectorHandlerHip.cpp +++ b/resolve/vector/VectorHandlerHip.cpp @@ -358,20 +358,22 @@ namespace ReSolve /** * @brief Calculate element-wise maximum between two vectors in HIP * - * @param[in] x - The first vector - * @param[in, out] y - The second vector (result is returned in y) + * @param[in] x - The first vector + * @param[in] y - The second vector + * @param[out] out - The output vector * - * @pre The two vectors must be the same size + * @pre The three vectors must be the same size * * @return 0 if successful, 1 otherwise */ - int VectorHandlerHip::max(vector::Vector* x, vector::Vector* y) + int VectorHandlerHip::max(const vector::Vector* x, const vector::Vector* y, vector::Vector* out) { - real_type* x_data = x->getData(memory::DEVICE); - real_type* y_data = y->getData(memory::DEVICE); - index_type n = y->getSize(); - hip::max(n, x_data, y_data); - y->setDataUpdated(memory::DEVICE); + real_type* x_data = x->getData(memory::DEVICE); + real_type* y_data = y->getData(memory::DEVICE); + real_type* out_data = out->getData(memory::DEVICE); + index_type n = y->getSize(); + hip::max(n, x_data, y_data, out_data); + out->setDataUpdated(memory::DEVICE); return 0; } diff --git a/resolve/vector/VectorHandlerHip.hpp b/resolve/vector/VectorHandlerHip.hpp index af5a6b89d..f018a4a00 100644 --- a/resolve/vector/VectorHandlerHip.hpp +++ b/resolve/vector/VectorHandlerHip.hpp @@ -79,12 +79,13 @@ namespace ReSolve /** * @brief max: calculate the element-wise maximum of two vectors * - * @param[in] x vector of size n x 1 - * @param[in,out] y vector of size n x 1 (this is where the result is stored) + * @param[in] x vector of size n x 1 + * @param[in] y vector of size n x 1 + * @param[out] out output vector of size n x 1 * * @return 0 if successful, 1 otherwise */ - virtual int max(vector::Vector* x, vector::Vector* y); + virtual int max(const vector::Vector* x, const vector::Vector* y, vector::Vector* out); /** * @brief abs: calculate the element-wise absolute value of a vector diff --git a/resolve/vector/VectorHandlerImpl.hpp b/resolve/vector/VectorHandlerImpl.hpp index 8ab3de234..575275430 100644 --- a/resolve/vector/VectorHandlerImpl.hpp +++ b/resolve/vector/VectorHandlerImpl.hpp @@ -49,7 +49,7 @@ namespace ReSolve virtual int scaleInv(vector::Vector* divisor, vector::Vector* vec) = 0; // Compute element-wise max of two vectors - virtual int max(vector::Vector* x, vector::Vector* y) = 0; + virtual int max(const vector::Vector* x, const vector::Vector* y, vector::Vector* out) = 0; // Compute element-wise absolute value of a vector virtual int abs(const vector::Vector* in, vector::Vector* out) = 0; diff --git a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp index 6780d632a..ea43a9e9b 100644 --- a/tests/unit/vector/VectorHandlerTests.hpp +++ b/tests/unit/vector/VectorHandlerTests.hpp @@ -328,9 +328,11 @@ namespace ReSolve vector::Vector x(N); vector::Vector y(N); + vector::Vector z(N); x.allocate(memspace_); y.allocate(memspace_); + z.allocate(memspace_); auto x_data = std::unique_ptr(new real_type[N]); auto y_data = std::unique_ptr(new real_type[N]); @@ -350,7 +352,8 @@ namespace ReSolve x.copyFromExternal(x_data.get(), memory::HOST, memspace_); y.copyFromExternal(y_data.get(), memory::HOST, memspace_); - handler_.max(&x, &y, memspace_); + handler_.max(&x, &y, &z, memspace_); + handler_.max(&x, &y, &y, memspace_); if (memspace_ == memory::DEVICE) { @@ -366,6 +369,14 @@ namespace ReSolve status *= false; break; } + + if (!isEqual(z.getData(memory::HOST)[i], (real_type) (i + 1))) + { + std::cout << "Solution vector element z[" << i << "] = " << z.getData(memory::HOST)[i] + << ", expected: " << (real_type) (i + 1) << "\n"; + status *= false; + break; + } } return status.report(__func__); From e268bf2be6d092e64354f6a1ac105b38a91a398e Mon Sep 17 00:00:00 2001 From: Alexander Novotny Date: Wed, 4 Mar 2026 13:35:19 -0500 Subject: [PATCH 09/14] Remove function-internal block sizes --- resolve/cuda/cudaVectorKernels.cu | 14 +++++--------- resolve/hip/hipVectorKernels.hip | 14 +++++--------- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/resolve/cuda/cudaVectorKernels.cu b/resolve/cuda/cudaVectorKernels.cu index af25849a1..1f05e0bc0 100644 --- a/resolve/cuda/cudaVectorKernels.cu +++ b/resolve/cuda/cudaVectorKernels.cu @@ -158,6 +158,8 @@ namespace ReSolve } } // namespace kernels + constexpr index_type block_size = 256; + void setArrayConst(index_type n, real_type val, real_type* arr) { index_type num_blocks; @@ -207,9 +209,7 @@ namespace ReSolve const real_type* divisor, real_type* vec) { - // Define block size and number of blocks - const int block_size = 256; - int num_blocks = (n + block_size - 1) / block_size; + int num_blocks = (n + block_size - 1) / block_size; // Launch the kernel kernels::scaleInv<<>>(n, divisor, vec); } @@ -229,9 +229,7 @@ namespace ReSolve const real_type* y, real_type* out) { - // Define block size and number of blocks - const int block_size = 256; - int num_blocks = (n + block_size - 1) / block_size; + int num_blocks = (n + block_size - 1) / block_size; // Launch the kernel kernels::max<<>>(n, x, y, out); } @@ -249,9 +247,7 @@ namespace ReSolve const real_type* in, real_type* out) { - // Define block size and number of blocks - const int block_size = 256; - int num_blocks = (n + block_size - 1) / block_size; + int num_blocks = (n + block_size - 1) / block_size; // Launch the kernel kernels::abs<<>>(n, in, out); } diff --git a/resolve/hip/hipVectorKernels.hip b/resolve/hip/hipVectorKernels.hip index a3a245511..f155b48b7 100644 --- a/resolve/hip/hipVectorKernels.hip +++ b/resolve/hip/hipVectorKernels.hip @@ -145,6 +145,8 @@ namespace ReSolve { } } // namespace kernels + constexpr index_type block_size = 256; + void setArrayConst(index_type n, real_type c, real_type* v) { index_type num_blocks; @@ -194,9 +196,7 @@ namespace ReSolve { const real_type* divisor, real_type* vec) { - // Define block size and number of blocks - const int block_size = 256; - int num_blocks = (n + block_size - 1) / block_size; + int num_blocks = (n + block_size - 1) / block_size; // Launch the kernel kernels::scaleInv<<>>(n, divisor, vec); } @@ -216,9 +216,7 @@ namespace ReSolve { const real_type* y, real_type* out) { - // Define block size and number of blocks - const int block_size = 256; - int num_blocks = (n + block_size - 1) / block_size; + int num_blocks = (n + block_size - 1) / block_size; // Launch the kernel kernels::max<<>>(n, x, y, out); } @@ -236,9 +234,7 @@ namespace ReSolve { const real_type* in, real_type* out) { - // Define block size and number of blocks - const int block_size = 256; - int num_blocks = (n + block_size - 1) / block_size; + int num_blocks = (n + block_size - 1) / block_size; // Launch the kernel kernels::abs<<>>(n, in, out); } From a2d77000276861a16c7748bc552f46a322690610 Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Thu, 12 Mar 2026 13:47:31 -0400 Subject: [PATCH 10/14] Temporarily remove const from VectorHandler methods. --- resolve/hip/hipVectorKernels.hip | 2 +- resolve/vector/VectorHandler.cpp | 4 ++-- resolve/vector/VectorHandler.hpp | 4 ++-- resolve/vector/VectorHandlerCpu.cpp | 4 ++-- resolve/vector/VectorHandlerCpu.hpp | 4 ++-- resolve/vector/VectorHandlerCuda.cpp | 4 ++-- resolve/vector/VectorHandlerCuda.hpp | 4 ++-- resolve/vector/VectorHandlerHip.cpp | 4 ++-- resolve/vector/VectorHandlerHip.hpp | 4 ++-- resolve/vector/VectorHandlerImpl.hpp | 4 ++-- 10 files changed, 19 insertions(+), 19 deletions(-) diff --git a/resolve/hip/hipVectorKernels.hip b/resolve/hip/hipVectorKernels.hip index f155b48b7..619694b93 100644 --- a/resolve/hip/hipVectorKernels.hip +++ b/resolve/hip/hipVectorKernels.hip @@ -116,7 +116,7 @@ namespace ReSolve { if (idx < n) { // Compute maximum of elements - out[idx] = max(x[idx], y[idx]); + out[idx] = fmax(x[idx], y[idx]); } } diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index e8b1e168b..3747d4a8f 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -382,7 +382,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandler::max(const vector::Vector* x, const vector::Vector* y, vector::Vector* out, memory::MemorySpace memspace) + int VectorHandler::max(/* const */ vector::Vector* x, /* const */ vector::Vector* y, vector::Vector* out, memory::MemorySpace memspace) { assert(x->getSize() == y->getSize() && "Vectors must be the same size."); assert(x->getSize() == out->getSize() && "Vectors must be the same size."); @@ -411,7 +411,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandler::abs(const vector::Vector* in, vector::Vector* out, memory::MemorySpace memspace) + int VectorHandler::abs(/* const */ vector::Vector* in, vector::Vector* out, memory::MemorySpace memspace) { assert(in->getData(memspace) != nullptr && "Vector in data is null!"); assert(out->getData(memspace) != nullptr && "Vector out data is null!"); diff --git a/resolve/vector/VectorHandler.hpp b/resolve/vector/VectorHandler.hpp index b276168e9..13f0a3b54 100644 --- a/resolve/vector/VectorHandler.hpp +++ b/resolve/vector/VectorHandler.hpp @@ -68,9 +68,9 @@ namespace ReSolve memory::MemorySpace memspace); int scaleInv(vector::Vector* divisor, vector::Vector* vec, memory::MemorySpace memspace); - int max(const vector::Vector* x, const vector::Vector* y, vector::Vector* out, memory::MemorySpace memspace); + int max(/* const */ vector::Vector* x, /* const */ vector::Vector* y, vector::Vector* out, memory::MemorySpace memspace); - int abs(const vector::Vector* in, vector::Vector* out, memory::MemorySpace memspace); + int abs(/* const */ vector::Vector* in, vector::Vector* out, memory::MemorySpace memspace); // Vector infinity norm real_type amax(vector::Vector* x, memory::MemorySpace memspace); diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp index 70f1bb007..ee31a0e7a 100644 --- a/resolve/vector/VectorHandlerCpu.cpp +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -352,7 +352,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandlerCpu::max(const vector::Vector* x, const vector::Vector* y, vector::Vector* out) + int VectorHandlerCpu::max(/* const */ vector::Vector* x, /* const */ vector::Vector* y, vector::Vector* out) { const real_type* x_data = x->getData(memory::HOST); const real_type* y_data = y->getData(memory::HOST); @@ -374,7 +374,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandlerCpu::abs(const vector::Vector* in, vector::Vector* out) + int VectorHandlerCpu::abs(/* const */ vector::Vector* in, vector::Vector* out) { const real_type* in_data = in->getData(memory::HOST); real_type* out_data = out->getData(memory::HOST); diff --git a/resolve/vector/VectorHandlerCpu.hpp b/resolve/vector/VectorHandlerCpu.hpp index 13dddd9ef..d153bb63a 100644 --- a/resolve/vector/VectorHandlerCpu.hpp +++ b/resolve/vector/VectorHandlerCpu.hpp @@ -59,9 +59,9 @@ namespace ReSolve virtual int scaleInv(vector::Vector* divisor, vector::Vector* vec); - virtual int max(const vector::Vector* x, const vector::Vector* y, vector::Vector* out); + virtual int max(/* const */ vector::Vector* x, /* const */ vector::Vector* y, vector::Vector* out); - virtual int abs(const vector::Vector* in, vector::Vector* out); + virtual int abs(/* const */ vector::Vector* in, vector::Vector* out); private: LinAlgWorkspaceCpu* workspace_; diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index 483608ae5..231216ab6 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -372,7 +372,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandlerCuda::max(const vector::Vector* x, const vector::Vector* y, vector::Vector* out) + int VectorHandlerCuda::max(/* const */ vector::Vector* x, /* const */ vector::Vector* y, vector::Vector* out) { real_type* x_data = x->getData(memory::DEVICE); real_type* y_data = y->getData(memory::DEVICE); @@ -391,7 +391,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandlerCuda::abs(const vector::Vector* in, vector::Vector* out) + int VectorHandlerCuda::abs(/* const */ vector::Vector* in, vector::Vector* out) { const real_type* in_data = in->getData(memory::DEVICE); real_type* out_data = out->getData(memory::DEVICE); diff --git a/resolve/vector/VectorHandlerCuda.hpp b/resolve/vector/VectorHandlerCuda.hpp index b674eec37..6a3ae757c 100644 --- a/resolve/vector/VectorHandlerCuda.hpp +++ b/resolve/vector/VectorHandlerCuda.hpp @@ -85,7 +85,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - virtual int max(const vector::Vector* x, const vector::Vector* y, vector::Vector* out); + virtual int max(/* const */ vector::Vector* x, /* const */ vector::Vector* y, vector::Vector* out); /** * @brief abs: calculate the element-wise absolute value of a vector @@ -94,7 +94,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - virtual int abs(const vector::Vector* in, vector::Vector* out); + virtual int abs(/* const */ vector::Vector* in, vector::Vector* out); private: MemoryHandler mem_; ///< Device memory manager object diff --git a/resolve/vector/VectorHandlerHip.cpp b/resolve/vector/VectorHandlerHip.cpp index 87d6f692c..deda8aec0 100644 --- a/resolve/vector/VectorHandlerHip.cpp +++ b/resolve/vector/VectorHandlerHip.cpp @@ -366,7 +366,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandlerHip::max(const vector::Vector* x, const vector::Vector* y, vector::Vector* out) + int VectorHandlerHip::max(/* const */ vector::Vector* x, /* const */ vector::Vector* y, vector::Vector* out) { real_type* x_data = x->getData(memory::DEVICE); real_type* y_data = y->getData(memory::DEVICE); @@ -385,7 +385,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandlerHip::abs(const vector::Vector* in, vector::Vector* out) + int VectorHandlerHip::abs(/* const */ vector::Vector* in, vector::Vector* out) { const real_type* in_data = in->getData(memory::DEVICE); real_type* out_data = out->getData(memory::DEVICE); diff --git a/resolve/vector/VectorHandlerHip.hpp b/resolve/vector/VectorHandlerHip.hpp index f018a4a00..f35da5bf6 100644 --- a/resolve/vector/VectorHandlerHip.hpp +++ b/resolve/vector/VectorHandlerHip.hpp @@ -85,7 +85,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - virtual int max(const vector::Vector* x, const vector::Vector* y, vector::Vector* out); + virtual int max(/* const */ vector::Vector* x, /* const */ vector::Vector* y, vector::Vector* out); /** * @brief abs: calculate the element-wise absolute value of a vector @@ -94,7 +94,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - virtual int abs(const vector::Vector* in, vector::Vector* out); + virtual int abs(/* const */ vector::Vector* in, vector::Vector* out); private: LinAlgWorkspaceHIP* workspace_; diff --git a/resolve/vector/VectorHandlerImpl.hpp b/resolve/vector/VectorHandlerImpl.hpp index 575275430..13e015259 100644 --- a/resolve/vector/VectorHandlerImpl.hpp +++ b/resolve/vector/VectorHandlerImpl.hpp @@ -49,10 +49,10 @@ namespace ReSolve virtual int scaleInv(vector::Vector* divisor, vector::Vector* vec) = 0; // Compute element-wise max of two vectors - virtual int max(const vector::Vector* x, const vector::Vector* y, vector::Vector* out) = 0; + virtual int max(/* const */ vector::Vector* x, /* const */ vector::Vector* y, vector::Vector* out) = 0; // Compute element-wise absolute value of a vector - virtual int abs(const vector::Vector* in, vector::Vector* out) = 0; + virtual int abs(/* const */ vector::Vector* in, vector::Vector* out) = 0; /** gemv: * if `transpose = N` (no), `x = beta*x + alpha*V*y`, From 9d6d78dba5477dd48a02a8e740ec7b9edf6841e7 Mon Sep 17 00:00:00 2001 From: pelesh Date: Thu, 12 Mar 2026 17:58:13 +0000 Subject: [PATCH 11/14] Fix CUDA bug in VectorHandler. --- resolve/cuda/cudaVectorKernels.cu | 2 +- resolve/hykkt/permutation/Permutation.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/resolve/cuda/cudaVectorKernels.cu b/resolve/cuda/cudaVectorKernels.cu index 1f05e0bc0..395f1340d 100644 --- a/resolve/cuda/cudaVectorKernels.cu +++ b/resolve/cuda/cudaVectorKernels.cu @@ -129,7 +129,7 @@ namespace ReSolve if (idx < n) { // Compute maximum of elements - out[idx] = max(x[idx], y[idx]); + out[idx] = fmax(x[idx], y[idx]); } } diff --git a/resolve/hykkt/permutation/Permutation.cpp b/resolve/hykkt/permutation/Permutation.cpp index ecf373461..761f7d000 100644 --- a/resolve/hykkt/permutation/Permutation.cpp +++ b/resolve/hykkt/permutation/Permutation.cpp @@ -247,8 +247,8 @@ namespace ReSolve real_type* old_val, real_type* new_val) { - index_type length; - index_type* apply_perm_; + index_type length{0}; + index_type* apply_perm_{nullptr}; switch (permutation) { case PERM_V: From 465d63f4d2f1356c405f216f73744cdc1221e4da Mon Sep 17 00:00:00 2001 From: Shaked Regev <35384901+shakedregev@users.noreply.github.com> Date: Thu, 12 Mar 2026 15:26:22 -0400 Subject: [PATCH 12/14] Follow floating point conventions --- tests/unit/vector/VectorHandlerTests.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp index ea43a9e9b..23c2480b3 100644 --- a/tests/unit/vector/VectorHandlerTests.hpp +++ b/tests/unit/vector/VectorHandlerTests.hpp @@ -341,7 +341,7 @@ namespace ReSolve if (i % 3 == 0) { x_data[i] = (real_type) (i + 1); - y_data[i] = (real_type) i * .5; + y_data[i] = (real_type) i * 0.5; } else { From 5cbd6a4e81e1748e1b6c1edd3262815277bcb9f3 Mon Sep 17 00:00:00 2001 From: pelesh Date: Tue, 17 Mar 2026 20:49:22 -0400 Subject: [PATCH 13/14] Rename scaleInv --> diagSolve --- CHANGELOG.md | 2 +- resolve/cuda/cudaVectorKernels.cu | 22 ++++++++++----------- resolve/cuda/cudaVectorKernels.h | 2 +- resolve/hip/hipVectorKernels.h | 2 +- resolve/hip/hipVectorKernels.hip | 20 +++++++++---------- resolve/hykkt/ruiz/CMakeLists.txt | 2 +- resolve/hykkt/spgemm/CMakeLists.txt | 2 +- resolve/vector/VectorHandler.cpp | 14 ++++++------- resolve/vector/VectorHandler.hpp | 2 +- resolve/vector/VectorHandlerCpu.cpp | 10 +++++----- resolve/vector/VectorHandlerCpu.hpp | 2 +- resolve/vector/VectorHandlerCuda.cpp | 10 +++++----- resolve/vector/VectorHandlerCuda.hpp | 7 ++++--- resolve/vector/VectorHandlerHip.cpp | 14 ++++++------- resolve/vector/VectorHandlerHip.hpp | 6 +++--- resolve/vector/VectorHandlerImpl.hpp | 2 +- tests/unit/vector/VectorHandlerTests.hpp | 16 +++++++-------- tests/unit/vector/runVectorHandlerTests.cpp | 6 +++--- 18 files changed, 71 insertions(+), 70 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eb5cbbe92..936a0f470 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,7 +18,7 @@ - Made Vector::copyDataTo able to copy from device to host and vice versa -- Added `scaleInv`, `max`, and `abs` vector operations. +- Added `diagSolve`, `max`, and `abs` vector operations. ## Changes to Re::Solve in release 0.99.2 diff --git a/resolve/cuda/cudaVectorKernels.cu b/resolve/cuda/cudaVectorKernels.cu index 395f1340d..c8f998e33 100644 --- a/resolve/cuda/cudaVectorKernels.cu +++ b/resolve/cuda/cudaVectorKernels.cu @@ -84,17 +84,17 @@ namespace ReSolve } /** - * @brief Divides a vector's elements by another vector's elements + * @brief Multiplies vector by an inverse of a diagonal matrix. * * @param[in] n - size of the vectors - * @param[in] d_val - divisor values + * @param[in] d_val - diagonal matrix values * @param[in, out] vec - vector to be divided. Changes in place. * * @todo Decide how to allow user to configure grid and block sizes. */ - __global__ void scaleInv(index_type n, - const real_type* d_val, - real_type* vec) + __global__ void diagSolve(index_type n, + const real_type* d_val, + real_type* vec) { // Get the index of the element to be processed index_type idx = blockIdx.x * blockDim.x + threadIdx.x; @@ -102,7 +102,7 @@ namespace ReSolve // Check if the index is within bounds if (idx < n) { - // Divide the vector element by the corresponding divisor value + // Divide the vector element by the corresponding diag value vec[idx] /= d_val[idx]; } } @@ -200,18 +200,18 @@ namespace ReSolve * @brief Wrapper that divides a vector's elements by another vector's elements * * @param[in] n - size of the vectors - * @param[in] divisor - divisor values + * @param[in] diag - diag values * @param[in, out] vec - vector to be divided. Changes in place. * * @todo Decide how to allow user to configure grid and block sizes. */ - void scaleInv(index_type n, - const real_type* divisor, - real_type* vec) + void diagSolve(index_type n, + const real_type* diag, + real_type* vec) { int num_blocks = (n + block_size - 1) / block_size; // Launch the kernel - kernels::scaleInv<<>>(n, divisor, vec); + kernels::diagSolve<<>>(n, diag, vec); } /** diff --git a/resolve/cuda/cudaVectorKernels.h b/resolve/cuda/cudaVectorKernels.h index 943b85207..9bd9fecee 100644 --- a/resolve/cuda/cudaVectorKernels.h +++ b/resolve/cuda/cudaVectorKernels.h @@ -18,7 +18,7 @@ namespace ReSolve void setArrayConst(index_type n, real_type val, real_type* arr); void addConst(index_type n, real_type val, real_type* arr); void scale(index_type n, const real_type* diag, real_type* vec); - void scaleInv(index_type n, const real_type* divisor, real_type* vec); + void diagSolve(index_type n, const real_type* diag, real_type* vec); void max(index_type n, const real_type* x, const real_type* y, real_type* out); void abs(index_type n, const real_type* in, real_type* out); } // namespace cuda diff --git a/resolve/hip/hipVectorKernels.h b/resolve/hip/hipVectorKernels.h index 327cb3775..3e758a952 100644 --- a/resolve/hip/hipVectorKernels.h +++ b/resolve/hip/hipVectorKernels.h @@ -18,7 +18,7 @@ namespace ReSolve void setArrayConst(index_type n, real_type val, real_type* arr); void addConst(index_type n, real_type val, real_type* arr); void scale(index_type n, const real_type* diag, real_type* vec); - void scaleInv(index_type n, const real_type* divisor, real_type* vec); + void diagSolve(index_type n, const real_type* diag, real_type* vec); void max(index_type n, const real_type* x, const real_type* y, real_type* out); void abs(index_type n, const real_type* in, real_type* out); } // namespace hip diff --git a/resolve/hip/hipVectorKernels.hip b/resolve/hip/hipVectorKernels.hip index 619694b93..8242e314d 100644 --- a/resolve/hip/hipVectorKernels.hip +++ b/resolve/hip/hipVectorKernels.hip @@ -71,17 +71,17 @@ namespace ReSolve { } /** - * @brief Divides a vector's elements by another vector's elements + * @brief Multiplies vector by an inverse of a diagonal matrix. * * @param[in] n - size of the vectors - * @param[in] d_val - divisor values + * @param[in] d_val - diagonal matrix values * @param[in, out] vec - vector to be divided. Changes in place. * * @todo Decide how to allow user to configure grid and block sizes. */ - __global__ void scaleInv(index_type n, - const real_type* d_val, - real_type* vec) + __global__ void diagSolve(index_type n, + const real_type* d_val, + real_type* vec) { // Get the index of the element to be processed index_type idx = blockIdx.x * blockDim.x + threadIdx.x; @@ -89,7 +89,7 @@ namespace ReSolve { // Check if the index is within bounds if (idx < n) { - // Divide the vector element by the corresponding divisor value + // Divide the vector element by the corresponding diag value vec[idx] /= d_val[idx]; } } @@ -187,18 +187,18 @@ namespace ReSolve { * @brief Wrapper that divides a vector's elements by another vector's elements * * @param[in] n - size of the vectors - * @param[in] divisor - divisor values + * @param[in] diag - diag values * @param[in, out] vec - vector to be divided. Changes in place. * * @todo Decide how to allow user to configure grid and block sizes. */ - void scaleInv(index_type n, - const real_type* divisor, + void diagSolve(index_type n, + const real_type* diag, real_type* vec) { int num_blocks = (n + block_size - 1) / block_size; // Launch the kernel - kernels::scaleInv<<>>(n, divisor, vec); + kernels::diagSolve<<>>(n, diag, vec); } /** diff --git a/resolve/hykkt/ruiz/CMakeLists.txt b/resolve/hykkt/ruiz/CMakeLists.txt index c4155893b..16e0515f1 100644 --- a/resolve/hykkt/ruiz/CMakeLists.txt +++ b/resolve/hykkt/ruiz/CMakeLists.txt @@ -18,7 +18,7 @@ set(RUIZ_HEADER_INSTALL # Build shared library ReSolve::hykkt add_library(resolve_hykkt_ruiz SHARED ${RUIZ_SRC}) -target_link_libraries(resolve_hykkt_ruiz PRIVATE resolve_logger resolve_vector) +target_link_libraries(resolve_hykkt_ruiz PRIVATE resolve_logger resolve_vector resolve_matrix) # Link to CUDA ReSolve backend if CUDA is support enabled if(RESOLVE_USE_CUDA) diff --git a/resolve/hykkt/spgemm/CMakeLists.txt b/resolve/hykkt/spgemm/CMakeLists.txt index 7b7f213b0..751e2423f 100644 --- a/resolve/hykkt/spgemm/CMakeLists.txt +++ b/resolve/hykkt/spgemm/CMakeLists.txt @@ -20,7 +20,7 @@ set(SPGEMM_ROCM_SRC SpGEMMHip.cpp) add_library(resolve_hykkt_spgemm SHARED ${SPGEMM_SRC}) target_link_libraries( - resolve_hykkt_spgemm PRIVATE resolve_logger ${suitesparse_cholmod} + resolve_hykkt_spgemm PRIVATE resolve_logger ${suitesparse_cholmod} resolve_matrix resolve_vector ) target_include_directories( resolve_hykkt_spgemm PUBLIC ${SUITESPARSE_INCLUDE_DIR} diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index 3747d4a8f..288e08733 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -342,9 +342,9 @@ namespace ReSolve } /** - * @brief Divide the elements of a vector by the elements of another vector + * @brief Multiplies vector by an inverse of a diagonal matrix. * - * @param[in] divisor - vector divisor + * @param[in] diag - diagonal matrix stored in a vector object * @param[in,out] vec - vector to be divided * @param[in] memspace - Device where the operation is computed * @@ -352,19 +352,19 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandler::scaleInv(vector::Vector* divisor, vector::Vector* vec, memory::MemorySpace memspace) + int VectorHandler::diagSolve(vector::Vector* diag, vector::Vector* vec, memory::MemorySpace memspace) { - assert(divisor->getSize() == vec->getSize() && "Diagonal vector must be of the same size as the vector."); - assert(divisor->getData(memspace) != nullptr && "Diagonal vector data is null!\n"); + assert(diag->getSize() == vec->getSize() && "Diagonal vector must be of the same size as the vector."); + assert(diag->getData(memspace) != nullptr && "Diagonal vector data is null!\n"); assert(vec->getData(memspace) != nullptr && "Vector data is null!\n"); using namespace ReSolve::memory; switch (memspace) { case HOST: - return cpuImpl_->scaleInv(divisor, vec); + return cpuImpl_->diagSolve(diag, vec); break; case DEVICE: - return devImpl_->scaleInv(divisor, vec); + return devImpl_->diagSolve(diag, vec); break; } return 1; diff --git a/resolve/vector/VectorHandler.hpp b/resolve/vector/VectorHandler.hpp index 13f0a3b54..5d971cf34 100644 --- a/resolve/vector/VectorHandler.hpp +++ b/resolve/vector/VectorHandler.hpp @@ -67,7 +67,7 @@ namespace ReSolve vector::Vector* x, memory::MemorySpace memspace); - int scaleInv(vector::Vector* divisor, vector::Vector* vec, memory::MemorySpace memspace); + int diagSolve(vector::Vector* diag, vector::Vector* vec, memory::MemorySpace memspace); int max(/* const */ vector::Vector* x, /* const */ vector::Vector* y, vector::Vector* out, memory::MemorySpace memspace); int abs(/* const */ vector::Vector* in, vector::Vector* out, memory::MemorySpace memspace); diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp index ee31a0e7a..d11fe1462 100644 --- a/resolve/vector/VectorHandlerCpu.cpp +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -317,24 +317,24 @@ namespace ReSolve } /** - * @brief Divide the elements of a vector by the elements of another vector + * @brief Multiplies vector by an inverse of a diagonal matrix. * - * @param[in] divisor - vector divisor + * @param[in] diag - diagonal matrix stored in a vector object * @param[in,out] vec - vector to be divided * * @pre The two vectors must be the same size * * @return 0 if successful, 1 otherwise */ - int VectorHandlerCpu::scaleInv(vector::Vector* divisor, vector::Vector* vec) + int VectorHandlerCpu::diagSolve(vector::Vector* diag, vector::Vector* vec) { - real_type* divisor_data = divisor->getData(memory::HOST); + real_type* diag_data = diag->getData(memory::HOST); real_type* vec_data = vec->getData(memory::HOST); index_type n = vec->getSize(); for (index_type i = 0; i < n; ++i) { - vec_data[i] /= divisor_data[i]; + vec_data[i] /= diag_data[i]; } vec->setDataUpdated(memory::HOST); return 0; diff --git a/resolve/vector/VectorHandlerCpu.hpp b/resolve/vector/VectorHandlerCpu.hpp index d153bb63a..18de62880 100644 --- a/resolve/vector/VectorHandlerCpu.hpp +++ b/resolve/vector/VectorHandlerCpu.hpp @@ -57,7 +57,7 @@ namespace ReSolve virtual void scal(vector::Vector* diag, vector::Vector* vec); - virtual int scaleInv(vector::Vector* divisor, vector::Vector* vec); + virtual int diagSolve(vector::Vector* diag, vector::Vector* vec); virtual int max(/* const */ vector::Vector* x, /* const */ vector::Vector* y, vector::Vector* out); diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index 231216ab6..6cdca8916 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -340,9 +340,9 @@ namespace ReSolve } /** - * @brief Divide a vector's elements by another vector's elements in CUDA + * @brief Multiplies vector by an inverse of a diagonal matrix. * - * @param[in] divisor - vector of divisors + * @param[in] diag - diagonal matrix stored in a vector object * @param[in, out] vec - vector to be divided * * @pre The diagonal vector must be of the same size as the vector. @@ -351,12 +351,12 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandlerCuda::scaleInv(vector::Vector* divisor, vector::Vector* vec) + int VectorHandlerCuda::diagSolve(vector::Vector* diag, vector::Vector* vec) { - real_type* divisor_data = divisor->getData(memory::DEVICE); + real_type* diag_data = diag->getData(memory::DEVICE); real_type* vec_data = vec->getData(memory::DEVICE); index_type n = vec->getSize(); - cuda::scaleInv(n, divisor_data, vec_data); + cuda::diagSolve(n, diag_data, vec_data); vec->setDataUpdated(memory::DEVICE); return 0; } diff --git a/resolve/vector/VectorHandlerCuda.hpp b/resolve/vector/VectorHandlerCuda.hpp index 6a3ae757c..a6bcd3794 100644 --- a/resolve/vector/VectorHandlerCuda.hpp +++ b/resolve/vector/VectorHandlerCuda.hpp @@ -67,14 +67,15 @@ namespace ReSolve virtual void scal(vector::Vector* diag, vector::Vector* vec); /** - * @brief scaleInv: divides a vector's elements by another's + * @brief Multiplies vector by an inverse of a diagonal matrix. * - * @param[in] divisor vector of size n x 1 + * + * @param[in] diag vector of size n x 1 * @param[in,out] vec vector of size n x 1 (this is where the result is stored) * * @return 0 if successful, 1 otherwise */ - virtual int scaleInv(vector::Vector* divisor, vector::Vector* vec); + virtual int diagSolve(vector::Vector* diag, vector::Vector* vec); /** * @brief max: calculate the element-wise maximum of two vectors diff --git a/resolve/vector/VectorHandlerHip.cpp b/resolve/vector/VectorHandlerHip.cpp index deda8aec0..9eaada472 100644 --- a/resolve/vector/VectorHandlerHip.cpp +++ b/resolve/vector/VectorHandlerHip.cpp @@ -334,9 +334,9 @@ namespace ReSolve } /** - * @brief Divide a vector's elements by another vector's elements in HIP + * @brief Multiplies vector by an inverse of a diagonal matrix. * - * @param[in] divisor - vector of divisors + * @param[in] diag - diagonal matrix stored in a vector object * @param[in, out] vec - vector to be divided * * @pre The diagonal vector must be of the same size as the vector. @@ -345,12 +345,12 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int VectorHandlerHip::scaleInv(vector::Vector* divisor, vector::Vector* vec) + int VectorHandlerHip::diagSolve(vector::Vector* diag, vector::Vector* vec) { - real_type* divisor_data = divisor->getData(memory::DEVICE); - real_type* vec_data = vec->getData(memory::DEVICE); - index_type n = vec->getSize(); - hip::scaleInv(n, divisor_data, vec_data); + real_type* diag_data = diag->getData(memory::DEVICE); + real_type* vec_data = vec->getData(memory::DEVICE); + index_type n = vec->getSize(); + hip::diagSolve(n, diag_data, vec_data); vec->setDataUpdated(memory::DEVICE); return 0; } diff --git a/resolve/vector/VectorHandlerHip.hpp b/resolve/vector/VectorHandlerHip.hpp index f35da5bf6..a13eb9aea 100644 --- a/resolve/vector/VectorHandlerHip.hpp +++ b/resolve/vector/VectorHandlerHip.hpp @@ -67,14 +67,14 @@ namespace ReSolve virtual void scal(vector::Vector* diag, vector::Vector* vec); /** - * @brief scaleInv: divides a vector's elements by another's + * @brief Multiplies vector by an inverse of a diagonal matrix. * - * @param[in] divisor vector of size n x 1 + * @param[in] diag vector of size n x 1 * @param[in,out] vec vector of size n x 1 (this is where the result is stored) * * @return 0 if successful, 1 otherwise */ - virtual int scaleInv(vector::Vector* divisor, vector::Vector* vec); + virtual int diagSolve(vector::Vector* diag, vector::Vector* vec); /** * @brief max: calculate the element-wise maximum of two vectors diff --git a/resolve/vector/VectorHandlerImpl.hpp b/resolve/vector/VectorHandlerImpl.hpp index 13e015259..8d49704cd 100644 --- a/resolve/vector/VectorHandlerImpl.hpp +++ b/resolve/vector/VectorHandlerImpl.hpp @@ -46,7 +46,7 @@ namespace ReSolve virtual void scal(vector::Vector* diag, vector::Vector* vec) = 0; // Divide the elements of a vector by the elements of another vector - virtual int scaleInv(vector::Vector* divisor, vector::Vector* vec) = 0; + virtual int diagSolve(vector::Vector* diag, vector::Vector* vec) = 0; // Compute element-wise max of two vectors virtual int max(/* const */ vector::Vector* x, /* const */ vector::Vector* y, vector::Vector* out) = 0; diff --git a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp index 23c2480b3..4f0ef8a8d 100644 --- a/tests/unit/vector/VectorHandlerTests.hpp +++ b/tests/unit/vector/VectorHandlerTests.hpp @@ -280,28 +280,28 @@ namespace ReSolve return status.report(__func__); } - TestOutcome scaleInv(index_type N) + TestOutcome diagSolve(index_type N) { TestStatus status; - vector::Vector divisor(N); + vector::Vector diag(N); vector::Vector vec(N); - // divisor[i] = i + 1, vec[i] = 3.0 + // diag[i] = i + 1, vec[i] = 3.0 // expected result vec[i] = 3.0 / (i + 1) - divisor.allocate(memspace_); + diag.allocate(memspace_); vec.allocate(memspace_); vec.setToConst(3.0, memspace_); - auto divisor_data = std::unique_ptr(new real_type[N]); + auto diag_data = std::unique_ptr(new real_type[N]); for (size_t i = 0; i < static_cast(N); ++i) { - divisor_data[i] = (real_type) (i + 1); + diag_data[i] = (real_type) (i + 1); } - divisor.copyFromExternal(divisor_data.get(), memory::HOST, memspace_); + diag.copyFromExternal(diag_data.get(), memory::HOST, memspace_); - handler_.scaleInv(&divisor, &vec, memspace_); + handler_.diagSolve(&diag, &vec, memspace_); if (memspace_ == memory::DEVICE) { diff --git a/tests/unit/vector/runVectorHandlerTests.cpp b/tests/unit/vector/runVectorHandlerTests.cpp index 8263811de..42396acae 100644 --- a/tests/unit/vector/runVectorHandlerTests.cpp +++ b/tests/unit/vector/runVectorHandlerTests.cpp @@ -25,7 +25,7 @@ int main(int, char**) result += test.axpyMulti(100, 10); result += test.massDot(100, 10); result += test.scale(100); - result += test.scaleInv(100); + result += test.diagSolve(100); result += test.max(100); result += test.abs(100); @@ -51,7 +51,7 @@ int main(int, char**) result += test.massDot(1000, 30); result += test.amax(1000); result += test.scale(1000); - result += test.scaleInv(1000); + result += test.diagSolve(1000); result += test.max(1000); result += test.abs(1000); @@ -78,7 +78,7 @@ int main(int, char**) result += test.massDot(1000, 30); result += test.amax(1000); result += test.scale(1000); - result += test.scaleInv(1000); + result += test.diagSolve(1000); result += test.max(1000); result += test.abs(1000); From 2a86c4bed17cee73204148cd9ebded8a944be87b Mon Sep 17 00:00:00 2001 From: pelesh Date: Wed, 18 Mar 2026 00:50:08 +0000 Subject: [PATCH 14/14] Apply pre-commmit fixes --- resolve/hykkt/ruiz/CMakeLists.txt | 4 +++- resolve/hykkt/spgemm/CMakeLists.txt | 3 ++- resolve/vector/VectorHandlerCpu.cpp | 4 ++-- resolve/vector/VectorHandlerCuda.cpp | 4 ++-- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/resolve/hykkt/ruiz/CMakeLists.txt b/resolve/hykkt/ruiz/CMakeLists.txt index 16e0515f1..4c95ff02c 100644 --- a/resolve/hykkt/ruiz/CMakeLists.txt +++ b/resolve/hykkt/ruiz/CMakeLists.txt @@ -18,7 +18,9 @@ set(RUIZ_HEADER_INSTALL # Build shared library ReSolve::hykkt add_library(resolve_hykkt_ruiz SHARED ${RUIZ_SRC}) -target_link_libraries(resolve_hykkt_ruiz PRIVATE resolve_logger resolve_vector resolve_matrix) +target_link_libraries( + resolve_hykkt_ruiz PRIVATE resolve_logger resolve_vector resolve_matrix +) # Link to CUDA ReSolve backend if CUDA is support enabled if(RESOLVE_USE_CUDA) diff --git a/resolve/hykkt/spgemm/CMakeLists.txt b/resolve/hykkt/spgemm/CMakeLists.txt index 751e2423f..188c65b50 100644 --- a/resolve/hykkt/spgemm/CMakeLists.txt +++ b/resolve/hykkt/spgemm/CMakeLists.txt @@ -20,7 +20,8 @@ set(SPGEMM_ROCM_SRC SpGEMMHip.cpp) add_library(resolve_hykkt_spgemm SHARED ${SPGEMM_SRC}) target_link_libraries( - resolve_hykkt_spgemm PRIVATE resolve_logger ${suitesparse_cholmod} resolve_matrix resolve_vector + resolve_hykkt_spgemm PRIVATE resolve_logger ${suitesparse_cholmod} + resolve_matrix resolve_vector ) target_include_directories( resolve_hykkt_spgemm PUBLIC ${SUITESPARSE_INCLUDE_DIR} diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp index d11fe1462..a28affddd 100644 --- a/resolve/vector/VectorHandlerCpu.cpp +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -329,8 +329,8 @@ namespace ReSolve int VectorHandlerCpu::diagSolve(vector::Vector* diag, vector::Vector* vec) { real_type* diag_data = diag->getData(memory::HOST); - real_type* vec_data = vec->getData(memory::HOST); - index_type n = vec->getSize(); + real_type* vec_data = vec->getData(memory::HOST); + index_type n = vec->getSize(); for (index_type i = 0; i < n; ++i) { diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index 6cdca8916..f98e1fbdc 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -354,8 +354,8 @@ namespace ReSolve int VectorHandlerCuda::diagSolve(vector::Vector* diag, vector::Vector* vec) { real_type* diag_data = diag->getData(memory::DEVICE); - real_type* vec_data = vec->getData(memory::DEVICE); - index_type n = vec->getSize(); + real_type* vec_data = vec->getData(memory::DEVICE); + index_type n = vec->getSize(); cuda::diagSolve(n, diag_data, vec_data); vec->setDataUpdated(memory::DEVICE); return 0;