From ff8769a22a608a9e21e00ecb804a6a39550d536f Mon Sep 17 00:00:00 2001 From: Nicholson Koukpaizan Date: Thu, 24 Apr 2025 15:58:18 -0400 Subject: [PATCH 01/15] .clang-format and .pre-commit to be on par with GridKit. --- .clang-format | 72 +++++++++++++++++++++++++++++++ .github/workflows/.clang-format | 9 ---- .github/workflows/pre_commit.yaml | 22 ++++++++++ .pre-commit-config.yaml | 12 ++++++ 4 files changed, 106 insertions(+), 9 deletions(-) create mode 100644 .clang-format delete mode 100644 .github/workflows/.clang-format create mode 100644 .github/workflows/pre_commit.yaml create mode 100644 .pre-commit-config.yaml diff --git a/.clang-format b/.clang-format new file mode 100644 index 000000000..2fcec4c5f --- /dev/null +++ b/.clang-format @@ -0,0 +1,72 @@ +--- +Language: Cpp +BasedOnStyle: Microsoft + +# Indentation +IndentWidth: 2 # 2 spaces per indent +AccessModifierOffset: -2 +IndentAccessModifiers: false # Align access modifiers to braces +NamespaceIndentation: All # Indent namspace contents +ConstructorInitializerIndentWidth: 2 + +# Comments which match this regex will be unformatted (and therefore can be longer or have more whitespace than other comments) +CommentPragmas: '^\*\*' + + +# Alignment +AlignConsecutiveAssignments: + Enabled: true + AcrossEmptyLines: false + AcrossComments: true + AlignCompound: true + PadOperators: true +AlignConsecutiveBitFields: + Enabled: true + AcrossEmptyLines: false + AcrossComments: true +AlignConsecutiveDeclarations: + Enabled: true + AcrossEmptyLines: false + AcrossComments: true + # For future versions of clang-format + # AlignFunctionDeclarations: false + # AlignFunctionPointers: false +AlignConsecutiveMacros: + Enabled: true + AcrossEmptyLines: false + AcrossComments: true + +# Newlines +ColumnLimit: 0 +BreakBeforeBraces: Allman # Braces on their own lines +SeparateDefinitionBlocks: Always # Separate definitions (functions etc.) with an empty line +AlwaysBreakTemplateDeclarations: true # Put template on their own lines +AllowShortBlocksOnASingleLine: Never +# On a newer version of clang-format, replace with BinPackArguments: OnePerLine +BinPackArguments: false # Don't allow multiple function arguments on the same line unless they all fit +BinPackParameters: false # Same but for parameters +PackConstructorInitializers: NextLine +AllowShortFunctionsOnASingleLine: None +BreakBeforeBinaryOperators: NonAssignment # Put binary operators after a line break, rather than before +AllowShortIfStatementsOnASingleLine: Never + +# Spaces +SpaceBeforeParens: ControlStatementsExceptControlMacros +SpaceAfterCStyleCast: true +PointerAlignment: Left + +# Includes +IncludeBlocks: Regroup # Regroup includes based on config +IncludeCategories: + - Regex: '(^"|\.hpp)' # 'local' includes + Priority: 3 + SortPriority: 0 + CaseSensitive: false + - Regex: '\/' # Library includes + Priority: 2 + SortPriority: 0 + CaseSensitive: false + - Regex: '.*' # Everything else + Priority: 1 + SortPriority: 0 + CaseSensitive: false diff --git a/.github/workflows/.clang-format b/.github/workflows/.clang-format deleted file mode 100644 index b8cef3e95..000000000 --- a/.github/workflows/.clang-format +++ /dev/null @@ -1,9 +0,0 @@ -# Documentation about the style options and their meaning -# https://releases.llvm.org/12.0.0/tools/clang/docs/ClangFormatStyleOptions.html - -Language: Cpp -AccessModifierOffset: 0 -AlignConsecutiveMacros: true -BreakBeforeBraces: Linux -ColumnLimit: 150 -ConstructorInitializerIndentWidth: 2 diff --git a/.github/workflows/pre_commit.yaml b/.github/workflows/pre_commit.yaml new file mode 100644 index 000000000..b969fca6f --- /dev/null +++ b/.github/workflows/pre_commit.yaml @@ -0,0 +1,22 @@ +name: resolve-bot pre-commit + +# Won't run on develop/main directly +on: [pull_request] + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.head.ref }} + - uses: actions/setup-python@v5.4.0 + - uses: pre-commit/action@v3.0.1 + #- uses: EndBug/add-and-commit@v9.1.4 + # # Only need to try and commit if the action failed + # if: failure() + # with: + # fetch: false + # committer_name: GitHub Actions + # committer_email: actions@github.com + # message: Apply pre-commmit fixes diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..eda93831a --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,12 @@ +repos: + - repo: https://github.com/pre-commit/mirrors-clang-format + rev: v19.1.7 + hooks: + - id: clang-format + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: check-toml + - id: forbid-new-submodules + - id: end-of-file-fixer + - id: check-yaml From 3bbab2c2ce4e5f27e0b9e42c431d3d410788cdc9 Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Wed, 18 Jun 2025 17:15:58 -0400 Subject: [PATCH 02/15] Apply clang-format --- examples/ExampleHelper.hpp | 651 ++++---- .../r_KLU_GLU_matrix_values_update.cpp | 162 +- .../r_KLU_cusolverrf_redo_factorization.cpp | 160 +- .../r_KLU_rf_FGMRES_reuse_factorization.cpp | 148 +- .../r_KLU_rocsolverrf_redo_factorization.cpp | 170 +- examples/gluRefactor.cpp | 103 +- examples/gpuRefactor.cpp | 136 +- examples/kluFactor.cpp | 103 +- examples/kluRefactor.cpp | 105 +- examples/randGmres.cpp | 76 +- examples/sysRefactor.cpp | 133 +- resolve/Common.hpp | 31 +- resolve/Doxygen.hpp | 34 +- resolve/GramSchmidt.cpp | 521 +++--- resolve/GramSchmidt.hpp | 91 +- resolve/LinSolver.cpp | 21 +- resolve/LinSolver.hpp | 47 +- resolve/LinSolverDirect.cpp | 40 +- resolve/LinSolverDirect.hpp | 56 +- resolve/LinSolverDirectCpuILU0.cpp | 221 +-- resolve/LinSolverDirectCpuILU0.hpp | 82 +- resolve/LinSolverDirectCuSolverGLU.cpp | 222 +-- resolve/LinSolverDirectCuSolverGLU.hpp | 81 +- resolve/LinSolverDirectCuSolverRf.cpp | 287 ++-- resolve/LinSolverDirectCuSolverRf.hpp | 102 +- resolve/LinSolverDirectCuSparseILU0.cpp | 256 +-- resolve/LinSolverDirectCuSparseILU0.hpp | 112 +- resolve/LinSolverDirectKLU.cpp | 199 ++- resolve/LinSolverDirectKLU.hpp | 162 +- resolve/LinSolverDirectLUSOL.cpp | 255 +-- resolve/LinSolverDirectLUSOL.hpp | 262 ++-- resolve/LinSolverDirectRocSolverRf.cpp | 320 ++-- resolve/LinSolverDirectRocSolverRf.hpp | 142 +- resolve/LinSolverDirectRocSparseILU0.cpp | 330 ++-- resolve/LinSolverDirectRocSparseILU0.hpp | 94 +- resolve/LinSolverDirectSerialILU0.cpp | 231 +-- resolve/LinSolverDirectSerialILU0.hpp | 73 +- resolve/LinSolverIterative.cpp | 25 +- resolve/LinSolverIterative.hpp | 53 +- resolve/LinSolverIterativeFGMRES.cpp | 353 +++-- resolve/LinSolverIterativeFGMRES.hpp | 125 +- resolve/LinSolverIterativeRandFGMRES.cpp | 462 +++--- resolve/LinSolverIterativeRandFGMRES.hpp | 194 +-- resolve/MemoryUtils.hpp | 182 ++- resolve/Profiling.hpp | 10 +- resolve/RefactorizationSolver.cpp | 4 +- resolve/RefactorizationSolver.hpp | 48 +- resolve/SystemSolver.cpp | 462 ++++-- resolve/SystemSolver.hpp | 205 ++- resolve/cpu/CpuMemory.hpp | 41 +- resolve/cpu/MemoryUtils.cpp | 33 +- resolve/cuda/CudaMemory.hpp | 30 +- resolve/cuda/cuda_check_errors.hpp | 18 +- resolve/cusolver_defs.hpp | 107 +- resolve/hip/HipMemory.hpp | 30 +- resolve/hip/hip_check_errors.hpp | 18 +- resolve/lusol/lusol.hpp | 17 +- resolve/matrix/Coo.cpp | 506 +++--- resolve/matrix/Coo.hpp | 50 +- resolve/matrix/Csc.cpp | 327 ++-- resolve/matrix/Csc.hpp | 42 +- resolve/matrix/Csr.cpp | 539 ++++--- resolve/matrix/Csr.hpp | 65 +- resolve/matrix/MatrixHandler.cpp | 211 +-- resolve/matrix/MatrixHandler.hpp | 69 +- resolve/matrix/MatrixHandlerCpu.cpp | 172 +- resolve/matrix/MatrixHandlerCpu.hpp | 50 +- resolve/matrix/MatrixHandlerCuda.cpp | 155 +- resolve/matrix/MatrixHandlerCuda.hpp | 50 +- resolve/matrix/MatrixHandlerHip.cpp | 112 +- resolve/matrix/MatrixHandlerHip.hpp | 50 +- resolve/matrix/MatrixHandlerImpl.hpp | 47 +- resolve/matrix/Sparse.cpp | 420 ++--- resolve/matrix/Sparse.hpp | 140 +- resolve/matrix/io.cpp | 530 ++++--- resolve/matrix/io.hpp | 54 +- resolve/random/RandomSketchingCountCpu.cpp | 91 +- resolve/random/RandomSketchingCountCpu.hpp | 47 +- resolve/random/RandomSketchingCountCuda.cpp | 97 +- resolve/random/RandomSketchingCountCuda.hpp | 47 +- resolve/random/RandomSketchingCountHip.cpp | 97 +- resolve/random/RandomSketchingCountHip.hpp | 52 +- resolve/random/RandomSketchingFWHTCpu.cpp | 146 +- resolve/random/RandomSketchingFWHTCpu.hpp | 70 +- resolve/random/RandomSketchingFWHTCuda.cpp | 152 +- resolve/random/RandomSketchingFWHTCuda.hpp | 68 +- resolve/random/RandomSketchingFWHTHip.cpp | 148 +- resolve/random/RandomSketchingFWHTHip.hpp | 72 +- resolve/random/RandomSketchingImpl.hpp | 35 +- resolve/random/SketchingHandler.cpp | 97 +- resolve/random/SketchingHandler.hpp | 34 +- resolve/random/cpuSketchingKernels.cpp | 110 +- resolve/utilities/logger/Logger.cpp | 40 +- resolve/utilities/logger/Logger.hpp | 81 +- resolve/utilities/params/CliOptions.cpp | 40 +- resolve/utilities/params/CliOptions.hpp | 39 +- resolve/utilities/version/version.cpp | 9 +- resolve/utilities/version/version.hpp | 6 +- resolve/vector/Vector.cpp | 1121 ++++++------- resolve/vector/Vector.hpp | 81 +- resolve/vector/VectorHandler.cpp | 249 +-- resolve/vector/VectorHandler.hpp | 103 +- resolve/vector/VectorHandlerCpu.cpp | 250 +-- resolve/vector/VectorHandlerCpu.hpp | 97 +- resolve/vector/VectorHandlerCuda.cpp | 195 +-- resolve/vector/VectorHandlerCuda.hpp | 101 +- resolve/vector/VectorHandlerHip.cpp | 192 +-- resolve/vector/VectorHandlerHip.hpp | 99 +- resolve/vector/VectorHandlerImpl.hpp | 96 +- resolve/workspace/LinAlgWorkspace.hpp | 1 - resolve/workspace/LinAlgWorkspaceCUDA.cpp | 57 +- resolve/workspace/LinAlgWorkspaceCUDA.hpp | 133 +- resolve/workspace/LinAlgWorkspaceCpu.cpp | 7 +- resolve/workspace/LinAlgWorkspaceCpu.hpp | 12 +- resolve/workspace/LinAlgWorkspaceHIP.cpp | 47 +- resolve/workspace/LinAlgWorkspaceHIP.hpp | 118 +- tests/functionality/TestHelper.hpp | 804 +++++----- tests/functionality/testKlu.cpp | 93 +- tests/functionality/testLUSOL.cpp | 106 +- tests/functionality/testRandGmres.cpp | 163 +- tests/functionality/testRefactor.cpp | 127 +- tests/functionality/testSysGLU.cpp | 188 ++- tests/functionality/testSysGmres.cpp | 202 ++- tests/functionality/testSysRefactor.cpp | 111 +- tests/functionality/testVersion.cpp | 20 +- tests/unit/TestBase.hpp | 438 +++--- tests/unit/matrix/LUSOLTests.hpp | 625 ++++---- .../unit/matrix/MatrixFactorizationTests.hpp | 677 ++++---- tests/unit/matrix/MatrixHandlerTests.hpp | 1395 +++++++++-------- tests/unit/matrix/MatrixIoTests.hpp | 1024 ++++++------ tests/unit/matrix/SparseTests.hpp | 665 ++++---- .../matrix/runMatrixFactorizationTests.cpp | 12 +- tests/unit/matrix/runMatrixHandlerTests.cpp | 11 +- tests/unit/matrix/runMatrixIoTests.cpp | 7 +- tests/unit/matrix/runSparseTests.cpp | 9 +- tests/unit/memory/MemoryUtilsTests.hpp | 170 +- tests/unit/memory/runMemoryUtilsTests.cpp | 6 +- tests/unit/params/ParamTests.hpp | 210 +-- tests/unit/params/runParamTests.cpp | 2 +- tests/unit/utilities/logger/LoggerTests.hpp | 368 ++--- .../unit/utilities/logger/runLoggerTests.cpp | 4 +- tests/unit/vector/GramSchmidtTests.hpp | 276 ++-- tests/unit/vector/VectorHandlerTests.hpp | 446 +++--- tests/unit/vector/VectorTests.hpp | 660 ++++---- tests/unit/vector/runGramSchmidtTests.cpp | 10 +- tests/unit/vector/runVectorHandlerTests.cpp | 7 +- tests/unit/vector/runVectorTests.cpp | 19 +- 147 files changed, 13616 insertions(+), 12001 deletions(-) diff --git a/examples/ExampleHelper.hpp b/examples/ExampleHelper.hpp index 19269a372..3dc474d6b 100644 --- a/examples/ExampleHelper.hpp +++ b/examples/ExampleHelper.hpp @@ -1,9 +1,10 @@ #pragma once -#include +#include #include +#include + #include -#include namespace ReSolve { @@ -23,10 +24,10 @@ namespace ReSolve std::cout << "========================================================================================================================\n"; std::cout << std::endl; - std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x "<< A->getNumColumns() - << ", nnz: " << A->getNnz() + std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x " << A->getNumColumns() + << ", nnz: " << A->getNnz() << ", symmetric? " << A->symmetric() - << ", Expanded? " << A->expanded() << std::endl; + << ", Expanded? " << A->expanded() << std::endl; } /** @@ -41,345 +42,357 @@ namespace ReSolve template class ExampleHelper { - public: - /** - * @brief Default constructor - * - * Initializes matrix and vector handlers. - * - * @param[in,out] workspace - workspace for matrix and vector handlers - * - * @pre Workspace handles are initialized - * - * @post Handlers are instantiated. - * allocated - */ - ExampleHelper(workspace_type& workspace) - : mh_(&workspace), - vh_(&workspace) + public: + /** + * @brief Default constructor + * + * Initializes matrix and vector handlers. + * + * @param[in,out] workspace - workspace for matrix and vector handlers + * + * @pre Workspace handles are initialized + * + * @post Handlers are instantiated. + * allocated + */ + ExampleHelper(workspace_type& workspace) + : mh_(&workspace), + vh_(&workspace) + { + memspace_ = ReSolve::memory::DEVICE; + if (mh_.getIsCudaEnabled()) { - memspace_ = ReSolve::memory::DEVICE; - if (mh_.getIsCudaEnabled()) { - hardware_backend_ = "CUDA"; - } else if (mh_.getIsHipEnabled()) { - hardware_backend_ = "HIP"; - } else { - hardware_backend_ = "CPU"; - memspace_ = ReSolve::memory::HOST; - } + hardware_backend_ = "CUDA"; } - - /** - * @brief Destroy the ExampleHelper object - * - * @post Vectors res_ and x_true_ are deleted. - * - */ - ~ExampleHelper() + else if (mh_.getIsHipEnabled()) { - if (res_) { - delete res_; - res_ = nullptr; - } - if (x_true_) { - delete x_true_; - x_true_ = nullptr; - } + hardware_backend_ = "HIP"; } - - std::string getHardwareBackend() const + else { - return hardware_backend_; + hardware_backend_ = "CPU"; + memspace_ = ReSolve::memory::HOST; } - - /** - * @brief Set the new linear system together with its computed solution - * and compute solution error and residual norms. - * - * This will set the new system A*x = r and compute related error norms. - * - * @param A[in] - Linear system matrix - * @param r[in] - Linear system right-hand side - * @param x[in] - Computed solution of the linear system - */ - void setSystem(ReSolve::matrix::Sparse* A, - ReSolve::vector::Vector* r, - ReSolve::vector::Vector* x) + } + + /** + * @brief Destroy the ExampleHelper object + * + * @post Vectors res_ and x_true_ are deleted. + * + */ + ~ExampleHelper() + { + if (res_) { - assert((res_ == nullptr) && (x_true_ == nullptr)); - A_ = A; - r_ = r; - x_ = x; - res_ = new ReSolve::vector::Vector(A->getNumRows()); - computeNorms(); + delete res_; + res_ = nullptr; } - - /** - * @brief Set the new linear system together with its computed solution - * and compute solution error and residual norms. - * - * This is to be used after values in A and r are updated. - * - * @todo This method probably does not need any input parameters. - * - * @param A[in] - Linear system matrix - * @param r[in] - Linear system right-hand side - * @param x[in] - Computed solution of the linear system - */ - void resetSystem(ReSolve::matrix::Sparse* A, - ReSolve::vector::Vector* r, - ReSolve::vector::Vector* x) + if (x_true_) { - A_ = A; - r_ = r; - x_ = x; - if(res_ == nullptr) { - res_ = new ReSolve::vector::Vector(A->getNumRows()); - } - - computeNorms(); + delete x_true_; + x_true_ = nullptr; } - - /// Return L2 norm of the linear system residual. - ReSolve::real_type getNormResidual() + } + + std::string getHardwareBackend() const + { + return hardware_backend_; + } + + /** + * @brief Set the new linear system together with its computed solution + * and compute solution error and residual norms. + * + * This will set the new system A*x = r and compute related error norms. + * + * @param A[in] - Linear system matrix + * @param r[in] - Linear system right-hand side + * @param x[in] - Computed solution of the linear system + */ + void setSystem(ReSolve::matrix::Sparse* A, + ReSolve::vector::Vector* r, + ReSolve::vector::Vector* x) + { + assert((res_ == nullptr) && (x_true_ == nullptr)); + A_ = A; + r_ = r; + x_ = x; + res_ = new ReSolve::vector::Vector(A->getNumRows()); + computeNorms(); + } + + /** + * @brief Set the new linear system together with its computed solution + * and compute solution error and residual norms. + * + * This is to be used after values in A and r are updated. + * + * @todo This method probably does not need any input parameters. + * + * @param A[in] - Linear system matrix + * @param r[in] - Linear system right-hand side + * @param x[in] - Computed solution of the linear system + */ + void resetSystem(ReSolve::matrix::Sparse* A, + ReSolve::vector::Vector* r, + ReSolve::vector::Vector* x) + { + A_ = A; + r_ = r; + x_ = x; + if (res_ == nullptr) { - return norm_res_; + res_ = new ReSolve::vector::Vector(A->getNumRows()); } - /// Return relative residual norm. - ReSolve::real_type getNormRelativeResidual() + computeNorms(); + } + + /// Return L2 norm of the linear system residual. + ReSolve::real_type getNormResidual() + { + return norm_res_; + } + + /// Return relative residual norm. + ReSolve::real_type getNormRelativeResidual() + { + return norm_res_ / norm_rhs_; + } + + /// Minimalistic summary + void printShortSummary() + { + std::cout << "\t2-Norm of the residual: " + << std::scientific << std::setprecision(16) + << getNormRelativeResidual() << "\n"; + } + + /// Summary of direct solve + void printSummary() + { + std::cout << "\t 2-Norm of the residual (before IR): " + << std::scientific << std::setprecision(16) + << getNormRelativeResidual() << "\n"; + + std::cout << std::scientific << std::setprecision(16) + << "\t Matrix inf norm: " << inf_norm_A_ << "\n" + << "\t Residual inf norm: " << inf_norm_res_ << "\n" + << "\t Solution inf norm: " << inf_norm_x_ << "\n" + << "\t Norm of scaled residuals: " << nsr_norm_ << "\n"; + } + + /// Summary of error norms for an iterative refinement test. + void printIrSummary(ReSolve::LinSolverIterative* ls) + { + std::cout << "FGMRES: init nrm: " + << std::scientific << std::setprecision(16) + << ls->getInitResidualNorm() / norm_rhs_ + << " final nrm: " + << ls->getFinalResidualNorm() / norm_rhs_ + << " iter: " << ls->getNumIter() << "\n"; + } + + /// Summary of error norms for an iterative solver test. + void printIterativeSolverSummary(ReSolve::LinSolverIterative* ls) + { + std::cout << std::setprecision(16) << std::scientific; + std::cout << "\t Initial residual norm ||b-A*x|| : " << ls->getInitResidualNorm() << "\n"; + std::cout << "\t Initial relative residual norm ||b-A*x||/||b|| : " << ls->getInitResidualNorm() / norm_rhs_ << "\n"; + std::cout << "\t Final residual norm ||b-A*x|| : " << ls->getFinalResidualNorm() << "\n"; + std::cout << "\t Final relative residual norm ||b-A*x||/||b|| : " << ls->getFinalResidualNorm() / norm_rhs_ << "\n"; + std::cout << "\t Number of iterations : " << ls->getNumIter() << "\n"; + } + + /// Check the relative residual norm against `tolerance`. + int checkResult(ReSolve::real_type tolerance) + { + int error_sum = 0; + ReSolve::real_type norm = norm_res_ / norm_rhs_; + + if (!std::isfinite(norm)) { - return norm_res_/norm_rhs_; + std::cout << "Result is not a finite number!\n"; + error_sum++; } - - /// Minimalistic summary - void printShortSummary() + if (norm > tolerance) { - std::cout << "\t2-Norm of the residual: " - << std::scientific << std::setprecision(16) - << getNormRelativeResidual() << "\n"; + std::cout << "Result inaccurate!\n"; + error_sum++; } - /// Summary of direct solve - void printSummary() + return error_sum; + } + + /** + * @brief Verify the computation of the norm of scaled residuals. + * + * The norm value is provided as the input. This function computes + * the norm of scaled residuals for the system that has been set + * by the constructor or (re)setSystem functions. + * + * @param nsr_system - norm of scaled residuals value to be verified + * @return int - 0 if the result is correct, error code otherwise + */ + int checkNormOfScaledResiduals(ReSolve::real_type nsr_system) + { + using namespace ReSolve; + int error_sum = 0; + + // Compute residual norm to get updated vector res_ + res_->copyDataFrom(r_, memspace_, memspace_); + norm_res_ = computeResidualNorm(*A_, *x_, *res_, memspace_); + + // Compute norm of scaled residuals + real_type inf_norm_A = 0.0; + mh_.matrixInfNorm(A_, &inf_norm_A, memspace_); + real_type inf_norm_x = vh_.infNorm(x_, memspace_); + real_type inf_norm_res = vh_.infNorm(res_, memspace_); + real_type nsr_norm = inf_norm_res / (inf_norm_A * inf_norm_x); + real_type error = std::abs(nsr_system - nsr_norm) / nsr_norm; + + // Test norm of scaled residuals method in SystemSolver + if (error > 10.0 * std::numeric_limits::epsilon()) { - std::cout << "\t 2-Norm of the residual (before IR): " - << std::scientific << std::setprecision(16) - << getNormRelativeResidual() << "\n"; - + std::cout << "Norm of scaled residuals computation failed:\n"; std::cout << std::scientific << std::setprecision(16) - << "\t Matrix inf norm: " << inf_norm_A_ << "\n" - << "\t Residual inf norm: " << inf_norm_res_ << "\n" - << "\t Solution inf norm: " << inf_norm_x_ << "\n" - << "\t Norm of scaled residuals: " << nsr_norm_ << "\n"; - } - - /// Summary of error norms for an iterative refinement test. - void printIrSummary(ReSolve::LinSolverIterative* ls) - { - std::cout << "FGMRES: init nrm: " - << std::scientific << std::setprecision(16) - << ls->getInitResidualNorm()/norm_rhs_ - << " final nrm: " - << ls->getFinalResidualNorm()/norm_rhs_ - << " iter: " << ls->getNumIter() << "\n"; - } - - /// Summary of error norms for an iterative solver test. - void printIterativeSolverSummary(ReSolve::LinSolverIterative* ls) - { - std::cout << std::setprecision(16) << std::scientific; - std::cout << "\t Initial residual norm ||b-A*x|| : " << ls->getInitResidualNorm() << "\n"; - std::cout << "\t Initial relative residual norm ||b-A*x||/||b|| : " << ls->getInitResidualNorm()/norm_rhs_ << "\n"; - std::cout << "\t Final residual norm ||b-A*x|| : " << ls->getFinalResidualNorm() << "\n"; - std::cout << "\t Final relative residual norm ||b-A*x||/||b|| : " << ls->getFinalResidualNorm()/norm_rhs_ << "\n"; - std::cout << "\t Number of iterations : " << ls->getNumIter() << "\n"; + << "\tMatrix inf norm : " << inf_norm_A << "\n" + << "\tResidual inf norm : " << inf_norm_res << "\n" + << "\tSolution inf norm : " << inf_norm_x << "\n" + << "\tNorm of scaled residuals : " << nsr_norm << "\n" + << "\tNorm of scaled residuals (system): " << nsr_system << "\n\n"; } - - /// Check the relative residual norm against `tolerance`. - int checkResult(ReSolve::real_type tolerance) + return error_sum; + } + + /** + * @brief Verify the computation of the relative residual norm. + * + * The norm value is provided as the input. This function computes + * the relative residual norm for the system that has been set + * by the constructor or (re)setSystem functions. + * + * @param rrn_system - relative residual norm value to be verified + * @return int - 0 if the result is correct, error code otherwise + */ + int checkRelativeResidualNorm(ReSolve::real_type rrn_system) + { + using namespace ReSolve; + int error_sum = 0; + + // Compute residual norm + res_->copyDataFrom(r_, memspace_, memspace_); + norm_res_ = computeResidualNorm(*A_, *x_, *res_, memspace_); + + real_type error = std::abs(norm_rhs_ * rrn_system - norm_res_) / norm_res_; + if (error > 10.0 * std::numeric_limits::epsilon()) { - int error_sum = 0; - ReSolve::real_type norm = norm_res_/norm_rhs_; - - if (!std::isfinite(norm)) { - std::cout << "Result is not a finite number!\n"; - error_sum++; - } - if (norm > tolerance) { - std::cout << "Result inaccurate!\n"; - error_sum++; - } - - return error_sum; - } - - /** - * @brief Verify the computation of the norm of scaled residuals. - * - * The norm value is provided as the input. This function computes - * the norm of scaled residuals for the system that has been set - * by the constructor or (re)setSystem functions. - * - * @param nsr_system - norm of scaled residuals value to be verified - * @return int - 0 if the result is correct, error code otherwise - */ - int checkNormOfScaledResiduals(ReSolve::real_type nsr_system) - { - using namespace ReSolve; - int error_sum = 0; - - // Compute residual norm to get updated vector res_ - res_->copyDataFrom(r_, memspace_, memspace_); - norm_res_ = computeResidualNorm(*A_, *x_, *res_, memspace_); - - // Compute norm of scaled residuals - real_type inf_norm_A = 0.0; - mh_.matrixInfNorm(A_, &inf_norm_A, memspace_); - real_type inf_norm_x = vh_.infNorm(x_, memspace_); - real_type inf_norm_res = vh_.infNorm(res_, memspace_); - real_type nsr_norm = inf_norm_res / (inf_norm_A * inf_norm_x); - real_type error = std::abs(nsr_system - nsr_norm)/nsr_norm; - - // Test norm of scaled residuals method in SystemSolver - if (error > 10.0*std::numeric_limits::epsilon()) - { - std::cout << "Norm of scaled residuals computation failed:\n"; - std::cout << std::scientific << std::setprecision(16) - << "\tMatrix inf norm : " << inf_norm_A << "\n" - << "\tResidual inf norm : " << inf_norm_res << "\n" - << "\tSolution inf norm : " << inf_norm_x << "\n" - << "\tNorm of scaled residuals : " << nsr_norm << "\n" - << "\tNorm of scaled residuals (system): " << nsr_system << "\n\n"; - } - return error_sum; - } - - /** - * @brief Verify the computation of the relative residual norm. - * - * The norm value is provided as the input. This function computes - * the relative residual norm for the system that has been set - * by the constructor or (re)setSystem functions. - * - * @param rrn_system - relative residual norm value to be verified - * @return int - 0 if the result is correct, error code otherwise - */ - int checkRelativeResidualNorm(ReSolve::real_type rrn_system) - { - using namespace ReSolve; - int error_sum = 0; - - // Compute residual norm - res_->copyDataFrom(r_, memspace_, memspace_); - norm_res_ = computeResidualNorm(*A_, *x_, *res_, memspace_); - - real_type error = std::abs(norm_rhs_ * rrn_system - norm_res_)/norm_res_; - if (error > 10.0*std::numeric_limits::epsilon()) { - std::cout << "Relative residual norm computation failed:\n"; - std::cout << std::scientific << std::setprecision(16) - << "\tTest value : " << norm_res_/norm_rhs_ << "\n" - << "\tSystemSolver computed : " << rrn_system << "\n\n"; - error_sum++; - } - return error_sum; - } - - /** - * @brief Verify the computation of the residual norm. - * - * The norm value is provided as the input. This function computes - * the residual norm for the system that has been set by the constructor - * or (re)setSystem functions. - * - * @param rrn_system - residual norm value to be verified - * @return int - 0 if the result is correct, error code otherwise - */ - int checkResidualNorm(ReSolve::real_type rn_system) - { - using namespace ReSolve; - int error_sum = 0; - - // Compute residual norm - res_->copyDataFrom(r_, memspace_, memspace_); - norm_res_ = computeResidualNorm(*A_, *x_, *res_, memspace_); - - real_type error = std::abs(rn_system - norm_res_)/norm_res_; - if (error > 10.0*std::numeric_limits::epsilon()) { - std::cout << "Residual norm computation failed:\n"; - std::cout << std::scientific << std::setprecision(16) - << "\tTest value : " << norm_res_ << "\n" - << "\tSystemSolver computed : " << rn_system << "\n\n"; - error_sum++; - } - return error_sum; - } - - private: - /// Compute error norms. - void computeNorms() - { - // Compute rhs and residual norms - res_->copyDataFrom(r_, memspace_, memspace_); - norm_rhs_ = norm2(*r_, memspace_); - norm_res_ = computeResidualNorm(*A_, *x_, *res_, memspace_); - - // Compute norm of scaled residuals - mh_.matrixInfNorm(A_, &inf_norm_A_, memspace_); - inf_norm_x_ = vh_.infNorm(x_, memspace_); - inf_norm_res_ = vh_.infNorm(res_, memspace_); - nsr_norm_ = inf_norm_res_ / (inf_norm_A_ * inf_norm_x_); - } - - /** - * @brief Computes residual norm = || A * x - r ||_2 - * - * @param[in] A - system matrix - * @param[in] x - computed solution of the system - * @param[in,out] r - system right-hand side, residual vector - * @param[in] memspace memory space where to computate the norm - * @return ReSolve::real_type - * - * @post r is overwritten with residual values - */ - ReSolve::real_type computeResidualNorm(ReSolve::matrix::Sparse& A, - ReSolve::vector::Vector& x, - ReSolve::vector::Vector& r, - ReSolve::memory::MemorySpace memspace) - { - using namespace ReSolve::constants; - mh_.matvec(&A, &x, &r, &ONE, &MINUS_ONE, memspace); // r := A * x - r - return norm2(r, memspace); + std::cout << "Relative residual norm computation failed:\n"; + std::cout << std::scientific << std::setprecision(16) + << "\tTest value : " << norm_res_ / norm_rhs_ << "\n" + << "\tSystemSolver computed : " << rrn_system << "\n\n"; + error_sum++; } - - /// Compute L2 norm of vector `r` in memory space `memspace`. - ReSolve::real_type norm2(ReSolve::vector::Vector& r, - ReSolve::memory::MemorySpace memspace) + return error_sum; + } + + /** + * @brief Verify the computation of the residual norm. + * + * The norm value is provided as the input. This function computes + * the residual norm for the system that has been set by the constructor + * or (re)setSystem functions. + * + * @param rrn_system - residual norm value to be verified + * @return int - 0 if the result is correct, error code otherwise + */ + int checkResidualNorm(ReSolve::real_type rn_system) + { + using namespace ReSolve; + int error_sum = 0; + + // Compute residual norm + res_->copyDataFrom(r_, memspace_, memspace_); + norm_res_ = computeResidualNorm(*A_, *x_, *res_, memspace_); + + real_type error = std::abs(rn_system - norm_res_) / norm_res_; + if (error > 10.0 * std::numeric_limits::epsilon()) { - return std::sqrt(vh_.dot(&r, &r, memspace)); + std::cout << "Residual norm computation failed:\n"; + std::cout << std::scientific << std::setprecision(16) + << "\tTest value : " << norm_res_ << "\n" + << "\tSystemSolver computed : " << rn_system << "\n\n"; + error_sum++; } - - private: - ReSolve::matrix::Sparse* A_; ///< pointer to system matrix - ReSolve::vector::Vector* r_; ///< pointer to system right-hand side - ReSolve::vector::Vector* x_; ///< pointer to the computed solution - - ReSolve::MatrixHandler mh_; ///< matrix handler instance - ReSolve::VectorHandler vh_; ///< vector handler instance - - ReSolve::vector::Vector* res_{nullptr}; ///< pointer to residual vector - ReSolve::vector::Vector* x_true_{nullptr}; ///< pointer to solution error vector - - ReSolve::real_type norm_rhs_{0.0}; ///< right-hand side vector norm - ReSolve::real_type norm_res_{0.0}; ///< residual vector norm - - real_type inf_norm_A_{0.0}; ///< infinity norm of matrix A - real_type inf_norm_x_{0.0}; ///< infinity norm of solution x - real_type inf_norm_res_{0.0}; ///< infinity norm of res = A*x - r - real_type nsr_norm_{0.0}; ///< norm of scaled residuals - - ReSolve::memory::MemorySpace memspace_{ReSolve::memory::HOST}; - std::string hardware_backend_{"NONE"}; + return error_sum; + } + + private: + /// Compute error norms. + void computeNorms() + { + // Compute rhs and residual norms + res_->copyDataFrom(r_, memspace_, memspace_); + norm_rhs_ = norm2(*r_, memspace_); + norm_res_ = computeResidualNorm(*A_, *x_, *res_, memspace_); + + // Compute norm of scaled residuals + mh_.matrixInfNorm(A_, &inf_norm_A_, memspace_); + inf_norm_x_ = vh_.infNorm(x_, memspace_); + inf_norm_res_ = vh_.infNorm(res_, memspace_); + nsr_norm_ = inf_norm_res_ / (inf_norm_A_ * inf_norm_x_); + } + + /** + * @brief Computes residual norm = || A * x - r ||_2 + * + * @param[in] A - system matrix + * @param[in] x - computed solution of the system + * @param[in,out] r - system right-hand side, residual vector + * @param[in] memspace memory space where to computate the norm + * @return ReSolve::real_type + * + * @post r is overwritten with residual values + */ + ReSolve::real_type computeResidualNorm(ReSolve::matrix::Sparse& A, + ReSolve::vector::Vector& x, + ReSolve::vector::Vector& r, + ReSolve::memory::MemorySpace memspace) + { + using namespace ReSolve::constants; + mh_.matvec(&A, &x, &r, &ONE, &MINUS_ONE, memspace); // r := A * x - r + return norm2(r, memspace); + } + + /// Compute L2 norm of vector `r` in memory space `memspace`. + ReSolve::real_type norm2(ReSolve::vector::Vector& r, + ReSolve::memory::MemorySpace memspace) + { + return std::sqrt(vh_.dot(&r, &r, memspace)); + } + + private: + ReSolve::matrix::Sparse* A_; ///< pointer to system matrix + ReSolve::vector::Vector* r_; ///< pointer to system right-hand side + ReSolve::vector::Vector* x_; ///< pointer to the computed solution + + ReSolve::MatrixHandler mh_; ///< matrix handler instance + ReSolve::VectorHandler vh_; ///< vector handler instance + + ReSolve::vector::Vector* res_{nullptr}; ///< pointer to residual vector + ReSolve::vector::Vector* x_true_{nullptr}; ///< pointer to solution error vector + + ReSolve::real_type norm_rhs_{0.0}; ///< right-hand side vector norm + ReSolve::real_type norm_res_{0.0}; ///< residual vector norm + + real_type inf_norm_A_{0.0}; ///< infinity norm of matrix A + real_type inf_norm_x_{0.0}; ///< infinity norm of solution x + real_type inf_norm_res_{0.0}; ///< infinity norm of res = A*x - r + real_type nsr_norm_{0.0}; ///< norm of scaled residuals + + ReSolve::memory::MemorySpace memspace_{ReSolve::memory::HOST}; + std::string hardware_backend_{"NONE"}; }; } // namespace examples diff --git a/examples/experimental/r_KLU_GLU_matrix_values_update.cpp b/examples/experimental/r_KLU_GLU_matrix_values_update.cpp index ac3300b94..401c1ff3d 100644 --- a/examples/experimental/r_KLU_GLU_matrix_values_update.cpp +++ b/examples/experimental/r_KLU_GLU_matrix_values_update.cpp @@ -1,157 +1,173 @@ -#include -#include #include +#include +#include -#include -#include +#include +#include #include #include #include +#include +#include #include -#include -#include #include // this updates the matrix values to simulate what CFD/optimization software does. using namespace ReSolve::constants; -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // Use the same data types as those you specified in ReSolve build. - using index_type = ReSolve::index_type; - using real_type = ReSolve::real_type; + using index_type = ReSolve::index_type; + using real_type = ReSolve::real_type; using vector_type = ReSolve::vector::Vector; using matrix_type = ReSolve::matrix::Sparse; (void) argc; // TODO: Check if the number of input parameters is correct. - std::string matrixFileName = argv[1]; - std::string rhsFileName = argv[2]; + std::string matrixFileName = argv[1]; + std::string rhsFileName = argv[2]; index_type numSystems = atoi(argv[3]); - std::cout<<"Family mtx file name: "<< matrixFileName << ", total number of matrices: "<initializeHandles(); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); - real_type* rhs = nullptr; - real_type* x = nullptr; + ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); + ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); + real_type* rhs = nullptr; + real_type* x = nullptr; vector_type* vec_rhs = nullptr; - vector_type* vec_x = nullptr; - vector_type* vec_r = nullptr; + vector_type* vec_x = nullptr; + vector_type* vec_r = nullptr; - ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; + ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; ReSolve::LinSolverDirectCuSolverGLU* GLU = new ReSolve::LinSolverDirectCuSolverGLU(workspace_CUDA); for (int i = 0; i < numSystems; ++i) { index_type j = 4 + i * 2; - fileId = argv[j]; - rhsId = argv[j + 1]; + fileId = argv[j]; + rhsId = argv[j + 1]; matrixFileNameFull = ""; - rhsFileNameFull = ""; + rhsFileNameFull = ""; // Read matrix first matrixFileNameFull = matrixFileName + fileId + ".mtx"; - rhsFileNameFull = rhsFileName + rhsId + ".mtx"; - std::cout << std::endl << std::endl << std::endl; - std::cout << "========================================================================================================================"<getNumRows()]; + rhs = ReSolve::io::createArrayFromFile(rhs_file); + x = new real_type[A->getNumRows()]; vec_rhs = new vector_type(A->getNumRows()); - vec_x = new vector_type(A->getNumRows()); - vec_x->allocate(ReSolve::memory::HOST);//for KLU + vec_x = new vector_type(A->getNumRows()); + vec_x->allocate(ReSolve::memory::HOST); // for KLU vec_x->allocate(ReSolve::memory::DEVICE); vec_r = new vector_type(A->getNumRows()); - } else { - if (i==1) { + } + else + { + if (i == 1) + { A_exp = ReSolve::io::createCsrFromFile(mat_file, is_expand_symmetric); - } else { + } + else + { ReSolve::io::updateMatrixFromFile(mat_file, A_exp); } - std::cout<<"Updating values of A_coo!"<copyValues(A_exp->getValues(ReSolve::memory::HOST), ReSolve::memory::HOST, ReSolve::memory::HOST); - //ReSolve::io::updateMatrixFromFile(mat_file, A); + // ReSolve::io::updateMatrixFromFile(mat_file, A); ReSolve::io::updateArrayFromFile(rhs_file, &rhs); } // Copy matrix data to device A->syncData(ReSolve::memory::DEVICE); - std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x "<< A->getNumColumns() - << ", nnz: " << A->getNnz() + std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x " << A->getNumColumns() + << ", nnz: " << A->getNnz() << ", symmetric? " << A->symmetric() - << ", Expanded? " << A->expanded() << std::endl; + << ", Expanded? " << A->expanded() << std::endl; mat_file.close(); rhs_file.close(); // Update host and device data. - if (i < 1) { + if (i < 1) + { vec_rhs->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - } else { + } + else + { vec_rhs->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } std::cout << "CSR matrix loaded. Expanded NNZ: " << A->getNnz() << std::endl; - //Now call direct solver + // Now call direct solver int status; - if (i < 1){ + if (i < 1) + { KLU->setup(A); status = KLU->analyze(); - std::cout<<"KLU analysis status: "<factorize(); - std::cout<<"KLU factorization status: "<getLFactor(); matrix_type* U = KLU->getUFactor(); - if (L == nullptr) {printf("ERROR");} + if (L == nullptr) + { + printf("ERROR"); + } index_type* P = KLU->getPOrdering(); index_type* Q = KLU->getQOrdering(); GLU->setup(A, L, U, P, Q); status = GLU->solve(vec_rhs, vec_x); - std::cout<<"GLU solve status: "<solve(vec_rhs, vec_x); // std::cout<<"KLU solve status: "<refactorize(); - std::cout<<"Using CUSOLVER GLU"<refactorize(); + std::cout << "Using CUSOLVER GLU" << std::endl; status = GLU->refactorize(); - std::cout<<"CUSOLVER GLU refactorization status: "<solve(vec_rhs, vec_x); - std::cout<<"CUSOLVER GLU solve status: "<copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUS_ONE, ReSolve::memory::DEVICE); @@ -160,18 +176,18 @@ int main(int argc, char *argv[]) << sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)) << "\n"; } - //now DELETE - delete A; - delete A_exp; - delete KLU; - delete GLU; - delete [] x; - delete [] rhs; - delete vec_r; - delete vec_x; - delete workspace_CUDA; - delete matrix_handler; - delete vector_handler; - - return 0; - } + // now DELETE + delete A; + delete A_exp; + delete KLU; + delete GLU; + delete[] x; + delete[] rhs; + delete vec_r; + delete vec_x; + delete workspace_CUDA; + delete matrix_handler; + delete vector_handler; + + return 0; +} diff --git a/examples/experimental/r_KLU_cusolverrf_redo_factorization.cpp b/examples/experimental/r_KLU_cusolverrf_redo_factorization.cpp index 76f4d5920..fb4854256 100644 --- a/examples/experimental/r_KLU_cusolverrf_redo_factorization.cpp +++ b/examples/experimental/r_KLU_cusolverrf_redo_factorization.cpp @@ -1,34 +1,34 @@ -#include -#include #include +#include +#include +#include +#include #include -#include #include -#include -#include +#include #include +#include +#include #include -#include -#include #include using namespace ReSolve::constants; -int main(int argc, char *argv[] ) +int main(int argc, char* argv[]) { // Use the same data types as those you specified in ReSolve build. - using index_type = ReSolve::index_type; - using real_type = ReSolve::real_type; + using index_type = ReSolve::index_type; + using real_type = ReSolve::real_type; using vector_type = ReSolve::vector::Vector; (void) argc; // TODO: Check if the number of input parameters is correct. - std::string matrixFileName = argv[1]; - std::string rhsFileName = argv[2]; + std::string matrixFileName = argv[1]; + std::string rhsFileName = argv[2]; index_type numSystems = atoi(argv[3]); - std::cout<<"Family mtx file name: "<< matrixFileName << ", total number of matrices: "<initializeHandles(); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); - real_type* rhs = nullptr; - real_type* x = nullptr; + ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); + ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); + real_type* rhs = nullptr; + real_type* x = nullptr; vector_type* vec_rhs = nullptr; vector_type* vec_x = nullptr; vector_type* vec_r = nullptr; - ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; - ReSolve::LinSolverDirectCuSolverRf* Rf = new ReSolve::LinSolverDirectCuSolverRf(); + ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; + ReSolve::LinSolverDirectCuSolverRf* Rf = new ReSolve::LinSolverDirectCuSolverRf(); real_type res_nrm = 0.0; - real_type b_nrm = 0.0; + real_type b_nrm = 0.0; // We need them. They hold a POINTER. Don't delete them here. KLU deletes them. ReSolve::matrix::Csc* L_csc; ReSolve::matrix::Csc* U_csc; - index_type* P; - index_type* Q; + index_type* P; + index_type* Q; - int status = 0; + int status = 0; int status_refactor = 0; for (int i = 0; i < numSystems; ++i) { index_type j = 4 + i * 2; - fileId = argv[j]; - rhsId = argv[j + 1]; + fileId = argv[j]; + rhsId = argv[j + 1]; matrixFileNameFull = ""; - rhsFileNameFull = ""; + rhsFileNameFull = ""; // Read matrix first matrixFileNameFull = matrixFileName + fileId + ".mtx"; - rhsFileNameFull = rhsFileName + rhsId + ".mtx"; - std::cout << std::endl << std::endl << std::endl; - std::cout << "========================================================================================================================"<getNumRows()]; + rhs = ReSolve::io::createArrayFromFile(rhs_file); + x = new real_type[A->getNumRows()]; vec_rhs = new vector_type(A->getNumRows()); - vec_x = new vector_type(A->getNumRows()); - vec_r = new vector_type(A->getNumRows()); - } else { + vec_x = new vector_type(A->getNumRows()); + vec_r = new vector_type(A->getNumRows()); + } + else + { ReSolve::io::updateMatrixFromFile(mat_file, A); ReSolve::io::updateArrayFromFile(rhs_file, &rhs); } // Copy matrix data to device A->syncData(ReSolve::memory::DEVICE); - std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x "<< A->getNumColumns() - << ", nnz: " << A->getNnz() + std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x " << A->getNumColumns() + << ", nnz: " << A->getNnz() << ", symmetric? " << A->symmetric() - << ", Expanded? " << A->expanded() << std::endl; + << ", Expanded? " << A->expanded() << std::endl; mat_file.close(); rhs_file.close(); // Update host and device data. - if (i < 2) { + if (i < 2) + { vec_rhs->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - } else { + } + else + { vec_rhs->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } std::cout << "CSR matrix loaded. Expanded NNZ: " << A->getNnz() << std::endl; - //Now call direct solver - if (i < 2) { + // Now call direct solver + if (i < 2) + { KLU->setup(A); status = KLU->analyze(); - std::cout<<"KLU analysis status: "<factorize(); - std::cout<<"KLU factorization status: "<solve(vec_rhs, vec_x); - std::cout<<"KLU solve status: "<getLFactor(); - U_csc = (ReSolve::matrix::Csc*) KLU->getUFactor(); + std::cout << "KLU solve status: " << status << std::endl; + if (i == 1) + { + L_csc = (ReSolve::matrix::Csc*) KLU->getLFactor(); + U_csc = (ReSolve::matrix::Csc*) KLU->getUFactor(); ReSolve::matrix::Csr* L = new ReSolve::matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); ReSolve::matrix::Csr* U = new ReSolve::matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); L_csc->syncData(ReSolve::memory::DEVICE); U_csc->syncData(ReSolve::memory::DEVICE); - matrix_handler->csc2csr(L_csc,L, ReSolve::memory::DEVICE); - matrix_handler->csc2csr(U_csc,U, ReSolve::memory::DEVICE); - if (L == nullptr) { + matrix_handler->csc2csr(L_csc, L, ReSolve::memory::DEVICE); + matrix_handler->csc2csr(U_csc, U, ReSolve::memory::DEVICE); + if (L == nullptr) + { std::cout << "ERROR\n"; } P = KLU->getPOrdering(); @@ -151,12 +162,14 @@ int main(int argc, char *argv[] ) delete L; delete U; } - } else { - std::cout<<"Using cusolver rf"<refactorize(); - std::cout<<"cusolver rf refactorization status: "<solve(vec_rhs, vec_x); - std::cout<<"cusolver rf solve status: "<copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); @@ -167,20 +180,24 @@ int main(int argc, char *argv[] ) b_nrm = sqrt(vector_handler->dot(vec_rhs, vec_rhs, ReSolve::memory::DEVICE)); std::cout << "\t2-Norm of the residual: " << std::scientific << std::setprecision(16) - << res_nrm/b_nrm << "\n"; - if (((res_nrm/b_nrm > 1e-7 ) && (!std::isnan(res_nrm))) || (status_refactor != 0 )) { - if ((res_nrm/b_nrm > 1e-7 )) { + << res_nrm / b_nrm << "\n"; + if (((res_nrm / b_nrm > 1e-7) && (!std::isnan(res_nrm))) || (status_refactor != 0)) + { + if ((res_nrm / b_nrm > 1e-7)) + { std::cout << "\n \t !!! ALERT !!! Residual norm is too large; redoing KLU symbolic and numeric factorization. !!! ALERT !!! \n \n"; - } else { + } + else + { std::cout << "\n \t !!! ALERT !!! cuSolverRf crashed; redoing KLU symbolic and numeric factorization. !!! ALERT !!! \n \n"; } KLU->setup(A); status = KLU->analyze(); - std::cout<<"KLU analysis status: "<factorize(); - std::cout<<"KLU factorization status: "<solve(vec_rhs, vec_x); - std::cout<<"KLU solve status: "<copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vec_r->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); @@ -190,10 +207,9 @@ int main(int argc, char *argv[] ) matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUS_ONE, ReSolve::memory::DEVICE); res_nrm = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); - std::cout <<"\t New residual norm: " + std::cout << "\t New residual norm: " << std::scientific << std::setprecision(16) - << res_nrm/b_nrm << "\n"; - + << res_nrm / b_nrm << "\n"; L_csc = (ReSolve::matrix::Csc*) KLU->getLFactor(); U_csc = (ReSolve::matrix::Csc*) KLU->getUFactor(); @@ -214,12 +230,12 @@ int main(int argc, char *argv[] ) } } // for (int i = 0; i < numSystems; ++i) - //now DELETE + // now DELETE delete A; delete KLU; delete Rf; - delete [] x; - delete [] rhs; + delete[] x; + delete[] rhs; delete vec_r; delete vec_x; delete workspace_CUDA; diff --git a/examples/experimental/r_KLU_rf_FGMRES_reuse_factorization.cpp b/examples/experimental/r_KLU_rf_FGMRES_reuse_factorization.cpp index 0fd0c86a6..8f9a6b1e5 100644 --- a/examples/experimental/r_KLU_rf_FGMRES_reuse_factorization.cpp +++ b/examples/experimental/r_KLU_rf_FGMRES_reuse_factorization.cpp @@ -1,36 +1,36 @@ -#include -#include #include +#include +#include #include +#include +#include +#include #include -#include #include -#include -#include +#include #include +#include +#include #include -#include -#include -#include #include using namespace ReSolve::constants; -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // Use the same data types as those you specified in ReSolve build. - using index_type = ReSolve::index_type; - using real_type = ReSolve::real_type; + using index_type = ReSolve::index_type; + using real_type = ReSolve::real_type; using vector_type = ReSolve::vector::Vector; (void) argc; // TODO: Check if the number of input parameters is correct. - std::string matrixFileName = argv[1]; - std::string rhsFileName = argv[2]; + std::string matrixFileName = argv[1]; + std::string rhsFileName = argv[2]; index_type numSystems = atoi(argv[3]); - std::cout<<"Family mtx file name: "<< matrixFileName << ", total number of matrices: "<initializeHandles(); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); - real_type* rhs = nullptr; - real_type* x = nullptr; + ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); + ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); + real_type* rhs = nullptr; + real_type* x = nullptr; vector_type* vec_rhs = nullptr; vector_type* vec_x = nullptr; @@ -52,87 +52,95 @@ int main(int argc, char *argv[]) ReSolve::GramSchmidt* GS = new ReSolve::GramSchmidt(vector_handler, ReSolve::GramSchmidt::CGS2); - ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; - ReSolve::LinSolverDirectCuSolverRf* Rf = new ReSolve::LinSolverDirectCuSolverRf; - ReSolve::LinSolverIterativeFGMRES* FGMRES = new ReSolve::LinSolverIterativeFGMRES(matrix_handler, vector_handler, GS); + ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; + ReSolve::LinSolverDirectCuSolverRf* Rf = new ReSolve::LinSolverDirectCuSolverRf; + ReSolve::LinSolverIterativeFGMRES* FGMRES = new ReSolve::LinSolverIterativeFGMRES(matrix_handler, vector_handler, GS); for (int i = 0; i < numSystems; ++i) { index_type j = 4 + i * 2; - fileId = argv[j]; - rhsId = argv[j + 1]; + fileId = argv[j]; + rhsId = argv[j + 1]; matrixFileNameFull = ""; - rhsFileNameFull = ""; + rhsFileNameFull = ""; // Read matrix first matrixFileNameFull = matrixFileName + fileId + ".mtx"; - rhsFileNameFull = rhsFileName + rhsId + ".mtx"; - std::cout << std::endl << std::endl << std::endl; - std::cout << "========================================================================================================================"<getNumRows()]; + if (i == 0) + { + A = ReSolve::io::createCsrFromFile(mat_file, is_expand_symmetric); + rhs = ReSolve::io::createArrayFromFile(rhs_file); + x = new real_type[A->getNumRows()]; vec_rhs = new vector_type(A->getNumRows()); - vec_x = new vector_type(A->getNumRows()); - vec_x->allocate(ReSolve::memory::HOST);//for KLU + vec_x = new vector_type(A->getNumRows()); + vec_x->allocate(ReSolve::memory::HOST); // for KLU vec_x->allocate(ReSolve::memory::DEVICE); vec_r = new vector_type(A->getNumRows()); } - else { + else + { ReSolve::io::updateMatrixFromFile(mat_file, A); ReSolve::io::updateArrayFromFile(rhs_file, &rhs); } // Copy matrix data to device A->syncData(ReSolve::memory::DEVICE); - std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x "<< A->getNumColumns() - << ", nnz: " << A->getNnz() + std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x " << A->getNumColumns() + << ", nnz: " << A->getNnz() << ", symmetric? " << A->symmetric() - << ", Expanded? " << A->expanded() << std::endl; + << ", Expanded? " << A->expanded() << std::endl; mat_file.close(); rhs_file.close(); // Update host and device data. - if (i < 2) { + if (i < 2) + { vec_rhs->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); vec_rhs->setDataUpdated(ReSolve::memory::HOST); - } else { + } + else + { A->syncData(ReSolve::memory::DEVICE); vec_rhs->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } std::cout << "CSR matrix loaded. Expanded NNZ: " << A->getNnz() << std::endl; // Now call direct solver - int status = 0; + int status = 0; real_type norm_b; - if (i < 2){ + if (i < 2) + { KLU->setup(A); matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); status = KLU->analyze(); - std::cout<<"KLU analysis status: "<factorize(); - std::cout<<"KLU factorization status: "<solve(vec_rhs, vec_x); - std::cout<<"KLU solve status: "<copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); norm_b = vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE); norm_b = sqrt(norm_b); @@ -140,31 +148,35 @@ int main(int argc, char *argv[]) matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUS_ONE, ReSolve::memory::DEVICE); std::cout << "\t 2-Norm of the residual : " << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE))/norm_b << "\n"; - if (i == 1) { + << sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)) / norm_b << "\n"; + if (i == 1) + { ReSolve::matrix::Csc* L_csc = (ReSolve::matrix::Csc*) KLU->getLFactor(); ReSolve::matrix::Csc* U_csc = (ReSolve::matrix::Csc*) KLU->getUFactor(); - ReSolve::matrix::Csr* L = new ReSolve::matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); - ReSolve::matrix::Csr* U = new ReSolve::matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); + ReSolve::matrix::Csr* L = new ReSolve::matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); + ReSolve::matrix::Csr* U = new ReSolve::matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); L_csc->syncData(ReSolve::memory::DEVICE); U_csc->syncData(ReSolve::memory::DEVICE); - matrix_handler->csc2csr(L_csc,L, ReSolve::memory::DEVICE); - matrix_handler->csc2csr(U_csc,U, ReSolve::memory::DEVICE); - if (L == nullptr) { + matrix_handler->csc2csr(L_csc, L, ReSolve::memory::DEVICE); + matrix_handler->csc2csr(U_csc, U, ReSolve::memory::DEVICE); + if (L == nullptr) + { std::cout << "ERROR\n"; } index_type* P = KLU->getPOrdering(); index_type* Q = KLU->getQOrdering(); Rf->setup(A, L, U, P, Q); - std::cout<<"about to set FGMRES" <setRestart(1000); FGMRES->setMaxit(2000); FGMRES->setup(A); } - } else { - //status = KLU->refactorize(); - std::cout<<"Using CUSOLVER RF"<refactorize(); + std::cout << "Using CUSOLVER RF" << std::endl; if ((i % 2 == 0)) { status = Rf->refactorize(); @@ -174,12 +186,12 @@ int main(int argc, char *argv[]) status = Rf->solve(vec_rhs, vec_x); FGMRES->setupPreconditioner("LU", Rf); } - //if (i%2!=0) vec_x->setToZero(ReSolve::memory::DEVICE); - real_type norm_x = vector_handler->dot(vec_x, vec_x, ReSolve::memory::DEVICE); + // if (i%2!=0) vec_x->setToZero(ReSolve::memory::DEVICE); + real_type norm_x = vector_handler->dot(vec_x, vec_x, ReSolve::memory::DEVICE); std::cout << "Norm of x (before solve): " << std::scientific << std::setprecision(16) << sqrt(norm_x) << "\n"; - std::cout<<"CUSOLVER RF solve status: "<copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vec_r->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); @@ -193,7 +205,7 @@ int main(int argc, char *argv[]) std::cout << "\t 2-Norm of the residual (before IR): " << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE))/norm_b << "\n"; + << sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)) / norm_b << "\n"; std::cout << "\t 2-Norm of the RIGHT HAND SIDE: " << std::scientific << std::setprecision(16) << norm_b << "\n"; @@ -203,24 +215,22 @@ int main(int argc, char *argv[]) std::cout << "FGMRES: init nrm: " << std::scientific << std::setprecision(16) - << FGMRES->getInitResidualNorm()/norm_b + << FGMRES->getInitResidualNorm() / norm_b << " final nrm: " - << FGMRES->getFinalResidualNorm()/norm_b + << FGMRES->getFinalResidualNorm() / norm_b << " iter: " << FGMRES->getNumIter() << "\n"; norm_x = vector_handler->dot(vec_x, vec_x, ReSolve::memory::DEVICE); std::cout << "Norm of x (after IR): " << std::scientific << std::setprecision(16) << sqrt(norm_x) << "\n"; } - - } delete A; delete KLU; delete Rf; - delete [] x; - delete [] rhs; + delete[] x; + delete[] rhs; delete vec_r; delete vec_x; delete workspace_CUDA; diff --git a/examples/experimental/r_KLU_rocsolverrf_redo_factorization.cpp b/examples/experimental/r_KLU_rocsolverrf_redo_factorization.cpp index 97ba3b9c2..61822df28 100644 --- a/examples/experimental/r_KLU_rocsolverrf_redo_factorization.cpp +++ b/examples/experimental/r_KLU_rocsolverrf_redo_factorization.cpp @@ -1,34 +1,34 @@ -#include -#include #include +#include +#include +#include +#include #include -#include #include -#include -#include +#include #include +#include +#include #include -#include -#include #include using namespace ReSolve::constants; -int main(int argc, char *argv[] ) +int main(int argc, char* argv[]) { // Use the same data types as those you specified in ReSolve build. - using index_type = ReSolve::index_type; - using real_type = ReSolve::real_type; + using index_type = ReSolve::index_type; + using real_type = ReSolve::real_type; using vector_type = ReSolve::vector::Vector; (void) argc; // TODO: Check if the number of input parameters is correct. - std::string matrixFileName = argv[1]; - std::string rhsFileName = argv[2]; + std::string matrixFileName = argv[1]; + std::string rhsFileName = argv[2]; index_type numSystems = atoi(argv[3]); std::cout << "Family mtx file name: " << matrixFileName << ", total number of matrices: " << numSystems << std::endl; - std::cout << "Family rhs file name: " << rhsFileName << ", total number of RHSes: " << numSystems << std::endl; + std::cout << "Family rhs file name: " << rhsFileName << ", total number of RHSes: " << numSystems << std::endl; std::string fileId; std::string rhsId; @@ -39,17 +39,17 @@ int main(int argc, char *argv[] ) ReSolve::LinAlgWorkspaceHIP* workspace_HIP = new ReSolve::LinAlgWorkspaceHIP; workspace_HIP->initializeHandles(); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_HIP); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_HIP); - real_type* rhs = nullptr; - real_type* x = nullptr; + ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_HIP); + ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_HIP); + real_type* rhs = nullptr; + real_type* x = nullptr; vector_type* vec_rhs = nullptr; vector_type* vec_x = nullptr; vector_type* vec_r = nullptr; - ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; - ReSolve::LinSolverDirectRocSolverRf* Rf = new ReSolve::LinSolverDirectRocSolverRf(workspace_HIP); + ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; + ReSolve::LinSolverDirectRocSolverRf* Rf = new ReSolve::LinSolverDirectRocSolverRf(workspace_HIP); real_type res_nrm; real_type b_nrm; @@ -57,84 +57,96 @@ int main(int argc, char *argv[] ) for (int i = 0; i < numSystems; ++i) { index_type j = 4 + i * 2; - fileId = argv[j]; - rhsId = argv[j + 1]; + fileId = argv[j]; + rhsId = argv[j + 1]; matrixFileNameFull = ""; - rhsFileNameFull = ""; + rhsFileNameFull = ""; // Read matrix first matrixFileNameFull = matrixFileName + fileId + ".mtx"; - rhsFileNameFull = rhsFileName + rhsId + ".mtx"; - std::cout << std::endl << std::endl << std::endl; - std::cout << "========================================================================================================================"<getNumRows()]; + rhs = ReSolve::io::createArrayFromFile(rhs_file); + x = new real_type[A->getNumRows()]; vec_rhs = new vector_type(A->getNumRows()); - vec_x = new vector_type(A->getNumRows()); - vec_r = new vector_type(A->getNumRows()); - } else { + vec_x = new vector_type(A->getNumRows()); + vec_r = new vector_type(A->getNumRows()); + } + else + { ReSolve::io::updateMatrixFromFile(mat_file, A); ReSolve::io::updateArrayFromFile(rhs_file, &rhs); } // Copy matrix data to device A->syncData(ReSolve::memory::DEVICE); - std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x "<< A->getNumColumns() - << ", nnz: " << A->getNnz() + std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x " << A->getNumColumns() + << ", nnz: " << A->getNnz() << ", symmetric? " << A->symmetric() - << ", Expanded? " << A->expanded() << std::endl; + << ", Expanded? " << A->expanded() << std::endl; mat_file.close(); rhs_file.close(); // Update host and device data. - if (i < 2) { + if (i < 2) + { vec_rhs->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - } else { + } + else + { vec_rhs->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } std::cout << "CSR matrix loaded. Expanded NNZ: " << A->getNnz() << std::endl; - //Now call direct solver + // Now call direct solver int status = 0; - if (i < 2){ + if (i < 2) + { KLU->setup(A); status = KLU->analyze(); - std::cout<<"KLU analysis status: " << status << std::endl; + std::cout << "KLU analysis status: " << status << std::endl; status = KLU->factorize(); std::cout << "KLU factorization status: " << status << std::endl; status = KLU->solve(vec_rhs, vec_x); std::cout << "KLU solve status: " << status << std::endl; - if (i == 1) { + if (i == 1) + { ReSolve::matrix::Csc* L = (ReSolve::matrix::Csc*) KLU->getLFactor(); ReSolve::matrix::Csc* U = (ReSolve::matrix::Csc*) KLU->getUFactor(); - index_type* P = KLU->getPOrdering(); - index_type* Q = KLU->getQOrdering(); + index_type* P = KLU->getPOrdering(); + index_type* Q = KLU->getQOrdering(); vec_rhs->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); Rf->setup(A, L, U, P, Q, vec_rhs); Rf->refactorize(); } - } else { + } + else + { std::cout << "Using rocsolver rf" << std::endl; status = Rf->refactorize(); std::cout << "rocsolver rf refactorization status: " << status << std::endl; @@ -147,55 +159,55 @@ int main(int argc, char *argv[] ) matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUS_ONE, ReSolve::memory::DEVICE); res_nrm = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); - b_nrm = sqrt(vector_handler->dot(vec_rhs, vec_rhs, ReSolve::memory::DEVICE)); + b_nrm = sqrt(vector_handler->dot(vec_rhs, vec_rhs, ReSolve::memory::DEVICE)); std::cout << "\t 2-Norm of the residual: " << std::scientific << std::setprecision(16) - << res_nrm/b_nrm << "\n"; - if (!isnan(res_nrm)) { - if (res_nrm/b_nrm > 1e-7 ) { - std::cout << "\n \t !!! ALERT !!! Residual norm is too large; " - << "redoing KLU symbolic and numeric factorization. !!! ALERT !!! \n\n"; - - KLU->setup(A); - status = KLU->analyze(); - std::cout << "KLU analysis status: " << status << std::endl; - status = KLU->factorize(); - std::cout << "KLU factorization status: " << status << std::endl; - status = KLU->solve(vec_rhs, vec_x); - std::cout << "KLU solve status: " << status << std::endl; - - vec_rhs->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - vec_r->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + << res_nrm / b_nrm << "\n"; + if (!isnan(res_nrm)) + { + if (res_nrm / b_nrm > 1e-7) + { + std::cout << "\n \t !!! ALERT !!! Residual norm is too large; " + << "redoing KLU symbolic and numeric factorization. !!! ALERT !!! \n\n"; + + KLU->setup(A); + status = KLU->analyze(); + std::cout << "KLU analysis status: " << status << std::endl; + status = KLU->factorize(); + std::cout << "KLU factorization status: " << status << std::endl; + status = KLU->solve(vec_rhs, vec_x); + std::cout << "KLU solve status: " << status << std::endl; - matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); + vec_rhs->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + vec_r->copyDataFrom(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUS_ONE, ReSolve::memory::DEVICE); - res_nrm = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); + matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); - std::cout << "\t New residual norm: " - << std::scientific << std::setprecision(16) - << res_nrm/b_nrm << "\n"; + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUS_ONE, ReSolve::memory::DEVICE); + res_nrm = sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE)); + std::cout << "\t New residual norm: " + << std::scientific << std::setprecision(16) + << res_nrm / b_nrm << "\n"; - ReSolve::matrix::Csc* L = (ReSolve::matrix::Csc*) KLU->getLFactor(); - ReSolve::matrix::Csc* U = (ReSolve::matrix::Csc*) KLU->getUFactor(); + ReSolve::matrix::Csc* L = (ReSolve::matrix::Csc*) KLU->getLFactor(); + ReSolve::matrix::Csc* U = (ReSolve::matrix::Csc*) KLU->getUFactor(); - index_type* P = KLU->getPOrdering(); - index_type* Q = KLU->getQOrdering(); + index_type* P = KLU->getPOrdering(); + index_type* Q = KLU->getQOrdering(); - Rf->setup(A, L, U, P, Q, vec_rhs); - } + Rf->setup(A, L, U, P, Q, vec_rhs); + } } - } // for (int i = 0; i < numSystems; ++i) - //now DELETE + // now DELETE delete A; delete KLU; delete Rf; - delete [] x; - delete [] rhs; + delete[] x; + delete[] rhs; delete vec_r; delete vec_x; delete workspace_HIP; diff --git a/examples/gluRefactor.cpp b/examples/gluRefactor.cpp index 21454342f..419c65903 100644 --- a/examples/gluRefactor.cpp +++ b/examples/gluRefactor.cpp @@ -2,32 +2,32 @@ * @file gpuRefactor.cpp * @author Slaven Peles (peless@ornl.gov) * @author Kasia Swirydowicz (kasia.swirydowicz@amd.com) - * + * * @brief Example of solving linear systems using refactorization on a GPU. - * + * * A series of linear systems is read from files specified at command line * input and solved with refactorization approach on GPU. First system * is solved with KLU solver on CPU, using full factorization, and the * subsequent systems are solved with GLU solver on GPU, using refactorization * approach. It is assumed that all systems in the series have the same * sparsity pattern, so the analysis is done only once for the entire series. - * + * */ -#include -#include #include +#include #include +#include -#include +#include +#include #include -#include -#include +#include #include +#include +#include +#include #include -#include #include -#include -#include #ifdef RESOLVE_USE_CUDA #include @@ -52,10 +52,10 @@ void printHelpInfo() /// Prototype of the example function template -static int gluRefactor(int argc, char *argv[]); +static int gluRefactor(int argc, char* argv[]); /// Main function selects example to be run. -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { #ifdef RESOLVE_USE_CUDA gluRefactor(argc, argv); @@ -75,26 +75,30 @@ int main(int argc, char *argv[]) * @return 0 if the example ran successfully, -1 otherwise */ template -int gluRefactor(int argc, char *argv[]) +int gluRefactor(int argc, char* argv[]) { using namespace ReSolve::examples; using namespace ReSolve; - using index_type = ReSolve::index_type; + using index_type = ReSolve::index_type; using vector_type = ReSolve::vector::Vector; CliOptions options(argc, argv); bool is_help = options.hasKey("-h"); - if (is_help) { + if (is_help) + { printHelpInfo(); return 0; } index_type num_systems = 0; - auto opt = options.getParamFromKey("-n"); - if (opt) { + auto opt = options.getParamFromKey("-n"); + if (opt) + { num_systems = atoi((opt->second).c_str()); - } else { + } + else + { std::cout << "Incorrect input!\n\n"; printHelpInfo(); return 1; @@ -102,9 +106,12 @@ int gluRefactor(int argc, char *argv[]) std::string matrix_pathname(""); opt = options.getParamFromKey("-m"); - if (opt) { + if (opt) + { matrix_pathname = opt->second; - } else { + } + else + { std::cout << "Incorrect input!\n\n"; printHelpInfo(); return 1; @@ -112,9 +119,12 @@ int gluRefactor(int argc, char *argv[]) std::string rhs_pathname(""); opt = options.getParamFromKey("-r"); - if (opt) { + if (opt) + { rhs_pathname = opt->second; - } else { + } + else + { std::cout << "Incorrect input!\n\n"; printHelpInfo(); return 1; @@ -122,16 +132,19 @@ int gluRefactor(int argc, char *argv[]) std::string file_extension(""); opt = options.getParamFromKey("-e"); - if (opt) { + if (opt) + { file_extension = opt->second; - } else { + } + else + { file_extension = "mtx"; } - std::cout << "Family mtx file name: " << matrix_pathname + std::cout << "Family mtx file name: " << matrix_pathname << ", total number of matrices: " << num_systems << "\n" - << "Family rhs file name: " << rhs_pathname - << ", total number of RHSes: " << num_systems << "\n"; + << "Family rhs file name: " << rhs_pathname + << ", total number of RHSes: " << num_systems << "\n"; // Create workspace workspace_type workspace; @@ -142,47 +155,51 @@ int gluRefactor(int argc, char *argv[]) std::cout << "gluRefactor with " << helper.getHardwareBackend() << " backend\n"; // Direct solvers instantiation - LinSolverDirectKLU KLU; + LinSolverDirectKLU KLU; LinSolverDirectCuSolverGLU Rf(&workspace); // Pointers to matrix and vectors defining the linear system - matrix::Csr* A = nullptr; + matrix::Csr* A = nullptr; vector_type* vec_rhs = nullptr; vector_type* vec_x = nullptr; RESOLVE_RANGE_PUSH(__FUNCTION__); - for (int i = 0; i < num_systems; ++i) { + for (int i = 0; i < num_systems; ++i) + { std::cout << "System " << i << ":\n"; RESOLVE_RANGE_PUSH("File input"); std::ostringstream matname; std::ostringstream rhsname; matname << matrix_pathname << std::setfill('0') << std::setw(2) << i << "." << file_extension; - rhsname << rhs_pathname << std::setfill('0') << std::setw(2) << i << "." << file_extension; + rhsname << rhs_pathname << std::setfill('0') << std::setw(2) << i << "." << file_extension; std::string matrix_pathname_full = matname.str(); std::string rhs_pathname_full = rhsname.str(); // Read matrix and right-hand-side vector std::ifstream mat_file(matrix_pathname_full); - if(!mat_file.is_open()) + if (!mat_file.is_open()) { std::cout << "Failed to open file " << matrix_pathname_full << "\n"; return -1; } std::ifstream rhs_file(rhs_pathname_full); - if(!rhs_file.is_open()) + if (!rhs_file.is_open()) { std::cout << "Failed to open file " << rhs_pathname_full << "\n"; return -1; } bool is_expand_symmetric = true; - if (i == 0) { - A = io::createCsrFromFile(mat_file, is_expand_symmetric); + if (i == 0) + { + A = io::createCsrFromFile(mat_file, is_expand_symmetric); vec_rhs = io::createVectorFromFile(rhs_file); - vec_x = new vector_type(A->getNumRows()); + vec_x = new vector_type(A->getNumRows()); vec_x->allocate(memory::HOST); vec_x->allocate(memory::DEVICE); - } else { + } + else + { io::updateMatrixFromFile(mat_file, A); io::updateVectorFromFile(rhs_file, vec_rhs); } @@ -200,7 +217,8 @@ int gluRefactor(int argc, char *argv[]) int status = 0; - if (i < 1) { + if (i < 1) + { RESOLVE_RANGE_PUSH("KLU"); // Setup factorization solver KLU.setup(A); @@ -220,7 +238,8 @@ int gluRefactor(int argc, char *argv[]) // Extract factors and configure refactorization solver matrix::Csc* L = (matrix::Csc*) KLU.getLFactor(); matrix::Csc* U = (matrix::Csc*) KLU.getUFactor(); - if (L == nullptr || U == nullptr) { + if (L == nullptr || U == nullptr) + { std::cout << "Factor extraction from KLU failed!\n"; } index_type* P = KLU.getPOrdering(); @@ -229,7 +248,9 @@ int gluRefactor(int argc, char *argv[]) status = Rf.setup(A, L, U, P, Q); RESOLVE_RANGE_POP("KLU"); - } else { + } + else + { RESOLVE_RANGE_PUSH("Refactorization"); // Refactorize on the device diff --git a/examples/gpuRefactor.cpp b/examples/gpuRefactor.cpp index f1b4e722d..7fe208f35 100644 --- a/examples/gpuRefactor.cpp +++ b/examples/gpuRefactor.cpp @@ -15,28 +15,28 @@ * entire series. * */ -#include -#include #include +#include #include +#include -#include -#include -#include -#include -#include -#include #include #include -#include #include +#include +#include +#include +#include #include +#include +#include +#include #ifdef RESOLVE_USE_CUDA - #include +#include #endif #ifdef RESOLVE_USE_HIP - #include +#include #endif #include "ExampleHelper.hpp" @@ -58,20 +58,20 @@ void printHelpInfo() /// Prototype of the example function template -static int gpuRefactor(int argc, char *argv[]); +static int gpuRefactor(int argc, char* argv[]); /// Main function selects example to be run. -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { - #ifdef RESOLVE_USE_CUDA - gpuRefactor(argc, argv); - #endif +#ifdef RESOLVE_USE_CUDA + gpuRefactor(argc, argv); +#endif - #ifdef RESOLVE_USE_HIP - gpuRefactor(argc, argv); - #endif +#ifdef RESOLVE_USE_HIP + gpuRefactor(argc, argv); +#endif return 0; } @@ -85,17 +85,18 @@ int main(int argc, char *argv[]) * @return 0 if the example ran successfully, -1 otherwise */ template -int gpuRefactor(int argc, char *argv[]) +int gpuRefactor(int argc, char* argv[]) { using namespace ReSolve::examples; using namespace ReSolve; - using index_type = ReSolve::index_type; + using index_type = ReSolve::index_type; using vector_type = ReSolve::vector::Vector; CliOptions options(argc, argv); bool is_help = options.hasKey("-h"); - if (is_help) { + if (is_help) + { printHelpInfo(); return 0; } @@ -103,10 +104,13 @@ int gpuRefactor(int argc, char *argv[]) bool is_iterative_refinement = options.hasKey("-i"); index_type num_systems = 0; - auto opt = options.getParamFromKey("-n"); - if (opt) { + auto opt = options.getParamFromKey("-n"); + if (opt) + { num_systems = atoi((opt->second).c_str()); - } else { + } + else + { std::cout << "Incorrect input!\n\n"; printHelpInfo(); return 1; @@ -114,9 +118,12 @@ int gpuRefactor(int argc, char *argv[]) std::string matrix_pathname(""); opt = options.getParamFromKey("-m"); - if (opt) { + if (opt) + { matrix_pathname = opt->second; - } else { + } + else + { std::cout << "Incorrect input!\n\n"; printHelpInfo(); return 1; @@ -124,9 +131,12 @@ int gpuRefactor(int argc, char *argv[]) std::string rhs_pathname(""); opt = options.getParamFromKey("-r"); - if (opt) { + if (opt) + { rhs_pathname = opt->second; - } else { + } + else + { std::cout << "Incorrect input!\n\n"; printHelpInfo(); return 1; @@ -134,16 +144,19 @@ int gpuRefactor(int argc, char *argv[]) std::string file_extension(""); opt = options.getParamFromKey("-e"); - if (opt) { + if (opt) + { file_extension = opt->second; - } else { + } + else + { file_extension = "mtx"; } - std::cout << "Family mtx file name: " << matrix_pathname + std::cout << "Family mtx file name: " << matrix_pathname << ", total number of matrices: " << num_systems << "\n" - << "Family rhs file name: " << rhs_pathname - << ", total number of RHSes: " << num_systems << "\n"; + << "Family rhs file name: " << rhs_pathname + << ", total number of RHSes: " << num_systems << "\n"; // Create workspace workspace_type workspace; @@ -159,50 +172,54 @@ int gpuRefactor(int argc, char *argv[]) // Direct solvers instantiation LinSolverDirectKLU KLU; - refactor_type Rf(&workspace); + refactor_type Rf(&workspace); // Iterative solver instantiation - GramSchmidt GS(&vector_handler, GramSchmidt::CGS2); + GramSchmidt GS(&vector_handler, GramSchmidt::CGS2); LinSolverIterativeFGMRES FGMRES(&matrix_handler, &vector_handler, &GS); // Pointers to matrix and vectors defining the linear system - matrix::Csr* A = nullptr; + matrix::Csr* A = nullptr; vector_type* vec_rhs = nullptr; vector_type* vec_x = nullptr; RESOLVE_RANGE_PUSH(__FUNCTION__); - for (int i = 0; i < num_systems; ++i) { + for (int i = 0; i < num_systems; ++i) + { std::cout << "System " << i << ":\n"; RESOLVE_RANGE_PUSH("File input"); std::ostringstream matname; std::ostringstream rhsname; matname << matrix_pathname << std::setfill('0') << std::setw(2) << i << "." << file_extension; - rhsname << rhs_pathname << std::setfill('0') << std::setw(2) << i << "." << file_extension; + rhsname << rhs_pathname << std::setfill('0') << std::setw(2) << i << "." << file_extension; std::string matrix_pathname_full = matname.str(); std::string rhs_pathname_full = rhsname.str(); // Read matrix and right-hand-side vector std::ifstream mat_file(matrix_pathname_full); - if(!mat_file.is_open()) + if (!mat_file.is_open()) { std::cout << "Failed to open file " << matrix_pathname_full << "\n"; return -1; } std::ifstream rhs_file(rhs_pathname_full); - if(!rhs_file.is_open()) + if (!rhs_file.is_open()) { std::cout << "Failed to open file " << rhs_pathname_full << "\n"; return -1; } bool is_expand_symmetric = true; - if (i == 0) { - A = io::createCsrFromFile(mat_file, is_expand_symmetric); + if (i == 0) + { + A = io::createCsrFromFile(mat_file, is_expand_symmetric); vec_rhs = io::createVectorFromFile(rhs_file); - vec_x = new vector_type(A->getNumRows()); + vec_x = new vector_type(A->getNumRows()); vec_x->allocate(memory::HOST); vec_x->allocate(memory::DEVICE); - } else { + } + else + { io::updateMatrixFromFile(mat_file, A); io::updateVectorFromFile(rhs_file, vec_rhs); } @@ -220,7 +237,8 @@ int gpuRefactor(int argc, char *argv[]) int status = 0; - if (i == 0) { + if (i == 0) + { RESOLVE_RANGE_PUSH("KLU"); // Setup factorization solver KLU.setup(A); @@ -231,7 +249,8 @@ int gpuRefactor(int argc, char *argv[]) std::cout << "KLU analysis status: " << status << std::endl; } - if (i < 2) { + if (i < 2) + { // Numeric factorization status = KLU.factorize(); std::cout << "KLU factorization status: " << status << std::endl; @@ -244,11 +263,13 @@ int gpuRefactor(int argc, char *argv[]) helper.resetSystem(A, vec_rhs, vec_x); helper.printShortSummary(); - if (i == 1) { + if (i == 1) + { // Extract factors and configure refactorization solver matrix::Csc* L = (matrix::Csc*) KLU.getLFactor(); matrix::Csc* U = (matrix::Csc*) KLU.getUFactor(); - if (L == nullptr || U == nullptr) { + if (L == nullptr || U == nullptr) + { std::cout << "Factor extraction from KLU failed!\n"; } index_type* P = KLU.getPOrdering(); @@ -257,12 +278,15 @@ int gpuRefactor(int argc, char *argv[]) Rf.setup(A, L, U, P, Q, vec_rhs); // Setup iterative refinement solver - if (is_iterative_refinement) { + if (is_iterative_refinement) + { FGMRES.setup(A); } } RESOLVE_RANGE_POP("KLU"); - } else { + } + else + { std::cout << "Using refactorization\n"; RESOLVE_RANGE_PUSH("Refactorization"); @@ -278,13 +302,15 @@ int gpuRefactor(int argc, char *argv[]) helper.printSummary(); RESOLVE_RANGE_PUSH("Iterative refinement"); - if (is_iterative_refinement) { + if (is_iterative_refinement) + { // Setup iterative refinement FGMRES.resetMatrix(A); FGMRES.setupPreconditioner("LU", &Rf); // If refactorization produced finite solution do iterative refinement - if (std::isfinite(helper.getNormRelativeResidual())) { + if (std::isfinite(helper.getNormRelativeResidual())) + { FGMRES.solve(vec_rhs, vec_x); // Print summary diff --git a/examples/kluFactor.cpp b/examples/kluFactor.cpp index 8f1c3b587..e35cb60ec 100644 --- a/examples/kluFactor.cpp +++ b/examples/kluFactor.cpp @@ -11,22 +11,22 @@ * pattern, so the analysis is done only once for the entire series. * */ -#include #include +#include #include +#include "ExampleHelper.hpp" +#include +#include #include -#include #include -#include -#include +#include #include +#include +#include +#include #include -#include -#include #include -#include "ExampleHelper.hpp" -#include using namespace ReSolve::constants; @@ -45,17 +45,18 @@ void printHelpInfo() std::cout << "\t-i\tEnables iterative refinement.\n\n"; } -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // Use the same data types as those you specified in ReSolve build. - using index_type = ReSolve::index_type; + using index_type = ReSolve::index_type; using vector_type = ReSolve::vector::Vector; using namespace ReSolve::examples; using namespace ReSolve; CliOptions options(argc, argv); bool is_help = options.hasKey("-h"); - if (is_help) { + if (is_help) + { printHelpInfo(); return 0; } @@ -63,10 +64,13 @@ int main(int argc, char *argv[]) bool is_iterative_refinement = options.hasKey("-i"); index_type num_systems = 0; - auto opt = options.getParamFromKey("-n"); - if (opt) { + auto opt = options.getParamFromKey("-n"); + if (opt) + { num_systems = std::stoi((opt->second).c_str()); - } else { + } + else + { std::cout << "Incorrect input!\n\n"; printHelpInfo(); return 1; @@ -74,9 +78,12 @@ int main(int argc, char *argv[]) std::string matrix_path_name(""); opt = options.getParamFromKey("-m"); - if (opt) { + if (opt) + { matrix_path_name = opt->second; - } else { + } + else + { std::cout << "Incorrect input!\n\n"; printHelpInfo(); return 1; @@ -84,9 +91,12 @@ int main(int argc, char *argv[]) std::string rhs_path_name(""); opt = options.getParamFromKey("-r"); - if (opt) { + if (opt) + { rhs_path_name = opt->second; - } else { + } + else + { std::cout << "Incorrect input!\n\n"; printHelpInfo(); return 1; @@ -94,9 +104,12 @@ int main(int argc, char *argv[]) std::string file_extension(""); opt = options.getParamFromKey("-e"); - if (opt) { + if (opt) + { file_extension = opt->second; - } else { + } + else + { file_extension = "mtx"; } @@ -105,17 +118,17 @@ int main(int argc, char *argv[]) std::string matrix_file_name_full; std::string rhs_file_name_full; - matrix::Csr* A = nullptr; - LinAlgWorkspaceCpu workspace; + matrix::Csr* A = nullptr; + LinAlgWorkspaceCpu workspace; ExampleHelper helper(workspace); - MatrixHandler matrix_handler(&workspace); - VectorHandler vector_handler(&workspace); + MatrixHandler matrix_handler(&workspace); + VectorHandler vector_handler(&workspace); vector_type* vec_rhs = nullptr; vector_type* vec_x = nullptr; - LinSolverDirectKLU KLU; - GramSchmidt GS(&vector_handler, GramSchmidt::CGS2); + LinSolverDirectKLU KLU; + GramSchmidt GS(&vector_handler, GramSchmidt::CGS2); LinSolverIterativeFGMRES FGMRES(&matrix_handler, &vector_handler, &GS); for (int i = 0; i < num_systems; ++i) { @@ -124,28 +137,31 @@ int main(int argc, char *argv[]) std::ostringstream matname; std::ostringstream rhsname; matname << matrix_path_name << std::setfill('0') << std::setw(2) << i << "." << file_extension; - rhsname << rhs_path_name << std::setfill('0') << std::setw(2) << i << "." << file_extension; + rhsname << rhs_path_name << std::setfill('0') << std::setw(2) << i << "." << file_extension; matrix_file_name_full = matname.str(); - rhs_file_name_full = rhsname.str(); + rhs_file_name_full = rhsname.str(); std::ifstream mat_file(matrix_file_name_full); - if(!mat_file.is_open()) + if (!mat_file.is_open()) { std::cout << "Failed to open file " << matrix_file_name_full << "\n"; return 1; } std::ifstream rhs_file(rhs_file_name_full); - if(!rhs_file.is_open()) + if (!rhs_file.is_open()) { std::cout << "Failed to open file " << rhs_file_name_full << "\n"; return 1; } bool is_expand_symmetric = true; - if (i == 0) { + if (i == 0) + { A = ReSolve::io::createCsrFromFile(mat_file, is_expand_symmetric); vec_rhs = ReSolve::io::createVectorFromFile(rhs_file); - vec_x = new vector_type(A->getNumRows()); - } else { + vec_x = new vector_type(A->getNumRows()); + } + else + { ReSolve::io::updateMatrixFromFile(mat_file, A); ReSolve::io::updateVectorFromFile(rhs_file, vec_rhs); } @@ -153,31 +169,34 @@ int main(int argc, char *argv[]) mat_file.close(); rhs_file.close(); - std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnz()<getNnz() << std::endl; + // Now call direct solver int status; - if (i==0) { + if (i == 0) + { vec_rhs->setDataUpdated(ReSolve::memory::HOST); KLU.setup(A); status = KLU.analyze(); - std::cout<<"KLU analysis status: "< #include +#include #include +#include "ExampleHelper.hpp" +#include +#include #include -#include #include -#include -#include +#include #include +#include +#include +#include #include -#include -#include #include -#include - -#include "ExampleHelper.hpp" using namespace ReSolve::constants; @@ -46,17 +45,18 @@ void printHelpInfo() std::cout << "\t-i\tEnables iterative refinement.\n\n"; } -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // Use the same data types as those you specified in ReSolve build. - using index_type = ReSolve::index_type; + using index_type = ReSolve::index_type; using vector_type = ReSolve::vector::Vector; using namespace ReSolve::examples; using namespace ReSolve; CliOptions options(argc, argv); bool is_help = options.hasKey("-h"); - if (is_help) { + if (is_help) + { printHelpInfo(); return 0; } @@ -64,10 +64,13 @@ int main(int argc, char *argv[]) bool is_iterative_refinement = options.hasKey("-i"); index_type num_systems = 0; - auto opt = options.getParamFromKey("-n"); - if (opt) { + auto opt = options.getParamFromKey("-n"); + if (opt) + { num_systems = std::stoi((opt->second).c_str()); - } else { + } + else + { std::cout << "Incorrect input!\n\n"; printHelpInfo(); return 1; @@ -75,9 +78,12 @@ int main(int argc, char *argv[]) std::string matrix_path_name(""); opt = options.getParamFromKey("-m"); - if (opt) { + if (opt) + { matrix_path_name = opt->second; - } else { + } + else + { std::cout << "Incorrect input!\n\n"; printHelpInfo(); return 1; @@ -85,9 +91,12 @@ int main(int argc, char *argv[]) std::string rhs_path_name(""); opt = options.getParamFromKey("-r"); - if (opt) { + if (opt) + { rhs_path_name = opt->second; - } else { + } + else + { std::cout << "Incorrect input!\n\n"; printHelpInfo(); return 1; @@ -95,9 +104,12 @@ int main(int argc, char *argv[]) std::string file_extension(""); opt = options.getParamFromKey("-e"); - if (opt) { + if (opt) + { file_extension = opt->second; - } else { + } + else + { file_extension = "mtx"; } @@ -106,17 +118,17 @@ int main(int argc, char *argv[]) std::string matrix_file_name_full; std::string rhs_file_name_full; - matrix::Csr* A = nullptr; - LinAlgWorkspaceCpu workspace; + matrix::Csr* A = nullptr; + LinAlgWorkspaceCpu workspace; ExampleHelper helper(workspace); - MatrixHandler matrix_handler(&workspace); - VectorHandler vector_handler(&workspace); + MatrixHandler matrix_handler(&workspace); + VectorHandler vector_handler(&workspace); vector_type* vec_rhs = nullptr; vector_type* vec_x = nullptr; - LinSolverDirectKLU* KLU = new LinSolverDirectKLU; - GramSchmidt GS(&vector_handler, GramSchmidt::CGS2); + LinSolverDirectKLU* KLU = new LinSolverDirectKLU; + GramSchmidt GS(&vector_handler, GramSchmidt::CGS2); LinSolverIterativeFGMRES FGMRES(&matrix_handler, &vector_handler, &GS); for (int i = 0; i < num_systems; ++i) { @@ -125,28 +137,31 @@ int main(int argc, char *argv[]) std::ostringstream matname; std::ostringstream rhsname; matname << matrix_path_name << std::setfill('0') << std::setw(2) << i << "." << file_extension; - rhsname << rhs_path_name << std::setfill('0') << std::setw(2) << i << "." << file_extension; + rhsname << rhs_path_name << std::setfill('0') << std::setw(2) << i << "." << file_extension; matrix_file_name_full = matname.str(); - rhs_file_name_full = rhsname.str(); + rhs_file_name_full = rhsname.str(); std::ifstream mat_file(matrix_file_name_full); - if(!mat_file.is_open()) + if (!mat_file.is_open()) { std::cout << "Failed to open file " << matrix_file_name_full << "\n"; return 1; } std::ifstream rhs_file(rhs_file_name_full); - if(!rhs_file.is_open()) + if (!rhs_file.is_open()) { std::cout << "Failed to open file " << rhs_file_name_full << "\n"; return 1; } bool is_expand_symmetric = true; - if (i == 0) { + if (i == 0) + { A = ReSolve::io::createCsrFromFile(mat_file, is_expand_symmetric); vec_rhs = ReSolve::io::createVectorFromFile(rhs_file); - vec_x = new vector_type(A->getNumRows()); - } else { + vec_x = new vector_type(A->getNumRows()); + } + else + { ReSolve::io::updateMatrixFromFile(mat_file, A); ReSolve::io::updateVectorFromFile(rhs_file, vec_rhs); } @@ -155,19 +170,23 @@ int main(int argc, char *argv[]) rhs_file.close(); std::cout << "COO to CSR completed. Expanded NNZ: " << A->getNnz() << std::endl; - //Now call direct solver + // Now call direct solver int status; - if (i==0) { + if (i == 0) + { vec_rhs->setDataUpdated(ReSolve::memory::HOST); KLU->setup(A); status = KLU->analyze(); std::cout << "KLU analysis status: " << status << std::endl; } - if (i < 2){ + if (i < 2) + { status = KLU->factorize(); std::cout << "KLU factorization status: " << status << std::endl; - } else { - status = KLU->refactorize(); + } + else + { + status = KLU->refactorize(); std::cout << "KLU re-factorization status: " << status << std::endl; } status = KLU->solve(vec_rhs, vec_x); @@ -175,13 +194,15 @@ int main(int argc, char *argv[]) helper.resetSystem(A, vec_rhs, vec_x); helper.printShortSummary(); - if (is_iterative_refinement) { + if (is_iterative_refinement) + { // Setup iterative refinement FGMRES.setup(A); FGMRES.setupPreconditioner("LU", KLU); // If refactorization produced finite solution do iterative refinement - if (std::isfinite(helper.getNormRelativeResidual())) { + if (std::isfinite(helper.getNormRelativeResidual())) + { FGMRES.solve(vec_rhs, vec_x); // Print summary @@ -190,7 +211,7 @@ int main(int argc, char *argv[]) } } - //now DELETE + // now DELETE delete A; delete KLU; delete vec_rhs; diff --git a/examples/randGmres.cpp b/examples/randGmres.cpp index 4a1c46f2d..629cc2649 100644 --- a/examples/randGmres.cpp +++ b/examples/randGmres.cpp @@ -1,20 +1,19 @@ -#include -#include #include +#include +#include -#include -#include -#include -#include -#include +#include "ExampleHelper.hpp" #include -#include #include #include -#include +#include +#include +#include +#include #include - -#include "ExampleHelper.hpp" +#include +#include +#include #ifdef RESOLVE_USE_HIP #include @@ -35,9 +34,9 @@ static void printUsage() /// Prototype of the example main function template -static int runGmresExample(int argc, char *argv[]); +static int runGmresExample(int argc, char* argv[]); -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { int status = 0; @@ -62,7 +61,7 @@ int main(int argc, char *argv[]) /// Example implementation template -int runGmresExample(int argc, char *argv[]) +int runGmresExample(int argc, char* argv[]) { // Use the same data types as those you specified in ReSolve build. using namespace ReSolve; @@ -74,16 +73,20 @@ int runGmresExample(int argc, char *argv[]) ReSolve::CliOptions options(argc, argv); bool is_help = options.hasKey("-h"); - if (is_help) { + if (is_help) + { printUsage(); return 1; } std::string matrix_pathname; - auto opt = options.getParamFromKey("-m"); - if (opt) { + auto opt = options.getParamFromKey("-m"); + if (opt) + { matrix_pathname = opt->second; - } else { + } + else + { std::cout << "Incorrect input!\n"; printUsage(); return 1; @@ -91,9 +94,12 @@ int runGmresExample(int argc, char *argv[]) std::string rhs_pathname; opt = options.getParamFromKey("-r"); - if (opt) { + if (opt) + { rhs_pathname = opt->second; - } else { + } + else + { std::cout << "Incorrect input!\n"; printUsage(); return 1; @@ -110,54 +116,56 @@ int runGmresExample(int argc, char *argv[]) GramSchmidt GS(&vector_handler, GramSchmidt::CGS2); - precon_type Precond(&workspace); + precon_type Precond(&workspace); LinSolverIterativeRandFGMRES FGMRES(&matrix_handler, &vector_handler, LinSolverIterativeRandFGMRES::cs, &GS); // Set memory space where to run tests - std::string hwbackend = "CPU"; - memory::MemorySpace memspace = memory::HOST; - if (matrix_handler.getIsCudaEnabled()) { - memspace = memory::DEVICE; + std::string hwbackend = "CPU"; + memory::MemorySpace memspace = memory::HOST; + if (matrix_handler.getIsCudaEnabled()) + { + memspace = memory::DEVICE; hwbackend = "CUDA"; } - if (matrix_handler.getIsHipEnabled()) { - memspace = memory::DEVICE; + if (matrix_handler.getIsHipEnabled()) + { + memspace = memory::DEVICE; hwbackend = "HIP"; } - matrix::Csr* A = nullptr; + matrix::Csr* A = nullptr; vector_type* vec_rhs = nullptr; vector_type* vec_x = nullptr; std::ifstream mat_file(matrix_pathname); - if(!mat_file.is_open()) + if (!mat_file.is_open()) { std::cout << "Failed to open file " << matrix_pathname << "\n"; return -1; } std::ifstream rhs_file(rhs_pathname); - if(!rhs_file.is_open()) + if (!rhs_file.is_open()) { std::cout << "Failed to open file " << rhs_pathname << "\n"; return -1; } bool is_expand_symmetric = true; - A = io::createCsrFromFile(mat_file, is_expand_symmetric); - vec_rhs = io::createVectorFromFile(rhs_file); + A = io::createCsrFromFile(mat_file, is_expand_symmetric); + vec_rhs = io::createVectorFromFile(rhs_file); mat_file.close(); rhs_file.close(); vec_x = new vector_type(A->getNumRows()); vec_x->allocate(memspace); - if (memspace == memory::DEVICE) { + if (memspace == memory::DEVICE) + { A->syncData(memspace); vec_rhs->syncData(memspace); } - printSystemInfo(matrix_pathname, A); matrix_handler.setValuesChanged(true, memspace); diff --git a/examples/sysRefactor.cpp b/examples/sysRefactor.cpp index e348a2c21..1cf3d48c5 100644 --- a/examples/sysRefactor.cpp +++ b/examples/sysRefactor.cpp @@ -1,24 +1,22 @@ -#include -#include -#include #include +#include +#include #include #include +#include "ExampleHelper.hpp" +#include #include +#include #include -#include #include -#include -#include +#include #include +#include +#include +#include #include -#include #include -#include -#include - -#include "ExampleHelper.hpp" /// Prints help message describing system usage. void printHelpInfo() @@ -41,16 +39,17 @@ using namespace ReSolve::constants; /// Prototype of the example function template -static int sysRefactor(int argc, char *argv[]); +static int sysRefactor(int argc, char* argv[]); /// Main function selects example to be run. -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { ReSolve::CliOptions options(argc, argv); // If help flag is passed, print help message and return bool is_help = options.hasKey("-h"); - if (is_help) { + if (is_help) + { printHelpInfo(); return 0; } @@ -98,18 +97,19 @@ int main(int argc, char *argv[]) * @return 0 if the example ran successfully, -1 otherwise */ template -int sysRefactor(int argc, char *argv[]) +int sysRefactor(int argc, char* argv[]) { // Use the same data types as those you specified in ReSolve build. using namespace ReSolve::examples; using namespace ReSolve; - using index_type = ReSolve::index_type; + using index_type = ReSolve::index_type; using vector_type = ReSolve::vector::Vector; CliOptions options(argc, argv); bool is_help = options.hasKey("-h"); - if (is_help) { + if (is_help) + { printHelpInfo(); return 0; } @@ -117,44 +117,56 @@ int sysRefactor(int argc, char *argv[]) bool is_iterative_refinement = options.hasKey("-i"); index_type num_systems = 0; - auto opt = options.getParamFromKey("-n"); - if (opt) { + auto opt = options.getParamFromKey("-n"); + if (opt) + { num_systems = atoi((opt->second).c_str()); - } else { + } + else + { std::cout << "Incorrect input!\n"; printHelpInfo(); } std::string matrix_pathname(""); opt = options.getParamFromKey("-m"); - if (opt) { + if (opt) + { matrix_pathname = opt->second; - } else { + } + else + { std::cout << "Incorrect input!\n"; printHelpInfo(); } std::string rhs_pathname(""); opt = options.getParamFromKey("-r"); - if (opt) { + if (opt) + { rhs_pathname = opt->second; - } else { + } + else + { std::cout << "Incorrect input!\n"; printHelpInfo(); } std::string file_extension(""); opt = options.getParamFromKey("-e"); - if (opt) { + if (opt) + { file_extension = opt->second; - } else { + } + else + { file_extension = "mtx"; } - std::cout << "Family matrix file name: " << matrix_pathname + std::cout << "Family matrix file name: " << matrix_pathname << ", total number of matrices: " << num_systems << "\n" - << "Family rhs file name: " << rhs_pathname - << ", total number of RHSes: " << num_systems << "\n"; + << "Family rhs file name: " << rhs_pathname + << ", total number of RHSes: " << num_systems << "\n"; int status = 0; @@ -163,7 +175,7 @@ int sysRefactor(int argc, char *argv[]) // Create a helper object (computing errors, printing summaries, etc.) ExampleHelper helper(workspace); - std::string hw_backend = helper.getHardwareBackend(); + std::string hw_backend = helper.getHardwareBackend(); std::cout << "sysRefactor with " << hw_backend << " backend\n"; MatrixHandler matrix_handler(&workspace); @@ -176,11 +188,16 @@ int sysRefactor(int argc, char *argv[]) // Create system solver std::string refactor("none"); - if (hw_backend == "CUDA") { + if (hw_backend == "CUDA") + { refactor = "cusolverrf"; - } else if (hw_backend == "HIP") { + } + else if (hw_backend == "HIP") + { refactor = "rocsolverrf"; - } else { + } + else + { refactor = "klu"; } @@ -192,14 +209,17 @@ int sysRefactor(int argc, char *argv[]) "none"); // iterative refinement // Disable iterative refinement temporarily for CPU backend - if (hw_backend == "CPU") { + if (hw_backend == "CPU") + { is_iterative_refinement = false; } - if (is_iterative_refinement) { + if (is_iterative_refinement) + { solver.setRefinementMethod("fgmres", "cgs2"); solver.getIterativeSolver().setCliParam("restart", "100"); - if (hw_backend == "CUDA") { + if (hw_backend == "CUDA") + { solver.getIterativeSolver().setTol(1e-17); } } @@ -212,19 +232,19 @@ int sysRefactor(int argc, char *argv[]) std::ostringstream matname; std::ostringstream rhsname; matname << matrix_pathname << std::setfill('0') << std::setw(2) << i << "." << file_extension; - rhsname << rhs_pathname << std::setfill('0') << std::setw(2) << i << "." << file_extension; + rhsname << rhs_pathname << std::setfill('0') << std::setw(2) << i << "." << file_extension; std::string matrix_pathname_full = matname.str(); std::string rhs_pathname_full = rhsname.str(); // Read matrix and right-hand-side vector std::ifstream mat_file(matrix_pathname_full); - if(!mat_file.is_open()) + if (!mat_file.is_open()) { std::cout << "Failed to open file " << matrix_pathname_full << "\n"; return 1; } std::ifstream rhs_file(rhs_pathname_full); - if(!rhs_file.is_open()) + if (!rhs_file.is_open()) { std::cout << "Failed to open file " << rhs_pathname_full << "\n"; return 1; @@ -232,15 +252,19 @@ int sysRefactor(int argc, char *argv[]) // Refactorization is LU-based, so need to expand symmetric matrices bool is_expand_symmetric = true; - if (i == 0) { - A = ReSolve::io::createCsrFromFile(mat_file, is_expand_symmetric); + if (i == 0) + { + A = ReSolve::io::createCsrFromFile(mat_file, is_expand_symmetric); vec_rhs = ReSolve::io::createVectorFromFile(rhs_file); - vec_x = new vector_type(A->getNumRows()); + vec_x = new vector_type(A->getNumRows()); vec_x->allocate(memory::HOST); - if (hw_backend == "CUDA" || hw_backend == "HIP") { + if (hw_backend == "CUDA" || hw_backend == "HIP") + { vec_x->allocate(memory::DEVICE); } - } else { + } + else + { ReSolve::io::updateMatrixFromFile(mat_file, A); ReSolve::io::updateVectorFromFile(rhs_file, vec_rhs); } @@ -249,7 +273,8 @@ int sysRefactor(int argc, char *argv[]) rhs_file.close(); // Ensure matrix data is synced to the device before any GPU operations - if (hw_backend == "CUDA" || hw_backend == "HIP") { + if (hw_backend == "CUDA" || hw_backend == "HIP") + { A->syncData(memory::DEVICE); vec_rhs->syncData(memory::DEVICE); } @@ -258,10 +283,12 @@ int sysRefactor(int argc, char *argv[]) printSystemInfo(matrix_pathname_full, A); // Now call direct solver - if (i == 0) { + if (i == 0) + { // Set matrix in solver after the initial matrix is loaded status = solver.setMatrix(A); - if (status != 0) { + if (status != 0) + { std::cout << "Failed to set matrix in solver. Status: " << status << std::endl; return 1; } @@ -273,7 +300,9 @@ int sysRefactor(int argc, char *argv[]) // Numeric factorization on the host status = solver.factorize(); std::cout << "Numeric factorization on the host status: " << status << std::endl; - } else if (i == 1) { + } + else if (i == 1) + { // Numeric factorization on the host status = solver.factorize(); std::cout << "Numeric factorization on the host status: " << status << std::endl; @@ -281,8 +310,9 @@ int sysRefactor(int argc, char *argv[]) // Set up refactorization solver status = solver.refactorizationSetup(); std::cout << "Refactorization setup status: " << status << std::endl; - - } else { + } + else + { // Refactorize on the device status = solver.refactorize(); std::cout << "Refactorization on the device status: " << status << std::endl; @@ -294,7 +324,8 @@ int sysRefactor(int argc, char *argv[]) // Print summary of results helper.resetSystem(A, vec_rhs, vec_x); helper.printShortSummary(); - if ((i > 1) && is_iterative_refinement) { + if ((i > 1) && is_iterative_refinement) + { helper.printIrSummary(&(solver.getIterativeSolver())); } } diff --git a/resolve/Common.hpp b/resolve/Common.hpp index 9b467c3ad..af30adadb 100644 --- a/resolve/Common.hpp +++ b/resolve/Common.hpp @@ -2,36 +2,37 @@ #include -//TODO: temporary +// TODO: temporary #include -namespace ReSolve { +namespace ReSolve +{ /// @todo Provide CMake option to se these types at config time - using real_type = double; + using real_type = double; using index_type = std::int32_t; namespace constants { - constexpr real_type ZERO = 0.0; - constexpr real_type ONE = 1.0; - constexpr real_type TWO = 2.0; - constexpr real_type HALF = 0.5; + constexpr real_type ZERO = 0.0; + constexpr real_type ONE = 1.0; + constexpr real_type TWO = 2.0; + constexpr real_type HALF = 0.5; constexpr real_type MINUS_ONE = -1.0; constexpr real_type MACHINE_EPSILON = std::numeric_limits::epsilon(); - } + } // namespace constants namespace colors { // must be const pointer and const dest for // const string declarations to pass -Wwrite-strings - static const char * const RED = "\033[1;31m"; - static const char * const GREEN = "\033[1;32m"; - static const char * const YELLOW = "\033[33;1m"; - static const char * const BLUE = "\033[34;1m"; - static const char * const ORANGE = "\u001b[38;5;208m"; - static const char * const CLEAR = "\033[0m"; - } + static const char* const RED = "\033[1;31m"; + static const char* const GREEN = "\033[1;32m"; + static const char* const YELLOW = "\033[33;1m"; + static const char* const BLUE = "\033[34;1m"; + static const char* const ORANGE = "\u001b[38;5;208m"; + static const char* const CLEAR = "\033[0m"; + } // namespace colors } // namespace ReSolve diff --git a/resolve/Doxygen.hpp b/resolve/Doxygen.hpp index 905c05595..2a5eea857 100644 --- a/resolve/Doxygen.hpp +++ b/resolve/Doxygen.hpp @@ -1,31 +1,31 @@ /** * @file Doxygen.hpp * @author Slaven Peles (peless@ornl.gov) - * @brief - * + * @brief + * * @mainpage ReSolve Source Code Documentation - * + * * ReSolve is a library of GPU-resident linear solvers. It contains iterative * and direct linear solvers designed to run on NVIDIA and AMD GPUs, as well as * on CPU devices. This is the main page of source code documentation intended * for developers who want to contribute to ReSolve code. General documentation * is available at readthedocs. The * ReSolve project is hosted on GitHub. - * - * + * + * * @section name_sec Name - * + * * Linear solvers are typically used within an application where a series of * systems with same sparsity pattern is solved one after another, such as in * the case of dynamic simulations or optimization. An efficient linear solver * design will _re-solve_ systems with the same sparsity pattern while reusing * symbolic operations and memory allocations from the prior systems, therefore * the name ReSolve. - * + * * @section history_sec History - * + * * The development of Re::Solve sparse linear solver library started as a part - * of Stochastic Grid Dynamics at Exascale + * of Stochastic Grid Dynamics at Exascale * (ExaSGD) * subproject of the Exascale Computing Project * (ECP). The overarching @@ -48,19 +48,19 @@ * pattern without unnecessary recomputation and re-allocations, easy to * integrate with applications, and capable of running on both AMD and NVIDIA * GPUs. - * + * * @section design_sec Code Design and Organization - * + * * @subsection solvers_subsec Solvers - * + * * @subsection matvecs_subsec Matrix and Vector Classes - * + * * @subsection handlers_subsec Matrix and Vector Handlers - * + * * @subsection workspaces_subsec Workspaces - * + * * @subsection backends_subsec Hardware Backends - * + * * @subsection utils_subsec Utilities - * + * */ diff --git a/resolve/GramSchmidt.cpp b/resolve/GramSchmidt.cpp index a9268273e..725a0d1d0 100644 --- a/resolve/GramSchmidt.cpp +++ b/resolve/GramSchmidt.cpp @@ -1,10 +1,11 @@ -#include +#include "GramSchmidt.hpp" + #include #include +#include #include #include -#include "GramSchmidt.hpp" namespace ReSolve { @@ -12,24 +13,28 @@ namespace ReSolve index_type idxmap(index_type i, index_type j, index_type col_length) { - return i * (col_length) + j; + return i * (col_length) + j; } - GramSchmidt::GramSchmidt(VectorHandler* vh, GSVariant variant) + GramSchmidt::GramSchmidt(VectorHandler* vh, GSVariant variant) : variant_(variant), setup_complete_(false), vector_handler_(vh) { - if (vector_handler_->getIsCudaEnabled() || vector_handler_->getIsHipEnabled()) { + if (vector_handler_->getIsCudaEnabled() || vector_handler_->getIsHipEnabled()) + { memspace_ = memory::DEVICE; - } else { + } + else + { memspace_ = memory::HOST; } } GramSchmidt::~GramSchmidt() { - if (setup_complete_) { + if (setup_complete_) + { freeGramSchmidtData(); } } @@ -45,12 +50,14 @@ namespace ReSolve int GramSchmidt::setVariant(GSVariant variant) { // If the same variant is already set, do nothing. - if(variant == variant_) { + if (variant == variant_) + { return 0; } // If Gram-Scmidt data is not allocated, just set the variant and exit. - if (!setup_complete_) { + if (!setup_complete_) + { variant_ = variant; return 0; } @@ -85,8 +92,10 @@ namespace ReSolve int GramSchmidt::setup(index_type n, index_type restart) { - if (setup_complete_) { - if ((vec_v_->getSize() != n) || (num_vecs_ != restart)) { + if (setup_complete_) + { + if ((vec_v_->getSize() != n) || (num_vecs_ != restart)) + { freeGramSchmidtData(); } } @@ -96,8 +105,9 @@ namespace ReSolve vec_x_ = new vector_type(n, 2); // n x 2 multivector view num_vecs_ = restart; - if((variant_ == MGS_TWO_SYNC) || (variant_ == MGS_PM)) { - h_L_ = new real_type[num_vecs_ * (num_vecs_ + 1)](); + if ((variant_ == MGS_TWO_SYNC) || (variant_ == MGS_PM)) + { + h_L_ = new real_type[num_vecs_ * (num_vecs_ + 1)](); vec_rv_ = new vector_type(num_vecs_ + 1, 2); vec_rv_->allocate(memspace_); @@ -107,18 +117,21 @@ namespace ReSolve vec_Hcolumn_->allocate(memspace_); vec_Hcolumn_->setToZero(memspace_); } - if(variant_ == CGS2) { - h_aux_ = new real_type[num_vecs_ + 1](); + if (variant_ == CGS2) + { + h_aux_ = new real_type[num_vecs_ + 1](); vec_Hcolumn_ = new vector_type(num_vecs_ + 1); vec_Hcolumn_->allocate(memspace_); vec_Hcolumn_->setToZero(memspace_); } - if(variant_ == CGS1) { + if (variant_ == CGS1) + { vec_Hcolumn_ = new vector_type(num_vecs_ + 1); vec_Hcolumn_->allocate(memspace_); vec_Hcolumn_->setToZero(memspace_); } - if(variant_ == MGS_PM) { + if (variant_ == MGS_PM) + { h_aux_ = new real_type[num_vecs_ + 1](); } @@ -130,236 +143,267 @@ namespace ReSolve { using namespace constants; - double t = 0.0; - double s = 0.0; + double t = 0.0; + double s = 0.0; real_type* h_rv = nullptr; - switch (variant_) { - case MGS: - vec_w_->setData(V->getData(i + 1, memspace_), memspace_); - for(int j = 0; j <= i; ++j) { - t = 0.0; - vec_v_->setData( V->getData(j, memspace_), memspace_); - t = vector_handler_->dot(vec_v_, vec_w_, memspace_); - H[ idxmap(i, j, num_vecs_ + 1) ] = t; - t *= -1.0; - vector_handler_->axpy(&t, vec_v_, vec_w_, memspace_); - } - t = 0.0; - t = vector_handler_->dot(vec_w_, vec_w_, memspace_); - //set the last entry in Hessenberg matrix - t = std::sqrt(t); - H[ idxmap(i, i + 1, num_vecs_ + 1) ] = t; - if(std::abs(t) > MACHINE_EPSILON) { - t = 1.0/t; - vector_handler_->scal(&t, vec_w_, memspace_); - } else { - assert(0 && "Gram-Schmidt failed, vector with ZERO norm\n"); - return 1; - } - return 0; - - case CGS2: - vec_v_->setData(V->getData(i + 1, memspace_), memspace_); - vector_handler_->gemv('T', n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace_); - // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol - vector_handler_->gemv('N', n, i + 1, &ONE, &MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_ ); - mem_.deviceSynchronize(); - - // copy H_col to aux, we will need it later - vec_Hcolumn_->setDataUpdated(memspace_); - vec_Hcolumn_->resize(i + 1); - vec_Hcolumn_->copyDataTo(h_aux_, 0, memory::HOST); - mem_.deviceSynchronize(); - - //Hcol = V(:,1:i)^T*V(:,i+1); - vector_handler_->gemv('T', n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace_); - mem_.deviceSynchronize(); - - // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol - vector_handler_->gemv('N', n, i + 1, &ONE, &MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_ ); - mem_.deviceSynchronize(); - - // copy H_col to H - vec_Hcolumn_->setDataUpdated(memspace_); - vec_Hcolumn_->copyDataTo(&H[ idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST); - mem_.deviceSynchronize(); - - // add both pieces together (unstable otherwise, careful here!!) + switch (variant_) + { + case MGS: + vec_w_->setData(V->getData(i + 1, memspace_), memspace_); + for (int j = 0; j <= i; ++j) + { t = 0.0; - for(int j = 0; j <= i; ++j) { - H[ idxmap(i, j, num_vecs_ + 1)] += h_aux_[j]; - } + vec_v_->setData(V->getData(j, memspace_), memspace_); + t = vector_handler_->dot(vec_v_, vec_w_, memspace_); + H[idxmap(i, j, num_vecs_ + 1)] = t; + t *= -1.0; + vector_handler_->axpy(&t, vec_v_, vec_w_, memspace_); + } + t = 0.0; + t = vector_handler_->dot(vec_w_, vec_w_, memspace_); + // set the last entry in Hessenberg matrix + t = std::sqrt(t); + H[idxmap(i, i + 1, num_vecs_ + 1)] = t; + if (std::abs(t) > MACHINE_EPSILON) + { + t = 1.0 / t; + vector_handler_->scal(&t, vec_w_, memspace_); + } + else + { + assert(0 && "Gram-Schmidt failed, vector with ZERO norm\n"); + return 1; + } + return 0; - t = vector_handler_->dot(vec_v_, vec_v_, memspace_); - //set the last entry in Hessenberg matrix - t = std::sqrt(t); - H[ idxmap(i, i + 1, num_vecs_ + 1) ] = t; - - if(std::abs(t) > MACHINE_EPSILON) { - t = 1.0/t; - vector_handler_->scal(&t, vec_v_, memspace_); - } else { - assert(0 && "Gram-Schmidt failed, vector with ZERO norm\n"); - return 1; - } - return 0; - - case MGS_TWO_SYNC: - // V[1:i]^T[V[i] w] - vec_x_->setData(V->getData(i, memspace_), memspace_); - vec_w_->setData(V->getData(i + 1, memspace_), memspace_); - vec_rv_->resize(i + 1); - - vector_handler_->massDot2Vec(n, V, i + 1, vec_x_, vec_rv_, memspace_); - vec_rv_->setDataUpdated(memspace_); - if (memspace_ == memory::DEVICE) { - vec_rv_->syncData(memory::HOST); - } + case CGS2: + vec_v_->setData(V->getData(i + 1, memspace_), memspace_); + vector_handler_->gemv('T', n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace_); + // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol + vector_handler_->gemv('N', n, i + 1, &ONE, &MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_); + mem_.deviceSynchronize(); + + // copy H_col to aux, we will need it later + vec_Hcolumn_->setDataUpdated(memspace_); + vec_Hcolumn_->resize(i + 1); + vec_Hcolumn_->copyDataTo(h_aux_, 0, memory::HOST); + mem_.deviceSynchronize(); + + // Hcol = V(:,1:i)^T*V(:,i+1); + vector_handler_->gemv('T', n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace_); + mem_.deviceSynchronize(); + + // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol + vector_handler_->gemv('N', n, i + 1, &ONE, &MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_); + mem_.deviceSynchronize(); + + // copy H_col to H + vec_Hcolumn_->setDataUpdated(memspace_); + vec_Hcolumn_->copyDataTo(&H[idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST); + mem_.deviceSynchronize(); + + // add both pieces together (unstable otherwise, careful here!!) + t = 0.0; + for (int j = 0; j <= i; ++j) + { + H[idxmap(i, j, num_vecs_ + 1)] += h_aux_[j]; + } - vec_rv_->copyDataTo(&h_L_[idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST); - h_rv = vec_rv_->getData(1, memory::HOST); + t = vector_handler_->dot(vec_v_, vec_v_, memspace_); + // set the last entry in Hessenberg matrix + t = std::sqrt(t); + H[idxmap(i, i + 1, num_vecs_ + 1)] = t; - for(int j=0; j<=i; ++j) { - H[ idxmap(i, j, num_vecs_ + 1) ] = 0.0; - } - // triangular solve - for(int j = 0; j <= i; ++j) { - H[ idxmap(i, j, num_vecs_ + 1) ] = h_rv[j]; - s = 0.0; - for(int k = 0; k < j; ++k) { - s += h_L_[ idxmap(j, k, num_vecs_ + 1) ] * H[ idxmap(i, k, num_vecs_ + 1) ]; - } // for k - H[ idxmap(i, j, num_vecs_ + 1) ] -= s; - } // for j - vec_Hcolumn_->resize(i + 1); - vec_Hcolumn_->copyDataFrom(&H[ idxmap(i, 0, num_vecs_ + 1)], memory::HOST, memspace_); - vector_handler_->massAxpy(n, vec_Hcolumn_, i + 1, V, vec_w_, memspace_); - - // normalize (second synch) - t = vector_handler_->dot(vec_w_, vec_w_, memspace_); - //set the last entry in Hessenberg matrix - t = std::sqrt(t); - H[ idxmap(i, i + 1, num_vecs_ + 1)] = t; - if(std::abs(t) > MACHINE_EPSILON) { - t = 1.0 / t; - vector_handler_->scal(&t, vec_w_, memspace_); - for (int ii=0; ii<=i; ++ii) - { - vec_v_->setData(V->getData(ii, memspace_), memspace_); - vec_w_->setData(V->getData(i + 1, memspace_), memspace_); - } - } else { - assert(0 && "Iterative refinement failed, Krylov vector with ZERO norm\n"); - return 1; - } - h_rv = nullptr; - return 0; - - case MGS_PM: - vec_x_->setData(V->getData(i, memspace_), memspace_); - vec_w_->setData(V->getData(i + 1, memspace_), memspace_); - vec_rv_->resize(i + 1); - - vector_handler_->massDot2Vec(n, V, i + 1, vec_x_, vec_rv_, memspace_); - vec_rv_->setDataUpdated(memspace_); - if (memspace_ == memory::DEVICE) { - vec_rv_->syncData(memory::HOST); - } + if (std::abs(t) > MACHINE_EPSILON) + { + t = 1.0 / t; + vector_handler_->scal(&t, vec_v_, memspace_); + } + else + { + assert(0 && "Gram-Schmidt failed, vector with ZERO norm\n"); + return 1; + } + return 0; - vec_rv_->copyDataTo(&h_L_[idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST); - h_rv = vec_rv_->getData(1, memory::HOST); + case MGS_TWO_SYNC: + // V[1:i]^T[V[i] w] + vec_x_->setData(V->getData(i, memspace_), memspace_); + vec_w_->setData(V->getData(i + 1, memspace_), memspace_); + vec_rv_->resize(i + 1); + + vector_handler_->massDot2Vec(n, V, i + 1, vec_x_, vec_rv_, memspace_); + vec_rv_->setDataUpdated(memspace_); + if (memspace_ == memory::DEVICE) + { + vec_rv_->syncData(memory::HOST); + } - for(int j = 0; j <= i; ++j) { - H[ idxmap(i, j, num_vecs_ + 1) ] = 0.0; - } + vec_rv_->copyDataTo(&h_L_[idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST); + h_rv = vec_rv_->getData(1, memory::HOST); - //triangular solve - for(int j = 0; j <= i; ++j) { - H[ idxmap(i, j, num_vecs_ + 1) ] = h_rv[j]; - s = 0.0; - for(int k = 0; k < j; ++k) { - s += h_L_[ idxmap(j, k, num_vecs_ + 1) ] * H[ idxmap(i, k, num_vecs_ + 1) ]; - } // for k - H[ idxmap(i, j, num_vecs_ + 1) ] -= s; - } // for j - - // now compute h_rv = L^T h_H - double h; - for(int j = 0; j <= i; ++j) { - // go through COLUMN OF L - h_rv[j] = 0.0; - for(int k = j + 1; k <= i; ++k) { - h = h_L_[ idxmap(k, j, num_vecs_ + 1)]; - h_rv[j] += H[ idxmap(i, k, num_vecs_ + 1) ] * h; - } + for (int j = 0; j <= i; ++j) + { + H[idxmap(i, j, num_vecs_ + 1)] = 0.0; + } + // triangular solve + for (int j = 0; j <= i; ++j) + { + H[idxmap(i, j, num_vecs_ + 1)] = h_rv[j]; + s = 0.0; + for (int k = 0; k < j; ++k) + { + s += h_L_[idxmap(j, k, num_vecs_ + 1)] * H[idxmap(i, k, num_vecs_ + 1)]; + } // for k + H[idxmap(i, j, num_vecs_ + 1)] -= s; + } // for j + vec_Hcolumn_->resize(i + 1); + vec_Hcolumn_->copyDataFrom(&H[idxmap(i, 0, num_vecs_ + 1)], memory::HOST, memspace_); + vector_handler_->massAxpy(n, vec_Hcolumn_, i + 1, V, vec_w_, memspace_); + + // normalize (second synch) + t = vector_handler_->dot(vec_w_, vec_w_, memspace_); + // set the last entry in Hessenberg matrix + t = std::sqrt(t); + H[idxmap(i, i + 1, num_vecs_ + 1)] = t; + if (std::abs(t) > MACHINE_EPSILON) + { + t = 1.0 / t; + vector_handler_->scal(&t, vec_w_, memspace_); + for (int ii = 0; ii <= i; ++ii) + { + vec_v_->setData(V->getData(ii, memspace_), memspace_); + vec_w_->setData(V->getData(i + 1, memspace_), memspace_); } + } + else + { + assert(0 && "Iterative refinement failed, Krylov vector with ZERO norm\n"); + return 1; + } + h_rv = nullptr; + return 0; - // and do one more tri solve with L^T: h_aux = (I-L)^{-1}h_rv - for(int j = 0; j <= i; ++j) { - h_aux_[j] = h_rv[j]; - s = 0.0; - for(int k = 0; k < j; ++k) { - s += h_L_[ idxmap(j, k, num_vecs_ + 1) ] * h_aux_[k]; - } // for k - h_aux_[j] -= s; - } // for j - - // and now subtract that from h_H - for(int j=0; j<=i; ++j) { - H[ idxmap(i, j, num_vecs_ + 1) ] -= h_aux_[j]; - } + case MGS_PM: + vec_x_->setData(V->getData(i, memspace_), memspace_); + vec_w_->setData(V->getData(i + 1, memspace_), memspace_); + vec_rv_->resize(i + 1); - vec_Hcolumn_->resize(i + 1); - vec_Hcolumn_->copyDataFrom(&H[ idxmap(i, 0, num_vecs_ + 1)], memory::HOST, memspace_); - - vector_handler_->massAxpy(n, vec_Hcolumn_, i + 1, V, vec_w_, memspace_); - // normalize (second synch) - t = vector_handler_->dot(vec_w_, vec_w_, memspace_); - //set the last entry in Hessenberg matrix - t = std::sqrt(t); - H[ idxmap(i, i + 1, num_vecs_ + 1) ] = t; - if(std::abs(t) > MACHINE_EPSILON) { - t = 1.0 / t; - vector_handler_->scal(&t, vec_w_, memspace_); - } else { - assert(0 && "Iterative refinement failed, Krylov vector with ZERO norm\n"); - return 1; - } - h_rv = nullptr; - return 0; - - case CGS1: - vec_v_->setData(V->getData(i + 1, memspace_), memspace_); - //Hcol = V(:,1:i)^T*V(:,i+1); - vector_handler_->gemv('T', n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace_); - // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol - vector_handler_->gemv('N', n, i + 1, &ONE, &MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_ ); - mem_.deviceSynchronize(); - - // copy H_col to H - vec_Hcolumn_->setDataUpdated(memspace_); - vec_Hcolumn_->resize(i + 1); - vec_Hcolumn_->copyDataTo(&H[ idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST); - mem_.deviceSynchronize(); - - t = vector_handler_->dot(vec_v_, vec_v_, memspace_); - //set the last entry in Hessenberg matrix - t = std::sqrt(t); - H[ idxmap(i, i + 1, num_vecs_ + 1) ] = t; - if(std::abs(t) > MACHINE_EPSILON) { - t = 1.0/t; - vector_handler_->scal(&t, vec_v_, memspace_); - } else { - assert(0 && "Gram-Schmidt failed, vector with ZERO norm\n"); - return 1; + vector_handler_->massDot2Vec(n, V, i + 1, vec_x_, vec_rv_, memspace_); + vec_rv_->setDataUpdated(memspace_); + if (memspace_ == memory::DEVICE) + { + vec_rv_->syncData(memory::HOST); + } + + vec_rv_->copyDataTo(&h_L_[idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST); + h_rv = vec_rv_->getData(1, memory::HOST); + + for (int j = 0; j <= i; ++j) + { + H[idxmap(i, j, num_vecs_ + 1)] = 0.0; + } + + // triangular solve + for (int j = 0; j <= i; ++j) + { + H[idxmap(i, j, num_vecs_ + 1)] = h_rv[j]; + s = 0.0; + for (int k = 0; k < j; ++k) + { + s += h_L_[idxmap(j, k, num_vecs_ + 1)] * H[idxmap(i, k, num_vecs_ + 1)]; + } // for k + H[idxmap(i, j, num_vecs_ + 1)] -= s; + } // for j + + // now compute h_rv = L^T h_H + double h; + for (int j = 0; j <= i; ++j) + { + // go through COLUMN OF L + h_rv[j] = 0.0; + for (int k = j + 1; k <= i; ++k) + { + h = h_L_[idxmap(k, j, num_vecs_ + 1)]; + h_rv[j] += H[idxmap(i, k, num_vecs_ + 1)] * h; } - return 0; + } + + // and do one more tri solve with L^T: h_aux = (I-L)^{-1}h_rv + for (int j = 0; j <= i; ++j) + { + h_aux_[j] = h_rv[j]; + s = 0.0; + for (int k = 0; k < j; ++k) + { + s += h_L_[idxmap(j, k, num_vecs_ + 1)] * h_aux_[k]; + } // for k + h_aux_[j] -= s; + } // for j + + // and now subtract that from h_H + for (int j = 0; j <= i; ++j) + { + H[idxmap(i, j, num_vecs_ + 1)] -= h_aux_[j]; + } - default: - assert(0 && "Iterative refinement failed, wrong orthogonalization.\n"); + vec_Hcolumn_->resize(i + 1); + vec_Hcolumn_->copyDataFrom(&H[idxmap(i, 0, num_vecs_ + 1)], memory::HOST, memspace_); + + vector_handler_->massAxpy(n, vec_Hcolumn_, i + 1, V, vec_w_, memspace_); + // normalize (second synch) + t = vector_handler_->dot(vec_w_, vec_w_, memspace_); + // set the last entry in Hessenberg matrix + t = std::sqrt(t); + H[idxmap(i, i + 1, num_vecs_ + 1)] = t; + if (std::abs(t) > MACHINE_EPSILON) + { + t = 1.0 / t; + vector_handler_->scal(&t, vec_w_, memspace_); + } + else + { + assert(0 && "Iterative refinement failed, Krylov vector with ZERO norm\n"); return 1; - } //switch + } + h_rv = nullptr; + return 0; + + case CGS1: + vec_v_->setData(V->getData(i + 1, memspace_), memspace_); + // Hcol = V(:,1:i)^T*V(:,i+1); + vector_handler_->gemv('T', n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace_); + // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol + vector_handler_->gemv('N', n, i + 1, &ONE, &MINUS_ONE, V, vec_Hcolumn_, vec_v_, memspace_); + mem_.deviceSynchronize(); + + // copy H_col to H + vec_Hcolumn_->setDataUpdated(memspace_); + vec_Hcolumn_->resize(i + 1); + vec_Hcolumn_->copyDataTo(&H[idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST); + mem_.deviceSynchronize(); + + t = vector_handler_->dot(vec_v_, vec_v_, memspace_); + // set the last entry in Hessenberg matrix + t = std::sqrt(t); + H[idxmap(i, i + 1, num_vecs_ + 1)] = t; + if (std::abs(t) > MACHINE_EPSILON) + { + t = 1.0 / t; + vector_handler_->scal(&t, vec_v_, memspace_); + } + else + { + assert(0 && "Gram-Schmidt failed, vector with ZERO norm\n"); + return 1; + } + return 0; + + default: + assert(0 && "Iterative refinement failed, wrong orthogonalization.\n"); + return 1; + } // switch return 0; } // int orthogonalize() @@ -370,7 +414,8 @@ namespace ReSolve int GramSchmidt::freeGramSchmidtData() { - if(variant_ == MGS_TWO_SYNC || variant_ == MGS_PM) { + if (variant_ == MGS_TWO_SYNC || variant_ == MGS_PM) + { delete[] h_L_; h_L_ = nullptr; @@ -380,19 +425,22 @@ namespace ReSolve vec_Hcolumn_ = nullptr; } - if (variant_ == CGS2) { + if (variant_ == CGS2) + { delete[] h_aux_; h_aux_ = nullptr; delete vec_Hcolumn_; vec_Hcolumn_ = nullptr; } - if (variant_ == CGS1) { + if (variant_ == CGS1) + { delete vec_Hcolumn_; vec_Hcolumn_ = nullptr; } - if (variant_ == MGS_PM) { + if (variant_ == MGS_PM) + { delete[] h_aux_; h_aux_ = nullptr; } @@ -406,5 +454,4 @@ namespace ReSolve return 0; } - } // namespace ReSolve diff --git a/resolve/GramSchmidt.hpp b/resolve/GramSchmidt.hpp index 062c3de32..1a366cca9 100644 --- a/resolve/GramSchmidt.hpp +++ b/resolve/GramSchmidt.hpp @@ -1,57 +1,60 @@ #pragma once -#include #include +#include #include "Common.hpp" -#include #include +#include -namespace ReSolve +namespace ReSolve { class GramSchmidt { - private: - using vector_type = vector::Vector; - - public: - enum GSVariant {MGS = 0, - CGS2, - MGS_TWO_SYNC, - MGS_PM, - CGS1}; - - GramSchmidt() = delete; - GramSchmidt(VectorHandler* vh, GSVariant variant); - ~GramSchmidt(); - int setVariant(GramSchmidt::GSVariant variant); - GSVariant getVariant(); - real_type* getL(); //only for low synch, returns null ptr otherwise - - int setup(index_type n, index_type restart); - int orthogonalize(index_type n, vector_type* V, real_type* H, index_type i); - bool isSetupComplete(); - - private: - int freeGramSchmidtData(); - - GSVariant variant_{MGS}; - bool setup_complete_{false}; //to avoid double allocations - - index_type num_vecs_; //the same as restart - vector_type* vec_rv_{nullptr}; - vector_type* vec_Hcolumn_{nullptr}; - - real_type* h_L_{nullptr}; - real_type* h_aux_{nullptr}; - VectorHandler* vector_handler_{nullptr}; - - vector_type* vec_v_{nullptr}; // aux variable - vector_type* vec_w_{nullptr}; // aux variable - vector_type* vec_x_{nullptr}; // aux variable - - MemoryHandler mem_; ///< Device memory manager object - memory::MemorySpace memspace_; + private: + using vector_type = vector::Vector; + + public: + enum GSVariant + { + MGS = 0, + CGS2, + MGS_TWO_SYNC, + MGS_PM, + CGS1 + }; + + GramSchmidt() = delete; + GramSchmidt(VectorHandler* vh, GSVariant variant); + ~GramSchmidt(); + int setVariant(GramSchmidt::GSVariant variant); + GSVariant getVariant(); + real_type* getL(); // only for low synch, returns null ptr otherwise + + int setup(index_type n, index_type restart); + int orthogonalize(index_type n, vector_type* V, real_type* H, index_type i); + bool isSetupComplete(); + + private: + int freeGramSchmidtData(); + + GSVariant variant_{MGS}; + bool setup_complete_{false}; // to avoid double allocations + + index_type num_vecs_; // the same as restart + vector_type* vec_rv_{nullptr}; + vector_type* vec_Hcolumn_{nullptr}; + + real_type* h_L_{nullptr}; + real_type* h_aux_{nullptr}; + VectorHandler* vector_handler_{nullptr}; + + vector_type* vec_v_{nullptr}; // aux variable + vector_type* vec_w_{nullptr}; // aux variable + vector_type* vec_x_{nullptr}; // aux variable + + MemoryHandler mem_; ///< Device memory manager object + memory::MemorySpace memspace_; }; } // namespace ReSolve diff --git a/resolve/LinSolver.cpp b/resolve/LinSolver.cpp index 75bc15b6d..5791e691b 100644 --- a/resolve/LinSolver.cpp +++ b/resolve/LinSolver.cpp @@ -3,16 +3,15 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Implementation of linear solver base class. - * + * */ -#include -#include - #include "LinSolver.hpp" +#include +#include -namespace ReSolve +namespace ReSolve { using out = io::Logger; @@ -22,25 +21,23 @@ namespace ReSolve LinSolver::~LinSolver() { - //destroy the matrix and hadlers + // destroy the matrix and hadlers } real_type LinSolver::evaluateResidual() { - //to be implemented + // to be implemented return 1.0; } int LinSolver::getParamId(std::string id) const { auto it = params_list_.find(id); - if (it == params_list_.end()) { + if (it == params_list_.end()) + { out::error() << "Unknown parameter " << id << ".\n"; return 999; } return (*it).second; } -} - - - +} // namespace ReSolve diff --git a/resolve/LinSolver.hpp b/resolve/LinSolver.hpp index 556b1412d..efc3dcc51 100644 --- a/resolve/LinSolver.hpp +++ b/resolve/LinSolver.hpp @@ -3,7 +3,7 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Declaration of linear solver base class. - * + * */ #pragma once @@ -12,7 +12,7 @@ #include "Common.hpp" -namespace ReSolve +namespace ReSolve { // Forward declaration of vector::Vector class namespace vector @@ -32,38 +32,37 @@ namespace ReSolve // Forward declaration of MatrixHandler class class MatrixHandler; - /** * @brief Base class for all linear solvers. - * + * */ - class LinSolver + class LinSolver { - protected: - using vector_type = vector::Vector; + protected: + using vector_type = vector::Vector; + + public: + LinSolver(); + virtual ~LinSolver(); - public: - LinSolver(); - virtual ~LinSolver(); + real_type evaluateResidual(); - real_type evaluateResidual(); + virtual int setCliParam(const std::string /* id */, const std::string /* value */) = 0; + virtual std::string getCliParamString(const std::string /* id */) const = 0; + virtual index_type getCliParamInt(const std::string /* id */) const = 0; + virtual real_type getCliParamReal(const std::string /* id */) const = 0; + virtual bool getCliParamBool(const std::string /* id */) const = 0; + virtual int printCliParam(const std::string /* id */) const = 0; - virtual int setCliParam(const std::string /* id */, const std::string /* value */) = 0; - virtual std::string getCliParamString(const std::string /* id */) const = 0; - virtual index_type getCliParamInt(const std::string /* id */) const = 0; - virtual real_type getCliParamReal(const std::string /* id */) const = 0; - virtual bool getCliParamBool(const std::string /* id */) const = 0; - virtual int printCliParam(const std::string /* id */) const = 0; - - protected: - int getParamId(std::string id) const; + protected: + int getParamId(std::string id) const; - matrix::Sparse* A_{nullptr}; + matrix::Sparse* A_{nullptr}; - MatrixHandler* matrix_handler_{nullptr}; - VectorHandler* vector_handler_{nullptr}; + MatrixHandler* matrix_handler_{nullptr}; + VectorHandler* vector_handler_{nullptr}; - std::map params_list_; + std::map params_list_; }; } // namespace ReSolve diff --git a/resolve/LinSolverDirect.cpp b/resolve/LinSolverDirect.cpp index 6eaf64ee8..b11370953 100644 --- a/resolve/LinSolverDirect.cpp +++ b/resolve/LinSolverDirect.cpp @@ -3,16 +3,17 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Implementation of direct solver base class. - * + * */ +#include #include #include -#include -namespace ReSolve +namespace ReSolve { using out = io::Logger; + /** * @brief Constructor for LinSolverDirect class. * @@ -37,27 +38,29 @@ namespace ReSolve * @brief Setup function for LinSolverDirect class. * * @param[in] A - matrix to be solved - * @param[in] L - optional lower triangular factor + * @param[in] L - optional lower triangular factor * @param[in] U - optional upper triangular factor * @param[in] P - optional row permutation vector * @param[in] Q - optional column permutation vector * @param[in] rhs - optional right-hand side vector - * + * * @return int - error code, 0 if successful */ int LinSolverDirect::setup(matrix::Sparse* A, matrix::Sparse* /* L */, matrix::Sparse* /* U */, - index_type* /* P */, - index_type* /* Q */, - vector_type* /* rhs */) + index_type* /* P */, + index_type* /* Q */, + vector_type* /* rhs */) { - if (A == nullptr) { + if (A == nullptr) + { return 1; } A_ = A; return 0; } + /** * @brief Placeholder function for symbolic factorization. */ @@ -65,6 +68,7 @@ namespace ReSolve { return 1; } + /** * @brief Placeholder function for numeric factorization. */ @@ -72,6 +76,7 @@ namespace ReSolve { return 1; } + /** * @brief Placeholder function for refactorization. */ @@ -79,34 +84,35 @@ namespace ReSolve { return 1; } + /** * @brief Placeholder function for lower triangular factor. */ matrix::Sparse* LinSolverDirect::getLFactor() { return nullptr; - } - + } + /** * @brief Placeholder function for upper triangular factor. */ matrix::Sparse* LinSolverDirect::getUFactor() { return nullptr; - } - + } + /** * @brief Placeholder function for row permutation vector. */ - index_type* LinSolverDirect::getPOrdering() + index_type* LinSolverDirect::getPOrdering() { return nullptr; - } - + } + /** * @brief Placeholder function for column permutation vector. */ - index_type* LinSolverDirect::getQOrdering() + index_type* LinSolverDirect::getQOrdering() { return nullptr; } diff --git a/resolve/LinSolverDirect.hpp b/resolve/LinSolverDirect.hpp index 7623f074c..eefb75c2e 100644 --- a/resolve/LinSolverDirect.hpp +++ b/resolve/LinSolverDirect.hpp @@ -3,44 +3,44 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Declaration of direct solver base class. - * + * */ #pragma once #include + #include -namespace ReSolve +namespace ReSolve { - class LinSolverDirect : public LinSolver + class LinSolverDirect : public LinSolver { - public: - LinSolverDirect(); - virtual ~LinSolverDirect(); - virtual int setup(matrix::Sparse* A = nullptr, - matrix::Sparse* L = nullptr, - matrix::Sparse* U = nullptr, - index_type* P = nullptr, - index_type* Q = nullptr, - vector_type* rhs = nullptr); + public: + LinSolverDirect(); + virtual ~LinSolverDirect(); + virtual int setup(matrix::Sparse* A = nullptr, + matrix::Sparse* L = nullptr, + matrix::Sparse* U = nullptr, + index_type* P = nullptr, + index_type* Q = nullptr, + vector_type* rhs = nullptr); - virtual int analyze(); //the same as symbolic factorization - virtual int factorize(); - virtual int refactorize(); - virtual int solve(vector_type* rhs, vector_type* x) = 0; - virtual int solve(vector_type* x) = 0; - - virtual matrix::Sparse* getLFactor(); - virtual matrix::Sparse* getUFactor(); - virtual index_type* getPOrdering(); - virtual index_type* getQOrdering(); + virtual int analyze(); // the same as symbolic factorization + virtual int factorize(); + virtual int refactorize(); + virtual int solve(vector_type* rhs, vector_type* x) = 0; + virtual int solve(vector_type* x) = 0; - protected: - matrix::Sparse* L_{nullptr}; - matrix::Sparse* U_{nullptr}; - index_type* P_{nullptr}; - index_type* Q_{nullptr}; + virtual matrix::Sparse* getLFactor(); + virtual matrix::Sparse* getUFactor(); + virtual index_type* getPOrdering(); + virtual index_type* getQOrdering(); + protected: + matrix::Sparse* L_{nullptr}; + matrix::Sparse* U_{nullptr}; + index_type* P_{nullptr}; + index_type* Q_{nullptr}; }; -} +} // namespace ReSolve diff --git a/resolve/LinSolverDirectCpuILU0.cpp b/resolve/LinSolverDirectCpuILU0.cpp index 92f6b289e..cffb2647b 100644 --- a/resolve/LinSolverDirectCpuILU0.cpp +++ b/resolve/LinSolverDirectCpuILU0.cpp @@ -2,42 +2,43 @@ * @file LinSolverDirectCpuILU0.cpp * @author Slaven Peles (peless@ornl.gov) * @brief Contains definition of a class for incomplete LU factorization on CPU - * - * + * + * */ +#include "LinSolverDirectCpuILU0.hpp" + #include -#include #include -#include #include +#include +#include -#include "LinSolverDirectCpuILU0.hpp" - -namespace ReSolve +namespace ReSolve { using out = io::Logger; LinSolverDirectCpuILU0::LinSolverDirectCpuILU0(LinAlgWorkspaceCpu* /* workspace */) - // : workspace_(workspace) + // : workspace_(workspace) { } /** * @brief Destructor - * + * * @todo Address how L and U factors are deleted (currently base class does that). */ LinSolverDirectCpuILU0::~LinSolverDirectCpuILU0() { - if (owns_factors_) { + if (owns_factors_) + { delete L_; delete U_; L_ = nullptr; U_ = nullptr; } - delete [] diagU_; - delete [] idxmap_; + delete[] diagU_; + delete[] idxmap_; } int LinSolverDirectCpuILU0::setup(matrix::Sparse* A, @@ -45,12 +46,12 @@ namespace ReSolve matrix::Sparse*, index_type*, index_type*, - vector_type* ) + vector_type*) { - int error_sum = 0; - A_ = dynamic_cast(A); - error_sum += analyze(); - error_sum += factorize(); + int error_sum = 0; + A_ = dynamic_cast(A); + error_sum += analyze(); + error_sum += factorize(); return error_sum; } @@ -64,27 +65,33 @@ namespace ReSolve A_ = dynamic_cast(A); index_type* rowsL = L_->getRowData(HOST); - real_type* valsL = L_->getValues(HOST); + real_type* valsL = L_->getValues(HOST); index_type* rowsU = U_->getRowData(HOST); index_type* colsU = U_->getColData(HOST); - real_type* valsU = U_->getValues(HOST); + real_type* valsU = U_->getValues(HOST); const index_type* colsA = A_->getColData(HOST); - const real_type* valsA = A_->getValues(HOST); + const real_type* valsA = A_->getValues(HOST); // Update values in L and U factors - const index_type N = A_->getNumRows(); - index_type acount = 0; - for (index_type i = 0; i < N; ++i) { - for (index_type j = rowsL[i]; j < rowsL[i+1]; ++j) { - valsL[j] = valsA[acount]; - ++acount; + const index_type N = A_->getNumRows(); + index_type acount = 0; + for (index_type i = 0; i < N; ++i) + { + for (index_type j = rowsL[i]; j < rowsL[i + 1]; ++j) + { + valsL[j] = valsA[acount]; + ++acount; } - for (index_type j = rowsU[i]; j < rowsU[i+1]; ++j) { - if ((colsU[j] == i) && (colsA[acount] != i)) { + for (index_type j = rowsU[i]; j < rowsU[i + 1]; ++j) + { + if ((colsU[j] == i) && (colsA[acount] != i)) + { valsU[j] = zero_diagonal_; - } else { + } + else + { valsU[j] = valsA[acount]; ++acount; } @@ -101,7 +108,7 @@ namespace ReSolve using namespace memory; int error_sum = 0; - const index_type N = A_->getNumRows(); + const index_type N = A_->getNumRows(); const index_type* rowsA = A_->getRowData(memory::HOST); const index_type* colsA = A_->getColData(memory::HOST); const real_type* valsA = A_->getValues(memory::HOST); @@ -120,23 +127,32 @@ namespace ReSolve // Find number of nonzeros and row pointers for L and U factors bool has_diagonal = false; - for (index_type i = 0; i < N; ++i) { + for (index_type i = 0; i < N; ++i) + { rowsL[i] = nnzL; rowsU[i] = nnzU; - for (index_type j = rowsA[i]; j < rowsA[i+1]; ++j) { - if (colsA[j] < i) { + for (index_type j = rowsA[i]; j < rowsA[i + 1]; ++j) + { + if (colsA[j] < i) + { nnzL++; - } else { - if (colsA[j] == i) { + } + else + { + if (colsA[j] == i) + { has_diagonal = true; - diagU_[i] = valsA[j] < zero_diagonal_ ? zero_diagonal_ : valsA[j]; + diagU_[i] = valsA[j] < zero_diagonal_ ? zero_diagonal_ : valsA[j]; } nnzU++; } } - if (has_diagonal) { + if (has_diagonal) + { has_diagonal = false; - } else { + } + else + { nnzU++; diagU_[i] = zero_diagonal_; } @@ -146,23 +162,27 @@ namespace ReSolve index_type* colsL = new index_type[nnzL]; index_type* colsU = new index_type[nnzU]; - real_type* valsL = new real_type[nnzL]; - real_type* valsU = new real_type[nnzU]; + real_type* valsL = new real_type[nnzL]; + real_type* valsU = new real_type[nnzU]; // Set data for L and U index_type lcount = 0; - index_type ucount = 0; - for (index_type i = 0; i < N; ++i) { + index_type ucount = 0; + for (index_type i = 0; i < N; ++i) + { colsU[ucount] = i; valsU[ucount] = diagU_[i]; ++ucount; - for (index_type j = rowsA[i]; j < rowsA[i+1]; ++j) { - if (colsA[j] < i) { + for (index_type j = rowsA[i]; j < rowsA[i + 1]; ++j) + { + if (colsA[j] < i) + { colsL[lcount] = colsA[j]; valsL[lcount] = valsA[j]; ++lcount; - } - if (colsA[j] > i) { + } + if (colsA[j] > i) + { colsU[ucount] = colsA[j]; valsU[ucount] = valsA[j]; ++ucount; @@ -191,42 +211,47 @@ namespace ReSolve index_type* rowsL = L_->getRowData(HOST); index_type* colsL = L_->getColData(HOST); - real_type* valsL = L_->getValues(HOST); + real_type* valsL = L_->getValues(HOST); index_type* rowsU = U_->getRowData(HOST); index_type* colsU = U_->getColData(HOST); - real_type* valsU = U_->getValues(HOST); + real_type* valsU = U_->getValues(HOST); index_type N = A_->getNumRows(); for (index_type u = 0; u < N; ++u) - idxmap_[u] = -1; + idxmap_[u] = -1; // Factorize (incompletely) - for (index_type i = 1; i < N; ++i) { - for (index_type v = rowsL[i]; v < rowsL[i+1]; ++v) { + for (index_type i = 1; i < N; ++i) + { + for (index_type v = rowsL[i]; v < rowsL[i + 1]; ++v) + { index_type k = colsL[v]; - for (index_type u = rowsU[k]; u < rowsU[k+1]; ++u) { - idxmap_[colsU[u]] = u; + for (index_type u = rowsU[k]; u < rowsU[k + 1]; ++u) + { + idxmap_[colsU[u]] = u; } valsL[v] /= valsU[rowsU[k]]; - for (index_type w = v+1; w < rowsL[i+1]; ++w) { - index_type j = idxmap_[colsL[w]]; + for (index_type w = v + 1; w < rowsL[i + 1]; ++w) + { + index_type j = idxmap_[colsL[w]]; if (j == -1) continue; - valsL[w] -= valsL[v]*valsU[j]; + valsL[w] -= valsL[v] * valsU[j]; } - for (index_type w = rowsU[i]; w < rowsU[i+1]; ++w) { - index_type j = idxmap_[colsU[w]]; + for (index_type w = rowsU[i]; w < rowsU[i + 1]; ++w) + { + index_type j = idxmap_[colsU[w]]; if (j == -1) continue; - valsU[w] -= valsL[v]*valsU[j]; + valsU[w] -= valsL[v] * valsU[j]; } for (index_type u = 0; u < N; ++u) - idxmap_[u] = -1; + idxmap_[u] = -1; } } @@ -235,7 +260,7 @@ namespace ReSolve /** * @brief Triangular solve - * + * * @param[in,out] rhs_vec - right-hand-side vector * @return int - error code */ @@ -254,8 +279,10 @@ namespace ReSolve real_type* valsL = L_->getValues(HOST); // Forward substitution - for (index_type i = 0; i < N; ++i) { - for (index_type j = rowsL[i]; j < rowsL[i+1]; ++j) { + for (index_type i = 0; i < N; ++i) + { + for (index_type j = rowsL[i]; j < rowsL[i + 1]; ++j) + { rhs[i] -= valsL[j] * rhs[colsL[j]]; } } @@ -265,8 +292,10 @@ namespace ReSolve real_type* valsU = U_->getValues(HOST); // Backward substitution - for (index_type i = N - 1; i >= 0; --i) { - for (index_type j = rowsU[i] + 1; j < rowsU[i+1]; ++j) { + for (index_type i = N - 1; i >= 0; --i) + { + for (index_type j = rowsU[i] + 1; j < rowsU[i + 1]; ++j) + { rhs[i] -= valsU[j] * rhs[colsU[j]]; } rhs[i] /= valsU[rowsU[i]]; @@ -277,8 +306,8 @@ namespace ReSolve /** * @brief Triangular solve - * - * @param[in] rhs_vec - right-hand-side vector + * + * @param[in] rhs_vec - right-hand-side vector * @param[out] x_vec - solution vector * @return int - status code */ @@ -299,9 +328,11 @@ namespace ReSolve const real_type* valsL = L_->getValues(HOST); // Forward substitution - for (index_type i = 0; i < N; ++i) { + for (index_type i = 0; i < N; ++i) + { x[i] = rhs[i]; - for (index_type j = rowsL[i]; j < rowsL[i+1]; ++j) { + for (index_type j = rowsL[i]; j < rowsL[i + 1]; ++j) + { x[i] -= valsL[j] * x[colsL[j]]; } } @@ -311,8 +342,10 @@ namespace ReSolve const real_type* valsU = U_->getValues(HOST); // Backward substitution - for (index_type i = N - 1; i >= 0; --i) { - for (index_type j = rowsU[i] + 1; j < rowsU[i+1]; ++j) { + for (index_type i = N - 1; i >= 0; --i) + { + for (index_type j = rowsU[i] + 1; j < rowsU[i + 1]; ++j) + { x[i] -= valsU[j] * x[colsU[j]]; } x[i] /= valsU[rowsU[i]]; @@ -333,11 +366,11 @@ namespace ReSolve /** * @brief Sets approximation to zero on matrix diagonal. - * + * * If the original matrix has structural zeros on the diagonal, the ILU0 * analysis will add diagonal elements and set them to `zero_diagonal_` * value. The default is 1e-6, this function allows user to change that. - * + * * @param z - small value approximating zero * @return int - returns status code */ @@ -349,11 +382,11 @@ namespace ReSolve /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to set. * @return int Error code. */ @@ -361,19 +394,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - std::cout << "Setting parameter failed!\n"; + default: + std::cout << "Setting parameter failed!\n"; } return 0; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return std::string Value of the string parameter to return. */ @@ -381,19 +414,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown string parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown string parameter " << id << "\n"; } return ""; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return int Value of the int parameter to return. */ @@ -401,19 +434,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown integer parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown integer parameter " << id << "\n"; } return -1; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return real_type Value of the real_type parameter to return. */ @@ -421,19 +454,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown real parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown real parameter " << id << "\n"; } return std::numeric_limits::quiet_NaN(); } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return bool Value of the bool parameter to return. */ @@ -441,8 +474,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown boolean parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown boolean parameter " << id << "\n"; } return false; } @@ -458,4 +491,4 @@ namespace ReSolve return 0; } -} // namespace resolve +} // namespace ReSolve diff --git a/resolve/LinSolverDirectCpuILU0.hpp b/resolve/LinSolverDirectCpuILU0.hpp index dbde29e20..fc4e75450 100644 --- a/resolve/LinSolverDirectCpuILU0.hpp +++ b/resolve/LinSolverDirectCpuILU0.hpp @@ -3,7 +3,7 @@ * @author Slaven Peles (peless@ornl.gov) * @brief Contains declaration of a class for incomplete LU factorization on CPU * - * + * */ #pragma once @@ -11,7 +11,7 @@ #include #include -namespace ReSolve +namespace ReSolve { // Forward declaration of vector::Vector class namespace vector @@ -30,59 +30,59 @@ namespace ReSolve /** * @brief Incomplete LU factorization solver. - * + * * Implements ILU0 factorization from Algorithm 1 in 2023 paper by Suzuki, * Fukaya, and Iwashita with modification where zero diagonal elements in * the matrix are replaced by small values specified in `zero_diagonal_`. * Factors L and U are stored in separate CSR matrices. Factor L does not * store ones at the diagonal. - * + * * Methods in this class perform all operations on raw matrix data. - * + * */ - class LinSolverDirectCpuILU0 : public LinSolverDirect + class LinSolverDirectCpuILU0 : public LinSolverDirect { using vector_type = vector::Vector; - - public: - LinSolverDirectCpuILU0(LinAlgWorkspaceCpu* workspace = nullptr); - ~LinSolverDirectCpuILU0(); - - int setup(matrix::Sparse* A, - matrix::Sparse* L = nullptr, - matrix::Sparse* U = nullptr, - index_type* P = nullptr, - index_type* Q = nullptr, - vector_type* rhs = nullptr) override; - // if values of A change, but the nnz pattern does not, redo the analysis only (reuse buffers though) - int reset(matrix::Sparse* A); - int analyze() override; - int factorize() override; - - int solve(vector_type* rhs, vector_type* x) override; - int solve(vector_type* rhs) override; // the solution is returned IN RHS (rhs is overwritten) - matrix::Sparse* getLFactor() override; - matrix::Sparse* getUFactor() override; + public: + LinSolverDirectCpuILU0(LinAlgWorkspaceCpu* workspace = nullptr); + ~LinSolverDirectCpuILU0(); + + int setup(matrix::Sparse* A, + matrix::Sparse* L = nullptr, + matrix::Sparse* U = nullptr, + index_type* P = nullptr, + index_type* Q = nullptr, + vector_type* rhs = nullptr) override; + // if values of A change, but the nnz pattern does not, redo the analysis only (reuse buffers though) + int reset(matrix::Sparse* A); + int analyze() override; + int factorize() override; + + int solve(vector_type* rhs, vector_type* x) override; + int solve(vector_type* rhs) override; // the solution is returned IN RHS (rhs is overwritten) + + matrix::Sparse* getLFactor() override; + matrix::Sparse* getUFactor() override; - int setZeroDiagonal(real_type z); + int setZeroDiagonal(real_type z); - int setCliParam(const std::string id, const std::string value) override; - std::string getCliParamString(const std::string id) const override; - index_type getCliParamInt(const std::string id) const override; - real_type getCliParamReal(const std::string id) const override; - bool getCliParamBool(const std::string id) const override; - int printCliParam(const std::string id) const override; + int setCliParam(const std::string id, const std::string value) override; + std::string getCliParamString(const std::string id) const override; + index_type getCliParamInt(const std::string id) const override; + real_type getCliParamReal(const std::string id) const override; + bool getCliParamBool(const std::string id) const override; + int printCliParam(const std::string id) const override; - private: - // MemoryHandler mem_; ///< Device memory manager object - // LinAlgWorkspaceCpu* workspace_{nullptr}; + private: + // MemoryHandler mem_; ///< Device memory manager object + // LinAlgWorkspaceCpu* workspace_{nullptr}; - matrix::Csr* A_{nullptr}; ///< Pointer to the system matrix - real_type* diagU_{nullptr}; ///< Buffer holding diagonal of factor U - index_type* idxmap_{nullptr}; ///< Mapping for matrix column indices - bool owns_factors_{false}; ///< If the class owns L and U factors + matrix::Csr* A_{nullptr}; ///< Pointer to the system matrix + real_type* diagU_{nullptr}; ///< Buffer holding diagonal of factor U + index_type* idxmap_{nullptr}; ///< Mapping for matrix column indices + bool owns_factors_{false}; ///< If the class owns L and U factors - real_type zero_diagonal_{1e-6}; ///< Approximation for zero diagonal + real_type zero_diagonal_{1e-6}; ///< Approximation for zero diagonal }; } // namespace ReSolve diff --git a/resolve/LinSolverDirectCuSolverGLU.cpp b/resolve/LinSolverDirectCuSolverGLU.cpp index f57de9ff9..c05b2295a 100644 --- a/resolve/LinSolverDirectCuSolverGLU.cpp +++ b/resolve/LinSolverDirectCuSolverGLU.cpp @@ -1,17 +1,18 @@ +#include "LinSolverDirectCuSolverGLU.hpp" + #include // includes memcpy #include -#include +#include #include -#include #include -#include "LinSolverDirectCuSolverGLU.hpp" -#include +#include +#include namespace ReSolve { using vector_type = vector::Vector; - using out = io::Logger; + using out = io::Logger; LinSolverDirectCuSolverGLU::LinSolverDirectCuSolverGLU(LinAlgWorkspaceCUDA* workspace) { @@ -30,23 +31,23 @@ namespace ReSolve int LinSolverDirectCuSolverGLU::setup(matrix::Sparse* A, matrix::Sparse* L, matrix::Sparse* U, - index_type* P, - index_type* Q, + index_type* P, + index_type* Q, vector_type* /** rhs */) { RESOLVE_RANGE_PUSH(__FUNCTION__); int error_sum = 0; LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; - //get the handle - handle_cusolversp_ = workspaceCUDA->getCusolverSpHandle(); - A_ = (matrix::Csr*) A; - index_type n = A_->getNumRows(); - index_type nnz = A_->getNnz(); - //create combined factor - combineFactors(L,U); + // get the handle + handle_cusolversp_ = workspaceCUDA->getCusolverSpHandle(); + A_ = (matrix::Csr*) A; + index_type n = A_->getNumRows(); + index_type nnz = A_->getNnz(); + // create combined factor + combineFactors(L, U); - //set up descriptors + // set up descriptors cusparseCreateMatDescr(&descr_M_); cusparseSetMatType(descr_M_, CUSPARSE_MATRIX_TYPE_GENERAL); cusparseSetMatIndexBase(descr_M_, CUSPARSE_INDEX_BASE_ZERO); @@ -56,46 +57,46 @@ namespace ReSolve cusparseSetMatType(descr_A_, CUSPARSE_MATRIX_TYPE_GENERAL); cusparseSetMatIndexBase(descr_A_, CUSPARSE_INDEX_BASE_ZERO); - //set up the GLU - status_cusolver_ = cusolverSpDgluSetup(handle_cusolversp_, + // set up the GLU + status_cusolver_ = cusolverSpDgluSetup(handle_cusolversp_, n, - nnz, - descr_A_, + nnz, + descr_A_, A_->getRowData(memory::HOST), A_->getColData(memory::HOST), - P, /** base-0 */ - Q, /** base-0 */ - M_->getNnz(), /** nnzM */ - descr_M_, - M_->getRowData(memory::HOST), - M_->getColData(memory::HOST), + P, /** base-0 */ + Q, /** base-0 */ + M_->getNnz(), /** nnzM */ + descr_M_, + M_->getRowData(memory::HOST), + M_->getColData(memory::HOST), info_M_); - error_sum += status_cusolver_; - //NOW the buffer + error_sum += status_cusolver_; + // NOW the buffer size_t buffer_size; - status_cusolver_ = cusolverSpDgluBufferSize(handle_cusolversp_, info_M_, &buffer_size); - error_sum += status_cusolver_; + status_cusolver_ = cusolverSpDgluBufferSize(handle_cusolversp_, info_M_, &buffer_size); + error_sum += status_cusolver_; mem_.allocateBufferOnDevice(&glu_buffer_, buffer_size); - status_cusolver_ = cusolverSpDgluAnalysis(handle_cusolversp_, info_M_, glu_buffer_); - error_sum += status_cusolver_; + status_cusolver_ = cusolverSpDgluAnalysis(handle_cusolversp_, info_M_, glu_buffer_); + error_sum += status_cusolver_; // reset and refactor so factors are ON THE GPU - status_cusolver_ = cusolverSpDgluReset(handle_cusolversp_, + status_cusolver_ = cusolverSpDgluReset(handle_cusolversp_, n, /** A is original matrix */ - nnz, - descr_A_, - A_->getValues( memory::DEVICE), + nnz, + descr_A_, + A_->getValues(memory::DEVICE), A_->getRowData(memory::DEVICE), A_->getColData(memory::DEVICE), info_M_); - error_sum += status_cusolver_; + error_sum += status_cusolver_; - status_cusolver_ = cusolverSpDgluFactor(handle_cusolversp_, info_M_, glu_buffer_); - error_sum += status_cusolver_; + status_cusolver_ = cusolverSpDgluFactor(handle_cusolversp_, info_M_, glu_buffer_); + error_sum += status_cusolver_; RESOLVE_RANGE_POP(__FUNCTION__); return error_sum; @@ -104,52 +105,61 @@ namespace ReSolve void LinSolverDirectCuSolverGLU::combineFactors(matrix::Sparse* L, matrix::Sparse* U) { // L and U need to be in CSC format - index_type n = L->getNumRows(); - index_type* Lp = L->getColData(memory::HOST); - index_type* Li = L->getRowData(memory::HOST); - index_type* Up = U->getColData(memory::HOST); - index_type* Ui = U->getRowData(memory::HOST); - index_type nnzM = ( L->getNnz() + U->getNnz() - n ); - M_ = new matrix::Csr(n, n, nnzM); + index_type n = L->getNumRows(); + index_type* Lp = L->getColData(memory::HOST); + index_type* Li = L->getRowData(memory::HOST); + index_type* Up = U->getColData(memory::HOST); + index_type* Ui = U->getRowData(memory::HOST); + index_type nnzM = (L->getNnz() + U->getNnz() - n); + M_ = new matrix::Csr(n, n, nnzM); M_->allocateMatrixData(memory::HOST); index_type* mia = M_->getRowData(memory::HOST); index_type* mja = M_->getColData(memory::HOST); - index_type row; - for(index_type i = 0; i < n; ++i) { + index_type row; + for (index_type i = 0; i < n; ++i) + { // go through EACH COLUMN OF L first - for(index_type j = Lp[i]; j < Lp[i + 1]; ++j) { + for (index_type j = Lp[i]; j < Lp[i + 1]; ++j) + { row = Li[j]; // BUT dont count diagonal twice, important - if(row != i) { + if (row != i) + { mia[row + 1]++; } } // then each column of U - for(index_type j = Up[i]; j < Up[i + 1]; ++j) { + for (index_type j = Up[i]; j < Up[i + 1]; ++j) + { row = Ui[j]; mia[row + 1]++; } } // then organize mia_; mia[0] = 0; - for(index_type i = 1; i < n + 1; i++) { + for (index_type i = 1; i < n + 1; i++) + { mia[i] += mia[i - 1]; } std::vector Mshifts(n, 0); - for(index_type i = 0; i < n; ++i) { + for (index_type i = 0; i < n; ++i) + { // go through EACH COLUMN OF L first - for(int j = Lp[i]; j < Lp[i + 1]; ++j) { + for (int j = Lp[i]; j < Lp[i + 1]; ++j) + { row = Li[j]; - if(row != i) { + if (row != i) + { // place (row, i) where it belongs! mja[mia[row] + Mshifts[row]] = i; Mshifts[row]++; } } // each column of U next - for(index_type j = Up[i]; j < Up[i + 1]; ++j) { - row = Ui[j]; + for (index_type j = Up[i]; j < Up[i + 1]; ++j) + { + row = Ui[j]; mja[mia[row] + Mshifts[row]] = i; Mshifts[row]++; } @@ -159,20 +169,20 @@ namespace ReSolve int LinSolverDirectCuSolverGLU::refactorize() { RESOLVE_RANGE_PUSH(__FUNCTION__); - int error_sum = 0; - status_cusolver_ = cusolverSpDgluReset(handle_cusolversp_, - A_->getNumRows(), - /** A is original matrix */ - A_->getNnz(), - descr_A_, - A_->getValues( memory::DEVICE), - A_->getRowData(memory::DEVICE), - A_->getColData(memory::DEVICE), - info_M_); - error_sum += status_cusolver_; + int error_sum = 0; + status_cusolver_ = cusolverSpDgluReset(handle_cusolversp_, + A_->getNumRows(), + /** A is original matrix */ + A_->getNnz(), + descr_A_, + A_->getValues(memory::DEVICE), + A_->getRowData(memory::DEVICE), + A_->getColData(memory::DEVICE), + info_M_); + error_sum += status_cusolver_; - status_cusolver_ = cusolverSpDgluFactor(handle_cusolversp_, info_M_, glu_buffer_); - error_sum += status_cusolver_; + status_cusolver_ = cusolverSpDgluFactor(handle_cusolversp_, info_M_, glu_buffer_); + error_sum += status_cusolver_; RESOLVE_RANGE_POP(__FUNCTION__); return error_sum; @@ -181,26 +191,26 @@ namespace ReSolve int LinSolverDirectCuSolverGLU::solve(vector_type* rhs, vector_type* x) { RESOLVE_RANGE_PUSH(__FUNCTION__); - status_cusolver_ = cusolverSpDgluSolve(handle_cusolversp_, - A_->getNumRows(), - /** A is original matrix */ - A_->getNnz(), - descr_A_, - A_->getValues( memory::DEVICE), - A_->getRowData(memory::DEVICE), - A_->getColData(memory::DEVICE), - rhs->getData(memory::DEVICE),/** right hand side */ - x->getData(memory::DEVICE), /** left hand side */ - &ite_refine_succ_, - &r_nrm_inf_, - info_M_, - glu_buffer_); + status_cusolver_ = cusolverSpDgluSolve(handle_cusolversp_, + A_->getNumRows(), + /** A is original matrix */ + A_->getNnz(), + descr_A_, + A_->getValues(memory::DEVICE), + A_->getRowData(memory::DEVICE), + A_->getColData(memory::DEVICE), + rhs->getData(memory::DEVICE), /** right hand side */ + x->getData(memory::DEVICE), /** left hand side */ + &ite_refine_succ_, + &r_nrm_inf_, + info_M_, + glu_buffer_); x->setDataUpdated(memory::DEVICE); RESOLVE_RANGE_POP(__FUNCTION__); - return status_cusolver_; + return status_cusolver_; } - int LinSolverDirectCuSolverGLU::solve(vector_type* ) + int LinSolverDirectCuSolverGLU::solve(vector_type*) { out::error() << "Function solve(Vector* x) not implemented in CuSolverGLU!\n" << "Consider using solve(Vector* rhs, Vector* x) instead.\n"; @@ -209,11 +219,11 @@ namespace ReSolve /*** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to set. * @return int Error code. */ @@ -221,19 +231,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - std::cout << "Setting parameter failed!\n"; + default: + std::cout << "Setting parameter failed!\n"; } return 0; } /*** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return std::string Value of the string parameter to return. */ @@ -241,19 +251,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown string parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown string parameter " << id << "\n"; } return ""; } /*** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return int Value of the int parameter to return. */ @@ -261,19 +271,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown integer parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown integer parameter " << id << "\n"; } return -1; } /*** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return real_type Value of the real_type parameter to return. */ @@ -281,19 +291,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown real parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown real parameter " << id << "\n"; } return std::numeric_limits::quiet_NaN(); } /*** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return bool Value of the bool parameter to return. */ @@ -301,8 +311,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown boolean parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown boolean parameter " << id << "\n"; } return false; } @@ -318,4 +328,4 @@ namespace ReSolve return 0; } -} +} // namespace ReSolve diff --git a/resolve/LinSolverDirectCuSolverGLU.hpp b/resolve/LinSolverDirectCuSolverGLU.hpp index 3ac22194e..485261d5a 100644 --- a/resolve/LinSolverDirectCuSolverGLU.hpp +++ b/resolve/LinSolverDirectCuSolverGLU.hpp @@ -1,12 +1,11 @@ #pragma once -#include "cusolver_defs.hpp" - #include "Common.hpp" -#include +#include "cusolver_defs.hpp" #include +#include -namespace ReSolve +namespace ReSolve { // Forward declaration of vector::Vector class namespace vector @@ -23,47 +22,47 @@ namespace ReSolve // Forward declaration of ReSolve handlers workspace class LinAlgWorkspaceCUDA; - class LinSolverDirectCuSolverGLU : public LinSolverDirect + class LinSolverDirectCuSolverGLU : public LinSolverDirect { using vector_type = vector::Vector; - - public: - LinSolverDirectCuSolverGLU(LinAlgWorkspaceCUDA* workspace); - ~LinSolverDirectCuSolverGLU(); - int refactorize() override; - int solve(vector_type* rhs, vector_type* x) override; - int solve(vector_type* x) override; + public: + LinSolverDirectCuSolverGLU(LinAlgWorkspaceCUDA* workspace); + ~LinSolverDirectCuSolverGLU(); + + int refactorize() override; + int solve(vector_type* rhs, vector_type* x) override; + int solve(vector_type* x) override; + + int setup(matrix::Sparse* A, + matrix::Sparse* L, + matrix::Sparse* U, + index_type* P, + index_type* Q, + vector_type* rhs = nullptr) override; - int setup(matrix::Sparse* A, - matrix::Sparse* L, - matrix::Sparse* U, - index_type* P, - index_type* Q, - vector_type* rhs = nullptr) override; - - int setCliParam(const std::string id, const std::string value) override; - std::string getCliParamString(const std::string id) const override; - index_type getCliParamInt(const std::string id) const override; - real_type getCliParamReal(const std::string id) const override; - bool getCliParamBool(const std::string id) const override; - int printCliParam(const std::string id) const override; + int setCliParam(const std::string id, const std::string value) override; + std::string getCliParamString(const std::string id) const override; + index_type getCliParamInt(const std::string id) const override; + real_type getCliParamReal(const std::string id) const override; + bool getCliParamBool(const std::string id) const override; + int printCliParam(const std::string id) const override; - private: - void combineFactors(matrix::Sparse* L, matrix::Sparse* U); ///< creates L+U from separate L, U factors - matrix::Sparse* M_; ///< the matrix that contains added factors - //note: we need cuSolver handle, we can copy it from the workspace to avoid double allocation - cusparseMatDescr_t descr_M_; //this is NOT sparse matrix descriptor - cusparseMatDescr_t descr_A_; //this is NOT sparse matrix descriptor - LinAlgWorkspaceCUDA* workspace_; ///< Workspace access so we can copy cusparse handle - cusolverSpHandle_t handle_cusolversp_; - cusolverStatus_t status_cusolver_; - cusparseStatus_t status_cusparse_; - csrgluInfo_t info_M_; - void* glu_buffer_; - double r_nrm_inf_; ///< Error norm of the solution - int ite_refine_succ_; ///< Stores return value of cusolverSpDgluSolve + private: + void combineFactors(matrix::Sparse* L, matrix::Sparse* U); ///< creates L+U from separate L, U factors + matrix::Sparse* M_; ///< the matrix that contains added factors + // note: we need cuSolver handle, we can copy it from the workspace to avoid double allocation + cusparseMatDescr_t descr_M_; // this is NOT sparse matrix descriptor + cusparseMatDescr_t descr_A_; // this is NOT sparse matrix descriptor + LinAlgWorkspaceCUDA* workspace_; ///< Workspace access so we can copy cusparse handle + cusolverSpHandle_t handle_cusolversp_; + cusolverStatus_t status_cusolver_; + cusparseStatus_t status_cusparse_; + csrgluInfo_t info_M_; + void* glu_buffer_; + double r_nrm_inf_; ///< Error norm of the solution + int ite_refine_succ_; ///< Stores return value of cusolverSpDgluSolve - MemoryHandler mem_; ///< Device memory manager object + MemoryHandler mem_; ///< Device memory manager object }; -} +} // namespace ReSolve diff --git a/resolve/LinSolverDirectCuSolverRf.cpp b/resolve/LinSolverDirectCuSolverRf.cpp index 447be4c52..7d8a82466 100644 --- a/resolve/LinSolverDirectCuSolverRf.cpp +++ b/resolve/LinSolverDirectCuSolverRf.cpp @@ -1,13 +1,15 @@ +#include "LinSolverDirectCuSolverRf.hpp" + #include -#include -#include #include -#include "LinSolverDirectCuSolverRf.hpp" +#include +#include namespace ReSolve { using out = io::Logger; + /** * @brief Placeholder constructor for LinSolverDirectCuSolverRf * @@ -60,23 +62,22 @@ namespace ReSolve int LinSolverDirectCuSolverRf::setup(matrix::Sparse* A, matrix::Sparse* L, matrix::Sparse* U, - index_type* P, - index_type* Q, + index_type* P, + index_type* Q, vector_type* /* rhs */) { - assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && - "Matrix A has to be in CSR format for cusolverRf input.\n"); - assert(L->getSparseFormat() == U->getSparseFormat() && - "Matrices L and U have to be in the same format for cusolverRf input.\n"); + assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix A has to be in CSR format for cusolverRf input.\n"); + assert(L->getSparseFormat() == U->getSparseFormat() && "Matrices L and U have to be in the same format for cusolverRf input.\n"); int error_sum = 0; - this->A_ = A; - index_type n = A_->getNumRows(); + this->A_ = A; + index_type n = A_->getNumRows(); - //remember - P and Q are generally CPU variables - // factorization data is stored in the handle. - // If function is called again, destroy the old handle to get rid of old data. - if (setup_completed_) { + // remember - P and Q are generally CPU variables + // factorization data is stored in the handle. + // If function is called again, destroy the old handle to get rid of old data. + if (setup_completed_) + { cusolverRfDestroy(handle_cusolverrf_); cusolverRfCreate(&handle_cusolverrf_); } @@ -86,37 +87,41 @@ namespace ReSolve matrix::Csr* L_csr = nullptr; matrix::Csr* U_csr = nullptr; - switch (L->getSparseFormat()) { - case matrix::Sparse::COMPRESSED_SPARSE_COLUMN: - // std::cout << "converting L and U factors from CSC to CSR format ...\n"; - L_csc = static_cast(L); - U_csc = static_cast(U); - L_csr = new matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); - U_csr = new matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); - csc2csr(L_csc, L_csr); - csc2csr(U_csc, U_csr); - L_csr->syncData(memory::DEVICE); - U_csr->syncData(memory::DEVICE); - break; - case matrix::Sparse::COMPRESSED_SPARSE_ROW: - L_csr = dynamic_cast(L); - U_csr = dynamic_cast(U); - break; - default: - out::error() << "Matrix type for L and U factors not recognized!\n"; - out::error() << "Refactorization not completed.\n"; - return 1; + switch (L->getSparseFormat()) + { + case matrix::Sparse::COMPRESSED_SPARSE_COLUMN: + // std::cout << "converting L and U factors from CSC to CSR format ...\n"; + L_csc = static_cast(L); + U_csc = static_cast(U); + L_csr = new matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); + U_csr = new matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); + csc2csr(L_csc, L_csr); + csc2csr(U_csc, U_csr); + L_csr->syncData(memory::DEVICE); + U_csr->syncData(memory::DEVICE); + break; + case matrix::Sparse::COMPRESSED_SPARSE_ROW: + L_csr = dynamic_cast(L); + U_csr = dynamic_cast(U); + break; + default: + out::error() << "Matrix type for L and U factors not recognized!\n"; + out::error() << "Refactorization not completed.\n"; + return 1; } - if (d_P_ == nullptr){ + if (d_P_ == nullptr) + { mem_.allocateArrayOnDevice(&d_P_, n); } - if (d_Q_ == nullptr){ + if (d_Q_ == nullptr) + { mem_.allocateArrayOnDevice(&d_Q_, n); } - if (d_T_ != nullptr){ + if (d_T_ != nullptr) + { mem_.deleteOnDevice(d_T_); } @@ -125,57 +130,57 @@ namespace ReSolve mem_.copyArrayHostToDevice(d_P_, P, n); mem_.copyArrayHostToDevice(d_Q_, Q, n); - - status_cusolverrf_ = cusolverRfSetResetValuesFastMode(handle_cusolverrf_, CUSOLVERRF_RESET_VALUES_FAST_MODE_ON); - error_sum += status_cusolverrf_; - status_cusolverrf_ = cusolverRfSetupDevice(n, + status_cusolverrf_ = cusolverRfSetResetValuesFastMode(handle_cusolverrf_, CUSOLVERRF_RESET_VALUES_FAST_MODE_ON); + error_sum += status_cusolverrf_; + status_cusolverrf_ = cusolverRfSetupDevice(n, A_->getNnz(), A_->getRowData(memory::DEVICE), A_->getColData(memory::DEVICE), - A_->getValues( memory::DEVICE), + A_->getValues(memory::DEVICE), L_csr->getNnz(), L_csr->getRowData(memory::DEVICE), L_csr->getColData(memory::DEVICE), - L_csr->getValues( memory::DEVICE), + L_csr->getValues(memory::DEVICE), U_csr->getNnz(), U_csr->getRowData(memory::DEVICE), U_csr->getColData(memory::DEVICE), - U_csr->getValues( memory::DEVICE), + U_csr->getValues(memory::DEVICE), d_P_, d_Q_, handle_cusolverrf_); - error_sum += status_cusolverrf_; + error_sum += status_cusolverrf_; mem_.deviceSynchronize(); - status_cusolverrf_ = cusolverRfAnalyze(handle_cusolverrf_); - error_sum += status_cusolverrf_; + status_cusolverrf_ = cusolverRfAnalyze(handle_cusolverrf_); + error_sum += status_cusolverrf_; const cusolverRfFactorization_t fact_alg = - CUSOLVERRF_FACTORIZATION_ALG0; // 0 - default, 1 or 2 + CUSOLVERRF_FACTORIZATION_ALG0; // 0 - default, 1 or 2 const cusolverRfTriangularSolve_t solve_alg = - CUSOLVERRF_TRIANGULAR_SOLVE_ALG1; // 1- default, 2 or 3 // 1 causes error + CUSOLVERRF_TRIANGULAR_SOLVE_ALG1; // 1- default, 2 or 3 // 1 causes error this->setAlgorithms(fact_alg, solve_alg); setup_completed_ = true; // Remove temporary objects upon setup completion - switch (L->getSparseFormat()) { - case matrix::Sparse::COMPRESSED_SPARSE_COLUMN: - delete L_csr; - delete U_csr; - L_csr = nullptr; - U_csr = nullptr; - L_csc = nullptr; - U_csc = nullptr; - break; - case matrix::Sparse::COMPRESSED_SPARSE_ROW: - L_csr = nullptr; - U_csr = nullptr; - L_csc = nullptr; - U_csc = nullptr; - break; - default: - break; + switch (L->getSparseFormat()) + { + case matrix::Sparse::COMPRESSED_SPARSE_COLUMN: + delete L_csr; + delete U_csr; + L_csr = nullptr; + U_csr = nullptr; + L_csc = nullptr; + U_csc = nullptr; + break; + case matrix::Sparse::COMPRESSED_SPARSE_ROW: + L_csr = nullptr; + U_csr = nullptr; + L_csc = nullptr; + U_csc = nullptr; + break; + default: + break; } // delete L_csr; // delete U_csr; @@ -192,7 +197,7 @@ namespace ReSolve * @pre The cuSolverRf handle has been created. * @post The factorization and triangular solve algorithms are set. */ - void LinSolverDirectCuSolverRf::setAlgorithms(cusolverRfFactorization_t fact_alg, + void LinSolverDirectCuSolverRf::setAlgorithms(cusolverRfFactorization_t fact_alg, cusolverRfTriangularSolve_t solve_alg) { cusolverRfSetAlgs(handle_cusolverrf_, fact_alg, solve_alg); @@ -215,7 +220,7 @@ namespace ReSolve int LinSolverDirectCuSolverRf::refactorize() { int error_sum = 0; - + // Check if matrix A data is valid assert(A_ != nullptr && "Matrix A is null!"); assert(A_->getNumRows() > 0 && "Matrix A must have positive row count!"); @@ -228,19 +233,19 @@ namespace ReSolve // Check solver handle assert(handle_cusolverrf_ != nullptr && "cuSolverRf handle is null!"); - status_cusolverrf_ = cusolverRfResetValues(A_->getNumRows(), + status_cusolverrf_ = cusolverRfResetValues(A_->getNumRows(), A_->getNnz(), A_->getRowData(memory::DEVICE), A_->getColData(memory::DEVICE), - A_->getValues( memory::DEVICE), + A_->getValues(memory::DEVICE), d_P_, d_Q_, handle_cusolverrf_); - error_sum += status_cusolverrf_; + error_sum += status_cusolverrf_; mem_.deviceSynchronize(); - status_cusolverrf_ = cusolverRfRefactor(handle_cusolverrf_); - error_sum += status_cusolverrf_; + status_cusolverrf_ = cusolverRfRefactor(handle_cusolverrf_); + error_sum += status_cusolverrf_; return error_sum; } @@ -257,14 +262,14 @@ namespace ReSolve */ int LinSolverDirectCuSolverRf::solve(vector_type* rhs) { - status_cusolverrf_ = cusolverRfSolve(handle_cusolverrf_, - d_P_, - d_Q_, - 1, - d_T_, - A_->getNumRows(), - rhs->getData(memory::DEVICE), - A_->getNumRows()); + status_cusolverrf_ = cusolverRfSolve(handle_cusolverrf_, + d_P_, + d_Q_, + 1, + d_T_, + A_->getNumRows(), + rhs->getData(memory::DEVICE), + A_->getNumRows()); return status_cusolverrf_; } @@ -283,16 +288,17 @@ namespace ReSolve { x->copyDataFrom(rhs->getData(memory::DEVICE), memory::DEVICE, memory::DEVICE); x->setDataUpdated(memory::DEVICE); - status_cusolverrf_ = cusolverRfSolve(handle_cusolverrf_, - d_P_, - d_Q_, - 1, - d_T_, - A_->getNumRows(), - x->getData(memory::DEVICE), - A_->getNumRows()); + status_cusolverrf_ = cusolverRfSolve(handle_cusolverrf_, + d_P_, + d_Q_, + 1, + d_T_, + A_->getNumRows(), + x->getData(memory::DEVICE), + A_->getNumRows()); return status_cusolverrf_; } + /** * @brief Sets a flag threshold for zero pivots and a boost factor * @@ -307,8 +313,8 @@ namespace ReSolve real_type nboost) { // Zero flagging threshold and boost NEED TO BE DOUBLE! - double zero = static_cast(nzero); - double boost = static_cast(nboost); + double zero = static_cast(nzero); + double boost = static_cast(nboost); status_cusolverrf_ = cusolverRfSetNumericProperties(handle_cusolverrf_, zero, boost); @@ -327,16 +333,16 @@ namespace ReSolve { switch (getParamId(id)) { - case ZERO_PIVOT: - zero_pivot_ = atof(value.c_str()); - setNumericalProperties(zero_pivot_, pivot_boost_); - break; - case PIVOT_BOOST: - pivot_boost_ = atof(value.c_str()); - setNumericalProperties(zero_pivot_, pivot_boost_); - break; - default: - std::cout << "Setting parameter failed!\n"; + case ZERO_PIVOT: + zero_pivot_ = atof(value.c_str()); + setNumericalProperties(zero_pivot_, pivot_boost_); + break; + case PIVOT_BOOST: + pivot_boost_ = atof(value.c_str()); + setNumericalProperties(zero_pivot_, pivot_boost_); + break; + default: + std::cout << "Setting parameter failed!\n"; } return 0; } @@ -355,8 +361,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown string parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown string parameter " << id << "\n"; } return ""; } @@ -375,8 +381,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown integer parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown integer parameter " << id << "\n"; } return -1; } @@ -385,12 +391,12 @@ namespace ReSolve { switch (getParamId(id)) { - case ZERO_PIVOT: - return zero_pivot_; - case PIVOT_BOOST: - return pivot_boost_; - default: - out::error() << "Trying to get unknown real parameter " << id << "\n"; + case ZERO_PIVOT: + return zero_pivot_; + case PIVOT_BOOST: + return pivot_boost_; + default: + out::error() << "Trying to get unknown real parameter " << id << "\n"; } return std::numeric_limits::quiet_NaN(); } @@ -409,8 +415,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown boolean parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown boolean parameter " << id << "\n"; } return false; } @@ -422,15 +428,15 @@ namespace ReSolve { switch (getParamId(id)) { - case ZERO_PIVOT: - std::cout << zero_pivot_ << "\n"; - break; - case PIVOT_BOOST: - std::cout << pivot_boost_ << "\n"; - break; - default: - out::error() << "Trying to print unknown parameter " << id << "\n"; - return 1; + case ZERO_PIVOT: + std::cout << zero_pivot_ << "\n"; + break; + case PIVOT_BOOST: + std::cout << pivot_boost_ << "\n"; + break; + default: + out::error() << "Trying to print unknown parameter " << id << "\n"; + return 1; } return 0; } @@ -473,25 +479,28 @@ namespace ReSolve index_type* rowIdxCsc = A_csc->getRowData(memory::HOST); index_type* colPtrCsc = A_csc->getColData(memory::HOST); - real_type* valuesCsc = A_csc->getValues( memory::HOST); + real_type* valuesCsc = A_csc->getValues(memory::HOST); index_type* rowPtrCsr = A_csr->getRowData(memory::HOST); index_type* colIdxCsr = A_csr->getColData(memory::HOST); - real_type* valuesCsr = A_csr->getValues( memory::HOST); + real_type* valuesCsr = A_csr->getValues(memory::HOST); // Set all CSR row pointers to zero - for (index_type i = 0; i <= n; ++i) { + for (index_type i = 0; i <= n; ++i) + { rowPtrCsr[i] = 0; } // Set all CSR values and column indices to zero - for (index_type i = 0; i < nnz; ++i) { + for (index_type i = 0; i < nnz; ++i) + { colIdxCsr[i] = 0; valuesCsr[i] = 0.0; } // Compute number of entries per row - for (index_type i = 0; i < nnz; ++i) { + for (index_type i = 0; i < nnz; ++i) + { rowPtrCsr[rowIdxCsc[i]]++; } @@ -499,7 +508,7 @@ namespace ReSolve for (index_type row = 0, rowsum = 0; row < n; ++row) { // Store value in row pointer to temp - index_type temp = rowPtrCsr[row]; + index_type temp = rowPtrCsr[row]; // Copy cumulative sum to the row pointer rowPtrCsr[row] = rowsum; @@ -514,15 +523,15 @@ namespace ReSolve // Compute positions of column indices and values in CSR matrix and store them there // Overwrites CSR row pointers in the process // adding to them the number of elements in that row - for (index_type jj = colPtrCsc[col]; jj < colPtrCsc[col+1]; jj++) + for (index_type jj = colPtrCsc[col]; jj < colPtrCsc[col + 1]; jj++) { - index_type row = rowIdxCsc[jj]; - index_type dest = rowPtrCsr[row]; + index_type row = rowIdxCsc[jj]; + index_type dest = rowPtrCsr[row]; - colIdxCsr[dest] = col; - valuesCsr[dest] = valuesCsc[jj]; + colIdxCsr[dest] = col; + valuesCsr[dest] = valuesCsc[jj]; - rowPtrCsr[row]++; + rowPtrCsr[row]++; } } @@ -531,9 +540,9 @@ namespace ReSolve // for i>=1: new rowPtrCsr[i] = old rowPtrCsr[i-1] and new rowPtrCsr[0]=0 for (index_type row = 0, last = 0; row <= n; row++) { - index_type temp = rowPtrCsr[row]; - rowPtrCsr[row] = last; - last = temp; + index_type temp = rowPtrCsr[row]; + rowPtrCsr[row] = last; + last = temp; } // Mark data on the host as updated @@ -542,4 +551,4 @@ namespace ReSolve return 0; } -} // namespace resolve +} // namespace ReSolve diff --git a/resolve/LinSolverDirectCuSolverRf.hpp b/resolve/LinSolverDirectCuSolverRf.hpp index b20b222a6..bf9951ca4 100644 --- a/resolve/LinSolverDirectCuSolverRf.hpp +++ b/resolve/LinSolverDirectCuSolverRf.hpp @@ -1,10 +1,9 @@ #pragma once -#include "cusolverRf.h" - #include "Common.hpp" -#include +#include "cusolverRf.h" #include +#include namespace ReSolve { @@ -20,7 +19,7 @@ namespace ReSolve class Sparse; class Csr; class Csc; - } + } // namespace matrix // Forward declaration of ReSolve handlers workspace class LinAlgWorkspaceCUDA; @@ -29,49 +28,54 @@ namespace ReSolve { using vector_type = vector::Vector; - public: - LinSolverDirectCuSolverRf(LinAlgWorkspaceCUDA* workspace = nullptr); - ~LinSolverDirectCuSolverRf(); - - int setup(matrix::Sparse* A, - matrix::Sparse* L, - matrix::Sparse* U, - index_type* P, - index_type* Q, - vector_type* rhs = nullptr) override; - - int refactorize() override; - int solve(vector_type* rhs, vector_type* x) override; - int solve(vector_type* rhs) override; // rhs overwritten by solution - - void setAlgorithms(cusolverRfFactorization_t fact_alg, - cusolverRfTriangularSolve_t solve_alg); - int setNumericalProperties(real_type nzero, real_type nboost); - - int setCliParam(const std::string id, const std::string value) override; - std::string getCliParamString(const std::string id) const override; - index_type getCliParamInt(const std::string id) const override; - real_type getCliParamReal(const std::string id) const override; - bool getCliParamBool(const std::string id) const override; - int printCliParam(const std::string id) const override; - - private: - void initParamList(); - int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr); - - private: - enum ParamaterIDs {ZERO_PIVOT=0, PIVOT_BOOST}; - real_type zero_pivot_{0.0}; ///< The value below which zero pivot is flagged. - real_type pivot_boost_{0.0}; ///< The value which is substituted for zero pivot. - - cusolverRfHandle_t handle_cusolverrf_; - cusolverStatus_t status_cusolverrf_; - - index_type* d_P_{nullptr}; - index_type* d_Q_{nullptr}; - real_type* d_T_{nullptr}; - bool setup_completed_{false}; - - MemoryHandler mem_; ///< Device memory manager object + public: + LinSolverDirectCuSolverRf(LinAlgWorkspaceCUDA* workspace = nullptr); + ~LinSolverDirectCuSolverRf(); + + int setup(matrix::Sparse* A, + matrix::Sparse* L, + matrix::Sparse* U, + index_type* P, + index_type* Q, + vector_type* rhs = nullptr) override; + + int refactorize() override; + int solve(vector_type* rhs, vector_type* x) override; + int solve(vector_type* rhs) override; // rhs overwritten by solution + + void setAlgorithms(cusolverRfFactorization_t fact_alg, + cusolverRfTriangularSolve_t solve_alg); + int setNumericalProperties(real_type nzero, real_type nboost); + + int setCliParam(const std::string id, const std::string value) override; + std::string getCliParamString(const std::string id) const override; + index_type getCliParamInt(const std::string id) const override; + real_type getCliParamReal(const std::string id) const override; + bool getCliParamBool(const std::string id) const override; + int printCliParam(const std::string id) const override; + + private: + void initParamList(); + int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr); + + private: + enum ParamaterIDs + { + ZERO_PIVOT = 0, + PIVOT_BOOST + }; + + real_type zero_pivot_{0.0}; ///< The value below which zero pivot is flagged. + real_type pivot_boost_{0.0}; ///< The value which is substituted for zero pivot. + + cusolverRfHandle_t handle_cusolverrf_; + cusolverStatus_t status_cusolverrf_; + + index_type* d_P_{nullptr}; + index_type* d_Q_{nullptr}; + real_type* d_T_{nullptr}; + bool setup_completed_{false}; + + MemoryHandler mem_; ///< Device memory manager object }; -} +} // namespace ReSolve diff --git a/resolve/LinSolverDirectCuSparseILU0.cpp b/resolve/LinSolverDirectCuSparseILU0.cpp index c94945ade..3bce8530d 100644 --- a/resolve/LinSolverDirectCuSparseILU0.cpp +++ b/resolve/LinSolverDirectCuSparseILU0.cpp @@ -1,9 +1,11 @@ -#include -#include #include "LinSolverDirectCuSparseILU0.hpp" + #include -namespace ReSolve +#include +#include + +namespace ReSolve { using out = io::Logger; @@ -24,25 +26,25 @@ namespace ReSolve matrix::Sparse*, index_type*, index_type*, - vector_type* ) + vector_type*) { - //remember - P and Q are generally CPU variables + // remember - P and Q are generally CPU variables int error_sum = 0; - this->A_ = (matrix::Csr*) A; - index_type n = A_->getNumRows(); + this->A_ = (matrix::Csr*) A; + index_type n = A_->getNumRows(); index_type nnz = A_->getNnz(); - mem_.allocateArrayOnDevice(&d_ILU_vals_,nnz); - //copy A values to a buffer first + mem_.allocateArrayOnDevice(&d_ILU_vals_, nnz); + // copy A values to a buffer first mem_.copyArrayDeviceToDevice(d_ILU_vals_, A_->getValues(ReSolve::memory::DEVICE), nnz); - mem_.allocateArrayOnDevice(&d_aux1_,n); - mem_.allocateArrayOnDevice(&d_aux2_,n); - cudaMemset(d_aux1_, 1, n*sizeof(double)); + mem_.allocateArrayOnDevice(&d_aux1_, n); + mem_.allocateArrayOnDevice(&d_aux2_, n); + cudaMemset(d_aux1_, 1, n * sizeof(double)); cusparseCreateDnVec(&vec_X_, n, d_aux1_, CUDA_R_64F); cusparseCreateDnVec(&vec_Y_, n, d_aux2_, CUDA_R_64F); - //set up descriptors + // set up descriptors // Create matrix descriptor for A cusparseCreateMatDescr(&descr_A_); @@ -57,9 +59,9 @@ namespace ReSolve n, n, nnz, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), - d_ILU_vals_, //vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), + d_ILU_vals_, // vals_, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, @@ -69,42 +71,41 @@ namespace ReSolve n, n, nnz, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), - d_ILU_vals_, //vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), + d_ILU_vals_, // vals_, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F); - // Create matrix info structure cusparseCreateCsrilu02Info(&info_A_); - int buffer_size_A; + int buffer_size_A; size_t buffer_size_L; size_t buffer_size_U; - status_cusparse_ = cusparseDcsrilu02_bufferSize(workspace_->getCusparseHandle(), - n, - nnz, + status_cusparse_ = cusparseDcsrilu02_bufferSize(workspace_->getCusparseHandle(), + n, + nnz, descr_A_, - d_ILU_vals_, //vals_, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), - info_A_, + d_ILU_vals_, // vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), + info_A_, &buffer_size_A); - mem_.allocateBufferOnDevice(&buffer_,(size_t) buffer_size_A); - error_sum += status_cusparse_; + mem_.allocateBufferOnDevice(&buffer_, (size_t) buffer_size_A); + error_sum += status_cusparse_; // Now analysis - status_cusparse_ = cusparseDcsrilu02_analysis(workspace_->getCusparseHandle(), - n, - nnz, + status_cusparse_ = cusparseDcsrilu02_analysis(workspace_->getCusparseHandle(), + n, + nnz, descr_A_, - d_ILU_vals_, //vals_, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), + d_ILU_vals_, // vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), info_A_, CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer_); @@ -113,79 +114,79 @@ namespace ReSolve // and now the actual decomposition // Compute incomplete LU factorization - status_cusparse_ = cusparseDcsrilu02(workspace_->getCusparseHandle(), - n, - nnz, + status_cusparse_ = cusparseDcsrilu02(workspace_->getCusparseHandle(), + n, + nnz, descr_A_, - d_ILU_vals_, //vals_ - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), + d_ILU_vals_, // vals_ + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), info_A_, CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer_); error_sum += status_cusparse_; - // now take care of LU solve + // now take care of LU solve // now create actual Sparse matrix OBJECTS for L and U cusparseFillMode_t fillmodeL = CUSPARSE_FILL_MODE_LOWER; cusparseFillMode_t fillmodeU = CUSPARSE_FILL_MODE_UPPER; - cusparseDiagType_t diagtypeL = CUSPARSE_DIAG_TYPE_UNIT; - cusparseDiagType_t diagtypeU = CUSPARSE_DIAG_TYPE_NON_UNIT; + cusparseDiagType_t diagtypeL = CUSPARSE_DIAG_TYPE_UNIT; + cusparseDiagType_t diagtypeU = CUSPARSE_DIAG_TYPE_NON_UNIT; - cusparseSpMatSetAttribute(mat_L_, + cusparseSpMatSetAttribute(mat_L_, CUSPARSE_SPMAT_FILL_MODE, - &fillmodeL, - sizeof(fillmodeL)); + &fillmodeL, + sizeof(fillmodeL)); - cusparseSpMatSetAttribute(mat_U_, + cusparseSpMatSetAttribute(mat_U_, CUSPARSE_SPMAT_FILL_MODE, - &fillmodeU, + &fillmodeU, sizeof(fillmodeU)); - cusparseSpMatSetAttribute(mat_L_, + cusparseSpMatSetAttribute(mat_L_, CUSPARSE_SPMAT_DIAG_TYPE, - &diagtypeL, - sizeof(diagtypeL)); + &diagtypeL, + sizeof(diagtypeL)); - cusparseSpMatSetAttribute(mat_U_, + cusparseSpMatSetAttribute(mat_U_, CUSPARSE_SPMAT_DIAG_TYPE, - &diagtypeU, - sizeof(diagtypeU)); + &diagtypeU, + sizeof(diagtypeU)); - status_cusparse_ = cusparseSpSV_bufferSize(workspace_->getCusparseHandle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - &(constants::ONE), + status_cusparse_ = cusparseSpSV_bufferSize(workspace_->getCusparseHandle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + &(constants::ONE), mat_L_, vec_X_, - vec_Y_, + vec_Y_, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, - descr_spsv_L_, + descr_spsv_L_, &buffer_size_L); - error_sum += status_cusparse_; + error_sum += status_cusparse_; mem_.allocateBufferOnDevice(&buffer_L_, buffer_size_L); - status_cusparse_ = cusparseSpSV_bufferSize(workspace_->getCusparseHandle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - &(constants::ONE), + status_cusparse_ = cusparseSpSV_bufferSize(workspace_->getCusparseHandle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + &(constants::ONE), mat_U_, vec_X_, - vec_Y_, + vec_Y_, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, - descr_spsv_U_, + descr_spsv_U_, &buffer_size_U); - error_sum += status_cusparse_; + error_sum += status_cusparse_; mem_.allocateBufferOnDevice(&buffer_U_, buffer_size_U); - status_cusparse_ = cusparseSpSV_analysis(workspace_->getCusparseHandle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - &(constants::ONE), + status_cusparse_ = cusparseSpSV_analysis(workspace_->getCusparseHandle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + &(constants::ONE), mat_L_, vec_X_, vec_Y_, @@ -193,12 +194,11 @@ namespace ReSolve CUSPARSE_SPSV_ALG_DEFAULT, descr_spsv_L_, buffer_L_); - error_sum += status_cusparse_; - + error_sum += status_cusparse_; - status_cusparse_ = cusparseSpSV_analysis(workspace_->getCusparseHandle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - &(constants::ONE), + status_cusparse_ = cusparseSpSV_analysis(workspace_->getCusparseHandle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + &(constants::ONE), mat_U_, vec_X_, vec_Y_, @@ -215,24 +215,24 @@ namespace ReSolve int LinSolverDirectCuSparseILU0::reset(matrix::Sparse* A) { - int error_sum = 0; - this->A_ = A; - index_type n = A_->getNumRows(); + int error_sum = 0; + this->A_ = A; + index_type n = A_->getNumRows(); index_type nnz = A_->getNnz(); mem_.copyArrayDeviceToDevice(d_ILU_vals_, A_->getValues(ReSolve::memory::DEVICE), nnz); - status_cusparse_ = cusparseDcsrilu02(workspace_->getCusparseHandle(), - n, - nnz, + status_cusparse_ = cusparseDcsrilu02(workspace_->getCusparseHandle(), + n, + nnz, descr_A_, - d_ILU_vals_, //vals_ - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), + d_ILU_vals_, // vals_ + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), info_A_, CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer_); - //rerun tri solve analysis - to be updated - error_sum += status_cusparse_; + // rerun tri solve analysis - to be updated + error_sum += status_cusparse_; return error_sum; } @@ -241,34 +241,34 @@ namespace ReSolve int LinSolverDirectCuSparseILU0::solve(vector_type* rhs) { int error_sum = 0; - + cusparseCreateDnVec(&vec_X_, A_->getNumRows(), rhs->getData(ReSolve::memory::DEVICE), CUDA_R_64F); cusparseCreateDnVec(&vec_Y_, A_->getNumRows(), d_aux1_, CUDA_R_64F); - - status_cusparse_ = cusparseSpSV_solve(workspace_->getCusparseHandle(), + + status_cusparse_ = cusparseSpSV_solve(workspace_->getCusparseHandle(), CUSPARSE_OPERATION_NON_TRANSPOSE, - &(constants::ONE), + &(constants::ONE), mat_L_, vec_X_, vec_Y_, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_spsv_L_); - error_sum += status_cusparse_; + error_sum += status_cusparse_; - status_cusparse_ = cusparseSpSV_solve(workspace_->getCusparseHandle(), + status_cusparse_ = cusparseSpSV_solve(workspace_->getCusparseHandle(), CUSPARSE_OPERATION_NON_TRANSPOSE, - &(constants::ONE), + &(constants::ONE), mat_U_, vec_Y_, vec_X_, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_spsv_U_); - error_sum += status_cusparse_; - + error_sum += status_cusparse_; + rhs->setDataUpdated(ReSolve::memory::DEVICE); - + cusparseDestroyDnVec(vec_X_); cusparseDestroyDnVec(vec_Y_); @@ -278,35 +278,35 @@ namespace ReSolve int LinSolverDirectCuSparseILU0::solve(vector_type* rhs, vector_type* x) { int error_sum = 0; - + cusparseCreateDnVec(&vec_X_, A_->getNumRows(), rhs->getData(ReSolve::memory::DEVICE), CUDA_R_64F); cusparseCreateDnVec(&vec_Y_, A_->getNumRows(), d_aux1_, CUDA_R_64F); - - status_cusparse_ = cusparseSpSV_solve(workspace_->getCusparseHandle(), + + status_cusparse_ = cusparseSpSV_solve(workspace_->getCusparseHandle(), CUSPARSE_OPERATION_NON_TRANSPOSE, - &(constants::ONE), + &(constants::ONE), mat_L_, vec_X_, vec_Y_, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_spsv_L_); - error_sum += status_cusparse_; + error_sum += status_cusparse_; cusparseCreateDnVec(&vec_X_, A_->getNumRows(), x->getData(ReSolve::memory::DEVICE), CUDA_R_64F); - status_cusparse_ = cusparseSpSV_solve(workspace_->getCusparseHandle(), + status_cusparse_ = cusparseSpSV_solve(workspace_->getCusparseHandle(), CUSPARSE_OPERATION_NON_TRANSPOSE, - &(constants::ONE), + &(constants::ONE), mat_U_, vec_Y_, vec_X_, CUDA_R_64F, CUSPARSE_SPSV_ALG_DEFAULT, descr_spsv_U_); - error_sum += status_cusparse_; - + error_sum += status_cusparse_; + x->setDataUpdated(ReSolve::memory::DEVICE); - + cusparseDestroyDnVec(vec_X_); cusparseDestroyDnVec(vec_Y_); @@ -315,11 +315,11 @@ namespace ReSolve /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to set. * @return int Error code. */ @@ -327,19 +327,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - std::cout << "Setting parameter failed!\n"; + default: + std::cout << "Setting parameter failed!\n"; } return 0; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return std::string Value of the string parameter to return. */ @@ -347,19 +347,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown string parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown string parameter " << id << "\n"; } return ""; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return int Value of the int parameter to return. */ @@ -367,19 +367,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown integer parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown integer parameter " << id << "\n"; } return -1; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return real_type Value of the real_type parameter to return. */ @@ -387,19 +387,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown real parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown real parameter " << id << "\n"; } return std::numeric_limits::quiet_NaN(); } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return bool Value of the bool parameter to return. */ @@ -407,8 +407,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown boolean parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown boolean parameter " << id << "\n"; } return false; } @@ -424,4 +424,4 @@ namespace ReSolve return 0; } -} // namespace resolve +} // namespace ReSolve diff --git a/resolve/LinSolverDirectCuSparseILU0.hpp b/resolve/LinSolverDirectCuSparseILU0.hpp index b5817a102..f4ae5ae6b 100644 --- a/resolve/LinSolverDirectCuSparseILU0.hpp +++ b/resolve/LinSolverDirectCuSparseILU0.hpp @@ -1,14 +1,14 @@ #pragma once -#include #include +#include #include "Common.hpp" +#include #include #include -#include -namespace ReSolve +namespace ReSolve { // Forward declaration of vector::Vector class namespace vector @@ -22,59 +22,59 @@ namespace ReSolve class Sparse; } - class LinSolverDirectCuSparseILU0 : public LinSolverDirect + class LinSolverDirectCuSparseILU0 : public LinSolverDirect { using vector_type = vector::Vector; - - public: - LinSolverDirectCuSparseILU0(LinAlgWorkspaceCUDA* workspace); - ~LinSolverDirectCuSparseILU0(); - - int setup(matrix::Sparse* A, - matrix::Sparse* L = nullptr, - matrix::Sparse* U = nullptr, - index_type* P = nullptr, - index_type* Q = nullptr, - vector_type* rhs = nullptr) override; - // if values of A change, but the nnz pattern does not, redo the analysis only (reuse buffers though) - int reset(matrix::Sparse* A); - - int solve(vector_type* rhs, vector_type* x) override; - int solve(vector_type* rhs) override; - - int setCliParam(const std::string id, const std::string value) override; - std::string getCliParamString(const std::string id) const override; - index_type getCliParamInt(const std::string id) const override; - real_type getCliParamReal(const std::string id) const override; - bool getCliParamBool(const std::string id) const override; - int printCliParam(const std::string id) const override; - - private: - cusparseStatus_t status_cusparse_; - - MemoryHandler mem_; ///< Device memory manager object - LinAlgWorkspaceCUDA* workspace_{nullptr}; - - cusparseMatDescr_t descr_A_{nullptr}; - - cusparseSpMatDescr_t mat_L_{nullptr}; - cusparseSpMatDescr_t mat_U_{nullptr}; - - cusparseSpSVDescr_t descr_spsv_L_{nullptr}; - cusparseSpSVDescr_t descr_spsv_U_{nullptr}; - csrilu02Info_t info_A_{nullptr}; - - void* buffer_{nullptr}; - void* buffer_L_{nullptr}; - void* buffer_U_{nullptr}; - - real_type* d_aux1_{nullptr}; - real_type* d_aux2_{nullptr}; - - cusparseDnVecDescr_t vec_X_{nullptr}; - cusparseDnVecDescr_t vec_Y_{nullptr}; - - // since ILU OVERWRITES THE MATRIX values, we need a buffer to keep the values of ILU decomposition. - real_type* d_ILU_vals_{nullptr}; + + public: + LinSolverDirectCuSparseILU0(LinAlgWorkspaceCUDA* workspace); + ~LinSolverDirectCuSparseILU0(); + + int setup(matrix::Sparse* A, + matrix::Sparse* L = nullptr, + matrix::Sparse* U = nullptr, + index_type* P = nullptr, + index_type* Q = nullptr, + vector_type* rhs = nullptr) override; + // if values of A change, but the nnz pattern does not, redo the analysis only (reuse buffers though) + int reset(matrix::Sparse* A); + + int solve(vector_type* rhs, vector_type* x) override; + int solve(vector_type* rhs) override; + + int setCliParam(const std::string id, const std::string value) override; + std::string getCliParamString(const std::string id) const override; + index_type getCliParamInt(const std::string id) const override; + real_type getCliParamReal(const std::string id) const override; + bool getCliParamBool(const std::string id) const override; + int printCliParam(const std::string id) const override; + + private: + cusparseStatus_t status_cusparse_; + + MemoryHandler mem_; ///< Device memory manager object + LinAlgWorkspaceCUDA* workspace_{nullptr}; + + cusparseMatDescr_t descr_A_{nullptr}; + + cusparseSpMatDescr_t mat_L_{nullptr}; + cusparseSpMatDescr_t mat_U_{nullptr}; + + cusparseSpSVDescr_t descr_spsv_L_{nullptr}; + cusparseSpSVDescr_t descr_spsv_U_{nullptr}; + csrilu02Info_t info_A_{nullptr}; + + void* buffer_{nullptr}; + void* buffer_L_{nullptr}; + void* buffer_U_{nullptr}; + + real_type* d_aux1_{nullptr}; + real_type* d_aux2_{nullptr}; + + cusparseDnVecDescr_t vec_X_{nullptr}; + cusparseDnVecDescr_t vec_Y_{nullptr}; + + // since ILU OVERWRITES THE MATRIX values, we need a buffer to keep the values of ILU decomposition. + real_type* d_ILU_vals_{nullptr}; }; -}// namespace +} // namespace ReSolve diff --git a/resolve/LinSolverDirectKLU.cpp b/resolve/LinSolverDirectKLU.cpp index 2e7e68510..5530d1b0e 100644 --- a/resolve/LinSolverDirectKLU.cpp +++ b/resolve/LinSolverDirectKLU.cpp @@ -1,12 +1,15 @@ +#include "LinSolverDirectKLU.hpp" + #include // includes memcpy -#include + #include #include -#include "LinSolverDirectKLU.hpp" +#include namespace ReSolve { using out = io::Logger; + /** * @brief Constructor for LinSolverDirectKLU * @@ -15,27 +18,27 @@ namespace ReSolve LinSolverDirectKLU::LinSolverDirectKLU() { Symbolic_ = nullptr; - Numeric_ = nullptr; + Numeric_ = nullptr; L_ = nullptr; U_ = nullptr; // Populate KLU data structure holding solver parameters klu_defaults(&Common_); - Common_.btf = 0; - Common_.scale = -1; - Common_.ordering = ordering_; - Common_.tol = pivot_threshold_tol_; + Common_.btf = 0; + Common_.scale = -1; + Common_.ordering = ordering_; + Common_.tol = pivot_threshold_tol_; Common_.halt_if_singular = halt_if_singular_; // Register configurable parameters initParamList(); out::summary() << "KLU solver set with parameters:\n" - << "\tbtf = " << Common_.btf << "\n" - << "\tscale = " << Common_.scale << "\n" - << "\tordering = " << Common_.ordering << "\n" - << "\tpivot threshold = " << Common_.tol << "\n" + << "\tbtf = " << Common_.btf << "\n" + << "\tscale = " << Common_.scale << "\n" + << "\tordering = " << Common_.ordering << "\n" + << "\tpivot threshold = " << Common_.tol << "\n" << "\thalt if singular = " << Common_.halt_if_singular << "\n"; } @@ -51,11 +54,12 @@ namespace ReSolve */ LinSolverDirectKLU::~LinSolverDirectKLU() { - if (factors_extracted_) { + if (factors_extracted_) + { delete L_; delete U_; - delete [] P_; - delete [] Q_; + delete[] P_; + delete[] Q_; L_ = nullptr; U_ = nullptr; P_ = nullptr; @@ -78,9 +82,9 @@ namespace ReSolve int LinSolverDirectKLU::setup(matrix::Sparse* A, matrix::Sparse* /* L */, matrix::Sparse* /* U */, - index_type* /* P */, - index_type* /* Q */, - vector_type* /* rhs */) + index_type* /* P */, + index_type* /* Q */, + vector_type* /* rhs */) { this->A_ = A; return 0; @@ -94,32 +98,37 @@ namespace ReSolve int LinSolverDirectKLU::analyze() { // in case we called this function AGAIN - if (Symbolic_ != nullptr) { + if (Symbolic_ != nullptr) + { klu_free_symbolic(&Symbolic_, &Common_); } - Symbolic_ = klu_analyze(A_->getNumRows(), + Symbolic_ = klu_analyze(A_->getNumRows(), A_->getRowData(memory::HOST), A_->getColData(memory::HOST), &Common_); factors_extracted_ = false; - if (L_ != nullptr) { + if (L_ != nullptr) + { delete L_; L_ = nullptr; } - if (U_ != nullptr) { + if (U_ != nullptr) + { delete U_; U_ = nullptr; } - if (Symbolic_ == nullptr) { + if (Symbolic_ == nullptr) + { out::error() << "Symbolic_ factorization failed with Common_.status = " << Common_.status << "\n"; return 1; } return 0; } + /** * @brief Factorize the matrix A. * @@ -127,7 +136,8 @@ namespace ReSolve */ int LinSolverDirectKLU::factorize() { - if (Numeric_ != nullptr) { + if (Numeric_ != nullptr) + { klu_free_numeric(&Numeric_, &Common_); } @@ -139,17 +149,20 @@ namespace ReSolve factors_extracted_ = false; - if (L_ != nullptr) { + if (L_ != nullptr) + { delete L_; L_ = nullptr; } - if (U_ != nullptr) { + if (U_ != nullptr) + { delete U_; U_ = nullptr; } - if (Numeric_ == nullptr) { + if (Numeric_ == nullptr) + { return 1; } return 0; @@ -160,7 +173,7 @@ namespace ReSolve * * @return 0 if successful, 1 otherwise */ - int LinSolverDirectKLU::refactorize() + int LinSolverDirectKLU::refactorize() { int kluStatus = klu_refactor(A_->getRowData(memory::HOST), A_->getColData(memory::HOST), @@ -171,18 +184,21 @@ namespace ReSolve factors_extracted_ = false; - if (L_ != nullptr) { + if (L_ != nullptr) + { delete L_; L_ = nullptr; } - if (U_ != nullptr) { + if (U_ != nullptr) + { delete U_; U_ = nullptr; } - if (!kluStatus){ - //display error + if (!kluStatus) + { + // display error return 1; } return 0; @@ -199,13 +215,14 @@ namespace ReSolve int LinSolverDirectKLU::solve(vector_type* rhs, vector_type* x) { - //copy the vector + // copy the vector x->copyDataFrom(rhs->getData(memory::HOST), memory::HOST, memory::HOST); x->setDataUpdated(memory::HOST); int kluStatus = klu_solve(Symbolic_, Numeric_, A_->getNumRows(), 1, x->getData(memory::HOST), &Common_); - if (!kluStatus){ + if (!kluStatus) + { return 1; } return 0; @@ -214,7 +231,7 @@ namespace ReSolve /** * @brief Generic solver with matrix A with unspecified rhs (not implemented). */ - int LinSolverDirectKLU::solve(vector_type* ) + int LinSolverDirectKLU::solve(vector_type*) { out::error() << "Function solve(Vector* x) not implemented in LinSolverDirectKLU!\n" << "Consider using solve(Vector* rhs, Vector* x) instead.\n"; @@ -228,7 +245,8 @@ namespace ReSolve */ matrix::Sparse* LinSolverDirectKLU::getLFactor() { - if (!factors_extracted_) { + if (!factors_extracted_) + { const int nnzL = Numeric_->lnz; const int nnzU = Numeric_->unz; @@ -241,10 +259,10 @@ namespace ReSolve Symbolic_, L_->getColData(memory::HOST), L_->getRowData(memory::HOST), - L_->getValues( memory::HOST), + L_->getValues(memory::HOST), U_->getColData(memory::HOST), U_->getRowData(memory::HOST), - U_->getValues( memory::HOST), + U_->getValues(memory::HOST), nullptr, nullptr, nullptr, @@ -261,6 +279,7 @@ namespace ReSolve } return L_; } + /** * @brief Get the upper triangular factor U. * @@ -268,7 +287,8 @@ namespace ReSolve */ matrix::Sparse* LinSolverDirectKLU::getUFactor() { - if (!factors_extracted_) { + if (!factors_extracted_) + { const int nnzL = Numeric_->lnz; const int nnzU = Numeric_->unz; @@ -280,10 +300,10 @@ namespace ReSolve Symbolic_, L_->getColData(memory::HOST), L_->getRowData(memory::HOST), - L_->getValues( memory::HOST), + L_->getValues(memory::HOST), U_->getColData(memory::HOST), U_->getRowData(memory::HOST), - U_->getValues( memory::HOST), + U_->getValues(memory::HOST), nullptr, nullptr, nullptr, @@ -301,6 +321,7 @@ namespace ReSolve } return U_; } + /** * @brief Get the permutation vector P. * @@ -308,12 +329,15 @@ namespace ReSolve */ index_type* LinSolverDirectKLU::getPOrdering() { - if (Numeric_ != nullptr) { - P_ = new index_type[A_->getNumRows()]; + if (Numeric_ != nullptr) + { + P_ = new index_type[A_->getNumRows()]; size_t nrows = static_cast(A_->getNumRows()); std::memcpy(P_, Numeric_->Pnum, nrows * sizeof(index_type)); return P_; - } else { + } + else + { return nullptr; } } @@ -325,12 +349,15 @@ namespace ReSolve */ index_type* LinSolverDirectKLU::getQOrdering() { - if (Numeric_ != nullptr) { - Q_ = new index_type[A_->getNumRows()]; + if (Numeric_ != nullptr) + { + Q_ = new index_type[A_->getNumRows()]; size_t nrows = static_cast(A_->getNumRows()); std::memcpy(Q_, Symbolic_->Q, nrows * sizeof(index_type)); return Q_; - } else { + } + else + { return nullptr; } } @@ -345,7 +372,7 @@ namespace ReSolve void LinSolverDirectKLU::setPivotThreshold(real_type tol) { pivot_threshold_tol_ = tol; - Common_.tol = tol; + Common_.tol = tol; } /** @@ -357,7 +384,7 @@ namespace ReSolve */ void LinSolverDirectKLU::setOrdering(int ordering) { - ordering_ = ordering; + ordering_ = ordering; Common_.ordering = ordering; } @@ -371,7 +398,7 @@ namespace ReSolve void LinSolverDirectKLU::setHaltIfSingular(bool isHalt) { - halt_if_singular_ = isHalt; + halt_if_singular_ = isHalt; Common_.halt_if_singular = isHalt; } @@ -400,17 +427,17 @@ namespace ReSolve { switch (getParamId(id)) { - case PIVOT_TOL: - setPivotThreshold(atof(value.c_str())); - break; - case ORDERING: - setOrdering(atoi(value.c_str())); - break; - case HALT_IF_SINGULAR: - setHaltIfSingular(value == "yes"); - break; - default: - std::cout << "Setting parameter failed!\n"; + case PIVOT_TOL: + setPivotThreshold(atof(value.c_str())); + break; + case ORDERING: + setOrdering(atoi(value.c_str())); + break; + case HALT_IF_SINGULAR: + setHaltIfSingular(value == "yes"); + break; + default: + std::cout << "Setting parameter failed!\n"; } return 0; } @@ -429,8 +456,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown string parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown string parameter " << id << "\n"; } return ""; } @@ -448,10 +475,10 @@ namespace ReSolve { switch (getParamId(id)) { - case ORDERING: - return ordering_; - default: - out::error() << "Trying to get unknown integer parameter " << id << "\n"; + case ORDERING: + return ordering_; + default: + out::error() << "Trying to get unknown integer parameter " << id << "\n"; } return -1; } @@ -470,10 +497,10 @@ namespace ReSolve { switch (getParamId(id)) { - case PIVOT_TOL: - return pivot_threshold_tol_; - default: - out::error() << "Trying to get unknown real parameter " << id << "\n"; + case PIVOT_TOL: + return pivot_threshold_tol_; + default: + out::error() << "Trying to get unknown real parameter " << id << "\n"; } return std::numeric_limits::quiet_NaN(); } @@ -491,10 +518,10 @@ namespace ReSolve { switch (getParamId(id)) { - case HALT_IF_SINGULAR: - return halt_if_singular_; - default: - out::error() << "Trying to get unknown boolean parameter " << id << "\n"; + case HALT_IF_SINGULAR: + return halt_if_singular_; + default: + out::error() << "Trying to get unknown boolean parameter " << id << "\n"; } return false; } @@ -510,18 +537,18 @@ namespace ReSolve { switch (getParamId(id)) { - case PIVOT_TOL: - std::cout << pivot_threshold_tol_ << "\n"; - break; - case ORDERING: - std::cout << ordering_ << "\n"; - break; - case HALT_IF_SINGULAR: - std::cout << halt_if_singular_ << "\n"; - break; - default: - out::error() << "Trying to print unknown parameter " << id << "\n"; - return 1; + case PIVOT_TOL: + std::cout << pivot_threshold_tol_ << "\n"; + break; + case ORDERING: + std::cout << ordering_ << "\n"; + break; + case HALT_IF_SINGULAR: + std::cout << halt_if_singular_ << "\n"; + break; + default: + out::error() << "Trying to print unknown parameter " << id << "\n"; + return 1; } return 0; } diff --git a/resolve/LinSolverDirectKLU.hpp b/resolve/LinSolverDirectKLU.hpp index c371f9991..e134bddc0 100644 --- a/resolve/LinSolverDirectKLU.hpp +++ b/resolve/LinSolverDirectKLU.hpp @@ -1,8 +1,7 @@ #pragma once -#include "klu.h" - #include "Common.hpp" +#include "klu.h" #include namespace ReSolve @@ -23,81 +22,86 @@ namespace ReSolve { using vector_type = vector::Vector; - public: - LinSolverDirectKLU(); - ~LinSolverDirectKLU(); - - int setup(matrix::Sparse* A, - matrix::Sparse* L = nullptr, - matrix::Sparse* U = nullptr, - index_type* P = nullptr, - index_type* Q = nullptr, - vector_type* rhs = nullptr) override; - - int analyze() override; //the same as symbolic factorization - int factorize() override; - int refactorize() override; - int solve(vector_type* rhs, vector_type* x) override; - int solve(vector_type* x) override; - - matrix::Sparse* getLFactor() override; - matrix::Sparse* getUFactor() override; - index_type* getPOrdering() override; - index_type* getQOrdering() override; - - virtual void setPivotThreshold(real_type tol); - virtual void setOrdering(int ordering); - virtual void setHaltIfSingular(bool isHalt); - - virtual real_type getMatrixConditionNumber(); - - int setCliParam(const std::string id, const std::string value) override; - std::string getCliParamString(const std::string id) const override; - index_type getCliParamInt(const std::string id) const override; - real_type getCliParamReal(const std::string id) const override; - bool getCliParamBool(const std::string id) const override; - int printCliParam(const std::string id) const override; - - private: - enum ParamaterIDs {PIVOT_TOL=0, ORDERING, HALT_IF_SINGULAR}; - - /** - * @brief Ordering type (during the analysis) - * - * Available values are 0 = AMD, 1 = COLAMD, 2 = user provided P, Q. - * - * Default is COLAMD. - */ - int ordering_{1}; - - /** - * @brief Partial pivoing tolerance. - * - * If the diagonal entry has a magnitude greater than or equal to tol - * times the largest magnitude of entries in the pivot column, then the - * diagonal entry is chosen. - */ - real_type pivot_threshold_tol_{0.1}; - - /** - * @brief Halt if matrix is singular. - * - * If false: keep going. Return a Numeric object with a zero U(k,k). - * A divide-by-zero may occur when computing L(:,k). The Numeric object - * can be passed to klu_solve (a divide-by-zero will occur). It can - * also be safely passed to refactorization methods. - * - * If true: stop quickly. klu_factor will free the partially-constructed - * Numeric object. klu_refactor will not free it, but will leave the - * numerical values only partially defined. - */ - bool halt_if_singular_{false}; - - private: - void initParamList(); - bool factors_extracted_{false}; - klu_common Common_; //settings - klu_symbolic* Symbolic_{nullptr}; - klu_numeric* Numeric_{nullptr}; + public: + LinSolverDirectKLU(); + ~LinSolverDirectKLU(); + + int setup(matrix::Sparse* A, + matrix::Sparse* L = nullptr, + matrix::Sparse* U = nullptr, + index_type* P = nullptr, + index_type* Q = nullptr, + vector_type* rhs = nullptr) override; + + int analyze() override; // the same as symbolic factorization + int factorize() override; + int refactorize() override; + int solve(vector_type* rhs, vector_type* x) override; + int solve(vector_type* x) override; + + matrix::Sparse* getLFactor() override; + matrix::Sparse* getUFactor() override; + index_type* getPOrdering() override; + index_type* getQOrdering() override; + + virtual void setPivotThreshold(real_type tol); + virtual void setOrdering(int ordering); + virtual void setHaltIfSingular(bool isHalt); + + virtual real_type getMatrixConditionNumber(); + + int setCliParam(const std::string id, const std::string value) override; + std::string getCliParamString(const std::string id) const override; + index_type getCliParamInt(const std::string id) const override; + real_type getCliParamReal(const std::string id) const override; + bool getCliParamBool(const std::string id) const override; + int printCliParam(const std::string id) const override; + + private: + enum ParamaterIDs + { + PIVOT_TOL = 0, + ORDERING, + HALT_IF_SINGULAR + }; + + /** + * @brief Ordering type (during the analysis) + * + * Available values are 0 = AMD, 1 = COLAMD, 2 = user provided P, Q. + * + * Default is COLAMD. + */ + int ordering_{1}; + + /** + * @brief Partial pivoing tolerance. + * + * If the diagonal entry has a magnitude greater than or equal to tol + * times the largest magnitude of entries in the pivot column, then the + * diagonal entry is chosen. + */ + real_type pivot_threshold_tol_{0.1}; + + /** + * @brief Halt if matrix is singular. + * + * If false: keep going. Return a Numeric object with a zero U(k,k). + * A divide-by-zero may occur when computing L(:,k). The Numeric object + * can be passed to klu_solve (a divide-by-zero will occur). It can + * also be safely passed to refactorization methods. + * + * If true: stop quickly. klu_factor will free the partially-constructed + * Numeric object. klu_refactor will not free it, but will leave the + * numerical values only partially defined. + */ + bool halt_if_singular_{false}; + + private: + void initParamList(); + bool factors_extracted_{false}; + klu_common Common_; // settings + klu_symbolic* Symbolic_{nullptr}; + klu_numeric* Numeric_{nullptr}; }; -} +} // namespace ReSolve diff --git a/resolve/LinSolverDirectLUSOL.cpp b/resolve/LinSolverDirectLUSOL.cpp index cddb777ce..3f16a2e72 100644 --- a/resolve/LinSolverDirectLUSOL.cpp +++ b/resolve/LinSolverDirectLUSOL.cpp @@ -1,11 +1,11 @@ +#include "LinSolverDirectLUSOL.hpp" + #include #include #include #include -#include "LinSolverDirectLUSOL.hpp" #include "lusol/lusol.hpp" - #include #include #include @@ -21,20 +21,21 @@ namespace ReSolve luparm_[0] = 6; // Set LUSOL output print level - switch (out::verbosity()) { - case io::Logger::NONE: - luparm_[1] = -1; - break; - case io::Logger::ERRORS: - case io::Logger::WARNINGS: - luparm_[1] = 0; - break; - case io::Logger::SUMMARY: - luparm_[1] = 10; - break; - case io::Logger::EVERYTHING: - luparm_[1] = 50; - break; + switch (out::verbosity()) + { + case io::Logger::NONE: + luparm_[1] = -1; + break; + case io::Logger::ERRORS: + case io::Logger::WARNINGS: + luparm_[1] = 0; + break; + case io::Logger::SUMMARY: + luparm_[1] = 10; + break; + case io::Logger::EVERYTHING: + luparm_[1] = 50; + break; } // maximum number of columns searched allowed in a Markowitz-type @@ -94,11 +95,11 @@ namespace ReSolve int LinSolverDirectLUSOL::setup(matrix::Sparse* A, matrix::Sparse* /* L */, matrix::Sparse* /* U */, - index_type* /* P */, - index_type* /* Q */, - vector_type* /* rhs */) + index_type* /* P */, + index_type* /* Q */, + vector_type* /* rhs */) { - A_ = A; + A_ = A; is_factorized_ = false; delete L_; delete U_; @@ -119,24 +120,26 @@ namespace ReSolve int LinSolverDirectLUSOL::analyze() { // Brute force solution: If the solver workspace is already allocated, nuke it! - if (is_solver_data_allocated_) { + if (is_solver_data_allocated_) + { freeSolverData(); is_solver_data_allocated_ = false; } nelem_ = A_->getNnz(); - m_ = A_->getNumRows(); - n_ = A_->getNumColumns(); + m_ = A_->getNumRows(); + n_ = A_->getNumColumns(); allocateSolverData(); is_solver_data_allocated_ = true; - real_type* a_in = A_->getValues(memory::HOST); + real_type* a_in = A_->getValues(memory::HOST); index_type* indc_in = A_->getRowData(memory::HOST); index_type* indr_in = A_->getColData(memory::HOST); - for (index_type i = 0; i < nelem_; i++) { - a_[i] = a_in[i]; + for (index_type i = 0; i < nelem_; i++) + { + a_[i] = a_in[i]; indc_[i] = indc_in[i] + 1; indr_[i] = indr_in[i] + 1; } @@ -146,7 +149,8 @@ namespace ReSolve int LinSolverDirectLUSOL::factorize() { - if (!is_solver_data_allocated_) { + if (!is_solver_data_allocated_) + { out::warning() << "LinSolverDirect::factorize() called on " << "LinSolverDirectLUSOL without allocating the " << "workspace first!\n"; @@ -154,7 +158,8 @@ namespace ReSolve // it isn't possible for this to error in any recoverable way // but we'll check anyway int inform = analyze(); - if (inform < 0) { + if (inform < 0) + { return inform; } } @@ -199,22 +204,25 @@ namespace ReSolve int LinSolverDirectLUSOL::solve(vector_type* rhs, vector_type* x) { - if (!is_factorized_) { + if (!is_factorized_) + { out::warning() << "LinSolverDirect::solve(vector_type*, vector_type*) " << "called on LinSolverDirectLUSOL without factorizing " << "first!\n"; int inform = factorize(); - if (inform < 0) { + if (inform < 0) + { return inform; } } - if (m_ == 0 || rhs->getSize() != m_ || x->getSize() != n_) { + if (m_ == 0 || rhs->getSize() != m_ || x->getSize() != n_) + { return -1; } - index_type mode = 5; + index_type mode = 5; index_type inform = 0; lu6sol(&mode, @@ -254,11 +262,13 @@ namespace ReSolve */ matrix::Sparse* LinSolverDirectLUSOL::getLFactor() { - if (!is_factorized_) { + if (!is_factorized_) + { return nullptr; } - if (L_ != nullptr) { + if (L_ != nullptr) + { // because of the way we've implemented setup, we can just return the // existing pointer in L_ as this means we've already extracted L // @@ -267,48 +277,52 @@ namespace ReSolve } index_type diagonal_bound = std::min({m_, n_}); - index_type current_nnz = luparm_[22]; + index_type current_nnz = luparm_[22]; L_ = static_cast(new matrix::Csc(n_, m_, current_nnz + diagonal_bound, false, true)); L_->allocateMatrixData(memory::HOST); index_type* columns = L_->getColData(memory::HOST); - index_type* rows = L_->getRowData(memory::HOST); - real_type* values = L_->getValues(memory::HOST); + index_type* rows = L_->getRowData(memory::HOST); + real_type* values = L_->getValues(memory::HOST); // build an inverse permutation array for p // NOTE: this is not one-indexed like the original is std::unique_ptr pt = std::unique_ptr(new index_type[m_]); - for (index_type i = 0; i < m_; i++) { + for (index_type i = 0; i < m_; i++) + { size_t j = static_cast(p_[i] - 1); - pt[j] = i; + pt[j] = i; } // preprocessing since columns are stored unordered within lusol's workspace - columns[0] = 0; - index_type offset = lena_ - 1; + columns[0] = 0; + index_type offset = lena_ - 1; index_type initial_m = luparm_[19]; - for (index_type i = 0; i < initial_m; i++) { - index_type column_nnz = lenc_[i]; - index_type column_nnz_end = offset - column_nnz; - size_t j = static_cast(indr_[column_nnz_end + 1] - 1); + for (index_type i = 0; i < initial_m; i++) + { + index_type column_nnz = lenc_[i]; + index_type column_nnz_end = offset - column_nnz; + size_t j = static_cast(indr_[column_nnz_end + 1] - 1); index_type corresponding_column = pt[j]; columns[corresponding_column + 1] = column_nnz; - offset = column_nnz_end; + offset = column_nnz_end; } - for (index_type column = 0; column < m_; column++) { + for (index_type column = 0; column < m_; column++) + { columns[column + 1] += columns[column]; } // handle rectangular l factors correctly - for (index_type column = 0; column < diagonal_bound; column++) { - columns[column + 1] += column + 1; - rows[columns[column + 1] - 1] = column; - values[columns[column + 1] - 1] = 1.0; + for (index_type column = 0; column < diagonal_bound; column++) + { + columns[column + 1] += column + 1; + rows[columns[column + 1] - 1] = column; + values[columns[column + 1] - 1] = 1.0; } // fill the destination arrays. iterates over the stored columns, depermuting the @@ -316,14 +330,16 @@ namespace ReSolve // insertion sort (where L is the L factor as stored in LUSOL's workspace) offset = lena_ - 1; - for (index_type i = 0; i < initial_m; i++) { - size_t j = static_cast(indr_[offset - lenc_[i] + 1] - 1); + for (index_type i = 0; i < initial_m; i++) + { + size_t j = static_cast(indr_[offset - lenc_[i] + 1] - 1); index_type corresponding_column = pt[j]; for (index_type destination_offset = columns[corresponding_column]; destination_offset < columns[corresponding_column + 1] - 1; - destination_offset++) { - size_t k = static_cast(indc_[offset] - 1); + destination_offset++) + { + size_t k = static_cast(indc_[offset] - 1); index_type row = pt[k]; // closest position to the target row @@ -334,19 +350,21 @@ namespace ReSolve index_type insertion_offset = static_cast(closest_position - rows); // LUSOL is not supposed to create duplicates. Report error if it does. - if (rows[insertion_offset] == row && closest_position != &rows[destination_offset]) { + if (rows[insertion_offset] == row && closest_position != &rows[destination_offset]) + { out::error() << "duplicate element found during LUSOL L factor extraction\n"; return nullptr; } for (index_type swap_offset = destination_offset; swap_offset > insertion_offset; - swap_offset--) { + swap_offset--) + { std::swap(rows[swap_offset], rows[swap_offset - 1]); std::swap(values[swap_offset], values[swap_offset - 1]); } - rows[insertion_offset] = row; + rows[insertion_offset] = row; values[insertion_offset] = -a_[offset]; offset--; @@ -364,52 +382,59 @@ namespace ReSolve */ matrix::Sparse* LinSolverDirectLUSOL::getUFactor() { - if (!is_factorized_) { + if (!is_factorized_) + { return nullptr; } - if (U_ != nullptr) { + if (U_ != nullptr) + { // likewise return U_; } - index_type current_nnz = luparm_[23]; + index_type current_nnz = luparm_[23]; index_type n_singularities = luparm_[10]; - U_ = static_cast(new matrix::Csr(n_, m_, current_nnz - n_singularities, false, true)); + U_ = static_cast(new matrix::Csr(n_, m_, current_nnz - n_singularities, false, true)); U_->allocateMatrixData(memory::HOST); - index_type* rows = U_->getRowData(memory::HOST); + index_type* rows = U_->getRowData(memory::HOST); index_type* columns = U_->getColData(memory::HOST); - real_type* values = U_->getValues(memory::HOST); + real_type* values = U_->getValues(memory::HOST); // build an inverse permutation array for q // NOTE: this is not one-indexed like the original is std::unique_ptr qt = std::unique_ptr(new index_type[n_]); - for (index_type i = 0; i < n_; i++) { + for (index_type i = 0; i < n_; i++) + { size_t j = static_cast(q_[i] - 1); - qt[j] = i; + qt[j] = i; } // preprocessing since rows technically aren't ordered either index_type stored_rows = luparm_[15]; - for (index_type stored_row = 0; stored_row < stored_rows; stored_row++) { + for (index_type stored_row = 0; stored_row < stored_rows; stored_row++) + { index_type corresponding_row = p_[stored_row] - 1; - rows[stored_row + 1] = lenr_[corresponding_row]; + rows[stored_row + 1] = lenr_[corresponding_row]; } - for (index_type row = 0; row < n_; row++) { + for (index_type row = 0; row < n_; row++) + { rows[row + 1] += rows[row]; } // fill the destination arrays - for (index_type row = 0; row < n_; row++) { + for (index_type row = 0; row < n_; row++) + { index_type offset = locr_[p_[row] - 1] - 1; - for (index_type destination_offset = rows[row]; destination_offset < rows[row + 1]; destination_offset++) { - size_t j = static_cast(indr_[offset] - 1); + for (index_type destination_offset = rows[row]; destination_offset < rows[row + 1]; destination_offset++) + { + size_t j = static_cast(indr_[offset] - 1); index_type column = qt[j]; // closest position to the target column @@ -420,18 +445,20 @@ namespace ReSolve index_type insertion_offset = static_cast(closest_position - columns); // LUSOL is not supposed to create duplicates. Report error if it does. - if (columns[insertion_offset] == column && closest_position != &columns[destination_offset]) { + if (columns[insertion_offset] == column && closest_position != &columns[destination_offset]) + { out::error() << "duplicate element found during LUSOL U factor extraction\n"; return nullptr; } - for (index_type swap_offset = destination_offset; swap_offset > insertion_offset; swap_offset--) { + for (index_type swap_offset = destination_offset; swap_offset > insertion_offset; swap_offset--) + { std::swap(columns[swap_offset], columns[swap_offset - 1]); std::swap(values[swap_offset], values[swap_offset - 1]); } columns[insertion_offset] = column; - values[insertion_offset] = a_[offset]; + values[insertion_offset] = a_[offset]; offset++; } @@ -442,11 +469,13 @@ namespace ReSolve index_type* LinSolverDirectLUSOL::getPOrdering() { - if (P_ == nullptr) { + if (P_ == nullptr) + { P_ = new index_type[m_]; } - for (index_type i = 0; i < m_; i++) { + for (index_type i = 0; i < m_; i++) + { P_[i] = p_[i] - 1; } @@ -455,11 +484,13 @@ namespace ReSolve index_type* LinSolverDirectLUSOL::getQOrdering() { - if (Q_ == nullptr) { + if (Q_ == nullptr) + { Q_ = new index_type[n_]; } - for (index_type i = 0; i < n_; i++) { + for (index_type i = 0; i < n_; i++) + { Q_[i] = q_[i] - 1; } @@ -468,11 +499,11 @@ namespace ReSolve /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to set. * @return int Error code. */ @@ -480,19 +511,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - std::cout << "Setting parameter failed!\n"; + default: + std::cout << "Setting parameter failed!\n"; } return 0; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return std::string Value of the string parameter to return. */ @@ -500,19 +531,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown string parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown string parameter " << id << "\n"; } return ""; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return int Value of the int parameter to return. */ @@ -520,19 +551,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown integer parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown integer parameter " << id << "\n"; } return -1; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return real_type Value of the real_type parameter to return. */ @@ -540,19 +571,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown real parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown real parameter " << id << "\n"; } return std::numeric_limits::quiet_NaN(); } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return bool Value of the bool parameter to return. */ @@ -560,8 +591,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown boolean parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown boolean parameter " << id << "\n"; } return false; } @@ -588,7 +619,7 @@ namespace ReSolve // lena_ in resolve/lusol/lusol.f90 file. lena_ = std::max({20 * nelem_, 10 * m_, 10 * n_, 10000}); - a_ = new real_type[lena_]; + a_ = new real_type[lena_]; indc_ = new index_type[lena_]; indr_ = new index_type[lena_]; mem_.setZeroArrayOnHost(a_, lena_); @@ -649,22 +680,22 @@ namespace ReSolve delete[] w_; delete[] P_; delete[] Q_; - a_ = nullptr; - indc_ = nullptr; - indr_ = nullptr; - p_ = nullptr; - q_ = nullptr; - lenc_ = nullptr; - lenr_ = nullptr; - locc_ = nullptr; - locr_ = nullptr; + a_ = nullptr; + indc_ = nullptr; + indr_ = nullptr; + p_ = nullptr; + q_ = nullptr; + lenc_ = nullptr; + lenr_ = nullptr; + locc_ = nullptr; + locr_ = nullptr; iploc_ = nullptr; iqloc_ = nullptr; ipinv_ = nullptr; iqinv_ = nullptr; - w_ = nullptr; - P_ = nullptr; - Q_ = nullptr; + w_ = nullptr; + P_ = nullptr; + Q_ = nullptr; return 0; } diff --git a/resolve/LinSolverDirectLUSOL.hpp b/resolve/LinSolverDirectLUSOL.hpp index 9de689276..619ed4f9d 100644 --- a/resolve/LinSolverDirectLUSOL.hpp +++ b/resolve/LinSolverDirectLUSOL.hpp @@ -21,150 +21,150 @@ namespace ReSolve /** * @brief Wrapper for LUSOL solver. - * + * * LUSOL Fortran code is in file `lusol.f90`. - * + * */ class LinSolverDirectLUSOL : public LinSolverDirect { - using vector_type = vector::Vector; + using vector_type = vector::Vector; - public: - LinSolverDirectLUSOL(); - ~LinSolverDirectLUSOL(); + public: + LinSolverDirectLUSOL(); + ~LinSolverDirectLUSOL(); - /// @brief Setup function of the linear solver - int setup(matrix::Sparse* A, - matrix::Sparse* L = nullptr, - matrix::Sparse* U = nullptr, - index_type* P = nullptr, - index_type* Q = nullptr, - vector_type* rhs = nullptr) override; + /// @brief Setup function of the linear solver + int setup(matrix::Sparse* A, + matrix::Sparse* L = nullptr, + matrix::Sparse* U = nullptr, + index_type* P = nullptr, + index_type* Q = nullptr, + vector_type* rhs = nullptr) override; - /// @brief Analysis function of LUSOL - int analyze() override; + /// @brief Analysis function of LUSOL + int analyze() override; - int factorize() override; - int refactorize() override; - int solve(vector_type* rhs, vector_type* x) override; - int solve(vector_type* x) override; + int factorize() override; + int refactorize() override; + int solve(vector_type* rhs, vector_type* x) override; + int solve(vector_type* x) override; - /// @brief Returns the L factor of the solution in CSC format - matrix::Sparse* getLFactor() override; + /// @brief Returns the L factor of the solution in CSC format + matrix::Sparse* getLFactor() override; - /// @brief Returns the U factor of the solution in CSR format - matrix::Sparse* getUFactor() override; + /// @brief Returns the U factor of the solution in CSR format + matrix::Sparse* getUFactor() override; - index_type* getPOrdering() override; - index_type* getQOrdering() override; + index_type* getPOrdering() override; + index_type* getQOrdering() override; - int setCliParam(const std::string id, const std::string value) override; - std::string getCliParamString(const std::string id) const override; - index_type getCliParamInt(const std::string id) const override; - real_type getCliParamReal(const std::string id) const override; - bool getCliParamBool(const std::string id) const override; - int printCliParam(const std::string id) const override; - - private: - int allocateSolverData(); - int freeSolverData(); - - bool is_solver_data_allocated_{false}; - - MemoryHandler mem_; - - /// @brief Indicates if we have factorized the matrix yet - bool is_factorized_ = false; - - /// @brief Storage used for the matrices - /// - /// Primary workspace used by LUSOL. Used to hold the nonzeros of matrices - /// passed along API boundaries and as a general scratch region - real_type* a_ = nullptr; - - /// @brief Row data of matrices passed along API boundaries, in addition to - /// functioning as additional workspace storage for LUSOL - index_type* indc_ = nullptr; - - /// @brief Column data of matrices passed along API boundaries, in addition to - /// functioning as additional workspace storage for LUSOL - index_type* indr_ = nullptr; - - /// @brief The number of nonzero elements within the input matrix, A - index_type nelem_ = 0; - - /// @brief The permutation vector P, stored in the way LUSOL expects it to be (1-indexed) - index_type* p_ = nullptr; - - /// @brief The permutation vector Q, stored in the way LUSOL expects it to be (1-indexed) - index_type* q_ = nullptr; - - /// @brief The length of the dynamically-allocated arrays held within `a_`, - /// `indc_`, and `indr_` - /// - /// This should be much greater than the number of nonzeroes in the input - /// matrix A, as stated in LUSOL's source code. - /// - /// Note that this is not an upper bound on the required space; the size of this - /// buffer may be insufficient, in which case a call to a LUSOL subroutine - /// utilizing it will return with inform set to 7, and the intended behavior of - /// the callee is that they should resize `a_`, `indc_`, and `indr_` to at least - /// the value specified in `luparm_[12]` - index_type lena_ = 0; - - /// @brief The number of rows in the input matrix, A - index_type m_ = 0; - - /// @brief The number of columns in the input matrix, A - index_type n_ = 0; - - /// @brief Index-typed parameters passed along the API boundary - index_type luparm_[30] = {0}; - - /// @brief Real-typed parameters passed along the API boundary - real_type parmlu_[30] = {0}; - - /// @brief Number of entries within nontrivial columns of L, stored in pivot order - index_type* lenc_ = nullptr; - - /// @brief Number of entries in each row of U, stored in original order - index_type* lenr_ = nullptr; - - /// @brief Appears to be internal storage for LUSOL, used by the LU update routines - index_type* locc_ = nullptr; - - /// @brief Points to the beginning of rows of U within a - index_type* locr_ = nullptr; - - // TODO: it would be nice to have more information about these "undefined" (as - // said within the source code documentation of lu1fac) parameters - // - // there is some amount of documentation in the "notes on array names" - // section, but given they're only really storage parameters and aren't - // useful post-factorization, we'll leave it at "undefined" for now - - /// @brief Undefined value - index_type* iploc_ = nullptr; - - /// @brief Undefined value - index_type* iqloc_ = nullptr; + int setCliParam(const std::string id, const std::string value) override; + std::string getCliParamString(const std::string id) const override; + index_type getCliParamInt(const std::string id) const override; + real_type getCliParamReal(const std::string id) const override; + bool getCliParamBool(const std::string id) const override; + int printCliParam(const std::string id) const override; + + private: + int allocateSolverData(); + int freeSolverData(); + + bool is_solver_data_allocated_{false}; + + MemoryHandler mem_; + + /// @brief Indicates if we have factorized the matrix yet + bool is_factorized_ = false; + + /// @brief Storage used for the matrices + /// + /// Primary workspace used by LUSOL. Used to hold the nonzeros of matrices + /// passed along API boundaries and as a general scratch region + real_type* a_ = nullptr; + + /// @brief Row data of matrices passed along API boundaries, in addition to + /// functioning as additional workspace storage for LUSOL + index_type* indc_ = nullptr; + + /// @brief Column data of matrices passed along API boundaries, in addition to + /// functioning as additional workspace storage for LUSOL + index_type* indr_ = nullptr; + + /// @brief The number of nonzero elements within the input matrix, A + index_type nelem_ = 0; + + /// @brief The permutation vector P, stored in the way LUSOL expects it to be (1-indexed) + index_type* p_ = nullptr; + + /// @brief The permutation vector Q, stored in the way LUSOL expects it to be (1-indexed) + index_type* q_ = nullptr; + + /// @brief The length of the dynamically-allocated arrays held within `a_`, + /// `indc_`, and `indr_` + /// + /// This should be much greater than the number of nonzeroes in the input + /// matrix A, as stated in LUSOL's source code. + /// + /// Note that this is not an upper bound on the required space; the size of this + /// buffer may be insufficient, in which case a call to a LUSOL subroutine + /// utilizing it will return with inform set to 7, and the intended behavior of + /// the callee is that they should resize `a_`, `indc_`, and `indr_` to at least + /// the value specified in `luparm_[12]` + index_type lena_ = 0; + + /// @brief The number of rows in the input matrix, A + index_type m_ = 0; + + /// @brief The number of columns in the input matrix, A + index_type n_ = 0; + + /// @brief Index-typed parameters passed along the API boundary + index_type luparm_[30] = {0}; + + /// @brief Real-typed parameters passed along the API boundary + real_type parmlu_[30] = {0}; + + /// @brief Number of entries within nontrivial columns of L, stored in pivot order + index_type* lenc_ = nullptr; + + /// @brief Number of entries in each row of U, stored in original order + index_type* lenr_ = nullptr; + + /// @brief Appears to be internal storage for LUSOL, used by the LU update routines + index_type* locc_ = nullptr; + + /// @brief Points to the beginning of rows of U within a + index_type* locr_ = nullptr; + + // TODO: it would be nice to have more information about these "undefined" (as + // said within the source code documentation of lu1fac) parameters + // + // there is some amount of documentation in the "notes on array names" + // section, but given they're only really storage parameters and aren't + // useful post-factorization, we'll leave it at "undefined" for now + + /// @brief Undefined value + index_type* iploc_ = nullptr; + + /// @brief Undefined value + index_type* iqloc_ = nullptr; - /// @brief Undefined value - index_type* ipinv_ = nullptr; + /// @brief Undefined value + index_type* ipinv_ = nullptr; - /// @brief Undefined value - index_type* iqinv_ = nullptr; - - /// @brief Indicates singularity during LU factorization, otherwise contains either - /// the solution or target for solving a linear system - /// - /// Generally speaking, `w_[j] == +max(jth column of U)`, but if the - /// `j`th column is a singularity, `w_[j] == -max(jth column of U)`. Hence, - /// `w_[j] <= 0` implies that the column `j` of A is likely dependent on the - /// other columns of A. - /// - /// When solving a linear system `A*w_ = v_`, `w_` contains the solution. It is not - /// important what `w_` contains prior to this. - real_type* w_ = nullptr; + /// @brief Undefined value + index_type* iqinv_ = nullptr; + + /// @brief Indicates singularity during LU factorization, otherwise contains either + /// the solution or target for solving a linear system + /// + /// Generally speaking, `w_[j] == +max(jth column of U)`, but if the + /// `j`th column is a singularity, `w_[j] == -max(jth column of U)`. Hence, + /// `w_[j] <= 0` implies that the column `j` of A is likely dependent on the + /// other columns of A. + /// + /// When solving a linear system `A*w_ = v_`, `w_` contains the solution. It is not + /// important what `w_` contains prior to this. + real_type* w_ = nullptr; }; } // namespace ReSolve diff --git a/resolve/LinSolverDirectRocSolverRf.cpp b/resolve/LinSolverDirectRocSolverRf.cpp index a57b8c1dc..86fdefda2 100644 --- a/resolve/LinSolverDirectRocSolverRf.cpp +++ b/resolve/LinSolverDirectRocSolverRf.cpp @@ -1,12 +1,15 @@ -#include -#include #include "LinSolverDirectRocSolverRf.hpp" + #include + #include +#include +#include namespace ReSolve { using out = io::Logger; + /** * @brief Constructor for LinSolverDirectRocSolverRf * @@ -16,9 +19,9 @@ namespace ReSolve */ LinSolverDirectRocSolverRf::LinSolverDirectRocSolverRf(LinAlgWorkspaceHIP* workspace) { - workspace_ = workspace; - infoM_ = nullptr; - solve_mode_ = 1; //solve mode - 1: use rocsparse trisolve + workspace_ = workspace; + infoM_ = nullptr; + solve_mode_ = 1; // solve mode - 1: use rocsparse trisolve initParamList(); } @@ -56,20 +59,19 @@ namespace ReSolve int LinSolverDirectRocSolverRf::setup(matrix::Sparse* A, matrix::Sparse* L, matrix::Sparse* U, - index_type* P, - index_type* Q, - vector_type* rhs) + index_type* P, + index_type* Q, + vector_type* rhs) { RESOLVE_RANGE_PUSH(__FUNCTION__); int error_sum = 0; - assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && - "Matrix has to be in CSR format for rocsolverRf.\n"); - A_ = A; + assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix has to be in CSR format for rocsolverRf.\n"); + A_ = A; index_type n = A_->getNumRows(); - //set matrix info + // set matrix info rocsolver_create_rfinfo(&infoM_, workspace_->getRocblasHandle()); // Combine factors L and U into matrix M_ @@ -78,12 +80,14 @@ namespace ReSolve M_->setUpdated(ReSolve::memory::HOST); M_->syncData(ReSolve::memory::DEVICE); - //remember - P and Q are generally CPU variables - if (d_P_ == nullptr) { + // remember - P and Q are generally CPU variables + if (d_P_ == nullptr) + { mem_.allocateArrayOnDevice(&d_P_, n); } - if (d_Q_ == nullptr) { + if (d_Q_ == nullptr) + { mem_.allocateArrayOnDevice(&d_Q_, n); } mem_.copyArrayHostToDevice(d_P_, P, n); @@ -94,13 +98,13 @@ namespace ReSolve n, 1, A_->getNnz(), - A_->getRowData(ReSolve::memory::DEVICE), //kRowPtr_, - A_->getColData(ReSolve::memory::DEVICE), //jCol_, - A_->getValues(ReSolve::memory::DEVICE), //vals_, + A_->getRowData(ReSolve::memory::DEVICE), // kRowPtr_, + A_->getColData(ReSolve::memory::DEVICE), // jCol_, + A_->getValues(ReSolve::memory::DEVICE), // vals_, M_->getNnz(), M_->getRowData(ReSolve::memory::DEVICE), M_->getColData(ReSolve::memory::DEVICE), - M_->getValues(ReSolve::memory::DEVICE), //vals_, + M_->getValues(ReSolve::memory::DEVICE), // vals_, d_P_, d_Q_, rhs->getData(ReSolve::memory::DEVICE), @@ -110,23 +114,25 @@ namespace ReSolve mem_.deviceSynchronize(); error_sum += status_rocblas_; // tri solve setup - if (solve_mode_ == 1) { // fast mode + if (solve_mode_ == 1) + { // fast mode - if (L_csr_ != nullptr) { + if (L_csr_ != nullptr) + { delete L_csr_; } L_csr_ = new ReSolve::matrix::Csr(L->getNumRows(), L->getNumColumns(), L->getNnz()); L_csr_->allocateMatrixData(ReSolve::memory::DEVICE); - if (U_csr_ != nullptr) { + if (U_csr_ != nullptr) + { delete U_csr_; } U_csr_ = new ReSolve::matrix::Csr(U->getNumRows(), U->getNumColumns(), U->getNnz()); U_csr_->allocateMatrixData(ReSolve::memory::DEVICE); - rocsparse_create_mat_descr(&(descr_L_)); rocsparse_set_mat_fill_mode(descr_L_, rocsparse_fill_mode_lower); rocsparse_set_mat_index_base(descr_L_, rocsparse_index_base_zero); @@ -147,17 +153,17 @@ namespace ReSolve M_->getNnz(), M_->getRowData(ReSolve::memory::DEVICE), M_->getColData(ReSolve::memory::DEVICE), - M_->getValues(ReSolve::memory::DEVICE), //vals_, + M_->getValues(ReSolve::memory::DEVICE), // vals_, L_csr_->getRowData(ReSolve::memory::DEVICE), L_csr_->getColData(ReSolve::memory::DEVICE), - L_csr_->getValues(ReSolve::memory::DEVICE), //vals_, + L_csr_->getValues(ReSolve::memory::DEVICE), // vals_, U_csr_->getRowData(ReSolve::memory::DEVICE), U_csr_->getColData(ReSolve::memory::DEVICE), U_csr_->getValues(ReSolve::memory::DEVICE)); error_sum += status_rocblas_; - status_rocsparse_ = rocsparse_dcsrsv_buffer_size(workspace_->getRocsparseHandle(), + status_rocsparse_ = rocsparse_dcsrsv_buffer_size(workspace_->getRocsparseHandle(), rocsparse_operation_none, n, L_csr_->getNnz(), @@ -167,10 +173,10 @@ namespace ReSolve L_csr_->getColData(ReSolve::memory::DEVICE), info_L_, &L_buffer_size); - error_sum += status_rocsparse_; + error_sum += status_rocsparse_; mem_.allocateBufferOnDevice(&L_buffer_, L_buffer_size); - status_rocsparse_ = rocsparse_dcsrsv_buffer_size(workspace_->getRocsparseHandle(), + status_rocsparse_ = rocsparse_dcsrsv_buffer_size(workspace_->getRocsparseHandle(), rocsparse_operation_none, n, U_csr_->getNnz(), @@ -180,10 +186,10 @@ namespace ReSolve U_csr_->getColData(ReSolve::memory::DEVICE), info_U_, &U_buffer_size); - error_sum += status_rocsparse_; + error_sum += status_rocsparse_; mem_.allocateBufferOnDevice(&U_buffer_, U_buffer_size); - status_rocsparse_ = rocsparse_dcsrsv_analysis(workspace_->getRocsparseHandle(), + status_rocsparse_ = rocsparse_dcsrsv_analysis(workspace_->getRocsparseHandle(), rocsparse_operation_none, n, L_csr_->getNnz(), @@ -195,34 +201,37 @@ namespace ReSolve rocsparse_analysis_policy_force, rocsparse_solve_policy_auto, L_buffer_); - error_sum += status_rocsparse_; - if (status_rocsparse_!=0) { + error_sum += status_rocsparse_; + if (status_rocsparse_ != 0) + { std::cout << "status after analysis 1: " << status_rocsparse_ << "\n"; } - status_rocsparse_ = rocsparse_dcsrsv_analysis(workspace_->getRocsparseHandle(), + status_rocsparse_ = rocsparse_dcsrsv_analysis(workspace_->getRocsparseHandle(), rocsparse_operation_none, n, U_csr_->getNnz(), descr_U_, - U_csr_->getValues(ReSolve::memory::DEVICE), //vals_, + U_csr_->getValues(ReSolve::memory::DEVICE), // vals_, U_csr_->getRowData(ReSolve::memory::DEVICE), U_csr_->getColData(ReSolve::memory::DEVICE), info_U_, rocsparse_analysis_policy_force, rocsparse_solve_policy_auto, U_buffer_); - error_sum += status_rocsparse_; - if (status_rocsparse_!=0) { + error_sum += status_rocsparse_; + if (status_rocsparse_ != 0) + { out::error() << "status after analysis 2: " << status_rocsparse_ << "\n"; } - //allocate aux data - if (d_aux1_ == nullptr) { - mem_.allocateArrayOnDevice(&d_aux1_,n); + // allocate aux data + if (d_aux1_ == nullptr) + { + mem_.allocateArrayOnDevice(&d_aux1_, n); } - if (d_aux2_ == nullptr) { - mem_.allocateArrayOnDevice(&d_aux2_,n); + if (d_aux2_ == nullptr) + { + mem_.allocateArrayOnDevice(&d_aux2_, n); } - } RESOLVE_RANGE_POP(__FUNCTION__); return error_sum; @@ -238,41 +247,41 @@ namespace ReSolve RESOLVE_RANGE_PUSH(__FUNCTION__); int error_sum = 0; mem_.deviceSynchronize(); - status_rocblas_ = rocsolver_dcsrrf_refactlu(workspace_->getRocblasHandle(), - A_->getNumRows(), - A_->getNnz(), - A_->getRowData(ReSolve::memory::DEVICE), //kRowPtr_, - A_->getColData(ReSolve::memory::DEVICE), //jCol_, - A_->getValues(ReSolve::memory::DEVICE), //vals_, - M_->getNnz(), - M_->getRowData(ReSolve::memory::DEVICE), - M_->getColData(ReSolve::memory::DEVICE), - M_->getValues(ReSolve::memory::DEVICE), //OUTPUT, - d_P_, - d_Q_, - infoM_); + status_rocblas_ = rocsolver_dcsrrf_refactlu(workspace_->getRocblasHandle(), + A_->getNumRows(), + A_->getNnz(), + A_->getRowData(ReSolve::memory::DEVICE), // kRowPtr_, + A_->getColData(ReSolve::memory::DEVICE), // jCol_, + A_->getValues(ReSolve::memory::DEVICE), // vals_, + M_->getNnz(), + M_->getRowData(ReSolve::memory::DEVICE), + M_->getColData(ReSolve::memory::DEVICE), + M_->getValues(ReSolve::memory::DEVICE), // OUTPUT, + d_P_, + d_Q_, + infoM_); mem_.deviceSynchronize(); error_sum += status_rocblas_; - if (solve_mode_ == 1) { - //split M, fill L and U with correct values + if (solve_mode_ == 1) + { + // split M, fill L and U with correct values status_rocblas_ = rocsolver_dcsrrf_splitlu(workspace_->getRocblasHandle(), A_->getNumRows(), M_->getNnz(), M_->getRowData(ReSolve::memory::DEVICE), M_->getColData(ReSolve::memory::DEVICE), - M_->getValues(ReSolve::memory::DEVICE), //vals_, + M_->getValues(ReSolve::memory::DEVICE), // vals_, L_csr_->getRowData(ReSolve::memory::DEVICE), L_csr_->getColData(ReSolve::memory::DEVICE), - L_csr_->getValues(ReSolve::memory::DEVICE), //vals_, + L_csr_->getValues(ReSolve::memory::DEVICE), // vals_, U_csr_->getRowData(ReSolve::memory::DEVICE), U_csr_->getColData(ReSolve::memory::DEVICE), U_csr_->getValues(ReSolve::memory::DEVICE)); mem_.deviceSynchronize(); error_sum += status_rocblas_; - } RESOLVE_RANGE_POP(__FUNCTION__); return error_sum; @@ -289,22 +298,25 @@ namespace ReSolve { RESOLVE_RANGE_PUSH(__FUNCTION__); int error_sum = 0; - if (solve_mode_ == 0) { + if (solve_mode_ == 0) + { mem_.deviceSynchronize(); - status_rocblas_ = rocsolver_dcsrrf_solve(workspace_->getRocblasHandle(), - A_->getNumRows(), - 1, - M_->getNnz(), - M_->getRowData(ReSolve::memory::DEVICE), - M_->getColData(ReSolve::memory::DEVICE), - M_->getValues(ReSolve::memory::DEVICE), - d_P_, - d_Q_, - rhs->getData(ReSolve::memory::DEVICE), - A_->getNumRows(), - infoM_); + status_rocblas_ = rocsolver_dcsrrf_solve(workspace_->getRocblasHandle(), + A_->getNumRows(), + 1, + M_->getNnz(), + M_->getRowData(ReSolve::memory::DEVICE), + M_->getColData(ReSolve::memory::DEVICE), + M_->getValues(ReSolve::memory::DEVICE), + d_P_, + d_Q_, + rhs->getData(ReSolve::memory::DEVICE), + A_->getNumRows(), + infoM_); mem_.deviceSynchronize(); - } else { + } + else + { // not implemented yet hip::permuteVectorP(A_->getNumRows(), d_P_, rhs->getData(ReSolve::memory::DEVICE), d_aux1_); mem_.deviceSynchronize(); @@ -319,7 +331,7 @@ namespace ReSolve L_csr_->getColData(ReSolve::memory::DEVICE), info_L_, d_aux1_, - d_aux2_, //result + d_aux2_, // result rocsparse_solve_policy_auto, L_buffer_); error_sum += status_rocsparse_; @@ -334,13 +346,13 @@ namespace ReSolve U_csr_->getRowData(ReSolve::memory::DEVICE), U_csr_->getColData(ReSolve::memory::DEVICE), info_U_, - d_aux2_, //input - d_aux1_, //result + d_aux2_, // input + d_aux1_, // result rocsparse_solve_policy_auto, U_buffer_); error_sum += status_rocsparse_; - hip::permuteVectorQ(A_->getNumRows(), d_Q_,d_aux1_,rhs->getData(ReSolve::memory::DEVICE)); + hip::permuteVectorQ(A_->getNumRows(), d_Q_, d_aux1_, rhs->getData(ReSolve::memory::DEVICE)); mem_.deviceSynchronize(); } RESOLVE_RANGE_POP(__FUNCTION__); @@ -361,23 +373,26 @@ namespace ReSolve x->copyDataFrom(rhs->getData(ReSolve::memory::DEVICE), ReSolve::memory::DEVICE, ReSolve::memory::DEVICE); x->setDataUpdated(ReSolve::memory::DEVICE); int error_sum = 0; - if (solve_mode_ == 0) { + if (solve_mode_ == 0) + { mem_.deviceSynchronize(); - status_rocblas_ = rocsolver_dcsrrf_solve(workspace_->getRocblasHandle(), - A_->getNumRows(), - 1, - M_->getNnz(), - M_->getRowData(ReSolve::memory::DEVICE), - M_->getColData(ReSolve::memory::DEVICE), - M_->getValues(ReSolve::memory::DEVICE), - d_P_, - d_Q_, - x->getData(ReSolve::memory::DEVICE), - A_->getNumRows(), - infoM_); - error_sum += status_rocblas_; + status_rocblas_ = rocsolver_dcsrrf_solve(workspace_->getRocblasHandle(), + A_->getNumRows(), + 1, + M_->getNnz(), + M_->getRowData(ReSolve::memory::DEVICE), + M_->getColData(ReSolve::memory::DEVICE), + M_->getValues(ReSolve::memory::DEVICE), + d_P_, + d_Q_, + x->getData(ReSolve::memory::DEVICE), + A_->getNumRows(), + infoM_); + error_sum += status_rocblas_; mem_.deviceSynchronize(); - } else { + } + else + { // not implemented yet hip::permuteVectorP(A_->getNumRows(), d_P_, rhs->getData(ReSolve::memory::DEVICE), d_aux1_); @@ -389,12 +404,12 @@ namespace ReSolve L_csr_->getNnz(), &(constants::ONE), descr_L_, - L_csr_->getValues(ReSolve::memory::DEVICE), //vals_, + L_csr_->getValues(ReSolve::memory::DEVICE), // vals_, L_csr_->getRowData(ReSolve::memory::DEVICE), L_csr_->getColData(ReSolve::memory::DEVICE), info_L_, d_aux1_, - d_aux2_, //result + d_aux2_, // result rocsparse_solve_policy_auto, L_buffer_); error_sum += status_rocsparse_; @@ -405,17 +420,17 @@ namespace ReSolve U_csr_->getNnz(), &(constants::ONE), descr_U_, - U_csr_->getValues(ReSolve::memory::DEVICE), //vals_, + U_csr_->getValues(ReSolve::memory::DEVICE), // vals_, U_csr_->getRowData(ReSolve::memory::DEVICE), U_csr_->getColData(ReSolve::memory::DEVICE), info_U_, - d_aux2_, //input - d_aux1_,//result + d_aux2_, // input + d_aux1_, // result rocsparse_solve_policy_auto, U_buffer_); error_sum += status_rocsparse_; - hip::permuteVectorQ(A_->getNumRows(), d_Q_,d_aux1_,x->getData(ReSolve::memory::DEVICE)); + hip::permuteVectorQ(A_->getNumRows(), d_Q_, d_aux1_, x->getData(ReSolve::memory::DEVICE)); mem_.deviceSynchronize(); } RESOLVE_RANGE_POP(__FUNCTION__); @@ -458,17 +473,20 @@ namespace ReSolve { switch (getParamId(id)) { - case SOLVE_MODE: - if (value == "rocsparse_trisolve") { - // use rocsparse triangular solver - setSolveMode(1); - } else { - // use default - setSolveMode(0); - } - break; - default: - std::cout << "Setting parameter failed!\n"; + case SOLVE_MODE: + if (value == "rocsparse_trisolve") + { + // use rocsparse triangular solver + setSolveMode(1); + } + else + { + // use default + setSolveMode(0); + } + break; + default: + std::cout << "Setting parameter failed!\n"; } return 0; } @@ -488,18 +506,18 @@ namespace ReSolve std::string value(""); switch (getParamId(id)) { - case SOLVE_MODE: - switch (getSolveMode()) - { - case 0: - value = "default"; - break; - case 1: - value = "rocsparse_trisolve"; - break; - } - default: - out::error() << "Trying to get unknown string parameter " << id << "\n"; + case SOLVE_MODE: + switch (getSolveMode()) + { + case 0: + value = "default"; + break; + case 1: + value = "rocsparse_trisolve"; + break; + } + default: + out::error() << "Trying to get unknown string parameter " << id << "\n"; } return value; } @@ -518,8 +536,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown integer parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown integer parameter " << id << "\n"; } return -1; } @@ -538,8 +556,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown real parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown real parameter " << id << "\n"; } return std::numeric_limits::quiet_NaN(); } @@ -558,15 +576,15 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown boolean parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown boolean parameter " << id << "\n"; } return false; } /** - * @brief Placeholder function that shouldn't be called. - */ + * @brief Placeholder function that shouldn't be called. + */ int LinSolverDirectRocSolverRf::printCliParam(const std::string id) const { switch (getParamId(id)) @@ -596,57 +614,67 @@ namespace ReSolve void LinSolverDirectRocSolverRf::combineFactors(matrix::Sparse* L, matrix::Sparse* U) { // L and U need to be in CSC format - index_type n = L->getNumRows(); + index_type n = L->getNumRows(); index_type* Lp = L->getColData(ReSolve::memory::HOST); index_type* Li = L->getRowData(ReSolve::memory::HOST); index_type* Up = U->getColData(ReSolve::memory::HOST); index_type* Ui = U->getRowData(ReSolve::memory::HOST); - if (M_ != nullptr) { + if (M_ != nullptr) + { delete M_; } - index_type nnzM = ( L->getNnz() + U->getNnz() - n ); - M_ = new matrix::Csr(n, n, nnzM); + index_type nnzM = (L->getNnz() + U->getNnz() - n); + M_ = new matrix::Csr(n, n, nnzM); M_->allocateMatrixData(ReSolve::memory::DEVICE); M_->allocateMatrixData(ReSolve::memory::HOST); index_type* mia = M_->getRowData(ReSolve::memory::HOST); index_type* mja = M_->getColData(ReSolve::memory::HOST); - index_type row; - for(index_type i = 0; i < n; ++i) { + index_type row; + for (index_type i = 0; i < n; ++i) + { // go through EACH COLUMN OF L first - for(index_type j = Lp[i]; j < Lp[i + 1]; ++j) { + for (index_type j = Lp[i]; j < Lp[i + 1]; ++j) + { row = Li[j]; // BUT dont count diagonal twice, important - if(row != i) { + if (row != i) + { mia[row + 1]++; } } // then each column of U - for(index_type j = Up[i]; j < Up[i + 1]; ++j) { + for (index_type j = Up[i]; j < Up[i + 1]; ++j) + { row = Ui[j]; mia[row + 1]++; } } // then organize mia_; mia[0] = 0; - for(index_type i = 1; i < n + 1; i++) { + for (index_type i = 1; i < n + 1; i++) + { mia[i] += mia[i - 1]; } std::vector Mshifts(static_cast(n), 0); - for(index_type i = 0; i < n; ++i) { + for (index_type i = 0; i < n; ++i) + { // go through EACH COLUMN OF L first - for(int j = Lp[i]; j < Lp[i + 1]; ++j) { + for (int j = Lp[i]; j < Lp[i + 1]; ++j) + { row = Li[j]; - if(row != i) { + if (row != i) + { // place (row, i) where it belongs! mja[mia[row] + Mshifts[static_cast(row)]] = i; Mshifts[static_cast(row)]++; } } // each column of U next - for(index_type j = Up[i]; j < Up[i + 1]; ++j) { - row = Ui[j]; + for (index_type j = Up[i]; j < Up[i + 1]; ++j) + { + row = Ui[j]; mja[mia[row] + Mshifts[static_cast(row)]] = i; Mshifts[static_cast(row)]++; } @@ -662,4 +690,4 @@ namespace ReSolve { params_list_["solve_mode"] = SOLVE_MODE; } -} // namespace resolve +} // namespace ReSolve diff --git a/resolve/LinSolverDirectRocSolverRf.hpp b/resolve/LinSolverDirectRocSolverRf.hpp index 1dacc3d4b..d4702aeea 100644 --- a/resolve/LinSolverDirectRocSolverRf.hpp +++ b/resolve/LinSolverDirectRocSolverRf.hpp @@ -1,16 +1,16 @@ #pragma once -#include +#include #include #include -#include +#include #include "Common.hpp" +#include #include #include -#include -namespace ReSolve +namespace ReSolve { // Forward declaration of vector::Vector class namespace vector @@ -23,71 +23,75 @@ namespace ReSolve { class Sparse; class Csr; - } - - class LinSolverDirectRocSolverRf : public LinSolverDirect + } // namespace matrix + + class LinSolverDirectRocSolverRf : public LinSolverDirect { using vector_type = vector::Vector; - - public: - LinSolverDirectRocSolverRf(LinAlgWorkspaceHIP* workspace); - ~LinSolverDirectRocSolverRf(); - - int setup(matrix::Sparse* A, - matrix::Sparse* L, - matrix::Sparse* U, - index_type* P, - index_type* Q, - vector_type* rhs) override; - - int refactorize() override; - int solve(vector_type* rhs, vector_type* x) override; - int solve(vector_type* rhs) override; // the solution overwrites rhs - - int setSolveMode(int mode); // should probably be enum - int getSolveMode() const; //should be enum too - - int setCliParam(const std::string id, const std::string value) override; - std::string getCliParamString(const std::string id) const override; - index_type getCliParamInt(const std::string id) const override; - real_type getCliParamReal(const std::string id) const override; - bool getCliParamBool(const std::string id) const override; - int printCliParam(const std::string id) const override; - - private: - enum ParamaterIDs {SOLVE_MODE=0}; - int solve_mode_{0}; // 0 - default; 1 - use rocparse trisolver - - private: - // to be exported to matrix handler in a later time - void combineFactors(matrix::Sparse* L, matrix::Sparse* U); //create L+U from separate L, U factors - void initParamList(); - - rocblas_status status_rocblas_; - rocsparse_status status_rocsparse_; - index_type* d_P_{nullptr}; - index_type* d_Q_{nullptr}; - - MemoryHandler mem_; ///< Device memory manager object - LinAlgWorkspaceHIP* workspace_; - - rocsolver_rfinfo infoM_; - matrix::Sparse* M_{nullptr};//the matrix that contains added factors - - // not used by default - for fast solve - rocsparse_mat_descr descr_L_{nullptr}; - rocsparse_mat_descr descr_U_{nullptr}; - - rocsparse_mat_info info_L_{nullptr}; - rocsparse_mat_info info_U_{nullptr}; - - void* L_buffer_{nullptr}; - void* U_buffer_{nullptr}; - - ReSolve::matrix::Csr* L_csr_{nullptr}; - ReSolve::matrix::Csr* U_csr_{nullptr}; - - real_type* d_aux1_{nullptr}; - real_type* d_aux2_{nullptr}; + + public: + LinSolverDirectRocSolverRf(LinAlgWorkspaceHIP* workspace); + ~LinSolverDirectRocSolverRf(); + + int setup(matrix::Sparse* A, + matrix::Sparse* L, + matrix::Sparse* U, + index_type* P, + index_type* Q, + vector_type* rhs) override; + + int refactorize() override; + int solve(vector_type* rhs, vector_type* x) override; + int solve(vector_type* rhs) override; // the solution overwrites rhs + + int setSolveMode(int mode); // should probably be enum + int getSolveMode() const; // should be enum too + + int setCliParam(const std::string id, const std::string value) override; + std::string getCliParamString(const std::string id) const override; + index_type getCliParamInt(const std::string id) const override; + real_type getCliParamReal(const std::string id) const override; + bool getCliParamBool(const std::string id) const override; + int printCliParam(const std::string id) const override; + + private: + enum ParamaterIDs + { + SOLVE_MODE = 0 + }; + + int solve_mode_{0}; // 0 - default; 1 - use rocparse trisolver + + private: + // to be exported to matrix handler in a later time + void combineFactors(matrix::Sparse* L, matrix::Sparse* U); // create L+U from separate L, U factors + void initParamList(); + + rocblas_status status_rocblas_; + rocsparse_status status_rocsparse_; + index_type* d_P_{nullptr}; + index_type* d_Q_{nullptr}; + + MemoryHandler mem_; ///< Device memory manager object + LinAlgWorkspaceHIP* workspace_; + + rocsolver_rfinfo infoM_; + matrix::Sparse* M_{nullptr}; // the matrix that contains added factors + + // not used by default - for fast solve + rocsparse_mat_descr descr_L_{nullptr}; + rocsparse_mat_descr descr_U_{nullptr}; + + rocsparse_mat_info info_L_{nullptr}; + rocsparse_mat_info info_U_{nullptr}; + + void* L_buffer_{nullptr}; + void* U_buffer_{nullptr}; + + ReSolve::matrix::Csr* L_csr_{nullptr}; + ReSolve::matrix::Csr* U_csr_{nullptr}; + + real_type* d_aux1_{nullptr}; + real_type* d_aux2_{nullptr}; }; -} +} // namespace ReSolve diff --git a/resolve/LinSolverDirectRocSparseILU0.cpp b/resolve/LinSolverDirectRocSparseILU0.cpp index 479686419..8be374924 100644 --- a/resolve/LinSolverDirectRocSparseILU0.cpp +++ b/resolve/LinSolverDirectRocSparseILU0.cpp @@ -1,10 +1,10 @@ -#include -#include #include "LinSolverDirectRocSparseILU0.hpp" +#include #include +#include -namespace ReSolve +namespace ReSolve { using out = io::Logger; @@ -24,19 +24,19 @@ namespace ReSolve matrix::Sparse*, index_type*, index_type*, - vector_type* ) + vector_type*) { - //remember - P and Q are generally CPU variables + // remember - P and Q are generally CPU variables int error_sum = 0; - this->A_ = (matrix::Csr*) A; - index_type n = A_->getNumRows(); + this->A_ = (matrix::Csr*) A; + index_type n = A_->getNumRows(); index_type nnz = A_->getNnz(); - mem_.allocateArrayOnDevice(&d_ILU_vals_,nnz); - //copy A values to a buffer first + mem_.allocateArrayOnDevice(&d_ILU_vals_, nnz); + // copy A values to a buffer first mem_.copyArrayDeviceToDevice(d_ILU_vals_, A_->getValues(ReSolve::memory::DEVICE), nnz); - //set up descriptors + // set up descriptors // Create matrix descriptor for A rocsparse_create_mat_descr(&descr_A_); @@ -58,150 +58,155 @@ namespace ReSolve size_t buffer_size_L; size_t buffer_size_U; - status_rocsparse_ = rocsparse_dcsrilu0_buffer_size(workspace_->getRocsparseHandle(), - n, - nnz, + status_rocsparse_ = rocsparse_dcsrilu0_buffer_size(workspace_->getRocsparseHandle(), + n, + nnz, descr_A_, - d_ILU_vals_, //vals_, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), - info_A_, + d_ILU_vals_, // vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), + info_A_, &buffer_size_A); - if (status_rocsparse_ != 0) { - io::Logger::warning() << "Buffer size estimate for ILU0 failed with code: " <getRocsparseHandle(), - rocsparse_operation_none, - n, - nnz, + error_sum += status_rocsparse_; + status_rocsparse_ = rocsparse_dcsrsv_buffer_size(workspace_->getRocsparseHandle(), + rocsparse_operation_none, + n, + nnz, descr_L_, - d_ILU_vals_, //vals_, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), - info_A_, + d_ILU_vals_, // vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), + info_A_, &buffer_size_L); - if (status_rocsparse_ != 0) { - io::Logger::warning() << "Buffer size estimate for L solve failed with code: " <getRocsparseHandle(), - rocsparse_operation_none, - n, - nnz, + status_rocsparse_ = rocsparse_dcsrsv_buffer_size(workspace_->getRocsparseHandle(), + rocsparse_operation_none, + n, + nnz, descr_U_, - d_ILU_vals_, //vals_, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), + d_ILU_vals_, // vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), info_A_, &buffer_size_U); - if (status_rocsparse_ != 0) { - io::Logger::warning() << "Buffer size estimate for U solve failed with code: " <getRocsparseHandle(), - n, - nnz, + status_rocsparse_ = rocsparse_dcsrilu0_analysis(workspace_->getRocsparseHandle(), + n, + nnz, descr_A_, - d_ILU_vals_, //vals_, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), + d_ILU_vals_, // vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), info_A_, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, buffer_); - if (status_rocsparse_ != 0) { - io::Logger::warning() << "ILU0 decomposition analysis failed with code: " <getRocsparseHandle(), - rocsparse_operation_none, - n, - nnz, + status_rocsparse_ = rocsparse_dcsrsv_analysis(workspace_->getRocsparseHandle(), + rocsparse_operation_none, + n, + nnz, descr_L_, - d_ILU_vals_, //vals_, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), - info_A_, + d_ILU_vals_, // vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), + info_A_, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, buffer_); - if (status_rocsparse_ != 0) { - io::Logger::warning() << "Solve analysis for L solve failed with code: " <getRocsparseHandle(), - rocsparse_operation_none, - n, - nnz, + status_rocsparse_ = rocsparse_dcsrsv_analysis(workspace_->getRocsparseHandle(), + rocsparse_operation_none, + n, + nnz, descr_U_, - d_ILU_vals_, //vals_, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), + d_ILU_vals_, // vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), info_A_, - rocsparse_analysis_policy_reuse, + rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, buffer_); - if (status_rocsparse_ != 0) { - io::Logger::warning() << "Solve analysis for U solve failed with code: " <getRocsparseHandle(), - n, - nnz, + status_rocsparse_ = rocsparse_dcsrilu0(workspace_->getRocsparseHandle(), + n, + nnz, descr_A_, - d_ILU_vals_, //vals_ - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), + d_ILU_vals_, // vals_ + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), info_A_, rocsparse_solve_policy_auto, buffer_); - if (status_rocsparse_ != 0) { - io::Logger::warning() << "ILU0 decomposition failed with code: " <A_ = A; - index_type n = A_->getNumRows(); + int error_sum = 0; + this->A_ = A; + index_type n = A_->getNumRows(); index_type nnz = A_->getNnz(); mem_.copyArrayDeviceToDevice(d_ILU_vals_, A_->getValues(ReSolve::memory::DEVICE), nnz); - status_rocsparse_ = rocsparse_dcsrilu0(workspace_->getRocsparseHandle(), - n, - nnz, + status_rocsparse_ = rocsparse_dcsrilu0(workspace_->getRocsparseHandle(), + n, + nnz, descr_A_, - d_ILU_vals_, //vals_, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), + d_ILU_vals_, // vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), info_A_, rocsparse_solve_policy_auto, buffer_); @@ -210,41 +215,42 @@ namespace ReSolve return error_sum; } + // solution is returned in RHS int LinSolverDirectRocSparseILU0::solve(vector_type* rhs) { - int error_sum = 0; - status_rocsparse_ = rocsparse_dcsrsv_solve(workspace_->getRocsparseHandle(), + int error_sum = 0; + status_rocsparse_ = rocsparse_dcsrsv_solve(workspace_->getRocsparseHandle(), rocsparse_operation_none, A_->getNumRows(), - A_->getNnz(), - &(constants::ONE), + A_->getNnz(), + &(constants::ONE), descr_L_, - d_ILU_vals_, //vals_, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), + d_ILU_vals_, // vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), info_A_, rhs->getData(ReSolve::memory::DEVICE), - d_aux1_, //result + d_aux1_, // result rocsparse_solve_policy_auto, buffer_); - error_sum += status_rocsparse_; + error_sum += status_rocsparse_; - status_rocsparse_ = rocsparse_dcsrsv_solve(workspace_->getRocsparseHandle(), + status_rocsparse_ = rocsparse_dcsrsv_solve(workspace_->getRocsparseHandle(), rocsparse_operation_none, A_->getNumRows(), - A_->getNnz(), - &(constants::ONE), + A_->getNnz(), + &(constants::ONE), descr_U_, - d_ILU_vals_, //vals_, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), + d_ILU_vals_, // vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), info_A_, - d_aux1_, //input - rhs->getData(ReSolve::memory::DEVICE),//result + d_aux1_, // input + rhs->getData(ReSolve::memory::DEVICE), // result rocsparse_solve_policy_auto, buffer_); - error_sum += status_rocsparse_; + error_sum += status_rocsparse_; rhs->setDataUpdated(ReSolve::memory::DEVICE); return error_sum; @@ -254,52 +260,50 @@ namespace ReSolve { int error_sum = 0; + status_rocsparse_ = rocsparse_dcsrsv_solve(workspace_->getRocsparseHandle(), + rocsparse_operation_none, + A_->getNumRows(), + A_->getNnz(), + &(constants::ONE), + descr_L_, + d_ILU_vals_, // vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), + info_A_, + rhs->getData(ReSolve::memory::DEVICE), + d_aux1_, // result + rocsparse_solve_policy_auto, + buffer_); + error_sum += status_rocsparse_; - - status_rocsparse_ = rocsparse_dcsrsv_solve(workspace_->getRocsparseHandle(), - rocsparse_operation_none, - A_->getNumRows(), - A_->getNnz(), - &(constants::ONE), - descr_L_, - d_ILU_vals_, //vals_, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), - info_A_, - rhs->getData(ReSolve::memory::DEVICE), - d_aux1_, //result - rocsparse_solve_policy_auto, - buffer_); - error_sum += status_rocsparse_; - - status_rocsparse_ = rocsparse_dcsrsv_solve(workspace_->getRocsparseHandle(), + status_rocsparse_ = rocsparse_dcsrsv_solve(workspace_->getRocsparseHandle(), rocsparse_operation_none, A_->getNumRows(), - A_->getNnz(), - &(constants::ONE), + A_->getNnz(), + &(constants::ONE), descr_U_, - d_ILU_vals_, //vals_, - A_->getRowData(ReSolve::memory::DEVICE), - A_->getColData(ReSolve::memory::DEVICE), + d_ILU_vals_, // vals_, + A_->getRowData(ReSolve::memory::DEVICE), + A_->getColData(ReSolve::memory::DEVICE), info_A_, - d_aux1_, //input - x->getData(ReSolve::memory::DEVICE),//result + d_aux1_, // input + x->getData(ReSolve::memory::DEVICE), // result rocsparse_solve_policy_auto, buffer_); - error_sum += status_rocsparse_; - + error_sum += status_rocsparse_; + x->setDataUpdated(ReSolve::memory::DEVICE); return error_sum; } - + /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param[in] id - string ID for parameter to get. * @param[in] value unused/ignored * @return int Value of the int parameter to return. @@ -308,19 +312,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - std::cout << "Setting parameter failed!\n"; + default: + std::cout << "Setting parameter failed!\n"; } return 0; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return std::string Value of the string parameter to return. */ @@ -328,19 +332,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown string parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown string parameter " << id << "\n"; } return ""; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return int Value of the int parameter to return. */ @@ -348,19 +352,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown integer parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown integer parameter " << id << "\n"; } return -1; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return real_type Value of the real_type parameter to return. */ @@ -368,19 +372,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown real parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown real parameter " << id << "\n"; } return std::numeric_limits::quiet_NaN(); } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return bool Value of the bool parameter to return. */ @@ -388,8 +392,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown boolean parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown boolean parameter " << id << "\n"; } return false; } @@ -405,4 +409,4 @@ namespace ReSolve return 0; } -}// namespace resolve +} // namespace ReSolve diff --git a/resolve/LinSolverDirectRocSparseILU0.hpp b/resolve/LinSolverDirectRocSparseILU0.hpp index 7b9f9f67e..1a18a14d4 100644 --- a/resolve/LinSolverDirectRocSparseILU0.hpp +++ b/resolve/LinSolverDirectRocSparseILU0.hpp @@ -1,16 +1,16 @@ #pragma once -#include -#include #include +#include +#include #include "Common.hpp" +#include #include #include -#include -namespace ReSolve +namespace ReSolve { // Forward declaration of vector::Vector class namespace vector @@ -24,49 +24,49 @@ namespace ReSolve class Sparse; } - class LinSolverDirectRocSparseILU0 : public LinSolverDirect + class LinSolverDirectRocSparseILU0 : public LinSolverDirect { using vector_type = vector::Vector; - - public: - LinSolverDirectRocSparseILU0(LinAlgWorkspaceHIP* workspace); - ~LinSolverDirectRocSparseILU0(); - - int setup(matrix::Sparse* A, - matrix::Sparse* L = nullptr, - matrix::Sparse* U = nullptr, - index_type* P = nullptr, - index_type* Q = nullptr, - vector_type* rhs = nullptr) override; - // if values of A change, but the nnz pattern does not, redo the analysis only (reuse buffers though) - int reset(matrix::Sparse* A); - - int solve(vector_type* rhs, vector_type* x) override; - int solve(vector_type* rhs) override; // the solution is returned IN RHS (rhs is overwritten) - - int setCliParam(const std::string id, const std::string value) override; - std::string getCliParamString(const std::string id) const override; - index_type getCliParamInt(const std::string id) const override; - real_type getCliParamReal(const std::string id) const override; - bool getCliParamBool(const std::string id) const override; - int printCliParam(const std::string id) const override; - - private: - rocsparse_status status_rocsparse_; - - MemoryHandler mem_; ///< Device memory manager object - LinAlgWorkspaceHIP* workspace_{nullptr}; - - rocsparse_mat_descr descr_A_{nullptr}; - rocsparse_mat_descr descr_L_{nullptr}; - rocsparse_mat_descr descr_U_{nullptr}; - - rocsparse_mat_info info_A_{nullptr}; - - void* buffer_{nullptr}; - - real_type* d_aux1_{nullptr}; - // since ILU OVERWRITES THE MATRIX values, we need a buffer to keep the values of ILU decomposition. - real_type* d_ILU_vals_{nullptr}; + + public: + LinSolverDirectRocSparseILU0(LinAlgWorkspaceHIP* workspace); + ~LinSolverDirectRocSparseILU0(); + + int setup(matrix::Sparse* A, + matrix::Sparse* L = nullptr, + matrix::Sparse* U = nullptr, + index_type* P = nullptr, + index_type* Q = nullptr, + vector_type* rhs = nullptr) override; + // if values of A change, but the nnz pattern does not, redo the analysis only (reuse buffers though) + int reset(matrix::Sparse* A); + + int solve(vector_type* rhs, vector_type* x) override; + int solve(vector_type* rhs) override; // the solution is returned IN RHS (rhs is overwritten) + + int setCliParam(const std::string id, const std::string value) override; + std::string getCliParamString(const std::string id) const override; + index_type getCliParamInt(const std::string id) const override; + real_type getCliParamReal(const std::string id) const override; + bool getCliParamBool(const std::string id) const override; + int printCliParam(const std::string id) const override; + + private: + rocsparse_status status_rocsparse_; + + MemoryHandler mem_; ///< Device memory manager object + LinAlgWorkspaceHIP* workspace_{nullptr}; + + rocsparse_mat_descr descr_A_{nullptr}; + rocsparse_mat_descr descr_L_{nullptr}; + rocsparse_mat_descr descr_U_{nullptr}; + + rocsparse_mat_info info_A_{nullptr}; + + void* buffer_{nullptr}; + + real_type* d_aux1_{nullptr}; + // since ILU OVERWRITES THE MATRIX values, we need a buffer to keep the values of ILU decomposition. + real_type* d_ILU_vals_{nullptr}; }; -}// namespace +} // namespace ReSolve diff --git a/resolve/LinSolverDirectSerialILU0.cpp b/resolve/LinSolverDirectSerialILU0.cpp index 930bbae2f..2f22dcb69 100644 --- a/resolve/LinSolverDirectSerialILU0.cpp +++ b/resolve/LinSolverDirectSerialILU0.cpp @@ -1,11 +1,12 @@ -#include -#include -#include #include "LinSolverDirectSerialILU0.hpp" +#include + +#include #include +#include -namespace ReSolve +namespace ReSolve { using out = io::Logger; @@ -16,14 +17,15 @@ namespace ReSolve LinSolverDirectSerialILU0::~LinSolverDirectSerialILU0() { - if (owns_factors_) { + if (owns_factors_) + { delete L_; delete U_; L_ = nullptr; U_ = nullptr; } - delete [] h_aux1_; - delete [] h_ILU_vals_; + delete[] h_aux1_; + delete[] h_ILU_vals_; } int LinSolverDirectSerialILU0::setup(matrix::Sparse* A, @@ -31,19 +33,20 @@ namespace ReSolve matrix::Sparse*, index_type*, index_type*, - vector_type* ) + vector_type*) { - this->A_ = (matrix::Csr*) A; - index_type n = A_->getNumRows(); + this->A_ = (matrix::Csr*) A; + index_type n = A_->getNumRows(); index_type nnz = A_->getNnz(); h_ILU_vals_ = new real_type[nnz]; - h_aux1_ = new real_type[n]; + h_aux1_ = new real_type[n]; - index_type zero_pivot = 0; // no zero pivot + index_type zero_pivot = 0; // no zero pivot - //copy A values to a buffer first - for (index_type i = 0; i < nnz; ++i) { + // copy A values to a buffer first + for (index_type i = 0; i < nnz; ++i) + { h_ILU_vals_[i] = A_->getValues(ReSolve::memory::HOST)[i]; } @@ -52,89 +55,107 @@ namespace ReSolve index_type* ja_mapper = new index_type[n]; // aux scalars for indexing etc - index_type k, j, jw, j1, j2; + index_type k, j, jw, j1, j2; - for (index_type i = 0; i < n; ++i) { + for (index_type i = 0; i < n; ++i) + { j1 = A_->getRowData(ReSolve::memory::HOST)[i]; j2 = A_->getRowData(ReSolve::memory::HOST)[i + 1]; - for (index_type j = j1; j < j2; ++j) { + for (index_type j = j1; j < j2; ++j) + { ja_mapper[A_->getColData(ReSolve::memory::HOST)[j]] = j; } - // IJK ILU + // IJK ILU j = j1; - while ( j < j2) { + while (j < j2) + { k = A_->getColData(ReSolve::memory::HOST)[j]; - if (k < i) { + if (k < i) + { h_ILU_vals_[j] /= h_ILU_vals_[u_ptr[k]]; - for (index_type jj = u_ptr[k] + 1; jj < A_->getRowData(ReSolve::memory::HOST)[k + 1]; ++jj) { + for (index_type jj = u_ptr[k] + 1; jj < A_->getRowData(ReSolve::memory::HOST)[k + 1]; ++jj) + { jw = ja_mapper[A_->getColData(ReSolve::memory::HOST)[jj]]; - if (jw != 0) { - h_ILU_vals_[jw] -= h_ILU_vals_[j] * h_ILU_vals_[jj]; + if (jw != 0) + { + h_ILU_vals_[jw] -= h_ILU_vals_[j] * h_ILU_vals_[jj]; } - } - } else { + } + } + else + { break; } - j++; + j++; } u_ptr[i] = j; - if ((k != i) || (fabs(h_ILU_vals_[j]) < 1e-16)) { + if ((k != i) || (fabs(h_ILU_vals_[j]) < 1e-16)) + { zero_pivot = -1; // zero pivot is in place (i,i) on the diagonal return zero_pivot; } // reset mapper - for (index_type j = j1; j< j2; ++j) { + for (index_type j = j1; j < j2; ++j) + { ja_mapper[A_->getColData(ReSolve::memory::HOST)[j]] = 0; } } - //clean up - delete [] ja_mapper; - delete [] u_ptr; + // clean up + delete[] ja_mapper; + delete[] u_ptr; // split into L and U! index_type nnzL = 0, nnzU = 0; // the diagonal values GO TO U, L has 1s on the diagonal - for (index_type i = 0; i < n; ++i) { + for (index_type i = 0; i < n; ++i) + { j1 = A_->getRowData(ReSolve::memory::HOST)[i]; j2 = A_->getRowData(ReSolve::memory::HOST)[i + 1]; - for (index_type j = j1; j < j2; ++j) { - if (A->getColData(ReSolve::memory::HOST)[j] == i) { + for (index_type j = j1; j < j2; ++j) + { + if (A->getColData(ReSolve::memory::HOST)[j] == i) + { // diagonal, add to both nnzL++; nnzU++; } - if (A->getColData(ReSolve::memory::HOST)[j] > i) { + if (A->getColData(ReSolve::memory::HOST)[j] > i) + { // upper part nnzU++; } - if (A->getColData(ReSolve::memory::HOST)[j] < i) { + if (A->getColData(ReSolve::memory::HOST)[j] < i) + { // lower part nnzL++; } - } + } } // TODO: What is the purpose of nnzL and nnzU if they are not used after this? // allocate L and U - L_ = new matrix::Csr(n, n, nnzL, false, true); - U_ = new matrix::Csr(n, n, nnzU, false, true); + L_ = new matrix::Csr(n, n, nnzL, false, true); + U_ = new matrix::Csr(n, n, nnzU, false, true); owns_factors_ = true; - L_->allocateMatrixData(ReSolve::memory::HOST); - U_->allocateMatrixData(ReSolve::memory::HOST); - index_type lit = 0, uit = 0, kL, kU; + L_->allocateMatrixData(ReSolve::memory::HOST); + U_->allocateMatrixData(ReSolve::memory::HOST); + index_type lit = 0, uit = 0, kL, kU; L_->getRowData(ReSolve::memory::HOST)[0] = 0; U_->getRowData(ReSolve::memory::HOST)[0] = 0; - for (index_type i = 0; i < n; ++i) { + for (index_type i = 0; i < n; ++i) + { j1 = A_->getRowData(ReSolve::memory::HOST)[i]; j2 = A_->getRowData(ReSolve::memory::HOST)[i + 1]; kL = 0; kU = 0; - for (index_type j = j1; j < j2; ++j) { + for (index_type j = j1; j < j2; ++j) + { - if (A->getColData(ReSolve::memory::HOST)[j] == i) { + if (A->getColData(ReSolve::memory::HOST)[j] == i) + { // diagonal, add to both L_->getValues(ReSolve::memory::HOST)[lit] = 1.0; @@ -149,30 +170,33 @@ namespace ReSolve kU++; } - if (A->getColData(ReSolve::memory::HOST)[j] > i) { + if (A->getColData(ReSolve::memory::HOST)[j] > i) + { // upper part - U_->getValues(ReSolve::memory::HOST) [uit] = h_ILU_vals_[j]; - U_->getColData(ReSolve::memory::HOST)[uit] = A_->getColData(ReSolve::memory::HOST)[j]; ; + U_->getValues(ReSolve::memory::HOST)[uit] = h_ILU_vals_[j]; + U_->getColData(ReSolve::memory::HOST)[uit] = A_->getColData(ReSolve::memory::HOST)[j]; + ; uit++; kU++; } - if (A->getColData(ReSolve::memory::HOST)[j] < i) { + if (A->getColData(ReSolve::memory::HOST)[j] < i) + { // lower part - L_->getValues(ReSolve::memory::HOST) [lit] = h_ILU_vals_[j]; - L_->getColData(ReSolve::memory::HOST)[lit] = A_->getColData(ReSolve::memory::HOST)[j]; + L_->getValues(ReSolve::memory::HOST)[lit] = h_ILU_vals_[j]; + L_->getColData(ReSolve::memory::HOST)[lit] = A_->getColData(ReSolve::memory::HOST)[j]; lit++; kL++; } - } + } // update row pointers (offsets) - L_->getRowData(ReSolve::memory::HOST)[i + 1] = L_->getRowData(ReSolve::memory::HOST)[i] + kL; - U_->getRowData(ReSolve::memory::HOST)[i + 1] = U_->getRowData(ReSolve::memory::HOST)[i] + kU; + L_->getRowData(ReSolve::memory::HOST)[i + 1] = L_->getRowData(ReSolve::memory::HOST)[i] + kL; + U_->getRowData(ReSolve::memory::HOST)[i + 1] = U_->getRowData(ReSolve::memory::HOST)[i] + kU; } - + return zero_pivot; } @@ -180,30 +204,35 @@ namespace ReSolve { return this->setup(A); } + // solution is returned in RHS int LinSolverDirectSerialILU0::solve(vector_type* rhs) { int error_sum = 0; // printf("solve t 1\n"); // h_aux1 = L^{-1} rhs - for (index_type i = 0; i < L_->getNumRows(); ++i) { + for (index_type i = 0; i < L_->getNumRows(); ++i) + { h_aux1_[i] = rhs->getData(ReSolve::memory::HOST)[i]; - for (index_type j = L_->getRowData(ReSolve::memory::HOST)[i]; j < L_->getRowData(ReSolve::memory::HOST)[i + 1] - 1; ++j) { - index_type col = L_->getColData(ReSolve::memory::HOST)[j]; - h_aux1_[i] -= L_->getValues(ReSolve::memory::HOST)[j] * h_aux1_[col]; + for (index_type j = L_->getRowData(ReSolve::memory::HOST)[i]; j < L_->getRowData(ReSolve::memory::HOST)[i + 1] - 1; ++j) + { + index_type col = L_->getColData(ReSolve::memory::HOST)[j]; + h_aux1_[i] -= L_->getValues(ReSolve::memory::HOST)[j] * h_aux1_[col]; } h_aux1_[i] /= L_->getValues(ReSolve::memory::HOST)[L_->getRowData(ReSolve::memory::HOST)[i + 1] - 1]; } // rhs = U^{-1} h_aux1 - for (index_type i = A_->getNumRows() - 1; i >= 0; --i) { + for (index_type i = A_->getNumRows() - 1; i >= 0; --i) + { rhs->getData(ReSolve::memory::HOST)[i] = h_aux1_[i]; - for (index_type j = U_->getRowData(ReSolve::memory::HOST)[i] + 1; j < U_->getRowData(ReSolve::memory::HOST)[i + 1]; ++j) { - index_type col = U_->getColData(ReSolve::memory::HOST)[j]; + for (index_type j = U_->getRowData(ReSolve::memory::HOST)[i] + 1; j < U_->getRowData(ReSolve::memory::HOST)[i + 1]; ++j) + { + index_type col = U_->getColData(ReSolve::memory::HOST)[j]; rhs->getData(ReSolve::memory::HOST)[i] -= U_->getValues(ReSolve::memory::HOST)[j] * rhs->getData(ReSolve::memory::HOST)[col]; } - rhs->getData(ReSolve::memory::HOST)[i] /= U_->getValues(ReSolve::memory::HOST)[U_->getRowData(ReSolve::memory::HOST)[i]]; //divide by the diagonal entry + rhs->getData(ReSolve::memory::HOST)[i] /= U_->getValues(ReSolve::memory::HOST)[U_->getRowData(ReSolve::memory::HOST)[i]]; // divide by the diagonal entry } return error_sum; @@ -211,41 +240,45 @@ namespace ReSolve int LinSolverDirectSerialILU0::solve(vector_type* rhs, vector_type* x) { - //printf("solve t 2i, L has %d rows, U has %d rows \n", L_->getNumRows(), U_->getNumRows()); + // printf("solve t 2i, L has %d rows, U has %d rows \n", L_->getNumRows(), U_->getNumRows()); int error_sum = 0; // h_aux1 = L^{-1} rhs - //for (int ii=0; ii<10; ++ii) printf("y[%d] = %16.16f \n ", ii, rhs->getData(ReSolve::memory::HOST)[ii]); - for (index_type i = 0; i < L_->getNumRows(); ++i) { + // for (int ii=0; ii<10; ++ii) printf("y[%d] = %16.16f \n ", ii, rhs->getData(ReSolve::memory::HOST)[ii]); + for (index_type i = 0; i < L_->getNumRows(); ++i) + { h_aux1_[i] = rhs->getData(ReSolve::memory::HOST)[i]; - for (index_type j = L_->getRowData(ReSolve::memory::HOST)[i]; j < L_->getRowData(ReSolve::memory::HOST)[i + 1] - 1; ++j) { - index_type col = L_->getColData(ReSolve::memory::HOST)[j]; - h_aux1_[i] -= L_->getValues(ReSolve::memory::HOST)[j] * h_aux1_[col]; + for (index_type j = L_->getRowData(ReSolve::memory::HOST)[i]; j < L_->getRowData(ReSolve::memory::HOST)[i + 1] - 1; ++j) + { + index_type col = L_->getColData(ReSolve::memory::HOST)[j]; + h_aux1_[i] -= L_->getValues(ReSolve::memory::HOST)[j] * h_aux1_[col]; } h_aux1_[i] /= L_->getValues(ReSolve::memory::HOST)[L_->getRowData(ReSolve::memory::HOST)[i + 1] - 1]; } - //for (int ii=0; ii<10; ++ii) printf("(L)^{-1}y[%d] = %16.16f \n ", ii, h_aux1_[ii]); - // x = U^{-1} h_aux1 + // for (int ii=0; ii<10; ++ii) printf("(L)^{-1}y[%d] = %16.16f \n ", ii, h_aux1_[ii]); + // x = U^{-1} h_aux1 - for (index_type i = U_->getNumRows() - 1; i >= 0; --i) { + for (index_type i = U_->getNumRows() - 1; i >= 0; --i) + { x->getData(ReSolve::memory::HOST)[i] = h_aux1_[i]; - for (index_type j = U_->getRowData(ReSolve::memory::HOST)[i] + 1; j < U_->getRowData(ReSolve::memory::HOST)[i + 1]; ++j) { - index_type col = U_->getColData(ReSolve::memory::HOST)[j]; - x->getData(ReSolve::memory::HOST)[i] -= U_->getValues(ReSolve::memory::HOST)[j] * x->getData(ReSolve::memory::HOST)[col]; + for (index_type j = U_->getRowData(ReSolve::memory::HOST)[i] + 1; j < U_->getRowData(ReSolve::memory::HOST)[i + 1]; ++j) + { + index_type col = U_->getColData(ReSolve::memory::HOST)[j]; + x->getData(ReSolve::memory::HOST)[i] -= U_->getValues(ReSolve::memory::HOST)[j] * x->getData(ReSolve::memory::HOST)[col]; } - x->getData(ReSolve::memory::HOST)[i] /= U_->getValues(ReSolve::memory::HOST)[U_->getRowData(ReSolve::memory::HOST)[i]]; //divide by the diagonal entry + x->getData(ReSolve::memory::HOST)[i] /= U_->getValues(ReSolve::memory::HOST)[U_->getRowData(ReSolve::memory::HOST)[i]]; // divide by the diagonal entry } - //for (int ii=0; ii<10; ++ii) printf("(LU)^{-1}y[%d] = %16.16f \n ", ii, x->getData(ReSolve::memory::HOST)[ii]); - return error_sum; + // for (int ii=0; ii<10; ++ii) printf("(LU)^{-1}y[%d] = %16.16f \n ", ii, x->getData(ReSolve::memory::HOST)[ii]); + return error_sum; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to set. * @return int Error code. */ @@ -253,19 +286,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - std::cout << "Setting parameter failed!\n"; + default: + std::cout << "Setting parameter failed!\n"; } return 0; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return std::string Value of the string parameter to return. */ @@ -273,19 +306,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown string parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown string parameter " << id << "\n"; } return ""; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return int Value of the int parameter to return. */ @@ -293,19 +326,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown integer parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown integer parameter " << id << "\n"; } return -1; } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return real_type Value of the real_type parameter to return. */ @@ -313,19 +346,19 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown real parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown real parameter " << id << "\n"; } return std::numeric_limits::quiet_NaN(); } /** * @brief Placeholder function for now. - * + * * The following switch (getParamId(Id)) cases always run the default and * are currently redundant code (like an if (true)). * In the future, they will be expanded to include more options. - * + * * @param id - string ID for parameter to get. * @return bool Value of the bool parameter to return. */ @@ -333,8 +366,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown boolean parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown boolean parameter " << id << "\n"; } return false; } @@ -350,4 +383,4 @@ namespace ReSolve return 0; } -} // namespace resolve +} // namespace ReSolve diff --git a/resolve/LinSolverDirectSerialILU0.hpp b/resolve/LinSolverDirectSerialILU0.hpp index 7dc4914ac..64b96632a 100644 --- a/resolve/LinSolverDirectSerialILU0.hpp +++ b/resolve/LinSolverDirectSerialILU0.hpp @@ -2,11 +2,11 @@ #pragma once #include "Common.hpp" +#include #include #include -#include -namespace ReSolve +namespace ReSolve { // Forward declaration of vector::Vector class namespace vector @@ -20,41 +20,40 @@ namespace ReSolve class Sparse; } - class LinSolverDirectSerialILU0 : public LinSolverDirect + class LinSolverDirectSerialILU0 : public LinSolverDirect { using vector_type = vector::Vector; - - public: - LinSolverDirectSerialILU0(LinAlgWorkspaceCpu* workspace); - ~LinSolverDirectSerialILU0(); - - int setup(matrix::Sparse* A, - matrix::Sparse* L = nullptr, - matrix::Sparse* U = nullptr, - index_type* P = nullptr, - index_type* Q = nullptr, - vector_type* rhs = nullptr) override; - int reset(matrix::Sparse* A); - - int solve(vector_type* rhs, vector_type* x) override; - int solve(vector_type* rhs) override; // the solutuon is returned IN RHS (rhs is overwritten) - - int setCliParam(const std::string id, const std::string value) override; - std::string getCliParamString(const std::string id) const override; - index_type getCliParamInt(const std::string id) const override; - real_type getCliParamReal(const std::string id) const override; - bool getCliParamBool(const std::string id) const override; - int printCliParam(const std::string id) const override; - - private: - - // MemoryHandler mem_; ///< Device memory manager object - LinAlgWorkspaceCpu* workspace_{nullptr}; - bool owns_factors_{false}; ///< If the class owns L and U factors - - real_type* h_aux1_{nullptr}; - // since ILU OVERWRITES THE MATRIX values, we need a buffer to keep - // the values of ILU decomposition. - real_type* h_ILU_vals_{nullptr}; + + public: + LinSolverDirectSerialILU0(LinAlgWorkspaceCpu* workspace); + ~LinSolverDirectSerialILU0(); + + int setup(matrix::Sparse* A, + matrix::Sparse* L = nullptr, + matrix::Sparse* U = nullptr, + index_type* P = nullptr, + index_type* Q = nullptr, + vector_type* rhs = nullptr) override; + int reset(matrix::Sparse* A); + + int solve(vector_type* rhs, vector_type* x) override; + int solve(vector_type* rhs) override; // the solutuon is returned IN RHS (rhs is overwritten) + + int setCliParam(const std::string id, const std::string value) override; + std::string getCliParamString(const std::string id) const override; + index_type getCliParamInt(const std::string id) const override; + real_type getCliParamReal(const std::string id) const override; + bool getCliParamBool(const std::string id) const override; + int printCliParam(const std::string id) const override; + + private: + // MemoryHandler mem_; ///< Device memory manager object + LinAlgWorkspaceCpu* workspace_{nullptr}; + bool owns_factors_{false}; ///< If the class owns L and U factors + + real_type* h_aux1_{nullptr}; + // since ILU OVERWRITES THE MATRIX values, we need a buffer to keep + // the values of ILU decomposition. + real_type* h_ILU_vals_{nullptr}; }; -}// namespace +} // namespace ReSolve diff --git a/resolve/LinSolverIterative.cpp b/resolve/LinSolverIterative.cpp index ed1b97ccb..c24af8f76 100644 --- a/resolve/LinSolverIterative.cpp +++ b/resolve/LinSolverIterative.cpp @@ -3,30 +3,29 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Implementation of iterative solver base class. - * + * */ -#include -#include #include #include +#include +#include - -namespace ReSolve +namespace ReSolve { using out = io::Logger; - LinSolverIterative::LinSolverIterative() { } - + LinSolverIterative::~LinSolverIterative() { } int LinSolverIterative::setup(matrix::Sparse* A) { - if (A == nullptr) { + if (A == nullptr) + { return 1; } this->A_ = A; @@ -48,7 +47,6 @@ namespace ReSolve return total_iters_; } - real_type LinSolverIterative::getTol() const { return tol_; @@ -65,16 +63,13 @@ namespace ReSolve return 1; } - void LinSolverIterative::setTol(real_type new_tol) + void LinSolverIterative::setTol(real_type new_tol) { this->tol_ = new_tol; } - void LinSolverIterative::setMaxit(index_type new_maxit) + void LinSolverIterative::setMaxit(index_type new_maxit) { this->maxit_ = new_maxit; } -} - - - +} // namespace ReSolve diff --git a/resolve/LinSolverIterative.hpp b/resolve/LinSolverIterative.hpp index 4b9ee5f97..f85dbedeb 100644 --- a/resolve/LinSolverIterative.hpp +++ b/resolve/LinSolverIterative.hpp @@ -3,47 +3,48 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Declaration of iterative solver base class. - * + * */ #pragma once #include + #include -namespace ReSolve +namespace ReSolve { class GramSchmidt; - class LinSolverIterative : public LinSolver + class LinSolverIterative : public LinSolver { - public: - LinSolverIterative(); - virtual ~LinSolverIterative(); - virtual int setup(matrix::Sparse* A); - virtual int resetMatrix(matrix::Sparse* A) = 0; - virtual int setupPreconditioner(std::string type, LinSolverDirect* LU_solver) = 0; + public: + LinSolverIterative(); + virtual ~LinSolverIterative(); + virtual int setup(matrix::Sparse* A); + virtual int resetMatrix(matrix::Sparse* A) = 0; + virtual int setupPreconditioner(std::string type, LinSolverDirect* LU_solver) = 0; - virtual int solve(vector_type* rhs, vector_type* init_guess) = 0; + virtual int solve(vector_type* rhs, vector_type* init_guess) = 0; - virtual real_type getFinalResidualNorm() const; - virtual real_type getInitResidualNorm() const; - virtual index_type getNumIter() const; + virtual real_type getFinalResidualNorm() const; + virtual real_type getInitResidualNorm() const; + virtual index_type getNumIter() const; - virtual int setOrthogonalization(GramSchmidt* gs); + virtual int setOrthogonalization(GramSchmidt* gs); - real_type getTol() const; - index_type getMaxit() const; + real_type getTol() const; + index_type getMaxit() const; - void setTol(real_type new_tol); - void setMaxit(index_type new_maxit); + void setTol(real_type new_tol); + void setMaxit(index_type new_maxit); - protected: - real_type initial_residual_norm_; - real_type final_residual_norm_; - index_type total_iters_; + protected: + real_type initial_residual_norm_; + real_type final_residual_norm_; + index_type total_iters_; - // Parameters common for all iterative solvers - real_type tol_{1e-14}; ///< Solver tolerance - index_type maxit_{100}; ///< Maximum solver iterations + // Parameters common for all iterative solvers + real_type tol_{1e-14}; ///< Solver tolerance + index_type maxit_{100}; ///< Maximum solver iterations }; -} +} // namespace ReSolve diff --git a/resolve/LinSolverIterativeFGMRES.cpp b/resolve/LinSolverIterativeFGMRES.cpp index 7096384c6..cafc52063 100644 --- a/resolve/LinSolverIterativeFGMRES.cpp +++ b/resolve/LinSolverIterativeFGMRES.cpp @@ -4,14 +4,15 @@ * @brief Implementation of LinSolverIterativeFGMRES class * */ -#include +#include "LinSolverIterativeFGMRES.hpp" + #include #include #include +#include -#include #include -#include "LinSolverIterativeFGMRES.hpp" +#include namespace ReSolve { @@ -23,7 +24,7 @@ namespace ReSolve { matrix_handler_ = matrix_handler; vector_handler_ = vector_handler; - GS_ = gs; + GS_ = gs; setMemorySpace(); initParamList(); } @@ -37,22 +38,23 @@ namespace ReSolve GramSchmidt* gs) { // Base class settings here (to be removed when solver parameter settings are implemented) - tol_ = tol; - maxit_= maxit; - restart_ = restart; + tol_ = tol; + maxit_ = maxit; + restart_ = restart; conv_cond_ = conv_cond; - flexible_ = true; + flexible_ = true; matrix_handler_ = matrix_handler; vector_handler_ = vector_handler; - GS_ = gs; + GS_ = gs; setMemorySpace(); initParamList(); } LinSolverIterativeFGMRES::~LinSolverIterativeFGMRES() { - if (is_solver_set_) { + if (is_solver_set_) + { freeSolverData(); } } @@ -70,8 +72,10 @@ namespace ReSolve int LinSolverIterativeFGMRES::setup(matrix::Sparse* A) { // If A_ is already set, then report error and exit. - if (n_ != A->getNumRows()) { - if (is_solver_set_) { + if (n_ != A->getNumRows()) + { + if (is_solver_set_) + { out::warning() << "Matrix size changed. Reallocating solver ...\n"; freeSolverData(); is_solver_set_ = false; @@ -83,7 +87,8 @@ namespace ReSolve n_ = A->getNumRows(); // Allocate solver data. - if (!is_solver_set_) { + if (!is_solver_set_) + { allocateSolverData(); is_solver_set_ = true; } @@ -93,28 +98,28 @@ namespace ReSolve return 0; } - int LinSolverIterativeFGMRES::solve(vector_type* rhs, vector_type* x) + int LinSolverIterativeFGMRES::solve(vector_type* rhs, vector_type* x) { using namespace constants; - //io::Logger::setVerbosity(io::Logger::EVERYTHING); + // io::Logger::setVerbosity(io::Logger::EVERYTHING); int outer_flag = 1; - int notconv = 1; - int i = 0; - int it = 0; - int j = 0; - int k = 0; - int k1 = 0; - - real_type t = 0.0; - real_type rnorm = 0.0; - real_type bnorm = 0.0; - real_type tolrel; + int notconv = 1; + int i = 0; + int it = 0; + int j = 0; + int k = 0; + int k1 = 0; + + real_type t = 0.0; + real_type rnorm = 0.0; + real_type bnorm = 0.0; + real_type tolrel; vector_type vec_v(n_); vector_type vec_z(n_); - //V[0] = b-A*x_0 - //debug + // V[0] = b-A*x_0 + // debug vec_Z_->setToZero(memspace_); vec_V_->setToZero(memspace_); @@ -123,18 +128,21 @@ namespace ReSolve rnorm = 0.0; bnorm = vector_handler_->dot(rhs, rhs, memspace_); rnorm = vector_handler_->dot(vec_V_, vec_V_, memspace_); - //rnorm = ||V_1|| + // rnorm = ||V_1|| rnorm = std::sqrt(rnorm); bnorm = std::sqrt(bnorm); io::Logger::misc() << "it 0: norm of residual " << std::scientific << std::setprecision(16) << rnorm << " Norm of rhs: " << bnorm << "\n"; initial_residual_norm_ = rnorm; - while(outer_flag) { + while (outer_flag) + { // check if maybe residual is already small enough? - if (it == 0) { + if (it == 0) + { tolrel = tol_ * rnorm; - if (std::abs(tolrel) < MACHINE_EPSILON) { + if (std::abs(tolrel) < MACHINE_EPSILON) + { tolrel = MACHINE_EPSILON; } } @@ -142,22 +150,23 @@ namespace ReSolve bool exit_cond = false; switch (conv_cond_) { - case 0: - exit_cond = ((std::abs(rnorm - ZERO) <= MACHINE_EPSILON)); - break; - case 1: - exit_cond = ((std::abs(rnorm - ZERO) <= MACHINE_EPSILON) || (rnorm < tol_)); - break; - case 2: - exit_cond = ((std::abs(rnorm - ZERO) <= MACHINE_EPSILON) || (rnorm < (tol_*bnorm))); - break; + case 0: + exit_cond = ((std::abs(rnorm - ZERO) <= MACHINE_EPSILON)); + break; + case 1: + exit_cond = ((std::abs(rnorm - ZERO) <= MACHINE_EPSILON) || (rnorm < tol_)); + break; + case 2: + exit_cond = ((std::abs(rnorm - ZERO) <= MACHINE_EPSILON) || (rnorm < (tol_ * bnorm))); + break; } - if (exit_cond) { - outer_flag = 0; - final_residual_norm_ = rnorm; + if (exit_cond) + { + outer_flag = 0; + final_residual_norm_ = rnorm; initial_residual_norm_ = rnorm; - total_iters_ = 0; + total_iters_ = 0; break; } @@ -166,55 +175,62 @@ namespace ReSolve vector_handler_->scal(&t, vec_V_, memspace_); // initialize norm history h_rs_[0] = rnorm; - i = -1; - notconv = 1; + i = -1; + notconv = 1; - while((notconv) && (it < maxit_)) { + while ((notconv) && (it < maxit_)) + { i++; it++; // Z_i = (LU)^{-1}*V_i - vec_v.setData( vec_V_->getData(i, memspace_), memspace_); - if (flexible_) { - vec_z.setData( vec_Z_->getData(i, memspace_), memspace_); - } else { - vec_z.setData( vec_Z_->getData(0, memspace_), memspace_); + vec_v.setData(vec_V_->getData(i, memspace_), memspace_); + if (flexible_) + { + vec_z.setData(vec_Z_->getData(i, memspace_), memspace_); + } + else + { + vec_z.setData(vec_Z_->getData(0, memspace_), memspace_); } this->precV(&vec_v, &vec_z); mem_.deviceSynchronize(); // V_{i+1}=A*Z_i - vec_v.setData( vec_V_->getData(i + 1, memspace_), memspace_); + vec_v.setData(vec_V_->getData(i + 1, memspace_), memspace_); matrix_handler_->matvec(A_, &vec_z, &vec_v, &ONE, &ZERO, memspace_); // orthogonalize V[i+1], form a column of h_H_ GS_->orthogonalize(n_, vec_V_, h_H_, i); - if (i != 0) { - for (index_type k = 1; k <= i; k++) { - k1 = k - 1; - t = h_H_[i * (restart_ + 1) + k1]; + if (i != 0) + { + for (index_type k = 1; k <= i; k++) + { + k1 = k - 1; + t = h_H_[i * (restart_ + 1) + k1]; h_H_[i * (restart_ + 1) + k1] = h_c_[k1] * t + h_s_[k1] * h_H_[i * (restart_ + 1) + k]; - h_H_[i * (restart_ + 1) + k] = -h_s_[k1] * t + h_c_[k1] * h_H_[i * (restart_ + 1) + k]; + h_H_[i * (restart_ + 1) + k] = -h_s_[k1] * t + h_c_[k1] * h_H_[i * (restart_ + 1) + k]; } } // if i!=0 - real_type Hii = h_H_[i * (restart_ + 1) + i]; + real_type Hii = h_H_[i * (restart_ + 1) + i]; real_type Hii1 = h_H_[(i) * (restart_ + 1) + i + 1]; - real_type gam = std::sqrt(Hii * Hii + Hii1 * Hii1); + real_type gam = std::sqrt(Hii * Hii + Hii1 * Hii1); - if(std::abs(gam - ZERO) <= MACHINE_EPSILON) { + if (std::abs(gam - ZERO) <= MACHINE_EPSILON) + { gam = MACHINE_EPSILON; } /* next Given's rotation */ - h_c_[i] = Hii / gam; - h_s_[i] = Hii1 / gam; + h_c_[i] = Hii / gam; + h_s_[i] = Hii1 / gam; h_rs_[i + 1] = -h_s_[i] * h_rs_[i]; - h_rs_[i] = h_c_[i] * h_rs_[i]; + h_rs_[i] = h_c_[i] * h_rs_[i]; - h_H_[(i) * (restart_ + 1) + (i)] = h_c_[i] * Hii + h_s_[i] * Hii1; + h_H_[(i) * (restart_ + 1) + (i)] = h_c_[i] * Hii + h_s_[i] * Hii1; h_H_[(i) * (restart_ + 1) + (i + 1)] = h_c_[i] * Hii1 - h_s_[i] * Hii; // residual norm estimate @@ -223,7 +239,8 @@ namespace ReSolve << std::scientific << std::setprecision(16) << rnorm << "\n"; // check convergence - if (i + 1 >= restart_ || rnorm <= tolrel || it >= maxit_) { + if (i + 1 >= restart_ || rnorm <= tolrel || it >= maxit_) + { notconv = 0; } } // inner while @@ -233,32 +250,39 @@ namespace ReSolve << rnorm << "\n"; // solve tri system h_rs_[i] = h_rs_[i] / h_H_[i * (restart_ + 1) + i]; - for(int ii = 2; ii <= i + 1; ii++) { - k = i - ii + 1; + for (int ii = 2; ii <= i + 1; ii++) + { + k = i - ii + 1; k1 = k + 1; - t = h_rs_[k]; - for (j = k1; j <= i; j++) { + t = h_rs_[k]; + for (j = k1; j <= i; j++) + { t -= h_H_[j * (restart_ + 1) + k] * h_rs_[j]; } h_rs_[k] = t / h_H_[k * (restart_ + 1) + k]; } // get solution - if (flexible_) { - for (j = 0; j <= i; j++) { - vec_z.setData( vec_Z_->getData(j, memspace_), memspace_); + if (flexible_) + { + for (j = 0; j <= i; j++) + { + vec_z.setData(vec_Z_->getData(j, memspace_), memspace_); vector_handler_->axpy(&h_rs_[j], &vec_z, x, memspace_); } - } else { + } + else + { vec_Z_->setToZero(memspace_); - vec_z.setData( vec_Z_->getData(0, memspace_), memspace_); - for (j = 0; j <= i; j++) { - vec_v.setData( vec_V_->getData(j, memspace_), memspace_); + vec_z.setData(vec_Z_->getData(0, memspace_), memspace_); + for (j = 0; j <= i; j++) + { + vec_v.setData(vec_V_->getData(j, memspace_), memspace_); vector_handler_->axpy(&h_rs_[j], &vec_v, &vec_z, memspace_); } // now multiply d_Z by precon - vec_v.setData( vec_V_->getData(memspace_), memspace_); + vec_v.setData(vec_V_->getData(memspace_), memspace_); this->precV(&vec_z, &vec_v); // and add to x vector_handler_->axpy(&ONE, &vec_v, x, memspace_); @@ -266,7 +290,8 @@ namespace ReSolve /* test solution */ - if(rnorm <= tolrel || it >= maxit_) { + if (rnorm <= tolrel || it >= maxit_) + { // rnorm_aux = rnorm; outer_flag = 0; } @@ -277,9 +302,10 @@ namespace ReSolve // rnorm = ||V_1|| rnorm = std::sqrt(rnorm); - if(!outer_flag) { + if (!outer_flag) + { final_residual_norm_ = rnorm; - total_iters_ = it; + total_iters_ = it; io::Logger::misc() << "End of cycle, COMPUTED norm of residual " << std::scientific << std::setprecision(16) << rnorm << "\n"; @@ -288,19 +314,21 @@ namespace ReSolve return 0; } - int LinSolverIterativeFGMRES::setupPreconditioner(std::string type, LinSolverDirect* LU_solver) + int LinSolverIterativeFGMRES::setupPreconditioner(std::string type, LinSolverDirect* LU_solver) { - if (type != "LU") { + if (type != "LU") + { out::warning() << "Only LU-type solve can be used as a preconditioner at this time." << std::endl; return 1; - } else { + } + else + { LU_solver_ = LU_solver; return 0; } - } - int LinSolverIterativeFGMRES::resetMatrix(matrix::Sparse* new_matrix) + int LinSolverIterativeFGMRES::resetMatrix(matrix::Sparse* new_matrix) { A_ = new_matrix; matrix_handler_->setValuesChanged(true, memspace_); @@ -333,7 +361,8 @@ namespace ReSolve int LinSolverIterativeFGMRES::setRestart(index_type restart) { // If the new restart value is the same as the old, do nothing. - if (restart_ == restart) { + if (restart_ == restart) + { return 0; } @@ -341,7 +370,8 @@ namespace ReSolve restart_ = restart; // If solver is already set, reallocate solver data - if (is_solver_set_) { + if (is_solver_set_) + { freeSolverData(); allocateSolverData(); } @@ -351,7 +381,7 @@ namespace ReSolve // If Gram-Schmidt is already set, we need to reallocate it since the // restart value has changed. // if (GS_->isSetupComplete()) { - GS_->setup(n_, restart_); + GS_->setup(n_, restart_); // } return 0; @@ -366,11 +396,15 @@ namespace ReSolve int LinSolverIterativeFGMRES::setFlexible(bool is_flexible) { // TODO: Add vector method resize - if (vec_Z_) { + if (vec_Z_) + { delete vec_Z_; - if (is_flexible) { + if (is_flexible) + { vec_Z_ = new vector_type(n_, restart_ + 1); - } else { + } + else + { // otherwise Z is just a one vector, not multivector and we dont keep it vec_Z_ = new vector_type(n_); } @@ -393,43 +427,42 @@ namespace ReSolve return 0; } - index_type LinSolverIterativeFGMRES::getRestart() const + index_type LinSolverIterativeFGMRES::getRestart() const { return restart_; } - index_type LinSolverIterativeFGMRES::getConvCond() const + index_type LinSolverIterativeFGMRES::getConvCond() const { return conv_cond_; } - bool LinSolverIterativeFGMRES::getFlexible() const + bool LinSolverIterativeFGMRES::getFlexible() const { return flexible_; } - int LinSolverIterativeFGMRES::setCliParam(const std::string id, const std::string value) { switch (getParamId(id)) { - case TOL: - setTol(atof(value.c_str())); - break; - case MAXIT: - setMaxit(atoi(value.c_str())); - break; - case RESTART: - setRestart(atoi(value.c_str())); - break; - case CONV_COND: - setConvergenceCondition(atoi(value.c_str())); - break; - case FLEXIBLE: - setFlexible(value == "yes"); - break; - default: - std::cout << "Setting parameter failed!\n"; + case TOL: + setTol(atof(value.c_str())); + break; + case MAXIT: + setMaxit(atoi(value.c_str())); + break; + case RESTART: + setRestart(atoi(value.c_str())); + break; + case CONV_COND: + setConvergenceCondition(atoi(value.c_str())); + break; + case FLEXIBLE: + setFlexible(value == "yes"); + break; + default: + std::cout << "Setting parameter failed!\n"; } return 0; } @@ -438,8 +471,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown string parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown string parameter " << id << "\n"; } return ""; } @@ -448,17 +481,17 @@ namespace ReSolve { switch (getParamId(id)) { - case MAXIT: - return getMaxit(); - break; - case RESTART: - return getRestart(); - break; - case CONV_COND: - return getConvCond(); - break; - default: - out::error() << "Trying to get unknown integer parameter " << id << "\n"; + case MAXIT: + return getMaxit(); + break; + case RESTART: + return getRestart(); + break; + case CONV_COND: + return getConvCond(); + break; + default: + out::error() << "Trying to get unknown integer parameter " << id << "\n"; } return -1; } @@ -467,11 +500,11 @@ namespace ReSolve { switch (getParamId(id)) { - case TOL: - return getTol(); - break; - default: - out::error() << "Trying to get unknown real parameter " << id << "\n"; + case TOL: + return getTol(); + break; + default: + out::error() << "Trying to get unknown real parameter " << id << "\n"; } return std::numeric_limits::quiet_NaN(); } @@ -480,11 +513,11 @@ namespace ReSolve { switch (getParamId(id)) { - case FLEXIBLE: - return getFlexible(); - break; - default: - out::error() << "Trying to get unknown boolean parameter " << id << "\n"; + case FLEXIBLE: + return getFlexible(); + break; + default: + out::error() << "Trying to get unknown boolean parameter " << id << "\n"; } return false; } @@ -523,36 +556,39 @@ namespace ReSolve { vec_V_ = new vector_type(n_, restart_ + 1); vec_V_->allocate(memspace_); - if (flexible_) { + if (flexible_) + { vec_Z_ = new vector_type(n_, restart_ + 1); - } else { + } + else + { // otherwise Z is just a one vector, not multivector and we dont keep it vec_Z_ = new vector_type(n_); } vec_Z_->allocate(memspace_); h_H_ = new real_type[restart_ * (restart_ + 1)]; - h_c_ = new real_type[restart_]; // needed for givens - h_s_ = new real_type[restart_]; // same - h_rs_ = new real_type[restart_ + 1]; // for residual norm history + h_c_ = new real_type[restart_]; // needed for givens + h_s_ = new real_type[restart_]; // same + h_rs_ = new real_type[restart_ + 1]; // for residual norm history return 0; } int LinSolverIterativeFGMRES::freeSolverData() { - delete [] h_H_ ; - delete [] h_c_ ; - delete [] h_s_ ; - delete [] h_rs_; + delete[] h_H_; + delete[] h_c_; + delete[] h_s_; + delete[] h_rs_; delete vec_V_; delete vec_Z_; - h_H_ = nullptr; - h_c_ = nullptr; - h_s_ = nullptr; - h_rs_ = nullptr; - vec_V_ = nullptr; - vec_Z_ = nullptr; + h_H_ = nullptr; + h_c_ = nullptr; + h_s_ = nullptr; + h_rs_ = nullptr; + vec_V_ = nullptr; + vec_Z_ = nullptr; return 0; } @@ -569,14 +605,17 @@ namespace ReSolve bool is_vector_handler_cuda = matrix_handler_->getIsCudaEnabled(); bool is_vector_handler_hip = matrix_handler_->getIsHipEnabled(); - if ((is_matrix_handler_cuda != is_vector_handler_cuda) || - (is_matrix_handler_hip != is_vector_handler_hip )) { + if ((is_matrix_handler_cuda != is_vector_handler_cuda) || (is_matrix_handler_hip != is_vector_handler_hip)) + { out::error() << "Matrix and vector handler backends are incompatible!\n"; } - if (is_matrix_handler_cuda || is_matrix_handler_hip) { + if (is_matrix_handler_cuda || is_matrix_handler_hip) + { memspace_ = memory::DEVICE; - } else { + } + else + { memspace_ = memory::HOST; } } @@ -590,4 +629,4 @@ namespace ReSolve params_list_["flexible"] = FLEXIBLE; } -} // namespace +} // namespace ReSolve diff --git a/resolve/LinSolverIterativeFGMRES.hpp b/resolve/LinSolverIterativeFGMRES.hpp index cfdd6c442..184828c2a 100644 --- a/resolve/LinSolverIterativeFGMRES.hpp +++ b/resolve/LinSolverIterativeFGMRES.hpp @@ -2,24 +2,24 @@ * @file LinSolverIterativeFGMRES.hpp * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @brief Declaration of LinSolverIterativeFGMRES class - * + * */ #pragma once #include "Common.hpp" -#include -#include #include "GramSchmidt.hpp" #include #include +#include +#include -namespace ReSolve +namespace ReSolve { /** * @brief (F)GMRES solver - * + * * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) - * + * * @note MatrixHandler and VectorHandler objects are inherited from * LinSolver base class. */ @@ -27,68 +27,75 @@ namespace ReSolve { using vector_type = vector::Vector; - public: - LinSolverIterativeFGMRES(MatrixHandler* matrix_handler, - VectorHandler* vector_handler, - GramSchmidt* gs); - LinSolverIterativeFGMRES(index_type restart, - real_type tol, - index_type maxit, - index_type conv_cond, - MatrixHandler* matrix_handler, - VectorHandler* vector_handler, - GramSchmidt* gs); - ~LinSolverIterativeFGMRES(); + public: + LinSolverIterativeFGMRES(MatrixHandler* matrix_handler, + VectorHandler* vector_handler, + GramSchmidt* gs); + LinSolverIterativeFGMRES(index_type restart, + real_type tol, + index_type maxit, + index_type conv_cond, + MatrixHandler* matrix_handler, + VectorHandler* vector_handler, + GramSchmidt* gs); + ~LinSolverIterativeFGMRES(); - int solve(vector_type* rhs, vector_type* x) override; - int setup(matrix::Sparse* A) override; - int resetMatrix(matrix::Sparse* new_A) override; - int setupPreconditioner(std::string name, LinSolverDirect* LU_solver) override; - int setOrthogonalization(GramSchmidt* gs) override; + int solve(vector_type* rhs, vector_type* x) override; + int setup(matrix::Sparse* A) override; + int resetMatrix(matrix::Sparse* new_A) override; + int setupPreconditioner(std::string name, LinSolverDirect* LU_solver) override; + int setOrthogonalization(GramSchmidt* gs) override; - int setRestart(index_type restart); - int setFlexible(bool is_flexible); - int setConvergenceCondition(index_type conv_cond); - index_type getRestart() const; - index_type getConvCond() const; - bool getFlexible() const; + int setRestart(index_type restart); + int setFlexible(bool is_flexible); + int setConvergenceCondition(index_type conv_cond); + index_type getRestart() const; + index_type getConvCond() const; + bool getFlexible() const; - int setCliParam(const std::string id, const std::string value) override; - std::string getCliParamString(const std::string id) const override; - index_type getCliParamInt(const std::string id) const override; - real_type getCliParamReal(const std::string id) const override; - bool getCliParamBool(const std::string id) const override; - int printCliParam(const std::string id) const override; + int setCliParam(const std::string id, const std::string value) override; + std::string getCliParamString(const std::string id) const override; + index_type getCliParamInt(const std::string id) const override; + real_type getCliParamReal(const std::string id) const override; + bool getCliParamBool(const std::string id) const override; + int printCliParam(const std::string id) const override; - private: - enum ParamaterIDs {TOL=0, MAXIT, RESTART, CONV_COND, FLEXIBLE}; + private: + enum ParamaterIDs + { + TOL = 0, + MAXIT, + RESTART, + CONV_COND, + FLEXIBLE + }; - index_type restart_{10}; ///< GMRES restart - index_type conv_cond_{0}; ///< GMRES convergence condition - bool flexible_{true}; ///< If using flexible GMRES (FGMRES) algorithm + index_type restart_{10}; ///< GMRES restart + index_type conv_cond_{0}; ///< GMRES convergence condition + bool flexible_{true}; ///< If using flexible GMRES (FGMRES) algorithm - private: - int allocateSolverData(); - int freeSolverData(); - void setMemorySpace(); - void initParamList(); - void precV(vector_type* rhs, vector_type* x); ///< Apply preconditioner + private: + int allocateSolverData(); + int freeSolverData(); + void setMemorySpace(); + void initParamList(); + void precV(vector_type* rhs, vector_type* x); ///< Apply preconditioner - memory::MemorySpace memspace_; + memory::MemorySpace memspace_; - vector_type* vec_V_{nullptr}; - vector_type* vec_Z_{nullptr}; + vector_type* vec_V_{nullptr}; + vector_type* vec_Z_{nullptr}; - real_type* h_H_{nullptr}; - real_type* h_c_{nullptr}; - real_type* h_s_{nullptr}; - real_type* h_rs_{nullptr}; + real_type* h_H_{nullptr}; + real_type* h_c_{nullptr}; + real_type* h_s_{nullptr}; + real_type* h_rs_{nullptr}; - GramSchmidt* GS_{nullptr}; - LinSolverDirect* LU_solver_{nullptr}; - index_type n_{0}; - bool is_solver_set_{false}; + GramSchmidt* GS_{nullptr}; + LinSolverDirect* LU_solver_{nullptr}; + index_type n_{0}; + bool is_solver_set_{false}; - MemoryHandler mem_; ///< Device memory manager object + MemoryHandler mem_; ///< Device memory manager object }; -} +} // namespace ReSolve diff --git a/resolve/LinSolverIterativeRandFGMRES.cpp b/resolve/LinSolverIterativeRandFGMRES.cpp index 4c1189cb6..94cce54f6 100644 --- a/resolve/LinSolverIterativeRandFGMRES.cpp +++ b/resolve/LinSolverIterativeRandFGMRES.cpp @@ -4,19 +4,20 @@ * @brief Implementation of LinSolverIterativeRandFGMRES class. * */ -#include +#include "LinSolverIterativeRandFGMRES.hpp" + #include #include -#include #include +#include +#include -#include -#include -#include #include #include +#include #include -#include "LinSolverIterativeRandFGMRES.hpp" +#include +#include namespace ReSolve { @@ -27,16 +28,16 @@ namespace ReSolve SketchingMethod rand_method, GramSchmidt* gs) { - tol_ = 1e-14; //default - maxit_= 100; //default - restart_ = 10; - conv_cond_ = 0;//default - flexible_ = true; - - matrix_handler_ = matrix_handler; - vector_handler_ = vector_handler; + tol_ = 1e-14; // default + maxit_ = 100; // default + restart_ = 10; + conv_cond_ = 0; // default + flexible_ = true; + + matrix_handler_ = matrix_handler; + vector_handler_ = vector_handler; sketching_method_ = rand_method; - GS_ = gs; + GS_ = gs; setMemorySpace(); initParamList(); @@ -51,16 +52,16 @@ namespace ReSolve SketchingMethod rand_method, GramSchmidt* gs) { - tol_ = tol; - maxit_= maxit; - restart_ = restart; + tol_ = tol; + maxit_ = maxit; + restart_ = restart; conv_cond_ = conv_cond; - flexible_ = true; + flexible_ = true; - matrix_handler_ = matrix_handler; - vector_handler_ = vector_handler; + matrix_handler_ = matrix_handler; + vector_handler_ = vector_handler; sketching_method_ = rand_method; - GS_ = gs; + GS_ = gs; setMemorySpace(); initParamList(); @@ -68,11 +69,13 @@ namespace ReSolve LinSolverIterativeRandFGMRES::~LinSolverIterativeRandFGMRES() { - if (is_solver_set_) { + if (is_solver_set_) + { freeSolverData(); } - if (is_sketching_set_) { + if (is_sketching_set_) + { freeSketchingData(); } } @@ -86,13 +89,16 @@ namespace ReSolve int LinSolverIterativeRandFGMRES::setup(matrix::Sparse* A) { // If A_ is already set, then report error and exit. - if (n_ != A->getNumRows()) { - if (is_solver_set_) { + if (n_ != A->getNumRows()) + { + if (is_solver_set_) + { out::warning() << "Matrix size changed. Reallocating solver ...\n"; freeSolverData(); } - if (is_sketching_set_) { + if (is_sketching_set_) + { out::warning() << "Matrix size changed. Reallocating solver ...\n"; freeSketchingData(); } @@ -101,11 +107,13 @@ namespace ReSolve A_ = A; n_ = A_->getNumRows(); - if (!is_solver_set_) { + if (!is_solver_set_) + { allocateSolverData(); } - if (!is_sketching_set_) { + if (!is_sketching_set_) + { allocateSketchingData(); } @@ -124,29 +132,29 @@ namespace ReSolve * @invariant rhs vector is unchanged. * @post x is overwritten with the solution to the linear system. */ - int LinSolverIterativeRandFGMRES::solve(vector_type* rhs, vector_type* x) + int LinSolverIterativeRandFGMRES::solve(vector_type* rhs, vector_type* x) { using namespace constants; // io::Logger::setVerbosity(io::Logger::EVERYTHING); - int outer_flag = 1; - int notconv = 1; - index_type i = 0; - int it = 0; - int j; - int k; - int k1; - - real_type t; - real_type rnorm; - real_type bnorm; - real_type tolrel; + int outer_flag = 1; + int notconv = 1; + index_type i = 0; + int it = 0; + int j; + int k; + int k1; + + real_type t; + real_type rnorm; + real_type bnorm; + real_type tolrel; vector_type vec_v(n_); vector_type vec_z(n_); vector_type vec_s(k_rand_); - //V[0] = b-A*x_0 - //debug + // V[0] = b-A*x_0 + // debug vec_Z_->setToZero(memspace_); vec_V_->setToZero(memspace_); @@ -158,7 +166,8 @@ namespace ReSolve sketching_handler_->Theta(&vec_v, &vec_s); - if (sketching_method_ == fwht) { + if (sketching_method_ == fwht) + { vector_handler_->scal(&one_over_k_, &vec_s, memspace_); } mem_.deviceSynchronize(); @@ -172,11 +181,14 @@ namespace ReSolve << std::scientific << std::setprecision(16) << rnorm << " Norm of rhs: " << bnorm << "\n"; initial_residual_norm_ = rnorm; - while(outer_flag) { + while (outer_flag) + { // check if maybe residual is already small enough? - if (it == 0) { + if (it == 0) + { tolrel = tol_ * rnorm; - if (std::abs(tolrel) < MACHINE_EPSILON) { + if (std::abs(tolrel) < MACHINE_EPSILON) + { tolrel = MACHINE_EPSILON; } } @@ -184,22 +196,23 @@ namespace ReSolve bool exit_cond = false; switch (conv_cond_) { - case 0: - exit_cond = ((std::abs(rnorm - ZERO) <= MACHINE_EPSILON)); - break; - case 1: - exit_cond = ((std::abs(rnorm - ZERO) <= MACHINE_EPSILON) || (rnorm < tol_)); - break; - case 2: - exit_cond = ((std::abs(rnorm - ZERO) <= MACHINE_EPSILON) || (rnorm < (tol_*bnorm))); - break; + case 0: + exit_cond = ((std::abs(rnorm - ZERO) <= MACHINE_EPSILON)); + break; + case 1: + exit_cond = ((std::abs(rnorm - ZERO) <= MACHINE_EPSILON) || (rnorm < tol_)); + break; + case 2: + exit_cond = ((std::abs(rnorm - ZERO) <= MACHINE_EPSILON) || (rnorm < (tol_ * bnorm))); + break; } - if (exit_cond) { - outer_flag = 0; - final_residual_norm_ = rnorm; + if (exit_cond) + { + outer_flag = 0; + final_residual_norm_ = rnorm; initial_residual_norm_ = rnorm; - total_iters_ = 0; + total_iters_ = 0; break; } @@ -212,18 +225,22 @@ namespace ReSolve // initialize norm history h_rs_[0] = rnorm; - i = -1; - notconv = 1; + i = -1; + notconv = 1; - while((notconv) && (it < maxit_)) { + while ((notconv) && (it < maxit_)) + { i++; it++; // Z_i = (LU)^{-1}*V_i vec_v.setData(vec_V_->getData(i, memspace_), memspace_); - if (flexible_) { + if (flexible_) + { vec_z.setData(vec_Z_->getData(i, memspace_), memspace_); - } else { + } + else + { vec_z.setData(vec_Z_->getData(0, memspace_), memspace_); } this->precV(&vec_v, &vec_z); @@ -239,7 +256,8 @@ namespace ReSolve // this is where it differs from normal solver GS vec_s.setData(vec_S_->getData(i + 1, memspace_), memspace_); sketching_handler_->Theta(&vec_v, &vec_s); - if (sketching_method_ == fwht) { + if (sketching_method_ == fwht) + { vector_handler_->scal(&one_over_k_, &vec_s, memspace_); } mem_.deviceSynchronize(); @@ -250,46 +268,50 @@ namespace ReSolve vec_aux_->copyDataFrom(&h_H_[i * (restart_ + 1)], memory::HOST, memspace_); // V(:, i+1) = w - V(:, 1:i)*d_H_col = V(:, i+1) - d_H_col*V(:,1:i); - vector_handler_->gemv('N', n_, i + 1, &MINUS_ONE, &ONE, vec_V_, vec_aux_, &vec_v, memspace_ ); + vector_handler_->gemv('N', n_, i + 1, &MINUS_ONE, &ONE, vec_V_, vec_aux_, &vec_v, memspace_); t = 1.0 / h_H_[i * (restart_ + 1) + i + 1]; vector_handler_->scal(&t, &vec_v, memspace_); mem_.deviceSynchronize(); vec_s.setData(vec_S_->getData(i + 1, memspace_), memspace_); - if (i != 0) { - for (int k = 1; k <= i; k++) { - k1 = k - 1; - t = h_H_[i * (restart_ + 1) + k1]; + if (i != 0) + { + for (int k = 1; k <= i; k++) + { + k1 = k - 1; + t = h_H_[i * (restart_ + 1) + k1]; h_H_[i * (restart_ + 1) + k1] = h_c_[k1] * t + h_s_[k1] * h_H_[i * (restart_ + 1) + k]; - h_H_[i * (restart_ + 1) + k] = -h_s_[k1] * t + h_c_[k1] * h_H_[i * (restart_ + 1) + k]; + h_H_[i * (restart_ + 1) + k] = -h_s_[k1] * t + h_c_[k1] * h_H_[i * (restart_ + 1) + k]; } } // if (i != 0) - real_type Hii = h_H_[i * (restart_ + 1) + i]; + real_type Hii = h_H_[i * (restart_ + 1) + i]; real_type Hii1 = h_H_[(i) * (restart_ + 1) + i + 1]; - real_type gam = std::sqrt(Hii * Hii + Hii1 * Hii1); + real_type gam = std::sqrt(Hii * Hii + Hii1 * Hii1); - if(std::abs(gam - ZERO) <= MACHINE_EPSILON) { + if (std::abs(gam - ZERO) <= MACHINE_EPSILON) + { gam = MACHINE_EPSILON; } /* next Given's rotation */ - h_c_[i] = Hii / gam; - h_s_[i] = Hii1 / gam; + h_c_[i] = Hii / gam; + h_s_[i] = Hii1 / gam; h_rs_[i + 1] = -h_s_[i] * h_rs_[i]; - h_rs_[i] = h_c_[i] * h_rs_[i]; + h_rs_[i] = h_c_[i] * h_rs_[i]; - h_H_[(i) * (restart_ + 1) + (i)] = h_c_[i] * Hii + h_s_[i] * Hii1; + h_H_[(i) * (restart_ + 1) + (i)] = h_c_[i] * Hii + h_s_[i] * Hii1; h_H_[(i) * (restart_ + 1) + (i + 1)] = h_c_[i] * Hii1 - h_s_[i] * Hii; // residual norm estimate rnorm = std::abs(h_rs_[i + 1]); - io::Logger::misc() << "it: "<< it << " --> norm of the residual " + io::Logger::misc() << "it: " << it << " --> norm of the residual " << std::scientific << std::setprecision(16) << rnorm << "\n"; // check convergence - if (i + 1 >= restart_ || rnorm <= tolrel || it >= maxit_) { + if (i + 1 >= restart_ || rnorm <= tolrel || it >= maxit_) + { notconv = 0; } } // inner while @@ -299,26 +321,33 @@ namespace ReSolve << rnorm << "\n"; // solve tri system h_rs_[i] = h_rs_[i] / h_H_[i * (restart_ + 1) + i]; - for (int ii = 2; ii <= i + 1; ii++) { - k = i - ii + 1; + for (int ii = 2; ii <= i + 1; ii++) + { + k = i - ii + 1; k1 = k + 1; - t = h_rs_[k]; - for(j = k1; j <= i; j++) { + t = h_rs_[k]; + for (j = k1; j <= i; j++) + { t -= h_H_[j * (restart_ + 1) + k] * h_rs_[j]; } h_rs_[k] = t / h_H_[k * (restart_ + 1) + k]; } // get solution - if (flexible_) { - for (j = 0; j <= i; j++) { + if (flexible_) + { + for (j = 0; j <= i; j++) + { vec_z.setData(vec_Z_->getData(j, memspace_), memspace_); vector_handler_->axpy(&h_rs_[j], &vec_z, x, memspace_); } - } else { + } + else + { vec_Z_->setToZero(0, memspace_); - vec_z.setData( vec_Z_->getData(0, memspace_), memspace_); - for(j = 0; j <= i; j++) { + vec_z.setData(vec_Z_->getData(0, memspace_), memspace_); + for (j = 0; j <= i; j++) + { vec_v.setData(vec_V_->getData(j, memspace_), memspace_); vector_handler_->axpy(&h_rs_[j], &vec_v, &vec_z, memspace_); } @@ -331,23 +360,27 @@ namespace ReSolve } /* test solution */ - if(rnorm <= tolrel || it >= maxit_) { + if (rnorm <= tolrel || it >= maxit_) + { outer_flag = 0; } rhs->copyDataTo(vec_V_->getData(memspace_), 0, memspace_); matrix_handler_->matvec(A_, x, vec_V_, &MINUS_ONE, &ONE, memspace_); - if (outer_flag) { + if (outer_flag) + { sketching_handler_->reset(); - if (sketching_method_ == cs) { + if (sketching_method_ == cs) + { vec_S_->setToZero(memspace_); } vec_v.setData(vec_V_->getData(0, memspace_), memspace_); vec_s.setData(vec_S_->getData(0, memspace_), memspace_); sketching_handler_->Theta(&vec_v, &vec_s); - if (sketching_method_ == fwht) { + if (sketching_method_ == fwht) + { vector_handler_->scal(&one_over_k_, &vec_s, memspace_); } mem_.deviceSynchronize(); @@ -356,7 +389,8 @@ namespace ReSolve rnorm = std::sqrt(rnorm); } - if (!outer_flag) { + if (!outer_flag) + { rnorm = vector_handler_->dot(vec_V_, vec_V_, memspace_); // rnorm = ||V_0|| rnorm = std::sqrt(rnorm); @@ -366,30 +400,32 @@ namespace ReSolve << rnorm << "\n"; final_residual_norm_ = rnorm; - total_iters_ = it; + total_iters_ = it; } } // outer while return 0; } - int LinSolverIterativeRandFGMRES::setupPreconditioner(std::string type, LinSolverDirect* LU_solver) + int LinSolverIterativeRandFGMRES::setupPreconditioner(std::string type, LinSolverDirect* LU_solver) { - if (type != "LU") { + if (type != "LU") + { out::warning() << "Only cusolverRf tri solve can be used as a preconditioner at this time." << std::endl; return 1; - } else { + } + else + { LU_solver_ = LU_solver; return 0; } - } - index_type LinSolverIterativeRandFGMRES::getKrand() + index_type LinSolverIterativeRandFGMRES::getKrand() { return k_rand_; } - int LinSolverIterativeRandFGMRES::resetMatrix(matrix::Sparse* new_matrix) + int LinSolverIterativeRandFGMRES::resetMatrix(matrix::Sparse* new_matrix) { A_ = new_matrix; matrix_handler_->setValuesChanged(true, memspace_); @@ -404,8 +440,10 @@ namespace ReSolve */ int LinSolverIterativeRandFGMRES::setSketchingMethod(SketchingMethod method) { - if (is_sketching_set_) { - if (method == sketching_method_) { + if (is_sketching_set_) + { + if (method == sketching_method_) + { out::misc() << "Keeping sketching method " << method << "\n"; return 0; } @@ -415,7 +453,8 @@ namespace ReSolve // If solver is set, go ahead and create sketching, otherwise just set sketching method. sketching_method_ = method; - if (is_solver_set_) { + if (is_solver_set_) + { out::misc() << "Allocating sketching method " << sketching_method_ << "\n"; allocateSketchingData(); } @@ -448,7 +487,8 @@ namespace ReSolve int LinSolverIterativeRandFGMRES::setRestart(index_type restart) { // If the new restart value is the same as the old, do nothing. - if (restart_ == restart) { + if (restart_ == restart) + { return 0; } @@ -456,13 +496,15 @@ namespace ReSolve restart_ = restart; // If solver is already set, reallocate solver data - if (is_solver_set_) { + if (is_solver_set_) + { freeSolverData(); allocateSolverData(); } // If sketching has been set, reallocate sketching data - if (is_sketching_set_) { + if (is_sketching_set_) + { freeSketchingData(); allocateSketchingData(); } @@ -484,11 +526,15 @@ namespace ReSolve int LinSolverIterativeRandFGMRES::setFlexible(bool is_flexible) { // TODO: Add vector method resize - if (vec_Z_) { + if (vec_Z_) + { delete vec_Z_; - if (is_flexible) { + if (is_flexible) + { vec_Z_ = new vector_type(n_, restart_ + 1); - } else { + } + else + { // otherwise Z is just one vector, not a multivector and we don't keep it vec_Z_ = new vector_type(n_); } @@ -511,17 +557,17 @@ namespace ReSolve return 0; } - index_type LinSolverIterativeRandFGMRES::getRestart() const + index_type LinSolverIterativeRandFGMRES::getRestart() const { return restart_; } - index_type LinSolverIterativeRandFGMRES::getConvCond() const + index_type LinSolverIterativeRandFGMRES::getConvCond() const { return conv_cond_; } - bool LinSolverIterativeRandFGMRES::getFlexible() const + bool LinSolverIterativeRandFGMRES::getFlexible() const { return flexible_; } @@ -530,23 +576,23 @@ namespace ReSolve { switch (getParamId(id)) { - case TOL: - setTol(atof(value.c_str())); - break; - case MAXIT: - setMaxit(atoi(value.c_str())); - break; - case RESTART: - setRestart(atoi(value.c_str())); - break; - case CONV_COND: - setConvergenceCondition(atoi(value.c_str())); - break; - case FLEXIBLE: - setFlexible(value == "yes"); - break; - default: - std::cout << "Setting parameter failed!\n"; + case TOL: + setTol(atof(value.c_str())); + break; + case MAXIT: + setMaxit(atoi(value.c_str())); + break; + case RESTART: + setRestart(atoi(value.c_str())); + break; + case CONV_COND: + setConvergenceCondition(atoi(value.c_str())); + break; + case FLEXIBLE: + setFlexible(value == "yes"); + break; + default: + std::cout << "Setting parameter failed!\n"; } return 0; } @@ -555,8 +601,8 @@ namespace ReSolve { switch (getParamId(id)) { - default: - out::error() << "Trying to get unknown string parameter " << id << "\n"; + default: + out::error() << "Trying to get unknown string parameter " << id << "\n"; } return ""; } @@ -565,17 +611,17 @@ namespace ReSolve { switch (getParamId(id)) { - case MAXIT: - return getMaxit(); - break; - case RESTART: - return getRestart(); - break; - case CONV_COND: - return getConvCond(); - break; - default: - out::error() << "Trying to get unknown integer parameter " << id << "\n"; + case MAXIT: + return getMaxit(); + break; + case RESTART: + return getRestart(); + break; + case CONV_COND: + return getConvCond(); + break; + default: + out::error() << "Trying to get unknown integer parameter " << id << "\n"; } return -1; } @@ -584,11 +630,11 @@ namespace ReSolve { switch (getParamId(id)) { - case TOL: - return getTol(); - break; - default: - out::error() << "Trying to get unknown real parameter " << id << "\n"; + case TOL: + return getTol(); + break; + default: + out::error() << "Trying to get unknown real parameter " << id << "\n"; } return std::numeric_limits::quiet_NaN(); } @@ -597,11 +643,11 @@ namespace ReSolve { switch (getParamId(id)) { - case FLEXIBLE: - return getFlexible(); - break; - default: - out::error() << "Trying to get unknown boolean parameter " << id << "\n"; + case FLEXIBLE: + return getFlexible(); + break; + default: + out::error() << "Trying to get unknown boolean parameter " << id << "\n"; } return false; } @@ -634,9 +680,12 @@ namespace ReSolve { vec_V_ = new vector_type(n_, restart_ + 1); vec_V_->allocate(memspace_); - if (flexible_) { + if (flexible_) + { vec_Z_ = new vector_type(n_, restart_ + 1); - } else { + } + else + { // otherwise Z is just one vector, not multivector and we dont keep it vec_Z_ = new vector_type(n_); } @@ -645,9 +694,9 @@ namespace ReSolve vec_aux_->allocate(memspace_); h_H_ = new real_type[restart_ * (restart_ + 1)]; - h_c_ = new real_type[restart_]; // needed for givens - h_s_ = new real_type[restart_]; // same - h_rs_ = new real_type[restart_ + 1]; // for residual norm history + h_c_ = new real_type[restart_]; // needed for givens + h_s_ = new real_type[restart_]; // same + h_rs_ = new real_type[restart_ + 1]; // for residual norm history is_solver_set_ = true; return 0; @@ -655,18 +704,18 @@ namespace ReSolve int LinSolverIterativeRandFGMRES::freeSolverData() { - delete [] h_H_ ; - delete [] h_c_ ; - delete [] h_s_ ; - delete [] h_rs_; + delete[] h_H_; + delete[] h_c_; + delete[] h_s_; + delete[] h_rs_; delete vec_V_; delete vec_Z_; delete vec_aux_; - h_H_ = nullptr; - h_c_ = nullptr; - h_s_ = nullptr; - h_rs_ = nullptr; + h_H_ = nullptr; + h_c_ = nullptr; + h_s_ = nullptr; + h_rs_ = nullptr; vec_V_ = nullptr; vec_Z_ = nullptr; vec_aux_ = nullptr; @@ -684,34 +733,39 @@ namespace ReSolve { // Set randomized method k_rand_ = n_; - switch (sketching_method_) { - case cs: - if (std::ceil(restart_ * std::log(n_)) < k_rand_) { - k_rand_ = static_cast(std::ceil(restart_ * std::log(static_cast(n_)))); - } - sketching_handler_ = new SketchingHandler(sketching_method_, device_type_); - // set k and n - break; - case fwht: - if (std::ceil(2.0 * restart_ * std::log(n_) / std::log(restart_)) < k_rand_) { - k_rand_ = static_cast(std::ceil(2.0 * restart_ * std::log(n_) / std::log(restart_))); - } - sketching_handler_ = new SketchingHandler(sketching_method_, device_type_); - break; - default: - io::Logger::warning() << "Wrong sketching method, setting to default (CountSketch)\n"; - sketching_method_ = cs; - if (std::ceil(restart_ * std::log(n_)) < k_rand_) { - k_rand_ = static_cast(std::ceil(restart_ * std::log(n_))); - } - sketching_handler_ = new SketchingHandler(cs, device_type_); - break; + switch (sketching_method_) + { + case cs: + if (std::ceil(restart_ * std::log(n_)) < k_rand_) + { + k_rand_ = static_cast(std::ceil(restart_ * std::log(static_cast(n_)))); + } + sketching_handler_ = new SketchingHandler(sketching_method_, device_type_); + // set k and n + break; + case fwht: + if (std::ceil(2.0 * restart_ * std::log(n_) / std::log(restart_)) < k_rand_) + { + k_rand_ = static_cast(std::ceil(2.0 * restart_ * std::log(n_) / std::log(restart_))); + } + sketching_handler_ = new SketchingHandler(sketching_method_, device_type_); + break; + default: + io::Logger::warning() << "Wrong sketching method, setting to default (CountSketch)\n"; + sketching_method_ = cs; + if (std::ceil(restart_ * std::log(n_)) < k_rand_) + { + k_rand_ = static_cast(std::ceil(restart_ * std::log(n_))); + } + sketching_handler_ = new SketchingHandler(cs, device_type_); + break; } one_over_k_ = 1.0 / std::sqrt((real_type) k_rand_); - vec_S_ = new vector_type(k_rand_, restart_ + 1); + vec_S_ = new vector_type(k_rand_, restart_ + 1); vec_S_->allocate(memspace_); - if (sketching_method_ == cs) { + if (sketching_method_ == cs) + { vec_S_->setToZero(memspace_); } @@ -725,7 +779,7 @@ namespace ReSolve delete vec_S_; delete sketching_handler_; - vec_S_ = nullptr; + vec_S_ = nullptr; sketching_handler_ = nullptr; is_sketching_set_ = false; @@ -737,7 +791,6 @@ namespace ReSolve LU_solver_->solve(rhs, x); } - /** * @brief Set memory space and device tape based on how MatrixHandler * and VectorHandler are configured. @@ -750,19 +803,24 @@ namespace ReSolve bool is_vector_handler_cuda = matrix_handler_->getIsCudaEnabled(); bool is_vector_handler_hip = matrix_handler_->getIsHipEnabled(); - if ((is_matrix_handler_cuda != is_vector_handler_cuda) || - (is_matrix_handler_hip != is_vector_handler_hip )) { + if ((is_matrix_handler_cuda != is_vector_handler_cuda) || (is_matrix_handler_hip != is_vector_handler_hip)) + { out::error() << "Matrix and vector handler backends are incompatible!\n"; } - if (is_matrix_handler_cuda) { - memspace_ = memory::DEVICE; + if (is_matrix_handler_cuda) + { + memspace_ = memory::DEVICE; device_type_ = memory::CUDADEVICE; - } else if (is_matrix_handler_hip) { - memspace_ = memory::DEVICE; + } + else if (is_matrix_handler_hip) + { + memspace_ = memory::DEVICE; device_type_ = memory::HIPDEVICE; - } else { - memspace_ = memory::HOST; + } + else + { + memspace_ = memory::HOST; device_type_ = memory::NONE; } } diff --git a/resolve/LinSolverIterativeRandFGMRES.hpp b/resolve/LinSolverIterativeRandFGMRES.hpp index 5704697d7..c1d57d19a 100644 --- a/resolve/LinSolverIterativeRandFGMRES.hpp +++ b/resolve/LinSolverIterativeRandFGMRES.hpp @@ -2,7 +2,7 @@ * @file LinSolverIterativeRandFGMRES.hpp * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @brief Declaration of LinSolverIterativeRandFGMRES class - * + * */ #pragma once @@ -15,10 +15,12 @@ namespace ReSolve // Forward declarations class SketchingHandler; class GramSchmidt; + namespace matrix { class Sparse; } + namespace vector { class Vector; @@ -26,101 +28,111 @@ namespace ReSolve /** * @brief Randomized (F)GMRES - * + * * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) - * + * * @note Pointers to MatrixHandler and VectorHandler objects are inherited from * LinSolver base class. - * + * */ class LinSolverIterativeRandFGMRES : public LinSolverIterative { - private: - using vector_type = vector::Vector; - - public: - enum SketchingMethod {cs = 0, // count sketch - fwht}; // fast Walsh-Hadamard transform - - LinSolverIterativeRandFGMRES(MatrixHandler* matrix_handler, - VectorHandler* vector_handler, - SketchingMethod rand_method, - GramSchmidt* gs); - - LinSolverIterativeRandFGMRES(index_type restart, - real_type tol, - index_type maxit, - index_type conv_cond, - MatrixHandler* matrix_handler, - VectorHandler* vector_handler, - SketchingMethod rand_method, - GramSchmidt* gs); - - ~LinSolverIterativeRandFGMRES(); - - int solve(vector_type* rhs, vector_type* x) override; - int setup(matrix::Sparse* A) override; - int resetMatrix(matrix::Sparse* new_A) override; - int setupPreconditioner(std::string name, LinSolverDirect* LU_solver) override; - int setOrthogonalization(GramSchmidt* gs) override; - - int setRestart(index_type restart); - int setFlexible(bool is_flexible); - int setConvergenceCondition(index_type conv_cond); - index_type getRestart() const; - index_type getConvCond() const; - bool getFlexible() const; - - index_type getKrand(); - int setSketchingMethod(SketchingMethod method); - - int setCliParam(const std::string id, const std::string value) override; - std::string getCliParamString(const std::string id) const override; - index_type getCliParamInt(const std::string id) const override; - real_type getCliParamReal(const std::string id) const override; - bool getCliParamBool(const std::string id) const override; - int printCliParam(const std::string id) const override; - - private: - enum ParamaterIDs {TOL=0, MAXIT, RESTART, CONV_COND, FLEXIBLE}; - - index_type restart_{10}; ///< GMRES restart - index_type conv_cond_{0}; ///< GMRES convergence condition - bool flexible_{true}; ///< If using flexible GMRES (FGMRES) algorithm - - private: - int allocateSolverData(); - int freeSolverData(); - int allocateSketchingData(); - int freeSketchingData(); - void setMemorySpace(); - void initParamList(); - void precV(vector_type* rhs, vector_type* x); ///< Apply preconditioner - - memory::MemorySpace memspace_; - - vector_type* vec_V_{nullptr}; - vector_type* vec_Z_{nullptr}; - // for performing Gram-Schmidt - vector_type* vec_S_{nullptr}; ///< this is where sketched vectors are stored - - real_type* h_H_{nullptr}; - real_type* h_c_{nullptr}; - real_type* h_s_{nullptr}; - real_type* h_rs_{nullptr}; - vector_type* vec_aux_{nullptr}; - - GramSchmidt* GS_{nullptr}; - LinSolverDirect* LU_solver_{nullptr}; - index_type n_{0}; - real_type one_over_k_{1.0}; - - index_type k_rand_{0}; ///< size of sketch space. We need to know it so we can allocate S! - MemoryHandler mem_; ///< Device memory manager object - SketchingHandler* sketching_handler_{nullptr}; - SketchingMethod sketching_method_; - memory::DeviceType device_type_{memory::NONE}; - bool is_solver_set_{false}; - bool is_sketching_set_{false}; + private: + using vector_type = vector::Vector; + + public: + enum SketchingMethod + { + cs = 0, // count sketch + fwht + }; // fast Walsh-Hadamard transform + + LinSolverIterativeRandFGMRES(MatrixHandler* matrix_handler, + VectorHandler* vector_handler, + SketchingMethod rand_method, + GramSchmidt* gs); + + LinSolverIterativeRandFGMRES(index_type restart, + real_type tol, + index_type maxit, + index_type conv_cond, + MatrixHandler* matrix_handler, + VectorHandler* vector_handler, + SketchingMethod rand_method, + GramSchmidt* gs); + + ~LinSolverIterativeRandFGMRES(); + + int solve(vector_type* rhs, vector_type* x) override; + int setup(matrix::Sparse* A) override; + int resetMatrix(matrix::Sparse* new_A) override; + int setupPreconditioner(std::string name, LinSolverDirect* LU_solver) override; + int setOrthogonalization(GramSchmidt* gs) override; + + int setRestart(index_type restart); + int setFlexible(bool is_flexible); + int setConvergenceCondition(index_type conv_cond); + index_type getRestart() const; + index_type getConvCond() const; + bool getFlexible() const; + + index_type getKrand(); + int setSketchingMethod(SketchingMethod method); + + int setCliParam(const std::string id, const std::string value) override; + std::string getCliParamString(const std::string id) const override; + index_type getCliParamInt(const std::string id) const override; + real_type getCliParamReal(const std::string id) const override; + bool getCliParamBool(const std::string id) const override; + int printCliParam(const std::string id) const override; + + private: + enum ParamaterIDs + { + TOL = 0, + MAXIT, + RESTART, + CONV_COND, + FLEXIBLE + }; + + index_type restart_{10}; ///< GMRES restart + index_type conv_cond_{0}; ///< GMRES convergence condition + bool flexible_{true}; ///< If using flexible GMRES (FGMRES) algorithm + + private: + int allocateSolverData(); + int freeSolverData(); + int allocateSketchingData(); + int freeSketchingData(); + void setMemorySpace(); + void initParamList(); + void precV(vector_type* rhs, vector_type* x); ///< Apply preconditioner + + memory::MemorySpace memspace_; + + vector_type* vec_V_{nullptr}; + vector_type* vec_Z_{nullptr}; + // for performing Gram-Schmidt + vector_type* vec_S_{nullptr}; ///< this is where sketched vectors are stored + + real_type* h_H_{nullptr}; + real_type* h_c_{nullptr}; + real_type* h_s_{nullptr}; + real_type* h_rs_{nullptr}; + vector_type* vec_aux_{nullptr}; + + GramSchmidt* GS_{nullptr}; + LinSolverDirect* LU_solver_{nullptr}; + index_type n_{0}; + real_type one_over_k_{1.0}; + + index_type k_rand_{0}; ///< size of sketch space. We need to know it so we can allocate S! + MemoryHandler mem_; ///< Device memory manager object + SketchingHandler* sketching_handler_{nullptr}; + SketchingMethod sketching_method_; + memory::DeviceType device_type_{memory::NONE}; + bool is_solver_set_{false}; + bool is_sketching_set_{false}; }; } // namespace ReSolve diff --git a/resolve/MemoryUtils.hpp b/resolve/MemoryUtils.hpp index 38f294a7b..702d80ea6 100644 --- a/resolve/MemoryUtils.hpp +++ b/resolve/MemoryUtils.hpp @@ -1,108 +1,127 @@ #pragma once -#include #include // <- declares `memcpy` +#include + namespace ReSolve { namespace memory { - enum MemorySpace{HOST = 0, DEVICE}; - enum MemoryDirection{HOST_TO_HOST = 0, HOST_TO_DEVICE, DEVICE_TO_HOST, DEVICE_TO_DEVICE}; - enum DeviceType{NONE = 0, CUDADEVICE, HIPDEVICE}; - } -} + enum MemorySpace + { + HOST = 0, + DEVICE + }; + + enum MemoryDirection + { + HOST_TO_HOST = 0, + HOST_TO_DEVICE, + DEVICE_TO_HOST, + DEVICE_TO_DEVICE + }; + + enum DeviceType + { + NONE = 0, + CUDADEVICE, + HIPDEVICE + }; + } // namespace memory +} // namespace ReSolve namespace ReSolve { /** * @class MemoryUtils - * + * * @brief Provides basic memory allocation, free and copy functions. - * + * * This class provedes abstractions for memory management functiosn for * different GPU programming models. - * + * * @tparam Policy - Memory management policy (vendor specific) - * + * * @author Slaven Peles */ template class MemoryUtils { - public: - MemoryUtils() = default; - ~MemoryUtils() = default; - - void deviceSynchronize(); - int getLastDeviceError(); - int deleteOnDevice(void* v); - - template - int allocateArrayOnDevice(T** v, I n); - - template - int allocateBufferOnDevice(T** v, I n); - - template - int setZeroArrayOnDevice(T* v, I n); - - template - int setArrayToConstOnDevice(T* v, T c, I n); - - template - int copyArrayDeviceToHost(T* dst, const T* src, I n); - - template - int copyArrayDeviceToDevice(T* dst, const T* src, I n); - - template - int copyArrayHostToDevice(T* dst, const T* src, I n); - - /// - /// Methods implemented here are always needed - /// - - template - int allocateArrayOnHost(T** v, I n) + public: + MemoryUtils() = default; + ~MemoryUtils() = default; + + void deviceSynchronize(); + int getLastDeviceError(); + int deleteOnDevice(void* v); + + template + int allocateArrayOnDevice(T** v, I n); + + template + int allocateBufferOnDevice(T** v, I n); + + template + int setZeroArrayOnDevice(T* v, I n); + + template + int setArrayToConstOnDevice(T* v, T c, I n); + + template + int copyArrayDeviceToHost(T* dst, const T* src, I n); + + template + int copyArrayDeviceToDevice(T* dst, const T* src, I n); + + template + int copyArrayHostToDevice(T* dst, const T* src, I n); + + /// + /// Methods implemented here are always needed + /// + + template + int allocateArrayOnHost(T** v, I n) + { + std::size_t arraysize = static_cast(n) * sizeof(T); + *v = new T[arraysize]; + return *v == nullptr ? 1 : 0; + } + + template + int deleteOnHost(T* v) + { + delete[] v; + v = nullptr; + return 0; + } + + template + int copyArrayHostToHost(T* dst, const T* src, I n) + { + std::size_t arraysize = static_cast(n) * sizeof(T); + memcpy(dst, src, arraysize); + return 0; + } + + template + int setZeroArrayOnHost(T* v, I n) + { + std::size_t arraysize = static_cast(n) * sizeof(T); + memset(v, 0, arraysize); + return 0; + } + + template + int setArrayToConstOnHost(T* v, T c, I n) + { + for (I i = 0; i < n; ++i) { - std::size_t arraysize = static_cast(n) * sizeof(T); - *v = new T[arraysize]; - return *v == nullptr ? 1 : 0; - } - - template - int deleteOnHost(T* v) - { - delete [] v; - v = nullptr; - return 0; - } - - template - int copyArrayHostToHost(T* dst, const T* src, I n) - { - std::size_t arraysize = static_cast(n) * sizeof(T); - memcpy(dst, src, arraysize); - return 0; - } - - template - int setZeroArrayOnHost(T* v, I n) - { - std::size_t arraysize = static_cast(n) * sizeof(T); - memset(v, 0, arraysize); - return 0; - } - - template - int setArrayToConstOnHost(T* v, T c, I n) - { - for (I i = 0; i < n; ++i) { - v[i] = c; - } - return 0; + v[i] = c; } + return 0; + } }; } // namespace ReSolve @@ -127,4 +146,3 @@ using MemoryHandler = ReSolve::MemoryUtils; using MemoryHandler = ReSolve::MemoryUtils; #endif - diff --git a/resolve/Profiling.hpp b/resolve/Profiling.hpp index cb6509dcc..6765d5ba9 100644 --- a/resolve/Profiling.hpp +++ b/resolve/Profiling.hpp @@ -6,15 +6,17 @@ #ifdef RESOLVE_USE_HIP #include #define RESOLVE_RANGE_PUSH(x) roctxRangePush(x) -#define RESOLVE_RANGE_POP(x) roctxRangePop(); \ - roctxMarkA(x) +#define RESOLVE_RANGE_POP(x) \ + roctxRangePop(); \ + roctxMarkA(x) #endif // RESOLVE_USE_HIP #ifdef RESOLVE_USE_CUDA #include #define RESOLVE_RANGE_PUSH(x) nvtxRangePush(x) -#define RESOLVE_RANGE_POP(x) nvtxRangePop(); \ - nvtxMarkA(x) +#define RESOLVE_RANGE_POP(x) \ + nvtxRangePop(); \ + nvtxMarkA(x) #endif // RESOLVE_USE_CUDA #else diff --git a/resolve/RefactorizationSolver.cpp b/resolve/RefactorizationSolver.cpp index 45311f6f3..0b1fab8f5 100644 --- a/resolve/RefactorizationSolver.cpp +++ b/resolve/RefactorizationSolver.cpp @@ -1,8 +1,8 @@ #include "RefactorizationSolver.hpp" -namespace ReSolve +namespace ReSolve { RefactorizationSolver::RefactorizationSolver() { } -} +} // namespace ReSolve diff --git a/resolve/RefactorizationSolver.hpp b/resolve/RefactorizationSolver.hpp index 5beeb8d62..a3fb1b79d 100644 --- a/resolve/RefactorizationSolver.hpp +++ b/resolve/RefactorizationSolver.hpp @@ -7,30 +7,30 @@ namespace ReSolve RefactorizationSolver { using vector_type = vector::Vector; - - public: - RefactorizationSolver(); - ~RefactorizationSolver(); - int setup(std::string first_solver, - std::string refact_solver_, - std::string use_ir_); - int setup_ir(real_type ir_tol, index_type ir_maxit, index_type ir_gs_); - - int solve(matrix::Sparse* A, vector_type* vec_rhs, vector_type* vec_x); - - private: - std::string first_solver_name_; - std::string refact_solver_name_; - std::string use_ir_; - //IR parameters - real_type ir_tol_; - index_type ir_maxit_; - index_type ir_gs_; + public: + RefactorizationSolver(); + ~RefactorizationSolver(); + int setup(std::string first_solver, + std::string refact_solver_, + std::string use_ir_); - LinSolverDirect* first_solver_; - LinSolverDirect* refact_solver_; - LinSolverIterative* ir_solver_; - bool factorization_exists_; + int setup_ir(real_type ir_tol, index_type ir_maxit, index_type ir_gs_); + + int solve(matrix::Sparse * A, vector_type * vec_rhs, vector_type * vec_x); + + private: + std::string first_solver_name_; + std::string refact_solver_name_; + std::string use_ir_; + // IR parameters + real_type ir_tol_; + index_type ir_maxit_; + index_type ir_gs_; + + LinSolverDirect* first_solver_; + LinSolverDirect* refact_solver_; + LinSolverIterative* ir_solver_; + bool factorization_exists_; }; -} +} // namespace ReSolve diff --git a/resolve/SystemSolver.cpp b/resolve/SystemSolver.cpp index f3fead184..e5fa725ad 100644 --- a/resolve/SystemSolver.cpp +++ b/resolve/SystemSolver.cpp @@ -1,13 +1,13 @@ #include #include -#include +#include +#include +#include +#include #include +#include #include -#include -#include -#include -#include #include #ifdef RESOLVE_USE_KLU @@ -17,16 +17,16 @@ #include #ifdef RESOLVE_USE_CUDA -#include #include #include #include +#include #endif #ifdef RESOLVE_USE_HIP -#include #include #include +#include #endif // Handlers @@ -34,10 +34,8 @@ #include // Utilities -#include - #include "SystemSolver.hpp" - +#include namespace ReSolve { @@ -45,11 +43,11 @@ namespace ReSolve using out = io::Logger; SystemSolver::SystemSolver(LinAlgWorkspaceCpu* workspaceCpu, - std::string factor, - std::string refactor, - std::string solve, - std::string precond, - std::string ir) + std::string factor, + std::string refactor, + std::string solve, + std::string precond, + std::string ir) : workspaceCpu_(workspaceCpu), factorizationMethod_(factor), refactorizationMethod_(refactor), @@ -57,7 +55,8 @@ namespace ReSolve precondition_method_(precond), irMethod_(ir) { - if ((refactor != "none") && (precond != "none")) { + if ((refactor != "none") && (precond != "none")) + { out::warning() << "Incorrect input: " << "Refactorization and preconditioning cannot both be enabled.\n" << "Setting both to 'none' ...\n"; @@ -75,12 +74,12 @@ namespace ReSolve } #ifdef RESOLVE_USE_CUDA - SystemSolver::SystemSolver(LinAlgWorkspaceCUDA* workspaceCuda, - std::string factor, - std::string refactor, - std::string solve, - std::string precond, - std::string ir) + SystemSolver::SystemSolver(LinAlgWorkspaceCUDA* workspaceCuda, + std::string factor, + std::string refactor, + std::string solve, + std::string precond, + std::string ir) : workspaceCuda_(workspaceCuda), factorizationMethod_(factor), refactorizationMethod_(refactor), @@ -88,7 +87,8 @@ namespace ReSolve precondition_method_(precond), irMethod_(ir) { - if ((refactor != "none") && (precond != "none")) { + if ((refactor != "none") && (precond != "none")) + { out::warning() << "Incorrect input: " << "Refactorization and preconditioning cannot both be enabled.\n" << "Setting both to 'none' ...\n"; @@ -107,12 +107,12 @@ namespace ReSolve #endif #ifdef RESOLVE_USE_HIP - SystemSolver::SystemSolver(LinAlgWorkspaceHIP* workspaceHip, - std::string factor, - std::string refactor, - std::string solve, - std::string precond, - std::string ir) + SystemSolver::SystemSolver(LinAlgWorkspaceHIP* workspaceHip, + std::string factor, + std::string refactor, + std::string solve, + std::string precond, + std::string ir) : workspaceHip_(workspaceHip), factorizationMethod_(factor), refactorizationMethod_(refactor), @@ -120,7 +120,8 @@ namespace ReSolve precondition_method_(precond), irMethod_(ir) { - if ((refactor != "none") && (precond != "none")) { + if ((refactor != "none") && (precond != "none")) + { out::warning() << "Incorrect input: " << "Refactorization and preconditioning cannot both be enabled.\n" << "Setting both to 'none' ...\n"; @@ -142,27 +143,33 @@ namespace ReSolve { delete resVector_; - if (factorizationMethod_ != "none") { + if (factorizationMethod_ != "none") + { delete factorizationSolver_; } - if (refactorizationMethod_ != "none") { + if (refactorizationMethod_ != "none") + { delete refactorizationSolver_; } - if (solveMethod_ == "randgmres" || solveMethod_ == "fgmres") { + if (solveMethod_ == "randgmres" || solveMethod_ == "fgmres") + { delete iterativeSolver_; } - if (gs_ != nullptr) { + if (gs_ != nullptr) + { delete gs_; } - if (irMethod_ != "none") { + if (irMethod_ != "none") + { delete iterativeSolver_; } - if (precondition_method_ != "none") { + if (precondition_method_ != "none") + { delete preconditioner_; } @@ -173,23 +180,31 @@ namespace ReSolve int SystemSolver::setMatrix(matrix::Sparse* A) { int status = 0; - A_ = A; + A_ = A; resVector_ = new vector_type(A->getNumRows()); - if (memspace_ == "cpu") { + if (memspace_ == "cpu") + { resVector_->allocate(memory::HOST); - } else { + } + else + { resVector_->allocate(memory::DEVICE); matrixHandler_->setValuesChanged(true, memory::DEVICE); } // If we use iterative solver, we can set it up here - if (solveMethod_ == "randgmres") { - auto* rgmres = dynamic_cast(iterativeSolver_); - status += rgmres->setup(A_); - } else if (solveMethod_ == "fgmres") { - auto* fgmres = dynamic_cast(iterativeSolver_); - status += fgmres->setup(A_); - } else { + if (solveMethod_ == "randgmres") + { + auto* rgmres = dynamic_cast(iterativeSolver_); + status += rgmres->setup(A_); + } + else if (solveMethod_ == "fgmres") + { + auto* fgmres = dynamic_cast(iterativeSolver_); + status += fgmres->setup(A_); + } + else + { // do nothing } @@ -205,62 +220,84 @@ namespace ReSolve int SystemSolver::initialize() { // First delete old objects - if (factorizationSolver_) { + if (factorizationSolver_) + { delete factorizationSolver_; factorizationSolver_ = nullptr; } - if (refactorizationSolver_) { + if (refactorizationSolver_) + { delete refactorizationSolver_; refactorizationSolver_ = nullptr; } - if (preconditioner_) { + if (preconditioner_) + { delete preconditioner_; preconditioner_ = nullptr; } - if (iterativeSolver_) { + if (iterativeSolver_) + { delete iterativeSolver_; iterativeSolver_ = nullptr; } - if (gs_) { + if (gs_) + { delete gs_; gs_ = nullptr; } // Create factorization solver - if (factorizationMethod_ == "none") { + if (factorizationMethod_ == "none") + { // do nothing #ifdef RESOLVE_USE_KLU - } else if (factorizationMethod_ == "klu") { + } + else if (factorizationMethod_ == "klu") + { factorizationSolver_ = new ReSolve::LinSolverDirectKLU(); #endif - } else { + } + else + { out::error() << "Unrecognized factorization " << factorizationMethod_ << "\n"; return 1; } // Create refactorization solver - if (refactorizationMethod_ == "none") { + if (refactorizationMethod_ == "none") + { // do nothing - } else if (refactorizationMethod_ == "klu") { + } + else if (refactorizationMethod_ == "klu") + { // do nothing for now, KLU is the only factorization solver available #ifdef RESOLVE_USE_CUDA - } else if (refactorizationMethod_ == "glu") { + } + else if (refactorizationMethod_ == "glu") + { refactorizationSolver_ = new ReSolve::LinSolverDirectCuSolverGLU(workspaceCuda_); - } else if (refactorizationMethod_ == "cusolverrf") { + } + else if (refactorizationMethod_ == "cusolverrf") + { refactorizationSolver_ = new ReSolve::LinSolverDirectCuSolverRf(); #endif #ifdef RESOLVE_USE_HIP - } else if (refactorizationMethod_ == "rocsolverrf") { + } + else if (refactorizationMethod_ == "rocsolverrf") + { refactorizationSolver_ = new ReSolve::LinSolverDirectRocSolverRf(workspaceHip_); #endif - } else { + } + else + { out::error() << "Refactorization method " << refactorizationMethod_ << " not recognized ...\n"; return 1; } // Create iterative refinement - if (irMethod_ == "fgmres") { + if (irMethod_ == "fgmres") + { setGramSchmidtMethod(gsMethod_); iterativeSolver_ = new LinSolverIterativeFGMRES(matrixHandler_, vectorHandler_, @@ -268,39 +305,57 @@ namespace ReSolve } // Create preconditioner - if (precondition_method_ == "none") { + if (precondition_method_ == "none") + { // do nothing - } else if (precondition_method_ == "ilu0") { - if (memspace_ == "cpu") { + } + else if (precondition_method_ == "ilu0") + { + if (memspace_ == "cpu") + { // preconditioner_ = new LinSolverDirectSerialILU0(workspaceCpu_); preconditioner_ = new LinSolverDirectCpuILU0(workspaceCpu_); #ifdef RESOLVE_USE_CUDA - } else if (memspace_ == "cuda") { + } + else if (memspace_ == "cuda") + { preconditioner_ = new LinSolverDirectCuSparseILU0(workspaceCuda_); #endif #ifdef RESOLVE_USE_HIP - } else if (memspace_ == "hip") { + } + else if (memspace_ == "hip") + { preconditioner_ = new LinSolverDirectRocSparseILU0(workspaceHip_); #endif - } else { + } + else + { out::error() << "Memory space " << memspace_ - << " not recognized ...\n"; + << " not recognized ...\n"; return 1; } - } else { + } + else + { out::error() << "Preconditioner method " << precondition_method_ << " not recognized ...\n"; return 1; } // Create iterative solver - if (solveMethod_ == "randgmres") { + if (solveMethod_ == "randgmres") + { LinSolverIterativeRandFGMRES::SketchingMethod sketch; - if (sketching_method_ == "count") { + if (sketching_method_ == "count") + { sketch = LinSolverIterativeRandFGMRES::cs; - } else if (sketching_method_ == "fwht") { + } + else if (sketching_method_ == "fwht") + { sketch = LinSolverIterativeRandFGMRES::fwht; - } else { + } + else + { out::warning() << "Sketching method " << sketching_method_ << " not recognized!\n" << "Using default.\n"; sketch = LinSolverIterativeRandFGMRES::cs; @@ -310,12 +365,16 @@ namespace ReSolve vectorHandler_, sketch, gs_); - } else if (solveMethod_ == "fgmres") { + } + else if (solveMethod_ == "fgmres") + { setGramSchmidtMethod(gsMethod_); iterativeSolver_ = new LinSolverIterativeFGMRES(matrixHandler_, vectorHandler_, gs_); - } else { + } + else + { // do nothing } @@ -324,12 +383,14 @@ namespace ReSolve int SystemSolver::analyze() { - if (A_ == nullptr) { + if (A_ == nullptr) + { out::error() << "System matrix not set!\n"; return 1; } - if (factorizationMethod_ == "klu") { + if (factorizationMethod_ == "klu") + { factorizationSolver_->setup(A_); return factorizationSolver_->analyze(); } @@ -338,7 +399,8 @@ namespace ReSolve int SystemSolver::factorize() { - if (factorizationMethod_ == "klu") { + if (factorizationMethod_ == "klu") + { is_solve_on_device_ = false; return factorizationSolver_->factorize(); } @@ -347,15 +409,15 @@ namespace ReSolve int SystemSolver::refactorize() { - if (refactorizationMethod_ == "klu") { + if (refactorizationMethod_ == "klu") + { return factorizationSolver_->refactorize(); } - if (refactorizationMethod_ == "glu" || - refactorizationMethod_ == "cusolverrf" || - refactorizationMethod_ == "rocsolverrf") { - is_solve_on_device_ = true; - return refactorizationSolver_->refactorize(); + if (refactorizationMethod_ == "glu" || refactorizationMethod_ == "cusolverrf" || refactorizationMethod_ == "rocsolverrf") + { + is_solve_on_device_ = true; + return refactorizationSolver_->refactorize(); } return 1; @@ -382,22 +444,25 @@ namespace ReSolve { int status = 0; // Get factors and permutation vectors - L_ = factorizationSolver_->getLFactor(); - U_ = factorizationSolver_->getUFactor(); - P_ = factorizationSolver_->getPOrdering(); - Q_ = factorizationSolver_->getQOrdering(); + L_ = factorizationSolver_->getLFactor(); + U_ = factorizationSolver_->getUFactor(); + P_ = factorizationSolver_->getPOrdering(); + Q_ = factorizationSolver_->getQOrdering(); - if (L_ == nullptr) { + if (L_ == nullptr) + { out::error() << "Factorization failed, cannot extract factors ...\n"; status += 1; } #ifdef RESOLVE_USE_CUDA - if (refactorizationMethod_ == "glu") { - is_solve_on_device_ = true; - status += refactorizationSolver_->setup(A_, L_, U_, P_, Q_); + if (refactorizationMethod_ == "glu") + { + is_solve_on_device_ = true; + status += refactorizationSolver_->setup(A_, L_, U_, P_, Q_); } - if (refactorizationMethod_ == "cusolverrf") { + if (refactorizationMethod_ == "cusolverrf") + { status += refactorizationSolver_->setup(A_, L_, U_, P_, Q_); LinSolverDirectCuSolverRf* Rf = dynamic_cast(refactorizationSolver_); @@ -408,15 +473,17 @@ namespace ReSolve #endif #ifdef RESOLVE_USE_HIP - if (refactorizationMethod_ == "rocsolverrf") { + if (refactorizationMethod_ == "rocsolverrf") + { is_solve_on_device_ = false; - auto* Rf = dynamic_cast(refactorizationSolver_); + auto* Rf = dynamic_cast(refactorizationSolver_); Rf->setSolveMode(1); status += refactorizationSolver_->setup(A_, L_, U_, P_, Q_, resVector_); } #endif - if (irMethod_ == "fgmres") { + if (irMethod_ == "fgmres") + { status += iterativeSolver_->setup(A_); status += iterativeSolver_->setupPreconditioner("LU", refactorizationSolver_); } @@ -442,26 +509,34 @@ namespace ReSolve int status = 0; // Use Krylov solver if selected - if (solveMethod_ == "randgmres" || solveMethod_ == "fgmres") { + if (solveMethod_ == "randgmres" || solveMethod_ == "fgmres") + { status += iterativeSolver_->resetMatrix(A_); status += iterativeSolver_->solve(rhs, x); return status; } - if (solveMethod_ == "klu") { + if (solveMethod_ == "klu") + { status += factorizationSolver_->solve(rhs, x); } - if (solveMethod_ == "glu" || solveMethod_ == "cusolverrf" || solveMethod_ == "rocsolverrf") { - if (is_solve_on_device_) { + if (solveMethod_ == "glu" || solveMethod_ == "cusolverrf" || solveMethod_ == "rocsolverrf") + { + if (is_solve_on_device_) + { status += refactorizationSolver_->solve(rhs, x); - } else { + } + else + { status += factorizationSolver_->solve(rhs, x); } } - if (irMethod_ == "fgmres") { - if (is_solve_on_device_) { + if (irMethod_ == "fgmres") + { + if (is_solve_on_device_) + { status += refine(rhs, x); } } @@ -471,9 +546,11 @@ namespace ReSolve int SystemSolver::preconditionerSetup() { int status = 0; - if (precondition_method_ == "ilu0") { + if (precondition_method_ == "ilu0") + { status += preconditioner_->setup(A_); - if (memspace_ != "cpu") { + if (memspace_ != "cpu") + { is_solve_on_device_ = true; } iterativeSolver_->setupPreconditioner("LU", preconditioner_); @@ -525,25 +602,35 @@ namespace ReSolve void SystemSolver::setRefactorizationMethod(std::string method) { refactorizationMethod_ = method; - if (refactorizationSolver_) { + if (refactorizationSolver_) + { delete refactorizationSolver_; refactorizationSolver_ = nullptr; } // Create refactorization solver - if (refactorizationMethod_ == "klu") { + if (refactorizationMethod_ == "klu") + { // do nothing for now #ifdef RESOLVE_USE_CUDA - } else if (refactorizationMethod_ == "glu") { + } + else if (refactorizationMethod_ == "glu") + { refactorizationSolver_ = new ReSolve::LinSolverDirectCuSolverGLU(workspaceCuda_); - } else if (refactorizationMethod_ == "cusolverrf") { + } + else if (refactorizationMethod_ == "cusolverrf") + { refactorizationSolver_ = new ReSolve::LinSolverDirectCuSolverRf(); #endif #ifdef RESOLVE_USE_HIP - } else if (refactorizationMethod_ == "rocsolverrf") { + } + else if (refactorizationMethod_ == "rocsolverrf") + { refactorizationSolver_ = new ReSolve::LinSolverDirectRocSolverRf(workspaceHip_); #endif - } else { + } + else + { out::error() << "Refactorization method " << refactorizationMethod_ << " not recognized ...\n"; } @@ -564,13 +651,19 @@ namespace ReSolve if (iterativeSolver_) delete iterativeSolver_; - if (method == "randgmres") { + if (method == "randgmres") + { LinSolverIterativeRandFGMRES::SketchingMethod sketch; - if (sketching_method_ == "count") { + if (sketching_method_ == "count") + { sketch = LinSolverIterativeRandFGMRES::cs; - } else if (sketching_method_ == "fwht") { + } + else if (sketching_method_ == "fwht") + { sketch = LinSolverIterativeRandFGMRES::fwht; - } else { + } + else + { out::warning() << "Sketching method " << sketching_method_ << " not recognized!\n" << "Using default.\n"; sketch = LinSolverIterativeRandFGMRES::cs; @@ -581,12 +674,16 @@ namespace ReSolve vectorHandler_, sketch, gs_); - } else if (solveMethod_ == "fgmres") { + } + else if (solveMethod_ == "fgmres") + { setGramSchmidtMethod(gsMethod_); iterativeSolver_ = new LinSolverIterativeFGMRES(matrixHandler_, vectorHandler_, gs_); - } else { + } + else + { out::error() << "Solve method " << solveMethod_ << " not recognized ...\n"; return 1; @@ -613,7 +710,8 @@ namespace ReSolve if (method == "none") return; - if (memspace_ == "cpu") { + if (memspace_ == "cpu") + { method = "none"; out::warning() << "Iterative refinement not supported on CPU. " << "Turning off ...\n"; @@ -622,13 +720,16 @@ namespace ReSolve gsMethod_ = gsMethod; - if (method == "fgmres") { + if (method == "fgmres") + { setGramSchmidtMethod(gsMethod); iterativeSolver_ = new LinSolverIterativeFGMRES(matrixHandler_, vectorHandler_, gs_); - irMethod_ = method; - } else { + irMethod_ = method; + } + else + { out::error() << "Iterative refinement method " << method << " not recognized.\n"; } } @@ -636,18 +737,26 @@ namespace ReSolve real_type SystemSolver::getVectorNorm(vector_type* rhs) { using namespace ReSolve::constants; - real_type norm_b = 0.0; - if (memspace_ == "cpu") { + real_type norm_b = 0.0; + if (memspace_ == "cpu") + { norm_b = std::sqrt(vectorHandler_->dot(rhs, rhs, memory::HOST)); #if defined(RESOLVE_USE_HIP) || defined(RESOLVE_USE_CUDA) - } else if (memspace_ == "cuda" || memspace_ == "hip") { - if (is_solve_on_device_) { + } + else if (memspace_ == "cuda" || memspace_ == "hip") + { + if (is_solve_on_device_) + { norm_b = std::sqrt(vectorHandler_->dot(rhs, rhs, memory::DEVICE)); - } else { + } + else + { norm_b = std::sqrt(vectorHandler_->dot(rhs, rhs, memory::HOST)); } #endif - } else { + } + else + { out::error() << "Unrecognized device " << memspace_ << "\n"; return -1.0; } @@ -658,54 +767,70 @@ namespace ReSolve { using namespace ReSolve::constants; assert(rhs->getSize() == resVector_->getSize()); - real_type norm_b = 0.0; - real_type resnorm = 0.0; - memory::MemorySpace ms = memory::HOST; - if (memspace_ == "cpu") { + real_type norm_b = 0.0; + real_type resnorm = 0.0; + memory::MemorySpace ms = memory::HOST; + if (memspace_ == "cpu") + { resVector_->copyDataFrom(rhs, memory::HOST, memory::HOST); norm_b = std::sqrt(vectorHandler_->dot(resVector_, resVector_, memory::HOST)); #if defined(RESOLVE_USE_HIP) || defined(RESOLVE_USE_CUDA) - } else if (memspace_ == "cuda" || memspace_ == "hip") { - if (is_solve_on_device_) { + } + else if (memspace_ == "cuda" || memspace_ == "hip") + { + if (is_solve_on_device_) + { resVector_->copyDataFrom(rhs, memory::DEVICE, memory::DEVICE); norm_b = std::sqrt(vectorHandler_->dot(resVector_, resVector_, memory::DEVICE)); - } else { + } + else + { resVector_->copyDataFrom(rhs, memory::HOST, memory::DEVICE); norm_b = std::sqrt(vectorHandler_->dot(resVector_, resVector_, memory::HOST)); // ms = memory::HOST; } ms = memory::DEVICE; #endif - } else { + } + else + { out::error() << "Unrecognized device " << memspace_ << "\n"; return -1.0; } matrixHandler_->setValuesChanged(true, ms); matrixHandler_->matvec(A_, x, resVector_, &ONE, &MINUS_ONE, ms); resnorm = std::sqrt(vectorHandler_->dot(resVector_, resVector_, ms)); - return resnorm/norm_b; + return resnorm / norm_b; } real_type SystemSolver::getNormOfScaledResiduals(vector_type* rhs, vector_type* x) { using namespace ReSolve::constants; assert(rhs->getSize() == resVector_->getSize()); - real_type norm_x = 0.0; - real_type norm_A = 0.0; - real_type resnorm = 0.0; - memory::MemorySpace ms = memory::HOST; - if (memspace_ == "cpu") { + real_type norm_x = 0.0; + real_type norm_A = 0.0; + real_type resnorm = 0.0; + memory::MemorySpace ms = memory::HOST; + if (memspace_ == "cpu") + { resVector_->copyDataFrom(rhs, memory::HOST, memory::HOST); #if defined(RESOLVE_USE_HIP) || defined(RESOLVE_USE_CUDA) - } else if (memspace_ == "cuda" || memspace_ == "hip") { - if (is_solve_on_device_) { + } + else if (memspace_ == "cuda" || memspace_ == "hip") + { + if (is_solve_on_device_) + { resVector_->copyDataFrom(rhs, memory::DEVICE, memory::DEVICE); - } else { + } + else + { resVector_->copyDataFrom(rhs, memory::HOST, memory::DEVICE); } ms = memory::DEVICE; #endif - } else { + } + else + { out::error() << "Unrecognized device " << memspace_ << "\n"; return -1.0; } @@ -734,18 +859,24 @@ namespace ReSolve */ int SystemSolver::setSketchingMethod(std::string sketching_method) { - if (solveMethod_ != "randgmres") { + if (solveMethod_ != "randgmres") + { out::warning() << "Trying to set sketching method to an incompatible solver.\n"; out::warning() << "The setting will be ignored.\n"; return 1; } LinSolverIterativeRandFGMRES::SketchingMethod tmp; - if (sketching_method == "count") { + if (sketching_method == "count") + { tmp = LinSolverIterativeRandFGMRES::cs; - } else if (sketching_method == "fwht") { + } + else if (sketching_method == "fwht") + { tmp = LinSolverIterativeRandFGMRES::fwht; - } else { + } + else + { out::warning() << "Sketching method " << sketching_method << " not recognized!\n" << "Using default (count sketch).\n"; tmp = LinSolverIterativeRandFGMRES::cs; @@ -754,7 +885,8 @@ namespace ReSolve sketching_method_ = sketching_method; // At this point iterative solver, if created, can only be LinSolverIterativeRandFGMRES - if (iterativeSolver_) { + if (iterativeSolver_) + { // TODO: Use cast here as a temporary solution; will be replaced by parameter setting framework auto* sol = dynamic_cast(iterativeSolver_); sol->setSketchingMethod(tmp); @@ -771,25 +903,39 @@ namespace ReSolve { // Map string input to the Gram-Schmidt variant enum GramSchmidt::GSVariant gs_variant; - if (variant == "cgs2") { + if (variant == "cgs2") + { gs_variant = GramSchmidt::CGS2; - } else if (variant == "mgs") { + } + else if (variant == "mgs") + { gs_variant = GramSchmidt::MGS; - } else if (variant == "mgs_two_sync") { + } + else if (variant == "mgs_two_sync") + { gs_variant = GramSchmidt::MGS_TWO_SYNC; - } else if (variant == "mgs_pm") { + } + else if (variant == "mgs_pm") + { gs_variant = GramSchmidt::MGS_PM; - } else if (variant == "cgs1") { + } + else if (variant == "cgs1") + { gs_variant = GramSchmidt::CGS1; - } else { + } + else + { out::warning() << "Gram-Schmidt variant " << variant << " not recognized.\n"; out::warning() << "Using default CGS2 Gram-Schmidt variant.\n"; gs_variant = GramSchmidt::CGS2; } - if (gs_) { + if (gs_) + { gs_->setVariant(gs_variant); - } else { + } + else + { gs_ = new GramSchmidt(vectorHandler_, gs_variant); } diff --git a/resolve/SystemSolver.hpp b/resolve/SystemSolver.hpp index c204dd470..0aa5173c1 100644 --- a/resolve/SystemSolver.hpp +++ b/resolve/SystemSolver.hpp @@ -1,4 +1,4 @@ -//this is to solve the system, can call different linear solvers if necessary +// this is to solve the system, can call different linear solvers if necessary namespace ReSolve { class LinSolverDirectKLU; @@ -23,107 +23,106 @@ namespace ReSolve class SystemSolver { - public: - using vector_type = vector::Vector; - using matrix_type = matrix::Sparse; - - /// @brief Temporary until abstract preconditioner class is created - using precond_type = LinSolverDirect; - - SystemSolver(LinAlgWorkspaceCpu* workspaceCpu, - std::string factor = "klu", - std::string refactor = "klu", - std::string solve = "klu", - std::string precond = "none", - std::string ir = "none"); - SystemSolver(LinAlgWorkspaceCUDA* workspaceCuda, - std::string factor = "klu", - std::string refactor = "cusolverrf", - std::string solve = "cusolverrf", - std::string precond = "none", - std::string ir = "none"); - SystemSolver(LinAlgWorkspaceHIP* workspaceHip, - std::string factor = "klu", - std::string refactor = "rocsolverrf", - std::string solve = "rocsolverrf", - std::string precond = "none", - std::string ir = "none"); - - ~SystemSolver(); - - int initialize(); - int setMatrix(matrix::Sparse* A); - int analyze(); // symbolic part - int factorize(); // numeric part - int refactorize(); - int refactorizationSetup(); - int preconditionerSetup(); - int solve(vector_type* rhs, vector_type* x); // for direct and iterative - int refine(vector_type* rhs, vector_type* x); // for iterative refinement - - // we update the matrix once it changed - int updateMatrix(std::string format, int* ia, int* ja, double* a); - - LinSolverDirect& getFactorizationSolver(); - LinSolverDirect& getRefactorizationSolver(); - LinSolverIterative& getIterativeSolver(); - - real_type getVectorNorm(vector_type* rhs); - real_type getResidualNorm(vector_type* rhs, vector_type* x); - real_type getNormOfScaledResiduals(vector_type* rhs, vector_type* x); - - // Get solver parameters - const std::string getFactorizationMethod() const; - const std::string getRefactorizationMethod() const; - const std::string getSolveMethod() const; - const std::string getRefinementMethod() const; - const std::string getOrthogonalizationMethod() const; - - // Set solver parameters - void setFactorizationMethod(std::string method); - void setRefactorizationMethod(std::string method); - int setSolveMethod(std::string method); - void setRefinementMethod(std::string method, std::string gs = "cgs2"); - int setSketchingMethod(std::string method); - int setGramSchmidtMethod(std::string gs_method); - - private: - - LinSolverDirect* factorizationSolver_{nullptr}; - LinSolverDirect* refactorizationSolver_{nullptr}; - LinSolverIterative* iterativeSolver_{nullptr}; - GramSchmidt* gs_{nullptr}; - - precond_type* preconditioner_{nullptr}; - - LinAlgWorkspaceCUDA* workspaceCuda_{nullptr}; - LinAlgWorkspaceHIP* workspaceHip_{nullptr}; - LinAlgWorkspaceCpu* workspaceCpu_{nullptr}; - - MatrixHandler* matrixHandler_{nullptr}; - VectorHandler* vectorHandler_{nullptr}; - - bool is_solve_on_device_{false}; - - matrix_type* L_{nullptr}; - matrix_type* U_{nullptr}; - - index_type* P_{nullptr}; - index_type* Q_{nullptr}; - - vector_type* resVector_{nullptr}; - - matrix::Sparse* A_{nullptr}; - - // Configuration parameters - std::string factorizationMethod_{"none"}; - std::string refactorizationMethod_{"none"}; - std::string solveMethod_{"none"}; - std::string precondition_method_{"none"}; - std::string irMethod_{"none"}; - std::string gsMethod_{"cgs2"}; - std::string sketching_method_{"count"}; ///< @todo move this to LinSolverIterative class - - std::string memspace_; + public: + using vector_type = vector::Vector; + using matrix_type = matrix::Sparse; + + /// @brief Temporary until abstract preconditioner class is created + using precond_type = LinSolverDirect; + + SystemSolver(LinAlgWorkspaceCpu* workspaceCpu, + std::string factor = "klu", + std::string refactor = "klu", + std::string solve = "klu", + std::string precond = "none", + std::string ir = "none"); + SystemSolver(LinAlgWorkspaceCUDA* workspaceCuda, + std::string factor = "klu", + std::string refactor = "cusolverrf", + std::string solve = "cusolverrf", + std::string precond = "none", + std::string ir = "none"); + SystemSolver(LinAlgWorkspaceHIP* workspaceHip, + std::string factor = "klu", + std::string refactor = "rocsolverrf", + std::string solve = "rocsolverrf", + std::string precond = "none", + std::string ir = "none"); + + ~SystemSolver(); + + int initialize(); + int setMatrix(matrix::Sparse* A); + int analyze(); // symbolic part + int factorize(); // numeric part + int refactorize(); + int refactorizationSetup(); + int preconditionerSetup(); + int solve(vector_type* rhs, vector_type* x); // for direct and iterative + int refine(vector_type* rhs, vector_type* x); // for iterative refinement + + // we update the matrix once it changed + int updateMatrix(std::string format, int* ia, int* ja, double* a); + + LinSolverDirect& getFactorizationSolver(); + LinSolverDirect& getRefactorizationSolver(); + LinSolverIterative& getIterativeSolver(); + + real_type getVectorNorm(vector_type* rhs); + real_type getResidualNorm(vector_type* rhs, vector_type* x); + real_type getNormOfScaledResiduals(vector_type* rhs, vector_type* x); + + // Get solver parameters + const std::string getFactorizationMethod() const; + const std::string getRefactorizationMethod() const; + const std::string getSolveMethod() const; + const std::string getRefinementMethod() const; + const std::string getOrthogonalizationMethod() const; + + // Set solver parameters + void setFactorizationMethod(std::string method); + void setRefactorizationMethod(std::string method); + int setSolveMethod(std::string method); + void setRefinementMethod(std::string method, std::string gs = "cgs2"); + int setSketchingMethod(std::string method); + int setGramSchmidtMethod(std::string gs_method); + + private: + LinSolverDirect* factorizationSolver_{nullptr}; + LinSolverDirect* refactorizationSolver_{nullptr}; + LinSolverIterative* iterativeSolver_{nullptr}; + GramSchmidt* gs_{nullptr}; + + precond_type* preconditioner_{nullptr}; + + LinAlgWorkspaceCUDA* workspaceCuda_{nullptr}; + LinAlgWorkspaceHIP* workspaceHip_{nullptr}; + LinAlgWorkspaceCpu* workspaceCpu_{nullptr}; + + MatrixHandler* matrixHandler_{nullptr}; + VectorHandler* vectorHandler_{nullptr}; + + bool is_solve_on_device_{false}; + + matrix_type* L_{nullptr}; + matrix_type* U_{nullptr}; + + index_type* P_{nullptr}; + index_type* Q_{nullptr}; + + vector_type* resVector_{nullptr}; + + matrix::Sparse* A_{nullptr}; + + // Configuration parameters + std::string factorizationMethod_{"none"}; + std::string refactorizationMethod_{"none"}; + std::string solveMethod_{"none"}; + std::string precondition_method_{"none"}; + std::string irMethod_{"none"}; + std::string gsMethod_{"cgs2"}; + std::string sketching_method_{"count"}; ///< @todo move this to LinSolverIterative class + + std::string memspace_; }; } // namespace ReSolve diff --git a/resolve/cpu/CpuMemory.hpp b/resolve/cpu/CpuMemory.hpp index 96c9a6ebc..a1b73ef40 100644 --- a/resolve/cpu/CpuMemory.hpp +++ b/resolve/cpu/CpuMemory.hpp @@ -1,5 +1,6 @@ #pragma once #include + #include namespace ReSolve @@ -8,7 +9,7 @@ namespace ReSolve { /** * @brief Class containing dummy functions when there is no GPU support. - * + * * @author Slaven Peles */ struct Cpu @@ -20,10 +21,10 @@ namespace ReSolve { // Nothing to synchronize } - + /** * @brief Dummy function to stand in when GPU support is not enabled. - * + * * @return Allways return success! */ static int getLastDeviceError() @@ -31,10 +32,10 @@ namespace ReSolve // not on device, nothing to get return 0; } - - /** + + /** * @brief Dummy function to notify us something is wrong. - * + * * This will be called only if GPU device support is not built, so * trying to access a device should indicate a bug in the code. * @@ -46,9 +47,9 @@ namespace ReSolve return -1; } - /** + /** * @brief Dummy function to notify us something is wrong. - * + * * This will be called only if GPU device support is not built, so * trying to access a device should indicate a bug in the code. * @@ -61,9 +62,9 @@ namespace ReSolve return -1; } - /** + /** * @brief Dummy function to notify us something is wrong. - * + * * This will be called only if GPU device support is not built, so * trying to access a device should indicate a bug in the code. * @@ -76,9 +77,9 @@ namespace ReSolve return -1; } - /** + /** * @brief Dummy function to notify us something is wrong. - * + * * This will be called only if GPU device support is not built, so * trying to access a device should indicate a bug in the code. * @@ -91,9 +92,9 @@ namespace ReSolve return -1; } - /** + /** * @brief Dummy function to notify us something is wrong. - * + * * This will be called only if GPU device support is not built, so * trying to access a device should indicate a bug in the code. * @@ -106,9 +107,9 @@ namespace ReSolve return -1; } - /** + /** * @brief Dummy function to notify us something is wrong. - * + * * This will be called only if GPU device support is not built, so * trying to access a device should indicate a bug in the code. * @@ -121,9 +122,9 @@ namespace ReSolve return -1; } - /** + /** * @brief Dummy function to notify us something is wrong. - * + * * This will be called only if GPU device support is not built, so * trying to access a device should indicate a bug in the code. * @@ -144,6 +145,6 @@ namespace ReSolve } }; // struct Cuda - } // namespace memory + } // namespace memory -} //namespace ReSolve +} // namespace ReSolve diff --git a/resolve/cpu/MemoryUtils.cpp b/resolve/cpu/MemoryUtils.cpp index 74944695f..94d6c8507 100644 --- a/resolve/cpu/MemoryUtils.cpp +++ b/resolve/cpu/MemoryUtils.cpp @@ -1,42 +1,41 @@ /** * @file MemoryUtils.cpp - * + * * This file includes MemoryUtils.tpp and specifies what functions to * instantiate from function templates. - * + * * @author Slaven Peles */ - #include +#include + #include #include #include -#include - namespace ReSolve { template void MemoryUtils::deviceSynchronize(); template int MemoryUtils::getLastDeviceError(); template int MemoryUtils::deleteOnDevice(void*); - template int MemoryUtils::allocateArrayOnDevice( real_type**, index_type); - template int MemoryUtils::allocateArrayOnDevice(index_type**, index_type); + template int MemoryUtils::allocateArrayOnDevice(real_type**, index_type); + template int MemoryUtils::allocateArrayOnDevice(index_type**, index_type); - template int MemoryUtils::allocateBufferOnDevice(void** v, size_t n); + template int MemoryUtils::allocateBufferOnDevice(void** v, size_t n); - template int MemoryUtils::setZeroArrayOnDevice( real_type*, index_type); + template int MemoryUtils::setZeroArrayOnDevice(real_type*, index_type); - template int MemoryUtils::setArrayToConstOnDevice( real_type*, real_type, index_type); + template int MemoryUtils::setArrayToConstOnDevice(real_type*, real_type, index_type); - template int MemoryUtils::copyArrayDeviceToHost( real_type*, const real_type*, index_type); - template int MemoryUtils::copyArrayDeviceToHost(index_type*, const index_type*, index_type); + template int MemoryUtils::copyArrayDeviceToHost(real_type*, const real_type*, index_type); + template int MemoryUtils::copyArrayDeviceToHost(index_type*, const index_type*, index_type); - template int MemoryUtils::copyArrayDeviceToDevice( real_type*, const real_type*, index_type); - template int MemoryUtils::copyArrayDeviceToDevice(index_type*, const index_type*, index_type); + template int MemoryUtils::copyArrayDeviceToDevice(real_type*, const real_type*, index_type); + template int MemoryUtils::copyArrayDeviceToDevice(index_type*, const index_type*, index_type); - template int MemoryUtils::copyArrayHostToDevice( real_type*, const real_type*, index_type); - template int MemoryUtils::copyArrayHostToDevice(index_type*, const index_type*, index_type); -} + template int MemoryUtils::copyArrayHostToDevice(real_type*, const real_type*, index_type); + template int MemoryUtils::copyArrayHostToDevice(index_type*, const index_type*, index_type); +} // namespace ReSolve diff --git a/resolve/cuda/CudaMemory.hpp b/resolve/cuda/CudaMemory.hpp index b5f888491..af51add1e 100644 --- a/resolve/cuda/CudaMemory.hpp +++ b/resolve/cuda/CudaMemory.hpp @@ -1,9 +1,10 @@ #pragma once -#include #include +#include #include + #include "cuda_check_errors.hpp" namespace ReSolve @@ -12,10 +13,10 @@ namespace ReSolve { /** * @brief Class containing wrappers for CUDA API functions. - * + * * All wrappers are implemented as static functions returning integer * error code from CUDA API functions. - * + * * @author Slaven Peles */ struct Cuda @@ -24,13 +25,13 @@ namespace ReSolve { cudaDeviceSynchronize(); } - + static int getLastDeviceError() { return static_cast(cudaGetLastError()); } - - /** + + /** * @brief deletes variable from device * * @param v - a variable on the device @@ -47,7 +48,7 @@ namespace ReSolve * * @param v - pointer to the array to be allocated on the device * @param n - number of array elements (int, size_t) - * + * * @tparam T - Array element type * @tparam I - Array index type * @@ -61,13 +62,13 @@ namespace ReSolve /** * @brief allocates buffer v onto device. - * + * * The difference from the array is that buffer size is required in bytes, * not number of elements. * * @param v - pointer to the buffer to be allocated on the device * @param n - size of the buffer in bytes - * + * * @tparam T - Buffer element data type type (typically void) * @tparam I - Buffer size type (typically size_t) * @@ -84,7 +85,7 @@ namespace ReSolve * * @param v - pointer to the array to be allocated on the device * @param n - number of the array elements to be set to zero - * + * * @tparam T - Array element type * @tparam I - Array index type * @@ -102,7 +103,7 @@ namespace ReSolve * @param v - pointer to the array to be allocated on the device * @param c - value to set all array elements * @param n - number of the array elements to be set to zero - * + * * @tparam T - Array element type * @tparam I - Array index type * @@ -115,7 +116,7 @@ namespace ReSolve return checkCudaErrors(0); } - /** + /** * @brief Copies array `src` from device to the array `dst` on the host. * * @param[in] n - size of src array @@ -165,8 +166,7 @@ namespace ReSolve { return checkCudaErrors(cudaMemcpy(dst, src, sizeof(T) * n, cudaMemcpyHostToDevice)); } - }; - } + } // namespace memory -} //namespace ReSolve +} // namespace ReSolve diff --git a/resolve/cuda/cuda_check_errors.hpp b/resolve/cuda/cuda_check_errors.hpp index 00a2029e1..6a08fe004 100644 --- a/resolve/cuda/cuda_check_errors.hpp +++ b/resolve/cuda/cuda_check_errors.hpp @@ -1,9 +1,9 @@ /** * @file cuda_check_errors.hpp - * + * * Contains macro to get error code from CUDA functions and to stream * appropriate error output to Re::Solve's logger. - * + * * @author Kasia Swirydowicz * @author Slaven Peles */ @@ -12,17 +12,19 @@ #include template -int check(T result, - char const *const func, - const char *const file, - int const line) +int check(T result, + char const* const func, + const char* const file, + int const line) { - if (result) { + if (result) + { ReSolve::io::Logger::error() << "CUDA error in function " - << func << " at " << file << ":" << line + << func << " at " << file << ":" << line << ", error# " << result << "\n"; return -1; } return 0; } + #define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__) \ No newline at end of file diff --git a/resolve/cusolver_defs.hpp b/resolve/cusolver_defs.hpp index c9db034cc..fbadfa44b 100644 --- a/resolve/cusolver_defs.hpp +++ b/resolve/cusolver_defs.hpp @@ -3,7 +3,7 @@ * @file cusolver_defs.hpp * * @author Kasia Swirydowicz , PNNL - * + * * Contains prototypes of cuSOLVER functions not in public API. * */ @@ -11,95 +11,96 @@ #ifndef CUSOLVERDEFS_H #define CUSOLVERDEFS_H -#include "cusparse.h" -#include "cusolverSp.h" #include -#include #include -#include "cusolverSp_LOWLEVEL_PREVIEW.h" + +#include #include "cusolverRf.h" +#include "cusolverSp.h" +#include "cusolverSp_LOWLEVEL_PREVIEW.h" +#include "cusparse.h" -extern "C" { +extern "C" +{ /* - * prototype not in public header file + * prototype not in public header file */ struct csrgluInfo; - typedef struct csrgluInfo *csrgluInfo_t; + typedef struct csrgluInfo* csrgluInfo_t; cusolverStatus_t CUSOLVERAPI - cusolverSpCreateGluInfo(csrgluInfo_t *info); + cusolverSpCreateGluInfo(csrgluInfo_t* info); cusolverStatus_t CUSOLVERAPI cusolverSpDestroyGluInfo(csrgluInfo_t info); cusolverStatus_t CUSOLVERAPI - cusolverSpDgluSetup(cusolverSpHandle_t handle, - int m, + cusolverSpDgluSetup(cusolverSpHandle_t handle, + int m, /* A can be base-0 or base-1 */ - int nnzA, + int nnzA, const cusparseMatDescr_t descrA, - const int* h_csrRowPtrA, - const int* h_csrColIndA, - const int* h_P, /* base-0 */ - const int* h_Q, /* base-0 */ + const int* h_csrRowPtrA, + const int* h_csrColIndA, + const int* h_P, /* base-0 */ + const int* h_Q, /* base-0 */ /* M can be base-0 or base-1 */ - int nnzM, + int nnzM, const cusparseMatDescr_t descrM, - const int* h_csrRowPtrM, - const int* h_csrColIndM, - csrgluInfo_t info); + const int* h_csrRowPtrM, + const int* h_csrColIndM, + csrgluInfo_t info); cusolverStatus_t CUSOLVERAPI cusolverSpDgluBufferSize(cusolverSpHandle_t handle, - csrgluInfo_t info, - size_t* pBufferSize); + csrgluInfo_t info, + size_t* pBufferSize); cusolverStatus_t CUSOLVERAPI cusolverSpDgluAnalysis(cusolverSpHandle_t handle, - csrgluInfo_t info, - void* workspace); + csrgluInfo_t info, + void* workspace); cusolverStatus_t CUSOLVERAPI - cusolverSpDgluReset(cusolverSpHandle_t handle, - int m, + cusolverSpDgluReset(cusolverSpHandle_t handle, + int m, /* A is original matrix */ - int nnzA, + int nnzA, const cusparseMatDescr_t descr_A, - const double* d_csrValA, - const int* d_csrRowPtrA, - const int* d_csrColIndA, - csrgluInfo_t info); + const double* d_csrValA, + const int* d_csrRowPtrA, + const int* d_csrColIndA, + csrgluInfo_t info); cusolverStatus_t CUSOLVERAPI cusolverSpDgluFactor(cusolverSpHandle_t handle, - csrgluInfo_t info, - void *workspace); + csrgluInfo_t info, + void* workspace); cusolverStatus_t CUSOLVERAPI - cusolverSpDgluSolve(cusolverSpHandle_t handle, - int m, + cusolverSpDgluSolve(cusolverSpHandle_t handle, + int m, /* A is original matrix */ - int nnzA, + int nnzA, const cusparseMatDescr_t descr_A, - const double *d_csrValA, - const int* d_csrRowPtrA, - const int* d_csrColIndA, - const double* d_b0, /* right hand side */ - double* d_x, /* left hand side */ - int* ite_refine_succ, - double* r_nrm_inf_ptr, - csrgluInfo_t info, - void* workspace); - - cusolverStatus_t CUSOLVERAPI - cusolverSpDnrminf(cusolverSpHandle_t handle, - int n, - const double *x, - double* result, /* |x|_inf, host */ - void* d_work /* at least 8192 bytes */ - ); + const double* d_csrValA, + const int* d_csrRowPtrA, + const int* d_csrColIndA, + const double* d_b0, /* right hand side */ + double* d_x, /* left hand side */ + int* ite_refine_succ, + double* r_nrm_inf_ptr, + csrgluInfo_t info, + void* workspace); + cusolverStatus_t CUSOLVERAPI + cusolverSpDnrminf(cusolverSpHandle_t handle, + int n, + const double* x, + double* result, /* |x|_inf, host */ + void* d_work /* at least 8192 bytes */ + ); } // extern "C" #endif // CUSOLVERDEFS_H diff --git a/resolve/hip/HipMemory.hpp b/resolve/hip/HipMemory.hpp index 1ac344d4d..372da4f25 100644 --- a/resolve/hip/HipMemory.hpp +++ b/resolve/hip/HipMemory.hpp @@ -1,9 +1,10 @@ #pragma once #include -#include +#include #include + #include "hip_check_errors.hpp" namespace ReSolve @@ -12,10 +13,10 @@ namespace ReSolve { /** * @brief Class containing wrappers for CUDA API functions. - * + * * All wrappers are implemented as static functions returning integer * error code from CUDA API functions. - * + * * @author Slaven Peles */ struct Hip @@ -24,13 +25,13 @@ namespace ReSolve { hipDeviceSynchronize(); } - + static int getLastDeviceError() { return static_cast(hipGetLastError()); } - - /** + + /** * @brief deletes variable from device * * @param v - a variable on the device @@ -47,7 +48,7 @@ namespace ReSolve * * @param v - pointer to the array to be allocated on the device * @param n - number of array elements (int, size_t) - * + * * @tparam T - Array element type * @tparam I - Array index type * @@ -61,13 +62,13 @@ namespace ReSolve /** * @brief allocates buffer v onto device. - * + * * The difference from the array is that buffer size is required in bytes, * not number of elements. * * @param v - pointer to the buffer to be allocated on the device * @param n - size of the buffer in bytes - * + * * @tparam T - Buffer element data type type (typically void) * @tparam I - Buffer size type (typically size_t) * @@ -84,7 +85,7 @@ namespace ReSolve * * @param v - pointer to the array to be allocated on the device * @param n - number of the array elements to be set to zero - * + * * @tparam T - Array element type * @tparam I - Array index type * @@ -102,7 +103,7 @@ namespace ReSolve * @param v - pointer to the array to be allocated on the device * @param c - value to set all array elements * @param n - number of the array elements to be set to zero - * + * * @tparam T - Array element type * @tparam I - Array index type * @@ -115,7 +116,7 @@ namespace ReSolve return checkHipErrors(0); } - /** + /** * @brief Copies array `src` from device to the array `dst` on the host. * * @param[in] n - size of src array @@ -165,8 +166,7 @@ namespace ReSolve { return checkHipErrors(hipMemcpy(dst, src, sizeof(T) * n, hipMemcpyHostToDevice)); } - }; - } + } // namespace memory -} //namespace ReSolve +} // namespace ReSolve diff --git a/resolve/hip/hip_check_errors.hpp b/resolve/hip/hip_check_errors.hpp index 1f483d35a..06b9e422c 100644 --- a/resolve/hip/hip_check_errors.hpp +++ b/resolve/hip/hip_check_errors.hpp @@ -1,9 +1,9 @@ /** * @file hip_check_errors.hpp - * + * * Contains macro to get error code from CUDA functions and to stream * appropriate error output to Re::Solve's logger. - * + * * @author Kasia Swirydowicz * @author Slaven Peles */ @@ -12,17 +12,19 @@ #include template -int check(T result, - char const *const func, - const char *const file, - int const line) +int check(T result, + char const* const func, + const char* const file, + int const line) { - if (result) { + if (result) + { ReSolve::io::Logger::error() << "HIP error in function " - << func << " at " << file << ":" << line + << func << " at " << file << ":" << line << ", error# " << result << "\n"; return -1; } return 0; } + #define checkHipErrors(val) check((val), #val, __FILE__, __LINE__) diff --git a/resolve/lusol/lusol.hpp b/resolve/lusol/lusol.hpp index 265d6c7a5..9bd83e26a 100644 --- a/resolve/lusol/lusol.hpp +++ b/resolve/lusol/lusol.hpp @@ -4,14 +4,15 @@ // TODO: should we attach documentation comments to these or is there no point? -extern "C" { +extern "C" +{ void lu1fac(ReSolve::index_type* m, ReSolve::index_type* n, ReSolve::index_type* nelem, ReSolve::index_type* lena, ReSolve::index_type* luparm, - ReSolve::real_type* parmlu, - ReSolve::real_type* a, + ReSolve::real_type* parmlu, + ReSolve::real_type* a, ReSolve::index_type* indc, ReSolve::index_type* indr, ReSolve::index_type* p, @@ -24,18 +25,18 @@ extern "C" { ReSolve::index_type* iqloc, ReSolve::index_type* ipinv, ReSolve::index_type* iqinv, - ReSolve::real_type* w, + ReSolve::real_type* w, ReSolve::index_type* inform); void lu6sol(ReSolve::index_type* mode, ReSolve::index_type* m, ReSolve::index_type* n, - ReSolve::real_type* v, - ReSolve::real_type* w, + ReSolve::real_type* v, + ReSolve::real_type* w, ReSolve::index_type* lena, ReSolve::index_type* luparm, - ReSolve::real_type* parmlu, - ReSolve::real_type* a, + ReSolve::real_type* parmlu, + ReSolve::real_type* a, ReSolve::index_type* indc, ReSolve::index_type* indr, ReSolve::index_type* p, diff --git a/resolve/matrix/Coo.cpp b/resolve/matrix/Coo.cpp index 23c3afac7..3e883a285 100644 --- a/resolve/matrix/Coo.cpp +++ b/resolve/matrix/Coo.cpp @@ -1,11 +1,11 @@ -#include // <-- includes memcpy -#include -#include +#include "Coo.hpp" + #include +#include // <-- includes memcpy +#include +#include #include -#include "Coo.hpp" - namespace ReSolve { @@ -16,7 +16,8 @@ namespace ReSolve sparse_format_ = TRIPLET; } - matrix::Coo::Coo(index_type n, index_type m, index_type nnz) : Sparse(n, m, nnz) + matrix::Coo::Coo(index_type n, index_type m, index_type nnz) + : Sparse(n, m, nnz) { sparse_format_ = TRIPLET; } @@ -24,8 +25,9 @@ namespace ReSolve matrix::Coo::Coo(index_type n, index_type m, index_type nnz, - bool symmetric, - bool expanded) : Sparse(n, m, nnz, symmetric, expanded) + bool symmetric, + bool expanded) + : Sparse(n, m, nnz, symmetric, expanded) { sparse_format_ = TRIPLET; } @@ -33,14 +35,14 @@ namespace ReSolve /** * @brief Hijacking constructor */ - matrix::Coo::Coo(index_type n, - index_type m, - index_type nnz, - bool symmetric, - bool expanded, - index_type** rows, - index_type** cols, - real_type** vals, + matrix::Coo::Coo(index_type n, + index_type m, + index_type nnz, + bool symmetric, + bool expanded, + index_type** rows, + index_type** cols, + real_type** vals, memory::MemorySpace memspaceSrc, memory::MemorySpace memspaceDst) : Sparse(n, m, nnz, symmetric, expanded) @@ -48,81 +50,95 @@ namespace ReSolve sparse_format_ = TRIPLET; int control = -1; - if ((memspaceSrc == memory::HOST) && (memspaceDst == memory::HOST)) { control = 0;} - if ((memspaceSrc == memory::HOST) && (memspaceDst == memory::DEVICE)){ control = 1;} - if ((memspaceSrc == memory::DEVICE) && (memspaceDst == memory::HOST)) { control = 2;} - if ((memspaceSrc == memory::DEVICE) && (memspaceDst == memory::DEVICE)){ control = 3;} + if ((memspaceSrc == memory::HOST) && (memspaceDst == memory::HOST)) + { + control = 0; + } + if ((memspaceSrc == memory::HOST) && (memspaceDst == memory::DEVICE)) + { + control = 1; + } + if ((memspaceSrc == memory::DEVICE) && (memspaceDst == memory::HOST)) + { + control = 2; + } + if ((memspaceSrc == memory::DEVICE) && (memspaceDst == memory::DEVICE)) + { + control = 3; + } switch (control) { - case 0: // cpu->cpu - // Set host data - h_row_data_ = *rows; - h_col_data_ = *cols; - h_val_data_ = *vals; - h_data_updated_ = true; - owns_cpu_values_ = true; - owns_cpu_sparsity_pattern_ = true; - // Make sure there is no device data. - if (d_row_data_ || d_col_data_ || d_val_data_) { - out::error() << "Device data unexpectedly allocated. " - << "Possible bug in matrix::Sparse class.\n"; - } - // Hijack data from the source - *rows = nullptr; - *cols = nullptr; - *vals = nullptr; - break; - case 2: // gpu->cpu - // Set device data and copy it to host - d_row_data_ = *rows; - d_col_data_ = *cols; - d_val_data_ = *vals; - d_data_updated_ = true; - owns_gpu_values_ = true; - owns_gpu_sparsity_pattern_ = true; - syncData(memspaceDst); - // Hijack data from the source - *rows = nullptr; - *cols = nullptr; - *vals = nullptr; - break; - case 1: // cpu->gpu - // Set host data and copy it to device - h_row_data_ = *rows; - h_col_data_ = *cols; - h_val_data_ = *vals; - h_data_updated_ = true; - owns_cpu_values_ = true; - owns_cpu_sparsity_pattern_ = true; - syncData(memspaceDst); - // Hijack data from the source - *rows = nullptr; - *cols = nullptr; - *vals = nullptr; - break; - case 3: // gpu->gpu - // Set device data - d_row_data_ = *rows; - d_col_data_ = *cols; - d_val_data_ = *vals; - d_data_updated_ = true; - owns_gpu_values_ = true; - owns_gpu_sparsity_pattern_ = true; - // Make sure there is no device data. - if (h_row_data_ || h_col_data_ || h_val_data_) { - out::error() << "Host data unexpectedly allocated. " - << "Possible bug in matrix::Sparse class.\n"; - } - // Hijack data from the source - *rows = nullptr; - *cols = nullptr; - *vals = nullptr; - break; - default: - out::error() << "Coo constructor failed! " - << "Possible bug in memory spaces setting.\n"; - break; + case 0: // cpu->cpu + // Set host data + h_row_data_ = *rows; + h_col_data_ = *cols; + h_val_data_ = *vals; + h_data_updated_ = true; + owns_cpu_values_ = true; + owns_cpu_sparsity_pattern_ = true; + // Make sure there is no device data. + if (d_row_data_ || d_col_data_ || d_val_data_) + { + out::error() << "Device data unexpectedly allocated. " + << "Possible bug in matrix::Sparse class.\n"; + } + // Hijack data from the source + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + case 2: // gpu->cpu + // Set device data and copy it to host + d_row_data_ = *rows; + d_col_data_ = *cols; + d_val_data_ = *vals; + d_data_updated_ = true; + owns_gpu_values_ = true; + owns_gpu_sparsity_pattern_ = true; + syncData(memspaceDst); + // Hijack data from the source + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + case 1: // cpu->gpu + // Set host data and copy it to device + h_row_data_ = *rows; + h_col_data_ = *cols; + h_val_data_ = *vals; + h_data_updated_ = true; + owns_cpu_values_ = true; + owns_cpu_sparsity_pattern_ = true; + syncData(memspaceDst); + // Hijack data from the source + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + case 3: // gpu->gpu + // Set device data + d_row_data_ = *rows; + d_col_data_ = *cols; + d_val_data_ = *vals; + d_data_updated_ = true; + owns_gpu_values_ = true; + owns_gpu_sparsity_pattern_ = true; + // Make sure there is no device data. + if (h_row_data_ || h_col_data_ || h_val_data_) + { + out::error() << "Host data unexpectedly allocated. " + << "Possible bug in matrix::Sparse class.\n"; + } + // Hijack data from the source + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + default: + out::error() << "Coo constructor failed! " + << "Possible bug in memory spaces setting.\n"; + break; } } @@ -134,13 +150,14 @@ namespace ReSolve { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return this->h_row_data_; - case DEVICE: - return this->d_row_data_; - default: - return nullptr; + switch (memspace) + { + case HOST: + return this->h_row_data_; + case DEVICE: + return this->d_row_data_; + default: + return nullptr; } } @@ -148,13 +165,14 @@ namespace ReSolve { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return this->h_col_data_; - case DEVICE: - return this->d_col_data_; - default: - return nullptr; + switch (memspace) + { + case HOST: + return this->h_col_data_; + case DEVICE: + return this->d_col_data_; + default: + return nullptr; } } @@ -162,99 +180,117 @@ namespace ReSolve { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return this->h_val_data_; - case DEVICE: - return this->d_val_data_; - default: - return nullptr; + switch (memspace) + { + case HOST: + return this->h_val_data_; + case DEVICE: + return this->d_val_data_; + default: + return nullptr; } } - int matrix::Coo::copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, + int matrix::Coo::copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) { - //four cases (for now) + // four cases (for now) index_type nnz_current = nnz_; setNotUpdated(); - int control=-1; - if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)){ control = 0;} - if ((memspaceIn == memory::HOST) && ((memspaceOut == memory::DEVICE))){ control = 1;} - if (((memspaceIn == memory::DEVICE)) && (memspaceOut == memory::HOST)){ control = 2;} - if (((memspaceIn == memory::DEVICE)) && ((memspaceOut == memory::DEVICE))){ control = 3;} - - if (memspaceOut == memory::HOST) { - //check if cpu data allocated - assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && - "In Coo::copyDataFrom one of host row or column data is null!\n"); - - if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) { - this->h_row_data_ = new index_type[nnz_current]; - this->h_col_data_ = new index_type[nnz_current]; + int control = -1; + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)) + { + control = 0; + } + if ((memspaceIn == memory::HOST) && ((memspaceOut == memory::DEVICE))) + { + control = 1; + } + if (((memspaceIn == memory::DEVICE)) && (memspaceOut == memory::HOST)) + { + control = 2; + } + if (((memspaceIn == memory::DEVICE)) && ((memspaceOut == memory::DEVICE))) + { + control = 3; + } + + if (memspaceOut == memory::HOST) + { + // check if cpu data allocated + assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && "In Coo::copyDataFrom one of host row or column data is null!\n"); + + if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) + { + this->h_row_data_ = new index_type[nnz_current]; + this->h_col_data_ = new index_type[nnz_current]; owns_cpu_sparsity_pattern_ = true; } - if (h_val_data_ == nullptr) { + if (h_val_data_ == nullptr) + { this->h_val_data_ = new real_type[nnz_current]; - owns_cpu_values_ = true; + owns_cpu_values_ = true; } } - if (memspaceOut == memory::DEVICE) { - //check if cuda data allocated - assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && - "In Coo::copyDataFrom one of device row or column data is null!\n"); + if (memspaceOut == memory::DEVICE) + { + // check if cuda data allocated + assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && "In Coo::copyDataFrom one of device row or column data is null!\n"); - if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) { + if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) + { mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); owns_gpu_sparsity_pattern_ = true; } - if (d_val_data_ == nullptr) { + if (d_val_data_ == nullptr) + { mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); owns_gpu_values_ = true; } } - switch(control) { - case 0: //cpu->cpu - mem_.copyArrayHostToHost(h_row_data_, row_data, nnz_current); - mem_.copyArrayHostToHost(h_col_data_, col_data, nnz_current); - mem_.copyArrayHostToHost(h_val_data_, val_data, nnz_current); - h_data_updated_ = true; - break; - case 2://gpu->cpu - mem_.copyArrayDeviceToHost(h_row_data_, row_data, nnz_current); - mem_.copyArrayDeviceToHost(h_col_data_, col_data, nnz_current); - mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current); - h_data_updated_ = true; - break; - case 1://cpu->gpu - mem_.copyArrayHostToDevice(d_row_data_, row_data, nnz_current); - mem_.copyArrayHostToDevice(d_col_data_, col_data, nnz_current); - mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current); - d_data_updated_ = true; - break; - case 3://gpu->gpu - mem_.copyArrayDeviceToDevice(d_row_data_, row_data, nnz_current); - mem_.copyArrayDeviceToDevice(d_col_data_, col_data, nnz_current); - mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current); - d_data_updated_ = true; - break; - default: - return -1; + switch (control) + { + case 0: // cpu->cpu + mem_.copyArrayHostToHost(h_row_data_, row_data, nnz_current); + mem_.copyArrayHostToHost(h_col_data_, col_data, nnz_current); + mem_.copyArrayHostToHost(h_val_data_, val_data, nnz_current); + h_data_updated_ = true; + break; + case 2: // gpu->cpu + mem_.copyArrayDeviceToHost(h_row_data_, row_data, nnz_current); + mem_.copyArrayDeviceToHost(h_col_data_, col_data, nnz_current); + mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current); + h_data_updated_ = true; + break; + case 1: // cpu->gpu + mem_.copyArrayHostToDevice(d_row_data_, row_data, nnz_current); + mem_.copyArrayHostToDevice(d_col_data_, col_data, nnz_current); + mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current); + d_data_updated_ = true; + break; + case 3: // gpu->gpu + mem_.copyArrayDeviceToDevice(d_row_data_, row_data, nnz_current); + mem_.copyArrayDeviceToDevice(d_col_data_, col_data, nnz_current); + mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current); + d_data_updated_ = true; + break; + default: + return -1; } return 0; } - int matrix::Coo::copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, - index_type new_nnz, + int matrix::Coo::copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, + index_type new_nnz, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) { @@ -266,9 +302,10 @@ namespace ReSolve int matrix::Coo::allocateMatrixData(memory::MemorySpace memspace) { index_type nnz_current = nnz_; - destroyMatrixData(memspace);//just in case + destroyMatrixData(memspace); // just in case - if (memspace == memory::HOST) { + if (memspace == memory::HOST) + { this->h_row_data_ = new index_type[nnz_current]; std::fill(h_row_data_, h_row_data_ + nnz_current, 0); this->h_col_data_ = new index_type[nnz_current]; @@ -276,16 +313,17 @@ namespace ReSolve this->h_val_data_ = new real_type[nnz_current]; std::fill(h_val_data_, h_val_data_ + nnz_current, 0.0); owns_cpu_sparsity_pattern_ = true; - owns_cpu_values_ = true; + owns_cpu_values_ = true; return 0; } - if (memspace == memory::DEVICE) { + if (memspace == memory::DEVICE) + { mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); owns_gpu_sparsity_pattern_ = true; - owns_gpu_values_ = true; + owns_gpu_values_ = true; return 0; } return -1; @@ -306,65 +344,72 @@ namespace ReSolve { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && - "In Coo::syncData one of host row or column data is null!\n"); - - if (h_data_updated_) { - out::error() << "Coo::syncData is trying to sync host, but host already up to date!\n"; - assert(!h_data_updated_); - return 1; - } - if (!d_data_updated_) { - out::error() << "Coo::syncData is trying to sync host with device, but device is out of date!\n" - << "See Coo::syncData documentation\n."; - assert(d_data_updated_); - } - if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) { - h_row_data_ = new index_type[nnz_]; - h_col_data_ = new index_type[nnz_]; - owns_cpu_sparsity_pattern_ = true; - } - if (h_val_data_ == nullptr) { - h_val_data_ = new real_type[nnz_]; - owns_cpu_values_ = true; - } - mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, nnz_); - mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_); - mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_); - h_data_updated_ = true; - return 0; - case DEVICE: - assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && - "In Coo::syncData one of device row or column data is null!\n"); - - if (d_data_updated_) { - out::error() << "Coo::syncData is trying to sync device, but device already up to date!\n"; - assert(!d_data_updated_); - return 1; - } - if (!h_data_updated_) { - out::error() << "Coo::syncData is trying to sync device with host, but host is out of date!\n" - << "See Coo::syncData documentation\n."; - assert(h_data_updated_); - } - if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) { - mem_.allocateArrayOnDevice(&d_row_data_, nnz_); - mem_.allocateArrayOnDevice(&d_col_data_, nnz_); - owns_gpu_sparsity_pattern_ = true; - } - if (d_val_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_); - owns_gpu_values_ = true; - } - mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, nnz_); - mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, nnz_); - mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_); - d_data_updated_ = true; - return 0; - default: + switch (memspace) + { + case HOST: + assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && "In Coo::syncData one of host row or column data is null!\n"); + + if (h_data_updated_) + { + out::error() << "Coo::syncData is trying to sync host, but host already up to date!\n"; + assert(!h_data_updated_); + return 1; + } + if (!d_data_updated_) + { + out::error() << "Coo::syncData is trying to sync host with device, but device is out of date!\n" + << "See Coo::syncData documentation\n."; + assert(d_data_updated_); + } + if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) + { + h_row_data_ = new index_type[nnz_]; + h_col_data_ = new index_type[nnz_]; + owns_cpu_sparsity_pattern_ = true; + } + if (h_val_data_ == nullptr) + { + h_val_data_ = new real_type[nnz_]; + owns_cpu_values_ = true; + } + mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, nnz_); + mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_); + mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_); + h_data_updated_ = true; + return 0; + case DEVICE: + assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && "In Coo::syncData one of device row or column data is null!\n"); + + if (d_data_updated_) + { + out::error() << "Coo::syncData is trying to sync device, but device already up to date!\n"; + assert(!d_data_updated_); return 1; + } + if (!h_data_updated_) + { + out::error() << "Coo::syncData is trying to sync device with host, but host is out of date!\n" + << "See Coo::syncData documentation\n."; + assert(h_data_updated_); + } + if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) + { + mem_.allocateArrayOnDevice(&d_row_data_, nnz_); + mem_.allocateArrayOnDevice(&d_col_data_, nnz_); + owns_gpu_sparsity_pattern_ = true; + } + if (d_val_data_ == nullptr) + { + mem_.allocateArrayOnDevice(&d_val_data_, nnz_); + owns_gpu_values_ = true; + } + mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, nnz_); + mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, nnz_); + mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_); + d_data_updated_ = true; + return 0; + default: + return 1; } // switch } @@ -376,7 +421,8 @@ namespace ReSolve void matrix::Coo::print(std::ostream& out, index_type indexing_base) { out << std::scientific << std::setprecision(std::numeric_limits::digits10); - for(int i = 0; i < nnz_; ++i) { + for (int i = 0; i < nnz_; ++i) + { out << h_row_data_[i] + indexing_base << " " << h_col_data_[i] + indexing_base << " " << h_val_data_[i] << "\n"; diff --git a/resolve/matrix/Coo.hpp b/resolve/matrix/Coo.hpp index aca181e56..7f37cbd2e 100644 --- a/resolve/matrix/Coo.hpp +++ b/resolve/matrix/Coo.hpp @@ -1,43 +1,46 @@ #pragma once #include "Sparse.hpp" -namespace ReSolve { namespace matrix { - - class Coo : public Sparse +namespace ReSolve +{ + namespace matrix { + + class Coo : public Sparse + { public: Coo(); Coo(index_type n, index_type m, index_type nnz); - Coo(index_type n, - index_type m, - index_type nnz, - bool symmetric, - bool expanded); Coo(index_type n, index_type m, index_type nnz, - bool symmetric, - bool expanded, - index_type** rows, - index_type** cols, - real_type** vals, + bool symmetric, + bool expanded); + Coo(index_type n, + index_type m, + index_type nnz, + bool symmetric, + bool expanded, + index_type** rows, + index_type** cols, + real_type** vals, memory::MemorySpace memspaceSrc, memory::MemorySpace memspaceDst); ~Coo(); virtual index_type* getRowData(memory::MemorySpace memspace); virtual index_type* getColData(memory::MemorySpace memspace); - virtual real_type* getValues( memory::MemorySpace memspace); + virtual real_type* getValues(memory::MemorySpace memspace); - virtual int copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, + virtual int copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); - virtual int copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, - index_type new_nnz, + virtual int copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, + index_type new_nnz, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); @@ -46,6 +49,7 @@ namespace ReSolve { namespace matrix { virtual void print(std::ostream& file_out = std::cout, index_type indexing_base = 0); virtual int syncData(memory::MemorySpace memspaceOut); - }; + }; -}} // namespace ReSolve::matrix + } // namespace matrix +} // namespace ReSolve diff --git a/resolve/matrix/Csc.cpp b/resolve/matrix/Csc.cpp index 843d4326d..f5d90d611 100644 --- a/resolve/matrix/Csc.cpp +++ b/resolve/matrix/Csc.cpp @@ -1,9 +1,10 @@ -#include // <-- includes memcpy -#include +#include "Csc.hpp" + #include +#include // <-- includes memcpy +#include #include -#include "Csc.hpp" namespace ReSolve { @@ -14,7 +15,8 @@ namespace ReSolve sparse_format_ = COMPRESSED_SPARSE_COLUMN; } - matrix::Csc::Csc(index_type n, index_type m, index_type nnz) : Sparse(n, m, nnz) + matrix::Csc::Csc(index_type n, index_type m, index_type nnz) + : Sparse(n, m, nnz) { sparse_format_ = COMPRESSED_SPARSE_COLUMN; } @@ -22,8 +24,9 @@ namespace ReSolve matrix::Csc::Csc(index_type n, index_type m, index_type nnz, - bool symmetric, - bool expanded) : Sparse(n, m, nnz, symmetric, expanded) + bool symmetric, + bool expanded) + : Sparse(n, m, nnz, symmetric, expanded) { sparse_format_ = COMPRESSED_SPARSE_COLUMN; } @@ -36,13 +39,14 @@ namespace ReSolve { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return this->h_row_data_; - case DEVICE: - return this->d_row_data_; - default: - return nullptr; + switch (memspace) + { + case HOST: + return this->h_row_data_; + case DEVICE: + return this->d_row_data_; + default: + return nullptr; } } @@ -50,13 +54,14 @@ namespace ReSolve { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return this->h_col_data_; - case DEVICE: - return this->d_col_data_; - default: - return nullptr; + switch (memspace) + { + case HOST: + return this->h_col_data_; + case DEVICE: + return this->d_col_data_; + default: + return nullptr; } } @@ -64,100 +69,117 @@ namespace ReSolve { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return this->h_val_data_; - case DEVICE: - return this->d_val_data_; - default: - return nullptr; + switch (memspace) + { + case HOST: + return this->h_val_data_; + case DEVICE: + return this->d_val_data_; + default: + return nullptr; } } - int matrix::Csc::copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, + int matrix::Csc::copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) { index_type nnz_current = nnz_; - //four cases (for now) + // four cases (for now) int control = -1; setNotUpdated(); - if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST) ) { control = 0;} - if ((memspaceIn == memory::HOST) && (memspaceOut == memory::DEVICE)) { control = 1;} - if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::HOST) ) { control = 2;} - if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::DEVICE)) { control = 3;} + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)) + { + control = 0; + } + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::DEVICE)) + { + control = 1; + } + if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::HOST)) + { + control = 2; + } + if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::DEVICE)) + { + control = 3; + } - if (memspaceOut == memory::HOST) { - //check if cpu data allocated - assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && - "In Csc::copyDataFrom one of host row or column data is null!\n"); + if (memspaceOut == memory::HOST) + { + // check if cpu data allocated + assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && "In Csc::copyDataFrom one of host row or column data is null!\n"); - if ((h_col_data_ == nullptr) && (h_row_data_ == nullptr)) { - this->h_col_data_ = new index_type[m_ + 1]; - this->h_row_data_ = new index_type[nnz_current]; + if ((h_col_data_ == nullptr) && (h_row_data_ == nullptr)) + { + this->h_col_data_ = new index_type[m_ + 1]; + this->h_row_data_ = new index_type[nnz_current]; owns_cpu_sparsity_pattern_ = true; } - if (h_val_data_ == nullptr) { + if (h_val_data_ == nullptr) + { this->h_val_data_ = new real_type[nnz_current]; - owns_cpu_values_ = true; + owns_cpu_values_ = true; } } - if (memspaceOut == memory::DEVICE) { - //check if cuda data allocated - assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && - "In Csc::copyDataFrom one of device row or column data is null!\n"); + if (memspaceOut == memory::DEVICE) + { + // check if cuda data allocated + assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && "In Csc::copyDataFrom one of device row or column data is null!\n"); - if ((d_col_data_ == nullptr) && (d_row_data_ == nullptr)) { + if ((d_col_data_ == nullptr) && (d_row_data_ == nullptr)) + { mem_.allocateArrayOnDevice(&d_col_data_, m_ + 1); mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); owns_gpu_sparsity_pattern_ = true; } - if (d_val_data_ == nullptr) { + if (d_val_data_ == nullptr) + { mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); owns_gpu_values_ = true; } } - switch(control) { - case 0: //cpu->cpu - mem_.copyArrayHostToHost(h_col_data_, col_data, m_ + 1); - mem_.copyArrayHostToHost(h_row_data_, row_data, nnz_current); - mem_.copyArrayHostToHost(h_val_data_, val_data, nnz_current); - h_data_updated_ = true; - break; - case 2://gpu->cpu - mem_.copyArrayDeviceToHost(h_col_data_, col_data, m_ + 1); - mem_.copyArrayDeviceToHost(h_row_data_, row_data, nnz_current); - mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current); - h_data_updated_ = true; - break; - case 1://cpu->gpu - mem_.copyArrayHostToDevice(d_col_data_, col_data, m_ + 1); - mem_.copyArrayHostToDevice(d_row_data_, row_data, nnz_current); - mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current); - d_data_updated_ = true; - break; - case 3://gpu->gpu - mem_.copyArrayDeviceToDevice(d_col_data_, col_data, m_ + 1); - mem_.copyArrayDeviceToDevice(d_row_data_, row_data, nnz_current); - mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current); - d_data_updated_ = true; - break; - default: - return -1; + switch (control) + { + case 0: // cpu->cpu + mem_.copyArrayHostToHost(h_col_data_, col_data, m_ + 1); + mem_.copyArrayHostToHost(h_row_data_, row_data, nnz_current); + mem_.copyArrayHostToHost(h_val_data_, val_data, nnz_current); + h_data_updated_ = true; + break; + case 2: // gpu->cpu + mem_.copyArrayDeviceToHost(h_col_data_, col_data, m_ + 1); + mem_.copyArrayDeviceToHost(h_row_data_, row_data, nnz_current); + mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current); + h_data_updated_ = true; + break; + case 1: // cpu->gpu + mem_.copyArrayHostToDevice(d_col_data_, col_data, m_ + 1); + mem_.copyArrayHostToDevice(d_row_data_, row_data, nnz_current); + mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current); + d_data_updated_ = true; + break; + case 3: // gpu->gpu + mem_.copyArrayDeviceToDevice(d_col_data_, col_data, m_ + 1); + mem_.copyArrayDeviceToDevice(d_row_data_, row_data, nnz_current); + mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current); + d_data_updated_ = true; + break; + default: + return -1; } return 0; - } - int matrix::Csc::copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, - index_type new_nnz, + int matrix::Csc::copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, + index_type new_nnz, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) { @@ -169,9 +191,10 @@ namespace ReSolve int matrix::Csc::allocateMatrixData(memory::MemorySpace memspace) { index_type nnz_current = nnz_; - destroyMatrixData(memspace);//just in case + destroyMatrixData(memspace); // just in case - if (memspace == memory::HOST) { + if (memspace == memory::HOST) + { this->h_col_data_ = new index_type[m_ + 1]; std::fill(h_col_data_, h_col_data_ + m_ + 1, 0); this->h_row_data_ = new index_type[nnz_current]; @@ -179,16 +202,17 @@ namespace ReSolve this->h_val_data_ = new real_type[nnz_current]; std::fill(h_val_data_, h_val_data_ + nnz_current, 0.0); owns_cpu_sparsity_pattern_ = true; - owns_cpu_values_ = true; + owns_cpu_values_ = true; return 0; } - if (memspace == memory::DEVICE) { - mem_.allocateArrayOnDevice(&d_col_data_, m_ + 1); + if (memspace == memory::DEVICE) + { + mem_.allocateArrayOnDevice(&d_col_data_, m_ + 1); mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); owns_gpu_sparsity_pattern_ = true; - owns_gpu_values_ = true; + owns_gpu_values_ = true; return 0; } return -1; @@ -209,65 +233,72 @@ namespace ReSolve { using namespace ReSolve::memory; - switch(memspace) { - case HOST: - assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && - "In Csc::syncData one of host row or column data is null!\n"); + switch (memspace) + { + case HOST: + assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && "In Csc::syncData one of host row or column data is null!\n"); - if (h_data_updated_) { - out::error() << "Csc::syncData is trying to sync host, but host already up to date!\n"; - assert(!h_data_updated_); - return 1; - } - if (!d_data_updated_) { - out::error() << "Csc::syncData is trying to sync host with device, but device is out of date!\n" - << "See Csc::syncData documentation\n."; - assert(d_data_updated_); - } - if ((h_col_data_ == nullptr) && (h_row_data_ == nullptr)) { - h_col_data_ = new index_type[m_ + 1]; - h_row_data_ = new index_type[nnz_]; - owns_cpu_sparsity_pattern_ = true; - } - if (h_val_data_ == nullptr) { - h_val_data_ = new real_type[nnz_]; - owns_cpu_values_ = true; - } - mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, m_ + 1); - mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, nnz_); - mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_); - h_data_updated_ = true; - return 0; - case DEVICE: - assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && - "In Csc::syncData one of device row or column data is null!\n"); + if (h_data_updated_) + { + out::error() << "Csc::syncData is trying to sync host, but host already up to date!\n"; + assert(!h_data_updated_); + return 1; + } + if (!d_data_updated_) + { + out::error() << "Csc::syncData is trying to sync host with device, but device is out of date!\n" + << "See Csc::syncData documentation\n."; + assert(d_data_updated_); + } + if ((h_col_data_ == nullptr) && (h_row_data_ == nullptr)) + { + h_col_data_ = new index_type[m_ + 1]; + h_row_data_ = new index_type[nnz_]; + owns_cpu_sparsity_pattern_ = true; + } + if (h_val_data_ == nullptr) + { + h_val_data_ = new real_type[nnz_]; + owns_cpu_values_ = true; + } + mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, m_ + 1); + mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, nnz_); + mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_); + h_data_updated_ = true; + return 0; + case DEVICE: + assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && "In Csc::syncData one of device row or column data is null!\n"); - if (d_data_updated_) { - out::error() << "Csc::syncData is trying to sync device, but device already up to date!\n"; - assert(!d_data_updated_); - return 1; - } - if (!h_data_updated_) { - out::error() << "Csc::syncData is trying to sync device with host, but host is out of date!\n" - << "See Csc::syncData documentation\n."; - assert(h_data_updated_); - } - if ((d_col_data_ == nullptr) && (d_row_data_ == nullptr)) { - mem_.allocateArrayOnDevice(&d_col_data_, m_ + 1); - mem_.allocateArrayOnDevice(&d_row_data_, nnz_); - owns_gpu_sparsity_pattern_ = true; - } - if (d_val_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_); - owns_gpu_values_ = true; - } - mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, m_ + 1); - mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, nnz_); - mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_); - d_data_updated_ = true; - return 0; - default: + if (d_data_updated_) + { + out::error() << "Csc::syncData is trying to sync device, but device already up to date!\n"; + assert(!d_data_updated_); return 1; + } + if (!h_data_updated_) + { + out::error() << "Csc::syncData is trying to sync device with host, but host is out of date!\n" + << "See Csc::syncData documentation\n."; + assert(h_data_updated_); + } + if ((d_col_data_ == nullptr) && (d_row_data_ == nullptr)) + { + mem_.allocateArrayOnDevice(&d_col_data_, m_ + 1); + mem_.allocateArrayOnDevice(&d_row_data_, nnz_); + owns_gpu_sparsity_pattern_ = true; + } + if (d_val_data_ == nullptr) + { + mem_.allocateArrayOnDevice(&d_val_data_, nnz_); + owns_gpu_values_ = true; + } + mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, m_ + 1); + mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, nnz_); + mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_); + d_data_updated_ = true; + return 0; + default: + return 1; } // switch } @@ -279,10 +310,12 @@ namespace ReSolve void matrix::Csc::print(std::ostream& out, index_type indexing_base) { out << std::scientific << std::setprecision(std::numeric_limits::digits10); - for(index_type i = 0; i < m_; ++i) { - for (index_type j = h_col_data_[i]; j < h_col_data_[i+1]; ++j) { + for (index_type i = 0; i < m_; ++i) + { + for (index_type j = h_col_data_[i]; j < h_col_data_[i + 1]; ++j) + { out << h_row_data_[j] + indexing_base << " " - << i + indexing_base << " " + << i + indexing_base << " " << h_val_data_[j] << "\n"; } } diff --git a/resolve/matrix/Csc.hpp b/resolve/matrix/Csc.hpp index 9cf8f02fb..91708dba7 100644 --- a/resolve/matrix/Csc.hpp +++ b/resolve/matrix/Csc.hpp @@ -1,41 +1,45 @@ #pragma once #include "Sparse.hpp" -namespace ReSolve { namespace matrix { - - class Csc : public Sparse +namespace ReSolve +{ + namespace matrix { + + class Csc : public Sparse + { public: Csc(); Csc(index_type n, index_type m, index_type nnz); - Csc(index_type n, - index_type m, + Csc(index_type n, + index_type m, index_type nnz, - bool symmetric, - bool expanded); + bool symmetric, + bool expanded); ~Csc(); virtual index_type* getRowData(memory::MemorySpace memspace); virtual index_type* getColData(memory::MemorySpace memspace); - virtual real_type* getValues( memory::MemorySpace memspace); + virtual real_type* getValues(memory::MemorySpace memspace); - virtual int copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, + virtual int copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, memory::MemorySpace memspaceIn, - memory::MemorySpace memspaceOut); - virtual int copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, - index_type new_nnz, + memory::MemorySpace memspaceOut); + virtual int copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, + index_type new_nnz, memory::MemorySpace memspaceIn, - memory::MemorySpace memspaceOut); + memory::MemorySpace memspaceOut); virtual int allocateMatrixData(memory::MemorySpace memspace); virtual void print(std::ostream& file_out = std::cout, index_type indexing_base = 0); virtual int syncData(memory::MemorySpace memspaceOut); - }; + }; -}} // namespace ReSolve::matrix + } // namespace matrix +} // namespace ReSolve diff --git a/resolve/matrix/Csr.cpp b/resolve/matrix/Csr.cpp index 08aa6d49f..122bec6fb 100644 --- a/resolve/matrix/Csr.cpp +++ b/resolve/matrix/Csr.cpp @@ -1,9 +1,10 @@ -#include // <-- includes memcpy +#include "Csr.hpp" + #include #include +#include // <-- includes memcpy #include -#include "Csr.hpp" #include "Coo.hpp" #include @@ -16,7 +17,8 @@ namespace ReSolve sparse_format_ = COMPRESSED_SPARSE_ROW; } - matrix::Csr::Csr(index_type n, index_type m, index_type nnz) : Sparse(n, m, nnz) + matrix::Csr::Csr(index_type n, index_type m, index_type nnz) + : Sparse(n, m, nnz) { sparse_format_ = COMPRESSED_SPARSE_ROW; } @@ -24,8 +26,9 @@ namespace ReSolve matrix::Csr::Csr(index_type n, index_type m, index_type nnz, - bool symmetric, - bool expanded) : Sparse(n, m, nnz, symmetric, expanded) + bool symmetric, + bool expanded) + : Sparse(n, m, nnz, symmetric, expanded) { sparse_format_ = COMPRESSED_SPARSE_ROW; } @@ -44,14 +47,14 @@ namespace ReSolve * @param[in] memspaceSrc * @param[in] memspaceDst */ - matrix::Csr::Csr(index_type n, - index_type m, - index_type nnz, - bool symmetric, - bool expanded, - index_type** rows, - index_type** cols, - real_type** vals, + matrix::Csr::Csr(index_type n, + index_type m, + index_type nnz, + bool symmetric, + bool expanded, + index_type** rows, + index_type** cols, + real_type** vals, memory::MemorySpace memspaceSrc, memory::MemorySpace memspaceDst) : Sparse(n, m, nnz, symmetric, expanded) @@ -59,94 +62,108 @@ namespace ReSolve sparse_format_ = COMPRESSED_SPARSE_ROW; int control = -1; - if ((memspaceSrc == memory::HOST) && (memspaceDst == memory::HOST)) { control = 0;} - if ((memspaceSrc == memory::HOST) && (memspaceDst == memory::DEVICE)){ control = 1;} - if ((memspaceSrc == memory::DEVICE) && (memspaceDst == memory::HOST)) { control = 2;} - if ((memspaceSrc == memory::DEVICE) && (memspaceDst == memory::DEVICE)){ control = 3;} + if ((memspaceSrc == memory::HOST) && (memspaceDst == memory::HOST)) + { + control = 0; + } + if ((memspaceSrc == memory::HOST) && (memspaceDst == memory::DEVICE)) + { + control = 1; + } + if ((memspaceSrc == memory::DEVICE) && (memspaceDst == memory::HOST)) + { + control = 2; + } + if ((memspaceSrc == memory::DEVICE) && (memspaceDst == memory::DEVICE)) + { + control = 3; + } switch (control) { - case 0: // cpu->cpu - // Set host data - h_row_data_ = *rows; - h_col_data_ = *cols; - h_val_data_ = *vals; - h_data_updated_ = true; - owns_cpu_sparsity_pattern_ = true; - owns_cpu_values_ = true; - // Set device data to null - if (d_row_data_ || d_col_data_ || d_val_data_) { - out::error() << "Device data unexpectedly allocated. " - << "Possible bug in matrix::Sparse class.\n"; - } - d_row_data_ = nullptr; - d_col_data_ = nullptr; - d_val_data_ = nullptr; - d_data_updated_ = false; - owns_gpu_sparsity_pattern_ = false; - owns_gpu_values_ = false; - // Hijack data from the source - *rows = nullptr; - *cols = nullptr; - *vals = nullptr; - break; - case 2: // gpu->cpu - // Set device data and copy it to host - d_row_data_ = *rows; - d_col_data_ = *cols; - d_val_data_ = *vals; - d_data_updated_ = true; - owns_gpu_sparsity_pattern_ = true; - owns_gpu_values_ = true; - syncData(memspaceDst); - // Hijack data from the source - *rows = nullptr; - *cols = nullptr; - *vals = nullptr; - break; - case 1: // cpu->gpu - // Set host data and copy it to device - h_row_data_ = *rows; - h_col_data_ = *cols; - h_val_data_ = *vals; - h_data_updated_ = true; - owns_cpu_sparsity_pattern_ = true; - owns_cpu_values_ = true; - syncData(memspaceDst); - - // Hijack data from the source - *rows = nullptr; - *cols = nullptr; - *vals = nullptr; - break; - case 3: // gpu->gpu - // Set device data - d_row_data_ = *rows; - d_col_data_ = *cols; - d_val_data_ = *vals; - d_data_updated_ = true; - owns_gpu_sparsity_pattern_ = true; - owns_gpu_values_ = true; - // Set host data to null - if (h_row_data_ || h_col_data_ || h_val_data_) { - out::error() << "Host data unexpectedly allocated. " - << "Possible bug in matrix::Sparse class.\n"; - } - h_row_data_ = nullptr; - h_col_data_ = nullptr; - h_val_data_ = nullptr; - h_data_updated_ = false; - owns_cpu_sparsity_pattern_ = false; - owns_cpu_values_ = false; - // Hijack data from the source - *rows = nullptr; - *cols = nullptr; - *vals = nullptr; - break; - default: - out::error() << "Csr constructor failed! " - << "Possible bug in memory spaces setting.\n"; - break; + case 0: // cpu->cpu + // Set host data + h_row_data_ = *rows; + h_col_data_ = *cols; + h_val_data_ = *vals; + h_data_updated_ = true; + owns_cpu_sparsity_pattern_ = true; + owns_cpu_values_ = true; + // Set device data to null + if (d_row_data_ || d_col_data_ || d_val_data_) + { + out::error() << "Device data unexpectedly allocated. " + << "Possible bug in matrix::Sparse class.\n"; + } + d_row_data_ = nullptr; + d_col_data_ = nullptr; + d_val_data_ = nullptr; + d_data_updated_ = false; + owns_gpu_sparsity_pattern_ = false; + owns_gpu_values_ = false; + // Hijack data from the source + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + case 2: // gpu->cpu + // Set device data and copy it to host + d_row_data_ = *rows; + d_col_data_ = *cols; + d_val_data_ = *vals; + d_data_updated_ = true; + owns_gpu_sparsity_pattern_ = true; + owns_gpu_values_ = true; + syncData(memspaceDst); + // Hijack data from the source + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + case 1: // cpu->gpu + // Set host data and copy it to device + h_row_data_ = *rows; + h_col_data_ = *cols; + h_val_data_ = *vals; + h_data_updated_ = true; + owns_cpu_sparsity_pattern_ = true; + owns_cpu_values_ = true; + syncData(memspaceDst); + + // Hijack data from the source + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + case 3: // gpu->gpu + // Set device data + d_row_data_ = *rows; + d_col_data_ = *cols; + d_val_data_ = *vals; + d_data_updated_ = true; + owns_gpu_sparsity_pattern_ = true; + owns_gpu_values_ = true; + // Set host data to null + if (h_row_data_ || h_col_data_ || h_val_data_) + { + out::error() << "Host data unexpectedly allocated. " + << "Possible bug in matrix::Sparse class.\n"; + } + h_row_data_ = nullptr; + h_col_data_ = nullptr; + h_val_data_ = nullptr; + h_data_updated_ = false; + owns_cpu_sparsity_pattern_ = false; + owns_cpu_values_ = false; + // Hijack data from the source + *rows = nullptr; + *cols = nullptr; + *vals = nullptr; + break; + default: + out::error() << "Csr constructor failed! " + << "Possible bug in memory spaces setting.\n"; + break; } } @@ -158,13 +175,14 @@ namespace ReSolve { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return this->h_row_data_; - case DEVICE: - return this->d_row_data_; - default: - return nullptr; + switch (memspace) + { + case HOST: + return this->h_row_data_; + case DEVICE: + return this->d_row_data_; + default: + return nullptr; } } @@ -172,13 +190,14 @@ namespace ReSolve { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return this->h_col_data_; - case DEVICE: - return this->d_col_data_; - default: - return nullptr; + switch (memspace) + { + case HOST: + return this->h_col_data_; + case DEVICE: + return this->d_col_data_; + default: + return nullptr; } } @@ -186,100 +205,117 @@ namespace ReSolve { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return this->h_val_data_; - case DEVICE: - return this->d_val_data_; - default: - return nullptr; + switch (memspace) + { + case HOST: + return this->h_val_data_; + case DEVICE: + return this->d_val_data_; + default: + return nullptr; } } - int matrix::Csr::copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, + int matrix::Csr::copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) { - //four cases (for now) + // four cases (for now) index_type nnz_current = nnz_; setNotUpdated(); int control = -1; - if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)) { control = 0;} - if ((memspaceIn == memory::HOST) && ((memspaceOut == memory::DEVICE))){ control = 1;} - if (((memspaceIn == memory::DEVICE)) && (memspaceOut == memory::HOST)) { control = 2;} - if (((memspaceIn == memory::DEVICE)) && ((memspaceOut == memory::DEVICE))){ control = 3;} - - if (memspaceOut == memory::HOST) { - //check if cpu data allocated - assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && - "In Csr::copyDataFrom one of host row or column data is null!\n"); - - if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) { - this->h_row_data_ = new index_type[n_ + 1]; - this->h_col_data_ = new index_type[nnz_current]; + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)) + { + control = 0; + } + if ((memspaceIn == memory::HOST) && ((memspaceOut == memory::DEVICE))) + { + control = 1; + } + if (((memspaceIn == memory::DEVICE)) && (memspaceOut == memory::HOST)) + { + control = 2; + } + if (((memspaceIn == memory::DEVICE)) && ((memspaceOut == memory::DEVICE))) + { + control = 3; + } + + if (memspaceOut == memory::HOST) + { + // check if cpu data allocated + assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && "In Csr::copyDataFrom one of host row or column data is null!\n"); + + if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) + { + this->h_row_data_ = new index_type[n_ + 1]; + this->h_col_data_ = new index_type[nnz_current]; owns_cpu_sparsity_pattern_ = true; } - if (h_val_data_ == nullptr) { + if (h_val_data_ == nullptr) + { this->h_val_data_ = new real_type[nnz_current]; - owns_cpu_values_ = true; + owns_cpu_values_ = true; } } - if (memspaceOut == memory::DEVICE) { - //check if cuda data allocated - assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && - "In Csr::copyDataFrom one of device row or column data is null!\n"); + if (memspaceOut == memory::DEVICE) + { + // check if cuda data allocated + assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && "In Csr::copyDataFrom one of device row or column data is null!\n"); - if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) { + if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) + { mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); owns_gpu_values_ = true; } - if (d_val_data_ == nullptr) { + if (d_val_data_ == nullptr) + { mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); owns_gpu_sparsity_pattern_ = true; } } - - //copy - switch(control) { - case 0: //cpu->cpu - mem_.copyArrayHostToHost(h_row_data_, row_data, n_ + 1); - mem_.copyArrayHostToHost(h_col_data_, col_data, nnz_current); - mem_.copyArrayHostToHost(h_val_data_, val_data, nnz_current); - h_data_updated_ = true; - break; - case 2://gpu->cpu - mem_.copyArrayDeviceToHost(h_row_data_, row_data, n_ + 1); - mem_.copyArrayDeviceToHost(h_col_data_, col_data, nnz_current); - mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current); - h_data_updated_ = true; - break; - case 1://cpu->gpu - mem_.copyArrayHostToDevice(d_row_data_, row_data, n_ + 1); - mem_.copyArrayHostToDevice(d_col_data_, col_data, nnz_current); - mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current); - d_data_updated_ = true; - break; - case 3://gpu->gpu - mem_.copyArrayDeviceToDevice(d_row_data_, row_data, n_ + 1); - mem_.copyArrayDeviceToDevice(d_col_data_, col_data, nnz_current); - mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current); - d_data_updated_ = true; - break; - default: - return -1; + // copy + switch (control) + { + case 0: // cpu->cpu + mem_.copyArrayHostToHost(h_row_data_, row_data, n_ + 1); + mem_.copyArrayHostToHost(h_col_data_, col_data, nnz_current); + mem_.copyArrayHostToHost(h_val_data_, val_data, nnz_current); + h_data_updated_ = true; + break; + case 2: // gpu->cpu + mem_.copyArrayDeviceToHost(h_row_data_, row_data, n_ + 1); + mem_.copyArrayDeviceToHost(h_col_data_, col_data, nnz_current); + mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current); + h_data_updated_ = true; + break; + case 1: // cpu->gpu + mem_.copyArrayHostToDevice(d_row_data_, row_data, n_ + 1); + mem_.copyArrayHostToDevice(d_col_data_, col_data, nnz_current); + mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current); + d_data_updated_ = true; + break; + case 3: // gpu->gpu + mem_.copyArrayDeviceToDevice(d_row_data_, row_data, n_ + 1); + mem_.copyArrayDeviceToDevice(d_col_data_, col_data, nnz_current); + mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current); + d_data_updated_ = true; + break; + default: + return -1; } return 0; } - int matrix::Csr::copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, - index_type new_nnz, + int matrix::Csr::copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, + index_type new_nnz, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) { @@ -291,9 +327,10 @@ namespace ReSolve int matrix::Csr::allocateMatrixData(memory::MemorySpace memspace) { index_type nnz_current = nnz_; - destroyMatrixData(memspace);//just in case + destroyMatrixData(memspace); // just in case - if (memspace == memory::HOST) { + if (memspace == memory::HOST) + { this->h_row_data_ = new index_type[n_ + 1]; std::fill(h_row_data_, h_row_data_ + n_ + 1, 0); this->h_col_data_ = new index_type[nnz_current]; @@ -301,16 +338,17 @@ namespace ReSolve this->h_val_data_ = new real_type[nnz_current]; std::fill(h_val_data_, h_val_data_ + nnz_current, 0.0); owns_cpu_sparsity_pattern_ = true; - owns_cpu_values_ = true; + owns_cpu_values_ = true; return 0; } - if (memspace == memory::DEVICE) { - mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); + if (memspace == memory::DEVICE) + { + mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); owns_gpu_sparsity_pattern_ = true; - owns_gpu_values_ = true; + owns_gpu_values_ = true; return 0; } return -1; @@ -331,70 +369,76 @@ namespace ReSolve { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - //check if we need to copy or not - assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && - "In Csr::syncData one of host row or column data is null!\n"); - - if (h_data_updated_) { - out::error() << "Csr::syncData is trying to sync host, but host already up to date!\n"; - assert(!h_data_updated_); - return 1; - } - if (!d_data_updated_) { - out::error() << "Csr::syncData is trying to sync host with device, but device is out of date!\n" - << "See Csr::syncData documentation\n."; - assert(d_data_updated_); - } - if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) { - h_row_data_ = new index_type[n_ + 1]; - h_col_data_ = new index_type[nnz_]; - owns_cpu_sparsity_pattern_ = true; - } - if (h_val_data_ == nullptr) { - h_val_data_ = new real_type[nnz_]; - owns_cpu_values_ = true; - } - mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, n_ + 1); - mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_); - mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_); - h_data_updated_ = true; - return 0; - case DEVICE: - assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && - "In Csr::syncData one of device row or column data is null!\n"); - - if (d_data_updated_) { - out::error() << "Csr::syncData is trying to sync device, but device already up to date!\n"; - assert(!d_data_updated_); - return 1; - } - if (!h_data_updated_) { - out::error() << "Csr::syncData is trying to sync device with host, but host is out of date!\n" - << "See Csr::syncData documentation\n."; - assert(h_data_updated_); - } - if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) { - mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); - mem_.allocateArrayOnDevice(&d_col_data_, nnz_); - owns_gpu_sparsity_pattern_ = true; - } - if (d_val_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_); - owns_gpu_values_ = true; - } - mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, n_ + 1); - mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, nnz_); - mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_); - d_data_updated_ = true; - return 0; - default: + switch (memspace) + { + case HOST: + // check if we need to copy or not + assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && "In Csr::syncData one of host row or column data is null!\n"); + + if (h_data_updated_) + { + out::error() << "Csr::syncData is trying to sync host, but host already up to date!\n"; + assert(!h_data_updated_); return 1; + } + if (!d_data_updated_) + { + out::error() << "Csr::syncData is trying to sync host with device, but device is out of date!\n" + << "See Csr::syncData documentation\n."; + assert(d_data_updated_); + } + if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) + { + h_row_data_ = new index_type[n_ + 1]; + h_col_data_ = new index_type[nnz_]; + owns_cpu_sparsity_pattern_ = true; + } + if (h_val_data_ == nullptr) + { + h_val_data_ = new real_type[nnz_]; + owns_cpu_values_ = true; + } + mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, n_ + 1); + mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_); + mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_); + h_data_updated_ = true; + return 0; + case DEVICE: + assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && "In Csr::syncData one of device row or column data is null!\n"); + + if (d_data_updated_) + { + out::error() << "Csr::syncData is trying to sync device, but device already up to date!\n"; + assert(!d_data_updated_); + return 1; + } + if (!h_data_updated_) + { + out::error() << "Csr::syncData is trying to sync device with host, but host is out of date!\n" + << "See Csr::syncData documentation\n."; + assert(h_data_updated_); + } + if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) + { + mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); + mem_.allocateArrayOnDevice(&d_col_data_, nnz_); + owns_gpu_sparsity_pattern_ = true; + } + if (d_val_data_ == nullptr) + { + mem_.allocateArrayOnDevice(&d_val_data_, nnz_); + owns_gpu_values_ = true; + } + mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, n_ + 1); + mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, nnz_); + mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_); + d_data_updated_ = true; + return 0; + default: + return 1; } // switch } - /** * @brief Prints matrix data. * @@ -403,13 +447,14 @@ namespace ReSolve void matrix::Csr::print(std::ostream& out, index_type indexing_base) { out << std::scientific << std::setprecision(std::numeric_limits::digits10); - for(index_type i = 0; i < n_; ++i) { - for (index_type j = h_row_data_[i]; j < h_row_data_[i+1]; ++j) { - out << i + indexing_base << " " + for (index_type i = 0; i < n_; ++i) + { + for (index_type j = h_row_data_[i]; j < h_row_data_[i + 1]; ++j) + { + out << i + indexing_base << " " << h_col_data_[j] + indexing_base << " " << h_val_data_[j] << "\n"; } } } } // namespace ReSolve - diff --git a/resolve/matrix/Csr.hpp b/resolve/matrix/Csr.hpp index dd11d57f1..3451b08ae 100644 --- a/resolve/matrix/Csr.hpp +++ b/resolve/matrix/Csr.hpp @@ -1,59 +1,62 @@ #pragma once #include -namespace ReSolve { namespace matrix { +namespace ReSolve +{ + namespace matrix + { - // Forward declaration of Coo - class Coo; + // Forward declaration of Coo + class Coo; - class Csr : public Sparse - { + class Csr : public Sparse + { public: Csr(); Csr(index_type n, index_type m, index_type nnz); - Csr(index_type n, - index_type m, - index_type nnz, - bool symmetric, - bool expanded); - - Csr(index_type n, - index_type m, + Csr(index_type n, + index_type m, index_type nnz, - bool symmetric, - bool expanded, - index_type** rows, - index_type** cols, - real_type** vals, + bool symmetric, + bool expanded); + + Csr(index_type n, + index_type m, + index_type nnz, + bool symmetric, + bool expanded, + index_type** rows, + index_type** cols, + real_type** vals, memory::MemorySpace memspaceSrc, memory::MemorySpace memspaceDst); - + ~Csr(); virtual index_type* getRowData(memory::MemorySpace memspace); virtual index_type* getColData(memory::MemorySpace memspace); - virtual real_type* getValues( memory::MemorySpace memspace); + virtual real_type* getValues(memory::MemorySpace memspace); - virtual int copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, + virtual int copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); - virtual int copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, - index_type new_nnz, + virtual int copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, + index_type new_nnz, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); - virtual int allocateMatrixData(memory::MemorySpace memspace); + virtual int allocateMatrixData(memory::MemorySpace memspace); virtual void print(std::ostream& file_out = std::cout, index_type indexing_base = 0); virtual int syncData(memory::MemorySpace memspaceOut); + }; - }; - -}} // namespace ReSolve::matrix + } // namespace matrix +} // namespace ReSolve diff --git a/resolve/matrix/MatrixHandler.cpp b/resolve/matrix/MatrixHandler.cpp index 552548d65..1d570af85 100644 --- a/resolve/matrix/MatrixHandler.cpp +++ b/resolve/matrix/MatrixHandler.cpp @@ -1,14 +1,16 @@ +#include "MatrixHandler.hpp" + #include #include -#include -#include + +#include "MatrixHandlerCpu.hpp" +#include "MatrixHandlerImpl.hpp" #include #include #include +#include +#include #include -#include "MatrixHandler.hpp" -#include "MatrixHandlerCpu.hpp" -#include "MatrixHandlerImpl.hpp" #ifdef RESOLVE_USE_CUDA #include "MatrixHandlerCuda.hpp" @@ -17,7 +19,8 @@ #include "MatrixHandlerHip.hpp" #endif -namespace ReSolve { +namespace ReSolve +{ // Create a shortcut name for Logger static class using out = io::Logger; @@ -43,7 +46,8 @@ namespace ReSolve { MatrixHandler::~MatrixHandler() { delete cpuImpl_; - if (isCudaEnabled_ || isHipEnabled_) { + if (isCudaEnabled_ || isHipEnabled_) + { delete devImpl_; } } @@ -56,7 +60,7 @@ namespace ReSolve { */ MatrixHandler::MatrixHandler(LinAlgWorkspaceCpu* new_workspace) { - cpuImpl_ = new MatrixHandlerCpu(new_workspace); + cpuImpl_ = new MatrixHandlerCpu(new_workspace); isCpuEnabled_ = true; isCudaEnabled_ = false; } @@ -72,8 +76,8 @@ namespace ReSolve { */ MatrixHandler::MatrixHandler(LinAlgWorkspaceCUDA* new_workspace) { - cpuImpl_ = new MatrixHandlerCpu(); - devImpl_ = new MatrixHandlerCuda(new_workspace); + cpuImpl_ = new MatrixHandlerCpu(); + devImpl_ = new MatrixHandlerCuda(new_workspace); isCpuEnabled_ = true; isCudaEnabled_ = true; } @@ -90,8 +94,8 @@ namespace ReSolve { */ MatrixHandler::MatrixHandler(LinAlgWorkspaceHIP* new_workspace) { - cpuImpl_ = new MatrixHandlerCpu(); - devImpl_ = new MatrixHandlerHip(new_workspace); + cpuImpl_ = new MatrixHandlerCpu(); + devImpl_ = new MatrixHandlerHip(new_workspace); isCpuEnabled_ = true; isHipEnabled_ = true; } @@ -111,13 +115,14 @@ namespace ReSolve { void MatrixHandler::setValuesChanged(bool isValuesChanged, memory::MemorySpace memspace) { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - cpuImpl_->setValuesChanged(isValuesChanged); - break; - case DEVICE: - devImpl_->setValuesChanged(isValuesChanged); - break; + switch (memspace) + { + case HOST: + cpuImpl_->setValuesChanged(isValuesChanged); + break; + case DEVICE: + devImpl_->setValuesChanged(isValuesChanged); + break; } } @@ -134,21 +139,22 @@ namespace ReSolve { * * @return 0 if successful, 1 otherwise */ - int MatrixHandler::matvec(matrix::Sparse* A, - vector_type* vec_x, - vector_type* vec_result, - const real_type* alpha, - const real_type* beta, + int MatrixHandler::matvec(matrix::Sparse* A, + vector_type* vec_x, + vector_type* vec_result, + const real_type* alpha, + const real_type* beta, memory::MemorySpace memspace) { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return cpuImpl_->matvec(A, vec_x, vec_result, alpha, beta); - break; - case DEVICE: - return devImpl_->matvec(A, vec_x, vec_result, alpha, beta); - break; + switch (memspace) + { + case HOST: + return cpuImpl_->matvec(A, vec_x, vec_result, alpha, beta); + break; + case DEVICE: + return devImpl_->matvec(A, vec_x, vec_result, alpha, beta); + break; } return 1; } @@ -162,16 +168,17 @@ namespace ReSolve { * * @return 0 if successful, 1 otherwise */ - int MatrixHandler::matrixInfNorm(matrix::Sparse *A, real_type* norm, memory::MemorySpace memspace) + int MatrixHandler::matrixInfNorm(matrix::Sparse* A, real_type* norm, memory::MemorySpace memspace) { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return cpuImpl_->matrixInfNorm(A, norm); - break; - case DEVICE: - return devImpl_->matrixInfNorm(A, norm); - break; + switch (memspace) + { + case HOST: + return cpuImpl_->matrixInfNorm(A, norm); + break; + case DEVICE: + return devImpl_->matrixInfNorm(A, norm); + break; } return 1; } @@ -195,13 +202,14 @@ namespace ReSolve { int MatrixHandler::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, memory::MemorySpace memspace) { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return cpuImpl_->csc2csr(A_csc, A_csr); - break; - case DEVICE: - return devImpl_->csc2csr(A_csc, A_csr); - break; + switch (memspace) + { + case HOST: + return cpuImpl_->csc2csr(A_csc, A_csr); + break; + case DEVICE: + return devImpl_->csc2csr(A_csc, A_csr); + break; } return 1; } @@ -222,33 +230,36 @@ namespace ReSolve { assert(A->getNumRows() == At->getNumColumns() && "Number of rows in A must be equal to number of columns in At"); assert(A->getNumColumns() == At->getNumRows() && "Number of columns in A must be equal to number of rows in At"); assert(A->getNnz() == At->getNnz() && "Number of nonzeros in A must be equal to number of nonzeros in At"); - assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && - "Matrix has to be in CSR format for transpose.\n"); - assert(At->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && - "Matrix has to be in CSR format for transpose.\n"); - switch (memspace) { - case HOST: - if(A->getValues(memory::HOST) == nullptr) { - out::error() << "In MatrixHandler::transpose, A->getValues(memory::HOST) is null!\n"; - return 1; - } - if(At->getValues(memory::HOST) == nullptr) { - out::error() << "In MatrixHandler::transpose, At->getValues(memory::HOST) is null!\n"; - return 1; - } - return cpuImpl_->transpose(A, At); - break; - case DEVICE: - if(A->getValues(memory::DEVICE) == nullptr) { - out::error() << "In MatrixHandlerCuda::transpose, A->getValues(memory::DEVICE) is null!\n"; - return 1; - } - if(At->getValues(memory::DEVICE) == nullptr) { - out::error() << "In MatrixHandlerCuda::transpose, At->getValues(memory::DEVICE) is null!\n"; - return 1; - } - return devImpl_->transpose(A, At); - break; + assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix has to be in CSR format for transpose.\n"); + assert(At->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix has to be in CSR format for transpose.\n"); + switch (memspace) + { + case HOST: + if (A->getValues(memory::HOST) == nullptr) + { + out::error() << "In MatrixHandler::transpose, A->getValues(memory::HOST) is null!\n"; + return 1; + } + if (At->getValues(memory::HOST) == nullptr) + { + out::error() << "In MatrixHandler::transpose, At->getValues(memory::HOST) is null!\n"; + return 1; + } + return cpuImpl_->transpose(A, At); + break; + case DEVICE: + if (A->getValues(memory::DEVICE) == nullptr) + { + out::error() << "In MatrixHandlerCuda::transpose, A->getValues(memory::DEVICE) is null!\n"; + return 1; + } + if (At->getValues(memory::DEVICE) == nullptr) + { + out::error() << "In MatrixHandlerCuda::transpose, At->getValues(memory::DEVICE) is null!\n"; + return 1; + } + return devImpl_->transpose(A, At); + break; } return 1; } @@ -269,20 +280,19 @@ namespace ReSolve { */ int MatrixHandler::leftScale(vector_type* diag, matrix::Csr* A, memory::MemorySpace memspace) { - assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && - "Matrix has to be in CSR format for left diagonal scaling.\n"); - assert(diag->getSize() == A->getNumRows() && - "Diagonal vector must be of the same size as the number of rows in the matrix."); + assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix has to be in CSR format for left diagonal scaling.\n"); + assert(diag->getSize() == A->getNumRows() && "Diagonal vector must be of the same size as the number of rows in the matrix."); assert(A->getValues(memspace) != nullptr && "Matrix values are null!\n"); assert(diag->getData(memspace) != nullptr && "Diagonal vector data is null!\n"); using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return cpuImpl_->leftScale(diag, A); - break; - case DEVICE: - return devImpl_->leftScale(diag, A); - break; + switch (memspace) + { + case HOST: + return cpuImpl_->leftScale(diag, A); + break; + case DEVICE: + return devImpl_->leftScale(diag, A); + break; } return 1; } @@ -303,19 +313,19 @@ namespace ReSolve { */ int MatrixHandler::rightScale(matrix::Csr* A, vector_type* diag, memory::MemorySpace memspace) { - assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && - "Matrix has to be in CSR format for right diagonal scaling.\n"); + assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix has to be in CSR format for right diagonal scaling.\n"); assert(diag->getSize() == A->getNumColumns() && "Diagonal vector must be of the same size as the number of columns in the matrix."); assert(A->getValues(memspace) != nullptr && "Matrix values are null!\n"); assert(diag->getData(memspace) != nullptr && "Diagonal vector data is null!\n"); using namespace ReSolve::memory; - switch (memspace) { - case HOST: - return cpuImpl_->rightScale(A, diag); - break; - case DEVICE: - return devImpl_->rightScale(A, diag); - break; + switch (memspace) + { + case HOST: + return cpuImpl_->rightScale(A, diag); + break; + case DEVICE: + return devImpl_->rightScale(A, diag); + break; } return 1; } @@ -330,13 +340,14 @@ namespace ReSolve { void MatrixHandler::addConst(matrix::Sparse* A, real_type alpha, memory::MemorySpace memspace) { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - cpuImpl_->addConst(A, alpha); - break; - case DEVICE: - devImpl_->addConst(A, alpha); - break; + switch (memspace) + { + case HOST: + cpuImpl_->addConst(A, alpha); + break; + case DEVICE: + devImpl_->addConst(A, alpha); + break; } } diff --git a/resolve/matrix/MatrixHandler.hpp b/resolve/matrix/MatrixHandler.hpp index 219badd11..6b6785aba 100644 --- a/resolve/matrix/MatrixHandler.hpp +++ b/resolve/matrix/MatrixHandler.hpp @@ -2,28 +2,28 @@ #include #include - namespace ReSolve { namespace vector { class Vector; } + namespace matrix { class Sparse; class Coo; class Csc; class Csr; - } + } // namespace matrix class LinAlgWorkspaceCpu; class LinAlgWorkspaceCUDA; class LinAlgWorkspaceHIP; class MatrixHandlerImpl; -} - +} // namespace ReSolve -namespace ReSolve { +namespace ReSolve +{ /** * @brief this class encapsulates various matrix manipulation operations, @@ -45,46 +45,45 @@ namespace ReSolve { { using vector_type = vector::Vector; - public: - MatrixHandler(); - MatrixHandler(LinAlgWorkspaceCpu* workspace); - MatrixHandler(LinAlgWorkspaceCUDA* workspace); - MatrixHandler(LinAlgWorkspaceHIP* workspace); - ~MatrixHandler(); + public: + MatrixHandler(); + MatrixHandler(LinAlgWorkspaceCpu* workspace); + MatrixHandler(LinAlgWorkspaceCUDA* workspace); + MatrixHandler(LinAlgWorkspaceHIP* workspace); + ~MatrixHandler(); - int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, memory::MemorySpace memspace); + int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, memory::MemorySpace memspace); - int transpose(matrix::Csr* A, matrix::Csr* At, memory::MemorySpace memspace); + int transpose(matrix::Csr* A, matrix::Csr* At, memory::MemorySpace memspace); - int leftScale(vector_type* diag, matrix::Csr* A, memory::MemorySpace memspace); + int leftScale(vector_type* diag, matrix::Csr* A, memory::MemorySpace memspace); - int rightScale(matrix::Csr* A, vector_type* diag, memory::MemorySpace memspace); + int rightScale(matrix::Csr* A, vector_type* diag, memory::MemorySpace memspace); - void addConst(matrix::Sparse* A, real_type alpha, memory::MemorySpace memspace); + void addConst(matrix::Sparse* A, real_type alpha, memory::MemorySpace memspace); - /// Should compute vec_result := alpha*A*vec_x + beta*vec_result, but at least on cpu alpha and beta are flipped - int matvec(matrix::Sparse* A, - vector_type* vec_x, - vector_type* vec_result, - const real_type* alpha, - const real_type* beta, - memory::MemorySpace memspace); - int matrixInfNorm(matrix::Sparse *A, real_type* norm, memory::MemorySpace memspace); - void setValuesChanged(bool toWhat, memory::MemorySpace memspace); + /// Should compute vec_result := alpha*A*vec_x + beta*vec_result, but at least on cpu alpha and beta are flipped + int matvec(matrix::Sparse* A, + vector_type* vec_x, + vector_type* vec_result, + const real_type* alpha, + const real_type* beta, + memory::MemorySpace memspace); + int matrixInfNorm(matrix::Sparse* A, real_type* norm, memory::MemorySpace memspace); + void setValuesChanged(bool toWhat, memory::MemorySpace memspace); - bool getIsCudaEnabled() const; - bool getIsHipEnabled() const; + bool getIsCudaEnabled() const; + bool getIsHipEnabled() const; - private: - bool new_matrix_{true}; ///< if the structure changed, you need a new handler. + private: + bool new_matrix_{true}; ///< if the structure changed, you need a new handler. - MatrixHandlerImpl* cpuImpl_{nullptr}; ///< Pointer to host implementation - MatrixHandlerImpl* devImpl_{nullptr}; ///< Pointer to device implementation + MatrixHandlerImpl* cpuImpl_{nullptr}; ///< Pointer to host implementation + MatrixHandlerImpl* devImpl_{nullptr}; ///< Pointer to device implementation - bool isCpuEnabled_{false}; ///< true if CPU implementation is instantiated - bool isCudaEnabled_{false}; ///< true if CUDA implementation is instantiated - bool isHipEnabled_{false}; ///< true if HIP implementation is instantiated + bool isCpuEnabled_{false}; ///< true if CPU implementation is instantiated + bool isCudaEnabled_{false}; ///< true if CUDA implementation is instantiated + bool isHipEnabled_{false}; ///< true if HIP implementation is instantiated }; } // namespace ReSolve - diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index b70a178ce..28d726e2b 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -1,29 +1,33 @@ +#include "MatrixHandlerCpu.hpp" + #include #include -#include -#include #include #include #include -#include "MatrixHandlerCpu.hpp" +#include +#include namespace ReSolve { // Create a shortcut name for Logger static class using out = io::Logger; + /** * @brief Empty constructor for MatrixHandlerCpu class. */ MatrixHandlerCpu::MatrixHandlerCpu() { } + /** * @brief Empty destructor for MatrixHandlerCpu class. */ MatrixHandlerCpu::~MatrixHandlerCpu() { } + /** * @brief Constructor for MatrixHandlerCpu class. * @param[in] new_workspace - pointer to LinAlgWorkspaceCpu object @@ -32,6 +36,7 @@ namespace ReSolve { workspace_ = new_workspace; } + /** * @brief Marks when values have changed in MatrixHandlerCpu class. * @param[in] values_changed - boolean value indicating if values have changed @@ -41,7 +46,6 @@ namespace ReSolve values_changed_ = values_changed; } - /** * @brief result := alpha * A * x + beta * result * @@ -59,41 +63,42 @@ namespace ReSolve * statement to select implementation for recognized input matrix * format. */ - int MatrixHandlerCpu::matvec(matrix::Sparse* A, - vector_type* vec_x, - vector_type* vec_result, + int MatrixHandlerCpu::matvec(matrix::Sparse* A, + vector_type* vec_x, + vector_type* vec_result, const real_type* alpha, const real_type* beta) { using namespace constants; - assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && - "Matrix has to be in CSR format for matrix-vector product.\n"); + assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix has to be in CSR format for matrix-vector product.\n"); index_type* ia = A->getRowData(memory::HOST); index_type* ja = A->getColData(memory::HOST); - real_type* a = A->getValues( memory::HOST); + real_type* a = A->getValues(memory::HOST); real_type* x_data = vec_x->getData(memory::HOST); real_type* result_data = vec_result->getData(memory::HOST); - real_type sum; - real_type y; - real_type t; - real_type c; + real_type sum; + real_type y; + real_type t; + real_type c; // Kahan algorithm for stability - for (int i = 0; i < A->getNumRows(); ++i) { + for (int i = 0; i < A->getNumRows(); ++i) + { sum = 0.0; - c = 0.0; - for (int j = ia[i]; j < ia[i+1]; ++j) { - y = ( a[j] * x_data[ja[j]]) - c; - t = sum + y; - c = (t - sum) - y; + c = 0.0; + for (int j = ia[i]; j < ia[i + 1]; ++j) + { + y = (a[j] * x_data[ja[j]]) - c; + t = sum + y; + c = (t - sum) - y; sum = t; // sum += (a[j] * x_data[ja[j]]); } - sum *= (*alpha); - result_data[i] = result_data[i]*(*beta) + sum; + sum *= (*alpha); + result_data[i] = result_data[i] * (*beta) + sum; } vec_result->setDataUpdated(memory::HOST); return 0; @@ -116,18 +121,20 @@ namespace ReSolve int MatrixHandlerCpu::matrixInfNorm(matrix::Sparse* A, real_type* norm) { using memory::HOST; - assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && - "Matrix has to be in CSR format for matrix-vector product.\n"); + assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix has to be in CSR format for matrix-vector product.\n"); real_type sum = 0.0; real_type nrm = 0.0; - for (index_type i = 0; i < A->getNumRows(); ++i) { + for (index_type i = 0; i < A->getNumRows(); ++i) + { sum = 0.0; - for (index_type j = A->getRowData(HOST)[i]; j < A->getRowData(HOST)[i+1]; ++j) { + for (index_type j = A->getRowData(HOST)[i]; j < A->getRowData(HOST)[i + 1]; ++j) + { sum += std::abs(A->getValues(HOST)[j]); } - if (i == 0 || sum > nrm) { + if (i == 0 || sum > nrm) + { nrm = sum; } } @@ -157,32 +164,36 @@ namespace ReSolve index_type* rowIdxCsc = A_csc->getRowData(memory::HOST); index_type* colPtrCsc = A_csc->getColData(memory::HOST); - real_type* valuesCsc = A_csc->getValues( memory::HOST); + real_type* valuesCsc = A_csc->getValues(memory::HOST); index_type* rowPtrCsr = A_csr->getRowData(memory::HOST); index_type* colIdxCsr = A_csr->getColData(memory::HOST); - real_type* valuesCsr = A_csr->getValues( memory::HOST); + real_type* valuesCsr = A_csr->getValues(memory::HOST); // Set all CSR row pointers to zero - for (index_type i = 0; i <= n; ++i) { + for (index_type i = 0; i <= n; ++i) + { rowPtrCsr[i] = 0; } // Set all CSR values and column indices to zero - for (index_type i = 0; i < nnz; ++i) { + for (index_type i = 0; i < nnz; ++i) + { colIdxCsr[i] = 0; valuesCsr[i] = 0.0; } // Compute number of entries per row - for (index_type i = 0; i < nnz; ++i) { + for (index_type i = 0; i < nnz; ++i) + { rowPtrCsr[rowIdxCsc[i]]++; } // Compute cumualtive sum of nnz per row - for (index_type row = 0, rowsum = 0; row < n; ++row) { + for (index_type row = 0, rowsum = 0; row < n; ++row) + { // Store value in row pointer to temp - index_type temp = rowPtrCsr[row]; + index_type temp = rowPtrCsr[row]; // Copy cumulative sum to the row pointer rowPtrCsr[row] = rowsum; @@ -192,10 +203,12 @@ namespace ReSolve } rowPtrCsr[n] = nnz; - for (index_type col = 0; col < m; ++col) { + for (index_type col = 0; col < m; ++col) + { // Compute positions of column indices and values in CSR matrix and store them there // Overwrites CSR row pointers in the process - for (index_type jj = colPtrCsc[col]; jj < colPtrCsc[col+1]; jj++) { + for (index_type jj = colPtrCsc[col]; jj < colPtrCsc[col + 1]; jj++) + { index_type row = rowIdxCsc[jj]; index_type dest = rowPtrCsr[row]; @@ -207,10 +220,11 @@ namespace ReSolve } // Restore CSR row pointer values - for (index_type row = 0, last = 0; row <= n; row++) { - index_type temp = rowPtrCsr[row]; - rowPtrCsr[row] = last; - last = temp; + for (index_type row = 0, last = 0; row <= n; row++) + { + index_type temp = rowPtrCsr[row]; + rowPtrCsr[row] = last; + last = temp; } // Values on the host are updated now -- mark them as such! @@ -231,33 +245,37 @@ namespace ReSolve { assert(A->getValues(memory::HOST) != nullptr && "Matrix A is not allocated on host.\n"); assert(At->getValues(memory::HOST) != nullptr && "Matrix At is not allocated on host.\n"); - index_type n = A->getNumRows(); - index_type m = A->getNumColumns(); - index_type nnz = A->getNnz(); - index_type* rowPtrA = A->getRowData(memory::HOST); - index_type* colIdxA = A->getColData(memory::HOST); - real_type* valuesA = A->getValues( memory::HOST); + index_type n = A->getNumRows(); + index_type m = A->getNumColumns(); + index_type nnz = A->getNnz(); + index_type* rowPtrA = A->getRowData(memory::HOST); + index_type* colIdxA = A->getColData(memory::HOST); + real_type* valuesA = A->getValues(memory::HOST); index_type* rowPtrAt = At->getRowData(memory::HOST); index_type* colIdxAt = At->getColData(memory::HOST); - real_type* valuesAt = At->getValues( memory::HOST); + real_type* valuesAt = At->getValues(memory::HOST); // Set all CSR row pointers to zero - for (index_type i = 0; i <= m; ++i) { + for (index_type i = 0; i <= m; ++i) + { rowPtrAt[i] = 0; } // Set all CSR values and column indices to zero - for (index_type i = 0; i < nnz; ++i) { + for (index_type i = 0; i < nnz; ++i) + { colIdxAt[i] = 0; valuesAt[i] = 0.0; } // Compute number of entries per row - for (index_type i = 0; i < nnz; ++i) { + for (index_type i = 0; i < nnz; ++i) + { rowPtrAt[colIdxA[i]]++; } // Compute cumualtive sum of nnz per row - for (index_type row = 0, rowsum = 0; row < m; ++row) { + for (index_type row = 0, rowsum = 0; row < m; ++row) + { // Store value in row pointer to temp - index_type temp = rowPtrAt[row]; + index_type temp = rowPtrAt[row]; // Copy cumulative sum to the row pointer rowPtrAt[row] = rowsum; @@ -266,10 +284,12 @@ namespace ReSolve rowsum += temp; } rowPtrAt[m] = nnz; - for (index_type col = 0; col < n; ++col) { + for (index_type col = 0; col < n; ++col) + { // Compute positions of column indices and values in CSR matrix and store them there // Overwrites CSR row pointers in the process - for (index_type jj = rowPtrA[col]; jj < rowPtrA[col+1]; jj++) { + for (index_type jj = rowPtrA[col]; jj < rowPtrA[col + 1]; jj++) + { index_type row = colIdxA[jj]; index_type dest = rowPtrAt[row]; @@ -280,10 +300,11 @@ namespace ReSolve } } // Restore CSR row pointer values - for (index_type row = 0, last = 0; row <= m; row++) { - index_type temp = rowPtrAt[row]; - rowPtrAt[row] = last; - last = temp; + for (index_type row = 0, last = 0; row <= m; row++) + { + index_type temp = rowPtrAt[row]; + rowPtrAt[row] = last; + last = temp; } // Values on the host are updated now -- mark them as such! At->setUpdated(memory::HOST); @@ -306,12 +327,14 @@ namespace ReSolve */ int MatrixHandlerCpu::leftScale(vector_type* diag, matrix::Csr* A) { - real_type* diag_data = diag->getData(memory::HOST); - index_type* rowPtrA = A->getRowData(memory::HOST); - real_type* valuesA = A->getValues( memory::HOST); - - for (index_type i = 0; i < A->getNumRows(); ++i) { - for (index_type j = rowPtrA[i]; j < rowPtrA[i+1]; ++j) { + real_type* diag_data = diag->getData(memory::HOST); + index_type* rowPtrA = A->getRowData(memory::HOST); + real_type* valuesA = A->getValues(memory::HOST); + + for (index_type i = 0; i < A->getNumRows(); ++i) + { + for (index_type j = rowPtrA[i]; j < rowPtrA[i + 1]; ++j) + { valuesA[j] *= diag_data[i]; } } @@ -333,13 +356,15 @@ namespace ReSolve */ int MatrixHandlerCpu::rightScale(matrix::Csr* A, vector_type* diag) { - real_type* diag_data = diag->getData(memory::HOST); - index_type* rowPtrA = A->getRowData(memory::HOST); - index_type* colIdxA = A->getColData(memory::HOST); - real_type* valuesA = A->getValues( memory::HOST); - - for (index_type i = 0; i < A->getNumRows(); ++i) { - for (index_type j = rowPtrA[i]; j < rowPtrA[i+1]; ++j) { + real_type* diag_data = diag->getData(memory::HOST); + index_type* rowPtrA = A->getRowData(memory::HOST); + index_type* colIdxA = A->getColData(memory::HOST); + real_type* valuesA = A->getValues(memory::HOST); + + for (index_type i = 0; i < A->getNumRows(); ++i) + { + for (index_type j = rowPtrA[i]; j < rowPtrA[i + 1]; ++j) + { valuesA[j] *= diag_data[colIdxA[j]]; } } @@ -357,8 +382,9 @@ namespace ReSolve int MatrixHandlerCpu::addConst(matrix::Sparse* A, real_type alpha) { real_type* values = A->getValues(memory::HOST); - index_type nnz = A->getNnz(); - for (index_type i = 0; i < nnz; ++i) { + index_type nnz = A->getNnz(); + for (index_type i = 0; i < nnz; ++i) + { values[i] += alpha; } return 0; diff --git a/resolve/matrix/MatrixHandlerCpu.hpp b/resolve/matrix/MatrixHandlerCpu.hpp index e028cb84b..66b810787 100644 --- a/resolve/matrix/MatrixHandlerCpu.hpp +++ b/resolve/matrix/MatrixHandlerCpu.hpp @@ -9,18 +9,19 @@ namespace ReSolve { class Vector; } + namespace matrix { class Sparse; class Coo; class Csc; class Csr; - } + } // namespace matrix class LinAlgWorkspaceCpu; -} - +} // namespace ReSolve -namespace ReSolve { +namespace ReSolve +{ /** * @class MatrixHandlerCpu * @@ -30,37 +31,36 @@ namespace ReSolve { { using vector_type = vector::Vector; - public: - MatrixHandlerCpu(); - MatrixHandlerCpu(LinAlgWorkspaceCpu* workspace); - virtual ~MatrixHandlerCpu(); + public: + MatrixHandlerCpu(); + MatrixHandlerCpu(LinAlgWorkspaceCpu* workspace); + virtual ~MatrixHandlerCpu(); - int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) override; + int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) override; - int transpose(matrix::Csr* A, matrix::Csr* At) override; + int transpose(matrix::Csr* A, matrix::Csr* At) override; - int leftScale(vector_type* diag, matrix::Csr* A) override; + int leftScale(vector_type* diag, matrix::Csr* A) override; - int rightScale(matrix::Csr* A, vector_type* diag) override; + int rightScale(matrix::Csr* A, vector_type* diag) override; - int addConst(matrix::Sparse* A, real_type alpha) override; + int addConst(matrix::Sparse* A, real_type alpha) override; - virtual int matvec(matrix::Sparse* A, - vector_type* vec_x, - vector_type* vec_result, - const real_type* alpha, - const real_type* beta) override; + virtual int matvec(matrix::Sparse* A, + vector_type* vec_x, + vector_type* vec_result, + const real_type* alpha, + const real_type* beta) override; - virtual int matrixInfNorm(matrix::Sparse *A, real_type* norm) override; + virtual int matrixInfNorm(matrix::Sparse* A, real_type* norm) override; - void setValuesChanged(bool isValuesChanged) override; + void setValuesChanged(bool isValuesChanged) override; - private: - LinAlgWorkspaceCpu* workspace_{nullptr}; - bool values_changed_{true}; ///< needed for matvec + private: + LinAlgWorkspaceCpu* workspace_{nullptr}; + bool values_changed_{true}; ///< needed for matvec - // MemoryHandler mem_; ///< Device memory manager object not used for now + // MemoryHandler mem_; ///< Device memory manager object not used for now }; } // namespace ReSolve - diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index a856fad13..f88e9aaac 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -1,17 +1,20 @@ +#include "MatrixHandlerCuda.hpp" + #include -#include -#include +#include +#include + +#include // needed for inf nrm #include #include #include +#include +#include #include -#include "MatrixHandlerCuda.hpp" -#include -#include -#include // needed for inf nrm -namespace ReSolve { +namespace ReSolve +{ // Create a shortcut name for Logger static class using out = io::Logger; @@ -59,51 +62,51 @@ namespace ReSolve { * statement to select implementation for recognized input matrix * format. */ - int MatrixHandlerCuda::matvec(matrix::Sparse* A, - vector_type* vec_x, - vector_type* vec_result, + int MatrixHandlerCuda::matvec(matrix::Sparse* A, + vector_type* vec_x, + vector_type* vec_result, const real_type* alpha, const real_type* beta) { using namespace constants; - assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && - "Matrix has to be in CSR format for matrix-vector product.\n"); + assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix has to be in CSR format for matrix-vector product.\n"); - int error_sum = 0; - //result = alpha *A*x + beta * result - cusparseStatus_t status; + int error_sum = 0; + // result = alpha *A*x + beta * result + cusparseStatus_t status; cusparseDnVecDescr_t vecx = workspace_->getVecX(); cusparseCreateDnVec(&vecx, A->getNumRows(), vec_x->getData(memory::DEVICE), CUDA_R_64F); - cusparseDnVecDescr_t vecAx = workspace_->getVecY(); cusparseCreateDnVec(&vecAx, A->getNumRows(), vec_result->getData(memory::DEVICE), CUDA_R_64F); cusparseSpMatDescr_t matA = workspace_->getSpmvMatrixDescriptor(); - void* buffer_spmv = workspace_->getSpmvBuffer(); + void* buffer_spmv = workspace_->getSpmvBuffer(); cusparseHandle_t handle_cusparse = workspace_->getCusparseHandle(); - if (values_changed_) { - status = cusparseCreateCsr(&matA, + if (values_changed_) + { + status = cusparseCreateCsr(&matA, A->getNumRows(), A->getNumColumns(), A->getNnz(), A->getRowData(memory::DEVICE), A->getColData(memory::DEVICE), - A->getValues( memory::DEVICE), + A->getValues(memory::DEVICE), CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F); - error_sum += status; - values_changed_ = false; + error_sum += status; + values_changed_ = false; } - if (!workspace_->matvecSetup()) { - //setup first, allocate, etc. + if (!workspace_->matvecSetup()) + { + // setup first, allocate, etc. size_t bufferSize = 0; - status = cusparseSpMV_bufferSize(handle_cusparse, + status = cusparseSpMV_bufferSize(handle_cusparse, CUSPARSE_OPERATION_NON_TRANSPOSE, &MINUS_ONE, matA, @@ -122,7 +125,7 @@ namespace ReSolve { workspace_->matvecSetupDone(); } - status = cusparseSpMV(handle_cusparse, + status = cusparseSpMV(handle_cusparse, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha, matA, @@ -135,7 +138,7 @@ namespace ReSolve { error_sum += status; mem_.deviceSynchronize(); if (status) - out::error() << "Matvec status: " << status << ". " + out::error() << "Matvec status: " << status << ". " << "Last error code: " << mem_.getLastDeviceError() << ".\n"; vec_result->setDataUpdated(memory::DEVICE); @@ -160,10 +163,10 @@ namespace ReSolve { */ int MatrixHandlerCuda::matrixInfNorm(matrix::Sparse* A, real_type* norm) { - assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && - "Matrix has to be in CSR format for matrix-vector product.\n"); + assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix has to be in CSR format for matrix-vector product.\n"); - if (workspace_->getNormBufferState() == false) { // not allocated + if (workspace_->getNormBufferState() == false) + { // not allocated real_type* buffer; mem_.allocateArrayOnDevice(&buffer, 1024); workspace_->setNormBuffer(buffer); @@ -171,8 +174,10 @@ namespace ReSolve { } real_type* d_r = workspace_->getDr(); - if (workspace_->getDrSize() != A->getNumRows()) { - if (d_r != nullptr) { + if (workspace_->getDrSize() != A->getNumRows()) + { + if (d_r != nullptr) + { mem_.deleteOnDevice(d_r); } mem_.allocateArrayOnDevice(&d_r, A->getNumRows()); @@ -190,43 +195,45 @@ namespace ReSolve { A->getNumRows(), d_r, norm, - workspace_->getNormBuffer() /* at least 8192 bytes */); + workspace_->getNormBuffer() /* at least 8192 bytes */); - if (status != 0) { - io::Logger::warning() << "Vector inf nrm returned "<allocateMatrixData(memory::DEVICE); - index_type m = A_csc->getNumColumns(); - index_type n = A_csc->getNumRows(); + index_type m = A_csc->getNumColumns(); + index_type n = A_csc->getNumRows(); index_type nnz = A_csc->getNnz(); // check dimensions of A_csc and A_csr assert(A_csc->getNumRows() == A_csr->getNumRows() && "Number of rows in A_csc must be equal to number of rows in A_csr"); assert(A_csc->getNumColumns() == A_csr->getNumColumns() && "Number of columns in A_csc must be equal to number of columns in A_csr"); - size_t bufferSize; - void* d_work; - cusparseStatus_t status = cusparseCsr2cscEx2_bufferSize(workspace_->getCusparseHandle(), + size_t bufferSize; + void* d_work; + cusparseStatus_t status = cusparseCsr2cscEx2_bufferSize(workspace_->getCusparseHandle(), m, n, nnz, - A_csc->getValues( memory::DEVICE), + A_csc->getValues(memory::DEVICE), A_csc->getColData(memory::DEVICE), A_csc->getRowData(memory::DEVICE), - A_csr->getValues( memory::DEVICE), + A_csr->getValues(memory::DEVICE), A_csr->getRowData(memory::DEVICE), A_csr->getColData(memory::DEVICE), CUDA_R_64F, @@ -234,16 +241,16 @@ namespace ReSolve { CUSPARSE_INDEX_BASE_ZERO, CUSPARSE_CSR2CSC_ALG1, &bufferSize); - error_sum += status; + error_sum += status; mem_.allocateBufferOnDevice(&d_work, bufferSize); - status = cusparseCsr2cscEx2(workspace_->getCusparseHandle(), + status = cusparseCsr2cscEx2(workspace_->getCusparseHandle(), m, n, nnz, - A_csc->getValues( memory::DEVICE), + A_csc->getValues(memory::DEVICE), A_csc->getColData(memory::DEVICE), A_csc->getRowData(memory::DEVICE), - A_csr->getValues( memory::DEVICE), + A_csr->getValues(memory::DEVICE), A_csr->getRowData(memory::DEVICE), A_csr->getColData(memory::DEVICE), CUDA_R_64F, @@ -252,8 +259,9 @@ namespace ReSolve { CUSPARSE_CSR2CSC_ALG1, d_work); error_sum += status; - if (status) { - out::error() << "CSC2CSR status: " << status << ". " + if (status) + { + out::error() << "CSC2CSR status: " << status << ". " << "Last error code: " << mem_.getLastDeviceError() << ".\n"; } mem_.deleteOnDevice(d_work); @@ -276,24 +284,25 @@ namespace ReSolve { */ int MatrixHandlerCuda::transpose(matrix::Csr* A, matrix::Csr* At) { - index_type error_sum = 0; - index_type m = A->getNumRows(); - index_type n = A->getNumColumns(); - index_type nnz = A->getNnz(); + index_type error_sum = 0; + index_type m = A->getNumRows(); + index_type n = A->getNumColumns(); + index_type nnz = A->getNnz(); cusparseStatus_t status; - bool allocated = workspace_->isTransposeBufferAllocated(); + bool allocated = workspace_->isTransposeBufferAllocated(); // check dimensions of A and At - if (!allocated) { + if (!allocated) + { // allocate transpose buffer size_t bufferSize; - status = cusparseCsr2cscEx2_bufferSize(workspace_->getCusparseHandle(), + status = cusparseCsr2cscEx2_bufferSize(workspace_->getCusparseHandle(), m, n, nnz, - A->getValues( memory::DEVICE), + A->getValues(memory::DEVICE), A->getRowData(memory::DEVICE), A->getColData(memory::DEVICE), - At->getValues( memory::DEVICE), + At->getValues(memory::DEVICE), At->getRowData(memory::DEVICE), At->getColData(memory::DEVICE), CUDA_R_64F, @@ -304,14 +313,14 @@ namespace ReSolve { error_sum += status; workspace_->setTransposeBufferWorkspace(bufferSize); } - status = cusparseCsr2cscEx2(workspace_->getCusparseHandle(), + status = cusparseCsr2cscEx2(workspace_->getCusparseHandle(), m, n, nnz, - A->getValues( memory::DEVICE), + A->getValues(memory::DEVICE), A->getRowData(memory::DEVICE), A->getColData(memory::DEVICE), - At->getValues( memory::DEVICE), + At->getValues(memory::DEVICE), At->getRowData(memory::DEVICE), At->getColData(memory::DEVICE), CUDA_R_64F, @@ -341,10 +350,10 @@ namespace ReSolve { */ int MatrixHandlerCuda::leftScale(vector_type* diag, matrix::Csr* A) { - real_type* diag_data = diag->getData(memory::DEVICE); + real_type* diag_data = diag->getData(memory::DEVICE); index_type* a_row_ptr = A->getRowData(memory::DEVICE); - real_type* a_vals = A->getValues( memory::DEVICE); - index_type n = A->getNumRows(); + real_type* a_vals = A->getValues(memory::DEVICE); + index_type n = A->getNumRows(); // check values in A and diag cuda::leftScale(n, a_row_ptr, a_vals, diag_data); A->setUpdated(memory::DEVICE); @@ -366,11 +375,11 @@ namespace ReSolve { */ int MatrixHandlerCuda::rightScale(matrix::Csr* A, vector_type* diag) { - real_type* diag_data = diag->getData(memory::DEVICE); + real_type* diag_data = diag->getData(memory::DEVICE); index_type* a_row_ptr = A->getRowData(memory::DEVICE); index_type* a_col_idx = A->getColData(memory::DEVICE); - real_type* a_vals = A->getValues( memory::DEVICE); - index_type n = A->getNumRows(); + real_type* a_vals = A->getValues(memory::DEVICE); + index_type n = A->getNumRows(); cuda::rightScale(n, a_row_ptr, a_col_idx, a_vals, diag_data); A->setUpdated(memory::DEVICE); return 0; @@ -387,7 +396,7 @@ namespace ReSolve { int MatrixHandlerCuda::addConst(matrix::Sparse* A, real_type alpha) { real_type* values = A->getValues(memory::DEVICE); - index_type nnz = A->getNnz(); + index_type nnz = A->getNnz(); cuda::addConst(nnz, alpha, values); return 0; } diff --git a/resolve/matrix/MatrixHandlerCuda.hpp b/resolve/matrix/MatrixHandlerCuda.hpp index 957dcf70d..f0efbcee8 100644 --- a/resolve/matrix/MatrixHandlerCuda.hpp +++ b/resolve/matrix/MatrixHandlerCuda.hpp @@ -9,18 +9,19 @@ namespace ReSolve { class Vector; } + namespace matrix { class Sparse; class Coo; class Csc; class Csr; - } + } // namespace matrix class LinAlgWorkspaceCUDA; -} - +} // namespace ReSolve -namespace ReSolve { +namespace ReSolve +{ /** * @class MatrixHandlerCuda * @@ -30,37 +31,34 @@ namespace ReSolve { { using vector_type = vector::Vector; - public: - MatrixHandlerCuda(LinAlgWorkspaceCUDA* workspace); - virtual ~MatrixHandlerCuda(); - - int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) override; + public: + MatrixHandlerCuda(LinAlgWorkspaceCUDA* workspace); + virtual ~MatrixHandlerCuda(); - int transpose(matrix::Csr* A, matrix::Csr* At) override; + int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) override; - int addConst(matrix::Sparse* A, real_type alpha) override; + int transpose(matrix::Csr* A, matrix::Csr* At) override; - int leftScale(vector_type* diag, matrix::Csr* A) override; + int addConst(matrix::Sparse* A, real_type alpha) override; - int rightScale(matrix::Csr* A, vector_type* diag) override; + int leftScale(vector_type* diag, matrix::Csr* A) override; - virtual int matvec(matrix::Sparse* A, - vector_type* vec_x, - vector_type* vec_result, - const real_type* alpha, - const real_type* beta) override; - virtual int matrixInfNorm(matrix::Sparse* A, real_type* norm) override; + int rightScale(matrix::Csr* A, vector_type* diag) override; - void setValuesChanged(bool isValuesChanged) override; + virtual int matvec(matrix::Sparse* A, + vector_type* vec_x, + vector_type* vec_result, + const real_type* alpha, + const real_type* beta) override; + virtual int matrixInfNorm(matrix::Sparse* A, real_type* norm) override; - private: - LinAlgWorkspaceCUDA* workspace_{nullptr}; - bool values_changed_{true}; ///< needed for matvec - - MemoryHandler mem_; ///< Device memory manager object + void setValuesChanged(bool isValuesChanged) override; + private: + LinAlgWorkspaceCUDA* workspace_{nullptr}; + bool values_changed_{true}; ///< needed for matvec + MemoryHandler mem_; ///< Device memory manager object }; } // namespace ReSolve - diff --git a/resolve/matrix/MatrixHandlerHip.cpp b/resolve/matrix/MatrixHandlerHip.cpp index 3fbda0d14..ae97be1d4 100644 --- a/resolve/matrix/MatrixHandlerHip.cpp +++ b/resolve/matrix/MatrixHandlerHip.cpp @@ -1,16 +1,19 @@ +#include "MatrixHandlerHip.hpp" + #include -#include -#include +#include +#include + #include #include #include +#include +#include #include -#include -#include -#include "MatrixHandlerHip.hpp" -namespace ReSolve { +namespace ReSolve +{ // Create a shortcut name for Logger static class using out = io::Logger; @@ -41,7 +44,6 @@ namespace ReSolve { values_changed_ = values_changed; } - /** * @brief result := alpha * A * x + beta * result * @@ -59,41 +61,41 @@ namespace ReSolve { * statement to select implementation for recognized input matrix * format. */ - int MatrixHandlerHip::matvec(matrix::Sparse* A, - vector_type* vec_x, - vector_type* vec_result, + int MatrixHandlerHip::matvec(matrix::Sparse* A, + vector_type* vec_x, + vector_type* vec_result, const real_type* alpha, const real_type* beta) { using namespace constants; - assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && - "Matrix has to be in CSR format for matrix-vector product.\n"); + assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix has to be in CSR format for matrix-vector product.\n"); - int error_sum = 0; - //result = alpha *A*x + beta * result + int error_sum = 0; + // result = alpha *A*x + beta * result rocsparse_status status; rocsparse_handle handle_rocsparse = workspace_->getRocsparseHandle(); - rocsparse_mat_info infoA = workspace_->getSpmvMatrixInfo(); - rocsparse_mat_descr descrA = workspace_->getSpmvMatrixDescriptor(); + rocsparse_mat_info infoA = workspace_->getSpmvMatrixInfo(); + rocsparse_mat_descr descrA = workspace_->getSpmvMatrixDescriptor(); - if (!workspace_->matvecSetup()) { - //setup first, allocate, etc. + if (!workspace_->matvecSetup()) + { + // setup first, allocate, etc. rocsparse_create_mat_descr(&(descrA)); rocsparse_set_mat_index_base(descrA, rocsparse_index_base_zero); rocsparse_set_mat_type(descrA, rocsparse_matrix_type_general); rocsparse_create_mat_info(&infoA); - status = rocsparse_dcsrmv_analysis(handle_rocsparse, + status = rocsparse_dcsrmv_analysis(handle_rocsparse, rocsparse_operation_none, A->getNumRows(), A->getNumColumns(), A->getNnz(), descrA, - A->getValues( memory::DEVICE), + A->getValues(memory::DEVICE), A->getRowData(memory::DEVICE), A->getColData(memory::DEVICE), infoA); @@ -112,7 +114,7 @@ namespace ReSolve { A->getNnz(), alpha, descrA, - A->getValues( memory::DEVICE), + A->getValues(memory::DEVICE), A->getRowData(memory::DEVICE), A->getColData(memory::DEVICE), infoA, @@ -122,8 +124,9 @@ namespace ReSolve { error_sum += status; mem_.deviceSynchronize(); - if (status) { - out::error() << "Matvec status: " << status << ". " + if (status) + { + out::error() << "Matvec status: " << status << ". " << "Last error code: " << mem_.getLastDeviceError() << ".\n"; } vec_result->setDataUpdated(memory::DEVICE); @@ -147,14 +150,15 @@ namespace ReSolve { */ int MatrixHandlerHip::matrixInfNorm(matrix::Sparse* A, real_type* norm) { - assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && - "Matrix has to be in CSR format for matrix-vector product.\n"); + assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix has to be in CSR format for matrix-vector product.\n"); - real_type* d_r = workspace_->getDr(); + real_type* d_r = workspace_->getDr(); index_type d_r_size = workspace_->getDrSize(); - if (d_r_size != A->getNumRows()) { - if (d_r_size != 0) { + if (d_r_size != A->getNumRows()) + { + if (d_r_size != 0) + { mem_.deleteOnDevice(d_r); } mem_.allocateArrayOnDevice(&d_r, A->getNumRows()); @@ -162,7 +166,8 @@ namespace ReSolve { workspace_->setDr(d_r); } - if (workspace_->getNormBufferState() == false) { // not allocated + if (workspace_->getNormBufferState() == false) + { // not allocated real_type* buffer; mem_.allocateArrayOnDevice(&buffer, 1024); workspace_->setNormBuffer(buffer); @@ -198,11 +203,11 @@ namespace ReSolve { rocsparse_status status; A_csr->allocateMatrixData(memory::DEVICE); - index_type m = A_csc->getNumColumns(); - index_type n = A_csc->getNumRows(); + index_type m = A_csc->getNumColumns(); + index_type n = A_csc->getNumRows(); index_type nnz = A_csc->getNnz(); - size_t bufferSize; - void* d_work; + size_t bufferSize; + void* d_work; status = rocsparse_csr2csc_buffer_size(workspace_->getRocsparseHandle(), m, @@ -216,14 +221,14 @@ namespace ReSolve { error_sum += status; mem_.allocateBufferOnDevice(&d_work, bufferSize); - status = rocsparse_dcsr2csc(workspace_->getRocsparseHandle(), + status = rocsparse_dcsr2csc(workspace_->getRocsparseHandle(), m, n, nnz, - A_csc->getValues( memory::DEVICE), + A_csc->getValues(memory::DEVICE), A_csc->getColData(memory::DEVICE), A_csc->getRowData(memory::DEVICE), - A_csr->getValues( memory::DEVICE), + A_csr->getValues(memory::DEVICE), A_csr->getColData(memory::DEVICE), A_csr->getRowData(memory::DEVICE), rocsparse_action_numeric, @@ -252,16 +257,17 @@ namespace ReSolve { */ int MatrixHandlerHip::transpose(matrix::Csr* A, matrix::Csr* At) { - index_type error_sum = 0; - index_type m = A->getNumRows(); - index_type n = A->getNumColumns(); - index_type nnz = A->getNnz(); + index_type error_sum = 0; + index_type m = A->getNumRows(); + index_type n = A->getNumColumns(); + index_type nnz = A->getNnz(); rocsparse_status status; - bool allocated = workspace_->isTransposeBufferAllocated(); - if (!allocated) { + bool allocated = workspace_->isTransposeBufferAllocated(); + if (!allocated) + { // allocate transpose buffer size_t bufferSize; - status = rocsparse_csr2csc_buffer_size(workspace_->getRocsparseHandle(), + status = rocsparse_csr2csc_buffer_size(workspace_->getRocsparseHandle(), m, n, nnz, @@ -272,14 +278,14 @@ namespace ReSolve { error_sum += status; workspace_->setTransposeBufferWorkspace(bufferSize); } - status = rocsparse_dcsr2csc(workspace_->getRocsparseHandle(), + status = rocsparse_dcsr2csc(workspace_->getRocsparseHandle(), m, n, nnz, - A->getValues( memory::DEVICE), + A->getValues(memory::DEVICE), A->getRowData(memory::DEVICE), A->getColData(memory::DEVICE), - At->getValues( memory::DEVICE), + At->getValues(memory::DEVICE), At->getColData(memory::DEVICE), At->getRowData(memory::DEVICE), rocsparse_action_numeric, @@ -303,7 +309,7 @@ namespace ReSolve { int MatrixHandlerHip::addConst(matrix::Sparse* A, real_type alpha) { real_type* values = A->getValues(memory::DEVICE); - index_type nnz = A->getNnz(); + index_type nnz = A->getNnz(); hip::addConst(nnz, alpha, values); return 0; } @@ -323,10 +329,10 @@ namespace ReSolve { */ int MatrixHandlerHip::leftScale(vector_type* diag, matrix::Csr* A) { - real_type* diag_data = diag->getData(memory::DEVICE); + real_type* diag_data = diag->getData(memory::DEVICE); index_type* a_row_ptr = A->getRowData(memory::DEVICE); - real_type* a_vals = A->getValues( memory::DEVICE); - index_type n = A->getNumRows(); + real_type* a_vals = A->getValues(memory::DEVICE); + index_type n = A->getNumRows(); // check values in A and diag hip::leftScale(n, a_row_ptr, a_vals, diag_data); A->setUpdated(memory::DEVICE); @@ -348,11 +354,11 @@ namespace ReSolve { */ int MatrixHandlerHip::rightScale(matrix::Csr* A, vector_type* diag) { - real_type* diag_data = diag->getData(memory::DEVICE); + real_type* diag_data = diag->getData(memory::DEVICE); index_type* a_row_ptr = A->getRowData(memory::DEVICE); index_type* a_col_idx = A->getColData(memory::DEVICE); - real_type* a_vals = A->getValues( memory::DEVICE); - index_type n = A->getNumRows(); + real_type* a_vals = A->getValues(memory::DEVICE); + index_type n = A->getNumRows(); hip::rightScale(n, a_row_ptr, a_col_idx, a_vals, diag_data); A->setUpdated(memory::DEVICE); return 0; diff --git a/resolve/matrix/MatrixHandlerHip.hpp b/resolve/matrix/MatrixHandlerHip.hpp index e06dae945..7e69c3f1c 100644 --- a/resolve/matrix/MatrixHandlerHip.hpp +++ b/resolve/matrix/MatrixHandlerHip.hpp @@ -9,18 +9,19 @@ namespace ReSolve { class Vector; } + namespace matrix { class Sparse; class Coo; class Csc; class Csr; - } + } // namespace matrix class LinAlgWorkspaceHIP; -} - +} // namespace ReSolve -namespace ReSolve { +namespace ReSolve +{ /** * @class MatrixHandlerHip * @@ -30,37 +31,34 @@ namespace ReSolve { { using vector_type = vector::Vector; - public: - - MatrixHandlerHip(LinAlgWorkspaceHIP* workspace); - virtual ~MatrixHandlerHip(); + public: + MatrixHandlerHip(LinAlgWorkspaceHIP* workspace); + virtual ~MatrixHandlerHip(); - int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) override; + int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) override; - int transpose(matrix::Csr* A, matrix::Csr* At) override; + int transpose(matrix::Csr* A, matrix::Csr* At) override; - int leftScale(vector_type* diag, matrix::Csr* A) override; + int leftScale(vector_type* diag, matrix::Csr* A) override; - int rightScale(matrix::Csr* A, vector_type* diag) override; + int rightScale(matrix::Csr* A, vector_type* diag) override; - int addConst(matrix::Sparse* A, real_type alpha) override; - virtual int matvec(matrix::Sparse* A, - vector_type* vec_x, - vector_type* vec_result, - const real_type* alpha, - const real_type* beta) override; + int addConst(matrix::Sparse* A, real_type alpha) override; + virtual int matvec(matrix::Sparse* A, + vector_type* vec_x, + vector_type* vec_result, + const real_type* alpha, + const real_type* beta) override; - virtual int matrixInfNorm(matrix::Sparse *A, real_type* norm) override; + virtual int matrixInfNorm(matrix::Sparse* A, real_type* norm) override; - void setValuesChanged(bool isValuesChanged) override; + void setValuesChanged(bool isValuesChanged) override; - private: + private: + LinAlgWorkspaceHIP* workspace_{nullptr}; + bool values_changed_{true}; ///< needed for matvec - LinAlgWorkspaceHIP* workspace_{nullptr}; - bool values_changed_{true}; ///< needed for matvec - - MemoryHandler mem_; ///< Device memory manager object + MemoryHandler mem_; ///< Device memory manager object }; } // namespace ReSolve - diff --git a/resolve/matrix/MatrixHandlerImpl.hpp b/resolve/matrix/MatrixHandlerImpl.hpp index bb5eca5b3..d874dff16 100644 --- a/resolve/matrix/MatrixHandlerImpl.hpp +++ b/resolve/matrix/MatrixHandlerImpl.hpp @@ -6,17 +6,18 @@ namespace ReSolve { class Vector; } + namespace matrix { class Sparse; class Coo; class Csc; class Csr; - } -} - + } // namespace matrix +} // namespace ReSolve -namespace ReSolve { +namespace ReSolve +{ /** * @class MatrixHandlerImpl * @@ -26,31 +27,33 @@ namespace ReSolve { { using vector_type = vector::Vector; - public: - MatrixHandlerImpl() - {} - virtual ~MatrixHandlerImpl() - {} + public: + MatrixHandlerImpl() + { + } - virtual int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) = 0; + virtual ~MatrixHandlerImpl() + { + } - virtual int transpose(matrix::Csr* A, matrix::Csr* At) = 0; + virtual int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) = 0; - virtual int leftScale(vector_type* diag, matrix::Csr* A) = 0; + virtual int transpose(matrix::Csr* A, matrix::Csr* At) = 0; - virtual int rightScale(matrix::Csr* A, vector_type* diag) = 0; + virtual int leftScale(vector_type* diag, matrix::Csr* A) = 0; - virtual int addConst(matrix::Sparse* A, real_type alpha) = 0; + virtual int rightScale(matrix::Csr* A, vector_type* diag) = 0; - virtual int matvec(matrix::Sparse* A, - vector_type* vec_x, - vector_type* vec_result, - const real_type* alpha, - const real_type* beta) = 0; - virtual int matrixInfNorm(matrix::Sparse* A, real_type* norm) = 0; + virtual int addConst(matrix::Sparse* A, real_type alpha) = 0; - virtual void setValuesChanged(bool isValuesChanged) = 0; + virtual int matvec(matrix::Sparse* A, + vector_type* vec_x, + vector_type* vec_result, + const real_type* alpha, + const real_type* beta) = 0; + virtual int matrixInfNorm(matrix::Sparse* A, real_type* norm) = 0; + + virtual void setValuesChanged(bool isValuesChanged) = 0; }; } // namespace ReSolve - diff --git a/resolve/matrix/Sparse.cpp b/resolve/matrix/Sparse.cpp index 58ff1b93c..edad7ca76 100644 --- a/resolve/matrix/Sparse.cpp +++ b/resolve/matrix/Sparse.cpp @@ -1,39 +1,41 @@ -#include // <-- includes memcpy +#include "Sparse.hpp" + +#include // <-- includes memcpy #include -#include "Sparse.hpp" -namespace ReSolve { +namespace ReSolve +{ using out = io::Logger; - /** - * @brief empty constructor that does absolutely nothing + /** + * @brief empty constructor that does absolutely nothing */ matrix::Sparse::Sparse() { } - /** + /** * @brief basic constructor. It DOES NOT allocate any memory! * * @param[in] n - number of rows * @param[in] m - number of columns - * @param[in] nnz - number of non-zeros + * @param[in] nnz - number of non-zeros */ - matrix::Sparse::Sparse(index_type n, - index_type m, - index_type nnz): - n_{n}, - m_{m}, - nnz_{nnz} + matrix::Sparse::Sparse(index_type n, + index_type m, + index_type nnz) + : n_{n}, + m_{m}, + nnz_{nnz} { this->is_symmetric_ = false; - this->is_expanded_ = true; //default is a normal non-symmetric fully expanded matrix + this->is_expanded_ = true; // default is a normal non-symmetric fully expanded matrix setNotUpdated(); - //set everything to nullptr + // set everything to nullptr h_row_data_ = nullptr; h_col_data_ = nullptr; h_val_data_ = nullptr; @@ -41,37 +43,37 @@ namespace ReSolve { d_row_data_ = nullptr; d_col_data_ = nullptr; d_val_data_ = nullptr; - + owns_cpu_sparsity_pattern_ = false; - owns_cpu_values_ = false; - + owns_cpu_values_ = false; + owns_gpu_sparsity_pattern_ = false; - owns_gpu_values_ = false; + owns_gpu_values_ = false; } - /** + /** * @brief another basic constructor. It DOES NOT allocate any memory! * * @param[in] n - number of rows * @param[in] m - number of columns - * @param[in] nnz - number of non-zeros - * @param[in] symmetric - true if symmetric, false if non-symmetric - * @param[in] expanded - true if expanded, false if not + * @param[in] nnz - number of non-zeros + * @param[in] symmetric - true if symmetric, false if non-symmetric + * @param[in] expanded - true if expanded, false if not */ - matrix::Sparse::Sparse(index_type n, - index_type m, + matrix::Sparse::Sparse(index_type n, + index_type m, index_type nnz, - bool symmetric, - bool expanded): - n_{n}, - m_{m}, - nnz_{nnz}, - is_symmetric_{symmetric}, - is_expanded_{expanded} + bool symmetric, + bool expanded) + : n_{n}, + m_{m}, + nnz_{nnz}, + is_symmetric_{symmetric}, + is_expanded_{expanded} { setNotUpdated(); - //set everything to nullptr + // set everything to nullptr h_row_data_ = nullptr; h_col_data_ = nullptr; h_val_data_ = nullptr; @@ -81,12 +83,12 @@ namespace ReSolve { d_val_data_ = nullptr; owns_cpu_sparsity_pattern_ = false; - owns_cpu_values_ = false; - + owns_cpu_values_ = false; + owns_gpu_sparsity_pattern_ = false; - owns_gpu_values_ = false; + owns_gpu_values_ = false; } - + /** * @brief destructor * */ @@ -96,15 +98,15 @@ namespace ReSolve { this->destroyMatrixData(memory::DEVICE); } - /** + /** * @brief set the matrix update flags to false (for both HOST and DEVICE). */ void matrix::Sparse::setNotUpdated() { h_data_updated_ = false; - d_data_updated_ = false; + d_data_updated_ = false; } - + /** * @brief get number of matrix rows * @@ -163,8 +165,8 @@ namespace ReSolve { /** * @brief Set matrix symmetry property * - * @param[in] symmetric - true to set matrix to symmetric and false to set to non-symmetric - */ + * @param[in] symmetric - true to set matrix to symmetric and false to set to non-symmetric + */ void matrix::Sparse::setSymmetric(bool symmetric) { this->is_symmetric_ = symmetric; @@ -174,7 +176,7 @@ namespace ReSolve { * @brief Set matrix "expanded" property * * @param[in] expanded - true to set matrix to expanded and false to set to not expanded - */ + */ void matrix::Sparse::setExpanded(bool expanded) { this->is_expanded_ = expanded; @@ -184,7 +186,7 @@ namespace ReSolve { * @brief Set number of non-zeros. * * @param[in] nnz_new - new number of non-zeros - */ + */ void matrix::Sparse::setNnz(index_type nnz_new) { this->nnz_ = nnz_new; @@ -196,38 +198,39 @@ namespace ReSolve { * @param[in] memspace - memory space (HOST or DEVICE) to set to "updated" * * @return 0 if successful, -1 if not. - * + * * The method sets the boolean flag indicating that the `memspace` is updated. * It automatically sets the other data mirror to non-updated. You would * use this function if you update matrix data by accessing its raw pointers. * In such case, the matrix has no way of knowing which data is most recent, so * you have to tell it. - * + * * @warning This is an expert-level function. Use only if you know what you are * doing. - * + * * @note If you want to set both DEVICE and HOST memory to the same value * use syncData function. - */ + */ int matrix::Sparse::setUpdated(memory::MemorySpace memspace) { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - h_data_updated_ = true; - d_data_updated_ = false; - break; - case DEVICE: - d_data_updated_ = true; - h_data_updated_ = false; - break; + switch (memspace) + { + case HOST: + h_data_updated_ = true; + d_data_updated_ = false; + break; + case DEVICE: + d_data_updated_ = true; + h_data_updated_ = false; + break; } return 0; } /** * @brief Set the pointers for matrix row, column, value data. - * + * * Useful if interfacing with other codes - this function only assigns * pointers, but it does not allocate nor copy anything. The data ownership * flags would be set to false (default). @@ -238,102 +241,112 @@ namespace ReSolve { * @param[in] memspace - memory space (HOST or DEVICE) of incoming data * * @return 0 if successful, 1 if not. - */ - int matrix::Sparse::setDataPointers(index_type* row_data, - index_type* col_data, - real_type* val_data, + */ + int matrix::Sparse::setDataPointers(index_type* row_data, + index_type* col_data, + real_type* val_data, memory::MemorySpace memspace) { using namespace ReSolve::memory; setNotUpdated(); - switch (memspace) { - case HOST: - if (owns_cpu_sparsity_pattern_ && (h_row_data_ || h_col_data_)) { - out::error() << "Trying to set matrix host data, but the data already set!\n"; - out::error() << "Ignoring setDataPointers function call ...\n"; - return 1; - } - if (owns_cpu_values_ && h_val_data_) { - out::error() << "Trying to set matrix host values, but the values already set!\n"; - out::error() << "Ignoring setValuesPointer function call ...\n"; - return 1; - } - h_row_data_ = row_data; - h_col_data_ = col_data; - h_val_data_ = val_data; - h_data_updated_ = true; - owns_cpu_sparsity_pattern_ = false; - owns_cpu_values_ = false; - break; - case DEVICE: - if (owns_gpu_sparsity_pattern_ && (d_row_data_ || d_col_data_)) { - out::error() << "Trying to set matrix host data, but the data already set!\n"; - out::error() << "Ignoring setDataPointers function call ...\n"; - return 1; - } - if (owns_gpu_values_ && d_val_data_) { - out::error() << "Trying to set matrix device values, but the values already set!\n"; - out::error() << "Ignoring setValuesPointer function call ...\n"; - return 1; - } - d_row_data_ = row_data; - d_col_data_ = col_data; - d_val_data_ = val_data; - d_data_updated_ = true; - owns_gpu_sparsity_pattern_ = false; - owns_gpu_values_ = false; - break; + switch (memspace) + { + case HOST: + if (owns_cpu_sparsity_pattern_ && (h_row_data_ || h_col_data_)) + { + out::error() << "Trying to set matrix host data, but the data already set!\n"; + out::error() << "Ignoring setDataPointers function call ...\n"; + return 1; + } + if (owns_cpu_values_ && h_val_data_) + { + out::error() << "Trying to set matrix host values, but the values already set!\n"; + out::error() << "Ignoring setValuesPointer function call ...\n"; + return 1; + } + h_row_data_ = row_data; + h_col_data_ = col_data; + h_val_data_ = val_data; + h_data_updated_ = true; + owns_cpu_sparsity_pattern_ = false; + owns_cpu_values_ = false; + break; + case DEVICE: + if (owns_gpu_sparsity_pattern_ && (d_row_data_ || d_col_data_)) + { + out::error() << "Trying to set matrix host data, but the data already set!\n"; + out::error() << "Ignoring setDataPointers function call ...\n"; + return 1; + } + if (owns_gpu_values_ && d_val_data_) + { + out::error() << "Trying to set matrix device values, but the values already set!\n"; + out::error() << "Ignoring setValuesPointer function call ...\n"; + return 1; + } + d_row_data_ = row_data; + d_col_data_ = col_data; + d_val_data_ = val_data; + d_data_updated_ = true; + owns_gpu_sparsity_pattern_ = false; + owns_gpu_values_ = false; + break; } return 0; } - + /** - * @brief destroy matrix data (HOST or DEVICE) if the matrix owns it + * @brief destroy matrix data (HOST or DEVICE) if the matrix owns it * (will attempt to destroy all three arrays). * * @param[in] memspace - memory space (HOST or DEVICE) of incoming data * * @return 0 if successful, -1 if not. * - */ + */ int matrix::Sparse::destroyMatrixData(memory::MemorySpace memspace) { using namespace ReSolve::memory; - switch (memspace) { - case HOST: - if (owns_cpu_sparsity_pattern_) { - delete [] h_row_data_; - delete [] h_col_data_; - h_row_data_ = nullptr; - h_col_data_ = nullptr; - } - if (owns_cpu_values_) { - delete [] h_val_data_; - h_val_data_ = nullptr; - } - return 0; - case DEVICE: - if (owns_gpu_sparsity_pattern_) { - mem_.deleteOnDevice(d_row_data_); - mem_.deleteOnDevice(d_col_data_); - d_row_data_ = nullptr; - d_col_data_ = nullptr; - } - if (owns_gpu_values_) { - mem_.deleteOnDevice(d_val_data_); - d_val_data_ = nullptr; - } - return 0; - default: - return -1; + switch (memspace) + { + case HOST: + if (owns_cpu_sparsity_pattern_) + { + delete[] h_row_data_; + delete[] h_col_data_; + h_row_data_ = nullptr; + h_col_data_ = nullptr; + } + if (owns_cpu_values_) + { + delete[] h_val_data_; + h_val_data_ = nullptr; + } + return 0; + case DEVICE: + if (owns_gpu_sparsity_pattern_) + { + mem_.deleteOnDevice(d_row_data_); + mem_.deleteOnDevice(d_col_data_); + d_row_data_ = nullptr; + d_col_data_ = nullptr; + } + if (owns_gpu_values_) + { + mem_.deleteOnDevice(d_val_data_); + d_val_data_ = nullptr; + } + return 0; + default: + return -1; } } /** * @brief updata matrix values using the _new_values_ provided either as HOST or as DEVICE array. - * + * * This function will copy the data (not just assign a pointer) and allocate if needed. * It also sets ownership and update flags. * @@ -342,56 +355,73 @@ namespace ReSolve { * @param[in] memspaceOut - memory space (HOST or DEVICE) of matrix values to be updated. * * @return 0 if successful, -1 if not. - */ - int matrix::Sparse::copyValues(const real_type* new_vals, + */ + int matrix::Sparse::copyValues(const real_type* new_vals, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) { - + index_type nnz_current = nnz_; - //four cases (for now) + // four cases (for now) setNotUpdated(); - int control=-1; - if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)) { control = 0;} - if ((memspaceIn == memory::HOST) && (memspaceOut == memory::DEVICE)){ control = 1;} - if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::HOST)) { control = 2;} - if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::DEVICE)){ control = 3;} - - if (memspaceOut == memory::HOST) { - //check if cpu data allocated - if (h_val_data_ == nullptr) { + int control = -1; + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)) + { + control = 0; + } + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::DEVICE)) + { + control = 1; + } + if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::HOST)) + { + control = 2; + } + if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::DEVICE)) + { + control = 3; + } + + if (memspaceOut == memory::HOST) + { + // check if cpu data allocated + if (h_val_data_ == nullptr) + { this->h_val_data_ = new real_type[nnz_current]; - owns_cpu_values_ = true; + owns_cpu_values_ = true; } } - if (memspaceOut == memory::DEVICE) { - //check if cuda data allocated - if (d_val_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); + if (memspaceOut == memory::DEVICE) + { + // check if cuda data allocated + if (d_val_data_ == nullptr) + { + mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); owns_gpu_values_ = true; } } - switch(control) { - case 0: //cpu->cpu - mem_.copyArrayHostToHost(h_val_data_, new_vals, nnz_current); - h_data_updated_ = true; - break; - case 2://cuda->cpu - mem_.copyArrayDeviceToHost(h_val_data_, new_vals, nnz_current); - h_data_updated_ = true; - break; - case 1://cpu->cuda - mem_.copyArrayHostToDevice(d_val_data_, new_vals, nnz_current); - d_data_updated_ = true; - break; - case 3://cuda->cuda - mem_.copyArrayDeviceToDevice(d_val_data_, new_vals, nnz_current); - d_data_updated_ = true; - break; - default: - return -1; + switch (control) + { + case 0: // cpu->cpu + mem_.copyArrayHostToHost(h_val_data_, new_vals, nnz_current); + h_data_updated_ = true; + break; + case 2: // cuda->cpu + mem_.copyArrayDeviceToHost(h_val_data_, new_vals, nnz_current); + h_data_updated_ = true; + break; + case 1: // cpu->cuda + mem_.copyArrayHostToDevice(d_val_data_, new_vals, nnz_current); + d_data_updated_ = true; + break; + case 3: // cuda->cuda + mem_.copyArrayDeviceToDevice(d_val_data_, new_vals, nnz_current); + d_data_updated_ = true; + break; + default: + return -1; } return 0; } @@ -399,7 +429,7 @@ namespace ReSolve { /** * @brief updata matrix values using the _new_values_ provided either as * HOST or as DEVICE array. - * + * * This function only assigns a pointer, but does not copy. It sets update * flags. * @@ -407,39 +437,41 @@ namespace ReSolve { * @param[in] memspace - memory space (HOST or DEVICE) of _new_vals_ * * @return 0 if successful, -1 if not. - */ - int matrix::Sparse::setValuesPointer(real_type* new_vals, + */ + int matrix::Sparse::setValuesPointer(real_type* new_vals, memory::MemorySpace memspace) { using namespace ReSolve::memory; setNotUpdated(); - switch (memspace) { - case HOST: - if (owns_cpu_values_ && h_val_data_) { - out::error() << "Trying to set matrix host values, but the values already set!\n"; - out::error() << "Ignoring setValuesPointer function call ...\n"; - return 1; - } - h_val_data_ = new_vals; - h_data_updated_ = true; - owns_cpu_values_ = false; - break; - case DEVICE: - if (owns_gpu_values_ && d_val_data_) { - out::error() << "Trying to set matrix device values, but the values already set!\n"; - out::error() << "Ignoring setValuesPointer function call ...\n"; - return 1; - } - d_val_data_ = new_vals; - d_data_updated_ = true; - owns_gpu_values_ = false; - break; - default: - return -1; + switch (memspace) + { + case HOST: + if (owns_cpu_values_ && h_val_data_) + { + out::error() << "Trying to set matrix host values, but the values already set!\n"; + out::error() << "Ignoring setValuesPointer function call ...\n"; + return 1; + } + h_val_data_ = new_vals; + h_data_updated_ = true; + owns_cpu_values_ = false; + break; + case DEVICE: + if (owns_gpu_values_ && d_val_data_) + { + out::error() << "Trying to set matrix device values, but the values already set!\n"; + out::error() << "Ignoring setValuesPointer function call ...\n"; + return 1; + } + d_val_data_ = new_vals; + d_data_updated_ = true; + owns_gpu_values_ = false; + break; + default: + return -1; } return 0; } } // namespace ReSolve - diff --git a/resolve/matrix/Sparse.hpp b/resolve/matrix/Sparse.hpp index 5c5144ec7..55d1b50dc 100644 --- a/resolve/matrix/Sparse.hpp +++ b/resolve/matrix/Sparse.hpp @@ -1,77 +1,87 @@ // Matrix utilities -// Mirroring memory approach +// Mirroring memory approach #pragma once -#include #include +#include #include + #include #include -namespace ReSolve { namespace matrix { - - /** - * @brief This class implements basic sparse matrix interface. - * - * Most of sparse matrix formats store information about matrix rows and - * columns as integers and nonzero element values as real numbers. - * This class is virtual and implements only what is common for all basic - * formats. Note that regardless of how row/column information is stored, - * all nonzero matrix values need to be stored, so all utilities needed for - * the values are implemented in this class. - * - * @author Kasia Swirydowicz - */ - class Sparse +namespace ReSolve +{ + namespace matrix { + + /** + * @brief This class implements basic sparse matrix interface. + * + * Most of sparse matrix formats store information about matrix rows and + * columns as integers and nonzero element values as real numbers. + * This class is virtual and implements only what is common for all basic + * formats. Note that regardless of how row/column information is stored, + * all nonzero matrix values need to be stored, so all utilities needed for + * the values are implemented in this class. + * + * @author Kasia Swirydowicz + */ + class Sparse + { public: /// Supported sparse matrix formats - enum SparseFormat{NONE, TRIPLET, COMPRESSED_SPARSE_ROW, COMPRESSED_SPARSE_COLUMN}; + enum SparseFormat + { + NONE, + TRIPLET, + COMPRESSED_SPARSE_ROW, + COMPRESSED_SPARSE_COLUMN + }; public: - //basic constructor + // basic constructor Sparse(); Sparse(index_type n, index_type m, index_type nnz); - Sparse(index_type n, - index_type m, + Sparse(index_type n, + index_type m, index_type nnz, - bool symmetric, - bool expanded); + bool symmetric, + bool expanded); virtual ~Sparse(); // accessors - index_type getNumRows(); - index_type getNumColumns(); - index_type getNnz(); + index_type getNumRows(); + index_type getNumColumns(); + index_type getNnz(); SparseFormat getSparseFormat() const; - bool symmetric(); + bool symmetric(); bool expanded(); void setSymmetric(bool symmetric); void setExpanded(bool expanded); void setNnz(index_type nnz_new); // for resetting when removing duplicates - int setUpdated(memory::MemorySpace what); + int setUpdated(memory::MemorySpace what); virtual index_type* getRowData(memory::MemorySpace memspace) = 0; virtual index_type* getColData(memory::MemorySpace memspace) = 0; - virtual real_type* getValues( memory::MemorySpace memspace) = 0; + virtual real_type* getValues(memory::MemorySpace memspace) = 0; - virtual int copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, + virtual int copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) = 0; - virtual int copyDataFrom(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, - index_type new_nnz, + virtual int copyDataFrom(const index_type* row_data, + const index_type* col_data, + const real_type* val_data, + index_type new_nnz, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) = 0; virtual int allocateMatrixData(memory::MemorySpace memspace) = 0; - int setDataPointers(index_type* row_data, - index_type* col_data, - real_type* val_data, - memory::MemorySpace memspace); + int setDataPointers(index_type* row_data, + index_type* col_data, + real_type* val_data, + memory::MemorySpace memspace); int destroyMatrixData(memory::MemorySpace memspace); @@ -79,40 +89,39 @@ namespace ReSolve { namespace matrix { virtual int syncData(memory::MemorySpace memspaceOut) = 0; - - //update Values just updates values; it allocates if necessary. - //values have the same dimensions between different formats - virtual int copyValues(const real_type* new_vals, + // update Values just updates values; it allocates if necessary. + // values have the same dimensions between different formats + virtual int copyValues(const real_type* new_vals, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); - - //set new values just sets the pointer, use caution. - virtual int setValuesPointer(real_type* new_vals, + + // set new values just sets the pointer, use caution. + virtual int setValuesPointer(real_type* new_vals, memory::MemorySpace memspace); - + protected: SparseFormat sparse_format_{NONE}; ///< Matrix format - index_type n_{0}; ///< number of rows - index_type m_{0}; ///< number of columns - index_type nnz_{0}; ///< number of non-zeros + index_type n_{0}; ///< number of rows + index_type m_{0}; ///< number of columns + index_type nnz_{0}; ///< number of non-zeros bool is_symmetric_{false}; ///< symmetry flag - bool is_expanded_{false}; ///< "expanded" flag + bool is_expanded_{false}; ///< "expanded" flag - //host data - index_type* h_row_data_{nullptr}; ///< row data (HOST) - index_type* h_col_data_{nullptr}; ///< column data (HOST) - real_type* h_val_data_{nullptr}; ///< value data (HOST) - bool h_data_updated_{false}; ///< HOST update flag + // host data + index_type* h_row_data_{nullptr}; ///< row data (HOST) + index_type* h_col_data_{nullptr}; ///< column data (HOST) + real_type* h_val_data_{nullptr}; ///< value data (HOST) + bool h_data_updated_{false}; ///< HOST update flag - //gpu data - index_type* d_row_data_{nullptr}; ///< row data (DEVICE) - index_type* d_col_data_{nullptr}; ///< column data (DEVICE) - real_type* d_val_data_{nullptr}; ///< value data (DEVICE) - bool d_data_updated_{false}; ///< DEVICE update flag + // gpu data + index_type* d_row_data_{nullptr}; ///< row data (DEVICE) + index_type* d_col_data_{nullptr}; ///< column data (DEVICE) + real_type* d_val_data_{nullptr}; ///< value data (DEVICE) + bool d_data_updated_{false}; ///< DEVICE update flag void setNotUpdated(); - + // Data ownership flags bool owns_cpu_sparsity_pattern_{false}; ///< for row/col data bool owns_cpu_values_{false}; ///< for nonzero values @@ -121,5 +130,6 @@ namespace ReSolve { namespace matrix { bool owns_gpu_values_{false}; ///< for nonzero values MemoryHandler mem_; ///< Device memory manager object - }; -}} // namespace ReSolve::matrix + }; + } // namespace matrix +} // namespace ReSolve diff --git a/resolve/matrix/io.cpp b/resolve/matrix/io.cpp index 0e0e886e4..cf241d73e 100644 --- a/resolve/matrix/io.cpp +++ b/resolve/matrix/io.cpp @@ -1,161 +1,166 @@ -#include -#include -#include +#include "io.hpp" + #include -#include +#include +#include #include +#include +#include -#include -#include #include #include -#include "io.hpp" - +#include +#include namespace ReSolve -{ - +{ + /** * @class MatrixElementTriplet - * + * * @brief Helper class for COO matrix sorting. - * + * * Contains triplet of row index, column index and the value of a sparse * matrix element, as well as methods and operator overloads for its * management. - * + * * The entire code is in this file. Its scope is to support matrix file I/O * only. - * + * */ class MatrixElementTriplet { - public: - /// Default constructor initializes all to zero. - MatrixElementTriplet() : rowidx_(0), colidx_(0), value_(0.0) - {} + public: + /// Default constructor initializes all to zero. + MatrixElementTriplet() + : rowidx_(0), colidx_(0), value_(0.0) + { + } - /// Constructor that initializes row and column indices and the element value. - MatrixElementTriplet(index_type i, index_type j, real_type v) : rowidx_(i), colidx_(j), value_(v) - {} + /// Constructor that initializes row and column indices and the element value. + MatrixElementTriplet(index_type i, index_type j, real_type v) + : rowidx_(i), colidx_(j), value_(v) + { + } - ~MatrixElementTriplet() = default; + ~MatrixElementTriplet() = default; - /// Set the row and column indices and the element value. - void set(index_type rowidx, index_type colidx, real_type value) - { - rowidx_ = rowidx; - colidx_ = colidx; - value_ = value; - } + /// Set the row and column indices and the element value. + void set(index_type rowidx, index_type colidx, real_type value) + { + rowidx_ = rowidx; + colidx_ = colidx; + value_ = value; + } - index_type getRowIdx() const - { - return rowidx_; - } - index_type getColIdx() const - { - return colidx_; - } - real_type getValue() const - { - return value_; - } + index_type getRowIdx() const + { + return rowidx_; + } - /** - * @brief Overload of `<` operator - * - * Ensures that matrix elements stored in MatrixElementTriplet will be - * sorted by their indices in a row-major order. - * - */ - bool operator < (const MatrixElementTriplet& t) const - { - if (rowidx_ < t.rowidx_) - return true; + index_type getColIdx() const + { + return colidx_; + } - if ((rowidx_ == t.rowidx_) && (colidx_ < t.colidx_)) - return true; + real_type getValue() const + { + return value_; + } - return false; - } + /** + * @brief Overload of `<` operator + * + * Ensures that matrix elements stored in MatrixElementTriplet will be + * sorted by their indices in a row-major order. + * + */ + bool operator<(const MatrixElementTriplet& t) const + { + if (rowidx_ < t.rowidx_) + return true; - /** - * @brief Overload of `==` operator. - * - * This overload is used to indicate when two different instances of - * MatrixElementTriplet correspond to the same matrix element. - */ - bool operator == (const MatrixElementTriplet& str) const - { - return (rowidx_ == str.rowidx_) && (colidx_ == str.colidx_); - } + if ((rowidx_ == t.rowidx_) && (colidx_ < t.colidx_)) + return true; - /** - * @brief Overload of `+=` operator. - * - * @param t - Triplet to be added in place. - * @return MatrixElementTriplet& - reference to `*this`. - * - * This overload is used to merge duplicates in sparse matrix in COO - * format. It will return error and leave `*this` unchanged if the - * argument corresponds to an element with different row or column - * indices. - */ - MatrixElementTriplet& operator += (const MatrixElementTriplet t) + return false; + } + + /** + * @brief Overload of `==` operator. + * + * This overload is used to indicate when two different instances of + * MatrixElementTriplet correspond to the same matrix element. + */ + bool operator==(const MatrixElementTriplet& str) const + { + return (rowidx_ == str.rowidx_) && (colidx_ == str.colidx_); + } + + /** + * @brief Overload of `+=` operator. + * + * @param t - Triplet to be added in place. + * @return MatrixElementTriplet& - reference to `*this`. + * + * This overload is used to merge duplicates in sparse matrix in COO + * format. It will return error and leave `*this` unchanged if the + * argument corresponds to an element with different row or column + * indices. + */ + MatrixElementTriplet& operator+=(const MatrixElementTriplet t) + { + if ((rowidx_ != t.rowidx_) || (colidx_ != t.colidx_)) { - if ((rowidx_ != t.rowidx_) || (colidx_ != t.colidx_)) { - io::Logger::error() << "Adding values into non-matching triplet.\n"; - return *this; - } - value_ += t.value_; + io::Logger::error() << "Adding values into non-matching triplet.\n"; return *this; } + value_ += t.value_; + return *this; + } - /// Utility to print indices (0 index base). - void print() const - { - // Add 1 to indices to restore indexing from MM format - std::cout << rowidx_ << " " << colidx_ << " " << value_ << "\n"; - } + /// Utility to print indices (0 index base). + void print() const + { + // Add 1 to indices to restore indexing from MM format + std::cout << rowidx_ << " " << colidx_ << " " << value_ << "\n"; + } - private: - index_type rowidx_{0}; - index_type colidx_{0}; - real_type value_{0.0}; + private: + index_type rowidx_{0}; + index_type colidx_{0}; + real_type value_{0.0}; }; - namespace io { // Static helper functionsdeclarations - static void createMatrixFromFileAsList(std::istream& file, - bool is_expand_symmetric, + static void createMatrixFromFileAsList(std::istream& file, + bool is_expand_symmetric, std::list& tmp, - index_type& n, - index_type& m, - index_type& nnz, - bool& symmetric, - bool& expanded); - static void createMatrixFromFileAsList(std::istream& file, - matrix::Sparse* A, + index_type& n, + index_type& m, + index_type& nnz, + bool& symmetric, + bool& expanded); + static void createMatrixFromFileAsList(std::istream& file, + matrix::Sparse* A, std::list& tmp); // static void print_list(std::list& l); - static int loadToList(std::istream& file, bool is_expand_symmetric, std::list& tmp); - static int removeDuplicates(std::list& tmp); - static int copyListToCoo(const std::list& tmp, matrix::Coo* A); - static int copyListToCsr(const std::list& tmp, matrix::Csr* A); - + static int loadToList(std::istream& file, bool is_expand_symmetric, std::list& tmp); + static int removeDuplicates(std::list& tmp); + static int copyListToCoo(const std::list& tmp, matrix::Coo* A); + static int copyListToCsr(const std::list& tmp, matrix::Csr* A); /** * @brief Create a COO matrix and populate it with data from Matrix Market * file. - * + * * @param file - input Matrix Market file * @param is_expand_symmetric - whether to expand symmetric matrix to general format * @return matrix::Coo* - pointer to COO matrix - * + * * @pre file is a valid std::istream with Matrix Market data. * @pre input data is in valid Matrix Market format. * @post Valid COO matrix sorted in row major order and without duplicates @@ -163,14 +168,16 @@ namespace ReSolve */ matrix::Coo* createCooFromFile(std::istream& file, bool is_expand_symmetric) { - if(!file) { - Logger::error() << "Empty input to createCooFromFile function ... \n" << std::endl; + if (!file) + { + Logger::error() << "Empty input to createCooFromFile function ... \n" + << std::endl; return nullptr; } index_type m = 0, n = 0, nnz = 0; - bool symmetric = false; - bool expanded = true; + bool symmetric = false; + bool expanded = true; std::list tmp; @@ -186,12 +193,12 @@ namespace ReSolve } /** - * @brief - * + * @brief + * * @param file - input Matrix Market file * @param is_expand_symmetric - whether to expand symmetric matrix to general format * @return matrix::Csr* - pointer to COO matrix - * + * * @pre file is a valid std::istream with Matrix Market data. * @pre input data is in valid Matrix Market format. * @post Valid CSR matrix sorted in row major order and without duplicates @@ -199,14 +206,16 @@ namespace ReSolve */ matrix::Csr* createCsrFromFile(std::istream& file, bool is_expand_symmetric) { - if(!file) { - Logger::error() << "Empty input to createCooFromFile function ... \n" << std::endl; + if (!file) + { + Logger::error() << "Empty input to createCooFromFile function ... \n" + << std::endl; return nullptr; } index_type m = 0, n = 0, nnz = 0; - bool symmetric = false; - bool expanded = true; + bool symmetric = false; + bool expanded = true; std::list tmp; @@ -223,37 +232,41 @@ namespace ReSolve /** * @brief Imports vector data from a Matrix Market file. - * + * * @param file - std::istream to Matrix Market file (data). * @return real_type* - pointer to array with (dense) vector entries. - * + * * @pre `file` is a valid std::istream with Matrix Market data. * @pre Input data is in valid Matrix Market format. * @post A raw array with vector data is created. - * + * */ real_type* createArrayFromFile(std::istream& file) { - if(!file) { - Logger::error() << "Empty input to " << __func__ << " function ... \n" << std::endl; + if (!file) + { + Logger::error() << "Empty input to " << __func__ << " function ... \n" + << std::endl; return nullptr; } std::stringstream ss; - std::string line; - index_type i = 0; - index_type n, m; + std::string line; + index_type i = 0; + index_type n, m; std::getline(file, line); - while (line.at(0) == '%') { - std::getline(file, line); + while (line.at(0) == '%') + { + std::getline(file, line); } ss << line; ss >> n >> m; real_type* vec = new real_type[n]; - real_type a; - while (file >> a) { + real_type a; + while (file >> a) + { vec[i] = a; i++; } @@ -262,19 +275,21 @@ namespace ReSolve vector::Vector* createVectorFromFile(std::istream& file) { - if(!file) { + if (!file) + { Logger::error() << "Empty input to " << __func__ << " function ... \n"; return nullptr; } std::stringstream ss; - std::string line; - index_type i = 0; - index_type n, m; + std::string line; + index_type i = 0; + index_type n, m; std::getline(file, line); - while (line.at(0) == '%') { - std::getline(file, line); + while (line.at(0) == '%') + { + std::getline(file, line); } ss << line; ss >> n >> m; @@ -282,7 +297,8 @@ namespace ReSolve vector::Vector* vec = new vector::Vector(n); vec->allocate(memory::HOST); real_type a; - while (file >> a) { + while (file >> a) + { vec->getData(memory::HOST)[i] = a; i++; } @@ -292,13 +308,13 @@ namespace ReSolve /** * @brief Reads data from a Matrix Market file and updates COO matrix A. - * + * * Compute complexity of this function is O(NNZ*log(NNZ)). There is an * overload of this function that generates a CSR matrix. - * + * * @param file - std::istream to Matrix Market file (data). * @param A - output COO matrix. - * + * * @pre `file` is a valid std::istream with Matrix Market data. * @pre Input data is in valid Matrix Market format. * @pre Size of matrix stored in the Matrix Market file matches the size of A. @@ -307,7 +323,8 @@ namespace ReSolve */ void updateMatrixFromFile(std::istream& file, matrix::Coo* A) { - if (!file) { + if (!file) + { Logger::error() << "Empty input to createCooFromFile function ..." << std::endl; return; } @@ -322,13 +339,13 @@ namespace ReSolve /** * @brief Reads data from a Matrix Market file and updates CSR matrix A. - * + * * Compute complexity of this function is O(NNZ*log(NNZ)). There is an * overload of this function that generates a COO matrix. - * + * * @param file - std::istream to Matrix Market file (data). * @param A - output CSR matrix. - * + * * @pre `file` is a valid std::istream with Matrix Market data. * @pre Input data is in valid Matrix Market format. * @pre Size of matrix stored in the Matrix Market file matches the size of A. @@ -337,7 +354,8 @@ namespace ReSolve */ void updateMatrixFromFile(std::istream& file, matrix::Csr* A) { - if(!file) { + if (!file) + { Logger::error() << "Empty input to updateMatrixFromFile function ..." << std::endl; return; } @@ -352,74 +370,82 @@ namespace ReSolve /** * @brief Reads data from a Matrix Market file and updates array p_rhs. - * + * * @param file - std::istream to Matrix Market file (data). * @param p_rhs - pointer to a pointer to a raw array with vector data. - * + * * @todo The righ-hand-side should be of vector type, not a raw array. With * current implementation it is impossible to verify if the sufficient * space is allocated to store all the data from the input file. Risk of * writing past the end of the array is high. */ - void updateArrayFromFile(std::istream& file, real_type** p_rhs) + void updateArrayFromFile(std::istream& file, real_type** p_rhs) { - if (!file) { + if (!file) + { Logger::error() << "Empty input to updateArrayFromFile function ..." << std::endl; return; } - real_type* rhs = *p_rhs; + real_type* rhs = *p_rhs; std::stringstream ss; - std::string line; - index_type n, m; + std::string line; + index_type n, m; std::getline(file, line); - while (line.at(0) == '%') { - std::getline(file, line); + while (line.at(0) == '%') + { + std::getline(file, line); } ss << line; ss >> n >> m; - if (rhs == nullptr) { + if (rhs == nullptr) + { rhs = new real_type[n]; - } - real_type a; + } + real_type a; index_type i = 0; - while (file >> a) { + while (file >> a) + { rhs[i] = a; i++; } } - void updateVectorFromFile(std::istream& file, vector::Vector* vec_rhs) + void updateVectorFromFile(std::istream& file, vector::Vector* vec_rhs) { - if (!file) { + if (!file) + { Logger::error() << "Empty input to updateArrayFromFile function ..." << std::endl; return; } std::stringstream ss; - std::string line; - index_type n, m; + std::string line; + index_type n, m; std::getline(file, line); - while (line.at(0) == '%') { - std::getline(file, line); + while (line.at(0) == '%') + { + std::getline(file, line); // std::cout<> n >> m; - if (n != vec_rhs->getSize()) { + if (n != vec_rhs->getSize()) + { Logger::error() << "File data does not match the vector size.\n" << "Vector not updated!\n"; return; } real_type* rhs = vec_rhs->getData(memory::HOST); - real_type a = 0.0; - index_type i = 0; - while (file >> a) { + real_type a = 0.0; + index_type i = 0; + while (file >> a) + { rhs[i] = a; // std::cout << i << ": " << a << "\n"; i++; @@ -429,32 +455,36 @@ namespace ReSolve /** * @brief Writes matrix A to a file in Matrix Market format. - * + * * @param A - input matrix. * @param file_out - std::ostream to output file. * @return int - 0 if successful, error code otherwise. - * + * * @pre `A` is a valid sparse matrix. * @post Valid Matrix Marked data is written to std::ostream. * @invariant Matrix `A` elements are unchanged. */ int writeMatrixToFile(matrix::Sparse* A, std::ostream& file_out) { - if (A == nullptr) { + if (A == nullptr) + { Logger::error() << "Matrix pointer is NULL!\n"; return -1; } - if (A->symmetric() && !A->expanded()) { + if (A->symmetric() && !A->expanded()) + { file_out << "%%MatrixMarket matrix coordinate real symmetric\n"; - } else { + } + else + { file_out << "%%MatrixMarket matrix coordinate real general\n"; } file_out << "% Generated by Re::Solve \n"; - file_out << A->getNumRows() << " " + file_out << A->getNumRows() << " " << A->getNumColumns() << " " - << A->getNnz() << "\n"; - + << A->getNnz() << "\n"; + index_type indexing_base = 1; A->print(file_out, indexing_base); return 0; @@ -462,11 +492,11 @@ namespace ReSolve /** * @brief Writes vector data to a file in Matrix Market format. - * + * * @param vec_x - Input vector. * @param file_out - std::ostream to output file. * @return int - 0 if successful, error code otherwise. - * + * * @pre `vec_x` is a valid vector. * @post Valid Matrix Market data is written to std::ostream. * @invariant Elements of `vec_x` are unchanged. @@ -478,7 +508,8 @@ namespace ReSolve file_out << "%%MatrixMarket matrix array real general \n"; file_out << "% Generated by Re::Solve \n"; file_out << vec_x->getSize() << " " << 1 << "\n"; - for (int i = 0; i < vec_x->getSize(); ++i) { + for (int i = 0; i < vec_x->getSize(); ++i) + { file_out << std::setprecision(std::numeric_limits::digits10 + 1) << std::scientific << x_data[i] << "\n"; } @@ -486,7 +517,6 @@ namespace ReSolve return 0; } - // // Static helper functions // @@ -494,16 +524,16 @@ namespace ReSolve /** * @brief Reads Matrix Market data from std::istream and stores it into * std::list. - * + * * @param[in] file - std::istream to Matrix Market file (data). * @param[in] is_expand_symmetric - whether to expand symmetric matrix to general format * @param[out] tmp - std::list where to store matrix data * @param[out] n - number of rows as read from Matrix Market file - * @param[out] m - number of columns as read from Matrix Market file + * @param[out] m - number of columns as read from Matrix Market file * @param[out] nnz - calculated number of matrix nonzeros * @param[out] symmetric - if matrix is symmetric * @param[out] expanded - if symmetric matrix is expanded to general format - * + * * @pre `file` is a valid std::istream with Matrix Market data. * @pre Input data is in valid Matrix Market format. * @pre `tmp` is an empty list! @@ -515,29 +545,30 @@ namespace ReSolve * @post `tmp` list is overwritten with matrix elements read from the input * stream. */ - static void createMatrixFromFileAsList(std::istream& file, - bool is_expand_symmetric, + static void createMatrixFromFileAsList(std::istream& file, + bool is_expand_symmetric, std::list& tmp, - index_type& n, - index_type& m, - index_type& nnz, - bool& symmetric, - bool& expanded) + index_type& n, + index_type& m, + index_type& nnz, + bool& symmetric, + bool& expanded) { std::stringstream ss; - std::string line; - m = 0; - n = 0; - nnz = 0; + std::string line; + m = 0; + n = 0; + nnz = 0; symmetric = false; - expanded = true; + expanded = true; // Parse header and check if matrix is symmetric while (std::getline(file, line)) { if (line.at(0) != '%') break; - if (line.find("symmetric") != std::string::npos) { + if (line.find("symmetric") != std::string::npos) + { symmetric = true; expanded = is_expand_symmetric; } @@ -561,12 +592,12 @@ namespace ReSolve } /** - * @brief - * + * @brief + * * @param[in] file - std::istream to Matrix Market file (data). * @param[in] A - sparse matrix to be updated * @param[out] tmp - temporary list with matrix entries - * + * * @pre `file` is a valid std::istream with Matrix Market data. * @pre Input data is in valid Matrix Market format. * @pre `tmp` is an empty list! @@ -577,37 +608,41 @@ namespace ReSolve * @post `tmp` list is overwritten with matrix elements read from the input * stream. * @invariant Elements of `A` are unchanged in this function but they are - * expected to be overwritten with values in `tmp` later in the code. + * expected to be overwritten with values in `tmp` later in the code. */ - static void createMatrixFromFileAsList(std::istream& file, - matrix::Sparse* A, + static void createMatrixFromFileAsList(std::istream& file, + matrix::Sparse* A, std::list& tmp) { std::stringstream ss; - std::string line; + std::string line; // Default is a general matrix - bool symmetric = false; + bool symmetric = false; // Default is not to expand symmetric matrix bool is_expand_symmetric = false; // Parse header and check if matrix is symmetric std::getline(file, line); - if (line.find("symmetric") != std::string::npos) { + if (line.find("symmetric") != std::string::npos) + { symmetric = true; } - if (symmetric != A->symmetric()) { + if (symmetric != A->symmetric()) + { Logger::error() << "In function updateMatrixFromFile:" << "Source data does not match the symmetry of destination matrix.\n"; } // If the destination matrix is symmetric and expanded, then expand data. - if (A->symmetric()) { + if (A->symmetric()) + { is_expand_symmetric = A->expanded(); } // Skip the header comments - while (line.at(0) == '%') { - std::getline(file, line); + while (line.at(0) == '%') + { + std::getline(file, line); } // Read the first line with matrix sizes @@ -616,7 +651,8 @@ namespace ReSolve ss >> n >> m >> nnz; // Make sure input data matches matrix A size - if ((A->getNumRows() != n) || (A->getNumColumns() != m)) { + if ((A->getNumRows() != n) || (A->getNumColumns() != m)) + { Logger::error() << "Wrong matrix size: " << A->getNumRows() << "x" << A->getNumColumns() << ". Cannot update! \n "; @@ -631,10 +667,8 @@ namespace ReSolve // Remove duplicates in `tmp` list. Complexity O(NNZ) removeDuplicates(tmp); - } - // Commented out; needed for debugging only. // void print_list(std::list& l) // { @@ -647,35 +681,39 @@ namespace ReSolve /** * @brief Loads data from Matrix Market file to a std::list. - * + * * @param[in] file - std::istream to Matrix Market file (data). * @param[in] is_expand_symmetric - whether to expand symmetric matrix. * @param[out] tmp - temporary list with matrix entries * @return int - 0 if successful, error code otherwise. - * + * * @pre `file` is a valid std::istream with Matrix Market data. * @pre Input data is in valid Matrix Market format. * @pre `tmp` is an empty list! * @post `tmp` list is overwritten with matrix elements read from the input * stream. */ - int loadToList(std::istream& file, - bool is_expand_symmetric, + int loadToList(std::istream& file, + bool is_expand_symmetric, std::list& tmp) { index_type i, j; - real_type v; + real_type v; // If the `tmp` list is not empty, clear it. - if (tmp.size() != 0) { + if (tmp.size() != 0) + { tmp.clear(); } - while (file >> i >> j >> v) { + while (file >> i >> j >> v) + { MatrixElementTriplet triplet(i - 1, j - 1, v); tmp.push_back(std::move(triplet)); - if (is_expand_symmetric) { - if (i != j) { + if (is_expand_symmetric) + { + if (i != j) + { MatrixElementTriplet triplet(j - 1, i - 1, v); tmp.push_back(std::move(triplet)); } @@ -687,10 +725,10 @@ namespace ReSolve /** * @brief Removes duplicates from `tmp` list. - * - * @param[in,out] tmp - List with matrix entries. + * + * @param[in,out] tmp - List with matrix entries. * @return int - 0 if successful, error code otherwise. - * + * * @pre `tmp` contains matrix elements. * @post Duplicates in `tmp` are added in place to the first instance * of that matrix element. @@ -702,7 +740,8 @@ namespace ReSolve { std::list::iterator it_tmp = it; it++; - if (*it == *it_tmp) { + if (*it == *it_tmp) + { *it += *it_tmp; tmp.erase(it_tmp); } @@ -713,11 +752,11 @@ namespace ReSolve /** * @brief Writes data from the std::list to COO matrix. - * + * * @param[in] tmp - List with matrix entries * @param[out] A - Output COO matrix * @return int - 0 if successful, error code otherwise. - * + * * @pre `tmp` contains matrix elements sorted in row-major order and * without duplicates. * @pre Number of `tmp` elements is not larger than number of nonzeros @@ -728,18 +767,19 @@ namespace ReSolve { index_type* coo_rows = A->getRowData(memory::HOST); index_type* coo_cols = A->getColData(memory::HOST); - real_type* coo_vals = A->getValues( memory::HOST); + real_type* coo_vals = A->getValues(memory::HOST); index_type nnz = static_cast(tmp.size()); - if (A->getNnz() < nnz) { + if (A->getNnz() < nnz) + { Logger::error() << "Too many NNZs: " << nnz << ". Cannot update! \n "; return 1; } A->setNnz(nnz); - index_type element_counter = 0; - std::list::const_iterator it = tmp.begin(); + index_type element_counter = 0; + std::list::const_iterator it = tmp.begin(); while (it != tmp.end()) { coo_rows[element_counter] = it->getRowIdx(); @@ -755,14 +795,13 @@ namespace ReSolve return 0; } - /** * @brief Writes data from the std::list to CSR matrix. - * + * * @param[in] tmp - List with matrix entries * @param[out] A - Output CSR matrix * @return int - 0 if successful, error code otherwise. - * + * * @pre `tmp` contains matrix elements sorted in row-major order and * without duplicates. * @pre Number of `tmp` elements is not larger than number of nonzeros @@ -773,11 +812,12 @@ namespace ReSolve { index_type* csr_rows = A->getRowData(memory::HOST); index_type* csr_cols = A->getColData(memory::HOST); - real_type* csr_vals = A->getValues( memory::HOST); + real_type* csr_vals = A->getValues(memory::HOST); // Set number of nonzeros index_type nnz = static_cast(tmp.size()); - if (A->getNnz() < nnz) { + if (A->getNnz() < nnz) + { Logger::error() << "Too many NNZs: " << nnz << ". Cannot update! \n "; return 1; @@ -785,19 +825,21 @@ namespace ReSolve A->setNnz(nnz); // Set all iterators - index_type column_index_counter = 0; - index_type row_pointer_counter = 0; - std::list::const_iterator it = tmp.begin(); + index_type column_index_counter = 0; + index_type row_pointer_counter = 0; + std::list::const_iterator it = tmp.begin(); // Set first row pointer to zero csr_rows[0] = 0; csr_cols[0] = it->getColIdx(); csr_vals[0] = it->getValue(); - for (index_type i = 1; i < nnz; ++i) { + for (index_type i = 1; i < nnz; ++i) + { std::list::const_iterator it_tmp = it; it++; - if (it->getRowIdx() != it_tmp->getRowIdx()) { + if (it->getRowIdx() != it_tmp->getRowIdx()) + { row_pointer_counter++; csr_rows[row_pointer_counter] = i; } diff --git a/resolve/matrix/io.hpp b/resolve/matrix/io.hpp index de50da95e..7c7b21a1f 100644 --- a/resolve/matrix/io.hpp +++ b/resolve/matrix/io.hpp @@ -1,27 +1,39 @@ #include -namespace ReSolve { namespace vector { - class Vector; -}} +namespace ReSolve +{ + namespace vector + { + class Vector; + } +} // namespace ReSolve -namespace ReSolve { namespace matrix { - class Sparse; - class Coo; - class Csr; -}} +namespace ReSolve +{ + namespace matrix + { + class Sparse; + class Coo; + class Csr; + } // namespace matrix +} // namespace ReSolve -namespace ReSolve { namespace io { - using vector_type = vector::Vector; +namespace ReSolve +{ + namespace io + { + using vector_type = vector::Vector; - matrix::Coo* createCooFromFile(std::istream& file, bool is_expand_symmetric = true); - matrix::Csr* createCsrFromFile(std::istream& file, bool is_expand_symmetric = true); - void updateMatrixFromFile(std::istream& file, matrix::Coo* A); - void updateMatrixFromFile(std::istream& file, matrix::Csr* A); - real_type* createArrayFromFile(std::istream& file); - vector::Vector* createVectorFromFile(std::istream& file); - void updateArrayFromFile(std::istream& file, real_type** rhs); - void updateVectorFromFile(std::istream& file, vector::Vector* rhs); + matrix::Coo* createCooFromFile(std::istream& file, bool is_expand_symmetric = true); + matrix::Csr* createCsrFromFile(std::istream& file, bool is_expand_symmetric = true); + void updateMatrixFromFile(std::istream& file, matrix::Coo* A); + void updateMatrixFromFile(std::istream& file, matrix::Csr* A); + real_type* createArrayFromFile(std::istream& file); + vector::Vector* createVectorFromFile(std::istream& file); + void updateArrayFromFile(std::istream& file, real_type** rhs); + void updateVectorFromFile(std::istream& file, vector::Vector* rhs); - int writeMatrixToFile(matrix::Sparse* A, std::ostream& file_out); - int writeVectorToFile(vector_type* vec_x, std::ostream &file_out); -}} // ReSolve::io + int writeMatrixToFile(matrix::Sparse* A, std::ostream& file_out); + int writeVectorToFile(vector_type* vec_x, std::ostream& file_out); + } // namespace io +} // namespace ReSolve diff --git a/resolve/random/RandomSketchingCountCpu.cpp b/resolve/random/RandomSketchingCountCpu.cpp index 9dfc336f7..4471fe1f2 100644 --- a/resolve/random/RandomSketchingCountCpu.cpp +++ b/resolve/random/RandomSketchingCountCpu.cpp @@ -3,17 +3,18 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Definition of RandomSketchingCountCuda class. - * + * */ -#include #include -#include -namespace ReSolve +#include +#include + +namespace ReSolve { /** * @brief Default constructor - * + * * @post All class variables set to nullptr. */ RandomSketchingCountCpu::RandomSketchingCountCpu() @@ -23,23 +24,23 @@ namespace ReSolve /// Destructor RandomSketchingCountCpu::~RandomSketchingCountCpu() { - delete [] h_labels_; - delete [] h_flip_; + delete[] h_labels_; + delete[] h_flip_; } /** * @brief Sketching method using CountSketch algorithm. - * + * * Implements actual sketching process. * * @param[in] input - Vector size _n_ - * @param[out] output - Vector size _k_ + * @param[out] output - Vector size _k_ * * @pre Both input and output variables are initialized and of correct size. - * Setup has been run at least once - * - * @return 0 if successful, !=0 otherwise (TODO). - * + * Setup has been run at least once + * + * @return 0 if successful, !=0 otherwise (TODO). + * */ int RandomSketchingCountCpu::Theta(vector_type* input, vector_type* output) { @@ -54,39 +55,43 @@ namespace ReSolve /** * @brief Sketching setup method for CountSketch algorithm. - * + * * Sets up parameters, sampling matrices, permuations, etc. - * + * * @param[in] n - Size of base vector - * @param[in] k - Size of sketch + * @param[in] k - Size of sketch + * + * @pre _n_ > _k_. * - * @pre _n_ > _k_. - * * @post The arrays needed for performing sketches with CountSketch (_flip_ and _labels_ ) - * are initialized. If GPU is enabled, the arrays will be copied to the GPU, as well - * - * @return 0 if successful, !=0 otherwise (TODO). + * are initialized. If GPU is enabled, the arrays will be copied to the GPU, as well + * + * @return 0 if successful, !=0 otherwise (TODO). */ int RandomSketchingCountCpu::setup(index_type n, index_type k) { k_rand_ = k; - n_ = n; + n_ = n; srand(static_cast(time(nullptr))); - //allocate labeling scheme vector and move to GPU + // allocate labeling scheme vector and move to GPU h_labels_ = new index_type[n_]; - //allocate sgn - a vector of flip signs - h_flip_ = new index_type[n_]; + // allocate sgn - a vector of flip signs + h_flip_ = new index_type[n_]; - //populate labeling scheme (can be done on the gpu really) - //to be fixed, this can be done on the GPU - for (int i=0; i _k_. _k_ value DID NOT CHANGE from the time the setup function * was executed. - * + * * @post The arrays needed for performing sketches with CountSketch * (_flip_ and _labels_ ) are reset to new values. If GPU is enabled, the - * arrays will be copied to the GPU, as well - * + * arrays will be copied to the GPU, as well + * * @return 0 if successful, !=0 otherwise (TODO). - * + * * @todo Need to be fixed, this can be done on the GPU. */ int RandomSketchingCountCpu::reset() { - for (int i = 0; i < n_; ++i) { + for (int i = 0; i < n_; ++i) + { h_labels_[i] = rand() % k_rand_; - int r = rand()%100; - if (r < 50) { + int r = rand() % 100; + if (r < 50) + { h_flip_[i] = -1; - } else { + } + else + { h_flip_[i] = 1; } } return 0; } -} +} // namespace ReSolve diff --git a/resolve/random/RandomSketchingCountCpu.hpp b/resolve/random/RandomSketchingCountCpu.hpp index e55351b95..7f82d85d6 100644 --- a/resolve/random/RandomSketchingCountCpu.hpp +++ b/resolve/random/RandomSketchingCountCpu.hpp @@ -3,14 +3,15 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Declaration of RandomSketchingCountCuda class. - * + * */ #pragma once #include -#include #include +#include -namespace ReSolve { +namespace ReSolve +{ // Forward declaration of vector::Vector class namespace vector @@ -20,34 +21,34 @@ namespace ReSolve { /** * @brief Count sketching implementation for CPU. - * + * */ class RandomSketchingCountCpu : public RandomSketchingImpl { - private: - using vector_type = vector::Vector; + private: + using vector_type = vector::Vector; - public: - // constructor - RandomSketchingCountCpu(); + public: + // constructor + RandomSketchingCountCpu(); - // destructor - virtual ~RandomSketchingCountCpu(); + // destructor + virtual ~RandomSketchingCountCpu(); - // Actual sketching process - virtual int Theta(vector_type* input, vector_type* output); + // Actual sketching process + virtual int Theta(vector_type* input, vector_type* output); - // Setup the parameters, sampling matrices, permuations, etc - virtual int setup(index_type n, index_type k); - virtual int reset(); // if needed can be reset (like when Krylov method restarts) + // Setup the parameters, sampling matrices, permuations, etc + virtual int setup(index_type n, index_type k); + virtual int reset(); // if needed can be reset (like when Krylov method restarts) - private: - index_type n_{0}; ///< size of base vector - index_type k_rand_{0}; ///< size of sketched vector + private: + index_type n_{0}; ///< size of base vector + index_type k_rand_{0}; ///< size of sketched vector - index_type* h_labels_{nullptr}; ///< label array size _n_, with values from _0_ to _k-1_ assigned by random - index_type* h_flip_{nullptr}; ///< flip array with values of 1 and -1 assigned by random + index_type* h_labels_{nullptr}; ///< label array size _n_, with values from _0_ to _k-1_ assigned by random + index_type* h_flip_{nullptr}; ///< flip array with values of 1 and -1 assigned by random - // MemoryHandler mem_; ///< Device memory manager object + // MemoryHandler mem_; ///< Device memory manager object }; -} +} // namespace ReSolve diff --git a/resolve/random/RandomSketchingCountCuda.cpp b/resolve/random/RandomSketchingCountCuda.cpp index 31d93d5a9..674f44383 100644 --- a/resolve/random/RandomSketchingCountCuda.cpp +++ b/resolve/random/RandomSketchingCountCuda.cpp @@ -3,18 +3,19 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Definition of RandomSketchingCountCuda class. - * + * */ +#include + #include +#include #include -#include -#include -namespace ReSolve +namespace ReSolve { /** * @brief Default constructor - * + * * @post All class variables set to nullptr. */ RandomSketchingCountCuda::RandomSketchingCountCuda() @@ -24,25 +25,25 @@ namespace ReSolve /// Destructor RandomSketchingCountCuda::~RandomSketchingCountCuda() { - delete [] h_labels_; - delete [] h_flip_; + delete[] h_labels_; + delete[] h_flip_; mem_.deleteOnDevice(d_labels_); mem_.deleteOnDevice(d_flip_); } /** * @brief Sketching method using CountSketch algorithm. - * + * * Implements actual sketching process. * * @param[in] input - Vector size _n_ - * @param[out] output - Vector size _k_ + * @param[out] output - Vector size _k_ * * @pre Both input and output variables are initialized and of correct size. - * Setup has been run at least once - * - * @return 0 if successful, !=0 otherwise (TODO). - * + * Setup has been run at least once + * + * @return 0 if successful, !=0 otherwise (TODO). + * */ int RandomSketchingCountCuda::Theta(vector_type* input, vector_type* output) { @@ -51,7 +52,7 @@ namespace ReSolve k_rand_, d_labels_, d_flip_, - input->getData(memory::DEVICE), + input->getData(memory::DEVICE), output->getData(memory::DEVICE)); mem_.deviceSynchronize(); return 0; @@ -59,46 +60,50 @@ namespace ReSolve /** * @brief Sketching setup method for CountSketch algorithm. - * + * * Sets up parameters, sampling matrices, permuations, etc. - * + * * @param[in] n - Size of base vector - * @param[in] k - Size of sketch + * @param[in] k - Size of sketch + * + * @pre _n_ > _k_. * - * @pre _n_ > _k_. - * * @post The arrays needed for performing sketches with CountSketch * (_flip_ and _labels_ ) are initialized. If GPU is enabled, the arrays - * will be copied to the GPU, as well - * - * @return 0 if successful, !=0 otherwise (TODO). + * will be copied to the GPU, as well + * + * @return 0 if successful, !=0 otherwise (TODO). */ int RandomSketchingCountCuda::setup(index_type n, index_type k) { k_rand_ = k; - n_ = n; + n_ = n; srand(static_cast(time(nullptr))); - //allocate labeling scheme vector and move to GPU + // allocate labeling scheme vector and move to GPU h_labels_ = new int[n_]; - //allocate sgn - a vector of flip signs - h_flip_ = new int[n_]; + // allocate sgn - a vector of flip signs + h_flip_ = new int[n_]; - //populate labeling scheme (can be done on the gpu really) - //to be fixed, this can be done on the GPU - for (int i=0; i _k_. _k_ value DID NOT CHANGE from the time the setup function * was executed. - * + * * @post The arrays needed for performing sketches with CountSketch * (_flip_ and _labels_ ) are reset to new values. If GPU is enabled, the - * arrays will be copied to the GPU, as well - * + * arrays will be copied to the GPU, as well + * * @return 0 if successful, !=0 otherwise (TODO). - * + * * @todo Need to be fixed, this can be done on the GPU. */ int RandomSketchingCountCuda::reset() // if needed can be reset (like when Krylov method restarts) { - for (int i = 0; i < n_; ++i) { + for (int i = 0; i < n_; ++i) + { h_labels_[i] = rand() % k_rand_; - int r = rand()%100; - if (r < 50) { + int r = rand() % 100; + if (r < 50) + { h_flip_[i] = -1; - } else { + } + else + { h_flip_[i] = 1; } } @@ -143,4 +152,4 @@ namespace ReSolve return 0; } -} +} // namespace ReSolve diff --git a/resolve/random/RandomSketchingCountCuda.hpp b/resolve/random/RandomSketchingCountCuda.hpp index d81a7a551..e4cbf28b3 100644 --- a/resolve/random/RandomSketchingCountCuda.hpp +++ b/resolve/random/RandomSketchingCountCuda.hpp @@ -3,14 +3,15 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Declaration of RandomSketchingCountCuda class. - * + * */ #pragma once #include -#include #include +#include -namespace ReSolve { +namespace ReSolve +{ // Forward declaration of vector::Vector class namespace vector @@ -20,37 +21,37 @@ namespace ReSolve { /** * @brief Count sketch implementation for CUDA device. - * + * */ class RandomSketchingCountCuda : public RandomSketchingImpl { using vector_type = vector::Vector; - public: - // constructor - RandomSketchingCountCuda(); + public: + // constructor + RandomSketchingCountCuda(); - // destructor - virtual ~RandomSketchingCountCuda(); + // destructor + virtual ~RandomSketchingCountCuda(); - // Actual sketching process - virtual int Theta(vector_type* input, vector_type* output); + // Actual sketching process + virtual int Theta(vector_type* input, vector_type* output); - // Setup the parameters, sampling matrices, permuations, etc - virtual int setup(index_type n, index_type k); - virtual int reset(); // if needed can be reset (like when Krylov method restarts) + // Setup the parameters, sampling matrices, permuations, etc + virtual int setup(index_type n, index_type k); + virtual int reset(); // if needed can be reset (like when Krylov method restarts) - private: - index_type n_{0}; ///< size of base vector - index_type k_rand_{0}; ///< size of sketched vector + private: + index_type n_{0}; ///< size of base vector + index_type k_rand_{0}; ///< size of sketched vector - index_type* h_labels_{nullptr}; ///< label array size _n_, with values from _0_ to _k-1_ assigned by random - index_type* h_flip_{nullptr}; ///< flip array with values of 1 and -1 assigned by random + index_type* h_labels_{nullptr}; ///< label array size _n_, with values from _0_ to _k-1_ assigned by random + index_type* h_flip_{nullptr}; ///< flip array with values of 1 and -1 assigned by random - index_type* d_labels_{nullptr}; ///< h_labels GPU counterpart - index_type* d_flip_{nullptr}; ///< h_flip GPU counterpart + index_type* d_labels_{nullptr}; ///< h_labels GPU counterpart + index_type* d_flip_{nullptr}; ///< h_flip GPU counterpart - MemoryHandler mem_; ///< Device memory manager object + MemoryHandler mem_; ///< Device memory manager object }; -} +} // namespace ReSolve diff --git a/resolve/random/RandomSketchingCountHip.cpp b/resolve/random/RandomSketchingCountHip.cpp index eb2fd0d0c..db4496177 100644 --- a/resolve/random/RandomSketchingCountHip.cpp +++ b/resolve/random/RandomSketchingCountHip.cpp @@ -3,18 +3,19 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Definition of RandomSketchingCountHip class. - * + * */ +#include + #include +#include #include -#include -#include -namespace ReSolve +namespace ReSolve { /** * @brief Default constructor - * + * * @post All class variables set to nullptr. */ RandomSketchingCountHip::RandomSketchingCountHip() @@ -24,25 +25,25 @@ namespace ReSolve /// Destructor RandomSketchingCountHip::~RandomSketchingCountHip() { - delete [] h_labels_; - delete [] h_flip_; + delete[] h_labels_; + delete[] h_flip_; mem_.deleteOnDevice(d_labels_); mem_.deleteOnDevice(d_flip_); } /** * @brief Sketching method using CountSketch algorithm. - * + * * Implements actual sketching process. * * @param[in] input - Vector size _n_ - * @param[out] output - Vector size _k_ + * @param[out] output - Vector size _k_ * * @pre Both input and output variables are initialized and of correct size. * Setup has been run at least once. - * - * @return 0 if successful, !=0 otherwise (TODO). - * + * + * @return 0 if successful, !=0 otherwise (TODO). + * */ int RandomSketchingCountHip::Theta(vector_type* input, vector_type* output) { @@ -51,7 +52,7 @@ namespace ReSolve k_rand_, d_labels_, d_flip_, - input->getData(memory::DEVICE), + input->getData(memory::DEVICE), output->getData(memory::DEVICE)); mem_.deviceSynchronize(); return 0; @@ -59,46 +60,50 @@ namespace ReSolve /** * @brief Sketching setup method for CountSketch algorithm. - * + * * Sets up parameters, sampling matrices, permuations, etc. - * + * * @param[in] n - Size of base vector - * @param[in] k - Size of sketch + * @param[in] k - Size of sketch + * + * @pre _n_ > _k_. * - * @pre _n_ > _k_. - * * @post The arrays needed for performing sketches with CountSketch * (_flip_ and _labels_ ) are initialized. If GPU is enabled, the arrays - * will be copied to the GPU, as well - * - * @return 0 if successful, !=0 otherwise (TODO). + * will be copied to the GPU, as well + * + * @return 0 if successful, !=0 otherwise (TODO). */ int RandomSketchingCountHip::setup(index_type n, index_type k) { k_rand_ = k; - n_ = n; + n_ = n; srand(static_cast(time(nullptr))); - //allocate labeling scheme vector and move to GPU + // allocate labeling scheme vector and move to GPU h_labels_ = new int[n_]; - //allocate sgn - a vector of flip signs - h_flip_ = new int[n_]; + // allocate sgn - a vector of flip signs + h_flip_ = new int[n_]; - //populate labeling scheme (can be done on the gpu really) - //to be fixed, this can be done on the GPU - for (int i=0; i _k_. _k_ value DID NOT CHANGE from the time the setup function * was executed. - * + * * @post The arrays needed for performing sketches with CountSketch * (_flip_ and _labels_ ) are reset to new values. If GPU is enabled, the - * arrays will be copied to the GPU, as well - * - * @return 0 if successful, !=0 otherwise (TODO). - * + * arrays will be copied to the GPU, as well + * + * @return 0 if successful, !=0 otherwise (TODO). + * * @todo Need to be fixed, this can be done on the GPU. */ int RandomSketchingCountHip::reset() // if needed can be reset (like when Krylov method restarts) { - for (int i = 0; i < n_; ++i) { + for (int i = 0; i < n_; ++i) + { h_labels_[i] = rand() % k_rand_; - int r = rand()%100; - if (r < 50) { + int r = rand() % 100; + if (r < 50) + { h_flip_[i] = -1; - } else { + } + else + { h_flip_[i] = 1; } } @@ -143,4 +152,4 @@ namespace ReSolve return 0; } -} +} // namespace ReSolve diff --git a/resolve/random/RandomSketchingCountHip.hpp b/resolve/random/RandomSketchingCountHip.hpp index ff7ef5c76..c55137a51 100644 --- a/resolve/random/RandomSketchingCountHip.hpp +++ b/resolve/random/RandomSketchingCountHip.hpp @@ -3,14 +3,15 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Declaration of RandomSketchingCountHip class. - * + * */ #pragma once #include -#include #include +#include -namespace ReSolve { +namespace ReSolve +{ // Forward declaration of vector::Vector class namespace vector @@ -20,35 +21,36 @@ namespace ReSolve { /** * @brief Count sketch implementation for a HIP device. - * + * */ class RandomSketchingCountHip : public RandomSketchingImpl { - private: - using vector_type = vector::Vector; - public: - // constructor - RandomSketchingCountHip(); - // destructor - virtual ~RandomSketchingCountHip(); + private: + using vector_type = vector::Vector; + + public: + // constructor + RandomSketchingCountHip(); + // destructor + virtual ~RandomSketchingCountHip(); - // Actual sketching process - virtual int Theta(vector_type* input, vector_type* output); + // Actual sketching process + virtual int Theta(vector_type* input, vector_type* output); - // Setup the parameters, sampling matrices, permuations, etc - virtual int setup(index_type n, index_type k); - virtual int reset(); // if needed can be reset (like when Krylov method restarts) + // Setup the parameters, sampling matrices, permuations, etc + virtual int setup(index_type n, index_type k); + virtual int reset(); // if needed can be reset (like when Krylov method restarts) - private: - index_type n_{0}; ///< size of base vector - index_type k_rand_{0}; ///< size of sketched vector + private: + index_type n_{0}; ///< size of base vector + index_type k_rand_{0}; ///< size of sketched vector - index_type* h_labels_{nullptr}; ///< label array size _n_, with values from _0_ to _k-1_ assigned by random - index_type* h_flip_{nullptr}; ///< flip array with values of 1 and -1 assigned by random + index_type* h_labels_{nullptr}; ///< label array size _n_, with values from _0_ to _k-1_ assigned by random + index_type* h_flip_{nullptr}; ///< flip array with values of 1 and -1 assigned by random - index_type* d_labels_{nullptr}; ///< h_labels GPU counterpart - index_type* d_flip_{nullptr}; ///< h_flip GPU counterpart + index_type* d_labels_{nullptr}; ///< h_labels GPU counterpart + index_type* d_flip_{nullptr}; ///< h_flip GPU counterpart - MemoryHandler mem_; ///< Device memory manager object + MemoryHandler mem_; ///< Device memory manager object }; -} +} // namespace ReSolve diff --git a/resolve/random/RandomSketchingFWHTCpu.cpp b/resolve/random/RandomSketchingFWHTCpu.cpp index 14e6dfd64..c610f49ea 100644 --- a/resolve/random/RandomSketchingFWHTCpu.cpp +++ b/resolve/random/RandomSketchingFWHTCpu.cpp @@ -3,25 +3,26 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Definition of RandomSketchingFWHTCpu class. - * + * */ #include -#include #include +#include + +#include #include -#include +#include #include -#include -#include +#include -namespace ReSolve +namespace ReSolve { using out = io::Logger; /** * @brief Default constructor - * + * * @post All class variables are set to nullptr. */ RandomSketchingFWHTCpu::RandomSketchingFWHTCpu() @@ -30,122 +31,130 @@ namespace ReSolve /** * @brief destructor - * + * */ RandomSketchingFWHTCpu::~RandomSketchingFWHTCpu() { - delete [] h_seq_; - delete [] h_D_; - delete [] h_perm_; - delete [] d_aux_; + delete[] h_seq_; + delete[] h_D_; + delete[] h_perm_; + delete[] d_aux_; } - /** + /** * @brief Sketching method - it sketches a given vector (shrinks its size) - * + * * Implements actual sketching process. * - * @param[in] input - input vector, size _n_ - * @param[out] output - output vector, size _k_ - * + * @param[in] input - input vector, size _n_ + * @param[out] output - output vector, size _k_ + * * @pre both vectors are allocated. Setup function from this class has been called. * @warning normal FWHT function requires scaling by 1/k. This function does not scale. * - * @return 0 if successful, !=0 otherwise (TODO). + * @return 0 if successful, !=0 otherwise (TODO). */ int RandomSketchingFWHTCpu::Theta(vector_type* input, vector_type* output) { std::memset(d_aux_, 0.0, static_cast(N_) * sizeof(real_type)); - cpu::FWHT_scaleByD(n_, + cpu::FWHT_scaleByD(n_, h_D_, input->getData(memory::HOST), - d_aux_); + d_aux_); cpu::FWHT(1, log2N_, d_aux_); - cpu::FWHT_select(k_rand_, - h_perm_, - d_aux_, + cpu::FWHT_select(k_rand_, + h_perm_, + d_aux_, output->getData(memory::HOST)); return 0; } - /** - * @brief Sketching method setup. - * + /** + * @brief Sketching method setup. + * * This function allocated P(erm), D (diagonal scaling matrix) and populates * them. It also allocates auxiliary arrays. * * @param[in] n - size of base (non-sketched) vector - * @param[in] k - size of sketched vector. - * + * @param[in] k - size of sketched vector. + * * @post Everything is set up so you can call Theta. * - * @return 0 if successful, !=0 otherwise (TODO). + * @return 0 if successful, !=0 otherwise (TODO). */ int RandomSketchingFWHTCpu::setup(index_type n, index_type k) { - k_rand_ = k; - n_ = n; + k_rand_ = k; + n_ = n; // pad to the nearest power of 2 - real_type N_real = std::pow(2.0, std::ceil(std::log(n_)/std::log(2.0))); - if (N_real > static_cast(std::numeric_limits::max())) { + real_type N_real = std::pow(2.0, std::ceil(std::log(n_) / std::log(2.0))); + if (N_real > static_cast(std::numeric_limits::max())) + { out::error() << "Exceeded numerical limits of index_type ...\n"; return 1; } - N_ = static_cast(N_real); - log2N_ = static_cast(std::log2(N_real)); - one_over_k_ = 1.0/std::sqrt(static_cast(k_rand_)); + N_ = static_cast(N_real); + log2N_ = static_cast(std::log2(N_real)); + one_over_k_ = 1.0 / std::sqrt(static_cast(k_rand_)); srand(static_cast(time(nullptr))); h_seq_ = new index_type[N_]; - h_perm_ = new index_type[k_rand_]; - h_D_ = new index_type[n_]; + h_perm_ = new index_type[k_rand_]; + h_D_ = new index_type[n_]; int r; int temp; - for (int i = 0; i < N_; ++i) { + for (int i = 0; i < N_; ++i) + { h_seq_[i] = i; - } - //Fisher-Yates - for (int i = N_ - 1; i > 0; i--) { - r = rand() % i; - temp = h_seq_[i]; + } + // Fisher-Yates + for (int i = N_ - 1; i > 0; i--) + { + r = rand() % i; + temp = h_seq_[i]; h_seq_[i] = h_seq_[r]; - h_seq_[r] = temp; + h_seq_[r] = temp; } - for (int i = 0; i < k_rand_; ++i) { + for (int i = 0; i < k_rand_; ++i) + { h_perm_[i] = h_seq_[i]; } // and D - for (int i = 0; i < n_; ++i){ + for (int i = 0; i < n_; ++i) + { r = rand() % 100; - if (r < 50){ + if (r < 50) + { h_D_[i] = -1; - } else { + } + else + { h_D_[i] = 1; } } // Allocate auxiliary data array - d_aux_ = new real_type[N_]; + d_aux_ = new real_type[N_]; return 0; } - /** + /** * @brief Reset values in the arrays used for sketching. - * + * * Sketching can be reset, similar to Krylov method restarts. * If the solver restarts, call this method between restarts. * * @post Everything is set up so you can call Theta. * - * @return 0 if successful, !=0 otherwise (TODO). - * + * @return 0 if successful, !=0 otherwise (TODO). + * * @todo Need to be fixed, this can be done on the GPU. */ int RandomSketchingFWHTCpu::reset() @@ -155,31 +164,38 @@ namespace ReSolve int r; int temp; - for (int i = 0; i < N_; ++i) { + for (int i = 0; i < N_; ++i) + { h_seq_[i] = i; } - //Fisher-Yates - for (int i = N_ - 1; i > 0; i--) { - r = rand() % i; - temp = h_seq_[i]; + // Fisher-Yates + for (int i = N_ - 1; i > 0; i--) + { + r = rand() % i; + temp = h_seq_[i]; h_seq_[i] = h_seq_[r]; - h_seq_[r] = temp; + h_seq_[r] = temp; } - for (int i = 0; i < k_rand_; ++i) { + for (int i = 0; i < k_rand_; ++i) + { h_perm_[i] = h_seq_[i]; } // and D - for (int i = 0; i < n_; ++i) { + for (int i = 0; i < n_; ++i) + { r = rand() % 100; - if (r < 50) { + if (r < 50) + { h_D_[i] = -1; - } else { + } + else + { h_D_[i] = 1; } } return 0; } -} +} // namespace ReSolve diff --git a/resolve/random/RandomSketchingFWHTCpu.hpp b/resolve/random/RandomSketchingFWHTCpu.hpp index a582a7fb6..82eb0ed5b 100644 --- a/resolve/random/RandomSketchingFWHTCpu.hpp +++ b/resolve/random/RandomSketchingFWHTCpu.hpp @@ -3,54 +3,56 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Declaration of RandomSketchingFWHTCpu class. - * + * */ #pragma once #include -#include #include +#include -namespace ReSolve { +namespace ReSolve +{ // Forward declaration of vector::Vector class namespace vector { class Vector; } - + /** * @brief Fast Walsh-Hadamard transform implementation using CPU backend. - * + * */ class RandomSketchingFWHTCpu : public RandomSketchingImpl { - private: - using vector_type = vector::Vector; - public: - RandomSketchingFWHTCpu(); - virtual ~RandomSketchingFWHTCpu(); - - // Actual sketching process - virtual int Theta(vector_type* input, vector_type* output); - - // Setup the parameters, sampling matrices, permuations, etc - virtual int setup(index_type n, index_type k); - virtual int reset(); // if needed can be reset (like when Krylov method restarts) - - private: - index_type n_{0}; ///< size of base vector - index_type k_rand_{0}; ///< size of sketched vector - - index_type* h_seq_{nullptr}; ///< auxiliary variable used for Fisher-Yates algorithm - index_type* h_D_{nullptr}; ///< D is a diagonal matrix (FWHT computed y = PHDx), we store it as an array. D consists of _1_s and _-1_s - index_type* h_perm_{nullptr}; ///< permuation array, containing _k_ values in range of _0_ to _n-1_ - - real_type* d_aux_{nullptr}; ///< auxiliary variable needed to store partial results in FWHT application. - - index_type N_{0}; ///< padded vector size - index_type log2N_{0}; ///< log2 of N_, used multiple times so we store it - real_type one_over_k_{0.0}; ///< 1/k, used many times for scaling so we store the value to avoid recomputation - - // MemoryHandler mem_; ///< Device memory manager object + private: + using vector_type = vector::Vector; + + public: + RandomSketchingFWHTCpu(); + virtual ~RandomSketchingFWHTCpu(); + + // Actual sketching process + virtual int Theta(vector_type* input, vector_type* output); + + // Setup the parameters, sampling matrices, permuations, etc + virtual int setup(index_type n, index_type k); + virtual int reset(); // if needed can be reset (like when Krylov method restarts) + + private: + index_type n_{0}; ///< size of base vector + index_type k_rand_{0}; ///< size of sketched vector + + index_type* h_seq_{nullptr}; ///< auxiliary variable used for Fisher-Yates algorithm + index_type* h_D_{nullptr}; ///< D is a diagonal matrix (FWHT computed y = PHDx), we store it as an array. D consists of _1_s and _-1_s + index_type* h_perm_{nullptr}; ///< permuation array, containing _k_ values in range of _0_ to _n-1_ + + real_type* d_aux_{nullptr}; ///< auxiliary variable needed to store partial results in FWHT application. + + index_type N_{0}; ///< padded vector size + index_type log2N_{0}; ///< log2 of N_, used multiple times so we store it + real_type one_over_k_{0.0}; ///< 1/k, used many times for scaling so we store the value to avoid recomputation + + // MemoryHandler mem_; ///< Device memory manager object }; -} +} // namespace ReSolve diff --git a/resolve/random/RandomSketchingFWHTCuda.cpp b/resolve/random/RandomSketchingFWHTCuda.cpp index 2544c49df..6ad615407 100644 --- a/resolve/random/RandomSketchingFWHTCuda.cpp +++ b/resolve/random/RandomSketchingFWHTCuda.cpp @@ -3,25 +3,26 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Definition of RandomSketchingFWHTCuda class. - * + * */ #include -#include #include +#include + +#include #include -#include +#include #include -#include -#include +#include -namespace ReSolve +namespace ReSolve { using out = io::Logger; /** * @brief Default constructor - * + * */ RandomSketchingFWHTCuda::RandomSketchingFWHTCuda() { @@ -29,118 +30,126 @@ namespace ReSolve /** * @brief Destructor - * + * */ RandomSketchingFWHTCuda::~RandomSketchingFWHTCuda() { using namespace memory; - delete [] h_seq_; - delete [] h_D_; - delete [] h_perm_; + delete[] h_seq_; + delete[] h_D_; + delete[] h_perm_; mem_.deleteOnDevice(d_D_); mem_.deleteOnDevice(d_perm_); mem_.deleteOnDevice(d_aux_); } - /** + /** * @brief Sketching method - it sketches a given vector (shrinks its size) - * + * * Implements actual sketching process. * - * @param[in] input - input vector, size _n_ - * @param[out] output - output vector, size _k_ - * + * @param[in] input - input vector, size _n_ + * @param[out] output - output vector, size _k_ + * * @pre both vectors are allocated. Setup function from this class has been called. * @warning normal FWHT function requires scaling by 1/k. This function does not scale. * - * @return 0 if successful, !=0 otherwise (TODO). + * @return 0 if successful, !=0 otherwise (TODO). */ int RandomSketchingFWHTCuda::Theta(vector_type* input, vector_type* output) { mem_.setZeroArrayOnDevice(d_aux_, N_); - cuda::FWHT_scaleByD(n_, + cuda::FWHT_scaleByD(n_, d_D_, - input->getData(memory::DEVICE), - d_aux_); + input->getData(memory::DEVICE), + d_aux_); mem_.deviceSynchronize(); cuda::FWHT(1, log2N_, d_aux_); mem_.deviceSynchronize(); - cuda::FWHT_select(k_rand_, - d_perm_, - d_aux_, - output->getData(memory::DEVICE)); + cuda::FWHT_select(k_rand_, + d_perm_, + d_aux_, + output->getData(memory::DEVICE)); mem_.deviceSynchronize(); return 0; } - /** - * @brief Sketching method setup. - * + /** + * @brief Sketching method setup. + * * This function allocated P(erm), D (diagonal scaling matrix) and populates * them. It also allocates auxiliary arrays. * * @param[in] n - size of base (non-sketched) vector - * @param[in] k - size of sketched vector. - * + * @param[in] k - size of sketched vector. + * * @post Everything is set up so you can call Theta. * - * @return 0 if successful, !=0 otherwise (TODO). + * @return 0 if successful, !=0 otherwise (TODO). */ int RandomSketchingFWHTCuda::setup(index_type n, index_type k) { - k_rand_ = k; - n_ = n; + k_rand_ = k; + n_ = n; // pad to the nearest power of 2 - real_type N_real = std::pow(2.0, std::ceil(std::log(n_)/std::log(2.0))); - if (N_real > static_cast(std::numeric_limits::max())) { + real_type N_real = std::pow(2.0, std::ceil(std::log(n_) / std::log(2.0))); + if (N_real > static_cast(std::numeric_limits::max())) + { out::error() << "Exceeded numerical limits of index_type ...\n"; return 1; } - N_ = static_cast(N_real); - log2N_ = static_cast(std::log2(N_real)); - one_over_k_ = 1.0/std::sqrt(static_cast(k_rand_)); + N_ = static_cast(N_real); + log2N_ = static_cast(std::log2(N_real)); + one_over_k_ = 1.0 / std::sqrt(static_cast(k_rand_)); srand(static_cast(time(nullptr))); h_seq_ = new int[N_]; - h_perm_ = new int[k_rand_]; - h_D_ = new int[n_]; + h_perm_ = new int[k_rand_]; + h_D_ = new int[n_]; int r; int temp; - for (int i = 0; i < N_; ++i) { + for (int i = 0; i < N_; ++i) + { h_seq_[i] = i; - } - //Fisher-Yates - for (int i = N_ - 1; i > 0; i--) { - r = rand() % i; - temp = h_seq_[i]; + } + // Fisher-Yates + for (int i = N_ - 1; i > 0; i--) + { + r = rand() % i; + temp = h_seq_[i]; h_seq_[i] = h_seq_[r]; - h_seq_[r] = temp; + h_seq_[r] = temp; } - for (int i = 0; i < k_rand_; ++i) { + for (int i = 0; i < k_rand_; ++i) + { h_perm_[i] = h_seq_[i]; } // and D - for (int i = 0; i < n_; ++i){ + for (int i = 0; i < n_; ++i) + { r = rand() % 100; - if (r < 50){ + if (r < 50) + { h_D_[i] = -1; - } else { + } + else + { h_D_[i] = 1; } } // allocate on device - mem_.allocateArrayOnDevice(&d_perm_, k_rand_); - mem_.allocateArrayOnDevice(&d_D_, n_); - mem_.allocateArrayOnDevice(&d_aux_, N_); + mem_.allocateArrayOnDevice(&d_perm_, k_rand_); + mem_.allocateArrayOnDevice(&d_D_, n_); + mem_.allocateArrayOnDevice(&d_aux_, N_); // then copy mem_.copyArrayHostToDevice(d_perm_, h_perm_, k_rand_); mem_.copyArrayHostToDevice(d_D_, h_D_, n_); @@ -148,16 +157,16 @@ namespace ReSolve return 0; } - /** + /** * @brief Reset values in the arrays used for sketching. - * + * * Sketching can be reset, similar to Krylov method restarts. * If the solver restarts, call this method between restarts. * * @post Everything is set up so you can call Theta. * - * @return 0 if successful, !=0 otherwise (TODO). - * + * @return 0 if successful, !=0 otherwise (TODO). + * * @todo Need to be fixed, this can be done on the GPU. */ int RandomSketchingFWHTCuda::reset() @@ -167,27 +176,34 @@ namespace ReSolve int r; int temp; - for (int i = 0; i < N_; ++i) { + for (int i = 0; i < N_; ++i) + { h_seq_[i] = i; } - //Fisher-Yates - for (int i = N_ - 1; i > 0; i--) { - r = rand() % i; - temp = h_seq_[i]; + // Fisher-Yates + for (int i = N_ - 1; i > 0; i--) + { + r = rand() % i; + temp = h_seq_[i]; h_seq_[i] = h_seq_[r]; - h_seq_[r] = temp; + h_seq_[r] = temp; } - for (int i = 0; i < k_rand_; ++i) { + for (int i = 0; i < k_rand_; ++i) + { h_perm_[i] = h_seq_[i]; } // and D - for (int i = 0; i < n_; ++i) { + for (int i = 0; i < n_; ++i) + { r = rand() % 100; - if (r < 50) { + if (r < 50) + { h_D_[i] = -1; - } else { + } + else + { h_D_[i] = 1; } } @@ -198,4 +214,4 @@ namespace ReSolve return 0; } -} +} // namespace ReSolve diff --git a/resolve/random/RandomSketchingFWHTCuda.hpp b/resolve/random/RandomSketchingFWHTCuda.hpp index 03eee20b0..cbfd6e02a 100644 --- a/resolve/random/RandomSketchingFWHTCuda.hpp +++ b/resolve/random/RandomSketchingFWHTCuda.hpp @@ -3,55 +3,57 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Declaration of RandomSketchingFWHTCuda class. - * + * */ #pragma once #include -#include #include +#include -namespace ReSolve { +namespace ReSolve +{ // Forward declaration of vector::Vector class namespace vector { class Vector; } - + /** * @brief Fast Walsh-Hadamard transform implementation using CUDA backend. - * + * */ class RandomSketchingFWHTCuda : public RandomSketchingImpl { using vector_type = vector::Vector; - public: - RandomSketchingFWHTCuda(); - virtual ~RandomSketchingFWHTCuda(); - - // Actual sketching process - virtual int Theta(vector_type* input, vector_type* output); - - // Setup the parameters, sampling matrices, permuations, etc - virtual int setup(index_type n, index_type k); - virtual int reset(); // if needed can be reset (like when Krylov method restarts) - - private: - index_type n_{0}; ///< size of base vector - index_type k_rand_{0}; ///< size of sketched vector - - index_type* h_seq_{nullptr}; ///< auxiliary variable used for Fisher-Yates algorithm - index_type* h_D_{nullptr}; ///< D is a diagonal matrix (FWHT computed y = PHDx), we store it as an array. D consists of _1_s and _-1_s - index_type* h_perm_{nullptr}; ///< permuation array, containing _k_ values in range of _0_ to _n-1_ - - index_type* d_D_{nullptr}; ///< device mirror of D - index_type* d_perm_{nullptr}; ///< device mirror of h_perm - real_type* d_aux_{nullptr}; ///< auxiliary variable needed to store partial results in FWHT application. - - index_type N_{0}; ///< padded vector size - index_type log2N_{0}; ///< log2 of N_, used multiple times so we store it - real_type one_over_k_{0.0}; ///< 1/k, used many times for scaling so we store the value to avoid recomputation - - MemoryHandler mem_; ///< Device memory manager object + + public: + RandomSketchingFWHTCuda(); + virtual ~RandomSketchingFWHTCuda(); + + // Actual sketching process + virtual int Theta(vector_type* input, vector_type* output); + + // Setup the parameters, sampling matrices, permuations, etc + virtual int setup(index_type n, index_type k); + virtual int reset(); // if needed can be reset (like when Krylov method restarts) + + private: + index_type n_{0}; ///< size of base vector + index_type k_rand_{0}; ///< size of sketched vector + + index_type* h_seq_{nullptr}; ///< auxiliary variable used for Fisher-Yates algorithm + index_type* h_D_{nullptr}; ///< D is a diagonal matrix (FWHT computed y = PHDx), we store it as an array. D consists of _1_s and _-1_s + index_type* h_perm_{nullptr}; ///< permuation array, containing _k_ values in range of _0_ to _n-1_ + + index_type* d_D_{nullptr}; ///< device mirror of D + index_type* d_perm_{nullptr}; ///< device mirror of h_perm + real_type* d_aux_{nullptr}; ///< auxiliary variable needed to store partial results in FWHT application. + + index_type N_{0}; ///< padded vector size + index_type log2N_{0}; ///< log2 of N_, used multiple times so we store it + real_type one_over_k_{0.0}; ///< 1/k, used many times for scaling so we store the value to avoid recomputation + + MemoryHandler mem_; ///< Device memory manager object }; } // namespace ReSolve diff --git a/resolve/random/RandomSketchingFWHTHip.cpp b/resolve/random/RandomSketchingFWHTHip.cpp index 20b31915d..e770ddfe0 100644 --- a/resolve/random/RandomSketchingFWHTHip.cpp +++ b/resolve/random/RandomSketchingFWHTHip.cpp @@ -4,25 +4,26 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Definition of RandomSketchingFWHTHip class. - * + * */ #include -#include #include +#include + +#include #include -#include +#include #include -#include -#include +#include -namespace ReSolve +namespace ReSolve { using out = io::Logger; /** * @brief Default constructor - * + * */ RandomSketchingFWHTHip::RandomSketchingFWHTHip() { @@ -30,83 +31,84 @@ namespace ReSolve /** * @brief Destructor - * + * * @todo Add boolean flag indicating setup (allocations) are done. - * + * */ RandomSketchingFWHTHip::~RandomSketchingFWHTHip() { using namespace memory; - delete [] h_seq_; - delete [] h_D_; - delete [] h_perm_; + delete[] h_seq_; + delete[] h_D_; + delete[] h_perm_; mem_.deleteOnDevice(d_D_); mem_.deleteOnDevice(d_perm_); mem_.deleteOnDevice(d_aux_); } - /** + /** * @brief Sketching method - it sketches a given vector (shrinks its size) - * + * * Implements actual sketching process. * - * @param[in] input - input vector of size _n_ - * @param[out] output - output vector of size _k_ - * + * @param[in] input - input vector of size _n_ + * @param[out] output - output vector of size _k_ + * * @pre both vectors are allocated. Setup function from this class has been called. * @warning normal FWHT function requires scaling by 1/k. This function does not scale. * - * @return 0 if successful, !=0 otherwise (TODO). + * @return 0 if successful, !=0 otherwise (TODO). */ int RandomSketchingFWHTHip::Theta(vector_type* input, vector_type* output) { mem_.setZeroArrayOnDevice(d_aux_, N_); - hip::FWHT_scaleByD(n_, + hip::FWHT_scaleByD(n_, d_D_, - input->getData(memory::DEVICE), - d_aux_); + input->getData(memory::DEVICE), + d_aux_); mem_.deviceSynchronize(); hip::FWHT(1, log2N_, d_aux_); mem_.deviceSynchronize(); - hip::FWHT_select(k_rand_, - d_perm_, - d_aux_, - output->getData(memory::DEVICE)); + hip::FWHT_select(k_rand_, + d_perm_, + d_aux_, + output->getData(memory::DEVICE)); mem_.deviceSynchronize(); return 0; } - /** - * @brief Sketching method setup. - * + /** + * @brief Sketching method setup. + * * This function allocated P(erm), D (diagonal scaling matrix) and populates * them. It also allocates auxiliary arrays. * * * @param[in] n - size of base (non-sketched) vector - * @param[in] k - size of sketched vector. - * + * @param[in] k - size of sketched vector. + * * @post Everything is set up so you can call Theta. * - * @return 0 of successful, !=0 otherwise. + * @return 0 of successful, !=0 otherwise. */ int RandomSketchingFWHTHip::setup(index_type n, index_type k) { - k_rand_ = k; - n_ = n; + k_rand_ = k; + n_ = n; // pad to the nearest power of 2 - real_type N_real = std::pow(2.0, std::ceil(std::log(n_)/std::log(2.0))); - if (N_real > static_cast(std::numeric_limits::max())) { + real_type N_real = std::pow(2.0, std::ceil(std::log(n_) / std::log(2.0))); + if (N_real > static_cast(std::numeric_limits::max())) + { out::error() << "Exceeded numerical limits of index_type ...\n"; return 1; } - N_ = static_cast(N_real); - log2N_ = static_cast(std::log2(N_real)); - one_over_k_ = 1.0/std::sqrt(static_cast(k_rand_)); + N_ = static_cast(N_real); + log2N_ = static_cast(std::log2(N_real)); + one_over_k_ = 1.0 / std::sqrt(static_cast(k_rand_)); srand(static_cast(time(nullptr))); @@ -117,34 +119,41 @@ namespace ReSolve int r; int temp; - for (int i = 0; i < N_; ++i) { + for (int i = 0; i < N_; ++i) + { h_seq_[i] = i; - } - //Fisher-Yates - for (int i = N_ - 1; i > 0; i--) { - r = rand() % i; - temp = h_seq_[i]; + } + // Fisher-Yates + for (int i = N_ - 1; i > 0; i--) + { + r = rand() % i; + temp = h_seq_[i]; h_seq_[i] = h_seq_[r]; - h_seq_[r] = temp; + h_seq_[r] = temp; } - for (int i = 0; i < k_rand_; ++i) { + for (int i = 0; i < k_rand_; ++i) + { h_perm_[i] = h_seq_[i]; } // and D - for (int i = 0; i < n_; ++i){ + for (int i = 0; i < n_; ++i) + { r = rand() % 100; - if (r < 50){ + if (r < 50) + { h_D_[i] = -1; - } else { + } + else + { h_D_[i] = 1; } } // allocate on device - mem_.allocateArrayOnDevice(&d_perm_, k_rand_); - mem_.allocateArrayOnDevice(&d_D_, n_); - mem_.allocateArrayOnDevice(&d_aux_, N_); + mem_.allocateArrayOnDevice(&d_perm_, k_rand_); + mem_.allocateArrayOnDevice(&d_D_, n_); + mem_.allocateArrayOnDevice(&d_aux_, N_); // then copy mem_.copyArrayHostToDevice(d_perm_, h_perm_, k_rand_); mem_.copyArrayHostToDevice(d_D_, h_D_, n_); @@ -152,16 +161,16 @@ namespace ReSolve return 0; } - /** + /** * @brief Reset values in the arrays used for sketching. - * + * * Sketching can be reset, similar to Krylov method restarts. * If the solver restarts, call this method between restarts. * * @post Everything is set up so you can call Theta. * * @return 0 of successful, -1 otherwise. - * + * * @todo Need to be fixed, this can be done on the GPU. */ int RandomSketchingFWHTHip::reset() @@ -171,27 +180,34 @@ namespace ReSolve int r; int temp; - for (int i = 0; i < N_; ++i) { + for (int i = 0; i < N_; ++i) + { h_seq_[i] = i; } - //Fisher-Yates - for (int i = N_ - 1; i > 0; i--) { - r = rand() % i; - temp = h_seq_[i]; + // Fisher-Yates + for (int i = N_ - 1; i > 0; i--) + { + r = rand() % i; + temp = h_seq_[i]; h_seq_[i] = h_seq_[r]; - h_seq_[r] = temp; + h_seq_[r] = temp; } - for (int i = 0; i < k_rand_; ++i) { + for (int i = 0; i < k_rand_; ++i) + { h_perm_[i] = h_seq_[i]; } // and D - for (int i = 0; i < n_; ++i) { + for (int i = 0; i < n_; ++i) + { r = rand() % 100; - if (r < 50) { + if (r < 50) + { h_D_[i] = -1; - } else { + } + else + { h_D_[i] = 1; } } @@ -202,4 +218,4 @@ namespace ReSolve return 0; } -} +} // namespace ReSolve diff --git a/resolve/random/RandomSketchingFWHTHip.hpp b/resolve/random/RandomSketchingFWHTHip.hpp index 746d88711..0892a2cb7 100644 --- a/resolve/random/RandomSketchingFWHTHip.hpp +++ b/resolve/random/RandomSketchingFWHTHip.hpp @@ -3,58 +3,60 @@ * @author Kasia Swirydowicz (kasia.swirydowicz@pnnl.gov) * @author Slaven Peles (peless@ornl.gov) * @brief Declaration of RandomSketchingFWHTHip class. - * + * * @copyright Copyright (c) 2024 - * + * */ #pragma once #include -#include #include +#include -namespace ReSolve { +namespace ReSolve +{ // Forward declaration of vector::Vector class namespace vector { class Vector; } - + /** * @brief Fast Walsh-Hadamard transform implementation using HIP backend. - * + * */ class RandomSketchingFWHTHip : public RandomSketchingImpl { using vector_type = vector::Vector; - public: - RandomSketchingFWHTHip(); - virtual ~RandomSketchingFWHTHip(); - - // Actual sketching process - virtual int Theta(vector_type* input, vector_type* output); - - // Setup the parameters, sampling matrices, permuations, etc - virtual int setup(index_type n, index_type k); - virtual int reset(); - - private: - index_type n_{0}; ///< size of base vector - index_type k_rand_{0}; ///< size of sketched vector - - index_type* h_seq_{nullptr}; ///< auxiliary variable used for Fisher-Yates algorithm - index_type* h_D_{nullptr}; ///< D is a diagonal matrix (FWHT computed y = PHDx), we store it as an array. D consists of _1_s and _-1_s - index_type* h_perm_{nullptr}; ///< permuation array, containing _k_ values in range of _0_ to _n-1_ - - index_type* d_D_{nullptr}; ///< device mirror of D - index_type* d_perm_{nullptr}; ///< device mirror of h_perm - real_type* d_aux_{nullptr}; ///< auxiliary variable needed to store partial results in FWHT application. - - index_type N_{0}; ///< padded vector size - index_type log2N_{0}; ///< log2 of N_, used multiple times so we store it - real_type one_over_k_{0.0}; ///< 1/k, used many times for scaling so we store the value to avoid recomputation - - MemoryHandler mem_; ///< Device memory manager object + + public: + RandomSketchingFWHTHip(); + virtual ~RandomSketchingFWHTHip(); + + // Actual sketching process + virtual int Theta(vector_type* input, vector_type* output); + + // Setup the parameters, sampling matrices, permuations, etc + virtual int setup(index_type n, index_type k); + virtual int reset(); + + private: + index_type n_{0}; ///< size of base vector + index_type k_rand_{0}; ///< size of sketched vector + + index_type* h_seq_{nullptr}; ///< auxiliary variable used for Fisher-Yates algorithm + index_type* h_D_{nullptr}; ///< D is a diagonal matrix (FWHT computed y = PHDx), we store it as an array. D consists of _1_s and _-1_s + index_type* h_perm_{nullptr}; ///< permuation array, containing _k_ values in range of _0_ to _n-1_ + + index_type* d_D_{nullptr}; ///< device mirror of D + index_type* d_perm_{nullptr}; ///< device mirror of h_perm + real_type* d_aux_{nullptr}; ///< auxiliary variable needed to store partial results in FWHT application. + + index_type N_{0}; ///< padded vector size + index_type log2N_{0}; ///< log2 of N_, used multiple times so we store it + real_type one_over_k_{0.0}; ///< 1/k, used many times for scaling so we store the value to avoid recomputation + + MemoryHandler mem_; ///< Device memory manager object }; -} +} // namespace ReSolve diff --git a/resolve/random/RandomSketchingImpl.hpp b/resolve/random/RandomSketchingImpl.hpp index 84bb80246..df8187c41 100644 --- a/resolve/random/RandomSketchingImpl.hpp +++ b/resolve/random/RandomSketchingImpl.hpp @@ -2,45 +2,44 @@ * @file RandomSketchingImpl.hpp * @author Slaven Peles (peless@ornl.gov) * @brief Pure virtual RandomSketchingImpl class. - * + * */ #pragma once #include - namespace ReSolve { namespace vector { class Vector; } -} +} // namespace ReSolve namespace ReSolve { - /** + /** * @brief Interface to random sketching implementations. - * + * * All sketching methods inherit from this class. */ class RandomSketchingImpl { - public: - RandomSketchingImpl() - { - } + public: + RandomSketchingImpl() + { + } - virtual ~RandomSketchingImpl() - { - } + virtual ~RandomSketchingImpl() + { + } - // Actual sketching process - virtual int Theta(vector::Vector* input, vector::Vector* output) = 0; + // Actual sketching process + virtual int Theta(vector::Vector* input, vector::Vector* output) = 0; - // Setup the parameters, sampling matrices, permuations, etc - virtual int setup(index_type n, index_type k) = 0; + // Setup the parameters, sampling matrices, permuations, etc + virtual int setup(index_type n, index_type k) = 0; - // Needed for iterative methods with restarting - virtual int reset() = 0; + // Needed for iterative methods with restarting + virtual int reset() = 0; }; } // namespace ReSolve diff --git a/resolve/random/SketchingHandler.cpp b/resolve/random/SketchingHandler.cpp index e4b1008c1..b4ed40595 100644 --- a/resolve/random/SketchingHandler.cpp +++ b/resolve/random/SketchingHandler.cpp @@ -2,79 +2,86 @@ * @file SketchingHandler.cpp * @author Slaven Peles (peless@ornl.gov) * @brief Implementation of the SketchingHandler class - * + * */ -#include +#include "SketchingHandler.hpp" + +#include #include #include -#include +#include #include #include -#include -#include "SketchingHandler.hpp" +#include + +namespace ReSolve +{ -namespace ReSolve { - /** * @brief Constructor creates requested sketching method. - * + * * Create instance of the specified sketching method on the selected device. * - */ + */ SketchingHandler::SketchingHandler(SketchingMethod method, memory::DeviceType devtype) { - if (devtype == memory::NONE) { - switch (method) { - case LinSolverIterativeRandFGMRES::cs: - sketching_ = new RandomSketchingCountCpu(); - break; - case LinSolverIterativeRandFGMRES::fwht: - sketching_ = new RandomSketchingFWHTCpu(); - break; - default: - sketching_ = nullptr; - break; + if (devtype == memory::NONE) + { + switch (method) + { + case LinSolverIterativeRandFGMRES::cs: + sketching_ = new RandomSketchingCountCpu(); + break; + case LinSolverIterativeRandFGMRES::fwht: + sketching_ = new RandomSketchingFWHTCpu(); + break; + default: + sketching_ = nullptr; + break; } } #ifdef RESOLVE_USE_CUDA - if (devtype == memory::CUDADEVICE) { - switch (method) { - case LinSolverIterativeRandFGMRES::cs: - sketching_ = new RandomSketchingCountCuda(); - break; - case LinSolverIterativeRandFGMRES::fwht: - sketching_ = new RandomSketchingFWHTCuda(); - break; - default: - sketching_ = nullptr; - break; + if (devtype == memory::CUDADEVICE) + { + switch (method) + { + case LinSolverIterativeRandFGMRES::cs: + sketching_ = new RandomSketchingCountCuda(); + break; + case LinSolverIterativeRandFGMRES::fwht: + sketching_ = new RandomSketchingFWHTCuda(); + break; + default: + sketching_ = nullptr; + break; } } #endif #ifdef RESOLVE_USE_HIP - if (devtype == memory::HIPDEVICE) { - switch (method) { - case LinSolverIterativeRandFGMRES::cs: - sketching_ = new RandomSketchingCountHip(); - break; - case LinSolverIterativeRandFGMRES::fwht: - sketching_ = new RandomSketchingFWHTHip(); - break; - default: - sketching_ = nullptr; - break; + if (devtype == memory::HIPDEVICE) + { + switch (method) + { + case LinSolverIterativeRandFGMRES::cs: + sketching_ = new RandomSketchingCountHip(); + break; + case LinSolverIterativeRandFGMRES::fwht: + sketching_ = new RandomSketchingFWHTHip(); + break; + default: + sketching_ = nullptr; + break; } } #endif - } /** * @brief Destructor deletes the sketching method implementation. * - */ + */ SketchingHandler::~SketchingHandler() { delete sketching_; @@ -98,4 +105,4 @@ namespace ReSolve { return sketching_->reset(); } -} +} // namespace ReSolve diff --git a/resolve/random/SketchingHandler.hpp b/resolve/random/SketchingHandler.hpp index 3f23fdb79..2a0503794 100644 --- a/resolve/random/SketchingHandler.hpp +++ b/resolve/random/SketchingHandler.hpp @@ -2,7 +2,7 @@ * @file SketchingHandler.hpp * @author Slaven Peles (peless@ornl.gov) * @brief Declaration of SketchingHandler class - * + * */ #pragma once #include @@ -11,6 +11,7 @@ namespace ReSolve { // Forward declarations class RandomSketchingImpl; + namespace vector { class VectorHandler; @@ -18,28 +19,29 @@ namespace ReSolve /** * @brief Class that invokes sketching method using PIMPL idiom. - * + * */ class SketchingHandler { - private: - using SketchingMethod = LinSolverIterativeRandFGMRES::SketchingMethod; - using vector_type = vector::Vector; - public: - SketchingHandler(SketchingMethod method, memory::DeviceType devtype); - ~SketchingHandler(); + private: + using SketchingMethod = LinSolverIterativeRandFGMRES::SketchingMethod; + using vector_type = vector::Vector; + + public: + SketchingHandler(SketchingMethod method, memory::DeviceType devtype); + ~SketchingHandler(); - /// Actual sketching process - int Theta(vector_type* input, vector_type* output); + /// Actual sketching process + int Theta(vector_type* input, vector_type* output); - /// Setup the parameters, sampling matrices, permuations, etc. - int setup(index_type n, index_type k); + /// Setup the parameters, sampling matrices, permuations, etc. + int setup(index_type n, index_type k); - /// Needed for iterative methods with restarting - int reset(); + /// Needed for iterative methods with restarting + int reset(); - private: - RandomSketchingImpl* sketching_{nullptr}; ///< Pointer to implementation + private: + RandomSketchingImpl* sketching_{nullptr}; ///< Pointer to implementation }; } // namespace ReSolve \ No newline at end of file diff --git a/resolve/random/cpuSketchingKernels.cpp b/resolve/random/cpuSketchingKernels.cpp index ace5b907e..515036f9c 100644 --- a/resolve/random/cpuSketchingKernels.cpp +++ b/resolve/random/cpuSketchingKernels.cpp @@ -2,11 +2,12 @@ * @file cpuSketchingKernels.cpp * @author your name (you@domain.com) * @brief CPU implementation of random sketching kernels. - * + * */ +#include "cpuSketchingKernels.h" + #include #include -#include "cpuSketchingKernels.h" namespace ReSolve { @@ -14,115 +15,124 @@ namespace ReSolve { /** * @brief Count sketch theta function. - * + * * @param[in] n - input vector size * @param[in] k - output vector size * @param[in] labels - vector of non-negative ints from 0 to k-1, length n * @param[in] flip - vector of 1s and -1s, length n * @param[in] input - vector of lengths n * @param[out] output - vector of length k - * + * * @todo Decide how to allow user to configure grid and block sizes. */ void count_sketch_theta(index_type n, index_type /* k */, index_type* labels, index_type* flip, - real_type* input, - real_type* output) + real_type* input, + real_type* output) { real_type val; - for (index_type i = 0; i < n; ++i) { - val = input[i]; - if (flip[i] != 1) { + for (index_type i = 0; i < n; ++i) + { + val = input[i]; + if (flip[i] != 1) + { val *= -1.0; - } + } output[labels[i]] += val; } } /** * @brief y = D*x - * + * * Multiply array x by diagonal matrix D and store result in array y. - * + * * @param[in] n - size of arrays x, y and matrix D. * @param[in] D - diagonal matrix (stored as integer array). * @param[in] x - input array x * @param[out] y - output array y - * + * * @pre Arrays x, y, and D are allocated to size n. * @pre Arrays x and D are initialized. - * + * * @post Array y is overwritten with D*x. */ - void FWHT_scaleByD(index_type n, + void FWHT_scaleByD(index_type n, const index_type* D, - const real_type* x, - real_type* y) + const real_type* x, + real_type* y) { - for (index_type i = 0; i < n; ++i) { - if (D[i] == 1) { + for (index_type i = 0; i < n; ++i) + { + if (D[i] == 1) + { y[i] = x[i]; - } else { + } + else + { y[i] = (-1.0) * x[i]; } - } + } } - + /** * @brief Permute _input_ using _perm_ and store in _output_. - * + * * @param[in] k - size of input and output arrays * @param[in] perm - permutation matrix (stored as an integer array) * @param[in] input - input array * @param[out] output - output array - * + * * @pre Arrays input, output, and perm are allocated to size k. * @pre Arrays input and perm are initialized. - * + * * @post Array output is overwritten with permuted values of input. */ - void FWHT_select(index_type k, + void FWHT_select(index_type k, const index_type* perm, - const real_type* input, - real_type* output) + const real_type* input, + real_type* output) { - for (index_type i = 0; i < k; ++i) { + for (index_type i = 0; i < k; ++i) + { output[i] = input[perm[i]]; - } + } } /** - * @brief - * + * @brief + * * @param[in] M - Placeholder for GPU grid size (not used here) - * @param[in] log2N - - * @param[out] h_Data - + * @param[in] log2N - + * @param[out] h_Data - */ - void FWHT(index_type /* M */, - index_type log2N, - real_type* h_Data) + void FWHT(index_type /* M */, + index_type log2N, + real_type* h_Data) { index_type h = 1; - index_type N = static_cast(std::pow(2.0,log2N)); - real_type x, y; - - while (h < N) { - for (index_type i = 0; i < N; i += 2*h) { - for (index_type j = i; j < i + h; ++j) { - x = h_Data[j]; - y = h_Data[j + h]; - h_Data[j] = x + y; + index_type N = static_cast(std::pow(2.0, log2N)); + real_type x, y; + + while (h < N) + { + for (index_type i = 0; i < N; i += 2 * h) + { + for (index_type j = i; j < i + h; ++j) + { + x = h_Data[j]; + y = h_Data[j + h]; + h_Data[j] = x + y; h_Data[j + h] = x - y; - } + } } - // note: in "normal" FWHT there is also a division by sqrt(2) here + // note: in "normal" FWHT there is also a division by sqrt(2) here h *= 2; - } + } } } // namespace cpu } // namespace ReSolve - diff --git a/resolve/utilities/logger/Logger.cpp b/resolve/utilities/logger/Logger.cpp index f5227d951..5523d5b7e 100644 --- a/resolve/utilities/logger/Logger.cpp +++ b/resolve/utilities/logger/Logger.cpp @@ -4,9 +4,9 @@ * @author Slaven Peles */ +#include "Logger.hpp" #include -#include "Logger.hpp" namespace ReSolve { @@ -32,8 +32,8 @@ namespace ReSolve /** * @brief Sets verbosity level - * - * @pre `output_streams_` vector is allocated + * + * @pre `output_streams_` vector is allocated * @post Verbosity level is set to user supplied value `v` and outputs * for `output_streams_` are set accordingly. */ @@ -51,13 +51,13 @@ namespace ReSolve /** * @brief Private method to update verbosity. - * + * * This function directs each output stream <= `verbosity_` to user * selected output and sets all others to null device. Each output stream * corresponds to different verbosity level. - * + * * @param[in] output_streams - vector of pointers to output streams - * + * * @pre Vector `output_streams` is allocated and correctly initialized. * @post All streams `output_stream_[i]`, where `i <= verbosity_` are * directed to stream `logger_`. The rest are sent to null device @@ -83,9 +83,9 @@ namespace ReSolve /** * @brief Returns reference to output stream for error messages. - * + * * @return Reference to error messages stream in `output_streams_`. - * + * * @pre `output_streams_` vector is allocated and correctly initialized. */ std::ostream& Logger::error() @@ -97,9 +97,9 @@ namespace ReSolve /** * @brief Returns reference to output stream for warning messages. - * + * * @return Reference to warning messages stream in `output_streams_`. - * + * * @pre `output_streams_` vector is allocated and correctly initialized. */ std::ostream& Logger::warning() @@ -111,9 +111,9 @@ namespace ReSolve /** * @brief Returns reference to analysis summary messages output stream. - * + * * @return Reference to analysis summary messages stream in `output_streams_`. - * + * * @pre `output_streams_` vector is allocated and correctly initialized. */ std::ostream& Logger::summary() @@ -124,10 +124,10 @@ namespace ReSolve /** * @brief Returns reference to output stream for all other messages. - * + * * @return Reference to output stream to miscellaneous messages * in `output_streams_`. - * + * * @pre `output_streams_` vector is allocated and correctly initialized. */ std::ostream& Logger::misc() @@ -139,9 +139,9 @@ namespace ReSolve /** * @brief Open file `filename` and update outputs for different verbosities * streams. - * + * * @param[in] filename - The name of the output file. - * + * * @pre `output_streams_` vector is allocated and correctly initialized. * @post All active streams are directed to user supplied file `filename`. */ @@ -154,12 +154,12 @@ namespace ReSolve /** * @brief Set outputs of active streams to user provided `std::ostream` object. - * + * * All active outputs are redirected to `out` stream. All inactive ones are * directed to null device. - * + * * @param[in] out - User provided output stream. - * + * * @pre `output_streams_` vector is allocated and correctly initialized. * @post All active streams (`output_streams_[i]` where `i <= verbosity_`) * are set to user provided `out` output stream. @@ -172,7 +172,7 @@ namespace ReSolve /** * @brief Close output file. - * + * * @pre Output file `file_` has been opened. * @post Output file `file_` is closed and active output streams are * set to default output `std::cout`. diff --git a/resolve/utilities/logger/Logger.hpp b/resolve/utilities/logger/Logger.hpp index 8b3edbdfd..065f8f4b1 100644 --- a/resolve/utilities/logger/Logger.hpp +++ b/resolve/utilities/logger/Logger.hpp @@ -1,11 +1,11 @@ /** - * @file -*/ + * @file + */ #pragma once -#include #include +#include #include namespace ReSolve @@ -14,43 +14,50 @@ namespace ReSolve { /** * @brief Class that manages and logs outputs from Re::Solve code. - * + * * All methods and data in this class are static. - * + * */ class Logger { - public: - /// Enum specifying verbosity level for the output. - enum Verbosity {NONE=0, ERRORS, WARNINGS, SUMMARY, EVERYTHING}; - - // All methods and data are static so delete constructor and destructor. - Logger() = delete; - ~Logger() = delete; - - static std::ostream& error(); - static std::ostream& warning(); - static std::ostream& summary(); - static std::ostream& misc(); - - static void setOutput(std::ostream& out); - static void openOutputFile(std::string filename); - static void closeOutputFile(); - static void setVerbosity(Verbosity v); - static Verbosity verbosity(); - - static std::vector& init(); - - private: - static void updateVerbosity(std::vector& output_streams); - - private: - static std::ostream nullstream_; - static std::ofstream file_; - static std::ostream* logger_; - static std::vector output_streams_; - static std::vector tmp_; - static Verbosity verbosity_; + public: + /// Enum specifying verbosity level for the output. + enum Verbosity + { + NONE = 0, + ERRORS, + WARNINGS, + SUMMARY, + EVERYTHING + }; + + // All methods and data are static so delete constructor and destructor. + Logger() = delete; + ~Logger() = delete; + + static std::ostream& error(); + static std::ostream& warning(); + static std::ostream& summary(); + static std::ostream& misc(); + + static void setOutput(std::ostream& out); + static void openOutputFile(std::string filename); + static void closeOutputFile(); + static void setVerbosity(Verbosity v); + static Verbosity verbosity(); + + static std::vector& init(); + + private: + static void updateVerbosity(std::vector& output_streams); + + private: + static std::ostream nullstream_; + static std::ofstream file_; + static std::ostream* logger_; + static std::vector output_streams_; + static std::vector tmp_; + static Verbosity verbosity_; }; } // namespace io -} //namespace ReSolve +} // namespace ReSolve diff --git a/resolve/utilities/params/CliOptions.cpp b/resolve/utilities/params/CliOptions.cpp index b65c2492b..7d3ad6084 100644 --- a/resolve/utilities/params/CliOptions.cpp +++ b/resolve/utilities/params/CliOptions.cpp @@ -1,7 +1,7 @@ -#include - #include "CliOptions.hpp" +#include + namespace ReSolve { @@ -29,9 +29,10 @@ namespace ReSolve std::unique_ptr CliOptions::getParamFromKey(const std::string& key) const { - const OptionsList::const_iterator i = options_.find(key); - auto opt = std::unique_ptr(nullptr); - if (i != options_.end()) { + const OptionsList::const_iterator i = options_.find(key); + auto opt = std::unique_ptr(nullptr); + if (i != options_.end()) + { opt = std::unique_ptr(new CliOptions::Option((*i).first, (*i).second)); } return opt; @@ -40,13 +41,15 @@ namespace ReSolve void CliOptions::printOptionsList() const { OptionsList::const_iterator m = options_.begin(); - int i = 0; - if (options_.empty()) { - std::cout << "No parameters\n"; + int i = 0; + if (options_.empty()) + { + std::cout << "No parameters\n"; } - for (; m != options_.end(); m++, ++i) { + for (; m != options_.end(); m++, ++i) + { std::cout << "Parameter [" << i << "] [" - << (*m).first << " " + << (*m).first << " " << (*m).second << "]\n"; } } @@ -87,7 +90,7 @@ namespace ReSolve // Set option without parameter value options_.insert(Option(option->first, option->second)); // Set parameter ID for the next option and continue - option->first = p; + option->first = p; option->second = ""; if (i == this->last()) { @@ -103,27 +106,26 @@ namespace ReSolve // Set option with parameter value options_.insert(Option(option->first, option->second)); // Reset 'option' pair to receive the next entry and continue - option->first = ""; + option->first = ""; option->second = ""; continue; } } } - const char* const *CliOptions::begin() const + const char* const* CliOptions::begin() const { - return argv_; + return argv_; } - const char* const *CliOptions::end() const + const char* const* CliOptions::end() const { - return argv_ + argc_; + return argv_ + argc_; } - const char* const *CliOptions::last() const + const char* const* CliOptions::last() const { - return argv_ + argc_ - 1; + return argv_ + argc_ - 1; } - } // namespace ReSolve diff --git a/resolve/utilities/params/CliOptions.hpp b/resolve/utilities/params/CliOptions.hpp index 78622bf91..5514e324d 100644 --- a/resolve/utilities/params/CliOptions.hpp +++ b/resolve/utilities/params/CliOptions.hpp @@ -1,8 +1,8 @@ #pragma once #include -#include #include +#include namespace ReSolve { @@ -14,24 +14,25 @@ namespace ReSolve */ class CliOptions { - public: - using Option = std::pair; - CliOptions(int argc, char* argv[]); - virtual ~CliOptions(); - std::string getAppName() const; - bool hasKey(const std::string&) const; - std::unique_ptr